Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at v3.7 2274 lines 56 kB view raw
1/* 2 * Copyright (c) 2000-2006 Silicon Graphics, Inc. 3 * All Rights Reserved. 4 * 5 * This program is free software; you can redistribute it and/or 6 * modify it under the terms of the GNU General Public License as 7 * published by the Free Software Foundation. 8 * 9 * This program is distributed in the hope that it would be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 * GNU General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program; if not, write the Free Software Foundation, 16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 17 */ 18 19#include "xfs.h" 20#include "xfs_fs.h" 21#include "xfs_types.h" 22#include "xfs_bit.h" 23#include "xfs_log.h" 24#include "xfs_trans.h" 25#include "xfs_sb.h" 26#include "xfs_ag.h" 27#include "xfs_dir2.h" 28#include "xfs_mount.h" 29#include "xfs_da_btree.h" 30#include "xfs_bmap_btree.h" 31#include "xfs_ialloc_btree.h" 32#include "xfs_dinode.h" 33#include "xfs_inode.h" 34#include "xfs_inode_item.h" 35#include "xfs_itable.h" 36#include "xfs_ialloc.h" 37#include "xfs_alloc.h" 38#include "xfs_bmap.h" 39#include "xfs_acl.h" 40#include "xfs_attr.h" 41#include "xfs_error.h" 42#include "xfs_quota.h" 43#include "xfs_utils.h" 44#include "xfs_rtalloc.h" 45#include "xfs_trans_space.h" 46#include "xfs_log_priv.h" 47#include "xfs_filestream.h" 48#include "xfs_vnodeops.h" 49#include "xfs_trace.h" 50 51/* 52 * The maximum pathlen is 1024 bytes. Since the minimum file system 53 * blocksize is 512 bytes, we can get a max of 2 extents back from 54 * bmapi. 55 */ 56#define SYMLINK_MAPS 2 57 58STATIC int 59xfs_readlink_bmap( 60 xfs_inode_t *ip, 61 char *link) 62{ 63 xfs_mount_t *mp = ip->i_mount; 64 int pathlen = ip->i_d.di_size; 65 int nmaps = SYMLINK_MAPS; 66 xfs_bmbt_irec_t mval[SYMLINK_MAPS]; 67 xfs_daddr_t d; 68 int byte_cnt; 69 int n; 70 xfs_buf_t *bp; 71 int error = 0; 72 73 error = xfs_bmapi_read(ip, 0, XFS_B_TO_FSB(mp, pathlen), mval, &nmaps, 74 0); 75 if (error) 76 goto out; 77 78 for (n = 0; n < nmaps; n++) { 79 d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock); 80 byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount); 81 82 bp = xfs_buf_read(mp->m_ddev_targp, d, BTOBB(byte_cnt), 0); 83 if (!bp) 84 return XFS_ERROR(ENOMEM); 85 error = bp->b_error; 86 if (error) { 87 xfs_buf_ioerror_alert(bp, __func__); 88 xfs_buf_relse(bp); 89 goto out; 90 } 91 if (pathlen < byte_cnt) 92 byte_cnt = pathlen; 93 pathlen -= byte_cnt; 94 95 memcpy(link, bp->b_addr, byte_cnt); 96 xfs_buf_relse(bp); 97 } 98 99 link[ip->i_d.di_size] = '\0'; 100 error = 0; 101 102 out: 103 return error; 104} 105 106int 107xfs_readlink( 108 xfs_inode_t *ip, 109 char *link) 110{ 111 xfs_mount_t *mp = ip->i_mount; 112 xfs_fsize_t pathlen; 113 int error = 0; 114 115 trace_xfs_readlink(ip); 116 117 if (XFS_FORCED_SHUTDOWN(mp)) 118 return XFS_ERROR(EIO); 119 120 xfs_ilock(ip, XFS_ILOCK_SHARED); 121 122 pathlen = ip->i_d.di_size; 123 if (!pathlen) 124 goto out; 125 126 if (pathlen < 0 || pathlen > MAXPATHLEN) { 127 xfs_alert(mp, "%s: inode (%llu) bad symlink length (%lld)", 128 __func__, (unsigned long long) ip->i_ino, 129 (long long) pathlen); 130 ASSERT(0); 131 error = XFS_ERROR(EFSCORRUPTED); 132 goto out; 133 } 134 135 136 if (ip->i_df.if_flags & XFS_IFINLINE) { 137 memcpy(link, ip->i_df.if_u1.if_data, pathlen); 138 link[pathlen] = '\0'; 139 } else { 140 error = xfs_readlink_bmap(ip, link); 141 } 142 143 out: 144 xfs_iunlock(ip, XFS_ILOCK_SHARED); 145 return error; 146} 147 148/* 149 * This is called by xfs_inactive to free any blocks beyond eof 150 * when the link count isn't zero and by xfs_dm_punch_hole() when 151 * punching a hole to EOF. 152 */ 153STATIC int 154xfs_free_eofblocks( 155 xfs_mount_t *mp, 156 xfs_inode_t *ip, 157 bool need_iolock) 158{ 159 xfs_trans_t *tp; 160 int error; 161 xfs_fileoff_t end_fsb; 162 xfs_fileoff_t last_fsb; 163 xfs_filblks_t map_len; 164 int nimaps; 165 xfs_bmbt_irec_t imap; 166 167 /* 168 * Figure out if there are any blocks beyond the end 169 * of the file. If not, then there is nothing to do. 170 */ 171 end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_ISIZE(ip)); 172 last_fsb = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes); 173 if (last_fsb <= end_fsb) 174 return 0; 175 map_len = last_fsb - end_fsb; 176 177 nimaps = 1; 178 xfs_ilock(ip, XFS_ILOCK_SHARED); 179 error = xfs_bmapi_read(ip, end_fsb, map_len, &imap, &nimaps, 0); 180 xfs_iunlock(ip, XFS_ILOCK_SHARED); 181 182 if (!error && (nimaps != 0) && 183 (imap.br_startblock != HOLESTARTBLOCK || 184 ip->i_delayed_blks)) { 185 /* 186 * Attach the dquots to the inode up front. 187 */ 188 error = xfs_qm_dqattach(ip, 0); 189 if (error) 190 return error; 191 192 /* 193 * There are blocks after the end of file. 194 * Free them up now by truncating the file to 195 * its current size. 196 */ 197 tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE); 198 199 if (need_iolock) { 200 if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) { 201 xfs_trans_cancel(tp, 0); 202 return 0; 203 } 204 } 205 206 error = xfs_trans_reserve(tp, 0, 207 XFS_ITRUNCATE_LOG_RES(mp), 208 0, XFS_TRANS_PERM_LOG_RES, 209 XFS_ITRUNCATE_LOG_COUNT); 210 if (error) { 211 ASSERT(XFS_FORCED_SHUTDOWN(mp)); 212 xfs_trans_cancel(tp, 0); 213 if (need_iolock) 214 xfs_iunlock(ip, XFS_IOLOCK_EXCL); 215 return error; 216 } 217 218 xfs_ilock(ip, XFS_ILOCK_EXCL); 219 xfs_trans_ijoin(tp, ip, 0); 220 221 /* 222 * Do not update the on-disk file size. If we update the 223 * on-disk file size and then the system crashes before the 224 * contents of the file are flushed to disk then the files 225 * may be full of holes (ie NULL files bug). 226 */ 227 error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK, 228 XFS_ISIZE(ip)); 229 if (error) { 230 /* 231 * If we get an error at this point we simply don't 232 * bother truncating the file. 233 */ 234 xfs_trans_cancel(tp, 235 (XFS_TRANS_RELEASE_LOG_RES | 236 XFS_TRANS_ABORT)); 237 } else { 238 error = xfs_trans_commit(tp, 239 XFS_TRANS_RELEASE_LOG_RES); 240 } 241 242 xfs_iunlock(ip, XFS_ILOCK_EXCL); 243 if (need_iolock) 244 xfs_iunlock(ip, XFS_IOLOCK_EXCL); 245 } 246 return error; 247} 248 249/* 250 * Free a symlink that has blocks associated with it. 251 */ 252STATIC int 253xfs_inactive_symlink_rmt( 254 xfs_inode_t *ip, 255 xfs_trans_t **tpp) 256{ 257 xfs_buf_t *bp; 258 int committed; 259 int done; 260 int error; 261 xfs_fsblock_t first_block; 262 xfs_bmap_free_t free_list; 263 int i; 264 xfs_mount_t *mp; 265 xfs_bmbt_irec_t mval[SYMLINK_MAPS]; 266 int nmaps; 267 xfs_trans_t *ntp; 268 int size; 269 xfs_trans_t *tp; 270 271 tp = *tpp; 272 mp = ip->i_mount; 273 ASSERT(ip->i_d.di_size > XFS_IFORK_DSIZE(ip)); 274 /* 275 * We're freeing a symlink that has some 276 * blocks allocated to it. Free the 277 * blocks here. We know that we've got 278 * either 1 or 2 extents and that we can 279 * free them all in one bunmapi call. 280 */ 281 ASSERT(ip->i_d.di_nextents > 0 && ip->i_d.di_nextents <= 2); 282 283 /* 284 * Lock the inode, fix the size, and join it to the transaction. 285 * Hold it so in the normal path, we still have it locked for 286 * the second transaction. In the error paths we need it 287 * held so the cancel won't rele it, see below. 288 */ 289 size = (int)ip->i_d.di_size; 290 ip->i_d.di_size = 0; 291 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 292 /* 293 * Find the block(s) so we can inval and unmap them. 294 */ 295 done = 0; 296 xfs_bmap_init(&free_list, &first_block); 297 nmaps = ARRAY_SIZE(mval); 298 error = xfs_bmapi_read(ip, 0, XFS_B_TO_FSB(mp, size), 299 mval, &nmaps, 0); 300 if (error) 301 goto error0; 302 /* 303 * Invalidate the block(s). 304 */ 305 for (i = 0; i < nmaps; i++) { 306 bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, 307 XFS_FSB_TO_DADDR(mp, mval[i].br_startblock), 308 XFS_FSB_TO_BB(mp, mval[i].br_blockcount), 0); 309 if (!bp) { 310 error = ENOMEM; 311 goto error1; 312 } 313 xfs_trans_binval(tp, bp); 314 } 315 /* 316 * Unmap the dead block(s) to the free_list. 317 */ 318 if ((error = xfs_bunmapi(tp, ip, 0, size, XFS_BMAPI_METADATA, nmaps, 319 &first_block, &free_list, &done))) 320 goto error1; 321 ASSERT(done); 322 /* 323 * Commit the first transaction. This logs the EFI and the inode. 324 */ 325 if ((error = xfs_bmap_finish(&tp, &free_list, &committed))) 326 goto error1; 327 /* 328 * The transaction must have been committed, since there were 329 * actually extents freed by xfs_bunmapi. See xfs_bmap_finish. 330 * The new tp has the extent freeing and EFDs. 331 */ 332 ASSERT(committed); 333 /* 334 * The first xact was committed, so add the inode to the new one. 335 * Mark it dirty so it will be logged and moved forward in the log as 336 * part of every commit. 337 */ 338 xfs_trans_ijoin(tp, ip, 0); 339 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 340 /* 341 * Get a new, empty transaction to return to our caller. 342 */ 343 ntp = xfs_trans_dup(tp); 344 /* 345 * Commit the transaction containing extent freeing and EFDs. 346 * If we get an error on the commit here or on the reserve below, 347 * we need to unlock the inode since the new transaction doesn't 348 * have the inode attached. 349 */ 350 error = xfs_trans_commit(tp, 0); 351 tp = ntp; 352 if (error) { 353 ASSERT(XFS_FORCED_SHUTDOWN(mp)); 354 goto error0; 355 } 356 /* 357 * transaction commit worked ok so we can drop the extra ticket 358 * reference that we gained in xfs_trans_dup() 359 */ 360 xfs_log_ticket_put(tp->t_ticket); 361 362 /* 363 * Remove the memory for extent descriptions (just bookkeeping). 364 */ 365 if (ip->i_df.if_bytes) 366 xfs_idata_realloc(ip, -ip->i_df.if_bytes, XFS_DATA_FORK); 367 ASSERT(ip->i_df.if_bytes == 0); 368 /* 369 * Put an itruncate log reservation in the new transaction 370 * for our caller. 371 */ 372 if ((error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0, 373 XFS_TRANS_PERM_LOG_RES, XFS_ITRUNCATE_LOG_COUNT))) { 374 ASSERT(XFS_FORCED_SHUTDOWN(mp)); 375 goto error0; 376 } 377 378 xfs_trans_ijoin(tp, ip, 0); 379 *tpp = tp; 380 return 0; 381 382 error1: 383 xfs_bmap_cancel(&free_list); 384 error0: 385 return error; 386} 387 388int 389xfs_release( 390 xfs_inode_t *ip) 391{ 392 xfs_mount_t *mp = ip->i_mount; 393 int error; 394 395 if (!S_ISREG(ip->i_d.di_mode) || (ip->i_d.di_mode == 0)) 396 return 0; 397 398 /* If this is a read-only mount, don't do this (would generate I/O) */ 399 if (mp->m_flags & XFS_MOUNT_RDONLY) 400 return 0; 401 402 if (!XFS_FORCED_SHUTDOWN(mp)) { 403 int truncated; 404 405 /* 406 * If we are using filestreams, and we have an unlinked 407 * file that we are processing the last close on, then nothing 408 * will be able to reopen and write to this file. Purge this 409 * inode from the filestreams cache so that it doesn't delay 410 * teardown of the inode. 411 */ 412 if ((ip->i_d.di_nlink == 0) && xfs_inode_is_filestream(ip)) 413 xfs_filestream_deassociate(ip); 414 415 /* 416 * If we previously truncated this file and removed old data 417 * in the process, we want to initiate "early" writeout on 418 * the last close. This is an attempt to combat the notorious 419 * NULL files problem which is particularly noticeable from a 420 * truncate down, buffered (re-)write (delalloc), followed by 421 * a crash. What we are effectively doing here is 422 * significantly reducing the time window where we'd otherwise 423 * be exposed to that problem. 424 */ 425 truncated = xfs_iflags_test_and_clear(ip, XFS_ITRUNCATED); 426 if (truncated) { 427 xfs_iflags_clear(ip, XFS_IDIRTY_RELEASE); 428 if (VN_DIRTY(VFS_I(ip)) && ip->i_delayed_blks > 0) 429 xfs_flush_pages(ip, 0, -1, XBF_ASYNC, FI_NONE); 430 } 431 } 432 433 if (ip->i_d.di_nlink == 0) 434 return 0; 435 436 if ((S_ISREG(ip->i_d.di_mode) && 437 (VFS_I(ip)->i_size > 0 || 438 (VN_CACHED(VFS_I(ip)) > 0 || ip->i_delayed_blks > 0)) && 439 (ip->i_df.if_flags & XFS_IFEXTENTS)) && 440 (!(ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))) { 441 442 /* 443 * If we can't get the iolock just skip truncating the blocks 444 * past EOF because we could deadlock with the mmap_sem 445 * otherwise. We'll get another chance to drop them once the 446 * last reference to the inode is dropped, so we'll never leak 447 * blocks permanently. 448 * 449 * Further, check if the inode is being opened, written and 450 * closed frequently and we have delayed allocation blocks 451 * outstanding (e.g. streaming writes from the NFS server), 452 * truncating the blocks past EOF will cause fragmentation to 453 * occur. 454 * 455 * In this case don't do the truncation, either, but we have to 456 * be careful how we detect this case. Blocks beyond EOF show 457 * up as i_delayed_blks even when the inode is clean, so we 458 * need to truncate them away first before checking for a dirty 459 * release. Hence on the first dirty close we will still remove 460 * the speculative allocation, but after that we will leave it 461 * in place. 462 */ 463 if (xfs_iflags_test(ip, XFS_IDIRTY_RELEASE)) 464 return 0; 465 466 error = xfs_free_eofblocks(mp, ip, true); 467 if (error) 468 return error; 469 470 /* delalloc blocks after truncation means it really is dirty */ 471 if (ip->i_delayed_blks) 472 xfs_iflags_set(ip, XFS_IDIRTY_RELEASE); 473 } 474 return 0; 475} 476 477/* 478 * xfs_inactive 479 * 480 * This is called when the vnode reference count for the vnode 481 * goes to zero. If the file has been unlinked, then it must 482 * now be truncated. Also, we clear all of the read-ahead state 483 * kept for the inode here since the file is now closed. 484 */ 485int 486xfs_inactive( 487 xfs_inode_t *ip) 488{ 489 xfs_bmap_free_t free_list; 490 xfs_fsblock_t first_block; 491 int committed; 492 xfs_trans_t *tp; 493 xfs_mount_t *mp; 494 int error; 495 int truncate = 0; 496 497 /* 498 * If the inode is already free, then there can be nothing 499 * to clean up here. 500 */ 501 if (ip->i_d.di_mode == 0 || is_bad_inode(VFS_I(ip))) { 502 ASSERT(ip->i_df.if_real_bytes == 0); 503 ASSERT(ip->i_df.if_broot_bytes == 0); 504 return VN_INACTIVE_CACHE; 505 } 506 507 mp = ip->i_mount; 508 509 error = 0; 510 511 /* If this is a read-only mount, don't do this (would generate I/O) */ 512 if (mp->m_flags & XFS_MOUNT_RDONLY) 513 goto out; 514 515 if (ip->i_d.di_nlink != 0) { 516 if ((S_ISREG(ip->i_d.di_mode) && 517 (VFS_I(ip)->i_size > 0 || 518 (VN_CACHED(VFS_I(ip)) > 0 || ip->i_delayed_blks > 0)) && 519 (ip->i_df.if_flags & XFS_IFEXTENTS) && 520 (!(ip->i_d.di_flags & 521 (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) || 522 ip->i_delayed_blks != 0))) { 523 error = xfs_free_eofblocks(mp, ip, false); 524 if (error) 525 return VN_INACTIVE_CACHE; 526 } 527 goto out; 528 } 529 530 if (S_ISREG(ip->i_d.di_mode) && 531 (ip->i_d.di_size != 0 || XFS_ISIZE(ip) != 0 || 532 ip->i_d.di_nextents > 0 || ip->i_delayed_blks > 0)) 533 truncate = 1; 534 535 error = xfs_qm_dqattach(ip, 0); 536 if (error) 537 return VN_INACTIVE_CACHE; 538 539 tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE); 540 error = xfs_trans_reserve(tp, 0, 541 (truncate || S_ISLNK(ip->i_d.di_mode)) ? 542 XFS_ITRUNCATE_LOG_RES(mp) : 543 XFS_IFREE_LOG_RES(mp), 544 0, 545 XFS_TRANS_PERM_LOG_RES, 546 XFS_ITRUNCATE_LOG_COUNT); 547 if (error) { 548 ASSERT(XFS_FORCED_SHUTDOWN(mp)); 549 xfs_trans_cancel(tp, 0); 550 return VN_INACTIVE_CACHE; 551 } 552 553 xfs_ilock(ip, XFS_ILOCK_EXCL); 554 xfs_trans_ijoin(tp, ip, 0); 555 556 if (S_ISLNK(ip->i_d.di_mode)) { 557 /* 558 * Zero length symlinks _can_ exist. 559 */ 560 if (ip->i_d.di_size > XFS_IFORK_DSIZE(ip)) { 561 error = xfs_inactive_symlink_rmt(ip, &tp); 562 if (error) 563 goto out_cancel; 564 } else if (ip->i_df.if_bytes > 0) { 565 xfs_idata_realloc(ip, -(ip->i_df.if_bytes), 566 XFS_DATA_FORK); 567 ASSERT(ip->i_df.if_bytes == 0); 568 } 569 } else if (truncate) { 570 ip->i_d.di_size = 0; 571 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 572 573 error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK, 0); 574 if (error) 575 goto out_cancel; 576 577 ASSERT(ip->i_d.di_nextents == 0); 578 } 579 580 /* 581 * If there are attributes associated with the file then blow them away 582 * now. The code calls a routine that recursively deconstructs the 583 * attribute fork. We need to just commit the current transaction 584 * because we can't use it for xfs_attr_inactive(). 585 */ 586 if (ip->i_d.di_anextents > 0) { 587 ASSERT(ip->i_d.di_forkoff != 0); 588 589 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 590 if (error) 591 goto out_unlock; 592 593 xfs_iunlock(ip, XFS_ILOCK_EXCL); 594 595 error = xfs_attr_inactive(ip); 596 if (error) 597 goto out; 598 599 tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE); 600 error = xfs_trans_reserve(tp, 0, 601 XFS_IFREE_LOG_RES(mp), 602 0, XFS_TRANS_PERM_LOG_RES, 603 XFS_INACTIVE_LOG_COUNT); 604 if (error) { 605 xfs_trans_cancel(tp, 0); 606 goto out; 607 } 608 609 xfs_ilock(ip, XFS_ILOCK_EXCL); 610 xfs_trans_ijoin(tp, ip, 0); 611 } 612 613 if (ip->i_afp) 614 xfs_idestroy_fork(ip, XFS_ATTR_FORK); 615 616 ASSERT(ip->i_d.di_anextents == 0); 617 618 /* 619 * Free the inode. 620 */ 621 xfs_bmap_init(&free_list, &first_block); 622 error = xfs_ifree(tp, ip, &free_list); 623 if (error) { 624 /* 625 * If we fail to free the inode, shut down. The cancel 626 * might do that, we need to make sure. Otherwise the 627 * inode might be lost for a long time or forever. 628 */ 629 if (!XFS_FORCED_SHUTDOWN(mp)) { 630 xfs_notice(mp, "%s: xfs_ifree returned error %d", 631 __func__, error); 632 xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); 633 } 634 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT); 635 } else { 636 /* 637 * Credit the quota account(s). The inode is gone. 638 */ 639 xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_ICOUNT, -1); 640 641 /* 642 * Just ignore errors at this point. There is nothing we can 643 * do except to try to keep going. Make sure it's not a silent 644 * error. 645 */ 646 error = xfs_bmap_finish(&tp, &free_list, &committed); 647 if (error) 648 xfs_notice(mp, "%s: xfs_bmap_finish returned error %d", 649 __func__, error); 650 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 651 if (error) 652 xfs_notice(mp, "%s: xfs_trans_commit returned error %d", 653 __func__, error); 654 } 655 656 /* 657 * Release the dquots held by inode, if any. 658 */ 659 xfs_qm_dqdetach(ip); 660out_unlock: 661 xfs_iunlock(ip, XFS_ILOCK_EXCL); 662out: 663 return VN_INACTIVE_CACHE; 664out_cancel: 665 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); 666 goto out_unlock; 667} 668 669/* 670 * Lookups up an inode from "name". If ci_name is not NULL, then a CI match 671 * is allowed, otherwise it has to be an exact match. If a CI match is found, 672 * ci_name->name will point to a the actual name (caller must free) or 673 * will be set to NULL if an exact match is found. 674 */ 675int 676xfs_lookup( 677 xfs_inode_t *dp, 678 struct xfs_name *name, 679 xfs_inode_t **ipp, 680 struct xfs_name *ci_name) 681{ 682 xfs_ino_t inum; 683 int error; 684 uint lock_mode; 685 686 trace_xfs_lookup(dp, name); 687 688 if (XFS_FORCED_SHUTDOWN(dp->i_mount)) 689 return XFS_ERROR(EIO); 690 691 lock_mode = xfs_ilock_map_shared(dp); 692 error = xfs_dir_lookup(NULL, dp, name, &inum, ci_name); 693 xfs_iunlock_map_shared(dp, lock_mode); 694 695 if (error) 696 goto out; 697 698 error = xfs_iget(dp->i_mount, NULL, inum, 0, 0, ipp); 699 if (error) 700 goto out_free_name; 701 702 return 0; 703 704out_free_name: 705 if (ci_name) 706 kmem_free(ci_name->name); 707out: 708 *ipp = NULL; 709 return error; 710} 711 712int 713xfs_create( 714 xfs_inode_t *dp, 715 struct xfs_name *name, 716 umode_t mode, 717 xfs_dev_t rdev, 718 xfs_inode_t **ipp) 719{ 720 int is_dir = S_ISDIR(mode); 721 struct xfs_mount *mp = dp->i_mount; 722 struct xfs_inode *ip = NULL; 723 struct xfs_trans *tp = NULL; 724 int error; 725 xfs_bmap_free_t free_list; 726 xfs_fsblock_t first_block; 727 boolean_t unlock_dp_on_error = B_FALSE; 728 uint cancel_flags; 729 int committed; 730 prid_t prid; 731 struct xfs_dquot *udqp = NULL; 732 struct xfs_dquot *gdqp = NULL; 733 uint resblks; 734 uint log_res; 735 uint log_count; 736 737 trace_xfs_create(dp, name); 738 739 if (XFS_FORCED_SHUTDOWN(mp)) 740 return XFS_ERROR(EIO); 741 742 if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) 743 prid = xfs_get_projid(dp); 744 else 745 prid = XFS_PROJID_DEFAULT; 746 747 /* 748 * Make sure that we have allocated dquot(s) on disk. 749 */ 750 error = xfs_qm_vop_dqalloc(dp, current_fsuid(), current_fsgid(), prid, 751 XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp); 752 if (error) 753 return error; 754 755 if (is_dir) { 756 rdev = 0; 757 resblks = XFS_MKDIR_SPACE_RES(mp, name->len); 758 log_res = XFS_MKDIR_LOG_RES(mp); 759 log_count = XFS_MKDIR_LOG_COUNT; 760 tp = xfs_trans_alloc(mp, XFS_TRANS_MKDIR); 761 } else { 762 resblks = XFS_CREATE_SPACE_RES(mp, name->len); 763 log_res = XFS_CREATE_LOG_RES(mp); 764 log_count = XFS_CREATE_LOG_COUNT; 765 tp = xfs_trans_alloc(mp, XFS_TRANS_CREATE); 766 } 767 768 cancel_flags = XFS_TRANS_RELEASE_LOG_RES; 769 770 /* 771 * Initially assume that the file does not exist and 772 * reserve the resources for that case. If that is not 773 * the case we'll drop the one we have and get a more 774 * appropriate transaction later. 775 */ 776 error = xfs_trans_reserve(tp, resblks, log_res, 0, 777 XFS_TRANS_PERM_LOG_RES, log_count); 778 if (error == ENOSPC) { 779 /* flush outstanding delalloc blocks and retry */ 780 xfs_flush_inodes(dp); 781 error = xfs_trans_reserve(tp, resblks, log_res, 0, 782 XFS_TRANS_PERM_LOG_RES, log_count); 783 } 784 if (error == ENOSPC) { 785 /* No space at all so try a "no-allocation" reservation */ 786 resblks = 0; 787 error = xfs_trans_reserve(tp, 0, log_res, 0, 788 XFS_TRANS_PERM_LOG_RES, log_count); 789 } 790 if (error) { 791 cancel_flags = 0; 792 goto out_trans_cancel; 793 } 794 795 xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT); 796 unlock_dp_on_error = B_TRUE; 797 798 xfs_bmap_init(&free_list, &first_block); 799 800 /* 801 * Reserve disk quota and the inode. 802 */ 803 error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp, resblks, 1, 0); 804 if (error) 805 goto out_trans_cancel; 806 807 error = xfs_dir_canenter(tp, dp, name, resblks); 808 if (error) 809 goto out_trans_cancel; 810 811 /* 812 * A newly created regular or special file just has one directory 813 * entry pointing to them, but a directory also the "." entry 814 * pointing to itself. 815 */ 816 error = xfs_dir_ialloc(&tp, dp, mode, is_dir ? 2 : 1, rdev, 817 prid, resblks > 0, &ip, &committed); 818 if (error) { 819 if (error == ENOSPC) 820 goto out_trans_cancel; 821 goto out_trans_abort; 822 } 823 824 /* 825 * Now we join the directory inode to the transaction. We do not do it 826 * earlier because xfs_dir_ialloc might commit the previous transaction 827 * (and release all the locks). An error from here on will result in 828 * the transaction cancel unlocking dp so don't do it explicitly in the 829 * error path. 830 */ 831 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); 832 unlock_dp_on_error = B_FALSE; 833 834 error = xfs_dir_createname(tp, dp, name, ip->i_ino, 835 &first_block, &free_list, resblks ? 836 resblks - XFS_IALLOC_SPACE_RES(mp) : 0); 837 if (error) { 838 ASSERT(error != ENOSPC); 839 goto out_trans_abort; 840 } 841 xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 842 xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); 843 844 if (is_dir) { 845 error = xfs_dir_init(tp, ip, dp); 846 if (error) 847 goto out_bmap_cancel; 848 849 error = xfs_bumplink(tp, dp); 850 if (error) 851 goto out_bmap_cancel; 852 } 853 854 /* 855 * If this is a synchronous mount, make sure that the 856 * create transaction goes to disk before returning to 857 * the user. 858 */ 859 if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) 860 xfs_trans_set_sync(tp); 861 862 /* 863 * Attach the dquot(s) to the inodes and modify them incore. 864 * These ids of the inode couldn't have changed since the new 865 * inode has been locked ever since it was created. 866 */ 867 xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp); 868 869 error = xfs_bmap_finish(&tp, &free_list, &committed); 870 if (error) 871 goto out_bmap_cancel; 872 873 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 874 if (error) 875 goto out_release_inode; 876 877 xfs_qm_dqrele(udqp); 878 xfs_qm_dqrele(gdqp); 879 880 *ipp = ip; 881 return 0; 882 883 out_bmap_cancel: 884 xfs_bmap_cancel(&free_list); 885 out_trans_abort: 886 cancel_flags |= XFS_TRANS_ABORT; 887 out_trans_cancel: 888 xfs_trans_cancel(tp, cancel_flags); 889 out_release_inode: 890 /* 891 * Wait until after the current transaction is aborted to 892 * release the inode. This prevents recursive transactions 893 * and deadlocks from xfs_inactive. 894 */ 895 if (ip) 896 IRELE(ip); 897 898 xfs_qm_dqrele(udqp); 899 xfs_qm_dqrele(gdqp); 900 901 if (unlock_dp_on_error) 902 xfs_iunlock(dp, XFS_ILOCK_EXCL); 903 return error; 904} 905 906#ifdef DEBUG 907int xfs_locked_n; 908int xfs_small_retries; 909int xfs_middle_retries; 910int xfs_lots_retries; 911int xfs_lock_delays; 912#endif 913 914/* 915 * Bump the subclass so xfs_lock_inodes() acquires each lock with 916 * a different value 917 */ 918static inline int 919xfs_lock_inumorder(int lock_mode, int subclass) 920{ 921 if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) 922 lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_IOLOCK_SHIFT; 923 if (lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)) 924 lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_ILOCK_SHIFT; 925 926 return lock_mode; 927} 928 929/* 930 * The following routine will lock n inodes in exclusive mode. 931 * We assume the caller calls us with the inodes in i_ino order. 932 * 933 * We need to detect deadlock where an inode that we lock 934 * is in the AIL and we start waiting for another inode that is locked 935 * by a thread in a long running transaction (such as truncate). This can 936 * result in deadlock since the long running trans might need to wait 937 * for the inode we just locked in order to push the tail and free space 938 * in the log. 939 */ 940void 941xfs_lock_inodes( 942 xfs_inode_t **ips, 943 int inodes, 944 uint lock_mode) 945{ 946 int attempts = 0, i, j, try_lock; 947 xfs_log_item_t *lp; 948 949 ASSERT(ips && (inodes >= 2)); /* we need at least two */ 950 951 try_lock = 0; 952 i = 0; 953 954again: 955 for (; i < inodes; i++) { 956 ASSERT(ips[i]); 957 958 if (i && (ips[i] == ips[i-1])) /* Already locked */ 959 continue; 960 961 /* 962 * If try_lock is not set yet, make sure all locked inodes 963 * are not in the AIL. 964 * If any are, set try_lock to be used later. 965 */ 966 967 if (!try_lock) { 968 for (j = (i - 1); j >= 0 && !try_lock; j--) { 969 lp = (xfs_log_item_t *)ips[j]->i_itemp; 970 if (lp && (lp->li_flags & XFS_LI_IN_AIL)) { 971 try_lock++; 972 } 973 } 974 } 975 976 /* 977 * If any of the previous locks we have locked is in the AIL, 978 * we must TRY to get the second and subsequent locks. If 979 * we can't get any, we must release all we have 980 * and try again. 981 */ 982 983 if (try_lock) { 984 /* try_lock must be 0 if i is 0. */ 985 /* 986 * try_lock means we have an inode locked 987 * that is in the AIL. 988 */ 989 ASSERT(i != 0); 990 if (!xfs_ilock_nowait(ips[i], xfs_lock_inumorder(lock_mode, i))) { 991 attempts++; 992 993 /* 994 * Unlock all previous guys and try again. 995 * xfs_iunlock will try to push the tail 996 * if the inode is in the AIL. 997 */ 998 999 for(j = i - 1; j >= 0; j--) { 1000 1001 /* 1002 * Check to see if we've already 1003 * unlocked this one. 1004 * Not the first one going back, 1005 * and the inode ptr is the same. 1006 */ 1007 if ((j != (i - 1)) && ips[j] == 1008 ips[j+1]) 1009 continue; 1010 1011 xfs_iunlock(ips[j], lock_mode); 1012 } 1013 1014 if ((attempts % 5) == 0) { 1015 delay(1); /* Don't just spin the CPU */ 1016#ifdef DEBUG 1017 xfs_lock_delays++; 1018#endif 1019 } 1020 i = 0; 1021 try_lock = 0; 1022 goto again; 1023 } 1024 } else { 1025 xfs_ilock(ips[i], xfs_lock_inumorder(lock_mode, i)); 1026 } 1027 } 1028 1029#ifdef DEBUG 1030 if (attempts) { 1031 if (attempts < 5) xfs_small_retries++; 1032 else if (attempts < 100) xfs_middle_retries++; 1033 else xfs_lots_retries++; 1034 } else { 1035 xfs_locked_n++; 1036 } 1037#endif 1038} 1039 1040/* 1041 * xfs_lock_two_inodes() can only be used to lock one type of lock 1042 * at a time - the iolock or the ilock, but not both at once. If 1043 * we lock both at once, lockdep will report false positives saying 1044 * we have violated locking orders. 1045 */ 1046void 1047xfs_lock_two_inodes( 1048 xfs_inode_t *ip0, 1049 xfs_inode_t *ip1, 1050 uint lock_mode) 1051{ 1052 xfs_inode_t *temp; 1053 int attempts = 0; 1054 xfs_log_item_t *lp; 1055 1056 if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) 1057 ASSERT((lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)) == 0); 1058 ASSERT(ip0->i_ino != ip1->i_ino); 1059 1060 if (ip0->i_ino > ip1->i_ino) { 1061 temp = ip0; 1062 ip0 = ip1; 1063 ip1 = temp; 1064 } 1065 1066 again: 1067 xfs_ilock(ip0, xfs_lock_inumorder(lock_mode, 0)); 1068 1069 /* 1070 * If the first lock we have locked is in the AIL, we must TRY to get 1071 * the second lock. If we can't get it, we must release the first one 1072 * and try again. 1073 */ 1074 lp = (xfs_log_item_t *)ip0->i_itemp; 1075 if (lp && (lp->li_flags & XFS_LI_IN_AIL)) { 1076 if (!xfs_ilock_nowait(ip1, xfs_lock_inumorder(lock_mode, 1))) { 1077 xfs_iunlock(ip0, lock_mode); 1078 if ((++attempts % 5) == 0) 1079 delay(1); /* Don't just spin the CPU */ 1080 goto again; 1081 } 1082 } else { 1083 xfs_ilock(ip1, xfs_lock_inumorder(lock_mode, 1)); 1084 } 1085} 1086 1087int 1088xfs_remove( 1089 xfs_inode_t *dp, 1090 struct xfs_name *name, 1091 xfs_inode_t *ip) 1092{ 1093 xfs_mount_t *mp = dp->i_mount; 1094 xfs_trans_t *tp = NULL; 1095 int is_dir = S_ISDIR(ip->i_d.di_mode); 1096 int error = 0; 1097 xfs_bmap_free_t free_list; 1098 xfs_fsblock_t first_block; 1099 int cancel_flags; 1100 int committed; 1101 int link_zero; 1102 uint resblks; 1103 uint log_count; 1104 1105 trace_xfs_remove(dp, name); 1106 1107 if (XFS_FORCED_SHUTDOWN(mp)) 1108 return XFS_ERROR(EIO); 1109 1110 error = xfs_qm_dqattach(dp, 0); 1111 if (error) 1112 goto std_return; 1113 1114 error = xfs_qm_dqattach(ip, 0); 1115 if (error) 1116 goto std_return; 1117 1118 if (is_dir) { 1119 tp = xfs_trans_alloc(mp, XFS_TRANS_RMDIR); 1120 log_count = XFS_DEFAULT_LOG_COUNT; 1121 } else { 1122 tp = xfs_trans_alloc(mp, XFS_TRANS_REMOVE); 1123 log_count = XFS_REMOVE_LOG_COUNT; 1124 } 1125 cancel_flags = XFS_TRANS_RELEASE_LOG_RES; 1126 1127 /* 1128 * We try to get the real space reservation first, 1129 * allowing for directory btree deletion(s) implying 1130 * possible bmap insert(s). If we can't get the space 1131 * reservation then we use 0 instead, and avoid the bmap 1132 * btree insert(s) in the directory code by, if the bmap 1133 * insert tries to happen, instead trimming the LAST 1134 * block from the directory. 1135 */ 1136 resblks = XFS_REMOVE_SPACE_RES(mp); 1137 error = xfs_trans_reserve(tp, resblks, XFS_REMOVE_LOG_RES(mp), 0, 1138 XFS_TRANS_PERM_LOG_RES, log_count); 1139 if (error == ENOSPC) { 1140 resblks = 0; 1141 error = xfs_trans_reserve(tp, 0, XFS_REMOVE_LOG_RES(mp), 0, 1142 XFS_TRANS_PERM_LOG_RES, log_count); 1143 } 1144 if (error) { 1145 ASSERT(error != ENOSPC); 1146 cancel_flags = 0; 1147 goto out_trans_cancel; 1148 } 1149 1150 xfs_lock_two_inodes(dp, ip, XFS_ILOCK_EXCL); 1151 1152 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); 1153 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 1154 1155 /* 1156 * If we're removing a directory perform some additional validation. 1157 */ 1158 if (is_dir) { 1159 ASSERT(ip->i_d.di_nlink >= 2); 1160 if (ip->i_d.di_nlink != 2) { 1161 error = XFS_ERROR(ENOTEMPTY); 1162 goto out_trans_cancel; 1163 } 1164 if (!xfs_dir_isempty(ip)) { 1165 error = XFS_ERROR(ENOTEMPTY); 1166 goto out_trans_cancel; 1167 } 1168 } 1169 1170 xfs_bmap_init(&free_list, &first_block); 1171 error = xfs_dir_removename(tp, dp, name, ip->i_ino, 1172 &first_block, &free_list, resblks); 1173 if (error) { 1174 ASSERT(error != ENOENT); 1175 goto out_bmap_cancel; 1176 } 1177 xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 1178 1179 if (is_dir) { 1180 /* 1181 * Drop the link from ip's "..". 1182 */ 1183 error = xfs_droplink(tp, dp); 1184 if (error) 1185 goto out_bmap_cancel; 1186 1187 /* 1188 * Drop the "." link from ip to self. 1189 */ 1190 error = xfs_droplink(tp, ip); 1191 if (error) 1192 goto out_bmap_cancel; 1193 } else { 1194 /* 1195 * When removing a non-directory we need to log the parent 1196 * inode here. For a directory this is done implicitly 1197 * by the xfs_droplink call for the ".." entry. 1198 */ 1199 xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); 1200 } 1201 1202 /* 1203 * Drop the link from dp to ip. 1204 */ 1205 error = xfs_droplink(tp, ip); 1206 if (error) 1207 goto out_bmap_cancel; 1208 1209 /* 1210 * Determine if this is the last link while 1211 * we are in the transaction. 1212 */ 1213 link_zero = (ip->i_d.di_nlink == 0); 1214 1215 /* 1216 * If this is a synchronous mount, make sure that the 1217 * remove transaction goes to disk before returning to 1218 * the user. 1219 */ 1220 if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) 1221 xfs_trans_set_sync(tp); 1222 1223 error = xfs_bmap_finish(&tp, &free_list, &committed); 1224 if (error) 1225 goto out_bmap_cancel; 1226 1227 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 1228 if (error) 1229 goto std_return; 1230 1231 /* 1232 * If we are using filestreams, kill the stream association. 1233 * If the file is still open it may get a new one but that 1234 * will get killed on last close in xfs_close() so we don't 1235 * have to worry about that. 1236 */ 1237 if (!is_dir && link_zero && xfs_inode_is_filestream(ip)) 1238 xfs_filestream_deassociate(ip); 1239 1240 return 0; 1241 1242 out_bmap_cancel: 1243 xfs_bmap_cancel(&free_list); 1244 cancel_flags |= XFS_TRANS_ABORT; 1245 out_trans_cancel: 1246 xfs_trans_cancel(tp, cancel_flags); 1247 std_return: 1248 return error; 1249} 1250 1251int 1252xfs_link( 1253 xfs_inode_t *tdp, 1254 xfs_inode_t *sip, 1255 struct xfs_name *target_name) 1256{ 1257 xfs_mount_t *mp = tdp->i_mount; 1258 xfs_trans_t *tp; 1259 int error; 1260 xfs_bmap_free_t free_list; 1261 xfs_fsblock_t first_block; 1262 int cancel_flags; 1263 int committed; 1264 int resblks; 1265 1266 trace_xfs_link(tdp, target_name); 1267 1268 ASSERT(!S_ISDIR(sip->i_d.di_mode)); 1269 1270 if (XFS_FORCED_SHUTDOWN(mp)) 1271 return XFS_ERROR(EIO); 1272 1273 error = xfs_qm_dqattach(sip, 0); 1274 if (error) 1275 goto std_return; 1276 1277 error = xfs_qm_dqattach(tdp, 0); 1278 if (error) 1279 goto std_return; 1280 1281 tp = xfs_trans_alloc(mp, XFS_TRANS_LINK); 1282 cancel_flags = XFS_TRANS_RELEASE_LOG_RES; 1283 resblks = XFS_LINK_SPACE_RES(mp, target_name->len); 1284 error = xfs_trans_reserve(tp, resblks, XFS_LINK_LOG_RES(mp), 0, 1285 XFS_TRANS_PERM_LOG_RES, XFS_LINK_LOG_COUNT); 1286 if (error == ENOSPC) { 1287 resblks = 0; 1288 error = xfs_trans_reserve(tp, 0, XFS_LINK_LOG_RES(mp), 0, 1289 XFS_TRANS_PERM_LOG_RES, XFS_LINK_LOG_COUNT); 1290 } 1291 if (error) { 1292 cancel_flags = 0; 1293 goto error_return; 1294 } 1295 1296 xfs_lock_two_inodes(sip, tdp, XFS_ILOCK_EXCL); 1297 1298 xfs_trans_ijoin(tp, sip, XFS_ILOCK_EXCL); 1299 xfs_trans_ijoin(tp, tdp, XFS_ILOCK_EXCL); 1300 1301 /* 1302 * If we are using project inheritance, we only allow hard link 1303 * creation in our tree when the project IDs are the same; else 1304 * the tree quota mechanism could be circumvented. 1305 */ 1306 if (unlikely((tdp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) && 1307 (xfs_get_projid(tdp) != xfs_get_projid(sip)))) { 1308 error = XFS_ERROR(EXDEV); 1309 goto error_return; 1310 } 1311 1312 error = xfs_dir_canenter(tp, tdp, target_name, resblks); 1313 if (error) 1314 goto error_return; 1315 1316 xfs_bmap_init(&free_list, &first_block); 1317 1318 error = xfs_dir_createname(tp, tdp, target_name, sip->i_ino, 1319 &first_block, &free_list, resblks); 1320 if (error) 1321 goto abort_return; 1322 xfs_trans_ichgtime(tp, tdp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 1323 xfs_trans_log_inode(tp, tdp, XFS_ILOG_CORE); 1324 1325 error = xfs_bumplink(tp, sip); 1326 if (error) 1327 goto abort_return; 1328 1329 /* 1330 * If this is a synchronous mount, make sure that the 1331 * link transaction goes to disk before returning to 1332 * the user. 1333 */ 1334 if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) { 1335 xfs_trans_set_sync(tp); 1336 } 1337 1338 error = xfs_bmap_finish (&tp, &free_list, &committed); 1339 if (error) { 1340 xfs_bmap_cancel(&free_list); 1341 goto abort_return; 1342 } 1343 1344 return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 1345 1346 abort_return: 1347 cancel_flags |= XFS_TRANS_ABORT; 1348 error_return: 1349 xfs_trans_cancel(tp, cancel_flags); 1350 std_return: 1351 return error; 1352} 1353 1354int 1355xfs_symlink( 1356 xfs_inode_t *dp, 1357 struct xfs_name *link_name, 1358 const char *target_path, 1359 umode_t mode, 1360 xfs_inode_t **ipp) 1361{ 1362 xfs_mount_t *mp = dp->i_mount; 1363 xfs_trans_t *tp; 1364 xfs_inode_t *ip; 1365 int error; 1366 int pathlen; 1367 xfs_bmap_free_t free_list; 1368 xfs_fsblock_t first_block; 1369 boolean_t unlock_dp_on_error = B_FALSE; 1370 uint cancel_flags; 1371 int committed; 1372 xfs_fileoff_t first_fsb; 1373 xfs_filblks_t fs_blocks; 1374 int nmaps; 1375 xfs_bmbt_irec_t mval[SYMLINK_MAPS]; 1376 xfs_daddr_t d; 1377 const char *cur_chunk; 1378 int byte_cnt; 1379 int n; 1380 xfs_buf_t *bp; 1381 prid_t prid; 1382 struct xfs_dquot *udqp, *gdqp; 1383 uint resblks; 1384 1385 *ipp = NULL; 1386 error = 0; 1387 ip = NULL; 1388 tp = NULL; 1389 1390 trace_xfs_symlink(dp, link_name); 1391 1392 if (XFS_FORCED_SHUTDOWN(mp)) 1393 return XFS_ERROR(EIO); 1394 1395 /* 1396 * Check component lengths of the target path name. 1397 */ 1398 pathlen = strlen(target_path); 1399 if (pathlen >= MAXPATHLEN) /* total string too long */ 1400 return XFS_ERROR(ENAMETOOLONG); 1401 1402 udqp = gdqp = NULL; 1403 if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) 1404 prid = xfs_get_projid(dp); 1405 else 1406 prid = XFS_PROJID_DEFAULT; 1407 1408 /* 1409 * Make sure that we have allocated dquot(s) on disk. 1410 */ 1411 error = xfs_qm_vop_dqalloc(dp, current_fsuid(), current_fsgid(), prid, 1412 XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp); 1413 if (error) 1414 goto std_return; 1415 1416 tp = xfs_trans_alloc(mp, XFS_TRANS_SYMLINK); 1417 cancel_flags = XFS_TRANS_RELEASE_LOG_RES; 1418 /* 1419 * The symlink will fit into the inode data fork? 1420 * There can't be any attributes so we get the whole variable part. 1421 */ 1422 if (pathlen <= XFS_LITINO(mp)) 1423 fs_blocks = 0; 1424 else 1425 fs_blocks = XFS_B_TO_FSB(mp, pathlen); 1426 resblks = XFS_SYMLINK_SPACE_RES(mp, link_name->len, fs_blocks); 1427 error = xfs_trans_reserve(tp, resblks, XFS_SYMLINK_LOG_RES(mp), 0, 1428 XFS_TRANS_PERM_LOG_RES, XFS_SYMLINK_LOG_COUNT); 1429 if (error == ENOSPC && fs_blocks == 0) { 1430 resblks = 0; 1431 error = xfs_trans_reserve(tp, 0, XFS_SYMLINK_LOG_RES(mp), 0, 1432 XFS_TRANS_PERM_LOG_RES, XFS_SYMLINK_LOG_COUNT); 1433 } 1434 if (error) { 1435 cancel_flags = 0; 1436 goto error_return; 1437 } 1438 1439 xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT); 1440 unlock_dp_on_error = B_TRUE; 1441 1442 /* 1443 * Check whether the directory allows new symlinks or not. 1444 */ 1445 if (dp->i_d.di_flags & XFS_DIFLAG_NOSYMLINKS) { 1446 error = XFS_ERROR(EPERM); 1447 goto error_return; 1448 } 1449 1450 /* 1451 * Reserve disk quota : blocks and inode. 1452 */ 1453 error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp, resblks, 1, 0); 1454 if (error) 1455 goto error_return; 1456 1457 /* 1458 * Check for ability to enter directory entry, if no space reserved. 1459 */ 1460 error = xfs_dir_canenter(tp, dp, link_name, resblks); 1461 if (error) 1462 goto error_return; 1463 /* 1464 * Initialize the bmap freelist prior to calling either 1465 * bmapi or the directory create code. 1466 */ 1467 xfs_bmap_init(&free_list, &first_block); 1468 1469 /* 1470 * Allocate an inode for the symlink. 1471 */ 1472 error = xfs_dir_ialloc(&tp, dp, S_IFLNK | (mode & ~S_IFMT), 1, 0, 1473 prid, resblks > 0, &ip, NULL); 1474 if (error) { 1475 if (error == ENOSPC) 1476 goto error_return; 1477 goto error1; 1478 } 1479 1480 /* 1481 * An error after we've joined dp to the transaction will result in the 1482 * transaction cancel unlocking dp so don't do it explicitly in the 1483 * error path. 1484 */ 1485 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); 1486 unlock_dp_on_error = B_FALSE; 1487 1488 /* 1489 * Also attach the dquot(s) to it, if applicable. 1490 */ 1491 xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp); 1492 1493 if (resblks) 1494 resblks -= XFS_IALLOC_SPACE_RES(mp); 1495 /* 1496 * If the symlink will fit into the inode, write it inline. 1497 */ 1498 if (pathlen <= XFS_IFORK_DSIZE(ip)) { 1499 xfs_idata_realloc(ip, pathlen, XFS_DATA_FORK); 1500 memcpy(ip->i_df.if_u1.if_data, target_path, pathlen); 1501 ip->i_d.di_size = pathlen; 1502 1503 /* 1504 * The inode was initially created in extent format. 1505 */ 1506 ip->i_df.if_flags &= ~(XFS_IFEXTENTS | XFS_IFBROOT); 1507 ip->i_df.if_flags |= XFS_IFINLINE; 1508 1509 ip->i_d.di_format = XFS_DINODE_FMT_LOCAL; 1510 xfs_trans_log_inode(tp, ip, XFS_ILOG_DDATA | XFS_ILOG_CORE); 1511 1512 } else { 1513 first_fsb = 0; 1514 nmaps = SYMLINK_MAPS; 1515 1516 error = xfs_bmapi_write(tp, ip, first_fsb, fs_blocks, 1517 XFS_BMAPI_METADATA, &first_block, resblks, 1518 mval, &nmaps, &free_list); 1519 if (error) 1520 goto error2; 1521 1522 if (resblks) 1523 resblks -= fs_blocks; 1524 ip->i_d.di_size = pathlen; 1525 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 1526 1527 cur_chunk = target_path; 1528 for (n = 0; n < nmaps; n++) { 1529 d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock); 1530 byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount); 1531 bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, 1532 BTOBB(byte_cnt), 0); 1533 if (!bp) { 1534 error = ENOMEM; 1535 goto error2; 1536 } 1537 if (pathlen < byte_cnt) { 1538 byte_cnt = pathlen; 1539 } 1540 pathlen -= byte_cnt; 1541 1542 memcpy(bp->b_addr, cur_chunk, byte_cnt); 1543 cur_chunk += byte_cnt; 1544 1545 xfs_trans_log_buf(tp, bp, 0, byte_cnt - 1); 1546 } 1547 } 1548 1549 /* 1550 * Create the directory entry for the symlink. 1551 */ 1552 error = xfs_dir_createname(tp, dp, link_name, ip->i_ino, 1553 &first_block, &free_list, resblks); 1554 if (error) 1555 goto error2; 1556 xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 1557 xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); 1558 1559 /* 1560 * If this is a synchronous mount, make sure that the 1561 * symlink transaction goes to disk before returning to 1562 * the user. 1563 */ 1564 if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) { 1565 xfs_trans_set_sync(tp); 1566 } 1567 1568 error = xfs_bmap_finish(&tp, &free_list, &committed); 1569 if (error) { 1570 goto error2; 1571 } 1572 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 1573 xfs_qm_dqrele(udqp); 1574 xfs_qm_dqrele(gdqp); 1575 1576 *ipp = ip; 1577 return 0; 1578 1579 error2: 1580 IRELE(ip); 1581 error1: 1582 xfs_bmap_cancel(&free_list); 1583 cancel_flags |= XFS_TRANS_ABORT; 1584 error_return: 1585 xfs_trans_cancel(tp, cancel_flags); 1586 xfs_qm_dqrele(udqp); 1587 xfs_qm_dqrele(gdqp); 1588 1589 if (unlock_dp_on_error) 1590 xfs_iunlock(dp, XFS_ILOCK_EXCL); 1591 std_return: 1592 return error; 1593} 1594 1595int 1596xfs_set_dmattrs( 1597 xfs_inode_t *ip, 1598 u_int evmask, 1599 u_int16_t state) 1600{ 1601 xfs_mount_t *mp = ip->i_mount; 1602 xfs_trans_t *tp; 1603 int error; 1604 1605 if (!capable(CAP_SYS_ADMIN)) 1606 return XFS_ERROR(EPERM); 1607 1608 if (XFS_FORCED_SHUTDOWN(mp)) 1609 return XFS_ERROR(EIO); 1610 1611 tp = xfs_trans_alloc(mp, XFS_TRANS_SET_DMATTRS); 1612 error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES (mp), 0, 0, 0); 1613 if (error) { 1614 xfs_trans_cancel(tp, 0); 1615 return error; 1616 } 1617 xfs_ilock(ip, XFS_ILOCK_EXCL); 1618 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 1619 1620 ip->i_d.di_dmevmask = evmask; 1621 ip->i_d.di_dmstate = state; 1622 1623 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 1624 error = xfs_trans_commit(tp, 0); 1625 1626 return error; 1627} 1628 1629/* 1630 * xfs_alloc_file_space() 1631 * This routine allocates disk space for the given file. 1632 * 1633 * If alloc_type == 0, this request is for an ALLOCSP type 1634 * request which will change the file size. In this case, no 1635 * DMAPI event will be generated by the call. A TRUNCATE event 1636 * will be generated later by xfs_setattr. 1637 * 1638 * If alloc_type != 0, this request is for a RESVSP type 1639 * request, and a DMAPI DM_EVENT_WRITE will be generated if the 1640 * lower block boundary byte address is less than the file's 1641 * length. 1642 * 1643 * RETURNS: 1644 * 0 on success 1645 * errno on error 1646 * 1647 */ 1648STATIC int 1649xfs_alloc_file_space( 1650 xfs_inode_t *ip, 1651 xfs_off_t offset, 1652 xfs_off_t len, 1653 int alloc_type, 1654 int attr_flags) 1655{ 1656 xfs_mount_t *mp = ip->i_mount; 1657 xfs_off_t count; 1658 xfs_filblks_t allocated_fsb; 1659 xfs_filblks_t allocatesize_fsb; 1660 xfs_extlen_t extsz, temp; 1661 xfs_fileoff_t startoffset_fsb; 1662 xfs_fsblock_t firstfsb; 1663 int nimaps; 1664 int quota_flag; 1665 int rt; 1666 xfs_trans_t *tp; 1667 xfs_bmbt_irec_t imaps[1], *imapp; 1668 xfs_bmap_free_t free_list; 1669 uint qblocks, resblks, resrtextents; 1670 int committed; 1671 int error; 1672 1673 trace_xfs_alloc_file_space(ip); 1674 1675 if (XFS_FORCED_SHUTDOWN(mp)) 1676 return XFS_ERROR(EIO); 1677 1678 error = xfs_qm_dqattach(ip, 0); 1679 if (error) 1680 return error; 1681 1682 if (len <= 0) 1683 return XFS_ERROR(EINVAL); 1684 1685 rt = XFS_IS_REALTIME_INODE(ip); 1686 extsz = xfs_get_extsz_hint(ip); 1687 1688 count = len; 1689 imapp = &imaps[0]; 1690 nimaps = 1; 1691 startoffset_fsb = XFS_B_TO_FSBT(mp, offset); 1692 allocatesize_fsb = XFS_B_TO_FSB(mp, count); 1693 1694 /* 1695 * Allocate file space until done or until there is an error 1696 */ 1697 while (allocatesize_fsb && !error) { 1698 xfs_fileoff_t s, e; 1699 1700 /* 1701 * Determine space reservations for data/realtime. 1702 */ 1703 if (unlikely(extsz)) { 1704 s = startoffset_fsb; 1705 do_div(s, extsz); 1706 s *= extsz; 1707 e = startoffset_fsb + allocatesize_fsb; 1708 if ((temp = do_mod(startoffset_fsb, extsz))) 1709 e += temp; 1710 if ((temp = do_mod(e, extsz))) 1711 e += extsz - temp; 1712 } else { 1713 s = 0; 1714 e = allocatesize_fsb; 1715 } 1716 1717 /* 1718 * The transaction reservation is limited to a 32-bit block 1719 * count, hence we need to limit the number of blocks we are 1720 * trying to reserve to avoid an overflow. We can't allocate 1721 * more than @nimaps extents, and an extent is limited on disk 1722 * to MAXEXTLEN (21 bits), so use that to enforce the limit. 1723 */ 1724 resblks = min_t(xfs_fileoff_t, (e - s), (MAXEXTLEN * nimaps)); 1725 if (unlikely(rt)) { 1726 resrtextents = qblocks = resblks; 1727 resrtextents /= mp->m_sb.sb_rextsize; 1728 resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0); 1729 quota_flag = XFS_QMOPT_RES_RTBLKS; 1730 } else { 1731 resrtextents = 0; 1732 resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, resblks); 1733 quota_flag = XFS_QMOPT_RES_REGBLKS; 1734 } 1735 1736 /* 1737 * Allocate and setup the transaction. 1738 */ 1739 tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); 1740 error = xfs_trans_reserve(tp, resblks, 1741 XFS_WRITE_LOG_RES(mp), resrtextents, 1742 XFS_TRANS_PERM_LOG_RES, 1743 XFS_WRITE_LOG_COUNT); 1744 /* 1745 * Check for running out of space 1746 */ 1747 if (error) { 1748 /* 1749 * Free the transaction structure. 1750 */ 1751 ASSERT(error == ENOSPC || XFS_FORCED_SHUTDOWN(mp)); 1752 xfs_trans_cancel(tp, 0); 1753 break; 1754 } 1755 xfs_ilock(ip, XFS_ILOCK_EXCL); 1756 error = xfs_trans_reserve_quota_nblks(tp, ip, qblocks, 1757 0, quota_flag); 1758 if (error) 1759 goto error1; 1760 1761 xfs_trans_ijoin(tp, ip, 0); 1762 1763 xfs_bmap_init(&free_list, &firstfsb); 1764 error = xfs_bmapi_write(tp, ip, startoffset_fsb, 1765 allocatesize_fsb, alloc_type, &firstfsb, 1766 0, imapp, &nimaps, &free_list); 1767 if (error) { 1768 goto error0; 1769 } 1770 1771 /* 1772 * Complete the transaction 1773 */ 1774 error = xfs_bmap_finish(&tp, &free_list, &committed); 1775 if (error) { 1776 goto error0; 1777 } 1778 1779 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 1780 xfs_iunlock(ip, XFS_ILOCK_EXCL); 1781 if (error) { 1782 break; 1783 } 1784 1785 allocated_fsb = imapp->br_blockcount; 1786 1787 if (nimaps == 0) { 1788 error = XFS_ERROR(ENOSPC); 1789 break; 1790 } 1791 1792 startoffset_fsb += allocated_fsb; 1793 allocatesize_fsb -= allocated_fsb; 1794 } 1795 1796 return error; 1797 1798error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */ 1799 xfs_bmap_cancel(&free_list); 1800 xfs_trans_unreserve_quota_nblks(tp, ip, (long)qblocks, 0, quota_flag); 1801 1802error1: /* Just cancel transaction */ 1803 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); 1804 xfs_iunlock(ip, XFS_ILOCK_EXCL); 1805 return error; 1806} 1807 1808/* 1809 * Zero file bytes between startoff and endoff inclusive. 1810 * The iolock is held exclusive and no blocks are buffered. 1811 * 1812 * This function is used by xfs_free_file_space() to zero 1813 * partial blocks when the range to free is not block aligned. 1814 * When unreserving space with boundaries that are not block 1815 * aligned we round up the start and round down the end 1816 * boundaries and then use this function to zero the parts of 1817 * the blocks that got dropped during the rounding. 1818 */ 1819STATIC int 1820xfs_zero_remaining_bytes( 1821 xfs_inode_t *ip, 1822 xfs_off_t startoff, 1823 xfs_off_t endoff) 1824{ 1825 xfs_bmbt_irec_t imap; 1826 xfs_fileoff_t offset_fsb; 1827 xfs_off_t lastoffset; 1828 xfs_off_t offset; 1829 xfs_buf_t *bp; 1830 xfs_mount_t *mp = ip->i_mount; 1831 int nimap; 1832 int error = 0; 1833 1834 /* 1835 * Avoid doing I/O beyond eof - it's not necessary 1836 * since nothing can read beyond eof. The space will 1837 * be zeroed when the file is extended anyway. 1838 */ 1839 if (startoff >= XFS_ISIZE(ip)) 1840 return 0; 1841 1842 if (endoff > XFS_ISIZE(ip)) 1843 endoff = XFS_ISIZE(ip); 1844 1845 bp = xfs_buf_get_uncached(XFS_IS_REALTIME_INODE(ip) ? 1846 mp->m_rtdev_targp : mp->m_ddev_targp, 1847 BTOBB(mp->m_sb.sb_blocksize), 0); 1848 if (!bp) 1849 return XFS_ERROR(ENOMEM); 1850 1851 xfs_buf_unlock(bp); 1852 1853 for (offset = startoff; offset <= endoff; offset = lastoffset + 1) { 1854 offset_fsb = XFS_B_TO_FSBT(mp, offset); 1855 nimap = 1; 1856 error = xfs_bmapi_read(ip, offset_fsb, 1, &imap, &nimap, 0); 1857 if (error || nimap < 1) 1858 break; 1859 ASSERT(imap.br_blockcount >= 1); 1860 ASSERT(imap.br_startoff == offset_fsb); 1861 lastoffset = XFS_FSB_TO_B(mp, imap.br_startoff + 1) - 1; 1862 if (lastoffset > endoff) 1863 lastoffset = endoff; 1864 if (imap.br_startblock == HOLESTARTBLOCK) 1865 continue; 1866 ASSERT(imap.br_startblock != DELAYSTARTBLOCK); 1867 if (imap.br_state == XFS_EXT_UNWRITTEN) 1868 continue; 1869 XFS_BUF_UNDONE(bp); 1870 XFS_BUF_UNWRITE(bp); 1871 XFS_BUF_READ(bp); 1872 XFS_BUF_SET_ADDR(bp, xfs_fsb_to_db(ip, imap.br_startblock)); 1873 xfsbdstrat(mp, bp); 1874 error = xfs_buf_iowait(bp); 1875 if (error) { 1876 xfs_buf_ioerror_alert(bp, 1877 "xfs_zero_remaining_bytes(read)"); 1878 break; 1879 } 1880 memset(bp->b_addr + 1881 (offset - XFS_FSB_TO_B(mp, imap.br_startoff)), 1882 0, lastoffset - offset + 1); 1883 XFS_BUF_UNDONE(bp); 1884 XFS_BUF_UNREAD(bp); 1885 XFS_BUF_WRITE(bp); 1886 xfsbdstrat(mp, bp); 1887 error = xfs_buf_iowait(bp); 1888 if (error) { 1889 xfs_buf_ioerror_alert(bp, 1890 "xfs_zero_remaining_bytes(write)"); 1891 break; 1892 } 1893 } 1894 xfs_buf_free(bp); 1895 return error; 1896} 1897 1898/* 1899 * xfs_free_file_space() 1900 * This routine frees disk space for the given file. 1901 * 1902 * This routine is only called by xfs_change_file_space 1903 * for an UNRESVSP type call. 1904 * 1905 * RETURNS: 1906 * 0 on success 1907 * errno on error 1908 * 1909 */ 1910STATIC int 1911xfs_free_file_space( 1912 xfs_inode_t *ip, 1913 xfs_off_t offset, 1914 xfs_off_t len, 1915 int attr_flags) 1916{ 1917 int committed; 1918 int done; 1919 xfs_fileoff_t endoffset_fsb; 1920 int error; 1921 xfs_fsblock_t firstfsb; 1922 xfs_bmap_free_t free_list; 1923 xfs_bmbt_irec_t imap; 1924 xfs_off_t ioffset; 1925 xfs_extlen_t mod=0; 1926 xfs_mount_t *mp; 1927 int nimap; 1928 uint resblks; 1929 uint rounding; 1930 int rt; 1931 xfs_fileoff_t startoffset_fsb; 1932 xfs_trans_t *tp; 1933 int need_iolock = 1; 1934 1935 mp = ip->i_mount; 1936 1937 trace_xfs_free_file_space(ip); 1938 1939 error = xfs_qm_dqattach(ip, 0); 1940 if (error) 1941 return error; 1942 1943 error = 0; 1944 if (len <= 0) /* if nothing being freed */ 1945 return error; 1946 rt = XFS_IS_REALTIME_INODE(ip); 1947 startoffset_fsb = XFS_B_TO_FSB(mp, offset); 1948 endoffset_fsb = XFS_B_TO_FSBT(mp, offset + len); 1949 1950 if (attr_flags & XFS_ATTR_NOLOCK) 1951 need_iolock = 0; 1952 if (need_iolock) { 1953 xfs_ilock(ip, XFS_IOLOCK_EXCL); 1954 /* wait for the completion of any pending DIOs */ 1955 inode_dio_wait(VFS_I(ip)); 1956 } 1957 1958 rounding = max_t(uint, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE); 1959 ioffset = offset & ~(rounding - 1); 1960 1961 if (VN_CACHED(VFS_I(ip)) != 0) { 1962 error = xfs_flushinval_pages(ip, ioffset, -1, FI_REMAPF_LOCKED); 1963 if (error) 1964 goto out_unlock_iolock; 1965 } 1966 1967 /* 1968 * Need to zero the stuff we're not freeing, on disk. 1969 * If it's a realtime file & can't use unwritten extents then we 1970 * actually need to zero the extent edges. Otherwise xfs_bunmapi 1971 * will take care of it for us. 1972 */ 1973 if (rt && !xfs_sb_version_hasextflgbit(&mp->m_sb)) { 1974 nimap = 1; 1975 error = xfs_bmapi_read(ip, startoffset_fsb, 1, 1976 &imap, &nimap, 0); 1977 if (error) 1978 goto out_unlock_iolock; 1979 ASSERT(nimap == 0 || nimap == 1); 1980 if (nimap && imap.br_startblock != HOLESTARTBLOCK) { 1981 xfs_daddr_t block; 1982 1983 ASSERT(imap.br_startblock != DELAYSTARTBLOCK); 1984 block = imap.br_startblock; 1985 mod = do_div(block, mp->m_sb.sb_rextsize); 1986 if (mod) 1987 startoffset_fsb += mp->m_sb.sb_rextsize - mod; 1988 } 1989 nimap = 1; 1990 error = xfs_bmapi_read(ip, endoffset_fsb - 1, 1, 1991 &imap, &nimap, 0); 1992 if (error) 1993 goto out_unlock_iolock; 1994 ASSERT(nimap == 0 || nimap == 1); 1995 if (nimap && imap.br_startblock != HOLESTARTBLOCK) { 1996 ASSERT(imap.br_startblock != DELAYSTARTBLOCK); 1997 mod++; 1998 if (mod && (mod != mp->m_sb.sb_rextsize)) 1999 endoffset_fsb -= mod; 2000 } 2001 } 2002 if ((done = (endoffset_fsb <= startoffset_fsb))) 2003 /* 2004 * One contiguous piece to clear 2005 */ 2006 error = xfs_zero_remaining_bytes(ip, offset, offset + len - 1); 2007 else { 2008 /* 2009 * Some full blocks, possibly two pieces to clear 2010 */ 2011 if (offset < XFS_FSB_TO_B(mp, startoffset_fsb)) 2012 error = xfs_zero_remaining_bytes(ip, offset, 2013 XFS_FSB_TO_B(mp, startoffset_fsb) - 1); 2014 if (!error && 2015 XFS_FSB_TO_B(mp, endoffset_fsb) < offset + len) 2016 error = xfs_zero_remaining_bytes(ip, 2017 XFS_FSB_TO_B(mp, endoffset_fsb), 2018 offset + len - 1); 2019 } 2020 2021 /* 2022 * free file space until done or until there is an error 2023 */ 2024 resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0); 2025 while (!error && !done) { 2026 2027 /* 2028 * allocate and setup the transaction. Allow this 2029 * transaction to dip into the reserve blocks to ensure 2030 * the freeing of the space succeeds at ENOSPC. 2031 */ 2032 tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); 2033 tp->t_flags |= XFS_TRANS_RESERVE; 2034 error = xfs_trans_reserve(tp, 2035 resblks, 2036 XFS_WRITE_LOG_RES(mp), 2037 0, 2038 XFS_TRANS_PERM_LOG_RES, 2039 XFS_WRITE_LOG_COUNT); 2040 2041 /* 2042 * check for running out of space 2043 */ 2044 if (error) { 2045 /* 2046 * Free the transaction structure. 2047 */ 2048 ASSERT(error == ENOSPC || XFS_FORCED_SHUTDOWN(mp)); 2049 xfs_trans_cancel(tp, 0); 2050 break; 2051 } 2052 xfs_ilock(ip, XFS_ILOCK_EXCL); 2053 error = xfs_trans_reserve_quota(tp, mp, 2054 ip->i_udquot, ip->i_gdquot, 2055 resblks, 0, XFS_QMOPT_RES_REGBLKS); 2056 if (error) 2057 goto error1; 2058 2059 xfs_trans_ijoin(tp, ip, 0); 2060 2061 /* 2062 * issue the bunmapi() call to free the blocks 2063 */ 2064 xfs_bmap_init(&free_list, &firstfsb); 2065 error = xfs_bunmapi(tp, ip, startoffset_fsb, 2066 endoffset_fsb - startoffset_fsb, 2067 0, 2, &firstfsb, &free_list, &done); 2068 if (error) { 2069 goto error0; 2070 } 2071 2072 /* 2073 * complete the transaction 2074 */ 2075 error = xfs_bmap_finish(&tp, &free_list, &committed); 2076 if (error) { 2077 goto error0; 2078 } 2079 2080 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 2081 xfs_iunlock(ip, XFS_ILOCK_EXCL); 2082 } 2083 2084 out_unlock_iolock: 2085 if (need_iolock) 2086 xfs_iunlock(ip, XFS_IOLOCK_EXCL); 2087 return error; 2088 2089 error0: 2090 xfs_bmap_cancel(&free_list); 2091 error1: 2092 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); 2093 xfs_iunlock(ip, need_iolock ? (XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL) : 2094 XFS_ILOCK_EXCL); 2095 return error; 2096} 2097 2098/* 2099 * xfs_change_file_space() 2100 * This routine allocates or frees disk space for the given file. 2101 * The user specified parameters are checked for alignment and size 2102 * limitations. 2103 * 2104 * RETURNS: 2105 * 0 on success 2106 * errno on error 2107 * 2108 */ 2109int 2110xfs_change_file_space( 2111 xfs_inode_t *ip, 2112 int cmd, 2113 xfs_flock64_t *bf, 2114 xfs_off_t offset, 2115 int attr_flags) 2116{ 2117 xfs_mount_t *mp = ip->i_mount; 2118 int clrprealloc; 2119 int error; 2120 xfs_fsize_t fsize; 2121 int setprealloc; 2122 xfs_off_t startoffset; 2123 xfs_off_t llen; 2124 xfs_trans_t *tp; 2125 struct iattr iattr; 2126 int prealloc_type; 2127 2128 if (!S_ISREG(ip->i_d.di_mode)) 2129 return XFS_ERROR(EINVAL); 2130 2131 switch (bf->l_whence) { 2132 case 0: /*SEEK_SET*/ 2133 break; 2134 case 1: /*SEEK_CUR*/ 2135 bf->l_start += offset; 2136 break; 2137 case 2: /*SEEK_END*/ 2138 bf->l_start += XFS_ISIZE(ip); 2139 break; 2140 default: 2141 return XFS_ERROR(EINVAL); 2142 } 2143 2144 llen = bf->l_len > 0 ? bf->l_len - 1 : bf->l_len; 2145 2146 if (bf->l_start < 0 || 2147 bf->l_start > mp->m_super->s_maxbytes || 2148 bf->l_start + llen < 0 || 2149 bf->l_start + llen > mp->m_super->s_maxbytes) 2150 return XFS_ERROR(EINVAL); 2151 2152 bf->l_whence = 0; 2153 2154 startoffset = bf->l_start; 2155 fsize = XFS_ISIZE(ip); 2156 2157 /* 2158 * XFS_IOC_RESVSP and XFS_IOC_UNRESVSP will reserve or unreserve 2159 * file space. 2160 * These calls do NOT zero the data space allocated to the file, 2161 * nor do they change the file size. 2162 * 2163 * XFS_IOC_ALLOCSP and XFS_IOC_FREESP will allocate and free file 2164 * space. 2165 * These calls cause the new file data to be zeroed and the file 2166 * size to be changed. 2167 */ 2168 setprealloc = clrprealloc = 0; 2169 prealloc_type = XFS_BMAPI_PREALLOC; 2170 2171 switch (cmd) { 2172 case XFS_IOC_ZERO_RANGE: 2173 prealloc_type |= XFS_BMAPI_CONVERT; 2174 xfs_tosspages(ip, startoffset, startoffset + bf->l_len, 0); 2175 /* FALLTHRU */ 2176 case XFS_IOC_RESVSP: 2177 case XFS_IOC_RESVSP64: 2178 error = xfs_alloc_file_space(ip, startoffset, bf->l_len, 2179 prealloc_type, attr_flags); 2180 if (error) 2181 return error; 2182 setprealloc = 1; 2183 break; 2184 2185 case XFS_IOC_UNRESVSP: 2186 case XFS_IOC_UNRESVSP64: 2187 if ((error = xfs_free_file_space(ip, startoffset, bf->l_len, 2188 attr_flags))) 2189 return error; 2190 break; 2191 2192 case XFS_IOC_ALLOCSP: 2193 case XFS_IOC_ALLOCSP64: 2194 case XFS_IOC_FREESP: 2195 case XFS_IOC_FREESP64: 2196 /* 2197 * These operations actually do IO when extending the file, but 2198 * the allocation is done seperately to the zeroing that is 2199 * done. This set of operations need to be serialised against 2200 * other IO operations, such as truncate and buffered IO. We 2201 * need to take the IOLOCK here to serialise the allocation and 2202 * zeroing IO to prevent other IOLOCK holders (e.g. getbmap, 2203 * truncate, direct IO) from racing against the transient 2204 * allocated but not written state we can have here. 2205 */ 2206 xfs_ilock(ip, XFS_IOLOCK_EXCL); 2207 if (startoffset > fsize) { 2208 error = xfs_alloc_file_space(ip, fsize, 2209 startoffset - fsize, 0, 2210 attr_flags | XFS_ATTR_NOLOCK); 2211 if (error) { 2212 xfs_iunlock(ip, XFS_IOLOCK_EXCL); 2213 break; 2214 } 2215 } 2216 2217 iattr.ia_valid = ATTR_SIZE; 2218 iattr.ia_size = startoffset; 2219 2220 error = xfs_setattr_size(ip, &iattr, 2221 attr_flags | XFS_ATTR_NOLOCK); 2222 xfs_iunlock(ip, XFS_IOLOCK_EXCL); 2223 2224 if (error) 2225 return error; 2226 2227 clrprealloc = 1; 2228 break; 2229 2230 default: 2231 ASSERT(0); 2232 return XFS_ERROR(EINVAL); 2233 } 2234 2235 /* 2236 * update the inode timestamp, mode, and prealloc flag bits 2237 */ 2238 tp = xfs_trans_alloc(mp, XFS_TRANS_WRITEID); 2239 2240 if ((error = xfs_trans_reserve(tp, 0, XFS_WRITEID_LOG_RES(mp), 2241 0, 0, 0))) { 2242 /* ASSERT(0); */ 2243 xfs_trans_cancel(tp, 0); 2244 return error; 2245 } 2246 2247 xfs_ilock(ip, XFS_ILOCK_EXCL); 2248 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 2249 2250 if ((attr_flags & XFS_ATTR_DMI) == 0) { 2251 ip->i_d.di_mode &= ~S_ISUID; 2252 2253 /* 2254 * Note that we don't have to worry about mandatory 2255 * file locking being disabled here because we only 2256 * clear the S_ISGID bit if the Group execute bit is 2257 * on, but if it was on then mandatory locking wouldn't 2258 * have been enabled. 2259 */ 2260 if (ip->i_d.di_mode & S_IXGRP) 2261 ip->i_d.di_mode &= ~S_ISGID; 2262 2263 xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 2264 } 2265 if (setprealloc) 2266 ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC; 2267 else if (clrprealloc) 2268 ip->i_d.di_flags &= ~XFS_DIFLAG_PREALLOC; 2269 2270 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 2271 if (attr_flags & XFS_ATTR_SYNC) 2272 xfs_trans_set_sync(tp); 2273 return xfs_trans_commit(tp, 0); 2274}