Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at v2.6.20-rc2 4706 lines 118 kB view raw
1/* 2 * Copyright (c) 2000-2006 Silicon Graphics, Inc. 3 * All Rights Reserved. 4 * 5 * This program is free software; you can redistribute it and/or 6 * modify it under the terms of the GNU General Public License as 7 * published by the Free Software Foundation. 8 * 9 * This program is distributed in the hope that it would be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 * GNU General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program; if not, write the Free Software Foundation, 16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 17 */ 18 19#include "xfs.h" 20#include "xfs_fs.h" 21#include "xfs_types.h" 22#include "xfs_bit.h" 23#include "xfs_log.h" 24#include "xfs_inum.h" 25#include "xfs_trans.h" 26#include "xfs_sb.h" 27#include "xfs_ag.h" 28#include "xfs_dir2.h" 29#include "xfs_dmapi.h" 30#include "xfs_mount.h" 31#include "xfs_da_btree.h" 32#include "xfs_bmap_btree.h" 33#include "xfs_alloc_btree.h" 34#include "xfs_ialloc_btree.h" 35#include "xfs_dir2_sf.h" 36#include "xfs_attr_sf.h" 37#include "xfs_dinode.h" 38#include "xfs_inode.h" 39#include "xfs_inode_item.h" 40#include "xfs_itable.h" 41#include "xfs_btree.h" 42#include "xfs_ialloc.h" 43#include "xfs_alloc.h" 44#include "xfs_bmap.h" 45#include "xfs_attr.h" 46#include "xfs_rw.h" 47#include "xfs_error.h" 48#include "xfs_quota.h" 49#include "xfs_utils.h" 50#include "xfs_rtalloc.h" 51#include "xfs_refcache.h" 52#include "xfs_trans_space.h" 53#include "xfs_log_priv.h" 54#include "xfs_mac.h" 55 56STATIC int 57xfs_open( 58 bhv_desc_t *bdp, 59 cred_t *credp) 60{ 61 int mode; 62 bhv_vnode_t *vp = BHV_TO_VNODE(bdp); 63 xfs_inode_t *ip = XFS_BHVTOI(bdp); 64 65 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) 66 return XFS_ERROR(EIO); 67 68 /* 69 * If it's a directory with any blocks, read-ahead block 0 70 * as we're almost certain to have the next operation be a read there. 71 */ 72 if (VN_ISDIR(vp) && ip->i_d.di_nextents > 0) { 73 mode = xfs_ilock_map_shared(ip); 74 if (ip->i_d.di_nextents > 0) 75 (void)xfs_da_reada_buf(NULL, ip, 0, XFS_DATA_FORK); 76 xfs_iunlock(ip, mode); 77 } 78 return 0; 79} 80 81STATIC int 82xfs_close( 83 bhv_desc_t *bdp, 84 int flags, 85 lastclose_t lastclose, 86 cred_t *credp) 87{ 88 bhv_vnode_t *vp = BHV_TO_VNODE(bdp); 89 xfs_inode_t *ip = XFS_BHVTOI(bdp); 90 91 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) 92 return XFS_ERROR(EIO); 93 94 if (lastclose != L_TRUE || !VN_ISREG(vp)) 95 return 0; 96 97 /* 98 * If we previously truncated this file and removed old data in 99 * the process, we want to initiate "early" writeout on the last 100 * close. This is an attempt to combat the notorious NULL files 101 * problem which is particularly noticable from a truncate down, 102 * buffered (re-)write (delalloc), followed by a crash. What we 103 * are effectively doing here is significantly reducing the time 104 * window where we'd otherwise be exposed to that problem. 105 */ 106 if (VUNTRUNCATE(vp) && VN_DIRTY(vp) && ip->i_delayed_blks > 0) 107 return bhv_vop_flush_pages(vp, 0, -1, XFS_B_ASYNC, FI_NONE); 108 return 0; 109} 110 111/* 112 * xfs_getattr 113 */ 114STATIC int 115xfs_getattr( 116 bhv_desc_t *bdp, 117 bhv_vattr_t *vap, 118 int flags, 119 cred_t *credp) 120{ 121 xfs_inode_t *ip; 122 xfs_mount_t *mp; 123 bhv_vnode_t *vp; 124 125 vp = BHV_TO_VNODE(bdp); 126 vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address); 127 128 ip = XFS_BHVTOI(bdp); 129 mp = ip->i_mount; 130 131 if (XFS_FORCED_SHUTDOWN(mp)) 132 return XFS_ERROR(EIO); 133 134 if (!(flags & ATTR_LAZY)) 135 xfs_ilock(ip, XFS_ILOCK_SHARED); 136 137 vap->va_size = ip->i_d.di_size; 138 if (vap->va_mask == XFS_AT_SIZE) 139 goto all_done; 140 141 vap->va_nblocks = 142 XFS_FSB_TO_BB(mp, ip->i_d.di_nblocks + ip->i_delayed_blks); 143 vap->va_nodeid = ip->i_ino; 144#if XFS_BIG_INUMS 145 vap->va_nodeid += mp->m_inoadd; 146#endif 147 vap->va_nlink = ip->i_d.di_nlink; 148 149 /* 150 * Quick exit for non-stat callers 151 */ 152 if ((vap->va_mask & 153 ~(XFS_AT_SIZE|XFS_AT_FSID|XFS_AT_NODEID| 154 XFS_AT_NLINK|XFS_AT_BLKSIZE)) == 0) 155 goto all_done; 156 157 /* 158 * Copy from in-core inode. 159 */ 160 vap->va_mode = ip->i_d.di_mode; 161 vap->va_uid = ip->i_d.di_uid; 162 vap->va_gid = ip->i_d.di_gid; 163 vap->va_projid = ip->i_d.di_projid; 164 165 /* 166 * Check vnode type block/char vs. everything else. 167 */ 168 switch (ip->i_d.di_mode & S_IFMT) { 169 case S_IFBLK: 170 case S_IFCHR: 171 vap->va_rdev = ip->i_df.if_u2.if_rdev; 172 vap->va_blocksize = BLKDEV_IOSIZE; 173 break; 174 default: 175 vap->va_rdev = 0; 176 177 if (!(ip->i_d.di_flags & XFS_DIFLAG_REALTIME)) { 178 vap->va_blocksize = xfs_preferred_iosize(mp); 179 } else { 180 181 /* 182 * If the file blocks are being allocated from a 183 * realtime partition, then return the inode's 184 * realtime extent size or the realtime volume's 185 * extent size. 186 */ 187 vap->va_blocksize = ip->i_d.di_extsize ? 188 (ip->i_d.di_extsize << mp->m_sb.sb_blocklog) : 189 (mp->m_sb.sb_rextsize << mp->m_sb.sb_blocklog); 190 } 191 break; 192 } 193 194 vn_atime_to_timespec(vp, &vap->va_atime); 195 vap->va_mtime.tv_sec = ip->i_d.di_mtime.t_sec; 196 vap->va_mtime.tv_nsec = ip->i_d.di_mtime.t_nsec; 197 vap->va_ctime.tv_sec = ip->i_d.di_ctime.t_sec; 198 vap->va_ctime.tv_nsec = ip->i_d.di_ctime.t_nsec; 199 200 /* 201 * Exit for stat callers. See if any of the rest of the fields 202 * to be filled in are needed. 203 */ 204 if ((vap->va_mask & 205 (XFS_AT_XFLAGS|XFS_AT_EXTSIZE|XFS_AT_NEXTENTS|XFS_AT_ANEXTENTS| 206 XFS_AT_GENCOUNT|XFS_AT_VCODE)) == 0) 207 goto all_done; 208 209 /* 210 * Convert di_flags to xflags. 211 */ 212 vap->va_xflags = xfs_ip2xflags(ip); 213 214 /* 215 * Exit for inode revalidate. See if any of the rest of 216 * the fields to be filled in are needed. 217 */ 218 if ((vap->va_mask & 219 (XFS_AT_EXTSIZE|XFS_AT_NEXTENTS|XFS_AT_ANEXTENTS| 220 XFS_AT_GENCOUNT|XFS_AT_VCODE)) == 0) 221 goto all_done; 222 223 vap->va_extsize = ip->i_d.di_extsize << mp->m_sb.sb_blocklog; 224 vap->va_nextents = 225 (ip->i_df.if_flags & XFS_IFEXTENTS) ? 226 ip->i_df.if_bytes / sizeof(xfs_bmbt_rec_t) : 227 ip->i_d.di_nextents; 228 if (ip->i_afp) 229 vap->va_anextents = 230 (ip->i_afp->if_flags & XFS_IFEXTENTS) ? 231 ip->i_afp->if_bytes / sizeof(xfs_bmbt_rec_t) : 232 ip->i_d.di_anextents; 233 else 234 vap->va_anextents = 0; 235 vap->va_gen = ip->i_d.di_gen; 236 237 all_done: 238 if (!(flags & ATTR_LAZY)) 239 xfs_iunlock(ip, XFS_ILOCK_SHARED); 240 return 0; 241} 242 243 244/* 245 * xfs_setattr 246 */ 247int 248xfs_setattr( 249 bhv_desc_t *bdp, 250 bhv_vattr_t *vap, 251 int flags, 252 cred_t *credp) 253{ 254 xfs_inode_t *ip; 255 xfs_trans_t *tp; 256 xfs_mount_t *mp; 257 int mask; 258 int code; 259 uint lock_flags; 260 uint commit_flags=0; 261 uid_t uid=0, iuid=0; 262 gid_t gid=0, igid=0; 263 int timeflags = 0; 264 bhv_vnode_t *vp; 265 xfs_prid_t projid=0, iprojid=0; 266 int mandlock_before, mandlock_after; 267 struct xfs_dquot *udqp, *gdqp, *olddquot1, *olddquot2; 268 int file_owner; 269 int need_iolock = 1; 270 271 vp = BHV_TO_VNODE(bdp); 272 vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address); 273 274 if (vp->v_vfsp->vfs_flag & VFS_RDONLY) 275 return XFS_ERROR(EROFS); 276 277 /* 278 * Cannot set certain attributes. 279 */ 280 mask = vap->va_mask; 281 if (mask & XFS_AT_NOSET) { 282 return XFS_ERROR(EINVAL); 283 } 284 285 ip = XFS_BHVTOI(bdp); 286 mp = ip->i_mount; 287 288 if (XFS_FORCED_SHUTDOWN(mp)) 289 return XFS_ERROR(EIO); 290 291 /* 292 * Timestamps do not need to be logged and hence do not 293 * need to be done within a transaction. 294 */ 295 if (mask & XFS_AT_UPDTIMES) { 296 ASSERT((mask & ~XFS_AT_UPDTIMES) == 0); 297 timeflags = ((mask & XFS_AT_UPDATIME) ? XFS_ICHGTIME_ACC : 0) | 298 ((mask & XFS_AT_UPDCTIME) ? XFS_ICHGTIME_CHG : 0) | 299 ((mask & XFS_AT_UPDMTIME) ? XFS_ICHGTIME_MOD : 0); 300 xfs_ichgtime(ip, timeflags); 301 return 0; 302 } 303 304 olddquot1 = olddquot2 = NULL; 305 udqp = gdqp = NULL; 306 307 /* 308 * If disk quotas is on, we make sure that the dquots do exist on disk, 309 * before we start any other transactions. Trying to do this later 310 * is messy. We don't care to take a readlock to look at the ids 311 * in inode here, because we can't hold it across the trans_reserve. 312 * If the IDs do change before we take the ilock, we're covered 313 * because the i_*dquot fields will get updated anyway. 314 */ 315 if (XFS_IS_QUOTA_ON(mp) && 316 (mask & (XFS_AT_UID|XFS_AT_GID|XFS_AT_PROJID))) { 317 uint qflags = 0; 318 319 if ((mask & XFS_AT_UID) && XFS_IS_UQUOTA_ON(mp)) { 320 uid = vap->va_uid; 321 qflags |= XFS_QMOPT_UQUOTA; 322 } else { 323 uid = ip->i_d.di_uid; 324 } 325 if ((mask & XFS_AT_GID) && XFS_IS_GQUOTA_ON(mp)) { 326 gid = vap->va_gid; 327 qflags |= XFS_QMOPT_GQUOTA; 328 } else { 329 gid = ip->i_d.di_gid; 330 } 331 if ((mask & XFS_AT_PROJID) && XFS_IS_PQUOTA_ON(mp)) { 332 projid = vap->va_projid; 333 qflags |= XFS_QMOPT_PQUOTA; 334 } else { 335 projid = ip->i_d.di_projid; 336 } 337 /* 338 * We take a reference when we initialize udqp and gdqp, 339 * so it is important that we never blindly double trip on 340 * the same variable. See xfs_create() for an example. 341 */ 342 ASSERT(udqp == NULL); 343 ASSERT(gdqp == NULL); 344 code = XFS_QM_DQVOPALLOC(mp, ip, uid, gid, projid, qflags, 345 &udqp, &gdqp); 346 if (code) 347 return code; 348 } 349 350 /* 351 * For the other attributes, we acquire the inode lock and 352 * first do an error checking pass. 353 */ 354 tp = NULL; 355 lock_flags = XFS_ILOCK_EXCL; 356 if (flags & ATTR_NOLOCK) 357 need_iolock = 0; 358 if (!(mask & XFS_AT_SIZE)) { 359 if ((mask != (XFS_AT_CTIME|XFS_AT_ATIME|XFS_AT_MTIME)) || 360 (mp->m_flags & XFS_MOUNT_WSYNC)) { 361 tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE); 362 commit_flags = 0; 363 if ((code = xfs_trans_reserve(tp, 0, 364 XFS_ICHANGE_LOG_RES(mp), 0, 365 0, 0))) { 366 lock_flags = 0; 367 goto error_return; 368 } 369 } 370 } else { 371 if (DM_EVENT_ENABLED (vp->v_vfsp, ip, DM_EVENT_TRUNCATE) && 372 !(flags & ATTR_DMI)) { 373 int dmflags = AT_DELAY_FLAG(flags) | DM_SEM_FLAG_WR; 374 code = XFS_SEND_DATA(mp, DM_EVENT_TRUNCATE, vp, 375 vap->va_size, 0, dmflags, NULL); 376 if (code) { 377 lock_flags = 0; 378 goto error_return; 379 } 380 } 381 if (need_iolock) 382 lock_flags |= XFS_IOLOCK_EXCL; 383 } 384 385 xfs_ilock(ip, lock_flags); 386 387 /* boolean: are we the file owner? */ 388 file_owner = (current_fsuid(credp) == ip->i_d.di_uid); 389 390 /* 391 * Change various properties of a file. 392 * Only the owner or users with CAP_FOWNER 393 * capability may do these things. 394 */ 395 if (mask & 396 (XFS_AT_MODE|XFS_AT_XFLAGS|XFS_AT_EXTSIZE|XFS_AT_UID| 397 XFS_AT_GID|XFS_AT_PROJID)) { 398 /* 399 * CAP_FOWNER overrides the following restrictions: 400 * 401 * The user ID of the calling process must be equal 402 * to the file owner ID, except in cases where the 403 * CAP_FSETID capability is applicable. 404 */ 405 if (!file_owner && !capable(CAP_FOWNER)) { 406 code = XFS_ERROR(EPERM); 407 goto error_return; 408 } 409 410 /* 411 * CAP_FSETID overrides the following restrictions: 412 * 413 * The effective user ID of the calling process shall match 414 * the file owner when setting the set-user-ID and 415 * set-group-ID bits on that file. 416 * 417 * The effective group ID or one of the supplementary group 418 * IDs of the calling process shall match the group owner of 419 * the file when setting the set-group-ID bit on that file 420 */ 421 if (mask & XFS_AT_MODE) { 422 mode_t m = 0; 423 424 if ((vap->va_mode & S_ISUID) && !file_owner) 425 m |= S_ISUID; 426 if ((vap->va_mode & S_ISGID) && 427 !in_group_p((gid_t)ip->i_d.di_gid)) 428 m |= S_ISGID; 429#if 0 430 /* Linux allows this, Irix doesn't. */ 431 if ((vap->va_mode & S_ISVTX) && !VN_ISDIR(vp)) 432 m |= S_ISVTX; 433#endif 434 if (m && !capable(CAP_FSETID)) 435 vap->va_mode &= ~m; 436 } 437 } 438 439 /* 440 * Change file ownership. Must be the owner or privileged. 441 * If the system was configured with the "restricted_chown" 442 * option, the owner is not permitted to give away the file, 443 * and can change the group id only to a group of which he 444 * or she is a member. 445 */ 446 if (mask & (XFS_AT_UID|XFS_AT_GID|XFS_AT_PROJID)) { 447 /* 448 * These IDs could have changed since we last looked at them. 449 * But, we're assured that if the ownership did change 450 * while we didn't have the inode locked, inode's dquot(s) 451 * would have changed also. 452 */ 453 iuid = ip->i_d.di_uid; 454 iprojid = ip->i_d.di_projid; 455 igid = ip->i_d.di_gid; 456 gid = (mask & XFS_AT_GID) ? vap->va_gid : igid; 457 uid = (mask & XFS_AT_UID) ? vap->va_uid : iuid; 458 projid = (mask & XFS_AT_PROJID) ? (xfs_prid_t)vap->va_projid : 459 iprojid; 460 461 /* 462 * CAP_CHOWN overrides the following restrictions: 463 * 464 * If _POSIX_CHOWN_RESTRICTED is defined, this capability 465 * shall override the restriction that a process cannot 466 * change the user ID of a file it owns and the restriction 467 * that the group ID supplied to the chown() function 468 * shall be equal to either the group ID or one of the 469 * supplementary group IDs of the calling process. 470 */ 471 if (restricted_chown && 472 (iuid != uid || (igid != gid && 473 !in_group_p((gid_t)gid))) && 474 !capable(CAP_CHOWN)) { 475 code = XFS_ERROR(EPERM); 476 goto error_return; 477 } 478 /* 479 * Do a quota reservation only if uid/projid/gid is actually 480 * going to change. 481 */ 482 if ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) || 483 (XFS_IS_PQUOTA_ON(mp) && iprojid != projid) || 484 (XFS_IS_GQUOTA_ON(mp) && igid != gid)) { 485 ASSERT(tp); 486 code = XFS_QM_DQVOPCHOWNRESV(mp, tp, ip, udqp, gdqp, 487 capable(CAP_FOWNER) ? 488 XFS_QMOPT_FORCE_RES : 0); 489 if (code) /* out of quota */ 490 goto error_return; 491 } 492 } 493 494 /* 495 * Truncate file. Must have write permission and not be a directory. 496 */ 497 if (mask & XFS_AT_SIZE) { 498 /* Short circuit the truncate case for zero length files */ 499 if ((vap->va_size == 0) && 500 (ip->i_d.di_size == 0) && (ip->i_d.di_nextents == 0)) { 501 xfs_iunlock(ip, XFS_ILOCK_EXCL); 502 lock_flags &= ~XFS_ILOCK_EXCL; 503 if (mask & XFS_AT_CTIME) 504 xfs_ichgtime(ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 505 code = 0; 506 goto error_return; 507 } 508 509 if (VN_ISDIR(vp)) { 510 code = XFS_ERROR(EISDIR); 511 goto error_return; 512 } else if (!VN_ISREG(vp)) { 513 code = XFS_ERROR(EINVAL); 514 goto error_return; 515 } 516 /* 517 * Make sure that the dquots are attached to the inode. 518 */ 519 if ((code = XFS_QM_DQATTACH(mp, ip, XFS_QMOPT_ILOCKED))) 520 goto error_return; 521 } 522 523 /* 524 * Change file access or modified times. 525 */ 526 if (mask & (XFS_AT_ATIME|XFS_AT_MTIME)) { 527 if (!file_owner) { 528 if ((flags & ATTR_UTIME) && 529 !capable(CAP_FOWNER)) { 530 code = XFS_ERROR(EPERM); 531 goto error_return; 532 } 533 } 534 } 535 536 /* 537 * Change extent size or realtime flag. 538 */ 539 if (mask & (XFS_AT_EXTSIZE|XFS_AT_XFLAGS)) { 540 /* 541 * Can't change extent size if any extents are allocated. 542 */ 543 if (ip->i_d.di_nextents && (mask & XFS_AT_EXTSIZE) && 544 ((ip->i_d.di_extsize << mp->m_sb.sb_blocklog) != 545 vap->va_extsize) ) { 546 code = XFS_ERROR(EINVAL); /* EFBIG? */ 547 goto error_return; 548 } 549 550 /* 551 * Can't change realtime flag if any extents are allocated. 552 */ 553 if ((ip->i_d.di_nextents || ip->i_delayed_blks) && 554 (mask & XFS_AT_XFLAGS) && 555 (ip->i_d.di_flags & XFS_DIFLAG_REALTIME) != 556 (vap->va_xflags & XFS_XFLAG_REALTIME)) { 557 code = XFS_ERROR(EINVAL); /* EFBIG? */ 558 goto error_return; 559 } 560 /* 561 * Extent size must be a multiple of the appropriate block 562 * size, if set at all. 563 */ 564 if ((mask & XFS_AT_EXTSIZE) && vap->va_extsize != 0) { 565 xfs_extlen_t size; 566 567 if ((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) || 568 ((mask & XFS_AT_XFLAGS) && 569 (vap->va_xflags & XFS_XFLAG_REALTIME))) { 570 size = mp->m_sb.sb_rextsize << 571 mp->m_sb.sb_blocklog; 572 } else { 573 size = mp->m_sb.sb_blocksize; 574 } 575 if (vap->va_extsize % size) { 576 code = XFS_ERROR(EINVAL); 577 goto error_return; 578 } 579 } 580 /* 581 * If realtime flag is set then must have realtime data. 582 */ 583 if ((mask & XFS_AT_XFLAGS) && 584 (vap->va_xflags & XFS_XFLAG_REALTIME)) { 585 if ((mp->m_sb.sb_rblocks == 0) || 586 (mp->m_sb.sb_rextsize == 0) || 587 (ip->i_d.di_extsize % mp->m_sb.sb_rextsize)) { 588 code = XFS_ERROR(EINVAL); 589 goto error_return; 590 } 591 } 592 593 /* 594 * Can't modify an immutable/append-only file unless 595 * we have appropriate permission. 596 */ 597 if ((mask & XFS_AT_XFLAGS) && 598 (ip->i_d.di_flags & 599 (XFS_DIFLAG_IMMUTABLE|XFS_DIFLAG_APPEND) || 600 (vap->va_xflags & 601 (XFS_XFLAG_IMMUTABLE | XFS_XFLAG_APPEND))) && 602 !capable(CAP_LINUX_IMMUTABLE)) { 603 code = XFS_ERROR(EPERM); 604 goto error_return; 605 } 606 } 607 608 /* 609 * Now we can make the changes. Before we join the inode 610 * to the transaction, if XFS_AT_SIZE is set then take care of 611 * the part of the truncation that must be done without the 612 * inode lock. This needs to be done before joining the inode 613 * to the transaction, because the inode cannot be unlocked 614 * once it is a part of the transaction. 615 */ 616 if (mask & XFS_AT_SIZE) { 617 code = 0; 618 if ((vap->va_size > ip->i_d.di_size) && 619 (flags & ATTR_NOSIZETOK) == 0) { 620 code = xfs_igrow_start(ip, vap->va_size, credp); 621 } 622 xfs_iunlock(ip, XFS_ILOCK_EXCL); 623 vn_iowait(vp); /* wait for the completion of any pending DIOs */ 624 if (!code) 625 code = xfs_itruncate_data(ip, vap->va_size); 626 if (code) { 627 ASSERT(tp == NULL); 628 lock_flags &= ~XFS_ILOCK_EXCL; 629 ASSERT(lock_flags == XFS_IOLOCK_EXCL); 630 goto error_return; 631 } 632 tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE); 633 if ((code = xfs_trans_reserve(tp, 0, 634 XFS_ITRUNCATE_LOG_RES(mp), 0, 635 XFS_TRANS_PERM_LOG_RES, 636 XFS_ITRUNCATE_LOG_COUNT))) { 637 xfs_trans_cancel(tp, 0); 638 if (need_iolock) 639 xfs_iunlock(ip, XFS_IOLOCK_EXCL); 640 return code; 641 } 642 commit_flags = XFS_TRANS_RELEASE_LOG_RES; 643 xfs_ilock(ip, XFS_ILOCK_EXCL); 644 } 645 646 if (tp) { 647 xfs_trans_ijoin(tp, ip, lock_flags); 648 xfs_trans_ihold(tp, ip); 649 } 650 651 /* determine whether mandatory locking mode changes */ 652 mandlock_before = MANDLOCK(vp, ip->i_d.di_mode); 653 654 /* 655 * Truncate file. Must have write permission and not be a directory. 656 */ 657 if (mask & XFS_AT_SIZE) { 658 if (vap->va_size > ip->i_d.di_size) { 659 xfs_igrow_finish(tp, ip, vap->va_size, 660 !(flags & ATTR_DMI)); 661 } else if ((vap->va_size <= ip->i_d.di_size) || 662 ((vap->va_size == 0) && ip->i_d.di_nextents)) { 663 /* 664 * signal a sync transaction unless 665 * we're truncating an already unlinked 666 * file on a wsync filesystem 667 */ 668 code = xfs_itruncate_finish(&tp, ip, 669 (xfs_fsize_t)vap->va_size, 670 XFS_DATA_FORK, 671 ((ip->i_d.di_nlink != 0 || 672 !(mp->m_flags & XFS_MOUNT_WSYNC)) 673 ? 1 : 0)); 674 if (code) 675 goto abort_return; 676 /* 677 * Truncated "down", so we're removing references 678 * to old data here - if we now delay flushing for 679 * a long time, we expose ourselves unduly to the 680 * notorious NULL files problem. So, we mark this 681 * vnode and flush it when the file is closed, and 682 * do not wait the usual (long) time for writeout. 683 */ 684 VTRUNCATE(vp); 685 } 686 /* 687 * Have to do this even if the file's size doesn't change. 688 */ 689 timeflags |= XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG; 690 } 691 692 /* 693 * Change file access modes. 694 */ 695 if (mask & XFS_AT_MODE) { 696 ip->i_d.di_mode &= S_IFMT; 697 ip->i_d.di_mode |= vap->va_mode & ~S_IFMT; 698 699 xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE); 700 timeflags |= XFS_ICHGTIME_CHG; 701 } 702 703 /* 704 * Change file ownership. Must be the owner or privileged. 705 * If the system was configured with the "restricted_chown" 706 * option, the owner is not permitted to give away the file, 707 * and can change the group id only to a group of which he 708 * or she is a member. 709 */ 710 if (mask & (XFS_AT_UID|XFS_AT_GID|XFS_AT_PROJID)) { 711 /* 712 * CAP_FSETID overrides the following restrictions: 713 * 714 * The set-user-ID and set-group-ID bits of a file will be 715 * cleared upon successful return from chown() 716 */ 717 if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) && 718 !capable(CAP_FSETID)) { 719 ip->i_d.di_mode &= ~(S_ISUID|S_ISGID); 720 } 721 722 /* 723 * Change the ownerships and register quota modifications 724 * in the transaction. 725 */ 726 if (iuid != uid) { 727 if (XFS_IS_UQUOTA_ON(mp)) { 728 ASSERT(mask & XFS_AT_UID); 729 ASSERT(udqp); 730 olddquot1 = XFS_QM_DQVOPCHOWN(mp, tp, ip, 731 &ip->i_udquot, udqp); 732 } 733 ip->i_d.di_uid = uid; 734 } 735 if (igid != gid) { 736 if (XFS_IS_GQUOTA_ON(mp)) { 737 ASSERT(!XFS_IS_PQUOTA_ON(mp)); 738 ASSERT(mask & XFS_AT_GID); 739 ASSERT(gdqp); 740 olddquot2 = XFS_QM_DQVOPCHOWN(mp, tp, ip, 741 &ip->i_gdquot, gdqp); 742 } 743 ip->i_d.di_gid = gid; 744 } 745 if (iprojid != projid) { 746 if (XFS_IS_PQUOTA_ON(mp)) { 747 ASSERT(!XFS_IS_GQUOTA_ON(mp)); 748 ASSERT(mask & XFS_AT_PROJID); 749 ASSERT(gdqp); 750 olddquot2 = XFS_QM_DQVOPCHOWN(mp, tp, ip, 751 &ip->i_gdquot, gdqp); 752 } 753 ip->i_d.di_projid = projid; 754 /* 755 * We may have to rev the inode as well as 756 * the superblock version number since projids didn't 757 * exist before DINODE_VERSION_2 and SB_VERSION_NLINK. 758 */ 759 if (ip->i_d.di_version == XFS_DINODE_VERSION_1) 760 xfs_bump_ino_vers2(tp, ip); 761 } 762 763 xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE); 764 timeflags |= XFS_ICHGTIME_CHG; 765 } 766 767 768 /* 769 * Change file access or modified times. 770 */ 771 if (mask & (XFS_AT_ATIME|XFS_AT_MTIME)) { 772 if (mask & XFS_AT_ATIME) { 773 ip->i_d.di_atime.t_sec = vap->va_atime.tv_sec; 774 ip->i_d.di_atime.t_nsec = vap->va_atime.tv_nsec; 775 ip->i_update_core = 1; 776 timeflags &= ~XFS_ICHGTIME_ACC; 777 } 778 if (mask & XFS_AT_MTIME) { 779 ip->i_d.di_mtime.t_sec = vap->va_mtime.tv_sec; 780 ip->i_d.di_mtime.t_nsec = vap->va_mtime.tv_nsec; 781 timeflags &= ~XFS_ICHGTIME_MOD; 782 timeflags |= XFS_ICHGTIME_CHG; 783 } 784 if (tp && (flags & ATTR_UTIME)) 785 xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE); 786 } 787 788 /* 789 * Change XFS-added attributes. 790 */ 791 if (mask & (XFS_AT_EXTSIZE|XFS_AT_XFLAGS)) { 792 if (mask & XFS_AT_EXTSIZE) { 793 /* 794 * Converting bytes to fs blocks. 795 */ 796 ip->i_d.di_extsize = vap->va_extsize >> 797 mp->m_sb.sb_blocklog; 798 } 799 if (mask & XFS_AT_XFLAGS) { 800 uint di_flags; 801 802 /* can't set PREALLOC this way, just preserve it */ 803 di_flags = (ip->i_d.di_flags & XFS_DIFLAG_PREALLOC); 804 if (vap->va_xflags & XFS_XFLAG_IMMUTABLE) 805 di_flags |= XFS_DIFLAG_IMMUTABLE; 806 if (vap->va_xflags & XFS_XFLAG_APPEND) 807 di_flags |= XFS_DIFLAG_APPEND; 808 if (vap->va_xflags & XFS_XFLAG_SYNC) 809 di_flags |= XFS_DIFLAG_SYNC; 810 if (vap->va_xflags & XFS_XFLAG_NOATIME) 811 di_flags |= XFS_DIFLAG_NOATIME; 812 if (vap->va_xflags & XFS_XFLAG_NODUMP) 813 di_flags |= XFS_DIFLAG_NODUMP; 814 if (vap->va_xflags & XFS_XFLAG_PROJINHERIT) 815 di_flags |= XFS_DIFLAG_PROJINHERIT; 816 if (vap->va_xflags & XFS_XFLAG_NODEFRAG) 817 di_flags |= XFS_DIFLAG_NODEFRAG; 818 if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) { 819 if (vap->va_xflags & XFS_XFLAG_RTINHERIT) 820 di_flags |= XFS_DIFLAG_RTINHERIT; 821 if (vap->va_xflags & XFS_XFLAG_NOSYMLINKS) 822 di_flags |= XFS_DIFLAG_NOSYMLINKS; 823 if (vap->va_xflags & XFS_XFLAG_EXTSZINHERIT) 824 di_flags |= XFS_DIFLAG_EXTSZINHERIT; 825 } else if ((ip->i_d.di_mode & S_IFMT) == S_IFREG) { 826 if (vap->va_xflags & XFS_XFLAG_REALTIME) { 827 di_flags |= XFS_DIFLAG_REALTIME; 828 ip->i_iocore.io_flags |= XFS_IOCORE_RT; 829 } else { 830 ip->i_iocore.io_flags &= ~XFS_IOCORE_RT; 831 } 832 if (vap->va_xflags & XFS_XFLAG_EXTSIZE) 833 di_flags |= XFS_DIFLAG_EXTSIZE; 834 } 835 ip->i_d.di_flags = di_flags; 836 } 837 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 838 timeflags |= XFS_ICHGTIME_CHG; 839 } 840 841 /* 842 * Change file inode change time only if XFS_AT_CTIME set 843 * AND we have been called by a DMI function. 844 */ 845 846 if ( (flags & ATTR_DMI) && (mask & XFS_AT_CTIME) ) { 847 ip->i_d.di_ctime.t_sec = vap->va_ctime.tv_sec; 848 ip->i_d.di_ctime.t_nsec = vap->va_ctime.tv_nsec; 849 ip->i_update_core = 1; 850 timeflags &= ~XFS_ICHGTIME_CHG; 851 } 852 853 /* 854 * Send out timestamp changes that need to be set to the 855 * current time. Not done when called by a DMI function. 856 */ 857 if (timeflags && !(flags & ATTR_DMI)) 858 xfs_ichgtime(ip, timeflags); 859 860 XFS_STATS_INC(xs_ig_attrchg); 861 862 /* 863 * If this is a synchronous mount, make sure that the 864 * transaction goes to disk before returning to the user. 865 * This is slightly sub-optimal in that truncates require 866 * two sync transactions instead of one for wsync filesystems. 867 * One for the truncate and one for the timestamps since we 868 * don't want to change the timestamps unless we're sure the 869 * truncate worked. Truncates are less than 1% of the laddis 870 * mix so this probably isn't worth the trouble to optimize. 871 */ 872 code = 0; 873 if (tp) { 874 if (mp->m_flags & XFS_MOUNT_WSYNC) 875 xfs_trans_set_sync(tp); 876 877 code = xfs_trans_commit(tp, commit_flags, NULL); 878 } 879 880 /* 881 * If the (regular) file's mandatory locking mode changed, then 882 * notify the vnode. We do this under the inode lock to prevent 883 * racing calls to vop_vnode_change. 884 */ 885 mandlock_after = MANDLOCK(vp, ip->i_d.di_mode); 886 if (mandlock_before != mandlock_after) { 887 bhv_vop_vnode_change(vp, VCHANGE_FLAGS_ENF_LOCKING, 888 mandlock_after); 889 } 890 891 xfs_iunlock(ip, lock_flags); 892 893 /* 894 * Release any dquot(s) the inode had kept before chown. 895 */ 896 XFS_QM_DQRELE(mp, olddquot1); 897 XFS_QM_DQRELE(mp, olddquot2); 898 XFS_QM_DQRELE(mp, udqp); 899 XFS_QM_DQRELE(mp, gdqp); 900 901 if (code) { 902 return code; 903 } 904 905 if (DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_ATTRIBUTE) && 906 !(flags & ATTR_DMI)) { 907 (void) XFS_SEND_NAMESP(mp, DM_EVENT_ATTRIBUTE, vp, DM_RIGHT_NULL, 908 NULL, DM_RIGHT_NULL, NULL, NULL, 909 0, 0, AT_DELAY_FLAG(flags)); 910 } 911 return 0; 912 913 abort_return: 914 commit_flags |= XFS_TRANS_ABORT; 915 /* FALLTHROUGH */ 916 error_return: 917 XFS_QM_DQRELE(mp, udqp); 918 XFS_QM_DQRELE(mp, gdqp); 919 if (tp) { 920 xfs_trans_cancel(tp, commit_flags); 921 } 922 if (lock_flags != 0) { 923 xfs_iunlock(ip, lock_flags); 924 } 925 return code; 926} 927 928 929/* 930 * xfs_access 931 * Null conversion from vnode mode bits to inode mode bits, as in efs. 932 */ 933STATIC int 934xfs_access( 935 bhv_desc_t *bdp, 936 int mode, 937 cred_t *credp) 938{ 939 xfs_inode_t *ip; 940 int error; 941 942 vn_trace_entry(BHV_TO_VNODE(bdp), __FUNCTION__, 943 (inst_t *)__return_address); 944 945 ip = XFS_BHVTOI(bdp); 946 xfs_ilock(ip, XFS_ILOCK_SHARED); 947 error = xfs_iaccess(ip, mode, credp); 948 xfs_iunlock(ip, XFS_ILOCK_SHARED); 949 return error; 950} 951 952 953/* 954 * The maximum pathlen is 1024 bytes. Since the minimum file system 955 * blocksize is 512 bytes, we can get a max of 2 extents back from 956 * bmapi. 957 */ 958#define SYMLINK_MAPS 2 959 960/* 961 * xfs_readlink 962 * 963 */ 964STATIC int 965xfs_readlink( 966 bhv_desc_t *bdp, 967 uio_t *uiop, 968 int ioflags, 969 cred_t *credp) 970{ 971 xfs_inode_t *ip; 972 int count; 973 xfs_off_t offset; 974 int pathlen; 975 bhv_vnode_t *vp; 976 int error = 0; 977 xfs_mount_t *mp; 978 int nmaps; 979 xfs_bmbt_irec_t mval[SYMLINK_MAPS]; 980 xfs_daddr_t d; 981 int byte_cnt; 982 int n; 983 xfs_buf_t *bp; 984 985 vp = BHV_TO_VNODE(bdp); 986 vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address); 987 988 ip = XFS_BHVTOI(bdp); 989 mp = ip->i_mount; 990 991 if (XFS_FORCED_SHUTDOWN(mp)) 992 return XFS_ERROR(EIO); 993 994 xfs_ilock(ip, XFS_ILOCK_SHARED); 995 996 ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFLNK); 997 998 offset = uiop->uio_offset; 999 count = uiop->uio_resid; 1000 1001 if (offset < 0) { 1002 error = XFS_ERROR(EINVAL); 1003 goto error_return; 1004 } 1005 if (count <= 0) { 1006 error = 0; 1007 goto error_return; 1008 } 1009 1010 /* 1011 * See if the symlink is stored inline. 1012 */ 1013 pathlen = (int)ip->i_d.di_size; 1014 1015 if (ip->i_df.if_flags & XFS_IFINLINE) { 1016 error = xfs_uio_read(ip->i_df.if_u1.if_data, pathlen, uiop); 1017 } 1018 else { 1019 /* 1020 * Symlink not inline. Call bmap to get it in. 1021 */ 1022 nmaps = SYMLINK_MAPS; 1023 1024 error = xfs_bmapi(NULL, ip, 0, XFS_B_TO_FSB(mp, pathlen), 1025 0, NULL, 0, mval, &nmaps, NULL, NULL); 1026 1027 if (error) { 1028 goto error_return; 1029 } 1030 1031 for (n = 0; n < nmaps; n++) { 1032 d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock); 1033 byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount); 1034 bp = xfs_buf_read(mp->m_ddev_targp, d, 1035 BTOBB(byte_cnt), 0); 1036 error = XFS_BUF_GETERROR(bp); 1037 if (error) { 1038 xfs_ioerror_alert("xfs_readlink", 1039 ip->i_mount, bp, XFS_BUF_ADDR(bp)); 1040 xfs_buf_relse(bp); 1041 goto error_return; 1042 } 1043 if (pathlen < byte_cnt) 1044 byte_cnt = pathlen; 1045 pathlen -= byte_cnt; 1046 1047 error = xfs_uio_read(XFS_BUF_PTR(bp), byte_cnt, uiop); 1048 xfs_buf_relse (bp); 1049 } 1050 1051 } 1052 1053error_return: 1054 xfs_iunlock(ip, XFS_ILOCK_SHARED); 1055 return error; 1056} 1057 1058 1059/* 1060 * xfs_fsync 1061 * 1062 * This is called to sync the inode and its data out to disk. 1063 * We need to hold the I/O lock while flushing the data, and 1064 * the inode lock while flushing the inode. The inode lock CANNOT 1065 * be held while flushing the data, so acquire after we're done 1066 * with that. 1067 */ 1068STATIC int 1069xfs_fsync( 1070 bhv_desc_t *bdp, 1071 int flag, 1072 cred_t *credp, 1073 xfs_off_t start, 1074 xfs_off_t stop) 1075{ 1076 xfs_inode_t *ip; 1077 xfs_trans_t *tp; 1078 int error; 1079 int log_flushed = 0, changed = 1; 1080 1081 vn_trace_entry(BHV_TO_VNODE(bdp), 1082 __FUNCTION__, (inst_t *)__return_address); 1083 1084 ip = XFS_BHVTOI(bdp); 1085 1086 ASSERT(start >= 0 && stop >= -1); 1087 1088 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) 1089 return XFS_ERROR(EIO); 1090 1091 /* 1092 * We always need to make sure that the required inode state 1093 * is safe on disk. The vnode might be clean but because 1094 * of committed transactions that haven't hit the disk yet. 1095 * Likewise, there could be unflushed non-transactional 1096 * changes to the inode core that have to go to disk. 1097 * 1098 * The following code depends on one assumption: that 1099 * any transaction that changes an inode logs the core 1100 * because it has to change some field in the inode core 1101 * (typically nextents or nblocks). That assumption 1102 * implies that any transactions against an inode will 1103 * catch any non-transactional updates. If inode-altering 1104 * transactions exist that violate this assumption, the 1105 * code breaks. Right now, it figures that if the involved 1106 * update_* field is clear and the inode is unpinned, the 1107 * inode is clean. Either it's been flushed or it's been 1108 * committed and the commit has hit the disk unpinning the inode. 1109 * (Note that xfs_inode_item_format() called at commit clears 1110 * the update_* fields.) 1111 */ 1112 xfs_ilock(ip, XFS_ILOCK_SHARED); 1113 1114 /* If we are flushing data then we care about update_size 1115 * being set, otherwise we care about update_core 1116 */ 1117 if ((flag & FSYNC_DATA) ? 1118 (ip->i_update_size == 0) : 1119 (ip->i_update_core == 0)) { 1120 /* 1121 * Timestamps/size haven't changed since last inode 1122 * flush or inode transaction commit. That means 1123 * either nothing got written or a transaction 1124 * committed which caught the updates. If the 1125 * latter happened and the transaction hasn't 1126 * hit the disk yet, the inode will be still 1127 * be pinned. If it is, force the log. 1128 */ 1129 1130 xfs_iunlock(ip, XFS_ILOCK_SHARED); 1131 1132 if (xfs_ipincount(ip)) { 1133 _xfs_log_force(ip->i_mount, (xfs_lsn_t)0, 1134 XFS_LOG_FORCE | 1135 ((flag & FSYNC_WAIT) 1136 ? XFS_LOG_SYNC : 0), 1137 &log_flushed); 1138 } else { 1139 /* 1140 * If the inode is not pinned and nothing 1141 * has changed we don't need to flush the 1142 * cache. 1143 */ 1144 changed = 0; 1145 } 1146 error = 0; 1147 } else { 1148 /* 1149 * Kick off a transaction to log the inode 1150 * core to get the updates. Make it 1151 * sync if FSYNC_WAIT is passed in (which 1152 * is done by everybody but specfs). The 1153 * sync transaction will also force the log. 1154 */ 1155 xfs_iunlock(ip, XFS_ILOCK_SHARED); 1156 tp = xfs_trans_alloc(ip->i_mount, XFS_TRANS_FSYNC_TS); 1157 if ((error = xfs_trans_reserve(tp, 0, 1158 XFS_FSYNC_TS_LOG_RES(ip->i_mount), 1159 0, 0, 0))) { 1160 xfs_trans_cancel(tp, 0); 1161 return error; 1162 } 1163 xfs_ilock(ip, XFS_ILOCK_EXCL); 1164 1165 /* 1166 * Note - it's possible that we might have pushed 1167 * ourselves out of the way during trans_reserve 1168 * which would flush the inode. But there's no 1169 * guarantee that the inode buffer has actually 1170 * gone out yet (it's delwri). Plus the buffer 1171 * could be pinned anyway if it's part of an 1172 * inode in another recent transaction. So we 1173 * play it safe and fire off the transaction anyway. 1174 */ 1175 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 1176 xfs_trans_ihold(tp, ip); 1177 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 1178 if (flag & FSYNC_WAIT) 1179 xfs_trans_set_sync(tp); 1180 error = _xfs_trans_commit(tp, 0, NULL, &log_flushed); 1181 1182 xfs_iunlock(ip, XFS_ILOCK_EXCL); 1183 } 1184 1185 if ((ip->i_mount->m_flags & XFS_MOUNT_BARRIER) && changed) { 1186 /* 1187 * If the log write didn't issue an ordered tag we need 1188 * to flush the disk cache for the data device now. 1189 */ 1190 if (!log_flushed) 1191 xfs_blkdev_issue_flush(ip->i_mount->m_ddev_targp); 1192 1193 /* 1194 * If this inode is on the RT dev we need to flush that 1195 * cache as well. 1196 */ 1197 if (ip->i_d.di_flags & XFS_DIFLAG_REALTIME) 1198 xfs_blkdev_issue_flush(ip->i_mount->m_rtdev_targp); 1199 } 1200 1201 return error; 1202} 1203 1204/* 1205 * This is called by xfs_inactive to free any blocks beyond eof, 1206 * when the link count isn't zero. 1207 */ 1208STATIC int 1209xfs_inactive_free_eofblocks( 1210 xfs_mount_t *mp, 1211 xfs_inode_t *ip) 1212{ 1213 xfs_trans_t *tp; 1214 int error; 1215 xfs_fileoff_t end_fsb; 1216 xfs_fileoff_t last_fsb; 1217 xfs_filblks_t map_len; 1218 int nimaps; 1219 xfs_bmbt_irec_t imap; 1220 1221 /* 1222 * Figure out if there are any blocks beyond the end 1223 * of the file. If not, then there is nothing to do. 1224 */ 1225 end_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)ip->i_d.di_size)); 1226 last_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp)); 1227 map_len = last_fsb - end_fsb; 1228 if (map_len <= 0) 1229 return 0; 1230 1231 nimaps = 1; 1232 xfs_ilock(ip, XFS_ILOCK_SHARED); 1233 error = XFS_BMAPI(mp, NULL, &ip->i_iocore, end_fsb, map_len, 0, 1234 NULL, 0, &imap, &nimaps, NULL, NULL); 1235 xfs_iunlock(ip, XFS_ILOCK_SHARED); 1236 1237 if (!error && (nimaps != 0) && 1238 (imap.br_startblock != HOLESTARTBLOCK || 1239 ip->i_delayed_blks)) { 1240 /* 1241 * Attach the dquots to the inode up front. 1242 */ 1243 if ((error = XFS_QM_DQATTACH(mp, ip, 0))) 1244 return error; 1245 1246 /* 1247 * There are blocks after the end of file. 1248 * Free them up now by truncating the file to 1249 * its current size. 1250 */ 1251 tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE); 1252 1253 /* 1254 * Do the xfs_itruncate_start() call before 1255 * reserving any log space because 1256 * itruncate_start will call into the buffer 1257 * cache and we can't 1258 * do that within a transaction. 1259 */ 1260 xfs_ilock(ip, XFS_IOLOCK_EXCL); 1261 xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE, 1262 ip->i_d.di_size); 1263 1264 error = xfs_trans_reserve(tp, 0, 1265 XFS_ITRUNCATE_LOG_RES(mp), 1266 0, XFS_TRANS_PERM_LOG_RES, 1267 XFS_ITRUNCATE_LOG_COUNT); 1268 if (error) { 1269 ASSERT(XFS_FORCED_SHUTDOWN(mp)); 1270 xfs_trans_cancel(tp, 0); 1271 xfs_iunlock(ip, XFS_IOLOCK_EXCL); 1272 return error; 1273 } 1274 1275 xfs_ilock(ip, XFS_ILOCK_EXCL); 1276 xfs_trans_ijoin(tp, ip, 1277 XFS_IOLOCK_EXCL | 1278 XFS_ILOCK_EXCL); 1279 xfs_trans_ihold(tp, ip); 1280 1281 error = xfs_itruncate_finish(&tp, ip, 1282 ip->i_d.di_size, 1283 XFS_DATA_FORK, 1284 0); 1285 /* 1286 * If we get an error at this point we 1287 * simply don't bother truncating the file. 1288 */ 1289 if (error) { 1290 xfs_trans_cancel(tp, 1291 (XFS_TRANS_RELEASE_LOG_RES | 1292 XFS_TRANS_ABORT)); 1293 } else { 1294 error = xfs_trans_commit(tp, 1295 XFS_TRANS_RELEASE_LOG_RES, 1296 NULL); 1297 } 1298 xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); 1299 } 1300 return error; 1301} 1302 1303/* 1304 * Free a symlink that has blocks associated with it. 1305 */ 1306STATIC int 1307xfs_inactive_symlink_rmt( 1308 xfs_inode_t *ip, 1309 xfs_trans_t **tpp) 1310{ 1311 xfs_buf_t *bp; 1312 int committed; 1313 int done; 1314 int error; 1315 xfs_fsblock_t first_block; 1316 xfs_bmap_free_t free_list; 1317 int i; 1318 xfs_mount_t *mp; 1319 xfs_bmbt_irec_t mval[SYMLINK_MAPS]; 1320 int nmaps; 1321 xfs_trans_t *ntp; 1322 int size; 1323 xfs_trans_t *tp; 1324 1325 tp = *tpp; 1326 mp = ip->i_mount; 1327 ASSERT(ip->i_d.di_size > XFS_IFORK_DSIZE(ip)); 1328 /* 1329 * We're freeing a symlink that has some 1330 * blocks allocated to it. Free the 1331 * blocks here. We know that we've got 1332 * either 1 or 2 extents and that we can 1333 * free them all in one bunmapi call. 1334 */ 1335 ASSERT(ip->i_d.di_nextents > 0 && ip->i_d.di_nextents <= 2); 1336 if ((error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0, 1337 XFS_TRANS_PERM_LOG_RES, XFS_ITRUNCATE_LOG_COUNT))) { 1338 ASSERT(XFS_FORCED_SHUTDOWN(mp)); 1339 xfs_trans_cancel(tp, 0); 1340 *tpp = NULL; 1341 return error; 1342 } 1343 /* 1344 * Lock the inode, fix the size, and join it to the transaction. 1345 * Hold it so in the normal path, we still have it locked for 1346 * the second transaction. In the error paths we need it 1347 * held so the cancel won't rele it, see below. 1348 */ 1349 xfs_ilock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); 1350 size = (int)ip->i_d.di_size; 1351 ip->i_d.di_size = 0; 1352 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); 1353 xfs_trans_ihold(tp, ip); 1354 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 1355 /* 1356 * Find the block(s) so we can inval and unmap them. 1357 */ 1358 done = 0; 1359 XFS_BMAP_INIT(&free_list, &first_block); 1360 nmaps = ARRAY_SIZE(mval); 1361 if ((error = xfs_bmapi(tp, ip, 0, XFS_B_TO_FSB(mp, size), 1362 XFS_BMAPI_METADATA, &first_block, 0, mval, &nmaps, 1363 &free_list, NULL))) 1364 goto error0; 1365 /* 1366 * Invalidate the block(s). 1367 */ 1368 for (i = 0; i < nmaps; i++) { 1369 bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, 1370 XFS_FSB_TO_DADDR(mp, mval[i].br_startblock), 1371 XFS_FSB_TO_BB(mp, mval[i].br_blockcount), 0); 1372 xfs_trans_binval(tp, bp); 1373 } 1374 /* 1375 * Unmap the dead block(s) to the free_list. 1376 */ 1377 if ((error = xfs_bunmapi(tp, ip, 0, size, XFS_BMAPI_METADATA, nmaps, 1378 &first_block, &free_list, NULL, &done))) 1379 goto error1; 1380 ASSERT(done); 1381 /* 1382 * Commit the first transaction. This logs the EFI and the inode. 1383 */ 1384 if ((error = xfs_bmap_finish(&tp, &free_list, first_block, &committed))) 1385 goto error1; 1386 /* 1387 * The transaction must have been committed, since there were 1388 * actually extents freed by xfs_bunmapi. See xfs_bmap_finish. 1389 * The new tp has the extent freeing and EFDs. 1390 */ 1391 ASSERT(committed); 1392 /* 1393 * The first xact was committed, so add the inode to the new one. 1394 * Mark it dirty so it will be logged and moved forward in the log as 1395 * part of every commit. 1396 */ 1397 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); 1398 xfs_trans_ihold(tp, ip); 1399 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 1400 /* 1401 * Get a new, empty transaction to return to our caller. 1402 */ 1403 ntp = xfs_trans_dup(tp); 1404 /* 1405 * Commit the transaction containing extent freeing and EFDs. 1406 * If we get an error on the commit here or on the reserve below, 1407 * we need to unlock the inode since the new transaction doesn't 1408 * have the inode attached. 1409 */ 1410 error = xfs_trans_commit(tp, 0, NULL); 1411 tp = ntp; 1412 if (error) { 1413 ASSERT(XFS_FORCED_SHUTDOWN(mp)); 1414 goto error0; 1415 } 1416 /* 1417 * Remove the memory for extent descriptions (just bookkeeping). 1418 */ 1419 if (ip->i_df.if_bytes) 1420 xfs_idata_realloc(ip, -ip->i_df.if_bytes, XFS_DATA_FORK); 1421 ASSERT(ip->i_df.if_bytes == 0); 1422 /* 1423 * Put an itruncate log reservation in the new transaction 1424 * for our caller. 1425 */ 1426 if ((error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0, 1427 XFS_TRANS_PERM_LOG_RES, XFS_ITRUNCATE_LOG_COUNT))) { 1428 ASSERT(XFS_FORCED_SHUTDOWN(mp)); 1429 goto error0; 1430 } 1431 /* 1432 * Return with the inode locked but not joined to the transaction. 1433 */ 1434 *tpp = tp; 1435 return 0; 1436 1437 error1: 1438 xfs_bmap_cancel(&free_list); 1439 error0: 1440 /* 1441 * Have to come here with the inode locked and either 1442 * (held and in the transaction) or (not in the transaction). 1443 * If the inode isn't held then cancel would iput it, but 1444 * that's wrong since this is inactive and the vnode ref 1445 * count is 0 already. 1446 * Cancel won't do anything to the inode if held, but it still 1447 * needs to be locked until the cancel is done, if it was 1448 * joined to the transaction. 1449 */ 1450 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); 1451 xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); 1452 *tpp = NULL; 1453 return error; 1454 1455} 1456 1457STATIC int 1458xfs_inactive_symlink_local( 1459 xfs_inode_t *ip, 1460 xfs_trans_t **tpp) 1461{ 1462 int error; 1463 1464 ASSERT(ip->i_d.di_size <= XFS_IFORK_DSIZE(ip)); 1465 /* 1466 * We're freeing a symlink which fit into 1467 * the inode. Just free the memory used 1468 * to hold the old symlink. 1469 */ 1470 error = xfs_trans_reserve(*tpp, 0, 1471 XFS_ITRUNCATE_LOG_RES(ip->i_mount), 1472 0, XFS_TRANS_PERM_LOG_RES, 1473 XFS_ITRUNCATE_LOG_COUNT); 1474 1475 if (error) { 1476 xfs_trans_cancel(*tpp, 0); 1477 *tpp = NULL; 1478 return error; 1479 } 1480 xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); 1481 1482 /* 1483 * Zero length symlinks _can_ exist. 1484 */ 1485 if (ip->i_df.if_bytes > 0) { 1486 xfs_idata_realloc(ip, 1487 -(ip->i_df.if_bytes), 1488 XFS_DATA_FORK); 1489 ASSERT(ip->i_df.if_bytes == 0); 1490 } 1491 return 0; 1492} 1493 1494STATIC int 1495xfs_inactive_attrs( 1496 xfs_inode_t *ip, 1497 xfs_trans_t **tpp) 1498{ 1499 xfs_trans_t *tp; 1500 int error; 1501 xfs_mount_t *mp; 1502 1503 ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE)); 1504 tp = *tpp; 1505 mp = ip->i_mount; 1506 ASSERT(ip->i_d.di_forkoff != 0); 1507 xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL); 1508 xfs_iunlock(ip, XFS_ILOCK_EXCL); 1509 1510 error = xfs_attr_inactive(ip); 1511 if (error) { 1512 *tpp = NULL; 1513 xfs_iunlock(ip, XFS_IOLOCK_EXCL); 1514 return error; /* goto out */ 1515 } 1516 1517 tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE); 1518 error = xfs_trans_reserve(tp, 0, 1519 XFS_IFREE_LOG_RES(mp), 1520 0, XFS_TRANS_PERM_LOG_RES, 1521 XFS_INACTIVE_LOG_COUNT); 1522 if (error) { 1523 ASSERT(XFS_FORCED_SHUTDOWN(mp)); 1524 xfs_trans_cancel(tp, 0); 1525 *tpp = NULL; 1526 xfs_iunlock(ip, XFS_IOLOCK_EXCL); 1527 return error; 1528 } 1529 1530 xfs_ilock(ip, XFS_ILOCK_EXCL); 1531 xfs_trans_ijoin(tp, ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); 1532 xfs_trans_ihold(tp, ip); 1533 xfs_idestroy_fork(ip, XFS_ATTR_FORK); 1534 1535 ASSERT(ip->i_d.di_anextents == 0); 1536 1537 *tpp = tp; 1538 return 0; 1539} 1540 1541STATIC int 1542xfs_release( 1543 bhv_desc_t *bdp) 1544{ 1545 xfs_inode_t *ip; 1546 bhv_vnode_t *vp; 1547 xfs_mount_t *mp; 1548 int error; 1549 1550 vp = BHV_TO_VNODE(bdp); 1551 ip = XFS_BHVTOI(bdp); 1552 mp = ip->i_mount; 1553 1554 if (!VN_ISREG(vp) || (ip->i_d.di_mode == 0)) 1555 return 0; 1556 1557 /* If this is a read-only mount, don't do this (would generate I/O) */ 1558 if (vp->v_vfsp->vfs_flag & VFS_RDONLY) 1559 return 0; 1560 1561#ifdef HAVE_REFCACHE 1562 /* If we are in the NFS reference cache then don't do this now */ 1563 if (ip->i_refcache) 1564 return 0; 1565#endif 1566 1567 if (ip->i_d.di_nlink != 0) { 1568 if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) && 1569 ((ip->i_d.di_size > 0) || (VN_CACHED(vp) > 0 || 1570 ip->i_delayed_blks > 0)) && 1571 (ip->i_df.if_flags & XFS_IFEXTENTS)) && 1572 (!(ip->i_d.di_flags & 1573 (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))) { 1574 if ((error = xfs_inactive_free_eofblocks(mp, ip))) 1575 return error; 1576 /* Update linux inode block count after free above */ 1577 vn_to_inode(vp)->i_blocks = XFS_FSB_TO_BB(mp, 1578 ip->i_d.di_nblocks + ip->i_delayed_blks); 1579 } 1580 } 1581 1582 return 0; 1583} 1584 1585/* 1586 * xfs_inactive 1587 * 1588 * This is called when the vnode reference count for the vnode 1589 * goes to zero. If the file has been unlinked, then it must 1590 * now be truncated. Also, we clear all of the read-ahead state 1591 * kept for the inode here since the file is now closed. 1592 */ 1593STATIC int 1594xfs_inactive( 1595 bhv_desc_t *bdp, 1596 cred_t *credp) 1597{ 1598 xfs_inode_t *ip; 1599 bhv_vnode_t *vp; 1600 xfs_bmap_free_t free_list; 1601 xfs_fsblock_t first_block; 1602 int committed; 1603 xfs_trans_t *tp; 1604 xfs_mount_t *mp; 1605 int error; 1606 int truncate; 1607 1608 vp = BHV_TO_VNODE(bdp); 1609 vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address); 1610 1611 ip = XFS_BHVTOI(bdp); 1612 1613 /* 1614 * If the inode is already free, then there can be nothing 1615 * to clean up here. 1616 */ 1617 if (ip->i_d.di_mode == 0 || VN_BAD(vp)) { 1618 ASSERT(ip->i_df.if_real_bytes == 0); 1619 ASSERT(ip->i_df.if_broot_bytes == 0); 1620 return VN_INACTIVE_CACHE; 1621 } 1622 1623 /* 1624 * Only do a truncate if it's a regular file with 1625 * some actual space in it. It's OK to look at the 1626 * inode's fields without the lock because we're the 1627 * only one with a reference to the inode. 1628 */ 1629 truncate = ((ip->i_d.di_nlink == 0) && 1630 ((ip->i_d.di_size != 0) || (ip->i_d.di_nextents > 0) || 1631 (ip->i_delayed_blks > 0)) && 1632 ((ip->i_d.di_mode & S_IFMT) == S_IFREG)); 1633 1634 mp = ip->i_mount; 1635 1636 if (ip->i_d.di_nlink == 0 && 1637 DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_DESTROY)) { 1638 (void) XFS_SEND_DESTROY(mp, vp, DM_RIGHT_NULL); 1639 } 1640 1641 error = 0; 1642 1643 /* If this is a read-only mount, don't do this (would generate I/O) */ 1644 if (vp->v_vfsp->vfs_flag & VFS_RDONLY) 1645 goto out; 1646 1647 if (ip->i_d.di_nlink != 0) { 1648 if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) && 1649 ((ip->i_d.di_size > 0) || (VN_CACHED(vp) > 0 || 1650 ip->i_delayed_blks > 0)) && 1651 (ip->i_df.if_flags & XFS_IFEXTENTS) && 1652 (!(ip->i_d.di_flags & 1653 (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) || 1654 (ip->i_delayed_blks != 0)))) { 1655 if ((error = xfs_inactive_free_eofblocks(mp, ip))) 1656 return VN_INACTIVE_CACHE; 1657 /* Update linux inode block count after free above */ 1658 vn_to_inode(vp)->i_blocks = XFS_FSB_TO_BB(mp, 1659 ip->i_d.di_nblocks + ip->i_delayed_blks); 1660 } 1661 goto out; 1662 } 1663 1664 ASSERT(ip->i_d.di_nlink == 0); 1665 1666 if ((error = XFS_QM_DQATTACH(mp, ip, 0))) 1667 return VN_INACTIVE_CACHE; 1668 1669 tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE); 1670 if (truncate) { 1671 /* 1672 * Do the xfs_itruncate_start() call before 1673 * reserving any log space because itruncate_start 1674 * will call into the buffer cache and we can't 1675 * do that within a transaction. 1676 */ 1677 xfs_ilock(ip, XFS_IOLOCK_EXCL); 1678 1679 xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE, 0); 1680 1681 error = xfs_trans_reserve(tp, 0, 1682 XFS_ITRUNCATE_LOG_RES(mp), 1683 0, XFS_TRANS_PERM_LOG_RES, 1684 XFS_ITRUNCATE_LOG_COUNT); 1685 if (error) { 1686 /* Don't call itruncate_cleanup */ 1687 ASSERT(XFS_FORCED_SHUTDOWN(mp)); 1688 xfs_trans_cancel(tp, 0); 1689 xfs_iunlock(ip, XFS_IOLOCK_EXCL); 1690 return VN_INACTIVE_CACHE; 1691 } 1692 1693 xfs_ilock(ip, XFS_ILOCK_EXCL); 1694 xfs_trans_ijoin(tp, ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); 1695 xfs_trans_ihold(tp, ip); 1696 1697 /* 1698 * normally, we have to run xfs_itruncate_finish sync. 1699 * But if filesystem is wsync and we're in the inactive 1700 * path, then we know that nlink == 0, and that the 1701 * xaction that made nlink == 0 is permanently committed 1702 * since xfs_remove runs as a synchronous transaction. 1703 */ 1704 error = xfs_itruncate_finish(&tp, ip, 0, XFS_DATA_FORK, 1705 (!(mp->m_flags & XFS_MOUNT_WSYNC) ? 1 : 0)); 1706 1707 if (error) { 1708 xfs_trans_cancel(tp, 1709 XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); 1710 xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); 1711 return VN_INACTIVE_CACHE; 1712 } 1713 } else if ((ip->i_d.di_mode & S_IFMT) == S_IFLNK) { 1714 1715 /* 1716 * If we get an error while cleaning up a 1717 * symlink we bail out. 1718 */ 1719 error = (ip->i_d.di_size > XFS_IFORK_DSIZE(ip)) ? 1720 xfs_inactive_symlink_rmt(ip, &tp) : 1721 xfs_inactive_symlink_local(ip, &tp); 1722 1723 if (error) { 1724 ASSERT(tp == NULL); 1725 return VN_INACTIVE_CACHE; 1726 } 1727 1728 xfs_trans_ijoin(tp, ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); 1729 xfs_trans_ihold(tp, ip); 1730 } else { 1731 error = xfs_trans_reserve(tp, 0, 1732 XFS_IFREE_LOG_RES(mp), 1733 0, XFS_TRANS_PERM_LOG_RES, 1734 XFS_INACTIVE_LOG_COUNT); 1735 if (error) { 1736 ASSERT(XFS_FORCED_SHUTDOWN(mp)); 1737 xfs_trans_cancel(tp, 0); 1738 return VN_INACTIVE_CACHE; 1739 } 1740 1741 xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); 1742 xfs_trans_ijoin(tp, ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); 1743 xfs_trans_ihold(tp, ip); 1744 } 1745 1746 /* 1747 * If there are attributes associated with the file 1748 * then blow them away now. The code calls a routine 1749 * that recursively deconstructs the attribute fork. 1750 * We need to just commit the current transaction 1751 * because we can't use it for xfs_attr_inactive(). 1752 */ 1753 if (ip->i_d.di_anextents > 0) { 1754 error = xfs_inactive_attrs(ip, &tp); 1755 /* 1756 * If we got an error, the transaction is already 1757 * cancelled, and the inode is unlocked. Just get out. 1758 */ 1759 if (error) 1760 return VN_INACTIVE_CACHE; 1761 } else if (ip->i_afp) { 1762 xfs_idestroy_fork(ip, XFS_ATTR_FORK); 1763 } 1764 1765 /* 1766 * Free the inode. 1767 */ 1768 XFS_BMAP_INIT(&free_list, &first_block); 1769 error = xfs_ifree(tp, ip, &free_list); 1770 if (error) { 1771 /* 1772 * If we fail to free the inode, shut down. The cancel 1773 * might do that, we need to make sure. Otherwise the 1774 * inode might be lost for a long time or forever. 1775 */ 1776 if (!XFS_FORCED_SHUTDOWN(mp)) { 1777 cmn_err(CE_NOTE, 1778 "xfs_inactive: xfs_ifree() returned an error = %d on %s", 1779 error, mp->m_fsname); 1780 xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); 1781 } 1782 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT); 1783 } else { 1784 /* 1785 * Credit the quota account(s). The inode is gone. 1786 */ 1787 XFS_TRANS_MOD_DQUOT_BYINO(mp, tp, ip, XFS_TRANS_DQ_ICOUNT, -1); 1788 1789 /* 1790 * Just ignore errors at this point. There is 1791 * nothing we can do except to try to keep going. 1792 */ 1793 (void) xfs_bmap_finish(&tp, &free_list, first_block, 1794 &committed); 1795 (void) xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL); 1796 } 1797 /* 1798 * Release the dquots held by inode, if any. 1799 */ 1800 XFS_QM_DQDETACH(mp, ip); 1801 1802 xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); 1803 1804 out: 1805 return VN_INACTIVE_CACHE; 1806} 1807 1808 1809/* 1810 * xfs_lookup 1811 */ 1812STATIC int 1813xfs_lookup( 1814 bhv_desc_t *dir_bdp, 1815 bhv_vname_t *dentry, 1816 bhv_vnode_t **vpp, 1817 int flags, 1818 bhv_vnode_t *rdir, 1819 cred_t *credp) 1820{ 1821 xfs_inode_t *dp, *ip; 1822 xfs_ino_t e_inum; 1823 int error; 1824 uint lock_mode; 1825 bhv_vnode_t *dir_vp; 1826 1827 dir_vp = BHV_TO_VNODE(dir_bdp); 1828 vn_trace_entry(dir_vp, __FUNCTION__, (inst_t *)__return_address); 1829 1830 dp = XFS_BHVTOI(dir_bdp); 1831 1832 if (XFS_FORCED_SHUTDOWN(dp->i_mount)) 1833 return XFS_ERROR(EIO); 1834 1835 lock_mode = xfs_ilock_map_shared(dp); 1836 error = xfs_dir_lookup_int(dir_bdp, lock_mode, dentry, &e_inum, &ip); 1837 if (!error) { 1838 *vpp = XFS_ITOV(ip); 1839 ITRACE(ip); 1840 } 1841 xfs_iunlock_map_shared(dp, lock_mode); 1842 return error; 1843} 1844 1845 1846/* 1847 * xfs_create (create a new file). 1848 */ 1849STATIC int 1850xfs_create( 1851 bhv_desc_t *dir_bdp, 1852 bhv_vname_t *dentry, 1853 bhv_vattr_t *vap, 1854 bhv_vnode_t **vpp, 1855 cred_t *credp) 1856{ 1857 char *name = VNAME(dentry); 1858 bhv_vnode_t *dir_vp; 1859 xfs_inode_t *dp, *ip; 1860 bhv_vnode_t *vp = NULL; 1861 xfs_trans_t *tp; 1862 xfs_mount_t *mp; 1863 xfs_dev_t rdev; 1864 int error; 1865 xfs_bmap_free_t free_list; 1866 xfs_fsblock_t first_block; 1867 boolean_t dp_joined_to_trans; 1868 int dm_event_sent = 0; 1869 uint cancel_flags; 1870 int committed; 1871 xfs_prid_t prid; 1872 struct xfs_dquot *udqp, *gdqp; 1873 uint resblks; 1874 int dm_di_mode; 1875 int namelen; 1876 1877 ASSERT(!*vpp); 1878 dir_vp = BHV_TO_VNODE(dir_bdp); 1879 vn_trace_entry(dir_vp, __FUNCTION__, (inst_t *)__return_address); 1880 1881 dp = XFS_BHVTOI(dir_bdp); 1882 mp = dp->i_mount; 1883 1884 dm_di_mode = vap->va_mode; 1885 namelen = VNAMELEN(dentry); 1886 1887 if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, DM_EVENT_CREATE)) { 1888 error = XFS_SEND_NAMESP(mp, DM_EVENT_CREATE, 1889 dir_vp, DM_RIGHT_NULL, NULL, 1890 DM_RIGHT_NULL, name, NULL, 1891 dm_di_mode, 0, 0); 1892 1893 if (error) 1894 return error; 1895 dm_event_sent = 1; 1896 } 1897 1898 if (XFS_FORCED_SHUTDOWN(mp)) 1899 return XFS_ERROR(EIO); 1900 1901 /* Return through std_return after this point. */ 1902 1903 udqp = gdqp = NULL; 1904 if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) 1905 prid = dp->i_d.di_projid; 1906 else if (vap->va_mask & XFS_AT_PROJID) 1907 prid = (xfs_prid_t)vap->va_projid; 1908 else 1909 prid = (xfs_prid_t)dfltprid; 1910 1911 /* 1912 * Make sure that we have allocated dquot(s) on disk. 1913 */ 1914 error = XFS_QM_DQVOPALLOC(mp, dp, 1915 current_fsuid(credp), current_fsgid(credp), prid, 1916 XFS_QMOPT_QUOTALL|XFS_QMOPT_INHERIT, &udqp, &gdqp); 1917 if (error) 1918 goto std_return; 1919 1920 ip = NULL; 1921 dp_joined_to_trans = B_FALSE; 1922 1923 tp = xfs_trans_alloc(mp, XFS_TRANS_CREATE); 1924 cancel_flags = XFS_TRANS_RELEASE_LOG_RES; 1925 resblks = XFS_CREATE_SPACE_RES(mp, namelen); 1926 /* 1927 * Initially assume that the file does not exist and 1928 * reserve the resources for that case. If that is not 1929 * the case we'll drop the one we have and get a more 1930 * appropriate transaction later. 1931 */ 1932 error = xfs_trans_reserve(tp, resblks, XFS_CREATE_LOG_RES(mp), 0, 1933 XFS_TRANS_PERM_LOG_RES, XFS_CREATE_LOG_COUNT); 1934 if (error == ENOSPC) { 1935 resblks = 0; 1936 error = xfs_trans_reserve(tp, 0, XFS_CREATE_LOG_RES(mp), 0, 1937 XFS_TRANS_PERM_LOG_RES, XFS_CREATE_LOG_COUNT); 1938 } 1939 if (error) { 1940 cancel_flags = 0; 1941 dp = NULL; 1942 goto error_return; 1943 } 1944 1945 xfs_ilock(dp, XFS_ILOCK_EXCL); 1946 1947 XFS_BMAP_INIT(&free_list, &first_block); 1948 1949 ASSERT(ip == NULL); 1950 1951 /* 1952 * Reserve disk quota and the inode. 1953 */ 1954 error = XFS_TRANS_RESERVE_QUOTA(mp, tp, udqp, gdqp, resblks, 1, 0); 1955 if (error) 1956 goto error_return; 1957 1958 if (resblks == 0 && (error = xfs_dir_canenter(tp, dp, name, namelen))) 1959 goto error_return; 1960 rdev = (vap->va_mask & XFS_AT_RDEV) ? vap->va_rdev : 0; 1961 error = xfs_dir_ialloc(&tp, dp, vap->va_mode, 1, 1962 rdev, credp, prid, resblks > 0, 1963 &ip, &committed); 1964 if (error) { 1965 if (error == ENOSPC) 1966 goto error_return; 1967 goto abort_return; 1968 } 1969 ITRACE(ip); 1970 1971 /* 1972 * At this point, we've gotten a newly allocated inode. 1973 * It is locked (and joined to the transaction). 1974 */ 1975 1976 ASSERT(ismrlocked (&ip->i_lock, MR_UPDATE)); 1977 1978 /* 1979 * Now we join the directory inode to the transaction. 1980 * We do not do it earlier because xfs_dir_ialloc 1981 * might commit the previous transaction (and release 1982 * all the locks). 1983 */ 1984 1985 VN_HOLD(dir_vp); 1986 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); 1987 dp_joined_to_trans = B_TRUE; 1988 1989 error = xfs_dir_createname(tp, dp, name, namelen, ip->i_ino, 1990 &first_block, &free_list, resblks ? 1991 resblks - XFS_IALLOC_SPACE_RES(mp) : 0); 1992 if (error) { 1993 ASSERT(error != ENOSPC); 1994 goto abort_return; 1995 } 1996 xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 1997 xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); 1998 1999 /* 2000 * If this is a synchronous mount, make sure that the 2001 * create transaction goes to disk before returning to 2002 * the user. 2003 */ 2004 if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) { 2005 xfs_trans_set_sync(tp); 2006 } 2007 2008 dp->i_gen++; 2009 2010 /* 2011 * Attach the dquot(s) to the inodes and modify them incore. 2012 * These ids of the inode couldn't have changed since the new 2013 * inode has been locked ever since it was created. 2014 */ 2015 XFS_QM_DQVOPCREATE(mp, tp, ip, udqp, gdqp); 2016 2017 /* 2018 * xfs_trans_commit normally decrements the vnode ref count 2019 * when it unlocks the inode. Since we want to return the 2020 * vnode to the caller, we bump the vnode ref count now. 2021 */ 2022 IHOLD(ip); 2023 vp = XFS_ITOV(ip); 2024 2025 error = xfs_bmap_finish(&tp, &free_list, first_block, &committed); 2026 if (error) { 2027 xfs_bmap_cancel(&free_list); 2028 goto abort_rele; 2029 } 2030 2031 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL); 2032 if (error) { 2033 IRELE(ip); 2034 tp = NULL; 2035 goto error_return; 2036 } 2037 2038 XFS_QM_DQRELE(mp, udqp); 2039 XFS_QM_DQRELE(mp, gdqp); 2040 2041 /* 2042 * Propagate the fact that the vnode changed after the 2043 * xfs_inode locks have been released. 2044 */ 2045 bhv_vop_vnode_change(vp, VCHANGE_FLAGS_TRUNCATED, 3); 2046 2047 *vpp = vp; 2048 2049 /* Fallthrough to std_return with error = 0 */ 2050 2051std_return: 2052 if ( (*vpp || (error != 0 && dm_event_sent != 0)) && 2053 DM_EVENT_ENABLED(dir_vp->v_vfsp, XFS_BHVTOI(dir_bdp), 2054 DM_EVENT_POSTCREATE)) { 2055 (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTCREATE, 2056 dir_vp, DM_RIGHT_NULL, 2057 *vpp ? vp:NULL, 2058 DM_RIGHT_NULL, name, NULL, 2059 dm_di_mode, error, 0); 2060 } 2061 return error; 2062 2063 abort_return: 2064 cancel_flags |= XFS_TRANS_ABORT; 2065 /* FALLTHROUGH */ 2066 2067 error_return: 2068 if (tp != NULL) 2069 xfs_trans_cancel(tp, cancel_flags); 2070 2071 if (!dp_joined_to_trans && (dp != NULL)) 2072 xfs_iunlock(dp, XFS_ILOCK_EXCL); 2073 XFS_QM_DQRELE(mp, udqp); 2074 XFS_QM_DQRELE(mp, gdqp); 2075 2076 goto std_return; 2077 2078 abort_rele: 2079 /* 2080 * Wait until after the current transaction is aborted to 2081 * release the inode. This prevents recursive transactions 2082 * and deadlocks from xfs_inactive. 2083 */ 2084 cancel_flags |= XFS_TRANS_ABORT; 2085 xfs_trans_cancel(tp, cancel_flags); 2086 IRELE(ip); 2087 2088 XFS_QM_DQRELE(mp, udqp); 2089 XFS_QM_DQRELE(mp, gdqp); 2090 2091 goto std_return; 2092} 2093 2094#ifdef DEBUG 2095/* 2096 * Some counters to see if (and how often) we are hitting some deadlock 2097 * prevention code paths. 2098 */ 2099 2100int xfs_rm_locks; 2101int xfs_rm_lock_delays; 2102int xfs_rm_attempts; 2103#endif 2104 2105/* 2106 * The following routine will lock the inodes associated with the 2107 * directory and the named entry in the directory. The locks are 2108 * acquired in increasing inode number. 2109 * 2110 * If the entry is "..", then only the directory is locked. The 2111 * vnode ref count will still include that from the .. entry in 2112 * this case. 2113 * 2114 * There is a deadlock we need to worry about. If the locked directory is 2115 * in the AIL, it might be blocking up the log. The next inode we lock 2116 * could be already locked by another thread waiting for log space (e.g 2117 * a permanent log reservation with a long running transaction (see 2118 * xfs_itruncate_finish)). To solve this, we must check if the directory 2119 * is in the ail and use lock_nowait. If we can't lock, we need to 2120 * drop the inode lock on the directory and try again. xfs_iunlock will 2121 * potentially push the tail if we were holding up the log. 2122 */ 2123STATIC int 2124xfs_lock_dir_and_entry( 2125 xfs_inode_t *dp, 2126 bhv_vname_t *dentry, 2127 xfs_inode_t *ip) /* inode of entry 'name' */ 2128{ 2129 int attempts; 2130 xfs_ino_t e_inum; 2131 xfs_inode_t *ips[2]; 2132 xfs_log_item_t *lp; 2133 2134#ifdef DEBUG 2135 xfs_rm_locks++; 2136#endif 2137 attempts = 0; 2138 2139again: 2140 xfs_ilock(dp, XFS_ILOCK_EXCL); 2141 2142 e_inum = ip->i_ino; 2143 2144 ITRACE(ip); 2145 2146 /* 2147 * We want to lock in increasing inum. Since we've already 2148 * acquired the lock on the directory, we may need to release 2149 * if if the inum of the entry turns out to be less. 2150 */ 2151 if (e_inum > dp->i_ino) { 2152 /* 2153 * We are already in the right order, so just 2154 * lock on the inode of the entry. 2155 * We need to use nowait if dp is in the AIL. 2156 */ 2157 2158 lp = (xfs_log_item_t *)dp->i_itemp; 2159 if (lp && (lp->li_flags & XFS_LI_IN_AIL)) { 2160 if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) { 2161 attempts++; 2162#ifdef DEBUG 2163 xfs_rm_attempts++; 2164#endif 2165 2166 /* 2167 * Unlock dp and try again. 2168 * xfs_iunlock will try to push the tail 2169 * if the inode is in the AIL. 2170 */ 2171 2172 xfs_iunlock(dp, XFS_ILOCK_EXCL); 2173 2174 if ((attempts % 5) == 0) { 2175 delay(1); /* Don't just spin the CPU */ 2176#ifdef DEBUG 2177 xfs_rm_lock_delays++; 2178#endif 2179 } 2180 goto again; 2181 } 2182 } else { 2183 xfs_ilock(ip, XFS_ILOCK_EXCL); 2184 } 2185 } else if (e_inum < dp->i_ino) { 2186 xfs_iunlock(dp, XFS_ILOCK_EXCL); 2187 2188 ips[0] = ip; 2189 ips[1] = dp; 2190 xfs_lock_inodes(ips, 2, 0, XFS_ILOCK_EXCL); 2191 } 2192 /* else e_inum == dp->i_ino */ 2193 /* This can happen if we're asked to lock /x/.. 2194 * the entry is "..", which is also the parent directory. 2195 */ 2196 2197 return 0; 2198} 2199 2200#ifdef DEBUG 2201int xfs_locked_n; 2202int xfs_small_retries; 2203int xfs_middle_retries; 2204int xfs_lots_retries; 2205int xfs_lock_delays; 2206#endif 2207 2208/* 2209 * The following routine will lock n inodes in exclusive mode. 2210 * We assume the caller calls us with the inodes in i_ino order. 2211 * 2212 * We need to detect deadlock where an inode that we lock 2213 * is in the AIL and we start waiting for another inode that is locked 2214 * by a thread in a long running transaction (such as truncate). This can 2215 * result in deadlock since the long running trans might need to wait 2216 * for the inode we just locked in order to push the tail and free space 2217 * in the log. 2218 */ 2219void 2220xfs_lock_inodes( 2221 xfs_inode_t **ips, 2222 int inodes, 2223 int first_locked, 2224 uint lock_mode) 2225{ 2226 int attempts = 0, i, j, try_lock; 2227 xfs_log_item_t *lp; 2228 2229 ASSERT(ips && (inodes >= 2)); /* we need at least two */ 2230 2231 if (first_locked) { 2232 try_lock = 1; 2233 i = 1; 2234 } else { 2235 try_lock = 0; 2236 i = 0; 2237 } 2238 2239again: 2240 for (; i < inodes; i++) { 2241 ASSERT(ips[i]); 2242 2243 if (i && (ips[i] == ips[i-1])) /* Already locked */ 2244 continue; 2245 2246 /* 2247 * If try_lock is not set yet, make sure all locked inodes 2248 * are not in the AIL. 2249 * If any are, set try_lock to be used later. 2250 */ 2251 2252 if (!try_lock) { 2253 for (j = (i - 1); j >= 0 && !try_lock; j--) { 2254 lp = (xfs_log_item_t *)ips[j]->i_itemp; 2255 if (lp && (lp->li_flags & XFS_LI_IN_AIL)) { 2256 try_lock++; 2257 } 2258 } 2259 } 2260 2261 /* 2262 * If any of the previous locks we have locked is in the AIL, 2263 * we must TRY to get the second and subsequent locks. If 2264 * we can't get any, we must release all we have 2265 * and try again. 2266 */ 2267 2268 if (try_lock) { 2269 /* try_lock must be 0 if i is 0. */ 2270 /* 2271 * try_lock means we have an inode locked 2272 * that is in the AIL. 2273 */ 2274 ASSERT(i != 0); 2275 if (!xfs_ilock_nowait(ips[i], lock_mode)) { 2276 attempts++; 2277 2278 /* 2279 * Unlock all previous guys and try again. 2280 * xfs_iunlock will try to push the tail 2281 * if the inode is in the AIL. 2282 */ 2283 2284 for(j = i - 1; j >= 0; j--) { 2285 2286 /* 2287 * Check to see if we've already 2288 * unlocked this one. 2289 * Not the first one going back, 2290 * and the inode ptr is the same. 2291 */ 2292 if ((j != (i - 1)) && ips[j] == 2293 ips[j+1]) 2294 continue; 2295 2296 xfs_iunlock(ips[j], lock_mode); 2297 } 2298 2299 if ((attempts % 5) == 0) { 2300 delay(1); /* Don't just spin the CPU */ 2301#ifdef DEBUG 2302 xfs_lock_delays++; 2303#endif 2304 } 2305 i = 0; 2306 try_lock = 0; 2307 goto again; 2308 } 2309 } else { 2310 xfs_ilock(ips[i], lock_mode); 2311 } 2312 } 2313 2314#ifdef DEBUG 2315 if (attempts) { 2316 if (attempts < 5) xfs_small_retries++; 2317 else if (attempts < 100) xfs_middle_retries++; 2318 else xfs_lots_retries++; 2319 } else { 2320 xfs_locked_n++; 2321 } 2322#endif 2323} 2324 2325#ifdef DEBUG 2326#define REMOVE_DEBUG_TRACE(x) {remove_which_error_return = (x);} 2327int remove_which_error_return = 0; 2328#else /* ! DEBUG */ 2329#define REMOVE_DEBUG_TRACE(x) 2330#endif /* ! DEBUG */ 2331 2332 2333/* 2334 * xfs_remove 2335 * 2336 */ 2337STATIC int 2338xfs_remove( 2339 bhv_desc_t *dir_bdp, 2340 bhv_vname_t *dentry, 2341 cred_t *credp) 2342{ 2343 bhv_vnode_t *dir_vp; 2344 char *name = VNAME(dentry); 2345 xfs_inode_t *dp, *ip; 2346 xfs_trans_t *tp = NULL; 2347 xfs_mount_t *mp; 2348 int error = 0; 2349 xfs_bmap_free_t free_list; 2350 xfs_fsblock_t first_block; 2351 int cancel_flags; 2352 int committed; 2353 int dm_di_mode = 0; 2354 int link_zero; 2355 uint resblks; 2356 int namelen; 2357 2358 dir_vp = BHV_TO_VNODE(dir_bdp); 2359 vn_trace_entry(dir_vp, __FUNCTION__, (inst_t *)__return_address); 2360 2361 dp = XFS_BHVTOI(dir_bdp); 2362 mp = dp->i_mount; 2363 2364 if (XFS_FORCED_SHUTDOWN(mp)) 2365 return XFS_ERROR(EIO); 2366 2367 namelen = VNAMELEN(dentry); 2368 2369 if (!xfs_get_dir_entry(dentry, &ip)) { 2370 dm_di_mode = ip->i_d.di_mode; 2371 IRELE(ip); 2372 } 2373 2374 if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, DM_EVENT_REMOVE)) { 2375 error = XFS_SEND_NAMESP(mp, DM_EVENT_REMOVE, dir_vp, 2376 DM_RIGHT_NULL, NULL, DM_RIGHT_NULL, 2377 name, NULL, dm_di_mode, 0, 0); 2378 if (error) 2379 return error; 2380 } 2381 2382 /* From this point on, return through std_return */ 2383 ip = NULL; 2384 2385 /* 2386 * We need to get a reference to ip before we get our log 2387 * reservation. The reason for this is that we cannot call 2388 * xfs_iget for an inode for which we do not have a reference 2389 * once we've acquired a log reservation. This is because the 2390 * inode we are trying to get might be in xfs_inactive going 2391 * for a log reservation. Since we'll have to wait for the 2392 * inactive code to complete before returning from xfs_iget, 2393 * we need to make sure that we don't have log space reserved 2394 * when we call xfs_iget. Instead we get an unlocked reference 2395 * to the inode before getting our log reservation. 2396 */ 2397 error = xfs_get_dir_entry(dentry, &ip); 2398 if (error) { 2399 REMOVE_DEBUG_TRACE(__LINE__); 2400 goto std_return; 2401 } 2402 2403 dm_di_mode = ip->i_d.di_mode; 2404 2405 vn_trace_entry(XFS_ITOV(ip), __FUNCTION__, (inst_t *)__return_address); 2406 2407 ITRACE(ip); 2408 2409 error = XFS_QM_DQATTACH(mp, dp, 0); 2410 if (!error && dp != ip) 2411 error = XFS_QM_DQATTACH(mp, ip, 0); 2412 if (error) { 2413 REMOVE_DEBUG_TRACE(__LINE__); 2414 IRELE(ip); 2415 goto std_return; 2416 } 2417 2418 tp = xfs_trans_alloc(mp, XFS_TRANS_REMOVE); 2419 cancel_flags = XFS_TRANS_RELEASE_LOG_RES; 2420 /* 2421 * We try to get the real space reservation first, 2422 * allowing for directory btree deletion(s) implying 2423 * possible bmap insert(s). If we can't get the space 2424 * reservation then we use 0 instead, and avoid the bmap 2425 * btree insert(s) in the directory code by, if the bmap 2426 * insert tries to happen, instead trimming the LAST 2427 * block from the directory. 2428 */ 2429 resblks = XFS_REMOVE_SPACE_RES(mp); 2430 error = xfs_trans_reserve(tp, resblks, XFS_REMOVE_LOG_RES(mp), 0, 2431 XFS_TRANS_PERM_LOG_RES, XFS_REMOVE_LOG_COUNT); 2432 if (error == ENOSPC) { 2433 resblks = 0; 2434 error = xfs_trans_reserve(tp, 0, XFS_REMOVE_LOG_RES(mp), 0, 2435 XFS_TRANS_PERM_LOG_RES, XFS_REMOVE_LOG_COUNT); 2436 } 2437 if (error) { 2438 ASSERT(error != ENOSPC); 2439 REMOVE_DEBUG_TRACE(__LINE__); 2440 xfs_trans_cancel(tp, 0); 2441 IRELE(ip); 2442 return error; 2443 } 2444 2445 error = xfs_lock_dir_and_entry(dp, dentry, ip); 2446 if (error) { 2447 REMOVE_DEBUG_TRACE(__LINE__); 2448 xfs_trans_cancel(tp, cancel_flags); 2449 IRELE(ip); 2450 goto std_return; 2451 } 2452 2453 /* 2454 * At this point, we've gotten both the directory and the entry 2455 * inodes locked. 2456 */ 2457 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); 2458 if (dp != ip) { 2459 /* 2460 * Increment vnode ref count only in this case since 2461 * there's an extra vnode reference in the case where 2462 * dp == ip. 2463 */ 2464 IHOLD(dp); 2465 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 2466 } 2467 2468 /* 2469 * Entry must exist since we did a lookup in xfs_lock_dir_and_entry. 2470 */ 2471 XFS_BMAP_INIT(&free_list, &first_block); 2472 error = xfs_dir_removename(tp, dp, name, namelen, ip->i_ino, 2473 &first_block, &free_list, 0); 2474 if (error) { 2475 ASSERT(error != ENOENT); 2476 REMOVE_DEBUG_TRACE(__LINE__); 2477 goto error1; 2478 } 2479 xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 2480 2481 dp->i_gen++; 2482 xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); 2483 2484 error = xfs_droplink(tp, ip); 2485 if (error) { 2486 REMOVE_DEBUG_TRACE(__LINE__); 2487 goto error1; 2488 } 2489 2490 /* Determine if this is the last link while 2491 * we are in the transaction. 2492 */ 2493 link_zero = (ip)->i_d.di_nlink==0; 2494 2495 /* 2496 * Take an extra ref on the inode so that it doesn't 2497 * go to xfs_inactive() from within the commit. 2498 */ 2499 IHOLD(ip); 2500 2501 /* 2502 * If this is a synchronous mount, make sure that the 2503 * remove transaction goes to disk before returning to 2504 * the user. 2505 */ 2506 if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) { 2507 xfs_trans_set_sync(tp); 2508 } 2509 2510 error = xfs_bmap_finish(&tp, &free_list, first_block, &committed); 2511 if (error) { 2512 REMOVE_DEBUG_TRACE(__LINE__); 2513 goto error_rele; 2514 } 2515 2516 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL); 2517 if (error) { 2518 IRELE(ip); 2519 goto std_return; 2520 } 2521 2522 /* 2523 * Before we drop our extra reference to the inode, purge it 2524 * from the refcache if it is there. By waiting until afterwards 2525 * to do the IRELE, we ensure that we won't go inactive in the 2526 * xfs_refcache_purge_ip routine (although that would be OK). 2527 */ 2528 xfs_refcache_purge_ip(ip); 2529 2530 vn_trace_exit(XFS_ITOV(ip), __FUNCTION__, (inst_t *)__return_address); 2531 2532 /* 2533 * Let interposed file systems know about removed links. 2534 */ 2535 bhv_vop_link_removed(XFS_ITOV(ip), dir_vp, link_zero); 2536 2537 IRELE(ip); 2538 2539/* Fall through to std_return with error = 0 */ 2540 std_return: 2541 if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, 2542 DM_EVENT_POSTREMOVE)) { 2543 (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTREMOVE, 2544 dir_vp, DM_RIGHT_NULL, 2545 NULL, DM_RIGHT_NULL, 2546 name, NULL, dm_di_mode, error, 0); 2547 } 2548 return error; 2549 2550 error1: 2551 xfs_bmap_cancel(&free_list); 2552 cancel_flags |= XFS_TRANS_ABORT; 2553 xfs_trans_cancel(tp, cancel_flags); 2554 goto std_return; 2555 2556 error_rele: 2557 /* 2558 * In this case make sure to not release the inode until after 2559 * the current transaction is aborted. Releasing it beforehand 2560 * can cause us to go to xfs_inactive and start a recursive 2561 * transaction which can easily deadlock with the current one. 2562 */ 2563 xfs_bmap_cancel(&free_list); 2564 cancel_flags |= XFS_TRANS_ABORT; 2565 xfs_trans_cancel(tp, cancel_flags); 2566 2567 /* 2568 * Before we drop our extra reference to the inode, purge it 2569 * from the refcache if it is there. By waiting until afterwards 2570 * to do the IRELE, we ensure that we won't go inactive in the 2571 * xfs_refcache_purge_ip routine (although that would be OK). 2572 */ 2573 xfs_refcache_purge_ip(ip); 2574 2575 IRELE(ip); 2576 2577 goto std_return; 2578} 2579 2580 2581/* 2582 * xfs_link 2583 * 2584 */ 2585STATIC int 2586xfs_link( 2587 bhv_desc_t *target_dir_bdp, 2588 bhv_vnode_t *src_vp, 2589 bhv_vname_t *dentry, 2590 cred_t *credp) 2591{ 2592 xfs_inode_t *tdp, *sip; 2593 xfs_trans_t *tp; 2594 xfs_mount_t *mp; 2595 xfs_inode_t *ips[2]; 2596 int error; 2597 xfs_bmap_free_t free_list; 2598 xfs_fsblock_t first_block; 2599 int cancel_flags; 2600 int committed; 2601 bhv_vnode_t *target_dir_vp; 2602 int resblks; 2603 char *target_name = VNAME(dentry); 2604 int target_namelen; 2605 2606 target_dir_vp = BHV_TO_VNODE(target_dir_bdp); 2607 vn_trace_entry(target_dir_vp, __FUNCTION__, (inst_t *)__return_address); 2608 vn_trace_entry(src_vp, __FUNCTION__, (inst_t *)__return_address); 2609 2610 target_namelen = VNAMELEN(dentry); 2611 ASSERT(!VN_ISDIR(src_vp)); 2612 2613 sip = xfs_vtoi(src_vp); 2614 tdp = XFS_BHVTOI(target_dir_bdp); 2615 mp = tdp->i_mount; 2616 if (XFS_FORCED_SHUTDOWN(mp)) 2617 return XFS_ERROR(EIO); 2618 2619 if (DM_EVENT_ENABLED(src_vp->v_vfsp, tdp, DM_EVENT_LINK)) { 2620 error = XFS_SEND_NAMESP(mp, DM_EVENT_LINK, 2621 target_dir_vp, DM_RIGHT_NULL, 2622 src_vp, DM_RIGHT_NULL, 2623 target_name, NULL, 0, 0, 0); 2624 if (error) 2625 return error; 2626 } 2627 2628 /* Return through std_return after this point. */ 2629 2630 error = XFS_QM_DQATTACH(mp, sip, 0); 2631 if (!error && sip != tdp) 2632 error = XFS_QM_DQATTACH(mp, tdp, 0); 2633 if (error) 2634 goto std_return; 2635 2636 tp = xfs_trans_alloc(mp, XFS_TRANS_LINK); 2637 cancel_flags = XFS_TRANS_RELEASE_LOG_RES; 2638 resblks = XFS_LINK_SPACE_RES(mp, target_namelen); 2639 error = xfs_trans_reserve(tp, resblks, XFS_LINK_LOG_RES(mp), 0, 2640 XFS_TRANS_PERM_LOG_RES, XFS_LINK_LOG_COUNT); 2641 if (error == ENOSPC) { 2642 resblks = 0; 2643 error = xfs_trans_reserve(tp, 0, XFS_LINK_LOG_RES(mp), 0, 2644 XFS_TRANS_PERM_LOG_RES, XFS_LINK_LOG_COUNT); 2645 } 2646 if (error) { 2647 cancel_flags = 0; 2648 goto error_return; 2649 } 2650 2651 if (sip->i_ino < tdp->i_ino) { 2652 ips[0] = sip; 2653 ips[1] = tdp; 2654 } else { 2655 ips[0] = tdp; 2656 ips[1] = sip; 2657 } 2658 2659 xfs_lock_inodes(ips, 2, 0, XFS_ILOCK_EXCL); 2660 2661 /* 2662 * Increment vnode ref counts since xfs_trans_commit & 2663 * xfs_trans_cancel will both unlock the inodes and 2664 * decrement the associated ref counts. 2665 */ 2666 VN_HOLD(src_vp); 2667 VN_HOLD(target_dir_vp); 2668 xfs_trans_ijoin(tp, sip, XFS_ILOCK_EXCL); 2669 xfs_trans_ijoin(tp, tdp, XFS_ILOCK_EXCL); 2670 2671 /* 2672 * If the source has too many links, we can't make any more to it. 2673 */ 2674 if (sip->i_d.di_nlink >= XFS_MAXLINK) { 2675 error = XFS_ERROR(EMLINK); 2676 goto error_return; 2677 } 2678 2679 /* 2680 * If we are using project inheritance, we only allow hard link 2681 * creation in our tree when the project IDs are the same; else 2682 * the tree quota mechanism could be circumvented. 2683 */ 2684 if (unlikely((tdp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) && 2685 (tdp->i_d.di_projid != sip->i_d.di_projid))) { 2686 error = XFS_ERROR(EXDEV); 2687 goto error_return; 2688 } 2689 2690 if (resblks == 0 && 2691 (error = xfs_dir_canenter(tp, tdp, target_name, target_namelen))) 2692 goto error_return; 2693 2694 XFS_BMAP_INIT(&free_list, &first_block); 2695 2696 error = xfs_dir_createname(tp, tdp, target_name, target_namelen, 2697 sip->i_ino, &first_block, &free_list, 2698 resblks); 2699 if (error) 2700 goto abort_return; 2701 xfs_ichgtime(tdp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 2702 tdp->i_gen++; 2703 xfs_trans_log_inode(tp, tdp, XFS_ILOG_CORE); 2704 2705 error = xfs_bumplink(tp, sip); 2706 if (error) 2707 goto abort_return; 2708 2709 /* 2710 * If this is a synchronous mount, make sure that the 2711 * link transaction goes to disk before returning to 2712 * the user. 2713 */ 2714 if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) { 2715 xfs_trans_set_sync(tp); 2716 } 2717 2718 error = xfs_bmap_finish (&tp, &free_list, first_block, &committed); 2719 if (error) { 2720 xfs_bmap_cancel(&free_list); 2721 goto abort_return; 2722 } 2723 2724 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL); 2725 if (error) 2726 goto std_return; 2727 2728 /* Fall through to std_return with error = 0. */ 2729std_return: 2730 if (DM_EVENT_ENABLED(src_vp->v_vfsp, sip, 2731 DM_EVENT_POSTLINK)) { 2732 (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTLINK, 2733 target_dir_vp, DM_RIGHT_NULL, 2734 src_vp, DM_RIGHT_NULL, 2735 target_name, NULL, 0, error, 0); 2736 } 2737 return error; 2738 2739 abort_return: 2740 cancel_flags |= XFS_TRANS_ABORT; 2741 /* FALLTHROUGH */ 2742 2743 error_return: 2744 xfs_trans_cancel(tp, cancel_flags); 2745 goto std_return; 2746} 2747 2748 2749/* 2750 * xfs_mkdir 2751 * 2752 */ 2753STATIC int 2754xfs_mkdir( 2755 bhv_desc_t *dir_bdp, 2756 bhv_vname_t *dentry, 2757 bhv_vattr_t *vap, 2758 bhv_vnode_t **vpp, 2759 cred_t *credp) 2760{ 2761 char *dir_name = VNAME(dentry); 2762 xfs_inode_t *dp; 2763 xfs_inode_t *cdp; /* inode of created dir */ 2764 bhv_vnode_t *cvp; /* vnode of created dir */ 2765 xfs_trans_t *tp; 2766 xfs_mount_t *mp; 2767 int cancel_flags; 2768 int error; 2769 int committed; 2770 xfs_bmap_free_t free_list; 2771 xfs_fsblock_t first_block; 2772 bhv_vnode_t *dir_vp; 2773 boolean_t dp_joined_to_trans; 2774 boolean_t created = B_FALSE; 2775 int dm_event_sent = 0; 2776 xfs_prid_t prid; 2777 struct xfs_dquot *udqp, *gdqp; 2778 uint resblks; 2779 int dm_di_mode; 2780 int dir_namelen; 2781 2782 dir_vp = BHV_TO_VNODE(dir_bdp); 2783 dp = XFS_BHVTOI(dir_bdp); 2784 mp = dp->i_mount; 2785 2786 if (XFS_FORCED_SHUTDOWN(mp)) 2787 return XFS_ERROR(EIO); 2788 2789 dir_namelen = VNAMELEN(dentry); 2790 2791 tp = NULL; 2792 dp_joined_to_trans = B_FALSE; 2793 dm_di_mode = vap->va_mode; 2794 2795 if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, DM_EVENT_CREATE)) { 2796 error = XFS_SEND_NAMESP(mp, DM_EVENT_CREATE, 2797 dir_vp, DM_RIGHT_NULL, NULL, 2798 DM_RIGHT_NULL, dir_name, NULL, 2799 dm_di_mode, 0, 0); 2800 if (error) 2801 return error; 2802 dm_event_sent = 1; 2803 } 2804 2805 /* Return through std_return after this point. */ 2806 2807 vn_trace_entry(dir_vp, __FUNCTION__, (inst_t *)__return_address); 2808 2809 mp = dp->i_mount; 2810 udqp = gdqp = NULL; 2811 if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) 2812 prid = dp->i_d.di_projid; 2813 else if (vap->va_mask & XFS_AT_PROJID) 2814 prid = (xfs_prid_t)vap->va_projid; 2815 else 2816 prid = (xfs_prid_t)dfltprid; 2817 2818 /* 2819 * Make sure that we have allocated dquot(s) on disk. 2820 */ 2821 error = XFS_QM_DQVOPALLOC(mp, dp, 2822 current_fsuid(credp), current_fsgid(credp), prid, 2823 XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp); 2824 if (error) 2825 goto std_return; 2826 2827 tp = xfs_trans_alloc(mp, XFS_TRANS_MKDIR); 2828 cancel_flags = XFS_TRANS_RELEASE_LOG_RES; 2829 resblks = XFS_MKDIR_SPACE_RES(mp, dir_namelen); 2830 error = xfs_trans_reserve(tp, resblks, XFS_MKDIR_LOG_RES(mp), 0, 2831 XFS_TRANS_PERM_LOG_RES, XFS_MKDIR_LOG_COUNT); 2832 if (error == ENOSPC) { 2833 resblks = 0; 2834 error = xfs_trans_reserve(tp, 0, XFS_MKDIR_LOG_RES(mp), 0, 2835 XFS_TRANS_PERM_LOG_RES, 2836 XFS_MKDIR_LOG_COUNT); 2837 } 2838 if (error) { 2839 cancel_flags = 0; 2840 dp = NULL; 2841 goto error_return; 2842 } 2843 2844 xfs_ilock(dp, XFS_ILOCK_EXCL); 2845 2846 /* 2847 * Check for directory link count overflow. 2848 */ 2849 if (dp->i_d.di_nlink >= XFS_MAXLINK) { 2850 error = XFS_ERROR(EMLINK); 2851 goto error_return; 2852 } 2853 2854 /* 2855 * Reserve disk quota and the inode. 2856 */ 2857 error = XFS_TRANS_RESERVE_QUOTA(mp, tp, udqp, gdqp, resblks, 1, 0); 2858 if (error) 2859 goto error_return; 2860 2861 if (resblks == 0 && 2862 (error = xfs_dir_canenter(tp, dp, dir_name, dir_namelen))) 2863 goto error_return; 2864 /* 2865 * create the directory inode. 2866 */ 2867 error = xfs_dir_ialloc(&tp, dp, vap->va_mode, 2, 2868 0, credp, prid, resblks > 0, 2869 &cdp, NULL); 2870 if (error) { 2871 if (error == ENOSPC) 2872 goto error_return; 2873 goto abort_return; 2874 } 2875 ITRACE(cdp); 2876 2877 /* 2878 * Now we add the directory inode to the transaction. 2879 * We waited until now since xfs_dir_ialloc might start 2880 * a new transaction. Had we joined the transaction 2881 * earlier, the locks might have gotten released. 2882 */ 2883 VN_HOLD(dir_vp); 2884 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); 2885 dp_joined_to_trans = B_TRUE; 2886 2887 XFS_BMAP_INIT(&free_list, &first_block); 2888 2889 error = xfs_dir_createname(tp, dp, dir_name, dir_namelen, cdp->i_ino, 2890 &first_block, &free_list, resblks ? 2891 resblks - XFS_IALLOC_SPACE_RES(mp) : 0); 2892 if (error) { 2893 ASSERT(error != ENOSPC); 2894 goto error1; 2895 } 2896 xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 2897 2898 /* 2899 * Bump the in memory version number of the parent directory 2900 * so that other processes accessing it will recognize that 2901 * the directory has changed. 2902 */ 2903 dp->i_gen++; 2904 2905 error = xfs_dir_init(tp, cdp, dp); 2906 if (error) 2907 goto error2; 2908 2909 cdp->i_gen = 1; 2910 error = xfs_bumplink(tp, dp); 2911 if (error) 2912 goto error2; 2913 2914 cvp = XFS_ITOV(cdp); 2915 2916 created = B_TRUE; 2917 2918 *vpp = cvp; 2919 IHOLD(cdp); 2920 2921 /* 2922 * Attach the dquots to the new inode and modify the icount incore. 2923 */ 2924 XFS_QM_DQVOPCREATE(mp, tp, cdp, udqp, gdqp); 2925 2926 /* 2927 * If this is a synchronous mount, make sure that the 2928 * mkdir transaction goes to disk before returning to 2929 * the user. 2930 */ 2931 if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) { 2932 xfs_trans_set_sync(tp); 2933 } 2934 2935 error = xfs_bmap_finish(&tp, &free_list, first_block, &committed); 2936 if (error) { 2937 IRELE(cdp); 2938 goto error2; 2939 } 2940 2941 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL); 2942 XFS_QM_DQRELE(mp, udqp); 2943 XFS_QM_DQRELE(mp, gdqp); 2944 if (error) { 2945 IRELE(cdp); 2946 } 2947 2948 /* Fall through to std_return with error = 0 or errno from 2949 * xfs_trans_commit. */ 2950 2951std_return: 2952 if ( (created || (error != 0 && dm_event_sent != 0)) && 2953 DM_EVENT_ENABLED(dir_vp->v_vfsp, XFS_BHVTOI(dir_bdp), 2954 DM_EVENT_POSTCREATE)) { 2955 (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTCREATE, 2956 dir_vp, DM_RIGHT_NULL, 2957 created ? XFS_ITOV(cdp):NULL, 2958 DM_RIGHT_NULL, 2959 dir_name, NULL, 2960 dm_di_mode, error, 0); 2961 } 2962 return error; 2963 2964 error2: 2965 error1: 2966 xfs_bmap_cancel(&free_list); 2967 abort_return: 2968 cancel_flags |= XFS_TRANS_ABORT; 2969 error_return: 2970 xfs_trans_cancel(tp, cancel_flags); 2971 XFS_QM_DQRELE(mp, udqp); 2972 XFS_QM_DQRELE(mp, gdqp); 2973 2974 if (!dp_joined_to_trans && (dp != NULL)) { 2975 xfs_iunlock(dp, XFS_ILOCK_EXCL); 2976 } 2977 2978 goto std_return; 2979} 2980 2981 2982/* 2983 * xfs_rmdir 2984 * 2985 */ 2986STATIC int 2987xfs_rmdir( 2988 bhv_desc_t *dir_bdp, 2989 bhv_vname_t *dentry, 2990 cred_t *credp) 2991{ 2992 char *name = VNAME(dentry); 2993 xfs_inode_t *dp; 2994 xfs_inode_t *cdp; /* child directory */ 2995 xfs_trans_t *tp; 2996 xfs_mount_t *mp; 2997 int error; 2998 xfs_bmap_free_t free_list; 2999 xfs_fsblock_t first_block; 3000 int cancel_flags; 3001 int committed; 3002 bhv_vnode_t *dir_vp; 3003 int dm_di_mode = S_IFDIR; 3004 int last_cdp_link; 3005 int namelen; 3006 uint resblks; 3007 3008 dir_vp = BHV_TO_VNODE(dir_bdp); 3009 dp = XFS_BHVTOI(dir_bdp); 3010 mp = dp->i_mount; 3011 3012 vn_trace_entry(dir_vp, __FUNCTION__, (inst_t *)__return_address); 3013 3014 if (XFS_FORCED_SHUTDOWN(XFS_BHVTOI(dir_bdp)->i_mount)) 3015 return XFS_ERROR(EIO); 3016 namelen = VNAMELEN(dentry); 3017 3018 if (!xfs_get_dir_entry(dentry, &cdp)) { 3019 dm_di_mode = cdp->i_d.di_mode; 3020 IRELE(cdp); 3021 } 3022 3023 if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, DM_EVENT_REMOVE)) { 3024 error = XFS_SEND_NAMESP(mp, DM_EVENT_REMOVE, 3025 dir_vp, DM_RIGHT_NULL, 3026 NULL, DM_RIGHT_NULL, 3027 name, NULL, dm_di_mode, 0, 0); 3028 if (error) 3029 return XFS_ERROR(error); 3030 } 3031 3032 /* Return through std_return after this point. */ 3033 3034 cdp = NULL; 3035 3036 /* 3037 * We need to get a reference to cdp before we get our log 3038 * reservation. The reason for this is that we cannot call 3039 * xfs_iget for an inode for which we do not have a reference 3040 * once we've acquired a log reservation. This is because the 3041 * inode we are trying to get might be in xfs_inactive going 3042 * for a log reservation. Since we'll have to wait for the 3043 * inactive code to complete before returning from xfs_iget, 3044 * we need to make sure that we don't have log space reserved 3045 * when we call xfs_iget. Instead we get an unlocked reference 3046 * to the inode before getting our log reservation. 3047 */ 3048 error = xfs_get_dir_entry(dentry, &cdp); 3049 if (error) { 3050 REMOVE_DEBUG_TRACE(__LINE__); 3051 goto std_return; 3052 } 3053 mp = dp->i_mount; 3054 dm_di_mode = cdp->i_d.di_mode; 3055 3056 /* 3057 * Get the dquots for the inodes. 3058 */ 3059 error = XFS_QM_DQATTACH(mp, dp, 0); 3060 if (!error && dp != cdp) 3061 error = XFS_QM_DQATTACH(mp, cdp, 0); 3062 if (error) { 3063 IRELE(cdp); 3064 REMOVE_DEBUG_TRACE(__LINE__); 3065 goto std_return; 3066 } 3067 3068 tp = xfs_trans_alloc(mp, XFS_TRANS_RMDIR); 3069 cancel_flags = XFS_TRANS_RELEASE_LOG_RES; 3070 /* 3071 * We try to get the real space reservation first, 3072 * allowing for directory btree deletion(s) implying 3073 * possible bmap insert(s). If we can't get the space 3074 * reservation then we use 0 instead, and avoid the bmap 3075 * btree insert(s) in the directory code by, if the bmap 3076 * insert tries to happen, instead trimming the LAST 3077 * block from the directory. 3078 */ 3079 resblks = XFS_REMOVE_SPACE_RES(mp); 3080 error = xfs_trans_reserve(tp, resblks, XFS_REMOVE_LOG_RES(mp), 0, 3081 XFS_TRANS_PERM_LOG_RES, XFS_DEFAULT_LOG_COUNT); 3082 if (error == ENOSPC) { 3083 resblks = 0; 3084 error = xfs_trans_reserve(tp, 0, XFS_REMOVE_LOG_RES(mp), 0, 3085 XFS_TRANS_PERM_LOG_RES, XFS_DEFAULT_LOG_COUNT); 3086 } 3087 if (error) { 3088 ASSERT(error != ENOSPC); 3089 cancel_flags = 0; 3090 IRELE(cdp); 3091 goto error_return; 3092 } 3093 XFS_BMAP_INIT(&free_list, &first_block); 3094 3095 /* 3096 * Now lock the child directory inode and the parent directory 3097 * inode in the proper order. This will take care of validating 3098 * that the directory entry for the child directory inode has 3099 * not changed while we were obtaining a log reservation. 3100 */ 3101 error = xfs_lock_dir_and_entry(dp, dentry, cdp); 3102 if (error) { 3103 xfs_trans_cancel(tp, cancel_flags); 3104 IRELE(cdp); 3105 goto std_return; 3106 } 3107 3108 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); 3109 if (dp != cdp) { 3110 /* 3111 * Only increment the parent directory vnode count if 3112 * we didn't bump it in looking up cdp. The only time 3113 * we don't bump it is when we're looking up ".". 3114 */ 3115 VN_HOLD(dir_vp); 3116 } 3117 3118 ITRACE(cdp); 3119 xfs_trans_ijoin(tp, cdp, XFS_ILOCK_EXCL); 3120 3121 ASSERT(cdp->i_d.di_nlink >= 2); 3122 if (cdp->i_d.di_nlink != 2) { 3123 error = XFS_ERROR(ENOTEMPTY); 3124 goto error_return; 3125 } 3126 if (!xfs_dir_isempty(cdp)) { 3127 error = XFS_ERROR(ENOTEMPTY); 3128 goto error_return; 3129 } 3130 3131 error = xfs_dir_removename(tp, dp, name, namelen, cdp->i_ino, 3132 &first_block, &free_list, resblks); 3133 if (error) 3134 goto error1; 3135 3136 xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 3137 3138 /* 3139 * Bump the in memory generation count on the parent 3140 * directory so that other can know that it has changed. 3141 */ 3142 dp->i_gen++; 3143 3144 /* 3145 * Drop the link from cdp's "..". 3146 */ 3147 error = xfs_droplink(tp, dp); 3148 if (error) { 3149 goto error1; 3150 } 3151 3152 /* 3153 * Drop the link from dp to cdp. 3154 */ 3155 error = xfs_droplink(tp, cdp); 3156 if (error) { 3157 goto error1; 3158 } 3159 3160 /* 3161 * Drop the "." link from cdp to self. 3162 */ 3163 error = xfs_droplink(tp, cdp); 3164 if (error) { 3165 goto error1; 3166 } 3167 3168 /* Determine these before committing transaction */ 3169 last_cdp_link = (cdp)->i_d.di_nlink==0; 3170 3171 /* 3172 * Take an extra ref on the child vnode so that it 3173 * does not go to xfs_inactive() from within the commit. 3174 */ 3175 IHOLD(cdp); 3176 3177 /* 3178 * If this is a synchronous mount, make sure that the 3179 * rmdir transaction goes to disk before returning to 3180 * the user. 3181 */ 3182 if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) { 3183 xfs_trans_set_sync(tp); 3184 } 3185 3186 error = xfs_bmap_finish (&tp, &free_list, first_block, &committed); 3187 if (error) { 3188 xfs_bmap_cancel(&free_list); 3189 xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES | 3190 XFS_TRANS_ABORT)); 3191 IRELE(cdp); 3192 goto std_return; 3193 } 3194 3195 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL); 3196 if (error) { 3197 IRELE(cdp); 3198 goto std_return; 3199 } 3200 3201 3202 /* 3203 * Let interposed file systems know about removed links. 3204 */ 3205 bhv_vop_link_removed(XFS_ITOV(cdp), dir_vp, last_cdp_link); 3206 3207 IRELE(cdp); 3208 3209 /* Fall through to std_return with error = 0 or the errno 3210 * from xfs_trans_commit. */ 3211 std_return: 3212 if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, DM_EVENT_POSTREMOVE)) { 3213 (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTREMOVE, 3214 dir_vp, DM_RIGHT_NULL, 3215 NULL, DM_RIGHT_NULL, 3216 name, NULL, dm_di_mode, 3217 error, 0); 3218 } 3219 return error; 3220 3221 error1: 3222 xfs_bmap_cancel(&free_list); 3223 cancel_flags |= XFS_TRANS_ABORT; 3224 /* FALLTHROUGH */ 3225 3226 error_return: 3227 xfs_trans_cancel(tp, cancel_flags); 3228 goto std_return; 3229} 3230 3231 3232/* 3233 * Read dp's entries starting at uiop->uio_offset and translate them into 3234 * bufsize bytes worth of struct dirents starting at bufbase. 3235 */ 3236STATIC int 3237xfs_readdir( 3238 bhv_desc_t *dir_bdp, 3239 uio_t *uiop, 3240 cred_t *credp, 3241 int *eofp) 3242{ 3243 xfs_inode_t *dp; 3244 xfs_trans_t *tp = NULL; 3245 int error = 0; 3246 uint lock_mode; 3247 3248 vn_trace_entry(BHV_TO_VNODE(dir_bdp), __FUNCTION__, 3249 (inst_t *)__return_address); 3250 dp = XFS_BHVTOI(dir_bdp); 3251 3252 if (XFS_FORCED_SHUTDOWN(dp->i_mount)) 3253 return XFS_ERROR(EIO); 3254 3255 lock_mode = xfs_ilock_map_shared(dp); 3256 error = xfs_dir_getdents(tp, dp, uiop, eofp); 3257 xfs_iunlock_map_shared(dp, lock_mode); 3258 return error; 3259} 3260 3261 3262STATIC int 3263xfs_symlink( 3264 bhv_desc_t *dir_bdp, 3265 bhv_vname_t *dentry, 3266 bhv_vattr_t *vap, 3267 char *target_path, 3268 bhv_vnode_t **vpp, 3269 cred_t *credp) 3270{ 3271 xfs_trans_t *tp; 3272 xfs_mount_t *mp; 3273 xfs_inode_t *dp; 3274 xfs_inode_t *ip; 3275 int error; 3276 int pathlen; 3277 xfs_bmap_free_t free_list; 3278 xfs_fsblock_t first_block; 3279 boolean_t dp_joined_to_trans; 3280 bhv_vnode_t *dir_vp; 3281 uint cancel_flags; 3282 int committed; 3283 xfs_fileoff_t first_fsb; 3284 xfs_filblks_t fs_blocks; 3285 int nmaps; 3286 xfs_bmbt_irec_t mval[SYMLINK_MAPS]; 3287 xfs_daddr_t d; 3288 char *cur_chunk; 3289 int byte_cnt; 3290 int n; 3291 xfs_buf_t *bp; 3292 xfs_prid_t prid; 3293 struct xfs_dquot *udqp, *gdqp; 3294 uint resblks; 3295 char *link_name = VNAME(dentry); 3296 int link_namelen; 3297 3298 *vpp = NULL; 3299 dir_vp = BHV_TO_VNODE(dir_bdp); 3300 dp = XFS_BHVTOI(dir_bdp); 3301 dp_joined_to_trans = B_FALSE; 3302 error = 0; 3303 ip = NULL; 3304 tp = NULL; 3305 3306 vn_trace_entry(dir_vp, __FUNCTION__, (inst_t *)__return_address); 3307 3308 mp = dp->i_mount; 3309 3310 if (XFS_FORCED_SHUTDOWN(mp)) 3311 return XFS_ERROR(EIO); 3312 3313 link_namelen = VNAMELEN(dentry); 3314 3315 /* 3316 * Check component lengths of the target path name. 3317 */ 3318 pathlen = strlen(target_path); 3319 if (pathlen >= MAXPATHLEN) /* total string too long */ 3320 return XFS_ERROR(ENAMETOOLONG); 3321 if (pathlen >= MAXNAMELEN) { /* is any component too long? */ 3322 int len, total; 3323 char *path; 3324 3325 for (total = 0, path = target_path; total < pathlen;) { 3326 /* 3327 * Skip any slashes. 3328 */ 3329 while(*path == '/') { 3330 total++; 3331 path++; 3332 } 3333 3334 /* 3335 * Count up to the next slash or end of path. 3336 * Error out if the component is bigger than MAXNAMELEN. 3337 */ 3338 for(len = 0; *path != '/' && total < pathlen;total++, path++) { 3339 if (++len >= MAXNAMELEN) { 3340 error = ENAMETOOLONG; 3341 return error; 3342 } 3343 } 3344 } 3345 } 3346 3347 if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, DM_EVENT_SYMLINK)) { 3348 error = XFS_SEND_NAMESP(mp, DM_EVENT_SYMLINK, dir_vp, 3349 DM_RIGHT_NULL, NULL, DM_RIGHT_NULL, 3350 link_name, target_path, 0, 0, 0); 3351 if (error) 3352 return error; 3353 } 3354 3355 /* Return through std_return after this point. */ 3356 3357 udqp = gdqp = NULL; 3358 if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) 3359 prid = dp->i_d.di_projid; 3360 else if (vap->va_mask & XFS_AT_PROJID) 3361 prid = (xfs_prid_t)vap->va_projid; 3362 else 3363 prid = (xfs_prid_t)dfltprid; 3364 3365 /* 3366 * Make sure that we have allocated dquot(s) on disk. 3367 */ 3368 error = XFS_QM_DQVOPALLOC(mp, dp, 3369 current_fsuid(credp), current_fsgid(credp), prid, 3370 XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp); 3371 if (error) 3372 goto std_return; 3373 3374 tp = xfs_trans_alloc(mp, XFS_TRANS_SYMLINK); 3375 cancel_flags = XFS_TRANS_RELEASE_LOG_RES; 3376 /* 3377 * The symlink will fit into the inode data fork? 3378 * There can't be any attributes so we get the whole variable part. 3379 */ 3380 if (pathlen <= XFS_LITINO(mp)) 3381 fs_blocks = 0; 3382 else 3383 fs_blocks = XFS_B_TO_FSB(mp, pathlen); 3384 resblks = XFS_SYMLINK_SPACE_RES(mp, link_namelen, fs_blocks); 3385 error = xfs_trans_reserve(tp, resblks, XFS_SYMLINK_LOG_RES(mp), 0, 3386 XFS_TRANS_PERM_LOG_RES, XFS_SYMLINK_LOG_COUNT); 3387 if (error == ENOSPC && fs_blocks == 0) { 3388 resblks = 0; 3389 error = xfs_trans_reserve(tp, 0, XFS_SYMLINK_LOG_RES(mp), 0, 3390 XFS_TRANS_PERM_LOG_RES, XFS_SYMLINK_LOG_COUNT); 3391 } 3392 if (error) { 3393 cancel_flags = 0; 3394 dp = NULL; 3395 goto error_return; 3396 } 3397 3398 xfs_ilock(dp, XFS_ILOCK_EXCL); 3399 3400 /* 3401 * Check whether the directory allows new symlinks or not. 3402 */ 3403 if (dp->i_d.di_flags & XFS_DIFLAG_NOSYMLINKS) { 3404 error = XFS_ERROR(EPERM); 3405 goto error_return; 3406 } 3407 3408 /* 3409 * Reserve disk quota : blocks and inode. 3410 */ 3411 error = XFS_TRANS_RESERVE_QUOTA(mp, tp, udqp, gdqp, resblks, 1, 0); 3412 if (error) 3413 goto error_return; 3414 3415 /* 3416 * Check for ability to enter directory entry, if no space reserved. 3417 */ 3418 if (resblks == 0 && 3419 (error = xfs_dir_canenter(tp, dp, link_name, link_namelen))) 3420 goto error_return; 3421 /* 3422 * Initialize the bmap freelist prior to calling either 3423 * bmapi or the directory create code. 3424 */ 3425 XFS_BMAP_INIT(&free_list, &first_block); 3426 3427 /* 3428 * Allocate an inode for the symlink. 3429 */ 3430 error = xfs_dir_ialloc(&tp, dp, S_IFLNK | (vap->va_mode&~S_IFMT), 3431 1, 0, credp, prid, resblks > 0, &ip, NULL); 3432 if (error) { 3433 if (error == ENOSPC) 3434 goto error_return; 3435 goto error1; 3436 } 3437 ITRACE(ip); 3438 3439 VN_HOLD(dir_vp); 3440 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); 3441 dp_joined_to_trans = B_TRUE; 3442 3443 /* 3444 * Also attach the dquot(s) to it, if applicable. 3445 */ 3446 XFS_QM_DQVOPCREATE(mp, tp, ip, udqp, gdqp); 3447 3448 if (resblks) 3449 resblks -= XFS_IALLOC_SPACE_RES(mp); 3450 /* 3451 * If the symlink will fit into the inode, write it inline. 3452 */ 3453 if (pathlen <= XFS_IFORK_DSIZE(ip)) { 3454 xfs_idata_realloc(ip, pathlen, XFS_DATA_FORK); 3455 memcpy(ip->i_df.if_u1.if_data, target_path, pathlen); 3456 ip->i_d.di_size = pathlen; 3457 3458 /* 3459 * The inode was initially created in extent format. 3460 */ 3461 ip->i_df.if_flags &= ~(XFS_IFEXTENTS | XFS_IFBROOT); 3462 ip->i_df.if_flags |= XFS_IFINLINE; 3463 3464 ip->i_d.di_format = XFS_DINODE_FMT_LOCAL; 3465 xfs_trans_log_inode(tp, ip, XFS_ILOG_DDATA | XFS_ILOG_CORE); 3466 3467 } else { 3468 first_fsb = 0; 3469 nmaps = SYMLINK_MAPS; 3470 3471 error = xfs_bmapi(tp, ip, first_fsb, fs_blocks, 3472 XFS_BMAPI_WRITE | XFS_BMAPI_METADATA, 3473 &first_block, resblks, mval, &nmaps, 3474 &free_list, NULL); 3475 if (error) { 3476 goto error1; 3477 } 3478 3479 if (resblks) 3480 resblks -= fs_blocks; 3481 ip->i_d.di_size = pathlen; 3482 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 3483 3484 cur_chunk = target_path; 3485 for (n = 0; n < nmaps; n++) { 3486 d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock); 3487 byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount); 3488 bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, 3489 BTOBB(byte_cnt), 0); 3490 ASSERT(bp && !XFS_BUF_GETERROR(bp)); 3491 if (pathlen < byte_cnt) { 3492 byte_cnt = pathlen; 3493 } 3494 pathlen -= byte_cnt; 3495 3496 memcpy(XFS_BUF_PTR(bp), cur_chunk, byte_cnt); 3497 cur_chunk += byte_cnt; 3498 3499 xfs_trans_log_buf(tp, bp, 0, byte_cnt - 1); 3500 } 3501 } 3502 3503 /* 3504 * Create the directory entry for the symlink. 3505 */ 3506 error = xfs_dir_createname(tp, dp, link_name, link_namelen, ip->i_ino, 3507 &first_block, &free_list, resblks); 3508 if (error) 3509 goto error1; 3510 xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 3511 xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); 3512 3513 /* 3514 * Bump the in memory version number of the parent directory 3515 * so that other processes accessing it will recognize that 3516 * the directory has changed. 3517 */ 3518 dp->i_gen++; 3519 3520 /* 3521 * If this is a synchronous mount, make sure that the 3522 * symlink transaction goes to disk before returning to 3523 * the user. 3524 */ 3525 if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) { 3526 xfs_trans_set_sync(tp); 3527 } 3528 3529 /* 3530 * xfs_trans_commit normally decrements the vnode ref count 3531 * when it unlocks the inode. Since we want to return the 3532 * vnode to the caller, we bump the vnode ref count now. 3533 */ 3534 IHOLD(ip); 3535 3536 error = xfs_bmap_finish(&tp, &free_list, first_block, &committed); 3537 if (error) { 3538 goto error2; 3539 } 3540 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL); 3541 XFS_QM_DQRELE(mp, udqp); 3542 XFS_QM_DQRELE(mp, gdqp); 3543 3544 /* Fall through to std_return with error = 0 or errno from 3545 * xfs_trans_commit */ 3546std_return: 3547 if (DM_EVENT_ENABLED(dir_vp->v_vfsp, XFS_BHVTOI(dir_bdp), 3548 DM_EVENT_POSTSYMLINK)) { 3549 (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTSYMLINK, 3550 dir_vp, DM_RIGHT_NULL, 3551 error ? NULL : XFS_ITOV(ip), 3552 DM_RIGHT_NULL, link_name, target_path, 3553 0, error, 0); 3554 } 3555 3556 if (!error) { 3557 bhv_vnode_t *vp; 3558 3559 ASSERT(ip); 3560 vp = XFS_ITOV(ip); 3561 *vpp = vp; 3562 } 3563 return error; 3564 3565 error2: 3566 IRELE(ip); 3567 error1: 3568 xfs_bmap_cancel(&free_list); 3569 cancel_flags |= XFS_TRANS_ABORT; 3570 error_return: 3571 xfs_trans_cancel(tp, cancel_flags); 3572 XFS_QM_DQRELE(mp, udqp); 3573 XFS_QM_DQRELE(mp, gdqp); 3574 3575 if (!dp_joined_to_trans && (dp != NULL)) { 3576 xfs_iunlock(dp, XFS_ILOCK_EXCL); 3577 } 3578 3579 goto std_return; 3580} 3581 3582 3583/* 3584 * xfs_fid2 3585 * 3586 * A fid routine that takes a pointer to a previously allocated 3587 * fid structure (like xfs_fast_fid) but uses a 64 bit inode number. 3588 */ 3589STATIC int 3590xfs_fid2( 3591 bhv_desc_t *bdp, 3592 fid_t *fidp) 3593{ 3594 xfs_inode_t *ip; 3595 xfs_fid2_t *xfid; 3596 3597 vn_trace_entry(BHV_TO_VNODE(bdp), __FUNCTION__, 3598 (inst_t *)__return_address); 3599 ASSERT(sizeof(fid_t) >= sizeof(xfs_fid2_t)); 3600 3601 xfid = (xfs_fid2_t *)fidp; 3602 ip = XFS_BHVTOI(bdp); 3603 xfid->fid_len = sizeof(xfs_fid2_t) - sizeof(xfid->fid_len); 3604 xfid->fid_pad = 0; 3605 /* 3606 * use memcpy because the inode is a long long and there's no 3607 * assurance that xfid->fid_ino is properly aligned. 3608 */ 3609 memcpy(&xfid->fid_ino, &ip->i_ino, sizeof(xfid->fid_ino)); 3610 xfid->fid_gen = ip->i_d.di_gen; 3611 3612 return 0; 3613} 3614 3615 3616/* 3617 * xfs_rwlock 3618 */ 3619int 3620xfs_rwlock( 3621 bhv_desc_t *bdp, 3622 bhv_vrwlock_t locktype) 3623{ 3624 xfs_inode_t *ip; 3625 bhv_vnode_t *vp; 3626 3627 vp = BHV_TO_VNODE(bdp); 3628 if (VN_ISDIR(vp)) 3629 return 1; 3630 ip = XFS_BHVTOI(bdp); 3631 if (locktype == VRWLOCK_WRITE) { 3632 xfs_ilock(ip, XFS_IOLOCK_EXCL); 3633 } else if (locktype == VRWLOCK_TRY_READ) { 3634 return xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED); 3635 } else if (locktype == VRWLOCK_TRY_WRITE) { 3636 return xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL); 3637 } else { 3638 ASSERT((locktype == VRWLOCK_READ) || 3639 (locktype == VRWLOCK_WRITE_DIRECT)); 3640 xfs_ilock(ip, XFS_IOLOCK_SHARED); 3641 } 3642 3643 return 1; 3644} 3645 3646 3647/* 3648 * xfs_rwunlock 3649 */ 3650void 3651xfs_rwunlock( 3652 bhv_desc_t *bdp, 3653 bhv_vrwlock_t locktype) 3654{ 3655 xfs_inode_t *ip; 3656 bhv_vnode_t *vp; 3657 3658 vp = BHV_TO_VNODE(bdp); 3659 if (VN_ISDIR(vp)) 3660 return; 3661 ip = XFS_BHVTOI(bdp); 3662 if (locktype == VRWLOCK_WRITE) { 3663 /* 3664 * In the write case, we may have added a new entry to 3665 * the reference cache. This might store a pointer to 3666 * an inode to be released in this inode. If it is there, 3667 * clear the pointer and release the inode after unlocking 3668 * this one. 3669 */ 3670 xfs_refcache_iunlock(ip, XFS_IOLOCK_EXCL); 3671 } else { 3672 ASSERT((locktype == VRWLOCK_READ) || 3673 (locktype == VRWLOCK_WRITE_DIRECT)); 3674 xfs_iunlock(ip, XFS_IOLOCK_SHARED); 3675 } 3676 return; 3677} 3678 3679STATIC int 3680xfs_inode_flush( 3681 bhv_desc_t *bdp, 3682 int flags) 3683{ 3684 xfs_inode_t *ip; 3685 xfs_mount_t *mp; 3686 xfs_inode_log_item_t *iip; 3687 int error = 0; 3688 3689 ip = XFS_BHVTOI(bdp); 3690 mp = ip->i_mount; 3691 iip = ip->i_itemp; 3692 3693 if (XFS_FORCED_SHUTDOWN(mp)) 3694 return XFS_ERROR(EIO); 3695 3696 /* 3697 * Bypass inodes which have already been cleaned by 3698 * the inode flush clustering code inside xfs_iflush 3699 */ 3700 if ((ip->i_update_core == 0) && 3701 ((iip == NULL) || !(iip->ili_format.ilf_fields & XFS_ILOG_ALL))) 3702 return 0; 3703 3704 if (flags & FLUSH_LOG) { 3705 if (iip && iip->ili_last_lsn) { 3706 xlog_t *log = mp->m_log; 3707 xfs_lsn_t sync_lsn; 3708 int s, log_flags = XFS_LOG_FORCE; 3709 3710 s = GRANT_LOCK(log); 3711 sync_lsn = log->l_last_sync_lsn; 3712 GRANT_UNLOCK(log, s); 3713 3714 if ((XFS_LSN_CMP(iip->ili_last_lsn, sync_lsn) <= 0)) 3715 return 0; 3716 3717 if (flags & FLUSH_SYNC) 3718 log_flags |= XFS_LOG_SYNC; 3719 return xfs_log_force(mp, iip->ili_last_lsn, log_flags); 3720 } 3721 } 3722 3723 /* 3724 * We make this non-blocking if the inode is contended, 3725 * return EAGAIN to indicate to the caller that they 3726 * did not succeed. This prevents the flush path from 3727 * blocking on inodes inside another operation right 3728 * now, they get caught later by xfs_sync. 3729 */ 3730 if (flags & FLUSH_INODE) { 3731 int flush_flags; 3732 3733 if (xfs_ipincount(ip)) 3734 return EAGAIN; 3735 3736 if (flags & FLUSH_SYNC) { 3737 xfs_ilock(ip, XFS_ILOCK_SHARED); 3738 xfs_iflock(ip); 3739 } else if (xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) { 3740 if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip)) { 3741 xfs_iunlock(ip, XFS_ILOCK_SHARED); 3742 return EAGAIN; 3743 } 3744 } else { 3745 return EAGAIN; 3746 } 3747 3748 if (flags & FLUSH_SYNC) 3749 flush_flags = XFS_IFLUSH_SYNC; 3750 else 3751 flush_flags = XFS_IFLUSH_ASYNC; 3752 3753 error = xfs_iflush(ip, flush_flags); 3754 xfs_iunlock(ip, XFS_ILOCK_SHARED); 3755 } 3756 3757 return error; 3758} 3759 3760int 3761xfs_set_dmattrs ( 3762 bhv_desc_t *bdp, 3763 u_int evmask, 3764 u_int16_t state, 3765 cred_t *credp) 3766{ 3767 xfs_inode_t *ip; 3768 xfs_trans_t *tp; 3769 xfs_mount_t *mp; 3770 int error; 3771 3772 if (!capable(CAP_SYS_ADMIN)) 3773 return XFS_ERROR(EPERM); 3774 3775 ip = XFS_BHVTOI(bdp); 3776 mp = ip->i_mount; 3777 3778 if (XFS_FORCED_SHUTDOWN(mp)) 3779 return XFS_ERROR(EIO); 3780 3781 tp = xfs_trans_alloc(mp, XFS_TRANS_SET_DMATTRS); 3782 error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES (mp), 0, 0, 0); 3783 if (error) { 3784 xfs_trans_cancel(tp, 0); 3785 return error; 3786 } 3787 xfs_ilock(ip, XFS_ILOCK_EXCL); 3788 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 3789 3790 ip->i_iocore.io_dmevmask = ip->i_d.di_dmevmask = evmask; 3791 ip->i_iocore.io_dmstate = ip->i_d.di_dmstate = state; 3792 3793 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 3794 IHOLD(ip); 3795 error = xfs_trans_commit(tp, 0, NULL); 3796 3797 return error; 3798} 3799 3800STATIC int 3801xfs_reclaim( 3802 bhv_desc_t *bdp) 3803{ 3804 xfs_inode_t *ip; 3805 bhv_vnode_t *vp; 3806 3807 vp = BHV_TO_VNODE(bdp); 3808 ip = XFS_BHVTOI(bdp); 3809 3810 vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address); 3811 3812 ASSERT(!VN_MAPPED(vp)); 3813 3814 /* bad inode, get out here ASAP */ 3815 if (VN_BAD(vp)) { 3816 xfs_ireclaim(ip); 3817 return 0; 3818 } 3819 3820 vn_iowait(vp); 3821 3822 ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0); 3823 3824 /* 3825 * Make sure the atime in the XFS inode is correct before freeing the 3826 * Linux inode. 3827 */ 3828 xfs_synchronize_atime(ip); 3829 3830 /* 3831 * If we have nothing to flush with this inode then complete the 3832 * teardown now, otherwise break the link between the xfs inode and the 3833 * linux inode and clean up the xfs inode later. This avoids flushing 3834 * the inode to disk during the delete operation itself. 3835 * 3836 * When breaking the link, we need to set the XFS_IRECLAIMABLE flag 3837 * first to ensure that xfs_iunpin() will never see an xfs inode 3838 * that has a linux inode being reclaimed. Synchronisation is provided 3839 * by the i_flags_lock. 3840 */ 3841 if (!ip->i_update_core && (ip->i_itemp == NULL)) { 3842 xfs_ilock(ip, XFS_ILOCK_EXCL); 3843 xfs_iflock(ip); 3844 return xfs_finish_reclaim(ip, 1, XFS_IFLUSH_DELWRI_ELSE_SYNC); 3845 } else { 3846 xfs_mount_t *mp = ip->i_mount; 3847 3848 /* Protect sync and unpin from us */ 3849 XFS_MOUNT_ILOCK(mp); 3850 spin_lock(&ip->i_flags_lock); 3851 __xfs_iflags_set(ip, XFS_IRECLAIMABLE); 3852 vn_bhv_remove(VN_BHV_HEAD(vp), XFS_ITOBHV(ip)); 3853 spin_unlock(&ip->i_flags_lock); 3854 list_add_tail(&ip->i_reclaim, &mp->m_del_inodes); 3855 XFS_MOUNT_IUNLOCK(mp); 3856 } 3857 return 0; 3858} 3859 3860int 3861xfs_finish_reclaim( 3862 xfs_inode_t *ip, 3863 int locked, 3864 int sync_mode) 3865{ 3866 xfs_ihash_t *ih = ip->i_hash; 3867 bhv_vnode_t *vp = XFS_ITOV_NULL(ip); 3868 int error; 3869 3870 if (vp && VN_BAD(vp)) 3871 goto reclaim; 3872 3873 /* The hash lock here protects a thread in xfs_iget_core from 3874 * racing with us on linking the inode back with a vnode. 3875 * Once we have the XFS_IRECLAIM flag set it will not touch 3876 * us. 3877 */ 3878 write_lock(&ih->ih_lock); 3879 spin_lock(&ip->i_flags_lock); 3880 if (__xfs_iflags_test(ip, XFS_IRECLAIM) || 3881 (!__xfs_iflags_test(ip, XFS_IRECLAIMABLE) && vp == NULL)) { 3882 spin_unlock(&ip->i_flags_lock); 3883 write_unlock(&ih->ih_lock); 3884 if (locked) { 3885 xfs_ifunlock(ip); 3886 xfs_iunlock(ip, XFS_ILOCK_EXCL); 3887 } 3888 return 1; 3889 } 3890 __xfs_iflags_set(ip, XFS_IRECLAIM); 3891 spin_unlock(&ip->i_flags_lock); 3892 write_unlock(&ih->ih_lock); 3893 3894 /* 3895 * If the inode is still dirty, then flush it out. If the inode 3896 * is not in the AIL, then it will be OK to flush it delwri as 3897 * long as xfs_iflush() does not keep any references to the inode. 3898 * We leave that decision up to xfs_iflush() since it has the 3899 * knowledge of whether it's OK to simply do a delwri flush of 3900 * the inode or whether we need to wait until the inode is 3901 * pulled from the AIL. 3902 * We get the flush lock regardless, though, just to make sure 3903 * we don't free it while it is being flushed. 3904 */ 3905 if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { 3906 if (!locked) { 3907 xfs_ilock(ip, XFS_ILOCK_EXCL); 3908 xfs_iflock(ip); 3909 } 3910 3911 if (ip->i_update_core || 3912 ((ip->i_itemp != NULL) && 3913 (ip->i_itemp->ili_format.ilf_fields != 0))) { 3914 error = xfs_iflush(ip, sync_mode); 3915 /* 3916 * If we hit an error, typically because of filesystem 3917 * shutdown, we don't need to let vn_reclaim to know 3918 * because we're gonna reclaim the inode anyway. 3919 */ 3920 if (error) { 3921 xfs_iunlock(ip, XFS_ILOCK_EXCL); 3922 goto reclaim; 3923 } 3924 xfs_iflock(ip); /* synchronize with xfs_iflush_done */ 3925 } 3926 3927 ASSERT(ip->i_update_core == 0); 3928 ASSERT(ip->i_itemp == NULL || 3929 ip->i_itemp->ili_format.ilf_fields == 0); 3930 xfs_iunlock(ip, XFS_ILOCK_EXCL); 3931 } else if (locked) { 3932 /* 3933 * We are not interested in doing an iflush if we're 3934 * in the process of shutting down the filesystem forcibly. 3935 * So, just reclaim the inode. 3936 */ 3937 xfs_ifunlock(ip); 3938 xfs_iunlock(ip, XFS_ILOCK_EXCL); 3939 } 3940 3941 reclaim: 3942 xfs_ireclaim(ip); 3943 return 0; 3944} 3945 3946int 3947xfs_finish_reclaim_all(xfs_mount_t *mp, int noblock) 3948{ 3949 int purged; 3950 xfs_inode_t *ip, *n; 3951 int done = 0; 3952 3953 while (!done) { 3954 purged = 0; 3955 XFS_MOUNT_ILOCK(mp); 3956 list_for_each_entry_safe(ip, n, &mp->m_del_inodes, i_reclaim) { 3957 if (noblock) { 3958 if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL) == 0) 3959 continue; 3960 if (xfs_ipincount(ip) || 3961 !xfs_iflock_nowait(ip)) { 3962 xfs_iunlock(ip, XFS_ILOCK_EXCL); 3963 continue; 3964 } 3965 } 3966 XFS_MOUNT_IUNLOCK(mp); 3967 if (xfs_finish_reclaim(ip, noblock, 3968 XFS_IFLUSH_DELWRI_ELSE_ASYNC)) 3969 delay(1); 3970 purged = 1; 3971 break; 3972 } 3973 3974 done = !purged; 3975 } 3976 3977 XFS_MOUNT_IUNLOCK(mp); 3978 return 0; 3979} 3980 3981/* 3982 * xfs_alloc_file_space() 3983 * This routine allocates disk space for the given file. 3984 * 3985 * If alloc_type == 0, this request is for an ALLOCSP type 3986 * request which will change the file size. In this case, no 3987 * DMAPI event will be generated by the call. A TRUNCATE event 3988 * will be generated later by xfs_setattr. 3989 * 3990 * If alloc_type != 0, this request is for a RESVSP type 3991 * request, and a DMAPI DM_EVENT_WRITE will be generated if the 3992 * lower block boundary byte address is less than the file's 3993 * length. 3994 * 3995 * RETURNS: 3996 * 0 on success 3997 * errno on error 3998 * 3999 */ 4000STATIC int 4001xfs_alloc_file_space( 4002 xfs_inode_t *ip, 4003 xfs_off_t offset, 4004 xfs_off_t len, 4005 int alloc_type, 4006 int attr_flags) 4007{ 4008 xfs_mount_t *mp = ip->i_mount; 4009 xfs_off_t count; 4010 xfs_filblks_t allocated_fsb; 4011 xfs_filblks_t allocatesize_fsb; 4012 xfs_extlen_t extsz, temp; 4013 xfs_fileoff_t startoffset_fsb; 4014 xfs_fsblock_t firstfsb; 4015 int nimaps; 4016 int bmapi_flag; 4017 int quota_flag; 4018 int rt; 4019 xfs_trans_t *tp; 4020 xfs_bmbt_irec_t imaps[1], *imapp; 4021 xfs_bmap_free_t free_list; 4022 uint qblocks, resblks, resrtextents; 4023 int committed; 4024 int error; 4025 4026 vn_trace_entry(XFS_ITOV(ip), __FUNCTION__, (inst_t *)__return_address); 4027 4028 if (XFS_FORCED_SHUTDOWN(mp)) 4029 return XFS_ERROR(EIO); 4030 4031 rt = XFS_IS_REALTIME_INODE(ip); 4032 if (unlikely(rt)) { 4033 if (!(extsz = ip->i_d.di_extsize)) 4034 extsz = mp->m_sb.sb_rextsize; 4035 } else { 4036 extsz = ip->i_d.di_extsize; 4037 } 4038 4039 if ((error = XFS_QM_DQATTACH(mp, ip, 0))) 4040 return error; 4041 4042 if (len <= 0) 4043 return XFS_ERROR(EINVAL); 4044 4045 count = len; 4046 error = 0; 4047 imapp = &imaps[0]; 4048 nimaps = 1; 4049 bmapi_flag = XFS_BMAPI_WRITE | (alloc_type ? XFS_BMAPI_PREALLOC : 0); 4050 startoffset_fsb = XFS_B_TO_FSBT(mp, offset); 4051 allocatesize_fsb = XFS_B_TO_FSB(mp, count); 4052 4053 /* Generate a DMAPI event if needed. */ 4054 if (alloc_type != 0 && offset < ip->i_d.di_size && 4055 (attr_flags&ATTR_DMI) == 0 && 4056 DM_EVENT_ENABLED(XFS_MTOVFS(mp), ip, DM_EVENT_WRITE)) { 4057 xfs_off_t end_dmi_offset; 4058 4059 end_dmi_offset = offset+len; 4060 if (end_dmi_offset > ip->i_d.di_size) 4061 end_dmi_offset = ip->i_d.di_size; 4062 error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, XFS_ITOV(ip), 4063 offset, end_dmi_offset - offset, 4064 0, NULL); 4065 if (error) 4066 return error; 4067 } 4068 4069 /* 4070 * Allocate file space until done or until there is an error 4071 */ 4072retry: 4073 while (allocatesize_fsb && !error) { 4074 xfs_fileoff_t s, e; 4075 4076 /* 4077 * Determine space reservations for data/realtime. 4078 */ 4079 if (unlikely(extsz)) { 4080 s = startoffset_fsb; 4081 do_div(s, extsz); 4082 s *= extsz; 4083 e = startoffset_fsb + allocatesize_fsb; 4084 if ((temp = do_mod(startoffset_fsb, extsz))) 4085 e += temp; 4086 if ((temp = do_mod(e, extsz))) 4087 e += extsz - temp; 4088 } else { 4089 s = 0; 4090 e = allocatesize_fsb; 4091 } 4092 4093 if (unlikely(rt)) { 4094 resrtextents = qblocks = (uint)(e - s); 4095 resrtextents /= mp->m_sb.sb_rextsize; 4096 resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0); 4097 quota_flag = XFS_QMOPT_RES_RTBLKS; 4098 } else { 4099 resrtextents = 0; 4100 resblks = qblocks = \ 4101 XFS_DIOSTRAT_SPACE_RES(mp, (uint)(e - s)); 4102 quota_flag = XFS_QMOPT_RES_REGBLKS; 4103 } 4104 4105 /* 4106 * Allocate and setup the transaction. 4107 */ 4108 tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); 4109 error = xfs_trans_reserve(tp, resblks, 4110 XFS_WRITE_LOG_RES(mp), resrtextents, 4111 XFS_TRANS_PERM_LOG_RES, 4112 XFS_WRITE_LOG_COUNT); 4113 /* 4114 * Check for running out of space 4115 */ 4116 if (error) { 4117 /* 4118 * Free the transaction structure. 4119 */ 4120 ASSERT(error == ENOSPC || XFS_FORCED_SHUTDOWN(mp)); 4121 xfs_trans_cancel(tp, 0); 4122 break; 4123 } 4124 xfs_ilock(ip, XFS_ILOCK_EXCL); 4125 error = XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip, 4126 qblocks, 0, quota_flag); 4127 if (error) 4128 goto error1; 4129 4130 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 4131 xfs_trans_ihold(tp, ip); 4132 4133 /* 4134 * Issue the xfs_bmapi() call to allocate the blocks 4135 */ 4136 XFS_BMAP_INIT(&free_list, &firstfsb); 4137 error = XFS_BMAPI(mp, tp, &ip->i_iocore, startoffset_fsb, 4138 allocatesize_fsb, bmapi_flag, 4139 &firstfsb, 0, imapp, &nimaps, 4140 &free_list, NULL); 4141 if (error) { 4142 goto error0; 4143 } 4144 4145 /* 4146 * Complete the transaction 4147 */ 4148 error = xfs_bmap_finish(&tp, &free_list, firstfsb, &committed); 4149 if (error) { 4150 goto error0; 4151 } 4152 4153 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL); 4154 xfs_iunlock(ip, XFS_ILOCK_EXCL); 4155 if (error) { 4156 break; 4157 } 4158 4159 allocated_fsb = imapp->br_blockcount; 4160 4161 if (nimaps == 0) { 4162 error = XFS_ERROR(ENOSPC); 4163 break; 4164 } 4165 4166 startoffset_fsb += allocated_fsb; 4167 allocatesize_fsb -= allocated_fsb; 4168 } 4169dmapi_enospc_check: 4170 if (error == ENOSPC && (attr_flags&ATTR_DMI) == 0 && 4171 DM_EVENT_ENABLED(XFS_MTOVFS(mp), ip, DM_EVENT_NOSPACE)) { 4172 4173 error = XFS_SEND_NAMESP(mp, DM_EVENT_NOSPACE, 4174 XFS_ITOV(ip), DM_RIGHT_NULL, 4175 XFS_ITOV(ip), DM_RIGHT_NULL, 4176 NULL, NULL, 0, 0, 0); /* Delay flag intentionally unused */ 4177 if (error == 0) 4178 goto retry; /* Maybe DMAPI app. has made space */ 4179 /* else fall through with error from XFS_SEND_DATA */ 4180 } 4181 4182 return error; 4183 4184error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */ 4185 xfs_bmap_cancel(&free_list); 4186 XFS_TRANS_UNRESERVE_QUOTA_NBLKS(mp, tp, ip, qblocks, 0, quota_flag); 4187 4188error1: /* Just cancel transaction */ 4189 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); 4190 xfs_iunlock(ip, XFS_ILOCK_EXCL); 4191 goto dmapi_enospc_check; 4192} 4193 4194/* 4195 * Zero file bytes between startoff and endoff inclusive. 4196 * The iolock is held exclusive and no blocks are buffered. 4197 */ 4198STATIC int 4199xfs_zero_remaining_bytes( 4200 xfs_inode_t *ip, 4201 xfs_off_t startoff, 4202 xfs_off_t endoff) 4203{ 4204 xfs_bmbt_irec_t imap; 4205 xfs_fileoff_t offset_fsb; 4206 xfs_off_t lastoffset; 4207 xfs_off_t offset; 4208 xfs_buf_t *bp; 4209 xfs_mount_t *mp = ip->i_mount; 4210 int nimap; 4211 int error = 0; 4212 4213 bp = xfs_buf_get_noaddr(mp->m_sb.sb_blocksize, 4214 ip->i_d.di_flags & XFS_DIFLAG_REALTIME ? 4215 mp->m_rtdev_targp : mp->m_ddev_targp); 4216 4217 for (offset = startoff; offset <= endoff; offset = lastoffset + 1) { 4218 offset_fsb = XFS_B_TO_FSBT(mp, offset); 4219 nimap = 1; 4220 error = XFS_BMAPI(mp, NULL, &ip->i_iocore, offset_fsb, 1, 0, 4221 NULL, 0, &imap, &nimap, NULL, NULL); 4222 if (error || nimap < 1) 4223 break; 4224 ASSERT(imap.br_blockcount >= 1); 4225 ASSERT(imap.br_startoff == offset_fsb); 4226 lastoffset = XFS_FSB_TO_B(mp, imap.br_startoff + 1) - 1; 4227 if (lastoffset > endoff) 4228 lastoffset = endoff; 4229 if (imap.br_startblock == HOLESTARTBLOCK) 4230 continue; 4231 ASSERT(imap.br_startblock != DELAYSTARTBLOCK); 4232 if (imap.br_state == XFS_EXT_UNWRITTEN) 4233 continue; 4234 XFS_BUF_UNDONE(bp); 4235 XFS_BUF_UNWRITE(bp); 4236 XFS_BUF_READ(bp); 4237 XFS_BUF_SET_ADDR(bp, XFS_FSB_TO_DB(ip, imap.br_startblock)); 4238 xfsbdstrat(mp, bp); 4239 if ((error = xfs_iowait(bp))) { 4240 xfs_ioerror_alert("xfs_zero_remaining_bytes(read)", 4241 mp, bp, XFS_BUF_ADDR(bp)); 4242 break; 4243 } 4244 memset(XFS_BUF_PTR(bp) + 4245 (offset - XFS_FSB_TO_B(mp, imap.br_startoff)), 4246 0, lastoffset - offset + 1); 4247 XFS_BUF_UNDONE(bp); 4248 XFS_BUF_UNREAD(bp); 4249 XFS_BUF_WRITE(bp); 4250 xfsbdstrat(mp, bp); 4251 if ((error = xfs_iowait(bp))) { 4252 xfs_ioerror_alert("xfs_zero_remaining_bytes(write)", 4253 mp, bp, XFS_BUF_ADDR(bp)); 4254 break; 4255 } 4256 } 4257 xfs_buf_free(bp); 4258 return error; 4259} 4260 4261/* 4262 * xfs_free_file_space() 4263 * This routine frees disk space for the given file. 4264 * 4265 * This routine is only called by xfs_change_file_space 4266 * for an UNRESVSP type call. 4267 * 4268 * RETURNS: 4269 * 0 on success 4270 * errno on error 4271 * 4272 */ 4273STATIC int 4274xfs_free_file_space( 4275 xfs_inode_t *ip, 4276 xfs_off_t offset, 4277 xfs_off_t len, 4278 int attr_flags) 4279{ 4280 bhv_vnode_t *vp; 4281 int committed; 4282 int done; 4283 xfs_off_t end_dmi_offset; 4284 xfs_fileoff_t endoffset_fsb; 4285 int error; 4286 xfs_fsblock_t firstfsb; 4287 xfs_bmap_free_t free_list; 4288 xfs_off_t ilen; 4289 xfs_bmbt_irec_t imap; 4290 xfs_off_t ioffset; 4291 xfs_extlen_t mod=0; 4292 xfs_mount_t *mp; 4293 int nimap; 4294 uint resblks; 4295 uint rounding; 4296 int rt; 4297 xfs_fileoff_t startoffset_fsb; 4298 xfs_trans_t *tp; 4299 int need_iolock = 1; 4300 4301 vp = XFS_ITOV(ip); 4302 mp = ip->i_mount; 4303 4304 vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address); 4305 4306 if ((error = XFS_QM_DQATTACH(mp, ip, 0))) 4307 return error; 4308 4309 error = 0; 4310 if (len <= 0) /* if nothing being freed */ 4311 return error; 4312 rt = (ip->i_d.di_flags & XFS_DIFLAG_REALTIME); 4313 startoffset_fsb = XFS_B_TO_FSB(mp, offset); 4314 end_dmi_offset = offset + len; 4315 endoffset_fsb = XFS_B_TO_FSBT(mp, end_dmi_offset); 4316 4317 if (offset < ip->i_d.di_size && 4318 (attr_flags & ATTR_DMI) == 0 && 4319 DM_EVENT_ENABLED(XFS_MTOVFS(mp), ip, DM_EVENT_WRITE)) { 4320 if (end_dmi_offset > ip->i_d.di_size) 4321 end_dmi_offset = ip->i_d.di_size; 4322 error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, vp, 4323 offset, end_dmi_offset - offset, 4324 AT_DELAY_FLAG(attr_flags), NULL); 4325 if (error) 4326 return error; 4327 } 4328 4329 if (attr_flags & ATTR_NOLOCK) 4330 need_iolock = 0; 4331 if (need_iolock) { 4332 xfs_ilock(ip, XFS_IOLOCK_EXCL); 4333 vn_iowait(vp); /* wait for the completion of any pending DIOs */ 4334 } 4335 4336 rounding = max_t(uint, 1 << mp->m_sb.sb_blocklog, NBPP); 4337 ilen = len + (offset & (rounding - 1)); 4338 ioffset = offset & ~(rounding - 1); 4339 if (ilen & (rounding - 1)) 4340 ilen = (ilen + rounding) & ~(rounding - 1); 4341 4342 if (VN_CACHED(vp) != 0) { 4343 xfs_inval_cached_trace(&ip->i_iocore, ioffset, -1, 4344 ctooff(offtoct(ioffset)), -1); 4345 bhv_vop_flushinval_pages(vp, ctooff(offtoct(ioffset)), 4346 -1, FI_REMAPF_LOCKED); 4347 } 4348 4349 /* 4350 * Need to zero the stuff we're not freeing, on disk. 4351 * If its a realtime file & can't use unwritten extents then we 4352 * actually need to zero the extent edges. Otherwise xfs_bunmapi 4353 * will take care of it for us. 4354 */ 4355 if (rt && !XFS_SB_VERSION_HASEXTFLGBIT(&mp->m_sb)) { 4356 nimap = 1; 4357 error = XFS_BMAPI(mp, NULL, &ip->i_iocore, startoffset_fsb, 4358 1, 0, NULL, 0, &imap, &nimap, NULL, NULL); 4359 if (error) 4360 goto out_unlock_iolock; 4361 ASSERT(nimap == 0 || nimap == 1); 4362 if (nimap && imap.br_startblock != HOLESTARTBLOCK) { 4363 xfs_daddr_t block; 4364 4365 ASSERT(imap.br_startblock != DELAYSTARTBLOCK); 4366 block = imap.br_startblock; 4367 mod = do_div(block, mp->m_sb.sb_rextsize); 4368 if (mod) 4369 startoffset_fsb += mp->m_sb.sb_rextsize - mod; 4370 } 4371 nimap = 1; 4372 error = XFS_BMAPI(mp, NULL, &ip->i_iocore, endoffset_fsb - 1, 4373 1, 0, NULL, 0, &imap, &nimap, NULL, NULL); 4374 if (error) 4375 goto out_unlock_iolock; 4376 ASSERT(nimap == 0 || nimap == 1); 4377 if (nimap && imap.br_startblock != HOLESTARTBLOCK) { 4378 ASSERT(imap.br_startblock != DELAYSTARTBLOCK); 4379 mod++; 4380 if (mod && (mod != mp->m_sb.sb_rextsize)) 4381 endoffset_fsb -= mod; 4382 } 4383 } 4384 if ((done = (endoffset_fsb <= startoffset_fsb))) 4385 /* 4386 * One contiguous piece to clear 4387 */ 4388 error = xfs_zero_remaining_bytes(ip, offset, offset + len - 1); 4389 else { 4390 /* 4391 * Some full blocks, possibly two pieces to clear 4392 */ 4393 if (offset < XFS_FSB_TO_B(mp, startoffset_fsb)) 4394 error = xfs_zero_remaining_bytes(ip, offset, 4395 XFS_FSB_TO_B(mp, startoffset_fsb) - 1); 4396 if (!error && 4397 XFS_FSB_TO_B(mp, endoffset_fsb) < offset + len) 4398 error = xfs_zero_remaining_bytes(ip, 4399 XFS_FSB_TO_B(mp, endoffset_fsb), 4400 offset + len - 1); 4401 } 4402 4403 /* 4404 * free file space until done or until there is an error 4405 */ 4406 resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0); 4407 while (!error && !done) { 4408 4409 /* 4410 * allocate and setup the transaction 4411 */ 4412 tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); 4413 error = xfs_trans_reserve(tp, 4414 resblks, 4415 XFS_WRITE_LOG_RES(mp), 4416 0, 4417 XFS_TRANS_PERM_LOG_RES, 4418 XFS_WRITE_LOG_COUNT); 4419 4420 /* 4421 * check for running out of space 4422 */ 4423 if (error) { 4424 /* 4425 * Free the transaction structure. 4426 */ 4427 ASSERT(error == ENOSPC || XFS_FORCED_SHUTDOWN(mp)); 4428 xfs_trans_cancel(tp, 0); 4429 break; 4430 } 4431 xfs_ilock(ip, XFS_ILOCK_EXCL); 4432 error = XFS_TRANS_RESERVE_QUOTA(mp, tp, 4433 ip->i_udquot, ip->i_gdquot, resblks, 0, 4434 XFS_QMOPT_RES_REGBLKS); 4435 if (error) 4436 goto error1; 4437 4438 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 4439 xfs_trans_ihold(tp, ip); 4440 4441 /* 4442 * issue the bunmapi() call to free the blocks 4443 */ 4444 XFS_BMAP_INIT(&free_list, &firstfsb); 4445 error = XFS_BUNMAPI(mp, tp, &ip->i_iocore, startoffset_fsb, 4446 endoffset_fsb - startoffset_fsb, 4447 0, 2, &firstfsb, &free_list, NULL, &done); 4448 if (error) { 4449 goto error0; 4450 } 4451 4452 /* 4453 * complete the transaction 4454 */ 4455 error = xfs_bmap_finish(&tp, &free_list, firstfsb, &committed); 4456 if (error) { 4457 goto error0; 4458 } 4459 4460 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL); 4461 xfs_iunlock(ip, XFS_ILOCK_EXCL); 4462 } 4463 4464 out_unlock_iolock: 4465 if (need_iolock) 4466 xfs_iunlock(ip, XFS_IOLOCK_EXCL); 4467 return error; 4468 4469 error0: 4470 xfs_bmap_cancel(&free_list); 4471 error1: 4472 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); 4473 xfs_iunlock(ip, need_iolock ? (XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL) : 4474 XFS_ILOCK_EXCL); 4475 return error; 4476} 4477 4478/* 4479 * xfs_change_file_space() 4480 * This routine allocates or frees disk space for the given file. 4481 * The user specified parameters are checked for alignment and size 4482 * limitations. 4483 * 4484 * RETURNS: 4485 * 0 on success 4486 * errno on error 4487 * 4488 */ 4489int 4490xfs_change_file_space( 4491 bhv_desc_t *bdp, 4492 int cmd, 4493 xfs_flock64_t *bf, 4494 xfs_off_t offset, 4495 cred_t *credp, 4496 int attr_flags) 4497{ 4498 int clrprealloc; 4499 int error; 4500 xfs_fsize_t fsize; 4501 xfs_inode_t *ip; 4502 xfs_mount_t *mp; 4503 int setprealloc; 4504 xfs_off_t startoffset; 4505 xfs_off_t llen; 4506 xfs_trans_t *tp; 4507 bhv_vattr_t va; 4508 bhv_vnode_t *vp; 4509 4510 vp = BHV_TO_VNODE(bdp); 4511 vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address); 4512 4513 ip = XFS_BHVTOI(bdp); 4514 mp = ip->i_mount; 4515 4516 /* 4517 * must be a regular file and have write permission 4518 */ 4519 if (!VN_ISREG(vp)) 4520 return XFS_ERROR(EINVAL); 4521 4522 xfs_ilock(ip, XFS_ILOCK_SHARED); 4523 4524 if ((error = xfs_iaccess(ip, S_IWUSR, credp))) { 4525 xfs_iunlock(ip, XFS_ILOCK_SHARED); 4526 return error; 4527 } 4528 4529 xfs_iunlock(ip, XFS_ILOCK_SHARED); 4530 4531 switch (bf->l_whence) { 4532 case 0: /*SEEK_SET*/ 4533 break; 4534 case 1: /*SEEK_CUR*/ 4535 bf->l_start += offset; 4536 break; 4537 case 2: /*SEEK_END*/ 4538 bf->l_start += ip->i_d.di_size; 4539 break; 4540 default: 4541 return XFS_ERROR(EINVAL); 4542 } 4543 4544 llen = bf->l_len > 0 ? bf->l_len - 1 : bf->l_len; 4545 4546 if ( (bf->l_start < 0) 4547 || (bf->l_start > XFS_MAXIOFFSET(mp)) 4548 || (bf->l_start + llen < 0) 4549 || (bf->l_start + llen > XFS_MAXIOFFSET(mp))) 4550 return XFS_ERROR(EINVAL); 4551 4552 bf->l_whence = 0; 4553 4554 startoffset = bf->l_start; 4555 fsize = ip->i_d.di_size; 4556 4557 /* 4558 * XFS_IOC_RESVSP and XFS_IOC_UNRESVSP will reserve or unreserve 4559 * file space. 4560 * These calls do NOT zero the data space allocated to the file, 4561 * nor do they change the file size. 4562 * 4563 * XFS_IOC_ALLOCSP and XFS_IOC_FREESP will allocate and free file 4564 * space. 4565 * These calls cause the new file data to be zeroed and the file 4566 * size to be changed. 4567 */ 4568 setprealloc = clrprealloc = 0; 4569 4570 switch (cmd) { 4571 case XFS_IOC_RESVSP: 4572 case XFS_IOC_RESVSP64: 4573 error = xfs_alloc_file_space(ip, startoffset, bf->l_len, 4574 1, attr_flags); 4575 if (error) 4576 return error; 4577 setprealloc = 1; 4578 break; 4579 4580 case XFS_IOC_UNRESVSP: 4581 case XFS_IOC_UNRESVSP64: 4582 if ((error = xfs_free_file_space(ip, startoffset, bf->l_len, 4583 attr_flags))) 4584 return error; 4585 break; 4586 4587 case XFS_IOC_ALLOCSP: 4588 case XFS_IOC_ALLOCSP64: 4589 case XFS_IOC_FREESP: 4590 case XFS_IOC_FREESP64: 4591 if (startoffset > fsize) { 4592 error = xfs_alloc_file_space(ip, fsize, 4593 startoffset - fsize, 0, attr_flags); 4594 if (error) 4595 break; 4596 } 4597 4598 va.va_mask = XFS_AT_SIZE; 4599 va.va_size = startoffset; 4600 4601 error = xfs_setattr(bdp, &va, attr_flags, credp); 4602 4603 if (error) 4604 return error; 4605 4606 clrprealloc = 1; 4607 break; 4608 4609 default: 4610 ASSERT(0); 4611 return XFS_ERROR(EINVAL); 4612 } 4613 4614 /* 4615 * update the inode timestamp, mode, and prealloc flag bits 4616 */ 4617 tp = xfs_trans_alloc(mp, XFS_TRANS_WRITEID); 4618 4619 if ((error = xfs_trans_reserve(tp, 0, XFS_WRITEID_LOG_RES(mp), 4620 0, 0, 0))) { 4621 /* ASSERT(0); */ 4622 xfs_trans_cancel(tp, 0); 4623 return error; 4624 } 4625 4626 xfs_ilock(ip, XFS_ILOCK_EXCL); 4627 4628 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 4629 xfs_trans_ihold(tp, ip); 4630 4631 if ((attr_flags & ATTR_DMI) == 0) { 4632 ip->i_d.di_mode &= ~S_ISUID; 4633 4634 /* 4635 * Note that we don't have to worry about mandatory 4636 * file locking being disabled here because we only 4637 * clear the S_ISGID bit if the Group execute bit is 4638 * on, but if it was on then mandatory locking wouldn't 4639 * have been enabled. 4640 */ 4641 if (ip->i_d.di_mode & S_IXGRP) 4642 ip->i_d.di_mode &= ~S_ISGID; 4643 4644 xfs_ichgtime(ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 4645 } 4646 if (setprealloc) 4647 ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC; 4648 else if (clrprealloc) 4649 ip->i_d.di_flags &= ~XFS_DIFLAG_PREALLOC; 4650 4651 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 4652 xfs_trans_set_sync(tp); 4653 4654 error = xfs_trans_commit(tp, 0, NULL); 4655 4656 xfs_iunlock(ip, XFS_ILOCK_EXCL); 4657 4658 return error; 4659} 4660 4661bhv_vnodeops_t xfs_vnodeops = { 4662 BHV_IDENTITY_INIT(VN_BHV_XFS,VNODE_POSITION_XFS), 4663 .vop_open = xfs_open, 4664 .vop_close = xfs_close, 4665 .vop_read = xfs_read, 4666#ifdef HAVE_SENDFILE 4667 .vop_sendfile = xfs_sendfile, 4668#endif 4669#ifdef HAVE_SPLICE 4670 .vop_splice_read = xfs_splice_read, 4671 .vop_splice_write = xfs_splice_write, 4672#endif 4673 .vop_write = xfs_write, 4674 .vop_ioctl = xfs_ioctl, 4675 .vop_getattr = xfs_getattr, 4676 .vop_setattr = xfs_setattr, 4677 .vop_access = xfs_access, 4678 .vop_lookup = xfs_lookup, 4679 .vop_create = xfs_create, 4680 .vop_remove = xfs_remove, 4681 .vop_link = xfs_link, 4682 .vop_rename = xfs_rename, 4683 .vop_mkdir = xfs_mkdir, 4684 .vop_rmdir = xfs_rmdir, 4685 .vop_readdir = xfs_readdir, 4686 .vop_symlink = xfs_symlink, 4687 .vop_readlink = xfs_readlink, 4688 .vop_fsync = xfs_fsync, 4689 .vop_inactive = xfs_inactive, 4690 .vop_fid2 = xfs_fid2, 4691 .vop_rwlock = xfs_rwlock, 4692 .vop_rwunlock = xfs_rwunlock, 4693 .vop_bmap = xfs_bmap, 4694 .vop_reclaim = xfs_reclaim, 4695 .vop_attr_get = xfs_attr_get, 4696 .vop_attr_set = xfs_attr_set, 4697 .vop_attr_remove = xfs_attr_remove, 4698 .vop_attr_list = xfs_attr_list, 4699 .vop_link_removed = (vop_link_removed_t)fs_noval, 4700 .vop_vnode_change = (vop_vnode_change_t)fs_noval, 4701 .vop_tosspages = fs_tosspages, 4702 .vop_flushinval_pages = fs_flushinval_pages, 4703 .vop_flush_pages = fs_flush_pages, 4704 .vop_release = xfs_release, 4705 .vop_iflush = xfs_inode_flush, 4706};