Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at v2.6.23-rc2 4755 lines 119 kB view raw
1/* 2 * Copyright (c) 2000-2006 Silicon Graphics, Inc. 3 * All Rights Reserved. 4 * 5 * This program is free software; you can redistribute it and/or 6 * modify it under the terms of the GNU General Public License as 7 * published by the Free Software Foundation. 8 * 9 * This program is distributed in the hope that it would be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 * GNU General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program; if not, write the Free Software Foundation, 16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 17 */ 18 19#include "xfs.h" 20#include "xfs_fs.h" 21#include "xfs_types.h" 22#include "xfs_bit.h" 23#include "xfs_log.h" 24#include "xfs_inum.h" 25#include "xfs_trans.h" 26#include "xfs_sb.h" 27#include "xfs_ag.h" 28#include "xfs_dir2.h" 29#include "xfs_dmapi.h" 30#include "xfs_mount.h" 31#include "xfs_da_btree.h" 32#include "xfs_bmap_btree.h" 33#include "xfs_alloc_btree.h" 34#include "xfs_ialloc_btree.h" 35#include "xfs_dir2_sf.h" 36#include "xfs_attr_sf.h" 37#include "xfs_dinode.h" 38#include "xfs_inode.h" 39#include "xfs_inode_item.h" 40#include "xfs_itable.h" 41#include "xfs_btree.h" 42#include "xfs_ialloc.h" 43#include "xfs_alloc.h" 44#include "xfs_bmap.h" 45#include "xfs_attr.h" 46#include "xfs_rw.h" 47#include "xfs_error.h" 48#include "xfs_quota.h" 49#include "xfs_utils.h" 50#include "xfs_rtalloc.h" 51#include "xfs_refcache.h" 52#include "xfs_trans_space.h" 53#include "xfs_log_priv.h" 54#include "xfs_filestream.h" 55 56STATIC int 57xfs_open( 58 bhv_desc_t *bdp, 59 cred_t *credp) 60{ 61 int mode; 62 bhv_vnode_t *vp = BHV_TO_VNODE(bdp); 63 xfs_inode_t *ip = XFS_BHVTOI(bdp); 64 65 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) 66 return XFS_ERROR(EIO); 67 68 /* 69 * If it's a directory with any blocks, read-ahead block 0 70 * as we're almost 
certain to have the next operation be a read there. 71 */ 72 if (VN_ISDIR(vp) && ip->i_d.di_nextents > 0) { 73 mode = xfs_ilock_map_shared(ip); 74 if (ip->i_d.di_nextents > 0) 75 (void)xfs_da_reada_buf(NULL, ip, 0, XFS_DATA_FORK); 76 xfs_iunlock(ip, mode); 77 } 78 return 0; 79} 80 81/* 82 * xfs_getattr 83 */ 84STATIC int 85xfs_getattr( 86 bhv_desc_t *bdp, 87 bhv_vattr_t *vap, 88 int flags, 89 cred_t *credp) 90{ 91 xfs_inode_t *ip; 92 xfs_mount_t *mp; 93 bhv_vnode_t *vp; 94 95 vp = BHV_TO_VNODE(bdp); 96 vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address); 97 98 ip = XFS_BHVTOI(bdp); 99 mp = ip->i_mount; 100 101 if (XFS_FORCED_SHUTDOWN(mp)) 102 return XFS_ERROR(EIO); 103 104 if (!(flags & ATTR_LAZY)) 105 xfs_ilock(ip, XFS_ILOCK_SHARED); 106 107 vap->va_size = XFS_ISIZE(ip); 108 if (vap->va_mask == XFS_AT_SIZE) 109 goto all_done; 110 111 vap->va_nblocks = 112 XFS_FSB_TO_BB(mp, ip->i_d.di_nblocks + ip->i_delayed_blks); 113 vap->va_nodeid = ip->i_ino; 114#if XFS_BIG_INUMS 115 vap->va_nodeid += mp->m_inoadd; 116#endif 117 vap->va_nlink = ip->i_d.di_nlink; 118 119 /* 120 * Quick exit for non-stat callers 121 */ 122 if ((vap->va_mask & 123 ~(XFS_AT_SIZE|XFS_AT_FSID|XFS_AT_NODEID| 124 XFS_AT_NLINK|XFS_AT_BLKSIZE)) == 0) 125 goto all_done; 126 127 /* 128 * Copy from in-core inode. 129 */ 130 vap->va_mode = ip->i_d.di_mode; 131 vap->va_uid = ip->i_d.di_uid; 132 vap->va_gid = ip->i_d.di_gid; 133 vap->va_projid = ip->i_d.di_projid; 134 135 /* 136 * Check vnode type block/char vs. everything else. 
137 */ 138 switch (ip->i_d.di_mode & S_IFMT) { 139 case S_IFBLK: 140 case S_IFCHR: 141 vap->va_rdev = ip->i_df.if_u2.if_rdev; 142 vap->va_blocksize = BLKDEV_IOSIZE; 143 break; 144 default: 145 vap->va_rdev = 0; 146 147 if (!(ip->i_d.di_flags & XFS_DIFLAG_REALTIME)) { 148 vap->va_blocksize = xfs_preferred_iosize(mp); 149 } else { 150 151 /* 152 * If the file blocks are being allocated from a 153 * realtime partition, then return the inode's 154 * realtime extent size or the realtime volume's 155 * extent size. 156 */ 157 vap->va_blocksize = 158 xfs_get_extsz_hint(ip) << mp->m_sb.sb_blocklog; 159 } 160 break; 161 } 162 163 vn_atime_to_timespec(vp, &vap->va_atime); 164 vap->va_mtime.tv_sec = ip->i_d.di_mtime.t_sec; 165 vap->va_mtime.tv_nsec = ip->i_d.di_mtime.t_nsec; 166 vap->va_ctime.tv_sec = ip->i_d.di_ctime.t_sec; 167 vap->va_ctime.tv_nsec = ip->i_d.di_ctime.t_nsec; 168 169 /* 170 * Exit for stat callers. See if any of the rest of the fields 171 * to be filled in are needed. 172 */ 173 if ((vap->va_mask & 174 (XFS_AT_XFLAGS|XFS_AT_EXTSIZE|XFS_AT_NEXTENTS|XFS_AT_ANEXTENTS| 175 XFS_AT_GENCOUNT|XFS_AT_VCODE)) == 0) 176 goto all_done; 177 178 /* 179 * Convert di_flags to xflags. 180 */ 181 vap->va_xflags = xfs_ip2xflags(ip); 182 183 /* 184 * Exit for inode revalidate. See if any of the rest of 185 * the fields to be filled in are needed. 186 */ 187 if ((vap->va_mask & 188 (XFS_AT_EXTSIZE|XFS_AT_NEXTENTS|XFS_AT_ANEXTENTS| 189 XFS_AT_GENCOUNT|XFS_AT_VCODE)) == 0) 190 goto all_done; 191 192 vap->va_extsize = ip->i_d.di_extsize << mp->m_sb.sb_blocklog; 193 vap->va_nextents = 194 (ip->i_df.if_flags & XFS_IFEXTENTS) ? 195 ip->i_df.if_bytes / sizeof(xfs_bmbt_rec_t) : 196 ip->i_d.di_nextents; 197 if (ip->i_afp) 198 vap->va_anextents = 199 (ip->i_afp->if_flags & XFS_IFEXTENTS) ? 
200 ip->i_afp->if_bytes / sizeof(xfs_bmbt_rec_t) : 201 ip->i_d.di_anextents; 202 else 203 vap->va_anextents = 0; 204 vap->va_gen = ip->i_d.di_gen; 205 206 all_done: 207 if (!(flags & ATTR_LAZY)) 208 xfs_iunlock(ip, XFS_ILOCK_SHARED); 209 return 0; 210} 211 212 213/* 214 * xfs_setattr 215 */ 216int 217xfs_setattr( 218 bhv_desc_t *bdp, 219 bhv_vattr_t *vap, 220 int flags, 221 cred_t *credp) 222{ 223 xfs_inode_t *ip; 224 xfs_trans_t *tp; 225 xfs_mount_t *mp; 226 int mask; 227 int code; 228 uint lock_flags; 229 uint commit_flags=0; 230 uid_t uid=0, iuid=0; 231 gid_t gid=0, igid=0; 232 int timeflags = 0; 233 bhv_vnode_t *vp; 234 xfs_prid_t projid=0, iprojid=0; 235 int mandlock_before, mandlock_after; 236 struct xfs_dquot *udqp, *gdqp, *olddquot1, *olddquot2; 237 int file_owner; 238 int need_iolock = 1; 239 240 vp = BHV_TO_VNODE(bdp); 241 vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address); 242 243 if (vp->v_vfsp->vfs_flag & VFS_RDONLY) 244 return XFS_ERROR(EROFS); 245 246 /* 247 * Cannot set certain attributes. 248 */ 249 mask = vap->va_mask; 250 if (mask & XFS_AT_NOSET) { 251 return XFS_ERROR(EINVAL); 252 } 253 254 ip = XFS_BHVTOI(bdp); 255 mp = ip->i_mount; 256 257 if (XFS_FORCED_SHUTDOWN(mp)) 258 return XFS_ERROR(EIO); 259 260 /* 261 * Timestamps do not need to be logged and hence do not 262 * need to be done within a transaction. 263 */ 264 if (mask & XFS_AT_UPDTIMES) { 265 ASSERT((mask & ~XFS_AT_UPDTIMES) == 0); 266 timeflags = ((mask & XFS_AT_UPDATIME) ? XFS_ICHGTIME_ACC : 0) | 267 ((mask & XFS_AT_UPDCTIME) ? XFS_ICHGTIME_CHG : 0) | 268 ((mask & XFS_AT_UPDMTIME) ? XFS_ICHGTIME_MOD : 0); 269 xfs_ichgtime(ip, timeflags); 270 return 0; 271 } 272 273 olddquot1 = olddquot2 = NULL; 274 udqp = gdqp = NULL; 275 276 /* 277 * If disk quotas is on, we make sure that the dquots do exist on disk, 278 * before we start any other transactions. Trying to do this later 279 * is messy. 
We don't care to take a readlock to look at the ids 280 * in inode here, because we can't hold it across the trans_reserve. 281 * If the IDs do change before we take the ilock, we're covered 282 * because the i_*dquot fields will get updated anyway. 283 */ 284 if (XFS_IS_QUOTA_ON(mp) && 285 (mask & (XFS_AT_UID|XFS_AT_GID|XFS_AT_PROJID))) { 286 uint qflags = 0; 287 288 if ((mask & XFS_AT_UID) && XFS_IS_UQUOTA_ON(mp)) { 289 uid = vap->va_uid; 290 qflags |= XFS_QMOPT_UQUOTA; 291 } else { 292 uid = ip->i_d.di_uid; 293 } 294 if ((mask & XFS_AT_GID) && XFS_IS_GQUOTA_ON(mp)) { 295 gid = vap->va_gid; 296 qflags |= XFS_QMOPT_GQUOTA; 297 } else { 298 gid = ip->i_d.di_gid; 299 } 300 if ((mask & XFS_AT_PROJID) && XFS_IS_PQUOTA_ON(mp)) { 301 projid = vap->va_projid; 302 qflags |= XFS_QMOPT_PQUOTA; 303 } else { 304 projid = ip->i_d.di_projid; 305 } 306 /* 307 * We take a reference when we initialize udqp and gdqp, 308 * so it is important that we never blindly double trip on 309 * the same variable. See xfs_create() for an example. 310 */ 311 ASSERT(udqp == NULL); 312 ASSERT(gdqp == NULL); 313 code = XFS_QM_DQVOPALLOC(mp, ip, uid, gid, projid, qflags, 314 &udqp, &gdqp); 315 if (code) 316 return code; 317 } 318 319 /* 320 * For the other attributes, we acquire the inode lock and 321 * first do an error checking pass. 
322 */ 323 tp = NULL; 324 lock_flags = XFS_ILOCK_EXCL; 325 if (flags & ATTR_NOLOCK) 326 need_iolock = 0; 327 if (!(mask & XFS_AT_SIZE)) { 328 if ((mask != (XFS_AT_CTIME|XFS_AT_ATIME|XFS_AT_MTIME)) || 329 (mp->m_flags & XFS_MOUNT_WSYNC)) { 330 tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE); 331 commit_flags = 0; 332 if ((code = xfs_trans_reserve(tp, 0, 333 XFS_ICHANGE_LOG_RES(mp), 0, 334 0, 0))) { 335 lock_flags = 0; 336 goto error_return; 337 } 338 } 339 } else { 340 if (DM_EVENT_ENABLED (vp->v_vfsp, ip, DM_EVENT_TRUNCATE) && 341 !(flags & ATTR_DMI)) { 342 int dmflags = AT_DELAY_FLAG(flags) | DM_SEM_FLAG_WR; 343 code = XFS_SEND_DATA(mp, DM_EVENT_TRUNCATE, vp, 344 vap->va_size, 0, dmflags, NULL); 345 if (code) { 346 lock_flags = 0; 347 goto error_return; 348 } 349 } 350 if (need_iolock) 351 lock_flags |= XFS_IOLOCK_EXCL; 352 } 353 354 xfs_ilock(ip, lock_flags); 355 356 /* boolean: are we the file owner? */ 357 file_owner = (current_fsuid(credp) == ip->i_d.di_uid); 358 359 /* 360 * Change various properties of a file. 361 * Only the owner or users with CAP_FOWNER 362 * capability may do these things. 363 */ 364 if (mask & 365 (XFS_AT_MODE|XFS_AT_XFLAGS|XFS_AT_EXTSIZE|XFS_AT_UID| 366 XFS_AT_GID|XFS_AT_PROJID)) { 367 /* 368 * CAP_FOWNER overrides the following restrictions: 369 * 370 * The user ID of the calling process must be equal 371 * to the file owner ID, except in cases where the 372 * CAP_FSETID capability is applicable. 373 */ 374 if (!file_owner && !capable(CAP_FOWNER)) { 375 code = XFS_ERROR(EPERM); 376 goto error_return; 377 } 378 379 /* 380 * CAP_FSETID overrides the following restrictions: 381 * 382 * The effective user ID of the calling process shall match 383 * the file owner when setting the set-user-ID and 384 * set-group-ID bits on that file. 
385 * 386 * The effective group ID or one of the supplementary group 387 * IDs of the calling process shall match the group owner of 388 * the file when setting the set-group-ID bit on that file 389 */ 390 if (mask & XFS_AT_MODE) { 391 mode_t m = 0; 392 393 if ((vap->va_mode & S_ISUID) && !file_owner) 394 m |= S_ISUID; 395 if ((vap->va_mode & S_ISGID) && 396 !in_group_p((gid_t)ip->i_d.di_gid)) 397 m |= S_ISGID; 398#if 0 399 /* Linux allows this, Irix doesn't. */ 400 if ((vap->va_mode & S_ISVTX) && !VN_ISDIR(vp)) 401 m |= S_ISVTX; 402#endif 403 if (m && !capable(CAP_FSETID)) 404 vap->va_mode &= ~m; 405 } 406 } 407 408 /* 409 * Change file ownership. Must be the owner or privileged. 410 * If the system was configured with the "restricted_chown" 411 * option, the owner is not permitted to give away the file, 412 * and can change the group id only to a group of which he 413 * or she is a member. 414 */ 415 if (mask & (XFS_AT_UID|XFS_AT_GID|XFS_AT_PROJID)) { 416 /* 417 * These IDs could have changed since we last looked at them. 418 * But, we're assured that if the ownership did change 419 * while we didn't have the inode locked, inode's dquot(s) 420 * would have changed also. 421 */ 422 iuid = ip->i_d.di_uid; 423 iprojid = ip->i_d.di_projid; 424 igid = ip->i_d.di_gid; 425 gid = (mask & XFS_AT_GID) ? vap->va_gid : igid; 426 uid = (mask & XFS_AT_UID) ? vap->va_uid : iuid; 427 projid = (mask & XFS_AT_PROJID) ? (xfs_prid_t)vap->va_projid : 428 iprojid; 429 430 /* 431 * CAP_CHOWN overrides the following restrictions: 432 * 433 * If _POSIX_CHOWN_RESTRICTED is defined, this capability 434 * shall override the restriction that a process cannot 435 * change the user ID of a file it owns and the restriction 436 * that the group ID supplied to the chown() function 437 * shall be equal to either the group ID or one of the 438 * supplementary group IDs of the calling process. 
439 */ 440 if (restricted_chown && 441 (iuid != uid || (igid != gid && 442 !in_group_p((gid_t)gid))) && 443 !capable(CAP_CHOWN)) { 444 code = XFS_ERROR(EPERM); 445 goto error_return; 446 } 447 /* 448 * Do a quota reservation only if uid/projid/gid is actually 449 * going to change. 450 */ 451 if ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) || 452 (XFS_IS_PQUOTA_ON(mp) && iprojid != projid) || 453 (XFS_IS_GQUOTA_ON(mp) && igid != gid)) { 454 ASSERT(tp); 455 code = XFS_QM_DQVOPCHOWNRESV(mp, tp, ip, udqp, gdqp, 456 capable(CAP_FOWNER) ? 457 XFS_QMOPT_FORCE_RES : 0); 458 if (code) /* out of quota */ 459 goto error_return; 460 } 461 } 462 463 /* 464 * Truncate file. Must have write permission and not be a directory. 465 */ 466 if (mask & XFS_AT_SIZE) { 467 /* Short circuit the truncate case for zero length files */ 468 if ((vap->va_size == 0) && 469 (ip->i_size == 0) && (ip->i_d.di_nextents == 0)) { 470 xfs_iunlock(ip, XFS_ILOCK_EXCL); 471 lock_flags &= ~XFS_ILOCK_EXCL; 472 if (mask & XFS_AT_CTIME) 473 xfs_ichgtime(ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 474 code = 0; 475 goto error_return; 476 } 477 478 if (VN_ISDIR(vp)) { 479 code = XFS_ERROR(EISDIR); 480 goto error_return; 481 } else if (!VN_ISREG(vp)) { 482 code = XFS_ERROR(EINVAL); 483 goto error_return; 484 } 485 /* 486 * Make sure that the dquots are attached to the inode. 487 */ 488 if ((code = XFS_QM_DQATTACH(mp, ip, XFS_QMOPT_ILOCKED))) 489 goto error_return; 490 } 491 492 /* 493 * Change file access or modified times. 494 */ 495 if (mask & (XFS_AT_ATIME|XFS_AT_MTIME)) { 496 if (!file_owner) { 497 if ((flags & ATTR_UTIME) && 498 !capable(CAP_FOWNER)) { 499 code = XFS_ERROR(EPERM); 500 goto error_return; 501 } 502 } 503 } 504 505 /* 506 * Change extent size or realtime flag. 507 */ 508 if (mask & (XFS_AT_EXTSIZE|XFS_AT_XFLAGS)) { 509 /* 510 * Can't change extent size if any extents are allocated. 
511 */ 512 if (ip->i_d.di_nextents && (mask & XFS_AT_EXTSIZE) && 513 ((ip->i_d.di_extsize << mp->m_sb.sb_blocklog) != 514 vap->va_extsize) ) { 515 code = XFS_ERROR(EINVAL); /* EFBIG? */ 516 goto error_return; 517 } 518 519 /* 520 * Can't change realtime flag if any extents are allocated. 521 */ 522 if ((ip->i_d.di_nextents || ip->i_delayed_blks) && 523 (mask & XFS_AT_XFLAGS) && 524 (ip->i_d.di_flags & XFS_DIFLAG_REALTIME) != 525 (vap->va_xflags & XFS_XFLAG_REALTIME)) { 526 code = XFS_ERROR(EINVAL); /* EFBIG? */ 527 goto error_return; 528 } 529 /* 530 * Extent size must be a multiple of the appropriate block 531 * size, if set at all. 532 */ 533 if ((mask & XFS_AT_EXTSIZE) && vap->va_extsize != 0) { 534 xfs_extlen_t size; 535 536 if ((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) || 537 ((mask & XFS_AT_XFLAGS) && 538 (vap->va_xflags & XFS_XFLAG_REALTIME))) { 539 size = mp->m_sb.sb_rextsize << 540 mp->m_sb.sb_blocklog; 541 } else { 542 size = mp->m_sb.sb_blocksize; 543 } 544 if (vap->va_extsize % size) { 545 code = XFS_ERROR(EINVAL); 546 goto error_return; 547 } 548 } 549 /* 550 * If realtime flag is set then must have realtime data. 551 */ 552 if ((mask & XFS_AT_XFLAGS) && 553 (vap->va_xflags & XFS_XFLAG_REALTIME)) { 554 if ((mp->m_sb.sb_rblocks == 0) || 555 (mp->m_sb.sb_rextsize == 0) || 556 (ip->i_d.di_extsize % mp->m_sb.sb_rextsize)) { 557 code = XFS_ERROR(EINVAL); 558 goto error_return; 559 } 560 } 561 562 /* 563 * Can't modify an immutable/append-only file unless 564 * we have appropriate permission. 565 */ 566 if ((mask & XFS_AT_XFLAGS) && 567 (ip->i_d.di_flags & 568 (XFS_DIFLAG_IMMUTABLE|XFS_DIFLAG_APPEND) || 569 (vap->va_xflags & 570 (XFS_XFLAG_IMMUTABLE | XFS_XFLAG_APPEND))) && 571 !capable(CAP_LINUX_IMMUTABLE)) { 572 code = XFS_ERROR(EPERM); 573 goto error_return; 574 } 575 } 576 577 /* 578 * Now we can make the changes. 
Before we join the inode 579 * to the transaction, if XFS_AT_SIZE is set then take care of 580 * the part of the truncation that must be done without the 581 * inode lock. This needs to be done before joining the inode 582 * to the transaction, because the inode cannot be unlocked 583 * once it is a part of the transaction. 584 */ 585 if (mask & XFS_AT_SIZE) { 586 code = 0; 587 if ((vap->va_size > ip->i_size) && 588 (flags & ATTR_NOSIZETOK) == 0) { 589 code = xfs_igrow_start(ip, vap->va_size, credp); 590 } 591 xfs_iunlock(ip, XFS_ILOCK_EXCL); 592 593 /* 594 * We are going to log the inode size change in this 595 * transaction so any previous writes that are beyond the on 596 * disk EOF and the new EOF that have not been written out need 597 * to be written here. If we do not write the data out, we 598 * expose ourselves to the null files problem. 599 * 600 * Only flush from the on disk size to the smaller of the in 601 * memory file size or the new size as that's the range we 602 * really care about here and prevents waiting for other data 603 * not within the range we care about here. 
604 */ 605 if (!code && 606 (ip->i_size != ip->i_d.di_size) && 607 (vap->va_size > ip->i_d.di_size)) { 608 code = bhv_vop_flush_pages(XFS_ITOV(ip), 609 ip->i_d.di_size, vap->va_size, 610 XFS_B_ASYNC, FI_NONE); 611 } 612 613 /* wait for all I/O to complete */ 614 vn_iowait(vp); 615 616 if (!code) 617 code = xfs_itruncate_data(ip, vap->va_size); 618 if (code) { 619 ASSERT(tp == NULL); 620 lock_flags &= ~XFS_ILOCK_EXCL; 621 ASSERT(lock_flags == XFS_IOLOCK_EXCL); 622 goto error_return; 623 } 624 tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE); 625 if ((code = xfs_trans_reserve(tp, 0, 626 XFS_ITRUNCATE_LOG_RES(mp), 0, 627 XFS_TRANS_PERM_LOG_RES, 628 XFS_ITRUNCATE_LOG_COUNT))) { 629 xfs_trans_cancel(tp, 0); 630 if (need_iolock) 631 xfs_iunlock(ip, XFS_IOLOCK_EXCL); 632 return code; 633 } 634 commit_flags = XFS_TRANS_RELEASE_LOG_RES; 635 xfs_ilock(ip, XFS_ILOCK_EXCL); 636 } 637 638 if (tp) { 639 xfs_trans_ijoin(tp, ip, lock_flags); 640 xfs_trans_ihold(tp, ip); 641 } 642 643 /* determine whether mandatory locking mode changes */ 644 mandlock_before = MANDLOCK(vp, ip->i_d.di_mode); 645 646 /* 647 * Truncate file. Must have write permission and not be a directory. 648 */ 649 if (mask & XFS_AT_SIZE) { 650 if (vap->va_size > ip->i_size) { 651 xfs_igrow_finish(tp, ip, vap->va_size, 652 !(flags & ATTR_DMI)); 653 } else if ((vap->va_size <= ip->i_size) || 654 ((vap->va_size == 0) && ip->i_d.di_nextents)) { 655 /* 656 * signal a sync transaction unless 657 * we're truncating an already unlinked 658 * file on a wsync filesystem 659 */ 660 code = xfs_itruncate_finish(&tp, ip, 661 (xfs_fsize_t)vap->va_size, 662 XFS_DATA_FORK, 663 ((ip->i_d.di_nlink != 0 || 664 !(mp->m_flags & XFS_MOUNT_WSYNC)) 665 ? 1 : 0)); 666 if (code) 667 goto abort_return; 668 /* 669 * Truncated "down", so we're removing references 670 * to old data here - if we now delay flushing for 671 * a long time, we expose ourselves unduly to the 672 * notorious NULL files problem. 
So, we mark this 673 * vnode and flush it when the file is closed, and 674 * do not wait the usual (long) time for writeout. 675 */ 676 VTRUNCATE(vp); 677 } 678 /* 679 * Have to do this even if the file's size doesn't change. 680 */ 681 timeflags |= XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG; 682 } 683 684 /* 685 * Change file access modes. 686 */ 687 if (mask & XFS_AT_MODE) { 688 ip->i_d.di_mode &= S_IFMT; 689 ip->i_d.di_mode |= vap->va_mode & ~S_IFMT; 690 691 xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE); 692 timeflags |= XFS_ICHGTIME_CHG; 693 } 694 695 /* 696 * Change file ownership. Must be the owner or privileged. 697 * If the system was configured with the "restricted_chown" 698 * option, the owner is not permitted to give away the file, 699 * and can change the group id only to a group of which he 700 * or she is a member. 701 */ 702 if (mask & (XFS_AT_UID|XFS_AT_GID|XFS_AT_PROJID)) { 703 /* 704 * CAP_FSETID overrides the following restrictions: 705 * 706 * The set-user-ID and set-group-ID bits of a file will be 707 * cleared upon successful return from chown() 708 */ 709 if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) && 710 !capable(CAP_FSETID)) { 711 ip->i_d.di_mode &= ~(S_ISUID|S_ISGID); 712 } 713 714 /* 715 * Change the ownerships and register quota modifications 716 * in the transaction. 
717 */ 718 if (iuid != uid) { 719 if (XFS_IS_UQUOTA_ON(mp)) { 720 ASSERT(mask & XFS_AT_UID); 721 ASSERT(udqp); 722 olddquot1 = XFS_QM_DQVOPCHOWN(mp, tp, ip, 723 &ip->i_udquot, udqp); 724 } 725 ip->i_d.di_uid = uid; 726 } 727 if (igid != gid) { 728 if (XFS_IS_GQUOTA_ON(mp)) { 729 ASSERT(!XFS_IS_PQUOTA_ON(mp)); 730 ASSERT(mask & XFS_AT_GID); 731 ASSERT(gdqp); 732 olddquot2 = XFS_QM_DQVOPCHOWN(mp, tp, ip, 733 &ip->i_gdquot, gdqp); 734 } 735 ip->i_d.di_gid = gid; 736 } 737 if (iprojid != projid) { 738 if (XFS_IS_PQUOTA_ON(mp)) { 739 ASSERT(!XFS_IS_GQUOTA_ON(mp)); 740 ASSERT(mask & XFS_AT_PROJID); 741 ASSERT(gdqp); 742 olddquot2 = XFS_QM_DQVOPCHOWN(mp, tp, ip, 743 &ip->i_gdquot, gdqp); 744 } 745 ip->i_d.di_projid = projid; 746 /* 747 * We may have to rev the inode as well as 748 * the superblock version number since projids didn't 749 * exist before DINODE_VERSION_2 and SB_VERSION_NLINK. 750 */ 751 if (ip->i_d.di_version == XFS_DINODE_VERSION_1) 752 xfs_bump_ino_vers2(tp, ip); 753 } 754 755 xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE); 756 timeflags |= XFS_ICHGTIME_CHG; 757 } 758 759 760 /* 761 * Change file access or modified times. 762 */ 763 if (mask & (XFS_AT_ATIME|XFS_AT_MTIME)) { 764 if (mask & XFS_AT_ATIME) { 765 ip->i_d.di_atime.t_sec = vap->va_atime.tv_sec; 766 ip->i_d.di_atime.t_nsec = vap->va_atime.tv_nsec; 767 ip->i_update_core = 1; 768 timeflags &= ~XFS_ICHGTIME_ACC; 769 } 770 if (mask & XFS_AT_MTIME) { 771 ip->i_d.di_mtime.t_sec = vap->va_mtime.tv_sec; 772 ip->i_d.di_mtime.t_nsec = vap->va_mtime.tv_nsec; 773 timeflags &= ~XFS_ICHGTIME_MOD; 774 timeflags |= XFS_ICHGTIME_CHG; 775 } 776 if (tp && (flags & ATTR_UTIME)) 777 xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE); 778 } 779 780 /* 781 * Change XFS-added attributes. 782 */ 783 if (mask & (XFS_AT_EXTSIZE|XFS_AT_XFLAGS)) { 784 if (mask & XFS_AT_EXTSIZE) { 785 /* 786 * Converting bytes to fs blocks. 
787 */ 788 ip->i_d.di_extsize = vap->va_extsize >> 789 mp->m_sb.sb_blocklog; 790 } 791 if (mask & XFS_AT_XFLAGS) { 792 uint di_flags; 793 794 /* can't set PREALLOC this way, just preserve it */ 795 di_flags = (ip->i_d.di_flags & XFS_DIFLAG_PREALLOC); 796 if (vap->va_xflags & XFS_XFLAG_IMMUTABLE) 797 di_flags |= XFS_DIFLAG_IMMUTABLE; 798 if (vap->va_xflags & XFS_XFLAG_APPEND) 799 di_flags |= XFS_DIFLAG_APPEND; 800 if (vap->va_xflags & XFS_XFLAG_SYNC) 801 di_flags |= XFS_DIFLAG_SYNC; 802 if (vap->va_xflags & XFS_XFLAG_NOATIME) 803 di_flags |= XFS_DIFLAG_NOATIME; 804 if (vap->va_xflags & XFS_XFLAG_NODUMP) 805 di_flags |= XFS_DIFLAG_NODUMP; 806 if (vap->va_xflags & XFS_XFLAG_PROJINHERIT) 807 di_flags |= XFS_DIFLAG_PROJINHERIT; 808 if (vap->va_xflags & XFS_XFLAG_NODEFRAG) 809 di_flags |= XFS_DIFLAG_NODEFRAG; 810 if (vap->va_xflags & XFS_XFLAG_FILESTREAM) 811 di_flags |= XFS_DIFLAG_FILESTREAM; 812 if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) { 813 if (vap->va_xflags & XFS_XFLAG_RTINHERIT) 814 di_flags |= XFS_DIFLAG_RTINHERIT; 815 if (vap->va_xflags & XFS_XFLAG_NOSYMLINKS) 816 di_flags |= XFS_DIFLAG_NOSYMLINKS; 817 if (vap->va_xflags & XFS_XFLAG_EXTSZINHERIT) 818 di_flags |= XFS_DIFLAG_EXTSZINHERIT; 819 } else if ((ip->i_d.di_mode & S_IFMT) == S_IFREG) { 820 if (vap->va_xflags & XFS_XFLAG_REALTIME) { 821 di_flags |= XFS_DIFLAG_REALTIME; 822 ip->i_iocore.io_flags |= XFS_IOCORE_RT; 823 } else { 824 ip->i_iocore.io_flags &= ~XFS_IOCORE_RT; 825 } 826 if (vap->va_xflags & XFS_XFLAG_EXTSIZE) 827 di_flags |= XFS_DIFLAG_EXTSIZE; 828 } 829 ip->i_d.di_flags = di_flags; 830 } 831 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 832 timeflags |= XFS_ICHGTIME_CHG; 833 } 834 835 /* 836 * Change file inode change time only if XFS_AT_CTIME set 837 * AND we have been called by a DMI function. 
838 */ 839 840 if ( (flags & ATTR_DMI) && (mask & XFS_AT_CTIME) ) { 841 ip->i_d.di_ctime.t_sec = vap->va_ctime.tv_sec; 842 ip->i_d.di_ctime.t_nsec = vap->va_ctime.tv_nsec; 843 ip->i_update_core = 1; 844 timeflags &= ~XFS_ICHGTIME_CHG; 845 } 846 847 /* 848 * Send out timestamp changes that need to be set to the 849 * current time. Not done when called by a DMI function. 850 */ 851 if (timeflags && !(flags & ATTR_DMI)) 852 xfs_ichgtime(ip, timeflags); 853 854 XFS_STATS_INC(xs_ig_attrchg); 855 856 /* 857 * If this is a synchronous mount, make sure that the 858 * transaction goes to disk before returning to the user. 859 * This is slightly sub-optimal in that truncates require 860 * two sync transactions instead of one for wsync filesystems. 861 * One for the truncate and one for the timestamps since we 862 * don't want to change the timestamps unless we're sure the 863 * truncate worked. Truncates are less than 1% of the laddis 864 * mix so this probably isn't worth the trouble to optimize. 865 */ 866 code = 0; 867 if (tp) { 868 if (mp->m_flags & XFS_MOUNT_WSYNC) 869 xfs_trans_set_sync(tp); 870 871 code = xfs_trans_commit(tp, commit_flags); 872 } 873 874 /* 875 * If the (regular) file's mandatory locking mode changed, then 876 * notify the vnode. We do this under the inode lock to prevent 877 * racing calls to vop_vnode_change. 878 */ 879 mandlock_after = MANDLOCK(vp, ip->i_d.di_mode); 880 if (mandlock_before != mandlock_after) { 881 bhv_vop_vnode_change(vp, VCHANGE_FLAGS_ENF_LOCKING, 882 mandlock_after); 883 } 884 885 xfs_iunlock(ip, lock_flags); 886 887 /* 888 * Release any dquot(s) the inode had kept before chown. 
889 */ 890 XFS_QM_DQRELE(mp, olddquot1); 891 XFS_QM_DQRELE(mp, olddquot2); 892 XFS_QM_DQRELE(mp, udqp); 893 XFS_QM_DQRELE(mp, gdqp); 894 895 if (code) { 896 return code; 897 } 898 899 if (DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_ATTRIBUTE) && 900 !(flags & ATTR_DMI)) { 901 (void) XFS_SEND_NAMESP(mp, DM_EVENT_ATTRIBUTE, vp, DM_RIGHT_NULL, 902 NULL, DM_RIGHT_NULL, NULL, NULL, 903 0, 0, AT_DELAY_FLAG(flags)); 904 } 905 return 0; 906 907 abort_return: 908 commit_flags |= XFS_TRANS_ABORT; 909 /* FALLTHROUGH */ 910 error_return: 911 XFS_QM_DQRELE(mp, udqp); 912 XFS_QM_DQRELE(mp, gdqp); 913 if (tp) { 914 xfs_trans_cancel(tp, commit_flags); 915 } 916 if (lock_flags != 0) { 917 xfs_iunlock(ip, lock_flags); 918 } 919 return code; 920} 921 922 923/* 924 * xfs_access 925 * Null conversion from vnode mode bits to inode mode bits, as in efs. 926 */ 927STATIC int 928xfs_access( 929 bhv_desc_t *bdp, 930 int mode, 931 cred_t *credp) 932{ 933 xfs_inode_t *ip; 934 int error; 935 936 vn_trace_entry(BHV_TO_VNODE(bdp), __FUNCTION__, 937 (inst_t *)__return_address); 938 939 ip = XFS_BHVTOI(bdp); 940 xfs_ilock(ip, XFS_ILOCK_SHARED); 941 error = xfs_iaccess(ip, mode, credp); 942 xfs_iunlock(ip, XFS_ILOCK_SHARED); 943 return error; 944} 945 946 947/* 948 * The maximum pathlen is 1024 bytes. Since the minimum file system 949 * blocksize is 512 bytes, we can get a max of 2 extents back from 950 * bmapi. 
951 */ 952#define SYMLINK_MAPS 2 953 954/* 955 * xfs_readlink 956 * 957 */ 958STATIC int 959xfs_readlink( 960 bhv_desc_t *bdp, 961 uio_t *uiop, 962 int ioflags, 963 cred_t *credp) 964{ 965 xfs_inode_t *ip; 966 int count; 967 xfs_off_t offset; 968 int pathlen; 969 bhv_vnode_t *vp; 970 int error = 0; 971 xfs_mount_t *mp; 972 int nmaps; 973 xfs_bmbt_irec_t mval[SYMLINK_MAPS]; 974 xfs_daddr_t d; 975 int byte_cnt; 976 int n; 977 xfs_buf_t *bp; 978 979 vp = BHV_TO_VNODE(bdp); 980 vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address); 981 982 ip = XFS_BHVTOI(bdp); 983 mp = ip->i_mount; 984 985 if (XFS_FORCED_SHUTDOWN(mp)) 986 return XFS_ERROR(EIO); 987 988 xfs_ilock(ip, XFS_ILOCK_SHARED); 989 990 ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFLNK); 991 992 offset = uiop->uio_offset; 993 count = uiop->uio_resid; 994 995 if (offset < 0) { 996 error = XFS_ERROR(EINVAL); 997 goto error_return; 998 } 999 if (count <= 0) { 1000 error = 0; 1001 goto error_return; 1002 } 1003 1004 /* 1005 * See if the symlink is stored inline. 1006 */ 1007 pathlen = (int)ip->i_d.di_size; 1008 1009 if (ip->i_df.if_flags & XFS_IFINLINE) { 1010 error = xfs_uio_read(ip->i_df.if_u1.if_data, pathlen, uiop); 1011 } 1012 else { 1013 /* 1014 * Symlink not inline. Call bmap to get it in. 
1015 */ 1016 nmaps = SYMLINK_MAPS; 1017 1018 error = xfs_bmapi(NULL, ip, 0, XFS_B_TO_FSB(mp, pathlen), 1019 0, NULL, 0, mval, &nmaps, NULL, NULL); 1020 1021 if (error) { 1022 goto error_return; 1023 } 1024 1025 for (n = 0; n < nmaps; n++) { 1026 d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock); 1027 byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount); 1028 bp = xfs_buf_read(mp->m_ddev_targp, d, 1029 BTOBB(byte_cnt), 0); 1030 error = XFS_BUF_GETERROR(bp); 1031 if (error) { 1032 xfs_ioerror_alert("xfs_readlink", 1033 ip->i_mount, bp, XFS_BUF_ADDR(bp)); 1034 xfs_buf_relse(bp); 1035 goto error_return; 1036 } 1037 if (pathlen < byte_cnt) 1038 byte_cnt = pathlen; 1039 pathlen -= byte_cnt; 1040 1041 error = xfs_uio_read(XFS_BUF_PTR(bp), byte_cnt, uiop); 1042 xfs_buf_relse (bp); 1043 } 1044 1045 } 1046 1047error_return: 1048 xfs_iunlock(ip, XFS_ILOCK_SHARED); 1049 return error; 1050} 1051 1052 1053/* 1054 * xfs_fsync 1055 * 1056 * This is called to sync the inode and its data out to disk. 1057 * We need to hold the I/O lock while flushing the data, and 1058 * the inode lock while flushing the inode. The inode lock CANNOT 1059 * be held while flushing the data, so acquire after we're done 1060 * with that. 1061 */ 1062STATIC int 1063xfs_fsync( 1064 bhv_desc_t *bdp, 1065 int flag, 1066 cred_t *credp, 1067 xfs_off_t start, 1068 xfs_off_t stop) 1069{ 1070 xfs_inode_t *ip; 1071 xfs_trans_t *tp; 1072 int error; 1073 int log_flushed = 0, changed = 1; 1074 1075 vn_trace_entry(BHV_TO_VNODE(bdp), 1076 __FUNCTION__, (inst_t *)__return_address); 1077 1078 ip = XFS_BHVTOI(bdp); 1079 1080 ASSERT(start >= 0 && stop >= -1); 1081 1082 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) 1083 return XFS_ERROR(EIO); 1084 1085 /* 1086 * We always need to make sure that the required inode state 1087 * is safe on disk. The vnode might be clean but because 1088 * of committed transactions that haven't hit the disk yet. 
1089 * Likewise, there could be unflushed non-transactional 1090 * changes to the inode core that have to go to disk. 1091 * 1092 * The following code depends on one assumption: that 1093 * any transaction that changes an inode logs the core 1094 * because it has to change some field in the inode core 1095 * (typically nextents or nblocks). That assumption 1096 * implies that any transactions against an inode will 1097 * catch any non-transactional updates. If inode-altering 1098 * transactions exist that violate this assumption, the 1099 * code breaks. Right now, it figures that if the involved 1100 * update_* field is clear and the inode is unpinned, the 1101 * inode is clean. Either it's been flushed or it's been 1102 * committed and the commit has hit the disk unpinning the inode. 1103 * (Note that xfs_inode_item_format() called at commit clears 1104 * the update_* fields.) 1105 */ 1106 xfs_ilock(ip, XFS_ILOCK_SHARED); 1107 1108 /* If we are flushing data then we care about update_size 1109 * being set, otherwise we care about update_core 1110 */ 1111 if ((flag & FSYNC_DATA) ? 1112 (ip->i_update_size == 0) : 1113 (ip->i_update_core == 0)) { 1114 /* 1115 * Timestamps/size haven't changed since last inode 1116 * flush or inode transaction commit. That means 1117 * either nothing got written or a transaction 1118 * committed which caught the updates. If the 1119 * latter happened and the transaction hasn't 1120 * hit the disk yet, the inode will be still 1121 * be pinned. If it is, force the log. 1122 */ 1123 1124 xfs_iunlock(ip, XFS_ILOCK_SHARED); 1125 1126 if (xfs_ipincount(ip)) { 1127 _xfs_log_force(ip->i_mount, (xfs_lsn_t)0, 1128 XFS_LOG_FORCE | 1129 ((flag & FSYNC_WAIT) 1130 ? XFS_LOG_SYNC : 0), 1131 &log_flushed); 1132 } else { 1133 /* 1134 * If the inode is not pinned and nothing 1135 * has changed we don't need to flush the 1136 * cache. 
1137 */ 1138 changed = 0; 1139 } 1140 error = 0; 1141 } else { 1142 /* 1143 * Kick off a transaction to log the inode 1144 * core to get the updates. Make it 1145 * sync if FSYNC_WAIT is passed in (which 1146 * is done by everybody but specfs). The 1147 * sync transaction will also force the log. 1148 */ 1149 xfs_iunlock(ip, XFS_ILOCK_SHARED); 1150 tp = xfs_trans_alloc(ip->i_mount, XFS_TRANS_FSYNC_TS); 1151 if ((error = xfs_trans_reserve(tp, 0, 1152 XFS_FSYNC_TS_LOG_RES(ip->i_mount), 1153 0, 0, 0))) { 1154 xfs_trans_cancel(tp, 0); 1155 return error; 1156 } 1157 xfs_ilock(ip, XFS_ILOCK_EXCL); 1158 1159 /* 1160 * Note - it's possible that we might have pushed 1161 * ourselves out of the way during trans_reserve 1162 * which would flush the inode. But there's no 1163 * guarantee that the inode buffer has actually 1164 * gone out yet (it's delwri). Plus the buffer 1165 * could be pinned anyway if it's part of an 1166 * inode in another recent transaction. So we 1167 * play it safe and fire off the transaction anyway. 1168 */ 1169 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 1170 xfs_trans_ihold(tp, ip); 1171 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 1172 if (flag & FSYNC_WAIT) 1173 xfs_trans_set_sync(tp); 1174 error = _xfs_trans_commit(tp, 0, &log_flushed); 1175 1176 xfs_iunlock(ip, XFS_ILOCK_EXCL); 1177 } 1178 1179 if ((ip->i_mount->m_flags & XFS_MOUNT_BARRIER) && changed) { 1180 /* 1181 * If the log write didn't issue an ordered tag we need 1182 * to flush the disk cache for the data device now. 1183 */ 1184 if (!log_flushed) 1185 xfs_blkdev_issue_flush(ip->i_mount->m_ddev_targp); 1186 1187 /* 1188 * If this inode is on the RT dev we need to flush that 1189 * cache as well. 
1190 */ 1191 if (ip->i_d.di_flags & XFS_DIFLAG_REALTIME) 1192 xfs_blkdev_issue_flush(ip->i_mount->m_rtdev_targp); 1193 } 1194 1195 return error; 1196} 1197 1198/* 1199 * This is called by xfs_inactive to free any blocks beyond eof 1200 * when the link count isn't zero and by xfs_dm_punch_hole() when 1201 * punching a hole to EOF. 1202 */ 1203int 1204xfs_free_eofblocks( 1205 xfs_mount_t *mp, 1206 xfs_inode_t *ip, 1207 int flags) 1208{ 1209 xfs_trans_t *tp; 1210 int error; 1211 xfs_fileoff_t end_fsb; 1212 xfs_fileoff_t last_fsb; 1213 xfs_filblks_t map_len; 1214 int nimaps; 1215 xfs_bmbt_irec_t imap; 1216 int use_iolock = (flags & XFS_FREE_EOF_LOCK); 1217 1218 /* 1219 * Figure out if there are any blocks beyond the end 1220 * of the file. If not, then there is nothing to do. 1221 */ 1222 end_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)ip->i_size)); 1223 last_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp)); 1224 map_len = last_fsb - end_fsb; 1225 if (map_len <= 0) 1226 return 0; 1227 1228 nimaps = 1; 1229 xfs_ilock(ip, XFS_ILOCK_SHARED); 1230 error = XFS_BMAPI(mp, NULL, &ip->i_iocore, end_fsb, map_len, 0, 1231 NULL, 0, &imap, &nimaps, NULL, NULL); 1232 xfs_iunlock(ip, XFS_ILOCK_SHARED); 1233 1234 if (!error && (nimaps != 0) && 1235 (imap.br_startblock != HOLESTARTBLOCK || 1236 ip->i_delayed_blks)) { 1237 /* 1238 * Attach the dquots to the inode up front. 1239 */ 1240 if ((error = XFS_QM_DQATTACH(mp, ip, 0))) 1241 return error; 1242 1243 /* 1244 * There are blocks after the end of file. 1245 * Free them up now by truncating the file to 1246 * its current size. 1247 */ 1248 tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE); 1249 1250 /* 1251 * Do the xfs_itruncate_start() call before 1252 * reserving any log space because 1253 * itruncate_start will call into the buffer 1254 * cache and we can't 1255 * do that within a transaction. 
1256 */ 1257 if (use_iolock) 1258 xfs_ilock(ip, XFS_IOLOCK_EXCL); 1259 error = xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE, 1260 ip->i_size); 1261 if (error) { 1262 xfs_trans_cancel(tp, 0); 1263 if (use_iolock) 1264 xfs_iunlock(ip, XFS_IOLOCK_EXCL); 1265 return error; 1266 } 1267 1268 error = xfs_trans_reserve(tp, 0, 1269 XFS_ITRUNCATE_LOG_RES(mp), 1270 0, XFS_TRANS_PERM_LOG_RES, 1271 XFS_ITRUNCATE_LOG_COUNT); 1272 if (error) { 1273 ASSERT(XFS_FORCED_SHUTDOWN(mp)); 1274 xfs_trans_cancel(tp, 0); 1275 xfs_iunlock(ip, XFS_IOLOCK_EXCL); 1276 return error; 1277 } 1278 1279 xfs_ilock(ip, XFS_ILOCK_EXCL); 1280 xfs_trans_ijoin(tp, ip, 1281 XFS_IOLOCK_EXCL | 1282 XFS_ILOCK_EXCL); 1283 xfs_trans_ihold(tp, ip); 1284 1285 error = xfs_itruncate_finish(&tp, ip, 1286 ip->i_size, 1287 XFS_DATA_FORK, 1288 0); 1289 /* 1290 * If we get an error at this point we 1291 * simply don't bother truncating the file. 1292 */ 1293 if (error) { 1294 xfs_trans_cancel(tp, 1295 (XFS_TRANS_RELEASE_LOG_RES | 1296 XFS_TRANS_ABORT)); 1297 } else { 1298 error = xfs_trans_commit(tp, 1299 XFS_TRANS_RELEASE_LOG_RES); 1300 } 1301 xfs_iunlock(ip, (use_iolock ? (XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL) 1302 : XFS_ILOCK_EXCL)); 1303 } 1304 return error; 1305} 1306 1307/* 1308 * Free a symlink that has blocks associated with it. 1309 */ 1310STATIC int 1311xfs_inactive_symlink_rmt( 1312 xfs_inode_t *ip, 1313 xfs_trans_t **tpp) 1314{ 1315 xfs_buf_t *bp; 1316 int committed; 1317 int done; 1318 int error; 1319 xfs_fsblock_t first_block; 1320 xfs_bmap_free_t free_list; 1321 int i; 1322 xfs_mount_t *mp; 1323 xfs_bmbt_irec_t mval[SYMLINK_MAPS]; 1324 int nmaps; 1325 xfs_trans_t *ntp; 1326 int size; 1327 xfs_trans_t *tp; 1328 1329 tp = *tpp; 1330 mp = ip->i_mount; 1331 ASSERT(ip->i_d.di_size > XFS_IFORK_DSIZE(ip)); 1332 /* 1333 * We're freeing a symlink that has some 1334 * blocks allocated to it. Free the 1335 * blocks here. 
We know that we've got 1336 * either 1 or 2 extents and that we can 1337 * free them all in one bunmapi call. 1338 */ 1339 ASSERT(ip->i_d.di_nextents > 0 && ip->i_d.di_nextents <= 2); 1340 if ((error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0, 1341 XFS_TRANS_PERM_LOG_RES, XFS_ITRUNCATE_LOG_COUNT))) { 1342 ASSERT(XFS_FORCED_SHUTDOWN(mp)); 1343 xfs_trans_cancel(tp, 0); 1344 *tpp = NULL; 1345 return error; 1346 } 1347 /* 1348 * Lock the inode, fix the size, and join it to the transaction. 1349 * Hold it so in the normal path, we still have it locked for 1350 * the second transaction. In the error paths we need it 1351 * held so the cancel won't rele it, see below. 1352 */ 1353 xfs_ilock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); 1354 size = (int)ip->i_d.di_size; 1355 ip->i_d.di_size = 0; 1356 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); 1357 xfs_trans_ihold(tp, ip); 1358 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 1359 /* 1360 * Find the block(s) so we can inval and unmap them. 1361 */ 1362 done = 0; 1363 XFS_BMAP_INIT(&free_list, &first_block); 1364 nmaps = ARRAY_SIZE(mval); 1365 if ((error = xfs_bmapi(tp, ip, 0, XFS_B_TO_FSB(mp, size), 1366 XFS_BMAPI_METADATA, &first_block, 0, mval, &nmaps, 1367 &free_list, NULL))) 1368 goto error0; 1369 /* 1370 * Invalidate the block(s). 1371 */ 1372 for (i = 0; i < nmaps; i++) { 1373 bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, 1374 XFS_FSB_TO_DADDR(mp, mval[i].br_startblock), 1375 XFS_FSB_TO_BB(mp, mval[i].br_blockcount), 0); 1376 xfs_trans_binval(tp, bp); 1377 } 1378 /* 1379 * Unmap the dead block(s) to the free_list. 1380 */ 1381 if ((error = xfs_bunmapi(tp, ip, 0, size, XFS_BMAPI_METADATA, nmaps, 1382 &first_block, &free_list, NULL, &done))) 1383 goto error1; 1384 ASSERT(done); 1385 /* 1386 * Commit the first transaction. This logs the EFI and the inode. 
1387 */ 1388 if ((error = xfs_bmap_finish(&tp, &free_list, &committed))) 1389 goto error1; 1390 /* 1391 * The transaction must have been committed, since there were 1392 * actually extents freed by xfs_bunmapi. See xfs_bmap_finish. 1393 * The new tp has the extent freeing and EFDs. 1394 */ 1395 ASSERT(committed); 1396 /* 1397 * The first xact was committed, so add the inode to the new one. 1398 * Mark it dirty so it will be logged and moved forward in the log as 1399 * part of every commit. 1400 */ 1401 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); 1402 xfs_trans_ihold(tp, ip); 1403 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 1404 /* 1405 * Get a new, empty transaction to return to our caller. 1406 */ 1407 ntp = xfs_trans_dup(tp); 1408 /* 1409 * Commit the transaction containing extent freeing and EFDs. 1410 * If we get an error on the commit here or on the reserve below, 1411 * we need to unlock the inode since the new transaction doesn't 1412 * have the inode attached. 1413 */ 1414 error = xfs_trans_commit(tp, 0); 1415 tp = ntp; 1416 if (error) { 1417 ASSERT(XFS_FORCED_SHUTDOWN(mp)); 1418 goto error0; 1419 } 1420 /* 1421 * Remove the memory for extent descriptions (just bookkeeping). 1422 */ 1423 if (ip->i_df.if_bytes) 1424 xfs_idata_realloc(ip, -ip->i_df.if_bytes, XFS_DATA_FORK); 1425 ASSERT(ip->i_df.if_bytes == 0); 1426 /* 1427 * Put an itruncate log reservation in the new transaction 1428 * for our caller. 1429 */ 1430 if ((error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0, 1431 XFS_TRANS_PERM_LOG_RES, XFS_ITRUNCATE_LOG_COUNT))) { 1432 ASSERT(XFS_FORCED_SHUTDOWN(mp)); 1433 goto error0; 1434 } 1435 /* 1436 * Return with the inode locked but not joined to the transaction. 1437 */ 1438 *tpp = tp; 1439 return 0; 1440 1441 error1: 1442 xfs_bmap_cancel(&free_list); 1443 error0: 1444 /* 1445 * Have to come here with the inode locked and either 1446 * (held and in the transaction) or (not in the transaction). 
1447 * If the inode isn't held then cancel would iput it, but 1448 * that's wrong since this is inactive and the vnode ref 1449 * count is 0 already. 1450 * Cancel won't do anything to the inode if held, but it still 1451 * needs to be locked until the cancel is done, if it was 1452 * joined to the transaction. 1453 */ 1454 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); 1455 xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); 1456 *tpp = NULL; 1457 return error; 1458 1459} 1460 1461STATIC int 1462xfs_inactive_symlink_local( 1463 xfs_inode_t *ip, 1464 xfs_trans_t **tpp) 1465{ 1466 int error; 1467 1468 ASSERT(ip->i_d.di_size <= XFS_IFORK_DSIZE(ip)); 1469 /* 1470 * We're freeing a symlink which fit into 1471 * the inode. Just free the memory used 1472 * to hold the old symlink. 1473 */ 1474 error = xfs_trans_reserve(*tpp, 0, 1475 XFS_ITRUNCATE_LOG_RES(ip->i_mount), 1476 0, XFS_TRANS_PERM_LOG_RES, 1477 XFS_ITRUNCATE_LOG_COUNT); 1478 1479 if (error) { 1480 xfs_trans_cancel(*tpp, 0); 1481 *tpp = NULL; 1482 return error; 1483 } 1484 xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); 1485 1486 /* 1487 * Zero length symlinks _can_ exist. 
1488 */ 1489 if (ip->i_df.if_bytes > 0) { 1490 xfs_idata_realloc(ip, 1491 -(ip->i_df.if_bytes), 1492 XFS_DATA_FORK); 1493 ASSERT(ip->i_df.if_bytes == 0); 1494 } 1495 return 0; 1496} 1497 1498STATIC int 1499xfs_inactive_attrs( 1500 xfs_inode_t *ip, 1501 xfs_trans_t **tpp) 1502{ 1503 xfs_trans_t *tp; 1504 int error; 1505 xfs_mount_t *mp; 1506 1507 ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE)); 1508 tp = *tpp; 1509 mp = ip->i_mount; 1510 ASSERT(ip->i_d.di_forkoff != 0); 1511 xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 1512 xfs_iunlock(ip, XFS_ILOCK_EXCL); 1513 1514 error = xfs_attr_inactive(ip); 1515 if (error) { 1516 *tpp = NULL; 1517 xfs_iunlock(ip, XFS_IOLOCK_EXCL); 1518 return error; /* goto out */ 1519 } 1520 1521 tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE); 1522 error = xfs_trans_reserve(tp, 0, 1523 XFS_IFREE_LOG_RES(mp), 1524 0, XFS_TRANS_PERM_LOG_RES, 1525 XFS_INACTIVE_LOG_COUNT); 1526 if (error) { 1527 ASSERT(XFS_FORCED_SHUTDOWN(mp)); 1528 xfs_trans_cancel(tp, 0); 1529 *tpp = NULL; 1530 xfs_iunlock(ip, XFS_IOLOCK_EXCL); 1531 return error; 1532 } 1533 1534 xfs_ilock(ip, XFS_ILOCK_EXCL); 1535 xfs_trans_ijoin(tp, ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); 1536 xfs_trans_ihold(tp, ip); 1537 xfs_idestroy_fork(ip, XFS_ATTR_FORK); 1538 1539 ASSERT(ip->i_d.di_anextents == 0); 1540 1541 *tpp = tp; 1542 return 0; 1543} 1544 1545STATIC int 1546xfs_release( 1547 bhv_desc_t *bdp) 1548{ 1549 xfs_inode_t *ip; 1550 bhv_vnode_t *vp; 1551 xfs_mount_t *mp; 1552 int error; 1553 1554 vp = BHV_TO_VNODE(bdp); 1555 ip = XFS_BHVTOI(bdp); 1556 mp = ip->i_mount; 1557 1558 if (!VN_ISREG(vp) || (ip->i_d.di_mode == 0)) 1559 return 0; 1560 1561 /* If this is a read-only mount, don't do this (would generate I/O) */ 1562 if (vp->v_vfsp->vfs_flag & VFS_RDONLY) 1563 return 0; 1564 1565 if (!XFS_FORCED_SHUTDOWN(mp)) { 1566 /* 1567 * If we are using filestreams, and we have an unlinked 1568 * file that we are processing the last close on, then nothing 1569 * will be able to reopen and write to 
this file. Purge this 1570 * inode from the filestreams cache so that it doesn't delay 1571 * teardown of the inode. 1572 */ 1573 if ((ip->i_d.di_nlink == 0) && xfs_inode_is_filestream(ip)) 1574 xfs_filestream_deassociate(ip); 1575 1576 /* 1577 * If we previously truncated this file and removed old data 1578 * in the process, we want to initiate "early" writeout on 1579 * the last close. This is an attempt to combat the notorious 1580 * NULL files problem which is particularly noticable from a 1581 * truncate down, buffered (re-)write (delalloc), followed by 1582 * a crash. What we are effectively doing here is 1583 * significantly reducing the time window where we'd otherwise 1584 * be exposed to that problem. 1585 */ 1586 if (VUNTRUNCATE(vp) && VN_DIRTY(vp) && ip->i_delayed_blks > 0) 1587 bhv_vop_flush_pages(vp, 0, -1, XFS_B_ASYNC, FI_NONE); 1588 } 1589 1590#ifdef HAVE_REFCACHE 1591 /* If we are in the NFS reference cache then don't do this now */ 1592 if (ip->i_refcache) 1593 return 0; 1594#endif 1595 1596 if (ip->i_d.di_nlink != 0) { 1597 if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) && 1598 ((ip->i_size > 0) || (VN_CACHED(vp) > 0 || 1599 ip->i_delayed_blks > 0)) && 1600 (ip->i_df.if_flags & XFS_IFEXTENTS)) && 1601 (!(ip->i_d.di_flags & 1602 (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))) { 1603 error = xfs_free_eofblocks(mp, ip, XFS_FREE_EOF_LOCK); 1604 if (error) 1605 return error; 1606 /* Update linux inode block count after free above */ 1607 vn_to_inode(vp)->i_blocks = XFS_FSB_TO_BB(mp, 1608 ip->i_d.di_nblocks + ip->i_delayed_blks); 1609 } 1610 } 1611 1612 return 0; 1613} 1614 1615/* 1616 * xfs_inactive 1617 * 1618 * This is called when the vnode reference count for the vnode 1619 * goes to zero. If the file has been unlinked, then it must 1620 * now be truncated. Also, we clear all of the read-ahead state 1621 * kept for the inode here since the file is now closed. 
/*
 * xfs_inactive
 *
 * This is called when the vnode reference count for the vnode
 * goes to zero.  If the file has been unlinked, then it must
 * now be truncated.  Also, we clear all of the read-ahead state
 * kept for the inode here since the file is now closed.
 *
 * For a still-linked regular file the only work is trimming blocks
 * past EOF.  For an unlinked inode the data is released (truncate for
 * regular files, symlink teardown for symlinks), the attribute fork is
 * removed and the inode itself is freed.
 *
 * bdp:    behavior descriptor of the inode going inactive
 * credp:  not used by this routine
 *
 * Returns VN_INACTIVE_CACHE on every path; errors encountered along the
 * way are not reported to the caller.
 */
STATIC int
xfs_inactive(
	bhv_desc_t	*bdp,
	cred_t		*credp)
{
	xfs_inode_t	*ip;
	bhv_vnode_t	*vp;
	xfs_bmap_free_t	free_list;
	xfs_fsblock_t	first_block;
	int		committed;
	xfs_trans_t	*tp;
	xfs_mount_t	*mp;
	int		error;
	int		truncate;

	vp = BHV_TO_VNODE(bdp);
	vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address);

	ip = XFS_BHVTOI(bdp);

	/*
	 * If the inode is already free, then there can be nothing
	 * to clean up here.
	 */
	if (ip->i_d.di_mode == 0 || VN_BAD(vp)) {
		ASSERT(ip->i_df.if_real_bytes == 0);
		ASSERT(ip->i_df.if_broot_bytes == 0);
		return VN_INACTIVE_CACHE;
	}

	/*
	 * Only do a truncate if it's a regular file with
	 * some actual space in it.  It's OK to look at the
	 * inode's fields without the lock because we're the
	 * only one with a reference to the inode.
	 */
	truncate = ((ip->i_d.di_nlink == 0) &&
	    ((ip->i_d.di_size != 0) || (ip->i_size != 0) ||
	     (ip->i_d.di_nextents > 0) || (ip->i_delayed_blks > 0)) &&
	    ((ip->i_d.di_mode & S_IFMT) == S_IFREG));

	mp = ip->i_mount;

	/* Tell DMAPI the inode is going away; its result is ignored. */
	if (ip->i_d.di_nlink == 0 &&
	    DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_DESTROY)) {
		(void) XFS_SEND_DESTROY(mp, vp, DM_RIGHT_NULL);
	}

	error = 0;

	/* If this is a read-only mount, don't do this (would generate I/O) */
	if (vp->v_vfsp->vfs_flag & VFS_RDONLY)
		goto out;

	/*
	 * Still linked: just trim post-EOF blocks.  Preallocated or
	 * append-only inodes are left alone unless they still carry
	 * delayed allocation blocks.
	 */
	if (ip->i_d.di_nlink != 0) {
		if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) &&
                     ((ip->i_size > 0) || (VN_CACHED(vp) > 0 ||
                       ip->i_delayed_blks > 0)) &&
		      (ip->i_df.if_flags & XFS_IFEXTENTS) &&
		     (!(ip->i_d.di_flags &
				(XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) ||
		      (ip->i_delayed_blks != 0)))) {
			error = xfs_free_eofblocks(mp, ip, XFS_FREE_EOF_LOCK);
			if (error)
				return VN_INACTIVE_CACHE;
			/* Update linux inode block count after free above */
			vn_to_inode(vp)->i_blocks = XFS_FSB_TO_BB(mp,
				ip->i_d.di_nblocks + ip->i_delayed_blks);
		}
		goto out;
	}

	ASSERT(ip->i_d.di_nlink == 0);

	if ((error = XFS_QM_DQATTACH(mp, ip, 0)))
		return VN_INACTIVE_CACHE;

	/*
	 * Each of the three branches below ends with a reserved
	 * transaction that has the inode joined and held, and with both
	 * the iolock and the ilock held exclusively.
	 */
	tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
	if (truncate) {
		/*
		 * Do the xfs_itruncate_start() call before
		 * reserving any log space because itruncate_start
		 * will call into the buffer cache and we can't
		 * do that within a transaction.
		 */
		xfs_ilock(ip, XFS_IOLOCK_EXCL);

		error = xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE, 0);
		if (error) {
			xfs_trans_cancel(tp, 0);
			xfs_iunlock(ip, XFS_IOLOCK_EXCL);
			return VN_INACTIVE_CACHE;
		}

		error = xfs_trans_reserve(tp, 0,
					  XFS_ITRUNCATE_LOG_RES(mp),
					  0, XFS_TRANS_PERM_LOG_RES,
					  XFS_ITRUNCATE_LOG_COUNT);
		if (error) {
			/* Don't call itruncate_cleanup */
			ASSERT(XFS_FORCED_SHUTDOWN(mp));
			xfs_trans_cancel(tp, 0);
			xfs_iunlock(ip, XFS_IOLOCK_EXCL);
			return VN_INACTIVE_CACHE;
		}

		xfs_ilock(ip, XFS_ILOCK_EXCL);
		xfs_trans_ijoin(tp, ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
		xfs_trans_ihold(tp, ip);

		/*
		 * normally, we have to run xfs_itruncate_finish sync.
		 * But if filesystem is wsync and we're in the inactive
		 * path, then we know that nlink == 0, and that the
		 * xaction that made nlink == 0 is permanently committed
		 * since xfs_remove runs as a synchronous transaction.
		 */
		error = xfs_itruncate_finish(&tp, ip, 0, XFS_DATA_FORK,
				(!(mp->m_flags & XFS_MOUNT_WSYNC) ? 1 : 0));

		if (error) {
			xfs_trans_cancel(tp,
				XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
			xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
			return VN_INACTIVE_CACHE;
		}
	} else if ((ip->i_d.di_mode & S_IFMT) == S_IFLNK) {

		/*
		 * If we get an error while cleaning up a
		 * symlink we bail out.
		 */
		error = (ip->i_d.di_size > XFS_IFORK_DSIZE(ip)) ?
			xfs_inactive_symlink_rmt(ip, &tp) :
			xfs_inactive_symlink_local(ip, &tp);

		if (error) {
			ASSERT(tp == NULL);
			return VN_INACTIVE_CACHE;
		}

		/* The helpers return the inode locked but not joined. */
		xfs_trans_ijoin(tp, ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
		xfs_trans_ihold(tp, ip);
	} else {
		error = xfs_trans_reserve(tp, 0,
					  XFS_IFREE_LOG_RES(mp),
					  0, XFS_TRANS_PERM_LOG_RES,
					  XFS_INACTIVE_LOG_COUNT);
		if (error) {
			ASSERT(XFS_FORCED_SHUTDOWN(mp));
			xfs_trans_cancel(tp, 0);
			return VN_INACTIVE_CACHE;
		}

		xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
		xfs_trans_ijoin(tp, ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
		xfs_trans_ihold(tp, ip);
	}

	/*
	 * If there are attributes associated with the file
	 * then blow them away now.  The code calls a routine
	 * that recursively deconstructs the attribute fork.
	 * We need to just commit the current transaction
	 * because we can't use it for xfs_attr_inactive().
	 */
	if (ip->i_d.di_anextents > 0) {
		error = xfs_inactive_attrs(ip, &tp);
		/*
		 * If we got an error, the transaction is already
		 * cancelled, and the inode is unlocked. Just get out.
		 */
		 if (error)
			 return VN_INACTIVE_CACHE;
	} else if (ip->i_afp) {
		xfs_idestroy_fork(ip, XFS_ATTR_FORK);
	}

	/*
	 * Free the inode.
	 */
	XFS_BMAP_INIT(&free_list, &first_block);
	error = xfs_ifree(tp, ip, &free_list);
	if (error) {
		/*
		 * If we fail to free the inode, shut down.  The cancel
		 * might do that, we need to make sure.  Otherwise the
		 * inode might be lost for a long time or forever.
		 */
		if (!XFS_FORCED_SHUTDOWN(mp)) {
			cmn_err(CE_NOTE,
		"xfs_inactive:	xfs_ifree() returned an error = %d on %s",
				error, mp->m_fsname);
			xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
		}
		xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
	} else {
		/*
		 * Credit the quota account(s). The inode is gone.
		 */
		XFS_TRANS_MOD_DQUOT_BYINO(mp, tp, ip, XFS_TRANS_DQ_ICOUNT, -1);

		/*
		 * Just ignore errors at this point.  There is
		 * nothing we can do except to try to keep going.
		 */
		(void) xfs_bmap_finish(&tp,  &free_list, &committed);
		(void) xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
	}
	/*
	 * Release the dquots held by inode, if any.
	 */
	XFS_QM_DQDETACH(mp, ip);

	xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);

 out:
	return VN_INACTIVE_CACHE;
}


/*
 * xfs_lookup
 *
 * Look up the name in 'dentry' in the directory behind 'dir_bdp'.  The
 * directory is locked with xfs_ilock_map_shared() for the duration of
 * the lookup.
 *
 * dir_bdp:  behavior descriptor of the directory to search
 * dentry:   name to look up
 * vpp:      on success, receives the vnode of the inode that was found;
 *           left untouched on failure
 * flags,
 * rdir,
 * credp:    not used by this routine
 *
 * Returns 0 on success, EIO if the filesystem has been shut down, or
 * the error from xfs_dir_lookup_int().
 */
STATIC int
xfs_lookup(
	bhv_desc_t		*dir_bdp,
	bhv_vname_t		*dentry,
	bhv_vnode_t		**vpp,
	int			flags,
	bhv_vnode_t		*rdir,
	cred_t			*credp)
{
	xfs_inode_t		*dp, *ip;
	xfs_ino_t		e_inum;
	int			error;
	uint			lock_mode;
	bhv_vnode_t		*dir_vp;

	dir_vp = BHV_TO_VNODE(dir_bdp);
	vn_trace_entry(dir_vp, __FUNCTION__, (inst_t *)__return_address);

	dp = XFS_BHVTOI(dir_bdp);

	if (XFS_FORCED_SHUTDOWN(dp->i_mount))
		return XFS_ERROR(EIO);

	lock_mode = xfs_ilock_map_shared(dp);
	error = xfs_dir_lookup_int(dir_bdp, lock_mode, dentry, &e_inum, &ip);
	if (!error) {
		*vpp = XFS_ITOV(ip);
		ITRACE(ip);
	}
	xfs_iunlock_map_shared(dp, lock_mode);
	return error;
}
/*
 * xfs_create (create a new file).
 *
 * Allocate a new inode, enter it under the name in 'dentry' in the
 * directory behind 'dir_bdp' and return its vnode.
 *
 * dir_bdp:  behavior descriptor of the parent directory
 * dentry:   name of the new entry
 * vap:      attributes for the new inode; va_mode is always used,
 *           va_projid and va_rdev only when the matching va_mask bit
 *           is set
 * vpp:      must point to NULL on entry; receives the new vnode, with
 *           a reference held for the caller, on success
 * credp:    credentials used for quota lookup and inode allocation
 *
 * DMAPI create/postcreate events are sent when enabled on the
 * directory.  Returns 0 on success or an error; on any failure after
 * the initial shutdown check the function leaves through std_return so
 * that the postcreate event can still be delivered.
 */
STATIC int
xfs_create(
	bhv_desc_t		*dir_bdp,
	bhv_vname_t		*dentry,
	bhv_vattr_t		*vap,
	bhv_vnode_t		**vpp,
	cred_t			*credp)
{
	char			*name = VNAME(dentry);
	bhv_vnode_t		*dir_vp;
	xfs_inode_t		*dp, *ip;
	bhv_vnode_t	        *vp = NULL;
	xfs_trans_t		*tp;
	xfs_mount_t	        *mp;
	xfs_dev_t		rdev;
	int                     error;
	xfs_bmap_free_t		free_list;
	xfs_fsblock_t		first_block;
	boolean_t		dp_joined_to_trans;
	int			dm_event_sent = 0;
	uint			cancel_flags;
	int			committed;
	xfs_prid_t		prid;
	struct xfs_dquot	*udqp, *gdqp;
	uint			resblks;
	int			dm_di_mode;
	int			namelen;

	ASSERT(!*vpp);
	dir_vp = BHV_TO_VNODE(dir_bdp);
	vn_trace_entry(dir_vp, __FUNCTION__, (inst_t *)__return_address);

	dp = XFS_BHVTOI(dir_bdp);
	mp = dp->i_mount;

	dm_di_mode = vap->va_mode;
	namelen = VNAMELEN(dentry);

	if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, DM_EVENT_CREATE)) {
		error = XFS_SEND_NAMESP(mp, DM_EVENT_CREATE,
				dir_vp, DM_RIGHT_NULL, NULL,
				DM_RIGHT_NULL, name, NULL,
				dm_di_mode, 0, 0);

		if (error)
			return error;
		dm_event_sent = 1;
	}

	if (XFS_FORCED_SHUTDOWN(mp))
		return XFS_ERROR(EIO);

	/* Return through std_return after this point. */

	/*
	 * Pick the project id: inherited from the directory if it says
	 * so, else the caller's, else the default.
	 */
	udqp = gdqp = NULL;
	if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
		prid = dp->i_d.di_projid;
	else if (vap->va_mask & XFS_AT_PROJID)
		prid = (xfs_prid_t)vap->va_projid;
	else
		prid = (xfs_prid_t)dfltprid;

	/*
	 * Make sure that we have allocated dquot(s) on disk.
	 */
	error = XFS_QM_DQVOPALLOC(mp, dp,
			current_fsuid(credp), current_fsgid(credp), prid,
			XFS_QMOPT_QUOTALL|XFS_QMOPT_INHERIT, &udqp, &gdqp);
	if (error)
		goto std_return;

	ip = NULL;
	dp_joined_to_trans = B_FALSE;

	tp = xfs_trans_alloc(mp, XFS_TRANS_CREATE);
	cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
	resblks = XFS_CREATE_SPACE_RES(mp, namelen);
	/*
	 * Initially assume that the file does not exist and
	 * reserve the resources for that case.  If that is not
	 * the case we'll drop the one we have and get a more
	 * appropriate transaction later.
	 */
	error = xfs_trans_reserve(tp, resblks, XFS_CREATE_LOG_RES(mp), 0,
			XFS_TRANS_PERM_LOG_RES, XFS_CREATE_LOG_COUNT);
	if (error == ENOSPC) {
		/* Retry without a block reservation. */
		resblks = 0;
		error = xfs_trans_reserve(tp, 0, XFS_CREATE_LOG_RES(mp), 0,
				XFS_TRANS_PERM_LOG_RES, XFS_CREATE_LOG_COUNT);
	}
	if (error) {
		cancel_flags = 0;
		/* dp is not locked yet; keep error_return from unlocking it. */
		dp = NULL;
		goto error_return;
	}

	xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);

	XFS_BMAP_INIT(&free_list, &first_block);

	ASSERT(ip == NULL);

	/*
	 * Reserve disk quota and the inode.
	 */
	error = XFS_TRANS_RESERVE_QUOTA(mp, tp, udqp, gdqp, resblks, 1, 0);
	if (error)
		goto error_return;

	/* Without reserved blocks, check up front that the name will fit. */
	if (resblks == 0 && (error = xfs_dir_canenter(tp, dp, name, namelen)))
		goto error_return;
	rdev = (vap->va_mask & XFS_AT_RDEV) ? vap->va_rdev : 0;
	error = xfs_dir_ialloc(&tp, dp, vap->va_mode, 1,
			rdev, credp, prid, resblks > 0,
			&ip, &committed);
	if (error) {
		if (error == ENOSPC)
			goto error_return;
		goto abort_return;
	}
	ITRACE(ip);

	/*
	 * At this point, we've gotten a newly allocated inode.
	 * It is locked (and joined to the transaction).
	 */

	ASSERT(ismrlocked (&ip->i_lock, MR_UPDATE));

	/*
	 * Now we join the directory inode to the transaction.
	 * We do not do it earlier because xfs_dir_ialloc
	 * might commit the previous transaction (and release
	 * all the locks).
	 */

	VN_HOLD(dir_vp);
	xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
	dp_joined_to_trans = B_TRUE;

	error = xfs_dir_createname(tp, dp, name, namelen, ip->i_ino,
					&first_block, &free_list, resblks ?
					resblks - XFS_IALLOC_SPACE_RES(mp) : 0);
	if (error) {
		ASSERT(error != ENOSPC);
		goto abort_return;
	}
	xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
	xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);

	/*
	 * If this is a synchronous mount, make sure that the
	 * create transaction goes to disk before returning to
	 * the user.
	 */
	if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) {
		xfs_trans_set_sync(tp);
	}

	dp->i_gen++;

	/*
	 * Attach the dquot(s) to the inodes and modify them incore.
	 * These ids of the inode couldn't have changed since the new
	 * inode has been locked ever since it was created.
	 */
	XFS_QM_DQVOPCREATE(mp, tp, ip, udqp, gdqp);

	/*
	 * xfs_trans_commit normally decrements the vnode ref count
	 * when it unlocks the inode. Since we want to return the
	 * vnode to the caller, we bump the vnode ref count now.
	 */
	IHOLD(ip);
	vp = XFS_ITOV(ip);

	error = xfs_bmap_finish(&tp, &free_list, &committed);
	if (error) {
		xfs_bmap_cancel(&free_list);
		goto abort_rele;
	}

	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
	if (error) {
		/* Drop the extra reference taken for the caller above. */
		IRELE(ip);
		/* The transaction is gone; nothing left to cancel. */
		tp = NULL;
		goto error_return;
	}

	XFS_QM_DQRELE(mp, udqp);
	XFS_QM_DQRELE(mp, gdqp);

	/*
	 * Propagate the fact that the vnode changed after the
	 * xfs_inode locks have been released.
	 */
	bhv_vop_vnode_change(vp, VCHANGE_FLAGS_TRUNCATED, 3);

	*vpp = vp;

	/* Fallthrough to std_return with error = 0 */

std_return:
	/*
	 * Send the postcreate event on success, or on failure if the
	 * create event was delivered earlier.
	 */
	if ( (*vpp || (error != 0 && dm_event_sent != 0)) &&
			DM_EVENT_ENABLED(dir_vp->v_vfsp, XFS_BHVTOI(dir_bdp),
							DM_EVENT_POSTCREATE)) {
		(void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTCREATE,
			dir_vp, DM_RIGHT_NULL,
			*vpp ? vp:NULL,
			DM_RIGHT_NULL, name, NULL,
			dm_di_mode, error, 0);
	}
	return error;

 abort_return:
	cancel_flags |= XFS_TRANS_ABORT;
	/* FALLTHROUGH */

 error_return:
	if (tp != NULL)
		xfs_trans_cancel(tp, cancel_flags);

	/* Unlock the directory ourselves only if it was never joined. */
	if (!dp_joined_to_trans && (dp != NULL))
		xfs_iunlock(dp, XFS_ILOCK_EXCL);
	XFS_QM_DQRELE(mp, udqp);
	XFS_QM_DQRELE(mp, gdqp);

	goto std_return;

 abort_rele:
	/*
	 * Wait until after the current transaction is aborted to
	 * release the inode.  This prevents recursive transactions
	 * and deadlocks from xfs_inactive.
	 */
	cancel_flags |= XFS_TRANS_ABORT;
	xfs_trans_cancel(tp, cancel_flags);
	IRELE(ip);

	XFS_QM_DQRELE(mp, udqp);
	XFS_QM_DQRELE(mp, gdqp);

	goto std_return;
}

#ifdef DEBUG
/*
 * Some counters to see if (and how often) we are hitting some deadlock
 * prevention code paths.
 */

int xfs_rm_locks;
int xfs_rm_lock_delays;
int xfs_rm_attempts;
#endif
The next inode we lock 2151 * could be already locked by another thread waiting for log space (e.g 2152 * a permanent log reservation with a long running transaction (see 2153 * xfs_itruncate_finish)). To solve this, we must check if the directory 2154 * is in the ail and use lock_nowait. If we can't lock, we need to 2155 * drop the inode lock on the directory and try again. xfs_iunlock will 2156 * potentially push the tail if we were holding up the log. 2157 */ 2158STATIC int 2159xfs_lock_dir_and_entry( 2160 xfs_inode_t *dp, 2161 xfs_inode_t *ip) /* inode of entry 'name' */ 2162{ 2163 int attempts; 2164 xfs_ino_t e_inum; 2165 xfs_inode_t *ips[2]; 2166 xfs_log_item_t *lp; 2167 2168#ifdef DEBUG 2169 xfs_rm_locks++; 2170#endif 2171 attempts = 0; 2172 2173again: 2174 xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT); 2175 2176 e_inum = ip->i_ino; 2177 2178 ITRACE(ip); 2179 2180 /* 2181 * We want to lock in increasing inum. Since we've already 2182 * acquired the lock on the directory, we may need to release 2183 * if if the inum of the entry turns out to be less. 2184 */ 2185 if (e_inum > dp->i_ino) { 2186 /* 2187 * We are already in the right order, so just 2188 * lock on the inode of the entry. 2189 * We need to use nowait if dp is in the AIL. 2190 */ 2191 2192 lp = (xfs_log_item_t *)dp->i_itemp; 2193 if (lp && (lp->li_flags & XFS_LI_IN_AIL)) { 2194 if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) { 2195 attempts++; 2196#ifdef DEBUG 2197 xfs_rm_attempts++; 2198#endif 2199 2200 /* 2201 * Unlock dp and try again. 2202 * xfs_iunlock will try to push the tail 2203 * if the inode is in the AIL. 
2204 */ 2205 2206 xfs_iunlock(dp, XFS_ILOCK_EXCL); 2207 2208 if ((attempts % 5) == 0) { 2209 delay(1); /* Don't just spin the CPU */ 2210#ifdef DEBUG 2211 xfs_rm_lock_delays++; 2212#endif 2213 } 2214 goto again; 2215 } 2216 } else { 2217 xfs_ilock(ip, XFS_ILOCK_EXCL); 2218 } 2219 } else if (e_inum < dp->i_ino) { 2220 xfs_iunlock(dp, XFS_ILOCK_EXCL); 2221 2222 ips[0] = ip; 2223 ips[1] = dp; 2224 xfs_lock_inodes(ips, 2, 0, XFS_ILOCK_EXCL); 2225 } 2226 /* else e_inum == dp->i_ino */ 2227 /* This can happen if we're asked to lock /x/.. 2228 * the entry is "..", which is also the parent directory. 2229 */ 2230 2231 return 0; 2232} 2233 2234#ifdef DEBUG 2235int xfs_locked_n; 2236int xfs_small_retries; 2237int xfs_middle_retries; 2238int xfs_lots_retries; 2239int xfs_lock_delays; 2240#endif 2241 2242/* 2243 * Bump the subclass so xfs_lock_inodes() acquires each lock with 2244 * a different value 2245 */ 2246static inline int 2247xfs_lock_inumorder(int lock_mode, int subclass) 2248{ 2249 if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) 2250 lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_IOLOCK_SHIFT; 2251 if (lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)) 2252 lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_ILOCK_SHIFT; 2253 2254 return lock_mode; 2255} 2256 2257/* 2258 * The following routine will lock n inodes in exclusive mode. 2259 * We assume the caller calls us with the inodes in i_ino order. 2260 * 2261 * We need to detect deadlock where an inode that we lock 2262 * is in the AIL and we start waiting for another inode that is locked 2263 * by a thread in a long running transaction (such as truncate). This can 2264 * result in deadlock since the long running trans might need to wait 2265 * for the inode we just locked in order to push the tail and free space 2266 * in the log. 
2267 */ 2268void 2269xfs_lock_inodes( 2270 xfs_inode_t **ips, 2271 int inodes, 2272 int first_locked, 2273 uint lock_mode) 2274{ 2275 int attempts = 0, i, j, try_lock; 2276 xfs_log_item_t *lp; 2277 2278 ASSERT(ips && (inodes >= 2)); /* we need at least two */ 2279 2280 if (first_locked) { 2281 try_lock = 1; 2282 i = 1; 2283 } else { 2284 try_lock = 0; 2285 i = 0; 2286 } 2287 2288again: 2289 for (; i < inodes; i++) { 2290 ASSERT(ips[i]); 2291 2292 if (i && (ips[i] == ips[i-1])) /* Already locked */ 2293 continue; 2294 2295 /* 2296 * If try_lock is not set yet, make sure all locked inodes 2297 * are not in the AIL. 2298 * If any are, set try_lock to be used later. 2299 */ 2300 2301 if (!try_lock) { 2302 for (j = (i - 1); j >= 0 && !try_lock; j--) { 2303 lp = (xfs_log_item_t *)ips[j]->i_itemp; 2304 if (lp && (lp->li_flags & XFS_LI_IN_AIL)) { 2305 try_lock++; 2306 } 2307 } 2308 } 2309 2310 /* 2311 * If any of the previous locks we have locked is in the AIL, 2312 * we must TRY to get the second and subsequent locks. If 2313 * we can't get any, we must release all we have 2314 * and try again. 2315 */ 2316 2317 if (try_lock) { 2318 /* try_lock must be 0 if i is 0. */ 2319 /* 2320 * try_lock means we have an inode locked 2321 * that is in the AIL. 2322 */ 2323 ASSERT(i != 0); 2324 if (!xfs_ilock_nowait(ips[i], xfs_lock_inumorder(lock_mode, i))) { 2325 attempts++; 2326 2327 /* 2328 * Unlock all previous guys and try again. 2329 * xfs_iunlock will try to push the tail 2330 * if the inode is in the AIL. 2331 */ 2332 2333 for(j = i - 1; j >= 0; j--) { 2334 2335 /* 2336 * Check to see if we've already 2337 * unlocked this one. 2338 * Not the first one going back, 2339 * and the inode ptr is the same. 
2340 */ 2341 if ((j != (i - 1)) && ips[j] == 2342 ips[j+1]) 2343 continue; 2344 2345 xfs_iunlock(ips[j], lock_mode); 2346 } 2347 2348 if ((attempts % 5) == 0) { 2349 delay(1); /* Don't just spin the CPU */ 2350#ifdef DEBUG 2351 xfs_lock_delays++; 2352#endif 2353 } 2354 i = 0; 2355 try_lock = 0; 2356 goto again; 2357 } 2358 } else { 2359 xfs_ilock(ips[i], xfs_lock_inumorder(lock_mode, i)); 2360 } 2361 } 2362 2363#ifdef DEBUG 2364 if (attempts) { 2365 if (attempts < 5) xfs_small_retries++; 2366 else if (attempts < 100) xfs_middle_retries++; 2367 else xfs_lots_retries++; 2368 } else { 2369 xfs_locked_n++; 2370 } 2371#endif 2372} 2373 2374#ifdef DEBUG 2375#define REMOVE_DEBUG_TRACE(x) {remove_which_error_return = (x);} 2376int remove_which_error_return = 0; 2377#else /* ! DEBUG */ 2378#define REMOVE_DEBUG_TRACE(x) 2379#endif /* ! DEBUG */ 2380 2381 2382/* 2383 * xfs_remove 2384 * 2385 */ 2386STATIC int 2387xfs_remove( 2388 bhv_desc_t *dir_bdp, 2389 bhv_vname_t *dentry, 2390 cred_t *credp) 2391{ 2392 bhv_vnode_t *dir_vp; 2393 char *name = VNAME(dentry); 2394 xfs_inode_t *dp, *ip; 2395 xfs_trans_t *tp = NULL; 2396 xfs_mount_t *mp; 2397 int error = 0; 2398 xfs_bmap_free_t free_list; 2399 xfs_fsblock_t first_block; 2400 int cancel_flags; 2401 int committed; 2402 int dm_di_mode = 0; 2403 int link_zero; 2404 uint resblks; 2405 int namelen; 2406 2407 dir_vp = BHV_TO_VNODE(dir_bdp); 2408 vn_trace_entry(dir_vp, __FUNCTION__, (inst_t *)__return_address); 2409 2410 dp = XFS_BHVTOI(dir_bdp); 2411 mp = dp->i_mount; 2412 2413 if (XFS_FORCED_SHUTDOWN(mp)) 2414 return XFS_ERROR(EIO); 2415 2416 namelen = VNAMELEN(dentry); 2417 2418 if (!xfs_get_dir_entry(dentry, &ip)) { 2419 dm_di_mode = ip->i_d.di_mode; 2420 IRELE(ip); 2421 } 2422 2423 if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, DM_EVENT_REMOVE)) { 2424 error = XFS_SEND_NAMESP(mp, DM_EVENT_REMOVE, dir_vp, 2425 DM_RIGHT_NULL, NULL, DM_RIGHT_NULL, 2426 name, NULL, dm_di_mode, 0, 0); 2427 if (error) 2428 return error; 2429 } 2430 2431 /* 
From this point on, return through std_return */ 2432 ip = NULL; 2433 2434 /* 2435 * We need to get a reference to ip before we get our log 2436 * reservation. The reason for this is that we cannot call 2437 * xfs_iget for an inode for which we do not have a reference 2438 * once we've acquired a log reservation. This is because the 2439 * inode we are trying to get might be in xfs_inactive going 2440 * for a log reservation. Since we'll have to wait for the 2441 * inactive code to complete before returning from xfs_iget, 2442 * we need to make sure that we don't have log space reserved 2443 * when we call xfs_iget. Instead we get an unlocked reference 2444 * to the inode before getting our log reservation. 2445 */ 2446 error = xfs_get_dir_entry(dentry, &ip); 2447 if (error) { 2448 REMOVE_DEBUG_TRACE(__LINE__); 2449 goto std_return; 2450 } 2451 2452 dm_di_mode = ip->i_d.di_mode; 2453 2454 vn_trace_entry(XFS_ITOV(ip), __FUNCTION__, (inst_t *)__return_address); 2455 2456 ITRACE(ip); 2457 2458 error = XFS_QM_DQATTACH(mp, dp, 0); 2459 if (!error && dp != ip) 2460 error = XFS_QM_DQATTACH(mp, ip, 0); 2461 if (error) { 2462 REMOVE_DEBUG_TRACE(__LINE__); 2463 IRELE(ip); 2464 goto std_return; 2465 } 2466 2467 tp = xfs_trans_alloc(mp, XFS_TRANS_REMOVE); 2468 cancel_flags = XFS_TRANS_RELEASE_LOG_RES; 2469 /* 2470 * We try to get the real space reservation first, 2471 * allowing for directory btree deletion(s) implying 2472 * possible bmap insert(s). If we can't get the space 2473 * reservation then we use 0 instead, and avoid the bmap 2474 * btree insert(s) in the directory code by, if the bmap 2475 * insert tries to happen, instead trimming the LAST 2476 * block from the directory. 
2477 */ 2478 resblks = XFS_REMOVE_SPACE_RES(mp); 2479 error = xfs_trans_reserve(tp, resblks, XFS_REMOVE_LOG_RES(mp), 0, 2480 XFS_TRANS_PERM_LOG_RES, XFS_REMOVE_LOG_COUNT); 2481 if (error == ENOSPC) { 2482 resblks = 0; 2483 error = xfs_trans_reserve(tp, 0, XFS_REMOVE_LOG_RES(mp), 0, 2484 XFS_TRANS_PERM_LOG_RES, XFS_REMOVE_LOG_COUNT); 2485 } 2486 if (error) { 2487 ASSERT(error != ENOSPC); 2488 REMOVE_DEBUG_TRACE(__LINE__); 2489 xfs_trans_cancel(tp, 0); 2490 IRELE(ip); 2491 return error; 2492 } 2493 2494 error = xfs_lock_dir_and_entry(dp, ip); 2495 if (error) { 2496 REMOVE_DEBUG_TRACE(__LINE__); 2497 xfs_trans_cancel(tp, cancel_flags); 2498 IRELE(ip); 2499 goto std_return; 2500 } 2501 2502 /* 2503 * At this point, we've gotten both the directory and the entry 2504 * inodes locked. 2505 */ 2506 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); 2507 if (dp != ip) { 2508 /* 2509 * Increment vnode ref count only in this case since 2510 * there's an extra vnode reference in the case where 2511 * dp == ip. 2512 */ 2513 IHOLD(dp); 2514 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 2515 } 2516 2517 /* 2518 * Entry must exist since we did a lookup in xfs_lock_dir_and_entry. 2519 */ 2520 XFS_BMAP_INIT(&free_list, &first_block); 2521 error = xfs_dir_removename(tp, dp, name, namelen, ip->i_ino, 2522 &first_block, &free_list, 0); 2523 if (error) { 2524 ASSERT(error != ENOENT); 2525 REMOVE_DEBUG_TRACE(__LINE__); 2526 goto error1; 2527 } 2528 xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 2529 2530 dp->i_gen++; 2531 xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); 2532 2533 error = xfs_droplink(tp, ip); 2534 if (error) { 2535 REMOVE_DEBUG_TRACE(__LINE__); 2536 goto error1; 2537 } 2538 2539 /* Determine if this is the last link while 2540 * we are in the transaction. 2541 */ 2542 link_zero = (ip)->i_d.di_nlink==0; 2543 2544 /* 2545 * Take an extra ref on the inode so that it doesn't 2546 * go to xfs_inactive() from within the commit. 
2547 */ 2548 IHOLD(ip); 2549 2550 /* 2551 * If this is a synchronous mount, make sure that the 2552 * remove transaction goes to disk before returning to 2553 * the user. 2554 */ 2555 if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) { 2556 xfs_trans_set_sync(tp); 2557 } 2558 2559 error = xfs_bmap_finish(&tp, &free_list, &committed); 2560 if (error) { 2561 REMOVE_DEBUG_TRACE(__LINE__); 2562 goto error_rele; 2563 } 2564 2565 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 2566 if (error) { 2567 IRELE(ip); 2568 goto std_return; 2569 } 2570 2571 /* 2572 * Before we drop our extra reference to the inode, purge it 2573 * from the refcache if it is there. By waiting until afterwards 2574 * to do the IRELE, we ensure that we won't go inactive in the 2575 * xfs_refcache_purge_ip routine (although that would be OK). 2576 */ 2577 xfs_refcache_purge_ip(ip); 2578 2579 /* 2580 * If we are using filestreams, kill the stream association. 2581 * If the file is still open it may get a new one but that 2582 * will get killed on last close in xfs_close() so we don't 2583 * have to worry about that. 2584 */ 2585 if (link_zero && xfs_inode_is_filestream(ip)) 2586 xfs_filestream_deassociate(ip); 2587 2588 vn_trace_exit(XFS_ITOV(ip), __FUNCTION__, (inst_t *)__return_address); 2589 2590 /* 2591 * Let interposed file systems know about removed links. 
2592 */ 2593 bhv_vop_link_removed(XFS_ITOV(ip), dir_vp, link_zero); 2594 2595 IRELE(ip); 2596 2597/* Fall through to std_return with error = 0 */ 2598 std_return: 2599 if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, 2600 DM_EVENT_POSTREMOVE)) { 2601 (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTREMOVE, 2602 dir_vp, DM_RIGHT_NULL, 2603 NULL, DM_RIGHT_NULL, 2604 name, NULL, dm_di_mode, error, 0); 2605 } 2606 return error; 2607 2608 error1: 2609 xfs_bmap_cancel(&free_list); 2610 cancel_flags |= XFS_TRANS_ABORT; 2611 xfs_trans_cancel(tp, cancel_flags); 2612 goto std_return; 2613 2614 error_rele: 2615 /* 2616 * In this case make sure to not release the inode until after 2617 * the current transaction is aborted. Releasing it beforehand 2618 * can cause us to go to xfs_inactive and start a recursive 2619 * transaction which can easily deadlock with the current one. 2620 */ 2621 xfs_bmap_cancel(&free_list); 2622 cancel_flags |= XFS_TRANS_ABORT; 2623 xfs_trans_cancel(tp, cancel_flags); 2624 2625 /* 2626 * Before we drop our extra reference to the inode, purge it 2627 * from the refcache if it is there. By waiting until afterwards 2628 * to do the IRELE, we ensure that we won't go inactive in the 2629 * xfs_refcache_purge_ip routine (although that would be OK). 
2630 */ 2631 xfs_refcache_purge_ip(ip); 2632 2633 IRELE(ip); 2634 2635 goto std_return; 2636} 2637 2638 2639/* 2640 * xfs_link 2641 * 2642 */ 2643STATIC int 2644xfs_link( 2645 bhv_desc_t *target_dir_bdp, 2646 bhv_vnode_t *src_vp, 2647 bhv_vname_t *dentry, 2648 cred_t *credp) 2649{ 2650 xfs_inode_t *tdp, *sip; 2651 xfs_trans_t *tp; 2652 xfs_mount_t *mp; 2653 xfs_inode_t *ips[2]; 2654 int error; 2655 xfs_bmap_free_t free_list; 2656 xfs_fsblock_t first_block; 2657 int cancel_flags; 2658 int committed; 2659 bhv_vnode_t *target_dir_vp; 2660 int resblks; 2661 char *target_name = VNAME(dentry); 2662 int target_namelen; 2663 2664 target_dir_vp = BHV_TO_VNODE(target_dir_bdp); 2665 vn_trace_entry(target_dir_vp, __FUNCTION__, (inst_t *)__return_address); 2666 vn_trace_entry(src_vp, __FUNCTION__, (inst_t *)__return_address); 2667 2668 target_namelen = VNAMELEN(dentry); 2669 ASSERT(!VN_ISDIR(src_vp)); 2670 2671 sip = xfs_vtoi(src_vp); 2672 tdp = XFS_BHVTOI(target_dir_bdp); 2673 mp = tdp->i_mount; 2674 if (XFS_FORCED_SHUTDOWN(mp)) 2675 return XFS_ERROR(EIO); 2676 2677 if (DM_EVENT_ENABLED(src_vp->v_vfsp, tdp, DM_EVENT_LINK)) { 2678 error = XFS_SEND_NAMESP(mp, DM_EVENT_LINK, 2679 target_dir_vp, DM_RIGHT_NULL, 2680 src_vp, DM_RIGHT_NULL, 2681 target_name, NULL, 0, 0, 0); 2682 if (error) 2683 return error; 2684 } 2685 2686 /* Return through std_return after this point. 
*/ 2687 2688 error = XFS_QM_DQATTACH(mp, sip, 0); 2689 if (!error && sip != tdp) 2690 error = XFS_QM_DQATTACH(mp, tdp, 0); 2691 if (error) 2692 goto std_return; 2693 2694 tp = xfs_trans_alloc(mp, XFS_TRANS_LINK); 2695 cancel_flags = XFS_TRANS_RELEASE_LOG_RES; 2696 resblks = XFS_LINK_SPACE_RES(mp, target_namelen); 2697 error = xfs_trans_reserve(tp, resblks, XFS_LINK_LOG_RES(mp), 0, 2698 XFS_TRANS_PERM_LOG_RES, XFS_LINK_LOG_COUNT); 2699 if (error == ENOSPC) { 2700 resblks = 0; 2701 error = xfs_trans_reserve(tp, 0, XFS_LINK_LOG_RES(mp), 0, 2702 XFS_TRANS_PERM_LOG_RES, XFS_LINK_LOG_COUNT); 2703 } 2704 if (error) { 2705 cancel_flags = 0; 2706 goto error_return; 2707 } 2708 2709 if (sip->i_ino < tdp->i_ino) { 2710 ips[0] = sip; 2711 ips[1] = tdp; 2712 } else { 2713 ips[0] = tdp; 2714 ips[1] = sip; 2715 } 2716 2717 xfs_lock_inodes(ips, 2, 0, XFS_ILOCK_EXCL); 2718 2719 /* 2720 * Increment vnode ref counts since xfs_trans_commit & 2721 * xfs_trans_cancel will both unlock the inodes and 2722 * decrement the associated ref counts. 2723 */ 2724 VN_HOLD(src_vp); 2725 VN_HOLD(target_dir_vp); 2726 xfs_trans_ijoin(tp, sip, XFS_ILOCK_EXCL); 2727 xfs_trans_ijoin(tp, tdp, XFS_ILOCK_EXCL); 2728 2729 /* 2730 * If the source has too many links, we can't make any more to it. 2731 */ 2732 if (sip->i_d.di_nlink >= XFS_MAXLINK) { 2733 error = XFS_ERROR(EMLINK); 2734 goto error_return; 2735 } 2736 2737 /* 2738 * If we are using project inheritance, we only allow hard link 2739 * creation in our tree when the project IDs are the same; else 2740 * the tree quota mechanism could be circumvented. 
2741 */ 2742 if (unlikely((tdp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) && 2743 (tdp->i_d.di_projid != sip->i_d.di_projid))) { 2744 error = XFS_ERROR(EXDEV); 2745 goto error_return; 2746 } 2747 2748 if (resblks == 0 && 2749 (error = xfs_dir_canenter(tp, tdp, target_name, target_namelen))) 2750 goto error_return; 2751 2752 XFS_BMAP_INIT(&free_list, &first_block); 2753 2754 error = xfs_dir_createname(tp, tdp, target_name, target_namelen, 2755 sip->i_ino, &first_block, &free_list, 2756 resblks); 2757 if (error) 2758 goto abort_return; 2759 xfs_ichgtime(tdp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 2760 tdp->i_gen++; 2761 xfs_trans_log_inode(tp, tdp, XFS_ILOG_CORE); 2762 2763 error = xfs_bumplink(tp, sip); 2764 if (error) 2765 goto abort_return; 2766 2767 /* 2768 * If this is a synchronous mount, make sure that the 2769 * link transaction goes to disk before returning to 2770 * the user. 2771 */ 2772 if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) { 2773 xfs_trans_set_sync(tp); 2774 } 2775 2776 error = xfs_bmap_finish (&tp, &free_list, &committed); 2777 if (error) { 2778 xfs_bmap_cancel(&free_list); 2779 goto abort_return; 2780 } 2781 2782 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 2783 if (error) 2784 goto std_return; 2785 2786 /* Fall through to std_return with error = 0. 
*/ 2787std_return: 2788 if (DM_EVENT_ENABLED(src_vp->v_vfsp, sip, 2789 DM_EVENT_POSTLINK)) { 2790 (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTLINK, 2791 target_dir_vp, DM_RIGHT_NULL, 2792 src_vp, DM_RIGHT_NULL, 2793 target_name, NULL, 0, error, 0); 2794 } 2795 return error; 2796 2797 abort_return: 2798 cancel_flags |= XFS_TRANS_ABORT; 2799 /* FALLTHROUGH */ 2800 2801 error_return: 2802 xfs_trans_cancel(tp, cancel_flags); 2803 goto std_return; 2804} 2805 2806 2807/* 2808 * xfs_mkdir 2809 * 2810 */ 2811STATIC int 2812xfs_mkdir( 2813 bhv_desc_t *dir_bdp, 2814 bhv_vname_t *dentry, 2815 bhv_vattr_t *vap, 2816 bhv_vnode_t **vpp, 2817 cred_t *credp) 2818{ 2819 char *dir_name = VNAME(dentry); 2820 xfs_inode_t *dp; 2821 xfs_inode_t *cdp; /* inode of created dir */ 2822 bhv_vnode_t *cvp; /* vnode of created dir */ 2823 xfs_trans_t *tp; 2824 xfs_mount_t *mp; 2825 int cancel_flags; 2826 int error; 2827 int committed; 2828 xfs_bmap_free_t free_list; 2829 xfs_fsblock_t first_block; 2830 bhv_vnode_t *dir_vp; 2831 boolean_t dp_joined_to_trans; 2832 boolean_t created = B_FALSE; 2833 int dm_event_sent = 0; 2834 xfs_prid_t prid; 2835 struct xfs_dquot *udqp, *gdqp; 2836 uint resblks; 2837 int dm_di_mode; 2838 int dir_namelen; 2839 2840 dir_vp = BHV_TO_VNODE(dir_bdp); 2841 dp = XFS_BHVTOI(dir_bdp); 2842 mp = dp->i_mount; 2843 2844 if (XFS_FORCED_SHUTDOWN(mp)) 2845 return XFS_ERROR(EIO); 2846 2847 dir_namelen = VNAMELEN(dentry); 2848 2849 tp = NULL; 2850 dp_joined_to_trans = B_FALSE; 2851 dm_di_mode = vap->va_mode; 2852 2853 if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, DM_EVENT_CREATE)) { 2854 error = XFS_SEND_NAMESP(mp, DM_EVENT_CREATE, 2855 dir_vp, DM_RIGHT_NULL, NULL, 2856 DM_RIGHT_NULL, dir_name, NULL, 2857 dm_di_mode, 0, 0); 2858 if (error) 2859 return error; 2860 dm_event_sent = 1; 2861 } 2862 2863 /* Return through std_return after this point. 
*/ 2864 2865 vn_trace_entry(dir_vp, __FUNCTION__, (inst_t *)__return_address); 2866 2867 mp = dp->i_mount; 2868 udqp = gdqp = NULL; 2869 if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) 2870 prid = dp->i_d.di_projid; 2871 else if (vap->va_mask & XFS_AT_PROJID) 2872 prid = (xfs_prid_t)vap->va_projid; 2873 else 2874 prid = (xfs_prid_t)dfltprid; 2875 2876 /* 2877 * Make sure that we have allocated dquot(s) on disk. 2878 */ 2879 error = XFS_QM_DQVOPALLOC(mp, dp, 2880 current_fsuid(credp), current_fsgid(credp), prid, 2881 XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp); 2882 if (error) 2883 goto std_return; 2884 2885 tp = xfs_trans_alloc(mp, XFS_TRANS_MKDIR); 2886 cancel_flags = XFS_TRANS_RELEASE_LOG_RES; 2887 resblks = XFS_MKDIR_SPACE_RES(mp, dir_namelen); 2888 error = xfs_trans_reserve(tp, resblks, XFS_MKDIR_LOG_RES(mp), 0, 2889 XFS_TRANS_PERM_LOG_RES, XFS_MKDIR_LOG_COUNT); 2890 if (error == ENOSPC) { 2891 resblks = 0; 2892 error = xfs_trans_reserve(tp, 0, XFS_MKDIR_LOG_RES(mp), 0, 2893 XFS_TRANS_PERM_LOG_RES, 2894 XFS_MKDIR_LOG_COUNT); 2895 } 2896 if (error) { 2897 cancel_flags = 0; 2898 dp = NULL; 2899 goto error_return; 2900 } 2901 2902 xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT); 2903 2904 /* 2905 * Check for directory link count overflow. 2906 */ 2907 if (dp->i_d.di_nlink >= XFS_MAXLINK) { 2908 error = XFS_ERROR(EMLINK); 2909 goto error_return; 2910 } 2911 2912 /* 2913 * Reserve disk quota and the inode. 2914 */ 2915 error = XFS_TRANS_RESERVE_QUOTA(mp, tp, udqp, gdqp, resblks, 1, 0); 2916 if (error) 2917 goto error_return; 2918 2919 if (resblks == 0 && 2920 (error = xfs_dir_canenter(tp, dp, dir_name, dir_namelen))) 2921 goto error_return; 2922 /* 2923 * create the directory inode. 
2924 */ 2925 error = xfs_dir_ialloc(&tp, dp, vap->va_mode, 2, 2926 0, credp, prid, resblks > 0, 2927 &cdp, NULL); 2928 if (error) { 2929 if (error == ENOSPC) 2930 goto error_return; 2931 goto abort_return; 2932 } 2933 ITRACE(cdp); 2934 2935 /* 2936 * Now we add the directory inode to the transaction. 2937 * We waited until now since xfs_dir_ialloc might start 2938 * a new transaction. Had we joined the transaction 2939 * earlier, the locks might have gotten released. 2940 */ 2941 VN_HOLD(dir_vp); 2942 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); 2943 dp_joined_to_trans = B_TRUE; 2944 2945 XFS_BMAP_INIT(&free_list, &first_block); 2946 2947 error = xfs_dir_createname(tp, dp, dir_name, dir_namelen, cdp->i_ino, 2948 &first_block, &free_list, resblks ? 2949 resblks - XFS_IALLOC_SPACE_RES(mp) : 0); 2950 if (error) { 2951 ASSERT(error != ENOSPC); 2952 goto error1; 2953 } 2954 xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 2955 2956 /* 2957 * Bump the in memory version number of the parent directory 2958 * so that other processes accessing it will recognize that 2959 * the directory has changed. 2960 */ 2961 dp->i_gen++; 2962 2963 error = xfs_dir_init(tp, cdp, dp); 2964 if (error) 2965 goto error2; 2966 2967 cdp->i_gen = 1; 2968 error = xfs_bumplink(tp, dp); 2969 if (error) 2970 goto error2; 2971 2972 cvp = XFS_ITOV(cdp); 2973 2974 created = B_TRUE; 2975 2976 *vpp = cvp; 2977 IHOLD(cdp); 2978 2979 /* 2980 * Attach the dquots to the new inode and modify the icount incore. 2981 */ 2982 XFS_QM_DQVOPCREATE(mp, tp, cdp, udqp, gdqp); 2983 2984 /* 2985 * If this is a synchronous mount, make sure that the 2986 * mkdir transaction goes to disk before returning to 2987 * the user. 
2988 */ 2989 if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) { 2990 xfs_trans_set_sync(tp); 2991 } 2992 2993 error = xfs_bmap_finish(&tp, &free_list, &committed); 2994 if (error) { 2995 IRELE(cdp); 2996 goto error2; 2997 } 2998 2999 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 3000 XFS_QM_DQRELE(mp, udqp); 3001 XFS_QM_DQRELE(mp, gdqp); 3002 if (error) { 3003 IRELE(cdp); 3004 } 3005 3006 /* Fall through to std_return with error = 0 or errno from 3007 * xfs_trans_commit. */ 3008 3009std_return: 3010 if ( (created || (error != 0 && dm_event_sent != 0)) && 3011 DM_EVENT_ENABLED(dir_vp->v_vfsp, XFS_BHVTOI(dir_bdp), 3012 DM_EVENT_POSTCREATE)) { 3013 (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTCREATE, 3014 dir_vp, DM_RIGHT_NULL, 3015 created ? XFS_ITOV(cdp):NULL, 3016 DM_RIGHT_NULL, 3017 dir_name, NULL, 3018 dm_di_mode, error, 0); 3019 } 3020 return error; 3021 3022 error2: 3023 error1: 3024 xfs_bmap_cancel(&free_list); 3025 abort_return: 3026 cancel_flags |= XFS_TRANS_ABORT; 3027 error_return: 3028 xfs_trans_cancel(tp, cancel_flags); 3029 XFS_QM_DQRELE(mp, udqp); 3030 XFS_QM_DQRELE(mp, gdqp); 3031 3032 if (!dp_joined_to_trans && (dp != NULL)) { 3033 xfs_iunlock(dp, XFS_ILOCK_EXCL); 3034 } 3035 3036 goto std_return; 3037} 3038 3039 3040/* 3041 * xfs_rmdir 3042 * 3043 */ 3044STATIC int 3045xfs_rmdir( 3046 bhv_desc_t *dir_bdp, 3047 bhv_vname_t *dentry, 3048 cred_t *credp) 3049{ 3050 char *name = VNAME(dentry); 3051 xfs_inode_t *dp; 3052 xfs_inode_t *cdp; /* child directory */ 3053 xfs_trans_t *tp; 3054 xfs_mount_t *mp; 3055 int error; 3056 xfs_bmap_free_t free_list; 3057 xfs_fsblock_t first_block; 3058 int cancel_flags; 3059 int committed; 3060 bhv_vnode_t *dir_vp; 3061 int dm_di_mode = S_IFDIR; 3062 int last_cdp_link; 3063 int namelen; 3064 uint resblks; 3065 3066 dir_vp = BHV_TO_VNODE(dir_bdp); 3067 dp = XFS_BHVTOI(dir_bdp); 3068 mp = dp->i_mount; 3069 3070 vn_trace_entry(dir_vp, __FUNCTION__, (inst_t *)__return_address); 3071 3072 if 
(XFS_FORCED_SHUTDOWN(XFS_BHVTOI(dir_bdp)->i_mount)) 3073 return XFS_ERROR(EIO); 3074 namelen = VNAMELEN(dentry); 3075 3076 if (!xfs_get_dir_entry(dentry, &cdp)) { 3077 dm_di_mode = cdp->i_d.di_mode; 3078 IRELE(cdp); 3079 } 3080 3081 if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, DM_EVENT_REMOVE)) { 3082 error = XFS_SEND_NAMESP(mp, DM_EVENT_REMOVE, 3083 dir_vp, DM_RIGHT_NULL, 3084 NULL, DM_RIGHT_NULL, 3085 name, NULL, dm_di_mode, 0, 0); 3086 if (error) 3087 return XFS_ERROR(error); 3088 } 3089 3090 /* Return through std_return after this point. */ 3091 3092 cdp = NULL; 3093 3094 /* 3095 * We need to get a reference to cdp before we get our log 3096 * reservation. The reason for this is that we cannot call 3097 * xfs_iget for an inode for which we do not have a reference 3098 * once we've acquired a log reservation. This is because the 3099 * inode we are trying to get might be in xfs_inactive going 3100 * for a log reservation. Since we'll have to wait for the 3101 * inactive code to complete before returning from xfs_iget, 3102 * we need to make sure that we don't have log space reserved 3103 * when we call xfs_iget. Instead we get an unlocked reference 3104 * to the inode before getting our log reservation. 3105 */ 3106 error = xfs_get_dir_entry(dentry, &cdp); 3107 if (error) { 3108 REMOVE_DEBUG_TRACE(__LINE__); 3109 goto std_return; 3110 } 3111 mp = dp->i_mount; 3112 dm_di_mode = cdp->i_d.di_mode; 3113 3114 /* 3115 * Get the dquots for the inodes. 3116 */ 3117 error = XFS_QM_DQATTACH(mp, dp, 0); 3118 if (!error && dp != cdp) 3119 error = XFS_QM_DQATTACH(mp, cdp, 0); 3120 if (error) { 3121 IRELE(cdp); 3122 REMOVE_DEBUG_TRACE(__LINE__); 3123 goto std_return; 3124 } 3125 3126 tp = xfs_trans_alloc(mp, XFS_TRANS_RMDIR); 3127 cancel_flags = XFS_TRANS_RELEASE_LOG_RES; 3128 /* 3129 * We try to get the real space reservation first, 3130 * allowing for directory btree deletion(s) implying 3131 * possible bmap insert(s). 
If we can't get the space 3132 * reservation then we use 0 instead, and avoid the bmap 3133 * btree insert(s) in the directory code by, if the bmap 3134 * insert tries to happen, instead trimming the LAST 3135 * block from the directory. 3136 */ 3137 resblks = XFS_REMOVE_SPACE_RES(mp); 3138 error = xfs_trans_reserve(tp, resblks, XFS_REMOVE_LOG_RES(mp), 0, 3139 XFS_TRANS_PERM_LOG_RES, XFS_DEFAULT_LOG_COUNT); 3140 if (error == ENOSPC) { 3141 resblks = 0; 3142 error = xfs_trans_reserve(tp, 0, XFS_REMOVE_LOG_RES(mp), 0, 3143 XFS_TRANS_PERM_LOG_RES, XFS_DEFAULT_LOG_COUNT); 3144 } 3145 if (error) { 3146 ASSERT(error != ENOSPC); 3147 cancel_flags = 0; 3148 IRELE(cdp); 3149 goto error_return; 3150 } 3151 XFS_BMAP_INIT(&free_list, &first_block); 3152 3153 /* 3154 * Now lock the child directory inode and the parent directory 3155 * inode in the proper order. This will take care of validating 3156 * that the directory entry for the child directory inode has 3157 * not changed while we were obtaining a log reservation. 3158 */ 3159 error = xfs_lock_dir_and_entry(dp, cdp); 3160 if (error) { 3161 xfs_trans_cancel(tp, cancel_flags); 3162 IRELE(cdp); 3163 goto std_return; 3164 } 3165 3166 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); 3167 if (dp != cdp) { 3168 /* 3169 * Only increment the parent directory vnode count if 3170 * we didn't bump it in looking up cdp. The only time 3171 * we don't bump it is when we're looking up ".". 
3172 */ 3173 VN_HOLD(dir_vp); 3174 } 3175 3176 ITRACE(cdp); 3177 xfs_trans_ijoin(tp, cdp, XFS_ILOCK_EXCL); 3178 3179 ASSERT(cdp->i_d.di_nlink >= 2); 3180 if (cdp->i_d.di_nlink != 2) { 3181 error = XFS_ERROR(ENOTEMPTY); 3182 goto error_return; 3183 } 3184 if (!xfs_dir_isempty(cdp)) { 3185 error = XFS_ERROR(ENOTEMPTY); 3186 goto error_return; 3187 } 3188 3189 error = xfs_dir_removename(tp, dp, name, namelen, cdp->i_ino, 3190 &first_block, &free_list, resblks); 3191 if (error) 3192 goto error1; 3193 3194 xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 3195 3196 /* 3197 * Bump the in memory generation count on the parent 3198 * directory so that other can know that it has changed. 3199 */ 3200 dp->i_gen++; 3201 3202 /* 3203 * Drop the link from cdp's "..". 3204 */ 3205 error = xfs_droplink(tp, dp); 3206 if (error) { 3207 goto error1; 3208 } 3209 3210 /* 3211 * Drop the link from dp to cdp. 3212 */ 3213 error = xfs_droplink(tp, cdp); 3214 if (error) { 3215 goto error1; 3216 } 3217 3218 /* 3219 * Drop the "." link from cdp to self. 3220 */ 3221 error = xfs_droplink(tp, cdp); 3222 if (error) { 3223 goto error1; 3224 } 3225 3226 /* Determine these before committing transaction */ 3227 last_cdp_link = (cdp)->i_d.di_nlink==0; 3228 3229 /* 3230 * Take an extra ref on the child vnode so that it 3231 * does not go to xfs_inactive() from within the commit. 3232 */ 3233 IHOLD(cdp); 3234 3235 /* 3236 * If this is a synchronous mount, make sure that the 3237 * rmdir transaction goes to disk before returning to 3238 * the user. 
3239 */ 3240 if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) { 3241 xfs_trans_set_sync(tp); 3242 } 3243 3244 error = xfs_bmap_finish (&tp, &free_list, &committed); 3245 if (error) { 3246 xfs_bmap_cancel(&free_list); 3247 xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES | 3248 XFS_TRANS_ABORT)); 3249 IRELE(cdp); 3250 goto std_return; 3251 } 3252 3253 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 3254 if (error) { 3255 IRELE(cdp); 3256 goto std_return; 3257 } 3258 3259 3260 /* 3261 * Let interposed file systems know about removed links. 3262 */ 3263 bhv_vop_link_removed(XFS_ITOV(cdp), dir_vp, last_cdp_link); 3264 3265 IRELE(cdp); 3266 3267 /* Fall through to std_return with error = 0 or the errno 3268 * from xfs_trans_commit. */ 3269 std_return: 3270 if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, DM_EVENT_POSTREMOVE)) { 3271 (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTREMOVE, 3272 dir_vp, DM_RIGHT_NULL, 3273 NULL, DM_RIGHT_NULL, 3274 name, NULL, dm_di_mode, 3275 error, 0); 3276 } 3277 return error; 3278 3279 error1: 3280 xfs_bmap_cancel(&free_list); 3281 cancel_flags |= XFS_TRANS_ABORT; 3282 /* FALLTHROUGH */ 3283 3284 error_return: 3285 xfs_trans_cancel(tp, cancel_flags); 3286 goto std_return; 3287} 3288 3289 3290/* 3291 * Read dp's entries starting at uiop->uio_offset and translate them into 3292 * bufsize bytes worth of struct dirents starting at bufbase. 
3293 */ 3294STATIC int 3295xfs_readdir( 3296 bhv_desc_t *dir_bdp, 3297 uio_t *uiop, 3298 cred_t *credp, 3299 int *eofp) 3300{ 3301 xfs_inode_t *dp; 3302 xfs_trans_t *tp = NULL; 3303 int error = 0; 3304 uint lock_mode; 3305 3306 vn_trace_entry(BHV_TO_VNODE(dir_bdp), __FUNCTION__, 3307 (inst_t *)__return_address); 3308 dp = XFS_BHVTOI(dir_bdp); 3309 3310 if (XFS_FORCED_SHUTDOWN(dp->i_mount)) 3311 return XFS_ERROR(EIO); 3312 3313 lock_mode = xfs_ilock_map_shared(dp); 3314 error = xfs_dir_getdents(tp, dp, uiop, eofp); 3315 xfs_iunlock_map_shared(dp, lock_mode); 3316 return error; 3317} 3318 3319 3320STATIC int 3321xfs_symlink( 3322 bhv_desc_t *dir_bdp, 3323 bhv_vname_t *dentry, 3324 bhv_vattr_t *vap, 3325 char *target_path, 3326 bhv_vnode_t **vpp, 3327 cred_t *credp) 3328{ 3329 xfs_trans_t *tp; 3330 xfs_mount_t *mp; 3331 xfs_inode_t *dp; 3332 xfs_inode_t *ip; 3333 int error; 3334 int pathlen; 3335 xfs_bmap_free_t free_list; 3336 xfs_fsblock_t first_block; 3337 boolean_t dp_joined_to_trans; 3338 bhv_vnode_t *dir_vp; 3339 uint cancel_flags; 3340 int committed; 3341 xfs_fileoff_t first_fsb; 3342 xfs_filblks_t fs_blocks; 3343 int nmaps; 3344 xfs_bmbt_irec_t mval[SYMLINK_MAPS]; 3345 xfs_daddr_t d; 3346 char *cur_chunk; 3347 int byte_cnt; 3348 int n; 3349 xfs_buf_t *bp; 3350 xfs_prid_t prid; 3351 struct xfs_dquot *udqp, *gdqp; 3352 uint resblks; 3353 char *link_name = VNAME(dentry); 3354 int link_namelen; 3355 3356 *vpp = NULL; 3357 dir_vp = BHV_TO_VNODE(dir_bdp); 3358 dp = XFS_BHVTOI(dir_bdp); 3359 dp_joined_to_trans = B_FALSE; 3360 error = 0; 3361 ip = NULL; 3362 tp = NULL; 3363 3364 vn_trace_entry(dir_vp, __FUNCTION__, (inst_t *)__return_address); 3365 3366 mp = dp->i_mount; 3367 3368 if (XFS_FORCED_SHUTDOWN(mp)) 3369 return XFS_ERROR(EIO); 3370 3371 link_namelen = VNAMELEN(dentry); 3372 3373 /* 3374 * Check component lengths of the target path name. 
3375 */ 3376 pathlen = strlen(target_path); 3377 if (pathlen >= MAXPATHLEN) /* total string too long */ 3378 return XFS_ERROR(ENAMETOOLONG); 3379 if (pathlen >= MAXNAMELEN) { /* is any component too long? */ 3380 int len, total; 3381 char *path; 3382 3383 for (total = 0, path = target_path; total < pathlen;) { 3384 /* 3385 * Skip any slashes. 3386 */ 3387 while(*path == '/') { 3388 total++; 3389 path++; 3390 } 3391 3392 /* 3393 * Count up to the next slash or end of path. 3394 * Error out if the component is bigger than MAXNAMELEN. 3395 */ 3396 for(len = 0; *path != '/' && total < pathlen;total++, path++) { 3397 if (++len >= MAXNAMELEN) { 3398 error = ENAMETOOLONG; 3399 return error; 3400 } 3401 } 3402 } 3403 } 3404 3405 if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, DM_EVENT_SYMLINK)) { 3406 error = XFS_SEND_NAMESP(mp, DM_EVENT_SYMLINK, dir_vp, 3407 DM_RIGHT_NULL, NULL, DM_RIGHT_NULL, 3408 link_name, target_path, 0, 0, 0); 3409 if (error) 3410 return error; 3411 } 3412 3413 /* Return through std_return after this point. */ 3414 3415 udqp = gdqp = NULL; 3416 if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) 3417 prid = dp->i_d.di_projid; 3418 else if (vap->va_mask & XFS_AT_PROJID) 3419 prid = (xfs_prid_t)vap->va_projid; 3420 else 3421 prid = (xfs_prid_t)dfltprid; 3422 3423 /* 3424 * Make sure that we have allocated dquot(s) on disk. 3425 */ 3426 error = XFS_QM_DQVOPALLOC(mp, dp, 3427 current_fsuid(credp), current_fsgid(credp), prid, 3428 XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp); 3429 if (error) 3430 goto std_return; 3431 3432 tp = xfs_trans_alloc(mp, XFS_TRANS_SYMLINK); 3433 cancel_flags = XFS_TRANS_RELEASE_LOG_RES; 3434 /* 3435 * The symlink will fit into the inode data fork? 3436 * There can't be any attributes so we get the whole variable part. 
3437 */ 3438 if (pathlen <= XFS_LITINO(mp)) 3439 fs_blocks = 0; 3440 else 3441 fs_blocks = XFS_B_TO_FSB(mp, pathlen); 3442 resblks = XFS_SYMLINK_SPACE_RES(mp, link_namelen, fs_blocks); 3443 error = xfs_trans_reserve(tp, resblks, XFS_SYMLINK_LOG_RES(mp), 0, 3444 XFS_TRANS_PERM_LOG_RES, XFS_SYMLINK_LOG_COUNT); 3445 if (error == ENOSPC && fs_blocks == 0) { 3446 resblks = 0; 3447 error = xfs_trans_reserve(tp, 0, XFS_SYMLINK_LOG_RES(mp), 0, 3448 XFS_TRANS_PERM_LOG_RES, XFS_SYMLINK_LOG_COUNT); 3449 } 3450 if (error) { 3451 cancel_flags = 0; 3452 dp = NULL; 3453 goto error_return; 3454 } 3455 3456 xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT); 3457 3458 /* 3459 * Check whether the directory allows new symlinks or not. 3460 */ 3461 if (dp->i_d.di_flags & XFS_DIFLAG_NOSYMLINKS) { 3462 error = XFS_ERROR(EPERM); 3463 goto error_return; 3464 } 3465 3466 /* 3467 * Reserve disk quota : blocks and inode. 3468 */ 3469 error = XFS_TRANS_RESERVE_QUOTA(mp, tp, udqp, gdqp, resblks, 1, 0); 3470 if (error) 3471 goto error_return; 3472 3473 /* 3474 * Check for ability to enter directory entry, if no space reserved. 3475 */ 3476 if (resblks == 0 && 3477 (error = xfs_dir_canenter(tp, dp, link_name, link_namelen))) 3478 goto error_return; 3479 /* 3480 * Initialize the bmap freelist prior to calling either 3481 * bmapi or the directory create code. 3482 */ 3483 XFS_BMAP_INIT(&free_list, &first_block); 3484 3485 /* 3486 * Allocate an inode for the symlink. 3487 */ 3488 error = xfs_dir_ialloc(&tp, dp, S_IFLNK | (vap->va_mode&~S_IFMT), 3489 1, 0, credp, prid, resblks > 0, &ip, NULL); 3490 if (error) { 3491 if (error == ENOSPC) 3492 goto error_return; 3493 goto error1; 3494 } 3495 ITRACE(ip); 3496 3497 VN_HOLD(dir_vp); 3498 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); 3499 dp_joined_to_trans = B_TRUE; 3500 3501 /* 3502 * Also attach the dquot(s) to it, if applicable. 
3503 */ 3504 XFS_QM_DQVOPCREATE(mp, tp, ip, udqp, gdqp); 3505 3506 if (resblks) 3507 resblks -= XFS_IALLOC_SPACE_RES(mp); 3508 /* 3509 * If the symlink will fit into the inode, write it inline. 3510 */ 3511 if (pathlen <= XFS_IFORK_DSIZE(ip)) { 3512 xfs_idata_realloc(ip, pathlen, XFS_DATA_FORK); 3513 memcpy(ip->i_df.if_u1.if_data, target_path, pathlen); 3514 ip->i_d.di_size = pathlen; 3515 3516 /* 3517 * The inode was initially created in extent format. 3518 */ 3519 ip->i_df.if_flags &= ~(XFS_IFEXTENTS | XFS_IFBROOT); 3520 ip->i_df.if_flags |= XFS_IFINLINE; 3521 3522 ip->i_d.di_format = XFS_DINODE_FMT_LOCAL; 3523 xfs_trans_log_inode(tp, ip, XFS_ILOG_DDATA | XFS_ILOG_CORE); 3524 3525 } else { 3526 first_fsb = 0; 3527 nmaps = SYMLINK_MAPS; 3528 3529 error = xfs_bmapi(tp, ip, first_fsb, fs_blocks, 3530 XFS_BMAPI_WRITE | XFS_BMAPI_METADATA, 3531 &first_block, resblks, mval, &nmaps, 3532 &free_list, NULL); 3533 if (error) { 3534 goto error1; 3535 } 3536 3537 if (resblks) 3538 resblks -= fs_blocks; 3539 ip->i_d.di_size = pathlen; 3540 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 3541 3542 cur_chunk = target_path; 3543 for (n = 0; n < nmaps; n++) { 3544 d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock); 3545 byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount); 3546 bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, 3547 BTOBB(byte_cnt), 0); 3548 ASSERT(bp && !XFS_BUF_GETERROR(bp)); 3549 if (pathlen < byte_cnt) { 3550 byte_cnt = pathlen; 3551 } 3552 pathlen -= byte_cnt; 3553 3554 memcpy(XFS_BUF_PTR(bp), cur_chunk, byte_cnt); 3555 cur_chunk += byte_cnt; 3556 3557 xfs_trans_log_buf(tp, bp, 0, byte_cnt - 1); 3558 } 3559 } 3560 3561 /* 3562 * Create the directory entry for the symlink. 
3563 */ 3564 error = xfs_dir_createname(tp, dp, link_name, link_namelen, ip->i_ino, 3565 &first_block, &free_list, resblks); 3566 if (error) 3567 goto error1; 3568 xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 3569 xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); 3570 3571 /* 3572 * Bump the in memory version number of the parent directory 3573 * so that other processes accessing it will recognize that 3574 * the directory has changed. 3575 */ 3576 dp->i_gen++; 3577 3578 /* 3579 * If this is a synchronous mount, make sure that the 3580 * symlink transaction goes to disk before returning to 3581 * the user. 3582 */ 3583 if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) { 3584 xfs_trans_set_sync(tp); 3585 } 3586 3587 /* 3588 * xfs_trans_commit normally decrements the vnode ref count 3589 * when it unlocks the inode. Since we want to return the 3590 * vnode to the caller, we bump the vnode ref count now. 3591 */ 3592 IHOLD(ip); 3593 3594 error = xfs_bmap_finish(&tp, &free_list, &committed); 3595 if (error) { 3596 goto error2; 3597 } 3598 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 3599 XFS_QM_DQRELE(mp, udqp); 3600 XFS_QM_DQRELE(mp, gdqp); 3601 3602 /* Fall through to std_return with error = 0 or errno from 3603 * xfs_trans_commit */ 3604std_return: 3605 if (DM_EVENT_ENABLED(dir_vp->v_vfsp, XFS_BHVTOI(dir_bdp), 3606 DM_EVENT_POSTSYMLINK)) { 3607 (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTSYMLINK, 3608 dir_vp, DM_RIGHT_NULL, 3609 error ? 
NULL : XFS_ITOV(ip), 3610 DM_RIGHT_NULL, link_name, target_path, 3611 0, error, 0); 3612 } 3613 3614 if (!error) { 3615 bhv_vnode_t *vp; 3616 3617 ASSERT(ip); 3618 vp = XFS_ITOV(ip); 3619 *vpp = vp; 3620 } 3621 return error; 3622 3623 error2: 3624 IRELE(ip); 3625 error1: 3626 xfs_bmap_cancel(&free_list); 3627 cancel_flags |= XFS_TRANS_ABORT; 3628 error_return: 3629 xfs_trans_cancel(tp, cancel_flags); 3630 XFS_QM_DQRELE(mp, udqp); 3631 XFS_QM_DQRELE(mp, gdqp); 3632 3633 if (!dp_joined_to_trans && (dp != NULL)) { 3634 xfs_iunlock(dp, XFS_ILOCK_EXCL); 3635 } 3636 3637 goto std_return; 3638} 3639 3640 3641/* 3642 * xfs_fid2 3643 * 3644 * A fid routine that takes a pointer to a previously allocated 3645 * fid structure (like xfs_fast_fid) but uses a 64 bit inode number. 3646 */ 3647STATIC int 3648xfs_fid2( 3649 bhv_desc_t *bdp, 3650 fid_t *fidp) 3651{ 3652 xfs_inode_t *ip; 3653 xfs_fid2_t *xfid; 3654 3655 vn_trace_entry(BHV_TO_VNODE(bdp), __FUNCTION__, 3656 (inst_t *)__return_address); 3657 ASSERT(sizeof(fid_t) >= sizeof(xfs_fid2_t)); 3658 3659 xfid = (xfs_fid2_t *)fidp; 3660 ip = XFS_BHVTOI(bdp); 3661 xfid->fid_len = sizeof(xfs_fid2_t) - sizeof(xfid->fid_len); 3662 xfid->fid_pad = 0; 3663 /* 3664 * use memcpy because the inode is a long long and there's no 3665 * assurance that xfid->fid_ino is properly aligned. 
3666 */ 3667 memcpy(&xfid->fid_ino, &ip->i_ino, sizeof(xfid->fid_ino)); 3668 xfid->fid_gen = ip->i_d.di_gen; 3669 3670 return 0; 3671} 3672 3673 3674/* 3675 * xfs_rwlock 3676 */ 3677int 3678xfs_rwlock( 3679 bhv_desc_t *bdp, 3680 bhv_vrwlock_t locktype) 3681{ 3682 xfs_inode_t *ip; 3683 bhv_vnode_t *vp; 3684 3685 vp = BHV_TO_VNODE(bdp); 3686 if (VN_ISDIR(vp)) 3687 return 1; 3688 ip = XFS_BHVTOI(bdp); 3689 if (locktype == VRWLOCK_WRITE) { 3690 xfs_ilock(ip, XFS_IOLOCK_EXCL); 3691 } else if (locktype == VRWLOCK_TRY_READ) { 3692 return xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED); 3693 } else if (locktype == VRWLOCK_TRY_WRITE) { 3694 return xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL); 3695 } else { 3696 ASSERT((locktype == VRWLOCK_READ) || 3697 (locktype == VRWLOCK_WRITE_DIRECT)); 3698 xfs_ilock(ip, XFS_IOLOCK_SHARED); 3699 } 3700 3701 return 1; 3702} 3703 3704 3705/* 3706 * xfs_rwunlock 3707 */ 3708void 3709xfs_rwunlock( 3710 bhv_desc_t *bdp, 3711 bhv_vrwlock_t locktype) 3712{ 3713 xfs_inode_t *ip; 3714 bhv_vnode_t *vp; 3715 3716 vp = BHV_TO_VNODE(bdp); 3717 if (VN_ISDIR(vp)) 3718 return; 3719 ip = XFS_BHVTOI(bdp); 3720 if (locktype == VRWLOCK_WRITE) { 3721 /* 3722 * In the write case, we may have added a new entry to 3723 * the reference cache. This might store a pointer to 3724 * an inode to be released in this inode. If it is there, 3725 * clear the pointer and release the inode after unlocking 3726 * this one. 
3727 */ 3728 xfs_refcache_iunlock(ip, XFS_IOLOCK_EXCL); 3729 } else { 3730 ASSERT((locktype == VRWLOCK_READ) || 3731 (locktype == VRWLOCK_WRITE_DIRECT)); 3732 xfs_iunlock(ip, XFS_IOLOCK_SHARED); 3733 } 3734 return; 3735} 3736 3737STATIC int 3738xfs_inode_flush( 3739 bhv_desc_t *bdp, 3740 int flags) 3741{ 3742 xfs_inode_t *ip; 3743 xfs_mount_t *mp; 3744 xfs_inode_log_item_t *iip; 3745 int error = 0; 3746 3747 ip = XFS_BHVTOI(bdp); 3748 mp = ip->i_mount; 3749 iip = ip->i_itemp; 3750 3751 if (XFS_FORCED_SHUTDOWN(mp)) 3752 return XFS_ERROR(EIO); 3753 3754 /* 3755 * Bypass inodes which have already been cleaned by 3756 * the inode flush clustering code inside xfs_iflush 3757 */ 3758 if ((ip->i_update_core == 0) && 3759 ((iip == NULL) || !(iip->ili_format.ilf_fields & XFS_ILOG_ALL))) 3760 return 0; 3761 3762 if (flags & FLUSH_LOG) { 3763 if (iip && iip->ili_last_lsn) { 3764 xlog_t *log = mp->m_log; 3765 xfs_lsn_t sync_lsn; 3766 int s, log_flags = XFS_LOG_FORCE; 3767 3768 s = GRANT_LOCK(log); 3769 sync_lsn = log->l_last_sync_lsn; 3770 GRANT_UNLOCK(log, s); 3771 3772 if ((XFS_LSN_CMP(iip->ili_last_lsn, sync_lsn) <= 0)) 3773 return 0; 3774 3775 if (flags & FLUSH_SYNC) 3776 log_flags |= XFS_LOG_SYNC; 3777 return xfs_log_force(mp, iip->ili_last_lsn, log_flags); 3778 } 3779 } 3780 3781 /* 3782 * We make this non-blocking if the inode is contended, 3783 * return EAGAIN to indicate to the caller that they 3784 * did not succeed. This prevents the flush path from 3785 * blocking on inodes inside another operation right 3786 * now, they get caught later by xfs_sync. 
3787 */ 3788 if (flags & FLUSH_INODE) { 3789 int flush_flags; 3790 3791 if (xfs_ipincount(ip)) 3792 return EAGAIN; 3793 3794 if (flags & FLUSH_SYNC) { 3795 xfs_ilock(ip, XFS_ILOCK_SHARED); 3796 xfs_iflock(ip); 3797 } else if (xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) { 3798 if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip)) { 3799 xfs_iunlock(ip, XFS_ILOCK_SHARED); 3800 return EAGAIN; 3801 } 3802 } else { 3803 return EAGAIN; 3804 } 3805 3806 if (flags & FLUSH_SYNC) 3807 flush_flags = XFS_IFLUSH_SYNC; 3808 else 3809 flush_flags = XFS_IFLUSH_ASYNC; 3810 3811 error = xfs_iflush(ip, flush_flags); 3812 xfs_iunlock(ip, XFS_ILOCK_SHARED); 3813 } 3814 3815 return error; 3816} 3817 3818int 3819xfs_set_dmattrs ( 3820 bhv_desc_t *bdp, 3821 u_int evmask, 3822 u_int16_t state, 3823 cred_t *credp) 3824{ 3825 xfs_inode_t *ip; 3826 xfs_trans_t *tp; 3827 xfs_mount_t *mp; 3828 int error; 3829 3830 if (!capable(CAP_SYS_ADMIN)) 3831 return XFS_ERROR(EPERM); 3832 3833 ip = XFS_BHVTOI(bdp); 3834 mp = ip->i_mount; 3835 3836 if (XFS_FORCED_SHUTDOWN(mp)) 3837 return XFS_ERROR(EIO); 3838 3839 tp = xfs_trans_alloc(mp, XFS_TRANS_SET_DMATTRS); 3840 error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES (mp), 0, 0, 0); 3841 if (error) { 3842 xfs_trans_cancel(tp, 0); 3843 return error; 3844 } 3845 xfs_ilock(ip, XFS_ILOCK_EXCL); 3846 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 3847 3848 ip->i_iocore.io_dmevmask = ip->i_d.di_dmevmask = evmask; 3849 ip->i_iocore.io_dmstate = ip->i_d.di_dmstate = state; 3850 3851 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 3852 IHOLD(ip); 3853 error = xfs_trans_commit(tp, 0); 3854 3855 return error; 3856} 3857 3858STATIC int 3859xfs_reclaim( 3860 bhv_desc_t *bdp) 3861{ 3862 xfs_inode_t *ip; 3863 bhv_vnode_t *vp; 3864 3865 vp = BHV_TO_VNODE(bdp); 3866 ip = XFS_BHVTOI(bdp); 3867 3868 vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address); 3869 3870 ASSERT(!VN_MAPPED(vp)); 3871 3872 /* bad inode, get out here ASAP */ 3873 if (VN_BAD(vp)) { 3874 xfs_ireclaim(ip); 3875 
return 0; 3876 } 3877 3878 vn_iowait(vp); 3879 3880 ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0); 3881 3882 /* 3883 * Make sure the atime in the XFS inode is correct before freeing the 3884 * Linux inode. 3885 */ 3886 xfs_synchronize_atime(ip); 3887 3888 /* 3889 * If we have nothing to flush with this inode then complete the 3890 * teardown now, otherwise break the link between the xfs inode and the 3891 * linux inode and clean up the xfs inode later. This avoids flushing 3892 * the inode to disk during the delete operation itself. 3893 * 3894 * When breaking the link, we need to set the XFS_IRECLAIMABLE flag 3895 * first to ensure that xfs_iunpin() will never see an xfs inode 3896 * that has a linux inode being reclaimed. Synchronisation is provided 3897 * by the i_flags_lock. 3898 */ 3899 if (!ip->i_update_core && (ip->i_itemp == NULL)) { 3900 xfs_ilock(ip, XFS_ILOCK_EXCL); 3901 xfs_iflock(ip); 3902 return xfs_finish_reclaim(ip, 1, XFS_IFLUSH_DELWRI_ELSE_SYNC); 3903 } else { 3904 xfs_mount_t *mp = ip->i_mount; 3905 3906 /* Protect sync and unpin from us */ 3907 XFS_MOUNT_ILOCK(mp); 3908 spin_lock(&ip->i_flags_lock); 3909 __xfs_iflags_set(ip, XFS_IRECLAIMABLE); 3910 vn_bhv_remove(VN_BHV_HEAD(vp), XFS_ITOBHV(ip)); 3911 spin_unlock(&ip->i_flags_lock); 3912 list_add_tail(&ip->i_reclaim, &mp->m_del_inodes); 3913 XFS_MOUNT_IUNLOCK(mp); 3914 } 3915 return 0; 3916} 3917 3918int 3919xfs_finish_reclaim( 3920 xfs_inode_t *ip, 3921 int locked, 3922 int sync_mode) 3923{ 3924 xfs_ihash_t *ih = ip->i_hash; 3925 bhv_vnode_t *vp = XFS_ITOV_NULL(ip); 3926 int error; 3927 3928 if (vp && VN_BAD(vp)) 3929 goto reclaim; 3930 3931 /* The hash lock here protects a thread in xfs_iget_core from 3932 * racing with us on linking the inode back with a vnode. 3933 * Once we have the XFS_IRECLAIM flag set it will not touch 3934 * us. 
3935 */ 3936 write_lock(&ih->ih_lock); 3937 spin_lock(&ip->i_flags_lock); 3938 if (__xfs_iflags_test(ip, XFS_IRECLAIM) || 3939 (!__xfs_iflags_test(ip, XFS_IRECLAIMABLE) && vp == NULL)) { 3940 spin_unlock(&ip->i_flags_lock); 3941 write_unlock(&ih->ih_lock); 3942 if (locked) { 3943 xfs_ifunlock(ip); 3944 xfs_iunlock(ip, XFS_ILOCK_EXCL); 3945 } 3946 return 1; 3947 } 3948 __xfs_iflags_set(ip, XFS_IRECLAIM); 3949 spin_unlock(&ip->i_flags_lock); 3950 write_unlock(&ih->ih_lock); 3951 3952 /* 3953 * If the inode is still dirty, then flush it out. If the inode 3954 * is not in the AIL, then it will be OK to flush it delwri as 3955 * long as xfs_iflush() does not keep any references to the inode. 3956 * We leave that decision up to xfs_iflush() since it has the 3957 * knowledge of whether it's OK to simply do a delwri flush of 3958 * the inode or whether we need to wait until the inode is 3959 * pulled from the AIL. 3960 * We get the flush lock regardless, though, just to make sure 3961 * we don't free it while it is being flushed. 3962 */ 3963 if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { 3964 if (!locked) { 3965 xfs_ilock(ip, XFS_ILOCK_EXCL); 3966 xfs_iflock(ip); 3967 } 3968 3969 if (ip->i_update_core || 3970 ((ip->i_itemp != NULL) && 3971 (ip->i_itemp->ili_format.ilf_fields != 0))) { 3972 error = xfs_iflush(ip, sync_mode); 3973 /* 3974 * If we hit an error, typically because of filesystem 3975 * shutdown, we don't need to let vn_reclaim to know 3976 * because we're gonna reclaim the inode anyway. 3977 */ 3978 if (error) { 3979 xfs_iunlock(ip, XFS_ILOCK_EXCL); 3980 goto reclaim; 3981 } 3982 xfs_iflock(ip); /* synchronize with xfs_iflush_done */ 3983 } 3984 3985 ASSERT(ip->i_update_core == 0); 3986 ASSERT(ip->i_itemp == NULL || 3987 ip->i_itemp->ili_format.ilf_fields == 0); 3988 xfs_iunlock(ip, XFS_ILOCK_EXCL); 3989 } else if (locked) { 3990 /* 3991 * We are not interested in doing an iflush if we're 3992 * in the process of shutting down the filesystem forcibly. 
3993 * So, just reclaim the inode. 3994 */ 3995 xfs_ifunlock(ip); 3996 xfs_iunlock(ip, XFS_ILOCK_EXCL); 3997 } 3998 3999 reclaim: 4000 xfs_ireclaim(ip); 4001 return 0; 4002} 4003 4004int 4005xfs_finish_reclaim_all(xfs_mount_t *mp, int noblock) 4006{ 4007 int purged; 4008 xfs_inode_t *ip, *n; 4009 int done = 0; 4010 4011 while (!done) { 4012 purged = 0; 4013 XFS_MOUNT_ILOCK(mp); 4014 list_for_each_entry_safe(ip, n, &mp->m_del_inodes, i_reclaim) { 4015 if (noblock) { 4016 if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL) == 0) 4017 continue; 4018 if (xfs_ipincount(ip) || 4019 !xfs_iflock_nowait(ip)) { 4020 xfs_iunlock(ip, XFS_ILOCK_EXCL); 4021 continue; 4022 } 4023 } 4024 XFS_MOUNT_IUNLOCK(mp); 4025 if (xfs_finish_reclaim(ip, noblock, 4026 XFS_IFLUSH_DELWRI_ELSE_ASYNC)) 4027 delay(1); 4028 purged = 1; 4029 break; 4030 } 4031 4032 done = !purged; 4033 } 4034 4035 XFS_MOUNT_IUNLOCK(mp); 4036 return 0; 4037} 4038 4039/* 4040 * xfs_alloc_file_space() 4041 * This routine allocates disk space for the given file. 4042 * 4043 * If alloc_type == 0, this request is for an ALLOCSP type 4044 * request which will change the file size. In this case, no 4045 * DMAPI event will be generated by the call. A TRUNCATE event 4046 * will be generated later by xfs_setattr. 4047 * 4048 * If alloc_type != 0, this request is for a RESVSP type 4049 * request, and a DMAPI DM_EVENT_WRITE will be generated if the 4050 * lower block boundary byte address is less than the file's 4051 * length. 
4052 * 4053 * RETURNS: 4054 * 0 on success 4055 * errno on error 4056 * 4057 */ 4058STATIC int 4059xfs_alloc_file_space( 4060 xfs_inode_t *ip, 4061 xfs_off_t offset, 4062 xfs_off_t len, 4063 int alloc_type, 4064 int attr_flags) 4065{ 4066 xfs_mount_t *mp = ip->i_mount; 4067 xfs_off_t count; 4068 xfs_filblks_t allocated_fsb; 4069 xfs_filblks_t allocatesize_fsb; 4070 xfs_extlen_t extsz, temp; 4071 xfs_fileoff_t startoffset_fsb; 4072 xfs_fsblock_t firstfsb; 4073 int nimaps; 4074 int bmapi_flag; 4075 int quota_flag; 4076 int rt; 4077 xfs_trans_t *tp; 4078 xfs_bmbt_irec_t imaps[1], *imapp; 4079 xfs_bmap_free_t free_list; 4080 uint qblocks, resblks, resrtextents; 4081 int committed; 4082 int error; 4083 4084 vn_trace_entry(XFS_ITOV(ip), __FUNCTION__, (inst_t *)__return_address); 4085 4086 if (XFS_FORCED_SHUTDOWN(mp)) 4087 return XFS_ERROR(EIO); 4088 4089 if ((error = XFS_QM_DQATTACH(mp, ip, 0))) 4090 return error; 4091 4092 if (len <= 0) 4093 return XFS_ERROR(EINVAL); 4094 4095 rt = XFS_IS_REALTIME_INODE(ip); 4096 extsz = xfs_get_extsz_hint(ip); 4097 4098 count = len; 4099 imapp = &imaps[0]; 4100 nimaps = 1; 4101 bmapi_flag = XFS_BMAPI_WRITE | (alloc_type ? XFS_BMAPI_PREALLOC : 0); 4102 startoffset_fsb = XFS_B_TO_FSBT(mp, offset); 4103 allocatesize_fsb = XFS_B_TO_FSB(mp, count); 4104 4105 /* Generate a DMAPI event if needed. 
*/ 4106 if (alloc_type != 0 && offset < ip->i_size && 4107 (attr_flags&ATTR_DMI) == 0 && 4108 DM_EVENT_ENABLED(XFS_MTOVFS(mp), ip, DM_EVENT_WRITE)) { 4109 xfs_off_t end_dmi_offset; 4110 4111 end_dmi_offset = offset+len; 4112 if (end_dmi_offset > ip->i_size) 4113 end_dmi_offset = ip->i_size; 4114 error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, XFS_ITOV(ip), 4115 offset, end_dmi_offset - offset, 4116 0, NULL); 4117 if (error) 4118 return error; 4119 } 4120 4121 /* 4122 * Allocate file space until done or until there is an error 4123 */ 4124retry: 4125 while (allocatesize_fsb && !error) { 4126 xfs_fileoff_t s, e; 4127 4128 /* 4129 * Determine space reservations for data/realtime. 4130 */ 4131 if (unlikely(extsz)) { 4132 s = startoffset_fsb; 4133 do_div(s, extsz); 4134 s *= extsz; 4135 e = startoffset_fsb + allocatesize_fsb; 4136 if ((temp = do_mod(startoffset_fsb, extsz))) 4137 e += temp; 4138 if ((temp = do_mod(e, extsz))) 4139 e += extsz - temp; 4140 } else { 4141 s = 0; 4142 e = allocatesize_fsb; 4143 } 4144 4145 if (unlikely(rt)) { 4146 resrtextents = qblocks = (uint)(e - s); 4147 resrtextents /= mp->m_sb.sb_rextsize; 4148 resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0); 4149 quota_flag = XFS_QMOPT_RES_RTBLKS; 4150 } else { 4151 resrtextents = 0; 4152 resblks = qblocks = \ 4153 XFS_DIOSTRAT_SPACE_RES(mp, (uint)(e - s)); 4154 quota_flag = XFS_QMOPT_RES_REGBLKS; 4155 } 4156 4157 /* 4158 * Allocate and setup the transaction. 4159 */ 4160 tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); 4161 error = xfs_trans_reserve(tp, resblks, 4162 XFS_WRITE_LOG_RES(mp), resrtextents, 4163 XFS_TRANS_PERM_LOG_RES, 4164 XFS_WRITE_LOG_COUNT); 4165 /* 4166 * Check for running out of space 4167 */ 4168 if (error) { 4169 /* 4170 * Free the transaction structure. 
4171 */ 4172 ASSERT(error == ENOSPC || XFS_FORCED_SHUTDOWN(mp)); 4173 xfs_trans_cancel(tp, 0); 4174 break; 4175 } 4176 xfs_ilock(ip, XFS_ILOCK_EXCL); 4177 error = XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip, 4178 qblocks, 0, quota_flag); 4179 if (error) 4180 goto error1; 4181 4182 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 4183 xfs_trans_ihold(tp, ip); 4184 4185 /* 4186 * Issue the xfs_bmapi() call to allocate the blocks 4187 */ 4188 XFS_BMAP_INIT(&free_list, &firstfsb); 4189 error = XFS_BMAPI(mp, tp, &ip->i_iocore, startoffset_fsb, 4190 allocatesize_fsb, bmapi_flag, 4191 &firstfsb, 0, imapp, &nimaps, 4192 &free_list, NULL); 4193 if (error) { 4194 goto error0; 4195 } 4196 4197 /* 4198 * Complete the transaction 4199 */ 4200 error = xfs_bmap_finish(&tp, &free_list, &committed); 4201 if (error) { 4202 goto error0; 4203 } 4204 4205 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 4206 xfs_iunlock(ip, XFS_ILOCK_EXCL); 4207 if (error) { 4208 break; 4209 } 4210 4211 allocated_fsb = imapp->br_blockcount; 4212 4213 if (nimaps == 0) { 4214 error = XFS_ERROR(ENOSPC); 4215 break; 4216 } 4217 4218 startoffset_fsb += allocated_fsb; 4219 allocatesize_fsb -= allocated_fsb; 4220 } 4221dmapi_enospc_check: 4222 if (error == ENOSPC && (attr_flags&ATTR_DMI) == 0 && 4223 DM_EVENT_ENABLED(XFS_MTOVFS(mp), ip, DM_EVENT_NOSPACE)) { 4224 4225 error = XFS_SEND_NAMESP(mp, DM_EVENT_NOSPACE, 4226 XFS_ITOV(ip), DM_RIGHT_NULL, 4227 XFS_ITOV(ip), DM_RIGHT_NULL, 4228 NULL, NULL, 0, 0, 0); /* Delay flag intentionally unused */ 4229 if (error == 0) 4230 goto retry; /* Maybe DMAPI app. 
has made space */ 4231 /* else fall through with error from XFS_SEND_DATA */ 4232 } 4233 4234 return error; 4235 4236error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */ 4237 xfs_bmap_cancel(&free_list); 4238 XFS_TRANS_UNRESERVE_QUOTA_NBLKS(mp, tp, ip, qblocks, 0, quota_flag); 4239 4240error1: /* Just cancel transaction */ 4241 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); 4242 xfs_iunlock(ip, XFS_ILOCK_EXCL); 4243 goto dmapi_enospc_check; 4244} 4245 4246/* 4247 * Zero file bytes between startoff and endoff inclusive. 4248 * The iolock is held exclusive and no blocks are buffered. 4249 */ 4250STATIC int 4251xfs_zero_remaining_bytes( 4252 xfs_inode_t *ip, 4253 xfs_off_t startoff, 4254 xfs_off_t endoff) 4255{ 4256 xfs_bmbt_irec_t imap; 4257 xfs_fileoff_t offset_fsb; 4258 xfs_off_t lastoffset; 4259 xfs_off_t offset; 4260 xfs_buf_t *bp; 4261 xfs_mount_t *mp = ip->i_mount; 4262 int nimap; 4263 int error = 0; 4264 4265 bp = xfs_buf_get_noaddr(mp->m_sb.sb_blocksize, 4266 ip->i_d.di_flags & XFS_DIFLAG_REALTIME ? 
4267 mp->m_rtdev_targp : mp->m_ddev_targp); 4268 4269 for (offset = startoff; offset <= endoff; offset = lastoffset + 1) { 4270 offset_fsb = XFS_B_TO_FSBT(mp, offset); 4271 nimap = 1; 4272 error = XFS_BMAPI(mp, NULL, &ip->i_iocore, offset_fsb, 1, 0, 4273 NULL, 0, &imap, &nimap, NULL, NULL); 4274 if (error || nimap < 1) 4275 break; 4276 ASSERT(imap.br_blockcount >= 1); 4277 ASSERT(imap.br_startoff == offset_fsb); 4278 lastoffset = XFS_FSB_TO_B(mp, imap.br_startoff + 1) - 1; 4279 if (lastoffset > endoff) 4280 lastoffset = endoff; 4281 if (imap.br_startblock == HOLESTARTBLOCK) 4282 continue; 4283 ASSERT(imap.br_startblock != DELAYSTARTBLOCK); 4284 if (imap.br_state == XFS_EXT_UNWRITTEN) 4285 continue; 4286 XFS_BUF_UNDONE(bp); 4287 XFS_BUF_UNWRITE(bp); 4288 XFS_BUF_READ(bp); 4289 XFS_BUF_SET_ADDR(bp, XFS_FSB_TO_DB(ip, imap.br_startblock)); 4290 xfsbdstrat(mp, bp); 4291 if ((error = xfs_iowait(bp))) { 4292 xfs_ioerror_alert("xfs_zero_remaining_bytes(read)", 4293 mp, bp, XFS_BUF_ADDR(bp)); 4294 break; 4295 } 4296 memset(XFS_BUF_PTR(bp) + 4297 (offset - XFS_FSB_TO_B(mp, imap.br_startoff)), 4298 0, lastoffset - offset + 1); 4299 XFS_BUF_UNDONE(bp); 4300 XFS_BUF_UNREAD(bp); 4301 XFS_BUF_WRITE(bp); 4302 xfsbdstrat(mp, bp); 4303 if ((error = xfs_iowait(bp))) { 4304 xfs_ioerror_alert("xfs_zero_remaining_bytes(write)", 4305 mp, bp, XFS_BUF_ADDR(bp)); 4306 break; 4307 } 4308 } 4309 xfs_buf_free(bp); 4310 return error; 4311} 4312 4313/* 4314 * xfs_free_file_space() 4315 * This routine frees disk space for the given file. 4316 * 4317 * This routine is only called by xfs_change_file_space 4318 * for an UNRESVSP type call. 
4319 * 4320 * RETURNS: 4321 * 0 on success 4322 * errno on error 4323 * 4324 */ 4325STATIC int 4326xfs_free_file_space( 4327 xfs_inode_t *ip, 4328 xfs_off_t offset, 4329 xfs_off_t len, 4330 int attr_flags) 4331{ 4332 bhv_vnode_t *vp; 4333 int committed; 4334 int done; 4335 xfs_off_t end_dmi_offset; 4336 xfs_fileoff_t endoffset_fsb; 4337 int error; 4338 xfs_fsblock_t firstfsb; 4339 xfs_bmap_free_t free_list; 4340 xfs_bmbt_irec_t imap; 4341 xfs_off_t ioffset; 4342 xfs_extlen_t mod=0; 4343 xfs_mount_t *mp; 4344 int nimap; 4345 uint resblks; 4346 uint rounding; 4347 int rt; 4348 xfs_fileoff_t startoffset_fsb; 4349 xfs_trans_t *tp; 4350 int need_iolock = 1; 4351 4352 vp = XFS_ITOV(ip); 4353 mp = ip->i_mount; 4354 4355 vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address); 4356 4357 if ((error = XFS_QM_DQATTACH(mp, ip, 0))) 4358 return error; 4359 4360 error = 0; 4361 if (len <= 0) /* if nothing being freed */ 4362 return error; 4363 rt = (ip->i_d.di_flags & XFS_DIFLAG_REALTIME); 4364 startoffset_fsb = XFS_B_TO_FSB(mp, offset); 4365 end_dmi_offset = offset + len; 4366 endoffset_fsb = XFS_B_TO_FSBT(mp, end_dmi_offset); 4367 4368 if (offset < ip->i_size && 4369 (attr_flags & ATTR_DMI) == 0 && 4370 DM_EVENT_ENABLED(XFS_MTOVFS(mp), ip, DM_EVENT_WRITE)) { 4371 if (end_dmi_offset > ip->i_size) 4372 end_dmi_offset = ip->i_size; 4373 error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, vp, 4374 offset, end_dmi_offset - offset, 4375 AT_DELAY_FLAG(attr_flags), NULL); 4376 if (error) 4377 return error; 4378 } 4379 4380 if (attr_flags & ATTR_NOLOCK) 4381 need_iolock = 0; 4382 if (need_iolock) { 4383 xfs_ilock(ip, XFS_IOLOCK_EXCL); 4384 vn_iowait(vp); /* wait for the completion of any pending DIOs */ 4385 } 4386 4387 rounding = max_t(uint, 1 << mp->m_sb.sb_blocklog, NBPP); 4388 ioffset = offset & ~(rounding - 1); 4389 4390 if (VN_CACHED(vp) != 0) { 4391 xfs_inval_cached_trace(&ip->i_iocore, ioffset, -1, 4392 ctooff(offtoct(ioffset)), -1); 4393 error = bhv_vop_flushinval_pages(vp, 
ctooff(offtoct(ioffset)), 4394 -1, FI_REMAPF_LOCKED); 4395 if (error) 4396 goto out_unlock_iolock; 4397 } 4398 4399 /* 4400 * Need to zero the stuff we're not freeing, on disk. 4401 * If its a realtime file & can't use unwritten extents then we 4402 * actually need to zero the extent edges. Otherwise xfs_bunmapi 4403 * will take care of it for us. 4404 */ 4405 if (rt && !XFS_SB_VERSION_HASEXTFLGBIT(&mp->m_sb)) { 4406 nimap = 1; 4407 error = XFS_BMAPI(mp, NULL, &ip->i_iocore, startoffset_fsb, 4408 1, 0, NULL, 0, &imap, &nimap, NULL, NULL); 4409 if (error) 4410 goto out_unlock_iolock; 4411 ASSERT(nimap == 0 || nimap == 1); 4412 if (nimap && imap.br_startblock != HOLESTARTBLOCK) { 4413 xfs_daddr_t block; 4414 4415 ASSERT(imap.br_startblock != DELAYSTARTBLOCK); 4416 block = imap.br_startblock; 4417 mod = do_div(block, mp->m_sb.sb_rextsize); 4418 if (mod) 4419 startoffset_fsb += mp->m_sb.sb_rextsize - mod; 4420 } 4421 nimap = 1; 4422 error = XFS_BMAPI(mp, NULL, &ip->i_iocore, endoffset_fsb - 1, 4423 1, 0, NULL, 0, &imap, &nimap, NULL, NULL); 4424 if (error) 4425 goto out_unlock_iolock; 4426 ASSERT(nimap == 0 || nimap == 1); 4427 if (nimap && imap.br_startblock != HOLESTARTBLOCK) { 4428 ASSERT(imap.br_startblock != DELAYSTARTBLOCK); 4429 mod++; 4430 if (mod && (mod != mp->m_sb.sb_rextsize)) 4431 endoffset_fsb -= mod; 4432 } 4433 } 4434 if ((done = (endoffset_fsb <= startoffset_fsb))) 4435 /* 4436 * One contiguous piece to clear 4437 */ 4438 error = xfs_zero_remaining_bytes(ip, offset, offset + len - 1); 4439 else { 4440 /* 4441 * Some full blocks, possibly two pieces to clear 4442 */ 4443 if (offset < XFS_FSB_TO_B(mp, startoffset_fsb)) 4444 error = xfs_zero_remaining_bytes(ip, offset, 4445 XFS_FSB_TO_B(mp, startoffset_fsb) - 1); 4446 if (!error && 4447 XFS_FSB_TO_B(mp, endoffset_fsb) < offset + len) 4448 error = xfs_zero_remaining_bytes(ip, 4449 XFS_FSB_TO_B(mp, endoffset_fsb), 4450 offset + len - 1); 4451 } 4452 4453 /* 4454 * free file space until done or until there 
is an error 4455 */ 4456 resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0); 4457 while (!error && !done) { 4458 4459 /* 4460 * allocate and setup the transaction. Allow this 4461 * transaction to dip into the reserve blocks to ensure 4462 * the freeing of the space succeeds at ENOSPC. 4463 */ 4464 tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); 4465 tp->t_flags |= XFS_TRANS_RESERVE; 4466 error = xfs_trans_reserve(tp, 4467 resblks, 4468 XFS_WRITE_LOG_RES(mp), 4469 0, 4470 XFS_TRANS_PERM_LOG_RES, 4471 XFS_WRITE_LOG_COUNT); 4472 4473 /* 4474 * check for running out of space 4475 */ 4476 if (error) { 4477 /* 4478 * Free the transaction structure. 4479 */ 4480 ASSERT(error == ENOSPC || XFS_FORCED_SHUTDOWN(mp)); 4481 xfs_trans_cancel(tp, 0); 4482 break; 4483 } 4484 xfs_ilock(ip, XFS_ILOCK_EXCL); 4485 error = XFS_TRANS_RESERVE_QUOTA(mp, tp, 4486 ip->i_udquot, ip->i_gdquot, resblks, 0, 4487 XFS_QMOPT_RES_REGBLKS); 4488 if (error) 4489 goto error1; 4490 4491 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 4492 xfs_trans_ihold(tp, ip); 4493 4494 /* 4495 * issue the bunmapi() call to free the blocks 4496 */ 4497 XFS_BMAP_INIT(&free_list, &firstfsb); 4498 error = XFS_BUNMAPI(mp, tp, &ip->i_iocore, startoffset_fsb, 4499 endoffset_fsb - startoffset_fsb, 4500 0, 2, &firstfsb, &free_list, NULL, &done); 4501 if (error) { 4502 goto error0; 4503 } 4504 4505 /* 4506 * complete the transaction 4507 */ 4508 error = xfs_bmap_finish(&tp, &free_list, &committed); 4509 if (error) { 4510 goto error0; 4511 } 4512 4513 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 4514 xfs_iunlock(ip, XFS_ILOCK_EXCL); 4515 } 4516 4517 out_unlock_iolock: 4518 if (need_iolock) 4519 xfs_iunlock(ip, XFS_IOLOCK_EXCL); 4520 return error; 4521 4522 error0: 4523 xfs_bmap_cancel(&free_list); 4524 error1: 4525 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); 4526 xfs_iunlock(ip, need_iolock ? 
(XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL) : 4527 XFS_ILOCK_EXCL); 4528 return error; 4529} 4530 4531/* 4532 * xfs_change_file_space() 4533 * This routine allocates or frees disk space for the given file. 4534 * The user specified parameters are checked for alignment and size 4535 * limitations. 4536 * 4537 * RETURNS: 4538 * 0 on success 4539 * errno on error 4540 * 4541 */ 4542int 4543xfs_change_file_space( 4544 bhv_desc_t *bdp, 4545 int cmd, 4546 xfs_flock64_t *bf, 4547 xfs_off_t offset, 4548 cred_t *credp, 4549 int attr_flags) 4550{ 4551 int clrprealloc; 4552 int error; 4553 xfs_fsize_t fsize; 4554 xfs_inode_t *ip; 4555 xfs_mount_t *mp; 4556 int setprealloc; 4557 xfs_off_t startoffset; 4558 xfs_off_t llen; 4559 xfs_trans_t *tp; 4560 bhv_vattr_t va; 4561 bhv_vnode_t *vp; 4562 4563 vp = BHV_TO_VNODE(bdp); 4564 vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address); 4565 4566 ip = XFS_BHVTOI(bdp); 4567 mp = ip->i_mount; 4568 4569 /* 4570 * must be a regular file and have write permission 4571 */ 4572 if (!VN_ISREG(vp)) 4573 return XFS_ERROR(EINVAL); 4574 4575 xfs_ilock(ip, XFS_ILOCK_SHARED); 4576 4577 if ((error = xfs_iaccess(ip, S_IWUSR, credp))) { 4578 xfs_iunlock(ip, XFS_ILOCK_SHARED); 4579 return error; 4580 } 4581 4582 xfs_iunlock(ip, XFS_ILOCK_SHARED); 4583 4584 switch (bf->l_whence) { 4585 case 0: /*SEEK_SET*/ 4586 break; 4587 case 1: /*SEEK_CUR*/ 4588 bf->l_start += offset; 4589 break; 4590 case 2: /*SEEK_END*/ 4591 bf->l_start += ip->i_size; 4592 break; 4593 default: 4594 return XFS_ERROR(EINVAL); 4595 } 4596 4597 llen = bf->l_len > 0 ? bf->l_len - 1 : bf->l_len; 4598 4599 if ( (bf->l_start < 0) 4600 || (bf->l_start > XFS_MAXIOFFSET(mp)) 4601 || (bf->l_start + llen < 0) 4602 || (bf->l_start + llen > XFS_MAXIOFFSET(mp))) 4603 return XFS_ERROR(EINVAL); 4604 4605 bf->l_whence = 0; 4606 4607 startoffset = bf->l_start; 4608 fsize = ip->i_size; 4609 4610 /* 4611 * XFS_IOC_RESVSP and XFS_IOC_UNRESVSP will reserve or unreserve 4612 * file space. 
4613 * These calls do NOT zero the data space allocated to the file, 4614 * nor do they change the file size. 4615 * 4616 * XFS_IOC_ALLOCSP and XFS_IOC_FREESP will allocate and free file 4617 * space. 4618 * These calls cause the new file data to be zeroed and the file 4619 * size to be changed. 4620 */ 4621 setprealloc = clrprealloc = 0; 4622 4623 switch (cmd) { 4624 case XFS_IOC_RESVSP: 4625 case XFS_IOC_RESVSP64: 4626 error = xfs_alloc_file_space(ip, startoffset, bf->l_len, 4627 1, attr_flags); 4628 if (error) 4629 return error; 4630 setprealloc = 1; 4631 break; 4632 4633 case XFS_IOC_UNRESVSP: 4634 case XFS_IOC_UNRESVSP64: 4635 if ((error = xfs_free_file_space(ip, startoffset, bf->l_len, 4636 attr_flags))) 4637 return error; 4638 break; 4639 4640 case XFS_IOC_ALLOCSP: 4641 case XFS_IOC_ALLOCSP64: 4642 case XFS_IOC_FREESP: 4643 case XFS_IOC_FREESP64: 4644 if (startoffset > fsize) { 4645 error = xfs_alloc_file_space(ip, fsize, 4646 startoffset - fsize, 0, attr_flags); 4647 if (error) 4648 break; 4649 } 4650 4651 va.va_mask = XFS_AT_SIZE; 4652 va.va_size = startoffset; 4653 4654 error = xfs_setattr(bdp, &va, attr_flags, credp); 4655 4656 if (error) 4657 return error; 4658 4659 clrprealloc = 1; 4660 break; 4661 4662 default: 4663 ASSERT(0); 4664 return XFS_ERROR(EINVAL); 4665 } 4666 4667 /* 4668 * update the inode timestamp, mode, and prealloc flag bits 4669 */ 4670 tp = xfs_trans_alloc(mp, XFS_TRANS_WRITEID); 4671 4672 if ((error = xfs_trans_reserve(tp, 0, XFS_WRITEID_LOG_RES(mp), 4673 0, 0, 0))) { 4674 /* ASSERT(0); */ 4675 xfs_trans_cancel(tp, 0); 4676 return error; 4677 } 4678 4679 xfs_ilock(ip, XFS_ILOCK_EXCL); 4680 4681 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 4682 xfs_trans_ihold(tp, ip); 4683 4684 if ((attr_flags & ATTR_DMI) == 0) { 4685 ip->i_d.di_mode &= ~S_ISUID; 4686 4687 /* 4688 * Note that we don't have to worry about mandatory 4689 * file locking being disabled here because we only 4690 * clear the S_ISGID bit if the Group execute bit is 4691 * on, 
but if it was on then mandatory locking wouldn't 4692 * have been enabled. 4693 */ 4694 if (ip->i_d.di_mode & S_IXGRP) 4695 ip->i_d.di_mode &= ~S_ISGID; 4696 4697 xfs_ichgtime(ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 4698 } 4699 if (setprealloc) 4700 ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC; 4701 else if (clrprealloc) 4702 ip->i_d.di_flags &= ~XFS_DIFLAG_PREALLOC; 4703 4704 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 4705 xfs_trans_set_sync(tp); 4706 4707 error = xfs_trans_commit(tp, 0); 4708 4709 xfs_iunlock(ip, XFS_ILOCK_EXCL); 4710 4711 return error; 4712} 4713 4714bhv_vnodeops_t xfs_vnodeops = { 4715 BHV_IDENTITY_INIT(VN_BHV_XFS,VNODE_POSITION_XFS), 4716 .vop_open = xfs_open, 4717 .vop_read = xfs_read, 4718#ifdef HAVE_SPLICE 4719 .vop_splice_read = xfs_splice_read, 4720 .vop_splice_write = xfs_splice_write, 4721#endif 4722 .vop_write = xfs_write, 4723 .vop_ioctl = xfs_ioctl, 4724 .vop_getattr = xfs_getattr, 4725 .vop_setattr = xfs_setattr, 4726 .vop_access = xfs_access, 4727 .vop_lookup = xfs_lookup, 4728 .vop_create = xfs_create, 4729 .vop_remove = xfs_remove, 4730 .vop_link = xfs_link, 4731 .vop_rename = xfs_rename, 4732 .vop_mkdir = xfs_mkdir, 4733 .vop_rmdir = xfs_rmdir, 4734 .vop_readdir = xfs_readdir, 4735 .vop_symlink = xfs_symlink, 4736 .vop_readlink = xfs_readlink, 4737 .vop_fsync = xfs_fsync, 4738 .vop_inactive = xfs_inactive, 4739 .vop_fid2 = xfs_fid2, 4740 .vop_rwlock = xfs_rwlock, 4741 .vop_rwunlock = xfs_rwunlock, 4742 .vop_bmap = xfs_bmap, 4743 .vop_reclaim = xfs_reclaim, 4744 .vop_attr_get = xfs_attr_get, 4745 .vop_attr_set = xfs_attr_set, 4746 .vop_attr_remove = xfs_attr_remove, 4747 .vop_attr_list = xfs_attr_list, 4748 .vop_link_removed = (vop_link_removed_t)fs_noval, 4749 .vop_vnode_change = (vop_vnode_change_t)fs_noval, 4750 .vop_tosspages = fs_tosspages, 4751 .vop_flushinval_pages = fs_flushinval_pages, 4752 .vop_flush_pages = fs_flush_pages, 4753 .vop_release = xfs_release, 4754 .vop_iflush = xfs_inode_flush, 4755};