Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
at c9a28fa7b9ac19b676deefa0a171ce7df8755c08 (4495 lines, 114 kB)
/*
 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
 * All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */

#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_types.h"
#include "xfs_bit.h"
#include "xfs_log.h"
#include "xfs_inum.h"
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_dir2.h"
#include "xfs_dmapi.h"
#include "xfs_mount.h"
#include "xfs_da_btree.h"
#include "xfs_bmap_btree.h"
#include "xfs_alloc_btree.h"
#include "xfs_ialloc_btree.h"
#include "xfs_dir2_sf.h"
#include "xfs_attr_sf.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_inode_item.h"
#include "xfs_itable.h"
#include "xfs_btree.h"
#include "xfs_ialloc.h"
#include "xfs_alloc.h"
#include "xfs_bmap.h"
#include "xfs_attr.h"
#include "xfs_rw.h"
#include "xfs_error.h"
#include "xfs_quota.h"
#include "xfs_utils.h"
#include "xfs_rtalloc.h"
#include "xfs_refcache.h"
#include "xfs_trans_space.h"
#include "xfs_log_priv.h"
#include "xfs_filestream.h"
#include "xfs_vnodeops.h"

int
xfs_open(
	xfs_inode_t	*ip)
{
	int		mode;

	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
		return XFS_ERROR(EIO);

	/*
	 * If it's a directory with any blocks, read-ahead block 0
	 * as we're almost certain to have the next operation be a read there.
	 */
	if (S_ISDIR(ip->i_d.di_mode) && ip->i_d.di_nextents > 0) {
		mode = xfs_ilock_map_shared(ip);
		if (ip->i_d.di_nextents > 0)
			(void)xfs_da_reada_buf(NULL, ip, 0, XFS_DATA_FORK);
		xfs_iunlock(ip, mode);
	}
	return 0;
}

/*
 * xfs_getattr
 */
int
xfs_getattr(
	xfs_inode_t	*ip,
	bhv_vattr_t	*vap,
	int		flags)
{
	bhv_vnode_t	*vp = XFS_ITOV(ip);
	xfs_mount_t	*mp = ip->i_mount;

	vn_trace_entry(ip, __FUNCTION__, (inst_t *)__return_address);

	if (XFS_FORCED_SHUTDOWN(mp))
		return XFS_ERROR(EIO);

	if (!(flags & ATTR_LAZY))
		xfs_ilock(ip, XFS_ILOCK_SHARED);

	vap->va_size = XFS_ISIZE(ip);
	if (vap->va_mask == XFS_AT_SIZE)
		goto all_done;

	vap->va_nblocks =
		XFS_FSB_TO_BB(mp, ip->i_d.di_nblocks + ip->i_delayed_blks);
	vap->va_nodeid = ip->i_ino;
#if XFS_BIG_INUMS
	vap->va_nodeid += mp->m_inoadd;
#endif
	vap->va_nlink = ip->i_d.di_nlink;

	/*
	 * Quick exit for non-stat callers
	 */
	if ((vap->va_mask &
	    ~(XFS_AT_SIZE|XFS_AT_FSID|XFS_AT_NODEID|
	      XFS_AT_NLINK|XFS_AT_BLKSIZE)) == 0)
		goto all_done;

	/*
	 * Copy from in-core inode.
	 */
	vap->va_mode = ip->i_d.di_mode;
	vap->va_uid = ip->i_d.di_uid;
	vap->va_gid = ip->i_d.di_gid;
	vap->va_projid = ip->i_d.di_projid;

	/*
	 * Check vnode type block/char vs. everything else.
	 */
	switch (ip->i_d.di_mode & S_IFMT) {
	case S_IFBLK:
	case S_IFCHR:
		vap->va_rdev = ip->i_df.if_u2.if_rdev;
		vap->va_blocksize = BLKDEV_IOSIZE;
		break;
	default:
		vap->va_rdev = 0;

		if (!(ip->i_d.di_flags & XFS_DIFLAG_REALTIME)) {
			vap->va_blocksize = xfs_preferred_iosize(mp);
		} else {

			/*
			 * If the file blocks are being allocated from a
			 * realtime partition, then return the inode's
			 * realtime extent size or the realtime volume's
			 * extent size.
			 */
			vap->va_blocksize =
				xfs_get_extsz_hint(ip) << mp->m_sb.sb_blocklog;
		}
		break;
	}

	vn_atime_to_timespec(vp, &vap->va_atime);
	vap->va_mtime.tv_sec = ip->i_d.di_mtime.t_sec;
	vap->va_mtime.tv_nsec = ip->i_d.di_mtime.t_nsec;
	vap->va_ctime.tv_sec = ip->i_d.di_ctime.t_sec;
	vap->va_ctime.tv_nsec = ip->i_d.di_ctime.t_nsec;

	/*
	 * Exit for stat callers.  See if any of the rest of the fields
	 * to be filled in are needed.
	 */
	if ((vap->va_mask &
	     (XFS_AT_XFLAGS|XFS_AT_EXTSIZE|XFS_AT_NEXTENTS|XFS_AT_ANEXTENTS|
	      XFS_AT_GENCOUNT|XFS_AT_VCODE)) == 0)
		goto all_done;

	/*
	 * Convert di_flags to xflags.
	 */
	vap->va_xflags = xfs_ip2xflags(ip);

	/*
	 * Exit for inode revalidate.  See if any of the rest of
	 * the fields to be filled in are needed.
	 */
	if ((vap->va_mask &
	     (XFS_AT_EXTSIZE|XFS_AT_NEXTENTS|XFS_AT_ANEXTENTS|
	      XFS_AT_GENCOUNT|XFS_AT_VCODE)) == 0)
		goto all_done;

	vap->va_extsize = ip->i_d.di_extsize << mp->m_sb.sb_blocklog;
	vap->va_nextents =
		(ip->i_df.if_flags & XFS_IFEXTENTS) ?
			ip->i_df.if_bytes / sizeof(xfs_bmbt_rec_t) :
			ip->i_d.di_nextents;
	if (ip->i_afp)
		vap->va_anextents =
			(ip->i_afp->if_flags & XFS_IFEXTENTS) ?
				ip->i_afp->if_bytes / sizeof(xfs_bmbt_rec_t) :
				ip->i_d.di_anextents;
	else
		vap->va_anextents = 0;
	vap->va_gen = ip->i_d.di_gen;

 all_done:
	if (!(flags & ATTR_LAZY))
		xfs_iunlock(ip, XFS_ILOCK_SHARED);
	return 0;
}


/*
 * xfs_setattr
 */
int
xfs_setattr(
	xfs_inode_t		*ip,
	bhv_vattr_t		*vap,
	int			flags,
	cred_t			*credp)
{
	bhv_vnode_t		*vp = XFS_ITOV(ip);
	xfs_mount_t		*mp = ip->i_mount;
	xfs_trans_t		*tp;
	int			mask;
	int			code;
	uint			lock_flags;
	uint			commit_flags=0;
	uid_t			uid=0, iuid=0;
	gid_t			gid=0, igid=0;
	int			timeflags = 0;
	xfs_prid_t		projid=0, iprojid=0;
	int			mandlock_before, mandlock_after;
	struct xfs_dquot	*udqp, *gdqp, *olddquot1, *olddquot2;
	int			file_owner;
	int			need_iolock = 1;

	vn_trace_entry(ip, __FUNCTION__, (inst_t *)__return_address);

	if (mp->m_flags & XFS_MOUNT_RDONLY)
		return XFS_ERROR(EROFS);

	/*
	 * Cannot set certain attributes.
	 */
	mask = vap->va_mask;
	if (mask & XFS_AT_NOSET) {
		return XFS_ERROR(EINVAL);
	}

	if (XFS_FORCED_SHUTDOWN(mp))
		return XFS_ERROR(EIO);

	/*
	 * Timestamps do not need to be logged and hence do not
	 * need to be done within a transaction.
	 */
	if (mask & XFS_AT_UPDTIMES) {
		ASSERT((mask & ~XFS_AT_UPDTIMES) == 0);
		timeflags = ((mask & XFS_AT_UPDATIME) ? XFS_ICHGTIME_ACC : 0) |
			    ((mask & XFS_AT_UPDCTIME) ? XFS_ICHGTIME_CHG : 0) |
			    ((mask & XFS_AT_UPDMTIME) ? XFS_ICHGTIME_MOD : 0);
		xfs_ichgtime(ip, timeflags);
		return 0;
	}

	olddquot1 = olddquot2 = NULL;
	udqp = gdqp = NULL;

	/*
	 * If disk quotas is on, we make sure that the dquots do exist on disk,
	 * before we start any other transactions. Trying to do this later
	 * is messy. We don't care to take a readlock to look at the ids
	 * in inode here, because we can't hold it across the trans_reserve.
	 * If the IDs do change before we take the ilock, we're covered
	 * because the i_*dquot fields will get updated anyway.
	 */
	if (XFS_IS_QUOTA_ON(mp) &&
	    (mask & (XFS_AT_UID|XFS_AT_GID|XFS_AT_PROJID))) {
		uint	qflags = 0;

		if ((mask & XFS_AT_UID) && XFS_IS_UQUOTA_ON(mp)) {
			uid = vap->va_uid;
			qflags |= XFS_QMOPT_UQUOTA;
		} else {
			uid = ip->i_d.di_uid;
		}
		if ((mask & XFS_AT_GID) && XFS_IS_GQUOTA_ON(mp)) {
			gid = vap->va_gid;
			qflags |= XFS_QMOPT_GQUOTA;
		} else {
			gid = ip->i_d.di_gid;
		}
		if ((mask & XFS_AT_PROJID) && XFS_IS_PQUOTA_ON(mp)) {
			projid = vap->va_projid;
			qflags |= XFS_QMOPT_PQUOTA;
		} else {
			projid = ip->i_d.di_projid;
		}
		/*
		 * We take a reference when we initialize udqp and gdqp,
		 * so it is important that we never blindly double trip on
		 * the same variable. See xfs_create() for an example.
		 */
		ASSERT(udqp == NULL);
		ASSERT(gdqp == NULL);
		code = XFS_QM_DQVOPALLOC(mp, ip, uid, gid, projid, qflags,
					 &udqp, &gdqp);
		if (code)
			return code;
	}

	/*
	 * For the other attributes, we acquire the inode lock and
	 * first do an error checking pass.
	 */
	tp = NULL;
	lock_flags = XFS_ILOCK_EXCL;
	if (flags & ATTR_NOLOCK)
		need_iolock = 0;
	if (!(mask & XFS_AT_SIZE)) {
		if ((mask != (XFS_AT_CTIME|XFS_AT_ATIME|XFS_AT_MTIME)) ||
		    (mp->m_flags & XFS_MOUNT_WSYNC)) {
			tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
			commit_flags = 0;
			if ((code = xfs_trans_reserve(tp, 0,
						     XFS_ICHANGE_LOG_RES(mp), 0,
						     0, 0))) {
				lock_flags = 0;
				goto error_return;
			}
		}
	} else {
		if (DM_EVENT_ENABLED(ip, DM_EVENT_TRUNCATE) &&
		    !(flags & ATTR_DMI)) {
			int dmflags = AT_DELAY_FLAG(flags) | DM_SEM_FLAG_WR;
			code = XFS_SEND_DATA(mp, DM_EVENT_TRUNCATE, vp,
				vap->va_size, 0, dmflags, NULL);
			if (code) {
				lock_flags = 0;
				goto error_return;
			}
		}
		if (need_iolock)
			lock_flags |= XFS_IOLOCK_EXCL;
	}

	xfs_ilock(ip, lock_flags);

	/* boolean: are we the file owner? */
	file_owner = (current_fsuid(credp) == ip->i_d.di_uid);

	/*
	 * Change various properties of a file.
	 * Only the owner or users with CAP_FOWNER
	 * capability may do these things.
	 */
	if (mask &
	    (XFS_AT_MODE|XFS_AT_XFLAGS|XFS_AT_EXTSIZE|XFS_AT_UID|
	     XFS_AT_GID|XFS_AT_PROJID)) {
		/*
		 * CAP_FOWNER overrides the following restrictions:
		 *
		 * The user ID of the calling process must be equal
		 * to the file owner ID, except in cases where the
		 * CAP_FSETID capability is applicable.
		 */
		if (!file_owner && !capable(CAP_FOWNER)) {
			code = XFS_ERROR(EPERM);
			goto error_return;
		}

		/*
		 * CAP_FSETID overrides the following restrictions:
		 *
		 * The effective user ID of the calling process shall match
		 * the file owner when setting the set-user-ID and
		 * set-group-ID bits on that file.
		 *
		 * The effective group ID or one of the supplementary group
		 * IDs of the calling process shall match the group owner of
		 * the file when setting the set-group-ID bit on that file
		 */
		if (mask & XFS_AT_MODE) {
			mode_t m = 0;

			if ((vap->va_mode & S_ISUID) && !file_owner)
				m |= S_ISUID;
			if ((vap->va_mode & S_ISGID) &&
			    !in_group_p((gid_t)ip->i_d.di_gid))
				m |= S_ISGID;
#if 0
			/* Linux allows this, Irix doesn't. */
			if ((vap->va_mode & S_ISVTX) && !VN_ISDIR(vp))
				m |= S_ISVTX;
#endif
			if (m && !capable(CAP_FSETID))
				vap->va_mode &= ~m;
		}
	}

	/*
	 * Change file ownership.  Must be the owner or privileged.
	 * If the system was configured with the "restricted_chown"
	 * option, the owner is not permitted to give away the file,
	 * and can change the group id only to a group of which he
	 * or she is a member.
	 */
	if (mask & (XFS_AT_UID|XFS_AT_GID|XFS_AT_PROJID)) {
		/*
		 * These IDs could have changed since we last looked at them.
		 * But, we're assured that if the ownership did change
		 * while we didn't have the inode locked, inode's dquot(s)
		 * would have changed also.
		 */
		iuid = ip->i_d.di_uid;
		iprojid = ip->i_d.di_projid;
		igid = ip->i_d.di_gid;
		gid = (mask & XFS_AT_GID) ? vap->va_gid : igid;
		uid = (mask & XFS_AT_UID) ? vap->va_uid : iuid;
		projid = (mask & XFS_AT_PROJID) ? (xfs_prid_t)vap->va_projid :
			 iprojid;

		/*
		 * CAP_CHOWN overrides the following restrictions:
		 *
		 * If _POSIX_CHOWN_RESTRICTED is defined, this capability
		 * shall override the restriction that a process cannot
		 * change the user ID of a file it owns and the restriction
		 * that the group ID supplied to the chown() function
		 * shall be equal to either the group ID or one of the
		 * supplementary group IDs of the calling process.
		 */
		if (restricted_chown &&
		    (iuid != uid || (igid != gid &&
				     !in_group_p((gid_t)gid))) &&
		    !capable(CAP_CHOWN)) {
			code = XFS_ERROR(EPERM);
			goto error_return;
		}
		/*
		 * Do a quota reservation only if uid/projid/gid is actually
		 * going to change.
		 */
		if ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) ||
		    (XFS_IS_PQUOTA_ON(mp) && iprojid != projid) ||
		    (XFS_IS_GQUOTA_ON(mp) && igid != gid)) {
			ASSERT(tp);
			code = XFS_QM_DQVOPCHOWNRESV(mp, tp, ip, udqp, gdqp,
						capable(CAP_FOWNER) ?
						XFS_QMOPT_FORCE_RES : 0);
			if (code)	/* out of quota */
				goto error_return;
		}
	}

	/*
	 * Truncate file.  Must have write permission and not be a directory.
	 */
	if (mask & XFS_AT_SIZE) {
		/* Short circuit the truncate case for zero length files */
		if ((vap->va_size == 0) &&
		    (ip->i_size == 0) && (ip->i_d.di_nextents == 0)) {
			xfs_iunlock(ip, XFS_ILOCK_EXCL);
			lock_flags &= ~XFS_ILOCK_EXCL;
			if (mask & XFS_AT_CTIME)
				xfs_ichgtime(ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
			code = 0;
			goto error_return;
		}

		if (VN_ISDIR(vp)) {
			code = XFS_ERROR(EISDIR);
			goto error_return;
		} else if (!VN_ISREG(vp)) {
			code = XFS_ERROR(EINVAL);
			goto error_return;
		}
		/*
		 * Make sure that the dquots are attached to the inode.
		 */
		if ((code = XFS_QM_DQATTACH(mp, ip, XFS_QMOPT_ILOCKED)))
			goto error_return;
	}

	/*
	 * Change file access or modified times.
	 */
	if (mask & (XFS_AT_ATIME|XFS_AT_MTIME)) {
		if (!file_owner) {
			if ((flags & ATTR_UTIME) &&
			    !capable(CAP_FOWNER)) {
				code = XFS_ERROR(EPERM);
				goto error_return;
			}
		}
	}

	/*
	 * Change extent size or realtime flag.
	 */
	if (mask & (XFS_AT_EXTSIZE|XFS_AT_XFLAGS)) {
		/*
		 * Can't change extent size if any extents are allocated.
		 */
		if (ip->i_d.di_nextents && (mask & XFS_AT_EXTSIZE) &&
		    ((ip->i_d.di_extsize << mp->m_sb.sb_blocklog) !=
		     vap->va_extsize) ) {
			code = XFS_ERROR(EINVAL);	/* EFBIG? */
			goto error_return;
		}

		/*
		 * Can't change realtime flag if any extents are allocated.
		 */
		if ((ip->i_d.di_nextents || ip->i_delayed_blks) &&
		    (mask & XFS_AT_XFLAGS) &&
		    (ip->i_d.di_flags & XFS_DIFLAG_REALTIME) !=
		    (vap->va_xflags & XFS_XFLAG_REALTIME)) {
			code = XFS_ERROR(EINVAL);	/* EFBIG? */
			goto error_return;
		}
		/*
		 * Extent size must be a multiple of the appropriate block
		 * size, if set at all.
		 */
		if ((mask & XFS_AT_EXTSIZE) && vap->va_extsize != 0) {
			xfs_extlen_t	size;

			if ((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) ||
			    ((mask & XFS_AT_XFLAGS) &&
			    (vap->va_xflags & XFS_XFLAG_REALTIME))) {
				size = mp->m_sb.sb_rextsize <<
				       mp->m_sb.sb_blocklog;
			} else {
				size = mp->m_sb.sb_blocksize;
			}
			if (vap->va_extsize % size) {
				code = XFS_ERROR(EINVAL);
				goto error_return;
			}
		}
		/*
		 * If realtime flag is set then must have realtime data.
		 */
		if ((mask & XFS_AT_XFLAGS) &&
		    (vap->va_xflags & XFS_XFLAG_REALTIME)) {
			if ((mp->m_sb.sb_rblocks == 0) ||
			    (mp->m_sb.sb_rextsize == 0) ||
			    (ip->i_d.di_extsize % mp->m_sb.sb_rextsize)) {
				code = XFS_ERROR(EINVAL);
				goto error_return;
			}
		}

		/*
		 * Can't modify an immutable/append-only file unless
		 * we have appropriate permission.
		 */
		if ((mask & XFS_AT_XFLAGS) &&
		    (ip->i_d.di_flags &
				(XFS_DIFLAG_IMMUTABLE|XFS_DIFLAG_APPEND) ||
		     (vap->va_xflags &
				(XFS_XFLAG_IMMUTABLE | XFS_XFLAG_APPEND))) &&
		    !capable(CAP_LINUX_IMMUTABLE)) {
			code = XFS_ERROR(EPERM);
			goto error_return;
		}
	}

	/*
	 * Now we can make the changes.  Before we join the inode
	 * to the transaction, if XFS_AT_SIZE is set then take care of
	 * the part of the truncation that must be done without the
	 * inode lock.  This needs to be done before joining the inode
	 * to the transaction, because the inode cannot be unlocked
	 * once it is a part of the transaction.
	 */
	if (mask & XFS_AT_SIZE) {
		code = 0;
		if ((vap->va_size > ip->i_size) &&
		    (flags & ATTR_NOSIZETOK) == 0) {
			code = xfs_igrow_start(ip, vap->va_size, credp);
		}
		xfs_iunlock(ip, XFS_ILOCK_EXCL);

		/*
		 * We are going to log the inode size change in this
		 * transaction so any previous writes that are beyond the on
		 * disk EOF and the new EOF that have not been written out need
		 * to be written here. If we do not write the data out, we
		 * expose ourselves to the null files problem.
		 *
		 * Only flush from the on disk size to the smaller of the in
		 * memory file size or the new size as that's the range we
		 * really care about here and prevents waiting for other data
		 * not within the range we care about here.
		 */
		if (!code &&
		    (ip->i_size != ip->i_d.di_size) &&
		    (vap->va_size > ip->i_d.di_size)) {
			code = xfs_flush_pages(ip,
					ip->i_d.di_size, vap->va_size,
					XFS_B_ASYNC, FI_NONE);
		}

		/* wait for all I/O to complete */
		vn_iowait(ip);

		if (!code)
			code = xfs_itruncate_data(ip, vap->va_size);
		if (code) {
			ASSERT(tp == NULL);
			lock_flags &= ~XFS_ILOCK_EXCL;
			ASSERT(lock_flags == XFS_IOLOCK_EXCL);
			goto error_return;
		}
		tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE);
		if ((code = xfs_trans_reserve(tp, 0,
					     XFS_ITRUNCATE_LOG_RES(mp), 0,
					     XFS_TRANS_PERM_LOG_RES,
					     XFS_ITRUNCATE_LOG_COUNT))) {
			xfs_trans_cancel(tp, 0);
			if (need_iolock)
				xfs_iunlock(ip, XFS_IOLOCK_EXCL);
			return code;
		}
		commit_flags = XFS_TRANS_RELEASE_LOG_RES;
		xfs_ilock(ip, XFS_ILOCK_EXCL);
	}

	if (tp) {
		xfs_trans_ijoin(tp, ip, lock_flags);
		xfs_trans_ihold(tp, ip);
	}

	/* determine whether mandatory locking mode changes */
	mandlock_before = MANDLOCK(vp, ip->i_d.di_mode);

	/*
	 * Truncate file.  Must have write permission and not be a directory.
	 */
	if (mask & XFS_AT_SIZE) {
		if (vap->va_size > ip->i_size) {
			xfs_igrow_finish(tp, ip, vap->va_size,
			    !(flags & ATTR_DMI));
		} else if ((vap->va_size <= ip->i_size) ||
			   ((vap->va_size == 0) && ip->i_d.di_nextents)) {
			/*
			 * signal a sync transaction unless
			 * we're truncating an already unlinked
			 * file on a wsync filesystem
			 */
			code = xfs_itruncate_finish(&tp, ip,
					    (xfs_fsize_t)vap->va_size,
					    XFS_DATA_FORK,
					    ((ip->i_d.di_nlink != 0 ||
					      !(mp->m_flags & XFS_MOUNT_WSYNC))
					     ? 1 : 0));
			if (code)
				goto abort_return;
			/*
			 * Truncated "down", so we're removing references
			 * to old data here - if we now delay flushing for
			 * a long time, we expose ourselves unduly to the
			 * notorious NULL files problem.  So, we mark this
			 * vnode and flush it when the file is closed, and
			 * do not wait the usual (long) time for writeout.
			 */
			xfs_iflags_set(ip, XFS_ITRUNCATED);
		}
		/*
		 * Have to do this even if the file's size doesn't change.
		 */
		timeflags |= XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG;
	}

	/*
	 * Change file access modes.
	 */
	if (mask & XFS_AT_MODE) {
		ip->i_d.di_mode &= S_IFMT;
		ip->i_d.di_mode |= vap->va_mode & ~S_IFMT;

		xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE);
		timeflags |= XFS_ICHGTIME_CHG;
	}

	/*
	 * Change file ownership.  Must be the owner or privileged.
	 * If the system was configured with the "restricted_chown"
	 * option, the owner is not permitted to give away the file,
	 * and can change the group id only to a group of which he
	 * or she is a member.
	 */
	if (mask & (XFS_AT_UID|XFS_AT_GID|XFS_AT_PROJID)) {
		/*
		 * CAP_FSETID overrides the following restrictions:
		 *
		 * The set-user-ID and set-group-ID bits of a file will be
		 * cleared upon successful return from chown()
		 */
		if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) &&
		    !capable(CAP_FSETID)) {
			ip->i_d.di_mode &= ~(S_ISUID|S_ISGID);
		}

		/*
		 * Change the ownerships and register quota modifications
		 * in the transaction.
		 */
		if (iuid != uid) {
			if (XFS_IS_UQUOTA_ON(mp)) {
				ASSERT(mask & XFS_AT_UID);
				ASSERT(udqp);
				olddquot1 = XFS_QM_DQVOPCHOWN(mp, tp, ip,
							&ip->i_udquot, udqp);
			}
			ip->i_d.di_uid = uid;
		}
		if (igid != gid) {
			if (XFS_IS_GQUOTA_ON(mp)) {
				ASSERT(!XFS_IS_PQUOTA_ON(mp));
				ASSERT(mask & XFS_AT_GID);
				ASSERT(gdqp);
				olddquot2 = XFS_QM_DQVOPCHOWN(mp, tp, ip,
							&ip->i_gdquot, gdqp);
			}
			ip->i_d.di_gid = gid;
		}
		if (iprojid != projid) {
			if (XFS_IS_PQUOTA_ON(mp)) {
				ASSERT(!XFS_IS_GQUOTA_ON(mp));
				ASSERT(mask & XFS_AT_PROJID);
				ASSERT(gdqp);
				olddquot2 = XFS_QM_DQVOPCHOWN(mp, tp, ip,
							&ip->i_gdquot, gdqp);
			}
			ip->i_d.di_projid = projid;
			/*
			 * We may have to rev the inode as well as
			 * the superblock version number since projids didn't
			 * exist before DINODE_VERSION_2 and SB_VERSION_NLINK.
			 */
			if (ip->i_d.di_version == XFS_DINODE_VERSION_1)
				xfs_bump_ino_vers2(tp, ip);
		}

		xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE);
		timeflags |= XFS_ICHGTIME_CHG;
	}


	/*
	 * Change file access or modified times.
	 */
	if (mask & (XFS_AT_ATIME|XFS_AT_MTIME)) {
		if (mask & XFS_AT_ATIME) {
			ip->i_d.di_atime.t_sec = vap->va_atime.tv_sec;
			ip->i_d.di_atime.t_nsec = vap->va_atime.tv_nsec;
			ip->i_update_core = 1;
			timeflags &= ~XFS_ICHGTIME_ACC;
		}
		if (mask & XFS_AT_MTIME) {
			ip->i_d.di_mtime.t_sec = vap->va_mtime.tv_sec;
			ip->i_d.di_mtime.t_nsec = vap->va_mtime.tv_nsec;
			timeflags &= ~XFS_ICHGTIME_MOD;
			timeflags |= XFS_ICHGTIME_CHG;
		}
		if (tp && (flags & ATTR_UTIME))
			xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE);
	}

	/*
	 * Change XFS-added attributes.
	 */
	if (mask & (XFS_AT_EXTSIZE|XFS_AT_XFLAGS)) {
		if (mask & XFS_AT_EXTSIZE) {
			/*
			 * Converting bytes to fs blocks.
			 */
			ip->i_d.di_extsize = vap->va_extsize >>
				mp->m_sb.sb_blocklog;
		}
		if (mask & XFS_AT_XFLAGS) {
			uint	di_flags;

			/* can't set PREALLOC this way, just preserve it */
			di_flags = (ip->i_d.di_flags & XFS_DIFLAG_PREALLOC);
			if (vap->va_xflags & XFS_XFLAG_IMMUTABLE)
				di_flags |= XFS_DIFLAG_IMMUTABLE;
			if (vap->va_xflags & XFS_XFLAG_APPEND)
				di_flags |= XFS_DIFLAG_APPEND;
			if (vap->va_xflags & XFS_XFLAG_SYNC)
				di_flags |= XFS_DIFLAG_SYNC;
			if (vap->va_xflags & XFS_XFLAG_NOATIME)
				di_flags |= XFS_DIFLAG_NOATIME;
			if (vap->va_xflags & XFS_XFLAG_NODUMP)
				di_flags |= XFS_DIFLAG_NODUMP;
			if (vap->va_xflags & XFS_XFLAG_PROJINHERIT)
				di_flags |= XFS_DIFLAG_PROJINHERIT;
			if (vap->va_xflags & XFS_XFLAG_NODEFRAG)
				di_flags |= XFS_DIFLAG_NODEFRAG;
			if (vap->va_xflags & XFS_XFLAG_FILESTREAM)
				di_flags |= XFS_DIFLAG_FILESTREAM;
			if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) {
				if (vap->va_xflags & XFS_XFLAG_RTINHERIT)
					di_flags |= XFS_DIFLAG_RTINHERIT;
				if (vap->va_xflags & XFS_XFLAG_NOSYMLINKS)
					di_flags |= XFS_DIFLAG_NOSYMLINKS;
				if (vap->va_xflags & XFS_XFLAG_EXTSZINHERIT)
					di_flags |= XFS_DIFLAG_EXTSZINHERIT;
			} else if ((ip->i_d.di_mode & S_IFMT) == S_IFREG) {
				if (vap->va_xflags & XFS_XFLAG_REALTIME) {
					di_flags |= XFS_DIFLAG_REALTIME;
					ip->i_iocore.io_flags |= XFS_IOCORE_RT;
				} else {
					ip->i_iocore.io_flags &= ~XFS_IOCORE_RT;
				}
				if (vap->va_xflags & XFS_XFLAG_EXTSIZE)
					di_flags |= XFS_DIFLAG_EXTSIZE;
			}
			ip->i_d.di_flags = di_flags;
		}
		xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
		timeflags |= XFS_ICHGTIME_CHG;
	}

	/*
	 * Change file inode change time only if XFS_AT_CTIME set
	 * AND we have been called by a DMI function.
	 */

	if ( (flags & ATTR_DMI) && (mask & XFS_AT_CTIME) ) {
		ip->i_d.di_ctime.t_sec = vap->va_ctime.tv_sec;
		ip->i_d.di_ctime.t_nsec = vap->va_ctime.tv_nsec;
		ip->i_update_core = 1;
		timeflags &= ~XFS_ICHGTIME_CHG;
	}

	/*
	 * Send out timestamp changes that need to be set to the
	 * current time.  Not done when called by a DMI function.
	 */
	if (timeflags && !(flags & ATTR_DMI))
		xfs_ichgtime(ip, timeflags);

	XFS_STATS_INC(xs_ig_attrchg);

	/*
	 * If this is a synchronous mount, make sure that the
	 * transaction goes to disk before returning to the user.
	 * This is slightly sub-optimal in that truncates require
	 * two sync transactions instead of one for wsync filesystems.
	 * One for the truncate and one for the timestamps since we
	 * don't want to change the timestamps unless we're sure the
	 * truncate worked.  Truncates are less than 1% of the laddis
	 * mix so this probably isn't worth the trouble to optimize.
	 */
	code = 0;
	if (tp) {
		if (mp->m_flags & XFS_MOUNT_WSYNC)
			xfs_trans_set_sync(tp);

		code = xfs_trans_commit(tp, commit_flags);
	}

	/*
	 * If the (regular) file's mandatory locking mode changed, then
	 * notify the vnode.  We do this under the inode lock to prevent
	 * racing calls to vop_vnode_change.
	 */
	mandlock_after = MANDLOCK(vp, ip->i_d.di_mode);

	xfs_iunlock(ip, lock_flags);

	/*
	 * Release any dquot(s) the inode had kept before chown.
	 */
	XFS_QM_DQRELE(mp, olddquot1);
	XFS_QM_DQRELE(mp, olddquot2);
	XFS_QM_DQRELE(mp, udqp);
	XFS_QM_DQRELE(mp, gdqp);

	if (code) {
		return code;
	}

	if (DM_EVENT_ENABLED(ip, DM_EVENT_ATTRIBUTE) &&
	    !(flags & ATTR_DMI)) {
		(void) XFS_SEND_NAMESP(mp, DM_EVENT_ATTRIBUTE, vp, DM_RIGHT_NULL,
					NULL, DM_RIGHT_NULL, NULL, NULL,
					0, 0, AT_DELAY_FLAG(flags));
	}
	return 0;

 abort_return:
	commit_flags |= XFS_TRANS_ABORT;
	/* FALLTHROUGH */
 error_return:
	XFS_QM_DQRELE(mp, udqp);
	XFS_QM_DQRELE(mp, gdqp);
	if (tp) {
		xfs_trans_cancel(tp, commit_flags);
	}
	if (lock_flags != 0) {
		xfs_iunlock(ip, lock_flags);
	}
	return code;
}


/*
 * xfs_access
 * Null conversion from vnode mode bits to inode mode bits, as in efs.
 */
int
xfs_access(
	xfs_inode_t	*ip,
	int		mode,
	cred_t		*credp)
{
	int		error;

	vn_trace_entry(ip, __FUNCTION__, (inst_t *)__return_address);

	xfs_ilock(ip, XFS_ILOCK_SHARED);
	error = xfs_iaccess(ip, mode, credp);
	xfs_iunlock(ip, XFS_ILOCK_SHARED);
	return error;
}


/*
 * The maximum pathlen is 1024 bytes. Since the minimum file system
 * blocksize is 512 bytes, we can get a max of 2 extents back from
 * bmapi.
 */
#define SYMLINK_MAPS 2

STATIC int
xfs_readlink_bmap(
	xfs_inode_t	*ip,
	char		*link)
{
	xfs_mount_t	*mp = ip->i_mount;
	int		pathlen = ip->i_d.di_size;
	int		nmaps = SYMLINK_MAPS;
	xfs_bmbt_irec_t	mval[SYMLINK_MAPS];
	xfs_daddr_t	d;
	int		byte_cnt;
	int		n;
	xfs_buf_t	*bp;
	int		error = 0;

	error = xfs_bmapi(NULL, ip, 0, XFS_B_TO_FSB(mp, pathlen), 0, NULL, 0,
			mval, &nmaps, NULL, NULL);
	if (error)
		goto out;

	for (n = 0; n < nmaps; n++) {
		d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock);
		byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount);

		bp = xfs_buf_read(mp->m_ddev_targp, d, BTOBB(byte_cnt), 0);
		error = XFS_BUF_GETERROR(bp);
		if (error) {
			xfs_ioerror_alert("xfs_readlink",
				  ip->i_mount, bp, XFS_BUF_ADDR(bp));
			xfs_buf_relse(bp);
			goto out;
		}
		if (pathlen < byte_cnt)
			byte_cnt = pathlen;
		pathlen -= byte_cnt;

		memcpy(link, XFS_BUF_PTR(bp), byte_cnt);
		xfs_buf_relse(bp);
	}

	link[ip->i_d.di_size] = '\0';
	error = 0;

 out:
	return error;
}

int
xfs_readlink(
	xfs_inode_t	*ip,
	char		*link)
{
	xfs_mount_t	*mp = ip->i_mount;
	int		pathlen;
	int		error = 0;

	vn_trace_entry(ip, __FUNCTION__, (inst_t *)__return_address);

	if (XFS_FORCED_SHUTDOWN(mp))
		return XFS_ERROR(EIO);

	xfs_ilock(ip, XFS_ILOCK_SHARED);

	ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFLNK);
	ASSERT(ip->i_d.di_size <= MAXPATHLEN);

	pathlen = ip->i_d.di_size;
	if (!pathlen)
		goto out;

	if (ip->i_df.if_flags & XFS_IFINLINE) {
		memcpy(link, ip->i_df.if_u1.if_data, pathlen);
		link[pathlen] = '\0';
	} else {
		error = xfs_readlink_bmap(ip, link);
	}

 out:
	xfs_iunlock(ip, XFS_ILOCK_SHARED);
	return error;
}

/*
 * xfs_fsync
 *
 * This is called to sync the inode and its data out to disk.
 * We need to hold the I/O lock while flushing the data, and
 * the inode lock while flushing the inode.  The inode lock CANNOT
 * be held while flushing the data, so acquire after we're done
 * with that.
 */
int
xfs_fsync(
	xfs_inode_t	*ip,
	int		flag,
	xfs_off_t	start,
	xfs_off_t	stop)
{
	xfs_trans_t	*tp;
	int		error;
	int		log_flushed = 0, changed = 1;

	vn_trace_entry(ip, __FUNCTION__, (inst_t *)__return_address);

	ASSERT(start >= 0 && stop >= -1);

	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
		return XFS_ERROR(EIO);

	if (flag & FSYNC_DATA)
		filemap_fdatawait(vn_to_inode(XFS_ITOV(ip))->i_mapping);

	/*
	 * We always need to make sure that the required inode state
	 * is safe on disk.  The vnode might be clean but because
	 * of committed transactions that haven't hit the disk yet.
	 * Likewise, there could be unflushed non-transactional
	 * changes to the inode core that have to go to disk.
	 *
	 * The following code depends on one assumption:  that
	 * any transaction that changes an inode logs the core
	 * because it has to change some field in the inode core
	 * (typically nextents or nblocks).  That assumption
	 * implies that any transactions against an inode will
	 * catch any non-transactional updates.  If inode-altering
	 * transactions exist that violate this assumption, the
	 * code breaks.  Right now, it figures that if the involved
	 * update_* field is clear and the inode is unpinned, the
	 * inode is clean.  Either it's been flushed or it's been
	 * committed and the commit has hit the disk unpinning the inode.
	 * (Note that xfs_inode_item_format() called at commit clears
	 * the update_* fields.)
	 */
	xfs_ilock(ip, XFS_ILOCK_SHARED);

	/* If we are flushing data then we care about update_size
	 * being set, otherwise we care about update_core
	 */
	if ((flag & FSYNC_DATA) ?
			(ip->i_update_size == 0) :
			(ip->i_update_core == 0)) {
		/*
		 * Timestamps/size haven't changed since last inode
		 * flush or inode transaction commit.  That means
		 * either nothing got written or a transaction
		 * committed which caught the updates.  If the
		 * latter happened and the transaction hasn't
		 * hit the disk yet, the inode will be still
		 * be pinned.  If it is, force the log.
		 */

		xfs_iunlock(ip, XFS_ILOCK_SHARED);

		if (xfs_ipincount(ip)) {
			_xfs_log_force(ip->i_mount, (xfs_lsn_t)0,
				      XFS_LOG_FORCE |
				      ((flag & FSYNC_WAIT)
				       ? XFS_LOG_SYNC : 0),
				      &log_flushed);
		} else {
			/*
			 * If the inode is not pinned and nothing
			 * has changed we don't need to flush the
			 * cache.
			 */
			changed = 0;
		}
		error = 0;
	} else	{
		/*
		 * Kick off a transaction to log the inode
		 * core to get the updates.  Make it
		 * sync if FSYNC_WAIT is passed in (which
		 * is done by everybody but specfs).  The
		 * sync transaction will also force the log.
		 */
		xfs_iunlock(ip, XFS_ILOCK_SHARED);
		tp = xfs_trans_alloc(ip->i_mount, XFS_TRANS_FSYNC_TS);
		if ((error = xfs_trans_reserve(tp, 0,
				XFS_FSYNC_TS_LOG_RES(ip->i_mount),
				0, 0, 0))) {
			xfs_trans_cancel(tp, 0);
			return error;
		}
		xfs_ilock(ip, XFS_ILOCK_EXCL);

		/*
		 * Note - it's possible that we might have pushed
		 * ourselves out of the way during trans_reserve
		 * which would flush the inode.   But there's no
		 * guarantee that the inode buffer has actually
		 * gone out yet (it's delwri).   Plus the buffer
		 * could be pinned anyway if it's part of an
		 * inode in another recent transaction.  So we
		 * play it safe and fire off the transaction anyway.
		 */
		xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
		xfs_trans_ihold(tp, ip);
		xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
		if (flag & FSYNC_WAIT)
			xfs_trans_set_sync(tp);
		error = _xfs_trans_commit(tp, 0, &log_flushed);

		xfs_iunlock(ip, XFS_ILOCK_EXCL);
	}

	if ((ip->i_mount->m_flags & XFS_MOUNT_BARRIER) && changed) {
		/*
		 * If the log write didn't issue an ordered tag we need
		 * to flush the disk cache for the data device now.
		 */
		if (!log_flushed)
			xfs_blkdev_issue_flush(ip->i_mount->m_ddev_targp);

		/*
		 * If this inode is on the RT dev we need to flush that
		 * cache as well.
		 */
		if (ip->i_d.di_flags & XFS_DIFLAG_REALTIME)
			xfs_blkdev_issue_flush(ip->i_mount->m_rtdev_targp);
	}

	return error;
}

/*
 * This is called by xfs_inactive to free any blocks beyond eof
 * when the link count isn't zero and by xfs_dm_punch_hole() when
 * punching a hole to EOF.
 */
int
xfs_free_eofblocks(
	xfs_mount_t	*mp,
	xfs_inode_t	*ip,
	int		flags)
{
	xfs_trans_t	*tp;
	int		error;
	xfs_fileoff_t	end_fsb;
	xfs_fileoff_t	last_fsb;
	xfs_filblks_t	map_len;
	int		nimaps;
	xfs_bmbt_irec_t	imap;
	int		use_iolock = (flags & XFS_FREE_EOF_LOCK);

	/*
	 * Figure out if there are any blocks beyond the end
	 * of the file.  If not, then there is nothing to do.
	 */
	end_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)ip->i_size));
	last_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp));
	map_len = last_fsb - end_fsb;
	if (map_len <= 0)
		return 0;

	nimaps = 1;
	xfs_ilock(ip, XFS_ILOCK_SHARED);
	error = XFS_BMAPI(mp, NULL, &ip->i_iocore, end_fsb, map_len, 0,
			  NULL, 0, &imap, &nimaps, NULL, NULL);
	xfs_iunlock(ip, XFS_ILOCK_SHARED);

	if (!error && (nimaps != 0) &&
	    (imap.br_startblock != HOLESTARTBLOCK ||
	     ip->i_delayed_blks)) {
		/*
		 * Attach the dquots to the inode up front.
		 */
		if ((error = XFS_QM_DQATTACH(mp, ip, 0)))
			return error;

		/*
		 * There are blocks after the end of file.
		 * Free them up now by truncating the file to
		 * its current size.
		 */
		tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);

		/*
		 * Do the xfs_itruncate_start() call before
		 * reserving any log space because
		 * itruncate_start will call into the buffer
		 * cache and we can't
		 * do that within a transaction.
		 */
		if (use_iolock)
			xfs_ilock(ip, XFS_IOLOCK_EXCL);
		error = xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE,
				    ip->i_size);
		if (error) {
			xfs_trans_cancel(tp, 0);
			if (use_iolock)
				xfs_iunlock(ip, XFS_IOLOCK_EXCL);
			return error;
		}

		error = xfs_trans_reserve(tp, 0,
					  XFS_ITRUNCATE_LOG_RES(mp),
					  0, XFS_TRANS_PERM_LOG_RES,
					  XFS_ITRUNCATE_LOG_COUNT);
		if (error) {
			ASSERT(XFS_FORCED_SHUTDOWN(mp));
			xfs_trans_cancel(tp, 0);
			xfs_iunlock(ip, XFS_IOLOCK_EXCL);
			return error;
		}

		xfs_ilock(ip, XFS_ILOCK_EXCL);
		xfs_trans_ijoin(tp, ip,
				XFS_IOLOCK_EXCL |
				XFS_ILOCK_EXCL);
		xfs_trans_ihold(tp, ip);

		error = xfs_itruncate_finish(&tp, ip,
					     ip->i_size,
					     XFS_DATA_FORK,
					     0);
		/*
		 * If we get an error at this point we
		 * simply don't bother truncating the file.
		 */
		if (error) {
			xfs_trans_cancel(tp,
					 (XFS_TRANS_RELEASE_LOG_RES |
					  XFS_TRANS_ABORT));
		} else {
			error = xfs_trans_commit(tp,
						XFS_TRANS_RELEASE_LOG_RES);
		}
		xfs_iunlock(ip, (use_iolock ? (XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL)
					    : XFS_ILOCK_EXCL));
	}
	return error;
}

/*
 * Free a symlink that has blocks associated with it.
 */
STATIC int
xfs_inactive_symlink_rmt(
	xfs_inode_t	*ip,
	xfs_trans_t	**tpp)
{
	xfs_buf_t	*bp;
	int		committed;
	int		done;
	int		error;
	xfs_fsblock_t	first_block;
	xfs_bmap_free_t	free_list;
	int		i;
	xfs_mount_t	*mp;
	xfs_bmbt_irec_t	mval[SYMLINK_MAPS];
	int		nmaps;
	xfs_trans_t	*ntp;
	int		size;
	xfs_trans_t	*tp;

	tp = *tpp;
	mp = ip->i_mount;
	ASSERT(ip->i_d.di_size > XFS_IFORK_DSIZE(ip));
	/*
	 * We're freeing a symlink that has some
	 * blocks allocated to it.  Free the
	 * blocks here.  We know that we've got
	 * either 1 or 2 extents and that we can
	 * free them all in one bunmapi call.
	 */
	ASSERT(ip->i_d.di_nextents > 0 && ip->i_d.di_nextents <= 2);
	if ((error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0,
			XFS_TRANS_PERM_LOG_RES, XFS_ITRUNCATE_LOG_COUNT))) {
		ASSERT(XFS_FORCED_SHUTDOWN(mp));
		xfs_trans_cancel(tp, 0);
		*tpp = NULL;
		return error;
	}
	/*
	 * Lock the inode, fix the size, and join it to the transaction.
	 * Hold it so in the normal path, we still have it locked for
	 * the second transaction.  In the error paths we need it
	 * held so the cancel won't rele it, see below.
	 */
	xfs_ilock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
	size = (int)ip->i_d.di_size;
	ip->i_d.di_size = 0;
	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
	xfs_trans_ihold(tp, ip);
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
	/*
	 * Find the block(s) so we can inval and unmap them.
	 */
	done = 0;
	XFS_BMAP_INIT(&free_list, &first_block);
	nmaps = ARRAY_SIZE(mval);
	if ((error = xfs_bmapi(tp, ip, 0, XFS_B_TO_FSB(mp, size),
			XFS_BMAPI_METADATA, &first_block, 0, mval, &nmaps,
			&free_list, NULL)))
		goto error0;
	/*
	 * Invalidate the block(s).
	 */
	for (i = 0; i < nmaps; i++) {
		bp = xfs_trans_get_buf(tp, mp->m_ddev_targp,
			XFS_FSB_TO_DADDR(mp, mval[i].br_startblock),
			XFS_FSB_TO_BB(mp, mval[i].br_blockcount), 0);
		xfs_trans_binval(tp, bp);
	}
	/*
	 * Unmap the dead block(s) to the free_list.
	 */
	if ((error = xfs_bunmapi(tp, ip, 0, size, XFS_BMAPI_METADATA, nmaps,
			&first_block, &free_list, NULL, &done)))
		goto error1;
	ASSERT(done);
	/*
	 * Commit the first transaction.  This logs the EFI and the inode.
	 */
	if ((error = xfs_bmap_finish(&tp, &free_list, &committed)))
		goto error1;
	/*
	 * The transaction must have been committed, since there were
	 * actually extents freed by xfs_bunmapi.  See xfs_bmap_finish.
	 * The new tp has the extent freeing and EFDs.
	 */
	ASSERT(committed);
	/*
	 * The first xact was committed, so add the inode to the new one.
	 * Mark it dirty so it will be logged and moved forward in the log as
	 * part of every commit.
	 */
	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
	xfs_trans_ihold(tp, ip);
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
	/*
	 * Get a new, empty transaction to return to our caller.
	 */
	ntp = xfs_trans_dup(tp);
	/*
	 * Commit the transaction containing extent freeing and EFDs.
	 * If we get an error on the commit here or on the reserve below,
	 * we need to unlock the inode since the new transaction doesn't
	 * have the inode attached.
	 */
	error = xfs_trans_commit(tp, 0);
	tp = ntp;
	if (error) {
		ASSERT(XFS_FORCED_SHUTDOWN(mp));
		goto error0;
	}
	/*
	 * Remove the memory for extent descriptions (just bookkeeping).
	 */
	if (ip->i_df.if_bytes)
		xfs_idata_realloc(ip, -ip->i_df.if_bytes, XFS_DATA_FORK);
	ASSERT(ip->i_df.if_bytes == 0);
	/*
	 * Put an itruncate log reservation in the new transaction
	 * for our caller.
	 */
	if ((error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0,
			XFS_TRANS_PERM_LOG_RES, XFS_ITRUNCATE_LOG_COUNT))) {
		ASSERT(XFS_FORCED_SHUTDOWN(mp));
		goto error0;
	}
	/*
	 * Return with the inode locked but not joined to the transaction.
	 */
	*tpp = tp;
	return 0;

 error1:
	xfs_bmap_cancel(&free_list);
 error0:
	/*
	 * Have to come here with the inode locked and either
	 * (held and in the transaction) or (not in the transaction).
	 * If the inode isn't held then cancel would iput it, but
	 * that's wrong since this is inactive and the vnode ref
	 * count is 0 already.
	 * Cancel won't do anything to the inode if held, but it still
	 * needs to be locked until the cancel is done, if it was
	 * joined to the transaction.
	 */
	xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
	xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
	*tpp = NULL;
	return error;

}

STATIC int
xfs_inactive_symlink_local(
	xfs_inode_t	*ip,
	xfs_trans_t	**tpp)
{
	int		error;

	ASSERT(ip->i_d.di_size <= XFS_IFORK_DSIZE(ip));
	/*
	 * We're freeing a symlink which fit into
	 * the inode.  Just free the memory used
	 * to hold the old symlink.
	 */
	error = xfs_trans_reserve(*tpp, 0,
				  XFS_ITRUNCATE_LOG_RES(ip->i_mount),
				  0, XFS_TRANS_PERM_LOG_RES,
				  XFS_ITRUNCATE_LOG_COUNT);

	if (error) {
		xfs_trans_cancel(*tpp, 0);
		*tpp = NULL;
		return error;
	}
	xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);

	/*
	 * Zero length symlinks _can_ exist.
	 */
	if (ip->i_df.if_bytes > 0) {
		xfs_idata_realloc(ip,
				  -(ip->i_df.if_bytes),
				  XFS_DATA_FORK);
		ASSERT(ip->i_df.if_bytes == 0);
	}
	return 0;
}

STATIC int
xfs_inactive_attrs(
	xfs_inode_t	*ip,
	xfs_trans_t	**tpp)
{
	xfs_trans_t	*tp;
	int		error;
	xfs_mount_t	*mp;

	ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE));
	tp = *tpp;
	mp = ip->i_mount;
	ASSERT(ip->i_d.di_forkoff != 0);
	xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);

	error = xfs_attr_inactive(ip);
	if (error) {
		*tpp = NULL;
		xfs_iunlock(ip, XFS_IOLOCK_EXCL);
		return error; /* goto out */
	}

	tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
	error = xfs_trans_reserve(tp, 0,
				  XFS_IFREE_LOG_RES(mp),
				  0, XFS_TRANS_PERM_LOG_RES,
				  XFS_INACTIVE_LOG_COUNT);
	if (error) {
		ASSERT(XFS_FORCED_SHUTDOWN(mp));
		xfs_trans_cancel(tp, 0);
		*tpp = NULL;
		xfs_iunlock(ip, XFS_IOLOCK_EXCL);
		return error;
	}

	xfs_ilock(ip, XFS_ILOCK_EXCL);
	xfs_trans_ijoin(tp, ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
	xfs_trans_ihold(tp, ip);
	xfs_idestroy_fork(ip, XFS_ATTR_FORK);

	ASSERT(ip->i_d.di_anextents == 0);

	*tpp = tp;
	return 0;
}

int
xfs_release(
	xfs_inode_t	*ip)
{
	bhv_vnode_t	*vp = XFS_ITOV(ip);
	xfs_mount_t	*mp = ip->i_mount;
	int		error;

	if (!VN_ISREG(vp) || (ip->i_d.di_mode == 0))
		return 0;

	/* If this is a read-only mount, don't do this (would generate I/O) */
	if (mp->m_flags & XFS_MOUNT_RDONLY)
		return 0;

	if (!XFS_FORCED_SHUTDOWN(mp)) {
		int truncated;

		/*
		 * If we are using filestreams, and we have an unlinked
		 * file that we are processing the last close on, then nothing
		 * will be able to reopen and write to this file. Purge this
		 * inode from the filestreams cache so that it doesn't delay
		 * teardown of the inode.
		 */
		if ((ip->i_d.di_nlink == 0) && xfs_inode_is_filestream(ip))
			xfs_filestream_deassociate(ip);

		/*
		 * If we previously truncated this file and removed old data
		 * in the process, we want to initiate "early" writeout on
		 * the last close.  This is an attempt to combat the notorious
		 * NULL files problem which is particularly noticable from a
		 * truncate down, buffered (re-)write (delalloc), followed by
		 * a crash.  What we are effectively doing here is
		 * significantly reducing the time window where we'd otherwise
		 * be exposed to that problem.
		 */
		truncated = xfs_iflags_test_and_clear(ip, XFS_ITRUNCATED);
		if (truncated && VN_DIRTY(vp) && ip->i_delayed_blks > 0)
			xfs_flush_pages(ip, 0, -1, XFS_B_ASYNC, FI_NONE);
	}

#ifdef HAVE_REFCACHE
	/* If we are in the NFS reference cache then don't do this now */
	if (ip->i_refcache)
		return 0;
#endif

	if (ip->i_d.di_nlink != 0) {
		if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) &&
		     ((ip->i_size > 0) || (VN_CACHED(vp) > 0 ||
		       ip->i_delayed_blks > 0)) &&
		     (ip->i_df.if_flags & XFS_IFEXTENTS))  &&
		    (!(ip->i_d.di_flags &
				(XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))) {
			error = xfs_free_eofblocks(mp, ip, XFS_FREE_EOF_LOCK);
			if (error)
				return error;
			/* Update linux inode block count after free above */
			vn_to_inode(vp)->i_blocks = XFS_FSB_TO_BB(mp,
				ip->i_d.di_nblocks + ip->i_delayed_blks);
		}
	}

	return 0;
}

/*
 * xfs_inactive
 *
 * This is called when the vnode reference count for the vnode
 * goes to zero.  If the file has been unlinked, then it must
 * now be truncated.  Also, we clear all of the read-ahead state
 * kept for the inode here since the file is now closed.
 */
int
xfs_inactive(
	xfs_inode_t	*ip)
{
	bhv_vnode_t	*vp = XFS_ITOV(ip);
	xfs_bmap_free_t	free_list;
	xfs_fsblock_t	first_block;
	int		committed;
	xfs_trans_t	*tp;
	xfs_mount_t	*mp;
	int		error;
	int		truncate;

	vn_trace_entry(ip, __FUNCTION__, (inst_t *)__return_address);

	/*
	 * If the inode is already free, then there can be nothing
	 * to clean up here.
	 */
	if (ip->i_d.di_mode == 0 || VN_BAD(vp)) {
		ASSERT(ip->i_df.if_real_bytes == 0);
		ASSERT(ip->i_df.if_broot_bytes == 0);
		return VN_INACTIVE_CACHE;
	}

	/*
	 * Only do a truncate if it's a regular file with
	 * some actual space in it.  It's OK to look at the
	 * inode's fields without the lock because we're the
	 * only one with a reference to the inode.
	 */
	truncate = ((ip->i_d.di_nlink == 0) &&
	    ((ip->i_d.di_size != 0) || (ip->i_size != 0) ||
	     (ip->i_d.di_nextents > 0) || (ip->i_delayed_blks > 0)) &&
	    ((ip->i_d.di_mode & S_IFMT) == S_IFREG));

	mp = ip->i_mount;

	if (ip->i_d.di_nlink == 0 && DM_EVENT_ENABLED(ip, DM_EVENT_DESTROY)) {
		(void) XFS_SEND_DESTROY(mp, vp, DM_RIGHT_NULL);
	}

	error = 0;

	/* If this is a read-only mount, don't do this (would generate I/O) */
	if (mp->m_flags & XFS_MOUNT_RDONLY)
		goto out;

	if (ip->i_d.di_nlink != 0) {
		if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) &&
		     ((ip->i_size > 0) || (VN_CACHED(vp) > 0 ||
		       ip->i_delayed_blks > 0)) &&
		      (ip->i_df.if_flags & XFS_IFEXTENTS) &&
		     (!(ip->i_d.di_flags &
				(XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) ||
		      (ip->i_delayed_blks != 0)))) {
			error = xfs_free_eofblocks(mp, ip, XFS_FREE_EOF_LOCK);
			if (error)
				return VN_INACTIVE_CACHE;
			/* Update linux inode block count after free above */
			vn_to_inode(vp)->i_blocks = XFS_FSB_TO_BB(mp,
				ip->i_d.di_nblocks + ip->i_delayed_blks);
		}
		goto out;
	}

	ASSERT(ip->i_d.di_nlink == 0);

	if ((error = XFS_QM_DQATTACH(mp, ip, 0)))
		return VN_INACTIVE_CACHE;

	tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
	if (truncate) {
		/*
		 * Do the xfs_itruncate_start() call before
		 * reserving any log space because itruncate_start
		 * will call into the buffer cache and we can't
		 * do that within a transaction.
		 */
		xfs_ilock(ip, XFS_IOLOCK_EXCL);

		error = xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE, 0);
		if (error) {
			xfs_trans_cancel(tp, 0);
			xfs_iunlock(ip, XFS_IOLOCK_EXCL);
			return VN_INACTIVE_CACHE;
		}

		error = xfs_trans_reserve(tp, 0,
					  XFS_ITRUNCATE_LOG_RES(mp),
					  0, XFS_TRANS_PERM_LOG_RES,
					  XFS_ITRUNCATE_LOG_COUNT);
		if (error) {
			/* Don't call itruncate_cleanup */
			ASSERT(XFS_FORCED_SHUTDOWN(mp));
			xfs_trans_cancel(tp, 0);
			xfs_iunlock(ip, XFS_IOLOCK_EXCL);
			return VN_INACTIVE_CACHE;
		}

		xfs_ilock(ip, XFS_ILOCK_EXCL);
		xfs_trans_ijoin(tp, ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
		xfs_trans_ihold(tp, ip);

		/*
		 * normally, we have to run xfs_itruncate_finish sync.
		 * But if filesystem is wsync and we're in the inactive
		 * path, then we know that nlink == 0, and that the
		 * xaction that made nlink == 0 is permanently committed
		 * since xfs_remove runs as a synchronous transaction.
		 */
		error = xfs_itruncate_finish(&tp, ip, 0, XFS_DATA_FORK,
				(!(mp->m_flags & XFS_MOUNT_WSYNC) ? 1 : 0));

		if (error) {
			xfs_trans_cancel(tp,
				XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
			xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
			return VN_INACTIVE_CACHE;
		}
	} else if ((ip->i_d.di_mode & S_IFMT) == S_IFLNK) {

		/*
		 * If we get an error while cleaning up a
		 * symlink we bail out.
		 */
		error = (ip->i_d.di_size > XFS_IFORK_DSIZE(ip)) ?
			xfs_inactive_symlink_rmt(ip, &tp) :
			xfs_inactive_symlink_local(ip, &tp);

		if (error) {
			ASSERT(tp == NULL);
			return VN_INACTIVE_CACHE;
		}

		xfs_trans_ijoin(tp, ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
		xfs_trans_ihold(tp, ip);
	} else {
		error = xfs_trans_reserve(tp, 0,
					  XFS_IFREE_LOG_RES(mp),
					  0, XFS_TRANS_PERM_LOG_RES,
					  XFS_INACTIVE_LOG_COUNT);
		if (error) {
			ASSERT(XFS_FORCED_SHUTDOWN(mp));
			xfs_trans_cancel(tp, 0);
			return VN_INACTIVE_CACHE;
		}

		xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
		xfs_trans_ijoin(tp, ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
		xfs_trans_ihold(tp, ip);
	}

	/*
	 * If there are attributes associated with the file
	 * then blow them away now.  The code calls a routine
	 * that recursively deconstructs the attribute fork.
	 * We need to just commit the current transaction
	 * because we can't use it for xfs_attr_inactive().
	 */
	if (ip->i_d.di_anextents > 0) {
		error = xfs_inactive_attrs(ip, &tp);
		/*
		 * If we got an error, the transaction is already
		 * cancelled, and the inode is unlocked. Just get out.
		 */
		 if (error)
			 return VN_INACTIVE_CACHE;
	} else if (ip->i_afp) {
		xfs_idestroy_fork(ip, XFS_ATTR_FORK);
	}

	/*
	 * Free the inode.
	 */
	XFS_BMAP_INIT(&free_list, &first_block);
	error = xfs_ifree(tp, ip, &free_list);
	if (error) {
		/*
		 * If we fail to free the inode, shut down.  The cancel
		 * might do that, we need to make sure.  Otherwise the
		 * inode might be lost for a long time or forever.
		 */
		if (!XFS_FORCED_SHUTDOWN(mp)) {
			cmn_err(CE_NOTE,
		"xfs_inactive: xfs_ifree() returned an error = %d on %s",
				error, mp->m_fsname);
			xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
		}
		xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
	} else {
		/*
		 * Credit the quota account(s). The inode is gone.
		 */
		XFS_TRANS_MOD_DQUOT_BYINO(mp, tp, ip, XFS_TRANS_DQ_ICOUNT, -1);

		/*
		 * Just ignore errors at this point.  There is
		 * nothing we can do except to try to keep going.
		 */
		(void) xfs_bmap_finish(&tp,  &free_list, &committed);
		(void) xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
	}
	/*
	 * Release the dquots held by inode, if any.
	 */
	XFS_QM_DQDETACH(mp, ip);

	xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);

 out:
	return VN_INACTIVE_CACHE;
}


int
xfs_lookup(
	xfs_inode_t		*dp,
	bhv_vname_t		*dentry,
	bhv_vnode_t		**vpp)
{
	xfs_inode_t		*ip;
	xfs_ino_t		e_inum;
	int			error;
	uint			lock_mode;

	vn_trace_entry(dp, __FUNCTION__, (inst_t *)__return_address);

	if (XFS_FORCED_SHUTDOWN(dp->i_mount))
		return XFS_ERROR(EIO);

	lock_mode = xfs_ilock_map_shared(dp);
	error = xfs_dir_lookup_int(dp, lock_mode, dentry, &e_inum, &ip);
	if (!error) {
		*vpp = XFS_ITOV(ip);
		ITRACE(ip);
	}
	xfs_iunlock_map_shared(dp, lock_mode);
	return error;
}

int
xfs_create(
	xfs_inode_t		*dp,
	bhv_vname_t		*dentry,
	mode_t			mode,
	xfs_dev_t		rdev,
	bhv_vnode_t		**vpp,
	cred_t			*credp)
{
	char			*name = VNAME(dentry);
	xfs_mount_t		*mp = dp->i_mount;
	bhv_vnode_t		*dir_vp = XFS_ITOV(dp);
	xfs_inode_t		*ip;
	bhv_vnode_t		*vp = NULL;
	xfs_trans_t		*tp;
	int			error;
	xfs_bmap_free_t		free_list;
	xfs_fsblock_t		first_block;
	boolean_t		unlock_dp_on_error = B_FALSE;
	int			dm_event_sent = 0;
	uint			cancel_flags;
	int			committed;
	xfs_prid_t		prid;
	struct xfs_dquot	*udqp, *gdqp;
	uint			resblks;
	int			namelen;

	ASSERT(!*vpp);
	vn_trace_entry(dp, __FUNCTION__, (inst_t *)__return_address);

	namelen = VNAMELEN(dentry);

	if (DM_EVENT_ENABLED(dp, DM_EVENT_CREATE)) {
		error = XFS_SEND_NAMESP(mp, DM_EVENT_CREATE,
					dir_vp, DM_RIGHT_NULL, NULL,
					DM_RIGHT_NULL, name, NULL,
					mode, 0, 0);

		if (error)
			return error;
		dm_event_sent = 1;
	}

	if (XFS_FORCED_SHUTDOWN(mp))
		return XFS_ERROR(EIO);

	/* Return through std_return after this point. */

	udqp = gdqp = NULL;
	if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
		prid = dp->i_d.di_projid;
	else
		prid = (xfs_prid_t)dfltprid;

	/*
	 * Make sure that we have allocated dquot(s) on disk.
	 */
	error = XFS_QM_DQVOPALLOC(mp, dp,
			current_fsuid(credp), current_fsgid(credp), prid,
			XFS_QMOPT_QUOTALL|XFS_QMOPT_INHERIT, &udqp, &gdqp);
	if (error)
		goto std_return;

	ip = NULL;

	tp = xfs_trans_alloc(mp, XFS_TRANS_CREATE);
	cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
	resblks = XFS_CREATE_SPACE_RES(mp, namelen);
	/*
	 * Initially assume that the file does not exist and
	 * reserve the resources for that case.  If that is not
	 * the case we'll drop the one we have and get a more
	 * appropriate transaction later.
	 */
	error = xfs_trans_reserve(tp, resblks, XFS_CREATE_LOG_RES(mp), 0,
			XFS_TRANS_PERM_LOG_RES, XFS_CREATE_LOG_COUNT);
	if (error == ENOSPC) {
		resblks = 0;
		error = xfs_trans_reserve(tp, 0, XFS_CREATE_LOG_RES(mp), 0,
				XFS_TRANS_PERM_LOG_RES, XFS_CREATE_LOG_COUNT);
	}
	if (error) {
		cancel_flags = 0;
		goto error_return;
	}

	xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
	unlock_dp_on_error = B_TRUE;

	XFS_BMAP_INIT(&free_list, &first_block);

	ASSERT(ip == NULL);

	/*
	 * Reserve disk quota and the inode.
	 */
	error = XFS_TRANS_RESERVE_QUOTA(mp, tp, udqp, gdqp, resblks, 1, 0);
	if (error)
		goto error_return;

	if (resblks == 0 && (error = xfs_dir_canenter(tp, dp, name, namelen)))
		goto error_return;
	error = xfs_dir_ialloc(&tp, dp, mode, 1,
			rdev, credp, prid, resblks > 0,
			&ip, &committed);
	if (error) {
		if (error == ENOSPC)
			goto error_return;
		goto abort_return;
	}
	ITRACE(ip);

	/*
	 * At this point, we've gotten a newly allocated inode.
	 * It is locked (and joined to the transaction).
	 */

	ASSERT(ismrlocked (&ip->i_lock, MR_UPDATE));

	/*
	 * Now we join the directory inode to the transaction.  We do not do it
	 * earlier because xfs_dir_ialloc might commit the previous transaction
	 * (and release all the locks).  An error from here on will result in
	 * the transaction cancel unlocking dp so don't do it explicitly in the
	 * error path.
	 */
	VN_HOLD(dir_vp);
	xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
	unlock_dp_on_error = B_FALSE;

	error = xfs_dir_createname(tp, dp, name, namelen, ip->i_ino,
					&first_block, &free_list, resblks ?
					resblks - XFS_IALLOC_SPACE_RES(mp) : 0);
	if (error) {
		ASSERT(error != ENOSPC);
		goto abort_return;
	}
	xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
	xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);

	/*
	 * If this is a synchronous mount, make sure that the
	 * create transaction goes to disk before returning to
	 * the user.
	 */
	if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) {
		xfs_trans_set_sync(tp);
	}

	dp->i_gen++;

	/*
	 * Attach the dquot(s) to the inodes and modify them incore.
	 * These ids of the inode couldn't have changed since the new
	 * inode has been locked ever since it was created.
	 */
	XFS_QM_DQVOPCREATE(mp, tp, ip, udqp, gdqp);

	/*
	 * xfs_trans_commit normally decrements the vnode ref count
	 * when it unlocks the inode. Since we want to return the
	 * vnode to the caller, we bump the vnode ref count now.
	 */
	IHOLD(ip);
	vp = XFS_ITOV(ip);

	error = xfs_bmap_finish(&tp, &free_list, &committed);
	if (error) {
		xfs_bmap_cancel(&free_list);
		goto abort_rele;
	}

	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
	if (error) {
		IRELE(ip);
		tp = NULL;
		goto error_return;
	}

	XFS_QM_DQRELE(mp, udqp);
	XFS_QM_DQRELE(mp, gdqp);

	*vpp = vp;

	/* Fallthrough to std_return with error = 0  */

std_return:
	if ((*vpp || (error != 0 && dm_event_sent != 0)) &&
	    DM_EVENT_ENABLED(dp, DM_EVENT_POSTCREATE)) {
		(void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTCREATE,
			dir_vp, DM_RIGHT_NULL,
			*vpp ? vp:NULL,
			DM_RIGHT_NULL, name, NULL,
			mode, error, 0);
	}
	return error;

 abort_return:
	cancel_flags |= XFS_TRANS_ABORT;
	/* FALLTHROUGH */

 error_return:
	if (tp != NULL)
		xfs_trans_cancel(tp, cancel_flags);

	XFS_QM_DQRELE(mp, udqp);
	XFS_QM_DQRELE(mp, gdqp);

	if (unlock_dp_on_error)
		xfs_iunlock(dp, XFS_ILOCK_EXCL);

	goto std_return;

 abort_rele:
	/*
	 * Wait until after the current transaction is aborted to
	 * release the inode.  This prevents recursive transactions
	 * and deadlocks from xfs_inactive.
2041 */ 2042 cancel_flags |= XFS_TRANS_ABORT; 2043 xfs_trans_cancel(tp, cancel_flags); 2044 IRELE(ip); 2045 2046 XFS_QM_DQRELE(mp, udqp); 2047 XFS_QM_DQRELE(mp, gdqp); 2048 2049 goto std_return; 2050} 2051 2052#ifdef DEBUG 2053/* 2054 * Some counters to see if (and how often) we are hitting some deadlock 2055 * prevention code paths. 2056 */ 2057 2058int xfs_rm_locks; 2059int xfs_rm_lock_delays; 2060int xfs_rm_attempts; 2061#endif 2062 2063/* 2064 * The following routine will lock the inodes associated with the 2065 * directory and the named entry in the directory. The locks are 2066 * acquired in increasing inode number. 2067 * 2068 * If the entry is "..", then only the directory is locked. The 2069 * vnode ref count will still include that from the .. entry in 2070 * this case. 2071 * 2072 * There is a deadlock we need to worry about. If the locked directory is 2073 * in the AIL, it might be blocking up the log. The next inode we lock 2074 * could be already locked by another thread waiting for log space (e.g. 2075 * a permanent log reservation with a long running transaction (see 2076 * xfs_itruncate_finish)). To solve this, we must check if the directory 2077 * is in the AIL and use lock_nowait. If we can't lock, we need to 2078 * drop the inode lock on the directory and try again. xfs_iunlock will 2079 * potentially push the tail if we were holding up the log. 2080 */ 2081STATIC int 2082xfs_lock_dir_and_entry( 2083 xfs_inode_t *dp, 2084 xfs_inode_t *ip) /* inode of entry 'name' */ 2085{ 2086 int attempts; 2087 xfs_ino_t e_inum; 2088 xfs_inode_t *ips[2]; 2089 xfs_log_item_t *lp; 2090 2091#ifdef DEBUG 2092 xfs_rm_locks++; 2093#endif 2094 attempts = 0; 2095 2096again: 2097 xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT); 2098 2099 e_inum = ip->i_ino; 2100 2101 ITRACE(ip); 2102 2103 /* 2104 * We want to lock in increasing inum. Since we've already 2105 * acquired the lock on the directory, we may need to release 2106 * it if the inum of the entry turns out to be less. 2107 */ 2108 if (e_inum > dp->i_ino) { 2109 /* 2110 * We are already in the right order, so just 2111 * lock on the inode of the entry. 2112 * We need to use nowait if dp is in the AIL. 2113 */ 2114 2115 lp = (xfs_log_item_t *)dp->i_itemp; 2116 if (lp && (lp->li_flags & XFS_LI_IN_AIL)) { 2117 if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) { 2118 attempts++; 2119#ifdef DEBUG 2120 xfs_rm_attempts++; 2121#endif 2122 2123 /* 2124 * Unlock dp and try again. 2125 * xfs_iunlock will try to push the tail 2126 * if the inode is in the AIL. 2127 */ 2128 2129 xfs_iunlock(dp, XFS_ILOCK_EXCL); 2130 2131 if ((attempts % 5) == 0) { 2132 delay(1); /* Don't just spin the CPU */ 2133#ifdef DEBUG 2134 xfs_rm_lock_delays++; 2135#endif 2136 } 2137 goto again; 2138 } 2139 } else { 2140 xfs_ilock(ip, XFS_ILOCK_EXCL); 2141 } 2142 } else if (e_inum < dp->i_ino) { 2143 xfs_iunlock(dp, XFS_ILOCK_EXCL); 2144 2145 ips[0] = ip; 2146 ips[1] = dp; 2147 xfs_lock_inodes(ips, 2, 0, XFS_ILOCK_EXCL); 2148 } 2149 /* else e_inum == dp->i_ino */ 2150 /* This can happen if we're asked to lock /x/.. 2151 * the entry is "..", which is also the parent directory. 
2152 */ 2153 2154 return 0; 2155} 2156 2157#ifdef DEBUG 2158int xfs_locked_n; 2159int xfs_small_retries; 2160int xfs_middle_retries; 2161int xfs_lots_retries; 2162int xfs_lock_delays; 2163#endif 2164 2165/* 2166 * Bump the subclass so xfs_lock_inodes() acquires each lock with 2167 * a different value 2168 */ 2169static inline int 2170xfs_lock_inumorder(int lock_mode, int subclass) 2171{ 2172 if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) 2173 lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_IOLOCK_SHIFT; 2174 if (lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)) 2175 lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_ILOCK_SHIFT; 2176 2177 return lock_mode; 2178} 2179 2180/* 2181 * The following routine will lock n inodes in exclusive mode. 2182 * We assume the caller calls us with the inodes in i_ino order. 2183 * 2184 * We need to detect deadlock where an inode that we lock 2185 * is in the AIL and we start waiting for another inode that is locked 2186 * by a thread in a long running transaction (such as truncate). This can 2187 * result in deadlock since the long running trans might need to wait 2188 * for the inode we just locked in order to push the tail and free space 2189 * in the log. 2190 */ 2191void 2192xfs_lock_inodes( 2193 xfs_inode_t **ips, 2194 int inodes, 2195 int first_locked, 2196 uint lock_mode) 2197{ 2198 int attempts = 0, i, j, try_lock; 2199 xfs_log_item_t *lp; 2200 2201 ASSERT(ips && (inodes >= 2)); /* we need at least two */ 2202 2203 if (first_locked) { 2204 try_lock = 1; 2205 i = 1; 2206 } else { 2207 try_lock = 0; 2208 i = 0; 2209 } 2210 2211again: 2212 for (; i < inodes; i++) { 2213 ASSERT(ips[i]); 2214 2215 if (i && (ips[i] == ips[i-1])) /* Already locked */ 2216 continue; 2217 2218 /* 2219 * If try_lock is not set yet, make sure all locked inodes 2220 * are not in the AIL. 2221 * If any are, set try_lock to be used later. 2222 */ 2223 2224 if (!try_lock) { 2225 for (j = (i - 1); j >= 0 && !try_lock; j--) { 2226 lp = (xfs_log_item_t *)ips[j]->i_itemp; 2227 if (lp && (lp->li_flags & XFS_LI_IN_AIL)) { 2228 try_lock++; 2229 } 2230 } 2231 } 2232 2233 /* 2234 * If any of the previous locks we have locked is in the AIL, 2235 * we must TRY to get the second and subsequent locks. If 2236 * we can't get any, we must release all we have 2237 * and try again. 2238 */ 2239 2240 if (try_lock) { 2241 /* try_lock must be 0 if i is 0. */ 2242 /* 2243 * try_lock means we have an inode locked 2244 * that is in the AIL. 2245 */ 2246 ASSERT(i != 0); 2247 if (!xfs_ilock_nowait(ips[i], xfs_lock_inumorder(lock_mode, i))) { 2248 attempts++; 2249 2250 /* 2251 * Unlock all previous guys and try again. 2252 * xfs_iunlock will try to push the tail 2253 * if the inode is in the AIL. 2254 */ 2255 2256 for(j = i - 1; j >= 0; j--) { 2257 2258 /* 2259 * Check to see if we've already 2260 * unlocked this one. 2261 * Not the first one going back, 2262 * and the inode ptr is the same. 
2263 */ 2264 if ((j != (i - 1)) && ips[j] == 2265 ips[j+1]) 2266 continue; 2267 2268 xfs_iunlock(ips[j], lock_mode); 2269 } 2270 2271 if ((attempts % 5) == 0) { 2272 delay(1); /* Don't just spin the CPU */ 2273#ifdef DEBUG 2274 xfs_lock_delays++; 2275#endif 2276 } 2277 i = 0; 2278 try_lock = 0; 2279 goto again; 2280 } 2281 } else { 2282 xfs_ilock(ips[i], xfs_lock_inumorder(lock_mode, i)); 2283 } 2284 } 2285 2286#ifdef DEBUG 2287 if (attempts) { 2288 if (attempts < 5) xfs_small_retries++; 2289 else if (attempts < 100) xfs_middle_retries++; 2290 else xfs_lots_retries++; 2291 } else { 2292 xfs_locked_n++; 2293 } 2294#endif 2295} 2296 2297#ifdef DEBUG 2298#define REMOVE_DEBUG_TRACE(x) {remove_which_error_return = (x);} 2299int remove_which_error_return = 0; 2300#else /* ! DEBUG */ 2301#define REMOVE_DEBUG_TRACE(x) 2302#endif /* ! DEBUG */ 2303 2304int 2305xfs_remove( 2306 xfs_inode_t *dp, 2307 bhv_vname_t *dentry) 2308{ 2309 bhv_vnode_t *dir_vp = XFS_ITOV(dp); 2310 char *name = VNAME(dentry); 2311 xfs_mount_t *mp = dp->i_mount; 2312 xfs_inode_t *ip; 2313 xfs_trans_t *tp = NULL; 2314 int error = 0; 2315 xfs_bmap_free_t free_list; 2316 xfs_fsblock_t first_block; 2317 int cancel_flags; 2318 int committed; 2319 int dm_di_mode = 0; 2320 int link_zero; 2321 uint resblks; 2322 int namelen; 2323 2324 vn_trace_entry(dp, __FUNCTION__, (inst_t *)__return_address); 2325 2326 if (XFS_FORCED_SHUTDOWN(mp)) 2327 return XFS_ERROR(EIO); 2328 2329 namelen = VNAMELEN(dentry); 2330 2331 if (!xfs_get_dir_entry(dentry, &ip)) { 2332 dm_di_mode = ip->i_d.di_mode; 2333 IRELE(ip); 2334 } 2335 2336 if (DM_EVENT_ENABLED(dp, DM_EVENT_REMOVE)) { 2337 error = XFS_SEND_NAMESP(mp, DM_EVENT_REMOVE, dir_vp, 2338 DM_RIGHT_NULL, NULL, DM_RIGHT_NULL, 2339 name, NULL, dm_di_mode, 0, 0); 2340 if (error) 2341 return error; 2342 } 2343 2344 /* From this point on, return through std_return */ 2345 ip = NULL; 2346 2347 /* 2348 * We need to get a reference to ip before we get our log 2349 * reservation. The reason for this is that we cannot call 2350 * xfs_iget for an inode for which we do not have a reference 2351 * once we've acquired a log reservation. This is because the 2352 * inode we are trying to get might be in xfs_inactive going 2353 * for a log reservation. Since we'll have to wait for the 2354 * inactive code to complete before returning from xfs_iget, 2355 * we need to make sure that we don't have log space reserved 2356 * when we call xfs_iget. Instead we get an unlocked reference 2357 * to the inode before getting our log reservation. 2358 */ 2359 error = xfs_get_dir_entry(dentry, &ip); 2360 if (error) { 2361 REMOVE_DEBUG_TRACE(__LINE__); 2362 goto std_return; 2363 } 2364 2365 dm_di_mode = ip->i_d.di_mode; 2366 2367 vn_trace_entry(ip, __FUNCTION__, (inst_t *)__return_address); 2368 2369 ITRACE(ip); 2370 2371 error = XFS_QM_DQATTACH(mp, dp, 0); 2372 if (!error && dp != ip) 2373 error = XFS_QM_DQATTACH(mp, ip, 0); 2374 if (error) { 2375 REMOVE_DEBUG_TRACE(__LINE__); 2376 IRELE(ip); 2377 goto std_return; 2378 } 2379 2380 tp = xfs_trans_alloc(mp, XFS_TRANS_REMOVE); 2381 cancel_flags = XFS_TRANS_RELEASE_LOG_RES; 2382 /* 2383 * We try to get the real space reservation first, 2384 * allowing for directory btree deletion(s) implying 2385 * possible bmap insert(s). If we can't get the space 2386 * reservation then we use 0 instead, and avoid the bmap 2387 * btree insert(s) in the directory code by, if the bmap 2388 * insert tries to happen, instead trimming the LAST 2389 * block from the directory. 
2390 */ 2391 resblks = XFS_REMOVE_SPACE_RES(mp); 2392 error = xfs_trans_reserve(tp, resblks, XFS_REMOVE_LOG_RES(mp), 0, 2393 XFS_TRANS_PERM_LOG_RES, XFS_REMOVE_LOG_COUNT); 2394 if (error == ENOSPC) { 2395 resblks = 0; 2396 error = xfs_trans_reserve(tp, 0, XFS_REMOVE_LOG_RES(mp), 0, 2397 XFS_TRANS_PERM_LOG_RES, XFS_REMOVE_LOG_COUNT); 2398 } 2399 if (error) { 2400 ASSERT(error != ENOSPC); 2401 REMOVE_DEBUG_TRACE(__LINE__); 2402 xfs_trans_cancel(tp, 0); 2403 IRELE(ip); 2404 return error; 2405 } 2406 2407 error = xfs_lock_dir_and_entry(dp, ip); 2408 if (error) { 2409 REMOVE_DEBUG_TRACE(__LINE__); 2410 xfs_trans_cancel(tp, cancel_flags); 2411 IRELE(ip); 2412 goto std_return; 2413 } 2414 2415 /* 2416 * At this point, we've gotten both the directory and the entry 2417 * inodes locked. 2418 */ 2419 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); 2420 if (dp != ip) { 2421 /* 2422 * Increment vnode ref count only in this case since 2423 * there's an extra vnode reference in the case where 2424 * dp == ip. 2425 */ 2426 IHOLD(dp); 2427 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 2428 } 2429 2430 /* 2431 * Entry must exist since we did a lookup in xfs_lock_dir_and_entry. 2432 */ 2433 XFS_BMAP_INIT(&free_list, &first_block); 2434 error = xfs_dir_removename(tp, dp, name, namelen, ip->i_ino, 2435 &first_block, &free_list, 0); 2436 if (error) { 2437 ASSERT(error != ENOENT); 2438 REMOVE_DEBUG_TRACE(__LINE__); 2439 goto error1; 2440 } 2441 xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 2442 2443 dp->i_gen++; 2444 xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); 2445 2446 error = xfs_droplink(tp, ip); 2447 if (error) { 2448 REMOVE_DEBUG_TRACE(__LINE__); 2449 goto error1; 2450 } 2451 2452 /* Determine if this is the last link while 2453 * we are in the transaction. 2454 */ 2455 link_zero = (ip)->i_d.di_nlink==0; 2456 2457 /* 2458 * Take an extra ref on the inode so that it doesn't 2459 * go to xfs_inactive() from within the commit. 2460 */ 2461 IHOLD(ip); 2462 2463 /* 2464 * If this is a synchronous mount, make sure that the 2465 * remove transaction goes to disk before returning to 2466 * the user. 2467 */ 2468 if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) { 2469 xfs_trans_set_sync(tp); 2470 } 2471 2472 error = xfs_bmap_finish(&tp, &free_list, &committed); 2473 if (error) { 2474 REMOVE_DEBUG_TRACE(__LINE__); 2475 goto error_rele; 2476 } 2477 2478 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 2479 if (error) { 2480 IRELE(ip); 2481 goto std_return; 2482 } 2483 2484 /* 2485 * Before we drop our extra reference to the inode, purge it 2486 * from the refcache if it is there. By waiting until afterwards 2487 * to do the IRELE, we ensure that we won't go inactive in the 2488 * xfs_refcache_purge_ip routine (although that would be OK). 2489 */ 2490 xfs_refcache_purge_ip(ip); 2491 2492 /* 2493 * If we are using filestreams, kill the stream association. 2494 * If the file is still open it may get a new one but that 2495 * will get killed on last close in xfs_close() so we don't 2496 * have to worry about that. 
2497 */ 2498 if (link_zero && xfs_inode_is_filestream(ip)) 2499 xfs_filestream_deassociate(ip); 2500 2501 vn_trace_exit(ip, __FUNCTION__, (inst_t *)__return_address); 2502 2503 IRELE(ip); 2504 2505/* Fall through to std_return with error = 0 */ 2506 std_return: 2507 if (DM_EVENT_ENABLED(dp, DM_EVENT_POSTREMOVE)) { 2508 (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTREMOVE, 2509 dir_vp, DM_RIGHT_NULL, 2510 NULL, DM_RIGHT_NULL, 2511 name, NULL, dm_di_mode, error, 0); 2512 } 2513 return error; 2514 2515 error1: 2516 xfs_bmap_cancel(&free_list); 2517 cancel_flags |= XFS_TRANS_ABORT; 2518 xfs_trans_cancel(tp, cancel_flags); 2519 goto std_return; 2520 2521 error_rele: 2522 /* 2523 * In this case make sure to not release the inode until after 2524 * the current transaction is aborted. Releasing it beforehand 2525 * can cause us to go to xfs_inactive and start a recursive 2526 * transaction which can easily deadlock with the current one. 2527 */ 2528 xfs_bmap_cancel(&free_list); 2529 cancel_flags |= XFS_TRANS_ABORT; 2530 xfs_trans_cancel(tp, cancel_flags); 2531 2532 /* 2533 * Before we drop our extra reference to the inode, purge it 2534 * from the refcache if it is there. By waiting until afterwards 2535 * to do the IRELE, we ensure that we won't go inactive in the 2536 * xfs_refcache_purge_ip routine (although that would be OK). 2537 */ 2538 xfs_refcache_purge_ip(ip); 2539 2540 IRELE(ip); 2541 2542 goto std_return; 2543} 2544 2545int 2546xfs_link( 2547 xfs_inode_t *tdp, 2548 bhv_vnode_t *src_vp, 2549 bhv_vname_t *dentry) 2550{ 2551 bhv_vnode_t *target_dir_vp = XFS_ITOV(tdp); 2552 xfs_mount_t *mp = tdp->i_mount; 2553 xfs_inode_t *sip = xfs_vtoi(src_vp); 2554 xfs_trans_t *tp; 2555 xfs_inode_t *ips[2]; 2556 int error; 2557 xfs_bmap_free_t free_list; 2558 xfs_fsblock_t first_block; 2559 int cancel_flags; 2560 int committed; 2561 int resblks; 2562 char *target_name = VNAME(dentry); 2563 int target_namelen; 2564 2565 vn_trace_entry(tdp, __FUNCTION__, (inst_t *)__return_address); 2566 vn_trace_entry(xfs_vtoi(src_vp), __FUNCTION__, (inst_t *)__return_address); 2567 2568 target_namelen = VNAMELEN(dentry); 2569 ASSERT(!VN_ISDIR(src_vp)); 2570 2571 if (XFS_FORCED_SHUTDOWN(mp)) 2572 return XFS_ERROR(EIO); 2573 2574 if (DM_EVENT_ENABLED(tdp, DM_EVENT_LINK)) { 2575 error = XFS_SEND_NAMESP(mp, DM_EVENT_LINK, 2576 target_dir_vp, DM_RIGHT_NULL, 2577 src_vp, DM_RIGHT_NULL, 2578 target_name, NULL, 0, 0, 0); 2579 if (error) 2580 return error; 2581 } 2582 2583 /* Return through std_return after this point. */ 2584 2585 error = XFS_QM_DQATTACH(mp, sip, 0); 2586 if (!error && sip != tdp) 2587 error = XFS_QM_DQATTACH(mp, tdp, 0); 2588 if (error) 2589 goto std_return; 2590 2591 tp = xfs_trans_alloc(mp, XFS_TRANS_LINK); 2592 cancel_flags = XFS_TRANS_RELEASE_LOG_RES; 2593 resblks = XFS_LINK_SPACE_RES(mp, target_namelen); 2594 error = xfs_trans_reserve(tp, resblks, XFS_LINK_LOG_RES(mp), 0, 2595 XFS_TRANS_PERM_LOG_RES, XFS_LINK_LOG_COUNT); 2596 if (error == ENOSPC) { 2597 resblks = 0; 2598 error = xfs_trans_reserve(tp, 0, XFS_LINK_LOG_RES(mp), 0, 2599 XFS_TRANS_PERM_LOG_RES, XFS_LINK_LOG_COUNT); 2600 } 2601 if (error) { 2602 cancel_flags = 0; 2603 goto error_return; 2604 } 2605 2606 if (sip->i_ino < tdp->i_ino) { 2607 ips[0] = sip; 2608 ips[1] = tdp; 2609 } else { 2610 ips[0] = tdp; 2611 ips[1] = sip; 2612 } 2613 2614 xfs_lock_inodes(ips, 2, 0, XFS_ILOCK_EXCL); 2615 2616 /* 2617 * Increment vnode ref counts since xfs_trans_commit & 2618 * xfs_trans_cancel will both unlock the inodes and 2619 * decrement the associated ref counts. 
2620 */ 2621 VN_HOLD(src_vp); 2622 VN_HOLD(target_dir_vp); 2623 xfs_trans_ijoin(tp, sip, XFS_ILOCK_EXCL); 2624 xfs_trans_ijoin(tp, tdp, XFS_ILOCK_EXCL); 2625 2626 /* 2627 * If the source has too many links, we can't make any more to it. 2628 */ 2629 if (sip->i_d.di_nlink >= XFS_MAXLINK) { 2630 error = XFS_ERROR(EMLINK); 2631 goto error_return; 2632 } 2633 2634 /* 2635 * If we are using project inheritance, we only allow hard link 2636 * creation in our tree when the project IDs are the same; else 2637 * the tree quota mechanism could be circumvented. 2638 */ 2639 if (unlikely((tdp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) && 2640 (tdp->i_d.di_projid != sip->i_d.di_projid))) { 2641 error = XFS_ERROR(EXDEV); 2642 goto error_return; 2643 } 2644 2645 if (resblks == 0 && 2646 (error = xfs_dir_canenter(tp, tdp, target_name, target_namelen))) 2647 goto error_return; 2648 2649 XFS_BMAP_INIT(&free_list, &first_block); 2650 2651 error = xfs_dir_createname(tp, tdp, target_name, target_namelen, 2652 sip->i_ino, &first_block, &free_list, 2653 resblks); 2654 if (error) 2655 goto abort_return; 2656 xfs_ichgtime(tdp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 2657 tdp->i_gen++; 2658 xfs_trans_log_inode(tp, tdp, XFS_ILOG_CORE); 2659 2660 error = xfs_bumplink(tp, sip); 2661 if (error) 2662 goto abort_return; 2663 2664 /* 2665 * If this is a synchronous mount, make sure that the 2666 * link transaction goes to disk before returning to 2667 * the user. 2668 */ 2669 if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) { 2670 xfs_trans_set_sync(tp); 2671 } 2672 2673 error = xfs_bmap_finish (&tp, &free_list, &committed); 2674 if (error) { 2675 xfs_bmap_cancel(&free_list); 2676 goto abort_return; 2677 } 2678 2679 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 2680 if (error) 2681 goto std_return; 2682 2683 /* Fall through to std_return with error = 0. */ 2684std_return: 2685 if (DM_EVENT_ENABLED(sip, DM_EVENT_POSTLINK)) { 2686 (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTLINK, 2687 target_dir_vp, DM_RIGHT_NULL, 2688 src_vp, DM_RIGHT_NULL, 2689 target_name, NULL, 0, error, 0); 2690 } 2691 return error; 2692 2693 abort_return: 2694 cancel_flags |= XFS_TRANS_ABORT; 2695 /* FALLTHROUGH */ 2696 2697 error_return: 2698 xfs_trans_cancel(tp, cancel_flags); 2699 goto std_return; 2700} 2701 2702 2703int 2704xfs_mkdir( 2705 xfs_inode_t *dp, 2706 bhv_vname_t *dentry, 2707 mode_t mode, 2708 bhv_vnode_t **vpp, 2709 cred_t *credp) 2710{ 2711 bhv_vnode_t *dir_vp = XFS_ITOV(dp); 2712 char *dir_name = VNAME(dentry); 2713 int dir_namelen = VNAMELEN(dentry); 2714 xfs_mount_t *mp = dp->i_mount; 2715 xfs_inode_t *cdp; /* inode of created dir */ 2716 bhv_vnode_t *cvp; /* vnode of created dir */ 2717 xfs_trans_t *tp; 2718 int cancel_flags; 2719 int error; 2720 int committed; 2721 xfs_bmap_free_t free_list; 2722 xfs_fsblock_t first_block; 2723 boolean_t unlock_dp_on_error = B_FALSE; 2724 boolean_t created = B_FALSE; 2725 int dm_event_sent = 0; 2726 xfs_prid_t prid; 2727 struct xfs_dquot *udqp, *gdqp; 2728 uint resblks; 2729 2730 if (XFS_FORCED_SHUTDOWN(mp)) 2731 return XFS_ERROR(EIO); 2732 2733 tp = NULL; 2734 2735 if (DM_EVENT_ENABLED(dp, DM_EVENT_CREATE)) { 2736 error = XFS_SEND_NAMESP(mp, DM_EVENT_CREATE, 2737 dir_vp, DM_RIGHT_NULL, NULL, 2738 DM_RIGHT_NULL, dir_name, NULL, 2739 mode, 0, 0); 2740 if (error) 2741 return error; 2742 dm_event_sent = 1; 2743 } 2744 2745 /* Return through std_return after this point. 
*/ 2746 2747 vn_trace_entry(dp, __FUNCTION__, (inst_t *)__return_address); 2748 2749 mp = dp->i_mount; 2750 udqp = gdqp = NULL; 2751 if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) 2752 prid = dp->i_d.di_projid; 2753 else 2754 prid = (xfs_prid_t)dfltprid; 2755 2756 /* 2757 * Make sure that we have allocated dquot(s) on disk. 2758 */ 2759 error = XFS_QM_DQVOPALLOC(mp, dp, 2760 current_fsuid(credp), current_fsgid(credp), prid, 2761 XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp); 2762 if (error) 2763 goto std_return; 2764 2765 tp = xfs_trans_alloc(mp, XFS_TRANS_MKDIR); 2766 cancel_flags = XFS_TRANS_RELEASE_LOG_RES; 2767 resblks = XFS_MKDIR_SPACE_RES(mp, dir_namelen); 2768 error = xfs_trans_reserve(tp, resblks, XFS_MKDIR_LOG_RES(mp), 0, 2769 XFS_TRANS_PERM_LOG_RES, XFS_MKDIR_LOG_COUNT); 2770 if (error == ENOSPC) { 2771 resblks = 0; 2772 error = xfs_trans_reserve(tp, 0, XFS_MKDIR_LOG_RES(mp), 0, 2773 XFS_TRANS_PERM_LOG_RES, 2774 XFS_MKDIR_LOG_COUNT); 2775 } 2776 if (error) { 2777 cancel_flags = 0; 2778 goto error_return; 2779 } 2780 2781 xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT); 2782 unlock_dp_on_error = B_TRUE; 2783 2784 /* 2785 * Check for directory link count overflow. 2786 */ 2787 if (dp->i_d.di_nlink >= XFS_MAXLINK) { 2788 error = XFS_ERROR(EMLINK); 2789 goto error_return; 2790 } 2791 2792 /* 2793 * Reserve disk quota and the inode. 2794 */ 2795 error = XFS_TRANS_RESERVE_QUOTA(mp, tp, udqp, gdqp, resblks, 1, 0); 2796 if (error) 2797 goto error_return; 2798 2799 if (resblks == 0 && 2800 (error = xfs_dir_canenter(tp, dp, dir_name, dir_namelen))) 2801 goto error_return; 2802 /* 2803 * create the directory inode. 2804 */ 2805 error = xfs_dir_ialloc(&tp, dp, mode, 2, 2806 0, credp, prid, resblks > 0, 2807 &cdp, NULL); 2808 if (error) { 2809 if (error == ENOSPC) 2810 goto error_return; 2811 goto abort_return; 2812 } 2813 ITRACE(cdp); 2814 2815 /* 2816 * Now we add the directory inode to the transaction. 2817 * We waited until now since xfs_dir_ialloc might start 2818 * a new transaction. Had we joined the transaction 2819 * earlier, the locks might have gotten released. An error 2820 * from here on will result in the transaction cancel 2821 * unlocking dp so don't do it explicitly in the error path. 2822 */ 2823 VN_HOLD(dir_vp); 2824 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); 2825 unlock_dp_on_error = B_FALSE; 2826 2827 XFS_BMAP_INIT(&free_list, &first_block); 2828 2829 error = xfs_dir_createname(tp, dp, dir_name, dir_namelen, cdp->i_ino, 2830 &first_block, &free_list, resblks ? 2831 resblks - XFS_IALLOC_SPACE_RES(mp) : 0); 2832 if (error) { 2833 ASSERT(error != ENOSPC); 2834 goto error1; 2835 } 2836 xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 2837 2838 /* 2839 * Bump the in memory version number of the parent directory 2840 * so that other processes accessing it will recognize that 2841 * the directory has changed. 2842 */ 2843 dp->i_gen++; 2844 2845 error = xfs_dir_init(tp, cdp, dp); 2846 if (error) 2847 goto error2; 2848 2849 cdp->i_gen = 1; 2850 error = xfs_bumplink(tp, dp); 2851 if (error) 2852 goto error2; 2853 2854 cvp = XFS_ITOV(cdp); 2855 2856 created = B_TRUE; 2857 2858 *vpp = cvp; 2859 IHOLD(cdp); 2860 2861 /* 2862 * Attach the dquots to the new inode and modify the icount incore. 2863 */ 2864 XFS_QM_DQVOPCREATE(mp, tp, cdp, udqp, gdqp); 2865 2866 /* 2867 * If this is a synchronous mount, make sure that the 2868 * mkdir transaction goes to disk before returning to 2869 * the user. 
2870 */ 2871 if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) { 2872 xfs_trans_set_sync(tp); 2873 } 2874 2875 error = xfs_bmap_finish(&tp, &free_list, &committed); 2876 if (error) { 2877 IRELE(cdp); 2878 goto error2; 2879 } 2880 2881 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 2882 XFS_QM_DQRELE(mp, udqp); 2883 XFS_QM_DQRELE(mp, gdqp); 2884 if (error) { 2885 IRELE(cdp); 2886 } 2887 2888 /* Fall through to std_return with error = 0 or errno from 2889 * xfs_trans_commit. */ 2890 2891std_return: 2892 if ((created || (error != 0 && dm_event_sent != 0)) && 2893 DM_EVENT_ENABLED(dp, DM_EVENT_POSTCREATE)) { 2894 (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTCREATE, 2895 dir_vp, DM_RIGHT_NULL, 2896 created ? XFS_ITOV(cdp):NULL, 2897 DM_RIGHT_NULL, 2898 dir_name, NULL, 2899 mode, error, 0); 2900 } 2901 return error; 2902 2903 error2: 2904 error1: 2905 xfs_bmap_cancel(&free_list); 2906 abort_return: 2907 cancel_flags |= XFS_TRANS_ABORT; 2908 error_return: 2909 xfs_trans_cancel(tp, cancel_flags); 2910 XFS_QM_DQRELE(mp, udqp); 2911 XFS_QM_DQRELE(mp, gdqp); 2912 2913 if (unlock_dp_on_error) 2914 xfs_iunlock(dp, XFS_ILOCK_EXCL); 2915 2916 goto std_return; 2917} 2918 2919int 2920xfs_rmdir( 2921 xfs_inode_t *dp, 2922 bhv_vname_t *dentry) 2923{ 2924 bhv_vnode_t *dir_vp = XFS_ITOV(dp); 2925 char *name = VNAME(dentry); 2926 int namelen = VNAMELEN(dentry); 2927 xfs_mount_t *mp = dp->i_mount; 2928 xfs_inode_t *cdp; /* child directory */ 2929 xfs_trans_t *tp; 2930 int error; 2931 xfs_bmap_free_t free_list; 2932 xfs_fsblock_t first_block; 2933 int cancel_flags; 2934 int committed; 2935 int dm_di_mode = S_IFDIR; 2936 int last_cdp_link; 2937 uint resblks; 2938 2939 vn_trace_entry(dp, __FUNCTION__, (inst_t *)__return_address); 2940 2941 if (XFS_FORCED_SHUTDOWN(mp)) 2942 return XFS_ERROR(EIO); 2943 2944 if (!xfs_get_dir_entry(dentry, &cdp)) { 2945 dm_di_mode = cdp->i_d.di_mode; 2946 IRELE(cdp); 2947 } 2948 2949 if (DM_EVENT_ENABLED(dp, DM_EVENT_REMOVE)) { 2950 error = XFS_SEND_NAMESP(mp, DM_EVENT_REMOVE, 2951 dir_vp, DM_RIGHT_NULL, 2952 NULL, DM_RIGHT_NULL, 2953 name, NULL, dm_di_mode, 0, 0); 2954 if (error) 2955 return XFS_ERROR(error); 2956 } 2957 2958 /* Return through std_return after this point. */ 2959 2960 cdp = NULL; 2961 2962 /* 2963 * We need to get a reference to cdp before we get our log 2964 * reservation. The reason for this is that we cannot call 2965 * xfs_iget for an inode for which we do not have a reference 2966 * once we've acquired a log reservation. This is because the 2967 * inode we are trying to get might be in xfs_inactive going 2968 * for a log reservation. Since we'll have to wait for the 2969 * inactive code to complete before returning from xfs_iget, 2970 * we need to make sure that we don't have log space reserved 2971 * when we call xfs_iget. Instead we get an unlocked reference 2972 * to the inode before getting our log reservation. 2973 */ 2974 error = xfs_get_dir_entry(dentry, &cdp); 2975 if (error) { 2976 REMOVE_DEBUG_TRACE(__LINE__); 2977 goto std_return; 2978 } 2979 mp = dp->i_mount; 2980 dm_di_mode = cdp->i_d.di_mode; 2981 2982 /* 2983 * Get the dquots for the inodes. 
2984 */ 2985 error = XFS_QM_DQATTACH(mp, dp, 0); 2986 if (!error && dp != cdp) 2987 error = XFS_QM_DQATTACH(mp, cdp, 0); 2988 if (error) { 2989 IRELE(cdp); 2990 REMOVE_DEBUG_TRACE(__LINE__); 2991 goto std_return; 2992 } 2993 2994 tp = xfs_trans_alloc(mp, XFS_TRANS_RMDIR); 2995 cancel_flags = XFS_TRANS_RELEASE_LOG_RES; 2996 /* 2997 * We try to get the real space reservation first, 2998 * allowing for directory btree deletion(s) implying 2999 * possible bmap insert(s). If we can't get the space 3000 * reservation then we use 0 instead, and avoid the bmap 3001 * btree insert(s) in the directory code by, if the bmap 3002 * insert tries to happen, instead trimming the LAST 3003 * block from the directory. 3004 */ 3005 resblks = XFS_REMOVE_SPACE_RES(mp); 3006 error = xfs_trans_reserve(tp, resblks, XFS_REMOVE_LOG_RES(mp), 0, 3007 XFS_TRANS_PERM_LOG_RES, XFS_DEFAULT_LOG_COUNT); 3008 if (error == ENOSPC) { 3009 resblks = 0; 3010 error = xfs_trans_reserve(tp, 0, XFS_REMOVE_LOG_RES(mp), 0, 3011 XFS_TRANS_PERM_LOG_RES, XFS_DEFAULT_LOG_COUNT); 3012 } 3013 if (error) { 3014 ASSERT(error != ENOSPC); 3015 cancel_flags = 0; 3016 IRELE(cdp); 3017 goto error_return; 3018 } 3019 XFS_BMAP_INIT(&free_list, &first_block); 3020 3021 /* 3022 * Now lock the child directory inode and the parent directory 3023 * inode in the proper order. This will take care of validating 3024 * that the directory entry for the child directory inode has 3025 * not changed while we were obtaining a log reservation. 3026 */ 3027 error = xfs_lock_dir_and_entry(dp, cdp); 3028 if (error) { 3029 xfs_trans_cancel(tp, cancel_flags); 3030 IRELE(cdp); 3031 goto std_return; 3032 } 3033 3034 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); 3035 if (dp != cdp) { 3036 /* 3037 * Only increment the parent directory vnode count if 3038 * we didn't bump it in looking up cdp. The only time 3039 * we don't bump it is when we're looking up ".". 3040 */ 3041 VN_HOLD(dir_vp); 3042 } 3043 3044 ITRACE(cdp); 3045 xfs_trans_ijoin(tp, cdp, XFS_ILOCK_EXCL); 3046 3047 ASSERT(cdp->i_d.di_nlink >= 2); 3048 if (cdp->i_d.di_nlink != 2) { 3049 error = XFS_ERROR(ENOTEMPTY); 3050 goto error_return; 3051 } 3052 if (!xfs_dir_isempty(cdp)) { 3053 error = XFS_ERROR(ENOTEMPTY); 3054 goto error_return; 3055 } 3056 3057 error = xfs_dir_removename(tp, dp, name, namelen, cdp->i_ino, 3058 &first_block, &free_list, resblks); 3059 if (error) 3060 goto error1; 3061 3062 xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 3063 3064 /* 3065 * Bump the in memory generation count on the parent 3066 * directory so that other can know that it has changed. 3067 */ 3068 dp->i_gen++; 3069 3070 /* 3071 * Drop the link from cdp's "..". 3072 */ 3073 error = xfs_droplink(tp, dp); 3074 if (error) { 3075 goto error1; 3076 } 3077 3078 /* 3079 * Drop the link from dp to cdp. 3080 */ 3081 error = xfs_droplink(tp, cdp); 3082 if (error) { 3083 goto error1; 3084 } 3085 3086 /* 3087 * Drop the "." link from cdp to self. 3088 */ 3089 error = xfs_droplink(tp, cdp); 3090 if (error) { 3091 goto error1; 3092 } 3093 3094 /* Determine these before committing transaction */ 3095 last_cdp_link = (cdp)->i_d.di_nlink==0; 3096 3097 /* 3098 * Take an extra ref on the child vnode so that it 3099 * does not go to xfs_inactive() from within the commit. 3100 */ 3101 IHOLD(cdp); 3102 3103 /* 3104 * If this is a synchronous mount, make sure that the 3105 * rmdir transaction goes to disk before returning to 3106 * the user. 
3107 */ 3108 if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) { 3109 xfs_trans_set_sync(tp); 3110 } 3111 3112 error = xfs_bmap_finish (&tp, &free_list, &committed); 3113 if (error) { 3114 xfs_bmap_cancel(&free_list); 3115 xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES | 3116 XFS_TRANS_ABORT)); 3117 IRELE(cdp); 3118 goto std_return; 3119 } 3120 3121 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 3122 if (error) { 3123 IRELE(cdp); 3124 goto std_return; 3125 } 3126 3127 3128 IRELE(cdp); 3129 3130 /* Fall through to std_return with error = 0 or the errno 3131 * from xfs_trans_commit. */ 3132 std_return: 3133 if (DM_EVENT_ENABLED(dp, DM_EVENT_POSTREMOVE)) { 3134 (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTREMOVE, 3135 dir_vp, DM_RIGHT_NULL, 3136 NULL, DM_RIGHT_NULL, 3137 name, NULL, dm_di_mode, 3138 error, 0); 3139 } 3140 return error; 3141 3142 error1: 3143 xfs_bmap_cancel(&free_list); 3144 cancel_flags |= XFS_TRANS_ABORT; 3145 /* FALLTHROUGH */ 3146 3147 error_return: 3148 xfs_trans_cancel(tp, cancel_flags); 3149 goto std_return; 3150} 3151 3152int 3153xfs_symlink( 3154 xfs_inode_t *dp, 3155 bhv_vname_t *dentry, 3156 char *target_path, 3157 mode_t mode, 3158 bhv_vnode_t **vpp, 3159 cred_t *credp) 3160{ 3161 bhv_vnode_t *dir_vp = XFS_ITOV(dp); 3162 xfs_mount_t *mp = dp->i_mount; 3163 xfs_trans_t *tp; 3164 xfs_inode_t *ip; 3165 int error; 3166 int pathlen; 3167 xfs_bmap_free_t free_list; 3168 xfs_fsblock_t first_block; 3169 boolean_t unlock_dp_on_error = B_FALSE; 3170 uint cancel_flags; 3171 int committed; 3172 xfs_fileoff_t first_fsb; 3173 xfs_filblks_t fs_blocks; 3174 int nmaps; 3175 xfs_bmbt_irec_t mval[SYMLINK_MAPS]; 3176 xfs_daddr_t d; 3177 char *cur_chunk; 3178 int byte_cnt; 3179 int n; 3180 xfs_buf_t *bp; 3181 xfs_prid_t prid; 3182 struct xfs_dquot *udqp, *gdqp; 3183 uint resblks; 3184 char *link_name = VNAME(dentry); 3185 int link_namelen; 3186 3187 *vpp = NULL; 3188 error = 0; 3189 ip = NULL; 3190 tp = NULL; 3191 3192 vn_trace_entry(dp, __FUNCTION__, (inst_t *)__return_address); 3193 3194 3195 if (XFS_FORCED_SHUTDOWN(mp)) 3196 return XFS_ERROR(EIO); 3197 3198 link_namelen = VNAMELEN(dentry); 3199 3200 /* 3201 * Check component lengths of the target path name. 3202 */ 3203 pathlen = strlen(target_path); 3204 if (pathlen >= MAXPATHLEN) /* total string too long */ 3205 return XFS_ERROR(ENAMETOOLONG); 3206 if (pathlen >= MAXNAMELEN) { /* is any component too long? */ 3207 int len, total; 3208 char *path; 3209 3210 for (total = 0, path = target_path; total < pathlen;) { 3211 /* 3212 * Skip any slashes. 3213 */ 3214 while(*path == '/') { 3215 total++; 3216 path++; 3217 } 3218 3219 /* 3220 * Count up to the next slash or end of path. 3221 * Error out if the component is bigger than MAXNAMELEN. 3222 */ 3223 for(len = 0; *path != '/' && total < pathlen;total++, path++) { 3224 if (++len >= MAXNAMELEN) { 3225 error = ENAMETOOLONG; 3226 return error; 3227 } 3228 } 3229 } 3230 } 3231 3232 if (DM_EVENT_ENABLED(dp, DM_EVENT_SYMLINK)) { 3233 error = XFS_SEND_NAMESP(mp, DM_EVENT_SYMLINK, dir_vp, 3234 DM_RIGHT_NULL, NULL, DM_RIGHT_NULL, 3235 link_name, target_path, 0, 0, 0); 3236 if (error) 3237 return error; 3238 } 3239 3240 /* Return through std_return after this point. */ 3241 3242 udqp = gdqp = NULL; 3243 if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) 3244 prid = dp->i_d.di_projid; 3245 else 3246 prid = (xfs_prid_t)dfltprid; 3247 3248 /* 3249 * Make sure that we have allocated dquot(s) on disk. 
3250 */ 3251 error = XFS_QM_DQVOPALLOC(mp, dp, 3252 current_fsuid(credp), current_fsgid(credp), prid, 3253 XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp); 3254 if (error) 3255 goto std_return; 3256 3257 tp = xfs_trans_alloc(mp, XFS_TRANS_SYMLINK); 3258 cancel_flags = XFS_TRANS_RELEASE_LOG_RES; 3259 /* 3260 * The symlink will fit into the inode data fork? 3261 * There can't be any attributes so we get the whole variable part. 3262 */ 3263 if (pathlen <= XFS_LITINO(mp)) 3264 fs_blocks = 0; 3265 else 3266 fs_blocks = XFS_B_TO_FSB(mp, pathlen); 3267 resblks = XFS_SYMLINK_SPACE_RES(mp, link_namelen, fs_blocks); 3268 error = xfs_trans_reserve(tp, resblks, XFS_SYMLINK_LOG_RES(mp), 0, 3269 XFS_TRANS_PERM_LOG_RES, XFS_SYMLINK_LOG_COUNT); 3270 if (error == ENOSPC && fs_blocks == 0) { 3271 resblks = 0; 3272 error = xfs_trans_reserve(tp, 0, XFS_SYMLINK_LOG_RES(mp), 0, 3273 XFS_TRANS_PERM_LOG_RES, XFS_SYMLINK_LOG_COUNT); 3274 } 3275 if (error) { 3276 cancel_flags = 0; 3277 goto error_return; 3278 } 3279 3280 xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT); 3281 unlock_dp_on_error = B_TRUE; 3282 3283 /* 3284 * Check whether the directory allows new symlinks or not. 3285 */ 3286 if (dp->i_d.di_flags & XFS_DIFLAG_NOSYMLINKS) { 3287 error = XFS_ERROR(EPERM); 3288 goto error_return; 3289 } 3290 3291 /* 3292 * Reserve disk quota : blocks and inode. 3293 */ 3294 error = XFS_TRANS_RESERVE_QUOTA(mp, tp, udqp, gdqp, resblks, 1, 0); 3295 if (error) 3296 goto error_return; 3297 3298 /* 3299 * Check for ability to enter directory entry, if no space reserved. 3300 */ 3301 if (resblks == 0 && 3302 (error = xfs_dir_canenter(tp, dp, link_name, link_namelen))) 3303 goto error_return; 3304 /* 3305 * Initialize the bmap freelist prior to calling either 3306 * bmapi or the directory create code. 3307 */ 3308 XFS_BMAP_INIT(&free_list, &first_block); 3309 3310 /* 3311 * Allocate an inode for the symlink. 3312 */ 3313 error = xfs_dir_ialloc(&tp, dp, S_IFLNK | (mode & ~S_IFMT), 3314 1, 0, credp, prid, resblks > 0, &ip, NULL); 3315 if (error) { 3316 if (error == ENOSPC) 3317 goto error_return; 3318 goto error1; 3319 } 3320 ITRACE(ip); 3321 3322 /* 3323 * An error after we've joined dp to the transaction will result in the 3324 * transaction cancel unlocking dp so don't do it explicitly in the 3325 * error path. 3326 */ 3327 VN_HOLD(dir_vp); 3328 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); 3329 unlock_dp_on_error = B_FALSE; 3330 3331 /* 3332 * Also attach the dquot(s) to it, if applicable. 3333 */ 3334 XFS_QM_DQVOPCREATE(mp, tp, ip, udqp, gdqp); 3335 3336 if (resblks) 3337 resblks -= XFS_IALLOC_SPACE_RES(mp); 3338 /* 3339 * If the symlink will fit into the inode, write it inline. 3340 */ 3341 if (pathlen <= XFS_IFORK_DSIZE(ip)) { 3342 xfs_idata_realloc(ip, pathlen, XFS_DATA_FORK); 3343 memcpy(ip->i_df.if_u1.if_data, target_path, pathlen); 3344 ip->i_d.di_size = pathlen; 3345 3346 /* 3347 * The inode was initially created in extent format. 
3348 */ 3349 ip->i_df.if_flags &= ~(XFS_IFEXTENTS | XFS_IFBROOT); 3350 ip->i_df.if_flags |= XFS_IFINLINE; 3351 3352 ip->i_d.di_format = XFS_DINODE_FMT_LOCAL; 3353 xfs_trans_log_inode(tp, ip, XFS_ILOG_DDATA | XFS_ILOG_CORE); 3354 3355 } else { 3356 first_fsb = 0; 3357 nmaps = SYMLINK_MAPS; 3358 3359 error = xfs_bmapi(tp, ip, first_fsb, fs_blocks, 3360 XFS_BMAPI_WRITE | XFS_BMAPI_METADATA, 3361 &first_block, resblks, mval, &nmaps, 3362 &free_list, NULL); 3363 if (error) { 3364 goto error1; 3365 } 3366 3367 if (resblks) 3368 resblks -= fs_blocks; 3369 ip->i_d.di_size = pathlen; 3370 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 3371 3372 cur_chunk = target_path; 3373 for (n = 0; n < nmaps; n++) { 3374 d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock); 3375 byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount); 3376 bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, 3377 BTOBB(byte_cnt), 0); 3378 ASSERT(bp && !XFS_BUF_GETERROR(bp)); 3379 if (pathlen < byte_cnt) { 3380 byte_cnt = pathlen; 3381 } 3382 pathlen -= byte_cnt; 3383 3384 memcpy(XFS_BUF_PTR(bp), cur_chunk, byte_cnt); 3385 cur_chunk += byte_cnt; 3386 3387 xfs_trans_log_buf(tp, bp, 0, byte_cnt - 1); 3388 } 3389 } 3390 3391 /* 3392 * Create the directory entry for the symlink. 3393 */ 3394 error = xfs_dir_createname(tp, dp, link_name, link_namelen, ip->i_ino, 3395 &first_block, &free_list, resblks); 3396 if (error) 3397 goto error1; 3398 xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 3399 xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); 3400 3401 /* 3402 * Bump the in memory version number of the parent directory 3403 * so that other processes accessing it will recognize that 3404 * the directory has changed. 3405 */ 3406 dp->i_gen++; 3407 3408 /* 3409 * If this is a synchronous mount, make sure that the 3410 * symlink transaction goes to disk before returning to 3411 * the user. 3412 */ 3413 if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) { 3414 xfs_trans_set_sync(tp); 3415 } 3416 3417 /* 3418 * xfs_trans_commit normally decrements the vnode ref count 3419 * when it unlocks the inode. Since we want to return the 3420 * vnode to the caller, we bump the vnode ref count now. 3421 */ 3422 IHOLD(ip); 3423 3424 error = xfs_bmap_finish(&tp, &free_list, &committed); 3425 if (error) { 3426 goto error2; 3427 } 3428 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 3429 XFS_QM_DQRELE(mp, udqp); 3430 XFS_QM_DQRELE(mp, gdqp); 3431 3432 /* Fall through to std_return with error = 0 or errno from 3433 * xfs_trans_commit */ 3434std_return: 3435 if (DM_EVENT_ENABLED(dp, DM_EVENT_POSTSYMLINK)) { 3436 (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTSYMLINK, 3437 dir_vp, DM_RIGHT_NULL, 3438 error ? 
NULL : XFS_ITOV(ip), 3439 DM_RIGHT_NULL, link_name, target_path, 3440 0, error, 0); 3441 } 3442 3443 if (!error) { 3444 bhv_vnode_t *vp; 3445 3446 ASSERT(ip); 3447 vp = XFS_ITOV(ip); 3448 *vpp = vp; 3449 } 3450 return error; 3451 3452 error2: 3453 IRELE(ip); 3454 error1: 3455 xfs_bmap_cancel(&free_list); 3456 cancel_flags |= XFS_TRANS_ABORT; 3457 error_return: 3458 xfs_trans_cancel(tp, cancel_flags); 3459 XFS_QM_DQRELE(mp, udqp); 3460 XFS_QM_DQRELE(mp, gdqp); 3461 3462 if (unlock_dp_on_error) 3463 xfs_iunlock(dp, XFS_ILOCK_EXCL); 3464 3465 goto std_return; 3466} 3467 3468 3469int 3470xfs_fid2( 3471 xfs_inode_t *ip, 3472 xfs_fid_t *xfid) 3473{ 3474 vn_trace_entry(ip, __FUNCTION__, (inst_t *)__return_address); 3475 3476 xfid->fid_len = sizeof(xfs_fid_t) - sizeof(xfid->fid_len); 3477 xfid->fid_pad = 0; 3478 /* 3479 * use memcpy because the inode is a long long and there's no 3480 * assurance that xfid->fid_ino is properly aligned. 3481 */ 3482 memcpy(&xfid->fid_ino, &ip->i_ino, sizeof(xfid->fid_ino)); 3483 xfid->fid_gen = ip->i_d.di_gen; 3484 3485 return 0; 3486} 3487 3488 3489int 3490xfs_rwlock( 3491 xfs_inode_t *ip, 3492 bhv_vrwlock_t locktype) 3493{ 3494 if (S_ISDIR(ip->i_d.di_mode)) 3495 return 1; 3496 if (locktype == VRWLOCK_WRITE) { 3497 xfs_ilock(ip, XFS_IOLOCK_EXCL); 3498 } else if (locktype == VRWLOCK_TRY_READ) { 3499 return xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED); 3500 } else if (locktype == VRWLOCK_TRY_WRITE) { 3501 return xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL); 3502 } else { 3503 ASSERT((locktype == VRWLOCK_READ) || 3504 (locktype == VRWLOCK_WRITE_DIRECT)); 3505 xfs_ilock(ip, XFS_IOLOCK_SHARED); 3506 } 3507 3508 return 1; 3509} 3510 3511 3512void 3513xfs_rwunlock( 3514 xfs_inode_t *ip, 3515 bhv_vrwlock_t locktype) 3516{ 3517 if (S_ISDIR(ip->i_d.di_mode)) 3518 return; 3519 if (locktype == VRWLOCK_WRITE) { 3520 /* 3521 * In the write case, we may have added a new entry to 3522 * the reference cache. This might store a pointer to 3523 * an inode to be released in this inode. If it is there, 3524 * clear the pointer and release the inode after unlocking 3525 * this one. 3526 */ 3527 xfs_refcache_iunlock(ip, XFS_IOLOCK_EXCL); 3528 } else { 3529 ASSERT((locktype == VRWLOCK_READ) || 3530 (locktype == VRWLOCK_WRITE_DIRECT)); 3531 xfs_iunlock(ip, XFS_IOLOCK_SHARED); 3532 } 3533 return; 3534} 3535 3536 3537int 3538xfs_inode_flush( 3539 xfs_inode_t *ip, 3540 int flags) 3541{ 3542 xfs_mount_t *mp = ip->i_mount; 3543 xfs_inode_log_item_t *iip = ip->i_itemp; 3544 int error = 0; 3545 3546 if (XFS_FORCED_SHUTDOWN(mp)) 3547 return XFS_ERROR(EIO); 3548 3549 /* 3550 * Bypass inodes which have already been cleaned by 3551 * the inode flush clustering code inside xfs_iflush 3552 */ 3553 if ((ip->i_update_core == 0) && 3554 ((iip == NULL) || !(iip->ili_format.ilf_fields & XFS_ILOG_ALL))) 3555 return 0; 3556 3557 if (flags & FLUSH_LOG) { 3558 if (iip && iip->ili_last_lsn) { 3559 xlog_t *log = mp->m_log; 3560 xfs_lsn_t sync_lsn; 3561 int s, log_flags = XFS_LOG_FORCE; 3562 3563 s = GRANT_LOCK(log); 3564 sync_lsn = log->l_last_sync_lsn; 3565 GRANT_UNLOCK(log, s); 3566 3567 if ((XFS_LSN_CMP(iip->ili_last_lsn, sync_lsn) > 0)) { 3568 if (flags & FLUSH_SYNC) 3569 log_flags |= XFS_LOG_SYNC; 3570 error = xfs_log_force(mp, iip->ili_last_lsn, log_flags); 3571 if (error) 3572 return error; 3573 } 3574 3575 if (ip->i_update_core == 0) 3576 return 0; 3577 } 3578 } 3579 3580 /* 3581 * We make this non-blocking if the inode is contended, 3582 * return EAGAIN to indicate to the caller that they 3583 * did not succeed. 
This prevents the flush path from 3584 * blocking on inodes inside another operation right 3585 * now, they get caught later by xfs_sync. 3586 */ 3587 if (flags & FLUSH_INODE) { 3588 int flush_flags; 3589 3590 if (flags & FLUSH_SYNC) { 3591 xfs_ilock(ip, XFS_ILOCK_SHARED); 3592 xfs_iflock(ip); 3593 } else if (xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) { 3594 if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip)) { 3595 xfs_iunlock(ip, XFS_ILOCK_SHARED); 3596 return EAGAIN; 3597 } 3598 } else { 3599 return EAGAIN; 3600 } 3601 3602 if (flags & FLUSH_SYNC) 3603 flush_flags = XFS_IFLUSH_SYNC; 3604 else 3605 flush_flags = XFS_IFLUSH_ASYNC; 3606 3607 error = xfs_iflush(ip, flush_flags); 3608 xfs_iunlock(ip, XFS_ILOCK_SHARED); 3609 } 3610 3611 return error; 3612} 3613 3614 3615int 3616xfs_set_dmattrs( 3617 xfs_inode_t *ip, 3618 u_int evmask, 3619 u_int16_t state) 3620{ 3621 xfs_mount_t *mp = ip->i_mount; 3622 xfs_trans_t *tp; 3623 int error; 3624 3625 if (!capable(CAP_SYS_ADMIN)) 3626 return XFS_ERROR(EPERM); 3627 3628 if (XFS_FORCED_SHUTDOWN(mp)) 3629 return XFS_ERROR(EIO); 3630 3631 tp = xfs_trans_alloc(mp, XFS_TRANS_SET_DMATTRS); 3632 error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES (mp), 0, 0, 0); 3633 if (error) { 3634 xfs_trans_cancel(tp, 0); 3635 return error; 3636 } 3637 xfs_ilock(ip, XFS_ILOCK_EXCL); 3638 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 3639 3640 ip->i_iocore.io_dmevmask = ip->i_d.di_dmevmask = evmask; 3641 ip->i_iocore.io_dmstate = ip->i_d.di_dmstate = state; 3642 3643 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 3644 IHOLD(ip); 3645 error = xfs_trans_commit(tp, 0); 3646 3647 return error; 3648} 3649 3650int 3651xfs_reclaim( 3652 xfs_inode_t *ip) 3653{ 3654 bhv_vnode_t *vp = XFS_ITOV(ip); 3655 3656 vn_trace_entry(ip, __FUNCTION__, (inst_t *)__return_address); 3657 3658 ASSERT(!VN_MAPPED(vp)); 3659 3660 /* bad inode, get out here ASAP */ 3661 if (VN_BAD(vp)) { 3662 xfs_ireclaim(ip); 3663 return 0; 3664 } 3665 3666 vn_iowait(ip); 3667 3668 ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0); 3669 3670 /* 3671 * Make sure the atime in the XFS inode is correct before freeing the 3672 * Linux inode. 3673 */ 3674 xfs_synchronize_atime(ip); 3675 3676 /* 3677 * If we have nothing to flush with this inode then complete the 3678 * teardown now, otherwise break the link between the xfs inode and the 3679 * linux inode and clean up the xfs inode later. This avoids flushing 3680 * the inode to disk during the delete operation itself. 3681 * 3682 * When breaking the link, we need to set the XFS_IRECLAIMABLE flag 3683 * first to ensure that xfs_iunpin() will never see an xfs inode 3684 * that has a linux inode being reclaimed. Synchronisation is provided 3685 * by the i_flags_lock. 
3686 */ 3687 if (!ip->i_update_core && (ip->i_itemp == NULL)) { 3688 xfs_ilock(ip, XFS_ILOCK_EXCL); 3689 xfs_iflock(ip); 3690 return xfs_finish_reclaim(ip, 1, XFS_IFLUSH_DELWRI_ELSE_SYNC); 3691 } else { 3692 xfs_mount_t *mp = ip->i_mount; 3693 3694 /* Protect sync and unpin from us */ 3695 XFS_MOUNT_ILOCK(mp); 3696 spin_lock(&ip->i_flags_lock); 3697 __xfs_iflags_set(ip, XFS_IRECLAIMABLE); 3698 vn_to_inode(vp)->i_private = NULL; 3699 ip->i_vnode = NULL; 3700 spin_unlock(&ip->i_flags_lock); 3701 list_add_tail(&ip->i_reclaim, &mp->m_del_inodes); 3702 XFS_MOUNT_IUNLOCK(mp); 3703 } 3704 return 0; 3705} 3706 3707int 3708xfs_finish_reclaim( 3709 xfs_inode_t *ip, 3710 int locked, 3711 int sync_mode) 3712{ 3713 xfs_perag_t *pag = xfs_get_perag(ip->i_mount, ip->i_ino); 3714 bhv_vnode_t *vp = XFS_ITOV_NULL(ip); 3715 int error; 3716 3717 if (vp && VN_BAD(vp)) 3718 goto reclaim; 3719 3720 /* The hash lock here protects a thread in xfs_iget_core from 3721 * racing with us on linking the inode back with a vnode. 3722 * Once we have the XFS_IRECLAIM flag set it will not touch 3723 * us. 3724 */ 3725 write_lock(&pag->pag_ici_lock); 3726 spin_lock(&ip->i_flags_lock); 3727 if (__xfs_iflags_test(ip, XFS_IRECLAIM) || 3728 (!__xfs_iflags_test(ip, XFS_IRECLAIMABLE) && vp == NULL)) { 3729 spin_unlock(&ip->i_flags_lock); 3730 write_unlock(&pag->pag_ici_lock); 3731 if (locked) { 3732 xfs_ifunlock(ip); 3733 xfs_iunlock(ip, XFS_ILOCK_EXCL); 3734 } 3735 return 1; 3736 } 3737 __xfs_iflags_set(ip, XFS_IRECLAIM); 3738 spin_unlock(&ip->i_flags_lock); 3739 write_unlock(&pag->pag_ici_lock); 3740 xfs_put_perag(ip->i_mount, pag); 3741 3742 /* 3743 * If the inode is still dirty, then flush it out. If the inode 3744 * is not in the AIL, then it will be OK to flush it delwri as 3745 * long as xfs_iflush() does not keep any references to the inode. 3746 * We leave that decision up to xfs_iflush() since it has the 3747 * knowledge of whether it's OK to simply do a delwri flush of 3748 * the inode or whether we need to wait until the inode is 3749 * pulled from the AIL. 3750 * We get the flush lock regardless, though, just to make sure 3751 * we don't free it while it is being flushed. 3752 */ 3753 if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { 3754 if (!locked) { 3755 xfs_ilock(ip, XFS_ILOCK_EXCL); 3756 xfs_iflock(ip); 3757 } 3758 3759 if (ip->i_update_core || 3760 ((ip->i_itemp != NULL) && 3761 (ip->i_itemp->ili_format.ilf_fields != 0))) { 3762 error = xfs_iflush(ip, sync_mode); 3763 /* 3764 * If we hit an error, typically because of filesystem 3765 * shutdown, we don't need to let vn_reclaim to know 3766 * because we're gonna reclaim the inode anyway. 3767 */ 3768 if (error) { 3769 xfs_iunlock(ip, XFS_ILOCK_EXCL); 3770 goto reclaim; 3771 } 3772 xfs_iflock(ip); /* synchronize with xfs_iflush_done */ 3773 } 3774 3775 ASSERT(ip->i_update_core == 0); 3776 ASSERT(ip->i_itemp == NULL || 3777 ip->i_itemp->ili_format.ilf_fields == 0); 3778 xfs_iunlock(ip, XFS_ILOCK_EXCL); 3779 } else if (locked) { 3780 /* 3781 * We are not interested in doing an iflush if we're 3782 * in the process of shutting down the filesystem forcibly. 3783 * So, just reclaim the inode. 
3784 */ 3785 xfs_ifunlock(ip); 3786 xfs_iunlock(ip, XFS_ILOCK_EXCL); 3787 } 3788 3789 reclaim: 3790 xfs_ireclaim(ip); 3791 return 0; 3792} 3793 3794int 3795xfs_finish_reclaim_all(xfs_mount_t *mp, int noblock) 3796{ 3797 int purged; 3798 xfs_inode_t *ip, *n; 3799 int done = 0; 3800 3801 while (!done) { 3802 purged = 0; 3803 XFS_MOUNT_ILOCK(mp); 3804 list_for_each_entry_safe(ip, n, &mp->m_del_inodes, i_reclaim) { 3805 if (noblock) { 3806 if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL) == 0) 3807 continue; 3808 if (xfs_ipincount(ip) || 3809 !xfs_iflock_nowait(ip)) { 3810 xfs_iunlock(ip, XFS_ILOCK_EXCL); 3811 continue; 3812 } 3813 } 3814 XFS_MOUNT_IUNLOCK(mp); 3815 if (xfs_finish_reclaim(ip, noblock, 3816 XFS_IFLUSH_DELWRI_ELSE_ASYNC)) 3817 delay(1); 3818 purged = 1; 3819 break; 3820 } 3821 3822 done = !purged; 3823 } 3824 3825 XFS_MOUNT_IUNLOCK(mp); 3826 return 0; 3827} 3828 3829/* 3830 * xfs_alloc_file_space() 3831 * This routine allocates disk space for the given file. 3832 * 3833 * If alloc_type == 0, this request is for an ALLOCSP type 3834 * request which will change the file size. In this case, no 3835 * DMAPI event will be generated by the call. A TRUNCATE event 3836 * will be generated later by xfs_setattr. 3837 * 3838 * If alloc_type != 0, this request is for a RESVSP type 3839 * request, and a DMAPI DM_EVENT_WRITE will be generated if the 3840 * lower block boundary byte address is less than the file's 3841 * length. 3842 * 3843 * RETURNS: 3844 * 0 on success 3845 * errno on error 3846 * 3847 */ 3848STATIC int 3849xfs_alloc_file_space( 3850 xfs_inode_t *ip, 3851 xfs_off_t offset, 3852 xfs_off_t len, 3853 int alloc_type, 3854 int attr_flags) 3855{ 3856 xfs_mount_t *mp = ip->i_mount; 3857 xfs_off_t count; 3858 xfs_filblks_t allocated_fsb; 3859 xfs_filblks_t allocatesize_fsb; 3860 xfs_extlen_t extsz, temp; 3861 xfs_fileoff_t startoffset_fsb; 3862 xfs_fsblock_t firstfsb; 3863 int nimaps; 3864 int bmapi_flag; 3865 int quota_flag; 3866 int rt; 3867 xfs_trans_t *tp; 3868 xfs_bmbt_irec_t imaps[1], *imapp; 3869 xfs_bmap_free_t free_list; 3870 uint qblocks, resblks, resrtextents; 3871 int committed; 3872 int error; 3873 3874 vn_trace_entry(ip, __FUNCTION__, (inst_t *)__return_address); 3875 3876 if (XFS_FORCED_SHUTDOWN(mp)) 3877 return XFS_ERROR(EIO); 3878 3879 if ((error = XFS_QM_DQATTACH(mp, ip, 0))) 3880 return error; 3881 3882 if (len <= 0) 3883 return XFS_ERROR(EINVAL); 3884 3885 rt = XFS_IS_REALTIME_INODE(ip); 3886 extsz = xfs_get_extsz_hint(ip); 3887 3888 count = len; 3889 imapp = &imaps[0]; 3890 nimaps = 1; 3891 bmapi_flag = XFS_BMAPI_WRITE | (alloc_type ? XFS_BMAPI_PREALLOC : 0); 3892 startoffset_fsb = XFS_B_TO_FSBT(mp, offset); 3893 allocatesize_fsb = XFS_B_TO_FSB(mp, count); 3894 3895 /* Generate a DMAPI event if needed. */ 3896 if (alloc_type != 0 && offset < ip->i_size && 3897 (attr_flags&ATTR_DMI) == 0 && 3898 DM_EVENT_ENABLED(ip, DM_EVENT_WRITE)) { 3899 xfs_off_t end_dmi_offset; 3900 3901 end_dmi_offset = offset+len; 3902 if (end_dmi_offset > ip->i_size) 3903 end_dmi_offset = ip->i_size; 3904 error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, XFS_ITOV(ip), 3905 offset, end_dmi_offset - offset, 3906 0, NULL); 3907 if (error) 3908 return error; 3909 } 3910 3911 /* 3912 * Allocate file space until done or until there is an error 3913 */ 3914retry: 3915 while (allocatesize_fsb && !error) { 3916 xfs_fileoff_t s, e; 3917 3918 /* 3919 * Determine space reservations for data/realtime. 
3920 */ 3921 if (unlikely(extsz)) { 3922 s = startoffset_fsb; 3923 do_div(s, extsz); 3924 s *= extsz; 3925 e = startoffset_fsb + allocatesize_fsb; 3926 if ((temp = do_mod(startoffset_fsb, extsz))) 3927 e += temp; 3928 if ((temp = do_mod(e, extsz))) 3929 e += extsz - temp; 3930 } else { 3931 s = 0; 3932 e = allocatesize_fsb; 3933 } 3934 3935 if (unlikely(rt)) { 3936 resrtextents = qblocks = (uint)(e - s); 3937 resrtextents /= mp->m_sb.sb_rextsize; 3938 resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0); 3939 quota_flag = XFS_QMOPT_RES_RTBLKS; 3940 } else { 3941 resrtextents = 0; 3942 resblks = qblocks = \ 3943 XFS_DIOSTRAT_SPACE_RES(mp, (uint)(e - s)); 3944 quota_flag = XFS_QMOPT_RES_REGBLKS; 3945 } 3946 3947 /* 3948 * Allocate and setup the transaction. 3949 */ 3950 tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); 3951 error = xfs_trans_reserve(tp, resblks, 3952 XFS_WRITE_LOG_RES(mp), resrtextents, 3953 XFS_TRANS_PERM_LOG_RES, 3954 XFS_WRITE_LOG_COUNT); 3955 /* 3956 * Check for running out of space 3957 */ 3958 if (error) { 3959 /* 3960 * Free the transaction structure. 3961 */ 3962 ASSERT(error == ENOSPC || XFS_FORCED_SHUTDOWN(mp)); 3963 xfs_trans_cancel(tp, 0); 3964 break; 3965 } 3966 xfs_ilock(ip, XFS_ILOCK_EXCL); 3967 error = XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip, 3968 qblocks, 0, quota_flag); 3969 if (error) 3970 goto error1; 3971 3972 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 3973 xfs_trans_ihold(tp, ip); 3974 3975 /* 3976 * Issue the xfs_bmapi() call to allocate the blocks 3977 */ 3978 XFS_BMAP_INIT(&free_list, &firstfsb); 3979 error = XFS_BMAPI(mp, tp, &ip->i_iocore, startoffset_fsb, 3980 allocatesize_fsb, bmapi_flag, 3981 &firstfsb, 0, imapp, &nimaps, 3982 &free_list, NULL); 3983 if (error) { 3984 goto error0; 3985 } 3986 3987 /* 3988 * Complete the transaction 3989 */ 3990 error = xfs_bmap_finish(&tp, &free_list, &committed); 3991 if (error) { 3992 goto error0; 3993 } 3994 3995 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 3996 xfs_iunlock(ip, XFS_ILOCK_EXCL); 3997 if (error) { 3998 break; 3999 } 4000 4001 allocated_fsb = imapp->br_blockcount; 4002 4003 if (nimaps == 0) { 4004 error = XFS_ERROR(ENOSPC); 4005 break; 4006 } 4007 4008 startoffset_fsb += allocated_fsb; 4009 allocatesize_fsb -= allocated_fsb; 4010 } 4011dmapi_enospc_check: 4012 if (error == ENOSPC && (attr_flags & ATTR_DMI) == 0 && 4013 DM_EVENT_ENABLED(ip, DM_EVENT_NOSPACE)) { 4014 error = XFS_SEND_NAMESP(mp, DM_EVENT_NOSPACE, 4015 XFS_ITOV(ip), DM_RIGHT_NULL, 4016 XFS_ITOV(ip), DM_RIGHT_NULL, 4017 NULL, NULL, 0, 0, 0); /* Delay flag intentionally unused */ 4018 if (error == 0) 4019 goto retry; /* Maybe DMAPI app. has made space */ 4020 /* else fall through with error from XFS_SEND_DATA */ 4021 } 4022 4023 return error; 4024 4025error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */ 4026 xfs_bmap_cancel(&free_list); 4027 XFS_TRANS_UNRESERVE_QUOTA_NBLKS(mp, tp, ip, qblocks, 0, quota_flag); 4028 4029error1: /* Just cancel transaction */ 4030 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); 4031 xfs_iunlock(ip, XFS_ILOCK_EXCL); 4032 goto dmapi_enospc_check; 4033} 4034 4035/* 4036 * Zero file bytes between startoff and endoff inclusive. 4037 * The iolock is held exclusive and no blocks are buffered. 
4038 */ 4039STATIC int 4040xfs_zero_remaining_bytes( 4041 xfs_inode_t *ip, 4042 xfs_off_t startoff, 4043 xfs_off_t endoff) 4044{ 4045 xfs_bmbt_irec_t imap; 4046 xfs_fileoff_t offset_fsb; 4047 xfs_off_t lastoffset; 4048 xfs_off_t offset; 4049 xfs_buf_t *bp; 4050 xfs_mount_t *mp = ip->i_mount; 4051 int nimap; 4052 int error = 0; 4053 4054 bp = xfs_buf_get_noaddr(mp->m_sb.sb_blocksize, 4055 ip->i_d.di_flags & XFS_DIFLAG_REALTIME ? 4056 mp->m_rtdev_targp : mp->m_ddev_targp); 4057 4058 for (offset = startoff; offset <= endoff; offset = lastoffset + 1) { 4059 offset_fsb = XFS_B_TO_FSBT(mp, offset); 4060 nimap = 1; 4061 error = XFS_BMAPI(mp, NULL, &ip->i_iocore, offset_fsb, 1, 0, 4062 NULL, 0, &imap, &nimap, NULL, NULL); 4063 if (error || nimap < 1) 4064 break; 4065 ASSERT(imap.br_blockcount >= 1); 4066 ASSERT(imap.br_startoff == offset_fsb); 4067 lastoffset = XFS_FSB_TO_B(mp, imap.br_startoff + 1) - 1; 4068 if (lastoffset > endoff) 4069 lastoffset = endoff; 4070 if (imap.br_startblock == HOLESTARTBLOCK) 4071 continue; 4072 ASSERT(imap.br_startblock != DELAYSTARTBLOCK); 4073 if (imap.br_state == XFS_EXT_UNWRITTEN) 4074 continue; 4075 XFS_BUF_UNDONE(bp); 4076 XFS_BUF_UNWRITE(bp); 4077 XFS_BUF_READ(bp); 4078 XFS_BUF_SET_ADDR(bp, XFS_FSB_TO_DB(ip, imap.br_startblock)); 4079 xfsbdstrat(mp, bp); 4080 if ((error = xfs_iowait(bp))) { 4081 xfs_ioerror_alert("xfs_zero_remaining_bytes(read)", 4082 mp, bp, XFS_BUF_ADDR(bp)); 4083 break; 4084 } 4085 memset(XFS_BUF_PTR(bp) + 4086 (offset - XFS_FSB_TO_B(mp, imap.br_startoff)), 4087 0, lastoffset - offset + 1); 4088 XFS_BUF_UNDONE(bp); 4089 XFS_BUF_UNREAD(bp); 4090 XFS_BUF_WRITE(bp); 4091 xfsbdstrat(mp, bp); 4092 if ((error = xfs_iowait(bp))) { 4093 xfs_ioerror_alert("xfs_zero_remaining_bytes(write)", 4094 mp, bp, XFS_BUF_ADDR(bp)); 4095 break; 4096 } 4097 } 4098 xfs_buf_free(bp); 4099 return error; 4100} 4101 4102/* 4103 * xfs_free_file_space() 4104 * This routine frees disk space for the given file. 4105 * 4106 * This routine is only called by xfs_change_file_space 4107 * for an UNRESVSP type call. 
4108 * 4109 * RETURNS: 4110 * 0 on success 4111 * errno on error 4112 * 4113 */ 4114STATIC int 4115xfs_free_file_space( 4116 xfs_inode_t *ip, 4117 xfs_off_t offset, 4118 xfs_off_t len, 4119 int attr_flags) 4120{ 4121 bhv_vnode_t *vp; 4122 int committed; 4123 int done; 4124 xfs_off_t end_dmi_offset; 4125 xfs_fileoff_t endoffset_fsb; 4126 int error; 4127 xfs_fsblock_t firstfsb; 4128 xfs_bmap_free_t free_list; 4129 xfs_bmbt_irec_t imap; 4130 xfs_off_t ioffset; 4131 xfs_extlen_t mod=0; 4132 xfs_mount_t *mp; 4133 int nimap; 4134 uint resblks; 4135 uint rounding; 4136 int rt; 4137 xfs_fileoff_t startoffset_fsb; 4138 xfs_trans_t *tp; 4139 int need_iolock = 1; 4140 4141 vp = XFS_ITOV(ip); 4142 mp = ip->i_mount; 4143 4144 vn_trace_entry(ip, __FUNCTION__, (inst_t *)__return_address); 4145 4146 if ((error = XFS_QM_DQATTACH(mp, ip, 0))) 4147 return error; 4148 4149 error = 0; 4150 if (len <= 0) /* if nothing being freed */ 4151 return error; 4152 rt = (ip->i_d.di_flags & XFS_DIFLAG_REALTIME); 4153 startoffset_fsb = XFS_B_TO_FSB(mp, offset); 4154 end_dmi_offset = offset + len; 4155 endoffset_fsb = XFS_B_TO_FSBT(mp, end_dmi_offset); 4156 4157 if (offset < ip->i_size && (attr_flags & ATTR_DMI) == 0 && 4158 DM_EVENT_ENABLED(ip, DM_EVENT_WRITE)) { 4159 if (end_dmi_offset > ip->i_size) 4160 end_dmi_offset = ip->i_size; 4161 error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, vp, 4162 offset, end_dmi_offset - offset, 4163 AT_DELAY_FLAG(attr_flags), NULL); 4164 if (error) 4165 return error; 4166 } 4167 4168 if (attr_flags & ATTR_NOLOCK) 4169 need_iolock = 0; 4170 if (need_iolock) { 4171 xfs_ilock(ip, XFS_IOLOCK_EXCL); 4172 vn_iowait(ip); /* wait for the completion of any pending DIOs */ 4173 } 4174 4175 rounding = max_t(uint, 1 << mp->m_sb.sb_blocklog, NBPP); 4176 ioffset = offset & ~(rounding - 1); 4177 4178 if (VN_CACHED(vp) != 0) { 4179 xfs_inval_cached_trace(&ip->i_iocore, ioffset, -1, 4180 ctooff(offtoct(ioffset)), -1); 4181 error = xfs_flushinval_pages(ip, 4182 ctooff(offtoct(ioffset)), 4183 -1, FI_REMAPF_LOCKED); 4184 if (error) 4185 goto out_unlock_iolock; 4186 } 4187 4188 /* 4189 * Need to zero the stuff we're not freeing, on disk. 4190 * If its a realtime file & can't use unwritten extents then we 4191 * actually need to zero the extent edges. Otherwise xfs_bunmapi 4192 * will take care of it for us. 

        /*
         * Need to zero the stuff we're not freeing, on disk.
         * If it's a realtime file & can't use unwritten extents then we
         * actually need to zero the extent edges.  Otherwise xfs_bunmapi
         * will take care of it for us.
         */
        if (rt && !XFS_SB_VERSION_HASEXTFLGBIT(&mp->m_sb)) {
                nimap = 1;
                error = XFS_BMAPI(mp, NULL, &ip->i_iocore, startoffset_fsb,
                        1, 0, NULL, 0, &imap, &nimap, NULL, NULL);
                if (error)
                        goto out_unlock_iolock;
                ASSERT(nimap == 0 || nimap == 1);
                if (nimap && imap.br_startblock != HOLESTARTBLOCK) {
                        xfs_daddr_t     block;

                        ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
                        block = imap.br_startblock;
                        mod = do_div(block, mp->m_sb.sb_rextsize);
                        if (mod)
                                startoffset_fsb += mp->m_sb.sb_rextsize - mod;
                }
                nimap = 1;
                error = XFS_BMAPI(mp, NULL, &ip->i_iocore, endoffset_fsb - 1,
                        1, 0, NULL, 0, &imap, &nimap, NULL, NULL);
                if (error)
                        goto out_unlock_iolock;
                ASSERT(nimap == 0 || nimap == 1);
                if (nimap && imap.br_startblock != HOLESTARTBLOCK) {
                        ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
                        mod++;
                        if (mod && (mod != mp->m_sb.sb_rextsize))
                                endoffset_fsb -= mod;
                }
        }
        if ((done = (endoffset_fsb <= startoffset_fsb)))
                /*
                 * One contiguous piece to clear
                 */
                error = xfs_zero_remaining_bytes(ip, offset, offset + len - 1);
        else {
                /*
                 * Some full blocks, possibly two pieces to clear
                 */
                if (offset < XFS_FSB_TO_B(mp, startoffset_fsb))
                        error = xfs_zero_remaining_bytes(ip, offset,
                                XFS_FSB_TO_B(mp, startoffset_fsb) - 1);
                if (!error &&
                    XFS_FSB_TO_B(mp, endoffset_fsb) < offset + len)
                        error = xfs_zero_remaining_bytes(ip,
                                XFS_FSB_TO_B(mp, endoffset_fsb),
                                offset + len - 1);
        }

        /*
         * free file space until done or until there is an error
         */
        resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
        while (!error && !done) {

                /*
                 * allocate and setup the transaction. Allow this
                 * transaction to dip into the reserve blocks to ensure
                 * the freeing of the space succeeds at ENOSPC.
                 */
                tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
                tp->t_flags |= XFS_TRANS_RESERVE;
                error = xfs_trans_reserve(tp,
                                resblks,
                                XFS_WRITE_LOG_RES(mp),
                                0,
                                XFS_TRANS_PERM_LOG_RES,
                                XFS_WRITE_LOG_COUNT);

                /*
                 * check for running out of space
                 */
                if (error) {
                        /*
                         * Free the transaction structure.
                         */
                        ASSERT(error == ENOSPC || XFS_FORCED_SHUTDOWN(mp));
                        xfs_trans_cancel(tp, 0);
                        break;
                }
                xfs_ilock(ip, XFS_ILOCK_EXCL);
                error = XFS_TRANS_RESERVE_QUOTA(mp, tp,
                                ip->i_udquot, ip->i_gdquot, resblks, 0,
                                XFS_QMOPT_RES_REGBLKS);
                if (error)
                        goto error1;

                xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
                xfs_trans_ihold(tp, ip);

                /*
                 * issue the bunmapi() call to free the blocks
                 */
                XFS_BMAP_INIT(&free_list, &firstfsb);
                error = XFS_BUNMAPI(mp, tp, &ip->i_iocore, startoffset_fsb,
                                endoffset_fsb - startoffset_fsb,
                                0, 2, &firstfsb, &free_list, NULL, &done);
                if (error) {
                        goto error0;
                }

                /*
                 * complete the transaction
                 */
                error = xfs_bmap_finish(&tp, &free_list, &committed);
                if (error) {
                        goto error0;
                }

                error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
                xfs_iunlock(ip, XFS_ILOCK_EXCL);
        }

 out_unlock_iolock:
        if (need_iolock)
                xfs_iunlock(ip, XFS_IOLOCK_EXCL);
        return error;

 error0:
        xfs_bmap_cancel(&free_list);
 error1:
        xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
        xfs_iunlock(ip, need_iolock ? (XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL) :
                        XFS_ILOCK_EXCL);
        return error;
}
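
/*
 * Worked example for xfs_free_file_space (illustrative numbers, ordinary
 * non-realtime case): freeing offset = 100, len = 10000 on a 4k block
 * file system gives startoffset_fsb = 1 and endoffset_fsb = 2, so only
 * block 1 (bytes 4096-8191) is actually unmapped, while the partial
 * edges, bytes 100-4095 and 8192-10099, are zeroed on disk by
 * xfs_zero_remaining_bytes.
 */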

/*
 * xfs_change_file_space()
 * This routine allocates or frees disk space for the given file.
 * The user specified parameters are checked for alignment and size
 * limitations.
 *
 * RETURNS:
 *       0 on success
 *      errno on error
 *
 */
int
xfs_change_file_space(
        xfs_inode_t     *ip,
        int             cmd,
        xfs_flock64_t   *bf,
        xfs_off_t       offset,
        cred_t          *credp,
        int             attr_flags)
{
        xfs_mount_t     *mp = ip->i_mount;
        int             clrprealloc;
        int             error;
        xfs_fsize_t     fsize;
        int             setprealloc;
        xfs_off_t       startoffset;
        xfs_off_t       llen;
        xfs_trans_t     *tp;
        bhv_vattr_t     va;

        vn_trace_entry(ip, __FUNCTION__, (inst_t *)__return_address);

        /*
         * must be a regular file and have write permission
         */
        if (!S_ISREG(ip->i_d.di_mode))
                return XFS_ERROR(EINVAL);

        xfs_ilock(ip, XFS_ILOCK_SHARED);

        if ((error = xfs_iaccess(ip, S_IWUSR, credp))) {
                xfs_iunlock(ip, XFS_ILOCK_SHARED);
                return error;
        }

        xfs_iunlock(ip, XFS_ILOCK_SHARED);

        switch (bf->l_whence) {
        case 0: /*SEEK_SET*/
                break;
        case 1: /*SEEK_CUR*/
                bf->l_start += offset;
                break;
        case 2: /*SEEK_END*/
                bf->l_start += ip->i_size;
                break;
        default:
                return XFS_ERROR(EINVAL);
        }

        llen = bf->l_len > 0 ? bf->l_len - 1 : bf->l_len;

        if (   (bf->l_start < 0)
            || (bf->l_start > XFS_MAXIOFFSET(mp))
            || (bf->l_start + llen < 0)
            || (bf->l_start + llen > XFS_MAXIOFFSET(mp)))
                return XFS_ERROR(EINVAL);

        bf->l_whence = 0;

        startoffset = bf->l_start;
        fsize = ip->i_size;
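
        /*
         * For example (illustrative values): an l_whence of SEEK_CUR with
         * l_start = -4096 and a caller file position (offset) of 8192
         * yields an absolute l_start of 4096; with l_len = 8192 the last
         * byte is l_start + llen = 12287, and both ends must lie within
         * XFS_MAXIOFFSET(mp) or the request is rejected with EINVAL.
         */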

        /*
         * XFS_IOC_RESVSP and XFS_IOC_UNRESVSP will reserve or unreserve
         * file space.
         * These calls do NOT zero the data space allocated to the file,
         * nor do they change the file size.
         *
         * XFS_IOC_ALLOCSP and XFS_IOC_FREESP will allocate and free file
         * space.
         * These calls cause the new file data to be zeroed and the file
         * size to be changed.
         */
        setprealloc = clrprealloc = 0;

        switch (cmd) {
        case XFS_IOC_RESVSP:
        case XFS_IOC_RESVSP64:
                error = xfs_alloc_file_space(ip, startoffset, bf->l_len,
                                                1, attr_flags);
                if (error)
                        return error;
                setprealloc = 1;
                break;

        case XFS_IOC_UNRESVSP:
        case XFS_IOC_UNRESVSP64:
                if ((error = xfs_free_file_space(ip, startoffset, bf->l_len,
                                                attr_flags)))
                        return error;
                break;

        case XFS_IOC_ALLOCSP:
        case XFS_IOC_ALLOCSP64:
        case XFS_IOC_FREESP:
        case XFS_IOC_FREESP64:
                if (startoffset > fsize) {
                        error = xfs_alloc_file_space(ip, fsize,
                                        startoffset - fsize, 0, attr_flags);
                        if (error)
                                break;
                }

                va.va_mask = XFS_AT_SIZE;
                va.va_size = startoffset;

                error = xfs_setattr(ip, &va, attr_flags, credp);

                if (error)
                        return error;

                clrprealloc = 1;
                break;

        default:
                ASSERT(0);
                return XFS_ERROR(EINVAL);
        }

        /*
         * update the inode timestamp, mode, and prealloc flag bits
         */
        tp = xfs_trans_alloc(mp, XFS_TRANS_WRITEID);

        if ((error = xfs_trans_reserve(tp, 0, XFS_WRITEID_LOG_RES(mp),
                                        0, 0, 0))) {
                /* ASSERT(0); */
                xfs_trans_cancel(tp, 0);
                return error;
        }

        xfs_ilock(ip, XFS_ILOCK_EXCL);

        xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
        xfs_trans_ihold(tp, ip);

        if ((attr_flags & ATTR_DMI) == 0) {
                ip->i_d.di_mode &= ~S_ISUID;

                /*
                 * Note that we don't have to worry about mandatory
                 * file locking being disabled here because we only
                 * clear the S_ISGID bit if the Group execute bit is
                 * on, but if it was on then mandatory locking wouldn't
                 * have been enabled.
                 */
                if (ip->i_d.di_mode & S_IXGRP)
                        ip->i_d.di_mode &= ~S_ISGID;

                xfs_ichgtime(ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
        }
        if (setprealloc)
                ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC;
        else if (clrprealloc)
                ip->i_d.di_flags &= ~XFS_DIFLAG_PREALLOC;

        xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
        xfs_trans_set_sync(tp);

        error = xfs_trans_commit(tp, 0);

        xfs_iunlock(ip, XFS_ILOCK_EXCL);

        return error;
}
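
/*
 * Rough sketch of how this path is typically reached from user space
 * (illustrative only, reserving 16MB at the start of a file):
 *
 *      struct xfs_flock64 fl = {
 *              .l_whence = SEEK_SET,
 *              .l_start  = 0,
 *              .l_len    = 16 * 1024 * 1024,
 *      };
 *      if (xfsctl(path, fd, XFS_IOC_RESVSP64, &fl) < 0)
 *              perror("XFS_IOC_RESVSP64");
 *
 * which ends up in xfs_change_file_space() with cmd == XFS_IOC_RESVSP64
 * and, from there, in xfs_alloc_file_space() above.
 */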