Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at v2.6.26-rc9 4005 lines 101 kB view raw
1/* 2 * Copyright (c) 2000-2006 Silicon Graphics, Inc. 3 * All Rights Reserved. 4 * 5 * This program is free software; you can redistribute it and/or 6 * modify it under the terms of the GNU General Public License as 7 * published by the Free Software Foundation. 8 * 9 * This program is distributed in the hope that it would be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 * GNU General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program; if not, write the Free Software Foundation, 16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 17 */ 18 19#include "xfs.h" 20#include "xfs_fs.h" 21#include "xfs_types.h" 22#include "xfs_bit.h" 23#include "xfs_log.h" 24#include "xfs_inum.h" 25#include "xfs_trans.h" 26#include "xfs_sb.h" 27#include "xfs_ag.h" 28#include "xfs_dir2.h" 29#include "xfs_dmapi.h" 30#include "xfs_mount.h" 31#include "xfs_da_btree.h" 32#include "xfs_bmap_btree.h" 33#include "xfs_alloc_btree.h" 34#include "xfs_ialloc_btree.h" 35#include "xfs_dir2_sf.h" 36#include "xfs_attr_sf.h" 37#include "xfs_dinode.h" 38#include "xfs_inode.h" 39#include "xfs_inode_item.h" 40#include "xfs_itable.h" 41#include "xfs_btree.h" 42#include "xfs_ialloc.h" 43#include "xfs_alloc.h" 44#include "xfs_bmap.h" 45#include "xfs_attr.h" 46#include "xfs_rw.h" 47#include "xfs_error.h" 48#include "xfs_quota.h" 49#include "xfs_utils.h" 50#include "xfs_rtalloc.h" 51#include "xfs_trans_space.h" 52#include "xfs_log_priv.h" 53#include "xfs_filestream.h" 54#include "xfs_vnodeops.h" 55 56int 57xfs_open( 58 xfs_inode_t *ip) 59{ 60 int mode; 61 62 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) 63 return XFS_ERROR(EIO); 64 65 /* 66 * If it's a directory with any blocks, read-ahead block 0 67 * as we're almost certain to have the next operation be a read there. 
68 */ 69 if (S_ISDIR(ip->i_d.di_mode) && ip->i_d.di_nextents > 0) { 70 mode = xfs_ilock_map_shared(ip); 71 if (ip->i_d.di_nextents > 0) 72 (void)xfs_da_reada_buf(NULL, ip, 0, XFS_DATA_FORK); 73 xfs_iunlock(ip, mode); 74 } 75 return 0; 76} 77 78/* 79 * xfs_setattr 80 */ 81int 82xfs_setattr( 83 xfs_inode_t *ip, 84 bhv_vattr_t *vap, 85 int flags, 86 cred_t *credp) 87{ 88 xfs_mount_t *mp = ip->i_mount; 89 xfs_trans_t *tp; 90 int mask; 91 int code; 92 uint lock_flags; 93 uint commit_flags=0; 94 uid_t uid=0, iuid=0; 95 gid_t gid=0, igid=0; 96 int timeflags = 0; 97 xfs_prid_t projid=0, iprojid=0; 98 struct xfs_dquot *udqp, *gdqp, *olddquot1, *olddquot2; 99 int file_owner; 100 int need_iolock = 1; 101 102 xfs_itrace_entry(ip); 103 104 if (mp->m_flags & XFS_MOUNT_RDONLY) 105 return XFS_ERROR(EROFS); 106 107 /* 108 * Cannot set certain attributes. 109 */ 110 mask = vap->va_mask; 111 if (mask & XFS_AT_NOSET) { 112 return XFS_ERROR(EINVAL); 113 } 114 115 if (XFS_FORCED_SHUTDOWN(mp)) 116 return XFS_ERROR(EIO); 117 118 /* 119 * Timestamps do not need to be logged and hence do not 120 * need to be done within a transaction. 121 */ 122 if (mask & XFS_AT_UPDTIMES) { 123 ASSERT((mask & ~XFS_AT_UPDTIMES) == 0); 124 timeflags = ((mask & XFS_AT_UPDATIME) ? XFS_ICHGTIME_ACC : 0) | 125 ((mask & XFS_AT_UPDCTIME) ? XFS_ICHGTIME_CHG : 0) | 126 ((mask & XFS_AT_UPDMTIME) ? XFS_ICHGTIME_MOD : 0); 127 xfs_ichgtime(ip, timeflags); 128 return 0; 129 } 130 131 olddquot1 = olddquot2 = NULL; 132 udqp = gdqp = NULL; 133 134 /* 135 * If disk quotas is on, we make sure that the dquots do exist on disk, 136 * before we start any other transactions. Trying to do this later 137 * is messy. We don't care to take a readlock to look at the ids 138 * in inode here, because we can't hold it across the trans_reserve. 139 * If the IDs do change before we take the ilock, we're covered 140 * because the i_*dquot fields will get updated anyway. 
141 */ 142 if (XFS_IS_QUOTA_ON(mp) && 143 (mask & (XFS_AT_UID|XFS_AT_GID|XFS_AT_PROJID))) { 144 uint qflags = 0; 145 146 if ((mask & XFS_AT_UID) && XFS_IS_UQUOTA_ON(mp)) { 147 uid = vap->va_uid; 148 qflags |= XFS_QMOPT_UQUOTA; 149 } else { 150 uid = ip->i_d.di_uid; 151 } 152 if ((mask & XFS_AT_GID) && XFS_IS_GQUOTA_ON(mp)) { 153 gid = vap->va_gid; 154 qflags |= XFS_QMOPT_GQUOTA; 155 } else { 156 gid = ip->i_d.di_gid; 157 } 158 if ((mask & XFS_AT_PROJID) && XFS_IS_PQUOTA_ON(mp)) { 159 projid = vap->va_projid; 160 qflags |= XFS_QMOPT_PQUOTA; 161 } else { 162 projid = ip->i_d.di_projid; 163 } 164 /* 165 * We take a reference when we initialize udqp and gdqp, 166 * so it is important that we never blindly double trip on 167 * the same variable. See xfs_create() for an example. 168 */ 169 ASSERT(udqp == NULL); 170 ASSERT(gdqp == NULL); 171 code = XFS_QM_DQVOPALLOC(mp, ip, uid, gid, projid, qflags, 172 &udqp, &gdqp); 173 if (code) 174 return code; 175 } 176 177 /* 178 * For the other attributes, we acquire the inode lock and 179 * first do an error checking pass. 180 */ 181 tp = NULL; 182 lock_flags = XFS_ILOCK_EXCL; 183 if (flags & ATTR_NOLOCK) 184 need_iolock = 0; 185 if (!(mask & XFS_AT_SIZE)) { 186 if ((mask != (XFS_AT_CTIME|XFS_AT_ATIME|XFS_AT_MTIME)) || 187 (mp->m_flags & XFS_MOUNT_WSYNC)) { 188 tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE); 189 commit_flags = 0; 190 if ((code = xfs_trans_reserve(tp, 0, 191 XFS_ICHANGE_LOG_RES(mp), 0, 192 0, 0))) { 193 lock_flags = 0; 194 goto error_return; 195 } 196 } 197 } else { 198 if (DM_EVENT_ENABLED(ip, DM_EVENT_TRUNCATE) && 199 !(flags & ATTR_DMI)) { 200 int dmflags = AT_DELAY_FLAG(flags) | DM_SEM_FLAG_WR; 201 code = XFS_SEND_DATA(mp, DM_EVENT_TRUNCATE, ip, 202 vap->va_size, 0, dmflags, NULL); 203 if (code) { 204 lock_flags = 0; 205 goto error_return; 206 } 207 } 208 if (need_iolock) 209 lock_flags |= XFS_IOLOCK_EXCL; 210 } 211 212 xfs_ilock(ip, lock_flags); 213 214 /* boolean: are we the file owner? 
*/ 215 file_owner = (current_fsuid(credp) == ip->i_d.di_uid); 216 217 /* 218 * Change various properties of a file. 219 * Only the owner or users with CAP_FOWNER 220 * capability may do these things. 221 */ 222 if (mask & 223 (XFS_AT_MODE|XFS_AT_XFLAGS|XFS_AT_EXTSIZE|XFS_AT_UID| 224 XFS_AT_GID|XFS_AT_PROJID)) { 225 /* 226 * CAP_FOWNER overrides the following restrictions: 227 * 228 * The user ID of the calling process must be equal 229 * to the file owner ID, except in cases where the 230 * CAP_FSETID capability is applicable. 231 */ 232 if (!file_owner && !capable(CAP_FOWNER)) { 233 code = XFS_ERROR(EPERM); 234 goto error_return; 235 } 236 237 /* 238 * CAP_FSETID overrides the following restrictions: 239 * 240 * The effective user ID of the calling process shall match 241 * the file owner when setting the set-user-ID and 242 * set-group-ID bits on that file. 243 * 244 * The effective group ID or one of the supplementary group 245 * IDs of the calling process shall match the group owner of 246 * the file when setting the set-group-ID bit on that file 247 */ 248 if (mask & XFS_AT_MODE) { 249 mode_t m = 0; 250 251 if ((vap->va_mode & S_ISUID) && !file_owner) 252 m |= S_ISUID; 253 if ((vap->va_mode & S_ISGID) && 254 !in_group_p((gid_t)ip->i_d.di_gid)) 255 m |= S_ISGID; 256#if 0 257 /* Linux allows this, Irix doesn't. */ 258 if ((vap->va_mode & S_ISVTX) && !S_ISDIR(ip->i_d.di_mode)) 259 m |= S_ISVTX; 260#endif 261 if (m && !capable(CAP_FSETID)) 262 vap->va_mode &= ~m; 263 } 264 } 265 266 /* 267 * Change file ownership. Must be the owner or privileged. 268 * If the system was configured with the "restricted_chown" 269 * option, the owner is not permitted to give away the file, 270 * and can change the group id only to a group of which he 271 * or she is a member. 272 */ 273 if (mask & (XFS_AT_UID|XFS_AT_GID|XFS_AT_PROJID)) { 274 /* 275 * These IDs could have changed since we last looked at them. 
276 * But, we're assured that if the ownership did change 277 * while we didn't have the inode locked, inode's dquot(s) 278 * would have changed also. 279 */ 280 iuid = ip->i_d.di_uid; 281 iprojid = ip->i_d.di_projid; 282 igid = ip->i_d.di_gid; 283 gid = (mask & XFS_AT_GID) ? vap->va_gid : igid; 284 uid = (mask & XFS_AT_UID) ? vap->va_uid : iuid; 285 projid = (mask & XFS_AT_PROJID) ? (xfs_prid_t)vap->va_projid : 286 iprojid; 287 288 /* 289 * CAP_CHOWN overrides the following restrictions: 290 * 291 * If _POSIX_CHOWN_RESTRICTED is defined, this capability 292 * shall override the restriction that a process cannot 293 * change the user ID of a file it owns and the restriction 294 * that the group ID supplied to the chown() function 295 * shall be equal to either the group ID or one of the 296 * supplementary group IDs of the calling process. 297 */ 298 if (restricted_chown && 299 (iuid != uid || (igid != gid && 300 !in_group_p((gid_t)gid))) && 301 !capable(CAP_CHOWN)) { 302 code = XFS_ERROR(EPERM); 303 goto error_return; 304 } 305 /* 306 * Do a quota reservation only if uid/projid/gid is actually 307 * going to change. 308 */ 309 if ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) || 310 (XFS_IS_PQUOTA_ON(mp) && iprojid != projid) || 311 (XFS_IS_GQUOTA_ON(mp) && igid != gid)) { 312 ASSERT(tp); 313 code = XFS_QM_DQVOPCHOWNRESV(mp, tp, ip, udqp, gdqp, 314 capable(CAP_FOWNER) ? 315 XFS_QMOPT_FORCE_RES : 0); 316 if (code) /* out of quota */ 317 goto error_return; 318 } 319 } 320 321 /* 322 * Truncate file. Must have write permission and not be a directory. 
323 */ 324 if (mask & XFS_AT_SIZE) { 325 /* Short circuit the truncate case for zero length files */ 326 if ((vap->va_size == 0) && 327 (ip->i_size == 0) && (ip->i_d.di_nextents == 0)) { 328 xfs_iunlock(ip, XFS_ILOCK_EXCL); 329 lock_flags &= ~XFS_ILOCK_EXCL; 330 if (mask & XFS_AT_CTIME) 331 xfs_ichgtime(ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 332 code = 0; 333 goto error_return; 334 } 335 336 if (S_ISDIR(ip->i_d.di_mode)) { 337 code = XFS_ERROR(EISDIR); 338 goto error_return; 339 } else if (!S_ISREG(ip->i_d.di_mode)) { 340 code = XFS_ERROR(EINVAL); 341 goto error_return; 342 } 343 /* 344 * Make sure that the dquots are attached to the inode. 345 */ 346 if ((code = XFS_QM_DQATTACH(mp, ip, XFS_QMOPT_ILOCKED))) 347 goto error_return; 348 } 349 350 /* 351 * Change file access or modified times. 352 */ 353 if (mask & (XFS_AT_ATIME|XFS_AT_MTIME)) { 354 if (!file_owner) { 355 if ((flags & ATTR_UTIME) && 356 !capable(CAP_FOWNER)) { 357 code = XFS_ERROR(EPERM); 358 goto error_return; 359 } 360 } 361 } 362 363 /* 364 * Change extent size or realtime flag. 365 */ 366 if (mask & (XFS_AT_EXTSIZE|XFS_AT_XFLAGS)) { 367 /* 368 * Can't change extent size if any extents are allocated. 369 */ 370 if (ip->i_d.di_nextents && (mask & XFS_AT_EXTSIZE) && 371 ((ip->i_d.di_extsize << mp->m_sb.sb_blocklog) != 372 vap->va_extsize) ) { 373 code = XFS_ERROR(EINVAL); /* EFBIG? */ 374 goto error_return; 375 } 376 377 /* 378 * Can't change realtime flag if any extents are allocated. 379 */ 380 if ((ip->i_d.di_nextents || ip->i_delayed_blks) && 381 (mask & XFS_AT_XFLAGS) && 382 (XFS_IS_REALTIME_INODE(ip)) != 383 (vap->va_xflags & XFS_XFLAG_REALTIME)) { 384 code = XFS_ERROR(EINVAL); /* EFBIG? */ 385 goto error_return; 386 } 387 /* 388 * Extent size must be a multiple of the appropriate block 389 * size, if set at all. 
390 */ 391 if ((mask & XFS_AT_EXTSIZE) && vap->va_extsize != 0) { 392 xfs_extlen_t size; 393 394 if (XFS_IS_REALTIME_INODE(ip) || 395 ((mask & XFS_AT_XFLAGS) && 396 (vap->va_xflags & XFS_XFLAG_REALTIME))) { 397 size = mp->m_sb.sb_rextsize << 398 mp->m_sb.sb_blocklog; 399 } else { 400 size = mp->m_sb.sb_blocksize; 401 } 402 if (vap->va_extsize % size) { 403 code = XFS_ERROR(EINVAL); 404 goto error_return; 405 } 406 } 407 /* 408 * If realtime flag is set then must have realtime data. 409 */ 410 if ((mask & XFS_AT_XFLAGS) && 411 (vap->va_xflags & XFS_XFLAG_REALTIME)) { 412 if ((mp->m_sb.sb_rblocks == 0) || 413 (mp->m_sb.sb_rextsize == 0) || 414 (ip->i_d.di_extsize % mp->m_sb.sb_rextsize)) { 415 code = XFS_ERROR(EINVAL); 416 goto error_return; 417 } 418 } 419 420 /* 421 * Can't modify an immutable/append-only file unless 422 * we have appropriate permission. 423 */ 424 if ((mask & XFS_AT_XFLAGS) && 425 (ip->i_d.di_flags & 426 (XFS_DIFLAG_IMMUTABLE|XFS_DIFLAG_APPEND) || 427 (vap->va_xflags & 428 (XFS_XFLAG_IMMUTABLE | XFS_XFLAG_APPEND))) && 429 !capable(CAP_LINUX_IMMUTABLE)) { 430 code = XFS_ERROR(EPERM); 431 goto error_return; 432 } 433 } 434 435 /* 436 * Now we can make the changes. Before we join the inode 437 * to the transaction, if XFS_AT_SIZE is set then take care of 438 * the part of the truncation that must be done without the 439 * inode lock. This needs to be done before joining the inode 440 * to the transaction, because the inode cannot be unlocked 441 * once it is a part of the transaction. 442 */ 443 if (mask & XFS_AT_SIZE) { 444 code = 0; 445 if ((vap->va_size > ip->i_size) && 446 (flags & ATTR_NOSIZETOK) == 0) { 447 code = xfs_igrow_start(ip, vap->va_size, credp); 448 } 449 xfs_iunlock(ip, XFS_ILOCK_EXCL); 450 451 /* 452 * We are going to log the inode size change in this 453 * transaction so any previous writes that are beyond the on 454 * disk EOF and the new EOF that have not been written out need 455 * to be written here. 
If we do not write the data out, we 456 * expose ourselves to the null files problem. 457 * 458 * Only flush from the on disk size to the smaller of the in 459 * memory file size or the new size as that's the range we 460 * really care about here and prevents waiting for other data 461 * not within the range we care about here. 462 */ 463 if (!code && 464 (ip->i_size != ip->i_d.di_size) && 465 (vap->va_size > ip->i_d.di_size)) { 466 code = xfs_flush_pages(ip, 467 ip->i_d.di_size, vap->va_size, 468 XFS_B_ASYNC, FI_NONE); 469 } 470 471 /* wait for all I/O to complete */ 472 vn_iowait(ip); 473 474 if (!code) 475 code = xfs_itruncate_data(ip, vap->va_size); 476 if (code) { 477 ASSERT(tp == NULL); 478 lock_flags &= ~XFS_ILOCK_EXCL; 479 ASSERT(lock_flags == XFS_IOLOCK_EXCL); 480 goto error_return; 481 } 482 tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE); 483 if ((code = xfs_trans_reserve(tp, 0, 484 XFS_ITRUNCATE_LOG_RES(mp), 0, 485 XFS_TRANS_PERM_LOG_RES, 486 XFS_ITRUNCATE_LOG_COUNT))) { 487 xfs_trans_cancel(tp, 0); 488 if (need_iolock) 489 xfs_iunlock(ip, XFS_IOLOCK_EXCL); 490 return code; 491 } 492 commit_flags = XFS_TRANS_RELEASE_LOG_RES; 493 xfs_ilock(ip, XFS_ILOCK_EXCL); 494 } 495 496 if (tp) { 497 xfs_trans_ijoin(tp, ip, lock_flags); 498 xfs_trans_ihold(tp, ip); 499 } 500 501 /* 502 * Truncate file. Must have write permission and not be a directory. 503 */ 504 if (mask & XFS_AT_SIZE) { 505 /* 506 * Only change the c/mtime if we are changing the size 507 * or we are explicitly asked to change it. This handles 508 * the semantic difference between truncate() and ftruncate() 509 * as implemented in the VFS. 
510 */ 511 if (vap->va_size != ip->i_size || (mask & XFS_AT_CTIME)) 512 timeflags |= XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG; 513 514 if (vap->va_size > ip->i_size) { 515 xfs_igrow_finish(tp, ip, vap->va_size, 516 !(flags & ATTR_DMI)); 517 } else if ((vap->va_size <= ip->i_size) || 518 ((vap->va_size == 0) && ip->i_d.di_nextents)) { 519 /* 520 * signal a sync transaction unless 521 * we're truncating an already unlinked 522 * file on a wsync filesystem 523 */ 524 code = xfs_itruncate_finish(&tp, ip, 525 (xfs_fsize_t)vap->va_size, 526 XFS_DATA_FORK, 527 ((ip->i_d.di_nlink != 0 || 528 !(mp->m_flags & XFS_MOUNT_WSYNC)) 529 ? 1 : 0)); 530 if (code) 531 goto abort_return; 532 /* 533 * Truncated "down", so we're removing references 534 * to old data here - if we now delay flushing for 535 * a long time, we expose ourselves unduly to the 536 * notorious NULL files problem. So, we mark this 537 * vnode and flush it when the file is closed, and 538 * do not wait the usual (long) time for writeout. 539 */ 540 xfs_iflags_set(ip, XFS_ITRUNCATED); 541 } 542 } 543 544 /* 545 * Change file access modes. 546 */ 547 if (mask & XFS_AT_MODE) { 548 ip->i_d.di_mode &= S_IFMT; 549 ip->i_d.di_mode |= vap->va_mode & ~S_IFMT; 550 551 xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE); 552 timeflags |= XFS_ICHGTIME_CHG; 553 } 554 555 /* 556 * Change file ownership. Must be the owner or privileged. 557 * If the system was configured with the "restricted_chown" 558 * option, the owner is not permitted to give away the file, 559 * and can change the group id only to a group of which he 560 * or she is a member. 
561 */ 562 if (mask & (XFS_AT_UID|XFS_AT_GID|XFS_AT_PROJID)) { 563 /* 564 * CAP_FSETID overrides the following restrictions: 565 * 566 * The set-user-ID and set-group-ID bits of a file will be 567 * cleared upon successful return from chown() 568 */ 569 if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) && 570 !capable(CAP_FSETID)) { 571 ip->i_d.di_mode &= ~(S_ISUID|S_ISGID); 572 } 573 574 /* 575 * Change the ownerships and register quota modifications 576 * in the transaction. 577 */ 578 if (iuid != uid) { 579 if (XFS_IS_UQUOTA_ON(mp)) { 580 ASSERT(mask & XFS_AT_UID); 581 ASSERT(udqp); 582 olddquot1 = XFS_QM_DQVOPCHOWN(mp, tp, ip, 583 &ip->i_udquot, udqp); 584 } 585 ip->i_d.di_uid = uid; 586 } 587 if (igid != gid) { 588 if (XFS_IS_GQUOTA_ON(mp)) { 589 ASSERT(!XFS_IS_PQUOTA_ON(mp)); 590 ASSERT(mask & XFS_AT_GID); 591 ASSERT(gdqp); 592 olddquot2 = XFS_QM_DQVOPCHOWN(mp, tp, ip, 593 &ip->i_gdquot, gdqp); 594 } 595 ip->i_d.di_gid = gid; 596 } 597 if (iprojid != projid) { 598 if (XFS_IS_PQUOTA_ON(mp)) { 599 ASSERT(!XFS_IS_GQUOTA_ON(mp)); 600 ASSERT(mask & XFS_AT_PROJID); 601 ASSERT(gdqp); 602 olddquot2 = XFS_QM_DQVOPCHOWN(mp, tp, ip, 603 &ip->i_gdquot, gdqp); 604 } 605 ip->i_d.di_projid = projid; 606 /* 607 * We may have to rev the inode as well as 608 * the superblock version number since projids didn't 609 * exist before DINODE_VERSION_2 and SB_VERSION_NLINK. 610 */ 611 if (ip->i_d.di_version == XFS_DINODE_VERSION_1) 612 xfs_bump_ino_vers2(tp, ip); 613 } 614 615 xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE); 616 timeflags |= XFS_ICHGTIME_CHG; 617 } 618 619 620 /* 621 * Change file access or modified times. 
622 */ 623 if (mask & (XFS_AT_ATIME|XFS_AT_MTIME)) { 624 if (mask & XFS_AT_ATIME) { 625 ip->i_d.di_atime.t_sec = vap->va_atime.tv_sec; 626 ip->i_d.di_atime.t_nsec = vap->va_atime.tv_nsec; 627 ip->i_update_core = 1; 628 timeflags &= ~XFS_ICHGTIME_ACC; 629 } 630 if (mask & XFS_AT_MTIME) { 631 ip->i_d.di_mtime.t_sec = vap->va_mtime.tv_sec; 632 ip->i_d.di_mtime.t_nsec = vap->va_mtime.tv_nsec; 633 timeflags &= ~XFS_ICHGTIME_MOD; 634 timeflags |= XFS_ICHGTIME_CHG; 635 } 636 if (tp && (flags & ATTR_UTIME)) 637 xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE); 638 } 639 640 /* 641 * Change XFS-added attributes. 642 */ 643 if (mask & (XFS_AT_EXTSIZE|XFS_AT_XFLAGS)) { 644 if (mask & XFS_AT_EXTSIZE) { 645 /* 646 * Converting bytes to fs blocks. 647 */ 648 ip->i_d.di_extsize = vap->va_extsize >> 649 mp->m_sb.sb_blocklog; 650 } 651 if (mask & XFS_AT_XFLAGS) { 652 uint di_flags; 653 654 /* can't set PREALLOC this way, just preserve it */ 655 di_flags = (ip->i_d.di_flags & XFS_DIFLAG_PREALLOC); 656 if (vap->va_xflags & XFS_XFLAG_IMMUTABLE) 657 di_flags |= XFS_DIFLAG_IMMUTABLE; 658 if (vap->va_xflags & XFS_XFLAG_APPEND) 659 di_flags |= XFS_DIFLAG_APPEND; 660 if (vap->va_xflags & XFS_XFLAG_SYNC) 661 di_flags |= XFS_DIFLAG_SYNC; 662 if (vap->va_xflags & XFS_XFLAG_NOATIME) 663 di_flags |= XFS_DIFLAG_NOATIME; 664 if (vap->va_xflags & XFS_XFLAG_NODUMP) 665 di_flags |= XFS_DIFLAG_NODUMP; 666 if (vap->va_xflags & XFS_XFLAG_PROJINHERIT) 667 di_flags |= XFS_DIFLAG_PROJINHERIT; 668 if (vap->va_xflags & XFS_XFLAG_NODEFRAG) 669 di_flags |= XFS_DIFLAG_NODEFRAG; 670 if (vap->va_xflags & XFS_XFLAG_FILESTREAM) 671 di_flags |= XFS_DIFLAG_FILESTREAM; 672 if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) { 673 if (vap->va_xflags & XFS_XFLAG_RTINHERIT) 674 di_flags |= XFS_DIFLAG_RTINHERIT; 675 if (vap->va_xflags & XFS_XFLAG_NOSYMLINKS) 676 di_flags |= XFS_DIFLAG_NOSYMLINKS; 677 if (vap->va_xflags & XFS_XFLAG_EXTSZINHERIT) 678 di_flags |= XFS_DIFLAG_EXTSZINHERIT; 679 } else if ((ip->i_d.di_mode & S_IFMT) == 
S_IFREG) { 680 if (vap->va_xflags & XFS_XFLAG_REALTIME) 681 di_flags |= XFS_DIFLAG_REALTIME; 682 if (vap->va_xflags & XFS_XFLAG_EXTSIZE) 683 di_flags |= XFS_DIFLAG_EXTSIZE; 684 } 685 ip->i_d.di_flags = di_flags; 686 } 687 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 688 timeflags |= XFS_ICHGTIME_CHG; 689 } 690 691 /* 692 * Change file inode change time only if XFS_AT_CTIME set 693 * AND we have been called by a DMI function. 694 */ 695 696 if ( (flags & ATTR_DMI) && (mask & XFS_AT_CTIME) ) { 697 ip->i_d.di_ctime.t_sec = vap->va_ctime.tv_sec; 698 ip->i_d.di_ctime.t_nsec = vap->va_ctime.tv_nsec; 699 ip->i_update_core = 1; 700 timeflags &= ~XFS_ICHGTIME_CHG; 701 } 702 703 /* 704 * Send out timestamp changes that need to be set to the 705 * current time. Not done when called by a DMI function. 706 */ 707 if (timeflags && !(flags & ATTR_DMI)) 708 xfs_ichgtime(ip, timeflags); 709 710 XFS_STATS_INC(xs_ig_attrchg); 711 712 /* 713 * If this is a synchronous mount, make sure that the 714 * transaction goes to disk before returning to the user. 715 * This is slightly sub-optimal in that truncates require 716 * two sync transactions instead of one for wsync filesystems. 717 * One for the truncate and one for the timestamps since we 718 * don't want to change the timestamps unless we're sure the 719 * truncate worked. Truncates are less than 1% of the laddis 720 * mix so this probably isn't worth the trouble to optimize. 721 */ 722 code = 0; 723 if (tp) { 724 if (mp->m_flags & XFS_MOUNT_WSYNC) 725 xfs_trans_set_sync(tp); 726 727 code = xfs_trans_commit(tp, commit_flags); 728 } 729 730 xfs_iunlock(ip, lock_flags); 731 732 /* 733 * Release any dquot(s) the inode had kept before chown. 
734 */ 735 XFS_QM_DQRELE(mp, olddquot1); 736 XFS_QM_DQRELE(mp, olddquot2); 737 XFS_QM_DQRELE(mp, udqp); 738 XFS_QM_DQRELE(mp, gdqp); 739 740 if (code) { 741 return code; 742 } 743 744 if (DM_EVENT_ENABLED(ip, DM_EVENT_ATTRIBUTE) && 745 !(flags & ATTR_DMI)) { 746 (void) XFS_SEND_NAMESP(mp, DM_EVENT_ATTRIBUTE, ip, DM_RIGHT_NULL, 747 NULL, DM_RIGHT_NULL, NULL, NULL, 748 0, 0, AT_DELAY_FLAG(flags)); 749 } 750 return 0; 751 752 abort_return: 753 commit_flags |= XFS_TRANS_ABORT; 754 /* FALLTHROUGH */ 755 error_return: 756 XFS_QM_DQRELE(mp, udqp); 757 XFS_QM_DQRELE(mp, gdqp); 758 if (tp) { 759 xfs_trans_cancel(tp, commit_flags); 760 } 761 if (lock_flags != 0) { 762 xfs_iunlock(ip, lock_flags); 763 } 764 return code; 765} 766 767/* 768 * The maximum pathlen is 1024 bytes. Since the minimum file system 769 * blocksize is 512 bytes, we can get a max of 2 extents back from 770 * bmapi. 771 */ 772#define SYMLINK_MAPS 2 773 774STATIC int 775xfs_readlink_bmap( 776 xfs_inode_t *ip, 777 char *link) 778{ 779 xfs_mount_t *mp = ip->i_mount; 780 int pathlen = ip->i_d.di_size; 781 int nmaps = SYMLINK_MAPS; 782 xfs_bmbt_irec_t mval[SYMLINK_MAPS]; 783 xfs_daddr_t d; 784 int byte_cnt; 785 int n; 786 xfs_buf_t *bp; 787 int error = 0; 788 789 error = xfs_bmapi(NULL, ip, 0, XFS_B_TO_FSB(mp, pathlen), 0, NULL, 0, 790 mval, &nmaps, NULL, NULL); 791 if (error) 792 goto out; 793 794 for (n = 0; n < nmaps; n++) { 795 d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock); 796 byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount); 797 798 bp = xfs_buf_read(mp->m_ddev_targp, d, BTOBB(byte_cnt), 0); 799 error = XFS_BUF_GETERROR(bp); 800 if (error) { 801 xfs_ioerror_alert("xfs_readlink", 802 ip->i_mount, bp, XFS_BUF_ADDR(bp)); 803 xfs_buf_relse(bp); 804 goto out; 805 } 806 if (pathlen < byte_cnt) 807 byte_cnt = pathlen; 808 pathlen -= byte_cnt; 809 810 memcpy(link, XFS_BUF_PTR(bp), byte_cnt); 811 xfs_buf_relse(bp); 812 } 813 814 link[ip->i_d.di_size] = '\0'; 815 error = 0; 816 817 out: 818 return error; 819} 
820 821int 822xfs_readlink( 823 xfs_inode_t *ip, 824 char *link) 825{ 826 xfs_mount_t *mp = ip->i_mount; 827 int pathlen; 828 int error = 0; 829 830 xfs_itrace_entry(ip); 831 832 if (XFS_FORCED_SHUTDOWN(mp)) 833 return XFS_ERROR(EIO); 834 835 xfs_ilock(ip, XFS_ILOCK_SHARED); 836 837 ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFLNK); 838 ASSERT(ip->i_d.di_size <= MAXPATHLEN); 839 840 pathlen = ip->i_d.di_size; 841 if (!pathlen) 842 goto out; 843 844 if (ip->i_df.if_flags & XFS_IFINLINE) { 845 memcpy(link, ip->i_df.if_u1.if_data, pathlen); 846 link[pathlen] = '\0'; 847 } else { 848 error = xfs_readlink_bmap(ip, link); 849 } 850 851 out: 852 xfs_iunlock(ip, XFS_ILOCK_SHARED); 853 return error; 854} 855 856/* 857 * xfs_fsync 858 * 859 * This is called to sync the inode and its data out to disk. We need to hold 860 * the I/O lock while flushing the data, and the inode lock while flushing the 861 * inode. The inode lock CANNOT be held while flushing the data, so acquire 862 * after we're done with that. 863 */ 864int 865xfs_fsync( 866 xfs_inode_t *ip) 867{ 868 xfs_trans_t *tp; 869 int error; 870 int log_flushed = 0, changed = 1; 871 872 xfs_itrace_entry(ip); 873 874 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) 875 return XFS_ERROR(EIO); 876 877 /* capture size updates in I/O completion before writing the inode. */ 878 error = filemap_fdatawait(vn_to_inode(XFS_ITOV(ip))->i_mapping); 879 if (error) 880 return XFS_ERROR(error); 881 882 /* 883 * We always need to make sure that the required inode state is safe on 884 * disk. The vnode might be clean but we still might need to force the 885 * log because of committed transactions that haven't hit the disk yet. 886 * Likewise, there could be unflushed non-transactional changes to the 887 * inode core that have to go to disk and this requires us to issue 888 * a synchronous transaction to capture these changes correctly. 
889 * 890 * This code relies on the assumption that if the update_* fields 891 * of the inode are clear and the inode is unpinned then it is clean 892 * and no action is required. 893 */ 894 xfs_ilock(ip, XFS_ILOCK_SHARED); 895 896 if (!(ip->i_update_size || ip->i_update_core)) { 897 /* 898 * Timestamps/size haven't changed since last inode flush or 899 * inode transaction commit. That means either nothing got 900 * written or a transaction committed which caught the updates. 901 * If the latter happened and the transaction hasn't hit the 902 * disk yet, the inode will be still be pinned. If it is, 903 * force the log. 904 */ 905 906 xfs_iunlock(ip, XFS_ILOCK_SHARED); 907 908 if (xfs_ipincount(ip)) { 909 error = _xfs_log_force(ip->i_mount, (xfs_lsn_t)0, 910 XFS_LOG_FORCE | XFS_LOG_SYNC, 911 &log_flushed); 912 } else { 913 /* 914 * If the inode is not pinned and nothing has changed 915 * we don't need to flush the cache. 916 */ 917 changed = 0; 918 } 919 } else { 920 /* 921 * Kick off a transaction to log the inode core to get the 922 * updates. The sync transaction will also force the log. 923 */ 924 xfs_iunlock(ip, XFS_ILOCK_SHARED); 925 tp = xfs_trans_alloc(ip->i_mount, XFS_TRANS_FSYNC_TS); 926 error = xfs_trans_reserve(tp, 0, 927 XFS_FSYNC_TS_LOG_RES(ip->i_mount), 0, 0, 0); 928 if (error) { 929 xfs_trans_cancel(tp, 0); 930 return error; 931 } 932 xfs_ilock(ip, XFS_ILOCK_EXCL); 933 934 /* 935 * Note - it's possible that we might have pushed ourselves out 936 * of the way during trans_reserve which would flush the inode. 937 * But there's no guarantee that the inode buffer has actually 938 * gone out yet (it's delwri). Plus the buffer could be pinned 939 * anyway if it's part of an inode in another recent 940 * transaction. So we play it safe and fire off the 941 * transaction anyway. 
942 */ 943 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 944 xfs_trans_ihold(tp, ip); 945 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 946 xfs_trans_set_sync(tp); 947 error = _xfs_trans_commit(tp, 0, &log_flushed); 948 949 xfs_iunlock(ip, XFS_ILOCK_EXCL); 950 } 951 952 if ((ip->i_mount->m_flags & XFS_MOUNT_BARRIER) && changed) { 953 /* 954 * If the log write didn't issue an ordered tag we need 955 * to flush the disk cache for the data device now. 956 */ 957 if (!log_flushed) 958 xfs_blkdev_issue_flush(ip->i_mount->m_ddev_targp); 959 960 /* 961 * If this inode is on the RT dev we need to flush that 962 * cache as well. 963 */ 964 if (XFS_IS_REALTIME_INODE(ip)) 965 xfs_blkdev_issue_flush(ip->i_mount->m_rtdev_targp); 966 } 967 968 return error; 969} 970 971/* 972 * This is called by xfs_inactive to free any blocks beyond eof 973 * when the link count isn't zero and by xfs_dm_punch_hole() when 974 * punching a hole to EOF. 975 */ 976int 977xfs_free_eofblocks( 978 xfs_mount_t *mp, 979 xfs_inode_t *ip, 980 int flags) 981{ 982 xfs_trans_t *tp; 983 int error; 984 xfs_fileoff_t end_fsb; 985 xfs_fileoff_t last_fsb; 986 xfs_filblks_t map_len; 987 int nimaps; 988 xfs_bmbt_irec_t imap; 989 int use_iolock = (flags & XFS_FREE_EOF_LOCK); 990 991 /* 992 * Figure out if there are any blocks beyond the end 993 * of the file. If not, then there is nothing to do. 994 */ 995 end_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)ip->i_size)); 996 last_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp)); 997 map_len = last_fsb - end_fsb; 998 if (map_len <= 0) 999 return 0; 1000 1001 nimaps = 1; 1002 xfs_ilock(ip, XFS_ILOCK_SHARED); 1003 error = xfs_bmapi(NULL, ip, end_fsb, map_len, 0, 1004 NULL, 0, &imap, &nimaps, NULL, NULL); 1005 xfs_iunlock(ip, XFS_ILOCK_SHARED); 1006 1007 if (!error && (nimaps != 0) && 1008 (imap.br_startblock != HOLESTARTBLOCK || 1009 ip->i_delayed_blks)) { 1010 /* 1011 * Attach the dquots to the inode up front. 
1012 */ 1013 if ((error = XFS_QM_DQATTACH(mp, ip, 0))) 1014 return error; 1015 1016 /* 1017 * There are blocks after the end of file. 1018 * Free them up now by truncating the file to 1019 * its current size. 1020 */ 1021 tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE); 1022 1023 /* 1024 * Do the xfs_itruncate_start() call before 1025 * reserving any log space because 1026 * itruncate_start will call into the buffer 1027 * cache and we can't 1028 * do that within a transaction. 1029 */ 1030 if (use_iolock) 1031 xfs_ilock(ip, XFS_IOLOCK_EXCL); 1032 error = xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE, 1033 ip->i_size); 1034 if (error) { 1035 xfs_trans_cancel(tp, 0); 1036 if (use_iolock) 1037 xfs_iunlock(ip, XFS_IOLOCK_EXCL); 1038 return error; 1039 } 1040 1041 error = xfs_trans_reserve(tp, 0, 1042 XFS_ITRUNCATE_LOG_RES(mp), 1043 0, XFS_TRANS_PERM_LOG_RES, 1044 XFS_ITRUNCATE_LOG_COUNT); 1045 if (error) { 1046 ASSERT(XFS_FORCED_SHUTDOWN(mp)); 1047 xfs_trans_cancel(tp, 0); 1048 xfs_iunlock(ip, XFS_IOLOCK_EXCL); 1049 return error; 1050 } 1051 1052 xfs_ilock(ip, XFS_ILOCK_EXCL); 1053 xfs_trans_ijoin(tp, ip, 1054 XFS_IOLOCK_EXCL | 1055 XFS_ILOCK_EXCL); 1056 xfs_trans_ihold(tp, ip); 1057 1058 error = xfs_itruncate_finish(&tp, ip, 1059 ip->i_size, 1060 XFS_DATA_FORK, 1061 0); 1062 /* 1063 * If we get an error at this point we 1064 * simply don't bother truncating the file. 1065 */ 1066 if (error) { 1067 xfs_trans_cancel(tp, 1068 (XFS_TRANS_RELEASE_LOG_RES | 1069 XFS_TRANS_ABORT)); 1070 } else { 1071 error = xfs_trans_commit(tp, 1072 XFS_TRANS_RELEASE_LOG_RES); 1073 } 1074 xfs_iunlock(ip, (use_iolock ? (XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL) 1075 : XFS_ILOCK_EXCL)); 1076 } 1077 return error; 1078} 1079 1080/* 1081 * Free a symlink that has blocks associated with it. 
 *
 * ip:  the symlink inode being inactivated.
 * tpp: in/out transaction pointer.  On entry *tpp is an allocated but
 *      not yet reserved transaction.
 *
 * On success returns 0 with *tpp pointing at a new transaction that
 * carries an itruncate log reservation, and with the inode locked
 * (IOLOCK_EXCL | ILOCK_EXCL) but not joined to that transaction.
 * On failure the transaction is cancelled, *tpp is set to NULL, the
 * inode is left unlocked and the error is returned.
 */
STATIC int
xfs_inactive_symlink_rmt(
	xfs_inode_t	*ip,
	xfs_trans_t	**tpp)
{
	xfs_buf_t	*bp;
	int		committed;
	int		done;
	int		error;
	xfs_fsblock_t	first_block;
	xfs_bmap_free_t	free_list;
	int		i;
	xfs_mount_t	*mp;
	xfs_bmbt_irec_t	mval[SYMLINK_MAPS];
	int		nmaps;
	xfs_trans_t	*ntp;
	int		size;
	xfs_trans_t	*tp;

	tp = *tpp;
	mp = ip->i_mount;
	ASSERT(ip->i_d.di_size > XFS_IFORK_DSIZE(ip));
	/*
	 * We're freeing a symlink that has some
	 * blocks allocated to it.  Free the
	 * blocks here.  We know that we've got
	 * either 1 or 2 extents and that we can
	 * free them all in one bunmapi call.
	 */
	ASSERT(ip->i_d.di_nextents > 0 && ip->i_d.di_nextents <= 2);
	if ((error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0,
			XFS_TRANS_PERM_LOG_RES, XFS_ITRUNCATE_LOG_COUNT))) {
		/* The inode is not locked yet, so there is nothing to unlock. */
		ASSERT(XFS_FORCED_SHUTDOWN(mp));
		xfs_trans_cancel(tp, 0);
		*tpp = NULL;
		return error;
	}
	/*
	 * Lock the inode, fix the size, and join it to the transaction.
	 * Hold it so in the normal path, we still have it locked for
	 * the second transaction.  In the error paths we need it
	 * held so the cancel won't rele it, see below.
	 */
	xfs_ilock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
	/* Remember the old byte size; it is used below to map and unmap. */
	size = (int)ip->i_d.di_size;
	ip->i_d.di_size = 0;
	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
	xfs_trans_ihold(tp, ip);
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
	/*
	 * Find the block(s) so we can inval and unmap them.
	 */
	done = 0;
	XFS_BMAP_INIT(&free_list, &first_block);
	nmaps = ARRAY_SIZE(mval);
	if ((error = xfs_bmapi(tp, ip, 0, XFS_B_TO_FSB(mp, size),
			XFS_BMAPI_METADATA, &first_block, 0, mval, &nmaps,
			&free_list, NULL)))
		goto error0;
	/*
	 * Invalidate the block(s).
	 */
	for (i = 0; i < nmaps; i++) {
		bp = xfs_trans_get_buf(tp, mp->m_ddev_targp,
			XFS_FSB_TO_DADDR(mp, mval[i].br_startblock),
			XFS_FSB_TO_BB(mp, mval[i].br_blockcount), 0);
		xfs_trans_binval(tp, bp);
	}
	/*
	 * Unmap the dead block(s) to the free_list.
	 */
	if ((error = xfs_bunmapi(tp, ip, 0, size, XFS_BMAPI_METADATA, nmaps,
			&first_block, &free_list, NULL, &done)))
		goto error1;
	ASSERT(done);
	/*
	 * Commit the first transaction.  This logs the EFI and the inode.
	 */
	if ((error = xfs_bmap_finish(&tp, &free_list, &committed)))
		goto error1;
	/*
	 * The transaction must have been committed, since there were
	 * actually extents freed by xfs_bunmapi.  See xfs_bmap_finish.
	 * The new tp has the extent freeing and EFDs.
	 */
	ASSERT(committed);
	/*
	 * The first xact was committed, so add the inode to the new one.
	 * Mark it dirty so it will be logged and moved forward in the log as
	 * part of every commit.
	 */
	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
	xfs_trans_ihold(tp, ip);
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
	/*
	 * Get a new, empty transaction to return to our caller.
	 */
	ntp = xfs_trans_dup(tp);
	/*
	 * Commit the transaction containing extent freeing and EFDs.
	 * If we get an error on the commit here or on the reserve below,
	 * we need to unlock the inode since the new transaction doesn't
	 * have the inode attached.
	 */
	error = xfs_trans_commit(tp, 0);
	/* From here on tp is the duplicate; error0 cancels that one. */
	tp = ntp;
	if (error) {
		ASSERT(XFS_FORCED_SHUTDOWN(mp));
		goto error0;
	}
	/*
	 * Remove the memory for extent descriptions (just bookkeeping).
	 */
	if (ip->i_df.if_bytes)
		xfs_idata_realloc(ip, -ip->i_df.if_bytes, XFS_DATA_FORK);
	ASSERT(ip->i_df.if_bytes == 0);
	/*
	 * Put an itruncate log reservation in the new transaction
	 * for our caller.
	 */
	if ((error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0,
			XFS_TRANS_PERM_LOG_RES, XFS_ITRUNCATE_LOG_COUNT))) {
		ASSERT(XFS_FORCED_SHUTDOWN(mp));
		goto error0;
	}
	/*
	 * Return with the inode locked but not joined to the transaction.
	 */
	*tpp = tp;
	return 0;

 error1:
	/* Drop the pending extent-free list, then fall into error0. */
	xfs_bmap_cancel(&free_list);
 error0:
	/*
	 * Have to come here with the inode locked and either
	 * (held and in the transaction) or (not in the transaction).
	 * If the inode isn't held then cancel would iput it, but
	 * that's wrong since this is inactive and the vnode ref
	 * count is 0 already.
	 * Cancel won't do anything to the inode if held, but it still
	 * needs to be locked until the cancel is done, if it was
	 * joined to the transaction.
	 */
	xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
	xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
	*tpp = NULL;
	return error;

}

/*
 * Free the in-core data of a symlink whose target fits inside the inode.
 *
 * Reserves itruncate log space in *tpp.  If the reservation fails the
 * transaction is cancelled, *tpp is set to NULL and the error is returned
 * with the inode unlocked.  Otherwise the inode is locked
 * (ILOCK_EXCL | IOLOCK_EXCL), any in-core data fork bytes are released,
 * and 0 is returned with the inode still locked and not joined to *tpp.
 */
STATIC int
xfs_inactive_symlink_local(
	xfs_inode_t	*ip,
	xfs_trans_t	**tpp)
{
	int		error;

	ASSERT(ip->i_d.di_size <= XFS_IFORK_DSIZE(ip));
	/*
	 * We're freeing a symlink which fit into
	 * the inode.  Just free the memory used
	 * to hold the old symlink.
	 */
	error = xfs_trans_reserve(*tpp, 0,
				  XFS_ITRUNCATE_LOG_RES(ip->i_mount),
				  0, XFS_TRANS_PERM_LOG_RES,
				  XFS_ITRUNCATE_LOG_COUNT);

	if (error) {
		xfs_trans_cancel(*tpp, 0);
		*tpp = NULL;
		return error;
	}
	xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);

	/*
	 * Zero length symlinks _can_ exist.
	 */
	if (ip->i_df.if_bytes > 0) {
		xfs_idata_realloc(ip,
				  -(ip->i_df.if_bytes),
				  XFS_DATA_FORK);
		ASSERT(ip->i_df.if_bytes == 0);
	}
	return 0;
}

/*
 * Tear down the attribute fork of an inode that is being inactivated.
 *
 * Called with the iolock held exclusively (asserted) and with *tpp a
 * transaction that is committed here.  The ilock is dropped after that
 * commit, xfs_attr_inactive() is run, and then a new transaction is
 * allocated and reserved, the ilock is retaken, the inode is joined and
 * held in the new transaction and the in-core attribute fork is destroyed.
 *
 * Returns 0 with *tpp set to the new transaction.  On error *tpp is set
 * to NULL, the iolock is released and the error is returned.
 */
STATIC int
xfs_inactive_attrs(
	xfs_inode_t	*ip,
	xfs_trans_t	**tpp)
{
	xfs_trans_t	*tp;
	int		error;
	xfs_mount_t	*mp;

	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
	tp = *tpp;
	mp = ip->i_mount;
	ASSERT(ip->i_d.di_forkoff != 0);
	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
	/* The ilock is dropped whether or not the commit succeeded. */
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	if (error)
		goto error_unlock;

	error = xfs_attr_inactive(ip);
	if (error)
		goto error_unlock;

	tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
	error = xfs_trans_reserve(tp, 0,
				  XFS_IFREE_LOG_RES(mp),
				  0, XFS_TRANS_PERM_LOG_RES,
				  XFS_INACTIVE_LOG_COUNT);
	if (error)
		goto error_cancel;

	xfs_ilock(ip, XFS_ILOCK_EXCL);
	xfs_trans_ijoin(tp, ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
	xfs_trans_ihold(tp, ip);
	xfs_idestroy_fork(ip, XFS_ATTR_FORK);

	ASSERT(ip->i_d.di_anextents == 0);

	*tpp = tp;
	return 0;

error_cancel:
	/* Only the newly allocated transaction reaches this label. */
	ASSERT(XFS_FORCED_SHUTDOWN(mp));
	xfs_trans_cancel(tp, 0);
error_unlock:
	*tpp = NULL;
	xfs_iunlock(ip, XFS_IOLOCK_EXCL);
	return error;
}

/*
 * Release-time processing for an inode.
 *
 * Does nothing and returns 0 for inodes that are not regular files, for
 * inodes whose di_mode is 0, and on read-only mounts.  Otherwise, unless
 * the filesystem is shut down, it drops the filestreams association of
 * unlinked files and starts an asynchronous flush for files that were
 * previously truncated and still have dirty delayed-allocation data.
 * For files that are still linked it then frees blocks past EOF via
 * xfs_free_eofblocks() when the conditions below hold.
 *
 * Returns 0, or the error returned by xfs_free_eofblocks().
 */
int
xfs_release(
	xfs_inode_t	*ip)
{
	bhv_vnode_t	*vp = XFS_ITOV(ip);
	xfs_mount_t	*mp = ip->i_mount;
	int		error;

	if (!S_ISREG(ip->i_d.di_mode) || (ip->i_d.di_mode == 0))
		return 0;

	/* If this is a read-only mount, don't do this (would generate I/O) */
	if (mp->m_flags & XFS_MOUNT_RDONLY)
		return 0;

	if (!XFS_FORCED_SHUTDOWN(mp)) {
		int truncated;

		/*
		 * If we are using filestreams, and we have an unlinked
		 * file that we are processing the last close on, then nothing
		 * will be able to reopen and write to this file.  Purge this
		 * inode from the filestreams cache so that it doesn't delay
		 * teardown of the inode.
		 */
		if ((ip->i_d.di_nlink == 0) && xfs_inode_is_filestream(ip))
			xfs_filestream_deassociate(ip);

		/*
		 * If we previously truncated this file and removed old data
		 * in the process, we want to initiate "early" writeout on
		 * the last close.  This is an attempt to combat the notorious
		 * NULL files problem which is particularly noticeable from a
		 * truncate down, buffered (re-)write (delalloc), followed by
		 * a crash.  What we are effectively doing here is
		 * significantly reducing the time window where we'd otherwise
		 * be exposed to that problem.
		 */
		truncated = xfs_iflags_test_and_clear(ip, XFS_ITRUNCATED);
		if (truncated && VN_DIRTY(vp) && ip->i_delayed_blks > 0)
			xfs_flush_pages(ip, 0, -1, XFS_B_ASYNC, FI_NONE);
	}

	if (ip->i_d.di_nlink != 0) {
		/*
		 * Only trim post-EOF blocks for a regular file that has a
		 * non-zero size, cached pages or delayed blocks, has
		 * XFS_IFEXTENTS set, and is neither preallocated nor
		 * append-only.
		 */
		if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) &&
		     ((ip->i_size > 0) || (VN_CACHED(vp) > 0 ||
		       ip->i_delayed_blks > 0)) &&
		     (ip->i_df.if_flags & XFS_IFEXTENTS)) &&
		    (!(ip->i_d.di_flags &
				(XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))) {
			error = xfs_free_eofblocks(mp, ip, XFS_FREE_EOF_LOCK);
			if (error)
				return error;
		}
	}

	return 0;
}

/*
 * xfs_inactive
 *
 * This is called when the vnode reference count for the vnode
 * goes to zero.  If the file has been unlinked, then it must
 * now be truncated.  Also, we clear all of the read-ahead state
 * kept for the inode here since the file is now closed.
 *
 * Every exit path of this function returns VN_INACTIVE_CACHE; errors
 * encountered along the way are not reported to the caller.
 */
int
xfs_inactive(
	xfs_inode_t	*ip)
{
	bhv_vnode_t	*vp = XFS_ITOV(ip);
	xfs_bmap_free_t	free_list;
	xfs_fsblock_t	first_block;
	int		committed;
	xfs_trans_t	*tp;
	xfs_mount_t	*mp;
	int		error;
	int		truncate;

	xfs_itrace_entry(ip);

	/*
	 * If the inode is already free, then there can be nothing
	 * to clean up here.
	 */
	if (ip->i_d.di_mode == 0 || VN_BAD(vp)) {
		ASSERT(ip->i_df.if_real_bytes == 0);
		ASSERT(ip->i_df.if_broot_bytes == 0);
		return VN_INACTIVE_CACHE;
	}

	/*
	 * Only do a truncate if it's a regular file with
	 * some actual space in it.  It's OK to look at the
	 * inode's fields without the lock because we're the
	 * only one with a reference to the inode.
	 */
	truncate = ((ip->i_d.di_nlink == 0) &&
	    ((ip->i_d.di_size != 0) || (ip->i_size != 0) ||
	     (ip->i_d.di_nextents > 0) || (ip->i_delayed_blks > 0)) &&
	    ((ip->i_d.di_mode & S_IFMT) == S_IFREG));

	mp = ip->i_mount;

	if (ip->i_d.di_nlink == 0 && DM_EVENT_ENABLED(ip, DM_EVENT_DESTROY))
		XFS_SEND_DESTROY(mp, ip, DM_RIGHT_NULL);

	error = 0;

	/* If this is a read-only mount, don't do this (would generate I/O) */
	if (mp->m_flags & XFS_MOUNT_RDONLY)
		goto out;

	if (ip->i_d.di_nlink != 0) {
		/*
		 * Still linked: at most trim blocks past EOF.  Unlike the
		 * similar test in xfs_release(), a preallocated or
		 * append-only file is still trimmed here when it has
		 * delayed blocks.
		 */
		if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) &&
		     ((ip->i_size > 0) || (VN_CACHED(vp) > 0 ||
		       ip->i_delayed_blks > 0)) &&
		      (ip->i_df.if_flags & XFS_IFEXTENTS) &&
		     (!(ip->i_d.di_flags &
				(XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) ||
		      (ip->i_delayed_blks != 0)))) {
			error = xfs_free_eofblocks(mp, ip, XFS_FREE_EOF_LOCK);
			if (error)
				return VN_INACTIVE_CACHE;
		}
		goto out;
	}

	ASSERT(ip->i_d.di_nlink == 0);

	if ((error = XFS_QM_DQATTACH(mp, ip, 0)))
		return VN_INACTIVE_CACHE;

	/*
	 * Each of the three branches below ends with the inode locked
	 * (IOLOCK_EXCL | ILOCK_EXCL), joined to tp and held.
	 */
	tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
	if (truncate) {
		/*
		 * Do the xfs_itruncate_start() call before
		 * reserving any log space because itruncate_start
		 * will call into the buffer cache and we can't
		 * do that within a transaction.
		 */
		xfs_ilock(ip, XFS_IOLOCK_EXCL);

		error = xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE, 0);
		if (error) {
			xfs_trans_cancel(tp, 0);
			xfs_iunlock(ip, XFS_IOLOCK_EXCL);
			return VN_INACTIVE_CACHE;
		}

		error = xfs_trans_reserve(tp, 0,
					  XFS_ITRUNCATE_LOG_RES(mp),
					  0, XFS_TRANS_PERM_LOG_RES,
					  XFS_ITRUNCATE_LOG_COUNT);
		if (error) {
			/* Don't call itruncate_cleanup */
			ASSERT(XFS_FORCED_SHUTDOWN(mp));
			xfs_trans_cancel(tp, 0);
			xfs_iunlock(ip, XFS_IOLOCK_EXCL);
			return VN_INACTIVE_CACHE;
		}

		xfs_ilock(ip, XFS_ILOCK_EXCL);
		xfs_trans_ijoin(tp, ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
		xfs_trans_ihold(tp, ip);

		/*
		 * normally, we have to run xfs_itruncate_finish sync.
		 * But if filesystem is wsync and we're in the inactive
		 * path, then we know that nlink == 0, and that the
		 * xaction that made nlink == 0 is permanently committed
		 * since xfs_remove runs as a synchronous transaction.
		 */
		error = xfs_itruncate_finish(&tp, ip, 0, XFS_DATA_FORK,
				(!(mp->m_flags & XFS_MOUNT_WSYNC) ? 1 : 0));

		if (error) {
			xfs_trans_cancel(tp,
				XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
			xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
			return VN_INACTIVE_CACHE;
		}
	} else if ((ip->i_d.di_mode & S_IFMT) == S_IFLNK) {

		/*
		 * If we get an error while cleaning up a
		 * symlink we bail out.
		 */
		error = (ip->i_d.di_size > XFS_IFORK_DSIZE(ip)) ?
			xfs_inactive_symlink_rmt(ip, &tp) :
			xfs_inactive_symlink_local(ip, &tp);

		if (error) {
			/* The helpers cancel the transaction and NULL tp. */
			ASSERT(tp == NULL);
			return VN_INACTIVE_CACHE;
		}

		/* Both helpers return with the inode locked but not joined. */
		xfs_trans_ijoin(tp, ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
		xfs_trans_ihold(tp, ip);
	} else {
		error = xfs_trans_reserve(tp, 0,
					  XFS_IFREE_LOG_RES(mp),
					  0, XFS_TRANS_PERM_LOG_RES,
					  XFS_INACTIVE_LOG_COUNT);
		if (error) {
			ASSERT(XFS_FORCED_SHUTDOWN(mp));
			xfs_trans_cancel(tp, 0);
			return VN_INACTIVE_CACHE;
		}

		xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
		xfs_trans_ijoin(tp, ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
		xfs_trans_ihold(tp, ip);
	}

	/*
	 * If there are attributes associated with the file
	 * then blow them away now.  The code calls a routine
	 * that recursively deconstructs the attribute fork.
	 * We need to just commit the current transaction
	 * because we can't use it for xfs_attr_inactive().
	 */
	if (ip->i_d.di_anextents > 0) {
		error = xfs_inactive_attrs(ip, &tp);
		/*
		 * If we got an error, the transaction is already
		 * cancelled, and the inode is unlocked. Just get out.
		 */
		 if (error)
			 return VN_INACTIVE_CACHE;
	} else if (ip->i_afp) {
		xfs_idestroy_fork(ip, XFS_ATTR_FORK);
	}

	/*
	 * Free the inode.
	 */
	XFS_BMAP_INIT(&free_list, &first_block);
	error = xfs_ifree(tp, ip, &free_list);
	if (error) {
		/*
		 * If we fail to free the inode, shut down.  The cancel
		 * might do that, we need to make sure.  Otherwise the
		 * inode might be lost for a long time or forever.
		 */
		if (!XFS_FORCED_SHUTDOWN(mp)) {
			cmn_err(CE_NOTE,
		"xfs_inactive: xfs_ifree() returned an error = %d on %s",
				error, mp->m_fsname);
			xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
		}
		xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
	} else {
		/*
		 * Credit the quota account(s). The inode is gone.
		 */
		XFS_TRANS_MOD_DQUOT_BYINO(mp, tp, ip, XFS_TRANS_DQ_ICOUNT, -1);

		/*
		 * Just ignore errors at this point.  There is nothing we can
		 * do except to try to keep going. Make sure it's not a silent
		 * error.
		 */
		error = xfs_bmap_finish(&tp,  &free_list, &committed);
		if (error)
			xfs_fs_cmn_err(CE_NOTE, mp, "xfs_inactive: "
				"xfs_bmap_finish() returned error %d", error);
		error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
		if (error)
			xfs_fs_cmn_err(CE_NOTE, mp, "xfs_inactive: "
				"xfs_trans_commit() returned error %d", error);
	}
	/*
	 * Release the dquots held by inode, if any.
	 */
	XFS_QM_DQDETACH(mp, ip);

	xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);

 out:
	return VN_INACTIVE_CACHE;
}


/*
 * Look up 'name' in directory 'dp' and return the inode it refers to.
 *
 * The directory is locked with xfs_ilock_map_shared() only for the
 * duration of the directory lookup; the inode is then fetched with
 * xfs_iget().  Returns 0 with *ipp set on success.  On failure *ipp is
 * set to NULL and the error is returned, except for the early
 * forced-shutdown return of EIO, which leaves *ipp untouched.
 */
int
xfs_lookup(
	xfs_inode_t		*dp,
	struct xfs_name		*name,
	xfs_inode_t		**ipp)
{
	xfs_ino_t		inum;
	int			error;
	uint			lock_mode;

	xfs_itrace_entry(dp);

	if (XFS_FORCED_SHUTDOWN(dp->i_mount))
		return XFS_ERROR(EIO);

	lock_mode = xfs_ilock_map_shared(dp);
	error = xfs_dir_lookup(NULL, dp, name, &inum);
	xfs_iunlock_map_shared(dp, lock_mode);

	if (error)
		goto out;

	error = xfs_iget(dp->i_mount, NULL, inum, 0, 0, ipp, 0);
	if (error)
		goto out;

	xfs_itrace_ref(*ipp);
	return 0;

 out:
	*ipp = NULL;
	return error;
}

/*
 * Create a new inode named 'name' in directory 'dp'.
 *
 * dp:    parent directory.
 * name:  name of the new entry.
 * mode:  mode passed to xfs_dir_ialloc() for the new inode.
 * rdev:  device number passed to xfs_dir_ialloc().
 * ipp:   out parameter; must point at NULL on entry (asserted).  Set to
 *        the new inode, with an extra reference taken via IHOLD(), on
 *        success only.
 * credp: credentials used for quota lookup and inode allocation.
 *
 * Returns 0 on success or an error.  If a DM_EVENT_CREATE event was sent
 * on entry, or the create succeeded, a DM_EVENT_POSTCREATE event is sent
 * from std_return when that event is enabled on dp.
 */
int
xfs_create(
	xfs_inode_t		*dp,
	struct xfs_name		*name,
	mode_t			mode,
	xfs_dev_t		rdev,
	xfs_inode_t		**ipp,
	cred_t			*credp)
{
	xfs_mount_t		*mp = dp->i_mount;
	xfs_inode_t		*ip;
	xfs_trans_t		*tp;
	int			error;
	xfs_bmap_free_t		free_list;
	xfs_fsblock_t		first_block;
	boolean_t		unlock_dp_on_error = B_FALSE;
	int			dm_event_sent = 0;
	uint			cancel_flags;
	int			committed;
	xfs_prid_t		prid;
	struct xfs_dquot	*udqp, *gdqp;
	uint			resblks;

	ASSERT(!*ipp);
	xfs_itrace_entry(dp);

	if (DM_EVENT_ENABLED(dp, DM_EVENT_CREATE)) {
		error = XFS_SEND_NAMESP(mp, DM_EVENT_CREATE,
				dp, DM_RIGHT_NULL, NULL,
				DM_RIGHT_NULL, name->name, NULL,
				mode, 0, 0);

		if (error)
			return error;
		dm_event_sent = 1;
	}

	if (XFS_FORCED_SHUTDOWN(mp))
		return XFS_ERROR(EIO);

	/* Return through std_return after this point. */

	udqp = gdqp = NULL;
	if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
		prid = dp->i_d.di_projid;
	else
		prid = (xfs_prid_t)dfltprid;

	/*
	 * Make sure that we have allocated dquot(s) on disk.
	 */
	error = XFS_QM_DQVOPALLOC(mp, dp,
			current_fsuid(credp), current_fsgid(credp), prid,
			XFS_QMOPT_QUOTALL|XFS_QMOPT_INHERIT, &udqp, &gdqp);
	if (error)
		goto std_return;

	ip = NULL;

	tp = xfs_trans_alloc(mp, XFS_TRANS_CREATE);
	cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
	resblks = XFS_CREATE_SPACE_RES(mp, name->len);
	/*
	 * Initially assume that the file does not exist and
	 * reserve the resources for that case.  If that is not
	 * the case we'll drop the one we have and get a more
	 * appropriate transaction later.
	 */
	error = xfs_trans_reserve(tp, resblks, XFS_CREATE_LOG_RES(mp), 0,
			XFS_TRANS_PERM_LOG_RES, XFS_CREATE_LOG_COUNT);
	if (error == ENOSPC) {
		/* Retry without a block reservation. */
		resblks = 0;
		error = xfs_trans_reserve(tp, 0, XFS_CREATE_LOG_RES(mp), 0,
				XFS_TRANS_PERM_LOG_RES, XFS_CREATE_LOG_COUNT);
	}
	if (error) {
		/* Nothing was reserved, so cancel without the release flag. */
		cancel_flags = 0;
		goto error_return;
	}

	xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
	unlock_dp_on_error = B_TRUE;

	XFS_BMAP_INIT(&free_list, &first_block);

	ASSERT(ip == NULL);

	/*
	 * Reserve disk quota and the inode.
	 */
	error = XFS_TRANS_RESERVE_QUOTA(mp, tp, udqp, gdqp, resblks, 1, 0);
	if (error)
		goto error_return;

	error = xfs_dir_canenter(tp, dp, name, resblks);
	if (error)
		goto error_return;
	error = xfs_dir_ialloc(&tp, dp, mode, 1,
			rdev, credp, prid, resblks > 0,
			&ip, &committed);
	if (error) {
		/* ENOSPC cancels without XFS_TRANS_ABORT; anything else aborts. */
		if (error == ENOSPC)
			goto error_return;
		goto abort_return;
	}
	xfs_itrace_ref(ip);

	/*
	 * At this point, we've gotten a newly allocated inode.
	 * It is locked (and joined to the transaction).
	 */

	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));

	/*
	 * Now we join the directory inode to the transaction.  We do not do it
	 * earlier because xfs_dir_ialloc might commit the previous transaction
	 * (and release all the locks).  An error from here on will result in
	 * the transaction cancel unlocking dp so don't do it explicitly in the
	 * error path.
	 */
	IHOLD(dp);
	xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
	unlock_dp_on_error = B_FALSE;

	error = xfs_dir_createname(tp, dp, name, ip->i_ino,
					&first_block, &free_list, resblks ?
					resblks - XFS_IALLOC_SPACE_RES(mp) : 0);
	if (error) {
		ASSERT(error != ENOSPC);
		goto abort_return;
	}
	xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
	xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);

	/*
	 * If this is a synchronous mount, make sure that the
	 * create transaction goes to disk before returning to
	 * the user.
	 */
	if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) {
		xfs_trans_set_sync(tp);
	}

	dp->i_gen++;

	/*
	 * Attach the dquot(s) to the inodes and modify them incore.
	 * These ids of the inode couldn't have changed since the new
	 * inode has been locked ever since it was created.
	 */
	XFS_QM_DQVOPCREATE(mp, tp, ip, udqp, gdqp);

	/*
	 * xfs_trans_commit normally decrements the vnode ref count
	 * when it unlocks the inode. Since we want to return the
	 * vnode to the caller, we bump the vnode ref count now.
	 */
	IHOLD(ip);

	error = xfs_bmap_finish(&tp, &free_list, &committed);
	if (error) {
		xfs_bmap_cancel(&free_list);
		goto abort_rele;
	}

	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
	if (error) {
		/* Drop the reference taken above; NULL tp skips the cancel. */
		IRELE(ip);
		tp = NULL;
		goto error_return;
	}

	XFS_QM_DQRELE(mp, udqp);
	XFS_QM_DQRELE(mp, gdqp);

	*ipp = ip;

	/* Fallthrough to std_return with error = 0 */

std_return:
	if ((*ipp || (error != 0 && dm_event_sent != 0)) &&
	    DM_EVENT_ENABLED(dp, DM_EVENT_POSTCREATE)) {
		(void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTCREATE,
			dp, DM_RIGHT_NULL,
			*ipp ? ip : NULL,
			DM_RIGHT_NULL, name->name, NULL,
			mode, error, 0);
	}
	return error;

 abort_return:
	cancel_flags |= XFS_TRANS_ABORT;
	/* FALLTHROUGH */

 error_return:
	if (tp != NULL)
		xfs_trans_cancel(tp, cancel_flags);

	XFS_QM_DQRELE(mp, udqp);
	XFS_QM_DQRELE(mp, gdqp);

	/* Only set while dp is locked but not yet joined to the transaction. */
	if (unlock_dp_on_error)
		xfs_iunlock(dp, XFS_ILOCK_EXCL);

	goto std_return;

 abort_rele:
	/*
	 * Wait until after the current transaction is aborted to
	 * release the inode.  This prevents recursive transactions
	 * and deadlocks from xfs_inactive.
	 */
	cancel_flags |= XFS_TRANS_ABORT;
	xfs_trans_cancel(tp, cancel_flags);
	IRELE(ip);

	XFS_QM_DQRELE(mp, udqp);
	XFS_QM_DQRELE(mp, gdqp);

	goto std_return;
}

#ifdef DEBUG
/*
 * Some counters to see if (and how often) we are hitting some deadlock
 * prevention code paths.
 */

int xfs_rm_locks;
int xfs_rm_lock_delays;
int xfs_rm_attempts;
#endif

/*
 * The following routine will lock the inodes associated with the
 * directory and the named entry in the directory. The locks are
 * acquired in increasing inode number.
 *
 * If the entry is "..", then only the directory is locked. The
 * vnode ref count will still include that from the .. entry in
 * this case.
 *
 * There is a deadlock we need to worry about. If the locked directory is
 * in the AIL, it might be blocking up the log. The next inode we lock
 * could be already locked by another thread waiting for log space (e.g.,
 * a permanent log reservation with a long running transaction (see
 * xfs_itruncate_finish)). To solve this, we must check if the directory
 * is in the ail and use lock_nowait. If we can't lock, we need to
 * drop the inode lock on the directory and try again. xfs_iunlock will
 * potentially push the tail if we were holding up the log.
 *
 * As written this routine always returns 0.
 */
STATIC int
xfs_lock_dir_and_entry(
	xfs_inode_t	*dp,
	xfs_inode_t	*ip)	/* inode of entry 'name' */
{
	int		attempts;
	xfs_ino_t	e_inum;
	xfs_inode_t	*ips[2];
	xfs_log_item_t	*lp;

#ifdef DEBUG
	xfs_rm_locks++;
#endif
	attempts = 0;

again:
	xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);

	e_inum = ip->i_ino;

	xfs_itrace_ref(ip);

	/*
	 * We want to lock in increasing inum. Since we've already
	 * acquired the lock on the directory, we may need to release
	 * it if the inum of the entry turns out to be less.
	 */
	if (e_inum > dp->i_ino) {
		/*
		 * We are already in the right order, so just
		 * lock on the inode of the entry.
		 * We need to use nowait if dp is in the AIL.
		 */

		lp = (xfs_log_item_t *)dp->i_itemp;
		if (lp && (lp->li_flags & XFS_LI_IN_AIL)) {
			if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
				attempts++;
#ifdef DEBUG
				xfs_rm_attempts++;
#endif

				/*
				 * Unlock dp and try again.
				 * xfs_iunlock will try to push the tail
				 * if the inode is in the AIL.
				 */

				xfs_iunlock(dp, XFS_ILOCK_EXCL);

				/* Back off on every fifth failed attempt. */
				if ((attempts % 5) == 0) {
					delay(1); /* Don't just spin the CPU */
#ifdef DEBUG
					xfs_rm_lock_delays++;
#endif
				}
				goto again;
			}
		} else {
			xfs_ilock(ip, XFS_ILOCK_EXCL);
		}
	} else if (e_inum < dp->i_ino) {
		/* Wrong order: drop dp and take both locks, entry first. */
		xfs_iunlock(dp, XFS_ILOCK_EXCL);

		ips[0] = ip;
		ips[1] = dp;
		xfs_lock_inodes(ips, 2, XFS_ILOCK_EXCL);
	}
	/* else	 e_inum == dp->i_ino */
	/*     This can happen if we're asked to lock /x/..
	 *     the entry is "..", which is also the parent directory.
	 */

	return 0;
}

#ifdef DEBUG
/* Statistics updated by xfs_lock_inodes(). */
int xfs_locked_n;
int xfs_small_retries;
int xfs_middle_retries;
int xfs_lots_retries;
int xfs_lock_delays;
#endif

/*
 * Bump the subclass so xfs_lock_inodes() acquires each lock with
 * a different value
 *
 * For each lock type (iolock, ilock) requested in lock_mode, the value
 * (subclass + XFS_LOCK_INUMORDER) is shifted into that lock's subclass
 * field and OR-ed into the returned mode.
 */
static inline int
xfs_lock_inumorder(int lock_mode, int subclass)
{
	if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL))
		lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_IOLOCK_SHIFT;
	if (lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))
		lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_ILOCK_SHIFT;

	return lock_mode;
}

/*
 * The following routine will lock n inodes in exclusive mode.
 * We assume the caller calls us with the inodes in i_ino order.
 *
 * We need to detect deadlock where an inode that we lock
 * is in the AIL and we start waiting for another inode that is locked
 * by a thread in a long running transaction (such as truncate). This can
 * result in deadlock since the long running trans might need to wait
 * for the inode we just locked in order to push the tail and free space
 * in the log.
2000 */ 2001void 2002xfs_lock_inodes( 2003 xfs_inode_t **ips, 2004 int inodes, 2005 uint lock_mode) 2006{ 2007 int attempts = 0, i, j, try_lock; 2008 xfs_log_item_t *lp; 2009 2010 ASSERT(ips && (inodes >= 2)); /* we need at least two */ 2011 2012 try_lock = 0; 2013 i = 0; 2014 2015again: 2016 for (; i < inodes; i++) { 2017 ASSERT(ips[i]); 2018 2019 if (i && (ips[i] == ips[i-1])) /* Already locked */ 2020 continue; 2021 2022 /* 2023 * If try_lock is not set yet, make sure all locked inodes 2024 * are not in the AIL. 2025 * If any are, set try_lock to be used later. 2026 */ 2027 2028 if (!try_lock) { 2029 for (j = (i - 1); j >= 0 && !try_lock; j--) { 2030 lp = (xfs_log_item_t *)ips[j]->i_itemp; 2031 if (lp && (lp->li_flags & XFS_LI_IN_AIL)) { 2032 try_lock++; 2033 } 2034 } 2035 } 2036 2037 /* 2038 * If any of the previous locks we have locked is in the AIL, 2039 * we must TRY to get the second and subsequent locks. If 2040 * we can't get any, we must release all we have 2041 * and try again. 2042 */ 2043 2044 if (try_lock) { 2045 /* try_lock must be 0 if i is 0. */ 2046 /* 2047 * try_lock means we have an inode locked 2048 * that is in the AIL. 2049 */ 2050 ASSERT(i != 0); 2051 if (!xfs_ilock_nowait(ips[i], xfs_lock_inumorder(lock_mode, i))) { 2052 attempts++; 2053 2054 /* 2055 * Unlock all previous guys and try again. 2056 * xfs_iunlock will try to push the tail 2057 * if the inode is in the AIL. 2058 */ 2059 2060 for(j = i - 1; j >= 0; j--) { 2061 2062 /* 2063 * Check to see if we've already 2064 * unlocked this one. 2065 * Not the first one going back, 2066 * and the inode ptr is the same. 
2067 */ 2068 if ((j != (i - 1)) && ips[j] == 2069 ips[j+1]) 2070 continue; 2071 2072 xfs_iunlock(ips[j], lock_mode); 2073 } 2074 2075 if ((attempts % 5) == 0) { 2076 delay(1); /* Don't just spin the CPU */ 2077#ifdef DEBUG 2078 xfs_lock_delays++; 2079#endif 2080 } 2081 i = 0; 2082 try_lock = 0; 2083 goto again; 2084 } 2085 } else { 2086 xfs_ilock(ips[i], xfs_lock_inumorder(lock_mode, i)); 2087 } 2088 } 2089 2090#ifdef DEBUG 2091 if (attempts) { 2092 if (attempts < 5) xfs_small_retries++; 2093 else if (attempts < 100) xfs_middle_retries++; 2094 else xfs_lots_retries++; 2095 } else { 2096 xfs_locked_n++; 2097 } 2098#endif 2099} 2100 2101#ifdef DEBUG 2102#define REMOVE_DEBUG_TRACE(x) {remove_which_error_return = (x);} 2103int remove_which_error_return = 0; 2104#else /* ! DEBUG */ 2105#define REMOVE_DEBUG_TRACE(x) 2106#endif /* ! DEBUG */ 2107 2108int 2109xfs_remove( 2110 xfs_inode_t *dp, 2111 struct xfs_name *name, 2112 xfs_inode_t *ip) 2113{ 2114 xfs_mount_t *mp = dp->i_mount; 2115 xfs_trans_t *tp = NULL; 2116 int error = 0; 2117 xfs_bmap_free_t free_list; 2118 xfs_fsblock_t first_block; 2119 int cancel_flags; 2120 int committed; 2121 int link_zero; 2122 uint resblks; 2123 2124 xfs_itrace_entry(dp); 2125 2126 if (XFS_FORCED_SHUTDOWN(mp)) 2127 return XFS_ERROR(EIO); 2128 2129 if (DM_EVENT_ENABLED(dp, DM_EVENT_REMOVE)) { 2130 error = XFS_SEND_NAMESP(mp, DM_EVENT_REMOVE, dp, DM_RIGHT_NULL, 2131 NULL, DM_RIGHT_NULL, name->name, NULL, 2132 ip->i_d.di_mode, 0, 0); 2133 if (error) 2134 return error; 2135 } 2136 2137 xfs_itrace_entry(ip); 2138 xfs_itrace_ref(ip); 2139 2140 error = XFS_QM_DQATTACH(mp, dp, 0); 2141 if (!error) 2142 error = XFS_QM_DQATTACH(mp, ip, 0); 2143 if (error) { 2144 REMOVE_DEBUG_TRACE(__LINE__); 2145 goto std_return; 2146 } 2147 2148 tp = xfs_trans_alloc(mp, XFS_TRANS_REMOVE); 2149 cancel_flags = XFS_TRANS_RELEASE_LOG_RES; 2150 /* 2151 * We try to get the real space reservation first, 2152 * allowing for directory btree deletion(s) implying 2153 * 
possible bmap insert(s). If we can't get the space 2154 * reservation then we use 0 instead, and avoid the bmap 2155 * btree insert(s) in the directory code by, if the bmap 2156 * insert tries to happen, instead trimming the LAST 2157 * block from the directory. 2158 */ 2159 resblks = XFS_REMOVE_SPACE_RES(mp); 2160 error = xfs_trans_reserve(tp, resblks, XFS_REMOVE_LOG_RES(mp), 0, 2161 XFS_TRANS_PERM_LOG_RES, XFS_REMOVE_LOG_COUNT); 2162 if (error == ENOSPC) { 2163 resblks = 0; 2164 error = xfs_trans_reserve(tp, 0, XFS_REMOVE_LOG_RES(mp), 0, 2165 XFS_TRANS_PERM_LOG_RES, XFS_REMOVE_LOG_COUNT); 2166 } 2167 if (error) { 2168 ASSERT(error != ENOSPC); 2169 REMOVE_DEBUG_TRACE(__LINE__); 2170 xfs_trans_cancel(tp, 0); 2171 return error; 2172 } 2173 2174 error = xfs_lock_dir_and_entry(dp, ip); 2175 if (error) { 2176 REMOVE_DEBUG_TRACE(__LINE__); 2177 xfs_trans_cancel(tp, cancel_flags); 2178 goto std_return; 2179 } 2180 2181 /* 2182 * At this point, we've gotten both the directory and the entry 2183 * inodes locked. 2184 */ 2185 IHOLD(ip); 2186 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); 2187 2188 IHOLD(dp); 2189 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 2190 2191 /* 2192 * Entry must exist since we did a lookup in xfs_lock_dir_and_entry. 2193 */ 2194 XFS_BMAP_INIT(&free_list, &first_block); 2195 error = xfs_dir_removename(tp, dp, name, ip->i_ino, 2196 &first_block, &free_list, resblks); 2197 if (error) { 2198 ASSERT(error != ENOENT); 2199 REMOVE_DEBUG_TRACE(__LINE__); 2200 goto error1; 2201 } 2202 xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 2203 2204 dp->i_gen++; 2205 xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); 2206 2207 error = xfs_droplink(tp, ip); 2208 if (error) { 2209 REMOVE_DEBUG_TRACE(__LINE__); 2210 goto error1; 2211 } 2212 2213 /* Determine if this is the last link while 2214 * we are in the transaction. 
2215 */ 2216 link_zero = (ip)->i_d.di_nlink==0; 2217 2218 /* 2219 * If this is a synchronous mount, make sure that the 2220 * remove transaction goes to disk before returning to 2221 * the user. 2222 */ 2223 if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) { 2224 xfs_trans_set_sync(tp); 2225 } 2226 2227 error = xfs_bmap_finish(&tp, &free_list, &committed); 2228 if (error) { 2229 REMOVE_DEBUG_TRACE(__LINE__); 2230 goto error_rele; 2231 } 2232 2233 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 2234 if (error) 2235 goto std_return; 2236 2237 /* 2238 * If we are using filestreams, kill the stream association. 2239 * If the file is still open it may get a new one but that 2240 * will get killed on last close in xfs_close() so we don't 2241 * have to worry about that. 2242 */ 2243 if (link_zero && xfs_inode_is_filestream(ip)) 2244 xfs_filestream_deassociate(ip); 2245 2246 xfs_itrace_exit(ip); 2247 2248/* Fall through to std_return with error = 0 */ 2249 std_return: 2250 if (DM_EVENT_ENABLED(dp, DM_EVENT_POSTREMOVE)) { 2251 (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTREMOVE, 2252 dp, DM_RIGHT_NULL, 2253 NULL, DM_RIGHT_NULL, 2254 name->name, NULL, ip->i_d.di_mode, error, 0); 2255 } 2256 return error; 2257 2258 error1: 2259 xfs_bmap_cancel(&free_list); 2260 cancel_flags |= XFS_TRANS_ABORT; 2261 xfs_trans_cancel(tp, cancel_flags); 2262 goto std_return; 2263 2264 error_rele: 2265 /* 2266 * In this case make sure to not release the inode until after 2267 * the current transaction is aborted. Releasing it beforehand 2268 * can cause us to go to xfs_inactive and start a recursive 2269 * transaction which can easily deadlock with the current one. 
2270 */ 2271 xfs_bmap_cancel(&free_list); 2272 cancel_flags |= XFS_TRANS_ABORT; 2273 xfs_trans_cancel(tp, cancel_flags); 2274 2275 goto std_return; 2276} 2277 2278int 2279xfs_link( 2280 xfs_inode_t *tdp, 2281 xfs_inode_t *sip, 2282 struct xfs_name *target_name) 2283{ 2284 xfs_mount_t *mp = tdp->i_mount; 2285 xfs_trans_t *tp; 2286 xfs_inode_t *ips[2]; 2287 int error; 2288 xfs_bmap_free_t free_list; 2289 xfs_fsblock_t first_block; 2290 int cancel_flags; 2291 int committed; 2292 int resblks; 2293 2294 xfs_itrace_entry(tdp); 2295 xfs_itrace_entry(sip); 2296 2297 ASSERT(!S_ISDIR(sip->i_d.di_mode)); 2298 2299 if (XFS_FORCED_SHUTDOWN(mp)) 2300 return XFS_ERROR(EIO); 2301 2302 if (DM_EVENT_ENABLED(tdp, DM_EVENT_LINK)) { 2303 error = XFS_SEND_NAMESP(mp, DM_EVENT_LINK, 2304 tdp, DM_RIGHT_NULL, 2305 sip, DM_RIGHT_NULL, 2306 target_name->name, NULL, 0, 0, 0); 2307 if (error) 2308 return error; 2309 } 2310 2311 /* Return through std_return after this point. */ 2312 2313 error = XFS_QM_DQATTACH(mp, sip, 0); 2314 if (!error && sip != tdp) 2315 error = XFS_QM_DQATTACH(mp, tdp, 0); 2316 if (error) 2317 goto std_return; 2318 2319 tp = xfs_trans_alloc(mp, XFS_TRANS_LINK); 2320 cancel_flags = XFS_TRANS_RELEASE_LOG_RES; 2321 resblks = XFS_LINK_SPACE_RES(mp, target_name->len); 2322 error = xfs_trans_reserve(tp, resblks, XFS_LINK_LOG_RES(mp), 0, 2323 XFS_TRANS_PERM_LOG_RES, XFS_LINK_LOG_COUNT); 2324 if (error == ENOSPC) { 2325 resblks = 0; 2326 error = xfs_trans_reserve(tp, 0, XFS_LINK_LOG_RES(mp), 0, 2327 XFS_TRANS_PERM_LOG_RES, XFS_LINK_LOG_COUNT); 2328 } 2329 if (error) { 2330 cancel_flags = 0; 2331 goto error_return; 2332 } 2333 2334 if (sip->i_ino < tdp->i_ino) { 2335 ips[0] = sip; 2336 ips[1] = tdp; 2337 } else { 2338 ips[0] = tdp; 2339 ips[1] = sip; 2340 } 2341 2342 xfs_lock_inodes(ips, 2, XFS_ILOCK_EXCL); 2343 2344 /* 2345 * Increment vnode ref counts since xfs_trans_commit & 2346 * xfs_trans_cancel will both unlock the inodes and 2347 * decrement the associated ref counts. 
2348 */ 2349 IHOLD(sip); 2350 IHOLD(tdp); 2351 xfs_trans_ijoin(tp, sip, XFS_ILOCK_EXCL); 2352 xfs_trans_ijoin(tp, tdp, XFS_ILOCK_EXCL); 2353 2354 /* 2355 * If the source has too many links, we can't make any more to it. 2356 */ 2357 if (sip->i_d.di_nlink >= XFS_MAXLINK) { 2358 error = XFS_ERROR(EMLINK); 2359 goto error_return; 2360 } 2361 2362 /* 2363 * If we are using project inheritance, we only allow hard link 2364 * creation in our tree when the project IDs are the same; else 2365 * the tree quota mechanism could be circumvented. 2366 */ 2367 if (unlikely((tdp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) && 2368 (tdp->i_d.di_projid != sip->i_d.di_projid))) { 2369 error = XFS_ERROR(EXDEV); 2370 goto error_return; 2371 } 2372 2373 error = xfs_dir_canenter(tp, tdp, target_name, resblks); 2374 if (error) 2375 goto error_return; 2376 2377 XFS_BMAP_INIT(&free_list, &first_block); 2378 2379 error = xfs_dir_createname(tp, tdp, target_name, sip->i_ino, 2380 &first_block, &free_list, resblks); 2381 if (error) 2382 goto abort_return; 2383 xfs_ichgtime(tdp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 2384 tdp->i_gen++; 2385 xfs_trans_log_inode(tp, tdp, XFS_ILOG_CORE); 2386 2387 error = xfs_bumplink(tp, sip); 2388 if (error) 2389 goto abort_return; 2390 2391 /* 2392 * If this is a synchronous mount, make sure that the 2393 * link transaction goes to disk before returning to 2394 * the user. 2395 */ 2396 if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) { 2397 xfs_trans_set_sync(tp); 2398 } 2399 2400 error = xfs_bmap_finish (&tp, &free_list, &committed); 2401 if (error) { 2402 xfs_bmap_cancel(&free_list); 2403 goto abort_return; 2404 } 2405 2406 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 2407 if (error) 2408 goto std_return; 2409 2410 /* Fall through to std_return with error = 0. 
*/ 2411std_return: 2412 if (DM_EVENT_ENABLED(sip, DM_EVENT_POSTLINK)) { 2413 (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTLINK, 2414 tdp, DM_RIGHT_NULL, 2415 sip, DM_RIGHT_NULL, 2416 target_name->name, NULL, 0, error, 0); 2417 } 2418 return error; 2419 2420 abort_return: 2421 cancel_flags |= XFS_TRANS_ABORT; 2422 /* FALLTHROUGH */ 2423 2424 error_return: 2425 xfs_trans_cancel(tp, cancel_flags); 2426 goto std_return; 2427} 2428 2429 2430int 2431xfs_mkdir( 2432 xfs_inode_t *dp, 2433 struct xfs_name *dir_name, 2434 mode_t mode, 2435 xfs_inode_t **ipp, 2436 cred_t *credp) 2437{ 2438 xfs_mount_t *mp = dp->i_mount; 2439 xfs_inode_t *cdp; /* inode of created dir */ 2440 xfs_trans_t *tp; 2441 int cancel_flags; 2442 int error; 2443 int committed; 2444 xfs_bmap_free_t free_list; 2445 xfs_fsblock_t first_block; 2446 boolean_t unlock_dp_on_error = B_FALSE; 2447 boolean_t created = B_FALSE; 2448 int dm_event_sent = 0; 2449 xfs_prid_t prid; 2450 struct xfs_dquot *udqp, *gdqp; 2451 uint resblks; 2452 2453 if (XFS_FORCED_SHUTDOWN(mp)) 2454 return XFS_ERROR(EIO); 2455 2456 tp = NULL; 2457 2458 if (DM_EVENT_ENABLED(dp, DM_EVENT_CREATE)) { 2459 error = XFS_SEND_NAMESP(mp, DM_EVENT_CREATE, 2460 dp, DM_RIGHT_NULL, NULL, 2461 DM_RIGHT_NULL, dir_name->name, NULL, 2462 mode, 0, 0); 2463 if (error) 2464 return error; 2465 dm_event_sent = 1; 2466 } 2467 2468 /* Return through std_return after this point. */ 2469 2470 xfs_itrace_entry(dp); 2471 2472 mp = dp->i_mount; 2473 udqp = gdqp = NULL; 2474 if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) 2475 prid = dp->i_d.di_projid; 2476 else 2477 prid = (xfs_prid_t)dfltprid; 2478 2479 /* 2480 * Make sure that we have allocated dquot(s) on disk. 
2481 */ 2482 error = XFS_QM_DQVOPALLOC(mp, dp, 2483 current_fsuid(credp), current_fsgid(credp), prid, 2484 XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp); 2485 if (error) 2486 goto std_return; 2487 2488 tp = xfs_trans_alloc(mp, XFS_TRANS_MKDIR); 2489 cancel_flags = XFS_TRANS_RELEASE_LOG_RES; 2490 resblks = XFS_MKDIR_SPACE_RES(mp, dir_name->len); 2491 error = xfs_trans_reserve(tp, resblks, XFS_MKDIR_LOG_RES(mp), 0, 2492 XFS_TRANS_PERM_LOG_RES, XFS_MKDIR_LOG_COUNT); 2493 if (error == ENOSPC) { 2494 resblks = 0; 2495 error = xfs_trans_reserve(tp, 0, XFS_MKDIR_LOG_RES(mp), 0, 2496 XFS_TRANS_PERM_LOG_RES, 2497 XFS_MKDIR_LOG_COUNT); 2498 } 2499 if (error) { 2500 cancel_flags = 0; 2501 goto error_return; 2502 } 2503 2504 xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT); 2505 unlock_dp_on_error = B_TRUE; 2506 2507 /* 2508 * Check for directory link count overflow. 2509 */ 2510 if (dp->i_d.di_nlink >= XFS_MAXLINK) { 2511 error = XFS_ERROR(EMLINK); 2512 goto error_return; 2513 } 2514 2515 /* 2516 * Reserve disk quota and the inode. 2517 */ 2518 error = XFS_TRANS_RESERVE_QUOTA(mp, tp, udqp, gdqp, resblks, 1, 0); 2519 if (error) 2520 goto error_return; 2521 2522 error = xfs_dir_canenter(tp, dp, dir_name, resblks); 2523 if (error) 2524 goto error_return; 2525 /* 2526 * create the directory inode. 2527 */ 2528 error = xfs_dir_ialloc(&tp, dp, mode, 2, 2529 0, credp, prid, resblks > 0, 2530 &cdp, NULL); 2531 if (error) { 2532 if (error == ENOSPC) 2533 goto error_return; 2534 goto abort_return; 2535 } 2536 xfs_itrace_ref(cdp); 2537 2538 /* 2539 * Now we add the directory inode to the transaction. 2540 * We waited until now since xfs_dir_ialloc might start 2541 * a new transaction. Had we joined the transaction 2542 * earlier, the locks might have gotten released. An error 2543 * from here on will result in the transaction cancel 2544 * unlocking dp so don't do it explicitly in the error path. 
2545 */ 2546 IHOLD(dp); 2547 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); 2548 unlock_dp_on_error = B_FALSE; 2549 2550 XFS_BMAP_INIT(&free_list, &first_block); 2551 2552 error = xfs_dir_createname(tp, dp, dir_name, cdp->i_ino, 2553 &first_block, &free_list, resblks ? 2554 resblks - XFS_IALLOC_SPACE_RES(mp) : 0); 2555 if (error) { 2556 ASSERT(error != ENOSPC); 2557 goto error1; 2558 } 2559 xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 2560 2561 /* 2562 * Bump the in memory version number of the parent directory 2563 * so that other processes accessing it will recognize that 2564 * the directory has changed. 2565 */ 2566 dp->i_gen++; 2567 2568 error = xfs_dir_init(tp, cdp, dp); 2569 if (error) 2570 goto error2; 2571 2572 cdp->i_gen = 1; 2573 error = xfs_bumplink(tp, dp); 2574 if (error) 2575 goto error2; 2576 2577 created = B_TRUE; 2578 2579 *ipp = cdp; 2580 IHOLD(cdp); 2581 2582 /* 2583 * Attach the dquots to the new inode and modify the icount incore. 2584 */ 2585 XFS_QM_DQVOPCREATE(mp, tp, cdp, udqp, gdqp); 2586 2587 /* 2588 * If this is a synchronous mount, make sure that the 2589 * mkdir transaction goes to disk before returning to 2590 * the user. 2591 */ 2592 if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) { 2593 xfs_trans_set_sync(tp); 2594 } 2595 2596 error = xfs_bmap_finish(&tp, &free_list, &committed); 2597 if (error) { 2598 IRELE(cdp); 2599 goto error2; 2600 } 2601 2602 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 2603 XFS_QM_DQRELE(mp, udqp); 2604 XFS_QM_DQRELE(mp, gdqp); 2605 if (error) { 2606 IRELE(cdp); 2607 } 2608 2609 /* Fall through to std_return with error = 0 or errno from 2610 * xfs_trans_commit. */ 2611 2612std_return: 2613 if ((created || (error != 0 && dm_event_sent != 0)) && 2614 DM_EVENT_ENABLED(dp, DM_EVENT_POSTCREATE)) { 2615 (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTCREATE, 2616 dp, DM_RIGHT_NULL, 2617 created ? 
cdp : NULL, 2618 DM_RIGHT_NULL, 2619 dir_name->name, NULL, 2620 mode, error, 0); 2621 } 2622 return error; 2623 2624 error2: 2625 error1: 2626 xfs_bmap_cancel(&free_list); 2627 abort_return: 2628 cancel_flags |= XFS_TRANS_ABORT; 2629 error_return: 2630 xfs_trans_cancel(tp, cancel_flags); 2631 XFS_QM_DQRELE(mp, udqp); 2632 XFS_QM_DQRELE(mp, gdqp); 2633 2634 if (unlock_dp_on_error) 2635 xfs_iunlock(dp, XFS_ILOCK_EXCL); 2636 2637 goto std_return; 2638} 2639 2640int 2641xfs_rmdir( 2642 xfs_inode_t *dp, 2643 struct xfs_name *name, 2644 xfs_inode_t *cdp) 2645{ 2646 xfs_mount_t *mp = dp->i_mount; 2647 xfs_trans_t *tp; 2648 int error; 2649 xfs_bmap_free_t free_list; 2650 xfs_fsblock_t first_block; 2651 int cancel_flags; 2652 int committed; 2653 int last_cdp_link; 2654 uint resblks; 2655 2656 xfs_itrace_entry(dp); 2657 2658 if (XFS_FORCED_SHUTDOWN(mp)) 2659 return XFS_ERROR(EIO); 2660 2661 if (DM_EVENT_ENABLED(dp, DM_EVENT_REMOVE)) { 2662 error = XFS_SEND_NAMESP(mp, DM_EVENT_REMOVE, 2663 dp, DM_RIGHT_NULL, 2664 NULL, DM_RIGHT_NULL, name->name, 2665 NULL, cdp->i_d.di_mode, 0, 0); 2666 if (error) 2667 return XFS_ERROR(error); 2668 } 2669 2670 /* 2671 * Get the dquots for the inodes. 2672 */ 2673 error = XFS_QM_DQATTACH(mp, dp, 0); 2674 if (!error) 2675 error = XFS_QM_DQATTACH(mp, cdp, 0); 2676 if (error) { 2677 REMOVE_DEBUG_TRACE(__LINE__); 2678 goto std_return; 2679 } 2680 2681 tp = xfs_trans_alloc(mp, XFS_TRANS_RMDIR); 2682 cancel_flags = XFS_TRANS_RELEASE_LOG_RES; 2683 /* 2684 * We try to get the real space reservation first, 2685 * allowing for directory btree deletion(s) implying 2686 * possible bmap insert(s). If we can't get the space 2687 * reservation then we use 0 instead, and avoid the bmap 2688 * btree insert(s) in the directory code by, if the bmap 2689 * insert tries to happen, instead trimming the LAST 2690 * block from the directory. 
2691 */ 2692 resblks = XFS_REMOVE_SPACE_RES(mp); 2693 error = xfs_trans_reserve(tp, resblks, XFS_REMOVE_LOG_RES(mp), 0, 2694 XFS_TRANS_PERM_LOG_RES, XFS_DEFAULT_LOG_COUNT); 2695 if (error == ENOSPC) { 2696 resblks = 0; 2697 error = xfs_trans_reserve(tp, 0, XFS_REMOVE_LOG_RES(mp), 0, 2698 XFS_TRANS_PERM_LOG_RES, XFS_DEFAULT_LOG_COUNT); 2699 } 2700 if (error) { 2701 ASSERT(error != ENOSPC); 2702 cancel_flags = 0; 2703 goto error_return; 2704 } 2705 XFS_BMAP_INIT(&free_list, &first_block); 2706 2707 /* 2708 * Now lock the child directory inode and the parent directory 2709 * inode in the proper order. This will take care of validating 2710 * that the directory entry for the child directory inode has 2711 * not changed while we were obtaining a log reservation. 2712 */ 2713 error = xfs_lock_dir_and_entry(dp, cdp); 2714 if (error) { 2715 xfs_trans_cancel(tp, cancel_flags); 2716 goto std_return; 2717 } 2718 2719 IHOLD(dp); 2720 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); 2721 2722 IHOLD(cdp); 2723 xfs_trans_ijoin(tp, cdp, XFS_ILOCK_EXCL); 2724 2725 ASSERT(cdp->i_d.di_nlink >= 2); 2726 if (cdp->i_d.di_nlink != 2) { 2727 error = XFS_ERROR(ENOTEMPTY); 2728 goto error_return; 2729 } 2730 if (!xfs_dir_isempty(cdp)) { 2731 error = XFS_ERROR(ENOTEMPTY); 2732 goto error_return; 2733 } 2734 2735 error = xfs_dir_removename(tp, dp, name, cdp->i_ino, 2736 &first_block, &free_list, resblks); 2737 if (error) 2738 goto error1; 2739 2740 xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 2741 2742 /* 2743 * Bump the in memory generation count on the parent 2744 * directory so that other can know that it has changed. 2745 */ 2746 dp->i_gen++; 2747 2748 /* 2749 * Drop the link from cdp's "..". 2750 */ 2751 error = xfs_droplink(tp, dp); 2752 if (error) { 2753 goto error1; 2754 } 2755 2756 /* 2757 * Drop the link from dp to cdp. 2758 */ 2759 error = xfs_droplink(tp, cdp); 2760 if (error) { 2761 goto error1; 2762 } 2763 2764 /* 2765 * Drop the "." link from cdp to self. 
2766 */ 2767 error = xfs_droplink(tp, cdp); 2768 if (error) { 2769 goto error1; 2770 } 2771 2772 /* Determine these before committing transaction */ 2773 last_cdp_link = (cdp)->i_d.di_nlink==0; 2774 2775 /* 2776 * If this is a synchronous mount, make sure that the 2777 * rmdir transaction goes to disk before returning to 2778 * the user. 2779 */ 2780 if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) { 2781 xfs_trans_set_sync(tp); 2782 } 2783 2784 error = xfs_bmap_finish (&tp, &free_list, &committed); 2785 if (error) { 2786 xfs_bmap_cancel(&free_list); 2787 xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES | 2788 XFS_TRANS_ABORT)); 2789 goto std_return; 2790 } 2791 2792 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 2793 if (error) { 2794 goto std_return; 2795 } 2796 2797 2798 /* Fall through to std_return with error = 0 or the errno 2799 * from xfs_trans_commit. */ 2800 std_return: 2801 if (DM_EVENT_ENABLED(dp, DM_EVENT_POSTREMOVE)) { 2802 (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTREMOVE, 2803 dp, DM_RIGHT_NULL, 2804 NULL, DM_RIGHT_NULL, 2805 name->name, NULL, cdp->i_d.di_mode, 2806 error, 0); 2807 } 2808 return error; 2809 2810 error1: 2811 xfs_bmap_cancel(&free_list); 2812 cancel_flags |= XFS_TRANS_ABORT; 2813 /* FALLTHROUGH */ 2814 2815 error_return: 2816 xfs_trans_cancel(tp, cancel_flags); 2817 goto std_return; 2818} 2819 2820int 2821xfs_symlink( 2822 xfs_inode_t *dp, 2823 struct xfs_name *link_name, 2824 const char *target_path, 2825 mode_t mode, 2826 xfs_inode_t **ipp, 2827 cred_t *credp) 2828{ 2829 xfs_mount_t *mp = dp->i_mount; 2830 xfs_trans_t *tp; 2831 xfs_inode_t *ip; 2832 int error; 2833 int pathlen; 2834 xfs_bmap_free_t free_list; 2835 xfs_fsblock_t first_block; 2836 boolean_t unlock_dp_on_error = B_FALSE; 2837 uint cancel_flags; 2838 int committed; 2839 xfs_fileoff_t first_fsb; 2840 xfs_filblks_t fs_blocks; 2841 int nmaps; 2842 xfs_bmbt_irec_t mval[SYMLINK_MAPS]; 2843 xfs_daddr_t d; 2844 const char *cur_chunk; 2845 int byte_cnt; 2846 int n; 
2847 xfs_buf_t *bp; 2848 xfs_prid_t prid; 2849 struct xfs_dquot *udqp, *gdqp; 2850 uint resblks; 2851 2852 *ipp = NULL; 2853 error = 0; 2854 ip = NULL; 2855 tp = NULL; 2856 2857 xfs_itrace_entry(dp); 2858 2859 if (XFS_FORCED_SHUTDOWN(mp)) 2860 return XFS_ERROR(EIO); 2861 2862 /* 2863 * Check component lengths of the target path name. 2864 */ 2865 pathlen = strlen(target_path); 2866 if (pathlen >= MAXPATHLEN) /* total string too long */ 2867 return XFS_ERROR(ENAMETOOLONG); 2868 2869 if (DM_EVENT_ENABLED(dp, DM_EVENT_SYMLINK)) { 2870 error = XFS_SEND_NAMESP(mp, DM_EVENT_SYMLINK, dp, 2871 DM_RIGHT_NULL, NULL, DM_RIGHT_NULL, 2872 link_name->name, target_path, 0, 0, 0); 2873 if (error) 2874 return error; 2875 } 2876 2877 /* Return through std_return after this point. */ 2878 2879 udqp = gdqp = NULL; 2880 if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) 2881 prid = dp->i_d.di_projid; 2882 else 2883 prid = (xfs_prid_t)dfltprid; 2884 2885 /* 2886 * Make sure that we have allocated dquot(s) on disk. 2887 */ 2888 error = XFS_QM_DQVOPALLOC(mp, dp, 2889 current_fsuid(credp), current_fsgid(credp), prid, 2890 XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp); 2891 if (error) 2892 goto std_return; 2893 2894 tp = xfs_trans_alloc(mp, XFS_TRANS_SYMLINK); 2895 cancel_flags = XFS_TRANS_RELEASE_LOG_RES; 2896 /* 2897 * The symlink will fit into the inode data fork? 2898 * There can't be any attributes so we get the whole variable part. 
2899 */ 2900 if (pathlen <= XFS_LITINO(mp)) 2901 fs_blocks = 0; 2902 else 2903 fs_blocks = XFS_B_TO_FSB(mp, pathlen); 2904 resblks = XFS_SYMLINK_SPACE_RES(mp, link_name->len, fs_blocks); 2905 error = xfs_trans_reserve(tp, resblks, XFS_SYMLINK_LOG_RES(mp), 0, 2906 XFS_TRANS_PERM_LOG_RES, XFS_SYMLINK_LOG_COUNT); 2907 if (error == ENOSPC && fs_blocks == 0) { 2908 resblks = 0; 2909 error = xfs_trans_reserve(tp, 0, XFS_SYMLINK_LOG_RES(mp), 0, 2910 XFS_TRANS_PERM_LOG_RES, XFS_SYMLINK_LOG_COUNT); 2911 } 2912 if (error) { 2913 cancel_flags = 0; 2914 goto error_return; 2915 } 2916 2917 xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT); 2918 unlock_dp_on_error = B_TRUE; 2919 2920 /* 2921 * Check whether the directory allows new symlinks or not. 2922 */ 2923 if (dp->i_d.di_flags & XFS_DIFLAG_NOSYMLINKS) { 2924 error = XFS_ERROR(EPERM); 2925 goto error_return; 2926 } 2927 2928 /* 2929 * Reserve disk quota : blocks and inode. 2930 */ 2931 error = XFS_TRANS_RESERVE_QUOTA(mp, tp, udqp, gdqp, resblks, 1, 0); 2932 if (error) 2933 goto error_return; 2934 2935 /* 2936 * Check for ability to enter directory entry, if no space reserved. 2937 */ 2938 error = xfs_dir_canenter(tp, dp, link_name, resblks); 2939 if (error) 2940 goto error_return; 2941 /* 2942 * Initialize the bmap freelist prior to calling either 2943 * bmapi or the directory create code. 2944 */ 2945 XFS_BMAP_INIT(&free_list, &first_block); 2946 2947 /* 2948 * Allocate an inode for the symlink. 2949 */ 2950 error = xfs_dir_ialloc(&tp, dp, S_IFLNK | (mode & ~S_IFMT), 2951 1, 0, credp, prid, resblks > 0, &ip, NULL); 2952 if (error) { 2953 if (error == ENOSPC) 2954 goto error_return; 2955 goto error1; 2956 } 2957 xfs_itrace_ref(ip); 2958 2959 /* 2960 * An error after we've joined dp to the transaction will result in the 2961 * transaction cancel unlocking dp so don't do it explicitly in the 2962 * error path. 
2963 */ 2964 IHOLD(dp); 2965 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); 2966 unlock_dp_on_error = B_FALSE; 2967 2968 /* 2969 * Also attach the dquot(s) to it, if applicable. 2970 */ 2971 XFS_QM_DQVOPCREATE(mp, tp, ip, udqp, gdqp); 2972 2973 if (resblks) 2974 resblks -= XFS_IALLOC_SPACE_RES(mp); 2975 /* 2976 * If the symlink will fit into the inode, write it inline. 2977 */ 2978 if (pathlen <= XFS_IFORK_DSIZE(ip)) { 2979 xfs_idata_realloc(ip, pathlen, XFS_DATA_FORK); 2980 memcpy(ip->i_df.if_u1.if_data, target_path, pathlen); 2981 ip->i_d.di_size = pathlen; 2982 2983 /* 2984 * The inode was initially created in extent format. 2985 */ 2986 ip->i_df.if_flags &= ~(XFS_IFEXTENTS | XFS_IFBROOT); 2987 ip->i_df.if_flags |= XFS_IFINLINE; 2988 2989 ip->i_d.di_format = XFS_DINODE_FMT_LOCAL; 2990 xfs_trans_log_inode(tp, ip, XFS_ILOG_DDATA | XFS_ILOG_CORE); 2991 2992 } else { 2993 first_fsb = 0; 2994 nmaps = SYMLINK_MAPS; 2995 2996 error = xfs_bmapi(tp, ip, first_fsb, fs_blocks, 2997 XFS_BMAPI_WRITE | XFS_BMAPI_METADATA, 2998 &first_block, resblks, mval, &nmaps, 2999 &free_list, NULL); 3000 if (error) { 3001 goto error1; 3002 } 3003 3004 if (resblks) 3005 resblks -= fs_blocks; 3006 ip->i_d.di_size = pathlen; 3007 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 3008 3009 cur_chunk = target_path; 3010 for (n = 0; n < nmaps; n++) { 3011 d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock); 3012 byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount); 3013 bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, 3014 BTOBB(byte_cnt), 0); 3015 ASSERT(bp && !XFS_BUF_GETERROR(bp)); 3016 if (pathlen < byte_cnt) { 3017 byte_cnt = pathlen; 3018 } 3019 pathlen -= byte_cnt; 3020 3021 memcpy(XFS_BUF_PTR(bp), cur_chunk, byte_cnt); 3022 cur_chunk += byte_cnt; 3023 3024 xfs_trans_log_buf(tp, bp, 0, byte_cnt - 1); 3025 } 3026 } 3027 3028 /* 3029 * Create the directory entry for the symlink. 
3030 */ 3031 error = xfs_dir_createname(tp, dp, link_name, ip->i_ino, 3032 &first_block, &free_list, resblks); 3033 if (error) 3034 goto error1; 3035 xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 3036 xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); 3037 3038 /* 3039 * Bump the in memory version number of the parent directory 3040 * so that other processes accessing it will recognize that 3041 * the directory has changed. 3042 */ 3043 dp->i_gen++; 3044 3045 /* 3046 * If this is a synchronous mount, make sure that the 3047 * symlink transaction goes to disk before returning to 3048 * the user. 3049 */ 3050 if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) { 3051 xfs_trans_set_sync(tp); 3052 } 3053 3054 /* 3055 * xfs_trans_commit normally decrements the vnode ref count 3056 * when it unlocks the inode. Since we want to return the 3057 * vnode to the caller, we bump the vnode ref count now. 3058 */ 3059 IHOLD(ip); 3060 3061 error = xfs_bmap_finish(&tp, &free_list, &committed); 3062 if (error) { 3063 goto error2; 3064 } 3065 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 3066 XFS_QM_DQRELE(mp, udqp); 3067 XFS_QM_DQRELE(mp, gdqp); 3068 3069 /* Fall through to std_return with error = 0 or errno from 3070 * xfs_trans_commit */ 3071std_return: 3072 if (DM_EVENT_ENABLED(dp, DM_EVENT_POSTSYMLINK)) { 3073 (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTSYMLINK, 3074 dp, DM_RIGHT_NULL, 3075 error ? 
NULL : ip, 3076 DM_RIGHT_NULL, link_name->name, 3077 target_path, 0, error, 0); 3078 } 3079 3080 if (!error) 3081 *ipp = ip; 3082 return error; 3083 3084 error2: 3085 IRELE(ip); 3086 error1: 3087 xfs_bmap_cancel(&free_list); 3088 cancel_flags |= XFS_TRANS_ABORT; 3089 error_return: 3090 xfs_trans_cancel(tp, cancel_flags); 3091 XFS_QM_DQRELE(mp, udqp); 3092 XFS_QM_DQRELE(mp, gdqp); 3093 3094 if (unlock_dp_on_error) 3095 xfs_iunlock(dp, XFS_ILOCK_EXCL); 3096 3097 goto std_return; 3098} 3099 3100int 3101xfs_inode_flush( 3102 xfs_inode_t *ip, 3103 int flags) 3104{ 3105 xfs_mount_t *mp = ip->i_mount; 3106 int error = 0; 3107 3108 if (XFS_FORCED_SHUTDOWN(mp)) 3109 return XFS_ERROR(EIO); 3110 3111 /* 3112 * Bypass inodes which have already been cleaned by 3113 * the inode flush clustering code inside xfs_iflush 3114 */ 3115 if (xfs_inode_clean(ip)) 3116 return 0; 3117 3118 /* 3119 * We make this non-blocking if the inode is contended, 3120 * return EAGAIN to indicate to the caller that they 3121 * did not succeed. This prevents the flush path from 3122 * blocking on inodes inside another operation right 3123 * now, they get caught later by xfs_sync. 3124 */ 3125 if (flags & FLUSH_SYNC) { 3126 xfs_ilock(ip, XFS_ILOCK_SHARED); 3127 xfs_iflock(ip); 3128 } else if (xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) { 3129 if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip)) { 3130 xfs_iunlock(ip, XFS_ILOCK_SHARED); 3131 return EAGAIN; 3132 } 3133 } else { 3134 return EAGAIN; 3135 } 3136 3137 error = xfs_iflush(ip, (flags & FLUSH_SYNC) ? 
XFS_IFLUSH_SYNC 3138 : XFS_IFLUSH_ASYNC_NOBLOCK); 3139 xfs_iunlock(ip, XFS_ILOCK_SHARED); 3140 3141 return error; 3142} 3143 3144 3145int 3146xfs_set_dmattrs( 3147 xfs_inode_t *ip, 3148 u_int evmask, 3149 u_int16_t state) 3150{ 3151 xfs_mount_t *mp = ip->i_mount; 3152 xfs_trans_t *tp; 3153 int error; 3154 3155 if (!capable(CAP_SYS_ADMIN)) 3156 return XFS_ERROR(EPERM); 3157 3158 if (XFS_FORCED_SHUTDOWN(mp)) 3159 return XFS_ERROR(EIO); 3160 3161 tp = xfs_trans_alloc(mp, XFS_TRANS_SET_DMATTRS); 3162 error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES (mp), 0, 0, 0); 3163 if (error) { 3164 xfs_trans_cancel(tp, 0); 3165 return error; 3166 } 3167 xfs_ilock(ip, XFS_ILOCK_EXCL); 3168 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 3169 3170 ip->i_d.di_dmevmask = evmask; 3171 ip->i_d.di_dmstate = state; 3172 3173 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 3174 IHOLD(ip); 3175 error = xfs_trans_commit(tp, 0); 3176 3177 return error; 3178} 3179 3180int 3181xfs_reclaim( 3182 xfs_inode_t *ip) 3183{ 3184 bhv_vnode_t *vp = XFS_ITOV(ip); 3185 3186 xfs_itrace_entry(ip); 3187 3188 ASSERT(!VN_MAPPED(vp)); 3189 3190 /* bad inode, get out here ASAP */ 3191 if (VN_BAD(vp)) { 3192 xfs_ireclaim(ip); 3193 return 0; 3194 } 3195 3196 vn_iowait(ip); 3197 3198 ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0); 3199 3200 /* 3201 * Make sure the atime in the XFS inode is correct before freeing the 3202 * Linux inode. 3203 */ 3204 xfs_synchronize_atime(ip); 3205 3206 /* 3207 * If we have nothing to flush with this inode then complete the 3208 * teardown now, otherwise break the link between the xfs inode and the 3209 * linux inode and clean up the xfs inode later. This avoids flushing 3210 * the inode to disk during the delete operation itself. 3211 * 3212 * When breaking the link, we need to set the XFS_IRECLAIMABLE flag 3213 * first to ensure that xfs_iunpin() will never see an xfs inode 3214 * that has a linux inode being reclaimed. 
Synchronisation is provided 3215 * by the i_flags_lock. 3216 */ 3217 if (!ip->i_update_core && (ip->i_itemp == NULL)) { 3218 xfs_ilock(ip, XFS_ILOCK_EXCL); 3219 xfs_iflock(ip); 3220 return xfs_finish_reclaim(ip, 1, XFS_IFLUSH_DELWRI_ELSE_SYNC); 3221 } else { 3222 xfs_mount_t *mp = ip->i_mount; 3223 3224 /* Protect sync and unpin from us */ 3225 XFS_MOUNT_ILOCK(mp); 3226 spin_lock(&ip->i_flags_lock); 3227 __xfs_iflags_set(ip, XFS_IRECLAIMABLE); 3228 vn_to_inode(vp)->i_private = NULL; 3229 ip->i_vnode = NULL; 3230 spin_unlock(&ip->i_flags_lock); 3231 list_add_tail(&ip->i_reclaim, &mp->m_del_inodes); 3232 XFS_MOUNT_IUNLOCK(mp); 3233 } 3234 return 0; 3235} 3236 3237int 3238xfs_finish_reclaim( 3239 xfs_inode_t *ip, 3240 int locked, 3241 int sync_mode) 3242{ 3243 xfs_perag_t *pag = xfs_get_perag(ip->i_mount, ip->i_ino); 3244 bhv_vnode_t *vp = XFS_ITOV_NULL(ip); 3245 int error; 3246 3247 if (vp && VN_BAD(vp)) 3248 goto reclaim; 3249 3250 /* The hash lock here protects a thread in xfs_iget_core from 3251 * racing with us on linking the inode back with a vnode. 3252 * Once we have the XFS_IRECLAIM flag set it will not touch 3253 * us. 3254 */ 3255 write_lock(&pag->pag_ici_lock); 3256 spin_lock(&ip->i_flags_lock); 3257 if (__xfs_iflags_test(ip, XFS_IRECLAIM) || 3258 (!__xfs_iflags_test(ip, XFS_IRECLAIMABLE) && vp == NULL)) { 3259 spin_unlock(&ip->i_flags_lock); 3260 write_unlock(&pag->pag_ici_lock); 3261 if (locked) { 3262 xfs_ifunlock(ip); 3263 xfs_iunlock(ip, XFS_ILOCK_EXCL); 3264 } 3265 return 1; 3266 } 3267 __xfs_iflags_set(ip, XFS_IRECLAIM); 3268 spin_unlock(&ip->i_flags_lock); 3269 write_unlock(&pag->pag_ici_lock); 3270 xfs_put_perag(ip->i_mount, pag); 3271 3272 /* 3273 * If the inode is still dirty, then flush it out. If the inode 3274 * is not in the AIL, then it will be OK to flush it delwri as 3275 * long as xfs_iflush() does not keep any references to the inode. 
3276 * We leave that decision up to xfs_iflush() since it has the 3277 * knowledge of whether it's OK to simply do a delwri flush of 3278 * the inode or whether we need to wait until the inode is 3279 * pulled from the AIL. 3280 * We get the flush lock regardless, though, just to make sure 3281 * we don't free it while it is being flushed. 3282 */ 3283 if (!locked) { 3284 xfs_ilock(ip, XFS_ILOCK_EXCL); 3285 xfs_iflock(ip); 3286 } 3287 3288 if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { 3289 if (ip->i_update_core || 3290 ((ip->i_itemp != NULL) && 3291 (ip->i_itemp->ili_format.ilf_fields != 0))) { 3292 error = xfs_iflush(ip, sync_mode); 3293 /* 3294 * If we hit an error, typically because of filesystem 3295 * shutdown, we don't need to let vn_reclaim to know 3296 * because we're gonna reclaim the inode anyway. 3297 */ 3298 if (error) { 3299 xfs_iunlock(ip, XFS_ILOCK_EXCL); 3300 goto reclaim; 3301 } 3302 xfs_iflock(ip); /* synchronize with xfs_iflush_done */ 3303 } 3304 3305 ASSERT(ip->i_update_core == 0); 3306 ASSERT(ip->i_itemp == NULL || 3307 ip->i_itemp->ili_format.ilf_fields == 0); 3308 } 3309 3310 xfs_ifunlock(ip); 3311 xfs_iunlock(ip, XFS_ILOCK_EXCL); 3312 3313 reclaim: 3314 xfs_ireclaim(ip); 3315 return 0; 3316} 3317 3318int 3319xfs_finish_reclaim_all(xfs_mount_t *mp, int noblock) 3320{ 3321 int purged; 3322 xfs_inode_t *ip, *n; 3323 int done = 0; 3324 3325 while (!done) { 3326 purged = 0; 3327 XFS_MOUNT_ILOCK(mp); 3328 list_for_each_entry_safe(ip, n, &mp->m_del_inodes, i_reclaim) { 3329 if (noblock) { 3330 if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL) == 0) 3331 continue; 3332 if (xfs_ipincount(ip) || 3333 !xfs_iflock_nowait(ip)) { 3334 xfs_iunlock(ip, XFS_ILOCK_EXCL); 3335 continue; 3336 } 3337 } 3338 XFS_MOUNT_IUNLOCK(mp); 3339 if (xfs_finish_reclaim(ip, noblock, 3340 XFS_IFLUSH_DELWRI_ELSE_ASYNC)) 3341 delay(1); 3342 purged = 1; 3343 break; 3344 } 3345 3346 done = !purged; 3347 } 3348 3349 XFS_MOUNT_IUNLOCK(mp); 3350 return 0; 3351} 3352 3353/* 3354 * 
xfs_alloc_file_space() 3355 * This routine allocates disk space for the given file. 3356 * 3357 * If alloc_type == 0, this request is for an ALLOCSP type 3358 * request which will change the file size. In this case, no 3359 * DMAPI event will be generated by the call. A TRUNCATE event 3360 * will be generated later by xfs_setattr. 3361 * 3362 * If alloc_type != 0, this request is for a RESVSP type 3363 * request, and a DMAPI DM_EVENT_WRITE will be generated if the 3364 * lower block boundary byte address is less than the file's 3365 * length. 3366 * 3367 * RETURNS: 3368 * 0 on success 3369 * errno on error 3370 * 3371 */ 3372STATIC int 3373xfs_alloc_file_space( 3374 xfs_inode_t *ip, 3375 xfs_off_t offset, 3376 xfs_off_t len, 3377 int alloc_type, 3378 int attr_flags) 3379{ 3380 xfs_mount_t *mp = ip->i_mount; 3381 xfs_off_t count; 3382 xfs_filblks_t allocated_fsb; 3383 xfs_filblks_t allocatesize_fsb; 3384 xfs_extlen_t extsz, temp; 3385 xfs_fileoff_t startoffset_fsb; 3386 xfs_fsblock_t firstfsb; 3387 int nimaps; 3388 int bmapi_flag; 3389 int quota_flag; 3390 int rt; 3391 xfs_trans_t *tp; 3392 xfs_bmbt_irec_t imaps[1], *imapp; 3393 xfs_bmap_free_t free_list; 3394 uint qblocks, resblks, resrtextents; 3395 int committed; 3396 int error; 3397 3398 xfs_itrace_entry(ip); 3399 3400 if (XFS_FORCED_SHUTDOWN(mp)) 3401 return XFS_ERROR(EIO); 3402 3403 if ((error = XFS_QM_DQATTACH(mp, ip, 0))) 3404 return error; 3405 3406 if (len <= 0) 3407 return XFS_ERROR(EINVAL); 3408 3409 rt = XFS_IS_REALTIME_INODE(ip); 3410 extsz = xfs_get_extsz_hint(ip); 3411 3412 count = len; 3413 imapp = &imaps[0]; 3414 nimaps = 1; 3415 bmapi_flag = XFS_BMAPI_WRITE | (alloc_type ? XFS_BMAPI_PREALLOC : 0); 3416 startoffset_fsb = XFS_B_TO_FSBT(mp, offset); 3417 allocatesize_fsb = XFS_B_TO_FSB(mp, count); 3418 3419 /* Generate a DMAPI event if needed. 
*/ 3420 if (alloc_type != 0 && offset < ip->i_size && 3421 (attr_flags&ATTR_DMI) == 0 && 3422 DM_EVENT_ENABLED(ip, DM_EVENT_WRITE)) { 3423 xfs_off_t end_dmi_offset; 3424 3425 end_dmi_offset = offset+len; 3426 if (end_dmi_offset > ip->i_size) 3427 end_dmi_offset = ip->i_size; 3428 error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, ip, offset, 3429 end_dmi_offset - offset, 0, NULL); 3430 if (error) 3431 return error; 3432 } 3433 3434 /* 3435 * Allocate file space until done or until there is an error 3436 */ 3437retry: 3438 while (allocatesize_fsb && !error) { 3439 xfs_fileoff_t s, e; 3440 3441 /* 3442 * Determine space reservations for data/realtime. 3443 */ 3444 if (unlikely(extsz)) { 3445 s = startoffset_fsb; 3446 do_div(s, extsz); 3447 s *= extsz; 3448 e = startoffset_fsb + allocatesize_fsb; 3449 if ((temp = do_mod(startoffset_fsb, extsz))) 3450 e += temp; 3451 if ((temp = do_mod(e, extsz))) 3452 e += extsz - temp; 3453 } else { 3454 s = 0; 3455 e = allocatesize_fsb; 3456 } 3457 3458 if (unlikely(rt)) { 3459 resrtextents = qblocks = (uint)(e - s); 3460 resrtextents /= mp->m_sb.sb_rextsize; 3461 resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0); 3462 quota_flag = XFS_QMOPT_RES_RTBLKS; 3463 } else { 3464 resrtextents = 0; 3465 resblks = qblocks = \ 3466 XFS_DIOSTRAT_SPACE_RES(mp, (uint)(e - s)); 3467 quota_flag = XFS_QMOPT_RES_REGBLKS; 3468 } 3469 3470 /* 3471 * Allocate and setup the transaction. 3472 */ 3473 tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); 3474 error = xfs_trans_reserve(tp, resblks, 3475 XFS_WRITE_LOG_RES(mp), resrtextents, 3476 XFS_TRANS_PERM_LOG_RES, 3477 XFS_WRITE_LOG_COUNT); 3478 /* 3479 * Check for running out of space 3480 */ 3481 if (error) { 3482 /* 3483 * Free the transaction structure. 
3484 */ 3485 ASSERT(error == ENOSPC || XFS_FORCED_SHUTDOWN(mp)); 3486 xfs_trans_cancel(tp, 0); 3487 break; 3488 } 3489 xfs_ilock(ip, XFS_ILOCK_EXCL); 3490 error = XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip, 3491 qblocks, 0, quota_flag); 3492 if (error) 3493 goto error1; 3494 3495 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 3496 xfs_trans_ihold(tp, ip); 3497 3498 /* 3499 * Issue the xfs_bmapi() call to allocate the blocks 3500 */ 3501 XFS_BMAP_INIT(&free_list, &firstfsb); 3502 error = xfs_bmapi(tp, ip, startoffset_fsb, 3503 allocatesize_fsb, bmapi_flag, 3504 &firstfsb, 0, imapp, &nimaps, 3505 &free_list, NULL); 3506 if (error) { 3507 goto error0; 3508 } 3509 3510 /* 3511 * Complete the transaction 3512 */ 3513 error = xfs_bmap_finish(&tp, &free_list, &committed); 3514 if (error) { 3515 goto error0; 3516 } 3517 3518 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 3519 xfs_iunlock(ip, XFS_ILOCK_EXCL); 3520 if (error) { 3521 break; 3522 } 3523 3524 allocated_fsb = imapp->br_blockcount; 3525 3526 if (nimaps == 0) { 3527 error = XFS_ERROR(ENOSPC); 3528 break; 3529 } 3530 3531 startoffset_fsb += allocated_fsb; 3532 allocatesize_fsb -= allocated_fsb; 3533 } 3534dmapi_enospc_check: 3535 if (error == ENOSPC && (attr_flags & ATTR_DMI) == 0 && 3536 DM_EVENT_ENABLED(ip, DM_EVENT_NOSPACE)) { 3537 error = XFS_SEND_NAMESP(mp, DM_EVENT_NOSPACE, 3538 ip, DM_RIGHT_NULL, 3539 ip, DM_RIGHT_NULL, 3540 NULL, NULL, 0, 0, 0); /* Delay flag intentionally unused */ 3541 if (error == 0) 3542 goto retry; /* Maybe DMAPI app. 
has made space */ 3543 /* else fall through with error from XFS_SEND_DATA */ 3544 } 3545 3546 return error; 3547 3548error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */ 3549 xfs_bmap_cancel(&free_list); 3550 XFS_TRANS_UNRESERVE_QUOTA_NBLKS(mp, tp, ip, qblocks, 0, quota_flag); 3551 3552error1: /* Just cancel transaction */ 3553 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); 3554 xfs_iunlock(ip, XFS_ILOCK_EXCL); 3555 goto dmapi_enospc_check; 3556} 3557 3558/* 3559 * Zero file bytes between startoff and endoff inclusive. 3560 * The iolock is held exclusive and no blocks are buffered. 3561 */ 3562STATIC int 3563xfs_zero_remaining_bytes( 3564 xfs_inode_t *ip, 3565 xfs_off_t startoff, 3566 xfs_off_t endoff) 3567{ 3568 xfs_bmbt_irec_t imap; 3569 xfs_fileoff_t offset_fsb; 3570 xfs_off_t lastoffset; 3571 xfs_off_t offset; 3572 xfs_buf_t *bp; 3573 xfs_mount_t *mp = ip->i_mount; 3574 int nimap; 3575 int error = 0; 3576 3577 bp = xfs_buf_get_noaddr(mp->m_sb.sb_blocksize, 3578 XFS_IS_REALTIME_INODE(ip) ? 
3579 mp->m_rtdev_targp : mp->m_ddev_targp); 3580 3581 for (offset = startoff; offset <= endoff; offset = lastoffset + 1) { 3582 offset_fsb = XFS_B_TO_FSBT(mp, offset); 3583 nimap = 1; 3584 error = xfs_bmapi(NULL, ip, offset_fsb, 1, 0, 3585 NULL, 0, &imap, &nimap, NULL, NULL); 3586 if (error || nimap < 1) 3587 break; 3588 ASSERT(imap.br_blockcount >= 1); 3589 ASSERT(imap.br_startoff == offset_fsb); 3590 lastoffset = XFS_FSB_TO_B(mp, imap.br_startoff + 1) - 1; 3591 if (lastoffset > endoff) 3592 lastoffset = endoff; 3593 if (imap.br_startblock == HOLESTARTBLOCK) 3594 continue; 3595 ASSERT(imap.br_startblock != DELAYSTARTBLOCK); 3596 if (imap.br_state == XFS_EXT_UNWRITTEN) 3597 continue; 3598 XFS_BUF_UNDONE(bp); 3599 XFS_BUF_UNWRITE(bp); 3600 XFS_BUF_READ(bp); 3601 XFS_BUF_SET_ADDR(bp, XFS_FSB_TO_DB(ip, imap.br_startblock)); 3602 xfsbdstrat(mp, bp); 3603 error = xfs_iowait(bp); 3604 if (error) { 3605 xfs_ioerror_alert("xfs_zero_remaining_bytes(read)", 3606 mp, bp, XFS_BUF_ADDR(bp)); 3607 break; 3608 } 3609 memset(XFS_BUF_PTR(bp) + 3610 (offset - XFS_FSB_TO_B(mp, imap.br_startoff)), 3611 0, lastoffset - offset + 1); 3612 XFS_BUF_UNDONE(bp); 3613 XFS_BUF_UNREAD(bp); 3614 XFS_BUF_WRITE(bp); 3615 xfsbdstrat(mp, bp); 3616 error = xfs_iowait(bp); 3617 if (error) { 3618 xfs_ioerror_alert("xfs_zero_remaining_bytes(write)", 3619 mp, bp, XFS_BUF_ADDR(bp)); 3620 break; 3621 } 3622 } 3623 xfs_buf_free(bp); 3624 return error; 3625} 3626 3627/* 3628 * xfs_free_file_space() 3629 * This routine frees disk space for the given file. 3630 * 3631 * This routine is only called by xfs_change_file_space 3632 * for an UNRESVSP type call. 
3633 * 3634 * RETURNS: 3635 * 0 on success 3636 * errno on error 3637 * 3638 */ 3639STATIC int 3640xfs_free_file_space( 3641 xfs_inode_t *ip, 3642 xfs_off_t offset, 3643 xfs_off_t len, 3644 int attr_flags) 3645{ 3646 bhv_vnode_t *vp; 3647 int committed; 3648 int done; 3649 xfs_off_t end_dmi_offset; 3650 xfs_fileoff_t endoffset_fsb; 3651 int error; 3652 xfs_fsblock_t firstfsb; 3653 xfs_bmap_free_t free_list; 3654 xfs_bmbt_irec_t imap; 3655 xfs_off_t ioffset; 3656 xfs_extlen_t mod=0; 3657 xfs_mount_t *mp; 3658 int nimap; 3659 uint resblks; 3660 uint rounding; 3661 int rt; 3662 xfs_fileoff_t startoffset_fsb; 3663 xfs_trans_t *tp; 3664 int need_iolock = 1; 3665 3666 vp = XFS_ITOV(ip); 3667 mp = ip->i_mount; 3668 3669 xfs_itrace_entry(ip); 3670 3671 if ((error = XFS_QM_DQATTACH(mp, ip, 0))) 3672 return error; 3673 3674 error = 0; 3675 if (len <= 0) /* if nothing being freed */ 3676 return error; 3677 rt = XFS_IS_REALTIME_INODE(ip); 3678 startoffset_fsb = XFS_B_TO_FSB(mp, offset); 3679 end_dmi_offset = offset + len; 3680 endoffset_fsb = XFS_B_TO_FSBT(mp, end_dmi_offset); 3681 3682 if (offset < ip->i_size && (attr_flags & ATTR_DMI) == 0 && 3683 DM_EVENT_ENABLED(ip, DM_EVENT_WRITE)) { 3684 if (end_dmi_offset > ip->i_size) 3685 end_dmi_offset = ip->i_size; 3686 error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, ip, 3687 offset, end_dmi_offset - offset, 3688 AT_DELAY_FLAG(attr_flags), NULL); 3689 if (error) 3690 return error; 3691 } 3692 3693 if (attr_flags & ATTR_NOLOCK) 3694 need_iolock = 0; 3695 if (need_iolock) { 3696 xfs_ilock(ip, XFS_IOLOCK_EXCL); 3697 vn_iowait(ip); /* wait for the completion of any pending DIOs */ 3698 } 3699 3700 rounding = max_t(uint, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE); 3701 ioffset = offset & ~(rounding - 1); 3702 3703 if (VN_CACHED(vp) != 0) { 3704 xfs_inval_cached_trace(ip, ioffset, -1, ioffset, -1); 3705 error = xfs_flushinval_pages(ip, ioffset, -1, FI_REMAPF_LOCKED); 3706 if (error) 3707 goto out_unlock_iolock; 3708 } 3709 3710 /* 3711 * Need 
to zero the stuff we're not freeing, on disk. 3712 * If its a realtime file & can't use unwritten extents then we 3713 * actually need to zero the extent edges. Otherwise xfs_bunmapi 3714 * will take care of it for us. 3715 */ 3716 if (rt && !xfs_sb_version_hasextflgbit(&mp->m_sb)) { 3717 nimap = 1; 3718 error = xfs_bmapi(NULL, ip, startoffset_fsb, 3719 1, 0, NULL, 0, &imap, &nimap, NULL, NULL); 3720 if (error) 3721 goto out_unlock_iolock; 3722 ASSERT(nimap == 0 || nimap == 1); 3723 if (nimap && imap.br_startblock != HOLESTARTBLOCK) { 3724 xfs_daddr_t block; 3725 3726 ASSERT(imap.br_startblock != DELAYSTARTBLOCK); 3727 block = imap.br_startblock; 3728 mod = do_div(block, mp->m_sb.sb_rextsize); 3729 if (mod) 3730 startoffset_fsb += mp->m_sb.sb_rextsize - mod; 3731 } 3732 nimap = 1; 3733 error = xfs_bmapi(NULL, ip, endoffset_fsb - 1, 3734 1, 0, NULL, 0, &imap, &nimap, NULL, NULL); 3735 if (error) 3736 goto out_unlock_iolock; 3737 ASSERT(nimap == 0 || nimap == 1); 3738 if (nimap && imap.br_startblock != HOLESTARTBLOCK) { 3739 ASSERT(imap.br_startblock != DELAYSTARTBLOCK); 3740 mod++; 3741 if (mod && (mod != mp->m_sb.sb_rextsize)) 3742 endoffset_fsb -= mod; 3743 } 3744 } 3745 if ((done = (endoffset_fsb <= startoffset_fsb))) 3746 /* 3747 * One contiguous piece to clear 3748 */ 3749 error = xfs_zero_remaining_bytes(ip, offset, offset + len - 1); 3750 else { 3751 /* 3752 * Some full blocks, possibly two pieces to clear 3753 */ 3754 if (offset < XFS_FSB_TO_B(mp, startoffset_fsb)) 3755 error = xfs_zero_remaining_bytes(ip, offset, 3756 XFS_FSB_TO_B(mp, startoffset_fsb) - 1); 3757 if (!error && 3758 XFS_FSB_TO_B(mp, endoffset_fsb) < offset + len) 3759 error = xfs_zero_remaining_bytes(ip, 3760 XFS_FSB_TO_B(mp, endoffset_fsb), 3761 offset + len - 1); 3762 } 3763 3764 /* 3765 * free file space until done or until there is an error 3766 */ 3767 resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0); 3768 while (!error && !done) { 3769 3770 /* 3771 * allocate and setup the transaction. 
Allow this 3772 * transaction to dip into the reserve blocks to ensure 3773 * the freeing of the space succeeds at ENOSPC. 3774 */ 3775 tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); 3776 tp->t_flags |= XFS_TRANS_RESERVE; 3777 error = xfs_trans_reserve(tp, 3778 resblks, 3779 XFS_WRITE_LOG_RES(mp), 3780 0, 3781 XFS_TRANS_PERM_LOG_RES, 3782 XFS_WRITE_LOG_COUNT); 3783 3784 /* 3785 * check for running out of space 3786 */ 3787 if (error) { 3788 /* 3789 * Free the transaction structure. 3790 */ 3791 ASSERT(error == ENOSPC || XFS_FORCED_SHUTDOWN(mp)); 3792 xfs_trans_cancel(tp, 0); 3793 break; 3794 } 3795 xfs_ilock(ip, XFS_ILOCK_EXCL); 3796 error = XFS_TRANS_RESERVE_QUOTA(mp, tp, 3797 ip->i_udquot, ip->i_gdquot, resblks, 0, 3798 XFS_QMOPT_RES_REGBLKS); 3799 if (error) 3800 goto error1; 3801 3802 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 3803 xfs_trans_ihold(tp, ip); 3804 3805 /* 3806 * issue the bunmapi() call to free the blocks 3807 */ 3808 XFS_BMAP_INIT(&free_list, &firstfsb); 3809 error = xfs_bunmapi(tp, ip, startoffset_fsb, 3810 endoffset_fsb - startoffset_fsb, 3811 0, 2, &firstfsb, &free_list, NULL, &done); 3812 if (error) { 3813 goto error0; 3814 } 3815 3816 /* 3817 * complete the transaction 3818 */ 3819 error = xfs_bmap_finish(&tp, &free_list, &committed); 3820 if (error) { 3821 goto error0; 3822 } 3823 3824 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 3825 xfs_iunlock(ip, XFS_ILOCK_EXCL); 3826 } 3827 3828 out_unlock_iolock: 3829 if (need_iolock) 3830 xfs_iunlock(ip, XFS_IOLOCK_EXCL); 3831 return error; 3832 3833 error0: 3834 xfs_bmap_cancel(&free_list); 3835 error1: 3836 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); 3837 xfs_iunlock(ip, need_iolock ? (XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL) : 3838 XFS_ILOCK_EXCL); 3839 return error; 3840} 3841 3842/* 3843 * xfs_change_file_space() 3844 * This routine allocates or frees disk space for the given file. 
3845 * The user specified parameters are checked for alignment and size 3846 * limitations. 3847 * 3848 * RETURNS: 3849 * 0 on success 3850 * errno on error 3851 * 3852 */ 3853int 3854xfs_change_file_space( 3855 xfs_inode_t *ip, 3856 int cmd, 3857 xfs_flock64_t *bf, 3858 xfs_off_t offset, 3859 cred_t *credp, 3860 int attr_flags) 3861{ 3862 xfs_mount_t *mp = ip->i_mount; 3863 int clrprealloc; 3864 int error; 3865 xfs_fsize_t fsize; 3866 int setprealloc; 3867 xfs_off_t startoffset; 3868 xfs_off_t llen; 3869 xfs_trans_t *tp; 3870 bhv_vattr_t va; 3871 3872 xfs_itrace_entry(ip); 3873 3874 if (!S_ISREG(ip->i_d.di_mode)) 3875 return XFS_ERROR(EINVAL); 3876 3877 switch (bf->l_whence) { 3878 case 0: /*SEEK_SET*/ 3879 break; 3880 case 1: /*SEEK_CUR*/ 3881 bf->l_start += offset; 3882 break; 3883 case 2: /*SEEK_END*/ 3884 bf->l_start += ip->i_size; 3885 break; 3886 default: 3887 return XFS_ERROR(EINVAL); 3888 } 3889 3890 llen = bf->l_len > 0 ? bf->l_len - 1 : bf->l_len; 3891 3892 if ( (bf->l_start < 0) 3893 || (bf->l_start > XFS_MAXIOFFSET(mp)) 3894 || (bf->l_start + llen < 0) 3895 || (bf->l_start + llen > XFS_MAXIOFFSET(mp))) 3896 return XFS_ERROR(EINVAL); 3897 3898 bf->l_whence = 0; 3899 3900 startoffset = bf->l_start; 3901 fsize = ip->i_size; 3902 3903 /* 3904 * XFS_IOC_RESVSP and XFS_IOC_UNRESVSP will reserve or unreserve 3905 * file space. 3906 * These calls do NOT zero the data space allocated to the file, 3907 * nor do they change the file size. 3908 * 3909 * XFS_IOC_ALLOCSP and XFS_IOC_FREESP will allocate and free file 3910 * space. 3911 * These calls cause the new file data to be zeroed and the file 3912 * size to be changed. 
3913 */ 3914 setprealloc = clrprealloc = 0; 3915 3916 switch (cmd) { 3917 case XFS_IOC_RESVSP: 3918 case XFS_IOC_RESVSP64: 3919 error = xfs_alloc_file_space(ip, startoffset, bf->l_len, 3920 1, attr_flags); 3921 if (error) 3922 return error; 3923 setprealloc = 1; 3924 break; 3925 3926 case XFS_IOC_UNRESVSP: 3927 case XFS_IOC_UNRESVSP64: 3928 if ((error = xfs_free_file_space(ip, startoffset, bf->l_len, 3929 attr_flags))) 3930 return error; 3931 break; 3932 3933 case XFS_IOC_ALLOCSP: 3934 case XFS_IOC_ALLOCSP64: 3935 case XFS_IOC_FREESP: 3936 case XFS_IOC_FREESP64: 3937 if (startoffset > fsize) { 3938 error = xfs_alloc_file_space(ip, fsize, 3939 startoffset - fsize, 0, attr_flags); 3940 if (error) 3941 break; 3942 } 3943 3944 va.va_mask = XFS_AT_SIZE; 3945 va.va_size = startoffset; 3946 3947 error = xfs_setattr(ip, &va, attr_flags, credp); 3948 3949 if (error) 3950 return error; 3951 3952 clrprealloc = 1; 3953 break; 3954 3955 default: 3956 ASSERT(0); 3957 return XFS_ERROR(EINVAL); 3958 } 3959 3960 /* 3961 * update the inode timestamp, mode, and prealloc flag bits 3962 */ 3963 tp = xfs_trans_alloc(mp, XFS_TRANS_WRITEID); 3964 3965 if ((error = xfs_trans_reserve(tp, 0, XFS_WRITEID_LOG_RES(mp), 3966 0, 0, 0))) { 3967 /* ASSERT(0); */ 3968 xfs_trans_cancel(tp, 0); 3969 return error; 3970 } 3971 3972 xfs_ilock(ip, XFS_ILOCK_EXCL); 3973 3974 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 3975 xfs_trans_ihold(tp, ip); 3976 3977 if ((attr_flags & ATTR_DMI) == 0) { 3978 ip->i_d.di_mode &= ~S_ISUID; 3979 3980 /* 3981 * Note that we don't have to worry about mandatory 3982 * file locking being disabled here because we only 3983 * clear the S_ISGID bit if the Group execute bit is 3984 * on, but if it was on then mandatory locking wouldn't 3985 * have been enabled. 
3986 */ 3987 if (ip->i_d.di_mode & S_IXGRP) 3988 ip->i_d.di_mode &= ~S_ISGID; 3989 3990 xfs_ichgtime(ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 3991 } 3992 if (setprealloc) 3993 ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC; 3994 else if (clrprealloc) 3995 ip->i_d.di_flags &= ~XFS_DIFLAG_PREALLOC; 3996 3997 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 3998 xfs_trans_set_sync(tp); 3999 4000 error = xfs_trans_commit(tp, 0); 4001 4002 xfs_iunlock(ip, XFS_ILOCK_EXCL); 4003 4004 return error; 4005}