Merge branch 'for-linus' of git://oss.sgi.com:8090/xfs/xfs-2.6

+7 -8

Documentation/filesystems/xfs.txt

··· 52 52 and also gets the setgid bit set if it is a directory itself. 53 53 54 54 ihashsize=value 55 - Sets the number of hash buckets available for hashing the 56 - in-memory inodes of the specified mount point. If a value 57 - of zero is used, the value selected by the default algorithm 58 - will be displayed in /proc/mounts. 55 + In memory inode hashes have been removed, so this option has 56 + no function as of August 2007. Option is deprecated. 59 57 60 58 ikeep/noikeep 61 - When inode clusters are emptied of inodes, keep them around 62 - on the disk (ikeep) - this is the traditional XFS behaviour 63 - and is still the default for now. Using the noikeep option, 64 - inode clusters are returned to the free space pool. 59 + When ikeep is specified, XFS does not delete empty inode clusters 60 + and keeps them around on disk. ikeep is the traditional XFS 61 + behaviour. When noikeep is specified, empty inode clusters 62 + are returned to the free space pool. The default is noikeep for 63 + non-DMAPI mounts, while ikeep is the default when DMAPI is in use. 65 64 66 65 inode64 67 66 Indicates that XFS is allowed to create inodes at any location

-12

fs/xfs/Kconfig

··· 35 35 with or without the generic quota support enabled (CONFIG_QUOTA) - 36 36 they are completely independent subsystems. 37 37 38 - config XFS_SECURITY 39 - bool "XFS Security Label support" 40 - depends on XFS_FS 41 - help 42 - Security labels support alternative access control models 43 - implemented by security modules like SELinux. This option 44 - enables an extended attribute namespace for inode security 45 - labels in the XFS filesystem. 46 - 47 - If you are not using a security module that requires using 48 - extended attributes for inode security labels, say N. 49 - 50 38 config XFS_POSIX_ACL 51 39 bool "XFS POSIX ACL support" 52 40 depends on XFS_FS

+3 -3

fs/xfs/linux-2.6/kmem.c

··· 37 37 #ifdef DEBUG 38 38 if (unlikely(!(flags & KM_LARGE) && (size > PAGE_SIZE))) { 39 39 printk(KERN_WARNING "Large %s attempt, size=%ld\n", 40 - __FUNCTION__, (long)size); 40 + __func__, (long)size); 41 41 dump_stack(); 42 42 } 43 43 #endif ··· 52 52 if (!(++retries % 100)) 53 53 printk(KERN_ERR "XFS: possible memory allocation " 54 54 "deadlock in %s (mode:0x%x)\n", 55 - __FUNCTION__, lflags); 55 + __func__, lflags); 56 56 congestion_wait(WRITE, HZ/50); 57 57 } while (1); 58 58 } ··· 129 129 if (!(++retries % 100)) 130 130 printk(KERN_ERR "XFS: possible memory allocation " 131 131 "deadlock in %s (mode:0x%x)\n", 132 - __FUNCTION__, lflags); 132 + __func__, lflags); 133 133 congestion_wait(WRITE, HZ/50); 134 134 } while (1); 135 135 }

+8 -4

fs/xfs/linux-2.6/xfs_aops.c

··· 243 243 size_t size = ioend->io_size; 244 244 245 245 if (likely(!ioend->io_error)) { 246 - if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) 247 - xfs_iomap_write_unwritten(ip, offset, size); 246 + if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { 247 + int error; 248 + error = xfs_iomap_write_unwritten(ip, offset, size); 249 + if (error) 250 + ioend->io_error = error; 251 + } 248 252 xfs_setfilesize(ioend); 249 253 } 250 254 xfs_destroy_ioend(ioend); ··· 1536 1532 struct xfs_inode *ip = XFS_I(inode); 1537 1533 1538 1534 xfs_itrace_entry(XFS_I(inode)); 1539 - xfs_rwlock(ip, VRWLOCK_READ); 1535 + xfs_ilock(ip, XFS_IOLOCK_SHARED); 1540 1536 xfs_flush_pages(ip, (xfs_off_t)0, -1, 0, FI_REMAPF); 1541 - xfs_rwunlock(ip, VRWLOCK_READ); 1537 + xfs_iunlock(ip, XFS_IOLOCK_SHARED); 1542 1538 return generic_block_bmap(mapping, block, xfs_get_blocks); 1543 1539 } 1544 1540

+4 -4

fs/xfs/linux-2.6/xfs_buf.c

··· 400 400 printk(KERN_ERR 401 401 "XFS: possible memory allocation " 402 402 "deadlock in %s (mode:0x%x)\n", 403 - __FUNCTION__, gfp_mask); 403 + __func__, gfp_mask); 404 404 405 405 XFS_STATS_INC(xb_page_retries); 406 406 xfsbufd_wakeup(0, gfp_mask); ··· 598 598 error = _xfs_buf_map_pages(bp, flags); 599 599 if (unlikely(error)) { 600 600 printk(KERN_WARNING "%s: failed to map pages\n", 601 - __FUNCTION__); 601 + __func__); 602 602 goto no_buffer; 603 603 } 604 604 } ··· 778 778 error = _xfs_buf_map_pages(bp, XBF_MAPPED); 779 779 if (unlikely(error)) { 780 780 printk(KERN_WARNING "%s: failed to map pages\n", 781 - __FUNCTION__); 781 + __func__); 782 782 goto fail_free_mem; 783 783 } 784 784 ··· 1060 1060 bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_ASYNC); 1061 1061 bp->b_flags |= flags & (XBF_DELWRI | XBF_ASYNC); 1062 1062 xfs_buf_delwri_queue(bp, 1); 1063 - return status; 1063 + return 0; 1064 1064 } 1065 1065 1066 1066 bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_ASYNC | XBF_DELWRI | \

+6 -2

fs/xfs/linux-2.6/xfs_buf.h

··· 387 387 return error; 388 388 } 389 389 390 - static inline int xfs_bdwrite(void *mp, xfs_buf_t *bp) 390 + /* 391 + * No error can be returned from xfs_buf_iostart for delwri 392 + * buffers as they are queued and no I/O is issued. 393 + */ 394 + static inline void xfs_bdwrite(void *mp, xfs_buf_t *bp) 391 395 { 392 396 bp->b_strat = xfs_bdstrat_cb; 393 397 bp->b_fspriv3 = mp; 394 - return xfs_buf_iostart(bp, XBF_DELWRI | XBF_ASYNC); 398 + (void)xfs_buf_iostart(bp, XBF_DELWRI | XBF_ASYNC); 395 399 } 396 400 397 401 #define XFS_bdstrat(bp) xfs_buf_iorequest(bp)

+1 -1

fs/xfs/linux-2.6/xfs_cred.h

··· 30 30 extern struct cred *sys_cred; 31 31 32 32 /* this is a hack.. (assumes sys_cred is the only cred_t in the system) */ 33 - static __inline int capable_cred(cred_t *cr, int cid) 33 + static inline int capable_cred(cred_t *cr, int cid) 34 34 { 35 35 return (cr == sys_cred) ? 1 : capable(cid); 36 36 }

+6 -8

fs/xfs/linux-2.6/xfs_export.c

··· 22 22 #include "xfs_trans.h" 23 23 #include "xfs_sb.h" 24 24 #include "xfs_ag.h" 25 + #include "xfs_dir2.h" 25 26 #include "xfs_dmapi.h" 26 27 #include "xfs_mount.h" 27 28 #include "xfs_export.h" ··· 30 29 #include "xfs_bmap_btree.h" 31 30 #include "xfs_inode.h" 32 31 #include "xfs_vfsops.h" 33 - 34 - static struct dentry dotdot = { .d_name.name = "..", .d_name.len = 2, }; 35 32 36 33 /* 37 34 * Note that we only accept fileids which are long enough rather than allow ··· 65 66 int len; 66 67 67 68 /* Directories don't need their parent encoded, they have ".." */ 68 - if (S_ISDIR(inode->i_mode)) 69 + if (S_ISDIR(inode->i_mode) || !connectable) 69 70 fileid_type = FILEID_INO32_GEN; 70 71 else 71 72 fileid_type = FILEID_INO32_GEN_PARENT; ··· 212 213 struct dentry *child) 213 214 { 214 215 int error; 215 - bhv_vnode_t *cvp; 216 + struct xfs_inode *cip; 216 217 struct dentry *parent; 217 218 218 - cvp = NULL; 219 - error = xfs_lookup(XFS_I(child->d_inode), &dotdot, &cvp); 219 + error = xfs_lookup(XFS_I(child->d_inode), &xfs_name_dotdot, &cip); 220 220 if (unlikely(error)) 221 221 return ERR_PTR(-error); 222 222 223 - parent = d_alloc_anon(vn_to_inode(cvp)); 223 + parent = d_alloc_anon(cip->i_vnode); 224 224 if (unlikely(!parent)) { 225 - VN_RELE(cvp); 225 + iput(cip->i_vnode); 226 226 return ERR_PTR(-ENOMEM); 227 227 } 228 228 return parent;

+4 -9

fs/xfs/linux-2.6/xfs_file.c

··· 469 469 struct inode *inode) 470 470 { 471 471 struct xfs_mount *mp = XFS_M(inode->i_sb); 472 + struct xfs_inode *ip = XFS_I(inode); 472 473 473 - if (unlikely(mp->m_flags & XFS_MOUNT_DMAPI)) { 474 - if (DM_EVENT_ENABLED(XFS_I(inode), DM_EVENT_READ)) { 475 - bhv_vnode_t *vp = vn_from_inode(inode); 476 - 477 - return -XFS_SEND_DATA(mp, DM_EVENT_READ, 478 - vp, 0, 0, 0, NULL); 479 - } 480 - } 481 - 474 + if (unlikely(mp->m_flags & XFS_MOUNT_DMAPI) && 475 + DM_EVENT_ENABLED(ip, DM_EVENT_READ)) 476 + return -XFS_SEND_DATA(mp, DM_EVENT_READ, ip, 0, 0, 0, NULL); 482 477 return 0; 483 478 } 484 479 #endif /* HAVE_FOP_OPEN_EXEC */

+11 -25

fs/xfs/linux-2.6/xfs_fs_subr.c

··· 17 17 */ 18 18 #include "xfs.h" 19 19 #include "xfs_vnodeops.h" 20 - 21 - /* 22 - * The following six includes are needed so that we can include 23 - * xfs_inode.h. What a mess.. 24 - */ 25 20 #include "xfs_bmap_btree.h" 26 - #include "xfs_inum.h" 27 - #include "xfs_dir2.h" 28 - #include "xfs_dir2_sf.h" 29 - #include "xfs_attr_sf.h" 30 - #include "xfs_dinode.h" 31 - 32 21 #include "xfs_inode.h" 33 22 34 23 int fs_noerr(void) { return 0; } ··· 31 42 xfs_off_t last, 32 43 int fiopt) 33 44 { 34 - bhv_vnode_t *vp = XFS_ITOV(ip); 35 - struct inode *inode = vn_to_inode(vp); 45 + struct address_space *mapping = ip->i_vnode->i_mapping; 36 46 37 - if (VN_CACHED(vp)) 38 - truncate_inode_pages(inode->i_mapping, first); 47 + if (mapping->nrpages) 48 + truncate_inode_pages(mapping, first); 39 49 } 40 50 41 51 int ··· 44 56 xfs_off_t last, 45 57 int fiopt) 46 58 { 47 - bhv_vnode_t *vp = XFS_ITOV(ip); 48 - struct inode *inode = vn_to_inode(vp); 59 + struct address_space *mapping = ip->i_vnode->i_mapping; 49 60 int ret = 0; 50 61 51 - if (VN_CACHED(vp)) { 62 + if (mapping->nrpages) { 52 63 xfs_iflags_clear(ip, XFS_ITRUNCATED); 53 - ret = filemap_write_and_wait(inode->i_mapping); 64 + ret = filemap_write_and_wait(mapping); 54 65 if (!ret) 55 - truncate_inode_pages(inode->i_mapping, first); 66 + truncate_inode_pages(mapping, first); 56 67 } 57 68 return ret; 58 69 } ··· 64 77 uint64_t flags, 65 78 int fiopt) 66 79 { 67 - bhv_vnode_t *vp = XFS_ITOV(ip); 68 - struct inode *inode = vn_to_inode(vp); 80 + struct address_space *mapping = ip->i_vnode->i_mapping; 69 81 int ret = 0; 70 82 int ret2; 71 83 72 - if (VN_DIRTY(vp)) { 84 + if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { 73 85 xfs_iflags_clear(ip, XFS_ITRUNCATED); 74 - ret = filemap_fdatawrite(inode->i_mapping); 86 + ret = filemap_fdatawrite(mapping); 75 87 if (flags & XFS_B_ASYNC) 76 88 return ret; 77 - ret2 = filemap_fdatawait(inode->i_mapping); 89 + ret2 = filemap_fdatawait(mapping); 78 90 if (!ret) 79 91 ret = ret2; 80 
92 }

+312 -364

fs/xfs/linux-2.6/xfs_ioctl.c

··· 651 651 return -error; 652 652 } 653 653 654 - /* prototypes for a few of the stack-hungry cases that have 655 - * their own functions. Functions are defined after their use 656 - * so gcc doesn't get fancy and inline them with -03 */ 657 - 658 - STATIC int 659 - xfs_ioc_space( 660 - struct xfs_inode *ip, 661 - struct inode *inode, 662 - struct file *filp, 663 - int flags, 664 - unsigned int cmd, 665 - void __user *arg); 666 - 667 - STATIC int 668 - xfs_ioc_bulkstat( 669 - xfs_mount_t *mp, 670 - unsigned int cmd, 671 - void __user *arg); 672 - 673 - STATIC int 674 - xfs_ioc_fsgeometry_v1( 675 - xfs_mount_t *mp, 676 - void __user *arg); 677 - 678 - STATIC int 679 - xfs_ioc_fsgeometry( 680 - xfs_mount_t *mp, 681 - void __user *arg); 682 - 683 - STATIC int 684 - xfs_ioc_xattr( 685 - xfs_inode_t *ip, 686 - struct file *filp, 687 - unsigned int cmd, 688 - void __user *arg); 689 - 690 - STATIC int 691 - xfs_ioc_fsgetxattr( 692 - xfs_inode_t *ip, 693 - int attr, 694 - void __user *arg); 695 - 696 - STATIC int 697 - xfs_ioc_getbmap( 698 - struct xfs_inode *ip, 699 - int flags, 700 - unsigned int cmd, 701 - void __user *arg); 702 - 703 - STATIC int 704 - xfs_ioc_getbmapx( 705 - struct xfs_inode *ip, 706 - void __user *arg); 707 - 708 - int 709 - xfs_ioctl( 710 - xfs_inode_t *ip, 711 - struct file *filp, 712 - int ioflags, 713 - unsigned int cmd, 714 - void __user *arg) 715 - { 716 - struct inode *inode = filp->f_path.dentry->d_inode; 717 - xfs_mount_t *mp = ip->i_mount; 718 - int error; 719 - 720 - xfs_itrace_entry(XFS_I(inode)); 721 - switch (cmd) { 722 - 723 - case XFS_IOC_ALLOCSP: 724 - case XFS_IOC_FREESP: 725 - case XFS_IOC_RESVSP: 726 - case XFS_IOC_UNRESVSP: 727 - case XFS_IOC_ALLOCSP64: 728 - case XFS_IOC_FREESP64: 729 - case XFS_IOC_RESVSP64: 730 - case XFS_IOC_UNRESVSP64: 731 - /* 732 - * Only allow the sys admin to reserve space unless 733 - * unwritten extents are enabled. 
734 - */ 735 - if (!xfs_sb_version_hasextflgbit(&mp->m_sb) && 736 - !capable(CAP_SYS_ADMIN)) 737 - return -EPERM; 738 - 739 - return xfs_ioc_space(ip, inode, filp, ioflags, cmd, arg); 740 - 741 - case XFS_IOC_DIOINFO: { 742 - struct dioattr da; 743 - xfs_buftarg_t *target = 744 - XFS_IS_REALTIME_INODE(ip) ? 745 - mp->m_rtdev_targp : mp->m_ddev_targp; 746 - 747 - da.d_mem = da.d_miniosz = 1 << target->bt_sshift; 748 - da.d_maxiosz = INT_MAX & ~(da.d_miniosz - 1); 749 - 750 - if (copy_to_user(arg, &da, sizeof(da))) 751 - return -XFS_ERROR(EFAULT); 752 - return 0; 753 - } 754 - 755 - case XFS_IOC_FSBULKSTAT_SINGLE: 756 - case XFS_IOC_FSBULKSTAT: 757 - case XFS_IOC_FSINUMBERS: 758 - return xfs_ioc_bulkstat(mp, cmd, arg); 759 - 760 - case XFS_IOC_FSGEOMETRY_V1: 761 - return xfs_ioc_fsgeometry_v1(mp, arg); 762 - 763 - case XFS_IOC_FSGEOMETRY: 764 - return xfs_ioc_fsgeometry(mp, arg); 765 - 766 - case XFS_IOC_GETVERSION: 767 - return put_user(inode->i_generation, (int __user *)arg); 768 - 769 - case XFS_IOC_FSGETXATTR: 770 - return xfs_ioc_fsgetxattr(ip, 0, arg); 771 - case XFS_IOC_FSGETXATTRA: 772 - return xfs_ioc_fsgetxattr(ip, 1, arg); 773 - case XFS_IOC_GETXFLAGS: 774 - case XFS_IOC_SETXFLAGS: 775 - case XFS_IOC_FSSETXATTR: 776 - return xfs_ioc_xattr(ip, filp, cmd, arg); 777 - 778 - case XFS_IOC_FSSETDM: { 779 - struct fsdmidata dmi; 780 - 781 - if (copy_from_user(&dmi, arg, sizeof(dmi))) 782 - return -XFS_ERROR(EFAULT); 783 - 784 - error = xfs_set_dmattrs(ip, dmi.fsd_dmevmask, 785 - dmi.fsd_dmstate); 786 - return -error; 787 - } 788 - 789 - case XFS_IOC_GETBMAP: 790 - case XFS_IOC_GETBMAPA: 791 - return xfs_ioc_getbmap(ip, ioflags, cmd, arg); 792 - 793 - case XFS_IOC_GETBMAPX: 794 - return xfs_ioc_getbmapx(ip, arg); 795 - 796 - case XFS_IOC_FD_TO_HANDLE: 797 - case XFS_IOC_PATH_TO_HANDLE: 798 - case XFS_IOC_PATH_TO_FSHANDLE: 799 - return xfs_find_handle(cmd, arg); 800 - 801 - case XFS_IOC_OPEN_BY_HANDLE: 802 - return xfs_open_by_handle(mp, arg, filp, inode); 803 - 
804 - case XFS_IOC_FSSETDM_BY_HANDLE: 805 - return xfs_fssetdm_by_handle(mp, arg, inode); 806 - 807 - case XFS_IOC_READLINK_BY_HANDLE: 808 - return xfs_readlink_by_handle(mp, arg, inode); 809 - 810 - case XFS_IOC_ATTRLIST_BY_HANDLE: 811 - return xfs_attrlist_by_handle(mp, arg, inode); 812 - 813 - case XFS_IOC_ATTRMULTI_BY_HANDLE: 814 - return xfs_attrmulti_by_handle(mp, arg, inode); 815 - 816 - case XFS_IOC_SWAPEXT: { 817 - error = xfs_swapext((struct xfs_swapext __user *)arg); 818 - return -error; 819 - } 820 - 821 - case XFS_IOC_FSCOUNTS: { 822 - xfs_fsop_counts_t out; 823 - 824 - error = xfs_fs_counts(mp, &out); 825 - if (error) 826 - return -error; 827 - 828 - if (copy_to_user(arg, &out, sizeof(out))) 829 - return -XFS_ERROR(EFAULT); 830 - return 0; 831 - } 832 - 833 - case XFS_IOC_SET_RESBLKS: { 834 - xfs_fsop_resblks_t inout; 835 - __uint64_t in; 836 - 837 - if (!capable(CAP_SYS_ADMIN)) 838 - return -EPERM; 839 - 840 - if (copy_from_user(&inout, arg, sizeof(inout))) 841 - return -XFS_ERROR(EFAULT); 842 - 843 - /* input parameter is passed in resblks field of structure */ 844 - in = inout.resblks; 845 - error = xfs_reserve_blocks(mp, &in, &inout); 846 - if (error) 847 - return -error; 848 - 849 - if (copy_to_user(arg, &inout, sizeof(inout))) 850 - return -XFS_ERROR(EFAULT); 851 - return 0; 852 - } 853 - 854 - case XFS_IOC_GET_RESBLKS: { 855 - xfs_fsop_resblks_t out; 856 - 857 - if (!capable(CAP_SYS_ADMIN)) 858 - return -EPERM; 859 - 860 - error = xfs_reserve_blocks(mp, NULL, &out); 861 - if (error) 862 - return -error; 863 - 864 - if (copy_to_user(arg, &out, sizeof(out))) 865 - return -XFS_ERROR(EFAULT); 866 - 867 - return 0; 868 - } 869 - 870 - case XFS_IOC_FSGROWFSDATA: { 871 - xfs_growfs_data_t in; 872 - 873 - if (!capable(CAP_SYS_ADMIN)) 874 - return -EPERM; 875 - 876 - if (copy_from_user(&in, arg, sizeof(in))) 877 - return -XFS_ERROR(EFAULT); 878 - 879 - error = xfs_growfs_data(mp, &in); 880 - return -error; 881 - } 882 - 883 - case XFS_IOC_FSGROWFSLOG: { 
884 - xfs_growfs_log_t in; 885 - 886 - if (!capable(CAP_SYS_ADMIN)) 887 - return -EPERM; 888 - 889 - if (copy_from_user(&in, arg, sizeof(in))) 890 - return -XFS_ERROR(EFAULT); 891 - 892 - error = xfs_growfs_log(mp, &in); 893 - return -error; 894 - } 895 - 896 - case XFS_IOC_FSGROWFSRT: { 897 - xfs_growfs_rt_t in; 898 - 899 - if (!capable(CAP_SYS_ADMIN)) 900 - return -EPERM; 901 - 902 - if (copy_from_user(&in, arg, sizeof(in))) 903 - return -XFS_ERROR(EFAULT); 904 - 905 - error = xfs_growfs_rt(mp, &in); 906 - return -error; 907 - } 908 - 909 - case XFS_IOC_FREEZE: 910 - if (!capable(CAP_SYS_ADMIN)) 911 - return -EPERM; 912 - 913 - if (inode->i_sb->s_frozen == SB_UNFROZEN) 914 - freeze_bdev(inode->i_sb->s_bdev); 915 - return 0; 916 - 917 - case XFS_IOC_THAW: 918 - if (!capable(CAP_SYS_ADMIN)) 919 - return -EPERM; 920 - if (inode->i_sb->s_frozen != SB_UNFROZEN) 921 - thaw_bdev(inode->i_sb->s_bdev, inode->i_sb); 922 - return 0; 923 - 924 - case XFS_IOC_GOINGDOWN: { 925 - __uint32_t in; 926 - 927 - if (!capable(CAP_SYS_ADMIN)) 928 - return -EPERM; 929 - 930 - if (get_user(in, (__uint32_t __user *)arg)) 931 - return -XFS_ERROR(EFAULT); 932 - 933 - error = xfs_fs_goingdown(mp, in); 934 - return -error; 935 - } 936 - 937 - case XFS_IOC_ERROR_INJECTION: { 938 - xfs_error_injection_t in; 939 - 940 - if (!capable(CAP_SYS_ADMIN)) 941 - return -EPERM; 942 - 943 - if (copy_from_user(&in, arg, sizeof(in))) 944 - return -XFS_ERROR(EFAULT); 945 - 946 - error = xfs_errortag_add(in.errtag, mp); 947 - return -error; 948 - } 949 - 950 - case XFS_IOC_ERROR_CLEARALL: 951 - if (!capable(CAP_SYS_ADMIN)) 952 - return -EPERM; 953 - 954 - error = xfs_errortag_clearall(mp, 1); 955 - return -error; 956 - 957 - default: 958 - return -ENOTTY; 959 - } 960 - } 961 - 962 654 STATIC int 963 655 xfs_ioc_space( 964 656 struct xfs_inode *ip, ··· 871 1179 } 872 1180 873 1181 STATIC int 874 - xfs_ioc_xattr( 1182 + xfs_ioc_fssetxattr( 875 1183 xfs_inode_t *ip, 876 1184 struct file *filp, 877 - unsigned int 
cmd, 878 1185 void __user *arg) 879 1186 { 880 1187 struct fsxattr fa; 881 1188 struct bhv_vattr *vattr; 882 - int error = 0; 1189 + int error; 883 1190 int attr_flags; 884 - unsigned int flags; 1191 + 1192 + if (copy_from_user(&fa, arg, sizeof(fa))) 1193 + return -EFAULT; 885 1194 886 1195 vattr = kmalloc(sizeof(*vattr), GFP_KERNEL); 887 1196 if (unlikely(!vattr)) 888 1197 return -ENOMEM; 889 1198 890 - switch (cmd) { 891 - case XFS_IOC_FSSETXATTR: { 892 - if (copy_from_user(&fa, arg, sizeof(fa))) { 893 - error = -EFAULT; 894 - break; 895 - } 1199 + attr_flags = 0; 1200 + if (filp->f_flags & (O_NDELAY|O_NONBLOCK)) 1201 + attr_flags |= ATTR_NONBLOCK; 896 1202 897 - attr_flags = 0; 898 - if (filp->f_flags & (O_NDELAY|O_NONBLOCK)) 899 - attr_flags |= ATTR_NONBLOCK; 1203 + vattr->va_mask = XFS_AT_XFLAGS | XFS_AT_EXTSIZE | XFS_AT_PROJID; 1204 + vattr->va_xflags = fa.fsx_xflags; 1205 + vattr->va_extsize = fa.fsx_extsize; 1206 + vattr->va_projid = fa.fsx_projid; 900 1207 901 - vattr->va_mask = XFS_AT_XFLAGS | XFS_AT_EXTSIZE | XFS_AT_PROJID; 902 - vattr->va_xflags = fa.fsx_xflags; 903 - vattr->va_extsize = fa.fsx_extsize; 904 - vattr->va_projid = fa.fsx_projid; 1208 + error = -xfs_setattr(ip, vattr, attr_flags, NULL); 1209 + if (!error) 1210 + vn_revalidate(XFS_ITOV(ip)); /* update flags */ 1211 + kfree(vattr); 1212 + return 0; 1213 + } 905 1214 906 - error = xfs_setattr(ip, vattr, attr_flags, NULL); 907 - if (likely(!error)) 908 - vn_revalidate(XFS_ITOV(ip)); /* update flags */ 909 - error = -error; 910 - break; 911 - } 1215 + STATIC int 1216 + xfs_ioc_getxflags( 1217 + xfs_inode_t *ip, 1218 + void __user *arg) 1219 + { 1220 + unsigned int flags; 912 1221 913 - case XFS_IOC_GETXFLAGS: { 914 - flags = xfs_di2lxflags(ip->i_d.di_flags); 915 - if (copy_to_user(arg, &flags, sizeof(flags))) 916 - error = -EFAULT; 917 - break; 918 - } 1222 + flags = xfs_di2lxflags(ip->i_d.di_flags); 1223 + if (copy_to_user(arg, &flags, sizeof(flags))) 1224 + return -EFAULT; 1225 + return 0; 
1226 + } 919 1227 920 - case XFS_IOC_SETXFLAGS: { 921 - if (copy_from_user(&flags, arg, sizeof(flags))) { 922 - error = -EFAULT; 923 - break; 924 - } 1228 + STATIC int 1229 + xfs_ioc_setxflags( 1230 + xfs_inode_t *ip, 1231 + struct file *filp, 1232 + void __user *arg) 1233 + { 1234 + struct bhv_vattr *vattr; 1235 + unsigned int flags; 1236 + int attr_flags; 1237 + int error; 925 1238 926 - if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \ 927 - FS_NOATIME_FL | FS_NODUMP_FL | \ 928 - FS_SYNC_FL)) { 929 - error = -EOPNOTSUPP; 930 - break; 931 - } 1239 + if (copy_from_user(&flags, arg, sizeof(flags))) 1240 + return -EFAULT; 932 1241 933 - attr_flags = 0; 934 - if (filp->f_flags & (O_NDELAY|O_NONBLOCK)) 935 - attr_flags |= ATTR_NONBLOCK; 1242 + if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \ 1243 + FS_NOATIME_FL | FS_NODUMP_FL | \ 1244 + FS_SYNC_FL)) 1245 + return -EOPNOTSUPP; 936 1246 937 - vattr->va_mask = XFS_AT_XFLAGS; 938 - vattr->va_xflags = xfs_merge_ioc_xflags(flags, 939 - xfs_ip2xflags(ip)); 1247 + vattr = kmalloc(sizeof(*vattr), GFP_KERNEL); 1248 + if (unlikely(!vattr)) 1249 + return -ENOMEM; 940 1250 941 - error = xfs_setattr(ip, vattr, attr_flags, NULL); 942 - if (likely(!error)) 943 - vn_revalidate(XFS_ITOV(ip)); /* update flags */ 944 - error = -error; 945 - break; 946 - } 1251 + attr_flags = 0; 1252 + if (filp->f_flags & (O_NDELAY|O_NONBLOCK)) 1253 + attr_flags |= ATTR_NONBLOCK; 947 1254 948 - default: 949 - error = -ENOTTY; 950 - break; 951 - } 1255 + vattr->va_mask = XFS_AT_XFLAGS; 1256 + vattr->va_xflags = xfs_merge_ioc_xflags(flags, xfs_ip2xflags(ip)); 952 1257 1258 + error = -xfs_setattr(ip, vattr, attr_flags, NULL); 1259 + if (likely(!error)) 1260 + vn_revalidate(XFS_ITOV(ip)); /* update flags */ 953 1261 kfree(vattr); 954 1262 return error; 955 1263 } ··· 1023 1331 return -XFS_ERROR(EFAULT); 1024 1332 1025 1333 return 0; 1334 + } 1335 + 1336 + int 1337 + xfs_ioctl( 1338 + xfs_inode_t *ip, 1339 + struct file *filp, 1340 + int ioflags, 1341 + 
unsigned int cmd, 1342 + void __user *arg) 1343 + { 1344 + struct inode *inode = filp->f_path.dentry->d_inode; 1345 + xfs_mount_t *mp = ip->i_mount; 1346 + int error; 1347 + 1348 + xfs_itrace_entry(XFS_I(inode)); 1349 + switch (cmd) { 1350 + 1351 + case XFS_IOC_ALLOCSP: 1352 + case XFS_IOC_FREESP: 1353 + case XFS_IOC_RESVSP: 1354 + case XFS_IOC_UNRESVSP: 1355 + case XFS_IOC_ALLOCSP64: 1356 + case XFS_IOC_FREESP64: 1357 + case XFS_IOC_RESVSP64: 1358 + case XFS_IOC_UNRESVSP64: 1359 + /* 1360 + * Only allow the sys admin to reserve space unless 1361 + * unwritten extents are enabled. 1362 + */ 1363 + if (!xfs_sb_version_hasextflgbit(&mp->m_sb) && 1364 + !capable(CAP_SYS_ADMIN)) 1365 + return -EPERM; 1366 + 1367 + return xfs_ioc_space(ip, inode, filp, ioflags, cmd, arg); 1368 + 1369 + case XFS_IOC_DIOINFO: { 1370 + struct dioattr da; 1371 + xfs_buftarg_t *target = 1372 + XFS_IS_REALTIME_INODE(ip) ? 1373 + mp->m_rtdev_targp : mp->m_ddev_targp; 1374 + 1375 + da.d_mem = da.d_miniosz = 1 << target->bt_sshift; 1376 + da.d_maxiosz = INT_MAX & ~(da.d_miniosz - 1); 1377 + 1378 + if (copy_to_user(arg, &da, sizeof(da))) 1379 + return -XFS_ERROR(EFAULT); 1380 + return 0; 1381 + } 1382 + 1383 + case XFS_IOC_FSBULKSTAT_SINGLE: 1384 + case XFS_IOC_FSBULKSTAT: 1385 + case XFS_IOC_FSINUMBERS: 1386 + return xfs_ioc_bulkstat(mp, cmd, arg); 1387 + 1388 + case XFS_IOC_FSGEOMETRY_V1: 1389 + return xfs_ioc_fsgeometry_v1(mp, arg); 1390 + 1391 + case XFS_IOC_FSGEOMETRY: 1392 + return xfs_ioc_fsgeometry(mp, arg); 1393 + 1394 + case XFS_IOC_GETVERSION: 1395 + return put_user(inode->i_generation, (int __user *)arg); 1396 + 1397 + case XFS_IOC_FSGETXATTR: 1398 + return xfs_ioc_fsgetxattr(ip, 0, arg); 1399 + case XFS_IOC_FSGETXATTRA: 1400 + return xfs_ioc_fsgetxattr(ip, 1, arg); 1401 + case XFS_IOC_FSSETXATTR: 1402 + return xfs_ioc_fssetxattr(ip, filp, arg); 1403 + case XFS_IOC_GETXFLAGS: 1404 + return xfs_ioc_getxflags(ip, arg); 1405 + case XFS_IOC_SETXFLAGS: 1406 + return xfs_ioc_setxflags(ip, 
filp, arg); 1407 + 1408 + case XFS_IOC_FSSETDM: { 1409 + struct fsdmidata dmi; 1410 + 1411 + if (copy_from_user(&dmi, arg, sizeof(dmi))) 1412 + return -XFS_ERROR(EFAULT); 1413 + 1414 + error = xfs_set_dmattrs(ip, dmi.fsd_dmevmask, 1415 + dmi.fsd_dmstate); 1416 + return -error; 1417 + } 1418 + 1419 + case XFS_IOC_GETBMAP: 1420 + case XFS_IOC_GETBMAPA: 1421 + return xfs_ioc_getbmap(ip, ioflags, cmd, arg); 1422 + 1423 + case XFS_IOC_GETBMAPX: 1424 + return xfs_ioc_getbmapx(ip, arg); 1425 + 1426 + case XFS_IOC_FD_TO_HANDLE: 1427 + case XFS_IOC_PATH_TO_HANDLE: 1428 + case XFS_IOC_PATH_TO_FSHANDLE: 1429 + return xfs_find_handle(cmd, arg); 1430 + 1431 + case XFS_IOC_OPEN_BY_HANDLE: 1432 + return xfs_open_by_handle(mp, arg, filp, inode); 1433 + 1434 + case XFS_IOC_FSSETDM_BY_HANDLE: 1435 + return xfs_fssetdm_by_handle(mp, arg, inode); 1436 + 1437 + case XFS_IOC_READLINK_BY_HANDLE: 1438 + return xfs_readlink_by_handle(mp, arg, inode); 1439 + 1440 + case XFS_IOC_ATTRLIST_BY_HANDLE: 1441 + return xfs_attrlist_by_handle(mp, arg, inode); 1442 + 1443 + case XFS_IOC_ATTRMULTI_BY_HANDLE: 1444 + return xfs_attrmulti_by_handle(mp, arg, inode); 1445 + 1446 + case XFS_IOC_SWAPEXT: { 1447 + error = xfs_swapext((struct xfs_swapext __user *)arg); 1448 + return -error; 1449 + } 1450 + 1451 + case XFS_IOC_FSCOUNTS: { 1452 + xfs_fsop_counts_t out; 1453 + 1454 + error = xfs_fs_counts(mp, &out); 1455 + if (error) 1456 + return -error; 1457 + 1458 + if (copy_to_user(arg, &out, sizeof(out))) 1459 + return -XFS_ERROR(EFAULT); 1460 + return 0; 1461 + } 1462 + 1463 + case XFS_IOC_SET_RESBLKS: { 1464 + xfs_fsop_resblks_t inout; 1465 + __uint64_t in; 1466 + 1467 + if (!capable(CAP_SYS_ADMIN)) 1468 + return -EPERM; 1469 + 1470 + if (copy_from_user(&inout, arg, sizeof(inout))) 1471 + return -XFS_ERROR(EFAULT); 1472 + 1473 + /* input parameter is passed in resblks field of structure */ 1474 + in = inout.resblks; 1475 + error = xfs_reserve_blocks(mp, &in, &inout); 1476 + if (error) 1477 + return -error; 
1478 + 1479 + if (copy_to_user(arg, &inout, sizeof(inout))) 1480 + return -XFS_ERROR(EFAULT); 1481 + return 0; 1482 + } 1483 + 1484 + case XFS_IOC_GET_RESBLKS: { 1485 + xfs_fsop_resblks_t out; 1486 + 1487 + if (!capable(CAP_SYS_ADMIN)) 1488 + return -EPERM; 1489 + 1490 + error = xfs_reserve_blocks(mp, NULL, &out); 1491 + if (error) 1492 + return -error; 1493 + 1494 + if (copy_to_user(arg, &out, sizeof(out))) 1495 + return -XFS_ERROR(EFAULT); 1496 + 1497 + return 0; 1498 + } 1499 + 1500 + case XFS_IOC_FSGROWFSDATA: { 1501 + xfs_growfs_data_t in; 1502 + 1503 + if (!capable(CAP_SYS_ADMIN)) 1504 + return -EPERM; 1505 + 1506 + if (copy_from_user(&in, arg, sizeof(in))) 1507 + return -XFS_ERROR(EFAULT); 1508 + 1509 + error = xfs_growfs_data(mp, &in); 1510 + return -error; 1511 + } 1512 + 1513 + case XFS_IOC_FSGROWFSLOG: { 1514 + xfs_growfs_log_t in; 1515 + 1516 + if (!capable(CAP_SYS_ADMIN)) 1517 + return -EPERM; 1518 + 1519 + if (copy_from_user(&in, arg, sizeof(in))) 1520 + return -XFS_ERROR(EFAULT); 1521 + 1522 + error = xfs_growfs_log(mp, &in); 1523 + return -error; 1524 + } 1525 + 1526 + case XFS_IOC_FSGROWFSRT: { 1527 + xfs_growfs_rt_t in; 1528 + 1529 + if (!capable(CAP_SYS_ADMIN)) 1530 + return -EPERM; 1531 + 1532 + if (copy_from_user(&in, arg, sizeof(in))) 1533 + return -XFS_ERROR(EFAULT); 1534 + 1535 + error = xfs_growfs_rt(mp, &in); 1536 + return -error; 1537 + } 1538 + 1539 + case XFS_IOC_FREEZE: 1540 + if (!capable(CAP_SYS_ADMIN)) 1541 + return -EPERM; 1542 + 1543 + if (inode->i_sb->s_frozen == SB_UNFROZEN) 1544 + freeze_bdev(inode->i_sb->s_bdev); 1545 + return 0; 1546 + 1547 + case XFS_IOC_THAW: 1548 + if (!capable(CAP_SYS_ADMIN)) 1549 + return -EPERM; 1550 + if (inode->i_sb->s_frozen != SB_UNFROZEN) 1551 + thaw_bdev(inode->i_sb->s_bdev, inode->i_sb); 1552 + return 0; 1553 + 1554 + case XFS_IOC_GOINGDOWN: { 1555 + __uint32_t in; 1556 + 1557 + if (!capable(CAP_SYS_ADMIN)) 1558 + return -EPERM; 1559 + 1560 + if (get_user(in, (__uint32_t __user *)arg)) 1561 + 
return -XFS_ERROR(EFAULT); 1562 + 1563 + error = xfs_fs_goingdown(mp, in); 1564 + return -error; 1565 + } 1566 + 1567 + case XFS_IOC_ERROR_INJECTION: { 1568 + xfs_error_injection_t in; 1569 + 1570 + if (!capable(CAP_SYS_ADMIN)) 1571 + return -EPERM; 1572 + 1573 + if (copy_from_user(&in, arg, sizeof(in))) 1574 + return -XFS_ERROR(EFAULT); 1575 + 1576 + error = xfs_errortag_add(in.errtag, mp); 1577 + return -error; 1578 + } 1579 + 1580 + case XFS_IOC_ERROR_CLEARALL: 1581 + if (!capable(CAP_SYS_ADMIN)) 1582 + return -EPERM; 1583 + 1584 + error = xfs_errortag_clearall(mp, 1); 1585 + return -error; 1586 + 1587 + default: 1588 + return -ENOTTY; 1589 + } 1026 1590 }

+125 -98

fs/xfs/linux-2.6/xfs_iops.c

··· 62 62 xfs_synchronize_atime( 63 63 xfs_inode_t *ip) 64 64 { 65 - bhv_vnode_t *vp; 65 + struct inode *inode = ip->i_vnode; 66 66 67 - vp = XFS_ITOV_NULL(ip); 68 - if (vp) { 69 - ip->i_d.di_atime.t_sec = (__int32_t)vp->i_atime.tv_sec; 70 - ip->i_d.di_atime.t_nsec = (__int32_t)vp->i_atime.tv_nsec; 67 + if (inode) { 68 + ip->i_d.di_atime.t_sec = (__int32_t)inode->i_atime.tv_sec; 69 + ip->i_d.di_atime.t_nsec = (__int32_t)inode->i_atime.tv_nsec; 71 70 } 72 71 } 73 72 ··· 79 80 xfs_mark_inode_dirty_sync( 80 81 xfs_inode_t *ip) 81 82 { 82 - bhv_vnode_t *vp; 83 + struct inode *inode = ip->i_vnode; 83 84 84 - vp = XFS_ITOV_NULL(ip); 85 - if (vp) 86 - mark_inode_dirty_sync(vn_to_inode(vp)); 85 + if (inode) 86 + mark_inode_dirty_sync(inode); 87 87 } 88 88 89 89 /* ··· 213 215 */ 214 216 STATIC int 215 217 xfs_init_security( 216 - bhv_vnode_t *vp, 218 + struct inode *inode, 217 219 struct inode *dir) 218 220 { 219 - struct inode *ip = vn_to_inode(vp); 221 + struct xfs_inode *ip = XFS_I(inode); 220 222 size_t length; 221 223 void *value; 222 224 char *name; 223 225 int error; 224 226 225 - error = security_inode_init_security(ip, dir, &name, &value, &length); 227 + error = security_inode_init_security(inode, dir, &name, 228 + &value, &length); 226 229 if (error) { 227 230 if (error == -EOPNOTSUPP) 228 231 return 0; 229 232 return -error; 230 233 } 231 234 232 - error = xfs_attr_set(XFS_I(ip), name, value, 233 - length, ATTR_SECURE); 235 + error = xfs_attr_set(ip, name, value, length, ATTR_SECURE); 234 236 if (!error) 235 - xfs_iflags_set(XFS_I(ip), XFS_IMODIFIED); 237 + xfs_iflags_set(ip, XFS_IMODIFIED); 236 238 237 239 kfree(name); 238 240 kfree(value); 239 241 return error; 240 242 } 241 243 242 - /* 243 - * Determine whether a process has a valid fs_struct (kernel daemons 244 - * like knfsd don't have an fs_struct). 245 - * 246 - * XXX(hch): nfsd is broken, better fix it instead. 
247 - */ 248 - STATIC_INLINE int 249 - xfs_has_fs_struct(struct task_struct *task) 244 + static void 245 + xfs_dentry_to_name( 246 + struct xfs_name *namep, 247 + struct dentry *dentry) 250 248 { 251 - return (task->fs != init_task.fs); 249 + namep->name = dentry->d_name.name; 250 + namep->len = dentry->d_name.len; 252 251 } 253 252 254 253 STATIC void 255 254 xfs_cleanup_inode( 256 255 struct inode *dir, 257 - bhv_vnode_t *vp, 256 + struct inode *inode, 258 257 struct dentry *dentry, 259 258 int mode) 260 259 { 261 - struct dentry teardown = {}; 260 + struct xfs_name teardown; 262 261 263 262 /* Oh, the horror. 264 263 * If we can't add the ACL or we fail in 265 264 * xfs_init_security we must back out. 266 265 * ENOSPC can hit here, among other things. 267 266 */ 268 - teardown.d_inode = vn_to_inode(vp); 269 - teardown.d_name = dentry->d_name; 267 + xfs_dentry_to_name(&teardown, dentry); 270 268 271 269 if (S_ISDIR(mode)) 272 - xfs_rmdir(XFS_I(dir), &teardown); 270 + xfs_rmdir(XFS_I(dir), &teardown, XFS_I(inode)); 273 271 else 274 - xfs_remove(XFS_I(dir), &teardown); 275 - VN_RELE(vp); 272 + xfs_remove(XFS_I(dir), &teardown, XFS_I(inode)); 273 + iput(inode); 276 274 } 277 275 278 276 STATIC int ··· 278 284 int mode, 279 285 dev_t rdev) 280 286 { 281 - struct inode *ip; 282 - bhv_vnode_t *vp = NULL, *dvp = vn_from_inode(dir); 287 + struct inode *inode; 288 + struct xfs_inode *ip = NULL; 283 289 xfs_acl_t *default_acl = NULL; 290 + struct xfs_name name; 284 291 attrexists_t test_default_acl = _ACL_DEFAULT_EXISTS; 285 292 int error; 286 293 ··· 292 297 if (unlikely(!sysv_valid_dev(rdev) || MAJOR(rdev) & ~0x1ff)) 293 298 return -EINVAL; 294 299 295 - if (unlikely(test_default_acl && test_default_acl(dvp))) { 300 + if (test_default_acl && test_default_acl(dir)) { 296 301 if (!_ACL_ALLOC(default_acl)) { 297 302 return -ENOMEM; 298 303 } 299 - if (!_ACL_GET_DEFAULT(dvp, default_acl)) { 304 + if (!_ACL_GET_DEFAULT(dir, default_acl)) { 300 305 _ACL_FREE(default_acl); 301 
306 default_acl = NULL; 302 307 } 303 308 } 304 309 305 - if (IS_POSIXACL(dir) && !default_acl && xfs_has_fs_struct(current)) 310 + xfs_dentry_to_name(&name, dentry); 311 + 312 + if (IS_POSIXACL(dir) && !default_acl) 306 313 mode &= ~current->fs->umask; 307 314 308 315 switch (mode & S_IFMT) { 309 - case S_IFCHR: case S_IFBLK: case S_IFIFO: case S_IFSOCK: 316 + case S_IFCHR: 317 + case S_IFBLK: 318 + case S_IFIFO: 319 + case S_IFSOCK: 310 320 rdev = sysv_encode_dev(rdev); 311 321 case S_IFREG: 312 - error = xfs_create(XFS_I(dir), dentry, mode, rdev, &vp, NULL); 322 + error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip, NULL); 313 323 break; 314 324 case S_IFDIR: 315 - error = xfs_mkdir(XFS_I(dir), dentry, mode, &vp, NULL); 325 + error = xfs_mkdir(XFS_I(dir), &name, mode, &ip, NULL); 316 326 break; 317 327 default: 318 328 error = EINVAL; 319 329 break; 320 330 } 321 331 322 - if (unlikely(!error)) { 323 - error = xfs_init_security(vp, dir); 324 - if (error) 325 - xfs_cleanup_inode(dir, vp, dentry, mode); 326 - } 332 + if (unlikely(error)) 333 + goto out_free_acl; 327 334 328 - if (unlikely(default_acl)) { 329 - if (!error) { 330 - error = _ACL_INHERIT(vp, mode, default_acl); 331 - if (!error) 332 - xfs_iflags_set(XFS_I(vp), XFS_IMODIFIED); 333 - else 334 - xfs_cleanup_inode(dir, vp, dentry, mode); 335 - } 335 + inode = ip->i_vnode; 336 + 337 + error = xfs_init_security(inode, dir); 338 + if (unlikely(error)) 339 + goto out_cleanup_inode; 340 + 341 + if (default_acl) { 342 + error = _ACL_INHERIT(inode, mode, default_acl); 343 + if (unlikely(error)) 344 + goto out_cleanup_inode; 345 + xfs_iflags_set(ip, XFS_IMODIFIED); 336 346 _ACL_FREE(default_acl); 337 347 } 338 348 339 - if (likely(!error)) { 340 - ASSERT(vp); 341 - ip = vn_to_inode(vp); 342 349 343 - if (S_ISDIR(mode)) 344 - xfs_validate_fields(ip); 345 - d_instantiate(dentry, ip); 346 - xfs_validate_fields(dir); 347 - } 350 + if (S_ISDIR(mode)) 351 + xfs_validate_fields(inode); 352 + d_instantiate(dentry, 
inode); 353 + xfs_validate_fields(dir); 354 + return -error; 355 + 356 + out_cleanup_inode: 357 + xfs_cleanup_inode(dir, inode, dentry, mode); 358 + out_free_acl: 359 + if (default_acl) 360 + _ACL_FREE(default_acl); 348 361 return -error; 349 362 } 350 363 ··· 381 378 struct dentry *dentry, 382 379 struct nameidata *nd) 383 380 { 384 - bhv_vnode_t *cvp; 381 + struct xfs_inode *cip; 382 + struct xfs_name name; 385 383 int error; 386 384 387 385 if (dentry->d_name.len >= MAXNAMELEN) 388 386 return ERR_PTR(-ENAMETOOLONG); 389 387 390 - error = xfs_lookup(XFS_I(dir), dentry, &cvp); 388 + xfs_dentry_to_name(&name, dentry); 389 + error = xfs_lookup(XFS_I(dir), &name, &cip); 391 390 if (unlikely(error)) { 392 391 if (unlikely(error != ENOENT)) 393 392 return ERR_PTR(-error); ··· 397 392 return NULL; 398 393 } 399 394 400 - return d_splice_alias(vn_to_inode(cvp), dentry); 395 + return d_splice_alias(cip->i_vnode, dentry); 401 396 } 402 397 403 398 STATIC int ··· 406 401 struct inode *dir, 407 402 struct dentry *dentry) 408 403 { 409 - struct inode *ip; /* inode of guy being linked to */ 410 - bhv_vnode_t *vp; /* vp of name being linked */ 404 + struct inode *inode; /* inode of guy being linked to */ 405 + struct xfs_name name; 411 406 int error; 412 407 413 - ip = old_dentry->d_inode; /* inode being linked to */ 414 - vp = vn_from_inode(ip); 408 + inode = old_dentry->d_inode; 409 + xfs_dentry_to_name(&name, dentry); 415 410 416 - VN_HOLD(vp); 417 - error = xfs_link(XFS_I(dir), vp, dentry); 411 + igrab(inode); 412 + error = xfs_link(XFS_I(dir), XFS_I(inode), &name); 418 413 if (unlikely(error)) { 419 - VN_RELE(vp); 420 - } else { 421 - xfs_iflags_set(XFS_I(dir), XFS_IMODIFIED); 422 - xfs_validate_fields(ip); 423 - d_instantiate(dentry, ip); 414 + iput(inode); 415 + return -error; 424 416 } 425 - return -error; 417 + 418 + xfs_iflags_set(XFS_I(dir), XFS_IMODIFIED); 419 + xfs_validate_fields(inode); 420 + d_instantiate(dentry, inode); 421 + return 0; 426 422 } 427 423 428 424 
STATIC int ··· 432 426 struct dentry *dentry) 433 427 { 434 428 struct inode *inode; 429 + struct xfs_name name; 435 430 int error; 436 431 437 432 inode = dentry->d_inode; 433 + xfs_dentry_to_name(&name, dentry); 438 434 439 - error = xfs_remove(XFS_I(dir), dentry); 435 + error = xfs_remove(XFS_I(dir), &name, XFS_I(inode)); 440 436 if (likely(!error)) { 441 437 xfs_validate_fields(dir); /* size needs update */ 442 438 xfs_validate_fields(inode); ··· 452 444 struct dentry *dentry, 453 445 const char *symname) 454 446 { 455 - struct inode *ip; 456 - bhv_vnode_t *cvp; /* used to lookup symlink to put in dentry */ 447 + struct inode *inode; 448 + struct xfs_inode *cip = NULL; 449 + struct xfs_name name; 457 450 int error; 458 451 mode_t mode; 459 452 460 - cvp = NULL; 461 - 462 453 mode = S_IFLNK | 463 454 (irix_symlink_mode ? 0777 & ~current->fs->umask : S_IRWXUGO); 455 + xfs_dentry_to_name(&name, dentry); 464 456 465 - error = xfs_symlink(XFS_I(dir), dentry, (char *)symname, mode, 466 - &cvp, NULL); 467 - if (likely(!error && cvp)) { 468 - error = xfs_init_security(cvp, dir); 469 - if (likely(!error)) { 470 - ip = vn_to_inode(cvp); 471 - d_instantiate(dentry, ip); 472 - xfs_validate_fields(dir); 473 - xfs_validate_fields(ip); 474 - } else { 475 - xfs_cleanup_inode(dir, cvp, dentry, 0); 476 - } 477 - } 457 + error = xfs_symlink(XFS_I(dir), &name, symname, mode, &cip, NULL); 458 + if (unlikely(error)) 459 + goto out; 460 + 461 + inode = cip->i_vnode; 462 + 463 + error = xfs_init_security(inode, dir); 464 + if (unlikely(error)) 465 + goto out_cleanup_inode; 466 + 467 + d_instantiate(dentry, inode); 468 + xfs_validate_fields(dir); 469 + xfs_validate_fields(inode); 470 + return 0; 471 + 472 + out_cleanup_inode: 473 + xfs_cleanup_inode(dir, inode, dentry, 0); 474 + out: 478 475 return -error; 479 476 } 480 477 ··· 489 476 struct dentry *dentry) 490 477 { 491 478 struct inode *inode = dentry->d_inode; 479 + struct xfs_name name; 492 480 int error; 493 481 494 - error = 
xfs_rmdir(XFS_I(dir), dentry); 482 + xfs_dentry_to_name(&name, dentry); 483 + 484 + error = xfs_rmdir(XFS_I(dir), &name, XFS_I(inode)); 495 485 if (likely(!error)) { 496 486 xfs_validate_fields(inode); 497 487 xfs_validate_fields(dir); ··· 510 494 struct dentry *ndentry) 511 495 { 512 496 struct inode *new_inode = ndentry->d_inode; 513 - bhv_vnode_t *tvp; /* target directory */ 497 + struct xfs_name oname; 498 + struct xfs_name nname; 514 499 int error; 515 500 516 - tvp = vn_from_inode(ndir); 501 + xfs_dentry_to_name(&oname, odentry); 502 + xfs_dentry_to_name(&nname, ndentry); 517 503 518 - error = xfs_rename(XFS_I(odir), odentry, tvp, ndentry); 504 + error = xfs_rename(XFS_I(odir), &oname, XFS_I(odentry->d_inode), 505 + XFS_I(ndir), &nname); 519 506 if (likely(!error)) { 520 507 if (new_inode) 521 508 xfs_validate_fields(new_inode); ··· 719 700 return -error; 720 701 } 721 702 703 + /* 704 + * block_truncate_page can return an error, but we can't propagate it 705 + * at all here. Leave a complaint + stack trace in the syslog because 706 + * this could be bad. If it is bad, we need to propagate the error further. 707 + */ 722 708 STATIC void 723 709 xfs_vn_truncate( 724 710 struct inode *inode) 725 711 { 726 - block_truncate_page(inode->i_mapping, inode->i_size, xfs_get_blocks); 712 + int error; 713 + error = block_truncate_page(inode->i_mapping, inode->i_size, 714 + xfs_get_blocks); 715 + WARN_ON(error); 727 716 } 728 717 729 718 STATIC int

-1

fs/xfs/linux-2.6/xfs_linux.h

··· 99 99 /* 100 100 * Feature macros (disable/enable) 101 101 */ 102 - #undef HAVE_REFCACHE /* reference cache not needed for NFS in 2.6 */ 103 102 #define HAVE_SPLICE /* a splice(2) exists in 2.6, but not in 2.4 */ 104 103 #ifdef CONFIG_SMP 105 104 #define HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */

+28 -42

fs/xfs/linux-2.6/xfs_lrw.c

··· 176 176 { 177 177 struct file *file = iocb->ki_filp; 178 178 struct inode *inode = file->f_mapping->host; 179 - bhv_vnode_t *vp = XFS_ITOV(ip); 180 179 xfs_mount_t *mp = ip->i_mount; 181 180 size_t size = 0; 182 181 ssize_t ret = 0; ··· 227 228 xfs_ilock(ip, XFS_IOLOCK_SHARED); 228 229 229 230 if (DM_EVENT_ENABLED(ip, DM_EVENT_READ) && !(ioflags & IO_INVIS)) { 230 - bhv_vrwlock_t locktype = VRWLOCK_READ; 231 231 int dmflags = FILP_DELAY_FLAG(file) | DM_SEM_FLAG_RD(ioflags); 232 + int iolock = XFS_IOLOCK_SHARED; 232 233 233 - ret = -XFS_SEND_DATA(mp, DM_EVENT_READ, vp, *offset, size, 234 - dmflags, &locktype); 234 + ret = -XFS_SEND_DATA(mp, DM_EVENT_READ, ip, *offset, size, 235 + dmflags, &iolock); 235 236 if (ret) { 236 237 xfs_iunlock(ip, XFS_IOLOCK_SHARED); 237 238 if (unlikely(ioflags & IO_ISDIRECT)) ··· 241 242 } 242 243 243 244 if (unlikely(ioflags & IO_ISDIRECT)) { 244 - if (VN_CACHED(vp)) 245 + if (inode->i_mapping->nrpages) 245 246 ret = xfs_flushinval_pages(ip, (*offset & PAGE_CACHE_MASK), 246 247 -1, FI_REMAPF_LOCKED); 247 248 mutex_unlock(&inode->i_mutex); ··· 275 276 int flags, 276 277 int ioflags) 277 278 { 278 - bhv_vnode_t *vp = XFS_ITOV(ip); 279 279 xfs_mount_t *mp = ip->i_mount; 280 280 ssize_t ret; 281 281 ··· 285 287 xfs_ilock(ip, XFS_IOLOCK_SHARED); 286 288 287 289 if (DM_EVENT_ENABLED(ip, DM_EVENT_READ) && !(ioflags & IO_INVIS)) { 288 - bhv_vrwlock_t locktype = VRWLOCK_READ; 290 + int iolock = XFS_IOLOCK_SHARED; 289 291 int error; 290 292 291 - error = XFS_SEND_DATA(mp, DM_EVENT_READ, vp, *ppos, count, 292 - FILP_DELAY_FLAG(infilp), &locktype); 293 + error = XFS_SEND_DATA(mp, DM_EVENT_READ, ip, *ppos, count, 294 + FILP_DELAY_FLAG(infilp), &iolock); 293 295 if (error) { 294 296 xfs_iunlock(ip, XFS_IOLOCK_SHARED); 295 297 return -error; ··· 315 317 int flags, 316 318 int ioflags) 317 319 { 318 - bhv_vnode_t *vp = XFS_ITOV(ip); 319 320 xfs_mount_t *mp = ip->i_mount; 320 321 ssize_t ret; 321 322 struct inode *inode = outfilp->f_mapping->host; 
··· 327 330 xfs_ilock(ip, XFS_IOLOCK_EXCL); 328 331 329 332 if (DM_EVENT_ENABLED(ip, DM_EVENT_WRITE) && !(ioflags & IO_INVIS)) { 330 - bhv_vrwlock_t locktype = VRWLOCK_WRITE; 333 + int iolock = XFS_IOLOCK_EXCL; 331 334 int error; 332 335 333 - error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, vp, *ppos, count, 334 - FILP_DELAY_FLAG(outfilp), &locktype); 336 + error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, ip, *ppos, count, 337 + FILP_DELAY_FLAG(outfilp), &iolock); 335 338 if (error) { 336 339 xfs_iunlock(ip, XFS_IOLOCK_EXCL); 337 340 return -error; ··· 570 573 struct file *file = iocb->ki_filp; 571 574 struct address_space *mapping = file->f_mapping; 572 575 struct inode *inode = mapping->host; 573 - bhv_vnode_t *vp = XFS_ITOV(xip); 574 576 unsigned long segs = nsegs; 575 577 xfs_mount_t *mp; 576 578 ssize_t ret = 0, error = 0; 577 579 xfs_fsize_t isize, new_size; 578 580 int iolock; 579 581 int eventsent = 0; 580 - bhv_vrwlock_t locktype; 581 582 size_t ocount = 0, count; 582 583 loff_t pos; 583 584 int need_i_mutex; ··· 602 607 relock: 603 608 if (ioflags & IO_ISDIRECT) { 604 609 iolock = XFS_IOLOCK_SHARED; 605 - locktype = VRWLOCK_WRITE_DIRECT; 606 610 need_i_mutex = 0; 607 611 } else { 608 612 iolock = XFS_IOLOCK_EXCL; 609 - locktype = VRWLOCK_WRITE; 610 613 need_i_mutex = 1; 611 614 mutex_lock(&inode->i_mutex); 612 615 } ··· 627 634 dmflags |= DM_FLAGS_IMUX; 628 635 629 636 xfs_iunlock(xip, XFS_ILOCK_EXCL); 630 - error = XFS_SEND_DATA(xip->i_mount, DM_EVENT_WRITE, vp, 631 - pos, count, 632 - dmflags, &locktype); 637 + error = XFS_SEND_DATA(xip->i_mount, DM_EVENT_WRITE, xip, 638 + pos, count, dmflags, &iolock); 633 639 if (error) { 634 640 goto out_unlock_internal; 635 641 } ··· 656 664 return XFS_ERROR(-EINVAL); 657 665 } 658 666 659 - if (!need_i_mutex && (VN_CACHED(vp) || pos > xip->i_size)) { 667 + if (!need_i_mutex && (mapping->nrpages || pos > xip->i_size)) { 660 668 xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock); 661 669 iolock = XFS_IOLOCK_EXCL; 662 - locktype = 
VRWLOCK_WRITE; 663 670 need_i_mutex = 1; 664 671 mutex_lock(&inode->i_mutex); 665 672 xfs_ilock(xip, XFS_ILOCK_EXCL|iolock); ··· 718 727 current->backing_dev_info = mapping->backing_dev_info; 719 728 720 729 if ((ioflags & IO_ISDIRECT)) { 721 - if (VN_CACHED(vp)) { 730 + if (mapping->nrpages) { 722 731 WARN_ON(need_i_mutex == 0); 723 732 xfs_inval_cached_trace(xip, pos, -1, 724 733 (pos & PAGE_CACHE_MASK), -1); ··· 735 744 mutex_unlock(&inode->i_mutex); 736 745 737 746 iolock = XFS_IOLOCK_SHARED; 738 - locktype = VRWLOCK_WRITE_DIRECT; 739 747 need_i_mutex = 0; 740 748 } 741 749 ··· 771 781 772 782 if (ret == -ENOSPC && 773 783 DM_EVENT_ENABLED(xip, DM_EVENT_NOSPACE) && !(ioflags & IO_INVIS)) { 774 - xfs_rwunlock(xip, locktype); 784 + xfs_iunlock(xip, iolock); 775 785 if (need_i_mutex) 776 786 mutex_unlock(&inode->i_mutex); 777 - error = XFS_SEND_NAMESP(xip->i_mount, DM_EVENT_NOSPACE, vp, 778 - DM_RIGHT_NULL, vp, DM_RIGHT_NULL, NULL, NULL, 787 + error = XFS_SEND_NAMESP(xip->i_mount, DM_EVENT_NOSPACE, xip, 788 + DM_RIGHT_NULL, xip, DM_RIGHT_NULL, NULL, NULL, 779 789 0, 0, 0); /* Delay flag intentionally unused */ 780 790 if (need_i_mutex) 781 791 mutex_lock(&inode->i_mutex); 782 - xfs_rwlock(xip, locktype); 792 + xfs_ilock(xip, iolock); 783 793 if (error) 784 794 goto out_unlock_internal; 785 795 pos = xip->i_size; ··· 807 817 /* Handle various SYNC-type writes */ 808 818 if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) { 809 819 int error2; 810 - xfs_rwunlock(xip, locktype); 820 + 821 + xfs_iunlock(xip, iolock); 811 822 if (need_i_mutex) 812 823 mutex_unlock(&inode->i_mutex); 813 824 error2 = sync_page_range(inode, mapping, pos, ret); ··· 816 825 error = error2; 817 826 if (need_i_mutex) 818 827 mutex_lock(&inode->i_mutex); 819 - xfs_rwlock(xip, locktype); 828 + xfs_ilock(xip, iolock); 820 829 error2 = xfs_write_sync_logforce(mp, xip); 821 830 if (!error) 822 831 error = error2; ··· 837 846 xip->i_d.di_size = xip->i_size; 838 847 xfs_iunlock(xip, XFS_ILOCK_EXCL); 839 
848 } 840 - xfs_rwunlock(xip, locktype); 849 + xfs_iunlock(xip, iolock); 841 850 out_unlock_mutex: 842 851 if (need_i_mutex) 843 852 mutex_unlock(&inode->i_mutex); ··· 875 884 } 876 885 877 886 /* 878 - * Wrapper around bdstrat so that we can stop data 879 - * from going to disk in case we are shutting down the filesystem. 880 - * Typically user data goes thru this path; one of the exceptions 881 - * is the superblock. 887 + * Wrapper around bdstrat so that we can stop data from going to disk in case 888 + * we are shutting down the filesystem. Typically user data goes thru this 889 + * path; one of the exceptions is the superblock. 882 890 */ 883 - int 891 + void 884 892 xfsbdstrat( 885 893 struct xfs_mount *mp, 886 894 struct xfs_buf *bp) 887 895 { 888 896 ASSERT(mp); 889 897 if (!XFS_FORCED_SHUTDOWN(mp)) { 890 - /* Grio redirection would go here 891 - * if (XFS_BUF_IS_GRIO(bp)) { 892 - */ 893 - 894 898 xfs_buf_iorequest(bp); 895 - return 0; 899 + return; 896 900 } 897 901 898 902 xfs_buftrace("XFSBDSTRAT IOERROR", bp); 899 - return (xfs_bioerror_relse(bp)); 903 + xfs_bioerror_relse(bp); 900 904 } 901 905 902 906 /*

+2 -1

fs/xfs/linux-2.6/xfs_lrw.h

··· 68 68 #define xfs_inval_cached_trace(ip, offset, len, first, last) 69 69 #endif 70 70 71 - extern int xfsbdstrat(struct xfs_mount *, struct xfs_buf *); 71 + /* errors from xfsbdstrat() must be extracted from the buffer */ 72 + extern void xfsbdstrat(struct xfs_mount *, struct xfs_buf *); 72 73 extern int xfs_bdstrat_cb(struct xfs_buf *); 73 74 extern int xfs_dev_is_read_only(struct xfs_mount *, char *); 74 75

+2 -2

fs/xfs/linux-2.6/xfs_stats.h

··· 144 144 # define XFS_STATS_DEC(count) 145 145 # define XFS_STATS_ADD(count, inc) 146 146 147 - static __inline void xfs_init_procfs(void) { }; 148 - static __inline void xfs_cleanup_procfs(void) { }; 147 + static inline void xfs_init_procfs(void) { }; 148 + static inline void xfs_cleanup_procfs(void) { }; 149 149 150 150 #endif /* !CONFIG_PROC_FS */ 151 151

+13 -14

fs/xfs/linux-2.6/xfs_super.c

··· 896 896 struct inode *inode, 897 897 int sync) 898 898 { 899 - int error = 0, flags = FLUSH_INODE; 899 + int error = 0; 900 + int flags = 0; 900 901 901 902 xfs_itrace_entry(XFS_I(inode)); 902 903 if (sync) { ··· 935 934 xfs_inactive(ip); 936 935 xfs_iflags_clear(ip, XFS_IMODIFIED); 937 936 if (xfs_reclaim(ip)) 938 - panic("%s: cannot reclaim 0x%p\n", __FUNCTION__, inode); 937 + panic("%s: cannot reclaim 0x%p\n", __func__, inode); 939 938 } 940 939 941 940 ASSERT(XFS_I(inode) == NULL); ··· 1028 1027 int error; 1029 1028 1030 1029 if (!(mp->m_flags & XFS_MOUNT_RDONLY)) 1031 - error = xfs_sync(mp, SYNC_FSDATA | SYNC_BDFLUSH | SYNC_ATTR | 1032 - SYNC_REFCACHE | SYNC_SUPER); 1030 + error = xfs_sync(mp, SYNC_FSDATA | SYNC_BDFLUSH | SYNC_ATTR); 1033 1031 mp->m_sync_seq++; 1034 1032 wake_up(&mp->m_wait_single_sync_task); 1035 1033 } ··· 1306 1306 void *data, 1307 1307 int silent) 1308 1308 { 1309 - struct inode *rootvp; 1309 + struct inode *root; 1310 1310 struct xfs_mount *mp = NULL; 1311 1311 struct xfs_mount_args *args = xfs_args_allocate(sb, silent); 1312 1312 int error; ··· 1344 1344 sb->s_time_gran = 1; 1345 1345 set_posix_acl_flag(sb); 1346 1346 1347 - rootvp = igrab(mp->m_rootip->i_vnode); 1348 - if (!rootvp) { 1347 + root = igrab(mp->m_rootip->i_vnode); 1348 + if (!root) { 1349 1349 error = ENOENT; 1350 1350 goto fail_unmount; 1351 1351 } 1352 - 1353 - sb->s_root = d_alloc_root(vn_to_inode(rootvp)); 1354 - if (!sb->s_root) { 1355 - error = ENOMEM; 1352 + if (is_bad_inode(root)) { 1353 + error = EINVAL; 1356 1354 goto fail_vnrele; 1357 1355 } 1358 - if (is_bad_inode(sb->s_root->d_inode)) { 1359 - error = EINVAL; 1356 + sb->s_root = d_alloc_root(root); 1357 + if (!sb->s_root) { 1358 + error = ENOMEM; 1360 1359 goto fail_vnrele; 1361 1360 } 1362 1361 ··· 1377 1378 dput(sb->s_root); 1378 1379 sb->s_root = NULL; 1379 1380 } else { 1380 - VN_RELE(rootvp); 1381 + iput(root); 1381 1382 } 1382 1383 1383 1384 fail_unmount:

+1 -7

fs/xfs/linux-2.6/xfs_super.h

··· 50 50 # define set_posix_acl_flag(sb) do { } while (0) 51 51 #endif 52 52 53 - #ifdef CONFIG_XFS_SECURITY 54 - # define XFS_SECURITY_STRING "security attributes, " 55 - # define ENOSECURITY 0 56 - #else 57 - # define XFS_SECURITY_STRING 58 - # define ENOSECURITY EOPNOTSUPP 59 - #endif 53 + #define XFS_SECURITY_STRING "security attributes, " 60 54 61 55 #ifdef CONFIG_XFS_RT 62 56 # define XFS_REALTIME_STRING "realtime, "

-1

fs/xfs/linux-2.6/xfs_vfs.h

··· 49 49 #define SYNC_REFCACHE 0x0040 /* prune some of the nfs ref cache */ 50 50 #define SYNC_REMOUNT 0x0080 /* remount readonly, no dummy LRs */ 51 51 #define SYNC_IOWAIT 0x0100 /* wait for all I/O to complete */ 52 - #define SYNC_SUPER 0x0200 /* flush superblock to disk */ 53 52 54 53 /* 55 54 * When remounting a filesystem read-only or freezing the filesystem,

+3 -27

fs/xfs/linux-2.6/xfs_vnode.h

··· 23 23 struct xfs_iomap; 24 24 struct attrlist_cursor_kern; 25 25 26 - typedef struct dentry bhv_vname_t; 27 - typedef __u64 bhv_vnumber_t; 28 26 typedef struct inode bhv_vnode_t; 29 27 30 28 #define VN_ISLNK(vp) S_ISLNK((vp)->i_mode) ··· 44 46 } 45 47 46 48 /* 47 - * Values for the vop_rwlock/rwunlock flags parameter. 48 - */ 49 - typedef enum bhv_vrwlock { 50 - VRWLOCK_NONE, 51 - VRWLOCK_READ, 52 - VRWLOCK_WRITE, 53 - VRWLOCK_WRITE_DIRECT, 54 - VRWLOCK_TRY_READ, 55 - VRWLOCK_TRY_WRITE 56 - } bhv_vrwlock_t; 57 - 58 - /* 59 49 * Return values for xfs_inactive. A return value of 60 50 * VN_INACTIVE_NOCACHE implies that the file system behavior 61 51 * has disassociated its state and bhv_desc_t from the vnode. ··· 59 73 #define IO_INVIS 0x00020 /* don't update inode timestamps */ 60 74 61 75 /* 62 - * Flags for vop_iflush call 76 + * Flags for xfs_inode_flush 63 77 */ 64 78 #define FLUSH_SYNC 1 /* wait for flush to complete */ 65 - #define FLUSH_INODE 2 /* flush the inode itself */ 66 - #define FLUSH_LOG 4 /* force the last log entry for 67 - * this inode out to disk */ 68 79 69 80 /* 70 81 * Flush/Invalidate options for vop_toss/flush/flushinval_pages. ··· 209 226 } 210 227 211 228 /* 212 - * Vname handling macros. 
213 - */ 214 - #define VNAME(dentry) ((char *) (dentry)->d_name.name) 215 - #define VNAMELEN(dentry) ((dentry)->d_name.len) 216 - #define VNAME_TO_VNODE(dentry) (vn_from_inode((dentry)->d_inode)) 217 - 218 - /* 219 229 * Dealing with bad inodes 220 230 */ 221 231 static inline int VN_BAD(bhv_vnode_t *vp) ··· 279 303 extern void _xfs_itrace_ref(struct xfs_inode *, char *, int, inst_t *); 280 304 extern void xfs_itrace_rele(struct xfs_inode *, char *, int, inst_t *); 281 305 #define xfs_itrace_entry(ip) \ 282 - _xfs_itrace_entry(ip, __FUNCTION__, (inst_t *)__return_address) 306 + _xfs_itrace_entry(ip, __func__, (inst_t *)__return_address) 283 307 #define xfs_itrace_exit(ip) \ 284 - _xfs_itrace_exit(ip, __FUNCTION__, (inst_t *)__return_address) 308 + _xfs_itrace_exit(ip, __func__, (inst_t *)__return_address) 285 309 #define xfs_itrace_exit_tag(ip, tag) \ 286 310 _xfs_itrace_exit(ip, tag, (inst_t *)__return_address) 287 311 #define xfs_itrace_ref(ip) \

+14 -6

fs/xfs/quota/xfs_dquot.c

··· 1291 1291 if (flags & XFS_QMOPT_DELWRI) { 1292 1292 xfs_bdwrite(mp, bp); 1293 1293 } else if (flags & XFS_QMOPT_ASYNC) { 1294 - xfs_bawrite(mp, bp); 1294 + error = xfs_bawrite(mp, bp); 1295 1295 } else { 1296 1296 error = xfs_bwrite(mp, bp); 1297 1297 } ··· 1439 1439 uint flags) 1440 1440 { 1441 1441 xfs_dqhash_t *thishash; 1442 - xfs_mount_t *mp; 1443 - 1444 - mp = dqp->q_mount; 1442 + xfs_mount_t *mp = dqp->q_mount; 1445 1443 1446 1444 ASSERT(XFS_QM_IS_MPLIST_LOCKED(mp)); 1447 1445 ASSERT(XFS_DQ_IS_HASH_LOCKED(dqp->q_hash)); ··· 1483 1485 * we're unmounting, we do care, so we flush it and wait. 1484 1486 */ 1485 1487 if (XFS_DQ_IS_DIRTY(dqp)) { 1488 + int error; 1486 1489 xfs_dqtrace_entry(dqp, "DQPURGE ->DQFLUSH: DQDIRTY"); 1487 1490 /* dqflush unlocks dqflock */ 1488 1491 /* ··· 1494 1495 * We don't care about getting disk errors here. We need 1495 1496 * to purge this dquot anyway, so we go ahead regardless. 1496 1497 */ 1497 - (void) xfs_qm_dqflush(dqp, XFS_QMOPT_SYNC); 1498 + error = xfs_qm_dqflush(dqp, XFS_QMOPT_SYNC); 1499 + if (error) 1500 + xfs_fs_cmn_err(CE_WARN, mp, 1501 + "xfs_qm_dqpurge: dquot %p flush failed", dqp); 1498 1502 xfs_dqflock(dqp); 1499 1503 } 1500 1504 ASSERT(dqp->q_pincount == 0); ··· 1582 1580 XFS_INCORE_TRYLOCK); 1583 1581 if (bp != NULL) { 1584 1582 if (XFS_BUF_ISDELAYWRITE(bp)) { 1583 + int error; 1585 1584 if (XFS_BUF_ISPINNED(bp)) { 1586 1585 xfs_log_force(dqp->q_mount, 1587 1586 (xfs_lsn_t)0, 1588 1587 XFS_LOG_FORCE); 1589 1588 } 1590 - xfs_bawrite(dqp->q_mount, bp); 1589 + error = xfs_bawrite(dqp->q_mount, bp); 1590 + if (error) 1591 + xfs_fs_cmn_err(CE_WARN, dqp->q_mount, 1592 + "xfs_qm_dqflock_pushbuf_wait: " 1593 + "pushbuf error %d on dqp %p, bp %p", 1594 + error, dqp, bp); 1591 1595 } else { 1592 1596 xfs_buf_relse(bp); 1593 1597 }

+12 -2

fs/xfs/quota/xfs_dquot_item.c

··· 146 146 xfs_dq_logitem_t *logitem) 147 147 { 148 148 xfs_dquot_t *dqp; 149 + int error; 149 150 150 151 dqp = logitem->qli_dquot; 151 152 ··· 162 161 * lock without sleeping, then there must not have been 163 162 * anyone in the process of flushing the dquot. 164 163 */ 165 - xfs_qm_dqflush(dqp, XFS_B_DELWRI); 164 + error = xfs_qm_dqflush(dqp, XFS_QMOPT_DELWRI); 165 + if (error) 166 + xfs_fs_cmn_err(CE_WARN, dqp->q_mount, 167 + "xfs_qm_dquot_logitem_push: push error %d on dqp %p", 168 + error, dqp); 166 169 xfs_dqunlock(dqp); 167 170 } 168 171 ··· 267 262 XFS_LOG_FORCE); 268 263 } 269 264 if (dopush) { 265 + int error; 270 266 #ifdef XFSRACEDEBUG 271 267 delay_for_intr(); 272 268 delay(300); 273 269 #endif 274 - xfs_bawrite(mp, bp); 270 + error = xfs_bawrite(mp, bp); 271 + if (error) 272 + xfs_fs_cmn_err(CE_WARN, mp, 273 + "xfs_qm_dquot_logitem_pushbuf: pushbuf error %d on qip %p, bp %p", 274 + error, qip, bp); 275 275 } else { 276 276 xfs_buf_relse(bp); 277 277 }

+45 -31

fs/xfs/quota/xfs_qm.c

··· 304 304 * necessary data structures like quotainfo. This is also responsible for 305 305 * running a quotacheck as necessary. We are guaranteed that the superblock 306 306 * is consistently read in at this point. 307 + * 308 + * If we fail here, the mount will continue with quota turned off. We don't 309 + * need to inidicate success or failure at all. 307 310 */ 308 - int 311 + void 309 312 xfs_qm_mount_quotas( 310 313 xfs_mount_t *mp, 311 314 int mfsi_flags) 312 315 { 313 316 int error = 0; 314 317 uint sbf; 315 - 316 318 317 319 /* 318 320 * If quotas on realtime volumes is not supported, we disable ··· 334 332 * Allocate the quotainfo structure inside the mount struct, and 335 333 * create quotainode(s), and change/rev superblock if necessary. 336 334 */ 337 - if ((error = xfs_qm_init_quotainfo(mp))) { 335 + error = xfs_qm_init_quotainfo(mp); 336 + if (error) { 338 337 /* 339 338 * We must turn off quotas. 340 339 */ ··· 347 344 * If any of the quotas are not consistent, do a quotacheck. 348 345 */ 349 346 if (XFS_QM_NEED_QUOTACHECK(mp) && 350 - !(mfsi_flags & XFS_MFSI_NO_QUOTACHECK)) { 351 - if ((error = xfs_qm_quotacheck(mp))) { 352 - /* Quotacheck has failed and quotas have 353 - * been disabled. 354 - */ 355 - return XFS_ERROR(error); 347 + !(mfsi_flags & XFS_MFSI_NO_QUOTACHECK)) { 348 + error = xfs_qm_quotacheck(mp); 349 + if (error) { 350 + /* Quotacheck failed and disabled quotas. */ 351 + return; 356 352 } 357 353 } 358 354 /* ··· 359 357 * quotachecked status, since we won't be doing accounting for 360 358 * that type anymore. 
361 359 */ 362 - if (!XFS_IS_UQUOTA_ON(mp)) { 360 + if (!XFS_IS_UQUOTA_ON(mp)) 363 361 mp->m_qflags &= ~XFS_UQUOTA_CHKD; 364 - } 365 - if (!(XFS_IS_GQUOTA_ON(mp) || XFS_IS_PQUOTA_ON(mp))) { 362 + if (!(XFS_IS_GQUOTA_ON(mp) || XFS_IS_PQUOTA_ON(mp))) 366 363 mp->m_qflags &= ~XFS_OQUOTA_CHKD; 367 - } 368 364 369 365 write_changes: 370 366 /* ··· 392 392 xfs_fs_cmn_err(CE_WARN, mp, 393 393 "Failed to initialize disk quotas."); 394 394 } 395 - return XFS_ERROR(error); 395 + return; 396 396 } 397 397 398 398 /* ··· 1438 1438 } 1439 1439 1440 1440 1441 - STATIC int 1441 + STATIC void 1442 1442 xfs_qm_reset_dqcounts( 1443 1443 xfs_mount_t *mp, 1444 1444 xfs_buf_t *bp, ··· 1478 1478 ddq->d_rtbwarns = 0; 1479 1479 ddq = (xfs_disk_dquot_t *) ((xfs_dqblk_t *)ddq + 1); 1480 1480 } 1481 - 1482 - return 0; 1483 1481 } 1484 1482 1485 1483 STATIC int ··· 1518 1520 if (error) 1519 1521 break; 1520 1522 1521 - (void) xfs_qm_reset_dqcounts(mp, bp, firstid, type); 1523 + xfs_qm_reset_dqcounts(mp, bp, firstid, type); 1522 1524 xfs_bdwrite(mp, bp); 1523 1525 /* 1524 1526 * goto the next block. ··· 1808 1810 * Now release the inode. This will send it to 'inactive', and 1809 1811 * possibly even free blocks. 1810 1812 */ 1811 - VN_RELE(XFS_ITOV(ip)); 1813 + IRELE(ip); 1812 1814 1813 1815 /* 1814 1816 * Goto next inode. ··· 1878 1880 } while (! done); 1879 1881 1880 1882 /* 1883 + * We've made all the changes that we need to make incore. 1884 + * Flush them down to disk buffers if everything was updated 1885 + * successfully. 1886 + */ 1887 + if (!error) 1888 + error = xfs_qm_dqflush_all(mp, XFS_QMOPT_DELWRI); 1889 + 1890 + /* 1881 1891 * We can get this error if we couldn't do a dquot allocation inside 1882 1892 * xfs_qm_dqusage_adjust (via bulkstat). 
We don't care about the 1883 1893 * dirty dquots that might be cached, we just want to get rid of them ··· 1896 1890 xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_QUOTAOFF); 1897 1891 goto error_return; 1898 1892 } 1899 - /* 1900 - * We've made all the changes that we need to make incore. 1901 - * Now flush_them down to disk buffers. 1902 - */ 1903 - xfs_qm_dqflush_all(mp, XFS_QMOPT_DELWRI); 1904 1893 1905 1894 /* 1906 1895 * We didn't log anything, because if we crashed, we'll have to ··· 1927 1926 ASSERT(mp->m_quotainfo != NULL); 1928 1927 ASSERT(xfs_Gqm != NULL); 1929 1928 xfs_qm_destroy_quotainfo(mp); 1930 - (void)xfs_mount_reset_sbqflags(mp); 1929 + if (xfs_mount_reset_sbqflags(mp)) { 1930 + cmn_err(CE_WARN, "XFS quotacheck %s: " 1931 + "Failed to reset quota flags.", mp->m_fsname); 1932 + } 1931 1933 } else { 1932 1934 cmn_err(CE_NOTE, "XFS quotacheck %s: Done.", mp->m_fsname); 1933 1935 } ··· 1972 1968 if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, 1973 1969 0, 0, &gip, 0))) { 1974 1970 if (uip) 1975 - VN_RELE(XFS_ITOV(uip)); 1971 + IRELE(uip); 1976 1972 return XFS_ERROR(error); 1977 1973 } 1978 1974 } ··· 2003 1999 sbflags | XFS_SB_GQUOTINO, flags); 2004 2000 if (error) { 2005 2001 if (uip) 2006 - VN_RELE(XFS_ITOV(uip)); 2002 + IRELE(uip); 2007 2003 2008 2004 return XFS_ERROR(error); 2009 2005 } ··· 2097 2093 * dirty dquots. 2098 2094 */ 2099 2095 if (XFS_DQ_IS_DIRTY(dqp)) { 2096 + int error; 2100 2097 xfs_dqtrace_entry(dqp, "DQSHAKE: DQDIRTY"); 2101 2098 /* 2102 2099 * We flush it delayed write, so don't bother 2103 2100 * releasing the mplock. 2104 2101 */ 2105 - (void) xfs_qm_dqflush(dqp, XFS_QMOPT_DELWRI); 2102 + error = xfs_qm_dqflush(dqp, XFS_QMOPT_DELWRI); 2103 + if (error) { 2104 + xfs_fs_cmn_err(CE_WARN, dqp->q_mount, 2105 + "xfs_qm_dqflush_all: dquot %p flush failed", dqp); 2106 + } 2106 2107 xfs_dqunlock(dqp); /* dqflush unlocks dqflock */ 2107 2108 dqp = dqp->dq_flnext; 2108 2109 continue; ··· 2274 2265 * dirty dquots. 
2275 2266 */ 2276 2267 if (XFS_DQ_IS_DIRTY(dqp)) { 2268 + int error; 2277 2269 xfs_dqtrace_entry(dqp, "DQRECLAIM: DQDIRTY"); 2278 2270 /* 2279 2271 * We flush it delayed write, so don't bother 2280 2272 * releasing the freelist lock. 2281 2273 */ 2282 - (void) xfs_qm_dqflush(dqp, XFS_QMOPT_DELWRI); 2274 + error = xfs_qm_dqflush(dqp, XFS_QMOPT_DELWRI); 2275 + if (error) { 2276 + xfs_fs_cmn_err(CE_WARN, dqp->q_mount, 2277 + "xfs_qm_dqreclaim: dquot %p flush failed", dqp); 2278 + } 2283 2279 xfs_dqunlock(dqp); /* dqflush unlocks dqflock */ 2284 2280 continue; 2285 2281 } ··· 2392 2378 } 2393 2379 2394 2380 xfs_mod_sb(tp, flags); 2395 - (void) xfs_trans_commit(tp, 0); 2381 + error = xfs_trans_commit(tp, 0); 2396 2382 2397 - return 0; 2383 + return error; 2398 2384 } 2399 2385 2400 2386

+1 -1

fs/xfs/quota/xfs_qm.h

··· 165 165 #define XFS_QM_RELE(xqm) ((xqm)->qm_nrefs--) 166 166 167 167 extern void xfs_qm_destroy_quotainfo(xfs_mount_t *); 168 - extern int xfs_qm_mount_quotas(xfs_mount_t *, int); 168 + extern void xfs_qm_mount_quotas(xfs_mount_t *, int); 169 169 extern int xfs_qm_quotacheck(xfs_mount_t *); 170 170 extern void xfs_qm_unmount_quotadestroy(xfs_mount_t *); 171 171 extern int xfs_qm_unmount_quotas(xfs_mount_t *);

+2 -2

fs/xfs/quota/xfs_qm_stats.h

··· 45 45 46 46 # define XQM_STATS_INC(count) do { } while (0) 47 47 48 - static __inline void xfs_qm_init_procfs(void) { }; 49 - static __inline void xfs_qm_cleanup_procfs(void) { }; 48 + static inline void xfs_qm_init_procfs(void) { }; 49 + static inline void xfs_qm_cleanup_procfs(void) { }; 50 50 51 51 #endif 52 52

+26 -18

fs/xfs/quota/xfs_qm_syscalls.c

··· 279 279 280 280 /* 281 281 * Write the LI_QUOTAOFF log record, and do SB changes atomically, 282 - * and synchronously. 282 + * and synchronously. If we fail to write, we should abort the 283 + * operation as it cannot be recovered safely if we crash. 283 284 */ 284 - xfs_qm_log_quotaoff(mp, &qoffstart, flags); 285 + error = xfs_qm_log_quotaoff(mp, &qoffstart, flags); 286 + if (error) 287 + goto out_error; 285 288 286 289 /* 287 290 * Next we clear the XFS_MOUNT_*DQ_ACTIVE bit(s) in the mount struct ··· 340 337 * So, we have QUOTAOFF start and end logitems; the start 341 338 * logitem won't get overwritten until the end logitem appears... 342 339 */ 343 - xfs_qm_log_quotaoff_end(mp, qoffstart, flags); 340 + error = xfs_qm_log_quotaoff_end(mp, qoffstart, flags); 341 + if (error) { 342 + /* We're screwed now. Shutdown is the only option. */ 343 + xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); 344 + goto out_error; 345 + } 344 346 345 347 /* 346 348 * If quotas is completely disabled, close shop. ··· 369 361 XFS_PURGE_INODE(XFS_QI_GQIP(mp)); 370 362 XFS_QI_GQIP(mp) = NULL; 371 363 } 364 + out_error: 372 365 mutex_unlock(&(XFS_QI_QOFFLOCK(mp))); 373 366 374 367 return (error); ··· 380 371 xfs_mount_t *mp, 381 372 uint flags) 382 373 { 383 - int error; 374 + int error = 0, error2 = 0; 384 375 xfs_inode_t *qip; 385 376 386 377 if (!capable(CAP_SYS_ADMIN)) 387 378 return XFS_ERROR(EPERM); 388 - error = 0; 389 379 if (!xfs_sb_version_hasquota(&mp->m_sb) || flags == 0) { 390 380 qdprintk("qtrunc flags=%x m_qflags=%x\n", flags, mp->m_qflags); 391 381 return XFS_ERROR(EINVAL); ··· 392 384 393 385 if ((flags & XFS_DQ_USER) && mp->m_sb.sb_uquotino != NULLFSINO) { 394 386 error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, 0, 0, &qip, 0); 395 - if (! 
error) { 396 - (void) xfs_truncate_file(mp, qip); 397 - VN_RELE(XFS_ITOV(qip)); 387 + if (!error) { 388 + error = xfs_truncate_file(mp, qip); 389 + IRELE(qip); 398 390 } 399 391 } 400 392 401 393 if ((flags & (XFS_DQ_GROUP|XFS_DQ_PROJ)) && 402 394 mp->m_sb.sb_gquotino != NULLFSINO) { 403 - error = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, 0, 0, &qip, 0); 404 - if (! error) { 405 - (void) xfs_truncate_file(mp, qip); 406 - VN_RELE(XFS_ITOV(qip)); 395 + error2 = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, 0, 0, &qip, 0); 396 + if (!error2) { 397 + error2 = xfs_truncate_file(mp, qip); 398 + IRELE(qip); 407 399 } 408 400 } 409 401 410 - return (error); 402 + return error ? error : error2; 411 403 } 412 404 413 405 ··· 560 552 out->qs_uquota.qfs_nblks = uip->i_d.di_nblocks; 561 553 out->qs_uquota.qfs_nextents = uip->i_d.di_nextents; 562 554 if (tempuqip) 563 - VN_RELE(XFS_ITOV(uip)); 555 + IRELE(uip); 564 556 } 565 557 if (gip) { 566 558 out->qs_gquota.qfs_nblks = gip->i_d.di_nblocks; 567 559 out->qs_gquota.qfs_nextents = gip->i_d.di_nextents; 568 560 if (tempgqip) 569 - VN_RELE(XFS_ITOV(gip)); 561 + IRELE(gip); 570 562 } 571 563 if (mp->m_quotainfo) { 572 564 out->qs_incoredqs = XFS_QI_MPLNDQUOTS(mp); ··· 734 726 xfs_trans_log_dquot(tp, dqp); 735 727 736 728 xfs_dqtrace_entry(dqp, "Q_SETQLIM: COMMIT"); 737 - xfs_trans_commit(tp, 0); 729 + error = xfs_trans_commit(tp, 0); 738 730 xfs_qm_dqprint(dqp); 739 731 xfs_qm_dqrele(dqp); 740 732 mutex_unlock(&(XFS_QI_QOFFLOCK(mp))); 741 733 742 - return (0); 734 + return error; 743 735 } 744 736 745 737 STATIC int ··· 1103 1095 * inactive code in hell. 1104 1096 */ 1105 1097 if (vnode_refd) 1106 - VN_RELE(vp); 1098 + IRELE(ip); 1107 1099 XFS_MOUNT_ILOCK(mp); 1108 1100 /* 1109 1101 * If an inode was inserted or removed, we gotta

+19 -18

fs/xfs/support/ktrace.c

··· 24 24 void __init 25 25 ktrace_init(int zentries) 26 26 { 27 - ktrace_zentries = zentries; 27 + ktrace_zentries = roundup_pow_of_two(zentries); 28 28 29 29 ktrace_hdr_zone = kmem_zone_init(sizeof(ktrace_t), 30 30 "ktrace_hdr"); ··· 47 47 * ktrace_alloc() 48 48 * 49 49 * Allocate a ktrace header and enough buffering for the given 50 - * number of entries. 50 + * number of entries. Round the number of entries up to a 51 + * power of 2 so we can do fast masking to get the index from 52 + * the atomic index counter. 51 53 */ 52 54 ktrace_t * 53 55 ktrace_alloc(int nentries, unsigned int __nocast sleep) 54 56 { 55 57 ktrace_t *ktp; 56 58 ktrace_entry_t *ktep; 59 + int entries; 57 60 58 61 ktp = (ktrace_t*)kmem_zone_alloc(ktrace_hdr_zone, sleep); 59 62 ··· 73 70 /* 74 71 * Special treatment for buffers with the ktrace_zentries entries 75 72 */ 76 - if (nentries == ktrace_zentries) { 73 + entries = roundup_pow_of_two(nentries); 74 + if (entries == ktrace_zentries) { 77 75 ktep = (ktrace_entry_t*)kmem_zone_zalloc(ktrace_ent_zone, 78 76 sleep); 79 77 } else { 80 - ktep = (ktrace_entry_t*)kmem_zalloc((nentries * sizeof(*ktep)), 78 + ktep = (ktrace_entry_t*)kmem_zalloc((entries * sizeof(*ktep)), 81 79 sleep | KM_LARGE); 82 80 } 83 81 ··· 95 91 } 96 92 97 93 ktp->kt_entries = ktep; 98 - ktp->kt_nentries = nentries; 99 - ktp->kt_index = 0; 94 + ktp->kt_nentries = entries; 95 + ASSERT(is_power_of_2(entries)); 96 + ktp->kt_index_mask = entries - 1; 97 + atomic_set(&ktp->kt_index, 0); 100 98 ktp->kt_rollover = 0; 101 99 return ktp; 102 100 } ··· 157 151 void *val14, 158 152 void *val15) 159 153 { 160 - static DEFINE_SPINLOCK(wrap_lock); 161 - unsigned long flags; 162 154 int index; 163 155 ktrace_entry_t *ktep; 164 156 ··· 165 161 /* 166 162 * Grab an entry by pushing the index up to the next one. 
167 163 */ 168 - spin_lock_irqsave(&wrap_lock, flags); 169 - index = ktp->kt_index; 170 - if (++ktp->kt_index == ktp->kt_nentries) 171 - ktp->kt_index = 0; 172 - spin_unlock_irqrestore(&wrap_lock, flags); 173 - 164 + index = atomic_add_return(1, &ktp->kt_index); 165 + index = (index - 1) & ktp->kt_index_mask; 174 166 if (!ktp->kt_rollover && index == ktp->kt_nentries - 1) 175 167 ktp->kt_rollover = 1; 176 168 ··· 199 199 ktrace_nentries( 200 200 ktrace_t *ktp) 201 201 { 202 - if (ktp == NULL) { 202 + int index; 203 + if (ktp == NULL) 203 204 return 0; 204 - } 205 205 206 - return (ktp->kt_rollover ? ktp->kt_nentries : ktp->kt_index); 206 + index = atomic_read(&ktp->kt_index) & ktp->kt_index_mask; 207 + return (ktp->kt_rollover ? ktp->kt_nentries : index); 207 208 } 208 209 209 210 /* ··· 229 228 int nentries; 230 229 231 230 if (ktp->kt_rollover) 232 - index = ktp->kt_index; 231 + index = atomic_read(&ktp->kt_index) & ktp->kt_index_mask; 233 232 else 234 233 index = 0; 235 234

+2 -1

fs/xfs/support/ktrace.h

··· 30 30 */ 31 31 typedef struct ktrace { 32 32 int kt_nentries; /* number of entries in trace buf */ 33 - int kt_index; /* current index in entries */ 33 + atomic_t kt_index; /* current index in entries */ 34 + unsigned int kt_index_mask; 34 35 int kt_rollover; 35 36 ktrace_entry_t *kt_entries; /* buffer of entries */ 36 37 } ktrace_t;

+1 -1

fs/xfs/xfs.h

··· 22 22 #define STATIC 23 23 #define DEBUG 1 24 24 #define XFS_BUF_LOCK_TRACKING 1 25 - /* #define QUOTADEBUG 1 */ 25 + #define QUOTADEBUG 1 26 26 #endif 27 27 28 28 #ifdef CONFIG_XFS_TRACE

+10 -6

fs/xfs/xfs_acl.c

··· 307 307 308 308 VN_HOLD(vp); 309 309 error = xfs_acl_allow_set(vp, kind); 310 - if (error) 311 - goto out; 312 310 313 311 /* Incoming ACL exists, set file mode based on its value */ 314 - if (kind == _ACL_TYPE_ACCESS) 315 - xfs_acl_setmode(vp, xfs_acl, &basicperms); 312 + if (!error && kind == _ACL_TYPE_ACCESS) 313 + error = xfs_acl_setmode(vp, xfs_acl, &basicperms); 314 + 315 + if (error) 316 + goto out; 316 317 317 318 /* 318 319 * If we have more than std unix permissions, set up the actual attr. ··· 324 323 if (!basicperms) { 325 324 xfs_acl_set_attr(vp, xfs_acl, kind, &error); 326 325 } else { 327 - xfs_acl_vremove(vp, _ACL_TYPE_ACCESS); 326 + error = -xfs_acl_vremove(vp, _ACL_TYPE_ACCESS); 328 327 } 329 328 330 329 out: ··· 708 707 709 708 memcpy(cacl, pdaclp, sizeof(xfs_acl_t)); 710 709 xfs_acl_filter_mode(mode, cacl); 711 - xfs_acl_setmode(vp, cacl, &basicperms); 710 + error = xfs_acl_setmode(vp, cacl, &basicperms); 711 + if (error) 712 + goto out_error; 712 713 713 714 /* 714 715 * Set the Default and Access ACL on the file. The mode is already ··· 723 720 xfs_acl_set_attr(vp, pdaclp, _ACL_TYPE_DEFAULT, &error); 724 721 if (!error && !basicperms) 725 722 xfs_acl_set_attr(vp, cacl, _ACL_TYPE_ACCESS, &error); 723 + out_error: 726 724 _ACL_FREE(cacl); 727 725 return error; 728 726 }

+29 -36

fs/xfs/xfs_alloc.c

··· 45 45 #define XFSA_FIXUP_BNO_OK 1 46 46 #define XFSA_FIXUP_CNT_OK 2 47 47 48 - STATIC int 48 + STATIC void 49 49 xfs_alloc_search_busy(xfs_trans_t *tp, 50 50 xfs_agnumber_t agno, 51 51 xfs_agblock_t bno, ··· 55 55 ktrace_t *xfs_alloc_trace_buf; 56 56 57 57 #define TRACE_ALLOC(s,a) \ 58 - xfs_alloc_trace_alloc(__FUNCTION__, s, a, __LINE__) 58 + xfs_alloc_trace_alloc(__func__, s, a, __LINE__) 59 59 #define TRACE_FREE(s,a,b,x,f) \ 60 - xfs_alloc_trace_free(__FUNCTION__, s, mp, a, b, x, f, __LINE__) 60 + xfs_alloc_trace_free(__func__, s, mp, a, b, x, f, __LINE__) 61 61 #define TRACE_MODAGF(s,a,f) \ 62 - xfs_alloc_trace_modagf(__FUNCTION__, s, mp, a, f, __LINE__) 63 - #define TRACE_BUSY(__FUNCTION__,s,ag,agb,l,sl,tp) \ 64 - xfs_alloc_trace_busy(__FUNCTION__, s, mp, ag, agb, l, sl, tp, XFS_ALLOC_KTRACE_BUSY, __LINE__) 65 - #define TRACE_UNBUSY(__FUNCTION__,s,ag,sl,tp) \ 66 - xfs_alloc_trace_busy(__FUNCTION__, s, mp, ag, -1, -1, sl, tp, XFS_ALLOC_KTRACE_UNBUSY, __LINE__) 67 - #define TRACE_BUSYSEARCH(__FUNCTION__,s,ag,agb,l,sl,tp) \ 68 - xfs_alloc_trace_busy(__FUNCTION__, s, mp, ag, agb, l, sl, tp, XFS_ALLOC_KTRACE_BUSYSEARCH, __LINE__) 62 + xfs_alloc_trace_modagf(__func__, s, mp, a, f, __LINE__) 63 + #define TRACE_BUSY(__func__,s,ag,agb,l,sl,tp) \ 64 + xfs_alloc_trace_busy(__func__, s, mp, ag, agb, l, sl, tp, XFS_ALLOC_KTRACE_BUSY, __LINE__) 65 + #define TRACE_UNBUSY(__func__,s,ag,sl,tp) \ 66 + xfs_alloc_trace_busy(__func__, s, mp, ag, -1, -1, sl, tp, XFS_ALLOC_KTRACE_UNBUSY, __LINE__) 67 + #define TRACE_BUSYSEARCH(__func__,s,ag,agb,l,tp) \ 68 + xfs_alloc_trace_busy(__func__, s, mp, ag, agb, l, 0, tp, XFS_ALLOC_KTRACE_BUSYSEARCH, __LINE__) 69 69 #else 70 70 #define TRACE_ALLOC(s,a) 71 71 #define TRACE_FREE(s,a,b,x,f) 72 72 #define TRACE_MODAGF(s,a,f) 73 73 #define TRACE_BUSY(s,a,ag,agb,l,sl,tp) 74 74 #define TRACE_UNBUSY(fname,s,ag,sl,tp) 75 - #define TRACE_BUSYSEARCH(fname,s,ag,agb,l,sl,tp) 75 + #define TRACE_BUSYSEARCH(fname,s,ag,agb,l,tp) 76 76 #endif /* 
XFS_ALLOC_TRACE */ 77 77 78 78 /* ··· 93 93 * Compute aligned version of the found extent. 94 94 * Takes alignment and min length into account. 95 95 */ 96 - STATIC int /* success (>= minlen) */ 96 + STATIC void 97 97 xfs_alloc_compute_aligned( 98 98 xfs_agblock_t foundbno, /* starting block in found extent */ 99 99 xfs_extlen_t foundlen, /* length in found extent */ ··· 116 116 } 117 117 *resbno = bno; 118 118 *reslen = len; 119 - return len >= minlen; 120 119 } 121 120 122 121 /* ··· 836 837 if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i))) 837 838 goto error0; 838 839 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 839 - if (!xfs_alloc_compute_aligned(ltbno, ltlen, 840 - args->alignment, args->minlen, 841 - &ltbnoa, &ltlena)) 840 + xfs_alloc_compute_aligned(ltbno, ltlen, args->alignment, 841 + args->minlen, &ltbnoa, &ltlena); 842 + if (ltlena < args->minlen) 842 843 continue; 843 844 args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); 844 845 xfs_alloc_fix_len(args); ··· 957 958 if ((error = xfs_alloc_get_rec(bno_cur_lt, &ltbno, &ltlen, &i))) 958 959 goto error0; 959 960 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 960 - if (xfs_alloc_compute_aligned(ltbno, ltlen, 961 - args->alignment, args->minlen, 962 - &ltbnoa, &ltlena)) 961 + xfs_alloc_compute_aligned(ltbno, ltlen, args->alignment, 962 + args->minlen, &ltbnoa, &ltlena); 963 + if (ltlena >= args->minlen) 963 964 break; 964 965 if ((error = xfs_alloc_decrement(bno_cur_lt, 0, &i))) 965 966 goto error0; ··· 973 974 if ((error = xfs_alloc_get_rec(bno_cur_gt, &gtbno, &gtlen, &i))) 974 975 goto error0; 975 976 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 976 - if (xfs_alloc_compute_aligned(gtbno, gtlen, 977 - args->alignment, args->minlen, 978 - &gtbnoa, &gtlena)) 977 + xfs_alloc_compute_aligned(gtbno, gtlen, args->alignment, 978 + args->minlen, &gtbnoa, &gtlena); 979 + if (gtlena >= args->minlen) 979 980 break; 980 981 if ((error = xfs_alloc_increment(bno_cur_gt, 0, &i))) 981 982 goto error0; ··· 2561 2562 2562 2563 
2563 2564 /* 2564 - * returns non-zero if any of (agno,bno):len is in a busy list 2565 + * If we find the extent in the busy list, force the log out to get the 2566 + * extent out of the busy list so the caller can use it straight away. 2565 2567 */ 2566 - STATIC int 2568 + STATIC void 2567 2569 xfs_alloc_search_busy(xfs_trans_t *tp, 2568 2570 xfs_agnumber_t agno, 2569 2571 xfs_agblock_t bno, ··· 2572 2572 { 2573 2573 xfs_mount_t *mp; 2574 2574 xfs_perag_busy_t *bsy; 2575 - int n; 2576 2575 xfs_agblock_t uend, bend; 2577 2576 xfs_lsn_t lsn; 2578 2577 int cnt; ··· 2584 2585 uend = bno + len - 1; 2585 2586 2586 2587 /* search pagb_list for this slot, skipping open slots */ 2587 - for (bsy = mp->m_perag[agno].pagb_list, n = 0; 2588 - cnt; bsy++, n++) { 2588 + for (bsy = mp->m_perag[agno].pagb_list; cnt; bsy++) { 2589 2589 2590 2590 /* 2591 2591 * (start1,length1) within (start2, length2) 2592 2592 */ 2593 2593 if (bsy->busy_tp != NULL) { 2594 2594 bend = bsy->busy_start + bsy->busy_length - 1; 2595 - if ((bno > bend) || 2596 - (uend < bsy->busy_start)) { 2595 + if ((bno > bend) || (uend < bsy->busy_start)) { 2597 2596 cnt--; 2598 2597 } else { 2599 2598 TRACE_BUSYSEARCH("xfs_alloc_search_busy", 2600 - "found1", agno, bno, len, n, 2601 - tp); 2599 + "found1", agno, bno, len, tp); 2602 2600 break; 2603 2601 } 2604 2602 } ··· 2606 2610 * transaction that freed the block 2607 2611 */ 2608 2612 if (cnt) { 2609 - TRACE_BUSYSEARCH("xfs_alloc_search_busy", "found", agno, bno, len, n, tp); 2613 + TRACE_BUSYSEARCH("xfs_alloc_search_busy", "found", agno, bno, len, tp); 2610 2614 lsn = bsy->busy_tp->t_commit_lsn; 2611 2615 spin_unlock(&mp->m_perag[agno].pagb_lock); 2612 2616 xfs_log_force(mp, lsn, XFS_LOG_FORCE|XFS_LOG_SYNC); 2613 2617 } else { 2614 - TRACE_BUSYSEARCH("xfs_alloc_search_busy", "not-found", agno, bno, len, n, tp); 2615 - n = -1; 2618 + TRACE_BUSYSEARCH("xfs_alloc_search_busy", "not-found", agno, bno, len, tp); 2616 2619 spin_unlock(&mp->m_perag[agno].pagb_lock); 
2617 2620 } 2618 - 2619 - return n; 2620 2621 }

+1 -9

fs/xfs/xfs_attr.c

··· 2647 2647 } 2648 2648 2649 2649 STATIC int 2650 - attr_secure_capable( 2651 - bhv_vnode_t *vp, 2652 - cred_t *cred) 2653 - { 2654 - return -ENOSECURITY; 2655 - } 2656 - 2657 - STATIC int 2658 2650 attr_system_set( 2659 2651 bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags) 2660 2652 { ··· 2716 2724 .attr_get = attr_generic_get, 2717 2725 .attr_set = attr_generic_set, 2718 2726 .attr_remove = attr_generic_remove, 2719 - .attr_capable = attr_secure_capable, 2727 + .attr_capable = (attrcapable_t)fs_noerr, 2720 2728 }; 2721 2729 2722 2730 struct attrnames attr_user = {

+1 -1

fs/xfs/xfs_attr_leaf.c

··· 166 166 167 167 if (!(mp->m_flags & XFS_MOUNT_ATTR2)) { 168 168 if (bytes <= XFS_IFORK_ASIZE(dp)) 169 - return mp->m_attroffset >> 3; 169 + return dp->i_d.di_forkoff; 170 170 return 0; 171 171 } 172 172

+31 -28

fs/xfs/xfs_bmap.c

··· 323 323 int whichfork); /* data or attr fork */ 324 324 325 325 #define XFS_BMAP_TRACE_DELETE(d,ip,i,c,w) \ 326 - xfs_bmap_trace_delete(__FUNCTION__,d,ip,i,c,w) 326 + xfs_bmap_trace_delete(__func__,d,ip,i,c,w) 327 327 #define XFS_BMAP_TRACE_INSERT(d,ip,i,c,r1,r2,w) \ 328 - xfs_bmap_trace_insert(__FUNCTION__,d,ip,i,c,r1,r2,w) 328 + xfs_bmap_trace_insert(__func__,d,ip,i,c,r1,r2,w) 329 329 #define XFS_BMAP_TRACE_POST_UPDATE(d,ip,i,w) \ 330 - xfs_bmap_trace_post_update(__FUNCTION__,d,ip,i,w) 330 + xfs_bmap_trace_post_update(__func__,d,ip,i,w) 331 331 #define XFS_BMAP_TRACE_PRE_UPDATE(d,ip,i,w) \ 332 - xfs_bmap_trace_pre_update(__FUNCTION__,d,ip,i,w) 332 + xfs_bmap_trace_pre_update(__func__,d,ip,i,w) 333 333 #else 334 334 #define XFS_BMAP_TRACE_DELETE(d,ip,i,c,w) 335 335 #define XFS_BMAP_TRACE_INSERT(d,ip,i,c,r1,r2,w) ··· 2402 2402 2403 2403 #define XFS_ALLOC_GAP_UNITS 4 2404 2404 2405 - STATIC int 2405 + STATIC void 2406 2406 xfs_bmap_adjacent( 2407 2407 xfs_bmalloca_t *ap) /* bmap alloc argument struct */ 2408 2408 { ··· 2548 2548 ap->rval = gotbno; 2549 2549 } 2550 2550 #undef ISVALID 2551 - return 0; 2552 2551 } 2553 2552 2554 2553 STATIC int ··· 4153 4154 * number of leaf entries, is controlled by the type of di_nextents 4154 4155 * (a signed 32-bit number, xfs_extnum_t), or by di_anextents 4155 4156 * (a signed 16-bit number, xfs_aextnum_t). 4157 + * 4158 + * Note that we can no longer assume that if we are in ATTR1 that 4159 + * the fork offset of all the inodes will be (m_attroffset >> 3) 4160 + * because we could have mounted with ATTR2 and then mounted back 4161 + * with ATTR1, keeping the di_forkoff's fixed but probably at 4162 + * various positions. Therefore, for both ATTR1 and ATTR2 4163 + * we have to assume the worst case scenario of a minimum size 4164 + * available. 4156 4165 */ 4157 4166 if (whichfork == XFS_DATA_FORK) { 4158 4167 maxleafents = MAXEXTNUM; 4159 - sz = (mp->m_flags & XFS_MOUNT_ATTR2) ? 
4160 - XFS_BMDR_SPACE_CALC(MINDBTPTRS) : mp->m_attroffset; 4168 + sz = XFS_BMDR_SPACE_CALC(MINDBTPTRS); 4161 4169 } else { 4162 4170 maxleafents = MAXAEXTNUM; 4163 - sz = (mp->m_flags & XFS_MOUNT_ATTR2) ? 4164 - XFS_BMDR_SPACE_CALC(MINABTPTRS) : 4165 - mp->m_sb.sb_inodesize - mp->m_attroffset; 4171 + sz = XFS_BMDR_SPACE_CALC(MINABTPTRS); 4166 4172 } 4167 4173 maxrootrecs = (int)XFS_BTREE_BLOCK_MAXRECS(sz, xfs_bmdr, 0); 4168 4174 minleafrecs = mp->m_bmap_dmnr[0]; ··· 5776 5772 int error; /* return value */ 5777 5773 __int64_t fixlen; /* length for -1 case */ 5778 5774 int i; /* extent number */ 5779 - bhv_vnode_t *vp; /* corresponding vnode */ 5780 5775 int lock; /* lock state */ 5781 5776 xfs_bmbt_irec_t *map; /* buffer for user's data */ 5782 5777 xfs_mount_t *mp; /* file system mount point */ ··· 5792 5789 int bmapi_flags; /* flags for xfs_bmapi */ 5793 5790 __int32_t oflags; /* getbmapx bmv_oflags field */ 5794 5791 5795 - vp = XFS_ITOV(ip); 5796 5792 mp = ip->i_mount; 5797 5793 5798 5794 whichfork = interface & BMV_IF_ATTRFORK ? 
XFS_ATTR_FORK : XFS_DATA_FORK; ··· 5813 5811 if ((interface & BMV_IF_NO_DMAPI_READ) == 0 && 5814 5812 DM_EVENT_ENABLED(ip, DM_EVENT_READ) && 5815 5813 whichfork == XFS_DATA_FORK) { 5816 - error = XFS_SEND_DATA(mp, DM_EVENT_READ, vp, 0, 0, 0, NULL); 5814 + error = XFS_SEND_DATA(mp, DM_EVENT_READ, ip, 0, 0, 0, NULL); 5817 5815 if (error) 5818 5816 return XFS_ERROR(error); 5819 5817 } ··· 5871 5869 /* xfs_fsize_t last_byte = xfs_file_last_byte(ip); */ 5872 5870 error = xfs_flush_pages(ip, (xfs_off_t)0, 5873 5871 -1, 0, FI_REMAPF); 5872 + if (error) { 5873 + xfs_iunlock(ip, XFS_IOLOCK_SHARED); 5874 + return error; 5875 + } 5874 5876 } 5875 5877 5876 5878 ASSERT(whichfork == XFS_ATTR_FORK || ip->i_delayed_blks == 0); ··· 6168 6162 } 6169 6163 if (*thispa == *pp) { 6170 6164 cmn_err(CE_WARN, "%s: thispa(%d) == pp(%d) %Ld", 6171 - __FUNCTION__, j, i, 6165 + __func__, j, i, 6172 6166 (unsigned long long)be64_to_cpu(*thispa)); 6173 6167 panic("%s: ptrs are equal in node\n", 6174 - __FUNCTION__); 6168 + __func__); 6175 6169 } 6176 6170 } 6177 6171 } ··· 6198 6192 xfs_mount_t *mp; /* file system mount structure */ 6199 6193 __be64 *pp; /* pointer to block address */ 6200 6194 xfs_bmbt_rec_t *ep; /* pointer to current extent */ 6201 - xfs_bmbt_rec_t *lastp; /* pointer to previous extent */ 6195 + xfs_bmbt_rec_t last = {0, 0}; /* last extent in prev block */ 6202 6196 xfs_bmbt_rec_t *nextp; /* pointer to next extent */ 6203 6197 int bp_release = 0; 6204 6198 ··· 6268 6262 /* 6269 6263 * Loop over all leaf nodes checking that all extents are in the right order. 
6270 6264 */ 6271 - lastp = NULL; 6272 6265 for (;;) { 6273 6266 xfs_fsblock_t nextbno; 6274 6267 xfs_extnum_t num_recs; ··· 6288 6283 */ 6289 6284 6290 6285 ep = XFS_BTREE_REC_ADDR(xfs_bmbt, block, 1); 6286 + if (i) { 6287 + xfs_btree_check_rec(XFS_BTNUM_BMAP, &last, ep); 6288 + } 6291 6289 for (j = 1; j < num_recs; j++) { 6292 6290 nextp = XFS_BTREE_REC_ADDR(xfs_bmbt, block, j + 1); 6293 - if (lastp) { 6294 - xfs_btree_check_rec(XFS_BTNUM_BMAP, 6295 - (void *)lastp, (void *)ep); 6296 - } 6297 - xfs_btree_check_rec(XFS_BTNUM_BMAP, (void *)ep, 6298 - (void *)(nextp)); 6299 - lastp = ep; 6291 + xfs_btree_check_rec(XFS_BTNUM_BMAP, ep, nextp); 6300 6292 ep = nextp; 6301 6293 } 6302 6294 6295 + last = *ep; 6303 6296 i += num_recs; 6304 6297 if (bp_release) { 6305 6298 bp_release = 0; ··· 6328 6325 return; 6329 6326 6330 6327 error0: 6331 - cmn_err(CE_WARN, "%s: at error0", __FUNCTION__); 6328 + cmn_err(CE_WARN, "%s: at error0", __func__); 6332 6329 if (bp_release) 6333 6330 xfs_trans_brelse(NULL, bp); 6334 6331 error_norelse: 6335 6332 cmn_err(CE_WARN, "%s: BAD after btree leaves for %d extents", 6336 - __FUNCTION__, i); 6337 - panic("%s: CORRUPTED BTREE OR SOMETHING", __FUNCTION__); 6333 + __func__, i); 6334 + panic("%s: CORRUPTED BTREE OR SOMETHING", __func__); 6338 6335 return; 6339 6336 } 6340 6337 #endif

+1 -1

fs/xfs/xfs_bmap.h

··· 151 151 xfs_extnum_t cnt, /* count of entries in list */ 152 152 int whichfork); /* data or attr fork */ 153 153 #define XFS_BMAP_TRACE_EXLIST(ip,c,w) \ 154 - xfs_bmap_trace_exlist(__FUNCTION__,ip,c,w) 154 + xfs_bmap_trace_exlist(__func__,ip,c,w) 155 155 #else 156 156 #define XFS_BMAP_TRACE_EXLIST(ip,c,w) 157 157 #endif

+44 -10

fs/xfs/xfs_bmap_btree.c

··· 275 275 } 276 276 277 277 #define XFS_BMBT_TRACE_ARGBI(c,b,i) \ 278 - xfs_bmbt_trace_argbi(__FUNCTION__, c, b, i, __LINE__) 278 + xfs_bmbt_trace_argbi(__func__, c, b, i, __LINE__) 279 279 #define XFS_BMBT_TRACE_ARGBII(c,b,i,j) \ 280 - xfs_bmbt_trace_argbii(__FUNCTION__, c, b, i, j, __LINE__) 280 + xfs_bmbt_trace_argbii(__func__, c, b, i, j, __LINE__) 281 281 #define XFS_BMBT_TRACE_ARGFFFI(c,o,b,i,j) \ 282 - xfs_bmbt_trace_argfffi(__FUNCTION__, c, o, b, i, j, __LINE__) 282 + xfs_bmbt_trace_argfffi(__func__, c, o, b, i, j, __LINE__) 283 283 #define XFS_BMBT_TRACE_ARGI(c,i) \ 284 - xfs_bmbt_trace_argi(__FUNCTION__, c, i, __LINE__) 284 + xfs_bmbt_trace_argi(__func__, c, i, __LINE__) 285 285 #define XFS_BMBT_TRACE_ARGIFK(c,i,f,s) \ 286 - xfs_bmbt_trace_argifk(__FUNCTION__, c, i, f, s, __LINE__) 286 + xfs_bmbt_trace_argifk(__func__, c, i, f, s, __LINE__) 287 287 #define XFS_BMBT_TRACE_ARGIFR(c,i,f,r) \ 288 - xfs_bmbt_trace_argifr(__FUNCTION__, c, i, f, r, __LINE__) 288 + xfs_bmbt_trace_argifr(__func__, c, i, f, r, __LINE__) 289 289 #define XFS_BMBT_TRACE_ARGIK(c,i,k) \ 290 - xfs_bmbt_trace_argik(__FUNCTION__, c, i, k, __LINE__) 290 + xfs_bmbt_trace_argik(__func__, c, i, k, __LINE__) 291 291 #define XFS_BMBT_TRACE_CURSOR(c,s) \ 292 - xfs_bmbt_trace_cursor(__FUNCTION__, c, s, __LINE__) 292 + xfs_bmbt_trace_cursor(__func__, c, s, __LINE__) 293 293 #else 294 294 #define XFS_BMBT_TRACE_ARGBI(c,b,i) 295 295 #define XFS_BMBT_TRACE_ARGBII(c,b,i,j) ··· 2027 2027 2028 2028 /* 2029 2029 * Insert the current record at the point referenced by cur. 2030 + * 2031 + * A multi-level split of the tree on insert will invalidate the original 2032 + * cursor. It appears, however, that some callers assume that the cursor is 2033 + * always valid. Hence if we do a multi-level split we need to revalidate the 2034 + * cursor. 2035 + * 2036 + * When a split occurs, we will see a new cursor returned. Use that as a 2037 + * trigger to determine if we need to revalidate the original cursor. 
If we get 2038 + * a split, then use the original irec to lookup up the path of the record we 2039 + * just inserted. 2040 + * 2041 + * Note that the fact that the btree root is in the inode means that we can 2042 + * have the level of the tree change without a "split" occurring at the root 2043 + * level. What happens is that the root is migrated to an allocated block and 2044 + * the inode root is pointed to it. This means a single split can change the 2045 + * level of the tree (level 2 -> level 3) and invalidate the old cursor. Hence 2046 + * the level change should be accounted as a split so as to correctly trigger a 2047 + * revalidation of the old cursor. 2030 2048 */ 2031 2049 int /* error */ 2032 2050 xfs_bmbt_insert( ··· 2057 2039 xfs_fsblock_t nbno; 2058 2040 xfs_btree_cur_t *ncur; 2059 2041 xfs_bmbt_rec_t nrec; 2042 + xfs_bmbt_irec_t oirec; /* original irec */ 2060 2043 xfs_btree_cur_t *pcur; 2044 + int splits = 0; 2061 2045 2062 2046 XFS_BMBT_TRACE_CURSOR(cur, ENTRY); 2063 2047 level = 0; 2064 2048 nbno = NULLFSBLOCK; 2049 + oirec = cur->bc_rec.b; 2065 2050 xfs_bmbt_disk_set_all(&nrec, &cur->bc_rec.b); 2066 2051 ncur = NULL; 2067 2052 pcur = cur; ··· 2073 2052 &i))) { 2074 2053 if (pcur != cur) 2075 2054 xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR); 2076 - XFS_BMBT_TRACE_CURSOR(cur, ERROR); 2077 - return error; 2055 + goto error0; 2078 2056 } 2079 2057 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 2080 2058 if (pcur != cur && (ncur || nbno == NULLFSBLOCK)) { 2059 + /* allocating a new root is effectively a split */ 2060 + if (cur->bc_nlevels != pcur->bc_nlevels) 2061 + splits++; 2081 2062 cur->bc_nlevels = pcur->bc_nlevels; 2082 2063 cur->bc_private.b.allocated += 2083 2064 pcur->bc_private.b.allocated; ··· 2093 2070 xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR); 2094 2071 } 2095 2072 if (ncur) { 2073 + splits++; 2096 2074 pcur = ncur; 2097 2075 ncur = NULL; 2098 2076 } 2099 2077 } while (nbno != NULLFSBLOCK); 2078 + 2079 + if (splits > 1) { 2080 + /* 
revalidate the old cursor as we had a multi-level split */ 2081 + error = xfs_bmbt_lookup_eq(cur, oirec.br_startoff, 2082 + oirec.br_startblock, oirec.br_blockcount, &i); 2083 + if (error) 2084 + goto error0; 2085 + ASSERT(i == 1); 2086 + } 2087 + 2100 2088 XFS_BMBT_TRACE_CURSOR(cur, EXIT); 2101 2089 *stat = i; 2102 2090 return 0;

+6 -1

fs/xfs/xfs_buf_item.c

··· 645 645 bp = bip->bli_buf; 646 646 647 647 if (XFS_BUF_ISDELAYWRITE(bp)) { 648 - xfs_bawrite(bip->bli_item.li_mountp, bp); 648 + int error; 649 + error = xfs_bawrite(bip->bli_item.li_mountp, bp); 650 + if (error) 651 + xfs_fs_cmn_err(CE_WARN, bip->bli_item.li_mountp, 652 + "xfs_buf_item_push: pushbuf error %d on bip %p, bp %p", 653 + error, bip, bp); 649 654 } else { 650 655 xfs_buf_relse(bp); 651 656 }

+29 -35

fs/xfs/xfs_dir2.c

··· 44 44 #include "xfs_error.h" 45 45 #include "xfs_vnodeops.h" 46 46 47 + struct xfs_name xfs_name_dotdot = {"..", 2}; 47 48 48 49 void 49 50 xfs_dir_mount( ··· 147 146 xfs_dir_createname( 148 147 xfs_trans_t *tp, 149 148 xfs_inode_t *dp, 150 - char *name, 151 - int namelen, 149 + struct xfs_name *name, 152 150 xfs_ino_t inum, /* new entry inode number */ 153 151 xfs_fsblock_t *first, /* bmap's firstblock */ 154 152 xfs_bmap_free_t *flist, /* bmap's freeblock list */ ··· 162 162 return rval; 163 163 XFS_STATS_INC(xs_dir_create); 164 164 165 - args.name = name; 166 - args.namelen = namelen; 167 - args.hashval = xfs_da_hashname(name, namelen); 165 + args.name = name->name; 166 + args.namelen = name->len; 167 + args.hashval = xfs_da_hashname(name->name, name->len); 168 168 args.inumber = inum; 169 169 args.dp = dp; 170 170 args.firstblock = first; ··· 197 197 xfs_dir_lookup( 198 198 xfs_trans_t *tp, 199 199 xfs_inode_t *dp, 200 - char *name, 201 - int namelen, 200 + struct xfs_name *name, 202 201 xfs_ino_t *inum) /* out: inode number */ 203 202 { 204 203 xfs_da_args_t args; ··· 206 207 207 208 ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR); 208 209 XFS_STATS_INC(xs_dir_lookup); 210 + memset(&args, 0, sizeof(xfs_da_args_t)); 209 211 210 - args.name = name; 211 - args.namelen = namelen; 212 - args.hashval = xfs_da_hashname(name, namelen); 213 - args.inumber = 0; 212 + args.name = name->name; 213 + args.namelen = name->len; 214 + args.hashval = xfs_da_hashname(name->name, name->len); 214 215 args.dp = dp; 215 - args.firstblock = NULL; 216 - args.flist = NULL; 217 - args.total = 0; 218 216 args.whichfork = XFS_DATA_FORK; 219 217 args.trans = tp; 220 - args.justcheck = args.addname = 0; 221 218 args.oknoent = 1; 222 219 223 220 if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) ··· 242 247 xfs_dir_removename( 243 248 xfs_trans_t *tp, 244 249 xfs_inode_t *dp, 245 - char *name, 246 - int namelen, 250 + struct xfs_name *name, 247 251 xfs_ino_t ino, 248 252 xfs_fsblock_t *first, 
/* bmap's firstblock */ 249 253 xfs_bmap_free_t *flist, /* bmap's freeblock list */ ··· 255 261 ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR); 256 262 XFS_STATS_INC(xs_dir_remove); 257 263 258 - args.name = name; 259 - args.namelen = namelen; 260 - args.hashval = xfs_da_hashname(name, namelen); 264 + args.name = name->name; 265 + args.namelen = name->len; 266 + args.hashval = xfs_da_hashname(name->name, name->len); 261 267 args.inumber = ino; 262 268 args.dp = dp; 263 269 args.firstblock = first; ··· 323 329 xfs_dir_replace( 324 330 xfs_trans_t *tp, 325 331 xfs_inode_t *dp, 326 - char *name, /* name of entry to replace */ 327 - int namelen, 332 + struct xfs_name *name, /* name of entry to replace */ 328 333 xfs_ino_t inum, /* new inode number */ 329 334 xfs_fsblock_t *first, /* bmap's firstblock */ 330 335 xfs_bmap_free_t *flist, /* bmap's freeblock list */ ··· 338 345 if ((rval = xfs_dir_ino_validate(tp->t_mountp, inum))) 339 346 return rval; 340 347 341 - args.name = name; 342 - args.namelen = namelen; 343 - args.hashval = xfs_da_hashname(name, namelen); 348 + args.name = name->name; 349 + args.namelen = name->len; 350 + args.hashval = xfs_da_hashname(name->name, name->len); 344 351 args.inumber = inum; 345 352 args.dp = dp; 346 353 args.firstblock = first; ··· 367 374 368 375 /* 369 376 * See if this entry can be added to the directory without allocating space. 377 + * First checks that the caller couldn't reserve enough space (resblks = 0). 
370 378 */ 371 379 int 372 380 xfs_dir_canenter( 373 381 xfs_trans_t *tp, 374 382 xfs_inode_t *dp, 375 - char *name, /* name of entry to add */ 376 - int namelen) 383 + struct xfs_name *name, /* name of entry to add */ 384 + uint resblks) 377 385 { 378 386 xfs_da_args_t args; 379 387 int rval; 380 388 int v; /* type-checking value */ 381 389 382 - ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR); 390 + if (resblks) 391 + return 0; 383 392 384 - args.name = name; 385 - args.namelen = namelen; 386 - args.hashval = xfs_da_hashname(name, namelen); 387 - args.inumber = 0; 393 + ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR); 394 + memset(&args, 0, sizeof(xfs_da_args_t)); 395 + 396 + args.name = name->name; 397 + args.namelen = name->len; 398 + args.hashval = xfs_da_hashname(name->name, name->len); 388 399 args.dp = dp; 389 - args.firstblock = NULL; 390 - args.flist = NULL; 391 - args.total = 0; 392 400 args.whichfork = XFS_DATA_FORK; 393 401 args.trans = tp; 394 402 args.justcheck = args.addname = args.oknoent = 1;

+7 -5

fs/xfs/xfs_dir2.h

··· 59 59 */ 60 60 typedef xfs_off_t xfs_dir2_off_t; 61 61 62 + extern struct xfs_name xfs_name_dotdot; 63 + 62 64 /* 63 65 * Generic directory interface routines 64 66 */ ··· 70 68 extern int xfs_dir_init(struct xfs_trans *tp, struct xfs_inode *dp, 71 69 struct xfs_inode *pdp); 72 70 extern int xfs_dir_createname(struct xfs_trans *tp, struct xfs_inode *dp, 73 - char *name, int namelen, xfs_ino_t inum, 71 + struct xfs_name *name, xfs_ino_t inum, 74 72 xfs_fsblock_t *first, 75 73 struct xfs_bmap_free *flist, xfs_extlen_t tot); 76 74 extern int xfs_dir_lookup(struct xfs_trans *tp, struct xfs_inode *dp, 77 - char *name, int namelen, xfs_ino_t *inum); 75 + struct xfs_name *name, xfs_ino_t *inum); 78 76 extern int xfs_dir_removename(struct xfs_trans *tp, struct xfs_inode *dp, 79 - char *name, int namelen, xfs_ino_t ino, 77 + struct xfs_name *name, xfs_ino_t ino, 80 78 xfs_fsblock_t *first, 81 79 struct xfs_bmap_free *flist, xfs_extlen_t tot); 82 80 extern int xfs_dir_replace(struct xfs_trans *tp, struct xfs_inode *dp, 83 - char *name, int namelen, xfs_ino_t inum, 81 + struct xfs_name *name, xfs_ino_t inum, 84 82 xfs_fsblock_t *first, 85 83 struct xfs_bmap_free *flist, xfs_extlen_t tot); 86 84 extern int xfs_dir_canenter(struct xfs_trans *tp, struct xfs_inode *dp, 87 - char *name, int namelen); 85 + struct xfs_name *name, uint resblks); 88 86 extern int xfs_dir_ino_validate(struct xfs_mount *mp, xfs_ino_t ino); 89 87 90 88 /*

+1 -1

fs/xfs/xfs_filestream.c

··· 73 73 #define TRACE4(mp,t,a0,a1,a2,a3) TRACE6(mp,t,a0,a1,a2,a3,0,0) 74 74 #define TRACE5(mp,t,a0,a1,a2,a3,a4) TRACE6(mp,t,a0,a1,a2,a3,a4,0) 75 75 #define TRACE6(mp,t,a0,a1,a2,a3,a4,a5) \ 76 - xfs_filestreams_trace(mp, t, __FUNCTION__, __LINE__, \ 76 + xfs_filestreams_trace(mp, t, __func__, __LINE__, \ 77 77 (__psunsigned_t)a0, (__psunsigned_t)a1, \ 78 78 (__psunsigned_t)a2, (__psunsigned_t)a3, \ 79 79 (__psunsigned_t)a4, (__psunsigned_t)a5)

+29 -15

fs/xfs/xfs_ialloc.c

··· 107 107 /* 108 108 * Allocation group level functions. 109 109 */ 110 + static inline int 111 + xfs_ialloc_cluster_alignment( 112 + xfs_alloc_arg_t *args) 113 + { 114 + if (xfs_sb_version_hasalign(&args->mp->m_sb) && 115 + args->mp->m_sb.sb_inoalignmt >= 116 + XFS_B_TO_FSBT(args->mp, XFS_INODE_CLUSTER_SIZE(args->mp))) 117 + return args->mp->m_sb.sb_inoalignmt; 118 + return 1; 119 + } 110 120 111 121 /* 112 122 * Allocate new inodes in the allocation group specified by agbp. ··· 177 167 args.mod = args.total = args.wasdel = args.isfl = 178 168 args.userdata = args.minalignslop = 0; 179 169 args.prod = 1; 180 - args.alignment = 1; 170 + 181 171 /* 182 - * Allow space for the inode btree to split. 172 + * We need to take into account alignment here to ensure that 173 + * we don't modify the free list if we fail to have an exact 174 + * block. If we don't have an exact match, and every other 175 + * allocation attempt fails, we'll end up cancelling 176 + * a dirty transaction and shutting down. 177 + * 178 + * For an exact allocation, alignment must be 1, 179 + * however we need to take cluster alignment into account when 180 + * fixing up the freelist. Use the minalignslop field to 181 + * indicate that extra blocks might be required for alignment, 182 + * but not to use them in the actual exact allocation. 183 183 */ 184 + args.alignment = 1; 185 + args.minalignslop = xfs_ialloc_cluster_alignment(&args) - 1; 186 + 187 + /* Allow space for the inode btree to split. 
*/ 184 188 args.minleft = XFS_IN_MAXLEVELS(args.mp) - 1; 185 189 if ((error = xfs_alloc_vextent(&args))) 186 190 return error; ··· 215 191 ASSERT(!(args.mp->m_flags & XFS_MOUNT_NOALIGN)); 216 192 args.alignment = args.mp->m_dalign; 217 193 isaligned = 1; 218 - } else if (xfs_sb_version_hasalign(&args.mp->m_sb) && 219 - args.mp->m_sb.sb_inoalignmt >= 220 - XFS_B_TO_FSBT(args.mp, 221 - XFS_INODE_CLUSTER_SIZE(args.mp))) 222 - args.alignment = args.mp->m_sb.sb_inoalignmt; 223 - else 224 - args.alignment = 1; 194 + } else 195 + args.alignment = xfs_ialloc_cluster_alignment(&args); 225 196 /* 226 197 * Need to figure out where to allocate the inode blocks. 227 198 * Ideally they should be spaced out through the a.g. ··· 249 230 args.agbno = be32_to_cpu(agi->agi_root); 250 231 args.fsbno = XFS_AGB_TO_FSB(args.mp, 251 232 be32_to_cpu(agi->agi_seqno), args.agbno); 252 - if (xfs_sb_version_hasalign(&args.mp->m_sb) && 253 - args.mp->m_sb.sb_inoalignmt >= 254 - XFS_B_TO_FSBT(args.mp, XFS_INODE_CLUSTER_SIZE(args.mp))) 255 - args.alignment = args.mp->m_sb.sb_inoalignmt; 256 - else 257 - args.alignment = 1; 233 + args.alignment = xfs_ialloc_cluster_alignment(&args); 258 234 if ((error = xfs_alloc_vextent(&args))) 259 235 return error; 260 236 }

+3 -46

fs/xfs/xfs_iget.c

··· 78 78 xfs_inode_t *ip; 79 79 xfs_inode_t *iq; 80 80 int error; 81 - xfs_icluster_t *icl, *new_icl = NULL; 82 81 unsigned long first_index, mask; 83 82 xfs_perag_t *pag; 84 83 xfs_agino_t agino; ··· 228 229 } 229 230 230 231 /* 231 - * This is a bit messy - we preallocate everything we _might_ 232 - * need before we pick up the ici lock. That way we don't have to 233 - * juggle locks and go all the way back to the start. 232 + * Preload the radix tree so we can insert safely under the 233 + * write spinlock. 234 234 */ 235 - new_icl = kmem_zone_alloc(xfs_icluster_zone, KM_SLEEP); 236 235 if (radix_tree_preload(GFP_KERNEL)) { 237 236 xfs_idestroy(ip); 238 237 delay(1); ··· 239 242 mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1); 240 243 first_index = agino & mask; 241 244 write_lock(&pag->pag_ici_lock); 242 - 243 - /* 244 - * Find the cluster if it exists 245 - */ 246 - icl = NULL; 247 - if (radix_tree_gang_lookup(&pag->pag_ici_root, (void**)&iq, 248 - first_index, 1)) { 249 - if ((XFS_INO_TO_AGINO(mp, iq->i_ino) & mask) == first_index) 250 - icl = iq->i_cluster; 251 - } 252 - 253 245 /* 254 246 * insert the new inode 255 247 */ ··· 253 267 } 254 268 255 269 /* 256 - * These values _must_ be set before releasing ihlock! 270 + * These values _must_ be set before releasing the radix tree lock! 
257 271 */ 258 272 ip->i_udquot = ip->i_gdquot = NULL; 259 273 xfs_iflags_set(ip, XFS_INEW); 260 274 261 - ASSERT(ip->i_cluster == NULL); 262 - 263 - if (!icl) { 264 - spin_lock_init(&new_icl->icl_lock); 265 - INIT_HLIST_HEAD(&new_icl->icl_inodes); 266 - icl = new_icl; 267 - new_icl = NULL; 268 - } else { 269 - ASSERT(!hlist_empty(&icl->icl_inodes)); 270 - } 271 - spin_lock(&icl->icl_lock); 272 - hlist_add_head(&ip->i_cnode, &icl->icl_inodes); 273 - ip->i_cluster = icl; 274 - spin_unlock(&icl->icl_lock); 275 - 276 275 write_unlock(&pag->pag_ici_lock); 277 276 radix_tree_preload_end(); 278 - if (new_icl) 279 - kmem_zone_free(xfs_icluster_zone, new_icl); 280 277 281 278 /* 282 279 * Link ip to its mount and thread it on the mount's inode list. ··· 496 527 radix_tree_delete(&pag->pag_ici_root, XFS_INO_TO_AGINO(mp, ip->i_ino)); 497 528 write_unlock(&pag->pag_ici_lock); 498 529 xfs_put_perag(mp, pag); 499 - 500 - /* 501 - * Remove from cluster list 502 - */ 503 - mp = ip->i_mount; 504 - spin_lock(&ip->i_cluster->icl_lock); 505 - hlist_del(&ip->i_cnode); 506 - spin_unlock(&ip->i_cluster->icl_lock); 507 - 508 - /* was last inode in cluster? */ 509 - if (hlist_empty(&ip->i_cluster->icl_inodes)) 510 - kmem_zone_free(xfs_icluster_zone, ip->i_cluster); 511 530 512 531 /* 513 532 * Remove from mount's inode list.

+405 -428

fs/xfs/xfs_inode.c

··· 55 55 56 56 kmem_zone_t *xfs_ifork_zone; 57 57 kmem_zone_t *xfs_inode_zone; 58 - kmem_zone_t *xfs_icluster_zone; 59 58 60 59 /* 61 60 * Used in xfs_itruncate(). This is the maximum number of extents ··· 125 126 #endif 126 127 127 128 /* 129 + * Find the buffer associated with the given inode map 130 + * We do basic validation checks on the buffer once it has been 131 + * retrieved from disk. 132 + */ 133 + STATIC int 134 + xfs_imap_to_bp( 135 + xfs_mount_t *mp, 136 + xfs_trans_t *tp, 137 + xfs_imap_t *imap, 138 + xfs_buf_t **bpp, 139 + uint buf_flags, 140 + uint imap_flags) 141 + { 142 + int error; 143 + int i; 144 + int ni; 145 + xfs_buf_t *bp; 146 + 147 + error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap->im_blkno, 148 + (int)imap->im_len, buf_flags, &bp); 149 + if (error) { 150 + if (error != EAGAIN) { 151 + cmn_err(CE_WARN, 152 + "xfs_imap_to_bp: xfs_trans_read_buf()returned " 153 + "an error %d on %s. Returning error.", 154 + error, mp->m_fsname); 155 + } else { 156 + ASSERT(buf_flags & XFS_BUF_TRYLOCK); 157 + } 158 + return error; 159 + } 160 + 161 + /* 162 + * Validate the magic number and version of every inode in the buffer 163 + * (if DEBUG kernel) or the first inode in the buffer, otherwise. 
164 + */ 165 + #ifdef DEBUG 166 + ni = BBTOB(imap->im_len) >> mp->m_sb.sb_inodelog; 167 + #else /* usual case */ 168 + ni = 1; 169 + #endif 170 + 171 + for (i = 0; i < ni; i++) { 172 + int di_ok; 173 + xfs_dinode_t *dip; 174 + 175 + dip = (xfs_dinode_t *)xfs_buf_offset(bp, 176 + (i << mp->m_sb.sb_inodelog)); 177 + di_ok = be16_to_cpu(dip->di_core.di_magic) == XFS_DINODE_MAGIC && 178 + XFS_DINODE_GOOD_VERSION(dip->di_core.di_version); 179 + if (unlikely(XFS_TEST_ERROR(!di_ok, mp, 180 + XFS_ERRTAG_ITOBP_INOTOBP, 181 + XFS_RANDOM_ITOBP_INOTOBP))) { 182 + if (imap_flags & XFS_IMAP_BULKSTAT) { 183 + xfs_trans_brelse(tp, bp); 184 + return XFS_ERROR(EINVAL); 185 + } 186 + XFS_CORRUPTION_ERROR("xfs_imap_to_bp", 187 + XFS_ERRLEVEL_HIGH, mp, dip); 188 + #ifdef DEBUG 189 + cmn_err(CE_PANIC, 190 + "Device %s - bad inode magic/vsn " 191 + "daddr %lld #%d (magic=%x)", 192 + XFS_BUFTARG_NAME(mp->m_ddev_targp), 193 + (unsigned long long)imap->im_blkno, i, 194 + be16_to_cpu(dip->di_core.di_magic)); 195 + #endif 196 + xfs_trans_brelse(tp, bp); 197 + return XFS_ERROR(EFSCORRUPTED); 198 + } 199 + } 200 + 201 + xfs_inobp_check(mp, bp); 202 + 203 + /* 204 + * Mark the buffer as an inode buffer now that it looks good 205 + */ 206 + XFS_BUF_SET_VTYPE(bp, B_FS_INO); 207 + 208 + *bpp = bp; 209 + return 0; 210 + } 211 + 212 + /* 128 213 * This routine is called to map an inode number within a file 129 214 * system to the buffer containing the on-disk version of the 130 215 * inode. It returns a pointer to the buffer containing the ··· 230 147 xfs_buf_t **bpp, 231 148 int *offset) 232 149 { 233 - int di_ok; 234 150 xfs_imap_t imap; 235 151 xfs_buf_t *bp; 236 152 int error; 237 - xfs_dinode_t *dip; 238 153 239 - /* 240 - * Call the space management code to find the location of the 241 - * inode on disk. 
242 - */ 243 154 imap.im_blkno = 0; 244 155 error = xfs_imap(mp, tp, ino, &imap, XFS_IMAP_LOOKUP); 245 - if (error != 0) { 246 - cmn_err(CE_WARN, 247 - "xfs_inotobp: xfs_imap() returned an " 248 - "error %d on %s. Returning error.", error, mp->m_fsname); 156 + if (error) 249 157 return error; 250 - } 251 158 252 - /* 253 - * If the inode number maps to a block outside the bounds of the 254 - * file system then return NULL rather than calling read_buf 255 - * and panicing when we get an error from the driver. 256 - */ 257 - if ((imap.im_blkno + imap.im_len) > 258 - XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) { 259 - cmn_err(CE_WARN, 260 - "xfs_inotobp: inode number (%llu + %d) maps to a block outside the bounds " 261 - "of the file system %s. Returning EINVAL.", 262 - (unsigned long long)imap.im_blkno, 263 - imap.im_len, mp->m_fsname); 264 - return XFS_ERROR(EINVAL); 265 - } 266 - 267 - /* 268 - * Read in the buffer. If tp is NULL, xfs_trans_read_buf() will 269 - * default to just a read_buf() call. 270 - */ 271 - error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap.im_blkno, 272 - (int)imap.im_len, XFS_BUF_LOCK, &bp); 273 - 274 - if (error) { 275 - cmn_err(CE_WARN, 276 - "xfs_inotobp: xfs_trans_read_buf() returned an " 277 - "error %d on %s. Returning error.", error, mp->m_fsname); 159 + error = xfs_imap_to_bp(mp, tp, &imap, &bp, XFS_BUF_LOCK, 0); 160 + if (error) 278 161 return error; 279 - } 280 - dip = (xfs_dinode_t *)xfs_buf_offset(bp, 0); 281 - di_ok = 282 - be16_to_cpu(dip->di_core.di_magic) == XFS_DINODE_MAGIC && 283 - XFS_DINODE_GOOD_VERSION(dip->di_core.di_version); 284 - if (unlikely(XFS_TEST_ERROR(!di_ok, mp, XFS_ERRTAG_ITOBP_INOTOBP, 285 - XFS_RANDOM_ITOBP_INOTOBP))) { 286 - XFS_CORRUPTION_ERROR("xfs_inotobp", XFS_ERRLEVEL_LOW, mp, dip); 287 - xfs_trans_brelse(tp, bp); 288 - cmn_err(CE_WARN, 289 - "xfs_inotobp: XFS_TEST_ERROR() returned an " 290 - "error on %s. 
Returning EFSCORRUPTED.", mp->m_fsname); 291 - return XFS_ERROR(EFSCORRUPTED); 292 - } 293 162 294 - xfs_inobp_check(mp, bp); 295 - 296 - /* 297 - * Set *dipp to point to the on-disk inode in the buffer. 298 - */ 299 163 *dipp = (xfs_dinode_t *)xfs_buf_offset(bp, imap.im_boffset); 300 164 *bpp = bp; 301 165 *offset = imap.im_boffset; ··· 278 248 xfs_dinode_t **dipp, 279 249 xfs_buf_t **bpp, 280 250 xfs_daddr_t bno, 281 - uint imap_flags) 251 + uint imap_flags, 252 + uint buf_flags) 282 253 { 283 254 xfs_imap_t imap; 284 255 xfs_buf_t *bp; 285 256 int error; 286 - int i; 287 - int ni; 288 257 289 258 if (ip->i_blkno == (xfs_daddr_t)0) { 290 - /* 291 - * Call the space management code to find the location of the 292 - * inode on disk. 293 - */ 294 259 imap.im_blkno = bno; 295 - if ((error = xfs_imap(mp, tp, ip->i_ino, &imap, 296 - XFS_IMAP_LOOKUP | imap_flags))) 260 + error = xfs_imap(mp, tp, ip->i_ino, &imap, 261 + XFS_IMAP_LOOKUP | imap_flags); 262 + if (error) 297 263 return error; 298 - 299 - /* 300 - * If the inode number maps to a block outside the bounds 301 - * of the file system then return NULL rather than calling 302 - * read_buf and panicing when we get an error from the 303 - * driver. 304 - */ 305 - if ((imap.im_blkno + imap.im_len) > 306 - XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) { 307 - #ifdef DEBUG 308 - xfs_fs_cmn_err(CE_ALERT, mp, "xfs_itobp: " 309 - "(imap.im_blkno (0x%llx) " 310 - "+ imap.im_len (0x%llx)) > " 311 - " XFS_FSB_TO_BB(mp, " 312 - "mp->m_sb.sb_dblocks) (0x%llx)", 313 - (unsigned long long) imap.im_blkno, 314 - (unsigned long long) imap.im_len, 315 - XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)); 316 - #endif /* DEBUG */ 317 - return XFS_ERROR(EINVAL); 318 - } 319 264 320 265 /* 321 266 * Fill in the fields in the inode that will be used to ··· 310 305 } 311 306 ASSERT(bno == 0 || bno == imap.im_blkno); 312 307 313 - /* 314 - * Read in the buffer. If tp is NULL, xfs_trans_read_buf() will 315 - * default to just a read_buf() call. 
316 - */ 317 - error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap.im_blkno, 318 - (int)imap.im_len, XFS_BUF_LOCK, &bp); 319 - if (error) { 320 - #ifdef DEBUG 321 - xfs_fs_cmn_err(CE_ALERT, mp, "xfs_itobp: " 322 - "xfs_trans_read_buf() returned error %d, " 323 - "imap.im_blkno 0x%llx, imap.im_len 0x%llx", 324 - error, (unsigned long long) imap.im_blkno, 325 - (unsigned long long) imap.im_len); 326 - #endif /* DEBUG */ 308 + error = xfs_imap_to_bp(mp, tp, &imap, &bp, buf_flags, imap_flags); 309 + if (error) 327 310 return error; 311 + 312 + if (!bp) { 313 + ASSERT(buf_flags & XFS_BUF_TRYLOCK); 314 + ASSERT(tp == NULL); 315 + *bpp = NULL; 316 + return EAGAIN; 328 317 } 329 318 330 - /* 331 - * Validate the magic number and version of every inode in the buffer 332 - * (if DEBUG kernel) or the first inode in the buffer, otherwise. 333 - * No validation is done here in userspace (xfs_repair). 334 - */ 335 - #if !defined(__KERNEL__) 336 - ni = 0; 337 - #elif defined(DEBUG) 338 - ni = BBTOB(imap.im_len) >> mp->m_sb.sb_inodelog; 339 - #else /* usual case */ 340 - ni = 1; 341 - #endif 342 - 343 - for (i = 0; i < ni; i++) { 344 - int di_ok; 345 - xfs_dinode_t *dip; 346 - 347 - dip = (xfs_dinode_t *)xfs_buf_offset(bp, 348 - (i << mp->m_sb.sb_inodelog)); 349 - di_ok = be16_to_cpu(dip->di_core.di_magic) == XFS_DINODE_MAGIC && 350 - XFS_DINODE_GOOD_VERSION(dip->di_core.di_version); 351 - if (unlikely(XFS_TEST_ERROR(!di_ok, mp, 352 - XFS_ERRTAG_ITOBP_INOTOBP, 353 - XFS_RANDOM_ITOBP_INOTOBP))) { 354 - if (imap_flags & XFS_IMAP_BULKSTAT) { 355 - xfs_trans_brelse(tp, bp); 356 - return XFS_ERROR(EINVAL); 357 - } 358 - #ifdef DEBUG 359 - cmn_err(CE_ALERT, 360 - "Device %s - bad inode magic/vsn " 361 - "daddr %lld #%d (magic=%x)", 362 - XFS_BUFTARG_NAME(mp->m_ddev_targp), 363 - (unsigned long long)imap.im_blkno, i, 364 - be16_to_cpu(dip->di_core.di_magic)); 365 - #endif 366 - XFS_CORRUPTION_ERROR("xfs_itobp", XFS_ERRLEVEL_HIGH, 367 - mp, dip); 368 - xfs_trans_brelse(tp, bp); 369 
- return XFS_ERROR(EFSCORRUPTED); 370 - } 371 - } 372 - 373 - xfs_inobp_check(mp, bp); 374 - 375 - /* 376 - * Mark the buffer as an inode buffer now that it looks good 377 - */ 378 - XFS_BUF_SET_VTYPE(bp, B_FS_INO); 379 - 380 - /* 381 - * Set *dipp to point to the on-disk inode in the buffer. 382 - */ 383 319 *dipp = (xfs_dinode_t *)xfs_buf_offset(bp, imap.im_boffset); 384 320 *bpp = bp; 385 321 return 0; ··· 824 878 * return NULL as well. Set i_blkno to 0 so that xfs_itobp() will 825 879 * know that this is a new incore inode. 826 880 */ 827 - error = xfs_itobp(mp, tp, ip, &dip, &bp, bno, imap_flags); 881 + error = xfs_itobp(mp, tp, ip, &dip, &bp, bno, imap_flags, XFS_BUF_LOCK); 828 882 if (error) { 829 883 kmem_zone_free(xfs_inode_zone, ip); 830 884 return error; ··· 1464 1518 } 1465 1519 1466 1520 /* 1467 - * Shrink the file to the given new_size. The new 1468 - * size must be smaller than the current size. 1469 - * This will free up the underlying blocks 1470 - * in the removed range after a call to xfs_itruncate_start() 1471 - * or xfs_atruncate_start(). 1521 + * Shrink the file to the given new_size. The new size must be smaller than 1522 + * the current size. This will free up the underlying blocks in the removed 1523 + * range after a call to xfs_itruncate_start() or xfs_atruncate_start(). 1472 1524 * 1473 - * The transaction passed to this routine must have made 1474 - * a permanent log reservation of at least XFS_ITRUNCATE_LOG_RES. 1475 - * This routine may commit the given transaction and 1476 - * start new ones, so make sure everything involved in 1477 - * the transaction is tidy before calling here. 1478 - * Some transaction will be returned to the caller to be 1479 - * committed. The incoming transaction must already include 1480 - * the inode, and both inode locks must be held exclusively. 1481 - * The inode must also be "held" within the transaction. On 1482 - * return the inode will be "held" within the returned transaction. 
1483 - * This routine does NOT require any disk space to be reserved 1484 - * for it within the transaction. 1525 + * The transaction passed to this routine must have made a permanent log 1526 + * reservation of at least XFS_ITRUNCATE_LOG_RES. This routine may commit the 1527 + * given transaction and start new ones, so make sure everything involved in 1528 + * the transaction is tidy before calling here. Some transaction will be 1529 + * returned to the caller to be committed. The incoming transaction must 1530 + * already include the inode, and both inode locks must be held exclusively. 1531 + * The inode must also be "held" within the transaction. On return the inode 1532 + * will be "held" within the returned transaction. This routine does NOT 1533 + * require any disk space to be reserved for it within the transaction. 1485 1534 * 1486 - * The fork parameter must be either xfs_attr_fork or xfs_data_fork, 1487 - * and it indicates the fork which is to be truncated. For the 1488 - * attribute fork we only support truncation to size 0. 1535 + * The fork parameter must be either xfs_attr_fork or xfs_data_fork, and it 1536 + * indicates the fork which is to be truncated. For the attribute fork we only 1537 + * support truncation to size 0. 1489 1538 * 1490 - * We use the sync parameter to indicate whether or not the first 1491 - * transaction we perform might have to be synchronous. For the attr fork, 1492 - * it needs to be so if the unlink of the inode is not yet known to be 1493 - * permanent in the log. This keeps us from freeing and reusing the 1494 - * blocks of the attribute fork before the unlink of the inode becomes 1495 - * permanent. 1539 + * We use the sync parameter to indicate whether or not the first transaction 1540 + * we perform might have to be synchronous. For the attr fork, it needs to be 1541 + * so if the unlink of the inode is not yet known to be permanent in the log. 
1542 + * This keeps us from freeing and reusing the blocks of the attribute fork 1543 + * before the unlink of the inode becomes permanent. 1496 1544 * 1497 - * For the data fork, we normally have to run synchronously if we're 1498 - * being called out of the inactive path or we're being called 1499 - * out of the create path where we're truncating an existing file. 1500 - * Either way, the truncate needs to be sync so blocks don't reappear 1501 - * in the file with altered data in case of a crash. wsync filesystems 1502 - * can run the first case async because anything that shrinks the inode 1503 - * has to run sync so by the time we're called here from inactive, the 1504 - * inode size is permanently set to 0. 1545 + * For the data fork, we normally have to run synchronously if we're being 1546 + * called out of the inactive path or we're being called out of the create path 1547 + * where we're truncating an existing file. Either way, the truncate needs to 1548 + * be sync so blocks don't reappear in the file with altered data in case of a 1549 + * crash. wsync filesystems can run the first case async because anything that 1550 + * shrinks the inode has to run sync so by the time we're called here from 1551 + * inactive, the inode size is permanently set to 0. 1505 1552 * 1506 - * Calls from the truncate path always need to be sync unless we're 1507 - * in a wsync filesystem and the file has already been unlinked. 1553 + * Calls from the truncate path always need to be sync unless we're in a wsync 1554 + * filesystem and the file has already been unlinked. 1508 1555 * 1509 - * The caller is responsible for correctly setting the sync parameter. 1510 - * It gets too hard for us to guess here which path we're being called 1511 - * out of just based on inode state. 1556 + * The caller is responsible for correctly setting the sync parameter. It gets 1557 + * too hard for us to guess here which path we're being called out of just 1558 + * based on inode state. 
1559 + * 1560 + * If we get an error, we must return with the inode locked and linked into the 1561 + * current transaction. This keeps things simple for the higher level code, 1562 + * because it always knows that the inode is locked and held in the transaction 1563 + * that returns to it whether errors occur or not. We don't mark the inode 1564 + * dirty on error so that transactions can be easily aborted if possible. 1512 1565 */ 1513 1566 int 1514 1567 xfs_itruncate_finish( ··· 1686 1741 */ 1687 1742 error = xfs_bmap_finish(tp, &free_list, &committed); 1688 1743 ntp = *tp; 1744 + if (committed) { 1745 + /* link the inode into the next xact in the chain */ 1746 + xfs_trans_ijoin(ntp, ip, 1747 + XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); 1748 + xfs_trans_ihold(ntp, ip); 1749 + } 1750 + 1689 1751 if (error) { 1690 1752 /* 1691 - * If the bmap finish call encounters an error, 1692 - * return to the caller where the transaction 1693 - * can be properly aborted. We just need to 1694 - * make sure we're not holding any resources 1695 - * that we were not when we came in. 1753 + * If the bmap finish call encounters an error, return 1754 + * to the caller where the transaction can be properly 1755 + * aborted. We just need to make sure we're not 1756 + * holding any resources that we were not when we came 1757 + * in. 1696 1758 * 1697 - * Aborting from this point might lose some 1698 - * blocks in the file system, but oh well. 1759 + * Aborting from this point might lose some blocks in 1760 + * the file system, but oh well. 1699 1761 */ 1700 1762 xfs_bmap_cancel(&free_list); 1701 - if (committed) { 1702 - /* 1703 - * If the passed in transaction committed 1704 - * in xfs_bmap_finish(), then we want to 1705 - * add the inode to this one before returning. 1706 - * This keeps things simple for the higher 1707 - * level code, because it always knows that 1708 - * the inode is locked and held in the 1709 - * transaction that returns to it whether 1710 - * errors occur or not. 
We don't mark the 1711 - * inode dirty so that this transaction can 1712 - * be easily aborted if possible. 1713 - */ 1714 - xfs_trans_ijoin(ntp, ip, 1715 - XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); 1716 - xfs_trans_ihold(ntp, ip); 1717 - } 1718 1763 return error; 1719 1764 } 1720 1765 1721 1766 if (committed) { 1722 1767 /* 1723 - * The first xact was committed, 1724 - * so add the inode to the new one. 1725 - * Mark it dirty so it will be logged 1726 - * and moved forward in the log as 1727 - * part of every commit. 1768 + * Mark the inode dirty so it will be logged and 1769 + * moved forward in the log as part of every commit. 1728 1770 */ 1729 - xfs_trans_ijoin(ntp, ip, 1730 - XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); 1731 - xfs_trans_ihold(ntp, ip); 1732 1771 xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE); 1733 1772 } 1773 + 1734 1774 ntp = xfs_trans_dup(ntp); 1735 - (void) xfs_trans_commit(*tp, 0); 1775 + error = xfs_trans_commit(*tp, 0); 1736 1776 *tp = ntp; 1737 - error = xfs_trans_reserve(ntp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0, 1738 - XFS_TRANS_PERM_LOG_RES, 1739 - XFS_ITRUNCATE_LOG_COUNT); 1740 - /* 1741 - * Add the inode being truncated to the next chained 1742 - * transaction. 1743 - */ 1777 + 1778 + /* link the inode into the next transaction in the chain */ 1744 1779 xfs_trans_ijoin(ntp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); 1745 1780 xfs_trans_ihold(ntp, ip); 1781 + 1782 + if (!error) 1783 + error = xfs_trans_reserve(ntp, 0, 1784 + XFS_ITRUNCATE_LOG_RES(mp), 0, 1785 + XFS_TRANS_PERM_LOG_RES, 1786 + XFS_ITRUNCATE_LOG_COUNT); 1746 1787 if (error) 1747 - return (error); 1788 + return error; 1748 1789 } 1749 1790 /* 1750 1791 * Only update the size in the case of the data fork, but ··· 1898 1967 * Here we put the head pointer into our next pointer, 1899 1968 * and then we fall through to point the head at us. 
1900 1969 */ 1901 - error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0); 1970 + error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0, XFS_BUF_LOCK); 1902 1971 if (error) 1903 1972 return error; 1904 1973 ··· 2006 2075 * of dealing with the buffer when there is no need to 2007 2076 * change it. 2008 2077 */ 2009 - error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0); 2078 + error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0, XFS_BUF_LOCK); 2010 2079 if (error) { 2011 2080 cmn_err(CE_WARN, 2012 2081 "xfs_iunlink_remove: xfs_itobp() returned an error %d on %s. Returning error.", ··· 2068 2137 * Now last_ibp points to the buffer previous to us on 2069 2138 * the unlinked list. Pull us from the list. 2070 2139 */ 2071 - error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0); 2140 + error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0, XFS_BUF_LOCK); 2072 2141 if (error) { 2073 2142 cmn_err(CE_WARN, 2074 2143 "xfs_iunlink_remove: xfs_itobp() returned an error %d on %s. Returning error.", ··· 2101 2170 xfs_inobp_check(mp, last_ibp); 2102 2171 } 2103 2172 return 0; 2104 - } 2105 - 2106 - STATIC_INLINE int xfs_inode_clean(xfs_inode_t *ip) 2107 - { 2108 - return (((ip->i_itemp == NULL) || 2109 - !(ip->i_itemp->ili_format.ilf_fields & XFS_ILOG_ALL)) && 2110 - (ip->i_update_core == 0)); 2111 2173 } 2112 2174 2113 2175 STATIC void ··· 2324 2400 2325 2401 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 2326 2402 2327 - error = xfs_itobp(ip->i_mount, tp, ip, &dip, &ibp, 0, 0); 2403 + error = xfs_itobp(ip->i_mount, tp, ip, &dip, &ibp, 0, 0, XFS_BUF_LOCK); 2328 2404 if (error) 2329 2405 return error; 2330 2406 ··· 2602 2678 fsbno = imap->im_blkno ? 
2603 2679 XFS_DADDR_TO_FSB(mp, imap->im_blkno) : NULLFSBLOCK; 2604 2680 error = xfs_dilocate(mp, tp, ino, &fsbno, &len, &off, flags); 2605 - if (error != 0) { 2681 + if (error) 2606 2682 return error; 2607 - } 2683 + 2608 2684 imap->im_blkno = XFS_FSB_TO_DADDR(mp, fsbno); 2609 2685 imap->im_len = XFS_FSB_TO_BB(mp, len); 2610 2686 imap->im_agblkno = XFS_FSB_TO_AGBNO(mp, fsbno); 2611 2687 imap->im_ioffset = (ushort)off; 2612 2688 imap->im_boffset = (ushort)(off << mp->m_sb.sb_inodelog); 2689 + 2690 + /* 2691 + * If the inode number maps to a block outside the bounds 2692 + * of the file system then return NULL rather than calling 2693 + * read_buf and panicing when we get an error from the 2694 + * driver. 2695 + */ 2696 + if ((imap->im_blkno + imap->im_len) > 2697 + XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) { 2698 + xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: " 2699 + "(imap->im_blkno (0x%llx) + imap->im_len (0x%llx)) > " 2700 + " XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks) (0x%llx)", 2701 + (unsigned long long) imap->im_blkno, 2702 + (unsigned long long) imap->im_len, 2703 + XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)); 2704 + return EINVAL; 2705 + } 2613 2706 return 0; 2614 2707 } 2615 2708 ··· 2767 2826 } 2768 2827 2769 2828 /* 2770 - * This is called to wait for the given inode to be unpinned. 2771 - * It will sleep until this happens. The caller must have the 2772 - * inode locked in at least shared mode so that the buffer cannot 2773 - * be subsequently pinned once someone is waiting for it to be 2774 - * unpinned. 2829 + * This is called to unpin an inode. It can be directed to wait or to return 2830 + * immediately without waiting for the inode to be unpinned. The caller must 2831 + * have the inode locked in at least shared mode so that the buffer cannot be 2832 + * subsequently pinned once someone is waiting for it to be unpinned. 
2775 2833 */ 2776 2834 STATIC void 2835 + __xfs_iunpin_wait( 2836 + xfs_inode_t *ip, 2837 + int wait) 2838 + { 2839 + xfs_inode_log_item_t *iip = ip->i_itemp; 2840 + 2841 + ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE | MR_ACCESS)); 2842 + if (atomic_read(&ip->i_pincount) == 0) 2843 + return; 2844 + 2845 + /* Give the log a push to start the unpinning I/O */ 2846 + xfs_log_force(ip->i_mount, (iip && iip->ili_last_lsn) ? 2847 + iip->ili_last_lsn : 0, XFS_LOG_FORCE); 2848 + if (wait) 2849 + wait_event(ip->i_ipin_wait, (atomic_read(&ip->i_pincount) == 0)); 2850 + } 2851 + 2852 + static inline void 2777 2853 xfs_iunpin_wait( 2778 2854 xfs_inode_t *ip) 2779 2855 { 2780 - xfs_inode_log_item_t *iip; 2781 - xfs_lsn_t lsn; 2856 + __xfs_iunpin_wait(ip, 1); 2857 + } 2782 2858 2783 - ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE | MR_ACCESS)); 2784 - 2785 - if (atomic_read(&ip->i_pincount) == 0) { 2786 - return; 2787 - } 2788 - 2789 - iip = ip->i_itemp; 2790 - if (iip && iip->ili_last_lsn) { 2791 - lsn = iip->ili_last_lsn; 2792 - } else { 2793 - lsn = (xfs_lsn_t)0; 2794 - } 2795 - 2796 - /* 2797 - * Give the log a push so we don't wait here too long. 2798 - */ 2799 - xfs_log_force(ip->i_mount, lsn, XFS_LOG_FORCE); 2800 - 2801 - wait_event(ip->i_ipin_wait, (atomic_read(&ip->i_pincount) == 0)); 2859 + static inline void 2860 + xfs_iunpin_nowait( 2861 + xfs_inode_t *ip) 2862 + { 2863 + __xfs_iunpin_wait(ip, 0); 2802 2864 } 2803 2865 2804 2866 ··· 2876 2932 * format indicates the current state of the fork. 
2877 2933 */ 2878 2934 /*ARGSUSED*/ 2879 - STATIC int 2935 + STATIC void 2880 2936 xfs_iflush_fork( 2881 2937 xfs_inode_t *ip, 2882 2938 xfs_dinode_t *dip, ··· 2897 2953 static const short extflag[2] = 2898 2954 { XFS_ILOG_DEXT, XFS_ILOG_AEXT }; 2899 2955 2900 - if (iip == NULL) 2901 - return 0; 2956 + if (!iip) 2957 + return; 2902 2958 ifp = XFS_IFORK_PTR(ip, whichfork); 2903 2959 /* 2904 2960 * This can happen if we gave up in iformat in an error path, 2905 2961 * for the attribute fork. 2906 2962 */ 2907 - if (ifp == NULL) { 2963 + if (!ifp) { 2908 2964 ASSERT(whichfork == XFS_ATTR_FORK); 2909 - return 0; 2965 + return; 2910 2966 } 2911 2967 cp = XFS_DFORK_PTR(dip, whichfork); 2912 2968 mp = ip->i_mount; ··· 2967 3023 ASSERT(0); 2968 3024 break; 2969 3025 } 3026 + } 2970 3027 3028 + STATIC int 3029 + xfs_iflush_cluster( 3030 + xfs_inode_t *ip, 3031 + xfs_buf_t *bp) 3032 + { 3033 + xfs_mount_t *mp = ip->i_mount; 3034 + xfs_perag_t *pag = xfs_get_perag(mp, ip->i_ino); 3035 + unsigned long first_index, mask; 3036 + int ilist_size; 3037 + xfs_inode_t **ilist; 3038 + xfs_inode_t *iq; 3039 + int nr_found; 3040 + int clcount = 0; 3041 + int bufwasdelwri; 3042 + int i; 3043 + 3044 + ASSERT(pag->pagi_inodeok); 3045 + ASSERT(pag->pag_ici_init); 3046 + 3047 + ilist_size = XFS_INODE_CLUSTER_SIZE(mp) * sizeof(xfs_inode_t *); 3048 + ilist = kmem_alloc(ilist_size, KM_MAYFAIL); 3049 + if (!ilist) 3050 + return 0; 3051 + 3052 + mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1); 3053 + first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask; 3054 + read_lock(&pag->pag_ici_lock); 3055 + /* really need a gang lookup range call here */ 3056 + nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void**)ilist, 3057 + first_index, 3058 + XFS_INODE_CLUSTER_SIZE(mp)); 3059 + if (nr_found == 0) 3060 + goto out_free; 3061 + 3062 + for (i = 0; i < nr_found; i++) { 3063 + iq = ilist[i]; 3064 + if (iq == ip) 3065 + continue; 3066 + /* if the inode lies outside this cluster, 
we're done. */ 3067 + if ((XFS_INO_TO_AGINO(mp, iq->i_ino) & mask) != first_index) 3068 + break; 3069 + /* 3070 + * Do an un-protected check to see if the inode is dirty and 3071 + * is a candidate for flushing. These checks will be repeated 3072 + * later after the appropriate locks are acquired. 3073 + */ 3074 + if (xfs_inode_clean(iq) && xfs_ipincount(iq) == 0) 3075 + continue; 3076 + 3077 + /* 3078 + * Try to get locks. If any are unavailable or it is pinned, 3079 + * then this inode cannot be flushed and is skipped. 3080 + */ 3081 + 3082 + if (!xfs_ilock_nowait(iq, XFS_ILOCK_SHARED)) 3083 + continue; 3084 + if (!xfs_iflock_nowait(iq)) { 3085 + xfs_iunlock(iq, XFS_ILOCK_SHARED); 3086 + continue; 3087 + } 3088 + if (xfs_ipincount(iq)) { 3089 + xfs_ifunlock(iq); 3090 + xfs_iunlock(iq, XFS_ILOCK_SHARED); 3091 + continue; 3092 + } 3093 + 3094 + /* 3095 + * arriving here means that this inode can be flushed. First 3096 + * re-check that it's dirty before flushing. 3097 + */ 3098 + if (!xfs_inode_clean(iq)) { 3099 + int error; 3100 + error = xfs_iflush_int(iq, bp); 3101 + if (error) { 3102 + xfs_iunlock(iq, XFS_ILOCK_SHARED); 3103 + goto cluster_corrupt_out; 3104 + } 3105 + clcount++; 3106 + } else { 3107 + xfs_ifunlock(iq); 3108 + } 3109 + xfs_iunlock(iq, XFS_ILOCK_SHARED); 3110 + } 3111 + 3112 + if (clcount) { 3113 + XFS_STATS_INC(xs_icluster_flushcnt); 3114 + XFS_STATS_ADD(xs_icluster_flushinode, clcount); 3115 + } 3116 + 3117 + out_free: 3118 + read_unlock(&pag->pag_ici_lock); 3119 + kmem_free(ilist, ilist_size); 2971 3120 return 0; 3121 + 3122 + 3123 + cluster_corrupt_out: 3124 + /* 3125 + * Corruption detected in the clustering loop. Invalidate the 3126 + * inode buffer and shut down the filesystem. 3127 + */ 3128 + read_unlock(&pag->pag_ici_lock); 3129 + /* 3130 + * Clean up the buffer. If it was B_DELWRI, just release it -- 3131 + * brelse can handle it with no problems. If not, shut down the 3132 + * filesystem before releasing the buffer. 
3133 + */ 3134 + bufwasdelwri = XFS_BUF_ISDELAYWRITE(bp); 3135 + if (bufwasdelwri) 3136 + xfs_buf_relse(bp); 3137 + 3138 + xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); 3139 + 3140 + if (!bufwasdelwri) { 3141 + /* 3142 + * Just like incore_relse: if we have b_iodone functions, 3143 + * mark the buffer as an error and call them. Otherwise 3144 + * mark it as stale and brelse. 3145 + */ 3146 + if (XFS_BUF_IODONE_FUNC(bp)) { 3147 + XFS_BUF_CLR_BDSTRAT_FUNC(bp); 3148 + XFS_BUF_UNDONE(bp); 3149 + XFS_BUF_STALE(bp); 3150 + XFS_BUF_SHUT(bp); 3151 + XFS_BUF_ERROR(bp,EIO); 3152 + xfs_biodone(bp); 3153 + } else { 3154 + XFS_BUF_STALE(bp); 3155 + xfs_buf_relse(bp); 3156 + } 3157 + } 3158 + 3159 + /* 3160 + * Unlocks the flush lock 3161 + */ 3162 + xfs_iflush_abort(iq); 3163 + kmem_free(ilist, ilist_size); 3164 + return XFS_ERROR(EFSCORRUPTED); 2972 3165 } 2973 3166 2974 3167 /* ··· 3127 3046 xfs_dinode_t *dip; 3128 3047 xfs_mount_t *mp; 3129 3048 int error; 3130 - /* REFERENCED */ 3131 - xfs_inode_t *iq; 3132 - int clcount; /* count of inodes clustered */ 3133 - int bufwasdelwri; 3134 - struct hlist_node *entry; 3049 + int noblock = (flags == XFS_IFLUSH_ASYNC_NOBLOCK); 3135 3050 enum { INT_DELWRI = (1 << 0), INT_ASYNC = (1 << 1) }; 3136 3051 3137 3052 XFS_STATS_INC(xs_iflush_count); ··· 3144 3067 * If the inode isn't dirty, then just release the inode 3145 3068 * flush lock and do nothing. 3146 3069 */ 3147 - if ((ip->i_update_core == 0) && 3148 - ((iip == NULL) || !(iip->ili_format.ilf_fields & XFS_ILOG_ALL))) { 3070 + if (xfs_inode_clean(ip)) { 3149 3071 ASSERT((iip != NULL) ? 3150 3072 !(iip->ili_item.li_flags & XFS_LI_IN_AIL) : 1); 3151 3073 xfs_ifunlock(ip); ··· 3152 3076 } 3153 3077 3154 3078 /* 3155 - * We can't flush the inode until it is unpinned, so 3156 - * wait for it. We know noone new can pin it, because 3157 - * we are holding the inode lock shared and you need 3158 - * to hold it exclusively to pin the inode. 
3079 + * We can't flush the inode until it is unpinned, so wait for it if we 3080 + * are allowed to block. We know noone new can pin it, because we are 3081 + * holding the inode lock shared and you need to hold it exclusively to 3082 + * pin the inode. 3083 + * 3084 + * If we are not allowed to block, force the log out asynchronously so 3085 + * that when we come back the inode will be unpinned. If other inodes 3086 + * in the same cluster are dirty, they will probably write the inode 3087 + * out for us if they occur after the log force completes. 3159 3088 */ 3089 + if (noblock && xfs_ipincount(ip)) { 3090 + xfs_iunpin_nowait(ip); 3091 + xfs_ifunlock(ip); 3092 + return EAGAIN; 3093 + } 3160 3094 xfs_iunpin_wait(ip); 3161 3095 3162 3096 /* ··· 3180 3094 iip->ili_format.ilf_fields = 0; 3181 3095 xfs_ifunlock(ip); 3182 3096 return XFS_ERROR(EIO); 3183 - } 3184 - 3185 - /* 3186 - * Get the buffer containing the on-disk inode. 3187 - */ 3188 - error = xfs_itobp(mp, NULL, ip, &dip, &bp, 0, 0); 3189 - if (error) { 3190 - xfs_ifunlock(ip); 3191 - return error; 3192 3097 } 3193 3098 3194 3099 /* ··· 3198 3121 case XFS_IFLUSH_DELWRI_ELSE_SYNC: 3199 3122 flags = 0; 3200 3123 break; 3124 + case XFS_IFLUSH_ASYNC_NOBLOCK: 3201 3125 case XFS_IFLUSH_ASYNC: 3202 3126 case XFS_IFLUSH_DELWRI_ELSE_ASYNC: 3203 3127 flags = INT_ASYNC; ··· 3218 3140 case XFS_IFLUSH_DELWRI: 3219 3141 flags = INT_DELWRI; 3220 3142 break; 3143 + case XFS_IFLUSH_ASYNC_NOBLOCK: 3221 3144 case XFS_IFLUSH_ASYNC: 3222 3145 flags = INT_ASYNC; 3223 3146 break; ··· 3233 3154 } 3234 3155 3235 3156 /* 3157 + * Get the buffer containing the on-disk inode. 3158 + */ 3159 + error = xfs_itobp(mp, NULL, ip, &dip, &bp, 0, 0, 3160 + noblock ? XFS_BUF_TRYLOCK : XFS_BUF_LOCK); 3161 + if (error || !bp) { 3162 + xfs_ifunlock(ip); 3163 + return error; 3164 + } 3165 + 3166 + /* 3236 3167 * First flush out the inode that xfs_iflush was called with. 
3237 3168 */ 3238 3169 error = xfs_iflush_int(ip, bp); 3239 - if (error) { 3170 + if (error) 3240 3171 goto corrupt_out; 3241 - } 3172 + 3173 + /* 3174 + * If the buffer is pinned then push on the log now so we won't 3175 + * get stuck waiting in the write for too long. 3176 + */ 3177 + if (XFS_BUF_ISPINNED(bp)) 3178 + xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE); 3242 3179 3243 3180 /* 3244 3181 * inode clustering: 3245 3182 * see if other inodes can be gathered into this write 3246 3183 */ 3247 - spin_lock(&ip->i_cluster->icl_lock); 3248 - ip->i_cluster->icl_buf = bp; 3249 - 3250 - clcount = 0; 3251 - hlist_for_each_entry(iq, entry, &ip->i_cluster->icl_inodes, i_cnode) { 3252 - if (iq == ip) 3253 - continue; 3254 - 3255 - /* 3256 - * Do an un-protected check to see if the inode is dirty and 3257 - * is a candidate for flushing. These checks will be repeated 3258 - * later after the appropriate locks are acquired. 3259 - */ 3260 - iip = iq->i_itemp; 3261 - if ((iq->i_update_core == 0) && 3262 - ((iip == NULL) || 3263 - !(iip->ili_format.ilf_fields & XFS_ILOG_ALL)) && 3264 - xfs_ipincount(iq) == 0) { 3265 - continue; 3266 - } 3267 - 3268 - /* 3269 - * Try to get locks. If any are unavailable, 3270 - * then this inode cannot be flushed and is skipped. 3271 - */ 3272 - 3273 - /* get inode locks (just i_lock) */ 3274 - if (xfs_ilock_nowait(iq, XFS_ILOCK_SHARED)) { 3275 - /* get inode flush lock */ 3276 - if (xfs_iflock_nowait(iq)) { 3277 - /* check if pinned */ 3278 - if (xfs_ipincount(iq) == 0) { 3279 - /* arriving here means that 3280 - * this inode can be flushed. 
3281 - * first re-check that it's 3282 - * dirty 3283 - */ 3284 - iip = iq->i_itemp; 3285 - if ((iq->i_update_core != 0)|| 3286 - ((iip != NULL) && 3287 - (iip->ili_format.ilf_fields & XFS_ILOG_ALL))) { 3288 - clcount++; 3289 - error = xfs_iflush_int(iq, bp); 3290 - if (error) { 3291 - xfs_iunlock(iq, 3292 - XFS_ILOCK_SHARED); 3293 - goto cluster_corrupt_out; 3294 - } 3295 - } else { 3296 - xfs_ifunlock(iq); 3297 - } 3298 - } else { 3299 - xfs_ifunlock(iq); 3300 - } 3301 - } 3302 - xfs_iunlock(iq, XFS_ILOCK_SHARED); 3303 - } 3304 - } 3305 - spin_unlock(&ip->i_cluster->icl_lock); 3306 - 3307 - if (clcount) { 3308 - XFS_STATS_INC(xs_icluster_flushcnt); 3309 - XFS_STATS_ADD(xs_icluster_flushinode, clcount); 3310 - } 3311 - 3312 - /* 3313 - * If the buffer is pinned then push on the log so we won't 3314 - * get stuck waiting in the write for too long. 3315 - */ 3316 - if (XFS_BUF_ISPINNED(bp)){ 3317 - xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE); 3318 - } 3184 + error = xfs_iflush_cluster(ip, bp); 3185 + if (error) 3186 + goto cluster_corrupt_out; 3319 3187 3320 3188 if (flags & INT_DELWRI) { 3321 3189 xfs_bdwrite(mp, bp); 3322 3190 } else if (flags & INT_ASYNC) { 3323 - xfs_bawrite(mp, bp); 3191 + error = xfs_bawrite(mp, bp); 3324 3192 } else { 3325 3193 error = xfs_bwrite(mp, bp); 3326 3194 } ··· 3276 3250 corrupt_out: 3277 3251 xfs_buf_relse(bp); 3278 3252 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); 3279 - xfs_iflush_abort(ip); 3280 - /* 3281 - * Unlocks the flush lock 3282 - */ 3283 - return XFS_ERROR(EFSCORRUPTED); 3284 - 3285 3253 cluster_corrupt_out: 3286 - /* Corruption detected in the clustering loop. Invalidate the 3287 - * inode buffer and shut down the filesystem. 3288 - */ 3289 - spin_unlock(&ip->i_cluster->icl_lock); 3290 - 3291 - /* 3292 - * Clean up the buffer. If it was B_DELWRI, just release it -- 3293 - * brelse can handle it with no problems. If not, shut down the 3294 - * filesystem before releasing the buffer. 
3295 - */ 3296 - if ((bufwasdelwri= XFS_BUF_ISDELAYWRITE(bp))) { 3297 - xfs_buf_relse(bp); 3298 - } 3299 - 3300 - xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); 3301 - 3302 - if(!bufwasdelwri) { 3303 - /* 3304 - * Just like incore_relse: if we have b_iodone functions, 3305 - * mark the buffer as an error and call them. Otherwise 3306 - * mark it as stale and brelse. 3307 - */ 3308 - if (XFS_BUF_IODONE_FUNC(bp)) { 3309 - XFS_BUF_CLR_BDSTRAT_FUNC(bp); 3310 - XFS_BUF_UNDONE(bp); 3311 - XFS_BUF_STALE(bp); 3312 - XFS_BUF_SHUT(bp); 3313 - XFS_BUF_ERROR(bp,EIO); 3314 - xfs_biodone(bp); 3315 - } else { 3316 - XFS_BUF_STALE(bp); 3317 - xfs_buf_relse(bp); 3318 - } 3319 - } 3320 - 3321 - xfs_iflush_abort(iq); 3322 3254 /* 3323 3255 * Unlocks the flush lock 3324 3256 */ 3257 + xfs_iflush_abort(ip); 3325 3258 return XFS_ERROR(EFSCORRUPTED); 3326 3259 } 3327 3260 ··· 3310 3325 * If the inode isn't dirty, then just release the inode 3311 3326 * flush lock and do nothing. 3312 3327 */ 3313 - if ((ip->i_update_core == 0) && 3314 - ((iip == NULL) || !(iip->ili_format.ilf_fields & XFS_ILOG_ALL))) { 3328 + if (xfs_inode_clean(ip)) { 3315 3329 xfs_ifunlock(ip); 3316 3330 return 0; 3317 3331 } ··· 3443 3459 } 3444 3460 } 3445 3461 3446 - if (xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK, bp) == EFSCORRUPTED) { 3447 - goto corrupt_out; 3448 - } 3449 - 3450 - if (XFS_IFORK_Q(ip)) { 3451 - /* 3452 - * The only error from xfs_iflush_fork is on the data fork. 3453 - */ 3454 - (void) xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK, bp); 3455 - } 3462 + xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK, bp); 3463 + if (XFS_IFORK_Q(ip)) 3464 + xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK, bp); 3456 3465 xfs_inobp_check(mp, bp); 3457 3466 3458 3467 /*

+2 -21

fs/xfs/xfs_inode.h

··· 133 133 } dm_attrs_t; 134 134 135 135 /* 136 - * This is the xfs inode cluster structure. This structure is used by 137 - * xfs_iflush to find inodes that share a cluster and can be flushed to disk at 138 - * the same time. 139 - */ 140 - typedef struct xfs_icluster { 141 - struct hlist_head icl_inodes; /* list of inodes on cluster */ 142 - xfs_daddr_t icl_blkno; /* starting block number of 143 - * the cluster */ 144 - struct xfs_buf *icl_buf; /* the inode buffer */ 145 - spinlock_t icl_lock; /* inode list lock */ 146 - } xfs_icluster_t; 147 - 148 - /* 149 136 * This is the xfs in-core inode structure. 150 137 * Most of the on-disk inode is embedded in the i_d field. 151 138 * ··· 227 240 atomic_t i_pincount; /* inode pin count */ 228 241 wait_queue_head_t i_ipin_wait; /* inode pinning wait queue */ 229 242 spinlock_t i_flags_lock; /* inode i_flags lock */ 230 - #ifdef HAVE_REFCACHE 231 - struct xfs_inode **i_refcache; /* ptr to entry in ref cache */ 232 - struct xfs_inode *i_release; /* inode to unref */ 233 - #endif 234 243 /* Miscellaneous state. */ 235 244 unsigned short i_flags; /* see defined flags below */ 236 245 unsigned char i_update_core; /* timestamps/size is dirty */ ··· 235 252 unsigned int i_delayed_blks; /* count of delay alloc blks */ 236 253 237 254 xfs_icdinode_t i_d; /* most of ondisk inode */ 238 - xfs_icluster_t *i_cluster; /* cluster list header */ 239 - struct hlist_node i_cnode; /* cluster link node */ 240 255 241 256 xfs_fsize_t i_size; /* in-memory size */ 242 257 xfs_fsize_t i_new_size; /* size when write completes */ ··· 442 461 #define XFS_IFLUSH_SYNC 3 443 462 #define XFS_IFLUSH_ASYNC 4 444 463 #define XFS_IFLUSH_DELWRI 5 464 + #define XFS_IFLUSH_ASYNC_NOBLOCK 6 445 465 446 466 /* 447 467 * Flags for xfs_itruncate_start(). 
··· 497 515 */ 498 516 int xfs_itobp(struct xfs_mount *, struct xfs_trans *, 499 517 xfs_inode_t *, struct xfs_dinode **, struct xfs_buf **, 500 - xfs_daddr_t, uint); 518 + xfs_daddr_t, uint, uint); 501 519 int xfs_iread(struct xfs_mount *, struct xfs_trans *, xfs_ino_t, 502 520 xfs_inode_t **, xfs_daddr_t, uint); 503 521 int xfs_iread_extents(struct xfs_trans *, xfs_inode_t *, int); ··· 579 597 #define xfs_inobp_check(mp, bp) 580 598 #endif /* DEBUG */ 581 599 582 - extern struct kmem_zone *xfs_icluster_zone; 583 600 extern struct kmem_zone *xfs_ifork_zone; 584 601 extern struct kmem_zone *xfs_inode_zone; 585 602 extern struct kmem_zone *xfs_ili_zone;

+7 -1

fs/xfs/xfs_inode_item.c

··· 40 40 #include "xfs_btree.h" 41 41 #include "xfs_ialloc.h" 42 42 #include "xfs_rw.h" 43 + #include "xfs_error.h" 43 44 44 45 45 46 kmem_zone_t *xfs_ili_zone; /* inode log item zone */ ··· 814 813 XFS_LOG_FORCE); 815 814 } 816 815 if (dopush) { 817 - xfs_bawrite(mp, bp); 816 + int error; 817 + error = xfs_bawrite(mp, bp); 818 + if (error) 819 + xfs_fs_cmn_err(CE_WARN, mp, 820 + "xfs_inode_item_pushbuf: pushbuf error %d on iip %p, bp %p", 821 + error, iip, bp); 818 822 } else { 819 823 xfs_buf_relse(bp); 820 824 }

+8

fs/xfs/xfs_inode_item.h

··· 168 168 return (w == XFS_DATA_FORK ? XFS_ILOG_DEXT : XFS_ILOG_AEXT); 169 169 } 170 170 171 + static inline int xfs_inode_clean(xfs_inode_t *ip) 172 + { 173 + return (!ip->i_itemp || 174 + !(ip->i_itemp->ili_format.ilf_fields & XFS_ILOG_ALL)) && 175 + !ip->i_update_core; 176 + } 177 + 178 + 171 179 #ifdef __KERNEL__ 172 180 173 181 extern void xfs_inode_item_init(struct xfs_inode *, struct xfs_mount *);

+5 -2

fs/xfs/xfs_iomap.c

··· 802 802 */ 803 803 nimaps = 1; 804 804 end_fsb = XFS_B_TO_FSB(mp, ip->i_size); 805 - xfs_bmap_last_offset(NULL, ip, &last_block, 806 - XFS_DATA_FORK); 805 + error = xfs_bmap_last_offset(NULL, ip, &last_block, 806 + XFS_DATA_FORK); 807 + if (error) 808 + goto trans_cancel; 809 + 807 810 last_block = XFS_FILEOFF_MAX(last_block, end_fsb); 808 811 if ((map_start_fsb + count_fsb) > last_block) { 809 812 count_fsb = last_block - map_start_fsb;

+3 -4

fs/xfs/xfs_itable.c

··· 129 129 return error; 130 130 } 131 131 132 - STATIC int 132 + STATIC void 133 133 xfs_bulkstat_one_dinode( 134 134 xfs_mount_t *mp, /* mount point for filesystem */ 135 135 xfs_ino_t ino, /* inode number to get data for */ ··· 198 198 buf->bs_blocks = be64_to_cpu(dic->di_nblocks); 199 199 break; 200 200 } 201 - 202 - return 0; 203 201 } 204 202 205 203 STATIC int ··· 612 614 xfs_buf_relse(bp); 613 615 error = xfs_itobp(mp, NULL, ip, 614 616 &dip, &bp, bno, 615 - XFS_IMAP_BULKSTAT); 617 + XFS_IMAP_BULKSTAT, 618 + XFS_BUF_LOCK); 616 619 if (!error) 617 620 clustidx = ip->i_boffset / mp->m_sb.sb_inodesize; 618 621 kmem_zone_free(xfs_inode_zone, ip);

+84 -175

fs/xfs/xfs_log.c

··· 41 41 #include "xfs_inode.h" 42 42 #include "xfs_rw.h" 43 43 44 + kmem_zone_t *xfs_log_ticket_zone; 44 45 45 46 #define xlog_write_adv_cnt(ptr, len, off, bytes) \ 46 47 { (ptr) += (bytes); \ ··· 74 73 xlog_ticket_t *ticket, 75 74 int *continued_write, 76 75 int *logoffsetp); 77 - STATIC void xlog_state_put_ticket(xlog_t *log, 78 - xlog_ticket_t *tic); 79 76 STATIC int xlog_state_release_iclog(xlog_t *log, 80 77 xlog_in_core_t *iclog); 81 78 STATIC void xlog_state_switch_iclogs(xlog_t *log, ··· 100 101 101 102 102 103 /* local ticket functions */ 103 - STATIC void xlog_state_ticket_alloc(xlog_t *log); 104 104 STATIC xlog_ticket_t *xlog_ticket_get(xlog_t *log, 105 105 int unit_bytes, 106 106 int count, ··· 328 330 */ 329 331 xlog_trace_loggrant(log, ticket, "xfs_log_done: (non-permanent)"); 330 332 xlog_ungrant_log_space(log, ticket); 331 - xlog_state_put_ticket(log, ticket); 333 + xlog_ticket_put(log, ticket); 332 334 } else { 333 335 xlog_trace_loggrant(log, ticket, "xfs_log_done: (permanent)"); 334 336 xlog_regrant_reserve_log_space(log, ticket); ··· 382 384 return xlog_state_sync_all(log, flags, log_flushed); 383 385 else 384 386 return xlog_state_sync(log, lsn, flags, log_flushed); 385 - } /* xfs_log_force */ 387 + } /* _xfs_log_force */ 388 + 389 + /* 390 + * Wrapper for _xfs_log_force(), to be used when caller doesn't care 391 + * about errors or whether the log was flushed or not. This is the normal 392 + * interface to use when trying to unpin items or move the log forward. 
393 + */ 394 + void 395 + xfs_log_force( 396 + xfs_mount_t *mp, 397 + xfs_lsn_t lsn, 398 + uint flags) 399 + { 400 + int error; 401 + error = _xfs_log_force(mp, lsn, flags, NULL); 402 + if (error) { 403 + xfs_fs_cmn_err(CE_WARN, mp, "xfs_log_force: " 404 + "error %d returned.", error); 405 + } 406 + } 407 + 386 408 387 409 /* 388 410 * Attaches a new iclog I/O completion callback routine during ··· 415 397 void *iclog_hndl, /* iclog to hang callback off */ 416 398 xfs_log_callback_t *cb) 417 399 { 418 - xlog_t *log = mp->m_log; 419 400 xlog_in_core_t *iclog = (xlog_in_core_t *)iclog_hndl; 420 401 int abortflg; 421 402 422 - cb->cb_next = NULL; 423 - spin_lock(&log->l_icloglock); 403 + spin_lock(&iclog->ic_callback_lock); 424 404 abortflg = (iclog->ic_state & XLOG_STATE_IOERROR); 425 405 if (!abortflg) { 426 406 ASSERT_ALWAYS((iclog->ic_state == XLOG_STATE_ACTIVE) || ··· 427 411 *(iclog->ic_callback_tail) = cb; 428 412 iclog->ic_callback_tail = &(cb->cb_next); 429 413 } 430 - spin_unlock(&log->l_icloglock); 414 + spin_unlock(&iclog->ic_callback_lock); 431 415 return abortflg; 432 416 } /* xfs_log_notify */ 433 417 ··· 487 471 /* may sleep if need to allocate more tickets */ 488 472 internal_ticket = xlog_ticket_get(log, unit_bytes, cnt, 489 473 client, flags); 474 + if (!internal_ticket) 475 + return XFS_ERROR(ENOMEM); 490 476 internal_ticket->t_trans_type = t_type; 491 477 *ticket = internal_ticket; 492 478 xlog_trace_loggrant(log, internal_ticket, ··· 654 636 if (mp->m_flags & XFS_MOUNT_RDONLY) 655 637 return 0; 656 638 657 - xfs_log_force(mp, 0, XFS_LOG_FORCE|XFS_LOG_SYNC); 639 + error = _xfs_log_force(mp, 0, XFS_LOG_FORCE|XFS_LOG_SYNC, NULL); 640 + ASSERT(error || !(XLOG_FORCED_SHUTDOWN(log))); 658 641 659 642 #ifdef DEBUG 660 643 first_iclog = iclog = log->l_iclog; ··· 694 675 695 676 spin_lock(&log->l_icloglock); 696 677 iclog = log->l_iclog; 697 - iclog->ic_refcnt++; 678 + atomic_inc(&iclog->ic_refcnt); 698 679 spin_unlock(&log->l_icloglock); 699 680 
xlog_state_want_sync(log, iclog); 700 - (void) xlog_state_release_iclog(log, iclog); 681 + error = xlog_state_release_iclog(log, iclog); 701 682 702 683 spin_lock(&log->l_icloglock); 703 684 if (!(iclog->ic_state == XLOG_STATE_ACTIVE || ··· 714 695 if (tic) { 715 696 xlog_trace_loggrant(log, tic, "unmount rec"); 716 697 xlog_ungrant_log_space(log, tic); 717 - xlog_state_put_ticket(log, tic); 698 + xlog_ticket_put(log, tic); 718 699 } 719 700 } else { 720 701 /* ··· 732 713 */ 733 714 spin_lock(&log->l_icloglock); 734 715 iclog = log->l_iclog; 735 - iclog->ic_refcnt++; 716 + atomic_inc(&iclog->ic_refcnt); 736 717 spin_unlock(&log->l_icloglock); 737 718 738 719 xlog_state_want_sync(log, iclog); 739 - (void) xlog_state_release_iclog(log, iclog); 720 + error = xlog_state_release_iclog(log, iclog); 740 721 741 722 spin_lock(&log->l_icloglock); 742 723 ··· 751 732 } 752 733 } 753 734 754 - return 0; 735 + return error; 755 736 } /* xfs_log_unmount_write */ 756 737 757 738 /* ··· 1229 1210 spin_lock_init(&log->l_icloglock); 1230 1211 spin_lock_init(&log->l_grant_lock); 1231 1212 initnsema(&log->l_flushsema, 0, "ic-flush"); 1232 - xlog_state_ticket_alloc(log); /* wait until after icloglock inited */ 1233 1213 1234 1214 /* log record size must be multiple of BBSIZE; see xlog_rec_header_t */ 1235 1215 ASSERT((XFS_BUF_SIZE(bp) & BBMASK) == 0); ··· 1258 1240 XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1); 1259 1241 iclog->ic_bp = bp; 1260 1242 iclog->hic_data = bp->b_addr; 1261 - 1243 + #ifdef DEBUG 1262 1244 log->l_iclog_bak[i] = (xfs_caddr_t)&(iclog->ic_header); 1263 - 1245 + #endif 1264 1246 head = &iclog->ic_header; 1265 1247 memset(head, 0, sizeof(xlog_rec_header_t)); 1266 1248 head->h_magicno = cpu_to_be32(XLOG_HEADER_MAGIC_NUM); ··· 1271 1253 head->h_fmt = cpu_to_be32(XLOG_FMT); 1272 1254 memcpy(&head->h_fs_uuid, &mp->m_sb.sb_uuid, sizeof(uuid_t)); 1273 1255 1274 - 1275 1256 iclog->ic_size = XFS_BUF_SIZE(bp) - log->l_iclog_hsize; 1276 1257 iclog->ic_state = 
XLOG_STATE_ACTIVE; 1277 1258 iclog->ic_log = log; 1259 + atomic_set(&iclog->ic_refcnt, 0); 1260 + spin_lock_init(&iclog->ic_callback_lock); 1278 1261 iclog->ic_callback_tail = &(iclog->ic_callback); 1279 1262 iclog->ic_datap = (char *)iclog->hic_data + log->l_iclog_hsize; 1280 1263 ··· 1424 1405 int v2 = xfs_sb_version_haslogv2(&log->l_mp->m_sb); 1425 1406 1426 1407 XFS_STATS_INC(xs_log_writes); 1427 - ASSERT(iclog->ic_refcnt == 0); 1408 + ASSERT(atomic_read(&iclog->ic_refcnt) == 0); 1428 1409 1429 1410 /* Add for LR header */ 1430 1411 count_init = log->l_iclog_hsize + iclog->ic_offset; ··· 1557 1538 xlog_dealloc_log(xlog_t *log) 1558 1539 { 1559 1540 xlog_in_core_t *iclog, *next_iclog; 1560 - xlog_ticket_t *tic, *next_tic; 1561 1541 int i; 1562 1542 1563 1543 iclog = log->l_iclog; ··· 1577 1559 spinlock_destroy(&log->l_icloglock); 1578 1560 spinlock_destroy(&log->l_grant_lock); 1579 1561 1580 - /* XXXsup take a look at this again. */ 1581 - if ((log->l_ticket_cnt != log->l_ticket_tcnt) && 1582 - !XLOG_FORCED_SHUTDOWN(log)) { 1583 - xfs_fs_cmn_err(CE_WARN, log->l_mp, 1584 - "xlog_dealloc_log: (cnt: %d, total: %d)", 1585 - log->l_ticket_cnt, log->l_ticket_tcnt); 1586 - /* ASSERT(log->l_ticket_cnt == log->l_ticket_tcnt); */ 1587 - 1588 - } else { 1589 - tic = log->l_unmount_free; 1590 - while (tic) { 1591 - next_tic = tic->t_next; 1592 - kmem_free(tic, PAGE_SIZE); 1593 - tic = next_tic; 1594 - } 1595 - } 1596 1562 xfs_buf_free(log->l_xbuf); 1597 1563 #ifdef XFS_LOG_TRACE 1598 1564 if (log->l_trace != NULL) { ··· 1989 1987 if (iclog->ic_state == XLOG_STATE_DIRTY) { 1990 1988 iclog->ic_state = XLOG_STATE_ACTIVE; 1991 1989 iclog->ic_offset = 0; 1992 - iclog->ic_callback = NULL; /* don't need to free */ 1990 + ASSERT(iclog->ic_callback == NULL); 1993 1991 /* 1994 1992 * If the number of ops in this iclog indicate it just 1995 1993 * contains the dummy transaction, we can ··· 2192 2190 be64_to_cpu(iclog->ic_header.h_lsn); 2193 2191 spin_unlock(&log->l_grant_lock); 2194 
2192 2195 - /* 2196 - * Keep processing entries in the callback list 2197 - * until we come around and it is empty. We 2198 - * need to atomically see that the list is 2199 - * empty and change the state to DIRTY so that 2200 - * we don't miss any more callbacks being added. 2201 - */ 2202 - spin_lock(&log->l_icloglock); 2203 2193 } else { 2194 + spin_unlock(&log->l_icloglock); 2204 2195 ioerrors++; 2205 2196 } 2206 - cb = iclog->ic_callback; 2207 2197 2198 + /* 2199 + * Keep processing entries in the callback list until 2200 + * we come around and it is empty. We need to 2201 + * atomically see that the list is empty and change the 2202 + * state to DIRTY so that we don't miss any more 2203 + * callbacks being added. 2204 + */ 2205 + spin_lock(&iclog->ic_callback_lock); 2206 + cb = iclog->ic_callback; 2208 2207 while (cb) { 2209 2208 iclog->ic_callback_tail = &(iclog->ic_callback); 2210 2209 iclog->ic_callback = NULL; 2211 - spin_unlock(&log->l_icloglock); 2210 + spin_unlock(&iclog->ic_callback_lock); 2212 2211 2213 2212 /* perform callbacks in the order given */ 2214 2213 for (; cb; cb = cb_next) { 2215 2214 cb_next = cb->cb_next; 2216 2215 cb->cb_func(cb->cb_arg, aborted); 2217 2216 } 2218 - spin_lock(&log->l_icloglock); 2217 + spin_lock(&iclog->ic_callback_lock); 2219 2218 cb = iclog->ic_callback; 2220 2219 } 2221 2220 2222 2221 loopdidcallbacks++; 2223 2222 funcdidcallbacks++; 2224 2223 2224 + spin_lock(&log->l_icloglock); 2225 2225 ASSERT(iclog->ic_callback == NULL); 2226 + spin_unlock(&iclog->ic_callback_lock); 2226 2227 if (!(iclog->ic_state & XLOG_STATE_IOERROR)) 2227 2228 iclog->ic_state = XLOG_STATE_DIRTY; 2228 2229 ··· 2246 2241 repeats = 0; 2247 2242 xfs_fs_cmn_err(CE_WARN, log->l_mp, 2248 2243 "%s: possible infinite loop (%d iterations)", 2249 - __FUNCTION__, flushcnt); 2244 + __func__, flushcnt); 2250 2245 } 2251 2246 } while (!ioerrors && loopdidcallbacks); 2252 2247 ··· 2314 2309 2315 2310 ASSERT(iclog->ic_state == XLOG_STATE_SYNCING || 2316 2311 
iclog->ic_state == XLOG_STATE_IOERROR); 2317 - ASSERT(iclog->ic_refcnt == 0); 2312 + ASSERT(atomic_read(&iclog->ic_refcnt) == 0); 2318 2313 ASSERT(iclog->ic_bwritecnt == 1 || iclog->ic_bwritecnt == 2); 2319 2314 2320 2315 ··· 2396 2391 ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE); 2397 2392 head = &iclog->ic_header; 2398 2393 2399 - iclog->ic_refcnt++; /* prevents sync */ 2394 + atomic_inc(&iclog->ic_refcnt); /* prevents sync */ 2400 2395 log_offset = iclog->ic_offset; 2401 2396 2402 2397 /* On the 1st write to an iclog, figure out lsn. This works ··· 2428 2423 xlog_state_switch_iclogs(log, iclog, iclog->ic_size); 2429 2424 2430 2425 /* If I'm the only one writing to this iclog, sync it to disk */ 2431 - if (iclog->ic_refcnt == 1) { 2426 + if (atomic_read(&iclog->ic_refcnt) == 1) { 2432 2427 spin_unlock(&log->l_icloglock); 2433 2428 if ((error = xlog_state_release_iclog(log, iclog))) 2434 2429 return error; 2435 2430 } else { 2436 - iclog->ic_refcnt--; 2431 + atomic_dec(&iclog->ic_refcnt); 2437 2432 spin_unlock(&log->l_icloglock); 2438 2433 } 2439 2434 goto restart; ··· 2797 2792 2798 2793 2799 2794 /* 2800 - * Atomically put back used ticket. 2801 - */ 2802 - STATIC void 2803 - xlog_state_put_ticket(xlog_t *log, 2804 - xlog_ticket_t *tic) 2805 - { 2806 - spin_lock(&log->l_icloglock); 2807 - xlog_ticket_put(log, tic); 2808 - spin_unlock(&log->l_icloglock); 2809 - } /* xlog_state_put_ticket */ 2810 - 2811 - /* 2812 2795 * Flush iclog to disk if this is the last reference to the given iclog and 2813 2796 * the WANT_SYNC bit is set. 2814 2797 * ··· 2806 2813 * 2807 2814 */ 2808 2815 STATIC int 2809 - xlog_state_release_iclog(xlog_t *log, 2810 - xlog_in_core_t *iclog) 2816 + xlog_state_release_iclog( 2817 + xlog_t *log, 2818 + xlog_in_core_t *iclog) 2811 2819 { 2812 2820 int sync = 0; /* do we sync? 
*/ 2813 2821 2814 - xlog_assign_tail_lsn(log->l_mp); 2822 + if (iclog->ic_state & XLOG_STATE_IOERROR) 2823 + return XFS_ERROR(EIO); 2815 2824 2816 - spin_lock(&log->l_icloglock); 2825 + ASSERT(atomic_read(&iclog->ic_refcnt) > 0); 2826 + if (!atomic_dec_and_lock(&iclog->ic_refcnt, &log->l_icloglock)) 2827 + return 0; 2817 2828 2818 2829 if (iclog->ic_state & XLOG_STATE_IOERROR) { 2819 2830 spin_unlock(&log->l_icloglock); 2820 2831 return XFS_ERROR(EIO); 2821 2832 } 2822 - 2823 - ASSERT(iclog->ic_refcnt > 0); 2824 2833 ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE || 2825 2834 iclog->ic_state == XLOG_STATE_WANT_SYNC); 2826 2835 2827 - if (--iclog->ic_refcnt == 0 && 2828 - iclog->ic_state == XLOG_STATE_WANT_SYNC) { 2836 + if (iclog->ic_state == XLOG_STATE_WANT_SYNC) { 2837 + /* update tail before writing to iclog */ 2838 + xlog_assign_tail_lsn(log->l_mp); 2829 2839 sync++; 2830 2840 iclog->ic_state = XLOG_STATE_SYNCING; 2831 2841 iclog->ic_header.h_tail_lsn = cpu_to_be64(log->l_tail_lsn); 2832 2842 xlog_verify_tail_lsn(log, iclog, log->l_tail_lsn); 2833 2843 /* cycle incremented when incrementing curr_block */ 2834 2844 } 2835 - 2836 2845 spin_unlock(&log->l_icloglock); 2837 2846 2838 2847 /* ··· 2844 2849 * this iclog has consistent data, so we ignore IOERROR 2845 2850 * flags after this point. 2846 2851 */ 2847 - if (sync) { 2852 + if (sync) 2848 2853 return xlog_sync(log, iclog); 2849 - } 2850 2854 return 0; 2851 - 2852 2855 } /* xlog_state_release_iclog */ 2853 2856 2854 2857 ··· 2946 2953 * previous iclog and go to sleep. 
2947 2954 */ 2948 2955 if (iclog->ic_state == XLOG_STATE_DIRTY || 2949 - (iclog->ic_refcnt == 0 && iclog->ic_offset == 0)) { 2956 + (atomic_read(&iclog->ic_refcnt) == 0 2957 + && iclog->ic_offset == 0)) { 2950 2958 iclog = iclog->ic_prev; 2951 2959 if (iclog->ic_state == XLOG_STATE_ACTIVE || 2952 2960 iclog->ic_state == XLOG_STATE_DIRTY) ··· 2955 2961 else 2956 2962 goto maybe_sleep; 2957 2963 } else { 2958 - if (iclog->ic_refcnt == 0) { 2964 + if (atomic_read(&iclog->ic_refcnt) == 0) { 2959 2965 /* We are the only one with access to this 2960 2966 * iclog. Flush it out now. There should 2961 2967 * be a roundoff of zero to show that someone 2962 2968 * has already taken care of the roundoff from 2963 2969 * the previous sync. 2964 2970 */ 2965 - iclog->ic_refcnt++; 2971 + atomic_inc(&iclog->ic_refcnt); 2966 2972 lsn = be64_to_cpu(iclog->ic_header.h_lsn); 2967 2973 xlog_state_switch_iclogs(log, iclog, 0); 2968 2974 spin_unlock(&log->l_icloglock); ··· 3094 3100 already_slept = 1; 3095 3101 goto try_again; 3096 3102 } else { 3097 - iclog->ic_refcnt++; 3103 + atomic_inc(&iclog->ic_refcnt); 3098 3104 xlog_state_switch_iclogs(log, iclog, 0); 3099 3105 spin_unlock(&log->l_icloglock); 3100 3106 if (xlog_state_release_iclog(log, iclog)) ··· 3166 3172 */ 3167 3173 3168 3174 /* 3169 - * Algorithm doesn't take into account page size. ;-( 3170 - */ 3171 - STATIC void 3172 - xlog_state_ticket_alloc(xlog_t *log) 3173 - { 3174 - xlog_ticket_t *t_list; 3175 - xlog_ticket_t *next; 3176 - xfs_caddr_t buf; 3177 - uint i = (PAGE_SIZE / sizeof(xlog_ticket_t)) - 2; 3178 - 3179 - /* 3180 - * The kmem_zalloc may sleep, so we shouldn't be holding the 3181 - * global lock. XXXmiken: may want to use zone allocator. 
3182 - */ 3183 - buf = (xfs_caddr_t) kmem_zalloc(PAGE_SIZE, KM_SLEEP); 3184 - 3185 - spin_lock(&log->l_icloglock); 3186 - 3187 - /* Attach 1st ticket to Q, so we can keep track of allocated memory */ 3188 - t_list = (xlog_ticket_t *)buf; 3189 - t_list->t_next = log->l_unmount_free; 3190 - log->l_unmount_free = t_list++; 3191 - log->l_ticket_cnt++; 3192 - log->l_ticket_tcnt++; 3193 - 3194 - /* Next ticket becomes first ticket attached to ticket free list */ 3195 - if (log->l_freelist != NULL) { 3196 - ASSERT(log->l_tail != NULL); 3197 - log->l_tail->t_next = t_list; 3198 - } else { 3199 - log->l_freelist = t_list; 3200 - } 3201 - log->l_ticket_cnt++; 3202 - log->l_ticket_tcnt++; 3203 - 3204 - /* Cycle through rest of alloc'ed memory, building up free Q */ 3205 - for ( ; i > 0; i--) { 3206 - next = t_list + 1; 3207 - t_list->t_next = next; 3208 - t_list = next; 3209 - log->l_ticket_cnt++; 3210 - log->l_ticket_tcnt++; 3211 - } 3212 - t_list->t_next = NULL; 3213 - log->l_tail = t_list; 3214 - spin_unlock(&log->l_icloglock); 3215 - } /* xlog_state_ticket_alloc */ 3216 - 3217 - 3218 - /* 3219 - * Put ticket into free list 3220 - * 3221 - * Assumption: log lock is held around this call. 3175 + * Free a used ticket. 3222 3176 */ 3223 3177 STATIC void 3224 3178 xlog_ticket_put(xlog_t *log, 3225 3179 xlog_ticket_t *ticket) 3226 3180 { 3227 3181 sv_destroy(&ticket->t_sema); 3228 - 3229 - /* 3230 - * Don't think caching will make that much difference. It's 3231 - * more important to make debug easier. 
3232 - */ 3233 - #if 0 3234 - /* real code will want to use LIFO for caching */ 3235 - ticket->t_next = log->l_freelist; 3236 - log->l_freelist = ticket; 3237 - /* no need to clear fields */ 3238 - #else 3239 - /* When we debug, it is easier if tickets are cycled */ 3240 - ticket->t_next = NULL; 3241 - if (log->l_tail) { 3242 - log->l_tail->t_next = ticket; 3243 - } else { 3244 - ASSERT(log->l_freelist == NULL); 3245 - log->l_freelist = ticket; 3246 - } 3247 - log->l_tail = ticket; 3248 - #endif /* DEBUG */ 3249 - log->l_ticket_cnt++; 3182 + kmem_zone_free(xfs_log_ticket_zone, ticket); 3250 3183 } /* xlog_ticket_put */ 3251 3184 3252 3185 3253 3186 /* 3254 - * Grab ticket off freelist or allocation some more 3187 + * Allocate and initialise a new log ticket. 3255 3188 */ 3256 3189 STATIC xlog_ticket_t * 3257 3190 xlog_ticket_get(xlog_t *log, ··· 3190 3269 xlog_ticket_t *tic; 3191 3270 uint num_headers; 3192 3271 3193 - alloc: 3194 - if (log->l_freelist == NULL) 3195 - xlog_state_ticket_alloc(log); /* potentially sleep */ 3196 - 3197 - spin_lock(&log->l_icloglock); 3198 - if (log->l_freelist == NULL) { 3199 - spin_unlock(&log->l_icloglock); 3200 - goto alloc; 3201 - } 3202 - tic = log->l_freelist; 3203 - log->l_freelist = tic->t_next; 3204 - if (log->l_freelist == NULL) 3205 - log->l_tail = NULL; 3206 - log->l_ticket_cnt--; 3207 - spin_unlock(&log->l_icloglock); 3272 + tic = kmem_zone_zalloc(xfs_log_ticket_zone, KM_SLEEP|KM_MAYFAIL); 3273 + if (!tic) 3274 + return NULL; 3208 3275 3209 3276 /* 3210 3277 * Permanent reservations have up to 'cnt'-1 active log operations ··· 3520 3611 * before we mark the filesystem SHUTDOWN and wake 3521 3612 * everybody up to tell the bad news. 3522 3613 */ 3523 - spin_lock(&log->l_grant_lock); 3524 3614 spin_lock(&log->l_icloglock); 3615 + spin_lock(&log->l_grant_lock); 3525 3616 mp->m_flags |= XFS_MOUNT_FS_SHUTDOWN; 3526 3617 XFS_BUF_DONE(mp->m_sb_bp); 3527 3618 /*

+3 -2

fs/xfs/xfs_log.h

··· 142 142 xfs_lsn_t lsn, 143 143 uint flags, 144 144 int *log_forced); 145 - #define xfs_log_force(mp, lsn, flags) \ 146 - _xfs_log_force(mp, lsn, flags, NULL); 145 + void xfs_log_force(struct xfs_mount *mp, 146 + xfs_lsn_t lsn, 147 + uint flags); 147 148 int xfs_log_mount(struct xfs_mount *mp, 148 149 struct xfs_buftarg *log_target, 149 150 xfs_daddr_t start_block,

+73 -52

fs/xfs/xfs_log_priv.h

··· 242 242 243 243 typedef struct xlog_ticket { 244 244 sv_t t_sema; /* sleep on this semaphore : 20 */ 245 - struct xlog_ticket *t_next; /* :4|8 */ 245 + struct xlog_ticket *t_next; /* :4|8 */ 246 246 struct xlog_ticket *t_prev; /* :4|8 */ 247 247 xlog_tid_t t_tid; /* transaction identifier : 4 */ 248 248 int t_curr_res; /* current reservation in bytes : 4 */ ··· 324 324 * - ic_offset is the current number of bytes written to in this iclog. 325 325 * - ic_refcnt is bumped when someone is writing to the log. 326 326 * - ic_state is the state of the iclog. 327 + * 328 + * Because of cacheline contention on large machines, we need to separate 329 + * various resources onto different cachelines. To start with, make the 330 + * structure cacheline aligned. The following fields can be contended on 331 + * by independent processes: 332 + * 333 + * - ic_callback_* 334 + * - ic_refcnt 335 + * - fields protected by the global l_icloglock 336 + * 337 + * so we need to ensure that these fields are located in separate cachelines. 338 + * We'll put all the read-only and l_icloglock fields in the first cacheline, 339 + * and move everything else out to subsequent cachelines. 
327 340 */ 328 341 typedef struct xlog_iclog_fields { 329 342 sv_t ic_forcesema; ··· 345 332 struct xlog_in_core *ic_prev; 346 333 struct xfs_buf *ic_bp; 347 334 struct log *ic_log; 348 - xfs_log_callback_t *ic_callback; 349 - xfs_log_callback_t **ic_callback_tail; 350 - #ifdef XFS_LOG_TRACE 351 - struct ktrace *ic_trace; 352 - #endif 353 335 int ic_size; 354 336 int ic_offset; 355 - int ic_refcnt; 356 337 int ic_bwritecnt; 357 338 ushort_t ic_state; 358 339 char *ic_datap; /* pointer to iclog data */ 340 + #ifdef XFS_LOG_TRACE 341 + struct ktrace *ic_trace; 342 + #endif 343 + 344 + /* Callback structures need their own cacheline */ 345 + spinlock_t ic_callback_lock ____cacheline_aligned_in_smp; 346 + xfs_log_callback_t *ic_callback; 347 + xfs_log_callback_t **ic_callback_tail; 348 + 349 + /* reference counts need their own cacheline */ 350 + atomic_t ic_refcnt ____cacheline_aligned_in_smp; 359 351 } xlog_iclog_fields_t; 360 352 361 353 typedef union xlog_in_core2 { ··· 384 366 #define ic_bp hic_fields.ic_bp 385 367 #define ic_log hic_fields.ic_log 386 368 #define ic_callback hic_fields.ic_callback 369 + #define ic_callback_lock hic_fields.ic_callback_lock 387 370 #define ic_callback_tail hic_fields.ic_callback_tail 388 371 #define ic_trace hic_fields.ic_trace 389 372 #define ic_size hic_fields.ic_size ··· 402 383 * that round off problems won't occur when releasing partial reservations. 
403 384 */ 404 385 typedef struct log { 405 - /* The following block of fields are changed while holding icloglock */ 406 - sema_t l_flushsema; /* iclog flushing semaphore */ 407 - int l_flushcnt; /* # of procs waiting on this 408 - * sema */ 409 - int l_ticket_cnt; /* free ticket count */ 410 - int l_ticket_tcnt; /* total ticket count */ 411 - int l_covered_state;/* state of "covering disk 412 - * log entries" */ 413 - xlog_ticket_t *l_freelist; /* free list of tickets */ 414 - xlog_ticket_t *l_unmount_free;/* kmem_free these addresses */ 415 - xlog_ticket_t *l_tail; /* free list of tickets */ 416 - xlog_in_core_t *l_iclog; /* head log queue */ 417 - spinlock_t l_icloglock; /* grab to change iclog state */ 418 - xfs_lsn_t l_tail_lsn; /* lsn of 1st LR with unflushed 419 - * buffers */ 420 - xfs_lsn_t l_last_sync_lsn;/* lsn of last LR on disk */ 386 + /* The following fields don't need locking */ 421 387 struct xfs_mount *l_mp; /* mount point */ 422 388 struct xfs_buf *l_xbuf; /* extra buffer for log 423 389 * wrapping */ 424 390 struct xfs_buftarg *l_targ; /* buftarg of log */ 425 - xfs_daddr_t l_logBBstart; /* start block of log */ 426 - int l_logsize; /* size of log in bytes */ 427 - int l_logBBsize; /* size of log in BB chunks */ 428 - int l_curr_cycle; /* Cycle number of log writes */ 429 - int l_prev_cycle; /* Cycle number before last 430 - * block increment */ 431 - int l_curr_block; /* current logical log block */ 432 - int l_prev_block; /* previous logical log block */ 433 - int l_iclog_size; /* size of log in bytes */ 434 - int l_iclog_size_log; /* log power size of log */ 435 - int l_iclog_bufs; /* number of iclog buffers */ 436 - 437 - /* The following field are used for debugging; need to hold icloglock */ 438 - char *l_iclog_bak[XLOG_MAX_ICLOGS]; 439 - 440 - /* The following block of fields are changed while holding grant_lock */ 441 - spinlock_t l_grant_lock; 442 - xlog_ticket_t *l_reserve_headq; 443 - xlog_ticket_t *l_write_headq; 444 - int 
l_grant_reserve_cycle; 445 - int l_grant_reserve_bytes; 446 - int l_grant_write_cycle; 447 - int l_grant_write_bytes; 448 - 449 - /* The following fields don't need locking */ 450 - #ifdef XFS_LOG_TRACE 451 - struct ktrace *l_trace; 452 - struct ktrace *l_grant_trace; 453 - #endif 454 391 uint l_flags; 455 392 uint l_quotaoffs_flag; /* XFS_DQ_*, for QUOTAOFFs */ 456 393 struct xfs_buf_cancel **l_buf_cancel_table; ··· 415 440 uint l_sectbb_log; /* log2 of sector size in BBs */ 416 441 uint l_sectbb_mask; /* sector size (in BBs) 417 442 * alignment mask */ 443 + int l_iclog_size; /* size of log in bytes */ 444 + int l_iclog_size_log; /* log power size of log */ 445 + int l_iclog_bufs; /* number of iclog buffers */ 446 + xfs_daddr_t l_logBBstart; /* start block of log */ 447 + int l_logsize; /* size of log in bytes */ 448 + int l_logBBsize; /* size of log in BB chunks */ 449 + 450 + /* The following block of fields are changed while holding icloglock */ 451 + sema_t l_flushsema ____cacheline_aligned_in_smp; 452 + /* iclog flushing semaphore */ 453 + int l_flushcnt; /* # of procs waiting on this 454 + * sema */ 455 + int l_covered_state;/* state of "covering disk 456 + * log entries" */ 457 + xlog_in_core_t *l_iclog; /* head log queue */ 458 + spinlock_t l_icloglock; /* grab to change iclog state */ 459 + xfs_lsn_t l_tail_lsn; /* lsn of 1st LR with unflushed 460 + * buffers */ 461 + xfs_lsn_t l_last_sync_lsn;/* lsn of last LR on disk */ 462 + int l_curr_cycle; /* Cycle number of log writes */ 463 + int l_prev_cycle; /* Cycle number before last 464 + * block increment */ 465 + int l_curr_block; /* current logical log block */ 466 + int l_prev_block; /* previous logical log block */ 467 + 468 + /* The following block of fields are changed while holding grant_lock */ 469 + spinlock_t l_grant_lock ____cacheline_aligned_in_smp; 470 + xlog_ticket_t *l_reserve_headq; 471 + xlog_ticket_t *l_write_headq; 472 + int l_grant_reserve_cycle; 473 + int l_grant_reserve_bytes; 474 + 
int l_grant_write_cycle; 475 + int l_grant_write_bytes; 476 + 477 + #ifdef XFS_LOG_TRACE 478 + struct ktrace *l_trace; 479 + struct ktrace *l_grant_trace; 480 + #endif 481 + 482 + /* The following field are used for debugging; need to hold icloglock */ 483 + #ifdef DEBUG 484 + char *l_iclog_bak[XLOG_MAX_ICLOGS]; 485 + #endif 486 + 418 487 } xlog_t; 419 488 420 489 #define XLOG_FORCED_SHUTDOWN(log) ((log)->l_flags & XLOG_IO_ERROR) ··· 477 458 extern struct xfs_buf *xlog_get_bp(xlog_t *, int); 478 459 extern void xlog_put_bp(struct xfs_buf *); 479 460 extern int xlog_bread(xlog_t *, xfs_daddr_t, int, struct xfs_buf *); 461 + 462 + extern kmem_zone_t *xfs_log_ticket_zone; 480 463 481 464 /* iclog tracing */ 482 465 #define XLOG_TRACE_GRAB_FLUSH 1

+85 -38

fs/xfs/xfs_log_recover.c

··· 46 46 #include "xfs_trans_priv.h" 47 47 #include "xfs_quota.h" 48 48 #include "xfs_rw.h" 49 + #include "xfs_utils.h" 49 50 50 51 STATIC int xlog_find_zeroed(xlog_t *, xfs_daddr_t *); 51 52 STATIC int xlog_clear_stale_blocks(xlog_t *, xfs_lsn_t); ··· 121 120 XFS_BUF_SET_TARGET(bp, log->l_mp->m_logdev_targp); 122 121 123 122 xfsbdstrat(log->l_mp, bp); 124 - if ((error = xfs_iowait(bp))) 123 + error = xfs_iowait(bp); 124 + if (error) 125 125 xfs_ioerror_alert("xlog_bread", log->l_mp, 126 126 bp, XFS_BUF_ADDR(bp)); 127 127 return error; ··· 193 191 { 194 192 int b; 195 193 196 - cmn_err(CE_DEBUG, "%s: SB : uuid = ", __FUNCTION__); 194 + cmn_err(CE_DEBUG, "%s: SB : uuid = ", __func__); 197 195 for (b = 0; b < 16; b++) 198 196 cmn_err(CE_DEBUG, "%02x", ((uchar_t *)&mp->m_sb.sb_uuid)[b]); 199 197 cmn_err(CE_DEBUG, ", fmt = %d\n", XLOG_FMT); ··· 1162 1160 if (j == 0 && (start_block + endcount > ealign)) { 1163 1161 offset = XFS_BUF_PTR(bp); 1164 1162 balign = BBTOB(ealign - start_block); 1165 - XFS_BUF_SET_PTR(bp, offset + balign, BBTOB(sectbb)); 1166 - if ((error = xlog_bread(log, ealign, sectbb, bp))) 1163 + error = XFS_BUF_SET_PTR(bp, offset + balign, 1164 + BBTOB(sectbb)); 1165 + if (!error) 1166 + error = xlog_bread(log, ealign, sectbb, bp); 1167 + if (!error) 1168 + error = XFS_BUF_SET_PTR(bp, offset, bufblks); 1169 + if (error) 1167 1170 break; 1168 - XFS_BUF_SET_PTR(bp, offset, bufblks); 1169 1171 } 1170 1172 1171 1173 offset = xlog_align(log, start_block, endcount, bp); ··· 2286 2280 * invalidate the buffer when we write it out below. 2287 2281 */ 2288 2282 imap.im_blkno = 0; 2289 - xfs_imap(log->l_mp, NULL, ino, &imap, 0); 2283 + error = xfs_imap(log->l_mp, NULL, ino, &imap, 0); 2284 + if (error) 2285 + goto error; 2290 2286 } 2291 2287 2292 2288 /* ··· 2972 2964 * Process an extent free intent item that was recovered from 2973 2965 * the log. We need to free the extents that it describes. 
2974 2966 */ 2975 - STATIC void 2967 + STATIC int 2976 2968 xlog_recover_process_efi( 2977 2969 xfs_mount_t *mp, 2978 2970 xfs_efi_log_item_t *efip) ··· 2980 2972 xfs_efd_log_item_t *efdp; 2981 2973 xfs_trans_t *tp; 2982 2974 int i; 2975 + int error = 0; 2983 2976 xfs_extent_t *extp; 2984 2977 xfs_fsblock_t startblock_fsb; 2985 2978 ··· 3004 2995 * free the memory associated with it. 3005 2996 */ 3006 2997 xfs_efi_release(efip, efip->efi_format.efi_nextents); 3007 - return; 2998 + return XFS_ERROR(EIO); 3008 2999 } 3009 3000 } 3010 3001 3011 3002 tp = xfs_trans_alloc(mp, 0); 3012 - xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0, 0, 0); 3003 + error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0, 0, 0); 3004 + if (error) 3005 + goto abort_error; 3013 3006 efdp = xfs_trans_get_efd(tp, efip, efip->efi_format.efi_nextents); 3014 3007 3015 3008 for (i = 0; i < efip->efi_format.efi_nextents; i++) { 3016 3009 extp = &(efip->efi_format.efi_extents[i]); 3017 - xfs_free_extent(tp, extp->ext_start, extp->ext_len); 3010 + error = xfs_free_extent(tp, extp->ext_start, extp->ext_len); 3011 + if (error) 3012 + goto abort_error; 3018 3013 xfs_trans_log_efd_extent(tp, efdp, extp->ext_start, 3019 3014 extp->ext_len); 3020 3015 } 3021 3016 3022 3017 efip->efi_flags |= XFS_EFI_RECOVERED; 3023 - xfs_trans_commit(tp, 0); 3018 + error = xfs_trans_commit(tp, 0); 3019 + return error; 3020 + 3021 + abort_error: 3022 + xfs_trans_cancel(tp, XFS_TRANS_ABORT); 3023 + return error; 3024 3024 } 3025 3025 3026 3026 /* ··· 3077 3059 * everything already in the AIL, we stop processing as soon as 3078 3060 * we see something other than an EFI in the AIL. 
3079 3061 */ 3080 - STATIC void 3062 + STATIC int 3081 3063 xlog_recover_process_efis( 3082 3064 xlog_t *log) 3083 3065 { ··· 3085 3067 xfs_efi_log_item_t *efip; 3086 3068 int gen; 3087 3069 xfs_mount_t *mp; 3070 + int error = 0; 3088 3071 3089 3072 mp = log->l_mp; 3090 3073 spin_lock(&mp->m_ail_lock); ··· 3110 3091 } 3111 3092 3112 3093 spin_unlock(&mp->m_ail_lock); 3113 - xlog_recover_process_efi(mp, efip); 3094 + error = xlog_recover_process_efi(mp, efip); 3095 + if (error) 3096 + return error; 3114 3097 spin_lock(&mp->m_ail_lock); 3115 3098 lip = xfs_trans_next_ail(mp, lip, &gen, NULL); 3116 3099 } 3117 3100 spin_unlock(&mp->m_ail_lock); 3101 + return error; 3118 3102 } 3119 3103 3120 3104 /* ··· 3137 3115 int error; 3138 3116 3139 3117 tp = xfs_trans_alloc(mp, XFS_TRANS_CLEAR_AGI_BUCKET); 3140 - xfs_trans_reserve(tp, 0, XFS_CLEAR_AGI_BUCKET_LOG_RES(mp), 0, 0, 0); 3141 - 3142 - error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, 3118 + error = xfs_trans_reserve(tp, 0, XFS_CLEAR_AGI_BUCKET_LOG_RES(mp), 0, 0, 0); 3119 + if (!error) 3120 + error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, 3143 3121 XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)), 3144 3122 XFS_FSS_TO_BB(mp, 1), 0, &agibp); 3145 - if (error) { 3146 - xfs_trans_cancel(tp, XFS_TRANS_ABORT); 3147 - return; 3148 - } 3123 + if (error) 3124 + goto out_abort; 3149 3125 3126 + error = EINVAL; 3150 3127 agi = XFS_BUF_TO_AGI(agibp); 3151 - if (be32_to_cpu(agi->agi_magicnum) != XFS_AGI_MAGIC) { 3152 - xfs_trans_cancel(tp, XFS_TRANS_ABORT); 3153 - return; 3154 - } 3128 + if (be32_to_cpu(agi->agi_magicnum) != XFS_AGI_MAGIC) 3129 + goto out_abort; 3155 3130 3156 3131 agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO); 3157 3132 offset = offsetof(xfs_agi_t, agi_unlinked) + ··· 3156 3137 xfs_trans_log_buf(tp, agibp, offset, 3157 3138 (offset + sizeof(xfs_agino_t) - 1)); 3158 3139 3159 - (void) xfs_trans_commit(tp, 0); 3140 + error = xfs_trans_commit(tp, 0); 3141 + if (error) 3142 + goto out_error; 3143 + return; 
3144 + 3145 + out_abort: 3146 + xfs_trans_cancel(tp, XFS_TRANS_ABORT); 3147 + out_error: 3148 + xfs_fs_cmn_err(CE_WARN, mp, "xlog_recover_clear_agi_bucket: " 3149 + "failed to clear agi %d. Continuing.", agno); 3150 + return; 3160 3151 } 3161 3152 3162 3153 /* ··· 3243 3214 * next inode in the bucket. 3244 3215 */ 3245 3216 error = xfs_itobp(mp, NULL, ip, &dip, 3246 - &ibp, 0, 0); 3217 + &ibp, 0, 0, 3218 + XFS_BUF_LOCK); 3247 3219 ASSERT(error || (dip != NULL)); 3248 3220 } 3249 3221 ··· 3277 3247 if (ip->i_d.di_mode == 0) 3278 3248 xfs_iput_new(ip, 0); 3279 3249 else 3280 - VN_RELE(XFS_ITOV(ip)); 3250 + IRELE(ip); 3281 3251 } else { 3282 3252 /* 3283 3253 * We can't read in the inode ··· 3475 3445 (!rhead->h_version || 3476 3446 (be32_to_cpu(rhead->h_version) & (~XLOG_VERSION_OKBITS))))) { 3477 3447 xlog_warn("XFS: %s: unrecognised log version (%d).", 3478 - __FUNCTION__, be32_to_cpu(rhead->h_version)); 3448 + __func__, be32_to_cpu(rhead->h_version)); 3479 3449 return XFS_ERROR(EIO); 3480 3450 } 3481 3451 ··· 3634 3604 * _first_, then the log start (LR header end) 3635 3605 * - order is important. 3636 3606 */ 3607 + wrapped_hblks = hblks - split_hblks; 3637 3608 bufaddr = XFS_BUF_PTR(hbp); 3638 - XFS_BUF_SET_PTR(hbp, 3609 + error = XFS_BUF_SET_PTR(hbp, 3639 3610 bufaddr + BBTOB(split_hblks), 3640 3611 BBTOB(hblks - split_hblks)); 3641 - wrapped_hblks = hblks - split_hblks; 3642 - error = xlog_bread(log, 0, wrapped_hblks, hbp); 3612 + if (!error) 3613 + error = xlog_bread(log, 0, 3614 + wrapped_hblks, hbp); 3615 + if (!error) 3616 + error = XFS_BUF_SET_PTR(hbp, bufaddr, 3617 + BBTOB(hblks)); 3643 3618 if (error) 3644 3619 goto bread_err2; 3645 - XFS_BUF_SET_PTR(hbp, bufaddr, BBTOB(hblks)); 3646 3620 if (!offset) 3647 3621 offset = xlog_align(log, 0, 3648 3622 wrapped_hblks, hbp); ··· 3698 3664 * - order is important. 
3699 3665 */ 3700 3666 bufaddr = XFS_BUF_PTR(dbp); 3701 - XFS_BUF_SET_PTR(dbp, 3667 + error = XFS_BUF_SET_PTR(dbp, 3702 3668 bufaddr + BBTOB(split_bblks), 3703 3669 BBTOB(bblks - split_bblks)); 3704 - if ((error = xlog_bread(log, wrapped_hblks, 3705 - bblks - split_bblks, dbp))) 3670 + if (!error) 3671 + error = xlog_bread(log, wrapped_hblks, 3672 + bblks - split_bblks, 3673 + dbp); 3674 + if (!error) 3675 + error = XFS_BUF_SET_PTR(dbp, bufaddr, 3676 + h_size); 3677 + if (error) 3706 3678 goto bread_err2; 3707 - XFS_BUF_SET_PTR(dbp, bufaddr, h_size); 3708 3679 if (!offset) 3709 3680 offset = xlog_align(log, wrapped_hblks, 3710 3681 bblks - split_bblks, dbp); ··· 3865 3826 XFS_BUF_READ(bp); 3866 3827 XFS_BUF_UNASYNC(bp); 3867 3828 xfsbdstrat(log->l_mp, bp); 3868 - if ((error = xfs_iowait(bp))) { 3829 + error = xfs_iowait(bp); 3830 + if (error) { 3869 3831 xfs_ioerror_alert("xlog_do_recover", 3870 3832 log->l_mp, bp, XFS_BUF_ADDR(bp)); 3871 3833 ASSERT(0); ··· 3957 3917 * rather than accepting new requests. 3958 3918 */ 3959 3919 if (log->l_flags & XLOG_RECOVERY_NEEDED) { 3960 - xlog_recover_process_efis(log); 3920 + int error; 3921 + error = xlog_recover_process_efis(log); 3922 + if (error) { 3923 + cmn_err(CE_ALERT, 3924 + "Failed to recover EFIs on filesystem: %s", 3925 + log->l_mp->m_fsname); 3926 + return error; 3927 + } 3961 3928 /* 3962 3929 * Sync the log to get all the EFIs out of the AIL. 3963 3930 * This isn't absolutely necessary, but it helps in

+37 -29

fs/xfs/xfs_mount.c

··· 43 43 #include "xfs_rw.h" 44 44 #include "xfs_quota.h" 45 45 #include "xfs_fsops.h" 46 + #include "xfs_utils.h" 46 47 47 - STATIC void xfs_mount_log_sb(xfs_mount_t *, __int64_t); 48 + STATIC int xfs_mount_log_sb(xfs_mount_t *, __int64_t); 48 49 STATIC int xfs_uuid_mount(xfs_mount_t *); 49 50 STATIC void xfs_uuid_unmount(xfs_mount_t *mp); 50 51 STATIC void xfs_unmountfs_wait(xfs_mount_t *); ··· 58 57 STATIC void xfs_icsb_sync_counters(xfs_mount_t *); 59 58 STATIC int xfs_icsb_modify_counters(xfs_mount_t *, xfs_sb_field_t, 60 59 int64_t, int); 61 - STATIC int xfs_icsb_disable_counter(xfs_mount_t *, xfs_sb_field_t); 60 + STATIC void xfs_icsb_disable_counter(xfs_mount_t *, xfs_sb_field_t); 62 61 63 62 #else 64 63 ··· 957 956 { 958 957 xfs_sb_t *sbp = &(mp->m_sb); 959 958 xfs_inode_t *rip; 960 - bhv_vnode_t *rvp = NULL; 961 959 __uint64_t resblks; 962 960 __int64_t update_flags = 0LL; 963 961 uint quotamount, quotaflags; ··· 964 964 int uuid_mounted = 0; 965 965 int error = 0; 966 966 967 - if (mp->m_sb_bp == NULL) { 968 - error = xfs_readsb(mp, mfsi_flags); 969 - if (error) 970 - return error; 971 - } 972 967 xfs_mount_common(mp, sbp); 973 968 974 969 /* ··· 1158 1163 } 1159 1164 1160 1165 ASSERT(rip != NULL); 1161 - rvp = XFS_ITOV(rip); 1162 1166 1163 1167 if (unlikely((rip->i_d.di_mode & S_IFMT) != S_IFDIR)) { 1164 1168 cmn_err(CE_WARN, "XFS: corrupted root inode"); ··· 1189 1195 /* 1190 1196 * If fs is not mounted readonly, then update the superblock changes. 
1191 1197 */ 1192 - if (update_flags && !(mp->m_flags & XFS_MOUNT_RDONLY)) 1193 - xfs_mount_log_sb(mp, update_flags); 1198 + if (update_flags && !(mp->m_flags & XFS_MOUNT_RDONLY)) { 1199 + error = xfs_mount_log_sb(mp, update_flags); 1200 + if (error) { 1201 + cmn_err(CE_WARN, "XFS: failed to write sb changes"); 1202 + goto error4; 1203 + } 1204 + } 1194 1205 1195 1206 /* 1196 1207 * Initialise the XFS quota management subsystem for this mount ··· 1232 1233 * 1233 1234 * We default to 5% or 1024 fsbs of space reserved, whichever is smaller. 1234 1235 * This may drive us straight to ENOSPC on mount, but that implies 1235 - * we were already there on the last unmount. 1236 + * we were already there on the last unmount. Warn if this occurs. 1236 1237 */ 1237 1238 resblks = mp->m_sb.sb_dblocks; 1238 1239 do_div(resblks, 20); 1239 1240 resblks = min_t(__uint64_t, resblks, 1024); 1240 - xfs_reserve_blocks(mp, &resblks, NULL); 1241 + error = xfs_reserve_blocks(mp, &resblks, NULL); 1242 + if (error) 1243 + cmn_err(CE_WARN, "XFS: Unable to allocate reserve blocks. " 1244 + "Continuing without a reserve pool."); 1241 1245 1242 1246 return 0; 1243 1247 ··· 1248 1246 /* 1249 1247 * Free up the root inode. 1250 1248 */ 1251 - VN_RELE(rvp); 1249 + IRELE(rip); 1252 1250 error3: 1253 1251 xfs_log_unmount_dealloc(mp); 1254 1252 error2: ··· 1276 1274 xfs_unmountfs(xfs_mount_t *mp, struct cred *cr) 1277 1275 { 1278 1276 __uint64_t resblks; 1277 + int error = 0; 1279 1278 1280 1279 /* 1281 1280 * We can potentially deadlock here if we have an inode cluster ··· 1320 1317 * value does not matter.... 1321 1318 */ 1322 1319 resblks = 0; 1323 - xfs_reserve_blocks(mp, &resblks, NULL); 1320 + error = xfs_reserve_blocks(mp, &resblks, NULL); 1321 + if (error) 1322 + cmn_err(CE_WARN, "XFS: Unable to free reserved block pool. 
" 1323 + "Freespace may not be correct on next mount."); 1324 1324 1325 - xfs_log_sbcount(mp, 1); 1325 + error = xfs_log_sbcount(mp, 1); 1326 + if (error) 1327 + cmn_err(CE_WARN, "XFS: Unable to update superblock counters. " 1328 + "Freespace may not be correct on next mount."); 1326 1329 xfs_unmountfs_writesb(mp); 1327 1330 xfs_unmountfs_wait(mp); /* wait for async bufs */ 1328 1331 xfs_log_unmount(mp); /* Done! No more fs ops. */ ··· 1420 1411 xfs_mod_sb(tp, XFS_SB_IFREE | XFS_SB_ICOUNT | XFS_SB_FDBLOCKS); 1421 1412 if (sync) 1422 1413 xfs_trans_set_sync(tp); 1423 - xfs_trans_commit(tp, 0); 1424 - 1425 - return 0; 1414 + error = xfs_trans_commit(tp, 0); 1415 + return error; 1426 1416 } 1427 1417 1428 1418 STATIC void ··· 1470 1462 XFS_BUF_UNASYNC(sbp); 1471 1463 ASSERT(XFS_BUF_TARGET(sbp) == mp->m_ddev_targp); 1472 1464 xfsbdstrat(mp, sbp); 1473 - /* Nevermind errors we might get here. */ 1474 1465 error = xfs_iowait(sbp); 1475 1466 if (error) 1476 1467 xfs_ioerror_alert("xfs_unmountfs_writesb", ··· 1918 1911 * be altered by the mount options, as well as any potential sb_features2 1919 1912 * fixup. Only the first superblock is updated. 
1920 1913 */ 1921 - STATIC void 1914 + STATIC int 1922 1915 xfs_mount_log_sb( 1923 1916 xfs_mount_t *mp, 1924 1917 __int64_t fields) 1925 1918 { 1926 1919 xfs_trans_t *tp; 1920 + int error; 1927 1921 1928 1922 ASSERT(fields & (XFS_SB_UNIT | XFS_SB_WIDTH | XFS_SB_UUID | 1929 1923 XFS_SB_FEATURES2 | XFS_SB_BAD_FEATURES2)); 1930 1924 1931 1925 tp = xfs_trans_alloc(mp, XFS_TRANS_SB_UNIT); 1932 - if (xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0, 1933 - XFS_DEFAULT_LOG_COUNT)) { 1926 + error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0, 1927 + XFS_DEFAULT_LOG_COUNT); 1928 + if (error) { 1934 1929 xfs_trans_cancel(tp, 0); 1935 - return; 1930 + return error; 1936 1931 } 1937 1932 xfs_mod_sb(tp, fields); 1938 - xfs_trans_commit(tp, 0); 1933 + error = xfs_trans_commit(tp, 0); 1934 + return error; 1939 1935 } 1940 1936 1941 1937 ··· 2199 2189 return test_bit(field, &mp->m_icsb_counters); 2200 2190 } 2201 2191 2202 - STATIC int 2192 + STATIC void 2203 2193 xfs_icsb_disable_counter( 2204 2194 xfs_mount_t *mp, 2205 2195 xfs_sb_field_t field) ··· 2217 2207 * the m_icsb_mutex. 2218 2208 */ 2219 2209 if (xfs_icsb_counter_disabled(mp, field)) 2220 - return 0; 2210 + return; 2221 2211 2222 2212 xfs_icsb_lock_all_counters(mp); 2223 2213 if (!test_and_set_bit(field, &mp->m_icsb_counters)) { ··· 2240 2230 } 2241 2231 2242 2232 xfs_icsb_unlock_all_counters(mp); 2243 - 2244 - return 0; 2245 2233 } 2246 2234 2247 2235 STATIC void

+15 -15

fs/xfs/xfs_mount.h

··· 66 66 * Prototypes and functions for the Data Migration subsystem. 67 67 */ 68 68 69 - typedef int (*xfs_send_data_t)(int, bhv_vnode_t *, 70 - xfs_off_t, size_t, int, bhv_vrwlock_t *); 69 + typedef int (*xfs_send_data_t)(int, struct xfs_inode *, 70 + xfs_off_t, size_t, int, int *); 71 71 typedef int (*xfs_send_mmap_t)(struct vm_area_struct *, uint); 72 - typedef int (*xfs_send_destroy_t)(bhv_vnode_t *, dm_right_t); 72 + typedef int (*xfs_send_destroy_t)(struct xfs_inode *, dm_right_t); 73 73 typedef int (*xfs_send_namesp_t)(dm_eventtype_t, struct xfs_mount *, 74 - bhv_vnode_t *, 75 - dm_right_t, bhv_vnode_t *, dm_right_t, 76 - char *, char *, mode_t, int, int); 74 + struct xfs_inode *, dm_right_t, 75 + struct xfs_inode *, dm_right_t, 76 + const char *, const char *, mode_t, int, int); 77 77 typedef int (*xfs_send_mount_t)(struct xfs_mount *, dm_right_t, 78 78 char *, char *); 79 - typedef void (*xfs_send_unmount_t)(struct xfs_mount *, bhv_vnode_t *, 79 + typedef void (*xfs_send_unmount_t)(struct xfs_mount *, struct xfs_inode *, 80 80 dm_right_t, mode_t, int, int); 81 81 82 82 typedef struct xfs_dmops { ··· 88 88 xfs_send_unmount_t xfs_send_unmount; 89 89 } xfs_dmops_t; 90 90 91 - #define XFS_SEND_DATA(mp, ev,vp,off,len,fl,lock) \ 92 - (*(mp)->m_dm_ops->xfs_send_data)(ev,vp,off,len,fl,lock) 91 + #define XFS_SEND_DATA(mp, ev,ip,off,len,fl,lock) \ 92 + (*(mp)->m_dm_ops->xfs_send_data)(ev,ip,off,len,fl,lock) 93 93 #define XFS_SEND_MMAP(mp, vma,fl) \ 94 94 (*(mp)->m_dm_ops->xfs_send_mmap)(vma,fl) 95 - #define XFS_SEND_DESTROY(mp, vp,right) \ 96 - (*(mp)->m_dm_ops->xfs_send_destroy)(vp,right) 95 + #define XFS_SEND_DESTROY(mp, ip,right) \ 96 + (*(mp)->m_dm_ops->xfs_send_destroy)(ip,right) 97 97 #define XFS_SEND_NAMESP(mp, ev,b1,r1,b2,r2,n1,n2,mode,rval,fl) \ 98 98 (*(mp)->m_dm_ops->xfs_send_namesp)(ev,NULL,b1,r1,b2,r2,n1,n2,mode,rval,fl) 99 99 #define XFS_SEND_PREUNMOUNT(mp,b1,r1,b2,r2,n1,n2,mode,rval,fl) \ 100 100 
(*(mp)->m_dm_ops->xfs_send_namesp)(DM_EVENT_PREUNMOUNT,mp,b1,r1,b2,r2,n1,n2,mode,rval,fl) 101 101 #define XFS_SEND_MOUNT(mp,right,path,name) \ 102 102 (*(mp)->m_dm_ops->xfs_send_mount)(mp,right,path,name) 103 - #define XFS_SEND_UNMOUNT(mp, vp,right,mode,rval,fl) \ 104 - (*(mp)->m_dm_ops->xfs_send_unmount)(mp,vp,right,mode,rval,fl) 103 + #define XFS_SEND_UNMOUNT(mp, ip,right,mode,rval,fl) \ 104 + (*(mp)->m_dm_ops->xfs_send_unmount)(mp,ip,right,mode,rval,fl) 105 105 106 106 107 107 /* ··· 220 220 #endif 221 221 222 222 typedef struct xfs_ail { 223 - xfs_ail_entry_t xa_ail; 223 + struct list_head xa_ail; 224 224 uint xa_gen; 225 225 struct task_struct *xa_task; 226 226 xfs_lsn_t xa_target; ··· 401 401 402 402 /* 403 403 * Allow large block sizes to be reported to userspace programs if the 404 - * "largeio" mount option is used. 404 + * "largeio" mount option is used. 405 405 * 406 406 * If compatibility mode is specified, simply return the basic unit of caching 407 407 * so that we don't get inefficient read/modify/write I/O from user apps.

+40 -81

fs/xfs/xfs_rename.c

··· 36 36 #include "xfs_bmap.h" 37 37 #include "xfs_error.h" 38 38 #include "xfs_quota.h" 39 - #include "xfs_refcache.h" 40 39 #include "xfs_utils.h" 41 40 #include "xfs_trans_space.h" 42 41 #include "xfs_vnodeops.h" ··· 83 84 */ 84 85 STATIC int 85 86 xfs_lock_for_rename( 86 - xfs_inode_t *dp1, /* old (source) directory inode */ 87 - xfs_inode_t *dp2, /* new (target) directory inode */ 88 - bhv_vname_t *vname1,/* old entry name */ 89 - bhv_vname_t *vname2,/* new entry name */ 90 - xfs_inode_t **ipp1, /* inode of old entry */ 91 - xfs_inode_t **ipp2, /* inode of new entry, if it 87 + xfs_inode_t *dp1, /* in: old (source) directory inode */ 88 + xfs_inode_t *dp2, /* in: new (target) directory inode */ 89 + xfs_inode_t *ip1, /* in: inode of old entry */ 90 + struct xfs_name *name2, /* in: new entry name */ 91 + xfs_inode_t **ipp2, /* out: inode of new entry, if it 92 92 already exists, NULL otherwise. */ 93 - xfs_inode_t **i_tab,/* array of inode returned, sorted */ 94 - int *num_inodes) /* number of inodes in array */ 93 + xfs_inode_t **i_tab,/* out: array of inode returned, sorted */ 94 + int *num_inodes) /* out: number of inodes in array */ 95 95 { 96 - xfs_inode_t *ip1, *ip2, *temp; 96 + xfs_inode_t *ip2 = NULL; 97 + xfs_inode_t *temp; 97 98 xfs_ino_t inum1, inum2; 98 99 int error; 99 100 int i, j; 100 101 uint lock_mode; 101 102 int diff_dirs = (dp1 != dp2); 102 - 103 - ip2 = NULL; 104 103 105 104 /* 106 105 * First, find out the current inums of the entries so that we ··· 107 110 * to see if we still have the right inodes, directories, etc. 108 111 */ 109 112 lock_mode = xfs_ilock_map_shared(dp1); 110 - error = xfs_get_dir_entry(vname1, &ip1); 111 - if (error) { 112 - xfs_iunlock_map_shared(dp1, lock_mode); 113 - return error; 114 - } 113 + IHOLD(ip1); 114 + xfs_itrace_ref(ip1); 115 115 116 116 inum1 = ip1->i_ino; 117 - 118 - ASSERT(ip1); 119 - xfs_itrace_ref(ip1); 120 117 121 118 /* 122 119 * Unlock dp1 and lock dp2 if they are different. 
123 120 */ 124 - 125 121 if (diff_dirs) { 126 122 xfs_iunlock_map_shared(dp1, lock_mode); 127 123 lock_mode = xfs_ilock_map_shared(dp2); 128 124 } 129 125 130 - error = xfs_dir_lookup_int(dp2, lock_mode, vname2, &inum2, &ip2); 126 + error = xfs_dir_lookup_int(dp2, lock_mode, name2, &inum2, &ip2); 131 127 if (error == ENOENT) { /* target does not need to exist. */ 132 128 inum2 = 0; 133 129 } else if (error) { ··· 152 162 *num_inodes = 4; 153 163 i_tab[3] = ip2; 154 164 } 165 + *ipp2 = i_tab[3]; 155 166 156 167 /* 157 168 * Sort the elements via bubble sort. (Remember, there are at ··· 190 199 xfs_lock_inodes(i_tab, *num_inodes, 0, XFS_ILOCK_SHARED); 191 200 } 192 201 193 - /* 194 - * Set the return value. Null out any unused entries in i_tab. 195 - */ 196 - *ipp1 = *ipp2 = NULL; 197 - for (i=0; i < *num_inodes; i++) { 198 - if (i_tab[i]->i_ino == inum1) { 199 - *ipp1 = i_tab[i]; 200 - } 201 - if (i_tab[i]->i_ino == inum2) { 202 - *ipp2 = i_tab[i]; 203 - } 204 - } 205 - for (;i < 4; i++) { 206 - i_tab[i] = NULL; 207 - } 208 202 return 0; 209 203 } 210 204 ··· 199 223 int 200 224 xfs_rename( 201 225 xfs_inode_t *src_dp, 202 - bhv_vname_t *src_vname, 203 - bhv_vnode_t *target_dir_vp, 204 - bhv_vname_t *target_vname) 226 + struct xfs_name *src_name, 227 + xfs_inode_t *src_ip, 228 + xfs_inode_t *target_dp, 229 + struct xfs_name *target_name) 205 230 { 206 - bhv_vnode_t *src_dir_vp = XFS_ITOV(src_dp); 207 231 xfs_trans_t *tp; 208 - xfs_inode_t *target_dp, *src_ip, *target_ip; 232 + xfs_inode_t *target_ip; 209 233 xfs_mount_t *mp = src_dp->i_mount; 210 234 int new_parent; /* moving to a new dir */ 211 235 int src_is_directory; /* src_name is a directory */ ··· 219 243 int spaceres; 220 244 int target_link_zero = 0; 221 245 int num_inodes; 222 - char *src_name = VNAME(src_vname); 223 - char *target_name = VNAME(target_vname); 224 - int src_namelen = VNAMELEN(src_vname); 225 - int target_namelen = VNAMELEN(target_vname); 226 246 227 247 xfs_itrace_entry(src_dp); 228 - 
xfs_itrace_entry(xfs_vtoi(target_dir_vp)); 229 - 230 - /* 231 - * Find the XFS behavior descriptor for the target directory 232 - * vnode since it was not handed to us. 233 - */ 234 - target_dp = xfs_vtoi(target_dir_vp); 235 - if (target_dp == NULL) { 236 - return XFS_ERROR(EXDEV); 237 - } 248 + xfs_itrace_entry(target_dp); 238 249 239 250 if (DM_EVENT_ENABLED(src_dp, DM_EVENT_RENAME) || 240 251 DM_EVENT_ENABLED(target_dp, DM_EVENT_RENAME)) { 241 252 error = XFS_SEND_NAMESP(mp, DM_EVENT_RENAME, 242 - src_dir_vp, DM_RIGHT_NULL, 243 - target_dir_vp, DM_RIGHT_NULL, 244 - src_name, target_name, 253 + src_dp, DM_RIGHT_NULL, 254 + target_dp, DM_RIGHT_NULL, 255 + src_name->name, target_name->name, 245 256 0, 0, 0); 246 257 if (error) { 247 258 return error; ··· 245 282 * does not exist in the source directory. 246 283 */ 247 284 tp = NULL; 248 - error = xfs_lock_for_rename(src_dp, target_dp, src_vname, 249 - target_vname, &src_ip, &target_ip, inodes, 250 - &num_inodes); 251 - 285 + error = xfs_lock_for_rename(src_dp, target_dp, src_ip, target_name, 286 + &target_ip, inodes, &num_inodes); 252 287 if (error) { 253 288 /* 254 289 * We have nothing locked, no inode references, and ··· 292 331 XFS_BMAP_INIT(&free_list, &first_block); 293 332 tp = xfs_trans_alloc(mp, XFS_TRANS_RENAME); 294 333 cancel_flags = XFS_TRANS_RELEASE_LOG_RES; 295 - spaceres = XFS_RENAME_SPACE_RES(mp, target_namelen); 334 + spaceres = XFS_RENAME_SPACE_RES(mp, target_name->len); 296 335 error = xfs_trans_reserve(tp, spaceres, XFS_RENAME_LOG_RES(mp), 0, 297 336 XFS_TRANS_PERM_LOG_RES, XFS_RENAME_LOG_COUNT); 298 337 if (error == ENOSPC) { ··· 326 365 * them when they unlock the inodes. Also, we need to be careful 327 366 * not to add an inode to the transaction more than once. 
328 367 */ 329 - VN_HOLD(src_dir_vp); 368 + IHOLD(src_dp); 330 369 xfs_trans_ijoin(tp, src_dp, XFS_ILOCK_EXCL); 331 370 if (new_parent) { 332 - VN_HOLD(target_dir_vp); 371 + IHOLD(target_dp); 333 372 xfs_trans_ijoin(tp, target_dp, XFS_ILOCK_EXCL); 334 373 } 335 374 if ((src_ip != src_dp) && (src_ip != target_dp)) { ··· 350 389 * If there's no space reservation, check the entry will 351 390 * fit before actually inserting it. 352 391 */ 353 - if (spaceres == 0 && 354 - (error = xfs_dir_canenter(tp, target_dp, target_name, 355 - target_namelen))) 392 + error = xfs_dir_canenter(tp, target_dp, target_name, spaceres); 393 + if (error) 356 394 goto error_return; 357 395 /* 358 396 * If target does not exist and the rename crosses ··· 359 399 * to account for the ".." reference from the new entry. 360 400 */ 361 401 error = xfs_dir_createname(tp, target_dp, target_name, 362 - target_namelen, src_ip->i_ino, 363 - &first_block, &free_list, spaceres); 402 + src_ip->i_ino, &first_block, 403 + &free_list, spaceres); 364 404 if (error == ENOSPC) 365 405 goto error_return; 366 406 if (error) ··· 399 439 * name at the destination directory, remove it first. 400 440 */ 401 441 error = xfs_dir_replace(tp, target_dp, target_name, 402 - target_namelen, src_ip->i_ino, 442 + src_ip->i_ino, 403 443 &first_block, &free_list, spaceres); 404 444 if (error) 405 445 goto abort_return; ··· 436 476 * Rewrite the ".." entry to point to the new 437 477 * directory. 
438 478 */ 439 - error = xfs_dir_replace(tp, src_ip, "..", 2, target_dp->i_ino, 479 + error = xfs_dir_replace(tp, src_ip, &xfs_name_dotdot, 480 + target_dp->i_ino, 440 481 &first_block, &free_list, spaceres); 441 482 ASSERT(error != EEXIST); 442 483 if (error) ··· 473 512 goto abort_return; 474 513 } 475 514 476 - error = xfs_dir_removename(tp, src_dp, src_name, src_namelen, 477 - src_ip->i_ino, &first_block, &free_list, spaceres); 515 + error = xfs_dir_removename(tp, src_dp, src_name, src_ip->i_ino, 516 + &first_block, &free_list, spaceres); 478 517 if (error) 479 518 goto abort_return; 480 519 xfs_ichgtime(src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); ··· 541 580 * the vnode references. 542 581 */ 543 582 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 544 - if (target_ip != NULL) { 545 - xfs_refcache_purge_ip(target_ip); 583 + if (target_ip != NULL) 546 584 IRELE(target_ip); 547 - } 548 585 /* 549 586 * Let interposed file systems know about removed links. 550 587 */ ··· 557 598 if (DM_EVENT_ENABLED(src_dp, DM_EVENT_POSTRENAME) || 558 599 DM_EVENT_ENABLED(target_dp, DM_EVENT_POSTRENAME)) { 559 600 (void) XFS_SEND_NAMESP (mp, DM_EVENT_POSTRENAME, 560 - src_dir_vp, DM_RIGHT_NULL, 561 - target_dir_vp, DM_RIGHT_NULL, 562 - src_name, target_name, 601 + src_dp, DM_RIGHT_NULL, 602 + target_dp, DM_RIGHT_NULL, 603 + src_name->name, target_name->name, 563 604 0, error, 0); 564 605 } 565 606 return error;

+25 -16

fs/xfs/xfs_rtalloc.c

··· 44 44 #include "xfs_rw.h" 45 45 #include "xfs_inode_item.h" 46 46 #include "xfs_trans_space.h" 47 + #include "xfs_utils.h" 47 48 48 49 49 50 /* ··· 124 123 XFS_GROWRTALLOC_LOG_RES(mp), 0, 125 124 XFS_TRANS_PERM_LOG_RES, 126 125 XFS_DEFAULT_PERM_LOG_COUNT))) 127 - goto error_exit; 126 + goto error_cancel; 128 127 cancelflags = XFS_TRANS_RELEASE_LOG_RES; 129 128 /* 130 129 * Lock the inode. 131 130 */ 132 131 if ((error = xfs_trans_iget(mp, tp, ino, 0, 133 132 XFS_ILOCK_EXCL, &ip))) 134 - goto error_exit; 133 + goto error_cancel; 135 134 XFS_BMAP_INIT(&flist, &firstblock); 136 135 /* 137 136 * Allocate blocks to the bitmap file. ··· 144 143 if (!error && nmap < 1) 145 144 error = XFS_ERROR(ENOSPC); 146 145 if (error) 147 - goto error_exit; 146 + goto error_cancel; 148 147 /* 149 148 * Free any blocks freed up in the transaction, then commit. 150 149 */ 151 150 error = xfs_bmap_finish(&tp, &flist, &committed); 152 151 if (error) 153 - goto error_exit; 154 - xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 152 + goto error_cancel; 153 + error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 154 + if (error) 155 + goto error; 155 156 /* 156 157 * Now we need to clear the allocated blocks. 157 158 * Do this one block per transaction, to keep it simple. ··· 168 165 */ 169 166 if ((error = xfs_trans_reserve(tp, 0, 170 167 XFS_GROWRTZERO_LOG_RES(mp), 0, 0, 0))) 171 - goto error_exit; 168 + goto error_cancel; 172 169 /* 173 170 * Lock the bitmap inode. 174 171 */ 175 172 if ((error = xfs_trans_iget(mp, tp, ino, 0, 176 173 XFS_ILOCK_EXCL, &ip))) 177 - goto error_exit; 174 + goto error_cancel; 178 175 /* 179 176 * Get a buffer for the block. 180 177 */ ··· 183 180 mp->m_bsize, 0); 184 181 if (bp == NULL) { 185 182 error = XFS_ERROR(EIO); 186 - goto error_exit; 183 + goto error_cancel; 187 184 } 188 185 memset(XFS_BUF_PTR(bp), 0, mp->m_sb.sb_blocksize); 189 186 xfs_trans_log_buf(tp, bp, 0, mp->m_sb.sb_blocksize - 1); 190 187 /* 191 188 * Commit the transaction. 
192 189 */ 193 - xfs_trans_commit(tp, 0); 190 + error = xfs_trans_commit(tp, 0); 191 + if (error) 192 + goto error; 194 193 } 195 194 /* 196 195 * Go on to the next extent, if any. ··· 200 195 oblocks = map.br_startoff + map.br_blockcount; 201 196 } 202 197 return 0; 203 - error_exit: 198 + error_cancel: 204 199 xfs_trans_cancel(tp, cancelflags); 200 + error: 205 201 return error; 206 202 } 207 203 ··· 1881 1875 xfs_trans_t *tp; /* transaction pointer */ 1882 1876 1883 1877 sbp = &mp->m_sb; 1878 + cancelflags = 0; 1884 1879 /* 1885 1880 * Initial error checking. 1886 1881 */ ··· 2048 2041 */ 2049 2042 mp->m_rsumlevels = nrsumlevels; 2050 2043 mp->m_rsumsize = nrsumsize; 2051 - /* 2052 - * Commit the transaction. 2053 - */ 2054 - xfs_trans_commit(tp, 0); 2044 + 2045 + error = xfs_trans_commit(tp, 0); 2046 + if (error) { 2047 + tp = NULL; 2048 + break; 2049 + } 2055 2050 } 2056 2051 2057 - if (error) 2052 + if (error && tp) 2058 2053 xfs_trans_cancel(tp, cancelflags); 2059 2054 2060 2055 /* ··· 2287 2278 ASSERT(sbp->sb_rsumino != NULLFSINO); 2288 2279 error = xfs_iget(mp, NULL, sbp->sb_rsumino, 0, 0, &mp->m_rsumip, 0); 2289 2280 if (error) { 2290 - VN_RELE(XFS_ITOV(mp->m_rbmip)); 2281 + IRELE(mp->m_rbmip); 2291 2282 return error; 2292 2283 } 2293 2284 ASSERT(mp->m_rsumip != NULL);

+4 -4

fs/xfs/xfs_rw.c

··· 126 126 * when we return. 127 127 */ 128 128 if (iip && iip->ili_last_lsn) { 129 - xfs_log_force(mp, iip->ili_last_lsn, 130 - XFS_LOG_FORCE | XFS_LOG_SYNC); 129 + error = _xfs_log_force(mp, iip->ili_last_lsn, 130 + XFS_LOG_FORCE | XFS_LOG_SYNC, NULL); 131 131 } else if (xfs_ipincount(ip) > 0) { 132 - xfs_log_force(mp, (xfs_lsn_t)0, 133 - XFS_LOG_FORCE | XFS_LOG_SYNC); 132 + error = _xfs_log_force(mp, (xfs_lsn_t)0, 133 + XFS_LOG_FORCE | XFS_LOG_SYNC, NULL); 134 134 } 135 135 136 136 } else {

+1 -7

fs/xfs/xfs_trans.h

··· 113 113 struct xfs_trans; 114 114 struct xfs_dquot_acct; 115 115 116 - typedef struct xfs_ail_entry { 117 - struct xfs_log_item *ail_forw; /* AIL forw pointer */ 118 - struct xfs_log_item *ail_back; /* AIL back pointer */ 119 - } xfs_ail_entry_t; 120 - 121 116 typedef struct xfs_log_item { 122 - xfs_ail_entry_t li_ail; /* AIL pointers */ 117 + struct list_head li_ail; /* AIL pointers */ 123 118 xfs_lsn_t li_lsn; /* last on-disk lsn */ 124 119 struct xfs_log_item_desc *li_desc; /* ptr to current desc*/ 125 120 struct xfs_mount *li_mountp; /* ptr to fs mount */ ··· 336 341 unsigned int t_rtx_res; /* # of rt extents resvd */ 337 342 unsigned int t_rtx_res_used; /* # of resvd rt extents used */ 338 343 xfs_log_ticket_t t_ticket; /* log mgr ticket */ 339 - sema_t t_sema; /* sema for commit completion */ 340 344 xfs_lsn_t t_lsn; /* log seq num of start of 341 345 * transaction. */ 342 346 xfs_lsn_t t_commit_lsn; /* log seq num of end of

+62 -91

fs/xfs/xfs_trans_ail.c

··· 28 28 #include "xfs_trans_priv.h" 29 29 #include "xfs_error.h" 30 30 31 - STATIC void xfs_ail_insert(xfs_ail_entry_t *, xfs_log_item_t *); 32 - STATIC xfs_log_item_t * xfs_ail_delete(xfs_ail_entry_t *, xfs_log_item_t *); 33 - STATIC xfs_log_item_t * xfs_ail_min(xfs_ail_entry_t *); 34 - STATIC xfs_log_item_t * xfs_ail_next(xfs_ail_entry_t *, xfs_log_item_t *); 31 + STATIC void xfs_ail_insert(xfs_ail_t *, xfs_log_item_t *); 32 + STATIC xfs_log_item_t * xfs_ail_delete(xfs_ail_t *, xfs_log_item_t *); 33 + STATIC xfs_log_item_t * xfs_ail_min(xfs_ail_t *); 34 + STATIC xfs_log_item_t * xfs_ail_next(xfs_ail_t *, xfs_log_item_t *); 35 35 36 36 #ifdef DEBUG 37 - STATIC void xfs_ail_check(xfs_ail_entry_t *, xfs_log_item_t *); 37 + STATIC void xfs_ail_check(xfs_ail_t *, xfs_log_item_t *); 38 38 #else 39 39 #define xfs_ail_check(a,l) 40 40 #endif /* DEBUG */ ··· 57 57 xfs_log_item_t *lip; 58 58 59 59 spin_lock(&mp->m_ail_lock); 60 - lip = xfs_ail_min(&(mp->m_ail.xa_ail)); 60 + lip = xfs_ail_min(&mp->m_ail); 61 61 if (lip == NULL) { 62 62 lsn = (xfs_lsn_t)0; 63 63 } else { ··· 91 91 { 92 92 xfs_log_item_t *lip; 93 93 94 - lip = xfs_ail_min(&mp->m_ail.xa_ail); 94 + lip = xfs_ail_min(&mp->m_ail); 95 95 if (lip && !XFS_FORCED_SHUTDOWN(mp)) { 96 96 if (XFS_LSN_CMP(threshold_lsn, mp->m_ail.xa_target) > 0) 97 97 xfsaild_wakeup(mp, threshold_lsn); ··· 111 111 { 112 112 xfs_log_item_t *lip; 113 113 114 - lip = xfs_ail_min(&(mp->m_ail.xa_ail)); 114 + lip = xfs_ail_min(&mp->m_ail); 115 115 *gen = (int)mp->m_ail.xa_gen; 116 116 if (lsn == 0) 117 117 return lip; 118 118 119 - while (lip && (XFS_LSN_CMP(lip->li_lsn, lsn) < 0)) 120 - lip = lip->li_ail.ail_forw; 119 + list_for_each_entry(lip, &mp->m_ail.xa_ail, li_ail) { 120 + if (XFS_LSN_CMP(lip->li_lsn, lsn) >= 0) 121 + return lip; 122 + } 121 123 122 - return lip; 124 + return NULL; 123 125 } 124 126 125 127 /* ··· 331 329 * the call to xfs_log_move_tail() doesn't do anything if there's 332 330 * not enough free space to wake people up 
so we're safe calling it. 333 331 */ 334 - min_lip = xfs_ail_min(&mp->m_ail.xa_ail); 332 + min_lip = xfs_ail_min(&mp->m_ail); 335 333 336 334 if (min_lip == lip) 337 335 xfs_log_move_tail(mp, 1); ··· 359 357 xfs_log_item_t *lip, 360 358 xfs_lsn_t lsn) __releases(mp->m_ail_lock) 361 359 { 362 - xfs_ail_entry_t *ailp; 363 360 xfs_log_item_t *dlip=NULL; 364 361 xfs_log_item_t *mlip; /* ptr to minimum lip */ 365 362 366 - ailp = &(mp->m_ail.xa_ail); 367 - mlip = xfs_ail_min(ailp); 363 + mlip = xfs_ail_min(&mp->m_ail); 368 364 369 365 if (lip->li_flags & XFS_LI_IN_AIL) { 370 - dlip = xfs_ail_delete(ailp, lip); 366 + dlip = xfs_ail_delete(&mp->m_ail, lip); 371 367 ASSERT(dlip == lip); 372 368 } else { 373 369 lip->li_flags |= XFS_LI_IN_AIL; ··· 373 373 374 374 lip->li_lsn = lsn; 375 375 376 - xfs_ail_insert(ailp, lip); 376 + xfs_ail_insert(&mp->m_ail, lip); 377 377 mp->m_ail.xa_gen++; 378 378 379 379 if (mlip == dlip) { 380 - mlip = xfs_ail_min(&(mp->m_ail.xa_ail)); 380 + mlip = xfs_ail_min(&mp->m_ail); 381 381 spin_unlock(&mp->m_ail_lock); 382 382 xfs_log_move_tail(mp, mlip->li_lsn); 383 383 } else { ··· 407 407 xfs_mount_t *mp, 408 408 xfs_log_item_t *lip) __releases(mp->m_ail_lock) 409 409 { 410 - xfs_ail_entry_t *ailp; 411 410 xfs_log_item_t *dlip; 412 411 xfs_log_item_t *mlip; 413 412 414 413 if (lip->li_flags & XFS_LI_IN_AIL) { 415 - ailp = &(mp->m_ail.xa_ail); 416 - mlip = xfs_ail_min(ailp); 417 - dlip = xfs_ail_delete(ailp, lip); 414 + mlip = xfs_ail_min(&mp->m_ail); 415 + dlip = xfs_ail_delete(&mp->m_ail, lip); 418 416 ASSERT(dlip == lip); 419 417 420 418 ··· 421 423 mp->m_ail.xa_gen++; 422 424 423 425 if (mlip == dlip) { 424 - mlip = xfs_ail_min(&(mp->m_ail.xa_ail)); 426 + mlip = xfs_ail_min(&mp->m_ail); 425 427 spin_unlock(&mp->m_ail_lock); 426 428 xfs_log_move_tail(mp, (mlip ? 
mlip->li_lsn : 0)); 427 429 } else { ··· 438 440 else { 439 441 xfs_cmn_err(XFS_PTAG_AILDELETE, CE_ALERT, mp, 440 442 "%s: attempting to delete a log item that is not in the AIL", 441 - __FUNCTION__); 443 + __func__); 442 444 spin_unlock(&mp->m_ail_lock); 443 445 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); 444 446 } ··· 459 461 { 460 462 xfs_log_item_t *lip; 461 463 462 - lip = xfs_ail_min(&(mp->m_ail.xa_ail)); 464 + lip = xfs_ail_min(&mp->m_ail); 463 465 *gen = (int)mp->m_ail.xa_gen; 464 466 465 467 return lip; ··· 483 485 484 486 ASSERT(mp && lip && gen); 485 487 if (mp->m_ail.xa_gen == *gen) { 486 - nlip = xfs_ail_next(&(mp->m_ail.xa_ail), lip); 488 + nlip = xfs_ail_next(&mp->m_ail, lip); 487 489 } else { 488 - nlip = xfs_ail_min(&(mp->m_ail).xa_ail); 490 + nlip = xfs_ail_min(&mp->m_ail); 489 491 *gen = (int)mp->m_ail.xa_gen; 490 492 if (restarts != NULL) { 491 493 XFS_STATS_INC(xs_push_ail_restarts); ··· 515 517 xfs_trans_ail_init( 516 518 xfs_mount_t *mp) 517 519 { 518 - mp->m_ail.xa_ail.ail_forw = (xfs_log_item_t*)&mp->m_ail.xa_ail; 519 - mp->m_ail.xa_ail.ail_back = (xfs_log_item_t*)&mp->m_ail.xa_ail; 520 + INIT_LIST_HEAD(&mp->m_ail.xa_ail); 520 521 return xfsaild_start(mp); 521 522 } 522 523 ··· 534 537 */ 535 538 STATIC void 536 539 xfs_ail_insert( 537 - xfs_ail_entry_t *base, 540 + xfs_ail_t *ailp, 538 541 xfs_log_item_t *lip) 539 542 /* ARGSUSED */ 540 543 { ··· 543 546 /* 544 547 * If the list is empty, just insert the item. 
545 548 */ 546 - if (base->ail_back == (xfs_log_item_t*)base) { 547 - base->ail_forw = lip; 548 - base->ail_back = lip; 549 - lip->li_ail.ail_forw = (xfs_log_item_t*)base; 550 - lip->li_ail.ail_back = (xfs_log_item_t*)base; 549 + if (list_empty(&ailp->xa_ail)) { 550 + list_add(&lip->li_ail, &ailp->xa_ail); 551 551 return; 552 552 } 553 553 554 - next_lip = base->ail_back; 555 - while ((next_lip != (xfs_log_item_t*)base) && 556 - (XFS_LSN_CMP(next_lip->li_lsn, lip->li_lsn) > 0)) { 557 - next_lip = next_lip->li_ail.ail_back; 554 + list_for_each_entry_reverse(next_lip, &ailp->xa_ail, li_ail) { 555 + if (XFS_LSN_CMP(next_lip->li_lsn, lip->li_lsn) <= 0) 556 + break; 558 557 } 559 - ASSERT((next_lip == (xfs_log_item_t*)base) || 560 - (XFS_LSN_CMP(next_lip->li_lsn, lip->li_lsn) <= 0)); 561 - lip->li_ail.ail_forw = next_lip->li_ail.ail_forw; 562 - lip->li_ail.ail_back = next_lip; 563 - next_lip->li_ail.ail_forw = lip; 564 - lip->li_ail.ail_forw->li_ail.ail_back = lip; 565 558 566 - xfs_ail_check(base, lip); 559 + ASSERT((&next_lip->li_ail == &ailp->xa_ail) || 560 + (XFS_LSN_CMP(next_lip->li_lsn, lip->li_lsn) <= 0)); 561 + 562 + list_add(&lip->li_ail, &next_lip->li_ail); 563 + 564 + xfs_ail_check(ailp, lip); 567 565 return; 568 566 } 569 567 ··· 568 576 /*ARGSUSED*/ 569 577 STATIC xfs_log_item_t * 570 578 xfs_ail_delete( 571 - xfs_ail_entry_t *base, 579 + xfs_ail_t *ailp, 572 580 xfs_log_item_t *lip) 573 581 /* ARGSUSED */ 574 582 { 575 - xfs_ail_check(base, lip); 576 - lip->li_ail.ail_forw->li_ail.ail_back = lip->li_ail.ail_back; 577 - lip->li_ail.ail_back->li_ail.ail_forw = lip->li_ail.ail_forw; 578 - lip->li_ail.ail_forw = NULL; 579 - lip->li_ail.ail_back = NULL; 583 + xfs_ail_check(ailp, lip); 584 + 585 + list_del(&lip->li_ail); 580 586 581 587 return lip; 582 588 } ··· 585 595 */ 586 596 STATIC xfs_log_item_t * 587 597 xfs_ail_min( 588 - xfs_ail_entry_t *base) 598 + xfs_ail_t *ailp) 589 599 /* ARGSUSED */ 590 600 { 591 - register xfs_log_item_t *forw = base->ail_forw; 
592 - if (forw == (xfs_log_item_t*)base) { 601 + if (list_empty(&ailp->xa_ail)) 593 602 return NULL; 594 - } 595 - return forw; 603 + 604 + return list_first_entry(&ailp->xa_ail, xfs_log_item_t, li_ail); 596 605 } 597 606 598 607 /* ··· 601 612 */ 602 613 STATIC xfs_log_item_t * 603 614 xfs_ail_next( 604 - xfs_ail_entry_t *base, 615 + xfs_ail_t *ailp, 605 616 xfs_log_item_t *lip) 606 617 /* ARGSUSED */ 607 618 { 608 - if (lip->li_ail.ail_forw == (xfs_log_item_t*)base) { 619 + if (lip->li_ail.next == &ailp->xa_ail) 609 620 return NULL; 610 - } 611 - return lip->li_ail.ail_forw; 612 621 622 + return list_first_entry(&lip->li_ail, xfs_log_item_t, li_ail); 613 623 } 614 624 615 625 #ifdef DEBUG ··· 617 629 */ 618 630 STATIC void 619 631 xfs_ail_check( 620 - xfs_ail_entry_t *base, 632 + xfs_ail_t *ailp, 621 633 xfs_log_item_t *lip) 622 634 { 623 635 xfs_log_item_t *prev_lip; 624 636 625 - prev_lip = base->ail_forw; 626 - if (prev_lip == (xfs_log_item_t*)base) { 627 - /* 628 - * Make sure the pointers are correct when the list 629 - * is empty. 630 - */ 631 - ASSERT(base->ail_back == (xfs_log_item_t*)base); 637 + if (list_empty(&ailp->xa_ail)) 632 638 return; 633 - } 634 639 635 640 /* 636 641 * Check the next and previous entries are valid. 
637 642 */ 638 643 ASSERT((lip->li_flags & XFS_LI_IN_AIL) != 0); 639 - prev_lip = lip->li_ail.ail_back; 640 - if (prev_lip != (xfs_log_item_t*)base) { 641 - ASSERT(prev_lip->li_ail.ail_forw == lip); 644 + prev_lip = list_entry(lip->li_ail.prev, xfs_log_item_t, li_ail); 645 + if (&prev_lip->li_ail != &ailp->xa_ail) 642 646 ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) <= 0); 643 - } 644 - prev_lip = lip->li_ail.ail_forw; 645 - if (prev_lip != (xfs_log_item_t*)base) { 646 - ASSERT(prev_lip->li_ail.ail_back == lip); 647 + 648 + prev_lip = list_entry(lip->li_ail.next, xfs_log_item_t, li_ail); 649 + if (&prev_lip->li_ail != &ailp->xa_ail) 647 650 ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) >= 0); 648 - } 649 651 650 652 651 653 #ifdef XFS_TRANS_DEBUG 652 654 /* 653 - * Walk the list checking forward and backward pointers, 654 - * lsn ordering, and that every entry has the XFS_LI_IN_AIL 655 - * flag set. This is really expensive, so only do it when 656 - * specifically debugging the transaction subsystem. 655 + * Walk the list checking lsn ordering, and that every entry has the 656 + * XFS_LI_IN_AIL flag set. This is really expensive, so only do it 657 + * when specifically debugging the transaction subsystem. 657 658 */ 658 - prev_lip = (xfs_log_item_t*)base; 659 - while (lip != (xfs_log_item_t*)base) { 660 - if (prev_lip != (xfs_log_item_t*)base) { 661 - ASSERT(prev_lip->li_ail.ail_forw == lip); 659 + prev_lip = list_entry(&ailp->xa_ail, xfs_log_item_t, li_ail); 660 + list_for_each_entry(lip, &ailp->xa_ail, li_ail) { 661 + if (&prev_lip->li_ail != &ailp->xa_ail) 662 662 ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) <= 0); 663 - } 664 - ASSERT(lip->li_ail.ail_back == prev_lip); 665 663 ASSERT((lip->li_flags & XFS_LI_IN_AIL) != 0); 666 664 prev_lip = lip; 667 - lip = lip->li_ail.ail_forw; 668 665 } 669 - ASSERT(lip == (xfs_log_item_t*)base); 670 - ASSERT(base->ail_back == prev_lip); 671 666 #endif /* XFS_TRANS_DEBUG */ 672 667 } 673 668 #endif /* DEBUG */

+7 -8

fs/xfs/xfs_trans_buf.c

··· 304 304 if (tp == NULL) { 305 305 bp = xfs_buf_read_flags(target, blkno, len, flags | BUF_BUSY); 306 306 if (!bp) 307 - return XFS_ERROR(ENOMEM); 307 + return (flags & XFS_BUF_TRYLOCK) ? 308 + EAGAIN : XFS_ERROR(ENOMEM); 308 309 309 310 if ((bp != NULL) && (XFS_BUF_GETERROR(bp) != 0)) { 310 311 xfs_ioerror_alert("xfs_trans_read_buf", mp, ··· 354 353 ASSERT(!XFS_BUF_ISASYNC(bp)); 355 354 XFS_BUF_READ(bp); 356 355 xfsbdstrat(tp->t_mountp, bp); 357 - xfs_iowait(bp); 358 - if (XFS_BUF_GETERROR(bp) != 0) { 356 + error = xfs_iowait(bp); 357 + if (error) { 359 358 xfs_ioerror_alert("xfs_trans_read_buf", mp, 360 359 bp, blkno); 361 - error = XFS_BUF_GETERROR(bp); 362 360 xfs_buf_relse(bp); 363 361 /* 364 - * We can gracefully recover from most 365 - * read errors. Ones we can't are those 366 - * that happen after the transaction's 367 - * already dirty. 362 + * We can gracefully recover from most read 363 + * errors. Ones we can't are those that happen 364 + * after the transaction's already dirty. 368 365 */ 369 366 if (tp->t_flags & XFS_TRANS_DIRTY) 370 367 xfs_force_shutdown(tp->t_mountp,

+5

fs/xfs/xfs_types.h

··· 160 160 XFS_BTNUM_MAX 161 161 } xfs_btnum_t; 162 162 163 + struct xfs_name { 164 + const char *name; 165 + int len; 166 + }; 167 + 163 168 #endif /* __XFS_TYPES_H__ */

+2 -24

fs/xfs/xfs_utils.c

··· 40 40 #include "xfs_itable.h" 41 41 #include "xfs_utils.h" 42 42 43 - /* 44 - * xfs_get_dir_entry is used to get a reference to an inode given 45 - * its parent directory inode and the name of the file. It does 46 - * not lock the child inode, and it unlocks the directory before 47 - * returning. The directory's generation number is returned for 48 - * use by a later call to xfs_lock_dir_and_entry. 49 - */ 50 - int 51 - xfs_get_dir_entry( 52 - bhv_vname_t *dentry, 53 - xfs_inode_t **ipp) 54 - { 55 - bhv_vnode_t *vp; 56 - 57 - vp = VNAME_TO_VNODE(dentry); 58 - 59 - *ipp = xfs_vtoi(vp); 60 - if (!*ipp) 61 - return XFS_ERROR(ENOENT); 62 - VN_HOLD(vp); 63 - return 0; 64 - } 65 43 66 44 int 67 45 xfs_dir_lookup_int( 68 46 xfs_inode_t *dp, 69 47 uint lock_mode, 70 - bhv_vname_t *dentry, 48 + struct xfs_name *name, 71 49 xfs_ino_t *inum, 72 50 xfs_inode_t **ipp) 73 51 { ··· 53 75 54 76 xfs_itrace_entry(dp); 55 77 56 - error = xfs_dir_lookup(NULL, dp, VNAME(dentry), VNAMELEN(dentry), inum); 78 + error = xfs_dir_lookup(NULL, dp, name, inum); 57 79 if (!error) { 58 80 /* 59 81 * Unlock the directory. We do this because we can't

+7 -8

fs/xfs/xfs_utils.h

··· 21 21 #define IRELE(ip) VN_RELE(XFS_ITOV(ip)) 22 22 #define IHOLD(ip) VN_HOLD(XFS_ITOV(ip)) 23 23 24 - extern int xfs_get_dir_entry (bhv_vname_t *, xfs_inode_t **); 25 - extern int xfs_dir_lookup_int (xfs_inode_t *, uint, bhv_vname_t *, xfs_ino_t *, 26 - xfs_inode_t **); 27 - extern int xfs_truncate_file (xfs_mount_t *, xfs_inode_t *); 28 - extern int xfs_dir_ialloc (xfs_trans_t **, xfs_inode_t *, mode_t, xfs_nlink_t, 24 + extern int xfs_dir_lookup_int(xfs_inode_t *, uint, struct xfs_name *, 25 + xfs_ino_t *, xfs_inode_t **); 26 + extern int xfs_truncate_file(xfs_mount_t *, xfs_inode_t *); 27 + extern int xfs_dir_ialloc(xfs_trans_t **, xfs_inode_t *, mode_t, xfs_nlink_t, 29 28 xfs_dev_t, cred_t *, prid_t, int, 30 29 xfs_inode_t **, int *); 31 - extern int xfs_droplink (xfs_trans_t *, xfs_inode_t *); 32 - extern int xfs_bumplink (xfs_trans_t *, xfs_inode_t *); 33 - extern void xfs_bump_ino_vers2 (xfs_trans_t *, xfs_inode_t *); 30 + extern int xfs_droplink(xfs_trans_t *, xfs_inode_t *); 31 + extern int xfs_bumplink(xfs_trans_t *, xfs_inode_t *); 32 + extern void xfs_bump_ino_vers2(xfs_trans_t *, xfs_inode_t *); 34 33 35 34 #endif /* __XFS_UTILS_H__ */

+24 -52

fs/xfs/xfs_vfsops.c

··· 43 43 #include "xfs_error.h" 44 44 #include "xfs_bmap.h" 45 45 #include "xfs_rw.h" 46 - #include "xfs_refcache.h" 47 46 #include "xfs_buf_item.h" 48 47 #include "xfs_log_priv.h" 49 48 #include "xfs_dir2_trace.h" ··· 55 56 #include "xfs_fsops.h" 56 57 #include "xfs_vnodeops.h" 57 58 #include "xfs_vfsops.h" 59 + #include "xfs_utils.h" 58 60 59 61 60 62 int __init ··· 69 69 /* 70 70 * Initialize all of the zone allocators we use. 71 71 */ 72 + xfs_log_ticket_zone = kmem_zone_init(sizeof(xlog_ticket_t), 73 + "xfs_log_ticket"); 72 74 xfs_bmap_free_item_zone = kmem_zone_init(sizeof(xfs_bmap_free_item_t), 73 - "xfs_bmap_free_item"); 75 + "xfs_bmap_free_item"); 74 76 xfs_btree_cur_zone = kmem_zone_init(sizeof(xfs_btree_cur_t), 75 - "xfs_btree_cur"); 76 - xfs_trans_zone = kmem_zone_init(sizeof(xfs_trans_t), "xfs_trans"); 77 - xfs_da_state_zone = 78 - kmem_zone_init(sizeof(xfs_da_state_t), "xfs_da_state"); 77 + "xfs_btree_cur"); 78 + xfs_da_state_zone = kmem_zone_init(sizeof(xfs_da_state_t), 79 + "xfs_da_state"); 79 80 xfs_dabuf_zone = kmem_zone_init(sizeof(xfs_dabuf_t), "xfs_dabuf"); 80 81 xfs_ifork_zone = kmem_zone_init(sizeof(xfs_ifork_t), "xfs_ifork"); 82 + xfs_trans_zone = kmem_zone_init(sizeof(xfs_trans_t), "xfs_trans"); 81 83 xfs_acl_zone_init(xfs_acl_zone, "xfs_acl"); 82 84 xfs_mru_cache_init(); 83 85 xfs_filestream_init(); ··· 114 112 KM_ZONE_SPREAD, NULL); 115 113 xfs_ili_zone = 116 114 kmem_zone_init_flags(sizeof(xfs_inode_log_item_t), "xfs_ili", 117 - KM_ZONE_SPREAD, NULL); 118 - xfs_icluster_zone = 119 - kmem_zone_init_flags(sizeof(xfs_icluster_t), "xfs_icluster", 120 115 KM_ZONE_SPREAD, NULL); 121 116 122 117 /* ··· 152 153 extern kmem_zone_t *xfs_inode_zone; 153 154 extern kmem_zone_t *xfs_efd_zone; 154 155 extern kmem_zone_t *xfs_efi_zone; 155 - extern kmem_zone_t *xfs_icluster_zone; 156 156 157 157 xfs_cleanup_procfs(); 158 158 xfs_sysctl_unregister(); 159 - xfs_refcache_destroy(); 160 159 xfs_filestream_uninit(); 161 160 xfs_mru_cache_uninit(); 162 161 
xfs_acl_zone_destroy(xfs_acl_zone); ··· 186 189 kmem_zone_destroy(xfs_efi_zone); 187 190 kmem_zone_destroy(xfs_ifork_zone); 188 191 kmem_zone_destroy(xfs_ili_zone); 189 - kmem_zone_destroy(xfs_icluster_zone); 190 192 } 191 193 192 194 /* ··· 569 573 #ifdef HAVE_DMAPI 570 574 if (mp->m_flags & XFS_MOUNT_DMAPI) { 571 575 error = XFS_SEND_PREUNMOUNT(mp, 572 - rvp, DM_RIGHT_NULL, rvp, DM_RIGHT_NULL, 576 + rip, DM_RIGHT_NULL, rip, DM_RIGHT_NULL, 573 577 NULL, NULL, 0, 0, 574 578 (mp->m_dmevmask & (1<<DM_EVENT_PREUNMOUNT))? 575 579 0:DM_FLAGS_UNWANTED); ··· 580 584 0 : DM_FLAGS_UNWANTED; 581 585 } 582 586 #endif 583 - /* 584 - * First blow any referenced inode from this file system 585 - * out of the reference cache, and delete the timer. 586 - */ 587 - xfs_refcache_purge_mp(mp); 588 587 589 588 /* 590 589 * Blow away any referenced inode in the filestreams cache. ··· 598 607 /* 599 608 * Drop the reference count 600 609 */ 601 - VN_RELE(rvp); 610 + IRELE(rip); 602 611 603 612 /* 604 613 * If we're forcing a shutdown, typically because of a media error, ··· 620 629 /* Note: mp structure must still exist for 621 630 * XFS_SEND_UNMOUNT() call. 622 631 */ 623 - XFS_SEND_UNMOUNT(mp, error == 0 ? rvp : NULL, 632 + XFS_SEND_UNMOUNT(mp, error == 0 ? 
rip : NULL, 624 633 DM_RIGHT_NULL, 0, error, unmount_event_flags); 625 634 } 626 635 if (xfs_unmountfs_needed) { ··· 637 646 return XFS_ERROR(error); 638 647 } 639 648 640 - STATIC int 649 + STATIC void 641 650 xfs_quiesce_fs( 642 651 xfs_mount_t *mp) 643 652 { 644 653 int count = 0, pincount; 645 654 646 - xfs_refcache_purge_mp(mp); 647 655 xfs_flush_buftarg(mp->m_ddev_targp, 0); 648 656 xfs_finish_reclaim_all(mp, 0); 649 657 ··· 661 671 count++; 662 672 } 663 673 } while (count < 2); 664 - 665 - return 0; 666 674 } 667 675 668 676 /* ··· 672 684 xfs_attr_quiesce( 673 685 xfs_mount_t *mp) 674 686 { 687 + int error = 0; 688 + 675 689 /* wait for all modifications to complete */ 676 690 while (atomic_read(&mp->m_active_trans) > 0) 677 691 delay(100); ··· 684 694 ASSERT_ALWAYS(atomic_read(&mp->m_active_trans) == 0); 685 695 686 696 /* Push the superblock and write an unmount record */ 687 - xfs_log_sbcount(mp, 1); 697 + error = xfs_log_sbcount(mp, 1); 698 + if (error) 699 + xfs_fs_cmn_err(CE_WARN, mp, 700 + "xfs_attr_quiesce: failed to log sb changes. " 701 + "Frozen image may not be consistent."); 688 702 xfs_log_unmount_write(mp); 689 703 xfs_unmountfs_writesb(mp); 690 704 } ··· 784 790 goto fscorrupt_out2; 785 791 786 792 if (rbmip) { 787 - VN_RELE(XFS_ITOV(rbmip)); 788 - VN_RELE(XFS_ITOV(rsumip)); 793 + IRELE(rbmip); 794 + IRELE(rsumip); 789 795 } 790 796 791 797 xfs_iunlock(rip, XFS_ILOCK_EXCL); ··· 1163 1169 * above, then wait until after we've unlocked 1164 1170 * the inode to release the reference. This is 1165 1171 * because we can be already holding the inode 1166 - * lock when VN_RELE() calls xfs_inactive(). 1172 + * lock when IRELE() calls xfs_inactive(). 1167 1173 * 1168 1174 * Make sure to drop the mount lock before calling 1169 - * VN_RELE() so that we don't trip over ourselves if 1175 + * IRELE() so that we don't trip over ourselves if 1170 1176 * we have to go for the mount lock again in the 1171 1177 * inactive code. 
1172 1178 */ ··· 1174 1180 IPOINTER_INSERT(ip, mp); 1175 1181 } 1176 1182 1177 - VN_RELE(vp); 1183 + IRELE(ip); 1178 1184 1179 1185 vnode_refed = B_FALSE; 1180 1186 } ··· 1317 1323 } 1318 1324 1319 1325 /* 1320 - * If this is the periodic sync, then kick some entries out of 1321 - * the reference cache. This ensures that idle entries are 1322 - * eventually kicked out of the cache. 1323 - */ 1324 - if (flags & SYNC_REFCACHE) { 1325 - if (flags & SYNC_WAIT) 1326 - xfs_refcache_purge_mp(mp); 1327 - else 1328 - xfs_refcache_purge_some(mp); 1329 - } 1330 - 1331 - /* 1332 - * If asked, update the disk superblock with incore counter values if we 1333 - * are using non-persistent counters so that they don't get too far out 1334 - * of sync if we crash or get a forced shutdown. We don't want to force 1335 - * this to disk, just get a transaction into the iclogs.... 1336 - */ 1337 - if (flags & SYNC_SUPER) 1338 - xfs_log_sbcount(mp, 0); 1339 - 1340 - /* 1341 1326 * Now check to see if the log needs a "dummy" transaction. 1342 1327 */ 1343 - 1344 1328 if (!(flags & SYNC_REMOUNT) && xfs_log_need_covered(mp)) { 1345 1329 xfs_trans_t *tp; 1346 1330 xfs_inode_t *ip;

+159 -346

fs/xfs/xfs_vnodeops.c

··· 48 48 #include "xfs_quota.h" 49 49 #include "xfs_utils.h" 50 50 #include "xfs_rtalloc.h" 51 - #include "xfs_refcache.h" 52 51 #include "xfs_trans_space.h" 53 52 #include "xfs_log_priv.h" 54 53 #include "xfs_filestream.h" ··· 326 327 if (DM_EVENT_ENABLED(ip, DM_EVENT_TRUNCATE) && 327 328 !(flags & ATTR_DMI)) { 328 329 int dmflags = AT_DELAY_FLAG(flags) | DM_SEM_FLAG_WR; 329 - code = XFS_SEND_DATA(mp, DM_EVENT_TRUNCATE, vp, 330 + code = XFS_SEND_DATA(mp, DM_EVENT_TRUNCATE, ip, 330 331 vap->va_size, 0, dmflags, NULL); 331 332 if (code) { 332 333 lock_flags = 0; ··· 633 634 * Truncate file. Must have write permission and not be a directory. 634 635 */ 635 636 if (mask & XFS_AT_SIZE) { 637 + /* 638 + * Only change the c/mtime if we are changing the size 639 + * or we are explicitly asked to change it. This handles 640 + * the semantic difference between truncate() and ftruncate() 641 + * as implemented in the VFS. 642 + */ 643 + if (vap->va_size != ip->i_size || (mask & XFS_AT_CTIME)) 644 + timeflags |= XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG; 645 + 636 646 if (vap->va_size > ip->i_size) { 637 647 xfs_igrow_finish(tp, ip, vap->va_size, 638 648 !(flags & ATTR_DMI)); ··· 670 662 */ 671 663 xfs_iflags_set(ip, XFS_ITRUNCATED); 672 664 } 673 - /* 674 - * Have to do this even if the file's size doesn't change. 
675 - */ 676 - timeflags |= XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG; 677 665 } 678 666 679 667 /* ··· 881 877 882 878 if (DM_EVENT_ENABLED(ip, DM_EVENT_ATTRIBUTE) && 883 879 !(flags & ATTR_DMI)) { 884 - (void) XFS_SEND_NAMESP(mp, DM_EVENT_ATTRIBUTE, vp, DM_RIGHT_NULL, 880 + (void) XFS_SEND_NAMESP(mp, DM_EVENT_ATTRIBUTE, ip, DM_RIGHT_NULL, 885 881 NULL, DM_RIGHT_NULL, NULL, NULL, 886 882 0, 0, AT_DELAY_FLAG(flags)); 887 883 } ··· 1447 1443 tp = *tpp; 1448 1444 mp = ip->i_mount; 1449 1445 ASSERT(ip->i_d.di_forkoff != 0); 1450 - xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 1446 + error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 1451 1447 xfs_iunlock(ip, XFS_ILOCK_EXCL); 1448 + if (error) 1449 + goto error_unlock; 1452 1450 1453 1451 error = xfs_attr_inactive(ip); 1454 - if (error) { 1455 - *tpp = NULL; 1456 - xfs_iunlock(ip, XFS_IOLOCK_EXCL); 1457 - return error; /* goto out */ 1458 - } 1452 + if (error) 1453 + goto error_unlock; 1459 1454 1460 1455 tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE); 1461 1456 error = xfs_trans_reserve(tp, 0, 1462 1457 XFS_IFREE_LOG_RES(mp), 1463 1458 0, XFS_TRANS_PERM_LOG_RES, 1464 1459 XFS_INACTIVE_LOG_COUNT); 1465 - if (error) { 1466 - ASSERT(XFS_FORCED_SHUTDOWN(mp)); 1467 - xfs_trans_cancel(tp, 0); 1468 - *tpp = NULL; 1469 - xfs_iunlock(ip, XFS_IOLOCK_EXCL); 1470 - return error; 1471 - } 1460 + if (error) 1461 + goto error_cancel; 1472 1462 1473 1463 xfs_ilock(ip, XFS_ILOCK_EXCL); 1474 1464 xfs_trans_ijoin(tp, ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); ··· 1473 1475 1474 1476 *tpp = tp; 1475 1477 return 0; 1478 + 1479 + error_cancel: 1480 + ASSERT(XFS_FORCED_SHUTDOWN(mp)); 1481 + xfs_trans_cancel(tp, 0); 1482 + error_unlock: 1483 + *tpp = NULL; 1484 + xfs_iunlock(ip, XFS_IOLOCK_EXCL); 1485 + return error; 1476 1486 } 1477 1487 1478 1488 int ··· 1525 1519 if (truncated && VN_DIRTY(vp) && ip->i_delayed_blks > 0) 1526 1520 xfs_flush_pages(ip, 0, -1, XFS_B_ASYNC, FI_NONE); 1527 1521 } 1528 - 1529 - #ifdef HAVE_REFCACHE 1530 - /* If we 
are in the NFS reference cache then don't do this now */ 1531 - if (ip->i_refcache) 1532 - return 0; 1533 - #endif 1534 1522 1535 1523 if (ip->i_d.di_nlink != 0) { 1536 1524 if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) && ··· 1588 1588 1589 1589 mp = ip->i_mount; 1590 1590 1591 - if (ip->i_d.di_nlink == 0 && DM_EVENT_ENABLED(ip, DM_EVENT_DESTROY)) { 1592 - (void) XFS_SEND_DESTROY(mp, vp, DM_RIGHT_NULL); 1593 - } 1591 + if (ip->i_d.di_nlink == 0 && DM_EVENT_ENABLED(ip, DM_EVENT_DESTROY)) 1592 + XFS_SEND_DESTROY(mp, ip, DM_RIGHT_NULL); 1594 1593 1595 1594 error = 0; 1596 1595 ··· 1743 1744 XFS_TRANS_MOD_DQUOT_BYINO(mp, tp, ip, XFS_TRANS_DQ_ICOUNT, -1); 1744 1745 1745 1746 /* 1746 - * Just ignore errors at this point. There is 1747 - * nothing we can do except to try to keep going. 1747 + * Just ignore errors at this point. There is nothing we can 1748 + * do except to try to keep going. Make sure it's not a silent 1749 + * error. 1748 1750 */ 1749 - (void) xfs_bmap_finish(&tp, &free_list, &committed); 1750 - (void) xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 1751 + error = xfs_bmap_finish(&tp, &free_list, &committed); 1752 + if (error) 1753 + xfs_fs_cmn_err(CE_NOTE, mp, "xfs_inactive: " 1754 + "xfs_bmap_finish() returned error %d", error); 1755 + error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 1756 + if (error) 1757 + xfs_fs_cmn_err(CE_NOTE, mp, "xfs_inactive: " 1758 + "xfs_trans_commit() returned error %d", error); 1751 1759 } 1752 1760 /* 1753 1761 * Release the dquots held by inode, if any. 
··· 1771 1765 int 1772 1766 xfs_lookup( 1773 1767 xfs_inode_t *dp, 1774 - bhv_vname_t *dentry, 1775 - bhv_vnode_t **vpp) 1768 + struct xfs_name *name, 1769 + xfs_inode_t **ipp) 1776 1770 { 1777 1771 xfs_inode_t *ip; 1778 1772 xfs_ino_t e_inum; ··· 1785 1779 return XFS_ERROR(EIO); 1786 1780 1787 1781 lock_mode = xfs_ilock_map_shared(dp); 1788 - error = xfs_dir_lookup_int(dp, lock_mode, dentry, &e_inum, &ip); 1782 + error = xfs_dir_lookup_int(dp, lock_mode, name, &e_inum, &ip); 1789 1783 if (!error) { 1790 - *vpp = XFS_ITOV(ip); 1784 + *ipp = ip; 1791 1785 xfs_itrace_ref(ip); 1792 1786 } 1793 1787 xfs_iunlock_map_shared(dp, lock_mode); ··· 1797 1791 int 1798 1792 xfs_create( 1799 1793 xfs_inode_t *dp, 1800 - bhv_vname_t *dentry, 1794 + struct xfs_name *name, 1801 1795 mode_t mode, 1802 1796 xfs_dev_t rdev, 1803 - bhv_vnode_t **vpp, 1797 + xfs_inode_t **ipp, 1804 1798 cred_t *credp) 1805 1799 { 1806 - char *name = VNAME(dentry); 1807 - xfs_mount_t *mp = dp->i_mount; 1808 - bhv_vnode_t *dir_vp = XFS_ITOV(dp); 1800 + xfs_mount_t *mp = dp->i_mount; 1809 1801 xfs_inode_t *ip; 1810 - bhv_vnode_t *vp = NULL; 1811 1802 xfs_trans_t *tp; 1812 - int error; 1803 + int error; 1813 1804 xfs_bmap_free_t free_list; 1814 1805 xfs_fsblock_t first_block; 1815 1806 boolean_t unlock_dp_on_error = B_FALSE; ··· 1816 1813 xfs_prid_t prid; 1817 1814 struct xfs_dquot *udqp, *gdqp; 1818 1815 uint resblks; 1819 - int namelen; 1820 1816 1821 - ASSERT(!*vpp); 1817 + ASSERT(!*ipp); 1822 1818 xfs_itrace_entry(dp); 1823 - 1824 - namelen = VNAMELEN(dentry); 1825 1819 1826 1820 if (DM_EVENT_ENABLED(dp, DM_EVENT_CREATE)) { 1827 1821 error = XFS_SEND_NAMESP(mp, DM_EVENT_CREATE, 1828 - dir_vp, DM_RIGHT_NULL, NULL, 1829 - DM_RIGHT_NULL, name, NULL, 1822 + dp, DM_RIGHT_NULL, NULL, 1823 + DM_RIGHT_NULL, name->name, NULL, 1830 1824 mode, 0, 0); 1831 1825 1832 1826 if (error) ··· 1855 1855 1856 1856 tp = xfs_trans_alloc(mp, XFS_TRANS_CREATE); 1857 1857 cancel_flags = XFS_TRANS_RELEASE_LOG_RES; 1858 - resblks 
= XFS_CREATE_SPACE_RES(mp, namelen); 1858 + resblks = XFS_CREATE_SPACE_RES(mp, name->len); 1859 1859 /* 1860 1860 * Initially assume that the file does not exist and 1861 1861 * reserve the resources for that case. If that is not ··· 1888 1888 if (error) 1889 1889 goto error_return; 1890 1890 1891 - if (resblks == 0 && (error = xfs_dir_canenter(tp, dp, name, namelen))) 1891 + error = xfs_dir_canenter(tp, dp, name, resblks); 1892 + if (error) 1892 1893 goto error_return; 1893 1894 error = xfs_dir_ialloc(&tp, dp, mode, 1, 1894 1895 rdev, credp, prid, resblks > 0, ··· 1915 1914 * the transaction cancel unlocking dp so don't do it explicitly in the 1916 1915 * error path. 1917 1916 */ 1918 - VN_HOLD(dir_vp); 1917 + IHOLD(dp); 1919 1918 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); 1920 1919 unlock_dp_on_error = B_FALSE; 1921 1920 1922 - error = xfs_dir_createname(tp, dp, name, namelen, ip->i_ino, 1921 + error = xfs_dir_createname(tp, dp, name, ip->i_ino, 1923 1922 &first_block, &free_list, resblks ? 1924 1923 resblks - XFS_IALLOC_SPACE_RES(mp) : 0); 1925 1924 if (error) { ··· 1953 1952 * vnode to the caller, we bump the vnode ref count now. 1954 1953 */ 1955 1954 IHOLD(ip); 1956 - vp = XFS_ITOV(ip); 1957 1955 1958 1956 error = xfs_bmap_finish(&tp, &free_list, &committed); 1959 1957 if (error) { ··· 1970 1970 XFS_QM_DQRELE(mp, udqp); 1971 1971 XFS_QM_DQRELE(mp, gdqp); 1972 1972 1973 - *vpp = vp; 1973 + *ipp = ip; 1974 1974 1975 1975 /* Fallthrough to std_return with error = 0 */ 1976 1976 1977 1977 std_return: 1978 - if ((*vpp || (error != 0 && dm_event_sent != 0)) && 1978 + if ((*ipp || (error != 0 && dm_event_sent != 0)) && 1979 1979 DM_EVENT_ENABLED(dp, DM_EVENT_POSTCREATE)) { 1980 1980 (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTCREATE, 1981 - dir_vp, DM_RIGHT_NULL, 1982 - *vpp ? vp:NULL, 1983 - DM_RIGHT_NULL, name, NULL, 1981 + dp, DM_RIGHT_NULL, 1982 + *ipp ? 
ip : NULL, 1983 + DM_RIGHT_NULL, name->name, NULL, 1984 1984 mode, error, 0); 1985 1985 } 1986 1986 return error; ··· 2272 2272 int 2273 2273 xfs_remove( 2274 2274 xfs_inode_t *dp, 2275 - bhv_vname_t *dentry) 2275 + struct xfs_name *name, 2276 + xfs_inode_t *ip) 2276 2277 { 2277 - bhv_vnode_t *dir_vp = XFS_ITOV(dp); 2278 - char *name = VNAME(dentry); 2279 2278 xfs_mount_t *mp = dp->i_mount; 2280 - xfs_inode_t *ip; 2281 2279 xfs_trans_t *tp = NULL; 2282 2280 int error = 0; 2283 2281 xfs_bmap_free_t free_list; 2284 2282 xfs_fsblock_t first_block; 2285 2283 int cancel_flags; 2286 2284 int committed; 2287 - int dm_di_mode = 0; 2288 2285 int link_zero; 2289 2286 uint resblks; 2290 - int namelen; 2291 2287 2292 2288 xfs_itrace_entry(dp); 2293 2289 2294 2290 if (XFS_FORCED_SHUTDOWN(mp)) 2295 2291 return XFS_ERROR(EIO); 2296 2292 2297 - namelen = VNAMELEN(dentry); 2298 - 2299 - if (!xfs_get_dir_entry(dentry, &ip)) { 2300 - dm_di_mode = ip->i_d.di_mode; 2301 - IRELE(ip); 2302 - } 2303 - 2304 2293 if (DM_EVENT_ENABLED(dp, DM_EVENT_REMOVE)) { 2305 - error = XFS_SEND_NAMESP(mp, DM_EVENT_REMOVE, dir_vp, 2306 - DM_RIGHT_NULL, NULL, DM_RIGHT_NULL, 2307 - name, NULL, dm_di_mode, 0, 0); 2294 + error = XFS_SEND_NAMESP(mp, DM_EVENT_REMOVE, dp, DM_RIGHT_NULL, 2295 + NULL, DM_RIGHT_NULL, name->name, NULL, 2296 + ip->i_d.di_mode, 0, 0); 2308 2297 if (error) 2309 2298 return error; 2310 2299 } 2311 - 2312 - /* From this point on, return through std_return */ 2313 - ip = NULL; 2314 2300 2315 2301 /* 2316 2302 * We need to get a reference to ip before we get our log ··· 2310 2324 * when we call xfs_iget. Instead we get an unlocked reference 2311 2325 * to the inode before getting our log reservation. 
2312 2326 */ 2313 - error = xfs_get_dir_entry(dentry, &ip); 2314 - if (error) { 2315 - REMOVE_DEBUG_TRACE(__LINE__); 2316 - goto std_return; 2317 - } 2318 - 2319 - dm_di_mode = ip->i_d.di_mode; 2327 + IHOLD(ip); 2320 2328 2321 2329 xfs_itrace_entry(ip); 2322 2330 xfs_itrace_ref(ip); ··· 2378 2398 * Entry must exist since we did a lookup in xfs_lock_dir_and_entry. 2379 2399 */ 2380 2400 XFS_BMAP_INIT(&free_list, &first_block); 2381 - error = xfs_dir_removename(tp, dp, name, namelen, ip->i_ino, 2401 + error = xfs_dir_removename(tp, dp, name, ip->i_ino, 2382 2402 &first_block, &free_list, 0); 2383 2403 if (error) { 2384 2404 ASSERT(error != ENOENT); ··· 2429 2449 } 2430 2450 2431 2451 /* 2432 - * Before we drop our extra reference to the inode, purge it 2433 - * from the refcache if it is there. By waiting until afterwards 2434 - * to do the IRELE, we ensure that we won't go inactive in the 2435 - * xfs_refcache_purge_ip routine (although that would be OK). 2436 - */ 2437 - xfs_refcache_purge_ip(ip); 2438 - 2439 - /* 2440 2452 * If we are using filestreams, kill the stream association. 2441 2453 * If the file is still open it may get a new one but that 2442 2454 * will get killed on last close in xfs_close() so we don't ··· 2444 2472 std_return: 2445 2473 if (DM_EVENT_ENABLED(dp, DM_EVENT_POSTREMOVE)) { 2446 2474 (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTREMOVE, 2447 - dir_vp, DM_RIGHT_NULL, 2475 + dp, DM_RIGHT_NULL, 2448 2476 NULL, DM_RIGHT_NULL, 2449 - name, NULL, dm_di_mode, error, 0); 2477 + name->name, NULL, ip->i_d.di_mode, error, 0); 2450 2478 } 2451 2479 return error; 2452 2480 ··· 2467 2495 cancel_flags |= XFS_TRANS_ABORT; 2468 2496 xfs_trans_cancel(tp, cancel_flags); 2469 2497 2470 - /* 2471 - * Before we drop our extra reference to the inode, purge it 2472 - * from the refcache if it is there. By waiting until afterwards 2473 - * to do the IRELE, we ensure that we won't go inactive in the 2474 - * xfs_refcache_purge_ip routine (although that would be OK). 
2475 - */ 2476 - xfs_refcache_purge_ip(ip); 2477 - 2478 2498 IRELE(ip); 2479 2499 2480 2500 goto std_return; ··· 2475 2511 int 2476 2512 xfs_link( 2477 2513 xfs_inode_t *tdp, 2478 - bhv_vnode_t *src_vp, 2479 - bhv_vname_t *dentry) 2514 + xfs_inode_t *sip, 2515 + struct xfs_name *target_name) 2480 2516 { 2481 - bhv_vnode_t *target_dir_vp = XFS_ITOV(tdp); 2482 2517 xfs_mount_t *mp = tdp->i_mount; 2483 - xfs_inode_t *sip = xfs_vtoi(src_vp); 2484 2518 xfs_trans_t *tp; 2485 2519 xfs_inode_t *ips[2]; 2486 2520 int error; ··· 2487 2525 int cancel_flags; 2488 2526 int committed; 2489 2527 int resblks; 2490 - char *target_name = VNAME(dentry); 2491 - int target_namelen; 2492 2528 2493 2529 xfs_itrace_entry(tdp); 2494 - xfs_itrace_entry(xfs_vtoi(src_vp)); 2530 + xfs_itrace_entry(sip); 2495 2531 2496 - target_namelen = VNAMELEN(dentry); 2497 - ASSERT(!VN_ISDIR(src_vp)); 2532 + ASSERT(!S_ISDIR(sip->i_d.di_mode)); 2498 2533 2499 2534 if (XFS_FORCED_SHUTDOWN(mp)) 2500 2535 return XFS_ERROR(EIO); 2501 2536 2502 2537 if (DM_EVENT_ENABLED(tdp, DM_EVENT_LINK)) { 2503 2538 error = XFS_SEND_NAMESP(mp, DM_EVENT_LINK, 2504 - target_dir_vp, DM_RIGHT_NULL, 2505 - src_vp, DM_RIGHT_NULL, 2506 - target_name, NULL, 0, 0, 0); 2539 + tdp, DM_RIGHT_NULL, 2540 + sip, DM_RIGHT_NULL, 2541 + target_name->name, NULL, 0, 0, 0); 2507 2542 if (error) 2508 2543 return error; 2509 2544 } ··· 2515 2556 2516 2557 tp = xfs_trans_alloc(mp, XFS_TRANS_LINK); 2517 2558 cancel_flags = XFS_TRANS_RELEASE_LOG_RES; 2518 - resblks = XFS_LINK_SPACE_RES(mp, target_namelen); 2559 + resblks = XFS_LINK_SPACE_RES(mp, target_name->len); 2519 2560 error = xfs_trans_reserve(tp, resblks, XFS_LINK_LOG_RES(mp), 0, 2520 2561 XFS_TRANS_PERM_LOG_RES, XFS_LINK_LOG_COUNT); 2521 2562 if (error == ENOSPC) { ··· 2543 2584 * xfs_trans_cancel will both unlock the inodes and 2544 2585 * decrement the associated ref counts. 
2545 2586 */ 2546 - VN_HOLD(src_vp); 2547 - VN_HOLD(target_dir_vp); 2587 + IHOLD(sip); 2588 + IHOLD(tdp); 2548 2589 xfs_trans_ijoin(tp, sip, XFS_ILOCK_EXCL); 2549 2590 xfs_trans_ijoin(tp, tdp, XFS_ILOCK_EXCL); 2550 2591 ··· 2567 2608 goto error_return; 2568 2609 } 2569 2610 2570 - if (resblks == 0 && 2571 - (error = xfs_dir_canenter(tp, tdp, target_name, target_namelen))) 2611 + error = xfs_dir_canenter(tp, tdp, target_name, resblks); 2612 + if (error) 2572 2613 goto error_return; 2573 2614 2574 2615 XFS_BMAP_INIT(&free_list, &first_block); 2575 2616 2576 - error = xfs_dir_createname(tp, tdp, target_name, target_namelen, 2577 - sip->i_ino, &first_block, &free_list, 2578 - resblks); 2617 + error = xfs_dir_createname(tp, tdp, target_name, sip->i_ino, 2618 + &first_block, &free_list, resblks); 2579 2619 if (error) 2580 2620 goto abort_return; 2581 2621 xfs_ichgtime(tdp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); ··· 2608 2650 std_return: 2609 2651 if (DM_EVENT_ENABLED(sip, DM_EVENT_POSTLINK)) { 2610 2652 (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTLINK, 2611 - target_dir_vp, DM_RIGHT_NULL, 2612 - src_vp, DM_RIGHT_NULL, 2613 - target_name, NULL, 0, error, 0); 2653 + tdp, DM_RIGHT_NULL, 2654 + sip, DM_RIGHT_NULL, 2655 + target_name->name, NULL, 0, error, 0); 2614 2656 } 2615 2657 return error; 2616 2658 ··· 2627 2669 int 2628 2670 xfs_mkdir( 2629 2671 xfs_inode_t *dp, 2630 - bhv_vname_t *dentry, 2672 + struct xfs_name *dir_name, 2631 2673 mode_t mode, 2632 - bhv_vnode_t **vpp, 2674 + xfs_inode_t **ipp, 2633 2675 cred_t *credp) 2634 2676 { 2635 - bhv_vnode_t *dir_vp = XFS_ITOV(dp); 2636 - char *dir_name = VNAME(dentry); 2637 - int dir_namelen = VNAMELEN(dentry); 2638 2677 xfs_mount_t *mp = dp->i_mount; 2639 2678 xfs_inode_t *cdp; /* inode of created dir */ 2640 - bhv_vnode_t *cvp; /* vnode of created dir */ 2641 2679 xfs_trans_t *tp; 2642 2680 int cancel_flags; 2643 2681 int error; ··· 2654 2700 2655 2701 if (DM_EVENT_ENABLED(dp, DM_EVENT_CREATE)) { 2656 2702 error = 
XFS_SEND_NAMESP(mp, DM_EVENT_CREATE, 2657 - dir_vp, DM_RIGHT_NULL, NULL, 2658 - DM_RIGHT_NULL, dir_name, NULL, 2703 + dp, DM_RIGHT_NULL, NULL, 2704 + DM_RIGHT_NULL, dir_name->name, NULL, 2659 2705 mode, 0, 0); 2660 2706 if (error) 2661 2707 return error; ··· 2684 2730 2685 2731 tp = xfs_trans_alloc(mp, XFS_TRANS_MKDIR); 2686 2732 cancel_flags = XFS_TRANS_RELEASE_LOG_RES; 2687 - resblks = XFS_MKDIR_SPACE_RES(mp, dir_namelen); 2733 + resblks = XFS_MKDIR_SPACE_RES(mp, dir_name->len); 2688 2734 error = xfs_trans_reserve(tp, resblks, XFS_MKDIR_LOG_RES(mp), 0, 2689 2735 XFS_TRANS_PERM_LOG_RES, XFS_MKDIR_LOG_COUNT); 2690 2736 if (error == ENOSPC) { ··· 2716 2762 if (error) 2717 2763 goto error_return; 2718 2764 2719 - if (resblks == 0 && 2720 - (error = xfs_dir_canenter(tp, dp, dir_name, dir_namelen))) 2765 + error = xfs_dir_canenter(tp, dp, dir_name, resblks); 2766 + if (error) 2721 2767 goto error_return; 2722 2768 /* 2723 2769 * create the directory inode. ··· 2740 2786 * from here on will result in the transaction cancel 2741 2787 * unlocking dp so don't do it explicitly in the error path. 2742 2788 */ 2743 - VN_HOLD(dir_vp); 2789 + IHOLD(dp); 2744 2790 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); 2745 2791 unlock_dp_on_error = B_FALSE; 2746 2792 2747 2793 XFS_BMAP_INIT(&free_list, &first_block); 2748 2794 2749 - error = xfs_dir_createname(tp, dp, dir_name, dir_namelen, cdp->i_ino, 2750 - &first_block, &free_list, resblks ? 2751 - resblks - XFS_IALLOC_SPACE_RES(mp) : 0); 2795 + error = xfs_dir_createname(tp, dp, dir_name, cdp->i_ino, 2796 + &first_block, &free_list, resblks ? 
2797 + resblks - XFS_IALLOC_SPACE_RES(mp) : 0); 2752 2798 if (error) { 2753 2799 ASSERT(error != ENOSPC); 2754 2800 goto error1; ··· 2771 2817 if (error) 2772 2818 goto error2; 2773 2819 2774 - cvp = XFS_ITOV(cdp); 2775 - 2776 2820 created = B_TRUE; 2777 2821 2778 - *vpp = cvp; 2822 + *ipp = cdp; 2779 2823 IHOLD(cdp); 2780 2824 2781 2825 /* ··· 2810 2858 if ((created || (error != 0 && dm_event_sent != 0)) && 2811 2859 DM_EVENT_ENABLED(dp, DM_EVENT_POSTCREATE)) { 2812 2860 (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTCREATE, 2813 - dir_vp, DM_RIGHT_NULL, 2814 - created ? XFS_ITOV(cdp):NULL, 2861 + dp, DM_RIGHT_NULL, 2862 + created ? cdp : NULL, 2815 2863 DM_RIGHT_NULL, 2816 - dir_name, NULL, 2864 + dir_name->name, NULL, 2817 2865 mode, error, 0); 2818 2866 } 2819 2867 return error; ··· 2837 2885 int 2838 2886 xfs_rmdir( 2839 2887 xfs_inode_t *dp, 2840 - bhv_vname_t *dentry) 2888 + struct xfs_name *name, 2889 + xfs_inode_t *cdp) 2841 2890 { 2842 2891 bhv_vnode_t *dir_vp = XFS_ITOV(dp); 2843 - char *name = VNAME(dentry); 2844 - int namelen = VNAMELEN(dentry); 2845 2892 xfs_mount_t *mp = dp->i_mount; 2846 - xfs_inode_t *cdp; /* child directory */ 2847 2893 xfs_trans_t *tp; 2848 2894 int error; 2849 2895 xfs_bmap_free_t free_list; 2850 2896 xfs_fsblock_t first_block; 2851 2897 int cancel_flags; 2852 2898 int committed; 2853 - int dm_di_mode = S_IFDIR; 2854 2899 int last_cdp_link; 2855 2900 uint resblks; 2856 2901 ··· 2856 2907 if (XFS_FORCED_SHUTDOWN(mp)) 2857 2908 return XFS_ERROR(EIO); 2858 2909 2859 - if (!xfs_get_dir_entry(dentry, &cdp)) { 2860 - dm_di_mode = cdp->i_d.di_mode; 2861 - IRELE(cdp); 2862 - } 2863 - 2864 2910 if (DM_EVENT_ENABLED(dp, DM_EVENT_REMOVE)) { 2865 2911 error = XFS_SEND_NAMESP(mp, DM_EVENT_REMOVE, 2866 - dir_vp, DM_RIGHT_NULL, 2867 - NULL, DM_RIGHT_NULL, 2868 - name, NULL, dm_di_mode, 0, 0); 2912 + dp, DM_RIGHT_NULL, 2913 + NULL, DM_RIGHT_NULL, name->name, 2914 + NULL, cdp->i_d.di_mode, 0, 0); 2869 2915 if (error) 2870 2916 return XFS_ERROR(error); 
2871 2917 } 2872 - 2873 - /* Return through std_return after this point. */ 2874 - 2875 - cdp = NULL; 2876 2918 2877 2919 /* 2878 2920 * We need to get a reference to cdp before we get our log ··· 2877 2937 * when we call xfs_iget. Instead we get an unlocked reference 2878 2938 * to the inode before getting our log reservation. 2879 2939 */ 2880 - error = xfs_get_dir_entry(dentry, &cdp); 2881 - if (error) { 2882 - REMOVE_DEBUG_TRACE(__LINE__); 2883 - goto std_return; 2884 - } 2885 - mp = dp->i_mount; 2886 - dm_di_mode = cdp->i_d.di_mode; 2940 + IHOLD(cdp); 2887 2941 2888 2942 /* 2889 2943 * Get the dquots for the inodes. ··· 2954 3020 goto error_return; 2955 3021 } 2956 3022 2957 - error = xfs_dir_removename(tp, dp, name, namelen, cdp->i_ino, 3023 + error = xfs_dir_removename(tp, dp, name, cdp->i_ino, 2958 3024 &first_block, &free_list, resblks); 2959 3025 if (error) 2960 3026 goto error1; ··· 3032 3098 std_return: 3033 3099 if (DM_EVENT_ENABLED(dp, DM_EVENT_POSTREMOVE)) { 3034 3100 (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTREMOVE, 3035 - dir_vp, DM_RIGHT_NULL, 3101 + dp, DM_RIGHT_NULL, 3036 3102 NULL, DM_RIGHT_NULL, 3037 - name, NULL, dm_di_mode, 3103 + name->name, NULL, cdp->i_d.di_mode, 3038 3104 error, 0); 3039 3105 } 3040 3106 return error; ··· 3052 3118 int 3053 3119 xfs_symlink( 3054 3120 xfs_inode_t *dp, 3055 - bhv_vname_t *dentry, 3056 - char *target_path, 3121 + struct xfs_name *link_name, 3122 + const char *target_path, 3057 3123 mode_t mode, 3058 - bhv_vnode_t **vpp, 3124 + xfs_inode_t **ipp, 3059 3125 cred_t *credp) 3060 3126 { 3061 - bhv_vnode_t *dir_vp = XFS_ITOV(dp); 3062 3127 xfs_mount_t *mp = dp->i_mount; 3063 3128 xfs_trans_t *tp; 3064 3129 xfs_inode_t *ip; ··· 3073 3140 int nmaps; 3074 3141 xfs_bmbt_irec_t mval[SYMLINK_MAPS]; 3075 3142 xfs_daddr_t d; 3076 - char *cur_chunk; 3143 + const char *cur_chunk; 3077 3144 int byte_cnt; 3078 3145 int n; 3079 3146 xfs_buf_t *bp; 3080 3147 xfs_prid_t prid; 3081 3148 struct xfs_dquot *udqp, *gdqp; 3082 3149 
uint resblks; 3083 - char *link_name = VNAME(dentry); 3084 - int link_namelen; 3085 3150 3086 - *vpp = NULL; 3151 + *ipp = NULL; 3087 3152 error = 0; 3088 3153 ip = NULL; 3089 3154 tp = NULL; ··· 3091 3160 if (XFS_FORCED_SHUTDOWN(mp)) 3092 3161 return XFS_ERROR(EIO); 3093 3162 3094 - link_namelen = VNAMELEN(dentry); 3095 - 3096 3163 /* 3097 3164 * Check component lengths of the target path name. 3098 3165 */ 3099 3166 pathlen = strlen(target_path); 3100 3167 if (pathlen >= MAXPATHLEN) /* total string too long */ 3101 3168 return XFS_ERROR(ENAMETOOLONG); 3102 - if (pathlen >= MAXNAMELEN) { /* is any component too long? */ 3103 - int len, total; 3104 - char *path; 3105 - 3106 - for (total = 0, path = target_path; total < pathlen;) { 3107 - /* 3108 - * Skip any slashes. 3109 - */ 3110 - while(*path == '/') { 3111 - total++; 3112 - path++; 3113 - } 3114 - 3115 - /* 3116 - * Count up to the next slash or end of path. 3117 - * Error out if the component is bigger than MAXNAMELEN. 3118 - */ 3119 - for(len = 0; *path != '/' && total < pathlen;total++, path++) { 3120 - if (++len >= MAXNAMELEN) { 3121 - error = ENAMETOOLONG; 3122 - return error; 3123 - } 3124 - } 3125 - } 3126 - } 3127 3169 3128 3170 if (DM_EVENT_ENABLED(dp, DM_EVENT_SYMLINK)) { 3129 - error = XFS_SEND_NAMESP(mp, DM_EVENT_SYMLINK, dir_vp, 3171 + error = XFS_SEND_NAMESP(mp, DM_EVENT_SYMLINK, dp, 3130 3172 DM_RIGHT_NULL, NULL, DM_RIGHT_NULL, 3131 - link_name, target_path, 0, 0, 0); 3173 + link_name->name, target_path, 0, 0, 0); 3132 3174 if (error) 3133 3175 return error; 3134 3176 } ··· 3133 3229 fs_blocks = 0; 3134 3230 else 3135 3231 fs_blocks = XFS_B_TO_FSB(mp, pathlen); 3136 - resblks = XFS_SYMLINK_SPACE_RES(mp, link_namelen, fs_blocks); 3232 + resblks = XFS_SYMLINK_SPACE_RES(mp, link_name->len, fs_blocks); 3137 3233 error = xfs_trans_reserve(tp, resblks, XFS_SYMLINK_LOG_RES(mp), 0, 3138 3234 XFS_TRANS_PERM_LOG_RES, XFS_SYMLINK_LOG_COUNT); 3139 3235 if (error == ENOSPC && fs_blocks == 0) { ··· 3167 3263 
/* 3168 3264 * Check for ability to enter directory entry, if no space reserved. 3169 3265 */ 3170 - if (resblks == 0 && 3171 - (error = xfs_dir_canenter(tp, dp, link_name, link_namelen))) 3266 + error = xfs_dir_canenter(tp, dp, link_name, resblks); 3267 + if (error) 3172 3268 goto error_return; 3173 3269 /* 3174 3270 * Initialize the bmap freelist prior to calling either ··· 3193 3289 * transaction cancel unlocking dp so don't do it explicitly in the 3194 3290 * error path. 3195 3291 */ 3196 - VN_HOLD(dir_vp); 3292 + IHOLD(dp); 3197 3293 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); 3198 3294 unlock_dp_on_error = B_FALSE; 3199 3295 ··· 3260 3356 /* 3261 3357 * Create the directory entry for the symlink. 3262 3358 */ 3263 - error = xfs_dir_createname(tp, dp, link_name, link_namelen, ip->i_ino, 3264 - &first_block, &free_list, resblks); 3359 + error = xfs_dir_createname(tp, dp, link_name, ip->i_ino, 3360 + &first_block, &free_list, resblks); 3265 3361 if (error) 3266 3362 goto error1; 3267 3363 xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); ··· 3303 3399 std_return: 3304 3400 if (DM_EVENT_ENABLED(dp, DM_EVENT_POSTSYMLINK)) { 3305 3401 (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTSYMLINK, 3306 - dir_vp, DM_RIGHT_NULL, 3307 - error ? NULL : XFS_ITOV(ip), 3308 - DM_RIGHT_NULL, link_name, target_path, 3309 - 0, error, 0); 3402 + dp, DM_RIGHT_NULL, 3403 + error ? 
NULL : ip, 3404 + DM_RIGHT_NULL, link_name->name, 3405 + target_path, 0, error, 0); 3310 3406 } 3311 3407 3312 - if (!error) { 3313 - bhv_vnode_t *vp; 3314 - 3315 - ASSERT(ip); 3316 - vp = XFS_ITOV(ip); 3317 - *vpp = vp; 3318 - } 3408 + if (!error) 3409 + *ipp = ip; 3319 3410 return error; 3320 3411 3321 3412 error2: ··· 3330 3431 } 3331 3432 3332 3433 int 3333 - xfs_rwlock( 3334 - xfs_inode_t *ip, 3335 - bhv_vrwlock_t locktype) 3336 - { 3337 - if (S_ISDIR(ip->i_d.di_mode)) 3338 - return 1; 3339 - if (locktype == VRWLOCK_WRITE) { 3340 - xfs_ilock(ip, XFS_IOLOCK_EXCL); 3341 - } else if (locktype == VRWLOCK_TRY_READ) { 3342 - return xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED); 3343 - } else if (locktype == VRWLOCK_TRY_WRITE) { 3344 - return xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL); 3345 - } else { 3346 - ASSERT((locktype == VRWLOCK_READ) || 3347 - (locktype == VRWLOCK_WRITE_DIRECT)); 3348 - xfs_ilock(ip, XFS_IOLOCK_SHARED); 3349 - } 3350 - 3351 - return 1; 3352 - } 3353 - 3354 - 3355 - void 3356 - xfs_rwunlock( 3357 - xfs_inode_t *ip, 3358 - bhv_vrwlock_t locktype) 3359 - { 3360 - if (S_ISDIR(ip->i_d.di_mode)) 3361 - return; 3362 - if (locktype == VRWLOCK_WRITE) { 3363 - /* 3364 - * In the write case, we may have added a new entry to 3365 - * the reference cache. This might store a pointer to 3366 - * an inode to be released in this inode. If it is there, 3367 - * clear the pointer and release the inode after unlocking 3368 - * this one. 
3369 - */ 3370 - xfs_refcache_iunlock(ip, XFS_IOLOCK_EXCL); 3371 - } else { 3372 - ASSERT((locktype == VRWLOCK_READ) || 3373 - (locktype == VRWLOCK_WRITE_DIRECT)); 3374 - xfs_iunlock(ip, XFS_IOLOCK_SHARED); 3375 - } 3376 - return; 3377 - } 3378 - 3379 - 3380 - int 3381 3434 xfs_inode_flush( 3382 3435 xfs_inode_t *ip, 3383 3436 int flags) 3384 3437 { 3385 3438 xfs_mount_t *mp = ip->i_mount; 3386 - xfs_inode_log_item_t *iip = ip->i_itemp; 3387 3439 int error = 0; 3388 3440 3389 3441 if (XFS_FORCED_SHUTDOWN(mp)) ··· 3344 3494 * Bypass inodes which have already been cleaned by 3345 3495 * the inode flush clustering code inside xfs_iflush 3346 3496 */ 3347 - if ((ip->i_update_core == 0) && 3348 - ((iip == NULL) || !(iip->ili_format.ilf_fields & XFS_ILOG_ALL))) 3497 + if (xfs_inode_clean(ip)) 3349 3498 return 0; 3350 - 3351 - if (flags & FLUSH_LOG) { 3352 - if (iip && iip->ili_last_lsn) { 3353 - xlog_t *log = mp->m_log; 3354 - xfs_lsn_t sync_lsn; 3355 - int log_flags = XFS_LOG_FORCE; 3356 - 3357 - spin_lock(&log->l_grant_lock); 3358 - sync_lsn = log->l_last_sync_lsn; 3359 - spin_unlock(&log->l_grant_lock); 3360 - 3361 - if ((XFS_LSN_CMP(iip->ili_last_lsn, sync_lsn) > 0)) { 3362 - if (flags & FLUSH_SYNC) 3363 - log_flags |= XFS_LOG_SYNC; 3364 - error = xfs_log_force(mp, iip->ili_last_lsn, log_flags); 3365 - if (error) 3366 - return error; 3367 - } 3368 - 3369 - if (ip->i_update_core == 0) 3370 - return 0; 3371 - } 3372 - } 3373 3499 3374 3500 /* 3375 3501 * We make this non-blocking if the inode is contended, ··· 3354 3528 * blocking on inodes inside another operation right 3355 3529 * now, they get caught later by xfs_sync. 
3356 3530 */ 3357 - if (flags & FLUSH_INODE) { 3358 - int flush_flags; 3359 - 3360 - if (flags & FLUSH_SYNC) { 3361 - xfs_ilock(ip, XFS_ILOCK_SHARED); 3362 - xfs_iflock(ip); 3363 - } else if (xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) { 3364 - if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip)) { 3365 - xfs_iunlock(ip, XFS_ILOCK_SHARED); 3366 - return EAGAIN; 3367 - } 3368 - } else { 3531 + if (flags & FLUSH_SYNC) { 3532 + xfs_ilock(ip, XFS_ILOCK_SHARED); 3533 + xfs_iflock(ip); 3534 + } else if (xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) { 3535 + if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip)) { 3536 + xfs_iunlock(ip, XFS_ILOCK_SHARED); 3369 3537 return EAGAIN; 3370 3538 } 3371 - 3372 - if (flags & FLUSH_SYNC) 3373 - flush_flags = XFS_IFLUSH_SYNC; 3374 - else 3375 - flush_flags = XFS_IFLUSH_ASYNC; 3376 - 3377 - error = xfs_iflush(ip, flush_flags); 3378 - xfs_iunlock(ip, XFS_ILOCK_SHARED); 3539 + } else { 3540 + return EAGAIN; 3379 3541 } 3542 + 3543 + error = xfs_iflush(ip, (flags & FLUSH_SYNC) ? XFS_IFLUSH_SYNC 3544 + : XFS_IFLUSH_ASYNC_NOBLOCK); 3545 + xfs_iunlock(ip, XFS_ILOCK_SHARED); 3380 3546 3381 3547 return error; 3382 3548 } ··· 3512 3694 * We get the flush lock regardless, though, just to make sure 3513 3695 * we don't free it while it is being flushed. 
3514 3696 */ 3515 - if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { 3516 - if (!locked) { 3517 - xfs_ilock(ip, XFS_ILOCK_EXCL); 3518 - xfs_iflock(ip); 3519 - } 3697 + if (!locked) { 3698 + xfs_ilock(ip, XFS_ILOCK_EXCL); 3699 + xfs_iflock(ip); 3700 + } 3520 3701 3702 + if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { 3521 3703 if (ip->i_update_core || 3522 3704 ((ip->i_itemp != NULL) && 3523 3705 (ip->i_itemp->ili_format.ilf_fields != 0))) { ··· 3537 3719 ASSERT(ip->i_update_core == 0); 3538 3720 ASSERT(ip->i_itemp == NULL || 3539 3721 ip->i_itemp->ili_format.ilf_fields == 0); 3540 - xfs_iunlock(ip, XFS_ILOCK_EXCL); 3541 - } else if (locked) { 3542 - /* 3543 - * We are not interested in doing an iflush if we're 3544 - * in the process of shutting down the filesystem forcibly. 3545 - * So, just reclaim the inode. 3546 - */ 3547 - xfs_ifunlock(ip); 3548 - xfs_iunlock(ip, XFS_ILOCK_EXCL); 3549 3722 } 3723 + 3724 + xfs_ifunlock(ip); 3725 + xfs_iunlock(ip, XFS_ILOCK_EXCL); 3550 3726 3551 3727 reclaim: 3552 3728 xfs_ireclaim(ip); ··· 3657 3845 end_dmi_offset = offset+len; 3658 3846 if (end_dmi_offset > ip->i_size) 3659 3847 end_dmi_offset = ip->i_size; 3660 - error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, XFS_ITOV(ip), 3661 - offset, end_dmi_offset - offset, 3662 - 0, NULL); 3848 + error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, ip, offset, 3849 + end_dmi_offset - offset, 0, NULL); 3663 3850 if (error) 3664 3851 return error; 3665 3852 } ··· 3767 3956 if (error == ENOSPC && (attr_flags & ATTR_DMI) == 0 && 3768 3957 DM_EVENT_ENABLED(ip, DM_EVENT_NOSPACE)) { 3769 3958 error = XFS_SEND_NAMESP(mp, DM_EVENT_NOSPACE, 3770 - XFS_ITOV(ip), DM_RIGHT_NULL, 3771 - XFS_ITOV(ip), DM_RIGHT_NULL, 3959 + ip, DM_RIGHT_NULL, 3960 + ip, DM_RIGHT_NULL, 3772 3961 NULL, NULL, 0, 0, 0); /* Delay flag intentionally unused */ 3773 3962 if (error == 0) 3774 3963 goto retry; /* Maybe DMAPI app. 
has made space */ ··· 3832 4021 XFS_BUF_READ(bp); 3833 4022 XFS_BUF_SET_ADDR(bp, XFS_FSB_TO_DB(ip, imap.br_startblock)); 3834 4023 xfsbdstrat(mp, bp); 3835 - if ((error = xfs_iowait(bp))) { 4024 + error = xfs_iowait(bp); 4025 + if (error) { 3836 4026 xfs_ioerror_alert("xfs_zero_remaining_bytes(read)", 3837 4027 mp, bp, XFS_BUF_ADDR(bp)); 3838 4028 break; ··· 3845 4033 XFS_BUF_UNREAD(bp); 3846 4034 XFS_BUF_WRITE(bp); 3847 4035 xfsbdstrat(mp, bp); 3848 - if ((error = xfs_iowait(bp))) { 4036 + error = xfs_iowait(bp); 4037 + if (error) { 3849 4038 xfs_ioerror_alert("xfs_zero_remaining_bytes(write)", 3850 4039 mp, bp, XFS_BUF_ADDR(bp)); 3851 4040 break; ··· 3915 4102 DM_EVENT_ENABLED(ip, DM_EVENT_WRITE)) { 3916 4103 if (end_dmi_offset > ip->i_size) 3917 4104 end_dmi_offset = ip->i_size; 3918 - error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, vp, 4105 + error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, ip, 3919 4106 offset, end_dmi_offset - offset, 3920 4107 AT_DELAY_FLAG(attr_flags), NULL); 3921 4108 if (error)

+17 -16

fs/xfs/xfs_vnodeops.h

··· 23 23 xfs_off_t stop); 24 24 int xfs_release(struct xfs_inode *ip); 25 25 int xfs_inactive(struct xfs_inode *ip); 26 - int xfs_lookup(struct xfs_inode *dp, bhv_vname_t *dentry, 27 - bhv_vnode_t **vpp); 28 - int xfs_create(struct xfs_inode *dp, bhv_vname_t *dentry, mode_t mode, 29 - xfs_dev_t rdev, bhv_vnode_t **vpp, struct cred *credp); 30 - int xfs_remove(struct xfs_inode *dp, bhv_vname_t *dentry); 31 - int xfs_link(struct xfs_inode *tdp, bhv_vnode_t *src_vp, 32 - bhv_vname_t *dentry); 33 - int xfs_mkdir(struct xfs_inode *dp, bhv_vname_t *dentry, 34 - mode_t mode, bhv_vnode_t **vpp, struct cred *credp); 35 - int xfs_rmdir(struct xfs_inode *dp, bhv_vname_t *dentry); 26 + int xfs_lookup(struct xfs_inode *dp, struct xfs_name *name, 27 + struct xfs_inode **ipp); 28 + int xfs_create(struct xfs_inode *dp, struct xfs_name *name, mode_t mode, 29 + xfs_dev_t rdev, struct xfs_inode **ipp, struct cred *credp); 30 + int xfs_remove(struct xfs_inode *dp, struct xfs_name *name, 31 + struct xfs_inode *ip); 32 + int xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip, 33 + struct xfs_name *target_name); 34 + int xfs_mkdir(struct xfs_inode *dp, struct xfs_name *dir_name, 35 + mode_t mode, struct xfs_inode **ipp, struct cred *credp); 36 + int xfs_rmdir(struct xfs_inode *dp, struct xfs_name *name, 37 + struct xfs_inode *cdp); 36 38 int xfs_readdir(struct xfs_inode *dp, void *dirent, size_t bufsize, 37 39 xfs_off_t *offset, filldir_t filldir); 38 - int xfs_symlink(struct xfs_inode *dp, bhv_vname_t *dentry, 39 - char *target_path, mode_t mode, bhv_vnode_t **vpp, 40 + int xfs_symlink(struct xfs_inode *dp, struct xfs_name *link_name, 41 + const char *target_path, mode_t mode, struct xfs_inode **ipp, 40 42 struct cred *credp); 41 - int xfs_rwlock(struct xfs_inode *ip, bhv_vrwlock_t locktype); 42 - void xfs_rwunlock(struct xfs_inode *ip, bhv_vrwlock_t locktype); 43 43 int xfs_inode_flush(struct xfs_inode *ip, int flags); 44 44 int xfs_set_dmattrs(struct xfs_inode *ip, u_int evmask, 
u_int16_t state); 45 45 int xfs_reclaim(struct xfs_inode *ip); 46 46 int xfs_change_file_space(struct xfs_inode *ip, int cmd, 47 47 xfs_flock64_t *bf, xfs_off_t offset, 48 48 struct cred *credp, int attr_flags); 49 - int xfs_rename(struct xfs_inode *src_dp, bhv_vname_t *src_vname, 50 - bhv_vnode_t *target_dir_vp, bhv_vname_t *target_vname); 49 + int xfs_rename(struct xfs_inode *src_dp, struct xfs_name *src_name, 50 + struct xfs_inode *src_ip, struct xfs_inode *target_dp, 51 + struct xfs_name *target_name); 51 52 int xfs_attr_get(struct xfs_inode *ip, const char *name, char *value, 52 53 int *valuelenp, int flags, cred_t *cred); 53 54 int xfs_attr_set(struct xfs_inode *dp, const char *name, char *value,