Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'gfs2-v5.17-rc4-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/gfs2/linux-gfs2

Pull gfs2 fixes from Andreas Gruenbacher:

- To avoid deadlocks, actively cancel dlm locking requests when we give
up on them.

Further dlm operations on the same lock will return -EBUSY until the
cancel has been completed, so in that case, wait and repeat. (This is
rare.)

- Lock inversion fixes in gfs2_inode_lookup() and gfs2_create_inode().

- Some more fallout from the gfs2 mmap + page fault deadlock fixes
(merged in commit c03098d4b9ad7: "Merge tag 'gfs2-v5.15-rc5-mmap-fault'").

- Various other minor bug fixes and cleanups.

* tag 'gfs2-v5.17-rc4-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/gfs2/linux-gfs2:
gfs2: Make sure FITRIM minlen is rounded up to fs block size
gfs2: Make sure not to return short direct writes
gfs2: Remove dead code in gfs2_file_read_iter
gfs2: Fix gfs2_file_buffered_write endless loop workaround
gfs2: Minor retry logic cleanup
gfs2: Disable page faults during lockless buffered reads
gfs2: Fix should_fault_in_pages() logic
gfs2: Remove return value for gfs2_indirect_init
gfs2: Initialize gh_error in gfs2_glock_nq
gfs2: Make use of list_is_first
gfs2: Switch lock order of inode and iopen glock
gfs2: cancel timed-out glock requests
gfs2: Expect -EBUSY after canceling dlm locking requests
gfs2: gfs2_setattr_size error path fix
gfs2: assign rgrp glock before compute_bitstructs

+104 -76
+4 -5
fs/gfs2/bmap.c
··· 606 606 return ret; 607 607 } 608 608 609 - static inline __be64 *gfs2_indirect_init(struct metapath *mp, 610 - struct gfs2_glock *gl, unsigned int i, 611 - unsigned offset, u64 bn) 609 + static inline void gfs2_indirect_init(struct metapath *mp, 610 + struct gfs2_glock *gl, unsigned int i, 611 + unsigned offset, u64 bn) 612 612 { 613 613 __be64 *ptr = (__be64 *)(mp->mp_bh[i - 1]->b_data + 614 614 ((i > 1) ? sizeof(struct gfs2_meta_header) : ··· 621 621 gfs2_buffer_clear_tail(mp->mp_bh[i], sizeof(struct gfs2_meta_header)); 622 622 ptr += offset; 623 623 *ptr = cpu_to_be64(bn); 624 - return ptr; 625 624 } 626 625 627 626 enum alloc_state { ··· 2145 2146 2146 2147 ret = do_shrink(inode, newsize); 2147 2148 out: 2148 - gfs2_rs_delete(ip, NULL); 2149 + gfs2_rs_delete(ip); 2149 2150 gfs2_qa_put(ip); 2150 2151 return ret; 2151 2152 }
+33 -35
fs/gfs2/file.c
··· 706 706 707 707 if (file->f_mode & FMODE_WRITE) { 708 708 if (gfs2_rs_active(&ip->i_res)) 709 - gfs2_rs_delete(ip, &inode->i_writecount); 709 + gfs2_rs_delete(ip); 710 710 gfs2_qa_put(ip); 711 711 } 712 712 return 0; ··· 775 775 size_t *window_size) 776 776 { 777 777 size_t count = iov_iter_count(i); 778 - char __user *p; 779 - int pages = 1; 778 + size_t size, offs; 780 779 781 780 if (likely(!count)) 782 781 return false; ··· 784 785 if (!iter_is_iovec(i)) 785 786 return false; 786 787 788 + size = PAGE_SIZE; 789 + offs = offset_in_page(i->iov[0].iov_base + i->iov_offset); 787 790 if (*prev_count != count || !*window_size) { 788 - int pages, nr_dirtied; 791 + size_t nr_dirtied; 789 792 790 - pages = min_t(int, BIO_MAX_VECS, DIV_ROUND_UP(count, PAGE_SIZE)); 793 + size = ALIGN(offs + count, PAGE_SIZE); 794 + size = min_t(size_t, size, SZ_1M); 791 795 nr_dirtied = max(current->nr_dirtied_pause - 792 - current->nr_dirtied, 1); 793 - pages = min(pages, nr_dirtied); 796 + current->nr_dirtied, 8); 797 + size = min(size, nr_dirtied << PAGE_SHIFT); 794 798 } 795 799 796 800 *prev_count = count; 797 - p = i->iov[0].iov_base + i->iov_offset; 798 - *window_size = (size_t)PAGE_SIZE * pages - offset_in_page(p); 801 + *window_size = size - offs; 799 802 return true; 800 803 } 801 804 ··· 852 851 leftover = fault_in_iov_iter_writeable(to, window_size); 853 852 gfs2_holder_disallow_demote(gh); 854 853 if (leftover != window_size) { 855 - if (!gfs2_holder_queued(gh)) 856 - goto retry; 857 - goto retry_under_glock; 854 + if (gfs2_holder_queued(gh)) 855 + goto retry_under_glock; 856 + goto retry; 858 857 } 859 858 } 860 859 if (gfs2_holder_queued(gh)) ··· 921 920 leftover = fault_in_iov_iter_readable(from, window_size); 922 921 gfs2_holder_disallow_demote(gh); 923 922 if (leftover != window_size) { 924 - if (!gfs2_holder_queued(gh)) 925 - goto retry; 926 - goto retry_under_glock; 923 + if (gfs2_holder_queued(gh)) 924 + goto retry_under_glock; 925 + goto retry; 927 926 } 928 927 
} 929 928 out: ··· 951 950 * and retry. 952 951 */ 953 952 954 - if (iocb->ki_flags & IOCB_DIRECT) { 955 - ret = gfs2_file_direct_read(iocb, to, &gh); 956 - if (likely(ret != -ENOTBLK)) 957 - return ret; 958 - iocb->ki_flags &= ~IOCB_DIRECT; 959 - } 953 + if (iocb->ki_flags & IOCB_DIRECT) 954 + return gfs2_file_direct_read(iocb, to, &gh); 955 + 956 + pagefault_disable(); 960 957 iocb->ki_flags |= IOCB_NOIO; 961 958 ret = generic_file_read_iter(iocb, to); 962 959 iocb->ki_flags &= ~IOCB_NOIO; 960 + pagefault_enable(); 963 961 if (ret >= 0) { 964 962 if (!iov_iter_count(to)) 965 963 return ret; 966 964 written = ret; 967 - } else { 965 + } else if (ret != -EFAULT) { 968 966 if (ret != -EAGAIN) 969 967 return ret; 970 968 if (iocb->ki_flags & IOCB_NOWAIT) ··· 989 989 leftover = fault_in_iov_iter_writeable(to, window_size); 990 990 gfs2_holder_disallow_demote(&gh); 991 991 if (leftover != window_size) { 992 - if (!gfs2_holder_queued(&gh)) { 993 - if (written) 994 - goto out_uninit; 995 - goto retry; 996 - } 997 - goto retry_under_glock; 992 + if (gfs2_holder_queued(&gh)) 993 + goto retry_under_glock; 994 + if (written) 995 + goto out_uninit; 996 + goto retry; 998 997 } 999 998 } 1000 999 if (gfs2_holder_queued(&gh)) ··· 1067 1068 gfs2_holder_disallow_demote(gh); 1068 1069 if (leftover != window_size) { 1069 1070 from->count = min(from->count, window_size - leftover); 1070 - if (!gfs2_holder_queued(gh)) { 1071 - if (read) 1072 - goto out_uninit; 1073 - goto retry; 1074 - } 1075 - goto retry_under_glock; 1071 + if (gfs2_holder_queued(gh)) 1072 + goto retry_under_glock; 1073 + if (read && !(iocb->ki_flags & IOCB_DIRECT)) 1074 + goto out_uninit; 1075 + goto retry; 1076 1076 } 1077 1077 } 1078 1078 out_unlock: ··· 1081 1083 gfs2_holder_uninit(gh); 1082 1084 if (statfs_gh) 1083 1085 kfree(statfs_gh); 1086 + from->count = orig_count - read; 1084 1087 return read ? 
read : ret; 1085 1088 } 1086 1089 ··· 1496 1497 if (error != GLR_TRYFAILED) 1497 1498 break; 1498 1499 fl_gh->gh_flags = LM_FLAG_TRY | GL_EXACT; 1499 - fl_gh->gh_error = 0; 1500 1500 msleep(sleeptime); 1501 1501 } 1502 1502 if (error) {
+12 -2
fs/gfs2/glock.c
··· 542 542 * some reason. If this holder is the head of the list, it 543 543 * means we have a blocked holder at the head, so return 1. 544 544 */ 545 - if (gh->gh_list.prev == &gl->gl_holders) 545 + if (list_is_first(&gh->gh_list, &gl->gl_holders)) 546 546 return 1; 547 547 do_error(gl, 0); 548 548 break; ··· 669 669 670 670 /* Check for state != intended state */ 671 671 if (unlikely(state != gl->gl_target)) { 672 + if (gh && (ret & LM_OUT_CANCELED)) 673 + gfs2_holder_wake(gh); 672 674 if (gh && !test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags)) { 673 675 /* move to back of queue and try next entry */ 674 676 if (ret & LM_OUT_CANCELED) { ··· 1261 1259 gh->gh_owner_pid = get_pid(task_pid(current)); 1262 1260 gh->gh_state = state; 1263 1261 gh->gh_flags = flags; 1264 - gh->gh_error = 0; 1265 1262 gh->gh_iflags = 0; 1266 1263 gfs2_glock_hold(gl); 1267 1264 } ··· 1566 1565 if (test_bit(GLF_LRU, &gl->gl_flags)) 1567 1566 gfs2_glock_remove_from_lru(gl); 1568 1567 1568 + gh->gh_error = 0; 1569 1569 spin_lock(&gl->gl_lockref.lock); 1570 1570 add_to_queue(gh); 1571 1571 if (unlikely((LM_FLAG_NOEXP & gh->gh_flags) && ··· 1693 1691 struct gfs2_glock *gl = gh->gh_gl; 1694 1692 1695 1693 spin_lock(&gl->gl_lockref.lock); 1694 + if (list_is_first(&gh->gh_list, &gl->gl_holders) && 1695 + !test_bit(HIF_HOLDER, &gh->gh_iflags)) { 1696 + spin_unlock(&gl->gl_lockref.lock); 1697 + gl->gl_name.ln_sbd->sd_lockstruct.ls_ops->lm_cancel(gl); 1698 + wait_on_bit(&gh->gh_iflags, HIF_WAIT, TASK_UNINTERRUPTIBLE); 1699 + spin_lock(&gl->gl_lockref.lock); 1700 + } 1701 + 1696 1702 __gfs2_glock_dq(gh); 1697 1703 spin_unlock(&gl->gl_lockref.lock); 1698 1704 }
+28 -23
fs/gfs2/inode.c
··· 131 131 struct gfs2_sbd *sdp = GFS2_SB(inode); 132 132 struct gfs2_glock *io_gl; 133 133 134 - error = gfs2_glock_get(sdp, no_addr, &gfs2_inode_glops, CREATE, &ip->i_gl); 134 + error = gfs2_glock_get(sdp, no_addr, &gfs2_inode_glops, CREATE, 135 + &ip->i_gl); 136 + if (unlikely(error)) 137 + goto fail; 138 + 139 + error = gfs2_glock_get(sdp, no_addr, &gfs2_iopen_glops, CREATE, 140 + &io_gl); 141 + if (unlikely(error)) 142 + goto fail; 143 + 144 + if (blktype != GFS2_BLKST_UNLINKED) 145 + gfs2_cancel_delete_work(io_gl); 146 + error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, 147 + &ip->i_iopen_gh); 148 + gfs2_glock_put(io_gl); 135 149 if (unlikely(error)) 136 150 goto fail; 137 151 ··· 174 160 } 175 161 176 162 set_bit(GLF_INSTANTIATE_NEEDED, &ip->i_gl->gl_flags); 177 - 178 - error = gfs2_glock_get(sdp, no_addr, &gfs2_iopen_glops, CREATE, &io_gl); 179 - if (unlikely(error)) 180 - goto fail; 181 - if (blktype != GFS2_BLKST_UNLINKED) 182 - gfs2_cancel_delete_work(io_gl); 183 - error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh); 184 - gfs2_glock_put(io_gl); 185 - if (unlikely(error)) 186 - goto fail; 187 163 188 164 /* Lowest possible timestamp; will be overwritten in gfs2_dinode_in. 
*/ 189 165 inode->i_atime.tv_sec = 1LL << (8 * sizeof(inode->i_atime.tv_sec) - 1); ··· 720 716 error = insert_inode_locked4(inode, ip->i_no_addr, iget_test, &ip->i_no_addr); 721 717 BUG_ON(error); 722 718 723 - error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, ghs + 1); 719 + error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh); 724 720 if (error) 725 721 goto fail_gunlock2; 726 722 723 + error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, ghs + 1); 724 + if (error) 725 + goto fail_gunlock3; 726 + 727 727 error = gfs2_trans_begin(sdp, blocks, 0); 728 728 if (error) 729 - goto fail_gunlock2; 729 + goto fail_gunlock3; 730 730 731 731 if (blocks > 1) { 732 732 ip->i_eattr = ip->i_no_addr + 1; ··· 738 730 } 739 731 init_dinode(dip, ip, symname); 740 732 gfs2_trans_end(sdp); 741 - 742 - error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh); 743 - if (error) 744 - goto fail_gunlock2; 745 733 746 734 glock_set_object(ip->i_gl, ip); 747 735 glock_set_object(io_gl, ip); ··· 749 745 if (default_acl) { 750 746 error = __gfs2_set_acl(inode, default_acl, ACL_TYPE_DEFAULT); 751 747 if (error) 752 - goto fail_gunlock3; 748 + goto fail_gunlock4; 753 749 posix_acl_release(default_acl); 754 750 default_acl = NULL; 755 751 } 756 752 if (acl) { 757 753 error = __gfs2_set_acl(inode, acl, ACL_TYPE_ACCESS); 758 754 if (error) 759 - goto fail_gunlock3; 755 + goto fail_gunlock4; 760 756 posix_acl_release(acl); 761 757 acl = NULL; 762 758 } ··· 764 760 error = security_inode_init_security(&ip->i_inode, &dip->i_inode, name, 765 761 &gfs2_initxattrs, NULL); 766 762 if (error) 767 - goto fail_gunlock3; 763 + goto fail_gunlock4; 768 764 769 765 error = link_dinode(dip, name, ip, &da); 770 766 if (error) 771 - goto fail_gunlock3; 767 + goto fail_gunlock4; 772 768 773 769 mark_inode_dirty(inode); 774 770 d_instantiate(dentry, inode); ··· 786 782 unlock_new_inode(inode); 787 783 return error; 788 784 789 - fail_gunlock3: 785 + 
fail_gunlock4: 790 786 glock_clear_object(ip->i_gl, ip); 791 787 glock_clear_object(io_gl, ip); 788 + fail_gunlock3: 792 789 gfs2_glock_dq_uninit(&ip->i_iopen_gh); 793 790 fail_gunlock2: 794 791 gfs2_glock_put(io_gl); ··· 798 793 if (free_vfs_inode) /* else evict will do the put for us */ 799 794 gfs2_glock_put(ip->i_gl); 800 795 } 801 - gfs2_rs_delete(ip, NULL); 796 + gfs2_rs_deltree(&ip->i_res); 802 797 gfs2_qa_put(ip); 803 798 fail_free_acls: 804 799 posix_acl_release(default_acl);
+14 -1
fs/gfs2/lock_dlm.c
··· 261 261 int req; 262 262 u32 lkf; 263 263 char strname[GDLM_STRNAME_BYTES] = ""; 264 + int error; 264 265 265 266 req = make_mode(gl->gl_name.ln_sbd, req_state); 266 267 lkf = make_flags(gl, flags, req); ··· 280 279 * Submit the actual lock request. 281 280 */ 282 281 283 - return dlm_lock(ls->ls_dlm, req, &gl->gl_lksb, lkf, strname, 282 + again: 283 + error = dlm_lock(ls->ls_dlm, req, &gl->gl_lksb, lkf, strname, 284 284 GDLM_STRNAME_BYTES - 1, 0, gdlm_ast, gl, gdlm_bast); 285 + if (error == -EBUSY) { 286 + msleep(20); 287 + goto again; 288 + } 289 + return error; 285 290 } 286 291 287 292 static void gdlm_put_lock(struct gfs2_glock *gl) ··· 319 312 return; 320 313 } 321 314 315 + again: 322 316 error = dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, DLM_LKF_VALBLK, 323 317 NULL, gl); 318 + if (error == -EBUSY) { 319 + msleep(20); 320 + goto again; 321 + } 322 + 324 323 if (error) { 325 324 fs_err(sdp, "gdlm_unlock %x,%llx err=%d\n", 326 325 gl->gl_name.ln_type,
+11 -8
fs/gfs2/rgrp.c
··· 680 680 /** 681 681 * gfs2_rs_delete - delete a multi-block reservation 682 682 * @ip: The inode for this reservation 683 - * @wcount: The inode's write count, or NULL 684 683 * 685 684 */ 686 - void gfs2_rs_delete(struct gfs2_inode *ip, atomic_t *wcount) 685 + void gfs2_rs_delete(struct gfs2_inode *ip) 687 686 { 687 + struct inode *inode = &ip->i_inode; 688 + 688 689 down_write(&ip->i_rw_mutex); 689 - if ((wcount == NULL) || (atomic_read(wcount) <= 1)) 690 + if (atomic_read(&inode->i_writecount) <= 1) 690 691 gfs2_rs_deltree(&ip->i_res); 691 692 up_write(&ip->i_rw_mutex); 692 693 } ··· 923 922 spin_lock_init(&rgd->rd_rsspin); 924 923 mutex_init(&rgd->rd_mutex); 925 924 926 - error = compute_bitstructs(rgd); 927 - if (error) 928 - goto fail; 929 - 930 925 error = gfs2_glock_get(sdp, rgd->rd_addr, 931 926 &gfs2_rgrp_glops, CREATE, &rgd->rd_gl); 932 927 if (error) 933 928 goto fail; 929 + 930 + error = compute_bitstructs(rgd); 931 + if (error) 932 + goto fail_glock; 934 933 935 934 rgd->rd_rgl = (struct gfs2_rgrp_lvb *)rgd->rd_gl->gl_lksb.sb_lvbptr; 936 935 rgd->rd_flags &= ~GFS2_RDF_PREFERRED; ··· 945 944 } 946 945 947 946 error = 0; /* someone else read in the rgrp; free it and ignore it */ 947 + fail_glock: 948 948 gfs2_glock_put(rgd->rd_gl); 949 949 950 950 fail: ··· 1417 1415 1418 1416 start = r.start >> bs_shift; 1419 1417 end = start + (r.len >> bs_shift); 1420 - minlen = max_t(u64, r.minlen, 1418 + minlen = max_t(u64, r.minlen, sdp->sd_sb.sb_bsize); 1419 + minlen = max_t(u64, minlen, 1421 1420 q->limits.discard_granularity) >> bs_shift; 1422 1421 1423 1422 if (end <= start || minlen > sdp->sd_max_rg_data)
+1 -1
fs/gfs2/rgrp.h
··· 45 45 bool dinode, u64 *generation); 46 46 47 47 extern void gfs2_rs_deltree(struct gfs2_blkreserv *rs); 48 - extern void gfs2_rs_delete(struct gfs2_inode *ip, atomic_t *wcount); 48 + extern void gfs2_rs_delete(struct gfs2_inode *ip); 49 49 extern void __gfs2_free_blocks(struct gfs2_inode *ip, struct gfs2_rgrpd *rgd, 50 50 u64 bstart, u32 blen, int meta); 51 51 extern void gfs2_free_meta(struct gfs2_inode *ip, struct gfs2_rgrpd *rgd,
+1 -1
fs/gfs2/super.c
··· 1396 1396 truncate_inode_pages_final(&inode->i_data); 1397 1397 if (ip->i_qadata) 1398 1398 gfs2_assert_warn(sdp, ip->i_qadata->qa_ref == 0); 1399 - gfs2_rs_delete(ip, NULL); 1399 + gfs2_rs_deltree(&ip->i_res); 1400 1400 gfs2_ordered_del_inode(ip); 1401 1401 clear_inode(inode); 1402 1402 gfs2_dir_hash_inval(ip);