Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'locks-v4.5-1' of git://git.samba.org/jlayton/linux

Pull file locking updates from Jeff Layton:
"File locking related changes for v4.5 (pile #1)

Highlights:
- new Kconfig option to allow disabling mandatory locking (which is
racy anyway)
- new tracepoints for setlk and close codepaths
- fix for a long-standing bug in code that handles races between
setting a POSIX lock and close()"

* tag 'locks-v4.5-1' of git://git.samba.org/jlayton/linux:
locks: rename __posix_lock_file to posix_lock_inode
locks: printk more detail when there are leaked locks
locks: pass inode pointer to locks_free_lock_context
locks: sprinkle some tracepoints around the file locking code
locks: don't check for race with close when setting OFD lock
locks: fix unlock when fcntl_setlk races with a close
fs: make locks.c explicitly non-modular
locks: use list_first_entry_or_null()
locks: Don't allow mounts in user namespaces to enable mandatory locking
locks: Allow disabling mandatory locking at compile time

+233 -84
+10
fs/Kconfig
··· 73 73 for filesystems like NFS and for the flock() system 74 74 call. Disabling this option saves about 11k. 75 75 76 + config MANDATORY_FILE_LOCKING 77 + bool "Enable Mandatory file locking" 78 + depends on FILE_LOCKING 79 + default y 80 + help 81 + This option enables appropriately marked files on appropriately 82 + mounted filesystems to support mandatory locking. 83 + 84 + To the best of my knowledge this is dead code that no one cares about. 85 + 76 86 source "fs/notify/Kconfig" 77 87 78 88 source "fs/quota/Kconfig"
+1 -1
fs/inode.c
··· 225 225 inode_detach_wb(inode); 226 226 security_inode_free(inode); 227 227 fsnotify_inode_delete(inode); 228 - locks_free_lock_context(inode->i_flctx); 228 + locks_free_lock_context(inode); 229 229 if (!inode->i_nlink) { 230 230 WARN_ON(atomic_long_read(&inode->i_sb->s_remove_count) == 0); 231 231 atomic_long_dec(&inode->i_sb->s_remove_count);
+93 -47
fs/locks.c
··· 119 119 #include <linux/fdtable.h> 120 120 #include <linux/fs.h> 121 121 #include <linux/init.h> 122 - #include <linux/module.h> 123 122 #include <linux/security.h> 124 123 #include <linux/slab.h> 125 124 #include <linux/syscalls.h> ··· 229 230 ctx = smp_load_acquire(&inode->i_flctx); 230 231 } 231 232 out: 233 + trace_locks_get_lock_context(inode, type, ctx); 232 234 return ctx; 233 235 } 234 236 235 - void 236 - locks_free_lock_context(struct file_lock_context *ctx) 237 + static void 238 + locks_dump_ctx_list(struct list_head *list, char *list_type) 237 239 { 238 - if (ctx) { 239 - WARN_ON_ONCE(!list_empty(&ctx->flc_flock)); 240 - WARN_ON_ONCE(!list_empty(&ctx->flc_posix)); 241 - WARN_ON_ONCE(!list_empty(&ctx->flc_lease)); 240 + struct file_lock *fl; 241 + 242 + list_for_each_entry(fl, list, fl_list) { 243 + pr_warn("%s: fl_owner=%p fl_flags=0x%x fl_type=0x%x fl_pid=%u\n", list_type, fl->fl_owner, fl->fl_flags, fl->fl_type, fl->fl_pid); 244 + } 245 + } 246 + 247 + static void 248 + locks_check_ctx_lists(struct inode *inode) 249 + { 250 + struct file_lock_context *ctx = inode->i_flctx; 251 + 252 + if (unlikely(!list_empty(&ctx->flc_flock) || 253 + !list_empty(&ctx->flc_posix) || 254 + !list_empty(&ctx->flc_lease))) { 255 + pr_warn("Leaked locks on dev=0x%x:0x%x ino=0x%lx:\n", 256 + MAJOR(inode->i_sb->s_dev), MINOR(inode->i_sb->s_dev), 257 + inode->i_ino); 258 + locks_dump_ctx_list(&ctx->flc_flock, "FLOCK"); 259 + locks_dump_ctx_list(&ctx->flc_posix, "POSIX"); 260 + locks_dump_ctx_list(&ctx->flc_lease, "LEASE"); 261 + } 262 + } 263 + 264 + void 265 + locks_free_lock_context(struct inode *inode) 266 + { 267 + struct file_lock_context *ctx = inode->i_flctx; 268 + 269 + if (unlikely(ctx)) { 270 + locks_check_ctx_lists(inode); 242 271 kmem_cache_free(flctx_cache, ctx); 243 272 } 244 273 } ··· 961 934 return error; 962 935 } 963 936 964 - static int __posix_lock_file(struct inode *inode, struct file_lock *request, struct file_lock *conflock) 937 + static int 
posix_lock_inode(struct inode *inode, struct file_lock *request, 938 + struct file_lock *conflock) 965 939 { 966 940 struct file_lock *fl, *tmp; 967 941 struct file_lock *new_fl = NULL; ··· 1170 1142 if (new_fl2) 1171 1143 locks_free_lock(new_fl2); 1172 1144 locks_dispose_list(&dispose); 1145 + trace_posix_lock_inode(inode, request, error); 1146 + 1173 1147 return error; 1174 1148 } 1175 1149 ··· 1192 1162 int posix_lock_file(struct file *filp, struct file_lock *fl, 1193 1163 struct file_lock *conflock) 1194 1164 { 1195 - return __posix_lock_file(file_inode(filp), fl, conflock); 1165 + return posix_lock_inode(file_inode(filp), fl, conflock); 1196 1166 } 1197 1167 EXPORT_SYMBOL(posix_lock_file); 1198 1168 ··· 1208 1178 int error; 1209 1179 might_sleep (); 1210 1180 for (;;) { 1211 - error = __posix_lock_file(inode, fl, NULL); 1181 + error = posix_lock_inode(inode, fl, NULL); 1212 1182 if (error != FILE_LOCK_DEFERRED) 1213 1183 break; 1214 1184 error = wait_event_interruptible(fl->fl_wait, !fl->fl_next); ··· 1221 1191 return error; 1222 1192 } 1223 1193 1194 + #ifdef CONFIG_MANDATORY_FILE_LOCKING 1224 1195 /** 1225 1196 * locks_mandatory_locked - Check for an active lock 1226 1197 * @file: the file to check ··· 1291 1260 if (filp) { 1292 1261 fl.fl_owner = filp; 1293 1262 fl.fl_flags &= ~FL_SLEEP; 1294 - error = __posix_lock_file(inode, &fl, NULL); 1263 + error = posix_lock_inode(inode, &fl, NULL); 1295 1264 if (!error) 1296 1265 break; 1297 1266 } ··· 1299 1268 if (sleep) 1300 1269 fl.fl_flags |= FL_SLEEP; 1301 1270 fl.fl_owner = current->files; 1302 - error = __posix_lock_file(inode, &fl, NULL); 1271 + error = posix_lock_inode(inode, &fl, NULL); 1303 1272 if (error != FILE_LOCK_DEFERRED) 1304 1273 break; 1305 1274 error = wait_event_interruptible(fl.fl_wait, !fl.fl_next); ··· 1320 1289 } 1321 1290 1322 1291 EXPORT_SYMBOL(locks_mandatory_area); 1292 + #endif /* CONFIG_MANDATORY_FILE_LOCKING */ 1323 1293 1324 1294 static void lease_clear_pending(struct file_lock *fl, 
int arg) 1325 1295 { ··· 1535 1503 ctx = smp_load_acquire(&inode->i_flctx); 1536 1504 if (ctx && !list_empty_careful(&ctx->flc_lease)) { 1537 1505 spin_lock(&ctx->flc_lock); 1538 - if (!list_empty(&ctx->flc_lease)) { 1539 - fl = list_first_entry(&ctx->flc_lease, 1540 - struct file_lock, fl_list); 1541 - if (fl->fl_type == F_WRLCK) 1542 - has_lease = true; 1543 - } 1506 + fl = list_first_entry_or_null(&ctx->flc_lease, 1507 + struct file_lock, fl_list); 1508 + if (fl && (fl->fl_type == F_WRLCK)) 1509 + has_lease = true; 1544 1510 spin_unlock(&ctx->flc_lock); 1545 1511 } 1546 1512 ··· 2195 2165 if (file_lock == NULL) 2196 2166 return -ENOLCK; 2197 2167 2168 + inode = file_inode(filp); 2169 + 2198 2170 /* 2199 2171 * This might block, so we do it before checking the inode. 2200 2172 */ 2201 2173 error = -EFAULT; 2202 2174 if (copy_from_user(&flock, l, sizeof(flock))) 2203 2175 goto out; 2204 - 2205 - inode = file_inode(filp); 2206 2176 2207 2177 /* Don't allow mandatory locks on files that may be memory mapped 2208 2178 * and shared. ··· 2212 2182 goto out; 2213 2183 } 2214 2184 2215 - again: 2216 2185 error = flock_to_posix_lock(filp, file_lock, &flock); 2217 2186 if (error) 2218 2187 goto out; ··· 2250 2221 error = do_lock_file_wait(filp, cmd, file_lock); 2251 2222 2252 2223 /* 2253 - * Attempt to detect a close/fcntl race and recover by 2254 - * releasing the lock that was just acquired. 2224 + * Attempt to detect a close/fcntl race and recover by releasing the 2225 + * lock that was just acquired. There is no need to do that when we're 2226 + * unlocking though, or for OFD locks. 2255 2227 */ 2256 - /* 2257 - * we need that spin_lock here - it prevents reordering between 2258 - * update of i_flctx->flc_posix and check for it done in close(). 2259 - * rcu_read_lock() wouldn't do. 
2260 - */ 2261 - spin_lock(&current->files->file_lock); 2262 - f = fcheck(fd); 2263 - spin_unlock(&current->files->file_lock); 2264 - if (!error && f != filp && flock.l_type != F_UNLCK) { 2265 - flock.l_type = F_UNLCK; 2266 - goto again; 2228 + if (!error && file_lock->fl_type != F_UNLCK && 2229 + !(file_lock->fl_flags & FL_OFDLCK)) { 2230 + /* 2231 + * We need that spin_lock here - it prevents reordering between 2232 + * update of i_flctx->flc_posix and check for it done in 2233 + * close(). rcu_read_lock() wouldn't do. 2234 + */ 2235 + spin_lock(&current->files->file_lock); 2236 + f = fcheck(fd); 2237 + spin_unlock(&current->files->file_lock); 2238 + if (f != filp) { 2239 + file_lock->fl_type = F_UNLCK; 2240 + error = do_lock_file_wait(filp, cmd, file_lock); 2241 + WARN_ON_ONCE(error); 2242 + error = -EBADF; 2243 + } 2267 2244 } 2268 - 2269 2245 out: 2246 + trace_fcntl_setlk(inode, file_lock, error); 2270 2247 locks_free_lock(file_lock); 2271 2248 return error; 2272 2249 } ··· 2357 2322 goto out; 2358 2323 } 2359 2324 2360 - again: 2361 2325 error = flock64_to_posix_lock(filp, file_lock, &flock); 2362 2326 if (error) 2363 2327 goto out; ··· 2395 2361 error = do_lock_file_wait(filp, cmd, file_lock); 2396 2362 2397 2363 /* 2398 - * Attempt to detect a close/fcntl race and recover by 2399 - * releasing the lock that was just acquired. 2364 + * Attempt to detect a close/fcntl race and recover by releasing the 2365 + * lock that was just acquired. There is no need to do that when we're 2366 + * unlocking though, or for OFD locks. 
2400 2367 */ 2401 - spin_lock(&current->files->file_lock); 2402 - f = fcheck(fd); 2403 - spin_unlock(&current->files->file_lock); 2404 - if (!error && f != filp && flock.l_type != F_UNLCK) { 2405 - flock.l_type = F_UNLCK; 2406 - goto again; 2368 + if (!error && file_lock->fl_type != F_UNLCK && 2369 + !(file_lock->fl_flags & FL_OFDLCK)) { 2370 + /* 2371 + * We need that spin_lock here - it prevents reordering between 2372 + * update of i_flctx->flc_posix and check for it done in 2373 + * close(). rcu_read_lock() wouldn't do. 2374 + */ 2375 + spin_lock(&current->files->file_lock); 2376 + f = fcheck(fd); 2377 + spin_unlock(&current->files->file_lock); 2378 + if (f != filp) { 2379 + file_lock->fl_type = F_UNLCK; 2380 + error = do_lock_file_wait(filp, cmd, file_lock); 2381 + WARN_ON_ONCE(error); 2382 + error = -EBADF; 2383 + } 2407 2384 } 2408 - 2409 2385 out: 2410 2386 locks_free_lock(file_lock); 2411 2387 return error; ··· 2429 2385 */ 2430 2386 void locks_remove_posix(struct file *filp, fl_owner_t owner) 2431 2387 { 2388 + int error; 2432 2389 struct file_lock lock; 2433 2390 struct file_lock_context *ctx; 2434 2391 ··· 2452 2407 lock.fl_ops = NULL; 2453 2408 lock.fl_lmops = NULL; 2454 2409 2455 - vfs_lock_file(filp, F_SETLK, &lock, NULL); 2410 + error = vfs_lock_file(filp, F_SETLK, &lock, NULL); 2456 2411 2457 2412 if (lock.fl_ops && lock.fl_ops->fl_release_private) 2458 2413 lock.fl_ops->fl_release_private(&lock); 2414 + trace_locks_remove_posix(file_inode(filp), &lock, error); 2459 2415 } 2460 2416 2461 2417 EXPORT_SYMBOL(locks_remove_posix); ··· 2752 2706 proc_create("locks", 0, NULL, &proc_locks_operations); 2753 2707 return 0; 2754 2708 } 2755 - module_init(proc_locks_init); 2709 + fs_initcall(proc_locks_init); 2756 2710 #endif 2757 2711 2758 2712 static int __init filelock_init(void)
+10
fs/namespace.c
··· 1584 1584 return ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN); 1585 1585 } 1586 1586 1587 + static inline bool may_mandlock(void) 1588 + { 1589 + #ifndef CONFIG_MANDATORY_FILE_LOCKING 1590 + return false; 1591 + #endif 1592 + return capable(CAP_SYS_ADMIN); 1593 + } 1594 + 1587 1595 /* 1588 1596 * Now umount can handle mount points as well as block devices. 1589 1597 * This is important for filesystems which use unnamed block devices. ··· 2684 2676 retval = security_sb_mount(dev_name, &path, 2685 2677 type_page, flags, data_page); 2686 2678 if (!retval && !may_mount()) 2679 + retval = -EPERM; 2680 + if (!retval && (flags & MS_MANDLOCK) && !may_mandlock()) 2687 2681 retval = -EPERM; 2688 2682 if (retval) 2689 2683 goto dput_out;
+42 -36
include/linux/fs.h
··· 1043 1043 extern int fcntl_getlease(struct file *filp); 1044 1044 1045 1045 /* fs/locks.c */ 1046 - void locks_free_lock_context(struct file_lock_context *ctx); 1046 + void locks_free_lock_context(struct inode *inode); 1047 1047 void locks_free_lock(struct file_lock *fl); 1048 1048 extern void locks_init_lock(struct file_lock *); 1049 1049 extern struct file_lock * locks_alloc_lock(void); ··· 1104 1104 } 1105 1105 1106 1106 static inline void 1107 - locks_free_lock_context(struct file_lock_context *ctx) 1107 + locks_free_lock_context(struct inode *inode) 1108 1108 { 1109 1109 } 1110 1110 ··· 2030 2030 #define FLOCK_VERIFY_READ 1 2031 2031 #define FLOCK_VERIFY_WRITE 2 2032 2032 2033 - #ifdef CONFIG_FILE_LOCKING 2033 + #ifdef CONFIG_MANDATORY_FILE_LOCKING 2034 2034 extern int locks_mandatory_locked(struct file *); 2035 2035 extern int locks_mandatory_area(int, struct inode *, struct file *, loff_t, size_t); 2036 2036 ··· 2075 2075 return 0; 2076 2076 } 2077 2077 2078 + #else /* !CONFIG_MANDATORY_FILE_LOCKING */ 2079 + 2080 + static inline int locks_mandatory_locked(struct file *file) 2081 + { 2082 + return 0; 2083 + } 2084 + 2085 + static inline int locks_mandatory_area(int rw, struct inode *inode, 2086 + struct file *filp, loff_t offset, 2087 + size_t count) 2088 + { 2089 + return 0; 2090 + } 2091 + 2092 + static inline int __mandatory_lock(struct inode *inode) 2093 + { 2094 + return 0; 2095 + } 2096 + 2097 + static inline int mandatory_lock(struct inode *inode) 2098 + { 2099 + return 0; 2100 + } 2101 + 2102 + static inline int locks_verify_locked(struct file *file) 2103 + { 2104 + return 0; 2105 + } 2106 + 2107 + static inline int locks_verify_truncate(struct inode *inode, struct file *filp, 2108 + size_t size) 2109 + { 2110 + return 0; 2111 + } 2112 + 2113 + #endif /* CONFIG_MANDATORY_FILE_LOCKING */ 2114 + 2115 + 2116 + #ifdef CONFIG_FILE_LOCKING 2078 2117 static inline int break_lease(struct inode *inode, unsigned int mode) 2079 2118 { 2080 2119 /* ··· 2175 
2136 } 2176 2137 2177 2138 #else /* !CONFIG_FILE_LOCKING */ 2178 - static inline int locks_mandatory_locked(struct file *file) 2179 - { 2180 - return 0; 2181 - } 2182 - 2183 - static inline int locks_mandatory_area(int rw, struct inode *inode, 2184 - struct file *filp, loff_t offset, 2185 - size_t count) 2186 - { 2187 - return 0; 2188 - } 2189 - 2190 - static inline int __mandatory_lock(struct inode *inode) 2191 - { 2192 - return 0; 2193 - } 2194 - 2195 - static inline int mandatory_lock(struct inode *inode) 2196 - { 2197 - return 0; 2198 - } 2199 - 2200 - static inline int locks_verify_locked(struct file *file) 2201 - { 2202 - return 0; 2203 - } 2204 - 2205 - static inline int locks_verify_truncate(struct inode *inode, struct file *filp, 2206 - size_t size) 2207 - { 2208 - return 0; 2209 - } 2210 - 2211 2139 static inline int break_lease(struct inode *inode, unsigned int mode) 2212 2140 { 2213 2141 return 0;
+77
include/trace/events/filelock.h
··· 34 34 { F_WRLCK, "F_WRLCK" }, \ 35 35 { F_UNLCK, "F_UNLCK" }) 36 36 37 + TRACE_EVENT(locks_get_lock_context, 38 + TP_PROTO(struct inode *inode, int type, struct file_lock_context *ctx), 39 + 40 + TP_ARGS(inode, type, ctx), 41 + 42 + TP_STRUCT__entry( 43 + __field(unsigned long, i_ino) 44 + __field(dev_t, s_dev) 45 + __field(unsigned char, type) 46 + __field(struct file_lock_context *, ctx) 47 + ), 48 + 49 + TP_fast_assign( 50 + __entry->s_dev = inode->i_sb->s_dev; 51 + __entry->i_ino = inode->i_ino; 52 + __entry->type = type; 53 + __entry->ctx = ctx; 54 + ), 55 + 56 + TP_printk("dev=0x%x:0x%x ino=0x%lx type=%s ctx=%p", 57 + MAJOR(__entry->s_dev), MINOR(__entry->s_dev), 58 + __entry->i_ino, show_fl_type(__entry->type), __entry->ctx) 59 + ); 60 + 61 + DECLARE_EVENT_CLASS(filelock_lock, 62 + TP_PROTO(struct inode *inode, struct file_lock *fl, int ret), 63 + 64 + TP_ARGS(inode, fl, ret), 65 + 66 + TP_STRUCT__entry( 67 + __field(struct file_lock *, fl) 68 + __field(unsigned long, i_ino) 69 + __field(dev_t, s_dev) 70 + __field(struct file_lock *, fl_next) 71 + __field(fl_owner_t, fl_owner) 72 + __field(unsigned int, fl_pid) 73 + __field(unsigned int, fl_flags) 74 + __field(unsigned char, fl_type) 75 + __field(loff_t, fl_start) 76 + __field(loff_t, fl_end) 77 + __field(int, ret) 78 + ), 79 + 80 + TP_fast_assign( 81 + __entry->fl = fl ? fl : NULL; 82 + __entry->s_dev = inode->i_sb->s_dev; 83 + __entry->i_ino = inode->i_ino; 84 + __entry->fl_next = fl ? fl->fl_next : NULL; 85 + __entry->fl_owner = fl ? fl->fl_owner : NULL; 86 + __entry->fl_pid = fl ? fl->fl_pid : 0; 87 + __entry->fl_flags = fl ? fl->fl_flags : 0; 88 + __entry->fl_type = fl ? fl->fl_type : 0; 89 + __entry->fl_start = fl ? fl->fl_start : 0; 90 + __entry->fl_end = fl ? 
fl->fl_end : 0; 91 + __entry->ret = ret; 92 + ), 93 + 94 + TP_printk("fl=0x%p dev=0x%x:0x%x ino=0x%lx fl_next=0x%p fl_owner=0x%p fl_pid=%u fl_flags=%s fl_type=%s fl_start=%lld fl_end=%lld ret=%d", 95 + __entry->fl, MAJOR(__entry->s_dev), MINOR(__entry->s_dev), 96 + __entry->i_ino, __entry->fl_next, __entry->fl_owner, 97 + __entry->fl_pid, show_fl_flags(__entry->fl_flags), 98 + show_fl_type(__entry->fl_type), 99 + __entry->fl_start, __entry->fl_end, __entry->ret) 100 + ); 101 + 102 + DEFINE_EVENT(filelock_lock, posix_lock_inode, 103 + TP_PROTO(struct inode *inode, struct file_lock *fl, int ret), 104 + TP_ARGS(inode, fl, ret)); 105 + 106 + DEFINE_EVENT(filelock_lock, fcntl_setlk, 107 + TP_PROTO(struct inode *inode, struct file_lock *fl, int ret), 108 + TP_ARGS(inode, fl, ret)); 109 + 110 + DEFINE_EVENT(filelock_lock, locks_remove_posix, 111 + TP_PROTO(struct inode *inode, struct file_lock *fl, int ret), 112 + TP_ARGS(inode, fl, ret)); 113 + 37 114 DECLARE_EVENT_CLASS(filelock_lease, 38 115 39 116 TP_PROTO(struct inode *inode, struct file_lock *fl),