Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

f2fs: add an ioctl to disable GC for specific file

This patch adds a flag that disables GC on a given file, which is useful when
the user wants to keep the file's block map unchanged. It also performs
in-place updates for such pinned ("dontmove") files.

Reviewed-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>

+136 -2
+2
fs/f2fs/data.c
··· 1479 1479 { 1480 1480 struct inode *inode = fio->page->mapping->host; 1481 1481 1482 + if (f2fs_is_pinned_file(inode)) 1483 + return true; 1482 1484 if (S_ISDIR(inode->i_mode) || f2fs_is_atomic_file(inode)) 1483 1485 return false; 1484 1486 if (is_cold_data(fio->page))
+28 -1
fs/f2fs/f2fs.h
··· 356 356 #define F2FS_IOC_GARBAGE_COLLECT_RANGE _IOW(F2FS_IOCTL_MAGIC, 11, \ 357 357 struct f2fs_gc_range) 358 358 #define F2FS_IOC_GET_FEATURES _IOR(F2FS_IOCTL_MAGIC, 12, __u32) 359 + #define F2FS_IOC_SET_PIN_FILE _IOW(F2FS_IOCTL_MAGIC, 13, __u32) 360 + #define F2FS_IOC_GET_PIN_FILE _IOR(F2FS_IOCTL_MAGIC, 14, __u32) 359 361 360 362 #define F2FS_IOC_SET_ENCRYPTION_POLICY FS_IOC_SET_ENCRYPTION_POLICY 361 363 #define F2FS_IOC_GET_ENCRYPTION_POLICY FS_IOC_GET_ENCRYPTION_POLICY ··· 595 593 unsigned long i_flags; /* keep an inode flags for ioctl */ 596 594 unsigned char i_advise; /* use to give file attribute hints */ 597 595 unsigned char i_dir_level; /* use for dentry level for large dir */ 598 - unsigned int i_current_depth; /* use only in directory structure */ 596 + union { 597 + unsigned int i_current_depth; /* only for directory depth */ 598 + unsigned short i_gc_failures; /* only for regular file */ 599 + }; 599 600 unsigned int i_pino; /* parent inode number */ 600 601 umode_t i_acl_mode; /* keep file acl mode temporarily */ 601 602 ··· 1146 1141 1147 1142 /* threshold for converting bg victims for fg */ 1148 1143 u64 fggc_threshold; 1144 + 1145 + /* threshold for gc trials on pinned files */ 1146 + u64 gc_pin_file_threshold; 1149 1147 1150 1148 /* maximum # of trials to find a victim segment for SSR and GC */ 1151 1149 unsigned int max_victim_search; ··· 2149 2141 FI_HOT_DATA, /* indicate file is hot */ 2150 2142 FI_EXTRA_ATTR, /* indicate file has extra attribute */ 2151 2143 FI_PROJ_INHERIT, /* indicate file inherits projectid */ 2144 + FI_PIN_FILE, /* indicate file should not be gced */ 2152 2145 }; 2153 2146 2154 2147 static inline void __mark_inode_dirty_flag(struct inode *inode, ··· 2164 2155 return; 2165 2156 case FI_DATA_EXIST: 2166 2157 case FI_INLINE_DOTS: 2158 + case FI_PIN_FILE: 2167 2159 f2fs_mark_inode_dirty_sync(inode, true); 2168 2160 } 2169 2161 } ··· 2245 2235 f2fs_mark_inode_dirty_sync(inode, true); 2246 2236 } 2247 2237 2238 + static 
inline void f2fs_i_gc_failures_write(struct inode *inode, 2239 + unsigned int count) 2240 + { 2241 + F2FS_I(inode)->i_gc_failures = count; 2242 + f2fs_mark_inode_dirty_sync(inode, true); 2243 + } 2244 + 2248 2245 static inline void f2fs_i_xnid_write(struct inode *inode, nid_t xnid) 2249 2246 { 2250 2247 F2FS_I(inode)->i_xattr_nid = xnid; ··· 2280 2263 set_bit(FI_INLINE_DOTS, &fi->flags); 2281 2264 if (ri->i_inline & F2FS_EXTRA_ATTR) 2282 2265 set_bit(FI_EXTRA_ATTR, &fi->flags); 2266 + if (ri->i_inline & F2FS_PIN_FILE) 2267 + set_bit(FI_PIN_FILE, &fi->flags); 2283 2268 } 2284 2269 2285 2270 static inline void set_raw_inline(struct inode *inode, struct f2fs_inode *ri) ··· 2300 2281 ri->i_inline |= F2FS_INLINE_DOTS; 2301 2282 if (is_inode_flag_set(inode, FI_EXTRA_ATTR)) 2302 2283 ri->i_inline |= F2FS_EXTRA_ATTR; 2284 + if (is_inode_flag_set(inode, FI_PIN_FILE)) 2285 + ri->i_inline |= F2FS_PIN_FILE; 2303 2286 } 2304 2287 2305 2288 static inline int f2fs_has_extra_attr(struct inode *inode) ··· 2345 2324 static inline int f2fs_has_inline_dots(struct inode *inode) 2346 2325 { 2347 2326 return is_inode_flag_set(inode, FI_INLINE_DOTS); 2327 + } 2328 + 2329 + static inline bool f2fs_is_pinned_file(struct inode *inode) 2330 + { 2331 + return is_inode_flag_set(inode, FI_PIN_FILE); 2348 2332 } 2349 2333 2350 2334 static inline bool f2fs_is_atomic_file(struct inode *inode) ··· 2566 2540 void truncate_data_blocks_range(struct dnode_of_data *dn, int count); 2567 2541 long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); 2568 2542 long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg); 2543 + int f2fs_pin_file_control(struct inode *inode, bool inc); 2569 2544 2570 2545 /* 2571 2546 * inode.c
+83
fs/f2fs/file.c
··· 2672 2672 return 0; 2673 2673 } 2674 2674 2675 + int f2fs_pin_file_control(struct inode *inode, bool inc) 2676 + { 2677 + struct f2fs_inode_info *fi = F2FS_I(inode); 2678 + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 2679 + 2680 + /* Use i_gc_failures for normal file as a risk signal. */ 2681 + if (inc) 2682 + f2fs_i_gc_failures_write(inode, fi->i_gc_failures + 1); 2683 + 2684 + if (fi->i_gc_failures > sbi->gc_pin_file_threshold) { 2685 + f2fs_msg(sbi->sb, KERN_WARNING, 2686 + "%s: Enable GC = ino %lx after %x GC trials\n", 2687 + __func__, inode->i_ino, fi->i_gc_failures); 2688 + clear_inode_flag(inode, FI_PIN_FILE); 2689 + return -EAGAIN; 2690 + } 2691 + return 0; 2692 + } 2693 + 2694 + static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg) 2695 + { 2696 + struct inode *inode = file_inode(filp); 2697 + __u32 pin; 2698 + int ret = 0; 2699 + 2700 + if (!inode_owner_or_capable(inode)) 2701 + return -EACCES; 2702 + 2703 + if (get_user(pin, (__u32 __user *)arg)) 2704 + return -EFAULT; 2705 + 2706 + if (!S_ISREG(inode->i_mode)) 2707 + return -EINVAL; 2708 + 2709 + if (f2fs_readonly(F2FS_I_SB(inode)->sb)) 2710 + return -EROFS; 2711 + 2712 + ret = mnt_want_write_file(filp); 2713 + if (ret) 2714 + return ret; 2715 + 2716 + inode_lock(inode); 2717 + 2718 + if (!pin) { 2719 + clear_inode_flag(inode, FI_PIN_FILE); 2720 + F2FS_I(inode)->i_gc_failures = 1; 2721 + goto done; 2722 + } 2723 + 2724 + if (f2fs_pin_file_control(inode, false)) { 2725 + ret = -EAGAIN; 2726 + goto out; 2727 + } 2728 + ret = f2fs_convert_inline_inode(inode); 2729 + if (ret) 2730 + goto out; 2731 + 2732 + set_inode_flag(inode, FI_PIN_FILE); 2733 + ret = F2FS_I(inode)->i_gc_failures; 2734 + done: 2735 + f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); 2736 + out: 2737 + inode_unlock(inode); 2738 + mnt_drop_write_file(filp); 2739 + return ret; 2740 + } 2741 + 2742 + static int f2fs_ioc_get_pin_file(struct file *filp, unsigned long arg) 2743 + { 2744 + struct inode *inode = 
file_inode(filp); 2745 + __u32 pin = 0; 2746 + 2747 + if (is_inode_flag_set(inode, FI_PIN_FILE)) 2748 + pin = F2FS_I(inode)->i_gc_failures; 2749 + return put_user(pin, (u32 __user *)arg); 2750 + } 2751 + 2675 2752 long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) 2676 2753 { 2677 2754 if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(filp))))) ··· 2799 2722 return f2fs_ioc_fsgetxattr(filp, arg); 2800 2723 case F2FS_IOC_FSSETXATTR: 2801 2724 return f2fs_ioc_fssetxattr(filp, arg); 2725 + case F2FS_IOC_GET_PIN_FILE: 2726 + return f2fs_ioc_get_pin_file(filp, arg); 2727 + case F2FS_IOC_SET_PIN_FILE: 2728 + return f2fs_ioc_set_pin_file(filp, arg); 2802 2729 default: 2803 2730 return -ENOTTY; 2804 2731 } ··· 2878 2797 case F2FS_IOC_GET_FEATURES: 2879 2798 case F2FS_IOC_FSGETXATTR: 2880 2799 case F2FS_IOC_FSSETXATTR: 2800 + case F2FS_IOC_GET_PIN_FILE: 2801 + case F2FS_IOC_SET_PIN_FILE: 2881 2802 break; 2882 2803 default: 2883 2804 return -ENOIOCTLCMD;
+11
fs/f2fs/gc.c
··· 624 624 if (f2fs_is_atomic_file(inode)) 625 625 goto out; 626 626 627 + if (f2fs_is_pinned_file(inode)) { 628 + f2fs_pin_file_control(inode, true); 629 + goto out; 630 + } 631 + 627 632 set_new_dnode(&dn, inode, NULL, NULL, 0); 628 633 err = get_dnode_of_data(&dn, bidx, LOOKUP_NODE); 629 634 if (err) ··· 725 720 726 721 if (f2fs_is_atomic_file(inode)) 727 722 goto out; 723 + if (f2fs_is_pinned_file(inode)) { 724 + if (gc_type == FG_GC) 725 + f2fs_pin_file_control(inode, true); 726 + goto out; 727 + } 728 728 729 729 if (gc_type == BG_GC) { 730 730 if (PageWriteback(page)) ··· 1101 1091 1102 1092 sbi->fggc_threshold = div64_u64((main_count - ovp_count) * 1103 1093 BLKS_PER_SEC(sbi), (main_count - resv_count)); 1094 + sbi->gc_pin_file_threshold = DEF_GC_FAILED_PINNED_FILES; 1104 1095 1105 1096 /* give warm/cold data area from slower device */ 1106 1097 if (sbi->s_ndevs && sbi->segs_per_sec == 1)
+2
fs/f2fs/gc.h
··· 20 20 #define LIMIT_INVALID_BLOCK 40 /* percentage over total user space */ 21 21 #define LIMIT_FREE_BLOCK 40 /* percentage over invalid + free space */ 22 22 23 + #define DEF_GC_FAILED_PINNED_FILES 2048 24 + 23 25 /* Search max. number of dirty segments to select a victim segment */ 24 26 #define DEF_MAX_VICTIM_SEARCH 4096 /* covers 8GB */ 25 27
+2
fs/f2fs/sysfs.c
··· 301 301 F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, idle_interval, interval_time[REQ_TIME]); 302 302 F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, iostat_enable, iostat_enable); 303 303 F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, readdir_ra, readdir_ra); 304 + F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_pin_file_thresh, gc_pin_file_threshold); 304 305 #ifdef CONFIG_F2FS_FAULT_INJECTION 305 306 F2FS_RW_ATTR(FAULT_INFO_RATE, f2fs_fault_info, inject_rate, inject_rate); 306 307 F2FS_RW_ATTR(FAULT_INFO_TYPE, f2fs_fault_info, inject_type, inject_type); ··· 350 349 ATTR_LIST(idle_interval), 351 350 ATTR_LIST(iostat_enable), 352 351 ATTR_LIST(readdir_ra), 352 + ATTR_LIST(gc_pin_file_thresh), 353 353 #ifdef CONFIG_F2FS_FAULT_INJECTION 354 354 ATTR_LIST(inject_rate), 355 355 ATTR_LIST(inject_type),
+8 -1
include/linux/f2fs_fs.h
··· 212 212 #define F2FS_DATA_EXIST 0x08 /* file inline data exist flag */ 213 213 #define F2FS_INLINE_DOTS 0x10 /* file having implicit dot dentries */ 214 214 #define F2FS_EXTRA_ATTR 0x20 /* file having extra attribute */ 215 + #define F2FS_PIN_FILE 0x40 /* file should not be gced */ 215 216 216 217 struct f2fs_inode { 217 218 __le16 i_mode; /* file mode */ ··· 230 229 __le32 i_ctime_nsec; /* change time in nano scale */ 231 230 __le32 i_mtime_nsec; /* modification time in nano scale */ 232 231 __le32 i_generation; /* file version (for NFS) */ 233 - __le32 i_current_depth; /* only for directory depth */ 232 + union { 233 + __le32 i_current_depth; /* only for directory depth */ 234 + __le16 i_gc_failures; /* 235 + * # of gc failures on pinned file. 236 + * only for regular files. 237 + */ 238 + }; 234 239 __le32 i_xattr_nid; /* nid to save xattr */ 235 240 __le32 i_flags; /* file attributes */ 236 241 __le32 i_pino; /* parent inode number */