Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

aio: kill ki_retry

Thanks to Zach Brown's work to rip out the retry infrastructure, we don't
need this anymore - ki_retry was only called right after the kiocb was
initialized.

This also refactors and trims some duplicated code, as well as cleaning up
the refcounting/error handling a bit.

[akpm@linux-foundation.org: use fmode_t in aio_run_iocb()]
[akpm@linux-foundation.org: fix file_start_write/file_end_write tests]
[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Kent Overstreet <koverstreet@google.com>
Cc: Zach Brown <zab@redhat.com>
Cc: Felipe Balbi <balbi@ti.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Mark Fasheh <mfasheh@suse.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Asai Thambi S P <asamymuthupa@micron.com>
Cc: Selvan Mani <smani@micron.com>
Cc: Sam Bradshaw <sbradshaw@micron.com>
Cc: Jeff Moyer <jmoyer@redhat.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Benjamin LaHaise <bcrl@kvack.org>
Reviewed-by: "Theodore Ts'o" <tytso@mit.edu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Authored by Kent Overstreet and committed by Linus Torvalds
41ef4eb8 8a660890

+85 -165
+85 -139
fs/aio.c
··· 903 903 BUG_ON(ret > 0 && iocb->ki_left == 0); 904 904 } 905 905 906 - static ssize_t aio_rw_vect_retry(struct kiocb *iocb) 906 + typedef ssize_t (aio_rw_op)(struct kiocb *, const struct iovec *, 907 + unsigned long, loff_t); 908 + 909 + static ssize_t aio_rw_vect_retry(struct kiocb *iocb, int rw, aio_rw_op *rw_op) 907 910 { 908 911 struct file *file = iocb->ki_filp; 909 912 struct address_space *mapping = file->f_mapping; 910 913 struct inode *inode = mapping->host; 911 - ssize_t (*rw_op)(struct kiocb *, const struct iovec *, 912 - unsigned long, loff_t); 913 914 ssize_t ret = 0; 914 - unsigned short opcode; 915 - 916 - if ((iocb->ki_opcode == IOCB_CMD_PREADV) || 917 - (iocb->ki_opcode == IOCB_CMD_PREAD)) { 918 - rw_op = file->f_op->aio_read; 919 - opcode = IOCB_CMD_PREADV; 920 - } else { 921 - rw_op = file->f_op->aio_write; 922 - opcode = IOCB_CMD_PWRITEV; 923 - } 924 915 925 916 /* This matches the pread()/pwrite() logic */ 926 917 if (iocb->ki_pos < 0) 927 918 return -EINVAL; 928 919 929 - if (opcode == IOCB_CMD_PWRITEV) 920 + if (rw == WRITE) 930 921 file_start_write(file); 931 922 do { 932 923 ret = rw_op(iocb, &iocb->ki_iovec[iocb->ki_cur_seg], ··· 929 938 /* retry all partial writes. retry partial reads as long as its a 930 939 * regular file. */ 931 940 } while (ret > 0 && iocb->ki_left > 0 && 932 - (opcode == IOCB_CMD_PWRITEV || 941 + (rw == WRITE || 933 942 (!S_ISFIFO(inode->i_mode) && !S_ISSOCK(inode->i_mode)))); 934 - if (opcode == IOCB_CMD_PWRITEV) 943 + if (rw == WRITE) 935 944 file_end_write(file); 936 945 937 946 /* This means we must have transferred all that we could */ ··· 941 950 942 951 /* If we managed to write some out we return that, rather than 943 952 * the eventual error. 
*/ 944 - if (opcode == IOCB_CMD_PWRITEV 953 + if (rw == WRITE 945 954 && ret < 0 && ret != -EIOCBQUEUED 946 955 && iocb->ki_nbytes - iocb->ki_left) 947 956 ret = iocb->ki_nbytes - iocb->ki_left; ··· 949 958 return ret; 950 959 } 951 960 952 - static ssize_t aio_fdsync(struct kiocb *iocb) 953 - { 954 - struct file *file = iocb->ki_filp; 955 - ssize_t ret = -EINVAL; 956 - 957 - if (file->f_op->aio_fsync) 958 - ret = file->f_op->aio_fsync(iocb, 1); 959 - return ret; 960 - } 961 - 962 - static ssize_t aio_fsync(struct kiocb *iocb) 963 - { 964 - struct file *file = iocb->ki_filp; 965 - ssize_t ret = -EINVAL; 966 - 967 - if (file->f_op->aio_fsync) 968 - ret = file->f_op->aio_fsync(iocb, 0); 969 - return ret; 970 - } 971 - 972 - static ssize_t aio_setup_vectored_rw(int type, struct kiocb *kiocb, bool compat) 961 + static ssize_t aio_setup_vectored_rw(int rw, struct kiocb *kiocb, bool compat) 973 962 { 974 963 ssize_t ret; 975 964 965 + kiocb->ki_nr_segs = kiocb->ki_nbytes; 966 + 976 967 #ifdef CONFIG_COMPAT 977 968 if (compat) 978 - ret = compat_rw_copy_check_uvector(type, 969 + ret = compat_rw_copy_check_uvector(rw, 979 970 (struct compat_iovec __user *)kiocb->ki_buf, 980 - kiocb->ki_nbytes, 1, &kiocb->ki_inline_vec, 971 + kiocb->ki_nr_segs, 1, &kiocb->ki_inline_vec, 981 972 &kiocb->ki_iovec); 982 973 else 983 974 #endif 984 - ret = rw_copy_check_uvector(type, 975 + ret = rw_copy_check_uvector(rw, 985 976 (struct iovec __user *)kiocb->ki_buf, 986 - kiocb->ki_nbytes, 1, &kiocb->ki_inline_vec, 977 + kiocb->ki_nr_segs, 1, &kiocb->ki_inline_vec, 987 978 &kiocb->ki_iovec); 988 979 if (ret < 0) 989 - goto out; 980 + return ret; 990 981 991 - ret = rw_verify_area(type, kiocb->ki_filp, &kiocb->ki_pos, ret); 992 - if (ret < 0) 993 - goto out; 994 - 995 - kiocb->ki_nr_segs = kiocb->ki_nbytes; 996 - kiocb->ki_cur_seg = 0; 997 - /* ki_nbytes/left now reflect bytes instead of segs */ 982 + /* ki_nbytes now reflect bytes instead of segs */ 998 983 kiocb->ki_nbytes = ret; 999 - 
kiocb->ki_left = ret; 1000 - 1001 - ret = 0; 1002 - out: 1003 - return ret; 984 + return 0; 1004 985 } 1005 986 1006 - static ssize_t aio_setup_single_vector(int type, struct file * file, struct kiocb *kiocb) 987 + static ssize_t aio_setup_single_vector(int rw, struct kiocb *kiocb) 1007 988 { 1008 - int bytes; 1009 - 1010 - bytes = rw_verify_area(type, file, &kiocb->ki_pos, kiocb->ki_left); 1011 - if (bytes < 0) 1012 - return bytes; 989 + if (unlikely(!access_ok(!rw, kiocb->ki_buf, kiocb->ki_nbytes))) 990 + return -EFAULT; 1013 991 1014 992 kiocb->ki_iovec = &kiocb->ki_inline_vec; 1015 993 kiocb->ki_iovec->iov_base = kiocb->ki_buf; 1016 - kiocb->ki_iovec->iov_len = bytes; 994 + kiocb->ki_iovec->iov_len = kiocb->ki_nbytes; 1017 995 kiocb->ki_nr_segs = 1; 1018 - kiocb->ki_cur_seg = 0; 1019 996 return 0; 1020 997 } 1021 998 ··· 992 1033 * Performs the initial checks and aio retry method 993 1034 * setup for the kiocb at the time of io submission. 994 1035 */ 995 - static ssize_t aio_setup_iocb(struct kiocb *kiocb, bool compat) 1036 + static ssize_t aio_run_iocb(struct kiocb *req, bool compat) 996 1037 { 997 - struct file *file = kiocb->ki_filp; 998 - ssize_t ret = 0; 1038 + struct file *file = req->ki_filp; 1039 + ssize_t ret; 1040 + int rw; 1041 + fmode_t mode; 1042 + aio_rw_op *rw_op; 999 1043 1000 - switch (kiocb->ki_opcode) { 1044 + switch (req->ki_opcode) { 1001 1045 case IOCB_CMD_PREAD: 1002 - ret = -EBADF; 1003 - if (unlikely(!(file->f_mode & FMODE_READ))) 1004 - break; 1005 - ret = -EFAULT; 1006 - if (unlikely(!access_ok(VERIFY_WRITE, kiocb->ki_buf, 1007 - kiocb->ki_left))) 1008 - break; 1009 - ret = aio_setup_single_vector(READ, file, kiocb); 1010 - if (ret) 1011 - break; 1012 - ret = -EINVAL; 1013 - if (file->f_op->aio_read) 1014 - kiocb->ki_retry = aio_rw_vect_retry; 1015 - break; 1016 - case IOCB_CMD_PWRITE: 1017 - ret = -EBADF; 1018 - if (unlikely(!(file->f_mode & FMODE_WRITE))) 1019 - break; 1020 - ret = -EFAULT; 1021 - if 
(unlikely(!access_ok(VERIFY_READ, kiocb->ki_buf, 1022 - kiocb->ki_left))) 1023 - break; 1024 - ret = aio_setup_single_vector(WRITE, file, kiocb); 1025 - if (ret) 1026 - break; 1027 - ret = -EINVAL; 1028 - if (file->f_op->aio_write) 1029 - kiocb->ki_retry = aio_rw_vect_retry; 1030 - break; 1031 1046 case IOCB_CMD_PREADV: 1032 - ret = -EBADF; 1033 - if (unlikely(!(file->f_mode & FMODE_READ))) 1034 - break; 1035 - ret = aio_setup_vectored_rw(READ, kiocb, compat); 1036 - if (ret) 1037 - break; 1038 - ret = -EINVAL; 1039 - if (file->f_op->aio_read) 1040 - kiocb->ki_retry = aio_rw_vect_retry; 1041 - break; 1047 + mode = FMODE_READ; 1048 + rw = READ; 1049 + rw_op = file->f_op->aio_read; 1050 + goto rw_common; 1051 + 1052 + case IOCB_CMD_PWRITE: 1042 1053 case IOCB_CMD_PWRITEV: 1043 - ret = -EBADF; 1044 - if (unlikely(!(file->f_mode & FMODE_WRITE))) 1045 - break; 1046 - ret = aio_setup_vectored_rw(WRITE, kiocb, compat); 1054 + mode = FMODE_WRITE; 1055 + rw = WRITE; 1056 + rw_op = file->f_op->aio_write; 1057 + goto rw_common; 1058 + rw_common: 1059 + if (unlikely(!(file->f_mode & mode))) 1060 + return -EBADF; 1061 + 1062 + if (!rw_op) 1063 + return -EINVAL; 1064 + 1065 + ret = (req->ki_opcode == IOCB_CMD_PREADV || 1066 + req->ki_opcode == IOCB_CMD_PWRITEV) 1067 + ? 
aio_setup_vectored_rw(rw, req, compat) 1068 + : aio_setup_single_vector(rw, req); 1047 1069 if (ret) 1048 - break; 1049 - ret = -EINVAL; 1050 - if (file->f_op->aio_write) 1051 - kiocb->ki_retry = aio_rw_vect_retry; 1070 + return ret; 1071 + 1072 + ret = rw_verify_area(rw, file, &req->ki_pos, req->ki_nbytes); 1073 + if (ret < 0) 1074 + return ret; 1075 + 1076 + req->ki_nbytes = ret; 1077 + req->ki_left = ret; 1078 + 1079 + ret = aio_rw_vect_retry(req, rw, rw_op); 1052 1080 break; 1081 + 1053 1082 case IOCB_CMD_FDSYNC: 1054 - ret = -EINVAL; 1055 - if (file->f_op->aio_fsync) 1056 - kiocb->ki_retry = aio_fdsync; 1083 + if (!file->f_op->aio_fsync) 1084 + return -EINVAL; 1085 + 1086 + ret = file->f_op->aio_fsync(req, 1); 1057 1087 break; 1088 + 1058 1089 case IOCB_CMD_FSYNC: 1059 - ret = -EINVAL; 1060 - if (file->f_op->aio_fsync) 1061 - kiocb->ki_retry = aio_fsync; 1090 + if (!file->f_op->aio_fsync) 1091 + return -EINVAL; 1092 + 1093 + ret = file->f_op->aio_fsync(req, 0); 1062 1094 break; 1095 + 1063 1096 default: 1064 1097 pr_debug("EINVAL: no operation provided\n"); 1065 - ret = -EINVAL; 1098 + return -EINVAL; 1066 1099 } 1067 1100 1068 - if (!kiocb->ki_retry) 1069 - return ret; 1101 + if (ret != -EIOCBQUEUED) { 1102 + /* 1103 + * There's no easy way to restart the syscall since other AIO's 1104 + * may be already running. Just fail this IO with EINTR. 
1105 + */ 1106 + if (unlikely(ret == -ERESTARTSYS || ret == -ERESTARTNOINTR || 1107 + ret == -ERESTARTNOHAND || 1108 + ret == -ERESTART_RESTARTBLOCK)) 1109 + ret = -EINTR; 1110 + aio_complete(req, ret, 0); 1111 + } 1070 1112 1071 1113 return 0; 1072 1114 } ··· 1094 1134 return -EINVAL; 1095 1135 } 1096 1136 1097 - req = aio_get_req(ctx); /* returns with 2 references to req */ 1137 + req = aio_get_req(ctx); 1098 1138 if (unlikely(!req)) 1099 1139 return -EAGAIN; 1100 1140 ··· 1133 1173 req->ki_left = req->ki_nbytes = iocb->aio_nbytes; 1134 1174 req->ki_opcode = iocb->aio_lio_opcode; 1135 1175 1136 - ret = aio_setup_iocb(req, compat); 1176 + ret = aio_run_iocb(req, compat); 1137 1177 if (ret) 1138 1178 goto out_put_req; 1139 1179 1140 - ret = req->ki_retry(req); 1141 - if (ret != -EIOCBQUEUED) { 1142 - /* 1143 - * There's no easy way to restart the syscall since other AIO's 1144 - * may be already running. Just fail this IO with EINTR. 1145 - */ 1146 - if (unlikely(ret == -ERESTARTSYS || ret == -ERESTARTNOINTR || 1147 - ret == -ERESTARTNOHAND || 1148 - ret == -ERESTART_RESTARTBLOCK)) 1149 - ret = -EINTR; 1150 - aio_complete(req, ret, 0); 1151 - } 1152 - 1153 1180 aio_put_req(req); /* drop extra ref to req */ 1154 1181 return 0; 1155 - 1156 1182 out_put_req: 1157 1183 atomic_dec(&ctx->reqs_active); 1158 1184 aio_put_req(req); /* drop extra ref to req */
-26
include/linux/aio.h
··· 29 29 30 30 typedef int (kiocb_cancel_fn)(struct kiocb *, struct io_event *); 31 31 32 - /* is there a better place to document function pointer methods? */ 33 - /** 34 - * ki_retry - iocb forward progress callback 35 - * @kiocb: The kiocb struct to advance by performing an operation. 36 - * 37 - * This callback is called when the AIO core wants a given AIO operation 38 - * to make forward progress. The kiocb argument describes the operation 39 - * that is to be performed. As the operation proceeds, perhaps partially, 40 - * ki_retry is expected to update the kiocb with progress made. Typically 41 - * ki_retry is set in the AIO core and it itself calls file_operations 42 - * helpers. 43 - * 44 - * ki_retry's return value determines when the AIO operation is completed 45 - * and an event is generated in the AIO event ring. Except the special 46 - * return values described below, the value that is returned from ki_retry 47 - * is transferred directly into the completion ring as the operation's 48 - * resulting status. Once this has happened ki_retry *MUST NOT* reference 49 - * the kiocb pointer again. 50 - * 51 - * If ki_retry returns -EIOCBQUEUED it has made a promise that aio_complete() 52 - * will be called on the kiocb pointer in the future. The AIO core will 53 - * not ask the method again -- ki_retry must ensure forward progress. 54 - * aio_complete() must be called once and only once in the future, multiple 55 - * calls may result in undefined behaviour. 56 - */ 57 32 struct kiocb { 58 33 atomic_t ki_users; 59 34 60 35 struct file *ki_filp; 61 36 struct kioctx *ki_ctx; /* NULL for sync ops */ 62 37 kiocb_cancel_fn *ki_cancel; 63 - ssize_t (*ki_retry)(struct kiocb *); 64 38 void (*ki_dtor)(struct kiocb *); 65 39 66 40 union {