Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1/*
2 * Copyright (C) 2004 Jeff Dike (jdike@addtoit.com)
3 * Licensed under the GPL
4 */
5
6#include <stdlib.h>
7#include <unistd.h>
8#include <signal.h>
9#include <errno.h>
10#include <sched.h>
11#include <sys/syscall.h>
12#include "os.h"
13#include "aio.h"
14#include "init.h"
15#include "user.h"
16#include "mode.h"
17#include "kern_constants.h"
18
19struct aio_thread_req {
20 enum aio_type type;
21 int io_fd;
22 unsigned long long offset;
23 char *buf;
24 int len;
25 struct aio_context *aio;
26};
27
28#if defined(HAVE_AIO_ABI)
29#include <linux/aio_abi.h>
30
31/* If we have the headers, we are going to build with AIO enabled.
32 * If we don't have aio in libc, we define the necessary stubs here.
33 */
34
35#if !defined(HAVE_AIO_LIBC)
36
/*
 * Stand-in for libc's io_setup(): invokes the io_setup system call directly
 * and hands back syscall()'s result unchanged.
 */
static long io_setup(int n, aio_context_t *ctxp)
{
	long ret;

	ret = syscall(__NR_io_setup, n, ctxp);
	return ret;
}
41
/*
 * Stand-in for libc's io_submit(): invokes the io_submit system call
 * directly and hands back syscall()'s result unchanged.
 */
static long io_submit(aio_context_t ctx, long nr, struct iocb **iocbpp)
{
	long ret;

	ret = syscall(__NR_io_submit, ctx, nr, iocbpp);
	return ret;
}
46
/*
 * Stand-in for libc's io_getevents(): invokes the io_getevents system call
 * directly and hands back syscall()'s result unchanged.
 */
static long io_getevents(aio_context_t ctx_id, long min_nr, long nr,
			 struct io_event *events, struct timespec *timeout)
{
	long ret;

	ret = syscall(__NR_io_getevents, ctx_id, min_nr, nr, events, timeout);
	return ret;
}
52
53#endif
54
55/* The AIO_MMAP cases force the mmapped page into memory here
56 * rather than in whatever place first touches the data. I used
57 * to do this by touching the page, but that's delicate because
58 * gcc is prone to optimizing that away. So, what's done here
59 * is we read from the descriptor from which the page was
60 * mapped. The caller is required to pass an offset which is
61 * inside the page that was mapped. Thus, when the read
62 * returns, we know that the page is in the page cache, and
63 * that it now backs the mmapped area.
64 */
65
66static int do_aio(aio_context_t ctx, enum aio_type type, int fd, char *buf,
67 int len, unsigned long long offset, struct aio_context *aio)
68{
69 struct iocb *iocbp = & ((struct iocb) {
70 .aio_data = (unsigned long) aio,
71 .aio_fildes = fd,
72 .aio_buf = (unsigned long) buf,
73 .aio_nbytes = len,
74 .aio_offset = offset
75 });
76 char c;
77
78 switch (type) {
79 case AIO_READ:
80 iocbp->aio_lio_opcode = IOCB_CMD_PREAD;
81 break;
82 case AIO_WRITE:
83 iocbp->aio_lio_opcode = IOCB_CMD_PWRITE;
84 break;
85 case AIO_MMAP:
86 iocbp->aio_lio_opcode = IOCB_CMD_PREAD;
87 iocbp->aio_buf = (unsigned long) &c;
88 iocbp->aio_nbytes = sizeof(c);
89 break;
90 default:
91 printk(UM_KERN_ERR "Bogus op in do_aio - %d\n", type);
92 return -EINVAL;
93 }
94
95 return (io_submit(ctx, 1, &iocbp) > 0) ? 0 : -errno;
96}
97
/*
 * Host AIO context.  Filled in by io_setup() from init_aio_26() and only
 * read after that.
 */
static aio_context_t ctx;
100
101static int aio_thread(void *arg)
102{
103 struct aio_thread_reply reply;
104 struct io_event event;
105 int err, n, reply_fd;
106
107 signal(SIGWINCH, SIG_IGN);
108
109 while(1){
110 n = io_getevents(ctx, 1, 1, &event, NULL);
111 if(n < 0){
112 if(errno == EINTR)
113 continue;
114 printk("aio_thread - io_getevents failed, "
115 "errno = %d\n", errno);
116 }
117 else {
118 reply = ((struct aio_thread_reply)
119 { .data = (void *) (long) event.data,
120 .err = event.res });
121 reply_fd = ((struct aio_context *) reply.data)->reply_fd;
122 err = write(reply_fd, &reply, sizeof(reply));
123 if(err != sizeof(reply))
124 printk("aio_thread - write failed, fd = %d, "
125 "err = %d\n", reply_fd, errno);
126 }
127 }
128 return 0;
129}
130
131#endif
132
133static int do_not_aio(struct aio_thread_req *req)
134{
135 char c;
136 unsigned long long actual;
137 int n;
138
139 actual = lseek64(req->io_fd, req->offset, SEEK_SET);
140 if(actual != req->offset)
141 return -errno;
142
143 switch(req->type){
144 case AIO_READ:
145 n = read(req->io_fd, req->buf, req->len);
146 break;
147 case AIO_WRITE:
148 n = write(req->io_fd, req->buf, req->len);
149 break;
150 case AIO_MMAP:
151 n = read(req->io_fd, &c, sizeof(c));
152 break;
153 default:
154 printk("do_not_aio - bad request type : %d\n", req->type);
155 return -EINVAL;
156 }
157
158 if(n < 0)
159 return -errno;
160 return 0;
161}
162
/*
 * Helper-thread state.  All of it is set up from the init functions and
 * left alone afterwards; -1 means "not set up".
 */
static unsigned long aio_stack;	/* helper thread stack, freed by exit_aio() */
static int aio_pid = -1;	/* pid of the helper thread */
static int aio_req_fd_w = -1;	/* submit_aio_24() writes requests here */
static int aio_req_fd_r = -1;	/* not_aio_thread() reads requests here */
168
169static int not_aio_thread(void *arg)
170{
171 struct aio_thread_req req;
172 struct aio_thread_reply reply;
173 int err;
174
175 signal(SIGWINCH, SIG_IGN);
176 while(1){
177 err = read(aio_req_fd_r, &req, sizeof(req));
178 if(err != sizeof(req)){
179 if(err < 0)
180 printk("not_aio_thread - read failed, "
181 "fd = %d, err = %d\n", aio_req_fd_r,
182 errno);
183 else {
184 printk("not_aio_thread - short read, fd = %d, "
185 "length = %d\n", aio_req_fd_r, err);
186 }
187 continue;
188 }
189 err = do_not_aio(&req);
190 reply = ((struct aio_thread_reply) { .data = req.aio,
191 .err = err });
192 err = write(req.aio->reply_fd, &reply, sizeof(reply));
193 if(err != sizeof(reply))
194 printk("not_aio_thread - write failed, fd = %d, "
195 "err = %d\n", req.aio->reply_fd, errno);
196 }
197
198 return 0;
199}
200
201static int init_aio_24(void)
202{
203 int fds[2], err;
204
205 err = os_pipe(fds, 1, 1);
206 if(err)
207 goto out;
208
209 aio_req_fd_w = fds[0];
210 aio_req_fd_r = fds[1];
211
212 err = os_set_fd_block(aio_req_fd_w, 0);
213 if(err)
214 goto out_close_pipe;
215
216 err = run_helper_thread(not_aio_thread, NULL,
217 CLONE_FILES | CLONE_VM | SIGCHLD, &aio_stack);
218 if(err < 0)
219 goto out_close_pipe;
220
221 aio_pid = err;
222 goto out;
223
224out_close_pipe:
225 os_close_file(fds[0]);
226 os_close_file(fds[1]);
227 aio_req_fd_w = -1;
228 aio_req_fd_r = -1;
229out:
230#ifndef HAVE_AIO_ABI
231 printk("/usr/include/linux/aio_abi.h not present during build\n");
232#endif
233 printk("2.6 host AIO support not used - falling back to I/O "
234 "thread\n");
235 return 0;
236}
237
238#ifdef HAVE_AIO_ABI
239#define DEFAULT_24_AIO 0
240static int init_aio_26(void)
241{
242 int err;
243
244 if(io_setup(256, &ctx)){
245 err = -errno;
246 printk("aio_thread failed to initialize context, err = %d\n",
247 errno);
248 return err;
249 }
250
251 err = run_helper_thread(aio_thread, NULL,
252 CLONE_FILES | CLONE_VM | SIGCHLD, &aio_stack);
253 if(err < 0)
254 return err;
255
256 aio_pid = err;
257
258 printk("Using 2.6 host AIO\n");
259 return 0;
260}
261
262static int submit_aio_26(enum aio_type type, int io_fd, char *buf, int len,
263 unsigned long long offset, struct aio_context *aio)
264{
265 struct aio_thread_reply reply;
266 int err;
267
268 err = do_aio(ctx, type, io_fd, buf, len, offset, aio);
269 if(err){
270 reply = ((struct aio_thread_reply) { .data = aio,
271 .err = err });
272 err = write(aio->reply_fd, &reply, sizeof(reply));
273 if(err != sizeof(reply)){
274 err = -errno;
275 printk("submit_aio_26 - write failed, "
276 "fd = %d, err = %d\n", aio->reply_fd, -err);
277 }
278 else err = 0;
279 }
280
281 return err;
282}
283
284#else
285#define DEFAULT_24_AIO 1
/*
 * Placeholder used when UML is built without aio_abi.h: 2.6 AIO can never
 * be initialized, so always report -ENOSYS.
 */
static int init_aio_26(void)
{
	const int unsupported = -ENOSYS;

	return unsupported;
}
290
291static int submit_aio_26(enum aio_type type, int io_fd, char *buf, int len,
292 unsigned long long offset, struct aio_context *aio)
293{
294 return -ENOSYS;
295}
296#endif
297
298/* Initialized in an initcall and unchanged thereafter */
299static int aio_24 = DEFAULT_24_AIO;
300
301static int __init set_aio_24(char *name, int *add)
302{
303 aio_24 = 1;
304 return 0;
305}
306
307__uml_setup("aio=2.4", set_aio_24,
308"aio=2.4\n"
309" This is used to force UML to use 2.4-style AIO even when 2.6 AIO is\n"
310" available. 2.4 AIO is a single thread that handles one request at a\n"
311" time, synchronously. 2.6 AIO is a thread which uses the 2.6 AIO \n"
312" interface to handle an arbitrary number of pending requests. 2.6 AIO \n"
313" is not available in tt mode, on 2.4 hosts, or when UML is built with\n"
314" /usr/include/linux/aio_abi.h not available. Many distributions don't\n"
315" include aio_abi.h, so you will need to copy it from a kernel tree to\n"
316" your /usr/include/linux in order to build an AIO-capable UML\n\n"
317);
318
319static int init_aio(void)
320{
321 int err;
322
323 CHOOSE_MODE(({ if(!aio_24){
324 printk("Disabling 2.6 AIO in tt mode\n");
325 aio_24 = 1;
326 } }), (void) 0);
327
328 if(!aio_24){
329 err = init_aio_26();
330 if(err && (errno == ENOSYS)){
331 printk("2.6 AIO not supported on the host - "
332 "reverting to 2.4 AIO\n");
333 aio_24 = 1;
334 }
335 else return err;
336 }
337
338 if(aio_24)
339 return init_aio_24();
340
341 return 0;
342}
343
344/* The reason for the __initcall/__uml_exitcall asymmetry is that init_aio
345 * needs to be called when the kernel is running because it calls run_helper,
346 * which needs get_free_page. exit_aio is a __uml_exitcall because the generic
347 * kernel does not run __exitcalls on shutdown, and can't because many of them
348 * break when called outside of module unloading.
349 */
350__initcall(init_aio);
351
352static void exit_aio(void)
353{
354 if (aio_pid != -1) {
355 os_kill_process(aio_pid, 1);
356 free_stack(aio_stack, 0);
357 }
358}
359
360__uml_exitcall(exit_aio);
361
362static int submit_aio_24(enum aio_type type, int io_fd, char *buf, int len,
363 unsigned long long offset, struct aio_context *aio)
364{
365 struct aio_thread_req req = { .type = type,
366 .io_fd = io_fd,
367 .offset = offset,
368 .buf = buf,
369 .len = len,
370 .aio = aio,
371 };
372 int err;
373
374 err = write(aio_req_fd_w, &req, sizeof(req));
375 if(err == sizeof(req))
376 err = 0;
377 else err = -errno;
378
379 return err;
380}
381
382int submit_aio(enum aio_type type, int io_fd, char *buf, int len,
383 unsigned long long offset, int reply_fd,
384 struct aio_context *aio)
385{
386 aio->reply_fd = reply_fd;
387 if(aio_24)
388 return submit_aio_24(type, io_fd, buf, len, offset, aio);
389 else {
390 return submit_aio_26(type, io_fd, buf, len, offset, aio);
391 }
392}