Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1/*
2 * Copyright (C) 2004 Jeff Dike (jdike@addtoit.com)
3 * Licensed under the GPL
4 */
5
6#include <stdlib.h>
7#include <unistd.h>
8#include <signal.h>
9#include <errno.h>
10#include <sched.h>
11#include <sys/syscall.h>
12#include "os.h"
13#include "aio.h"
14#include "init.h"
15#include "user.h"
16#include "mode.h"
17
18struct aio_thread_req {
19 enum aio_type type;
20 int io_fd;
21 unsigned long long offset;
22 char *buf;
23 int len;
24 struct aio_context *aio;
25};
26
/*
 * Request channel of the fallback I/O thread: submit_aio_24() writes requests
 * to aio_req_fd_w and not_aio_thread() reads them from aio_req_fd_r.  Both
 * hold -1 until init_aio_24() has set the channel up.
 */
static int aio_req_fd_w = -1;
static int aio_req_fd_r = -1;
29
30#if defined(HAVE_AIO_ABI)
31#include <linux/aio_abi.h>
32
33/* If we have the headers, we are going to build with AIO enabled.
34 * If we don't have aio in libc, we define the necessary stubs here.
35 */
36
37#if !defined(HAVE_AIO_LIBC)
38
/*
 * Stand-in for a libc io_setup(): issues the raw __NR_io_setup system call
 * with the event count @n and the location of the context handle @ctxp.
 * The result of syscall() is handed back unchanged.
 */
static long io_setup(int n, aio_context_t *ctxp)
{
	long ret;

	ret = syscall(__NR_io_setup, n, ctxp);

	return ret;
}
43
/*
 * Stand-in for a libc io_submit(): issues the raw __NR_io_submit system call,
 * passing the context @ctx, the number of requests @nr and the array of
 * request pointers @iocbpp.  The result of syscall() is handed back unchanged.
 */
static long io_submit(aio_context_t ctx, long nr, struct iocb **iocbpp)
{
	long ret;

	ret = syscall(__NR_io_submit, ctx, nr, iocbpp);

	return ret;
}
48
/*
 * Stand-in for a libc io_getevents(): issues the raw __NR_io_getevents system
 * call with the context @ctx_id, the event-count bounds @min_nr and @nr, the
 * output array @events and the optional @timeout.  The result of syscall() is
 * handed back unchanged.
 */
static long io_getevents(aio_context_t ctx_id, long min_nr, long nr,
			 struct io_event *events, struct timespec *timeout)
{
	long ret;

	ret = syscall(__NR_io_getevents, ctx_id, min_nr, nr, events, timeout);

	return ret;
}
54
55#endif
56
57/* The AIO_MMAP cases force the mmapped page into memory here
58 * rather than in whatever place first touches the data. I used
59 * to do this by touching the page, but that's delicate because
60 * gcc is prone to optimizing that away. So, what's done here
61 * is we read from the descriptor from which the page was
62 * mapped. The caller is required to pass an offset which is
63 * inside the page that was mapped. Thus, when the read
64 * returns, we know that the page is in the page cache, and
65 * that it now backs the mmapped area.
66 */
67
68static int do_aio(aio_context_t ctx, enum aio_type type, int fd, char *buf,
69 int len, unsigned long long offset, struct aio_context *aio)
70{
71 struct iocb iocb, *iocbp = &iocb;
72 char c;
73 int err;
74
75 iocb = ((struct iocb) { .aio_data = (unsigned long) aio,
76 .aio_reqprio = 0,
77 .aio_fildes = fd,
78 .aio_buf = (unsigned long) buf,
79 .aio_nbytes = len,
80 .aio_offset = offset,
81 .aio_reserved1 = 0,
82 .aio_reserved2 = 0,
83 .aio_reserved3 = 0 });
84
85 switch(type){
86 case AIO_READ:
87 iocb.aio_lio_opcode = IOCB_CMD_PREAD;
88 err = io_submit(ctx, 1, &iocbp);
89 break;
90 case AIO_WRITE:
91 iocb.aio_lio_opcode = IOCB_CMD_PWRITE;
92 err = io_submit(ctx, 1, &iocbp);
93 break;
94 case AIO_MMAP:
95 iocb.aio_lio_opcode = IOCB_CMD_PREAD;
96 iocb.aio_buf = (unsigned long) &c;
97 iocb.aio_nbytes = sizeof(c);
98 err = io_submit(ctx, 1, &iocbp);
99 break;
100 default:
101 printk("Bogus op in do_aio - %d\n", type);
102 err = -EINVAL;
103 break;
104 }
105
106 if(err > 0)
107 err = 0;
108 else
109 err = -errno;
110
111 return err;
112}
113
/*
 * Host AIO context handle.  init_aio_26() has io_setup() fill it in; it is
 * then used for submitting requests and for collecting completions in
 * aio_thread().  Zero until initialized.
 */
static aio_context_t ctx;
115
116static int aio_thread(void *arg)
117{
118 struct aio_thread_reply reply;
119 struct io_event event;
120 int err, n, reply_fd;
121
122 signal(SIGWINCH, SIG_IGN);
123
124 while(1){
125 n = io_getevents(ctx, 1, 1, &event, NULL);
126 if(n < 0){
127 if(errno == EINTR)
128 continue;
129 printk("aio_thread - io_getevents failed, "
130 "errno = %d\n", errno);
131 }
132 else {
133 reply = ((struct aio_thread_reply)
134 { .data = (void *) (long) event.data,
135 .err = event.res });
136 reply_fd = ((struct aio_context *) reply.data)->reply_fd;
137 err = os_write_file(reply_fd, &reply, sizeof(reply));
138 if(err != sizeof(reply))
139 printk("aio_thread - write failed, fd = %d, "
140 "err = %d\n", aio_req_fd_r, -err);
141 }
142 }
143 return 0;
144}
145
146#endif
147
148static int do_not_aio(struct aio_thread_req *req)
149{
150 char c;
151 int err;
152
153 switch(req->type){
154 case AIO_READ:
155 err = os_seek_file(req->io_fd, req->offset);
156 if(err)
157 goto out;
158
159 err = os_read_file(req->io_fd, req->buf, req->len);
160 break;
161 case AIO_WRITE:
162 err = os_seek_file(req->io_fd, req->offset);
163 if(err)
164 goto out;
165
166 err = os_write_file(req->io_fd, req->buf, req->len);
167 break;
168 case AIO_MMAP:
169 err = os_seek_file(req->io_fd, req->offset);
170 if(err)
171 goto out;
172
173 err = os_read_file(req->io_fd, &c, sizeof(c));
174 break;
175 default:
176 printk("do_not_aio - bad request type : %d\n", req->type);
177 err = -EINVAL;
178 break;
179 }
180
181out:
182 return err;
183}
184
185static int not_aio_thread(void *arg)
186{
187 struct aio_thread_req req;
188 struct aio_thread_reply reply;
189 int err;
190
191 signal(SIGWINCH, SIG_IGN);
192 while(1){
193 err = os_read_file(aio_req_fd_r, &req, sizeof(req));
194 if(err != sizeof(req)){
195 if(err < 0)
196 printk("not_aio_thread - read failed, "
197 "fd = %d, err = %d\n", aio_req_fd_r,
198 -err);
199 else {
200 printk("not_aio_thread - short read, fd = %d, "
201 "length = %d\n", aio_req_fd_r, err);
202 }
203 continue;
204 }
205 err = do_not_aio(&req);
206 reply = ((struct aio_thread_reply) { .data = req.aio,
207 .err = err });
208 err = os_write_file(req.aio->reply_fd, &reply, sizeof(reply));
209 if(err != sizeof(reply))
210 printk("not_aio_thread - write failed, fd = %d, "
211 "err = %d\n", aio_req_fd_r, -err);
212 }
213
214 return 0;
215}
216
/*
 * Value returned by run_helper_thread() for whichever helper thread was
 * started, or -1 while none is running.  exit_aio() uses it to kill the
 * helper.
 */
static int aio_pid = -1;
218
219static int init_aio_24(void)
220{
221 unsigned long stack;
222 int fds[2], err;
223
224 err = os_pipe(fds, 1, 1);
225 if(err)
226 goto out;
227
228 aio_req_fd_w = fds[0];
229 aio_req_fd_r = fds[1];
230 err = run_helper_thread(not_aio_thread, NULL,
231 CLONE_FILES | CLONE_VM | SIGCHLD, &stack, 0);
232 if(err < 0)
233 goto out_close_pipe;
234
235 aio_pid = err;
236 goto out;
237
238out_close_pipe:
239 os_close_file(fds[0]);
240 os_close_file(fds[1]);
241 aio_req_fd_w = -1;
242 aio_req_fd_r = -1;
243out:
244#ifndef HAVE_AIO_ABI
245 printk("/usr/include/linux/aio_abi.h not present during build\n");
246#endif
247 printk("2.6 host AIO support not used - falling back to I/O "
248 "thread\n");
249 return 0;
250}
251
252#ifdef HAVE_AIO_ABI
253#define DEFAULT_24_AIO 0
254static int init_aio_26(void)
255{
256 unsigned long stack;
257 int err;
258
259 if(io_setup(256, &ctx)){
260 err = -errno;
261 printk("aio_thread failed to initialize context, err = %d\n",
262 errno);
263 return err;
264 }
265
266 err = run_helper_thread(aio_thread, NULL,
267 CLONE_FILES | CLONE_VM | SIGCHLD, &stack, 0);
268 if(err < 0)
269 return err;
270
271 aio_pid = err;
272
273 printk("Using 2.6 host AIO\n");
274 return 0;
275}
276
277static int submit_aio_26(enum aio_type type, int io_fd, char *buf, int len,
278 unsigned long long offset, struct aio_context *aio)
279{
280 struct aio_thread_reply reply;
281 int err;
282
283 err = do_aio(ctx, type, io_fd, buf, len, offset, aio);
284 if(err){
285 reply = ((struct aio_thread_reply) { .data = aio,
286 .err = err });
287 err = os_write_file(aio->reply_fd, &reply, sizeof(reply));
288 if(err != sizeof(reply))
289 printk("submit_aio_26 - write failed, "
290 "fd = %d, err = %d\n", aio->reply_fd, -err);
291 else err = 0;
292 }
293
294 return err;
295}
296
297#else
/* Built without the AIO headers, so the I/O thread is the default. */
#define DEFAULT_24_AIO 1

/*
 * Placeholder used when host AIO support was not compiled in: there is
 * nothing to initialize, so it always reports -ENOSYS.
 */
static int init_aio_26(void)
{
	const int unsupported = -ENOSYS;

	return unsupported;
}
303
304static int submit_aio_26(enum aio_type type, int io_fd, char *buf, int len,
305 unsigned long long offset, struct aio_context *aio)
306{
307 return -ENOSYS;
308}
309#endif
310
311static int aio_24 = DEFAULT_24_AIO;
312
313static int __init set_aio_24(char *name, int *add)
314{
315 aio_24 = 1;
316 return 0;
317}
318
319__uml_setup("aio=2.4", set_aio_24,
320"aio=2.4\n"
321" This is used to force UML to use 2.4-style AIO even when 2.6 AIO is\n"
322" available. 2.4 AIO is a single thread that handles one request at a\n"
323" time, synchronously. 2.6 AIO is a thread which uses the 2.6 AIO \n"
324" interface to handle an arbitrary number of pending requests. 2.6 AIO \n"
325" is not available in tt mode, on 2.4 hosts, or when UML is built with\n"
326" /usr/include/linux/aio_abi.h not available. Many distributions don't\n"
327" include aio_abi.h, so you will need to copy it from a kernel tree to\n"
328" your /usr/include/linux in order to build an AIO-capable UML\n\n"
329);
330
331static int init_aio(void)
332{
333 int err;
334
335 CHOOSE_MODE(({ if(!aio_24){
336 printk("Disabling 2.6 AIO in tt mode\n");
337 aio_24 = 1;
338 } }), (void) 0);
339
340 if(!aio_24){
341 err = init_aio_26();
342 if(err && (errno == ENOSYS)){
343 printk("2.6 AIO not supported on the host - "
344 "reverting to 2.4 AIO\n");
345 aio_24 = 1;
346 }
347 else return err;
348 }
349
350 if(aio_24)
351 return init_aio_24();
352
353 return 0;
354}
355
356/* The reason for the __initcall/__uml_exitcall asymmetry is that init_aio
357 * needs to be called when the kernel is running because it calls run_helper,
358 * which needs get_free_page. exit_aio is a __uml_exitcall because the generic
359 * kernel does not run __exitcalls on shutdown, and can't because many of them
360 * break when called outside of module unloading.
361 */
362__initcall(init_aio);
363
364static void exit_aio(void)
365{
366 if(aio_pid != -1)
367 os_kill_process(aio_pid, 1);
368}
369
370__uml_exitcall(exit_aio);
371
372static int submit_aio_24(enum aio_type type, int io_fd, char *buf, int len,
373 unsigned long long offset, struct aio_context *aio)
374{
375 struct aio_thread_req req = { .type = type,
376 .io_fd = io_fd,
377 .offset = offset,
378 .buf = buf,
379 .len = len,
380 .aio = aio,
381 };
382 int err;
383
384 err = os_write_file(aio_req_fd_w, &req, sizeof(req));
385 if(err == sizeof(req))
386 err = 0;
387
388 return err;
389}
390
391int submit_aio(enum aio_type type, int io_fd, char *buf, int len,
392 unsigned long long offset, int reply_fd,
393 struct aio_context *aio)
394{
395 aio->reply_fd = reply_fd;
396 if(aio_24)
397 return submit_aio_24(type, io_fd, buf, len, offset, aio);
398 else {
399 return submit_aio_26(type, io_fd, buf, len, offset, aio);
400 }
401}