Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'for-4.21/aio-20181221' of git://git.kernel.dk/linux-block

Pull aio updates from Jens Axboe:
"Flushing out pre-patches for the buffered/polled aio series. Some
fixes in here, but also optimizations"

* tag 'for-4.21/aio-20181221' of git://git.kernel.dk/linux-block:
aio: abstract out io_event filler helper
aio: split out iocb copy from io_submit_one()
aio: use iocb_put() instead of open coding it
aio: only use blk plugs for > 2 depth submissions
aio: don't zero entire aio_kiocb in aio_get_req()
aio: separate out ring reservation from req allocation
aio: use assigned completion handler

+89 -57
+89 -57
fs/aio.c
··· 70 70 struct io_event io_events[0]; 71 71 }; /* 128 bytes + ring size */ 72 72 73 + /* 74 + * Plugging is meant to work with larger batches of IOs. If we don't 75 + * have more than the below, then don't bother setting up a plug. 76 + */ 77 + #define AIO_PLUG_THRESHOLD 2 78 + 73 79 #define AIO_RING_PAGES 8 74 80 75 81 struct kioctx_table { ··· 908 902 local_irq_restore(flags); 909 903 } 910 904 911 - static bool get_reqs_available(struct kioctx *ctx) 905 + static bool __get_reqs_available(struct kioctx *ctx) 912 906 { 913 907 struct kioctx_cpu *kcpu; 914 908 bool ret = false; ··· 1000 994 spin_unlock_irq(&ctx->completion_lock); 1001 995 } 1002 996 997 + static bool get_reqs_available(struct kioctx *ctx) 998 + { 999 + if (__get_reqs_available(ctx)) 1000 + return true; 1001 + user_refill_reqs_available(ctx); 1002 + return __get_reqs_available(ctx); 1003 + } 1004 + 1003 1005 /* aio_get_req 1004 1006 * Allocate a slot for an aio request. 1005 1007 * Returns NULL if no requests are free. 
··· 1016 1002 { 1017 1003 struct aio_kiocb *req; 1018 1004 1019 - if (!get_reqs_available(ctx)) { 1020 - user_refill_reqs_available(ctx); 1021 - if (!get_reqs_available(ctx)) 1022 - return NULL; 1023 - } 1024 - 1025 - req = kmem_cache_alloc(kiocb_cachep, GFP_KERNEL|__GFP_ZERO); 1005 + req = kmem_cache_alloc(kiocb_cachep, GFP_KERNEL); 1026 1006 if (unlikely(!req)) 1027 - goto out_put; 1007 + return NULL; 1028 1008 1029 1009 percpu_ref_get(&ctx->reqs); 1010 + req->ki_ctx = ctx; 1030 1011 INIT_LIST_HEAD(&req->ki_list); 1031 1012 refcount_set(&req->ki_refcnt, 0); 1032 - req->ki_ctx = ctx; 1013 + req->ki_eventfd = NULL; 1033 1014 return req; 1034 - out_put: 1035 - put_reqs_available(ctx, 1); 1036 - return NULL; 1037 1015 } 1038 1016 1039 1017 static struct kioctx *lookup_ioctx(unsigned long ctx_id) ··· 1065 1059 } 1066 1060 } 1067 1061 1062 + static void aio_fill_event(struct io_event *ev, struct aio_kiocb *iocb, 1063 + long res, long res2) 1064 + { 1065 + ev->obj = (u64)(unsigned long)iocb->ki_user_iocb; 1066 + ev->data = iocb->ki_user_data; 1067 + ev->res = res; 1068 + ev->res2 = res2; 1069 + } 1070 + 1068 1071 /* aio_complete 1069 1072 * Called when the io request on the given iocb is complete. 
1070 1073 */ ··· 1101 1086 ev_page = kmap_atomic(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]); 1102 1087 event = ev_page + pos % AIO_EVENTS_PER_PAGE; 1103 1088 1104 - event->obj = (u64)(unsigned long)iocb->ki_user_iocb; 1105 - event->data = iocb->ki_user_data; 1106 - event->res = res; 1107 - event->res2 = res2; 1089 + aio_fill_event(event, iocb, res, res2); 1108 1090 1109 1091 kunmap_atomic(ev_page); 1110 1092 flush_dcache_page(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]); ··· 1428 1416 aio_complete(iocb, res, res2); 1429 1417 } 1430 1418 1431 - static int aio_prep_rw(struct kiocb *req, struct iocb *iocb) 1419 + static int aio_prep_rw(struct kiocb *req, const struct iocb *iocb) 1432 1420 { 1433 1421 int ret; 1434 1422 ··· 1469 1457 return ret; 1470 1458 } 1471 1459 1472 - static int aio_setup_rw(int rw, struct iocb *iocb, struct iovec **iovec, 1460 + static int aio_setup_rw(int rw, const struct iocb *iocb, struct iovec **iovec, 1473 1461 bool vectored, bool compat, struct iov_iter *iter) 1474 1462 { 1475 1463 void __user *buf = (void __user *)(uintptr_t)iocb->aio_buf; ··· 1504 1492 ret = -EINTR; 1505 1493 /*FALLTHRU*/ 1506 1494 default: 1507 - aio_complete_rw(req, ret, 0); 1495 + req->ki_complete(req, ret, 0); 1508 1496 } 1509 1497 } 1510 1498 1511 - static ssize_t aio_read(struct kiocb *req, struct iocb *iocb, bool vectored, 1512 - bool compat) 1499 + static ssize_t aio_read(struct kiocb *req, const struct iocb *iocb, 1500 + bool vectored, bool compat) 1513 1501 { 1514 1502 struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs; 1515 1503 struct iov_iter iter; ··· 1541 1529 return ret; 1542 1530 } 1543 1531 1544 - static ssize_t aio_write(struct kiocb *req, struct iocb *iocb, bool vectored, 1545 - bool compat) 1532 + static ssize_t aio_write(struct kiocb *req, const struct iocb *iocb, 1533 + bool vectored, bool compat) 1546 1534 { 1547 1535 struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs; 1548 1536 struct iov_iter iter; ··· 1597 1585 
aio_complete(container_of(req, struct aio_kiocb, fsync), ret, 0); 1598 1586 } 1599 1587 1600 - static int aio_fsync(struct fsync_iocb *req, struct iocb *iocb, bool datasync) 1588 + static int aio_fsync(struct fsync_iocb *req, const struct iocb *iocb, 1589 + bool datasync) 1601 1590 { 1602 1591 if (unlikely(iocb->aio_buf || iocb->aio_offset || iocb->aio_nbytes || 1603 1592 iocb->aio_rw_flags)) ··· 1726 1713 add_wait_queue(head, &pt->iocb->poll.wait); 1727 1714 } 1728 1715 1729 - static ssize_t aio_poll(struct aio_kiocb *aiocb, struct iocb *iocb) 1716 + static ssize_t aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb) 1730 1717 { 1731 1718 struct kioctx *ctx = aiocb->ki_ctx; 1732 1719 struct poll_iocb *req = &aiocb->poll; ··· 1745 1732 req->file = fget(iocb->aio_fildes); 1746 1733 if (unlikely(!req->file)) 1747 1734 return -EBADF; 1735 + 1736 + req->head = NULL; 1737 + req->woken = false; 1738 + req->cancelled = false; 1748 1739 1749 1740 apt.pt._qproc = aio_poll_queue_proc; 1750 1741 apt.pt._key = req->events; ··· 1798 1781 return 0; 1799 1782 } 1800 1783 1801 - static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, 1802 - bool compat) 1784 + static int __io_submit_one(struct kioctx *ctx, const struct iocb *iocb, 1785 + struct iocb __user *user_iocb, bool compat) 1803 1786 { 1804 1787 struct aio_kiocb *req; 1805 - struct iocb iocb; 1806 1788 ssize_t ret; 1807 1789 1808 - if (unlikely(copy_from_user(&iocb, user_iocb, sizeof(iocb)))) 1809 - return -EFAULT; 1810 - 1811 1790 /* enforce forwards compatibility on users */ 1812 - if (unlikely(iocb.aio_reserved2)) { 1791 + if (unlikely(iocb->aio_reserved2)) { 1813 1792 pr_debug("EINVAL: reserve field set\n"); 1814 1793 return -EINVAL; 1815 1794 } 1816 1795 1817 1796 /* prevent overflows */ 1818 1797 if (unlikely( 1819 - (iocb.aio_buf != (unsigned long)iocb.aio_buf) || 1820 - (iocb.aio_nbytes != (size_t)iocb.aio_nbytes) || 1821 - ((ssize_t)iocb.aio_nbytes < 0) 1798 + (iocb->aio_buf != (unsigned 
long)iocb->aio_buf) || 1799 + (iocb->aio_nbytes != (size_t)iocb->aio_nbytes) || 1800 + ((ssize_t)iocb->aio_nbytes < 0) 1822 1801 )) { 1823 1802 pr_debug("EINVAL: overflow check\n"); 1824 1803 return -EINVAL; 1825 1804 } 1826 1805 1827 - req = aio_get_req(ctx); 1828 - if (unlikely(!req)) 1806 + if (!get_reqs_available(ctx)) 1829 1807 return -EAGAIN; 1830 1808 1831 - if (iocb.aio_flags & IOCB_FLAG_RESFD) { 1809 + ret = -EAGAIN; 1810 + req = aio_get_req(ctx); 1811 + if (unlikely(!req)) 1812 + goto out_put_reqs_available; 1813 + 1814 + if (iocb->aio_flags & IOCB_FLAG_RESFD) { 1832 1815 /* 1833 1816 * If the IOCB_FLAG_RESFD flag of aio_flags is set, get an 1834 1817 * instance of the file* now. The file descriptor must be 1835 1818 * an eventfd() fd, and will be signaled for each completed 1836 1819 * event using the eventfd_signal() function. 1837 1820 */ 1838 - req->ki_eventfd = eventfd_ctx_fdget((int) iocb.aio_resfd); 1821 + req->ki_eventfd = eventfd_ctx_fdget((int) iocb->aio_resfd); 1839 1822 if (IS_ERR(req->ki_eventfd)) { 1840 1823 ret = PTR_ERR(req->ki_eventfd); 1841 1824 req->ki_eventfd = NULL; ··· 1850 1833 } 1851 1834 1852 1835 req->ki_user_iocb = user_iocb; 1853 - req->ki_user_data = iocb.aio_data; 1836 + req->ki_user_data = iocb->aio_data; 1854 1837 1855 - switch (iocb.aio_lio_opcode) { 1838 + switch (iocb->aio_lio_opcode) { 1856 1839 case IOCB_CMD_PREAD: 1857 - ret = aio_read(&req->rw, &iocb, false, compat); 1840 + ret = aio_read(&req->rw, iocb, false, compat); 1858 1841 break; 1859 1842 case IOCB_CMD_PWRITE: 1860 - ret = aio_write(&req->rw, &iocb, false, compat); 1843 + ret = aio_write(&req->rw, iocb, false, compat); 1861 1844 break; 1862 1845 case IOCB_CMD_PREADV: 1863 - ret = aio_read(&req->rw, &iocb, true, compat); 1846 + ret = aio_read(&req->rw, iocb, true, compat); 1864 1847 break; 1865 1848 case IOCB_CMD_PWRITEV: 1866 - ret = aio_write(&req->rw, &iocb, true, compat); 1849 + ret = aio_write(&req->rw, iocb, true, compat); 1867 1850 break; 1868 1851 case 
IOCB_CMD_FSYNC: 1869 - ret = aio_fsync(&req->fsync, &iocb, false); 1852 + ret = aio_fsync(&req->fsync, iocb, false); 1870 1853 break; 1871 1854 case IOCB_CMD_FDSYNC: 1872 - ret = aio_fsync(&req->fsync, &iocb, true); 1855 + ret = aio_fsync(&req->fsync, iocb, true); 1873 1856 break; 1874 1857 case IOCB_CMD_POLL: 1875 - ret = aio_poll(req, &iocb); 1858 + ret = aio_poll(req, iocb); 1876 1859 break; 1877 1860 default: 1878 - pr_debug("invalid aio operation %d\n", iocb.aio_lio_opcode); 1861 + pr_debug("invalid aio operation %d\n", iocb->aio_lio_opcode); 1879 1862 ret = -EINVAL; 1880 1863 break; 1881 1864 } ··· 1889 1872 goto out_put_req; 1890 1873 return 0; 1891 1874 out_put_req: 1892 - put_reqs_available(ctx, 1); 1893 - percpu_ref_put(&ctx->reqs); 1894 1875 if (req->ki_eventfd) 1895 1876 eventfd_ctx_put(req->ki_eventfd); 1896 - kmem_cache_free(kiocb_cachep, req); 1877 + iocb_put(req); 1878 + out_put_reqs_available: 1879 + put_reqs_available(ctx, 1); 1897 1880 return ret; 1881 + } 1882 + 1883 + static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, 1884 + bool compat) 1885 + { 1886 + struct iocb iocb; 1887 + 1888 + if (unlikely(copy_from_user(&iocb, user_iocb, sizeof(iocb)))) 1889 + return -EFAULT; 1890 + 1891 + return __io_submit_one(ctx, &iocb, user_iocb, compat); 1898 1892 } 1899 1893 1900 1894 /* sys_io_submit: ··· 1940 1912 if (nr > ctx->nr_events) 1941 1913 nr = ctx->nr_events; 1942 1914 1943 - blk_start_plug(&plug); 1915 + if (nr > AIO_PLUG_THRESHOLD) 1916 + blk_start_plug(&plug); 1944 1917 for (i = 0; i < nr; i++) { 1945 1918 struct iocb __user *user_iocb; 1946 1919 ··· 1954 1925 if (ret) 1955 1926 break; 1956 1927 } 1957 - blk_finish_plug(&plug); 1928 + if (nr > AIO_PLUG_THRESHOLD) 1929 + blk_finish_plug(&plug); 1958 1930 1959 1931 percpu_ref_put(&ctx->users); 1960 1932 return i ? 
i : ret; ··· 1982 1952 if (nr > ctx->nr_events) 1983 1953 nr = ctx->nr_events; 1984 1954 1985 - blk_start_plug(&plug); 1955 + if (nr > AIO_PLUG_THRESHOLD) 1956 + blk_start_plug(&plug); 1986 1957 for (i = 0; i < nr; i++) { 1987 1958 compat_uptr_t user_iocb; 1988 1959 ··· 1996 1965 if (ret) 1997 1966 break; 1998 1967 } 1999 - blk_finish_plug(&plug); 1968 + if (nr > AIO_PLUG_THRESHOLD) 1969 + blk_finish_plug(&plug); 2000 1970 2001 1971 percpu_ref_put(&ctx->users); 2002 1972 return i ? i : ret;