Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

reintroduce accept4

Introduce a new accept4() system call. The addition of this system call
matches analogous changes in 2.6.27 (dup3(), eventfd2(), signalfd4(),
inotify_init1(), epoll_create1(), pipe2()) which added new system calls
that differed from analogous traditional system calls in adding a flags
argument that can be used to access additional functionality.

The accept4() system call is exactly the same as accept(), except that
it adds a flags bit-mask argument. Two flags are initially implemented.
(Most of the new system calls in 2.6.27 also had both of these flags.)

SOCK_CLOEXEC causes the close-on-exec (FD_CLOEXEC) flag to be enabled
for the new file descriptor returned by accept4(). This is a useful
security feature to avoid leaking information in a multithreaded
program where one thread is doing an accept() at the same time as
another thread is doing a fork() plus exec(). More details here:
http://udrepper.livejournal.com/20407.html ("Secure File Descriptor Handling",
Ulrich Drepper).

The other flag is SOCK_NONBLOCK, which causes the O_NONBLOCK flag
to be enabled on the new open file description created by accept4().
(This flag is merely a convenience, saving the use of additional calls
to fcntl(F_GETFL) and fcntl(F_SETFL) to achieve the same result.)

Here's a test program. Works on x86-32. Should work on x86-64, but
I (mtk) don't have a system to hand to test with.

It tests accept4() with each of the four possible combinations of
SOCK_CLOEXEC and SOCK_NONBLOCK set/clear in 'flags', and verifies
that the appropriate flags are set on the file descriptor/open file
description returned by accept4().

I tested Ulrich's patch in this thread by applying against 2.6.28-rc2,
and it passes according to my test program.

/* test_accept4.c

Copyright (C) 2008, Linux Foundation, written by Michael Kerrisk
<mtk.manpages@gmail.com>

Licensed under the GNU GPLv2 or later.
*/
#define _GNU_SOURCE
#include <unistd.h>
#include <sys/syscall.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <stdlib.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>

/* Default TCP port for the listening socket; main() uses it when no
   port is supplied as the first command-line argument. */
#define PORT_NUM 33333

/* Print msg together with the current errno description via perror(),
   then terminate the process with EXIT_FAILURE. Wrapped in
   do { } while (0) so it behaves as a single statement. */
#define die(msg) do { perror(msg); exit(EXIT_FAILURE); } while (0)

/**********************************************************************/

/* The following is what we need until glibc gets a wrapper for
accept4() */

/* Flags for socket(), socketpair(), accept4() */
#ifndef SOCK_CLOEXEC
#define SOCK_CLOEXEC O_CLOEXEC
#endif
#ifndef SOCK_NONBLOCK
#define SOCK_NONBLOCK O_NONBLOCK
#endif

/* Select how accept4() is reached on this architecture:
   - x86-64: a dedicated system call, number 288.
   - i386: the socketcall() multiplexer, with SYS_ACCEPT4 (18) as the
     call code; USE_SOCKETCALL switches the wrapper below to that path.
   Any other architecture is rejected at compile time. */
#ifdef __x86_64__
#define SYS_accept4 288
#elif __i386__
#define USE_SOCKETCALL 1
#define SYS_ACCEPT4 18
#else
#error "Sorry -- don't know the syscall # on this architecture"
#endif

/*
 * Stand-in for the accept4() wrapper that the C library does not yet
 * supply.
 *
 * Prints the requested flags to stdout, both as a hex value and (when
 * non-zero) by symbolic name, then performs the raw system call: via
 * the socketcall() multiplexer when USE_SOCKETCALL is set, or via the
 * dedicated system call number otherwise.
 *
 * Returns the value produced by syscall().
 */
static int
accept4(int fd, struct sockaddr *sockaddr, socklen_t *addrlen, int flags)
{
	const int cloexec_req = (flags & SOCK_CLOEXEC) != 0;
	const int nonblock_req = (flags & SOCK_NONBLOCK) != 0;

	printf("Calling accept4(): flags = %x", flags);
	if (flags) {
		printf(" (");
		if (cloexec_req)
			printf("SOCK_CLOEXEC");
		/* Separator is only needed when both names are printed */
		if (cloexec_req && nonblock_req)
			printf(" ");
		if (nonblock_req)
			printf("SOCK_NONBLOCK");
		printf(")");
	}
	printf("\n");

#if USE_SOCKETCALL
	{
		/* socketcall() takes its arguments packed in an array of longs */
		long sc_args[6] = {
			fd,
			(long) sockaddr,
			(long) addrlen,
			flags
		};

		return syscall(SYS_socketcall, SYS_ACCEPT4, sc_args);
	}
#else
	return syscall(SYS_accept4, fd, sockaddr, addrlen, flags);
#endif
}

/**********************************************************************/

/*
 * Run a single accept4() test case.
 *
 * Opens a client connection to conn_addr, accepts it on the listening
 * socket lfd with accept4(closeonexec_flag | nonblock_flag), and then
 * checks that:
 *   - FD_CLOEXEC on the accepted descriptor matches whether
 *     SOCK_CLOEXEC was requested, and
 *   - O_NONBLOCK on the accepted open file description matches whether
 *     SOCK_NONBLOCK was requested.
 *
 * Progress and per-flag results are printed to stdout.
 *
 * Returns 1 if both checks pass, 0 if either fails or if accept4()
 * itself fails. Failures of socket(), connect() or fcntl() are fatal
 * and terminate the program through die().
 */
static int
do_test(int lfd, struct sockaddr_in *conn_addr,
	int closeonexec_flag, int nonblock_flag)
{
	int connfd, acceptfd;
	int fdf, flf, fdf_pass, flf_pass;
	struct sockaddr_in claddr;
	socklen_t addrlen;

	printf("=======================================\n");

	/* Create the client side so the listener has a pending connection */
	connfd = socket(AF_INET, SOCK_STREAM, 0);
	if (connfd == -1)
		die("socket");
	if (connect(connfd, (struct sockaddr *) conn_addr,
		    sizeof(struct sockaddr_in)) == -1)
		die("connect");

	addrlen = sizeof(struct sockaddr_in);
	acceptfd = accept4(lfd, (struct sockaddr *) &claddr, &addrlen,
			   closeonexec_flag | nonblock_flag);
	if (acceptfd == -1) {
		/* Not fatal: report it and count this case as failed */
		perror("accept4()");
		close(connfd);
		return 0;
	}

	/* File descriptor flags: close-on-exec must mirror SOCK_CLOEXEC */
	fdf = fcntl(acceptfd, F_GETFD);
	if (fdf == -1)
		die("fcntl:F_GETFD");
	fdf_pass = ((fdf & FD_CLOEXEC) != 0) ==
		   ((closeonexec_flag & SOCK_CLOEXEC) != 0);
	printf("Close-on-exec flag is %sset (%s); ",
	       (fdf & FD_CLOEXEC) ? "" : "not ",
	       fdf_pass ? "OK" : "failed");

	/* File status flags: O_NONBLOCK must mirror SOCK_NONBLOCK */
	flf = fcntl(acceptfd, F_GETFL);
	if (flf == -1)
		die("fcntl:F_GETFL");	/* name the call that actually failed */
	flf_pass = ((flf & O_NONBLOCK) != 0) ==
		   ((nonblock_flag & SOCK_NONBLOCK) != 0);
	printf("nonblock flag is %sset (%s)\n",
	       (flf & O_NONBLOCK) ? "" : "not ",
	       flf_pass ? "OK" : "failed");

	close(acceptfd);
	close(connfd);

	printf("Test result: %s\n", (fdf_pass && flf_pass) ? "PASS" : "FAIL");
	return fdf_pass && flf_pass;
}

/*
 * Create a TCP/IPv4 socket listening on port_num on all local
 * addresses (INADDR_ANY), with SO_REUSEADDR enabled and a backlog of 5.
 *
 * Returns the listening descriptor. Any failing step is fatal and
 * terminates the program through die().
 */
static int
create_listening_socket(int port_num)
{
	struct sockaddr_in bind_addr;
	int reuse = 1;
	int sock;

	sock = socket(AF_INET, SOCK_STREAM, 0);
	if (sock == -1)
		die("socket");

	if (setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &reuse,
		       sizeof(reuse)) == -1)
		die("setsockopt");

	/* Wildcard address, caller-supplied port, network byte order */
	memset(&bind_addr, 0, sizeof(bind_addr));
	bind_addr.sin_family = AF_INET;
	bind_addr.sin_addr.s_addr = htonl(INADDR_ANY);
	bind_addr.sin_port = htons(port_num);

	if (bind(sock, (struct sockaddr *) &bind_addr,
		 sizeof(bind_addr)) == -1)
		die("bind");

	if (listen(sock, 5) == -1)
		die("listen");

	return sock;
}

/*
 * Test driver.
 *
 * Usage: test_accept4 [port]
 *
 * Listens on the given TCP port (PORT_NUM when omitted), then runs
 * do_test() over the loopback address for each of the four
 * combinations of SOCK_CLOEXEC and SOCK_NONBLOCK being set or clear.
 *
 * Exits with EXIT_SUCCESS only if every combination passes. An
 * unparsable or out-of-range port argument is rejected with a message
 * on stderr and EXIT_FAILURE.
 */
int
main(int argc, char *argv[])
{
	struct sockaddr_in conn_addr;
	int lfd;
	int port_num;
	int passed;

	passed = 1;

	port_num = PORT_NUM;
	if (argc > 1) {
		char *end;
		long val;

		/*
		 * strtol() rather than atoi(): rejects non-numeric input and
		 * trailing junk, and the range check keeps the value within
		 * what htons() can represent. An overflowing input yields
		 * LONG_MIN/LONG_MAX, which the range check also rejects.
		 */
		val = strtol(argv[1], &end, 10);
		if (end == argv[1] || *end != '\0' || val < 1 || val > 65535) {
			fprintf(stderr, "%s: invalid port number: %s\n",
				argv[0], argv[1]);
			exit(EXIT_FAILURE);
		}
		port_num = (int) val;
	}

	/* Clients connect to the listener over the loopback interface */
	memset(&conn_addr, 0, sizeof(struct sockaddr_in));
	conn_addr.sin_family = AF_INET;
	conn_addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
	conn_addr.sin_port = htons(port_num);

	lfd = create_listening_socket(port_num);

	/* Run every case even after a failure so all results get printed */
	if (!do_test(lfd, &conn_addr, 0, 0))
		passed = 0;
	if (!do_test(lfd, &conn_addr, SOCK_CLOEXEC, 0))
		passed = 0;
	if (!do_test(lfd, &conn_addr, 0, SOCK_NONBLOCK))
		passed = 0;
	if (!do_test(lfd, &conn_addr, SOCK_CLOEXEC, SOCK_NONBLOCK))
		passed = 0;

	close(lfd);

	exit(passed ? EXIT_SUCCESS : EXIT_FAILURE);
}

[mtk.manpages@gmail.com: rewrote changelog, updated test program]
Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Tested-by: Michael Kerrisk <mtk.manpages@gmail.com>
Acked-by: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: <linux-api@vger.kernel.org>
Cc: <linux-arch@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

authored by

Ulrich Drepper and committed by
Linus Torvalds
de11defe cf7ee554

+21 -124
+2 -2
arch/x86/include/asm/unistd_64.h
··· 639 639 __SYSCALL(__NR_timerfd_settime, sys_timerfd_settime) 640 640 #define __NR_timerfd_gettime 287 641 641 __SYSCALL(__NR_timerfd_gettime, sys_timerfd_gettime) 642 - #define __NR_paccept 288 643 - __SYSCALL(__NR_paccept, sys_paccept) 642 + #define __NR_accept4 288 643 + __SYSCALL(__NR_accept4, sys_accept4) 644 644 #define __NR_signalfd4 289 645 645 __SYSCALL(__NR_signalfd4, sys_signalfd4) 646 646 #define __NR_eventfd2 290
+2 -4
include/linux/net.h
··· 40 40 #define SYS_GETSOCKOPT 15 /* sys_getsockopt(2) */ 41 41 #define SYS_SENDMSG 16 /* sys_sendmsg(2) */ 42 42 #define SYS_RECVMSG 17 /* sys_recvmsg(2) */ 43 - #define SYS_PACCEPT 18 /* sys_paccept(2) */ 43 + #define SYS_ACCEPT4 18 /* sys_accept4(2) */ 44 44 45 45 typedef enum { 46 46 SS_FREE = 0, /* not allocated */ ··· 100 100 * remaining bits are used as flags. */ 101 101 #define SOCK_TYPE_MASK 0xf 102 102 103 - /* Flags for socket, socketpair, paccept */ 103 + /* Flags for socket, socketpair, accept4 */ 104 104 #define SOCK_CLOEXEC O_CLOEXEC 105 105 #ifndef SOCK_NONBLOCK 106 106 #define SOCK_NONBLOCK O_NONBLOCK ··· 223 223 extern struct socket *sockfd_lookup(int fd, int *err); 224 224 #define sockfd_put(sock) fput(sock->file) 225 225 extern int net_ratelimit(void); 226 - extern long do_accept(int fd, struct sockaddr __user *upeer_sockaddr, 227 - int __user *upeer_addrlen, int flags); 228 226 229 227 #define net_random() random32() 230 228 #define net_srandom(seed) srandom32((__force u32)seed)
+1 -2
include/linux/syscalls.h
··· 410 410 asmlinkage long sys_bind(int, struct sockaddr __user *, int); 411 411 asmlinkage long sys_connect(int, struct sockaddr __user *, int); 412 412 asmlinkage long sys_accept(int, struct sockaddr __user *, int __user *); 413 - asmlinkage long sys_paccept(int, struct sockaddr __user *, int __user *, 414 - const __user sigset_t *, size_t, int); 413 + asmlinkage long sys_accept4(int, struct sockaddr __user *, int __user *, int); 415 414 asmlinkage long sys_getsockname(int, struct sockaddr __user *, int __user *); 416 415 asmlinkage long sys_getpeername(int, struct sockaddr __user *, int __user *); 417 416 asmlinkage long sys_send(int, void __user *, size_t, unsigned);
+1 -1
kernel/sys_ni.c
··· 31 31 cond_syscall(sys_bind); 32 32 cond_syscall(sys_listen); 33 33 cond_syscall(sys_accept); 34 - cond_syscall(sys_paccept); 34 + cond_syscall(sys_accept4); 35 35 cond_syscall(sys_connect); 36 36 cond_syscall(sys_getsockname); 37 37 cond_syscall(sys_getpeername);
+5 -45
net/compat.c
··· 725 725 static unsigned char nas[19]={AL(0),AL(3),AL(3),AL(3),AL(2),AL(3), 726 726 AL(3),AL(3),AL(4),AL(4),AL(4),AL(6), 727 727 AL(6),AL(2),AL(5),AL(5),AL(3),AL(3), 728 - AL(6)}; 728 + AL(4)}; 729 729 #undef AL 730 730 731 731 asmlinkage long compat_sys_sendmsg(int fd, struct compat_msghdr __user *msg, unsigned flags) ··· 738 738 return sys_recvmsg(fd, (struct msghdr __user *)msg, flags | MSG_CMSG_COMPAT); 739 739 } 740 740 741 - asmlinkage long compat_sys_paccept(int fd, struct sockaddr __user *upeer_sockaddr, 742 - int __user *upeer_addrlen, 743 - const compat_sigset_t __user *sigmask, 744 - compat_size_t sigsetsize, int flags) 745 - { 746 - compat_sigset_t ss32; 747 - sigset_t ksigmask, sigsaved; 748 - int ret; 749 - 750 - if (sigmask) { 751 - if (sigsetsize != sizeof(compat_sigset_t)) 752 - return -EINVAL; 753 - if (copy_from_user(&ss32, sigmask, sizeof(ss32))) 754 - return -EFAULT; 755 - sigset_from_compat(&ksigmask, &ss32); 756 - 757 - sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP)); 758 - sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); 759 - } 760 - 761 - ret = do_accept(fd, upeer_sockaddr, upeer_addrlen, flags); 762 - 763 - if (ret == -ERESTARTNOHAND) { 764 - /* 765 - * Don't restore the signal mask yet. Let do_signal() deliver 766 - * the signal on the way back to userspace, before the signal 767 - * mask is restored. 
768 - */ 769 - if (sigmask) { 770 - memcpy(&current->saved_sigmask, &sigsaved, 771 - sizeof(sigsaved)); 772 - set_restore_sigmask(); 773 - } 774 - } else if (sigmask) 775 - sigprocmask(SIG_SETMASK, &sigsaved, NULL); 776 - 777 - return ret; 778 - } 779 - 780 741 asmlinkage long compat_sys_socketcall(int call, u32 __user *args) 781 742 { 782 743 int ret; 783 744 u32 a[6]; 784 745 u32 a0, a1; 785 746 786 - if (call < SYS_SOCKET || call > SYS_PACCEPT) 747 + if (call < SYS_SOCKET || call > SYS_ACCEPT4) 787 748 return -EINVAL; 788 749 if (copy_from_user(a, args, nas[call])) 789 750 return -EFAULT; ··· 765 804 ret = sys_listen(a0, a1); 766 805 break; 767 806 case SYS_ACCEPT: 768 - ret = do_accept(a0, compat_ptr(a1), compat_ptr(a[2]), 0); 807 + ret = sys_accept4(a0, compat_ptr(a1), compat_ptr(a[2]), 0); 769 808 break; 770 809 case SYS_GETSOCKNAME: 771 810 ret = sys_getsockname(a0, compat_ptr(a1), compat_ptr(a[2])); ··· 805 844 case SYS_RECVMSG: 806 845 ret = compat_sys_recvmsg(a0, compat_ptr(a1), a[2]); 807 846 break; 808 - case SYS_PACCEPT: 809 - ret = compat_sys_paccept(a0, compat_ptr(a1), compat_ptr(a[2]), 810 - compat_ptr(a[3]), a[4], a[5]); 847 + case SYS_ACCEPT4: 848 + ret = sys_accept4(a0, compat_ptr(a1), compat_ptr(a[2]), a[3]); 811 849 break; 812 850 default: 813 851 ret = -EINVAL;
+10 -70
net/socket.c
··· 1426 1426 * clean when we restucture accept also. 1427 1427 */ 1428 1428 1429 - long do_accept(int fd, struct sockaddr __user *upeer_sockaddr, 1430 - int __user *upeer_addrlen, int flags) 1429 + asmlinkage long sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr, 1430 + int __user *upeer_addrlen, int flags) 1431 1431 { 1432 1432 struct socket *sock, *newsock; 1433 1433 struct file *newfile; ··· 1510 1510 goto out_put; 1511 1511 } 1512 1512 1513 - #if 0 1514 - #ifdef HAVE_SET_RESTORE_SIGMASK 1515 - asmlinkage long sys_paccept(int fd, struct sockaddr __user *upeer_sockaddr, 1516 - int __user *upeer_addrlen, 1517 - const sigset_t __user *sigmask, 1518 - size_t sigsetsize, int flags) 1519 - { 1520 - sigset_t ksigmask, sigsaved; 1521 - int ret; 1522 - 1523 - if (sigmask) { 1524 - /* XXX: Don't preclude handling different sized sigset_t's. */ 1525 - if (sigsetsize != sizeof(sigset_t)) 1526 - return -EINVAL; 1527 - if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask))) 1528 - return -EFAULT; 1529 - 1530 - sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP)); 1531 - sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); 1532 - } 1533 - 1534 - ret = do_accept(fd, upeer_sockaddr, upeer_addrlen, flags); 1535 - 1536 - if (ret < 0 && signal_pending(current)) { 1537 - /* 1538 - * Don't restore the signal mask yet. Let do_signal() deliver 1539 - * the signal on the way back to userspace, before the signal 1540 - * mask is restored. 
1541 - */ 1542 - if (sigmask) { 1543 - memcpy(&current->saved_sigmask, &sigsaved, 1544 - sizeof(sigsaved)); 1545 - set_restore_sigmask(); 1546 - } 1547 - } else if (sigmask) 1548 - sigprocmask(SIG_SETMASK, &sigsaved, NULL); 1549 - 1550 - return ret; 1551 - } 1552 - #else 1553 - asmlinkage long sys_paccept(int fd, struct sockaddr __user *upeer_sockaddr, 1554 - int __user *upeer_addrlen, 1555 - const sigset_t __user *sigmask, 1556 - size_t sigsetsize, int flags) 1557 - { 1558 - /* The platform does not support restoring the signal mask in the 1559 - * return path. So we do not allow using paccept() with a signal 1560 - * mask. */ 1561 - if (sigmask) 1562 - return -EINVAL; 1563 - 1564 - return do_accept(fd, upeer_sockaddr, upeer_addrlen, flags); 1565 - } 1566 - #endif 1567 - #endif 1568 - 1569 1513 asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr, 1570 1514 int __user *upeer_addrlen) 1571 1515 { 1572 - return do_accept(fd, upeer_sockaddr, upeer_addrlen, 0); 1516 + return sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0); 1573 1517 } 1574 1518 1575 1519 /* ··· 2040 2096 AL(0),AL(3),AL(3),AL(3),AL(2),AL(3), 2041 2097 AL(3),AL(3),AL(4),AL(4),AL(4),AL(6), 2042 2098 AL(6),AL(2),AL(5),AL(5),AL(3),AL(3), 2043 - AL(6) 2099 + AL(4) 2044 2100 }; 2045 2101 2046 2102 #undef AL ··· 2059 2115 unsigned long a0, a1; 2060 2116 int err; 2061 2117 2062 - if (call < 1 || call > SYS_PACCEPT) 2118 + if (call < 1 || call > SYS_ACCEPT4) 2063 2119 return -EINVAL; 2064 2120 2065 2121 /* copy_from_user should be SMP safe. 
*/ ··· 2087 2143 err = sys_listen(a0, a1); 2088 2144 break; 2089 2145 case SYS_ACCEPT: 2090 - err = 2091 - do_accept(a0, (struct sockaddr __user *)a1, 2092 - (int __user *)a[2], 0); 2146 + err = sys_accept4(a0, (struct sockaddr __user *)a1, 2147 + (int __user *)a[2], 0); 2093 2148 break; 2094 2149 case SYS_GETSOCKNAME: 2095 2150 err = ··· 2135 2192 case SYS_RECVMSG: 2136 2193 err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]); 2137 2194 break; 2138 - case SYS_PACCEPT: 2139 - err = 2140 - sys_paccept(a0, (struct sockaddr __user *)a1, 2141 - (int __user *)a[2], 2142 - (const sigset_t __user *) a[3], 2143 - a[4], a[5]); 2195 + case SYS_ACCEPT4: 2196 + err = sys_accept4(a0, (struct sockaddr __user *)a1, 2197 + (int __user *)a[2], a[3]); 2144 2198 break; 2145 2199 default: 2146 2200 err = -EINVAL;