Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

flag parameters: paccept

This patch is by far the most complex in the series. It adds a new syscall
paccept. This syscall differs from accept in that it adds (at the userlevel)
two additional parameters:

- a signal mask
- a flags value

The flags parameter can be used to set flags like SOCK_CLOEXEC. This is
implemented here as well. Some people argued that this is a property which
should be inherited from the file descriptor for the server but this is against
POSIX. Additionally, we really want the signal mask parameter as well
(similar to pselect, ppoll, etc). So an interface change is inevitable.

The flag value is the same as for socket and socketpair. I think diverging
here will only create confusion. Similar to the filesystem interfaces where
the use of the O_* constants differs, it is acceptable here.

The signal mask is handled as for pselect etc. The mask is temporarily
installed for the thread and removed before the call returns. I modeled the
code after pselect. If there is a problem it's likely also in pselect.

For architectures which use socketcall I maintained this interface instead of
adding a system call. The symmetry shouldn't be broken.

The following test must be adjusted for architectures other than x86 and
x86-64 and in case the syscall numbers changed.

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#include <errno.h>
#include <fcntl.h>
#include <pthread.h>
#include <signal.h>
#include <stdio.h>
#include <unistd.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <sys/syscall.h>

/*
 * Pick the way paccept is reached when the system headers do not already
 * provide __NR_paccept:
 *   - x86-64: dedicated system call number 288;
 *   - i386:   no separate system call; the request goes through socketcall
 *             with sub-call number SYS_PACCEPT (18);
 *   - anything else must supply __NR_paccept itself.
 */
#ifndef __NR_paccept
# ifdef __x86_64__
# define __NR_paccept 288
# elif defined __i386__
# define SYS_PACCEPT 18
# define USE_SOCKETCALL 1
# else
# error "need __NR_paccept"
# endif
#endif

/*
 * paccept (fd, addr, addrlen, mask, flags)
 *
 * Hand-rolled wrapper; evaluates to the return value of syscall().
 *
 * The constant 8 is passed as the signal-set size argument that sits between
 * MASK and FLAGS; the kernel side rejects the call with EINVAL when that
 * size does not equal its own sizeof(sigset_t).
 *
 * Every macro argument is parenthesized and converted to long, so that
 * expression arguments expand correctly and int-typed literals such as 0
 * are handed to the variadic syscall() at full register width in both
 * variants.
 */
#ifdef USE_SOCKETCALL
# define paccept(fd, addr, addrlen, mask, flags) \
  ({ long args[6] = { \
       (long) (fd), (long) (addr), (long) (addrlen), (long) (mask), 8L, \
       (long) (flags) }; \
     syscall (__NR_socketcall, SYS_PACCEPT, args); })
#else
# define paccept(fd, addr, addrlen, mask, flags) \
  syscall (__NR_paccept, (long) (fd), (long) (addr), (long) (addrlen), \
           (long) (mask), 8L, (long) (flags))
#endif

/* TCP port on the loopback interface used by the listener in main() and by
   the connecting thread tf().  */
#define PORT 57392

/* Fallback for C libraries whose <sys/socket.h> does not provide
   SOCK_CLOEXEC.  Guarded so that a header-provided definition is used as is
   instead of being redefined.  */
#ifndef SOCK_CLOEXEC
# define SOCK_CLOEXEC O_CLOEXEC
#endif

/* Barrier shared by main() and the tf() thread; main() initializes it for
   two participants before starting the thread.  */
static pthread_barrier_t b;

/* Client side of the test, run in its own thread.

   ARG carries the pthread_t of the thread that should receive SIGUSR1
   (main() passes its own pthread_self()).  Every pthread_barrier_wait()
   here pairs, in order, with one in main().

   The results of socket/connect/close are not checked.  Always returns
   NULL.  */
static void *
tf (void *arg)
{
  struct sockaddr_in peer;
  peer.sin_family = AF_INET;
  peer.sin_addr.s_addr = htonl (INADDR_LOOPBACK);
  peer.sin_port = htons (PORT);

  /* Two rounds: after each rendezvous, open one connection to the
     listener and drop it again right away.  */
  for (int round = 0; round < 2; ++round)
    {
      pthread_barrier_wait (&b);

      int fd = socket (AF_INET, SOCK_STREAM, 0);
      connect (fd, (const struct sockaddr *) &peer, sizeof (peer));
      close (fd);
    }

  /* Third rendezvous: main() is done with the second accepted socket.  */
  pthread_barrier_wait (&b);

  /* Fourth rendezvous: main() is about to issue its last paccept call.
     No connection is made this time; instead, after a delay, the target
     thread is signalled.  */
  pthread_barrier_wait (&b);
  sleep (2);
  pthread_kill ((pthread_t) arg, SIGUSR1);

  return NULL;
}

/* Signal handler that intentionally does nothing.  main() installs it for
   SIGUSR1 and then expects the blocking paccept call to fail with EINTR.  */
static void
handler (int s)
{
  (void) s;
}

/* Test driver for paccept.

   Sets up a listening TCP socket on the loopback interface, starts the
   client thread tf() and then checks three things:
     1. paccept with flags 0 returns a descriptor without FD_CLOEXEC;
     2. paccept with SOCK_CLOEXEC returns a descriptor with FD_CLOEXEC;
     3. paccept with a signal mask that unblocks SIGUSR1 is interrupted by
        that signal and fails with EINTR.

   Prints "OK" and returns 0 on success; prints a diagnostic and returns 1
   on the first failure.  Setup calls whose failure would otherwise leave
   the first paccept blocked forever (socket, bind, listen) are checked.  */
int
main (void)
{
  if (pthread_barrier_init (&b, NULL, 2) != 0)
    {
      puts ("pthread_barrier_init failed");
      return 1;
    }

  /* The thread gets our own thread ID so it can signal us later.  */
  pthread_t th;
  if (pthread_create (&th, NULL, tf, (void *) pthread_self ()) != 0)
    {
      puts ("pthread_create failed");
      return 1;
    }

  int s = socket (AF_INET, SOCK_STREAM, 0);
  if (s < 0)
    {
      puts ("socket failed");
      return 1;
    }

  /* Best effort: a failure here only shows up as a bind failure below.  */
  int reuse = 1;
  setsockopt (s, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof (reuse));

  /* Zero the whole structure so no uninitialized padding reaches bind.  */
  struct sockaddr_in sin = { 0 };
  sin.sin_family = AF_INET;
  sin.sin_addr.s_addr = htonl (INADDR_LOOPBACK);
  sin.sin_port = htons (PORT);
  if (bind (s, (struct sockaddr *) &sin, sizeof (sin)) != 0)
    {
      puts ("bind failed");
      return 1;
    }
  if (listen (s, SOMAXCONN) != 0)
    {
      puts ("listen failed");
      return 1;
    }

  /* Rendezvous 1: the listener is ready, the thread connects once.  */
  pthread_barrier_wait (&b);

  int s2 = paccept (s, NULL, 0, NULL, 0);
  if (s2 < 0)
    {
      puts ("paccept(0) failed");
      return 1;
    }

  int coe = fcntl (s2, F_GETFD);
  if (coe == -1)
    {
      /* Without this check -1 would be misread as "flag set".  */
      puts ("fcntl(F_GETFD) failed");
      return 1;
    }
  if (coe & FD_CLOEXEC)
    {
      puts ("paccept(0) set close-on-exec-flag");
      return 1;
    }
  close (s2);

  /* Rendezvous 2: the thread connects a second time.  */
  pthread_barrier_wait (&b);

  s2 = paccept (s, NULL, 0, NULL, SOCK_CLOEXEC);
  if (s2 < 0)
    {
      puts ("paccept(SOCK_CLOEXEC) failed");
      return 1;
    }

  coe = fcntl (s2, F_GETFD);
  if (coe == -1)
    {
      puts ("fcntl(F_GETFD) failed");
      return 1;
    }
  if ((coe & FD_CLOEXEC) == 0)
    {
      puts ("paccept(SOCK_CLOEXEC) does not set close-on-exec flag");
      return 1;
    }
  close (s2);

  /* Rendezvous 3.  */
  pthread_barrier_wait (&b);

  struct sigaction sa;
  sa.sa_handler = handler;
  sa.sa_flags = 0;
  sigemptyset (&sa.sa_mask);
  sigaction (SIGUSR1, &sa, NULL);

  /* Block SIGUSR1 in this thread ...  */
  sigset_t ss;
  pthread_sigmask (SIG_SETMASK, NULL, &ss);
  sigaddset (&ss, SIGUSR1);
  pthread_sigmask (SIG_SETMASK, &ss, NULL);

  /* ... and build the mask handed to paccept, in which SIGUSR1 is not
     blocked, so the signal can only be delivered during the call.  */
  sigdelset (&ss, SIGUSR1);

  /* No SIGALRM handler is installed, so presumably this acts as a watchdog
     that ends the test if paccept is never interrupted.  */
  alarm (4);

  /* Rendezvous 4: the thread sleeps and then sends SIGUSR1 to us.  */
  pthread_barrier_wait (&b);

  errno = 0 ;
  s2 = paccept (s, NULL, 0, &ss, 0);
  if (s2 != -1 || errno != EINTR)
    {
      puts ("paccept did not fail with EINTR");
      return 1;
    }

  close (s);

  puts ("OK");

  return 0;
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

[akpm@linux-foundation.org: make it compile]
[akpm@linux-foundation.org: add sys_ni stub]
Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Acked-by: Davide Libenzi <davidel@xmailserver.org>
Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
Cc: <linux-arch@vger.kernel.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Roland McGrath <roland@redhat.com>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

authored by

Ulrich Drepper and committed by
Linus Torvalds
aaca0bdc a677a039

+139 -12
+5
include/asm-alpha/socket.h
··· 62 62 63 63 #define SO_MARK 36 64 64 65 + /* O_NONBLOCK clashes with the bits used for socket types. Therefore we 66 + * have to define SOCK_NONBLOCK to a different value here. 67 + */ 68 + #define SOCK_NONBLOCK 0x40000000 69 + 65 70 #endif /* _ASM_SOCKET_H */
+5
include/asm-parisc/socket.h
··· 54 54 55 55 #define SO_MARK 0x401f 56 56 57 + /* O_NONBLOCK clashes with the bits used for socket types. Therefore we 58 + * have to define SOCK_NONBLOCK to a different value here. 59 + */ 60 + #define SOCK_NONBLOCK 0x40000000 61 + 57 62 #endif /* _ASM_SOCKET_H */
+2
include/asm-x86/unistd_64.h
··· 639 639 __SYSCALL(__NR_timerfd_settime, sys_timerfd_settime) 640 640 #define __NR_timerfd_gettime 287 641 641 __SYSCALL(__NR_timerfd_gettime, sys_timerfd_gettime) 642 + #define __NR_paccept 288 643 + __SYSCALL(__NR_paccept, sys_paccept) 642 644 643 645 644 646 #ifndef __NO_STUBS
+3
include/linux/net.h
··· 47 47 #define SYS_GETSOCKOPT 15 /* sys_getsockopt(2) */ 48 48 #define SYS_SENDMSG 16 /* sys_sendmsg(2) */ 49 49 #define SYS_RECVMSG 17 /* sys_recvmsg(2) */ 50 + #define SYS_PACCEPT 18 /* sys_paccept(2) */ 50 51 51 52 typedef enum { 52 53 SS_FREE = 0, /* not allocated */ ··· 220 219 extern struct socket *sockfd_lookup(int fd, int *err); 221 220 #define sockfd_put(sock) fput(sock->file) 222 221 extern int net_ratelimit(void); 222 + extern long do_accept(int fd, struct sockaddr __user *upeer_sockaddr, 223 + int __user *upeer_addrlen, int flags); 223 224 224 225 #define net_random() random32() 225 226 #define net_srandom(seed) srandom32((__force u32)seed)
+2
include/linux/syscalls.h
··· 409 409 asmlinkage long sys_bind(int, struct sockaddr __user *, int); 410 410 asmlinkage long sys_connect(int, struct sockaddr __user *, int); 411 411 asmlinkage long sys_accept(int, struct sockaddr __user *, int __user *); 412 + asmlinkage long sys_paccept(int, struct sockaddr __user *, int __user *, 413 + const sigset_t *, size_t, int); 412 414 asmlinkage long sys_getsockname(int, struct sockaddr __user *, int __user *); 413 415 asmlinkage long sys_getpeername(int, struct sockaddr __user *, int __user *); 414 416 asmlinkage long sys_send(int, void __user *, size_t, unsigned);
+1
kernel/sys_ni.c
··· 31 31 cond_syscall(sys_bind); 32 32 cond_syscall(sys_listen); 33 33 cond_syscall(sys_accept); 34 + cond_syscall(sys_paccept); 34 35 cond_syscall(sys_connect); 35 36 cond_syscall(sys_getsockname); 36 37 cond_syscall(sys_getpeername);
+48 -4
net/compat.c
··· 722 722 723 723 /* Argument list sizes for compat_sys_socketcall */ 724 724 #define AL(x) ((x) * sizeof(u32)) 725 - static unsigned char nas[18]={AL(0),AL(3),AL(3),AL(3),AL(2),AL(3), 725 + static unsigned char nas[19]={AL(0),AL(3),AL(3),AL(3),AL(2),AL(3), 726 726 AL(3),AL(3),AL(4),AL(4),AL(4),AL(6), 727 - AL(6),AL(2),AL(5),AL(5),AL(3),AL(3)}; 727 + AL(6),AL(2),AL(5),AL(5),AL(3),AL(3), 728 + AL(6)}; 728 729 #undef AL 729 730 730 731 asmlinkage long compat_sys_sendmsg(int fd, struct compat_msghdr __user *msg, unsigned flags) ··· 738 737 return sys_recvmsg(fd, (struct msghdr __user *)msg, flags | MSG_CMSG_COMPAT); 739 738 } 740 739 740 + asmlinkage long compat_sys_paccept(int fd, struct sockaddr __user *upeer_sockaddr, 741 + int __user *upeer_addrlen, 742 + const compat_sigset_t __user *sigmask, 743 + compat_size_t sigsetsize, int flags) 744 + { 745 + compat_sigset_t ss32; 746 + sigset_t ksigmask, sigsaved; 747 + int ret; 748 + 749 + if (sigmask) { 750 + if (sigsetsize != sizeof(compat_sigset_t)) 751 + return -EINVAL; 752 + if (copy_from_user(&ss32, sigmask, sizeof(ss32))) 753 + return -EFAULT; 754 + sigset_from_compat(&ksigmask, &ss32); 755 + 756 + sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP)); 757 + sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); 758 + } 759 + 760 + ret = do_accept(fd, upeer_sockaddr, upeer_addrlen, flags); 761 + 762 + if (ret == -ERESTARTNOHAND) { 763 + /* 764 + * Don't restore the signal mask yet. Let do_signal() deliver 765 + * the signal on the way back to userspace, before the signal 766 + * mask is restored. 
767 + */ 768 + if (sigmask) { 769 + memcpy(&current->saved_sigmask, &sigsaved, 770 + sizeof(sigsaved)); 771 + set_restore_sigmask(); 772 + } 773 + } else if (sigmask) 774 + sigprocmask(SIG_SETMASK, &sigsaved, NULL); 775 + 776 + return ret; 777 + } 778 + 741 779 asmlinkage long compat_sys_socketcall(int call, u32 __user *args) 742 780 { 743 781 int ret; 744 782 u32 a[6]; 745 783 u32 a0, a1; 746 784 747 - if (call < SYS_SOCKET || call > SYS_RECVMSG) 785 + if (call < SYS_SOCKET || call > SYS_PACCEPT) 748 786 return -EINVAL; 749 787 if (copy_from_user(a, args, nas[call])) 750 788 return -EFAULT; ··· 804 764 ret = sys_listen(a0, a1); 805 765 break; 806 766 case SYS_ACCEPT: 807 - ret = sys_accept(a0, compat_ptr(a1), compat_ptr(a[2])); 767 + ret = do_accept(a0, compat_ptr(a1), compat_ptr(a[2]), 0); 808 768 break; 809 769 case SYS_GETSOCKNAME: 810 770 ret = sys_getsockname(a0, compat_ptr(a1), compat_ptr(a[2])); ··· 843 803 break; 844 804 case SYS_RECVMSG: 845 805 ret = compat_sys_recvmsg(a0, compat_ptr(a1), a[2]); 806 + break; 807 + case SYS_PACCEPT: 808 + ret = compat_sys_paccept(a0, compat_ptr(a1), compat_ptr(a[2]), 809 + compat_ptr(a[3]), a[4], a[5]); 846 810 break; 847 811 default: 848 812 ret = -EINVAL;
+73 -8
net/socket.c
··· 63 63 #include <linux/file.h> 64 64 #include <linux/net.h> 65 65 #include <linux/interrupt.h> 66 + #include <linux/thread_info.h> 66 67 #include <linux/rcupdate.h> 67 68 #include <linux/netdevice.h> 68 69 #include <linux/proc_fs.h> ··· 1226 1225 return -EINVAL; 1227 1226 type &= SOCK_TYPE_MASK; 1228 1227 1228 + if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK)) 1229 + flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK; 1230 + 1229 1231 retval = sock_create(family, type, protocol, &sock); 1230 1232 if (retval < 0) 1231 1233 goto out; ··· 1262 1258 if (flags & ~SOCK_CLOEXEC) 1263 1259 return -EINVAL; 1264 1260 type &= SOCK_TYPE_MASK; 1261 + 1262 + if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK)) 1263 + flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK; 1265 1264 1266 1265 /* 1267 1266 * Obtain the first socket and check if the underlying protocol ··· 1420 1413 * clean when we restucture accept also. 1421 1414 */ 1422 1415 1423 - asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr, 1424 - int __user *upeer_addrlen) 1416 + long do_accept(int fd, struct sockaddr __user *upeer_sockaddr, 1417 + int __user *upeer_addrlen, int flags) 1425 1418 { 1426 1419 struct socket *sock, *newsock; 1427 1420 struct file *newfile; 1428 1421 int err, len, newfd, fput_needed; 1429 1422 struct sockaddr_storage address; 1423 + 1424 + if (flags & ~SOCK_CLOEXEC) 1425 + return -EINVAL; 1426 + 1427 + if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK)) 1428 + flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK; 1430 1429 1431 1430 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1432 1431 if (!sock) ··· 1451 1438 */ 1452 1439 __module_get(newsock->ops->owner); 1453 1440 1454 - newfd = sock_alloc_fd(&newfile, 0); 1441 + newfd = sock_alloc_fd(&newfile, flags & O_CLOEXEC); 1455 1442 if (unlikely(newfd < 0)) { 1456 1443 err = newfd; 1457 1444 sock_release(newsock); ··· 1502 1489 fput(newfile); 1503 1490 put_unused_fd(newfd); 1504 1491 goto out_put; 1492 + } 
1493 + 1494 + asmlinkage long sys_paccept(int fd, struct sockaddr __user *upeer_sockaddr, 1495 + int __user *upeer_addrlen, 1496 + const sigset_t __user *sigmask, 1497 + size_t sigsetsize, int flags) 1498 + { 1499 + sigset_t ksigmask, sigsaved; 1500 + int ret; 1501 + 1502 + if (sigmask) { 1503 + /* XXX: Don't preclude handling different sized sigset_t's. */ 1504 + if (sigsetsize != sizeof(sigset_t)) 1505 + return -EINVAL; 1506 + if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask))) 1507 + return -EFAULT; 1508 + 1509 + sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP)); 1510 + sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); 1511 + } 1512 + 1513 + ret = do_accept(fd, upeer_sockaddr, upeer_addrlen, flags); 1514 + 1515 + if (ret < 0 && signal_pending(current)) { 1516 + /* 1517 + * Don't restore the signal mask yet. Let do_signal() deliver 1518 + * the signal on the way back to userspace, before the signal 1519 + * mask is restored. 1520 + */ 1521 + if (sigmask) { 1522 + memcpy(&current->saved_sigmask, &sigsaved, 1523 + sizeof(sigsaved)); 1524 + set_restore_sigmask(); 1525 + } 1526 + } else if (sigmask) 1527 + sigprocmask(SIG_SETMASK, &sigsaved, NULL); 1528 + 1529 + return ret; 1530 + } 1531 + 1532 + asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr, 1533 + int __user *upeer_addrlen) 1534 + { 1535 + return do_accept(fd, upeer_sockaddr, upeer_addrlen, 0); 1505 1536 } 1506 1537 1507 1538 /* ··· 2068 2011 2069 2012 /* Argument list sizes for sys_socketcall */ 2070 2013 #define AL(x) ((x) * sizeof(unsigned long)) 2071 - static const unsigned char nargs[18]={ 2014 + static const unsigned char nargs[19]={ 2072 2015 AL(0),AL(3),AL(3),AL(3),AL(2),AL(3), 2073 2016 AL(3),AL(3),AL(4),AL(4),AL(4),AL(6), 2074 - AL(6),AL(2),AL(5),AL(5),AL(3),AL(3) 2017 + AL(6),AL(2),AL(5),AL(5),AL(3),AL(3), 2018 + AL(6) 2075 2019 }; 2076 2020 2077 2021 #undef AL ··· 2091 2033 unsigned long a0, a1; 2092 2034 int err; 2093 2035 2094 - if (call < 1 || call > 
SYS_RECVMSG) 2036 + if (call < 1 || call > SYS_PACCEPT) 2095 2037 return -EINVAL; 2096 2038 2097 2039 /* copy_from_user should be SMP safe. */ ··· 2120 2062 break; 2121 2063 case SYS_ACCEPT: 2122 2064 err = 2123 - sys_accept(a0, (struct sockaddr __user *)a1, 2124 - (int __user *)a[2]); 2065 + do_accept(a0, (struct sockaddr __user *)a1, 2066 + (int __user *)a[2], 0); 2125 2067 break; 2126 2068 case SYS_GETSOCKNAME: 2127 2069 err = ··· 2167 2109 break; 2168 2110 case SYS_RECVMSG: 2169 2111 err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]); 2112 + break; 2113 + case SYS_PACCEPT: 2114 + err = 2115 + sys_paccept(a0, (struct sockaddr __user *)a1, 2116 + (int __user *)a[2], 2117 + (const sigset_t __user *) a[3], 2118 + a[4], a[5]); 2170 2119 break; 2171 2120 default: 2172 2121 err = -EINVAL;