Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'ipc-scalability'

Merge IPC cleanup and scalability patches from Andrew Morton.

This cleans up many of the oddities in the IPC code, uses the list
iterator helpers, splits out locking and adds per-semaphore locks for
greater scalability of the IPC semaphore code.

Most normal user-level locking by now uses futexes (i.e. pthreads, but
also a lot of specialized locks), but SysV IPC semaphores are apparently
still used in some big applications, either for portability reasons, or
because they offer tracking and undo (and you don't need to have a
special shared memory area for them).

Our IPC semaphore scalability was pitiful. We used to lock much too big
ranges, and we used to have a single ipc lock per ipc semaphore array.
Most loads never cared, but some do. There are some numbers in the
individual commits.

* ipc-scalability:
ipc: sysv shared memory limited to 8TiB
ipc/msg.c: use list_for_each_entry_[safe] for list traversing
ipc,sem: fine grained locking for semtimedop
ipc,sem: have only one list in struct sem_queue
ipc,sem: open code and rename sem_lock
ipc,sem: do not hold ipc lock more than necessary
ipc: introduce lockless pre_down ipcctl
ipc: introduce obtaining a lockless ipc object
ipc: remove bogus lock comment for ipc_checkid
ipc/msgutil.c: use linux/uaccess.h
ipc: refactor msg list search into separate function
ipc: simplify msg list search
ipc: implement MSG_COPY as a new receive mode
ipc: remove msg handling from queue scan
ipc: set EFAULT as default error in load_msg()
ipc: tighten msg copy loops
ipc: separate msg allocation from userspace copy
ipc: clamp with min()

+541 -342
+1 -1
include/linux/ipc_namespace.h
··· 43 43 44 44 size_t shm_ctlmax; 45 45 size_t shm_ctlall; 46 + unsigned long shm_tot; 46 47 int shm_ctlmni; 47 - int shm_tot; 48 48 /* 49 49 * Defines whether IPC_RMID is forced for _all_ shm segments regardless 50 50 * of shmctl()
+49 -73
ipc/msg.c
··· 66 66 #define SEARCH_EQUAL 2 67 67 #define SEARCH_NOTEQUAL 3 68 68 #define SEARCH_LESSEQUAL 4 69 + #define SEARCH_NUMBER 5 69 70 70 71 #define msg_ids(ns) ((ns)->ids[IPC_MSG_IDS]) 71 72 ··· 238 237 239 238 static void ss_wakeup(struct list_head *h, int kill) 240 239 { 241 - struct list_head *tmp; 240 + struct msg_sender *mss, *t; 242 241 243 - tmp = h->next; 244 - while (tmp != h) { 245 - struct msg_sender *mss; 246 - 247 - mss = list_entry(tmp, struct msg_sender, list); 248 - tmp = tmp->next; 242 + list_for_each_entry_safe(mss, t, h, list) { 249 243 if (kill) 250 244 mss->list.next = NULL; 251 245 wake_up_process(mss->tsk); ··· 249 253 250 254 static void expunge_all(struct msg_queue *msq, int res) 251 255 { 252 - struct list_head *tmp; 256 + struct msg_receiver *msr, *t; 253 257 254 - tmp = msq->q_receivers.next; 255 - while (tmp != &msq->q_receivers) { 256 - struct msg_receiver *msr; 257 - 258 - msr = list_entry(tmp, struct msg_receiver, r_list); 259 - tmp = tmp->next; 258 + list_for_each_entry_safe(msr, t, &msq->q_receivers, r_list) { 260 259 msr->r_msg = NULL; 261 260 wake_up_process(msr->r_tsk); 262 261 smp_mb(); ··· 269 278 */ 270 279 static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) 271 280 { 272 - struct list_head *tmp; 281 + struct msg_msg *msg, *t; 273 282 struct msg_queue *msq = container_of(ipcp, struct msg_queue, q_perm); 274 283 275 284 expunge_all(msq, -EIDRM); ··· 277 286 msg_rmid(ns, msq); 278 287 msg_unlock(msq); 279 288 280 - tmp = msq->q_messages.next; 281 - while (tmp != &msq->q_messages) { 282 - struct msg_msg *msg = list_entry(tmp, struct msg_msg, m_list); 283 - 284 - tmp = tmp->next; 289 + list_for_each_entry_safe(msg, t, &msq->q_messages, m_list) { 285 290 atomic_dec(&ns->msg_hdrs); 286 291 free_msg(msg); 287 292 } ··· 570 583 switch(mode) 571 584 { 572 585 case SEARCH_ANY: 586 + case SEARCH_NUMBER: 573 587 return 1; 574 588 case SEARCH_LESSEQUAL: 575 589 if (msg->m_type <=type) ··· 590 602 591 603 static inline 
int pipelined_send(struct msg_queue *msq, struct msg_msg *msg) 592 604 { 593 - struct list_head *tmp; 605 + struct msg_receiver *msr, *t; 594 606 595 - tmp = msq->q_receivers.next; 596 - while (tmp != &msq->q_receivers) { 597 - struct msg_receiver *msr; 598 - 599 - msr = list_entry(tmp, struct msg_receiver, r_list); 600 - tmp = tmp->next; 607 + list_for_each_entry_safe(msr, t, &msq->q_receivers, r_list) { 601 608 if (testmsg(msg, msr->r_msgtype, msr->r_mode) && 602 609 !security_msg_queue_msgrcv(msq, msg, msr->r_tsk, 603 610 msr->r_msgtype, msr->r_mode)) { ··· 668 685 goto out_unlock_free; 669 686 } 670 687 ss_add(msq, &s); 671 - ipc_rcu_getref(msq); 688 + 689 + if (!ipc_rcu_getref(msq)) { 690 + err = -EIDRM; 691 + goto out_unlock_free; 692 + } 693 + 672 694 msg_unlock(msq); 673 695 schedule(); 674 696 ··· 726 738 727 739 static inline int convert_mode(long *msgtyp, int msgflg) 728 740 { 741 + if (msgflg & MSG_COPY) 742 + return SEARCH_NUMBER; 729 743 /* 730 744 * find message of correct type. 731 745 * msgtyp = 0 => get first. ··· 764 774 * This function creates new kernel message structure, large enough to store 765 775 * bufsz message bytes. 766 776 */ 767 - static inline struct msg_msg *prepare_copy(void __user *buf, size_t bufsz, 768 - int msgflg, long *msgtyp, 769 - unsigned long *copy_number) 777 + static inline struct msg_msg *prepare_copy(void __user *buf, size_t bufsz) 770 778 { 771 779 struct msg_msg *copy; 772 780 773 - *copy_number = *msgtyp; 774 - *msgtyp = 0; 775 781 /* 776 782 * Create dummy message to copy real message to. 
777 783 */ ··· 783 797 free_msg(copy); 784 798 } 785 799 #else 786 - static inline struct msg_msg *prepare_copy(void __user *buf, size_t bufsz, 787 - int msgflg, long *msgtyp, 788 - unsigned long *copy_number) 800 + static inline struct msg_msg *prepare_copy(void __user *buf, size_t bufsz) 789 801 { 790 802 return ERR_PTR(-ENOSYS); 791 803 } ··· 792 808 { 793 809 } 794 810 #endif 811 + 812 + static struct msg_msg *find_msg(struct msg_queue *msq, long *msgtyp, int mode) 813 + { 814 + struct msg_msg *msg; 815 + long count = 0; 816 + 817 + list_for_each_entry(msg, &msq->q_messages, m_list) { 818 + if (testmsg(msg, *msgtyp, mode) && 819 + !security_msg_queue_msgrcv(msq, msg, current, 820 + *msgtyp, mode)) { 821 + if (mode == SEARCH_LESSEQUAL && msg->m_type != 1) { 822 + *msgtyp = msg->m_type - 1; 823 + } else if (mode == SEARCH_NUMBER) { 824 + if (*msgtyp == count) 825 + return msg; 826 + } else 827 + return msg; 828 + count++; 829 + } 830 + } 831 + 832 + return ERR_PTR(-EAGAIN); 833 + } 834 + 795 835 796 836 long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, 797 837 int msgflg, ··· 826 818 int mode; 827 819 struct ipc_namespace *ns; 828 820 struct msg_msg *copy = NULL; 829 - unsigned long copy_number = 0; 830 821 831 822 ns = current->nsproxy->ipc_ns; 832 823 833 824 if (msqid < 0 || (long) bufsz < 0) 834 825 return -EINVAL; 835 826 if (msgflg & MSG_COPY) { 836 - copy = prepare_copy(buf, min_t(size_t, bufsz, ns->msg_ctlmax), 837 - msgflg, &msgtyp, &copy_number); 827 + copy = prepare_copy(buf, min_t(size_t, bufsz, ns->msg_ctlmax)); 838 828 if (IS_ERR(copy)) 839 829 return PTR_ERR(copy); 840 830 } ··· 846 840 847 841 for (;;) { 848 842 struct msg_receiver msr_d; 849 - struct list_head *tmp; 850 - long msg_counter = 0; 851 843 852 844 msg = ERR_PTR(-EACCES); 853 845 if (ipcperms(ns, &msq->q_perm, S_IRUGO)) 854 846 goto out_unlock; 855 847 856 - msg = ERR_PTR(-EAGAIN); 857 - tmp = msq->q_messages.next; 858 - while (tmp != &msq->q_messages) { 859 - 
struct msg_msg *walk_msg; 848 + msg = find_msg(msq, &msgtyp, mode); 860 849 861 - walk_msg = list_entry(tmp, struct msg_msg, m_list); 862 - if (testmsg(walk_msg, msgtyp, mode) && 863 - !security_msg_queue_msgrcv(msq, walk_msg, current, 864 - msgtyp, mode)) { 865 - 866 - msg = walk_msg; 867 - if (mode == SEARCH_LESSEQUAL && 868 - walk_msg->m_type != 1) { 869 - msgtyp = walk_msg->m_type - 1; 870 - } else if (msgflg & MSG_COPY) { 871 - if (copy_number == msg_counter) { 872 - /* 873 - * Found requested message. 874 - * Copy it. 875 - */ 876 - msg = copy_msg(msg, copy); 877 - if (IS_ERR(msg)) 878 - goto out_unlock; 879 - break; 880 - } 881 - msg = ERR_PTR(-EAGAIN); 882 - } else 883 - break; 884 - msg_counter++; 885 - } 886 - tmp = tmp->next; 887 - } 888 850 if (!IS_ERR(msg)) { 889 851 /* 890 852 * Found a suitable message. ··· 866 892 * If we are copying, then do not unlink message and do 867 893 * not update queue parameters. 868 894 */ 869 - if (msgflg & MSG_COPY) 895 + if (msgflg & MSG_COPY) { 896 + msg = copy_msg(msg, copy); 870 897 goto out_unlock; 898 + } 871 899 list_del(&msg->m_list); 872 900 msq->q_qnum--; 873 901 msq->q_rtime = get_seconds();
+53 -57
ipc/msgutil.c
··· 17 17 #include <linux/ipc_namespace.h> 18 18 #include <linux/utsname.h> 19 19 #include <linux/proc_fs.h> 20 - #include <asm/uaccess.h> 20 + #include <linux/uaccess.h> 21 21 22 22 #include "util.h" 23 23 ··· 37 37 atomic_t nr_ipc_ns = ATOMIC_INIT(1); 38 38 39 39 struct msg_msgseg { 40 - struct msg_msgseg* next; 40 + struct msg_msgseg *next; 41 41 /* the next part of the message follows immediately */ 42 42 }; 43 43 44 - #define DATALEN_MSG (PAGE_SIZE-sizeof(struct msg_msg)) 45 - #define DATALEN_SEG (PAGE_SIZE-sizeof(struct msg_msgseg)) 44 + #define DATALEN_MSG (int)(PAGE_SIZE-sizeof(struct msg_msg)) 45 + #define DATALEN_SEG (int)(PAGE_SIZE-sizeof(struct msg_msgseg)) 46 46 47 - struct msg_msg *load_msg(const void __user *src, int len) 47 + 48 + static struct msg_msg *alloc_msg(int len) 48 49 { 49 50 struct msg_msg *msg; 50 51 struct msg_msgseg **pseg; 51 - int err; 52 52 int alen; 53 53 54 - alen = len; 55 - if (alen > DATALEN_MSG) 56 - alen = DATALEN_MSG; 57 - 54 + alen = min(len, DATALEN_MSG); 58 55 msg = kmalloc(sizeof(*msg) + alen, GFP_KERNEL); 59 56 if (msg == NULL) 60 - return ERR_PTR(-ENOMEM); 57 + return NULL; 61 58 62 59 msg->next = NULL; 63 60 msg->security = NULL; 64 61 65 - if (copy_from_user(msg + 1, src, alen)) { 66 - err = -EFAULT; 67 - goto out_err; 68 - } 69 - 70 62 len -= alen; 71 - src = ((char __user *)src) + alen; 72 63 pseg = &msg->next; 73 64 while (len > 0) { 74 65 struct msg_msgseg *seg; 75 - alen = len; 76 - if (alen > DATALEN_SEG) 77 - alen = DATALEN_SEG; 78 - seg = kmalloc(sizeof(*seg) + alen, 79 - GFP_KERNEL); 80 - if (seg == NULL) { 81 - err = -ENOMEM; 66 + alen = min(len, DATALEN_SEG); 67 + seg = kmalloc(sizeof(*seg) + alen, GFP_KERNEL); 68 + if (seg == NULL) 82 69 goto out_err; 83 - } 84 70 *pseg = seg; 85 71 seg->next = NULL; 86 - if (copy_from_user(seg + 1, src, alen)) { 87 - err = -EFAULT; 88 - goto out_err; 89 - } 90 72 pseg = &seg->next; 91 73 len -= alen; 92 - src = ((char __user *)src) + alen; 74 + } 75 + 76 + return msg; 77 
+ 78 + out_err: 79 + free_msg(msg); 80 + return NULL; 81 + } 82 + 83 + struct msg_msg *load_msg(const void __user *src, int len) 84 + { 85 + struct msg_msg *msg; 86 + struct msg_msgseg *seg; 87 + int err = -EFAULT; 88 + int alen; 89 + 90 + msg = alloc_msg(len); 91 + if (msg == NULL) 92 + return ERR_PTR(-ENOMEM); 93 + 94 + alen = min(len, DATALEN_MSG); 95 + if (copy_from_user(msg + 1, src, alen)) 96 + goto out_err; 97 + 98 + for (seg = msg->next; seg != NULL; seg = seg->next) { 99 + len -= alen; 100 + src = (char __user *)src + alen; 101 + alen = min(len, DATALEN_SEG); 102 + if (copy_from_user(seg + 1, src, alen)) 103 + goto out_err; 93 104 } 94 105 95 106 err = security_msg_msg_alloc(msg); ··· 124 113 if (src->m_ts > dst->m_ts) 125 114 return ERR_PTR(-EINVAL); 126 115 127 - alen = len; 128 - if (alen > DATALEN_MSG) 129 - alen = DATALEN_MSG; 130 - 116 + alen = min(len, DATALEN_MSG); 131 117 memcpy(dst + 1, src + 1, alen); 132 118 133 - len -= alen; 134 - dst_pseg = dst->next; 135 - src_pseg = src->next; 136 - while (len > 0) { 137 - alen = len; 138 - if (alen > DATALEN_SEG) 139 - alen = DATALEN_SEG; 140 - memcpy(dst_pseg + 1, src_pseg + 1, alen); 141 - dst_pseg = dst_pseg->next; 119 + for (dst_pseg = dst->next, src_pseg = src->next; 120 + src_pseg != NULL; 121 + dst_pseg = dst_pseg->next, src_pseg = src_pseg->next) { 122 + 142 123 len -= alen; 143 - src_pseg = src_pseg->next; 124 + alen = min(len, DATALEN_SEG); 125 + memcpy(dst_pseg + 1, src_pseg + 1, alen); 144 126 } 145 127 146 128 dst->m_type = src->m_type; ··· 152 148 int alen; 153 149 struct msg_msgseg *seg; 154 150 155 - alen = len; 156 - if (alen > DATALEN_MSG) 157 - alen = DATALEN_MSG; 151 + alen = min(len, DATALEN_MSG); 158 152 if (copy_to_user(dest, msg + 1, alen)) 159 153 return -1; 160 154 161 - len -= alen; 162 - dest = ((char __user *)dest) + alen; 163 - seg = msg->next; 164 - while (len > 0) { 165 - alen = len; 166 - if (alen > DATALEN_SEG) 167 - alen = DATALEN_SEG; 155 + for (seg = msg->next; seg != 
NULL; seg = seg->next) { 156 + len -= alen; 157 + dest = (char __user *)dest + alen; 158 + alen = min(len, DATALEN_SEG); 168 159 if (copy_to_user(dest, seg + 1, alen)) 169 160 return -1; 170 - len -= alen; 171 - dest = ((char __user *)dest) + alen; 172 - seg = seg->next; 173 161 } 174 162 return 0; 175 163 }
+317 -161
ipc/sem.c
··· 94 94 struct sem { 95 95 int semval; /* current value */ 96 96 int sempid; /* pid of last operation */ 97 + spinlock_t lock; /* spinlock for fine-grained semtimedop */ 97 98 struct list_head sem_pending; /* pending single-sop operations */ 98 99 }; 99 100 100 101 /* One queue for each sleeping process in the system. */ 101 102 struct sem_queue { 102 - struct list_head simple_list; /* queue of pending operations */ 103 103 struct list_head list; /* queue of pending operations */ 104 104 struct task_struct *sleeper; /* this process */ 105 105 struct sem_undo *undo; /* undo structure */ ··· 138 138 139 139 #define sem_ids(ns) ((ns)->ids[IPC_SEM_IDS]) 140 140 141 - #define sem_unlock(sma) ipc_unlock(&(sma)->sem_perm) 142 141 #define sem_checkid(sma, semid) ipc_checkid(&sma->sem_perm, semid) 143 142 144 143 static int newary(struct ipc_namespace *, struct ipc_params *); ··· 190 191 } 191 192 192 193 /* 194 + * If the request contains only one semaphore operation, and there are 195 + * no complex transactions pending, lock only the semaphore involved. 196 + * Otherwise, lock the entire semaphore array, since we either have 197 + * multiple semaphores in our own semops, or we need to look at 198 + * semaphores from other pending complex operations. 199 + * 200 + * Carefully guard against sma->complex_count changing between zero 201 + * and non-zero while we are spinning for the lock. The value of 202 + * sma->complex_count cannot change while we are holding the lock, 203 + * so sem_unlock should be fine. 204 + * 205 + * The global lock path checks that all the local locks have been released, 206 + * checking each local lock once. This means that the local lock paths 207 + * cannot start their critical sections while the global lock is held. 
208 + */ 209 + static inline int sem_lock(struct sem_array *sma, struct sembuf *sops, 210 + int nsops) 211 + { 212 + int locknum; 213 + again: 214 + if (nsops == 1 && !sma->complex_count) { 215 + struct sem *sem = sma->sem_base + sops->sem_num; 216 + 217 + /* Lock just the semaphore we are interested in. */ 218 + spin_lock(&sem->lock); 219 + 220 + /* 221 + * If sma->complex_count was set while we were spinning, 222 + * we may need to look at things we did not lock here. 223 + */ 224 + if (unlikely(sma->complex_count)) { 225 + spin_unlock(&sem->lock); 226 + goto lock_array; 227 + } 228 + 229 + /* 230 + * Another process is holding the global lock on the 231 + * sem_array; we cannot enter our critical section, 232 + * but have to wait for the global lock to be released. 233 + */ 234 + if (unlikely(spin_is_locked(&sma->sem_perm.lock))) { 235 + spin_unlock(&sem->lock); 236 + spin_unlock_wait(&sma->sem_perm.lock); 237 + goto again; 238 + } 239 + 240 + locknum = sops->sem_num; 241 + } else { 242 + int i; 243 + /* 244 + * Lock the semaphore array, and wait for all of the 245 + * individual semaphore locks to go away. The code 246 + * above ensures no new single-lock holders will enter 247 + * their critical section while the array lock is held. 248 + */ 249 + lock_array: 250 + spin_lock(&sma->sem_perm.lock); 251 + for (i = 0; i < sma->sem_nsems; i++) { 252 + struct sem *sem = sma->sem_base + i; 253 + spin_unlock_wait(&sem->lock); 254 + } 255 + locknum = -1; 256 + } 257 + return locknum; 258 + } 259 + 260 + static inline void sem_unlock(struct sem_array *sma, int locknum) 261 + { 262 + if (locknum == -1) { 263 + spin_unlock(&sma->sem_perm.lock); 264 + } else { 265 + struct sem *sem = sma->sem_base + locknum; 266 + spin_unlock(&sem->lock); 267 + } 268 + rcu_read_unlock(); 269 + } 270 + 271 + /* 193 272 * sem_lock_(check_) routines are called in the paths where the rw_mutex 194 273 * is not held. 
195 274 */ 196 - static inline struct sem_array *sem_lock(struct ipc_namespace *ns, int id) 275 + static inline struct sem_array *sem_obtain_lock(struct ipc_namespace *ns, 276 + int id, struct sembuf *sops, int nsops, int *locknum) 197 277 { 198 - struct kern_ipc_perm *ipcp = ipc_lock(&sem_ids(ns), id); 278 + struct kern_ipc_perm *ipcp; 279 + struct sem_array *sma; 280 + 281 + rcu_read_lock(); 282 + ipcp = ipc_obtain_object(&sem_ids(ns), id); 283 + if (IS_ERR(ipcp)) { 284 + sma = ERR_CAST(ipcp); 285 + goto err; 286 + } 287 + 288 + sma = container_of(ipcp, struct sem_array, sem_perm); 289 + *locknum = sem_lock(sma, sops, nsops); 290 + 291 + /* ipc_rmid() may have already freed the ID while sem_lock 292 + * was spinning: verify that the structure is still valid 293 + */ 294 + if (!ipcp->deleted) 295 + return container_of(ipcp, struct sem_array, sem_perm); 296 + 297 + sem_unlock(sma, *locknum); 298 + sma = ERR_PTR(-EINVAL); 299 + err: 300 + rcu_read_unlock(); 301 + return sma; 302 + } 303 + 304 + static inline struct sem_array *sem_obtain_object(struct ipc_namespace *ns, int id) 305 + { 306 + struct kern_ipc_perm *ipcp = ipc_obtain_object(&sem_ids(ns), id); 199 307 200 308 if (IS_ERR(ipcp)) 201 - return (struct sem_array *)ipcp; 309 + return ERR_CAST(ipcp); 202 310 203 311 return container_of(ipcp, struct sem_array, sem_perm); 204 312 } 205 313 206 - static inline struct sem_array *sem_lock_check(struct ipc_namespace *ns, 207 - int id) 314 + static inline struct sem_array *sem_obtain_object_check(struct ipc_namespace *ns, 315 + int id) 208 316 { 209 - struct kern_ipc_perm *ipcp = ipc_lock_check(&sem_ids(ns), id); 317 + struct kern_ipc_perm *ipcp = ipc_obtain_object_check(&sem_ids(ns), id); 210 318 211 319 if (IS_ERR(ipcp)) 212 - return (struct sem_array *)ipcp; 320 + return ERR_CAST(ipcp); 213 321 214 322 return container_of(ipcp, struct sem_array, sem_perm); 215 323 } 216 324 217 325 static inline void sem_lock_and_putref(struct sem_array *sma) 218 326 { 219 - 
ipc_lock_by_ptr(&sma->sem_perm); 327 + rcu_read_lock(); 328 + sem_lock(sma, NULL, -1); 220 329 ipc_rcu_putref(sma); 221 330 } 222 331 223 332 static inline void sem_getref_and_unlock(struct sem_array *sma) 224 333 { 225 - ipc_rcu_getref(sma); 226 - ipc_unlock(&(sma)->sem_perm); 334 + WARN_ON_ONCE(!ipc_rcu_getref(sma)); 335 + sem_unlock(sma, -1); 227 336 } 228 337 229 338 static inline void sem_putref(struct sem_array *sma) 230 339 { 231 - ipc_lock_by_ptr(&sma->sem_perm); 232 - ipc_rcu_putref(sma); 233 - ipc_unlock(&(sma)->sem_perm); 340 + sem_lock_and_putref(sma); 341 + sem_unlock(sma, -1); 342 + } 343 + 344 + /* 345 + * Call inside the rcu read section. 346 + */ 347 + static inline void sem_getref(struct sem_array *sma) 348 + { 349 + sem_lock(sma, NULL, -1); 350 + WARN_ON_ONCE(!ipc_rcu_getref(sma)); 351 + sem_unlock(sma, -1); 234 352 } 235 353 236 354 static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s) ··· 440 324 441 325 sma->sem_base = (struct sem *) &sma[1]; 442 326 443 - for (i = 0; i < nsems; i++) 327 + for (i = 0; i < nsems; i++) { 444 328 INIT_LIST_HEAD(&sma->sem_base[i].sem_pending); 329 + spin_lock_init(&sma->sem_base[i].lock); 330 + } 445 331 446 332 sma->complex_count = 0; 447 333 INIT_LIST_HEAD(&sma->sem_pending); 448 334 INIT_LIST_HEAD(&sma->list_id); 449 335 sma->sem_nsems = nsems; 450 336 sma->sem_ctime = get_seconds(); 451 - sem_unlock(sma); 337 + sem_unlock(sma, -1); 452 338 453 339 return sma->sem_perm.id; 454 340 } ··· 589 471 q->status = IN_WAKEUP; 590 472 q->pid = error; 591 473 592 - list_add_tail(&q->simple_list, pt); 474 + list_add_tail(&q->list, pt); 593 475 } 594 476 595 477 /** ··· 607 489 int did_something; 608 490 609 491 did_something = !list_empty(pt); 610 - list_for_each_entry_safe(q, t, pt, simple_list) { 492 + list_for_each_entry_safe(q, t, pt, list) { 611 493 wake_up_process(q->sleeper); 612 494 /* q can disappear immediately after writing q->status. 
*/ 613 495 smp_wmb(); ··· 620 502 static void unlink_queue(struct sem_array *sma, struct sem_queue *q) 621 503 { 622 504 list_del(&q->list); 623 - if (q->nsops == 1) 624 - list_del(&q->simple_list); 625 - else 505 + if (q->nsops > 1) 626 506 sma->complex_count--; 627 507 } 628 508 ··· 673 557 } 674 558 /* 675 559 * semval is 0. Check if there are wait-for-zero semops. 676 - * They must be the first entries in the per-semaphore simple queue 560 + * They must be the first entries in the per-semaphore queue 677 561 */ 678 - h = list_first_entry(&curr->sem_pending, struct sem_queue, simple_list); 562 + h = list_first_entry(&curr->sem_pending, struct sem_queue, list); 679 563 BUG_ON(h->nsops != 1); 680 564 BUG_ON(h->sops[0].sem_num != q->sops[0].sem_num); 681 565 ··· 695 579 * @pt: list head for the tasks that must be woken up. 696 580 * 697 581 * update_queue must be called after a semaphore in a semaphore array 698 - * was modified. If multiple semaphore were modified, then @semnum 699 - * must be set to -1. 582 + * was modified. If multiple semaphores were modified, update_queue must 583 + * be called with semnum = -1, as well as with the number of each modified 584 + * semaphore. 700 585 * The tasks that must be woken up are added to @pt. The return code 701 586 * is stored in q->pid. 702 587 * The function return 1 if at least one semop was completed successfully. ··· 707 590 struct sem_queue *q; 708 591 struct list_head *walk; 709 592 struct list_head *pending_list; 710 - int offset; 711 593 int semop_completed = 0; 712 594 713 - /* if there are complex operations around, then knowing the semaphore 714 - * that was modified doesn't help us. Assume that multiple semaphores 715 - * were modified. 
716 - */ 717 - if (sma->complex_count) 718 - semnum = -1; 719 - 720 - if (semnum == -1) { 595 + if (semnum == -1) 721 596 pending_list = &sma->sem_pending; 722 - offset = offsetof(struct sem_queue, list); 723 - } else { 597 + else 724 598 pending_list = &sma->sem_base[semnum].sem_pending; 725 - offset = offsetof(struct sem_queue, simple_list); 726 - } 727 599 728 600 again: 729 601 walk = pending_list->next; 730 602 while (walk != pending_list) { 731 603 int error, restart; 732 604 733 - q = (struct sem_queue *)((char *)walk - offset); 605 + q = container_of(walk, struct sem_queue, list); 734 606 walk = walk->next; 735 607 736 608 /* If we are scanning the single sop, per-semaphore list of ··· 778 672 if (sma->complex_count || sops == NULL) { 779 673 if (update_queue(sma, -1, pt)) 780 674 otime = 1; 675 + } 676 + 677 + if (!sops) { 678 + /* No semops; something special is going on. */ 679 + for (i = 0; i < sma->sem_nsems; i++) { 680 + if (update_queue(sma, i, pt)) 681 + otime = 1; 682 + } 781 683 goto done; 782 684 } 783 685 686 + /* Check the semaphores that were modified. */ 784 687 for (i = 0; i < nsops; i++) { 785 688 if (sops[i].sem_op > 0 || 786 689 (sops[i].sem_op < 0 && ··· 860 745 struct sem_queue *q, *tq; 861 746 struct sem_array *sma = container_of(ipcp, struct sem_array, sem_perm); 862 747 struct list_head tasks; 748 + int i; 863 749 864 750 /* Free the existing undo structures for this semaphore set. 
*/ 865 751 assert_spin_locked(&sma->sem_perm.lock); ··· 879 763 unlink_queue(sma, q); 880 764 wake_up_sem_queue_prepare(&tasks, q, -EIDRM); 881 765 } 766 + for (i = 0; i < sma->sem_nsems; i++) { 767 + struct sem *sem = sma->sem_base + i; 768 + list_for_each_entry_safe(q, tq, &sem->sem_pending, list) { 769 + unlink_queue(sma, q); 770 + wake_up_sem_queue_prepare(&tasks, q, -EIDRM); 771 + } 772 + } 882 773 883 774 /* Remove the semaphore set from the IDR */ 884 775 sem_rmid(ns, sma); 885 - sem_unlock(sma); 776 + sem_unlock(sma, -1); 886 777 887 778 wake_up_sem_queue_do(&tasks); 888 779 ns->used_sems -= sma->sem_nsems; ··· 965 842 case SEM_STAT: 966 843 { 967 844 struct semid64_ds tbuf; 968 - int id; 845 + int id = 0; 846 + 847 + memset(&tbuf, 0, sizeof(tbuf)); 969 848 970 849 if (cmd == SEM_STAT) { 971 - sma = sem_lock(ns, semid); 972 - if (IS_ERR(sma)) 973 - return PTR_ERR(sma); 850 + rcu_read_lock(); 851 + sma = sem_obtain_object(ns, semid); 852 + if (IS_ERR(sma)) { 853 + err = PTR_ERR(sma); 854 + goto out_unlock; 855 + } 974 856 id = sma->sem_perm.id; 975 857 } else { 976 - sma = sem_lock_check(ns, semid); 977 - if (IS_ERR(sma)) 978 - return PTR_ERR(sma); 979 - id = 0; 858 + rcu_read_lock(); 859 + sma = sem_obtain_object_check(ns, semid); 860 + if (IS_ERR(sma)) { 861 + err = PTR_ERR(sma); 862 + goto out_unlock; 863 + } 980 864 } 981 865 982 866 err = -EACCES; ··· 994 864 if (err) 995 865 goto out_unlock; 996 866 997 - memset(&tbuf, 0, sizeof(tbuf)); 998 - 999 867 kernel_to_ipc64_perm(&sma->sem_perm, &tbuf.sem_perm); 1000 868 tbuf.sem_otime = sma->sem_otime; 1001 869 tbuf.sem_ctime = sma->sem_ctime; 1002 870 tbuf.sem_nsems = sma->sem_nsems; 1003 - sem_unlock(sma); 871 + rcu_read_unlock(); 1004 872 if (copy_semid_to_user(p, &tbuf, version)) 1005 873 return -EFAULT; 1006 874 return id; ··· 1007 879 return -EINVAL; 1008 880 } 1009 881 out_unlock: 1010 - sem_unlock(sma); 882 + rcu_read_unlock(); 1011 883 return err; 1012 884 } 1013 885 ··· 1018 890 struct sem_array 
*sma; 1019 891 struct sem* curr; 1020 892 int err; 1021 - int nsems; 1022 893 struct list_head tasks; 1023 894 int val; 1024 895 #if defined(CONFIG_64BIT) && defined(__BIG_ENDIAN) ··· 1028 901 val = arg; 1029 902 #endif 1030 903 1031 - sma = sem_lock_check(ns, semid); 1032 - if (IS_ERR(sma)) 1033 - return PTR_ERR(sma); 904 + if (val > SEMVMX || val < 0) 905 + return -ERANGE; 1034 906 1035 907 INIT_LIST_HEAD(&tasks); 1036 - nsems = sma->sem_nsems; 1037 908 1038 - err = -EACCES; 1039 - if (ipcperms(ns, &sma->sem_perm, S_IWUGO)) 1040 - goto out_unlock; 909 + rcu_read_lock(); 910 + sma = sem_obtain_object_check(ns, semid); 911 + if (IS_ERR(sma)) { 912 + rcu_read_unlock(); 913 + return PTR_ERR(sma); 914 + } 915 + 916 + if (semnum < 0 || semnum >= sma->sem_nsems) { 917 + rcu_read_unlock(); 918 + return -EINVAL; 919 + } 920 + 921 + 922 + if (ipcperms(ns, &sma->sem_perm, S_IWUGO)) { 923 + rcu_read_unlock(); 924 + return -EACCES; 925 + } 1041 926 1042 927 err = security_sem_semctl(sma, SETVAL); 1043 - if (err) 1044 - goto out_unlock; 928 + if (err) { 929 + rcu_read_unlock(); 930 + return -EACCES; 931 + } 1045 932 1046 - err = -EINVAL; 1047 - if(semnum < 0 || semnum >= nsems) 1048 - goto out_unlock; 933 + sem_lock(sma, NULL, -1); 1049 934 1050 935 curr = &sma->sem_base[semnum]; 1051 - 1052 - err = -ERANGE; 1053 - if (val > SEMVMX || val < 0) 1054 - goto out_unlock; 1055 936 1056 937 assert_spin_locked(&sma->sem_perm.lock); 1057 938 list_for_each_entry(un, &sma->list_id, list_id) ··· 1070 935 sma->sem_ctime = get_seconds(); 1071 936 /* maybe some queued-up processes were waiting for this */ 1072 937 do_smart_update(sma, NULL, 0, 0, &tasks); 1073 - err = 0; 1074 - out_unlock: 1075 - sem_unlock(sma); 938 + sem_unlock(sma, -1); 1076 939 wake_up_sem_queue_do(&tasks); 1077 - return err; 940 + return 0; 1078 941 } 1079 942 1080 943 static int semctl_main(struct ipc_namespace *ns, int semid, int semnum, ··· 1080 947 { 1081 948 struct sem_array *sma; 1082 949 struct sem* curr; 1083 - 
int err; 950 + int err, nsems; 1084 951 ushort fast_sem_io[SEMMSL_FAST]; 1085 952 ushort* sem_io = fast_sem_io; 1086 - int nsems; 1087 953 struct list_head tasks; 1088 954 1089 - sma = sem_lock_check(ns, semid); 1090 - if (IS_ERR(sma)) 1091 - return PTR_ERR(sma); 1092 - 1093 955 INIT_LIST_HEAD(&tasks); 956 + 957 + rcu_read_lock(); 958 + sma = sem_obtain_object_check(ns, semid); 959 + if (IS_ERR(sma)) { 960 + rcu_read_unlock(); 961 + return PTR_ERR(sma); 962 + } 963 + 1094 964 nsems = sma->sem_nsems; 1095 965 1096 966 err = -EACCES; 1097 967 if (ipcperms(ns, &sma->sem_perm, 1098 - cmd == SETALL ? S_IWUGO : S_IRUGO)) 1099 - goto out_unlock; 968 + cmd == SETALL ? S_IWUGO : S_IRUGO)) { 969 + rcu_read_unlock(); 970 + goto out_wakeup; 971 + } 1100 972 1101 973 err = security_sem_semctl(sma, cmd); 1102 - if (err) 1103 - goto out_unlock; 974 + if (err) { 975 + rcu_read_unlock(); 976 + goto out_wakeup; 977 + } 1104 978 1105 979 err = -EACCES; 1106 980 switch (cmd) { ··· 1117 977 int i; 1118 978 1119 979 if(nsems > SEMMSL_FAST) { 1120 - sem_getref_and_unlock(sma); 980 + sem_getref(sma); 1121 981 1122 982 sem_io = ipc_alloc(sizeof(ushort)*nsems); 1123 983 if(sem_io == NULL) { ··· 1127 987 1128 988 sem_lock_and_putref(sma); 1129 989 if (sma->sem_perm.deleted) { 1130 - sem_unlock(sma); 990 + sem_unlock(sma, -1); 1131 991 err = -EIDRM; 1132 992 goto out_free; 1133 993 } 1134 - } 994 + } else 995 + sem_lock(sma, NULL, -1); 1135 996 1136 997 for (i = 0; i < sma->sem_nsems; i++) 1137 998 sem_io[i] = sma->sem_base[i].semval; 1138 - sem_unlock(sma); 999 + sem_unlock(sma, -1); 1139 1000 err = 0; 1140 1001 if(copy_to_user(array, sem_io, nsems*sizeof(ushort))) 1141 1002 err = -EFAULT; ··· 1147 1006 int i; 1148 1007 struct sem_undo *un; 1149 1008 1150 - sem_getref_and_unlock(sma); 1009 + if (!ipc_rcu_getref(sma)) { 1010 + rcu_read_unlock(); 1011 + return -EIDRM; 1012 + } 1013 + rcu_read_unlock(); 1151 1014 1152 1015 if(nsems > SEMMSL_FAST) { 1153 1016 sem_io = 
ipc_alloc(sizeof(ushort)*nsems); ··· 1176 1031 } 1177 1032 sem_lock_and_putref(sma); 1178 1033 if (sma->sem_perm.deleted) { 1179 - sem_unlock(sma); 1034 + sem_unlock(sma, -1); 1180 1035 err = -EIDRM; 1181 1036 goto out_free; 1182 1037 } ··· 1198 1053 /* GETVAL, GETPID, GETNCTN, GETZCNT: fall-through */ 1199 1054 } 1200 1055 err = -EINVAL; 1201 - if(semnum < 0 || semnum >= nsems) 1202 - goto out_unlock; 1056 + if (semnum < 0 || semnum >= nsems) { 1057 + rcu_read_unlock(); 1058 + goto out_wakeup; 1059 + } 1203 1060 1061 + sem_lock(sma, NULL, -1); 1204 1062 curr = &sma->sem_base[semnum]; 1205 1063 1206 1064 switch (cmd) { ··· 1220 1072 err = count_semzcnt(sma,semnum); 1221 1073 goto out_unlock; 1222 1074 } 1223 - out_unlock: 1224 - sem_unlock(sma); 1225 - wake_up_sem_queue_do(&tasks); 1226 1075 1076 + out_unlock: 1077 + sem_unlock(sma, -1); 1078 + out_wakeup: 1079 + wake_up_sem_queue_do(&tasks); 1227 1080 out_free: 1228 1081 if(sem_io != fast_sem_io) 1229 1082 ipc_free(sem_io, sizeof(ushort)*nsems); ··· 1275 1126 return -EFAULT; 1276 1127 } 1277 1128 1278 - ipcp = ipcctl_pre_down(ns, &sem_ids(ns), semid, cmd, 1279 - &semid64.sem_perm, 0); 1129 + ipcp = ipcctl_pre_down_nolock(ns, &sem_ids(ns), semid, cmd, 1130 + &semid64.sem_perm, 0); 1280 1131 if (IS_ERR(ipcp)) 1281 1132 return PTR_ERR(ipcp); 1282 1133 1283 1134 sma = container_of(ipcp, struct sem_array, sem_perm); 1284 1135 1285 1136 err = security_sem_semctl(sma, cmd); 1286 - if (err) 1137 + if (err) { 1138 + rcu_read_unlock(); 1287 1139 goto out_unlock; 1140 + } 1288 1141 1289 1142 switch(cmd){ 1290 1143 case IPC_RMID: 1144 + sem_lock(sma, NULL, -1); 1291 1145 freeary(ns, ipcp); 1292 1146 goto out_up; 1293 1147 case IPC_SET: 1148 + sem_lock(sma, NULL, -1); 1294 1149 err = ipc_update_perm(&semid64.sem_perm, ipcp); 1295 1150 if (err) 1296 1151 goto out_unlock; 1297 1152 sma->sem_ctime = get_seconds(); 1298 1153 break; 1299 1154 default: 1155 + rcu_read_unlock(); 1300 1156 err = -EINVAL; 1157 + goto out_up; 1301 1158 
} 1302 1159 1303 1160 out_unlock: 1304 - sem_unlock(sma); 1161 + sem_unlock(sma, -1); 1305 1162 out_up: 1306 1163 up_write(&sem_ids(ns).rw_mutex); 1307 1164 return err; ··· 1419 1264 struct sem_array *sma; 1420 1265 struct sem_undo_list *ulp; 1421 1266 struct sem_undo *un, *new; 1422 - int nsems; 1423 - int error; 1267 + int nsems, error; 1424 1268 1425 1269 error = get_undo_list(&ulp); 1426 1270 if (error) ··· 1431 1277 spin_unlock(&ulp->lock); 1432 1278 if (likely(un!=NULL)) 1433 1279 goto out; 1434 - rcu_read_unlock(); 1435 1280 1436 1281 /* no undo structure around - allocate one. */ 1437 1282 /* step 1: figure out the size of the semaphore array */ 1438 - sma = sem_lock_check(ns, semid); 1439 - if (IS_ERR(sma)) 1283 + sma = sem_obtain_object_check(ns, semid); 1284 + if (IS_ERR(sma)) { 1285 + rcu_read_unlock(); 1440 1286 return ERR_CAST(sma); 1287 + } 1441 1288 1442 1289 nsems = sma->sem_nsems; 1443 - sem_getref_and_unlock(sma); 1290 + if (!ipc_rcu_getref(sma)) { 1291 + rcu_read_unlock(); 1292 + un = ERR_PTR(-EIDRM); 1293 + goto out; 1294 + } 1295 + rcu_read_unlock(); 1444 1296 1445 1297 /* step 2: allocate new undo structure */ 1446 1298 new = kzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL); ··· 1458 1298 /* step 3: Acquire the lock on semaphore array */ 1459 1299 sem_lock_and_putref(sma); 1460 1300 if (sma->sem_perm.deleted) { 1461 - sem_unlock(sma); 1301 + sem_unlock(sma, -1); 1462 1302 kfree(new); 1463 1303 un = ERR_PTR(-EIDRM); 1464 1304 goto out; ··· 1486 1326 success: 1487 1327 spin_unlock(&ulp->lock); 1488 1328 rcu_read_lock(); 1489 - sem_unlock(sma); 1329 + sem_unlock(sma, -1); 1490 1330 out: 1491 1331 return un; 1492 1332 } ··· 1526 1366 struct sembuf fast_sops[SEMOPM_FAST]; 1527 1367 struct sembuf* sops = fast_sops, *sop; 1528 1368 struct sem_undo *un; 1529 - int undos = 0, alter = 0, max; 1369 + int undos = 0, alter = 0, max, locknum; 1530 1370 struct sem_queue queue; 1531 1371 unsigned long jiffies_left = 0; 1532 1372 struct 
ipc_namespace *ns; ··· 1570 1410 alter = 1; 1571 1411 } 1572 1412 1413 + INIT_LIST_HEAD(&tasks); 1414 + 1573 1415 if (undos) { 1416 + /* On success, find_alloc_undo takes the rcu_read_lock */ 1574 1417 un = find_alloc_undo(ns, semid); 1575 1418 if (IS_ERR(un)) { 1576 1419 error = PTR_ERR(un); 1577 1420 goto out_free; 1578 1421 } 1579 - } else 1422 + } else { 1580 1423 un = NULL; 1424 + rcu_read_lock(); 1425 + } 1581 1426 1582 - INIT_LIST_HEAD(&tasks); 1583 - 1584 - sma = sem_lock_check(ns, semid); 1427 + sma = sem_obtain_object_check(ns, semid); 1585 1428 if (IS_ERR(sma)) { 1586 - if (un) 1587 - rcu_read_unlock(); 1429 + rcu_read_unlock(); 1588 1430 error = PTR_ERR(sma); 1589 1431 goto out_free; 1432 + } 1433 + 1434 + error = -EFBIG; 1435 + if (max >= sma->sem_nsems) { 1436 + rcu_read_unlock(); 1437 + goto out_wakeup; 1438 + } 1439 + 1440 + error = -EACCES; 1441 + if (ipcperms(ns, &sma->sem_perm, alter ? S_IWUGO : S_IRUGO)) { 1442 + rcu_read_unlock(); 1443 + goto out_wakeup; 1444 + } 1445 + 1446 + error = security_sem_semop(sma, sops, nsops, alter); 1447 + if (error) { 1448 + rcu_read_unlock(); 1449 + goto out_wakeup; 1590 1450 } 1591 1451 1592 1452 /* ··· 1617 1437 * "un" itself is guaranteed by rcu. 1618 1438 */ 1619 1439 error = -EIDRM; 1620 - if (un) { 1621 - if (un->semid == -1) { 1622 - rcu_read_unlock(); 1623 - goto out_unlock_free; 1624 - } else { 1625 - /* 1626 - * rcu lock can be released, "un" cannot disappear: 1627 - * - sem_lock is acquired, thus IPC_RMID is 1628 - * impossible. 1629 - * - exit_sem is impossible, it always operates on 1630 - * current (or a dead task). 1631 - */ 1632 - 1633 - rcu_read_unlock(); 1634 - } 1635 - } 1636 - 1637 - error = -EFBIG; 1638 - if (max >= sma->sem_nsems) 1639 - goto out_unlock_free; 1640 - 1641 - error = -EACCES; 1642 - if (ipcperms(ns, &sma->sem_perm, alter ? 
S_IWUGO : S_IRUGO)) 1643 - goto out_unlock_free; 1644 - 1645 - error = security_sem_semop(sma, sops, nsops, alter); 1646 - if (error) 1440 + locknum = sem_lock(sma, sops, nsops); 1441 + if (un && un->semid == -1) 1647 1442 goto out_unlock_free; 1648 1443 1649 1444 error = try_atomic_semop (sma, sops, nsops, un, task_tgid_vnr(current)); ··· 1638 1483 queue.undo = un; 1639 1484 queue.pid = task_tgid_vnr(current); 1640 1485 queue.alter = alter; 1641 - if (alter) 1642 - list_add_tail(&queue.list, &sma->sem_pending); 1643 - else 1644 - list_add(&queue.list, &sma->sem_pending); 1645 1486 1646 1487 if (nsops == 1) { 1647 1488 struct sem *curr; 1648 1489 curr = &sma->sem_base[sops->sem_num]; 1649 1490 1650 1491 if (alter) 1651 - list_add_tail(&queue.simple_list, &curr->sem_pending); 1492 + list_add_tail(&queue.list, &curr->sem_pending); 1652 1493 else 1653 - list_add(&queue.simple_list, &curr->sem_pending); 1494 + list_add(&queue.list, &curr->sem_pending); 1654 1495 } else { 1655 - INIT_LIST_HEAD(&queue.simple_list); 1496 + if (alter) 1497 + list_add_tail(&queue.list, &sma->sem_pending); 1498 + else 1499 + list_add(&queue.list, &sma->sem_pending); 1656 1500 sma->complex_count++; 1657 1501 } 1658 1502 ··· 1660 1506 1661 1507 sleep_again: 1662 1508 current->state = TASK_INTERRUPTIBLE; 1663 - sem_unlock(sma); 1509 + sem_unlock(sma, locknum); 1664 1510 1665 1511 if (timeout) 1666 1512 jiffies_left = schedule_timeout(jiffies_left); ··· 1682 1528 goto out_free; 1683 1529 } 1684 1530 1685 - sma = sem_lock(ns, semid); 1531 + sma = sem_obtain_lock(ns, semid, sops, nsops, &locknum); 1686 1532 1687 1533 /* 1688 1534 * Wait until it's guaranteed that no wakeup_sem_queue_do() is ongoing. 
··· 1721 1567 unlink_queue(sma, &queue); 1722 1568 1723 1569 out_unlock_free: 1724 - sem_unlock(sma); 1725 - 1570 + sem_unlock(sma, locknum); 1571 + out_wakeup: 1726 1572 wake_up_sem_queue_do(&tasks); 1727 1573 out_free: 1728 1574 if(sops != fast_sops) ··· 1785 1631 struct sem_array *sma; 1786 1632 struct sem_undo *un; 1787 1633 struct list_head tasks; 1788 - int semid; 1789 - int i; 1634 + int semid, i; 1790 1635 1791 1636 rcu_read_lock(); 1792 1637 un = list_entry_rcu(ulp->list_proc.next, ··· 1794 1641 semid = -1; 1795 1642 else 1796 1643 semid = un->semid; 1797 - rcu_read_unlock(); 1798 1644 1799 - if (semid == -1) 1645 + if (semid == -1) { 1646 + rcu_read_unlock(); 1800 1647 break; 1648 + } 1801 1649 1802 - sma = sem_lock_check(tsk->nsproxy->ipc_ns, un->semid); 1803 - 1650 + sma = sem_obtain_object_check(tsk->nsproxy->ipc_ns, un->semid); 1804 1651 /* exit_sem raced with IPC_RMID, nothing to do */ 1805 - if (IS_ERR(sma)) 1652 + if (IS_ERR(sma)) { 1653 + rcu_read_unlock(); 1806 1654 continue; 1655 + } 1807 1656 1657 + sem_lock(sma, NULL, -1); 1808 1658 un = __lookup_undo(ulp, semid); 1809 1659 if (un == NULL) { 1810 1660 /* exit_sem raced with IPC_RMID+semget() that created 1811 1661 * exactly the same semid. Nothing to do. 1812 1662 */ 1813 - sem_unlock(sma); 1663 + sem_unlock(sma, -1); 1814 1664 continue; 1815 1665 } 1816 1666 ··· 1853 1697 /* maybe some queued-up processes were waiting for this */ 1854 1698 INIT_LIST_HEAD(&tasks); 1855 1699 do_smart_update(sma, NULL, 0, 1, &tasks); 1856 - sem_unlock(sma); 1700 + sem_unlock(sma, -1); 1857 1701 wake_up_sem_queue_do(&tasks); 1858 1702 1859 1703 kfree_rcu(un, rcu);
+1 -1
ipc/shm.c
··· 462 462 size_t size = params->u.size; 463 463 int error; 464 464 struct shmid_kernel *shp; 465 - int numpages = (size + PAGE_SIZE -1) >> PAGE_SHIFT; 465 + size_t numpages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; 466 466 struct file * file; 467 467 char name[13]; 468 468 int id;
+108 -42
ipc/util.c
··· 439 439 * NULL is returned if the allocation fails 440 440 */ 441 441 442 - void* ipc_alloc(int size) 442 + void *ipc_alloc(int size) 443 443 { 444 - void* out; 444 + void *out; 445 445 if(size > PAGE_SIZE) 446 446 out = vmalloc(size); 447 447 else ··· 478 478 */ 479 479 struct ipc_rcu_hdr 480 480 { 481 - int refcount; 481 + atomic_t refcount; 482 482 int is_vmalloc; 483 483 void *data[0]; 484 484 }; ··· 516 516 * @size: size desired 517 517 * 518 518 * Allocate memory for the rcu header structure + the object. 519 - * Returns the pointer to the object. 520 - * NULL is returned if the allocation fails. 519 + * Returns the pointer to the object or NULL upon failure. 521 520 */ 522 - 523 - void* ipc_rcu_alloc(int size) 521 + void *ipc_rcu_alloc(int size) 524 522 { 525 - void* out; 526 - /* 523 + void *out; 524 + 525 + /* 527 526 * We prepend the allocation with the rcu struct, and 528 - * workqueue if necessary (for vmalloc). 527 + * workqueue if necessary (for vmalloc). 529 528 */ 530 529 if (rcu_use_vmalloc(size)) { 531 530 out = vmalloc(HDRLEN_VMALLOC + size); 532 - if (out) { 533 - out += HDRLEN_VMALLOC; 534 - container_of(out, struct ipc_rcu_hdr, data)->is_vmalloc = 1; 535 - container_of(out, struct ipc_rcu_hdr, data)->refcount = 1; 536 - } 531 + if (!out) 532 + goto done; 533 + 534 + out += HDRLEN_VMALLOC; 535 + container_of(out, struct ipc_rcu_hdr, data)->is_vmalloc = 1; 537 536 } else { 538 537 out = kmalloc(HDRLEN_KMALLOC + size, GFP_KERNEL); 539 - if (out) { 540 - out += HDRLEN_KMALLOC; 541 - container_of(out, struct ipc_rcu_hdr, data)->is_vmalloc = 0; 542 - container_of(out, struct ipc_rcu_hdr, data)->refcount = 1; 543 - } 538 + if (!out) 539 + goto done; 540 + 541 + out += HDRLEN_KMALLOC; 542 + container_of(out, struct ipc_rcu_hdr, data)->is_vmalloc = 0; 544 543 } 545 544 545 + /* set reference counter no matter what kind of allocation was done */ 546 + atomic_set(&container_of(out, struct ipc_rcu_hdr, data)->refcount, 1); 547 + done: 546 548 return 
out; 547 549 } 548 550 549 - void ipc_rcu_getref(void *ptr) 551 + int ipc_rcu_getref(void *ptr) 550 552 { 551 - container_of(ptr, struct ipc_rcu_hdr, data)->refcount++; 553 + return atomic_inc_not_zero(&container_of(ptr, struct ipc_rcu_hdr, data)->refcount); 552 554 } 553 555 554 556 static void ipc_do_vfree(struct work_struct *work) ··· 580 578 581 579 void ipc_rcu_putref(void *ptr) 582 580 { 583 - if (--container_of(ptr, struct ipc_rcu_hdr, data)->refcount > 0) 581 + if (!atomic_dec_and_test(&container_of(ptr, struct ipc_rcu_hdr, data)->refcount)) 584 582 return; 585 583 586 584 if (container_of(ptr, struct ipc_rcu_hdr, data)->is_vmalloc) { ··· 671 669 } 672 670 673 671 /** 672 + * ipc_obtain_object 673 + * @ids: ipc identifier set 674 + * @id: ipc id to look for 675 + * 676 + * Look for an id in the ipc ids idr and return associated ipc object. 677 + * 678 + * Call inside the RCU critical section. 679 + * The ipc object is *not* locked on exit. 680 + */ 681 + struct kern_ipc_perm *ipc_obtain_object(struct ipc_ids *ids, int id) 682 + { 683 + struct kern_ipc_perm *out; 684 + int lid = ipcid_to_idx(id); 685 + 686 + out = idr_find(&ids->ipcs_idr, lid); 687 + if (!out) 688 + return ERR_PTR(-EINVAL); 689 + 690 + return out; 691 + } 692 + 693 + /** 674 694 * ipc_lock - Lock an ipc structure without rw_mutex held 675 695 * @ids: IPC identifier set 676 696 * @id: ipc id to look for 677 697 * 678 698 * Look for an id in the ipc ids idr and lock the associated ipc object. 679 699 * 680 - * The ipc object is locked on exit. 700 + * The ipc object is locked on successful exit. 
681 701 */ 682 - 683 702 struct kern_ipc_perm *ipc_lock(struct ipc_ids *ids, int id) 684 703 { 685 704 struct kern_ipc_perm *out; 686 - int lid = ipcid_to_idx(id); 687 705 688 706 rcu_read_lock(); 689 - out = idr_find(&ids->ipcs_idr, lid); 690 - if (out == NULL) { 691 - rcu_read_unlock(); 692 - return ERR_PTR(-EINVAL); 693 - } 707 + out = ipc_obtain_object(ids, id); 708 + if (IS_ERR(out)) 709 + goto err1; 694 710 695 711 spin_lock(&out->lock); 696 - 712 + 697 713 /* ipc_rmid() may have already freed the ID while ipc_lock 698 714 * was spinning: here verify that the structure is still valid 699 715 */ 700 - if (out->deleted) { 701 - spin_unlock(&out->lock); 702 - rcu_read_unlock(); 703 - return ERR_PTR(-EINVAL); 704 - } 716 + if (!out->deleted) 717 + return out; 705 718 719 + spin_unlock(&out->lock); 720 + out = ERR_PTR(-EINVAL); 721 + err1: 722 + rcu_read_unlock(); 723 + return out; 724 + } 725 + 726 + /** 727 + * ipc_obtain_object_check 728 + * @ids: ipc identifier set 729 + * @id: ipc id to look for 730 + * 731 + * Similar to ipc_obtain_object() but also checks 732 + * the ipc object reference counter. 733 + * 734 + * Call inside the RCU critical section. 735 + * The ipc object is *not* locked on exit. 
736 + */ 737 + struct kern_ipc_perm *ipc_obtain_object_check(struct ipc_ids *ids, int id) 738 + { 739 + struct kern_ipc_perm *out = ipc_obtain_object(ids, id); 740 + 741 + if (IS_ERR(out)) 742 + goto out; 743 + 744 + if (ipc_checkid(out, id)) 745 + return ERR_PTR(-EIDRM); 746 + out: 706 747 return out; 707 748 } 708 749 ··· 826 781 struct ipc64_perm *perm, int extra_perm) 827 782 { 828 783 struct kern_ipc_perm *ipcp; 784 + 785 + ipcp = ipcctl_pre_down_nolock(ns, ids, id, cmd, perm, extra_perm); 786 + if (IS_ERR(ipcp)) 787 + goto out; 788 + 789 + spin_lock(&ipcp->lock); 790 + out: 791 + return ipcp; 792 + } 793 + 794 + struct kern_ipc_perm *ipcctl_pre_down_nolock(struct ipc_namespace *ns, 795 + struct ipc_ids *ids, int id, int cmd, 796 + struct ipc64_perm *perm, int extra_perm) 797 + { 829 798 kuid_t euid; 830 - int err; 799 + int err = -EPERM; 800 + struct kern_ipc_perm *ipcp; 831 801 832 802 down_write(&ids->rw_mutex); 833 - ipcp = ipc_lock_check(ids, id); 803 + rcu_read_lock(); 804 + 805 + ipcp = ipc_obtain_object_check(ids, id); 834 806 if (IS_ERR(ipcp)) { 835 807 err = PTR_ERR(ipcp); 836 808 goto out_up; ··· 856 794 audit_ipc_obj(ipcp); 857 795 if (cmd == IPC_SET) 858 796 audit_ipc_set_perm(extra_perm, perm->uid, 859 - perm->gid, perm->mode); 797 + perm->gid, perm->mode); 860 798 861 799 euid = current_euid(); 862 800 if (uid_eq(euid, ipcp->cuid) || uid_eq(euid, ipcp->uid) || 863 801 ns_capable(ns->user_ns, CAP_SYS_ADMIN)) 864 802 return ipcp; 865 803 866 - err = -EPERM; 867 - ipc_unlock(ipcp); 868 804 out_up: 805 + /* 806 + * Unsuccessful lookup, unlock and return 807 + * the corresponding error. 808 + */ 809 + rcu_read_unlock(); 869 810 up_write(&ids->rw_mutex); 811 + 870 812 return ERR_PTR(err); 871 813 } 872 814
+12 -7
ipc/util.h
··· 119 119 * to 0 schedules the rcu destruction. Caller must guarantee locking. 120 120 */ 121 121 void* ipc_rcu_alloc(int size); 122 - void ipc_rcu_getref(void *ptr); 122 + int ipc_rcu_getref(void *ptr); 123 123 void ipc_rcu_putref(void *ptr); 124 124 125 125 struct kern_ipc_perm *ipc_lock(struct ipc_ids *, int); 126 + struct kern_ipc_perm *ipc_obtain_object(struct ipc_ids *ids, int id); 126 127 127 128 void kernel_to_ipc64_perm(struct kern_ipc_perm *in, struct ipc64_perm *out); 128 129 void ipc64_perm_to_ipc_perm(struct ipc64_perm *in, struct ipc_perm *out); 129 130 int ipc_update_perm(struct ipc64_perm *in, struct kern_ipc_perm *out); 131 + struct kern_ipc_perm *ipcctl_pre_down_nolock(struct ipc_namespace *ns, 132 + struct ipc_ids *ids, int id, int cmd, 133 + struct ipc64_perm *perm, int extra_perm); 130 134 struct kern_ipc_perm *ipcctl_pre_down(struct ipc_namespace *ns, 131 135 struct ipc_ids *ids, int id, int cmd, 132 136 struct ipc64_perm *perm, int extra_perm); ··· 154 150 return SEQ_MULTIPLIER * seq + id; 155 151 } 156 152 157 - /* 158 - * Must be called with ipcp locked 159 - */ 160 153 static inline int ipc_checkid(struct kern_ipc_perm *ipcp, int uid) 161 154 { 162 - if (uid / SEQ_MULTIPLIER != ipcp->seq) 163 - return 1; 164 - return 0; 155 + return uid / SEQ_MULTIPLIER != ipcp->seq; 165 156 } 166 157 167 158 static inline void ipc_lock_by_ptr(struct kern_ipc_perm *perm) ··· 171 172 rcu_read_unlock(); 172 173 } 173 174 175 + static inline void ipc_lock_object(struct kern_ipc_perm *perm) 176 + { 177 + spin_lock(&perm->lock); 178 + } 179 + 174 180 struct kern_ipc_perm *ipc_lock_check(struct ipc_ids *ids, int id); 181 + struct kern_ipc_perm *ipc_obtain_object_check(struct ipc_ids *ids, int id); 175 182 int ipcget(struct ipc_namespace *ns, struct ipc_ids *ids, 176 183 struct ipc_ops *ops, struct ipc_params *params); 177 184 void free_ipcs(struct ipc_namespace *ns, struct ipc_ids *ids,