Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

ipc/sem.c: always use only one queue for alter operations

There are two places that can contain alter operations:
- the global queue: sma->pending_alter
- the per-semaphore queues: sma->sem_base[].pending_alter.

Since one of the queues must be processed first, this causes an odd
prioritization of the wakeups: complex operations have priority over
simple ops.

The patch restores the behavior of Linux <= 3.0.9: the longest-waiting
operation has the highest priority.

This is done by using only one queue:
- if there are complex ops, then sma->pending_alter is used.
- otherwise, the per-semaphore queues are used.

As a side effect, do_smart_update_queue() becomes much simpler: no more
goto logic.

Signed-off-by: Manfred Spraul <manfred@colorfullife.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Davidlohr Bueso <davidlohr.bueso@hp.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

authored by

Manfred Spraul and committed by
Linus Torvalds
f269f40a 1a82e9e1

+88 -40
+88 -40
ipc/sem.c
··· 192 192 IPC_SEM_IDS, sysvipc_sem_proc_show); 193 193 } 194 194 195 + /** 196 + * unmerge_queues - unmerge queues, if possible. 197 + * @sma: semaphore array 198 + * 199 + * The function unmerges the wait queues if complex_count is 0. 200 + * It must be called prior to dropping the global semaphore array lock. 201 + */ 202 + static void unmerge_queues(struct sem_array *sma) 203 + { 204 + struct sem_queue *q, *tq; 205 + 206 + /* complex operations still around? */ 207 + if (sma->complex_count) 208 + return; 209 + /* 210 + * We will switch back to simple mode. 211 + * Move all pending operation back into the per-semaphore 212 + * queues. 213 + */ 214 + list_for_each_entry_safe(q, tq, &sma->pending_alter, list) { 215 + struct sem *curr; 216 + curr = &sma->sem_base[q->sops[0].sem_num]; 217 + 218 + list_add_tail(&q->list, &curr->pending_alter); 219 + } 220 + INIT_LIST_HEAD(&sma->pending_alter); 221 + } 222 + 223 + /** 224 + * merge_queues - Merge single semop queues into global queue 225 + * @sma: semaphore array 226 + * 227 + * This function merges all per-semaphore queues into the global queue. 228 + * It is necessary to achieve FIFO ordering for the pending single-sop 229 + * operations when a multi-semop operation must sleep. 230 + * Only the alter operations must be moved, the const operations can stay. 231 + */ 232 + static void merge_queues(struct sem_array *sma) 233 + { 234 + int i; 235 + for (i = 0; i < sma->sem_nsems; i++) { 236 + struct sem *sem = sma->sem_base + i; 237 + 238 + list_splice_init(&sem->pending_alter, &sma->pending_alter); 239 + } 240 + } 241 + 195 242 /* 196 243 * If the request contains only one semaphore operation, and there are 197 244 * no complex transactions pending, lock only the semaphore involved. 
··· 309 262 static inline void sem_unlock(struct sem_array *sma, int locknum) 310 263 { 311 264 if (locknum == -1) { 265 + unmerge_queues(sma); 312 266 ipc_unlock_object(&sma->sem_perm); 313 267 } else { 314 268 struct sem *sem = sma->sem_base + locknum; ··· 879 831 int otime, struct list_head *pt) 880 832 { 881 833 int i; 882 - int progress; 883 834 884 835 otime |= do_smart_wakeup_zero(sma, sops, nsops, pt); 885 836 886 - progress = 1; 887 - retry_global: 888 - if (sma->complex_count) { 889 - if (update_queue(sma, -1, pt)) { 890 - progress = 1; 891 - otime = 1; 892 - sops = NULL; 893 - } 894 - } 895 - if (!progress) 896 - goto done; 897 - 898 - if (!sops) { 899 - /* No semops; something special is going on. */ 900 - for (i = 0; i < sma->sem_nsems; i++) { 901 - if (update_queue(sma, i, pt)) { 902 - otime = 1; 903 - progress = 1; 837 + if (!list_empty(&sma->pending_alter)) { 838 + /* semaphore array uses the global queue - just process it. */ 839 + otime |= update_queue(sma, -1, pt); 840 + } else { 841 + if (!sops) { 842 + /* 843 + * No sops, thus the modified semaphores are not 844 + * known. Check all. 845 + */ 846 + for (i = 0; i < sma->sem_nsems; i++) 847 + otime |= update_queue(sma, i, pt); 848 + } else { 849 + /* 850 + * Check the semaphores that were increased: 851 + * - No complex ops, thus all sleeping ops are 852 + * decrease. 853 + * - if we decreased the value, then any sleeping 854 + * semaphore ops wont be able to run: If the 855 + * previous value was too small, then the new 856 + * value will be too small, too. 857 + */ 858 + for (i = 0; i < nsops; i++) { 859 + if (sops[i].sem_op > 0) { 860 + otime |= update_queue(sma, 861 + sops[i].sem_num, pt); 862 + } 904 863 } 905 864 } 906 - goto done_checkretry; 907 865 } 908 - 909 - /* Check the semaphores that were modified. 
*/ 910 - for (i = 0; i < nsops; i++) { 911 - if (sops[i].sem_op > 0 || 912 - (sops[i].sem_op < 0 && 913 - sma->sem_base[sops[i].sem_num].semval == 0)) 914 - if (update_queue(sma, sops[i].sem_num, pt)) { 915 - otime = 1; 916 - progress = 1; 917 - } 918 - } 919 - done_checkretry: 920 - if (progress) { 921 - progress = 0; 922 - goto retry_global; 923 - } 924 - done: 925 866 if (otime) 926 867 sma->sem_otime = get_seconds(); 927 868 } ··· 1784 1747 struct sem *curr; 1785 1748 curr = &sma->sem_base[sops->sem_num]; 1786 1749 1787 - if (alter) 1788 - list_add_tail(&queue.list, &curr->pending_alter); 1789 - else 1750 + if (alter) { 1751 + if (sma->complex_count) { 1752 + list_add_tail(&queue.list, 1753 + &sma->pending_alter); 1754 + } else { 1755 + 1756 + list_add_tail(&queue.list, 1757 + &curr->pending_alter); 1758 + } 1759 + } else { 1790 1760 list_add_tail(&queue.list, &curr->pending_const); 1761 + } 1791 1762 } else { 1763 + if (!sma->complex_count) 1764 + merge_queues(sma); 1765 + 1792 1766 if (alter) 1793 1767 list_add_tail(&queue.list, &sma->pending_alter); 1794 1768 else