Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

ipc: do not use a negative value to re-enable msgmni automatic recomputing

This patch proposes an alternative to the "magical
positive-versus-negative number trick" Andrew complained about last week
in http://lkml.org/lkml/2008/6/24/418.

This had been introduced with the patches that scale msgmni to the amount
of lowmem. With these patches, msgmni has a registered notification
routine that recomputes msgmni value upon memory add/remove or ipc
namespace creation/removal.

When msgmni is changed from user space (i.e. value written to the proc
file), that notification routine is unregistered, and the way to make it
registered back is to write a negative value into the proc file. This is
the "magical positive-versus-negative number trick".

To fix this, a new proc file is introduced: /proc/sys/kernel/auto_msgmni.
This file acts as ON/OFF for msgmni automatic recomputing.

With this patch, the process is the following:
1) kernel boots in "automatic recomputing mode"
/proc/sys/kernel/msgmni contains the value that has been computed (depends
on lowmem)
/proc/sys/kernel/auto_msgmni contains "1"

2) echo <val> > /proc/sys/kernel/msgmni
. sets msg_ctlmni to <val>
. de-activates automatic recomputing (i.e. if, say, some memory is added
msgmni won't be recomputed anymore)
. /proc/sys/kernel/auto_msgmni now contains "0"

3) echo "0" > /proc/sys/kernel/auto_msgmni
. de-activates msgmni automatic recomputing
(this has the same effect as 2), except that msg_ctlmni's value stays
blocked at its current value)

4) echo "1" > /proc/sys/kernel/auto_msgmni
. recomputes msgmni's value based on the current available memory size
and number of ipc namespaces
. re-activates automatic recomputing for msgmni.

Signed-off-by: Nadia Derbey <Nadia.Derbey@bull.net>
Cc: Solofo Ramangalahy <Solofo.Ramangalahy@bull.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

authored by

Nadia Derbey and committed by
Linus Torvalds
9eefe520 f1a43f93

+76 -19
+2 -1
include/linux/ipc_namespace.h
··· 36 36 int msg_ctlmni; 37 37 atomic_t msg_bytes; 38 38 atomic_t msg_hdrs; 39 + int auto_msgmni; 39 40 40 41 size_t shm_ctlmax; 41 42 size_t shm_ctlall; ··· 54 53 55 54 extern int register_ipcns_notifier(struct ipc_namespace *); 56 55 extern int cond_register_ipcns_notifier(struct ipc_namespace *); 57 - extern int unregister_ipcns_notifier(struct ipc_namespace *); 56 + extern void unregister_ipcns_notifier(struct ipc_namespace *); 58 57 extern int ipcns_notify(unsigned long); 59 58 60 59 #else /* CONFIG_SYSVIPC */
+59 -13
ipc/ipc_sysctl.c
··· 27 27 } 28 28 29 29 /* 30 - * Routine that is called when a tunable has successfully been changed by 31 - * hand and it has a callback routine registered on the ipc namespace notifier 32 - * chain: we don't want such tunables to be recomputed anymore upon memory 33 - * add/remove or ipc namespace creation/removal. 34 - * They can come back to a recomputable state by being set to a <0 value. 30 + * Routine that is called when the file "auto_msgmni" has successfully been 31 + * written. 32 + * Two values are allowed: 33 + * 0: unregister msgmni's callback routine from the ipc namespace notifier 34 + * chain. This means that msgmni won't be recomputed anymore upon memory 35 + * add/remove or ipc namespace creation/removal. 36 + * 1: register back the callback routine. 35 37 */ 36 - static void tunable_set_callback(int val) 38 + static void ipc_auto_callback(int val) 37 39 { 38 - if (val >= 0) 40 + if (!val) 39 41 unregister_ipcns_notifier(current->nsproxy->ipc_ns); 40 42 else { 41 43 /* ··· 73 71 rc = proc_dointvec(&ipc_table, write, filp, buffer, lenp, ppos); 74 72 75 73 if (write && !rc && lenp_bef == *lenp) 76 - tunable_set_callback(*((int *)(ipc_table.data))); 74 + /* 75 + * Tunable has successfully been changed by hand. Disable its 76 + * automatic adjustment. This simply requires unregistering 77 + * the notifiers that trigger recalculation. 
78 + */ 79 + unregister_ipcns_notifier(current->nsproxy->ipc_ns); 77 80 78 81 return rc; 79 82 } ··· 94 87 lenp, ppos); 95 88 } 96 89 90 + static int proc_ipcauto_dointvec_minmax(ctl_table *table, int write, 91 + struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) 92 + { 93 + struct ctl_table ipc_table; 94 + size_t lenp_bef = *lenp; 95 + int oldval; 96 + int rc; 97 + 98 + memcpy(&ipc_table, table, sizeof(ipc_table)); 99 + ipc_table.data = get_ipc(table); 100 + oldval = *((int *)(ipc_table.data)); 101 + 102 + rc = proc_dointvec_minmax(&ipc_table, write, filp, buffer, lenp, ppos); 103 + 104 + if (write && !rc && lenp_bef == *lenp) { 105 + int newval = *((int *)(ipc_table.data)); 106 + /* 107 + * The file "auto_msgmni" has correctly been set. 108 + * React by (un)registering the corresponding tunable, if the 109 + * value has changed. 110 + */ 111 + if (newval != oldval) 112 + ipc_auto_callback(newval); 113 + } 114 + 115 + return rc; 116 + } 117 + 97 118 #else 98 119 #define proc_ipc_doulongvec_minmax NULL 99 120 #define proc_ipc_dointvec NULL 100 121 #define proc_ipc_callback_dointvec NULL 122 + #define proc_ipcauto_dointvec_minmax NULL 101 123 #endif 102 124 103 125 #ifdef CONFIG_SYSCTL_SYSCALL ··· 178 142 rc = sysctl_ipc_data(table, name, nlen, oldval, oldlenp, newval, 179 143 newlen); 180 144 181 - if (newval && newlen && rc > 0) { 145 + if (newval && newlen && rc > 0) 182 146 /* 183 147 * Tunable has successfully been changed from userland 184 148 */ 185 - int *data = get_ipc(table); 186 - 187 - tunable_set_callback(*data); 188 - } 149 + unregister_ipcns_notifier(current->nsproxy->ipc_ns); 189 150 190 151 return rc; 191 152 } ··· 190 157 #define sysctl_ipc_data NULL 191 158 #define sysctl_ipc_registered_data NULL 192 159 #endif 160 + 161 + static int zero; 162 + static int one = 1; 193 163 194 164 static struct ctl_table ipc_kern_table[] = { 195 165 { ··· 257 221 .mode = 0644, 258 222 .proc_handler = proc_ipc_dointvec, 259 223 .strategy = 
sysctl_ipc_data, 224 + }, 225 + { 226 + .ctl_name = CTL_UNNUMBERED, 227 + .procname = "auto_msgmni", 228 + .data = &init_ipc_ns.auto_msgmni, 229 + .maxlen = sizeof(int), 230 + .mode = 0644, 231 + .proc_handler = proc_ipcauto_dointvec_minmax, 232 + .extra1 = &zero, 233 + .extra2 = &one, 260 234 }, 261 235 {} 262 236 };
+15 -5
ipc/ipcns_notifier.c
··· 55 55 56 56 int register_ipcns_notifier(struct ipc_namespace *ns) 57 57 { 58 + int rc; 59 + 58 60 memset(&ns->ipcns_nb, 0, sizeof(ns->ipcns_nb)); 59 61 ns->ipcns_nb.notifier_call = ipcns_callback; 60 62 ns->ipcns_nb.priority = IPCNS_CALLBACK_PRI; 61 - return blocking_notifier_chain_register(&ipcns_chain, &ns->ipcns_nb); 63 + rc = blocking_notifier_chain_register(&ipcns_chain, &ns->ipcns_nb); 64 + if (!rc) 65 + ns->auto_msgmni = 1; 66 + return rc; 62 67 } 63 68 64 69 int cond_register_ipcns_notifier(struct ipc_namespace *ns) 65 70 { 71 + int rc; 72 + 66 73 memset(&ns->ipcns_nb, 0, sizeof(ns->ipcns_nb)); 67 74 ns->ipcns_nb.notifier_call = ipcns_callback; 68 75 ns->ipcns_nb.priority = IPCNS_CALLBACK_PRI; 69 - return blocking_notifier_chain_cond_register(&ipcns_chain, 76 + rc = blocking_notifier_chain_cond_register(&ipcns_chain, 70 77 &ns->ipcns_nb); 78 + if (!rc) 79 + ns->auto_msgmni = 1; 80 + return rc; 71 81 } 72 82 73 - int unregister_ipcns_notifier(struct ipc_namespace *ns) 83 + void unregister_ipcns_notifier(struct ipc_namespace *ns) 74 84 { 75 - return blocking_notifier_chain_unregister(&ipcns_chain, 76 - &ns->ipcns_nb); 85 + blocking_notifier_chain_unregister(&ipcns_chain, &ns->ipcns_nb); 86 + ns->auto_msgmni = 0; 77 87 } 78 88 79 89 int ipcns_notify(unsigned long val)