Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

[PATCH] allow delayed freeing of ctl_table_header

Refcount the sucker; instead of freeing it at the end of unregistration
just drop the refcount and free only when it hits zero. Make sure that
we _always_ make ->unregistering non-NULL in start_unregistering().

That allows anybody to get a reference to such a puppy, preventing its
freeing and reuse. It does *not* block unregistration. Anybody who
holds such a reference can
* try to grab a "use" reference (sysctl_head_grab()); that will
succeed if and only if it hadn't entered unregistration yet. If it
succeeds, we can use it in all normal ways until we release the "use"
reference (with sysctl_head_finish()). Note that this relies on having
->unregistering become non-NULL in all cases when one starts to unregister
the sucker.
* keep pointers to ctl_table entries; they *can* be freed if
the entire thing is unregistered. However, if sysctl_head_grab() succeeds,
we know that unregistration had not happened (and will not happen until
sysctl_head_finish()) and such pointers can be used safely.

IOW, now we can have inodes under /proc/sys keep references to ctl_table
entries, protecting them with references to ctl_table_header and
grabbing the latter for the duration of operations that require access
to ctl_table. That won't cause deadlocks, since unregistration will not
be stopped by merely keeping a reference to ctl_table_header.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

Al Viro f7e6ced4 73455092

+42 -1
+6
include/linux/sysctl.h
··· 957 957 struct ctl_table_set *parent, 958 958 int (*is_seen)(struct ctl_table_set *)); 959 959 960 + struct ctl_table_header; 961 + 962 + extern void sysctl_head_get(struct ctl_table_header *); 963 + extern void sysctl_head_put(struct ctl_table_header *); 964 + extern struct ctl_table_header *sysctl_head_grab(struct ctl_table_header *); 960 965 extern struct ctl_table_header *sysctl_head_next(struct ctl_table_header *prev); 961 966 extern struct ctl_table_header *__sysctl_head_next(struct nsproxy *namespaces, 962 967 struct ctl_table_header *prev); ··· 1078 1073 struct ctl_table *ctl_table; 1079 1074 struct list_head ctl_entry; 1080 1075 int used; 1076 + int count; 1081 1077 struct completion *unregistering; 1082 1078 struct ctl_table *ctl_table_arg; 1083 1079 struct ctl_table_root *root;
+36 -1
kernel/sysctl.c
··· 1387 1387 spin_unlock(&sysctl_lock); 1388 1388 wait_for_completion(&wait); 1389 1389 spin_lock(&sysctl_lock); 1390 + } else { 1391 + /* anything non-NULL; we'll never dereference it */ 1392 + p->unregistering = ERR_PTR(-EINVAL); 1390 1393 } 1391 1394 /* 1392 1395 * do not remove from the list until nobody holds it; walking the 1393 1396 * list in do_sysctl() relies on that. 1394 1397 */ 1395 1398 list_del_init(&p->ctl_entry); 1399 + } 1400 + 1401 + void sysctl_head_get(struct ctl_table_header *head) 1402 + { 1403 + spin_lock(&sysctl_lock); 1404 + head->count++; 1405 + spin_unlock(&sysctl_lock); 1406 + } 1407 + 1408 + void sysctl_head_put(struct ctl_table_header *head) 1409 + { 1410 + spin_lock(&sysctl_lock); 1411 + if (!--head->count) 1412 + kfree(head); 1413 + spin_unlock(&sysctl_lock); 1414 + } 1415 + 1416 + struct ctl_table_header *sysctl_head_grab(struct ctl_table_header *head) 1417 + { 1418 + if (!head) 1419 + BUG(); 1420 + spin_lock(&sysctl_lock); 1421 + if (!use_table(head)) 1422 + head = ERR_PTR(-ENOENT); 1423 + spin_unlock(&sysctl_lock); 1424 + return head; 1396 1425 } 1397 1426 1398 1427 void sysctl_head_finish(struct ctl_table_header *head) ··· 1800 1771 header->unregistering = NULL; 1801 1772 header->root = root; 1802 1773 sysctl_set_parent(NULL, header->ctl_table); 1774 + header->count = 1; 1803 1775 #ifdef CONFIG_SYSCTL_SYSCALL_CHECK 1804 1776 if (sysctl_check_table(namespaces, header->ctl_table)) { 1805 1777 kfree(header); ··· 1864 1834 1865 1835 spin_lock(&sysctl_lock); 1866 1836 start_unregistering(header); 1837 + if (!--header->count) 1838 + kfree(header); 1867 1839 spin_unlock(&sysctl_lock); 1868 - kfree(header); 1869 1840 } 1870 1841 1871 1842 void setup_sysctl_set(struct ctl_table_set *p, ··· 1897 1866 void setup_sysctl_set(struct ctl_table_set *p, 1898 1867 struct ctl_table_set *parent, 1899 1868 int (*is_seen)(struct ctl_table_set *)) 1869 + { 1870 + } 1871 + 1872 + void sysctl_head_put(struct ctl_table_header *head) 1900 1873 { 1901 1874 
} 1902 1875