Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Delay struct net freeing while there's a sysfs instance referring to it

* new refcount in struct net, controlling actual freeing of the memory
* new method in kobj_ns_type_operations (->drop_ns())
* ->current_ns() semantics change - it's supposed to be followed by
corresponding ->drop_ns(). For struct net in case of CONFIG_NET_NS it bumps
the new refcount; net_drop_ns() decrements it and calls net_free() if the
last reference has been dropped. Method renamed to ->grab_current_ns().
* old net_free() callers call net_drop_ns() instead.
* sysfs_exit_ns() is gone, along with a large part of the callchain
leading to it; now that the references stored in ->ns[...] stay valid we
do not need to hunt them down and replace them with NULL. That fixes
problems in sysfs_lookup() and sysfs_readdir(), along with getting rid
of sb->s_instances abuse.

Note that the struct net *shutdown* logic has not changed - net_cleanup()
is called exactly when it used to be called. The only thing postponed by
having a sysfs instance referring to that struct net is the actual freeing of
the memory occupied by struct net.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

Al Viro a685e089 dde194a6

+55 -72
+11 -26
fs/sysfs/mount.c
··· 95 95 return error; 96 96 } 97 97 98 + static void free_sysfs_super_info(struct sysfs_super_info *info) 99 + { 100 + int type; 101 + for (type = KOBJ_NS_TYPE_NONE; type < KOBJ_NS_TYPES; type++) 102 + kobj_ns_drop(type, info->ns[type]); 103 + kfree(info); 104 + } 105 + 98 106 static struct dentry *sysfs_mount(struct file_system_type *fs_type, 99 107 int flags, const char *dev_name, void *data) 100 108 { ··· 116 108 return ERR_PTR(-ENOMEM); 117 109 118 110 for (type = KOBJ_NS_TYPE_NONE; type < KOBJ_NS_TYPES; type++) 119 - info->ns[type] = kobj_ns_current(type); 111 + info->ns[type] = kobj_ns_grab_current(type); 120 112 121 113 sb = sget(fs_type, sysfs_test_super, sysfs_set_super, info); 122 114 if (IS_ERR(sb) || sb->s_fs_info != info) 123 - kfree(info); 115 + free_sysfs_super_info(info); 124 116 if (IS_ERR(sb)) 125 117 return ERR_CAST(sb); 126 118 if (!sb->s_root) { ··· 139 131 static void sysfs_kill_sb(struct super_block *sb) 140 132 { 141 133 struct sysfs_super_info *info = sysfs_info(sb); 142 - 143 134 /* Remove the superblock from fs_supers/s_instances 144 135 * so we can't find it, before freeing sysfs_super_info. 145 136 */ 146 137 kill_anon_super(sb); 147 - kfree(info); 138 + free_sysfs_super_info(info); 148 139 } 149 140 150 141 static struct file_system_type sysfs_fs_type = { ··· 151 144 .mount = sysfs_mount, 152 145 .kill_sb = sysfs_kill_sb, 153 146 }; 154 - 155 - void sysfs_exit_ns(enum kobj_ns_type type, const void *ns) 156 - { 157 - struct super_block *sb; 158 - 159 - mutex_lock(&sysfs_mutex); 160 - spin_lock(&sb_lock); 161 - list_for_each_entry(sb, &sysfs_fs_type.fs_supers, s_instances) { 162 - struct sysfs_super_info *info = sysfs_info(sb); 163 - /* 164 - * If we see a superblock on the fs_supers/s_instances 165 - * list the unmount has not completed and sb->s_fs_info 166 - * points to a valid struct sysfs_super_info. 
167 - */ 168 - /* Ignore superblocks with the wrong ns */ 169 - if (info->ns[type] != ns) 170 - continue; 171 - info->ns[type] = NULL; 172 - } 173 - spin_unlock(&sb_lock); 174 - mutex_unlock(&sysfs_mutex); 175 - } 176 147 177 148 int __init sysfs_init(void) 178 149 {
+1 -1
fs/sysfs/sysfs.h
··· 136 136 * instance). 137 137 */ 138 138 struct sysfs_super_info { 139 - const void *ns[KOBJ_NS_TYPES]; 139 + void *ns[KOBJ_NS_TYPES]; 140 140 }; 141 141 #define sysfs_info(SB) ((struct sysfs_super_info *)(SB->s_fs_info)) 142 142 extern struct sysfs_dirent sysfs_root;
+6 -4
include/linux/kobject_ns.h
··· 32 32 33 33 /* 34 34 * Callbacks so sysfs can determine namespaces 35 - * @current_ns: return calling task's namespace 35 + * @grab_current_ns: return a new reference to calling task's namespace 36 36 * @netlink_ns: return namespace to which a sock belongs (right?) 37 37 * @initial_ns: return the initial namespace (i.e. init_net_ns) 38 + * @drop_ns: drops a reference to namespace 38 39 */ 39 40 struct kobj_ns_type_operations { 40 41 enum kobj_ns_type type; 41 - const void *(*current_ns)(void); 42 + void *(*grab_current_ns)(void); 42 43 const void *(*netlink_ns)(struct sock *sk); 43 44 const void *(*initial_ns)(void); 45 + void (*drop_ns)(void *); 44 46 }; 45 47 46 48 int kobj_ns_type_register(const struct kobj_ns_type_operations *ops); ··· 50 48 const struct kobj_ns_type_operations *kobj_child_ns_ops(struct kobject *parent); 51 49 const struct kobj_ns_type_operations *kobj_ns_ops(struct kobject *kobj); 52 50 53 - const void *kobj_ns_current(enum kobj_ns_type type); 51 + void *kobj_ns_grab_current(enum kobj_ns_type type); 54 52 const void *kobj_ns_netlink(enum kobj_ns_type type, struct sock *sk); 55 53 const void *kobj_ns_initial(enum kobj_ns_type type); 56 - void kobj_ns_exit(enum kobj_ns_type type, const void *ns); 54 + void kobj_ns_drop(enum kobj_ns_type type, void *ns); 57 55 58 56 #endif /* _LINUX_KOBJECT_NS_H */
-7
include/linux/sysfs.h
··· 177 177 struct sysfs_dirent *sysfs_get(struct sysfs_dirent *sd); 178 178 void sysfs_put(struct sysfs_dirent *sd); 179 179 180 - /* Called to clear a ns tag when it is no longer valid */ 181 - void sysfs_exit_ns(enum kobj_ns_type type, const void *tag); 182 - 183 180 int __must_check sysfs_init(void); 184 181 185 182 #else /* CONFIG_SYSFS */ ··· 332 335 return NULL; 333 336 } 334 337 static inline void sysfs_put(struct sysfs_dirent *sd) 335 - { 336 - } 337 - 338 - static inline void sysfs_exit_ns(int type, const void *tag) 339 338 { 340 339 } 341 340
+9 -1
include/net/net_namespace.h
··· 35 35 #define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS) 36 36 37 37 struct net { 38 + atomic_t passive; /* To decided when the network 39 + * namespace should be freed. 40 + */ 38 41 atomic_t count; /* To decided when the network 39 - * namespace should be freed. 42 + * namespace should be shut down. 40 43 */ 41 44 #ifdef NETNS_REFCNT_DEBUG 42 45 atomic_t use_count; /* To track references we ··· 157 154 { 158 155 return net1 == net2; 159 156 } 157 + 158 + extern void net_drop_ns(void *); 159 + 160 160 #else 161 161 162 162 static inline struct net *get_net(struct net *net) ··· 181 175 { 182 176 return 1; 183 177 } 178 + 179 + #define net_drop_ns NULL 184 180 #endif 185 181 186 182
+9 -17
lib/kobject.c
··· 948 948 } 949 949 950 950 951 - const void *kobj_ns_current(enum kobj_ns_type type) 951 + void *kobj_ns_grab_current(enum kobj_ns_type type) 952 952 { 953 - const void *ns = NULL; 953 + void *ns = NULL; 954 954 955 955 spin_lock(&kobj_ns_type_lock); 956 956 if ((type > KOBJ_NS_TYPE_NONE) && (type < KOBJ_NS_TYPES) && 957 957 kobj_ns_ops_tbl[type]) 958 - ns = kobj_ns_ops_tbl[type]->current_ns(); 958 + ns = kobj_ns_ops_tbl[type]->grab_current_ns(); 959 959 spin_unlock(&kobj_ns_type_lock); 960 960 961 961 return ns; ··· 987 987 return ns; 988 988 } 989 989 990 - /* 991 - * kobj_ns_exit - invalidate a namespace tag 992 - * 993 - * @type: the namespace type (i.e. KOBJ_NS_TYPE_NET) 994 - * @ns: the actual namespace being invalidated 995 - * 996 - * This is called when a tag is no longer valid. For instance, 997 - * when a network namespace exits, it uses this helper to 998 - * make sure no sb's sysfs_info points to the now-invalidated 999 - * netns. 1000 - */ 1001 - void kobj_ns_exit(enum kobj_ns_type type, const void *ns) 990 + void kobj_ns_drop(enum kobj_ns_type type, void *ns) 1002 991 { 1003 - sysfs_exit_ns(type, ns); 992 + spin_lock(&kobj_ns_type_lock); 993 + if ((type > KOBJ_NS_TYPE_NONE) && (type < KOBJ_NS_TYPES) && 994 + kobj_ns_ops_tbl[type] && kobj_ns_ops_tbl[type]->drop_ns) 995 + kobj_ns_ops_tbl[type]->drop_ns(ns); 996 + spin_unlock(&kobj_ns_type_lock); 1004 997 } 1005 - 1006 998 1007 999 EXPORT_SYMBOL(kobject_get); 1008 1000 EXPORT_SYMBOL(kobject_put);
+9 -14
net/core/net-sysfs.c
··· 1179 1179 #endif 1180 1180 } 1181 1181 1182 - static const void *net_current_ns(void) 1182 + static void *net_grab_current_ns(void) 1183 1183 { 1184 - return current->nsproxy->net_ns; 1184 + struct net *ns = current->nsproxy->net_ns; 1185 + #ifdef CONFIG_NET_NS 1186 + if (ns) 1187 + atomic_inc(&ns->passive); 1188 + #endif 1189 + return ns; 1185 1190 } 1186 1191 1187 1192 static const void *net_initial_ns(void) ··· 1201 1196 1202 1197 struct kobj_ns_type_operations net_ns_type_operations = { 1203 1198 .type = KOBJ_NS_TYPE_NET, 1204 - .current_ns = net_current_ns, 1199 + .grab_current_ns = net_grab_current_ns, 1205 1200 .netlink_ns = net_netlink_ns, 1206 1201 .initial_ns = net_initial_ns, 1202 + .drop_ns = net_drop_ns, 1207 1203 }; 1208 1204 EXPORT_SYMBOL_GPL(net_ns_type_operations); 1209 - 1210 - static void net_kobj_ns_exit(struct net *net) 1211 - { 1212 - kobj_ns_exit(KOBJ_NS_TYPE_NET, net); 1213 - } 1214 - 1215 - static struct pernet_operations kobj_net_ops = { 1216 - .exit = net_kobj_ns_exit, 1217 - }; 1218 - 1219 1205 1220 1206 #ifdef CONFIG_HOTPLUG 1221 1207 static int netdev_uevent(struct device *d, struct kobj_uevent_env *env) ··· 1335 1339 int netdev_kobject_init(void) 1336 1340 { 1337 1341 kobj_ns_type_register(&net_ns_type_operations); 1338 - register_pernet_subsys(&kobj_net_ops); 1339 1342 return class_register(&net_class); 1340 1343 }
+10 -2
net/core/net_namespace.c
··· 128 128 LIST_HEAD(net_exit_list); 129 129 130 130 atomic_set(&net->count, 1); 131 + atomic_set(&net->passive, 1); 131 132 132 133 #ifdef NETNS_REFCNT_DEBUG 133 134 atomic_set(&net->use_count, 0); ··· 211 210 kmem_cache_free(net_cachep, net); 212 211 } 213 212 213 + void net_drop_ns(void *p) 214 + { 215 + struct net *ns = p; 216 + if (ns && atomic_dec_and_test(&ns->passive)) 217 + net_free(ns); 218 + } 219 + 214 220 struct net *copy_net_ns(unsigned long flags, struct net *old_net) 215 221 { 216 222 struct net *net; ··· 238 230 } 239 231 mutex_unlock(&net_mutex); 240 232 if (rv < 0) { 241 - net_free(net); 233 + net_drop_ns(net); 242 234 return ERR_PTR(rv); 243 235 } 244 236 return net; ··· 294 286 /* Finally it is safe to free my network namespace structure */ 295 287 list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) { 296 288 list_del_init(&net->exit_list); 297 - net_free(net); 289 + net_drop_ns(net); 298 290 } 299 291 } 300 292 static DECLARE_WORK(net_cleanup_work, cleanup_net);