Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

ipvs: fix crash if scheduler is changed

I overlooked the svc->sched_data usage by schedulers
when the services were converted to RCU in 3.10. Now
the rarely used ipvsadm -E command can change the scheduler,
but because ip_vs_bind_scheduler is called before
ip_vs_unbind_scheduler, we provide the new sched_data
to the old scheduler, resulting in a crash.

To fix it without changing the scheduler methods we
have to use synchronize_rcu() only for the editing case.
It means all svc->scheduler readers should expect a
NULL value. To avoid breakage for the service listing
and ipvsadm -R we can use the "none" name to indicate
that no scheduler is assigned, a state in which we drop
new connections.

Reported-by: Alexander Vasiliev <a.vasylev@404-group.com>
Fixes: ceec4c381681 ("ipvs: convert services to rcu")
Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>

authored by

Julian Anastasov and committed by
Simon Horman
05f00505 4754957f

+70 -38
+14 -2
net/netfilter/ipvs/ip_vs_core.c
··· 319 319 * return *ignored=0 i.e. ICMP and NF_DROP 320 320 */ 321 321 sched = rcu_dereference(svc->scheduler); 322 - dest = sched->schedule(svc, skb, iph); 322 + if (sched) { 323 + /* read svc->sched_data after svc->scheduler */ 324 + smp_rmb(); 325 + dest = sched->schedule(svc, skb, iph); 326 + } else { 327 + dest = NULL; 328 + } 323 329 if (!dest) { 324 330 IP_VS_DBG(1, "p-schedule: no dest found.\n"); 325 331 kfree(param.pe_data); ··· 473 467 } 474 468 475 469 sched = rcu_dereference(svc->scheduler); 476 - dest = sched->schedule(svc, skb, iph); 470 + if (sched) { 471 + /* read svc->sched_data after svc->scheduler */ 472 + smp_rmb(); 473 + dest = sched->schedule(svc, skb, iph); 474 + } else { 475 + dest = NULL; 476 + } 477 477 if (dest == NULL) { 478 478 IP_VS_DBG(1, "Schedule: no dest found.\n"); 479 479 return NULL;
+50 -30
net/netfilter/ipvs/ip_vs_ctl.c
··· 842 842 __ip_vs_dst_cache_reset(dest); 843 843 spin_unlock_bh(&dest->dst_lock); 844 844 845 - sched = rcu_dereference_protected(svc->scheduler, 1); 846 845 if (add) { 847 846 ip_vs_start_estimator(svc->net, &dest->stats); 848 847 list_add_rcu(&dest->n_list, &svc->destinations); 849 848 svc->num_dests++; 850 - if (sched->add_dest) 849 + sched = rcu_dereference_protected(svc->scheduler, 1); 850 + if (sched && sched->add_dest) 851 851 sched->add_dest(svc, dest); 852 852 } else { 853 - if (sched->upd_dest) 853 + sched = rcu_dereference_protected(svc->scheduler, 1); 854 + if (sched && sched->upd_dest) 854 855 sched->upd_dest(svc, dest); 855 856 } 856 857 } ··· 1085 1084 struct ip_vs_scheduler *sched; 1086 1085 1087 1086 sched = rcu_dereference_protected(svc->scheduler, 1); 1088 - if (sched->del_dest) 1087 + if (sched && sched->del_dest) 1089 1088 sched->del_dest(svc, dest); 1090 1089 } 1091 1090 } ··· 1176 1175 ip_vs_use_count_inc(); 1177 1176 1178 1177 /* Lookup the scheduler by 'u->sched_name' */ 1179 - sched = ip_vs_scheduler_get(u->sched_name); 1180 - if (sched == NULL) { 1181 - pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name); 1182 - ret = -ENOENT; 1183 - goto out_err; 1178 + if (strcmp(u->sched_name, "none")) { 1179 + sched = ip_vs_scheduler_get(u->sched_name); 1180 + if (!sched) { 1181 + pr_info("Scheduler module ip_vs_%s not found\n", 1182 + u->sched_name); 1183 + ret = -ENOENT; 1184 + goto out_err; 1185 + } 1184 1186 } 1185 1187 1186 1188 if (u->pe_name && *u->pe_name) { ··· 1244 1240 spin_lock_init(&svc->stats.lock); 1245 1241 1246 1242 /* Bind the scheduler */ 1247 - ret = ip_vs_bind_scheduler(svc, sched); 1248 - if (ret) 1249 - goto out_err; 1250 - sched = NULL; 1243 + if (sched) { 1244 + ret = ip_vs_bind_scheduler(svc, sched); 1245 + if (ret) 1246 + goto out_err; 1247 + sched = NULL; 1248 + } 1251 1249 1252 1250 /* Bind the ct retriever */ 1253 1251 RCU_INIT_POINTER(svc->pe, pe); ··· 1297 1291 static int 1298 1292 
ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u) 1299 1293 { 1300 - struct ip_vs_scheduler *sched, *old_sched; 1294 + struct ip_vs_scheduler *sched = NULL, *old_sched; 1301 1295 struct ip_vs_pe *pe = NULL, *old_pe = NULL; 1302 1296 int ret = 0; 1303 1297 1304 1298 /* 1305 1299 * Lookup the scheduler, by 'u->sched_name' 1306 1300 */ 1307 - sched = ip_vs_scheduler_get(u->sched_name); 1308 - if (sched == NULL) { 1309 - pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name); 1310 - return -ENOENT; 1301 + if (strcmp(u->sched_name, "none")) { 1302 + sched = ip_vs_scheduler_get(u->sched_name); 1303 + if (!sched) { 1304 + pr_info("Scheduler module ip_vs_%s not found\n", 1305 + u->sched_name); 1306 + return -ENOENT; 1307 + } 1311 1308 } 1312 1309 old_sched = sched; 1313 1310 ··· 1338 1329 1339 1330 old_sched = rcu_dereference_protected(svc->scheduler, 1); 1340 1331 if (sched != old_sched) { 1341 - /* Bind the new scheduler */ 1342 - ret = ip_vs_bind_scheduler(svc, sched); 1343 - if (ret) { 1344 - old_sched = sched; 1345 - goto out; 1332 + if (old_sched) { 1333 + ip_vs_unbind_scheduler(svc, old_sched); 1334 + RCU_INIT_POINTER(svc->scheduler, NULL); 1335 + /* Wait all svc->sched_data users */ 1336 + synchronize_rcu(); 1346 1337 } 1347 - /* Unbind the old scheduler on success */ 1348 - ip_vs_unbind_scheduler(svc, old_sched); 1338 + /* Bind the new scheduler */ 1339 + if (sched) { 1340 + ret = ip_vs_bind_scheduler(svc, sched); 1341 + if (ret) { 1342 + ip_vs_scheduler_put(sched); 1343 + goto out; 1344 + } 1345 + } 1349 1346 } 1350 1347 1351 1348 /* ··· 1997 1982 const struct ip_vs_iter *iter = seq->private; 1998 1983 const struct ip_vs_dest *dest; 1999 1984 struct ip_vs_scheduler *sched = rcu_dereference(svc->scheduler); 1985 + char *sched_name = sched ? 
sched->name : "none"; 2000 1986 2001 1987 if (iter->table == ip_vs_svc_table) { 2002 1988 #ifdef CONFIG_IP_VS_IPV6 ··· 2006 1990 ip_vs_proto_name(svc->protocol), 2007 1991 &svc->addr.in6, 2008 1992 ntohs(svc->port), 2009 - sched->name); 1993 + sched_name); 2010 1994 else 2011 1995 #endif 2012 1996 seq_printf(seq, "%s %08X:%04X %s %s ", 2013 1997 ip_vs_proto_name(svc->protocol), 2014 1998 ntohl(svc->addr.ip), 2015 1999 ntohs(svc->port), 2016 - sched->name, 2000 + sched_name, 2017 2001 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":""); 2018 2002 } else { 2019 2003 seq_printf(seq, "FWM %08X %s %s", 2020 - svc->fwmark, sched->name, 2004 + svc->fwmark, sched_name, 2021 2005 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":""); 2022 2006 } 2023 2007 ··· 2443 2427 { 2444 2428 struct ip_vs_scheduler *sched; 2445 2429 struct ip_vs_kstats kstats; 2430 + char *sched_name; 2446 2431 2447 2432 sched = rcu_dereference_protected(src->scheduler, 1); 2433 + sched_name = sched ? sched->name : "none"; 2448 2434 dst->protocol = src->protocol; 2449 2435 dst->addr = src->addr.ip; 2450 2436 dst->port = src->port; 2451 2437 dst->fwmark = src->fwmark; 2452 - strlcpy(dst->sched_name, sched->name, sizeof(dst->sched_name)); 2438 + strlcpy(dst->sched_name, sched_name, sizeof(dst->sched_name)); 2453 2439 dst->flags = src->flags; 2454 2440 dst->timeout = src->timeout / HZ; 2455 2441 dst->netmask = src->netmask; ··· 2910 2892 struct ip_vs_flags flags = { .flags = svc->flags, 2911 2893 .mask = ~0 }; 2912 2894 struct ip_vs_kstats kstats; 2895 + char *sched_name; 2913 2896 2914 2897 nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE); 2915 2898 if (!nl_service) ··· 2929 2910 } 2930 2911 2931 2912 sched = rcu_dereference_protected(svc->scheduler, 1); 2913 + sched_name = sched ? 
sched->name : "none"; 2932 2914 pe = rcu_dereference_protected(svc->pe, 1); 2933 - if (nla_put_string(skb, IPVS_SVC_ATTR_SCHED_NAME, sched->name) || 2915 + if (nla_put_string(skb, IPVS_SVC_ATTR_SCHED_NAME, sched_name) || 2934 2916 (pe && nla_put_string(skb, IPVS_SVC_ATTR_PE_NAME, pe->name)) || 2935 2917 nla_put(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags) || 2936 2918 nla_put_u32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ) ||
+6 -6
net/netfilter/ipvs/ip_vs_sched.c
··· 74 74 75 75 if (sched->done_service) 76 76 sched->done_service(svc); 77 - /* svc->scheduler can not be set to NULL */ 77 + /* svc->scheduler can be set to NULL only by caller */ 78 78 } 79 79 80 80 ··· 147 147 148 148 void ip_vs_scheduler_err(struct ip_vs_service *svc, const char *msg) 149 149 { 150 - struct ip_vs_scheduler *sched; 150 + struct ip_vs_scheduler *sched = rcu_dereference(svc->scheduler); 151 + char *sched_name = sched ? sched->name : "none"; 151 152 152 - sched = rcu_dereference(svc->scheduler); 153 153 if (svc->fwmark) { 154 154 IP_VS_ERR_RL("%s: FWM %u 0x%08X - %s\n", 155 - sched->name, svc->fwmark, svc->fwmark, msg); 155 + sched_name, svc->fwmark, svc->fwmark, msg); 156 156 #ifdef CONFIG_IP_VS_IPV6 157 157 } else if (svc->af == AF_INET6) { 158 158 IP_VS_ERR_RL("%s: %s [%pI6c]:%d - %s\n", 159 - sched->name, ip_vs_proto_name(svc->protocol), 159 + sched_name, ip_vs_proto_name(svc->protocol), 160 160 &svc->addr.in6, ntohs(svc->port), msg); 161 161 #endif 162 162 } else { 163 163 IP_VS_ERR_RL("%s: %s %pI4:%d - %s\n", 164 - sched->name, ip_vs_proto_name(svc->protocol), 164 + sched_name, ip_vs_proto_name(svc->protocol), 165 165 &svc->addr.ip, ntohs(svc->port), msg); 166 166 } 167 167 }