[IPVS]: ip_vs_ftp breaks connections using persistence

ip_vs_ftp, when loaded, can create NAT connections with an unknown client
port for passive FTP. For such expectations we look up with cport=0 on
the incoming packet, but that lookup also matches the format of the
persistence templates, causing packets to other persistent virtual servers
to be forwarded to the real server without creating a connection. Later the
reply packets are treated as foreign and are not SNAT-ed.

This patch changes the connection lookup for packets from clients:

* introduce the IP_VS_CONN_F_TEMPLATE connection flag to mark a
connection as a template

* create a new connection lookup function just for templates -
ip_vs_ct_in_get

* make sure ip_vs_conn_in_get hits only connections with the
IP_VS_CONN_F_NO_CPORT flag set when s_port is 0. This way
we avoid returning a template when looking up with cport=0 (ftp)

Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by Julian Anastasov and committed by David S. Miller 87375ab4 f5e229db

+63 -17
+3
include/net/ip_vs.h
··· 84 84 #define IP_VS_CONN_F_IN_SEQ 0x0400 /* must do input seq adjust */ 85 85 #define IP_VS_CONN_F_SEQ_MASK 0x0600 /* in/out sequence mask */ 86 86 #define IP_VS_CONN_F_NO_CPORT 0x0800 /* no client port set yet */ 87 + #define IP_VS_CONN_F_TEMPLATE 0x1000 /* template, not connection */ 87 88 88 89 /* Move it to better place one day, for now keep it unique */ 89 90 #define NFC_IPVS_PROPERTY 0x10000 ··· 739 738 }; 740 739 741 740 extern struct ip_vs_conn *ip_vs_conn_in_get 741 + (int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port); 742 + extern struct ip_vs_conn *ip_vs_ct_in_get 742 743 (int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port); 743 744 extern struct ip_vs_conn *ip_vs_conn_out_get 744 745 (int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port);
+38 -3
net/ipv4/ipvs/ip_vs_conn.c
··· 196 196 list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { 197 197 if (s_addr==cp->caddr && s_port==cp->cport && 198 198 d_port==cp->vport && d_addr==cp->vaddr && 199 + ((!s_port) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) && 199 200 protocol==cp->protocol) { 200 201 /* HIT */ 201 202 atomic_inc(&cp->refcnt); ··· 228 227 return cp; 229 228 } 230 229 230 + /* Get reference to connection template */ 231 + struct ip_vs_conn *ip_vs_ct_in_get 232 + (int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port) 233 + { 234 + unsigned hash; 235 + struct ip_vs_conn *cp; 236 + 237 + hash = ip_vs_conn_hashkey(protocol, s_addr, s_port); 238 + 239 + ct_read_lock(hash); 240 + 241 + list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { 242 + if (s_addr==cp->caddr && s_port==cp->cport && 243 + d_port==cp->vport && d_addr==cp->vaddr && 244 + cp->flags & IP_VS_CONN_F_TEMPLATE && 245 + protocol==cp->protocol) { 246 + /* HIT */ 247 + atomic_inc(&cp->refcnt); 248 + goto out; 249 + } 250 + } 251 + cp = NULL; 252 + 253 + out: 254 + ct_read_unlock(hash); 255 + 256 + IP_VS_DBG(7, "template lookup/in %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n", 257 + ip_vs_proto_name(protocol), 258 + NIPQUAD(s_addr), ntohs(s_port), 259 + NIPQUAD(d_addr), ntohs(d_port), 260 + cp?"hit":"not hit"); 261 + 262 + return cp; 263 + } 231 264 232 265 /* 233 266 * Gets ip_vs_conn associated with supplied parameters in the ip_vs_conn_tab. 
··· 402 367 atomic_read(&dest->refcnt)); 403 368 404 369 /* Update the connection counters */ 405 - if (cp->cport || (cp->flags & IP_VS_CONN_F_NO_CPORT)) { 370 + if (!(cp->flags & IP_VS_CONN_F_TEMPLATE)) { 406 371 /* It is a normal connection, so increase the inactive 407 372 connection counter because it is in TCP SYNRECV 408 373 state (inactive) or other protocol inacive state */ ··· 441 406 atomic_read(&dest->refcnt)); 442 407 443 408 /* Update the connection counters */ 444 - if (cp->cport || (cp->flags & IP_VS_CONN_F_NO_CPORT)) { 409 + if (!(cp->flags & IP_VS_CONN_F_TEMPLATE)) { 445 410 /* It is a normal connection, so decrease the inactconns 446 411 or activeconns counter */ 447 412 if (cp->flags & IP_VS_CONN_F_INACTIVE) { ··· 811 776 ct_write_lock_bh(hash); 812 777 813 778 list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { 814 - if (!cp->cport && !(cp->flags & IP_VS_CONN_F_NO_CPORT)) 779 + if (cp->flags & IP_VS_CONN_F_TEMPLATE) 815 780 /* connection template */ 816 781 continue; 817 782
+8 -8
net/ipv4/ipvs/ip_vs_core.c
··· 243 243 if (ports[1] == svc->port) { 244 244 /* Check if a template already exists */ 245 245 if (svc->port != FTPPORT) 246 - ct = ip_vs_conn_in_get(iph->protocol, snet, 0, 246 + ct = ip_vs_ct_in_get(iph->protocol, snet, 0, 247 247 iph->daddr, ports[1]); 248 248 else 249 - ct = ip_vs_conn_in_get(iph->protocol, snet, 0, 249 + ct = ip_vs_ct_in_get(iph->protocol, snet, 0, 250 250 iph->daddr, 0); 251 251 252 252 if (!ct || !ip_vs_check_template(ct)) { ··· 272 272 iph->daddr, 273 273 ports[1], 274 274 dest->addr, dest->port, 275 - 0, 275 + IP_VS_CONN_F_TEMPLATE, 276 276 dest); 277 277 else 278 278 ct = ip_vs_conn_new(iph->protocol, 279 279 snet, 0, 280 280 iph->daddr, 0, 281 281 dest->addr, 0, 282 - 0, 282 + IP_VS_CONN_F_TEMPLATE, 283 283 dest); 284 284 if (ct == NULL) 285 285 return NULL; ··· 298 298 * port zero template: <protocol,caddr,0,vaddr,0,daddr,0> 299 299 */ 300 300 if (svc->fwmark) 301 - ct = ip_vs_conn_in_get(IPPROTO_IP, snet, 0, 301 + ct = ip_vs_ct_in_get(IPPROTO_IP, snet, 0, 302 302 htonl(svc->fwmark), 0); 303 303 else 304 - ct = ip_vs_conn_in_get(iph->protocol, snet, 0, 304 + ct = ip_vs_ct_in_get(iph->protocol, snet, 0, 305 305 iph->daddr, 0); 306 306 307 307 if (!ct || !ip_vs_check_template(ct)) { ··· 326 326 snet, 0, 327 327 htonl(svc->fwmark), 0, 328 328 dest->addr, 0, 329 - 0, 329 + IP_VS_CONN_F_TEMPLATE, 330 330 dest); 331 331 else 332 332 ct = ip_vs_conn_new(iph->protocol, 333 333 snet, 0, 334 334 iph->daddr, 0, 335 335 dest->addr, 0, 336 - 0, 336 + IP_VS_CONN_F_TEMPLATE, 337 337 dest); 338 338 if (ct == NULL) 339 339 return NULL;
+14 -6
net/ipv4/ipvs/ip_vs_sync.c
··· 297 297 298 298 p = (char *)buffer + sizeof(struct ip_vs_sync_mesg); 299 299 for (i=0; i<m->nr_conns; i++) { 300 + unsigned flags; 301 + 300 302 s = (struct ip_vs_sync_conn *)p; 301 - cp = ip_vs_conn_in_get(s->protocol, 302 - s->caddr, s->cport, 303 - s->vaddr, s->vport); 303 + flags = ntohs(s->flags); 304 + if (!(flags & IP_VS_CONN_F_TEMPLATE)) 305 + cp = ip_vs_conn_in_get(s->protocol, 306 + s->caddr, s->cport, 307 + s->vaddr, s->vport); 308 + else 309 + cp = ip_vs_ct_in_get(s->protocol, 310 + s->caddr, s->cport, 311 + s->vaddr, s->vport); 304 312 if (!cp) { 305 313 cp = ip_vs_conn_new(s->protocol, 306 314 s->caddr, s->cport, 307 315 s->vaddr, s->vport, 308 316 s->daddr, s->dport, 309 - ntohs(s->flags), NULL); 317 + flags, NULL); 310 318 if (!cp) { 311 319 IP_VS_ERR("ip_vs_conn_new failed\n"); 312 320 return; ··· 323 315 } else if (!cp->dest) { 324 316 /* it is an entry created by the synchronization */ 325 317 cp->state = ntohs(s->state); 326 - cp->flags = ntohs(s->flags) | IP_VS_CONN_F_HASHED; 318 + cp->flags = flags | IP_VS_CONN_F_HASHED; 327 319 } /* Note that we don't touch its state and flags 328 320 if it is a normal entry. */ 329 321 330 - if (ntohs(s->flags) & IP_VS_CONN_F_SEQ_MASK) { 322 + if (flags & IP_VS_CONN_F_SEQ_MASK) { 331 323 opt = (struct ip_vs_sync_conn_options *)&s[1]; 332 324 memcpy(&cp->in_seq, opt, sizeof(*opt)); 333 325 p += FULL_CONN_SIZE;