[IPVS]: ip_vs_ftp breaks connections using persistence

When loaded, ip_vs_ftp can create NAT connections with an unknown client
port for passive FTP. For such expectations we look up with cport=0 on
the incoming packet, but this matches the format of the persistence
templates, causing packets to other persistent virtual servers to be
forwarded to the real server without creating a connection. Later the
reply packets are treated as foreign and are not SNAT-ed.

This patch changes the connection lookup for packets from clients:

* introduce the IP_VS_CONN_F_TEMPLATE connection flag to mark a
connection as a template

* create a new connection lookup function just for templates:
ip_vs_ct_in_get

* make sure ip_vs_conn_in_get hits only connections with the
IP_VS_CONN_F_NO_CPORT flag set when s_port is 0. This way
we avoid returning a template when looking up cport=0 (ftp)

Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by Julian Anastasov and committed by David S. Miller 87375ab4 f5e229db

+63 -17
+3
include/net/ip_vs.h
··· 84 #define IP_VS_CONN_F_IN_SEQ 0x0400 /* must do input seq adjust */ 85 #define IP_VS_CONN_F_SEQ_MASK 0x0600 /* in/out sequence mask */ 86 #define IP_VS_CONN_F_NO_CPORT 0x0800 /* no client port set yet */ 87 88 /* Move it to better place one day, for now keep it unique */ 89 #define NFC_IPVS_PROPERTY 0x10000 ··· 739 }; 740 741 extern struct ip_vs_conn *ip_vs_conn_in_get 742 (int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port); 743 extern struct ip_vs_conn *ip_vs_conn_out_get 744 (int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port);
··· 84 #define IP_VS_CONN_F_IN_SEQ 0x0400 /* must do input seq adjust */ 85 #define IP_VS_CONN_F_SEQ_MASK 0x0600 /* in/out sequence mask */ 86 #define IP_VS_CONN_F_NO_CPORT 0x0800 /* no client port set yet */ 87 + #define IP_VS_CONN_F_TEMPLATE 0x1000 /* template, not connection */ 88 89 /* Move it to better place one day, for now keep it unique */ 90 #define NFC_IPVS_PROPERTY 0x10000 ··· 738 }; 739 740 extern struct ip_vs_conn *ip_vs_conn_in_get 741 + (int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port); 742 + extern struct ip_vs_conn *ip_vs_ct_in_get 743 (int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port); 744 extern struct ip_vs_conn *ip_vs_conn_out_get 745 (int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port);
+38 -3
net/ipv4/ipvs/ip_vs_conn.c
··· 196 list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { 197 if (s_addr==cp->caddr && s_port==cp->cport && 198 d_port==cp->vport && d_addr==cp->vaddr && 199 protocol==cp->protocol) { 200 /* HIT */ 201 atomic_inc(&cp->refcnt); ··· 228 return cp; 229 } 230 231 232 /* 233 * Gets ip_vs_conn associated with supplied parameters in the ip_vs_conn_tab. ··· 402 atomic_read(&dest->refcnt)); 403 404 /* Update the connection counters */ 405 - if (cp->cport || (cp->flags & IP_VS_CONN_F_NO_CPORT)) { 406 /* It is a normal connection, so increase the inactive 407 connection counter because it is in TCP SYNRECV 408 state (inactive) or other protocol inacive state */ ··· 441 atomic_read(&dest->refcnt)); 442 443 /* Update the connection counters */ 444 - if (cp->cport || (cp->flags & IP_VS_CONN_F_NO_CPORT)) { 445 /* It is a normal connection, so decrease the inactconns 446 or activeconns counter */ 447 if (cp->flags & IP_VS_CONN_F_INACTIVE) { ··· 811 ct_write_lock_bh(hash); 812 813 list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { 814 - if (!cp->cport && !(cp->flags & IP_VS_CONN_F_NO_CPORT)) 815 /* connection template */ 816 continue; 817
··· 196 list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { 197 if (s_addr==cp->caddr && s_port==cp->cport && 198 d_port==cp->vport && d_addr==cp->vaddr && 199 + ((!s_port) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) && 200 protocol==cp->protocol) { 201 /* HIT */ 202 atomic_inc(&cp->refcnt); ··· 227 return cp; 228 } 229 230 + /* Get reference to connection template */ 231 + struct ip_vs_conn *ip_vs_ct_in_get 232 + (int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port) 233 + { 234 + unsigned hash; 235 + struct ip_vs_conn *cp; 236 + 237 + hash = ip_vs_conn_hashkey(protocol, s_addr, s_port); 238 + 239 + ct_read_lock(hash); 240 + 241 + list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { 242 + if (s_addr==cp->caddr && s_port==cp->cport && 243 + d_port==cp->vport && d_addr==cp->vaddr && 244 + cp->flags & IP_VS_CONN_F_TEMPLATE && 245 + protocol==cp->protocol) { 246 + /* HIT */ 247 + atomic_inc(&cp->refcnt); 248 + goto out; 249 + } 250 + } 251 + cp = NULL; 252 + 253 + out: 254 + ct_read_unlock(hash); 255 + 256 + IP_VS_DBG(7, "template lookup/in %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n", 257 + ip_vs_proto_name(protocol), 258 + NIPQUAD(s_addr), ntohs(s_port), 259 + NIPQUAD(d_addr), ntohs(d_port), 260 + cp?"hit":"not hit"); 261 + 262 + return cp; 263 + } 264 265 /* 266 * Gets ip_vs_conn associated with supplied parameters in the ip_vs_conn_tab. 
··· 367 atomic_read(&dest->refcnt)); 368 369 /* Update the connection counters */ 370 + if (!(cp->flags & IP_VS_CONN_F_TEMPLATE)) { 371 /* It is a normal connection, so increase the inactive 372 connection counter because it is in TCP SYNRECV 373 state (inactive) or other protocol inacive state */ ··· 406 atomic_read(&dest->refcnt)); 407 408 /* Update the connection counters */ 409 + if (!(cp->flags & IP_VS_CONN_F_TEMPLATE)) { 410 /* It is a normal connection, so decrease the inactconns 411 or activeconns counter */ 412 if (cp->flags & IP_VS_CONN_F_INACTIVE) { ··· 776 ct_write_lock_bh(hash); 777 778 list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { 779 + if (cp->flags & IP_VS_CONN_F_TEMPLATE) 780 /* connection template */ 781 continue; 782
+8 -8
net/ipv4/ipvs/ip_vs_core.c
··· 243 if (ports[1] == svc->port) { 244 /* Check if a template already exists */ 245 if (svc->port != FTPPORT) 246 - ct = ip_vs_conn_in_get(iph->protocol, snet, 0, 247 iph->daddr, ports[1]); 248 else 249 - ct = ip_vs_conn_in_get(iph->protocol, snet, 0, 250 iph->daddr, 0); 251 252 if (!ct || !ip_vs_check_template(ct)) { ··· 272 iph->daddr, 273 ports[1], 274 dest->addr, dest->port, 275 - 0, 276 dest); 277 else 278 ct = ip_vs_conn_new(iph->protocol, 279 snet, 0, 280 iph->daddr, 0, 281 dest->addr, 0, 282 - 0, 283 dest); 284 if (ct == NULL) 285 return NULL; ··· 298 * port zero template: <protocol,caddr,0,vaddr,0,daddr,0> 299 */ 300 if (svc->fwmark) 301 - ct = ip_vs_conn_in_get(IPPROTO_IP, snet, 0, 302 htonl(svc->fwmark), 0); 303 else 304 - ct = ip_vs_conn_in_get(iph->protocol, snet, 0, 305 iph->daddr, 0); 306 307 if (!ct || !ip_vs_check_template(ct)) { ··· 326 snet, 0, 327 htonl(svc->fwmark), 0, 328 dest->addr, 0, 329 - 0, 330 dest); 331 else 332 ct = ip_vs_conn_new(iph->protocol, 333 snet, 0, 334 iph->daddr, 0, 335 dest->addr, 0, 336 - 0, 337 dest); 338 if (ct == NULL) 339 return NULL;
··· 243 if (ports[1] == svc->port) { 244 /* Check if a template already exists */ 245 if (svc->port != FTPPORT) 246 + ct = ip_vs_ct_in_get(iph->protocol, snet, 0, 247 iph->daddr, ports[1]); 248 else 249 + ct = ip_vs_ct_in_get(iph->protocol, snet, 0, 250 iph->daddr, 0); 251 252 if (!ct || !ip_vs_check_template(ct)) { ··· 272 iph->daddr, 273 ports[1], 274 dest->addr, dest->port, 275 + IP_VS_CONN_F_TEMPLATE, 276 dest); 277 else 278 ct = ip_vs_conn_new(iph->protocol, 279 snet, 0, 280 iph->daddr, 0, 281 dest->addr, 0, 282 + IP_VS_CONN_F_TEMPLATE, 283 dest); 284 if (ct == NULL) 285 return NULL; ··· 298 * port zero template: <protocol,caddr,0,vaddr,0,daddr,0> 299 */ 300 if (svc->fwmark) 301 + ct = ip_vs_ct_in_get(IPPROTO_IP, snet, 0, 302 htonl(svc->fwmark), 0); 303 else 304 + ct = ip_vs_ct_in_get(iph->protocol, snet, 0, 305 iph->daddr, 0); 306 307 if (!ct || !ip_vs_check_template(ct)) { ··· 326 snet, 0, 327 htonl(svc->fwmark), 0, 328 dest->addr, 0, 329 + IP_VS_CONN_F_TEMPLATE, 330 dest); 331 else 332 ct = ip_vs_conn_new(iph->protocol, 333 snet, 0, 334 iph->daddr, 0, 335 dest->addr, 0, 336 + IP_VS_CONN_F_TEMPLATE, 337 dest); 338 if (ct == NULL) 339 return NULL;
+14 -6
net/ipv4/ipvs/ip_vs_sync.c
··· 297 298 p = (char *)buffer + sizeof(struct ip_vs_sync_mesg); 299 for (i=0; i<m->nr_conns; i++) { 300 s = (struct ip_vs_sync_conn *)p; 301 - cp = ip_vs_conn_in_get(s->protocol, 302 - s->caddr, s->cport, 303 - s->vaddr, s->vport); 304 if (!cp) { 305 cp = ip_vs_conn_new(s->protocol, 306 s->caddr, s->cport, 307 s->vaddr, s->vport, 308 s->daddr, s->dport, 309 - ntohs(s->flags), NULL); 310 if (!cp) { 311 IP_VS_ERR("ip_vs_conn_new failed\n"); 312 return; ··· 323 } else if (!cp->dest) { 324 /* it is an entry created by the synchronization */ 325 cp->state = ntohs(s->state); 326 - cp->flags = ntohs(s->flags) | IP_VS_CONN_F_HASHED; 327 } /* Note that we don't touch its state and flags 328 if it is a normal entry. */ 329 330 - if (ntohs(s->flags) & IP_VS_CONN_F_SEQ_MASK) { 331 opt = (struct ip_vs_sync_conn_options *)&s[1]; 332 memcpy(&cp->in_seq, opt, sizeof(*opt)); 333 p += FULL_CONN_SIZE;
··· 297 298 p = (char *)buffer + sizeof(struct ip_vs_sync_mesg); 299 for (i=0; i<m->nr_conns; i++) { 300 + unsigned flags; 301 + 302 s = (struct ip_vs_sync_conn *)p; 303 + flags = ntohs(s->flags); 304 + if (!(flags & IP_VS_CONN_F_TEMPLATE)) 305 + cp = ip_vs_conn_in_get(s->protocol, 306 + s->caddr, s->cport, 307 + s->vaddr, s->vport); 308 + else 309 + cp = ip_vs_ct_in_get(s->protocol, 310 + s->caddr, s->cport, 311 + s->vaddr, s->vport); 312 if (!cp) { 313 cp = ip_vs_conn_new(s->protocol, 314 s->caddr, s->cport, 315 s->vaddr, s->vport, 316 s->daddr, s->dport, 317 + flags, NULL); 318 if (!cp) { 319 IP_VS_ERR("ip_vs_conn_new failed\n"); 320 return; ··· 315 } else if (!cp->dest) { 316 /* it is an entry created by the synchronization */ 317 cp->state = ntohs(s->state); 318 + cp->flags = flags | IP_VS_CONN_F_HASHED; 319 } /* Note that we don't touch its state and flags 320 if it is a normal entry. */ 321 322 + if (flags & IP_VS_CONN_F_SEQ_MASK) { 323 opt = (struct ip_vs_sync_conn_options *)&s[1]; 324 memcpy(&cp->in_seq, opt, sizeof(*opt)); 325 p += FULL_CONN_SIZE;