Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

xen-netback: count number required slots for an skb more carefully

When a VM is providing an iSCSI target and the LUN is used by the
backend domain, the generated skbs for direct I/O writes to the disk
have large, multi-page skb->data but no frags.

With some lengths and starting offsets, xen_netbk_count_skb_slots()
would be one short because the simple calculation of
DIV_ROUND_UP(skb_headlen(), PAGE_SIZE) was not accounting for the
decisions made by start_new_rx_buffer() which does not guarantee
responses are fully packed.

For example, an skb with length < 2 pages but which spans 3 pages would
be counted as requiring 2 slots but would actually use 3 slots.

skb->data:

| 1111|222222222222|3333 |

Fully packed, this would need 2 slots:

|111122222222|22223333 |

But because the 2nd page wholly fits into a slot it is not split across
slots and goes into a slot of its own:

|1111 |222222222222|3333 |

Miscounting the number of slots means netback may push more responses
than the number of available requests. This will cause the frontend
to get very confused and report "Too many frags/slots". The frontend
never recovers and will eventually BUG.

Fix this by counting the number of required slots more carefully. In
xen_netbk_count_skb_slots(), more closely follow the algorithm used by
xen_netbk_gop_skb() by introducing xen_netbk_count_frag_slots() which
is the dry-run equivalent of netbk_gop_frag_copy().

Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Acked-by: Ian Campbell <ian.campbell@citrix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

Authored by David Vrabel; committed by David S. Miller.
6e43fc04 300cf9b9

+65 -31
+65 -31
drivers/net/xen-netback/netback.c
··· 212 212 return false; 213 213 } 214 214 215 + struct xenvif_count_slot_state { 216 + unsigned long copy_off; 217 + bool head; 218 + }; 219 + 220 + unsigned int xenvif_count_frag_slots(struct xenvif *vif, 221 + unsigned long offset, unsigned long size, 222 + struct xenvif_count_slot_state *state) 223 + { 224 + unsigned count = 0; 225 + 226 + offset &= ~PAGE_MASK; 227 + 228 + while (size > 0) { 229 + unsigned long bytes; 230 + 231 + bytes = PAGE_SIZE - offset; 232 + 233 + if (bytes > size) 234 + bytes = size; 235 + 236 + if (start_new_rx_buffer(state->copy_off, bytes, state->head)) { 237 + count++; 238 + state->copy_off = 0; 239 + } 240 + 241 + if (state->copy_off + bytes > MAX_BUFFER_OFFSET) 242 + bytes = MAX_BUFFER_OFFSET - state->copy_off; 243 + 244 + state->copy_off += bytes; 245 + 246 + offset += bytes; 247 + size -= bytes; 248 + 249 + if (offset == PAGE_SIZE) 250 + offset = 0; 251 + 252 + state->head = false; 253 + } 254 + 255 + return count; 256 + } 257 + 215 258 /* 216 259 * Figure out how many ring slots we're going to need to send @skb to 217 260 * the guest. This function is essentially a dry run of ··· 262 219 */ 263 220 unsigned int xenvif_count_skb_slots(struct xenvif *vif, struct sk_buff *skb) 264 221 { 222 + struct xenvif_count_slot_state state; 265 223 unsigned int count; 266 - int i, copy_off; 224 + unsigned char *data; 225 + unsigned i; 267 226 268 - count = DIV_ROUND_UP(skb_headlen(skb), PAGE_SIZE); 227 + state.head = true; 228 + state.copy_off = 0; 269 229 270 - copy_off = skb_headlen(skb) % PAGE_SIZE; 230 + /* Slot for the first (partial) page of data. */ 231 + count = 1; 271 232 233 + /* Need a slot for the GSO prefix for GSO extra data? 
*/ 272 234 if (skb_shinfo(skb)->gso_size) 273 235 count++; 236 + 237 + data = skb->data; 238 + while (data < skb_tail_pointer(skb)) { 239 + unsigned long offset = offset_in_page(data); 240 + unsigned long size = PAGE_SIZE - offset; 241 + 242 + if (data + size > skb_tail_pointer(skb)) 243 + size = skb_tail_pointer(skb) - data; 244 + 245 + count += xenvif_count_frag_slots(vif, offset, size, &state); 246 + 247 + data += size; 248 + } 274 249 275 250 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { 276 251 unsigned long size = skb_frag_size(&skb_shinfo(skb)->frags[i]); 277 252 unsigned long offset = skb_shinfo(skb)->frags[i].page_offset; 278 - unsigned long bytes; 279 253 280 - offset &= ~PAGE_MASK; 281 - 282 - while (size > 0) { 283 - BUG_ON(offset >= PAGE_SIZE); 284 - BUG_ON(copy_off > MAX_BUFFER_OFFSET); 285 - 286 - bytes = PAGE_SIZE - offset; 287 - 288 - if (bytes > size) 289 - bytes = size; 290 - 291 - if (start_new_rx_buffer(copy_off, bytes, 0)) { 292 - count++; 293 - copy_off = 0; 294 - } 295 - 296 - if (copy_off + bytes > MAX_BUFFER_OFFSET) 297 - bytes = MAX_BUFFER_OFFSET - copy_off; 298 - 299 - copy_off += bytes; 300 - 301 - offset += bytes; 302 - size -= bytes; 303 - 304 - if (offset == PAGE_SIZE) 305 - offset = 0; 306 - } 254 + count += xenvif_count_frag_slots(vif, offset, size, &state); 307 255 } 308 256 return count; 309 257 }