Drivers: hv: vmbus: finally fix hv_need_to_signal_on_read()

Commit a389fcfd2cb5 ("Drivers: hv: vmbus: Fix signaling logic in
hv_need_to_signal_on_read()")
added the proper mb(), but removed the test "prev_write_sz < pending_sz"
when making the signal decision.

As a result, the guest can signal the host unnecessarily,
and then the host can throttle the guest because the host
thinks the guest is buggy or malicious; finally the user
running stress test can perceive intermittent freeze of
the guest.

This patch brings back the test, and properly handles the
in-place consumption APIs used by NetVSC (see get_next_pkt_raw(),
put_pkt_raw() and commit_rd_index()).

Fixes: a389fcfd2cb5 ("Drivers: hv: vmbus: Fix signaling logic in
hv_need_to_signal_on_read()")

Signed-off-by: Dexuan Cui <decui@microsoft.com>
Reported-by: Rolf Neugebauer <rolf.neugebauer@docker.com>
Tested-by: Rolf Neugebauer <rolf.neugebauer@docker.com>
Cc: "K. Y. Srinivasan" <kys@microsoft.com>
Cc: Haiyang Zhang <haiyangz@microsoft.com>
Cc: Stephen Hemminger <sthemmin@microsoft.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

authored by

Dexuan Cui and committed by
Greg Kroah-Hartman
433e19cf 191e885a

+37 -2
+1
drivers/hv/ring_buffer.c
··· 383 return ret; 384 } 385 386 next_read_location = hv_get_next_read_location(inring_info); 387 next_read_location = hv_copyfrom_ringbuffer(inring_info, &desc, 388 sizeof(desc),
··· 383 return ret; 384 } 385 386 + init_cached_read_index(channel); 387 next_read_location = hv_get_next_read_location(inring_info); 388 next_read_location = hv_copyfrom_ringbuffer(inring_info, &desc, 389 sizeof(desc),
+6
drivers/net/hyperv/netvsc.c
··· 1295 ndev = hv_get_drvdata(device); 1296 buffer = get_per_channel_state(channel); 1297 1298 do { 1299 desc = get_next_pkt_raw(channel); 1300 if (desc != NULL) { ··· 1350 1351 bufferlen = bytes_recvd; 1352 } 1353 } while (1); 1354 1355 if (bufferlen > NETVSC_PACKET_SIZE)
··· 1295 ndev = hv_get_drvdata(device); 1296 buffer = get_per_channel_state(channel); 1297 1298 + /* commit_rd_index() -> hv_signal_on_read() needs this. */ 1299 + init_cached_read_index(channel); 1300 + 1301 do { 1302 desc = get_next_pkt_raw(channel); 1303 if (desc != NULL) { ··· 1347 1348 bufferlen = bytes_recvd; 1349 } 1350 + 1351 + init_cached_read_index(channel); 1352 + 1353 } while (1); 1354 1355 if (bufferlen > NETVSC_PACKET_SIZE)
+30 -2
include/linux/hyperv.h
··· 128 u32 ring_data_startoffset; 129 u32 priv_write_index; 130 u32 priv_read_index; 131 }; 132 133 /* ··· 181 return write; 182 } 183 184 /* 185 * VMBUS version is 32 bit entity broken up into 186 * two 16 bit quantities: major_number. minor_number. ··· 1502 1503 static inline void hv_signal_on_read(struct vmbus_channel *channel) 1504 { 1505 - u32 cur_write_sz; 1506 u32 pending_sz; 1507 struct hv_ring_buffer_info *rbi = &channel->inbound; 1508 ··· 1526 1527 cur_write_sz = hv_get_bytes_to_write(rbi); 1528 1529 - if (cur_write_sz >= pending_sz) 1530 vmbus_setevent(channel); 1531 1532 return; 1533 } 1534 1535 /* ··· 1594 /* 1595 * This call commits the read index and potentially signals the host. 1596 * Here is the pattern for using the "in-place" consumption APIs: 1597 * 1598 * while (get_next_pkt_raw() { 1599 * process the packet "in-place";
··· 128 u32 ring_data_startoffset; 129 u32 priv_write_index; 130 u32 priv_read_index; 131 + u32 cached_read_index; 132 }; 133 134 /* ··· 180 return write; 181 } 182 183 + static inline u32 hv_get_cached_bytes_to_write( 184 + const struct hv_ring_buffer_info *rbi) 185 + { 186 + u32 read_loc, write_loc, dsize, write; 187 + 188 + dsize = rbi->ring_datasize; 189 + read_loc = rbi->cached_read_index; 190 + write_loc = rbi->ring_buffer->write_index; 191 + 192 + write = write_loc >= read_loc ? dsize - (write_loc - read_loc) : 193 + read_loc - write_loc; 194 + return write; 195 + } 196 /* 197 * VMBUS version is 32 bit entity broken up into 198 * two 16 bit quantities: major_number. minor_number. ··· 1488 1489 static inline void hv_signal_on_read(struct vmbus_channel *channel) 1490 { 1491 + u32 cur_write_sz, cached_write_sz; 1492 u32 pending_sz; 1493 struct hv_ring_buffer_info *rbi = &channel->inbound; 1494 ··· 1512 1513 cur_write_sz = hv_get_bytes_to_write(rbi); 1514 1515 + if (cur_write_sz < pending_sz) 1516 + return; 1517 + 1518 + cached_write_sz = hv_get_cached_bytes_to_write(rbi); 1519 + if (cached_write_sz < pending_sz) 1520 vmbus_setevent(channel); 1521 1522 return; 1523 + } 1524 + 1525 + static inline void 1526 + init_cached_read_index(struct vmbus_channel *channel) 1527 + { 1528 + struct hv_ring_buffer_info *rbi = &channel->inbound; 1529 + 1530 + rbi->cached_read_index = rbi->ring_buffer->read_index; 1531 } 1532 1533 /* ··· 1568 /* 1569 * This call commits the read index and potentially signals the host. 1570 * Here is the pattern for using the "in-place" consumption APIs: 1571 + * 1572 + * init_cached_read_index(); 1573 * 1574 * while (get_next_pkt_raw() { 1575 * process the packet "in-place";