[NET]: Zerocopy sequential reading of skb data

Implements sequential reading for both linear and non-linear
skb data at zerocopy cost. The data is returned in chunks of
arbitrary length, therefore random access is not possible.

Usage:
from := 0
to := 128
state := undef
data := undef
len := undef
consumed := 0

skb_prepare_seq_read(skb, from, to, &state)
while (len = skb_seq_read(consumed, &data, &state)) != 0 do
    /* do something with 'data' of length 'len' */
    if abort then
        /* abort read if we don't wait for
         * skb_seq_read() to return 0 */
        skb_abort_seq_read(&state)
        return
    endif
    /* not necessary to consume all of 'len' */
    consumed += len
done

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by Thomas Graf and committed by David S. Miller 677e90ed 6408f79c

+135
+18
include/linux/skbuff.h
··· 321 extern void skb_under_panic(struct sk_buff *skb, int len, 322 void *here); 323 324 /* Internal */ 325 #define skb_shinfo(SKB) ((struct skb_shared_info *)((SKB)->end)) 326
··· 321 extern void skb_under_panic(struct sk_buff *skb, int len, 322 void *here); 323 324 + struct skb_seq_state 325 + { 326 + __u32 lower_offset; 327 + __u32 upper_offset; 328 + __u32 frag_idx; 329 + __u32 stepped_offset; 330 + struct sk_buff *root_skb; 331 + struct sk_buff *cur_skb; 332 + __u8 *frag_data; 333 + }; 334 + 335 + extern void skb_prepare_seq_read(struct sk_buff *skb, 336 + unsigned int from, unsigned int to, 337 + struct skb_seq_state *st); 338 + extern unsigned int skb_seq_read(unsigned int consumed, const u8 **data, 339 + struct skb_seq_state *st); 340 + extern void skb_abort_seq_read(struct skb_seq_state *st); 341 + 342 /* Internal */ 343 #define skb_shinfo(SKB) ((struct skb_shared_info *)((SKB)->end)) 344
+117
net/core/skbuff.c
··· 1500 skb_split_no_header(skb, skb1, len, pos); 1501 } 1502 1503 void __init skb_init(void) 1504 { 1505 skbuff_head_cache = kmem_cache_create("skbuff_head_cache", ··· 1652 EXPORT_SYMBOL(skb_unlink); 1653 EXPORT_SYMBOL(skb_append); 1654 EXPORT_SYMBOL(skb_split);
··· 1500 skb_split_no_header(skb, skb1, len, pos); 1501 } 1502 1503 + /** 1504 + * skb_prepare_seq_read - Prepare a sequential read of skb data 1505 + * @skb: the buffer to read 1506 + * @from: lower offset of data to be read 1507 + * @to: upper offset of data to be read 1508 + * @st: state variable 1509 + * 1510 + * Initializes the specified state variable. Must be called before 1511 + * invoking skb_seq_read() for the first time. 1512 + */ 1513 + void skb_prepare_seq_read(struct sk_buff *skb, unsigned int from, 1514 + unsigned int to, struct skb_seq_state *st) 1515 + { 1516 + st->lower_offset = from; 1517 + st->upper_offset = to; 1518 + st->root_skb = st->cur_skb = skb; 1519 + st->frag_idx = st->stepped_offset = 0; 1520 + st->frag_data = NULL; 1521 + } 1522 + 1523 + /** 1524 + * skb_seq_read - Sequentially read skb data 1525 + * @consumed: number of bytes consumed by the caller so far 1526 + * @data: destination pointer for data to be returned 1527 + * @st: state variable 1528 + * 1529 + * Reads a block of skb data at &consumed relative to the 1530 + * lower offset specified to skb_prepare_seq_read(). Assigns 1531 + * the head of the data block to &data and returns the length 1532 + * of the block or 0 if the end of the skb data or the upper 1533 + * offset has been reached. 1534 + * 1535 + * The caller is not required to consume all of the data 1536 + * returned, i.e. &consumed is typically set to the number 1537 + * of bytes already consumed and the next call to 1538 + * skb_seq_read() will return the remaining part of the block. 1539 + * 1540 + * Note: The size of each block of data returned can be arbitary, 1541 + * this limitation is the cost for zerocopy seqeuental 1542 + * reads of potentially non linear data. 1543 + * 1544 + * Note: Fragment lists within fragments are not implemented 1545 + * at the moment, state->root_skb could be replaced with 1546 + * a stack for this purpose. 
1547 + */ 1548 + unsigned int skb_seq_read(unsigned int consumed, const u8 **data, 1549 + struct skb_seq_state *st) 1550 + { 1551 + unsigned int block_limit, abs_offset = consumed + st->lower_offset; 1552 + skb_frag_t *frag; 1553 + 1554 + if (unlikely(abs_offset >= st->upper_offset)) 1555 + return 0; 1556 + 1557 + next_skb: 1558 + block_limit = skb_headlen(st->cur_skb); 1559 + 1560 + if (abs_offset < block_limit) { 1561 + *data = st->cur_skb->data + abs_offset; 1562 + return block_limit - abs_offset; 1563 + } 1564 + 1565 + if (st->frag_idx == 0 && !st->frag_data) 1566 + st->stepped_offset += skb_headlen(st->cur_skb); 1567 + 1568 + while (st->frag_idx < skb_shinfo(st->cur_skb)->nr_frags) { 1569 + frag = &skb_shinfo(st->cur_skb)->frags[st->frag_idx]; 1570 + block_limit = frag->size + st->stepped_offset; 1571 + 1572 + if (abs_offset < block_limit) { 1573 + if (!st->frag_data) 1574 + st->frag_data = kmap_skb_frag(frag); 1575 + 1576 + *data = (u8 *) st->frag_data + frag->page_offset + 1577 + (abs_offset - st->stepped_offset); 1578 + 1579 + return block_limit - abs_offset; 1580 + } 1581 + 1582 + if (st->frag_data) { 1583 + kunmap_skb_frag(st->frag_data); 1584 + st->frag_data = NULL; 1585 + } 1586 + 1587 + st->frag_idx++; 1588 + st->stepped_offset += frag->size; 1589 + } 1590 + 1591 + if (st->cur_skb->next) { 1592 + st->cur_skb = st->cur_skb->next; 1593 + st->frag_idx = 0; 1594 + goto next_skb; 1595 + } else if (st->root_skb == st->cur_skb && 1596 + skb_shinfo(st->root_skb)->frag_list) { 1597 + st->cur_skb = skb_shinfo(st->root_skb)->frag_list; 1598 + goto next_skb; 1599 + } 1600 + 1601 + return 0; 1602 + } 1603 + 1604 + /** 1605 + * skb_abort_seq_read - Abort a sequential read of skb data 1606 + * @st: state variable 1607 + * 1608 + * Must be called if skb_seq_read() was not called until it 1609 + * returned 0. 
1610 + */ 1611 + void skb_abort_seq_read(struct skb_seq_state *st) 1612 + { 1613 + if (st->frag_data) 1614 + kunmap_skb_frag(st->frag_data); 1615 + } 1616 + 1617 void __init skb_init(void) 1618 { 1619 skbuff_head_cache = kmem_cache_create("skbuff_head_cache", ··· 1538 EXPORT_SYMBOL(skb_unlink); 1539 EXPORT_SYMBOL(skb_append); 1540 EXPORT_SYMBOL(skb_split); 1541 + EXPORT_SYMBOL(skb_prepare_seq_read); 1542 + EXPORT_SYMBOL(skb_seq_read); 1543 + EXPORT_SYMBOL(skb_abort_seq_read);