Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

b43: Optimize DMA buffers

In the old days we used one slot per frame. But when we changed that to 2,
we didn't raise the overall slot count, which effectively halved the
number of frames that fit into a TX ring.

Double the number of TX slots, so we have an effective hardware queue
of 128 frames per QoS queue.

Also optimize the TX header cache handling. We don't need a cached TX header
for slots that will never carry an actual header.
So we reduce the memory consumption of the cache by 50%.

So as a net result we end up with more or less the same memory usage before
and after this patch (except for a few tiny meta structures), but have twice
the number of TX slots available.

Signed-off-by: Michael Buesch <mb@bu3sch.de>
Signed-off-by: John W. Linville <linville@tuxdriver.com>

authored by

Michael Buesch and committed by
John W. Linville
bdceeb2d 8eccb53f

+25 -17
+23 -15
drivers/net/wireless/b43/dma.c
··· 41 41 #include <asm/div64.h> 42 42 43 43 44 + /* Required number of TX DMA slots per TX frame. 45 + * This currently is 2, because we put the header and the ieee80211 frame 46 + * into separate slots. */ 47 + #define TX_SLOTS_PER_FRAME 2 48 + 49 + 44 50 /* 32bit DMA ops. */ 45 51 static 46 52 struct b43_dmadesc_generic *op32_idx2desc(struct b43_dmaring *ring, ··· 580 574 return -ENOMEM; 581 575 dmaaddr = map_descbuffer(ring, skb->data, 582 576 ring->rx_buffersize, 0); 583 - } 584 - 585 - if (b43_dma_mapping_error(ring, dmaaddr, ring->rx_buffersize, 0)) { 586 - b43err(ring->dev->wl, "RX DMA buffer allocation failed\n"); 587 - dev_kfree_skb_any(skb); 588 - return -EIO; 577 + if (b43_dma_mapping_error(ring, dmaaddr, ring->rx_buffersize, 0)) { 578 + b43err(ring->dev->wl, "RX DMA buffer allocation failed\n"); 579 + dev_kfree_skb_any(skb); 580 + return -EIO; 581 + } 589 582 } 590 583 591 584 meta->skb = skb; ··· 842 837 #endif 843 838 844 839 if (for_tx) { 845 - ring->txhdr_cache = kcalloc(ring->nr_slots, 840 + ring->txhdr_cache = kcalloc(ring->nr_slots / TX_SLOTS_PER_FRAME, 846 841 b43_txhdr_size(dev), 847 842 GFP_KERNEL); 848 843 if (!ring->txhdr_cache) ··· 858 853 b43_txhdr_size(dev), 1)) { 859 854 /* ugh realloc */ 860 855 kfree(ring->txhdr_cache); 861 - ring->txhdr_cache = kcalloc(ring->nr_slots, 856 + ring->txhdr_cache = kcalloc(ring->nr_slots / TX_SLOTS_PER_FRAME, 862 857 b43_txhdr_size(dev), 863 858 GFP_KERNEL | GFP_DMA); 864 859 if (!ring->txhdr_cache) ··· 1149 1144 u16 cookie; 1150 1145 size_t hdrsize = b43_txhdr_size(ring->dev); 1151 1146 1152 - #define SLOTS_PER_PACKET 2 1147 + /* Important note: If the number of used DMA slots per TX frame 1148 + * is changed here, the TX_SLOTS_PER_FRAME definition at the top of 1149 + * the file has to be updated, too! 
1150 + */ 1153 1151 1154 1152 old_top_slot = ring->current_slot; 1155 1153 old_used_slots = ring->used_slots; ··· 1162 1154 desc = ops->idx2desc(ring, slot, &meta_hdr); 1163 1155 memset(meta_hdr, 0, sizeof(*meta_hdr)); 1164 1156 1165 - header = &(ring->txhdr_cache[slot * hdrsize]); 1157 + header = &(ring->txhdr_cache[(slot / TX_SLOTS_PER_FRAME) * hdrsize]); 1166 1158 cookie = generate_cookie(ring, slot); 1167 1159 err = b43_generate_txhdr(ring->dev, header, 1168 1160 skb->data, skb->len, info, cookie); ··· 1316 1308 * That would be a mac80211 bug. */ 1317 1309 B43_WARN_ON(ring->stopped); 1318 1310 1319 - if (unlikely(free_slots(ring) < SLOTS_PER_PACKET)) { 1311 + if (unlikely(free_slots(ring) < TX_SLOTS_PER_FRAME)) { 1320 1312 b43warn(dev->wl, "DMA queue overflow\n"); 1321 1313 err = -ENOSPC; 1322 1314 goto out_unlock; ··· 1340 1332 goto out_unlock; 1341 1333 } 1342 1334 ring->nr_tx_packets++; 1343 - if ((free_slots(ring) < SLOTS_PER_PACKET) || 1335 + if ((free_slots(ring) < TX_SLOTS_PER_FRAME) || 1344 1336 should_inject_overflow(ring)) { 1345 1337 /* This TX ring is full. */ 1346 1338 ieee80211_stop_queue(dev->wl->hw, skb_get_queue_mapping(skb)); ··· 1424 1416 } 1425 1417 dev->stats.last_tx = jiffies; 1426 1418 if (ring->stopped) { 1427 - B43_WARN_ON(free_slots(ring) < SLOTS_PER_PACKET); 1419 + B43_WARN_ON(free_slots(ring) < TX_SLOTS_PER_FRAME); 1428 1420 ieee80211_wake_queue(dev->wl->hw, ring->queue_prio); 1429 1421 ring->stopped = 0; 1430 1422 if (b43_debug(dev, B43_DBG_DMAVERBOSE)) { ··· 1447 1439 ring = select_ring_by_priority(dev, i); 1448 1440 1449 1441 spin_lock_irqsave(&ring->lock, flags); 1450 - stats[i].len = ring->used_slots / SLOTS_PER_PACKET; 1451 - stats[i].limit = ring->nr_slots / SLOTS_PER_PACKET; 1442 + stats[i].len = ring->used_slots / TX_SLOTS_PER_FRAME; 1443 + stats[i].limit = ring->nr_slots / TX_SLOTS_PER_FRAME; 1452 1444 stats[i].count = ring->nr_tx_packets; 1453 1445 spin_unlock_irqrestore(&ring->lock, flags); 1454 1446 }
+2 -2
drivers/net/wireless/b43/dma.h
··· 162 162 #define B43_DMA0_RX_FRAMEOFFSET 30 163 163 164 164 /* DMA engine tuning knobs */ 165 - #define B43_TXRING_SLOTS 128 165 + #define B43_TXRING_SLOTS 256 166 166 #define B43_RXRING_SLOTS 64 167 167 #define B43_DMA0_RX_BUFFERSIZE IEEE80211_MAX_FRAME_LEN 168 168 ··· 212 212 void *descbase; 213 213 /* Meta data about all descriptors. */ 214 214 struct b43_dmadesc_meta *meta; 215 - /* Cache of TX headers for each slot. 215 + /* Cache of TX headers for each TX frame. 216 216 * This is to avoid an allocation on each TX. 217 217 * This is NULL for an RX ring. 218 218 */