Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

mac80211: Implement Airtime-based Queue Limit (AQL)

In order for the Fq_CoDel algorithm integrated in the mac80211 layer to
operate effectively to control excessive queueing latency, the CoDel
algorithm requires an accurate measure of how long packets stay in the
queue, AKA sojourn time. The sojourn time measured at the mac80211 layer
doesn't include queueing latency in the lower layer (firmware/hardware) and
CoDel expects the lower layer to have a short queue. However, most 802.11ac
chipsets offload tasks such as TX aggregation to firmware or hardware, and
thus have a deep lower layer queue.

Without a mechanism to control the lower layer queue size, packets only
stay in mac80211 layer transiently before being sent to firmware queue.
As a result, the sojourn time measured by CoDel in the mac80211 layer is
almost always lower than the CoDel latency target, hence CoDel does little
to control the latency, even when the lower layer queue causes excessive
latency.

The Byte Queue Limits (BQL) mechanism is commonly used to address a
similar issue with wired network interfaces. However, this method cannot be
applied directly to wireless network interfaces. "Bytes" is not a
suitable measure of queue depth in a wireless network, as the data rate
can vary dramatically from station to station in the same network, from a
few Mbps to over Gbps.

This patch implements an Airtime-based Queue Limit (AQL) to make CoDel work
effectively with wireless drivers that utilize firmware/hardware
offloading. AQL allows each txq to release just enough packets to the lower
layer to form 1-2 large aggregations to keep hardware fully utilized, and
to retain the rest of the frames in the mac80211 layer to be controlled by
the CoDel algorithm.

Signed-off-by: Kan Yan <kyan@google.com>
[ Toke: Keep API to set pending airtime internal, fix nits in commit msg ]
Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Link: https://lore.kernel.org/r/20191119060610.76681-4-kyan@google.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>

authored by

Kan Yan and committed by
Johannes Berg
3ace10f5 db3e1c40

+244 -14
+7
include/net/cfg80211.h
··· 2606 2606 2607 2607 #define IEEE80211_DEFAULT_AIRTIME_WEIGHT 256 2608 2608 2609 + /* The per TXQ device queue limit in airtime */ 2610 + #define IEEE80211_DEFAULT_AQL_TXQ_LIMIT_L 5000 2611 + #define IEEE80211_DEFAULT_AQL_TXQ_LIMIT_H 12000 2612 + 2613 + /* The per interface airtime threshold to switch to lower queue limit */ 2614 + #define IEEE80211_AQL_THRESHOLD 24000 2615 + 2609 2616 /** 2610 2617 * struct cfg80211_pmksa - PMK Security Association 2611 2618 *
+12
include/net/mac80211.h
··· 5566 5566 u32 tx_airtime, u32 rx_airtime); 5567 5567 5568 5568 /** 5569 + * ieee80211_txq_airtime_check - check if a txq can send frame to device 5570 + * 5571 + * @hw: pointer obtained from ieee80211_alloc_hw() 5572 + * @txq: pointer obtained from station or virtual interface 5573 + * 5574 + * Return true if the AQL's airtime limit has not been reached and the txq can 5575 + * continue to send more packets to the device. Otherwise return false. 5576 + */ 5577 + bool 5578 + ieee80211_txq_airtime_check(struct ieee80211_hw *hw, struct ieee80211_txq *txq); 5579 + 5580 + /** 5569 5581 * ieee80211_iter_keys - iterate keys programmed into the device 5570 5582 * @hw: pointer obtained from ieee80211_alloc_hw() 5571 5583 * @vif: virtual interface to iterate, may be %NULL for all
+85
net/mac80211/debugfs.c
··· 150 150 .llseek = default_llseek, 151 151 }; 152 152 153 + static ssize_t aql_txq_limit_read(struct file *file, 154 + char __user *user_buf, 155 + size_t count, 156 + loff_t *ppos) 157 + { 158 + struct ieee80211_local *local = file->private_data; 159 + char buf[400]; 160 + int len = 0; 161 + 162 + len = scnprintf(buf, sizeof(buf), 163 + "AC AQL limit low AQL limit high\n" 164 + "VO %u %u\n" 165 + "VI %u %u\n" 166 + "BE %u %u\n" 167 + "BK %u %u\n", 168 + local->aql_txq_limit_low[IEEE80211_AC_VO], 169 + local->aql_txq_limit_high[IEEE80211_AC_VO], 170 + local->aql_txq_limit_low[IEEE80211_AC_VI], 171 + local->aql_txq_limit_high[IEEE80211_AC_VI], 172 + local->aql_txq_limit_low[IEEE80211_AC_BE], 173 + local->aql_txq_limit_high[IEEE80211_AC_BE], 174 + local->aql_txq_limit_low[IEEE80211_AC_BK], 175 + local->aql_txq_limit_high[IEEE80211_AC_BK]); 176 + return simple_read_from_buffer(user_buf, count, ppos, 177 + buf, len); 178 + } 179 + 180 + static ssize_t aql_txq_limit_write(struct file *file, 181 + const char __user *user_buf, 182 + size_t count, 183 + loff_t *ppos) 184 + { 185 + struct ieee80211_local *local = file->private_data; 186 + char buf[100]; 187 + size_t len; 188 + u32 ac, q_limit_low, q_limit_high, q_limit_low_old, q_limit_high_old; 189 + struct sta_info *sta; 190 + 191 + if (count > sizeof(buf)) 192 + return -EINVAL; 193 + 194 + if (copy_from_user(buf, user_buf, count)) 195 + return -EFAULT; 196 + 197 + buf[sizeof(buf) - 1] = 0; 198 + len = strlen(buf); 199 + if (len > 0 && buf[len - 1] == '\n') 200 + buf[len - 1] = 0; 201 + 202 + if (sscanf(buf, "%u %u %u", &ac, &q_limit_low, &q_limit_high) != 3) 203 + return -EINVAL; 204 + 205 + if (ac >= IEEE80211_NUM_ACS) 206 + return -EINVAL; 207 + 208 + q_limit_low_old = local->aql_txq_limit_low[ac]; 209 + q_limit_high_old = local->aql_txq_limit_high[ac]; 210 + 211 + local->aql_txq_limit_low[ac] = q_limit_low; 212 + local->aql_txq_limit_high[ac] = q_limit_high; 213 + 214 + mutex_lock(&local->sta_mtx); 215 + 
list_for_each_entry(sta, &local->sta_list, list) { 216 + /* If a sta has customized queue limits, keep it */ 217 + if (sta->airtime[ac].aql_limit_low == q_limit_low_old && 218 + sta->airtime[ac].aql_limit_high == q_limit_high_old) { 219 + sta->airtime[ac].aql_limit_low = q_limit_low; 220 + sta->airtime[ac].aql_limit_high = q_limit_high; 221 + } 222 + } 223 + mutex_unlock(&local->sta_mtx); 224 + return count; 225 + } 226 + 227 + static const struct file_operations aql_txq_limit_ops = { 228 + .write = aql_txq_limit_write, 229 + .read = aql_txq_limit_read, 230 + .open = simple_open, 231 + .llseek = default_llseek, 232 + }; 233 + 153 234 static ssize_t force_tx_status_read(struct file *file, 154 235 char __user *user_buf, 155 236 size_t count, ··· 524 443 525 444 debugfs_create_u16("airtime_flags", 0600, 526 445 phyd, &local->airtime_flags); 446 + 447 + DEBUGFS_ADD(aql_txq_limit); 448 + debugfs_create_u32("aql_threshold", 0600, 449 + phyd, &local->aql_threshold); 527 450 528 451 statsd = debugfs_create_dir("statistics", phyd); 529 452
+33 -10
net/mac80211/debugfs_sta.c
··· 197 197 { 198 198 struct sta_info *sta = file->private_data; 199 199 struct ieee80211_local *local = sta->sdata->local; 200 - size_t bufsz = 200; 200 + size_t bufsz = 400; 201 201 char *buf = kzalloc(bufsz, GFP_KERNEL), *p = buf; 202 202 u64 rx_airtime = 0, tx_airtime = 0; 203 203 s64 deficit[IEEE80211_NUM_ACS]; 204 + u32 q_depth[IEEE80211_NUM_ACS]; 205 + u32 q_limit_l[IEEE80211_NUM_ACS], q_limit_h[IEEE80211_NUM_ACS]; 204 206 ssize_t rv; 205 207 int ac; 206 208 ··· 214 212 rx_airtime += sta->airtime[ac].rx_airtime; 215 213 tx_airtime += sta->airtime[ac].tx_airtime; 216 214 deficit[ac] = sta->airtime[ac].deficit; 215 + q_limit_l[ac] = sta->airtime[ac].aql_limit_low; 216 + q_limit_h[ac] = sta->airtime[ac].aql_limit_high; 217 217 spin_unlock_bh(&local->active_txq_lock[ac]); 218 + q_depth[ac] = atomic_read(&sta->airtime[ac].aql_tx_pending); 218 219 } 219 220 220 221 p += scnprintf(p, bufsz + buf - p, 221 222 "RX: %llu us\nTX: %llu us\nWeight: %u\n" 222 - "Deficit: VO: %lld us VI: %lld us BE: %lld us BK: %lld us\n", 223 - rx_airtime, 224 - tx_airtime, 225 - sta->airtime_weight, 226 - deficit[0], 227 - deficit[1], 228 - deficit[2], 229 - deficit[3]); 223 + "Deficit: VO: %lld us VI: %lld us BE: %lld us BK: %lld us\n" 224 + "Q depth: VO: %u us VI: %u us BE: %u us BK: %u us\n" 225 + "Q limit[low/high]: VO: %u/%u VI: %u/%u BE: %u/%u BK: %u/%u\n", 226 + rx_airtime, tx_airtime, sta->airtime_weight, 227 + deficit[0], deficit[1], deficit[2], deficit[3], 228 + q_depth[0], q_depth[1], q_depth[2], q_depth[3], 229 + q_limit_l[0], q_limit_h[0], q_limit_l[1], q_limit_h[1], 230 + q_limit_l[2], q_limit_h[2], q_limit_l[3], q_limit_h[3]), 230 231 231 232 rv = simple_read_from_buffer(userbuf, count, ppos, buf, p - buf); 232 233 kfree(buf); ··· 241 236 { 242 237 struct sta_info *sta = file->private_data; 243 238 struct ieee80211_local *local = sta->sdata->local; 244 - int ac; 239 + u32 ac, q_limit_l, q_limit_h; 240 + char _buf[100] = {}, *buf = _buf; 241 + 242 + if (count > 
sizeof(_buf)) 243 + return -EINVAL; 244 + 245 + if (copy_from_user(buf, userbuf, count)) 246 + return -EFAULT; 247 + 248 + buf[sizeof(_buf) - 1] = '\0'; 249 + if (sscanf(buf, "queue limit %u %u %u", &ac, &q_limit_l, &q_limit_h) 250 + != 3) 251 + return -EINVAL; 252 + 253 + if (ac >= IEEE80211_NUM_ACS) 254 + return -EINVAL; 255 + 256 + sta->airtime[ac].aql_limit_low = q_limit_l; 257 + sta->airtime[ac].aql_limit_high = q_limit_h; 245 258 246 259 for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { 247 260 spin_lock_bh(&local->active_txq_lock[ac]);
+4
net/mac80211/ieee80211_i.h
··· 1142 1142 u16 schedule_round[IEEE80211_NUM_ACS]; 1143 1143 1144 1144 u16 airtime_flags; 1145 + u32 aql_txq_limit_low[IEEE80211_NUM_ACS]; 1146 + u32 aql_txq_limit_high[IEEE80211_NUM_ACS]; 1147 + u32 aql_threshold; 1148 + atomic_t aql_total_pending_airtime; 1145 1149 1146 1150 const struct ieee80211_ops *ops; 1147 1151
+9 -1
net/mac80211/main.c
··· 667 667 for (i = 0; i < IEEE80211_NUM_ACS; i++) { 668 668 INIT_LIST_HEAD(&local->active_txqs[i]); 669 669 spin_lock_init(&local->active_txq_lock[i]); 670 + local->aql_txq_limit_low[i] = IEEE80211_DEFAULT_AQL_TXQ_LIMIT_L; 671 + local->aql_txq_limit_high[i] = 672 + IEEE80211_DEFAULT_AQL_TXQ_LIMIT_H; 670 673 } 671 - local->airtime_flags = AIRTIME_USE_TX | AIRTIME_USE_RX; 674 + 675 + local->airtime_flags = AIRTIME_USE_TX | 676 + AIRTIME_USE_RX | 677 + AIRTIME_USE_AQL; 678 + local->aql_threshold = IEEE80211_AQL_THRESHOLD; 679 + atomic_set(&local->aql_total_pending_airtime, 0); 672 680 673 681 INIT_LIST_HEAD(&local->chanctx_list); 674 682 mutex_init(&local->chanctx_mtx);
+38
net/mac80211/sta_info.c
··· 410 410 skb_queue_head_init(&sta->ps_tx_buf[i]); 411 411 skb_queue_head_init(&sta->tx_filtered[i]); 412 412 sta->airtime[i].deficit = sta->airtime_weight; 413 + atomic_set(&sta->airtime[i].aql_tx_pending, 0); 414 + sta->airtime[i].aql_limit_low = local->aql_txq_limit_low[i]; 415 + sta->airtime[i].aql_limit_high = local->aql_txq_limit_high[i]; 413 416 } 414 417 415 418 for (i = 0; i < IEEE80211_NUM_TIDS; i++) ··· 1909 1906 spin_unlock_bh(&local->active_txq_lock[ac]); 1910 1907 } 1911 1908 EXPORT_SYMBOL(ieee80211_sta_register_airtime); 1909 + 1910 + void ieee80211_sta_update_pending_airtime(struct ieee80211_local *local, 1911 + struct sta_info *sta, u8 ac, 1912 + u16 tx_airtime, bool tx_completed) 1913 + { 1914 + int tx_pending; 1915 + 1916 + if (!tx_completed) { 1917 + if (sta) 1918 + atomic_add(tx_airtime, 1919 + &sta->airtime[ac].aql_tx_pending); 1920 + 1921 + atomic_add(tx_airtime, &local->aql_total_pending_airtime); 1922 + return; 1923 + } 1924 + 1925 + if (sta) { 1926 + tx_pending = atomic_sub_return(tx_airtime, 1927 + &sta->airtime[ac].aql_tx_pending); 1928 + if (WARN_ONCE(tx_pending < 0, 1929 + "STA %pM AC %d txq pending airtime underflow: %u, %u", 1930 + sta->addr, ac, tx_pending, tx_airtime)) 1931 + atomic_cmpxchg(&sta->airtime[ac].aql_tx_pending, 1932 + tx_pending, 0); 1933 + } 1934 + 1935 + tx_pending = atomic_sub_return(tx_airtime, 1936 + &local->aql_total_pending_airtime); 1937 + if (WARN_ONCE(tx_pending < 0, 1938 + "Device %s AC %d pending airtime underflow: %u, %u", 1939 + wiphy_name(local->hw.wiphy), ac, tx_pending, 1940 + tx_airtime)) 1941 + atomic_cmpxchg(&local->aql_total_pending_airtime, 1942 + tx_pending, 0); 1943 + } 1912 1944 1913 1945 int sta_info_move_state(struct sta_info *sta, 1914 1946 enum ieee80211_sta_state new_state)
+8
net/mac80211/sta_info.h
··· 127 127 /* Debugfs flags to enable/disable use of RX/TX airtime in scheduler */ 128 128 #define AIRTIME_USE_TX BIT(0) 129 129 #define AIRTIME_USE_RX BIT(1) 130 + #define AIRTIME_USE_AQL BIT(2) 130 131 131 132 struct airtime_info { 132 133 u64 rx_airtime; 133 134 u64 tx_airtime; 134 135 s64 deficit; 136 + atomic_t aql_tx_pending; /* Estimated airtime for frames pending */ 137 + u32 aql_limit_low; 138 + u32 aql_limit_high; 135 139 }; 140 + 141 + void ieee80211_sta_update_pending_airtime(struct ieee80211_local *local, 142 + struct sta_info *sta, u8 ac, 143 + u16 tx_airtime, bool tx_completed); 136 144 137 145 struct sta_info; 138 146
+48 -3
net/mac80211/tx.c
··· 3677 3677 { 3678 3678 struct ieee80211_local *local = hw_to_local(hw); 3679 3679 struct ieee80211_txq *ret = NULL; 3680 - struct txq_info *txqi = NULL; 3680 + struct txq_info *txqi = NULL, *head = NULL; 3681 + bool found_eligible_txq = false; 3681 3682 3682 3683 spin_lock_bh(&local->active_txq_lock[ac]); 3683 3684 ··· 3689 3688 if (!txqi) 3690 3689 goto out; 3691 3690 3691 + if (txqi == head) { 3692 + if (!found_eligible_txq) 3693 + goto out; 3694 + else 3695 + found_eligible_txq = false; 3696 + } 3697 + 3698 + if (!head) 3699 + head = txqi; 3700 + 3692 3701 if (txqi->txq.sta) { 3693 3702 struct sta_info *sta = container_of(txqi->txq.sta, 3694 - struct sta_info, sta); 3703 + struct sta_info, sta); 3704 + bool aql_check = ieee80211_txq_airtime_check(hw, &txqi->txq); 3705 + s64 deficit = sta->airtime[txqi->txq.ac].deficit; 3695 3706 3696 - if (sta->airtime[txqi->txq.ac].deficit < 0) { 3707 + if (aql_check) 3708 + found_eligible_txq = true; 3709 + 3710 + if (deficit < 0) 3697 3711 sta->airtime[txqi->txq.ac].deficit += 3698 3712 sta->airtime_weight; 3713 + 3714 + if (deficit < 0 || !aql_check) { 3699 3715 list_move_tail(&txqi->schedule_order, 3700 3716 &local->active_txqs[txqi->txq.ac]); 3701 3717 goto begin; ··· 3765 3747 spin_unlock_bh(&local->active_txq_lock[txq->ac]); 3766 3748 } 3767 3749 EXPORT_SYMBOL(__ieee80211_schedule_txq); 3750 + 3751 + bool ieee80211_txq_airtime_check(struct ieee80211_hw *hw, 3752 + struct ieee80211_txq *txq) 3753 + { 3754 + struct sta_info *sta; 3755 + struct ieee80211_local *local = hw_to_local(hw); 3756 + 3757 + if (!(local->airtime_flags & AIRTIME_USE_AQL)) 3758 + return true; 3759 + 3760 + if (!txq->sta) 3761 + return true; 3762 + 3763 + sta = container_of(txq->sta, struct sta_info, sta); 3764 + if (atomic_read(&sta->airtime[txq->ac].aql_tx_pending) < 3765 + sta->airtime[txq->ac].aql_limit_low) 3766 + return true; 3767 + 3768 + if (atomic_read(&local->aql_total_pending_airtime) < 3769 + local->aql_threshold && 3770 + 
atomic_read(&sta->airtime[txq->ac].aql_tx_pending) < 3771 + sta->airtime[txq->ac].aql_limit_high) 3772 + return true; 3773 + 3774 + return false; 3775 + } 3776 + EXPORT_SYMBOL(ieee80211_txq_airtime_check); 3768 3777 3769 3778 bool ieee80211_txq_may_transmit(struct ieee80211_hw *hw, 3770 3779 struct ieee80211_txq *txq)