Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

[PATCH] e100: re-enable microcode with more useful defaults

For the four versions of hardware that we (currently) support microcode
download on, the default configuration of our receive interrupt mitigation
microcode was too aggressive, and caused unnecessary delays when pinging,
and low(er) throughput on single connection latency sensitive performance
tests.

This code adds microcode support, and sets the defaults to more reasonable
settings. It also explains the functionality in the code in more detail.
Compile and load tested, shows expected behavior for slight delay of ping
packets (1-2ms) when ucode is loaded, and decent interrupt moderation for
small packets, while maintaining good throughput.

Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Signed-off-by: Jeff Garzik <jgarzik@pobox.com>

authored by

Jesse Brandeburg and committed by
Jeff Garzik
2afecc04 f1b8c63e

+256 -17
+256 -17
drivers/net/e100.c
··· 156 156 157 157 #define DRV_NAME "e100" 158 158 #define DRV_EXT "-NAPI" 159 - #define DRV_VERSION "3.4.14-k2"DRV_EXT 159 + #define DRV_VERSION "3.4.14-k4"DRV_EXT 160 160 #define DRV_DESCRIPTION "Intel(R) PRO/100 Network Driver" 161 161 #define DRV_COPYRIGHT "Copyright(c) 1999-2005 Intel Corporation" 162 162 #define PFX DRV_NAME ": " ··· 903 903 904 904 static void e100_get_defaults(struct nic *nic) 905 905 { 906 - struct param_range rfds = { .min = 16, .max = 256, .count = 64 }; 907 - struct param_range cbs = { .min = 64, .max = 256, .count = 64 }; 906 + struct param_range rfds = { .min = 16, .max = 256, .count = 256 }; 907 + struct param_range cbs = { .min = 64, .max = 256, .count = 128 }; 908 908 909 909 pci_read_config_byte(nic->pdev, PCI_REVISION_ID, &nic->rev_id); 910 910 /* MAC type is encoded as rev ID; exception: ICH is treated as 82559 */ ··· 1007 1007 c[16], c[17], c[18], c[19], c[20], c[21], c[22], c[23]); 1008 1008 } 1009 1009 1010 + /********************************************************/ 1011 + /* Micro code for 8086:1229 Rev 8 */ 1012 + /********************************************************/ 1013 + 1014 + /* Parameter values for the D101M B-step */ 1015 + #define D101M_CPUSAVER_TIMER_DWORD 78 1016 + #define D101M_CPUSAVER_BUNDLE_DWORD 65 1017 + #define D101M_CPUSAVER_MIN_SIZE_DWORD 126 1018 + 1019 + #define D101M_B_RCVBUNDLE_UCODE \ 1020 + {\ 1021 + 0x00550215, 0xFFFF0437, 0xFFFFFFFF, 0x06A70789, 0xFFFFFFFF, 0x0558FFFF, \ 1022 + 0x000C0001, 0x00101312, 0x000C0008, 0x00380216, \ 1023 + 0x0010009C, 0x00204056, 0x002380CC, 0x00380056, \ 1024 + 0x0010009C, 0x00244C0B, 0x00000800, 0x00124818, \ 1025 + 0x00380438, 0x00000000, 0x00140000, 0x00380555, \ 1026 + 0x00308000, 0x00100662, 0x00100561, 0x000E0408, \ 1027 + 0x00134861, 0x000C0002, 0x00103093, 0x00308000, \ 1028 + 0x00100624, 0x00100561, 0x000E0408, 0x00100861, \ 1029 + 0x000C007E, 0x00222C21, 0x000C0002, 0x00103093, \ 1030 + 0x00380C7A, 0x00080000, 0x00103090, 0x00380C7A, \ 1031 + 
0x00000000, 0x00000000, 0x00000000, 0x00000000, \ 1032 + 0x0010009C, 0x00244C2D, 0x00010004, 0x00041000, \ 1033 + 0x003A0437, 0x00044010, 0x0038078A, 0x00000000, \ 1034 + 0x00100099, 0x00206C7A, 0x0010009C, 0x00244C48, \ 1035 + 0x00130824, 0x000C0001, 0x00101213, 0x00260C75, \ 1036 + 0x00041000, 0x00010004, 0x00130826, 0x000C0006, \ 1037 + 0x002206A8, 0x0013C926, 0x00101313, 0x003806A8, \ 1038 + 0x00000000, 0x00000000, 0x00000000, 0x00000000, \ 1039 + 0x00000000, 0x00000000, 0x00000000, 0x00000000, \ 1040 + 0x00080600, 0x00101B10, 0x00050004, 0x00100826, \ 1041 + 0x00101210, 0x00380C34, 0x00000000, 0x00000000, \ 1042 + 0x0021155B, 0x00100099, 0x00206559, 0x0010009C, \ 1043 + 0x00244559, 0x00130836, 0x000C0000, 0x00220C62, \ 1044 + 0x000C0001, 0x00101B13, 0x00229C0E, 0x00210C0E, \ 1045 + 0x00226C0E, 0x00216C0E, 0x0022FC0E, 0x00215C0E, \ 1046 + 0x00214C0E, 0x00380555, 0x00010004, 0x00041000, \ 1047 + 0x00278C67, 0x00040800, 0x00018100, 0x003A0437, \ 1048 + 0x00130826, 0x000C0001, 0x00220559, 0x00101313, \ 1049 + 0x00380559, 0x00000000, 0x00000000, 0x00000000, \ 1050 + 0x00000000, 0x00000000, 0x00000000, 0x00000000, \ 1051 + 0x00000000, 0x00130831, 0x0010090B, 0x00124813, \ 1052 + 0x000CFF80, 0x002606AB, 0x00041000, 0x00010004, \ 1053 + 0x003806A8, 0x00000000, 0x00000000, 0x00000000, \ 1054 + } 1055 + 1056 + /********************************************************/ 1057 + /* Micro code for 8086:1229 Rev 9 */ 1058 + /********************************************************/ 1059 + 1060 + /* Parameter values for the D101S */ 1061 + #define D101S_CPUSAVER_TIMER_DWORD 78 1062 + #define D101S_CPUSAVER_BUNDLE_DWORD 67 1063 + #define D101S_CPUSAVER_MIN_SIZE_DWORD 128 1064 + 1065 + #define D101S_RCVBUNDLE_UCODE \ 1066 + {\ 1067 + 0x00550242, 0xFFFF047E, 0xFFFFFFFF, 0x06FF0818, 0xFFFFFFFF, 0x05A6FFFF, \ 1068 + 0x000C0001, 0x00101312, 0x000C0008, 0x00380243, \ 1069 + 0x0010009C, 0x00204056, 0x002380D0, 0x00380056, \ 1070 + 0x0010009C, 0x00244F8B, 0x00000800, 0x00124818, \ 1071 
+ 0x0038047F, 0x00000000, 0x00140000, 0x003805A3, \ 1072 + 0x00308000, 0x00100610, 0x00100561, 0x000E0408, \ 1073 + 0x00134861, 0x000C0002, 0x00103093, 0x00308000, \ 1074 + 0x00100624, 0x00100561, 0x000E0408, 0x00100861, \ 1075 + 0x000C007E, 0x00222FA1, 0x000C0002, 0x00103093, \ 1076 + 0x00380F90, 0x00080000, 0x00103090, 0x00380F90, \ 1077 + 0x00000000, 0x00000000, 0x00000000, 0x00000000, \ 1078 + 0x0010009C, 0x00244FAD, 0x00010004, 0x00041000, \ 1079 + 0x003A047E, 0x00044010, 0x00380819, 0x00000000, \ 1080 + 0x00100099, 0x00206FFD, 0x0010009A, 0x0020AFFD, \ 1081 + 0x0010009C, 0x00244FC8, 0x00130824, 0x000C0001, \ 1082 + 0x00101213, 0x00260FF7, 0x00041000, 0x00010004, \ 1083 + 0x00130826, 0x000C0006, 0x00220700, 0x0013C926, \ 1084 + 0x00101313, 0x00380700, 0x00000000, 0x00000000, \ 1085 + 0x00000000, 0x00000000, 0x00000000, 0x00000000, \ 1086 + 0x00080600, 0x00101B10, 0x00050004, 0x00100826, \ 1087 + 0x00101210, 0x00380FB6, 0x00000000, 0x00000000, \ 1088 + 0x002115A9, 0x00100099, 0x002065A7, 0x0010009A, \ 1089 + 0x0020A5A7, 0x0010009C, 0x002445A7, 0x00130836, \ 1090 + 0x000C0000, 0x00220FE4, 0x000C0001, 0x00101B13, \ 1091 + 0x00229F8E, 0x00210F8E, 0x00226F8E, 0x00216F8E, \ 1092 + 0x0022FF8E, 0x00215F8E, 0x00214F8E, 0x003805A3, \ 1093 + 0x00010004, 0x00041000, 0x00278FE9, 0x00040800, \ 1094 + 0x00018100, 0x003A047E, 0x00130826, 0x000C0001, \ 1095 + 0x002205A7, 0x00101313, 0x003805A7, 0x00000000, \ 1096 + 0x00000000, 0x00000000, 0x00000000, 0x00000000, \ 1097 + 0x00000000, 0x00000000, 0x00000000, 0x00130831, \ 1098 + 0x0010090B, 0x00124813, 0x000CFF80, 0x00260703, \ 1099 + 0x00041000, 0x00010004, 0x00380700 \ 1100 + } 1101 + 1102 + /********************************************************/ 1103 + /* Micro code for the 8086:1229 Rev F/10 */ 1104 + /********************************************************/ 1105 + 1106 + /* Parameter values for the D102 E-step */ 1107 + #define D102_E_CPUSAVER_TIMER_DWORD 42 1108 + #define D102_E_CPUSAVER_BUNDLE_DWORD 54 1109 + #define 
D102_E_CPUSAVER_MIN_SIZE_DWORD 46 1110 + 1111 + #define D102_E_RCVBUNDLE_UCODE \ 1112 + {\ 1113 + 0x007D028F, 0x0E4204F9, 0x14ED0C85, 0x14FA14E9, 0x0EF70E36, 0x1FFF1FFF, \ 1114 + 0x00E014B9, 0x00000000, 0x00000000, 0x00000000, \ 1115 + 0x00E014BD, 0x00000000, 0x00000000, 0x00000000, \ 1116 + 0x00E014D5, 0x00000000, 0x00000000, 0x00000000, \ 1117 + 0x00000000, 0x00000000, 0x00000000, 0x00000000, \ 1118 + 0x00E014C1, 0x00000000, 0x00000000, 0x00000000, \ 1119 + 0x00000000, 0x00000000, 0x00000000, 0x00000000, \ 1120 + 0x00000000, 0x00000000, 0x00000000, 0x00000000, \ 1121 + 0x00000000, 0x00000000, 0x00000000, 0x00000000, \ 1122 + 0x00E014C8, 0x00000000, 0x00000000, 0x00000000, \ 1123 + 0x00200600, 0x00E014EE, 0x00000000, 0x00000000, \ 1124 + 0x0030FF80, 0x00940E46, 0x00038200, 0x00102000, \ 1125 + 0x00E00E43, 0x00000000, 0x00000000, 0x00000000, \ 1126 + 0x00300006, 0x00E014FB, 0x00000000, 0x00000000, \ 1127 + 0x00000000, 0x00000000, 0x00000000, 0x00000000, \ 1128 + 0x00000000, 0x00000000, 0x00000000, 0x00000000, \ 1129 + 0x00000000, 0x00000000, 0x00000000, 0x00000000, \ 1130 + 0x00906E41, 0x00800E3C, 0x00E00E39, 0x00000000, \ 1131 + 0x00906EFD, 0x00900EFD, 0x00E00EF8, 0x00000000, \ 1132 + 0x00000000, 0x00000000, 0x00000000, 0x00000000, \ 1133 + 0x00000000, 0x00000000, 0x00000000, 0x00000000, \ 1134 + 0x00000000, 0x00000000, 0x00000000, 0x00000000, \ 1135 + 0x00000000, 0x00000000, 0x00000000, 0x00000000, \ 1136 + 0x00000000, 0x00000000, 0x00000000, 0x00000000, \ 1137 + 0x00000000, 0x00000000, 0x00000000, 0x00000000, \ 1138 + 0x00000000, 0x00000000, 0x00000000, 0x00000000, \ 1139 + 0x00000000, 0x00000000, 0x00000000, 0x00000000, \ 1140 + 0x00000000, 0x00000000, 0x00000000, 0x00000000, \ 1141 + 0x00000000, 0x00000000, 0x00000000, 0x00000000, \ 1142 + 0x00000000, 0x00000000, 0x00000000, 0x00000000, \ 1143 + 0x00000000, 0x00000000, 0x00000000, 0x00000000, \ 1144 + 0x00000000, 0x00000000, 0x00000000, 0x00000000, \ 1145 + 0x00000000, 0x00000000, 0x00000000, 0x00000000, \ 
1146 + } 1147 + 1010 1148 static void e100_load_ucode(struct nic *nic, struct cb *cb, struct sk_buff *skb) 1011 1149 { 1012 - int i; 1013 - static const u32 ucode[UCODE_SIZE] = { 1014 - /* NFS packets are misinterpreted as TCO packets and 1015 - * incorrectly routed to the BMC over SMBus. This 1016 - * microcode patch checks the fragmented IP bit in the 1017 - * NFS/UDP header to distinguish between NFS and TCO. */ 1018 - 0x0EF70E36, 0x1FFF1FFF, 0x1FFF1FFF, 0x1FFF1FFF, 0x1FFF1FFF, 1019 - 0x1FFF1FFF, 0x00906E41, 0x00800E3C, 0x00E00E39, 0x00000000, 1020 - 0x00906EFD, 0x00900EFD, 0x00E00EF8, 1021 - }; 1150 + /* *INDENT-OFF* */ 1151 + static struct { 1152 + u32 ucode[UCODE_SIZE + 1]; 1153 + u8 mac; 1154 + u8 timer_dword; 1155 + u8 bundle_dword; 1156 + u8 min_size_dword; 1157 + } ucode_opts[] = { 1158 + { D101M_B_RCVBUNDLE_UCODE, 1159 + mac_82559_D101M, 1160 + D101M_CPUSAVER_TIMER_DWORD, 1161 + D101M_CPUSAVER_BUNDLE_DWORD, 1162 + D101M_CPUSAVER_MIN_SIZE_DWORD }, 1163 + { D101S_RCVBUNDLE_UCODE, 1164 + mac_82559_D101S, 1165 + D101S_CPUSAVER_TIMER_DWORD, 1166 + D101S_CPUSAVER_BUNDLE_DWORD, 1167 + D101S_CPUSAVER_MIN_SIZE_DWORD }, 1168 + { D102_E_RCVBUNDLE_UCODE, 1169 + mac_82551_F, 1170 + D102_E_CPUSAVER_TIMER_DWORD, 1171 + D102_E_CPUSAVER_BUNDLE_DWORD, 1172 + D102_E_CPUSAVER_MIN_SIZE_DWORD }, 1173 + { D102_E_RCVBUNDLE_UCODE, 1174 + mac_82551_10, 1175 + D102_E_CPUSAVER_TIMER_DWORD, 1176 + D102_E_CPUSAVER_BUNDLE_DWORD, 1177 + D102_E_CPUSAVER_MIN_SIZE_DWORD }, 1178 + { {0}, 0, 0, 0, 0} 1179 + }, *opts; 1180 + /* *INDENT-ON* */ 1022 1181 1023 - if(nic->mac == mac_82551_F || nic->mac == mac_82551_10) { 1024 - for(i = 0; i < UCODE_SIZE; i++) 1182 + /************************************************************************* 1183 + * CPUSaver parameters 1184 + * 1185 + * All CPUSaver parameters are 16-bit literals that are part of a 1186 + * "move immediate value" instruction. 
By changing the value of 1187 + * the literal in the instruction before the code is loaded, the 1188 + * driver can change the algorithm. 1189 + * 1190 + * INTDELAY - This loads the dead-man timer with its initial value. 1191 + * When this timer expires the interrupt is asserted, and the 1192 + * timer is reset each time a new packet is received. (see 1193 + * BUNDLEMAX below to set the limit on number of chained packets) 1194 + * The current default is 0x600 or 1536. Experiments show that 1195 + * the value should probably stay within the range 0x200 - 0x1000. 1196 + * 1197 + * BUNDLEMAX - 1198 + * This sets the maximum number of frames that will be bundled. In 1199 + * some situations, such as the TCP windowing algorithm, it may be 1200 + * better to limit the growth of the bundle size than let it go as 1201 + * high as it can, because that could cause too much added latency. 1202 + * The default is six, because this is the number of packets in the 1203 + * default TCP window size. A value of 1 would make CPUSaver indicate 1204 + * an interrupt for every frame received. If you do not want to put 1205 + * a limit on the bundle size, set this value to xFFFF. 1206 + * 1207 + * BUNDLESMALL - 1208 + * This contains a bit-mask describing the minimum size frame that 1209 + * will be bundled. The default masks the lower 7 bits, which means 1210 + * that any frame less than 128 bytes in length will not be bundled, 1211 + * but will instead immediately generate an interrupt. This does 1212 + * not affect the current bundle in any way. Any frame that is 128 1213 + * bytes or larger will be bundled normally. This feature is meant 1214 + * to provide immediate indication of ACK frames in a TCP environment. 1215 + * Customers were seeing poor performance when a machine with CPUSaver 1216 + * enabled was sending but not receiving. 
The delay introduced when 1217 + * the ACKs were received was enough to reduce total throughput, because 1218 + * the sender would sit idle until the ACK was finally seen. 1219 + * 1220 + * The current default is 0xFF80, which masks out the lower 7 bits. 1221 + * This means that any frame which is x7F (127) bytes or smaller 1222 + * will cause an immediate interrupt. Because this value must be a 1223 + * bit mask, there are only a few valid values that can be used. To 1224 + * turn this feature off, the driver can write the value xFFFF to the 1225 + * lower word of this instruction (in the same way that the other 1226 + * parameters are used). Likewise, a value of 0xF800 (2047) would 1227 + * cause an interrupt to be generated for every frame, because all 1228 + * standard Ethernet frames are <= 2047 bytes in length. 1229 + *************************************************************************/ 1230 + 1231 + /* if you wish to disable the ucode functionality, while maintaining the 1232 + * workarounds it provides, set the following defines to: 1233 + * BUNDLESMALL 0 1234 + * BUNDLEMAX 1 1235 + * INTDELAY 1 1236 + */ 1237 + #define BUNDLESMALL 1 1238 + #define BUNDLEMAX (u16)6 1239 + #define INTDELAY (u16)1536 /* 0x600 */ 1240 + 1241 + /* do not load u-code for ICH devices */ 1242 + if (nic->flags & ich) 1243 + goto noloaducode; 1244 + 1245 + /* Search for ucode match against h/w rev_id */ 1246 + for (opts = ucode_opts; opts->mac; opts++) { 1247 + int i; 1248 + u32 *ucode = opts->ucode; 1249 + if (nic->mac != opts->mac) 1250 + continue; 1251 + 1252 + /* Insert user-tunable settings */ 1253 + ucode[opts->timer_dword] &= 0xFFFF0000; 1254 + ucode[opts->timer_dword] |= INTDELAY; 1255 + ucode[opts->bundle_dword] &= 0xFFFF0000; 1256 + ucode[opts->bundle_dword] |= BUNDLEMAX; 1257 + ucode[opts->min_size_dword] &= 0xFFFF0000; 1258 + ucode[opts->min_size_dword] |= (BUNDLESMALL) ? 
0xFFFF : 0xFF80; 1259 + 1260 + for (i = 0; i < UCODE_SIZE; i++) 1025 1261 cb->u.ucode[i] = cpu_to_le32(ucode[i]); 1026 1262 cb->command = cpu_to_le16(cb_ucode); 1027 - } else 1028 - cb->command = cpu_to_le16(cb_nop); 1263 + return; 1264 + } 1265 + 1266 + noloaducode: 1267 + cb->command = cpu_to_le16(cb_nop); 1029 1268 } 1030 1269 1031 1270 static void e100_setup_iaaddr(struct nic *nic, struct cb *cb,