xprtrdma: Remove imul instructions from rpcrdma_convert_iovs()

Re-arrange the pointer arithmetic in rpcrdma_convert_iovs() to
eliminate several integer multiplication instructions during
Transport Header encoding.
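
The multiplications come from indexed access: each seg[n] reference
scales n by sizeof(struct rpcrdma_mr_seg), which is not a power of
two, so the compiler typically emits an imul in the address
computation. A minimal standalone sketch of the transformation (the
struct seg type and the 4096 segment size are illustrative stand-ins,
not the kernel's definitions):

struct seg {
	void *page;
	char *offset;
	unsigned int len;	/* sizeof(struct seg) is not a power of two */
};

/* Indexed form: seg[n].len computes seg + n * sizeof(struct seg);
 * the non-power-of-two scale usually costs a multiply per access. */
static void convert_indexed(struct seg *seg, unsigned int remaining)
{
	unsigned int n = 0;

	while (remaining) {
		seg[n].len = remaining < 4096 ? remaining : 4096;
		remaining -= seg[n].len;
		++n;
	}
}

/* Pointer form: walking with ++seg replaces the multiply with a
 * constant-stride add folded into the addressing mode. */
static void convert_pointer(struct seg *seg, unsigned int remaining)
{
	while (remaining) {
		seg->len = remaining < 4096 ? remaining : 4096;
		remaining -= seg->len;
		++seg;
	}
}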

Also, array overflow does not occur outside development
environments, so replace the overflow checks inside the conversion
loops with a single spot check at the end. This reduces the number
of conditional branches in the common case.
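
In isolation the hoisted check looks like the sketch below, assuming
a fixed-size destination array (MAX_SEGS, SEG_SIZE, and struct seg
are illustrative stand-ins for the kernel's definitions):

#include <errno.h>

#define unlikely(x)	__builtin_expect(!!(x), 0)
#define MAX_SEGS	8	/* stands in for RPCRDMA_MAX_SEGS */
#define SEG_SIZE	4096

struct seg { unsigned int len; };

static int convert(struct seg *seg, unsigned int remaining)
{
	unsigned int n = 0;

	/* Hot loop: no per-iteration bounds or overflow branch. */
	while (remaining) {
		seg->len = remaining < SEG_SIZE ? remaining : SEG_SIZE;
		remaining -= seg->len;
		++seg;
		++n;
	}

	/* One spot check after conversion. This is sound only because
	 * callers size the array for the largest message the upper
	 * layer can generate, so tripping it indicates a development
	 * bug rather than reachable production input. */
	if (unlikely(n > MAX_SEGS))
		return -EIO;
	return n;
}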

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>

Authored by Chuck Lever, committed by Anna Schumaker
28d9d56f 7ec910e7

+48 -57
net/sunrpc/xprtrdma/rpc_rdma.c
···
 	return rqst->rq_rcv_buf.buflen <= ia->ri_max_inline_read;
 }
 
-/* Split "vec" on page boundaries into segments. FMR registers pages,
- * not a byte range. Other modes coalesce these segments into a single
- * MR when they can.
+/* Split @vec on page boundaries into SGEs. FMR registers pages, not
+ * a byte range. Other modes coalesce these SGEs into a single MR
+ * when they can.
+ *
+ * Returns pointer to next available SGE, and bumps the total number
+ * of SGEs consumed.
  */
-static int
-rpcrdma_convert_kvec(struct kvec *vec, struct rpcrdma_mr_seg *seg, int n)
+static struct rpcrdma_mr_seg *
+rpcrdma_convert_kvec(struct kvec *vec, struct rpcrdma_mr_seg *seg,
+		     unsigned int *n)
 {
-	size_t page_offset;
-	u32 remaining;
+	u32 remaining, page_offset;
 	char *base;
 
 	base = vec->iov_base;
 	page_offset = offset_in_page(base);
 	remaining = vec->iov_len;
-	while (remaining && n < RPCRDMA_MAX_SEGS) {
-		seg[n].mr_page = NULL;
-		seg[n].mr_offset = base;
-		seg[n].mr_len = min_t(u32, PAGE_SIZE - page_offset, remaining);
-		remaining -= seg[n].mr_len;
-		base += seg[n].mr_len;
-		++n;
+	while (remaining) {
+		seg->mr_page = NULL;
+		seg->mr_offset = base;
+		seg->mr_len = min_t(u32, PAGE_SIZE - page_offset, remaining);
+		remaining -= seg->mr_len;
+		base += seg->mr_len;
+		++seg;
+		++(*n);
 		page_offset = 0;
 	}
-	return n;
+	return seg;
 }
 
-/*
- * Chunk assembly from upper layer xdr_buf.
+/* Convert @xdrbuf into SGEs no larger than a page each. As they
+ * are registered, these SGEs are then coalesced into RDMA segments
+ * when the selected memreg mode supports it.
  *
- * Prepare the passed-in xdr_buf into representation as RPC/RDMA chunk
- * elements. Segments are then coalesced when registered, if possible
- * within the selected memreg mode.
- *
- * Returns positive number of segments converted, or a negative errno.
+ * Returns positive number of SGEs consumed, or a negative errno.
  */
 
 static int
···
 		unsigned int pos, enum rpcrdma_chunktype type,
 		struct rpcrdma_mr_seg *seg)
 {
-	int len, n, p, page_base;
+	unsigned long page_base;
+	unsigned int len, n;
 	struct page **ppages;
 
 	n = 0;
-	if (pos == 0) {
-		n = rpcrdma_convert_kvec(&xdrbuf->head[0], seg, n);
-		if (n == RPCRDMA_MAX_SEGS)
-			goto out_overflow;
-	}
+	if (pos == 0)
+		seg = rpcrdma_convert_kvec(&xdrbuf->head[0], seg, &n);
 
 	len = xdrbuf->page_len;
 	ppages = xdrbuf->pages + (xdrbuf->page_base >> PAGE_SHIFT);
 	page_base = offset_in_page(xdrbuf->page_base);
-	p = 0;
-	while (len && n < RPCRDMA_MAX_SEGS) {
-		if (!ppages[p]) {
-			/* alloc the pagelist for receiving buffer */
-			ppages[p] = alloc_page(GFP_ATOMIC);
-			if (!ppages[p])
+	while (len) {
+		if (unlikely(!*ppages)) {
+			/* XXX: Certain upper layer operations do
+			 * not provide receive buffer pages.
+			 */
+			*ppages = alloc_page(GFP_ATOMIC);
+			if (!*ppages)
 				return -EAGAIN;
 		}
-		seg[n].mr_page = ppages[p];
-		seg[n].mr_offset = (void *)(unsigned long) page_base;
-		seg[n].mr_len = min_t(u32, PAGE_SIZE - page_base, len);
-		if (seg[n].mr_len > PAGE_SIZE)
-			goto out_overflow;
-		len -= seg[n].mr_len;
+		seg->mr_page = *ppages;
+		seg->mr_offset = (char *)page_base;
+		seg->mr_len = min_t(u32, PAGE_SIZE - page_base, len);
+		len -= seg->mr_len;
+		++ppages;
+		++seg;
 		++n;
-		++p;
-		page_base = 0;	/* page offset only applies to first page */
+		page_base = 0;
 	}
-
-	/* Message overflows the seg array */
-	if (len && n == RPCRDMA_MAX_SEGS)
-		goto out_overflow;
 
 	/* When encoding a Read chunk, the tail iovec contains an
 	 * XDR pad and may be omitted.
 	 */
 	if (type == rpcrdma_readch && r_xprt->rx_ia.ri_implicit_roundup)
-		return n;
+		goto out;
 
 	/* When encoding a Write chunk, some servers need to see an
 	 * extra segment for non-XDR-aligned Write chunks. The upper
···
 	 * for this purpose.
 	 */
 	if (type == rpcrdma_writech && r_xprt->rx_ia.ri_implicit_roundup)
-		return n;
+		goto out;
 
-	if (xdrbuf->tail[0].iov_len) {
-		n = rpcrdma_convert_kvec(&xdrbuf->tail[0], seg, n);
-		if (n == RPCRDMA_MAX_SEGS)
-			goto out_overflow;
-	}
+	if (xdrbuf->tail[0].iov_len)
+		seg = rpcrdma_convert_kvec(&xdrbuf->tail[0], seg, &n);
 
+out:
+	if (unlikely(n > RPCRDMA_MAX_SEGS))
+		return -EIO;
 	return n;
-
-out_overflow:
-	pr_err("rpcrdma: segment array overflow\n");
-	return -EIO;
 }
 
 static inline int