Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/udl: Inline memcmp() for RLE compression of xfer

Because memcmp() is called with a variable length, the compiler does not
realise that the length is always either 2 or 4 bytes. Instead of performing
the comparison inline, the compiler inserts a function call to the generic
memcmp routine, which is optimised for long comparisons of variable
length. That turns out to be quite expensive...

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Dave Airlie <airlied@redhat.com>

Authored by Chris Wilson and committed by Dave Airlie
e90a4ea5 bcb39af4

+27 -17
+27 -17
drivers/gpu/drm/udl/udl_transfer.c
··· 75 75 } 76 76 #endif 77 77 78 - static inline u16 pixel32_to_be16p(const uint8_t *pixel) 78 + static inline u16 pixel32_to_be16(const uint32_t pixel) 79 79 { 80 - uint32_t pix = *(uint32_t *)pixel; 81 - u16 retval; 80 + return (((pixel >> 3) & 0x001f) | 81 + ((pixel >> 5) & 0x07e0) | 82 + ((pixel >> 8) & 0xf800)); 83 + } 82 84 83 - retval = (((pix >> 3) & 0x001f) | 84 - ((pix >> 5) & 0x07e0) | 85 - ((pix >> 8) & 0xf800)); 86 - return retval; 85 + static bool pixel_repeats(const void *pixel, const uint32_t repeat, int bpp) 86 + { 87 + if (bpp == 2) 88 + return *(const uint16_t *)pixel == repeat; 89 + else 90 + return *(const uint32_t *)pixel == repeat; 87 91 } 88 92 89 93 /* ··· 156 152 prefetch_range((void *) pixel, (cmd_pixel_end - pixel) * bpp); 157 153 158 154 while (pixel < cmd_pixel_end) { 159 - const u8 * const repeating_pixel = pixel; 155 + const u8 *const start = pixel; 156 + u32 repeating_pixel; 160 157 161 - if (bpp == 2) 162 - *(uint16_t *)cmd = cpu_to_be16p((uint16_t *)pixel); 163 - else if (bpp == 4) 164 - *(uint16_t *)cmd = cpu_to_be16(pixel32_to_be16p(pixel)); 158 + if (bpp == 2) { 159 + repeating_pixel = *(uint16_t *)pixel; 160 + *(uint16_t *)cmd = cpu_to_be16(repeating_pixel); 161 + } else { 162 + repeating_pixel = *(uint32_t *)pixel; 163 + *(uint16_t *)cmd = cpu_to_be16(pixel32_to_be16(repeating_pixel)); 164 + } 165 165 166 166 cmd += 2; 167 167 pixel += bpp; 168 168 169 169 if (unlikely((pixel < cmd_pixel_end) && 170 - (!memcmp(pixel, repeating_pixel, bpp)))) { 170 + (pixel_repeats(pixel, repeating_pixel, bpp)))) { 171 171 /* go back and fill in raw pixel count */ 172 - *raw_pixels_count_byte = (((repeating_pixel - 172 + *raw_pixels_count_byte = (((start - 173 173 raw_pixel_start) / bpp) + 1) & 0xFF; 174 174 175 - while ((pixel < cmd_pixel_end) 176 - && (!memcmp(pixel, repeating_pixel, bpp))) { 175 + while ((pixel < cmd_pixel_end) && 176 + (pixel_repeats(pixel, repeating_pixel, bpp))) { 177 177 pixel += bpp; 178 178 } 179 179 180 180 /* 
immediately after raw data is repeat byte */ 181 - *cmd++ = (((pixel - repeating_pixel) / bpp) - 1) & 0xFF; 181 + *cmd++ = (((pixel - start) / bpp) - 1) & 0xFF; 182 182 183 183 /* Then start another raw pixel span */ 184 184 raw_pixel_start = pixel; ··· 230 222 struct urb *urb = *urb_ptr; 231 223 u8 *cmd = *urb_buf_ptr; 232 224 u8 *cmd_end = (u8 *) urb->transfer_buffer + urb->transfer_buffer_length; 225 + 226 + BUG_ON(!(bpp == 2 || bpp == 4)); 233 227 234 228 line_start = (u8 *) (front + byte_offset); 235 229 next_pixel = line_start;