Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

uvesafb,vesafb: create WC or WB PAT-entries

With a PAT-enabled kernel, when using uvesafb or vesafb, these drivers will
create uncached-minus PAT entries for the framebuffer memory because they use
ioremap() (not the *_cache or *_wc variants). When the framebuffer memory
intersects with the video RAM used by Xorg, the complete video RAM will be
mapped uncached-minus, which results in a severe performance penalty.

Here are the correct MTRR entries created by uvesafb:
schlicht@netbook:~$ cat /proc/mtrr
reg00: base=0x000000000 ( 0MB), size= 2048MB, count=1: write-back
reg01: base=0x06ff00000 ( 1791MB), size= 1MB, count=1: uncachable
reg02: base=0x070000000 ( 1792MB), size= 256MB, count=1: uncachable
reg03: base=0x0d0000000 ( 3328MB), size= 16MB, count=1: write-combining

And here are the problematic PAT entries:
schlicht@netbook:~$ sudo cat /sys/kernel/debug/x86/pat_memtype_list
PAT memtype list:
write-back @ 0x0-0x1000
uncached-minus @ 0x6fedd000-0x6fee3000
uncached-minus @ 0x6fee2000-0x6fee3000
uncached-minus @ 0x6fee2000-0x6fee3000
uncached-minus @ 0x6fee2000-0x6fee3000
uncached-minus @ 0x6fee2000-0x6fee3000
uncached-minus @ 0x6fee2000-0x6fee3000
uncached-minus @ 0x6fee2000-0x6fee3000
uncached-minus @ 0x6fee2000-0x6fee3000
uncached-minus @ 0x6fee3000-0x6fee4000
uncached-minus @ 0x6fee3000-0x6fee4000
uncached-minus @ 0x6fee3000-0x6fee4000
uncached-minus @ 0xd0000000-0xe0000000 <-- created by xserver-xorg
uncached-minus @ 0xd0000000-0xd1194000 <-- created by uvesafb
uncached-minus @ 0xf4000000-0xf4009000
uncached-minus @ 0xf4200000-0xf4400000
uncached-minus @ 0xf5000000-0xf5010000
uncached-minus @ 0xf5100000-0xf5104000
uncached-minus @ 0xf5400000-0xf5404000
uncached-minus @ 0xf5404000-0xf5405000
uncached-minus @ 0xf5404000-0xf5405000
uncached-minus @ 0xfed00000-0xfed01000

Therefore I created the attached patch for uvesafb which uses ioremap_wc() to
create the correct PAT entries, as shown below:
schlicht@netbook:~$ sudo cat /sys/kernel/debug/x86/pat_memtype_list
PAT memtype list:
write-back @ 0x0-0x1000
uncached-minus @ 0x6fedd000-0x6fee3000
uncached-minus @ 0x6fee2000-0x6fee3000
uncached-minus @ 0x6fee2000-0x6fee3000
uncached-minus @ 0x6fee2000-0x6fee3000
uncached-minus @ 0x6fee2000-0x6fee3000
uncached-minus @ 0x6fee2000-0x6fee3000
uncached-minus @ 0x6fee2000-0x6fee3000
uncached-minus @ 0x6fee2000-0x6fee3000
uncached-minus @ 0x6fee3000-0x6fee4000
uncached-minus @ 0x6fee3000-0x6fee4000
uncached-minus @ 0x6fee3000-0x6fee4000
write-combining @ 0xd0000000-0xe0000000
write-combining @ 0xd0000000-0xd1194000
uncached-minus @ 0xf4000000-0xf4009000
uncached-minus @ 0xf4200000-0xf4400000
uncached-minus @ 0xf5000000-0xf5010000
uncached-minus @ 0xf5100000-0xf5104000
uncached-minus @ 0xf5400000-0xf5404000
uncached-minus @ 0xf5404000-0xf5405000
uncached-minus @ 0xf5404000-0xf5405000
uncached-minus @ 0xfed00000-0xfed01000

This results in a performance gain, objectively measurable with e.g.
x11perf -comppixwin10 -comppixwin100 -comppixwin500:
1: x11perf_xaa.log
2: x11perf_xaa_patched.log

1 2 Operation
-------- ---------------- -----------------
124000.0 202000.0 ( 1.63) Composite 10x10 from pixmap to window
3340.0 24400.0 ( 7.31) Composite 100x100 from pixmap to window
131.0 1150.0 ( 8.78) Composite 500x500 from pixmap to window

You can see the substantial performance gain when compositing larger pixmaps to a window.

The patches replace the ioremap() function with the variant matching the mtrr
parameter. To create "write-back" PAT entries, the ioremap_cache() function
must be called after creating the MTRR entries, and the ioremap_cache() region
must completely fit into the MTRR region; this is why the MTRR region size is
now rounded up to the next power of two.

Signed-off-by: Thomas Schlichter <thomas.schlichter@web.de>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

authored by

Thomas Schlichter and committed by
Paul Mundt
803a4e14 7b0e2785

+64 -29
+35 -14
drivers/video/uvesafb.c
··· 1552 1552 int rc; 1553 1553 1554 1554 /* Find the largest power-of-two */ 1555 - while (temp_size & (temp_size - 1)) 1556 - temp_size &= (temp_size - 1); 1555 + temp_size = roundup_pow_of_two(temp_size); 1557 1556 1558 1557 /* Try and find a power of two to add */ 1559 1558 do { ··· 1565 1566 #endif /* CONFIG_MTRR */ 1566 1567 } 1567 1568 1569 + static void __devinit uvesafb_ioremap(struct fb_info *info) 1570 + { 1571 + #ifdef CONFIG_X86 1572 + switch (mtrr) { 1573 + case 1: /* uncachable */ 1574 + info->screen_base = ioremap_nocache(info->fix.smem_start, info->fix.smem_len); 1575 + break; 1576 + case 2: /* write-back */ 1577 + info->screen_base = ioremap_cache(info->fix.smem_start, info->fix.smem_len); 1578 + break; 1579 + case 3: /* write-combining */ 1580 + info->screen_base = ioremap_wc(info->fix.smem_start, info->fix.smem_len); 1581 + break; 1582 + case 4: /* write-through */ 1583 + default: 1584 + info->screen_base = ioremap(info->fix.smem_start, info->fix.smem_len); 1585 + break; 1586 + } 1587 + #else 1588 + info->screen_base = ioremap(info->fix.smem_start, info->fix.smem_len); 1589 + #endif /* CONFIG_X86 */ 1590 + } 1568 1591 1569 1592 static ssize_t uvesafb_show_vbe_ver(struct device *dev, 1570 1593 struct device_attribute *attr, char *buf) ··· 1757 1736 1758 1737 uvesafb_init_info(info, mode); 1759 1738 1739 + if (!request_region(0x3c0, 32, "uvesafb")) { 1740 + printk(KERN_ERR "uvesafb: request region 0x3c0-0x3e0 failed\n"); 1741 + err = -EIO; 1742 + goto out_mode; 1743 + } 1744 + 1760 1745 if (!request_mem_region(info->fix.smem_start, info->fix.smem_len, 1761 1746 "uvesafb")) { 1762 1747 printk(KERN_ERR "uvesafb: cannot reserve video memory at " 1763 1748 "0x%lx\n", info->fix.smem_start); 1764 1749 err = -EIO; 1765 - goto out_mode; 1750 + goto out_reg; 1766 1751 } 1767 1752 1768 - info->screen_base = ioremap(info->fix.smem_start, info->fix.smem_len); 1753 + uvesafb_init_mtrr(info); 1754 + uvesafb_ioremap(info); 1769 1755 1770 1756 if 
(!info->screen_base) { 1771 1757 printk(KERN_ERR ··· 1783 1755 goto out_mem; 1784 1756 } 1785 1757 1786 - if (!request_region(0x3c0, 32, "uvesafb")) { 1787 - printk(KERN_ERR "uvesafb: request region 0x3c0-0x3e0 failed\n"); 1788 - err = -EIO; 1789 - goto out_unmap; 1790 - } 1791 - 1792 - uvesafb_init_mtrr(info); 1793 1758 platform_set_drvdata(dev, info); 1794 1759 1795 1760 if (register_framebuffer(info) < 0) { 1796 1761 printk(KERN_ERR 1797 1762 "uvesafb: failed to register framebuffer device\n"); 1798 1763 err = -EINVAL; 1799 - goto out_reg; 1764 + goto out_unmap; 1800 1765 } 1801 1766 1802 1767 printk(KERN_INFO "uvesafb: framebuffer at 0x%lx, mapped to 0x%p, " ··· 1806 1785 1807 1786 return 0; 1808 1787 1809 - out_reg: 1810 - release_region(0x3c0, 32); 1811 1788 out_unmap: 1812 1789 iounmap(info->screen_base); 1813 1790 out_mem: 1814 1791 release_mem_region(info->fix.smem_start, info->fix.smem_len); 1792 + out_reg: 1793 + release_region(0x3c0, 32); 1815 1794 out_mode: 1816 1795 if (!list_empty(&info->modelist)) 1817 1796 fb_destroy_modelist(&info->modelist);
+29 -15
drivers/video/vesafb.c
··· 303 303 info->apertures->ranges[0].base = screen_info.lfb_base; 304 304 info->apertures->ranges[0].size = size_total; 305 305 306 - info->screen_base = ioremap(vesafb_fix.smem_start, vesafb_fix.smem_len); 307 - if (!info->screen_base) { 308 - printk(KERN_ERR 309 - "vesafb: abort, cannot ioremap video memory 0x%x @ 0x%lx\n", 310 - vesafb_fix.smem_len, vesafb_fix.smem_start); 311 - err = -EIO; 312 - goto err; 313 - } 314 - 315 - printk(KERN_INFO "vesafb: framebuffer at 0x%lx, mapped to 0x%p, " 316 - "using %dk, total %dk\n", 317 - vesafb_fix.smem_start, info->screen_base, 318 - size_remap/1024, size_total/1024); 319 306 printk(KERN_INFO "vesafb: mode is %dx%dx%d, linelength=%d, pages=%d\n", 320 307 vesafb_defined.xres, vesafb_defined.yres, vesafb_defined.bits_per_pixel, vesafb_fix.line_length, screen_info.pages); 321 308 ··· 425 438 int rc; 426 439 427 440 /* Find the largest power-of-two */ 428 - while (temp_size & (temp_size - 1)) 429 - temp_size &= (temp_size - 1); 441 + temp_size = roundup_pow_of_two(temp_size); 430 442 431 443 /* Try and find a power of two to add */ 432 444 do { ··· 437 451 } 438 452 #endif 439 453 454 + switch (mtrr) { 455 + case 1: /* uncachable */ 456 + info->screen_base = ioremap_nocache(vesafb_fix.smem_start, vesafb_fix.smem_len); 457 + break; 458 + case 2: /* write-back */ 459 + info->screen_base = ioremap_cache(vesafb_fix.smem_start, vesafb_fix.smem_len); 460 + break; 461 + case 3: /* write-combining */ 462 + info->screen_base = ioremap_wc(vesafb_fix.smem_start, vesafb_fix.smem_len); 463 + break; 464 + case 4: /* write-through */ 465 + default: 466 + info->screen_base = ioremap(vesafb_fix.smem_start, vesafb_fix.smem_len); 467 + break; 468 + } 469 + if (!info->screen_base) { 470 + printk(KERN_ERR 471 + "vesafb: abort, cannot ioremap video memory 0x%x @ 0x%lx\n", 472 + vesafb_fix.smem_len, vesafb_fix.smem_start); 473 + err = -EIO; 474 + goto err; 475 + } 476 + 477 + printk(KERN_INFO "vesafb: framebuffer at 0x%lx, mapped to 0x%p, " 478 
+ "using %dk, total %dk\n", 479 + vesafb_fix.smem_start, info->screen_base, 480 + size_remap/1024, size_total/1024); 481 + 440 482 info->fbops = &vesafb_ops; 441 483 info->var = vesafb_defined; 442 484 info->fix = vesafb_fix;