[PATCH] Manage jbd allocations from its own slabs

JBD currently allocates commit and frozen buffers from slabs. With
CONFIG_SLAB_DEBUG, it's possible for an allocation to cross the page
boundary causing IO problems.

https://bugzilla.redhat.com/bugzilla/show_bug.cgi?id=200127

So, instead of allocating these from regular slabs, manage the allocations
from JBD's own dedicated slabs and disable slab debugging for those slabs.

[akpm@osdl.org: cleanups]
Signed-off-by: Badari Pulavarty <pbadari@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

Authored by Badari Pulavarty; committed by Linus Torvalds (commit ea817398, parent 4c4d50f7)

+97 -13
+3 -3
fs/jbd/commit.c
··· 261 261 struct buffer_head *bh = jh2bh(jh); 262 262 263 263 jbd_lock_bh_state(bh); 264 - kfree(jh->b_committed_data); 264 + jbd_slab_free(jh->b_committed_data, bh->b_size); 265 265 jh->b_committed_data = NULL; 266 266 jbd_unlock_bh_state(bh); 267 267 } ··· 745 745 * Otherwise, we can just throw away the frozen data now. 746 746 */ 747 747 if (jh->b_committed_data) { 748 - kfree(jh->b_committed_data); 748 + jbd_slab_free(jh->b_committed_data, bh->b_size); 749 749 jh->b_committed_data = NULL; 750 750 if (jh->b_frozen_data) { 751 751 jh->b_committed_data = jh->b_frozen_data; 752 752 jh->b_frozen_data = NULL; 753 753 } 754 754 } else if (jh->b_frozen_data) { 755 - kfree(jh->b_frozen_data); 755 + jbd_slab_free(jh->b_frozen_data, bh->b_size); 756 756 jh->b_frozen_data = NULL; 757 757 } 758 758
+86 -6
fs/jbd/journal.c
··· 84 84 85 85 static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *); 86 86 static void __journal_abort_soft (journal_t *journal, int errno); 87 + static int journal_create_jbd_slab(size_t slab_size); 87 88 88 89 /* 89 90 * Helper function used to manage commit timeouts ··· 329 328 char *tmp; 330 329 331 330 jbd_unlock_bh_state(bh_in); 332 - tmp = jbd_rep_kmalloc(bh_in->b_size, GFP_NOFS); 331 + tmp = jbd_slab_alloc(bh_in->b_size, GFP_NOFS); 333 332 jbd_lock_bh_state(bh_in); 334 333 if (jh_in->b_frozen_data) { 335 - kfree(tmp); 334 + jbd_slab_free(tmp, bh_in->b_size); 336 335 goto repeat; 337 336 } 338 337 ··· 1070 1069 int journal_load(journal_t *journal) 1071 1070 { 1072 1071 int err; 1072 + journal_superblock_t *sb; 1073 1073 1074 1074 err = load_superblock(journal); 1075 1075 if (err) 1076 1076 return err; 1077 1077 1078 + sb = journal->j_superblock; 1078 1079 /* If this is a V2 superblock, then we have to check the 1079 1080 * features flags on it. */ 1080 1081 1081 1082 if (journal->j_format_version >= 2) { 1082 - journal_superblock_t *sb = journal->j_superblock; 1083 - 1084 1083 if ((sb->s_feature_ro_compat & 1085 1084 ~cpu_to_be32(JFS_KNOWN_ROCOMPAT_FEATURES)) || 1086 1085 (sb->s_feature_incompat & ··· 1090 1089 return -EINVAL; 1091 1090 } 1092 1091 } 1092 + 1093 + /* 1094 + * Create a slab for this blocksize 1095 + */ 1096 + err = journal_create_jbd_slab(cpu_to_be32(sb->s_blocksize)); 1097 + if (err) 1098 + return err; 1093 1099 1094 1100 /* Let the recovery code check whether it needs to recover any 1095 1101 * data from the journal. */ ··· 1620 1612 } 1621 1613 1622 1614 /* 1615 + * jbd slab management: create 1k, 2k, 4k, 8k slabs as needed 1616 + * and allocate frozen and commit buffers from these slabs. 1617 + * 1618 + * Reason for doing this is to avoid, SLAB_DEBUG - since it could 1619 + * cause bh to cross page boundary. 
1620 + */ 1621 + 1622 + #define JBD_MAX_SLABS 5 1623 + #define JBD_SLAB_INDEX(size) (size >> 11) 1624 + 1625 + static kmem_cache_t *jbd_slab[JBD_MAX_SLABS]; 1626 + static const char *jbd_slab_names[JBD_MAX_SLABS] = { 1627 + "jbd_1k", "jbd_2k", "jbd_4k", NULL, "jbd_8k" 1628 + }; 1629 + 1630 + static void journal_destroy_jbd_slabs(void) 1631 + { 1632 + int i; 1633 + 1634 + for (i = 0; i < JBD_MAX_SLABS; i++) { 1635 + if (jbd_slab[i]) 1636 + kmem_cache_destroy(jbd_slab[i]); 1637 + jbd_slab[i] = NULL; 1638 + } 1639 + } 1640 + 1641 + static int journal_create_jbd_slab(size_t slab_size) 1642 + { 1643 + int i = JBD_SLAB_INDEX(slab_size); 1644 + 1645 + BUG_ON(i >= JBD_MAX_SLABS); 1646 + 1647 + /* 1648 + * Check if we already have a slab created for this size 1649 + */ 1650 + if (jbd_slab[i]) 1651 + return 0; 1652 + 1653 + /* 1654 + * Create a slab and force alignment to be same as slabsize - 1655 + * this will make sure that allocations won't cross the page 1656 + * boundary. 1657 + */ 1658 + jbd_slab[i] = kmem_cache_create(jbd_slab_names[i], 1659 + slab_size, slab_size, 0, NULL, NULL); 1660 + if (!jbd_slab[i]) { 1661 + printk(KERN_EMERG "JBD: no memory for jbd_slab cache\n"); 1662 + return -ENOMEM; 1663 + } 1664 + return 0; 1665 + } 1666 + 1667 + void * jbd_slab_alloc(size_t size, gfp_t flags) 1668 + { 1669 + int idx; 1670 + 1671 + idx = JBD_SLAB_INDEX(size); 1672 + BUG_ON(jbd_slab[idx] == NULL); 1673 + return kmem_cache_alloc(jbd_slab[idx], flags | __GFP_NOFAIL); 1674 + } 1675 + 1676 + void jbd_slab_free(void *ptr, size_t size) 1677 + { 1678 + int idx; 1679 + 1680 + idx = JBD_SLAB_INDEX(size); 1681 + BUG_ON(jbd_slab[idx] == NULL); 1682 + kmem_cache_free(jbd_slab[idx], ptr); 1683 + } 1684 + 1685 + /* 1623 1686 * Journal_head storage management 1624 1687 */ 1625 1688 static kmem_cache_t *journal_head_cache; ··· 1878 1799 printk(KERN_WARNING "%s: freeing " 1879 1800 "b_frozen_data\n", 1880 1801 __FUNCTION__); 1881 - kfree(jh->b_frozen_data); 1802 + 
jbd_slab_free(jh->b_frozen_data, bh->b_size); 1882 1803 } 1883 1804 if (jh->b_committed_data) { 1884 1805 printk(KERN_WARNING "%s: freeing " 1885 1806 "b_committed_data\n", 1886 1807 __FUNCTION__); 1887 - kfree(jh->b_committed_data); 1808 + jbd_slab_free(jh->b_committed_data, bh->b_size); 1888 1809 } 1889 1810 bh->b_private = NULL; 1890 1811 jh->b_bh = NULL; /* debug, really */ ··· 2040 1961 journal_destroy_revoke_caches(); 2041 1962 journal_destroy_journal_head_cache(); 2042 1963 journal_destroy_handle_cache(); 1964 + journal_destroy_jbd_slabs(); 2043 1965 } 2044 1966 2045 1967 static int __init journal_init(void)
+5 -4
fs/jbd/transaction.c
··· 666 666 if (!frozen_buffer) { 667 667 JBUFFER_TRACE(jh, "allocate memory for buffer"); 668 668 jbd_unlock_bh_state(bh); 669 - frozen_buffer = jbd_kmalloc(jh2bh(jh)->b_size, 670 - GFP_NOFS); 669 + frozen_buffer = 670 + jbd_slab_alloc(jh2bh(jh)->b_size, 671 + GFP_NOFS); 671 672 if (!frozen_buffer) { 672 673 printk(KERN_EMERG 673 674 "%s: OOM for frozen_buffer\n", ··· 880 879 881 880 repeat: 882 881 if (!jh->b_committed_data) { 883 - committed_data = jbd_kmalloc(jh2bh(jh)->b_size, GFP_NOFS); 882 + committed_data = jbd_slab_alloc(jh2bh(jh)->b_size, GFP_NOFS); 884 883 if (!committed_data) { 885 884 printk(KERN_EMERG "%s: No memory for committed data\n", 886 885 __FUNCTION__); ··· 907 906 out: 908 907 journal_put_journal_head(jh); 909 908 if (unlikely(committed_data)) 910 - kfree(committed_data); 909 + jbd_slab_free(committed_data, bh->b_size); 911 910 return err; 912 911 } 913 912
+3
include/linux/jbd.h
··· 72 72 #endif 73 73 74 74 extern void * __jbd_kmalloc (const char *where, size_t size, gfp_t flags, int retry); 75 + extern void * jbd_slab_alloc(size_t size, gfp_t flags); 76 + extern void jbd_slab_free(void *ptr, size_t size); 77 + 75 78 #define jbd_kmalloc(size, flags) \ 76 79 __jbd_kmalloc(__FUNCTION__, (size), (flags), journal_oom_retry) 77 80 #define jbd_rep_kmalloc(size, flags) \