Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

tmpfs: mem_cgroup charge fault to vm_mm not current mm

Although shmem_fault() has been careful to count a major fault to vm_mm,
shmem_getpage_gfp() has been careless in charging a remote access fault
to current->mm owner's memcg instead of to vma->vm_mm owner's memcg:
that is inconsistent with all the mem_cgroup charging on remote access
faults in mm/memory.c.

Fix it by passing fault_mm along with fault_type to
shmem_getpage_gfp(); but in that case, now knowing the right mm, it's
better for it to handle the PGMAJFAULT updates itself.

And let's keep this clutter out of most callers' way: change the common
shmem_getpage() wrapper to hide fault_mm and fault_type as well as gfp.

Signed-off-by: Andres Lagar-Cavilla <andreslc@google.com>
Signed-off-by: Hugh Dickins <hughd@google.com>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Andres Lagar-Cavilla <andreslc@google.com>
Cc: Yang Shi <yang.shi@linaro.org>
Cc: Ning Qu <quning@gmail.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Konstantin Khlebnikov <koct9i@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

authored by

Andres Lagar-Cavilla and committed by
Linus Torvalds
9e18eb29 75edd345

+34 -27
+34 -27
mm/shmem.c
··· 121 121 static int shmem_replace_page(struct page **pagep, gfp_t gfp, 122 122 struct shmem_inode_info *info, pgoff_t index); 123 123 static int shmem_getpage_gfp(struct inode *inode, pgoff_t index, 124 - struct page **pagep, enum sgp_type sgp, gfp_t gfp, int *fault_type); 124 + struct page **pagep, enum sgp_type sgp, 125 + gfp_t gfp, struct mm_struct *fault_mm, int *fault_type); 125 126 126 127 static inline int shmem_getpage(struct inode *inode, pgoff_t index, 127 - struct page **pagep, enum sgp_type sgp, int *fault_type) 128 + struct page **pagep, enum sgp_type sgp) 128 129 { 129 130 return shmem_getpage_gfp(inode, index, pagep, sgp, 130 - mapping_gfp_mask(inode->i_mapping), fault_type); 131 + mapping_gfp_mask(inode->i_mapping), NULL, NULL); 131 132 } 132 133 133 134 static inline struct shmem_sb_info *SHMEM_SB(struct super_block *sb) ··· 528 527 529 528 if (partial_start) { 530 529 struct page *page = NULL; 531 - shmem_getpage(inode, start - 1, &page, SGP_READ, NULL); 530 + shmem_getpage(inode, start - 1, &page, SGP_READ); 532 531 if (page) { 533 532 unsigned int top = PAGE_SIZE; 534 533 if (start > end) { ··· 543 542 } 544 543 if (partial_end) { 545 544 struct page *page = NULL; 546 - shmem_getpage(inode, end, &page, SGP_READ, NULL); 545 + shmem_getpage(inode, end, &page, SGP_READ); 547 546 if (page) { 548 547 zero_user_segment(page, 0, partial_end); 549 548 set_page_dirty(page); ··· 1116 1115 * 1117 1116 * If we allocate a new one we do not mark it dirty. That's up to the 1118 1117 * vm. If we swap it in we mark it dirty since we also free the swap 1119 - * entry since a page cannot live in both the swap and page cache 1118 + * entry since a page cannot live in both the swap and page cache. 1119 + * 1120 + * fault_mm and fault_type are only supplied by shmem_fault: 1121 + * otherwise they are NULL. 
1120 1122 */ 1121 1123 static int shmem_getpage_gfp(struct inode *inode, pgoff_t index, 1122 - struct page **pagep, enum sgp_type sgp, gfp_t gfp, int *fault_type) 1124 + struct page **pagep, enum sgp_type sgp, gfp_t gfp, 1125 + struct mm_struct *fault_mm, int *fault_type) 1123 1126 { 1124 1127 struct address_space *mapping = inode->i_mapping; 1125 1128 struct shmem_inode_info *info; 1126 1129 struct shmem_sb_info *sbinfo; 1130 + struct mm_struct *charge_mm; 1127 1131 struct mem_cgroup *memcg; 1128 1132 struct page *page; 1129 1133 swp_entry_t swap; ··· 1174 1168 */ 1175 1169 info = SHMEM_I(inode); 1176 1170 sbinfo = SHMEM_SB(inode->i_sb); 1171 + charge_mm = fault_mm ? : current->mm; 1177 1172 1178 1173 if (swap.val) { 1179 1174 /* Look it up and read it in.. */ 1180 1175 page = lookup_swap_cache(swap); 1181 1176 if (!page) { 1182 - /* here we actually do the io */ 1183 - if (fault_type) 1177 + /* Or update major stats only when swapin succeeds?? */ 1178 + if (fault_type) { 1184 1179 *fault_type |= VM_FAULT_MAJOR; 1180 + count_vm_event(PGMAJFAULT); 1181 + mem_cgroup_count_vm_event(fault_mm, PGMAJFAULT); 1182 + } 1183 + /* Here we actually start the io */ 1185 1184 page = shmem_swapin(swap, gfp, info, index); 1186 1185 if (!page) { 1187 1186 error = -ENOMEM; ··· 1213 1202 goto failed; 1214 1203 } 1215 1204 1216 - error = mem_cgroup_try_charge(page, current->mm, gfp, &memcg, 1205 + error = mem_cgroup_try_charge(page, charge_mm, gfp, &memcg, 1217 1206 false); 1218 1207 if (!error) { 1219 1208 error = shmem_add_to_page_cache(page, mapping, index, ··· 1274 1263 if (sgp == SGP_WRITE) 1275 1264 __SetPageReferenced(page); 1276 1265 1277 - error = mem_cgroup_try_charge(page, current->mm, gfp, &memcg, 1266 + error = mem_cgroup_try_charge(page, charge_mm, gfp, &memcg, 1278 1267 false); 1279 1268 if (error) 1280 1269 goto decused; ··· 1363 1352 static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) 1364 1353 { 1365 1354 struct inode *inode = 
file_inode(vma->vm_file); 1355 + gfp_t gfp = mapping_gfp_mask(inode->i_mapping); 1366 1356 int error; 1367 1357 int ret = VM_FAULT_LOCKED; 1368 1358 ··· 1425 1413 spin_unlock(&inode->i_lock); 1426 1414 } 1427 1415 1428 - error = shmem_getpage(inode, vmf->pgoff, &vmf->page, SGP_CACHE, &ret); 1416 + error = shmem_getpage_gfp(inode, vmf->pgoff, &vmf->page, SGP_CACHE, 1417 + gfp, vma->vm_mm, &ret); 1429 1418 if (error) 1430 1419 return ((error == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS); 1431 - 1432 - if (ret & VM_FAULT_MAJOR) { 1433 - count_vm_event(PGMAJFAULT); 1434 - mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT); 1435 - } 1436 1420 return ret; 1437 1421 } 1438 1422 ··· 1575 1567 return -EPERM; 1576 1568 } 1577 1569 1578 - return shmem_getpage(inode, index, pagep, SGP_WRITE, NULL); 1570 + return shmem_getpage(inode, index, pagep, SGP_WRITE); 1579 1571 } 1580 1572 1581 1573 static int ··· 1641 1633 break; 1642 1634 } 1643 1635 1644 - error = shmem_getpage(inode, index, &page, sgp, NULL); 1636 + error = shmem_getpage(inode, index, &page, sgp); 1645 1637 if (error) { 1646 1638 if (error == -EINVAL) 1647 1639 error = 0; ··· 1757 1749 error = 0; 1758 1750 1759 1751 while (spd.nr_pages < nr_pages) { 1760 - error = shmem_getpage(inode, index, &page, SGP_CACHE, NULL); 1752 + error = shmem_getpage(inode, index, &page, SGP_CACHE); 1761 1753 if (error) 1762 1754 break; 1763 1755 unlock_page(page); ··· 1779 1771 page = spd.pages[page_nr]; 1780 1772 1781 1773 if (!PageUptodate(page) || page->mapping != mapping) { 1782 - error = shmem_getpage(inode, index, &page, 1783 - SGP_CACHE, NULL); 1774 + error = shmem_getpage(inode, index, &page, SGP_CACHE); 1784 1775 if (error) 1785 1776 break; 1786 1777 unlock_page(page); ··· 2222 2215 else if (shmem_falloc.nr_unswapped > shmem_falloc.nr_falloced) 2223 2216 error = -ENOMEM; 2224 2217 else 2225 - error = shmem_getpage(inode, index, &page, SGP_FALLOC, 2226 - NULL); 2218 + error = shmem_getpage(inode, index, &page, SGP_FALLOC); 2227 
2219 if (error) { 2228 2220 /* Remove the !PageUptodate pages we added */ 2229 2221 shmem_undo_range(inode, ··· 2540 2534 inode->i_op = &shmem_short_symlink_operations; 2541 2535 } else { 2542 2536 inode_nohighmem(inode); 2543 - error = shmem_getpage(inode, 0, &page, SGP_WRITE, NULL); 2537 + error = shmem_getpage(inode, 0, &page, SGP_WRITE); 2544 2538 if (error) { 2545 2539 iput(inode); 2546 2540 return error; ··· 2581 2575 return ERR_PTR(-ECHILD); 2582 2576 } 2583 2577 } else { 2584 - error = shmem_getpage(inode, 0, &page, SGP_READ, NULL); 2578 + error = shmem_getpage(inode, 0, &page, SGP_READ); 2585 2579 if (error) 2586 2580 return ERR_PTR(error); 2587 2581 unlock_page(page); ··· 3485 3479 int error; 3486 3480 3487 3481 BUG_ON(mapping->a_ops != &shmem_aops); 3488 - error = shmem_getpage_gfp(inode, index, &page, SGP_CACHE, gfp, NULL); 3482 + error = shmem_getpage_gfp(inode, index, &page, SGP_CACHE, 3483 + gfp, NULL, NULL); 3489 3484 if (error) 3490 3485 page = ERR_PTR(error); 3491 3486 else