
Merge branch 'net-memcg-gather-memcg-code-under-config_memcg'

Kuniyuki Iwashima says:

====================
net-memcg: Gather memcg code under CONFIG_MEMCG.

This series converts most sk->sk_memcg accesses to helper functions
guarded by CONFIG_MEMCG, and finally defines sk_memcg itself only
under CONFIG_MEMCG.

This is v5 of the series linked below, but without the core changes
that decoupled memcg and global socket memory accounting.

I will defer those changes to a follow-up series that will use BPF
to store a flag in sk->sk_memcg.

Overview of the series:

patch 1 is a trivial fix for MPTCP
patches 2 ~ 9 move sk->sk_memcg accesses to a single place
patch 10 moves sk_memcg under CONFIG_MEMCG

v4: https://lore.kernel.org/20250814200912.1040628-1-kuniyu@google.com
v3: https://lore.kernel.org/20250812175848.512446-1-kuniyu@google.com
v2: https://lore.kernel.org/20250811173116.2829786-1-kuniyu@google.com
v1: https://lore.kernel.org/20250721203624.3807041-1-kuniyu@google.com
====================

Link: https://patch.msgid.link/20250815201712.1745332-1-kuniyu@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
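
The conversion at each call site follows one pattern; a minimal
before/after sketch (tcp_under_memory_pressure() in the
include/net/tcp.h hunk below is the representative case):

	/* Before: the caller dereferences sk->sk_memcg directly, so the
	 * field has to exist even when CONFIG_MEMCG is disabled.
	 */
	if (mem_cgroup_sockets_enabled && sk->sk_memcg &&
	    mem_cgroup_under_socket_pressure(sk->sk_memcg))
		return true;

	/* After: sk-based helpers hide the field; under !CONFIG_MEMCG they
	 * are stubs returning false, so the same source compiles either way.
	 */
	if (mem_cgroup_sk_enabled(sk) &&
	    mem_cgroup_sk_under_memory_pressure(sk))
		return true;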

Diffstat: +142 -74

include/linux/memcontrol.h (+26 -19)
···
 #endif /* CONFIG_CGROUP_WRITEBACK */
 
 struct sock;
-bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages,
-			     gfp_t gfp_mask);
-void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages);
 #ifdef CONFIG_MEMCG
 extern struct static_key_false memcg_sockets_enabled_key;
 #define mem_cgroup_sockets_enabled static_branch_unlikely(&memcg_sockets_enabled_key)
+
 void mem_cgroup_sk_alloc(struct sock *sk);
 void mem_cgroup_sk_free(struct sock *sk);
+void mem_cgroup_sk_inherit(const struct sock *sk, struct sock *newsk);
+bool mem_cgroup_sk_charge(const struct sock *sk, unsigned int nr_pages,
+			  gfp_t gfp_mask);
+void mem_cgroup_sk_uncharge(const struct sock *sk, unsigned int nr_pages);
 
 #if BITS_PER_LONG < 64
 static inline void mem_cgroup_set_socket_pressure(struct mem_cgroup *memcg)
···
 }
 #endif
 
-static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg)
-{
-#ifdef CONFIG_MEMCG_V1
-	if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
-		return !!memcg->tcpmem_pressure;
-#endif /* CONFIG_MEMCG_V1 */
-	do {
-		if (time_before64(get_jiffies_64(), mem_cgroup_get_socket_pressure(memcg)))
-			return true;
-	} while ((memcg = parent_mem_cgroup(memcg)));
-	return false;
-}
-
 int alloc_shrinker_info(struct mem_cgroup *memcg);
 void free_shrinker_info(struct mem_cgroup *memcg);
 void set_shrinker_bit(struct mem_cgroup *memcg, int nid, int shrinker_id);
 void reparent_shrinker_deferred(struct mem_cgroup *memcg);
 #else
 #define mem_cgroup_sockets_enabled 0
-static inline void mem_cgroup_sk_alloc(struct sock *sk) { };
-static inline void mem_cgroup_sk_free(struct sock *sk) { };
-static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg)
+
+static inline void mem_cgroup_sk_alloc(struct sock *sk)
+{
+}
+
+static inline void mem_cgroup_sk_free(struct sock *sk)
+{
+}
+
+static inline void mem_cgroup_sk_inherit(const struct sock *sk, struct sock *newsk)
+{
+}
+
+static inline bool mem_cgroup_sk_charge(const struct sock *sk,
+					unsigned int nr_pages,
+					gfp_t gfp_mask)
 {
 	return false;
+}
+
+static inline void mem_cgroup_sk_uncharge(const struct sock *sk,
+					  unsigned int nr_pages)
+{
 }
 
 static inline void set_shrinker_bit(struct mem_cgroup *memcg,

include/net/proto_memory.h (+2 -2)
···
 	if (!sk->sk_prot->memory_pressure)
 		return false;
 
-	if (mem_cgroup_sockets_enabled && sk->sk_memcg &&
-	    mem_cgroup_under_socket_pressure(sk->sk_memcg))
+	if (mem_cgroup_sk_enabled(sk) &&
+	    mem_cgroup_sk_under_memory_pressure(sk))
 		return true;
 
 	return !!READ_ONCE(*sk->sk_prot->memory_pressure);

include/net/sock.h (+46)
···
 	__cacheline_group_begin(sock_read_rxtx);
 	int sk_err;
 	struct socket *sk_socket;
+#ifdef CONFIG_MEMCG
 	struct mem_cgroup *sk_memcg;
+#endif
 #ifdef CONFIG_XFRM
 	struct xfrm_policy __rcu *sk_policy[2];
 #endif
···
 {
 	return in_softirq() ? GFP_ATOMIC : GFP_KERNEL;
 }
+
+#ifdef CONFIG_MEMCG
+static inline struct mem_cgroup *mem_cgroup_from_sk(const struct sock *sk)
+{
+	return sk->sk_memcg;
+}
+
+static inline bool mem_cgroup_sk_enabled(const struct sock *sk)
+{
+	return mem_cgroup_sockets_enabled && mem_cgroup_from_sk(sk);
+}
+
+static inline bool mem_cgroup_sk_under_memory_pressure(const struct sock *sk)
+{
+	struct mem_cgroup *memcg = mem_cgroup_from_sk(sk);
+
+#ifdef CONFIG_MEMCG_V1
+	if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
+		return !!memcg->tcpmem_pressure;
+#endif /* CONFIG_MEMCG_V1 */
+
+	do {
+		if (time_before64(get_jiffies_64(), mem_cgroup_get_socket_pressure(memcg)))
+			return true;
+	} while ((memcg = parent_mem_cgroup(memcg)));
+
+	return false;
+}
+#else
+static inline struct mem_cgroup *mem_cgroup_from_sk(const struct sock *sk)
+{
+	return NULL;
+}
+
+static inline bool mem_cgroup_sk_enabled(const struct sock *sk)
+{
+	return false;
+}
+
+static inline bool mem_cgroup_sk_under_memory_pressure(const struct sock *sk)
+{
+	return false;
+}
+#endif
 
 static inline long sock_rcvtimeo(const struct sock *sk, bool noblock)
 {
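
With the accessors above, callers need no #ifdef of their own. A hedged
sketch of a new call site (sk_under_memcg_pressure_example() is a
hypothetical name, not part of the series):

	/* Hypothetical caller: with CONFIG_MEMCG=n the stubs return
	 * NULL/false, the condition constant-folds, and the compiler
	 * drops the branch entirely.
	 */
	static inline bool sk_under_memcg_pressure_example(const struct sock *sk)
	{
		return mem_cgroup_sk_enabled(sk) &&
		       mem_cgroup_sk_under_memory_pressure(sk);
	}

Note that mem_cgroup_sk_under_memory_pressure() is largely code motion:
its body is the old mem_cgroup_under_socket_pressure() from
memcontrol.h, retargeted to take the socket, and mem_cgroup_from_sk()
becomes the accessor for sk->sk_memcg; the direct accesses that remain
are confined to CONFIG_MEMCG-only code or #ifdef blocks.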

include/net/tcp.h (+2 -2)
···
 /* optimized version of sk_under_memory_pressure() for TCP sockets */
 static inline bool tcp_under_memory_pressure(const struct sock *sk)
 {
-	if (mem_cgroup_sockets_enabled && sk->sk_memcg &&
-	    mem_cgroup_under_socket_pressure(sk->sk_memcg))
+	if (mem_cgroup_sk_enabled(sk) &&
+	    mem_cgroup_sk_under_memory_pressure(sk))
 		return true;
 
 	return READ_ONCE(tcp_memory_pressure);

mm/memcontrol.c (+31 -9)
···
 
 void mem_cgroup_sk_free(struct sock *sk)
 {
-	if (sk->sk_memcg)
-		css_put(&sk->sk_memcg->css);
+	struct mem_cgroup *memcg = mem_cgroup_from_sk(sk);
+
+	if (memcg)
+		css_put(&memcg->css);
+}
+
+void mem_cgroup_sk_inherit(const struct sock *sk, struct sock *newsk)
+{
+	struct mem_cgroup *memcg;
+
+	if (sk->sk_memcg == newsk->sk_memcg)
+		return;
+
+	mem_cgroup_sk_free(newsk);
+
+	memcg = mem_cgroup_from_sk(sk);
+	if (memcg)
+		css_get(&memcg->css);
+
+	newsk->sk_memcg = sk->sk_memcg;
 }
 
 /**
- * mem_cgroup_charge_skmem - charge socket memory
- * @memcg: memcg to charge
+ * mem_cgroup_sk_charge - charge socket memory
+ * @sk: socket in memcg to charge
  * @nr_pages: number of pages to charge
  * @gfp_mask: reclaim mode
  *
  * Charges @nr_pages to @memcg. Returns %true if the charge fit within
  * @memcg's configured limit, %false if it doesn't.
  */
-bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages,
-			     gfp_t gfp_mask)
+bool mem_cgroup_sk_charge(const struct sock *sk, unsigned int nr_pages,
+			  gfp_t gfp_mask)
 {
+	struct mem_cgroup *memcg = mem_cgroup_from_sk(sk);
+
 	if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
 		return memcg1_charge_skmem(memcg, nr_pages, gfp_mask);
 
···
 }
 
 /**
- * mem_cgroup_uncharge_skmem - uncharge socket memory
- * @memcg: memcg to uncharge
+ * mem_cgroup_sk_uncharge - uncharge socket memory
+ * @sk: socket in memcg to uncharge
  * @nr_pages: number of pages to uncharge
  */
-void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages)
+void mem_cgroup_sk_uncharge(const struct sock *sk, unsigned int nr_pages)
 {
+	struct mem_cgroup *memcg = mem_cgroup_from_sk(sk);
+
 	if (!cgroup_subsys_on_dfl(memory_cgrp_subsys)) {
 		memcg1_uncharge_skmem(memcg, nr_pages);
 		return;
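
mem_cgroup_sk_inherit() centralizes what MPTCP used to open-code: it
drops newsk's existing reference via mem_cgroup_sk_free(), takes a css
reference on the parent's memcg, and copies the pointer. The caller
pattern (as in the net/mptcp/subflow.c hunk below):

	/* Static-branch gate: no call is made when memcg socket
	 * accounting is disabled.
	 */
	if (mem_cgroup_sockets_enabled)
		mem_cgroup_sk_inherit(parent, child);

One behavioral nuance: the old MPTCP code used css_tryget(), while the
helper uses css_get(); that is safe here because the parent socket's
own reference keeps the memcg alive across the call.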

net/core/sock.c (+20 -18)
···
 	bool charged;
 	int pages;
 
-	if (!mem_cgroup_sockets_enabled || !sk->sk_memcg || !sk_has_account(sk))
+	if (!mem_cgroup_sk_enabled(sk) || !sk_has_account(sk))
 		return -EOPNOTSUPP;
 
 	if (!bytes)
···
 	pages = sk_mem_pages(bytes);
 
 	/* pre-charge to memcg */
-	charged = mem_cgroup_charge_skmem(sk->sk_memcg, pages,
-					  GFP_KERNEL | __GFP_RETRY_MAYFAIL);
+	charged = mem_cgroup_sk_charge(sk, pages,
+				       GFP_KERNEL | __GFP_RETRY_MAYFAIL);
 	if (!charged)
 		return -ENOMEM;
 
···
 	 */
 	if (allocated > sk_prot_mem_limits(sk, 1)) {
 		sk_memory_allocated_sub(sk, pages);
-		mem_cgroup_uncharge_skmem(sk->sk_memcg, pages);
+		mem_cgroup_sk_uncharge(sk, pages);
 		return -ENOMEM;
 	}
 	sk_forward_alloc_add(sk, pages << PAGE_SHIFT);
···
 
 	sock_reset_flag(newsk, SOCK_DONE);
 
+#ifdef CONFIG_MEMCG
 	/* sk->sk_memcg will be populated at accept() time */
 	newsk->sk_memcg = NULL;
+#endif
 
 	cgroup_sk_clone(&newsk->sk_cgrp_data);
 
···
  */
 int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind)
 {
-	struct mem_cgroup *memcg = mem_cgroup_sockets_enabled ? sk->sk_memcg : NULL;
+	bool memcg_enabled = false, charged = false;
 	struct proto *prot = sk->sk_prot;
-	bool charged = true;
 	long allocated;
 
 	sk_memory_allocated_add(sk, amt);
 	allocated = sk_memory_allocated(sk);
 
-	if (memcg) {
-		charged = mem_cgroup_charge_skmem(memcg, amt, gfp_memcg_charge());
+	if (mem_cgroup_sk_enabled(sk)) {
+		memcg_enabled = true;
+		charged = mem_cgroup_sk_charge(sk, amt, gfp_memcg_charge());
 		if (!charged)
 			goto suppress_allocation;
 	}
···
 		 */
 		if (sk->sk_wmem_queued + size >= sk->sk_sndbuf) {
 			/* Force charge with __GFP_NOFAIL */
-			if (memcg && !charged) {
-				mem_cgroup_charge_skmem(memcg, amt,
-					gfp_memcg_charge() | __GFP_NOFAIL);
-			}
+			if (memcg_enabled && !charged)
+				mem_cgroup_sk_charge(sk, amt,
+						     gfp_memcg_charge() | __GFP_NOFAIL);
 			return 1;
 		}
 	}
 
-	if (kind == SK_MEM_SEND || (kind == SK_MEM_RECV && charged))
-		trace_sock_exceed_buf_limit(sk, prot, allocated, kind);
+	trace_sock_exceed_buf_limit(sk, prot, allocated, kind);
 
 	sk_memory_allocated_sub(sk, amt);
 
-	if (memcg && charged)
-		mem_cgroup_uncharge_skmem(memcg, amt);
+	if (charged)
+		mem_cgroup_sk_uncharge(sk, amt);
 
 	return 0;
 }
···
 {
 	sk_memory_allocated_sub(sk, amount);
 
-	if (mem_cgroup_sockets_enabled && sk->sk_memcg)
-		mem_cgroup_uncharge_skmem(sk->sk_memcg, amount);
+	if (mem_cgroup_sk_enabled(sk))
+		mem_cgroup_sk_uncharge(sk, amount);
 
 	if (sk_under_global_memory_pressure(sk) &&
 	    (sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)))
···
 
 CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rxtx, sk_err);
 CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rxtx, sk_socket);
+#ifdef CONFIG_MEMCG
 CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rxtx, sk_memcg);
+#endif
 
 CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_rxtx, sk_lock);
 CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_rxtx, sk_reserved_mem);

net/ipv4/inet_connection_sock.c (+9 -10)
···
 		spin_unlock_bh(&queue->fastopenq.lock);
 	}
 
-out:
 	release_sock(sk);
-	if (newsk && mem_cgroup_sockets_enabled) {
+
+	if (mem_cgroup_sockets_enabled) {
 		gfp_t gfp = GFP_KERNEL | __GFP_NOFAIL;
 		int amt = 0;
 
···
 		lock_sock(newsk);
 
 		mem_cgroup_sk_alloc(newsk);
-		if (newsk->sk_memcg) {
+		if (mem_cgroup_from_sk(newsk)) {
 			/* The socket has not been accepted yet, no need
 			 * to look at newsk->sk_wmem_queued.
 			 */
···
 		}
 
 		if (amt)
-			mem_cgroup_charge_skmem(newsk->sk_memcg, amt, gfp);
+			mem_cgroup_sk_charge(newsk, amt, gfp);
 		kmem_cache_charge(newsk, gfp);
 
 		release_sock(newsk);
 	}
+
 	if (req)
 		reqsk_put(req);
 
-	if (newsk)
-		inet_init_csk_locks(newsk);
-
+	inet_init_csk_locks(newsk);
 	return newsk;
+
 out_err:
-	newsk = NULL;
-	req = NULL;
+	release_sock(sk);
 	arg->err = error;
-	goto out;
+	return NULL;
 }
 EXPORT_SYMBOL(inet_csk_accept);

net/ipv4/tcp_output.c (+2 -3)
···
 	sk_forward_alloc_add(sk, amt << PAGE_SHIFT);
 	sk_memory_allocated_add(sk, amt);
 
-	if (mem_cgroup_sockets_enabled && sk->sk_memcg)
-		mem_cgroup_charge_skmem(sk->sk_memcg, amt,
-					gfp_memcg_charge() | __GFP_NOFAIL);
+	if (mem_cgroup_sk_enabled(sk))
+		mem_cgroup_sk_charge(sk, amt, gfp_memcg_charge() | __GFP_NOFAIL);
 }
 
 /* Send a FIN. The caller locks the socket for us.

net/mptcp/protocol.h (+1 -3)
···
 	 * as it can always coalesce them
 	 */
 	return (data_avail >= sk->sk_rcvlowat) ||
-	       (mem_cgroup_sockets_enabled && sk->sk_memcg &&
-	        mem_cgroup_under_socket_pressure(sk->sk_memcg)) ||
-	       READ_ONCE(tcp_memory_pressure);
+	       tcp_under_memory_pressure(sk);
 }
 
 int mptcp_set_rcvlowat(struct sock *sk, int val);

net/mptcp/subflow.c (+3 -8)
···
 	/* only the additional subflows created by kworkers have to be modified */
 	if (cgroup_id(sock_cgroup_ptr(parent_skcd)) !=
 	    cgroup_id(sock_cgroup_ptr(child_skcd))) {
-#ifdef CONFIG_MEMCG
-		struct mem_cgroup *memcg = parent->sk_memcg;
-
-		mem_cgroup_sk_free(child);
-		if (memcg && css_tryget(&memcg->css))
-			child->sk_memcg = memcg;
-#endif /* CONFIG_MEMCG */
-
 		cgroup_sk_free(child_skcd);
 		*child_skcd = *parent_skcd;
 		cgroup_sk_clone(child_skcd);
 	}
 #endif /* CONFIG_SOCK_CGROUP_DATA */
+
+	if (mem_cgroup_sockets_enabled)
+		mem_cgroup_sk_inherit(parent, child);
 }
 
 static void mptcp_subflow_ops_override(struct sock *ssk)