Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'splice-net-handle-msg_splice_pages-in-af_kcm'

David Howells says:

====================
splice, net: Handle MSG_SPLICE_PAGES in AF_KCM

Here are patches to make AF_KCM handle the MSG_SPLICE_PAGES internal
sendmsg flag. MSG_SPLICE_PAGES is an internal hint that tells the protocol
that it should splice in the supplied pages if it can. AF_KCM's sendpage
implementation is then turned into a wrapper around that sendmsg path.

Does anyone actually use AF_KCM? Upstream, it has some issues: it doesn't
seem able to handle a "message" longer than 113920 bytes without jamming,
and it doesn't handle client termination once it has jammed.

Link: https://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git/commit/?id=51c78a4d532efe9543a4df019ff405f05c6157f6 # part 1
Link: https://lore.kernel.org/r/20230524144923.3623536-1-dhowells@redhat.com/ # v1
====================

Link: https://lore.kernel.org/r/20230531110423.643196-1-dhowells@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

+58 -160
+58 -160
net/kcm/kcmsock.c
··· 761 761 kcm_write_msgs(kcm); 762 762 } 763 763 764 - static ssize_t kcm_sendpage(struct socket *sock, struct page *page, 765 - int offset, size_t size, int flags) 766 - 767 - { 768 - struct sock *sk = sock->sk; 769 - struct kcm_sock *kcm = kcm_sk(sk); 770 - struct sk_buff *skb = NULL, *head = NULL; 771 - long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); 772 - bool eor; 773 - int err = 0; 774 - int i; 775 - 776 - if (flags & MSG_SENDPAGE_NOTLAST) 777 - flags |= MSG_MORE; 778 - 779 - /* No MSG_EOR from splice, only look at MSG_MORE */ 780 - eor = !(flags & MSG_MORE); 781 - 782 - lock_sock(sk); 783 - 784 - sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); 785 - 786 - err = -EPIPE; 787 - if (sk->sk_err) 788 - goto out_error; 789 - 790 - if (kcm->seq_skb) { 791 - /* Previously opened message */ 792 - head = kcm->seq_skb; 793 - skb = kcm_tx_msg(head)->last_skb; 794 - i = skb_shinfo(skb)->nr_frags; 795 - 796 - if (skb_can_coalesce(skb, i, page, offset)) { 797 - skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], size); 798 - skb_shinfo(skb)->flags |= SKBFL_SHARED_FRAG; 799 - goto coalesced; 800 - } 801 - 802 - if (i >= MAX_SKB_FRAGS) { 803 - struct sk_buff *tskb; 804 - 805 - tskb = alloc_skb(0, sk->sk_allocation); 806 - while (!tskb) { 807 - kcm_push(kcm); 808 - err = sk_stream_wait_memory(sk, &timeo); 809 - if (err) 810 - goto out_error; 811 - } 812 - 813 - if (head == skb) 814 - skb_shinfo(head)->frag_list = tskb; 815 - else 816 - skb->next = tskb; 817 - 818 - skb = tskb; 819 - skb->ip_summed = CHECKSUM_UNNECESSARY; 820 - i = 0; 821 - } 822 - } else { 823 - /* Call the sk_stream functions to manage the sndbuf mem. 
*/ 824 - if (!sk_stream_memory_free(sk)) { 825 - kcm_push(kcm); 826 - set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); 827 - err = sk_stream_wait_memory(sk, &timeo); 828 - if (err) 829 - goto out_error; 830 - } 831 - 832 - head = alloc_skb(0, sk->sk_allocation); 833 - while (!head) { 834 - kcm_push(kcm); 835 - err = sk_stream_wait_memory(sk, &timeo); 836 - if (err) 837 - goto out_error; 838 - } 839 - 840 - skb = head; 841 - i = 0; 842 - } 843 - 844 - get_page(page); 845 - skb_fill_page_desc_noacc(skb, i, page, offset, size); 846 - skb_shinfo(skb)->flags |= SKBFL_SHARED_FRAG; 847 - 848 - coalesced: 849 - skb->len += size; 850 - skb->data_len += size; 851 - skb->truesize += size; 852 - sk->sk_wmem_queued += size; 853 - sk_mem_charge(sk, size); 854 - 855 - if (head != skb) { 856 - head->len += size; 857 - head->data_len += size; 858 - head->truesize += size; 859 - } 860 - 861 - if (eor) { 862 - bool not_busy = skb_queue_empty(&sk->sk_write_queue); 863 - 864 - /* Message complete, queue it on send buffer */ 865 - __skb_queue_tail(&sk->sk_write_queue, head); 866 - kcm->seq_skb = NULL; 867 - KCM_STATS_INCR(kcm->stats.tx_msgs); 868 - 869 - if (flags & MSG_BATCH) { 870 - kcm->tx_wait_more = true; 871 - } else if (kcm->tx_wait_more || not_busy) { 872 - err = kcm_write_msgs(kcm); 873 - if (err < 0) { 874 - /* We got a hard error in write_msgs but have 875 - * already queued this message. 
Report an error 876 - * in the socket, but don't affect return value 877 - * from sendmsg 878 - */ 879 - pr_warn("KCM: Hard failure on kcm_write_msgs\n"); 880 - report_csk_error(&kcm->sk, -err); 881 - } 882 - } 883 - } else { 884 - /* Message not complete, save state */ 885 - kcm->seq_skb = head; 886 - kcm_tx_msg(head)->last_skb = skb; 887 - } 888 - 889 - KCM_STATS_ADD(kcm->stats.tx_bytes, size); 890 - 891 - release_sock(sk); 892 - return size; 893 - 894 - out_error: 895 - kcm_push(kcm); 896 - 897 - err = sk_stream_error(sk, flags, err); 898 - 899 - /* make sure we wake any epoll edge trigger waiter */ 900 - if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 && err == -EAGAIN)) 901 - sk->sk_write_space(sk); 902 - 903 - release_sock(sk); 904 - return err; 905 - } 906 - 907 764 static int kcm_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) 908 765 { 909 766 struct sock *sk = sock->sk; ··· 846 989 merge = false; 847 990 } 848 991 849 - copy = min_t(int, msg_data_left(msg), 850 - pfrag->size - pfrag->offset); 992 + if (msg->msg_flags & MSG_SPLICE_PAGES) { 993 + copy = msg_data_left(msg); 994 + if (!sk_wmem_schedule(sk, copy)) 995 + goto wait_for_memory; 851 996 852 - if (!sk_wmem_schedule(sk, copy)) 853 - goto wait_for_memory; 997 + err = skb_splice_from_iter(skb, &msg->msg_iter, copy, 998 + sk->sk_allocation); 999 + if (err < 0) { 1000 + if (err == -EMSGSIZE) 1001 + goto wait_for_memory; 1002 + goto out_error; 1003 + } 854 1004 855 - err = skb_copy_to_page_nocache(sk, &msg->msg_iter, skb, 856 - pfrag->page, 857 - pfrag->offset, 858 - copy); 859 - if (err) 860 - goto out_error; 1005 + copy = err; 1006 + skb_shinfo(skb)->flags |= SKBFL_SHARED_FRAG; 1007 + sk_wmem_queued_add(sk, copy); 1008 + sk_mem_charge(sk, copy); 861 1009 862 - /* Update the skb. 
*/ 863 - if (merge) { 864 - skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy); 1010 + if (head != skb) 1011 + head->truesize += copy; 865 1012 } else { 866 - skb_fill_page_desc(skb, i, pfrag->page, 867 - pfrag->offset, copy); 868 - get_page(pfrag->page); 1013 + copy = min_t(int, msg_data_left(msg), 1014 + pfrag->size - pfrag->offset); 1015 + if (!sk_wmem_schedule(sk, copy)) 1016 + goto wait_for_memory; 1017 + 1018 + err = skb_copy_to_page_nocache(sk, &msg->msg_iter, skb, 1019 + pfrag->page, 1020 + pfrag->offset, 1021 + copy); 1022 + if (err) 1023 + goto out_error; 1024 + 1025 + /* Update the skb. */ 1026 + if (merge) { 1027 + skb_frag_size_add( 1028 + &skb_shinfo(skb)->frags[i - 1], copy); 1029 + } else { 1030 + skb_fill_page_desc(skb, i, pfrag->page, 1031 + pfrag->offset, copy); 1032 + get_page(pfrag->page); 1033 + } 1034 + 1035 + pfrag->offset += copy; 869 1036 } 870 1037 871 - pfrag->offset += copy; 872 1038 copied += copy; 873 1039 if (head != skb) { 874 1040 head->len += copy; ··· 966 1086 967 1087 release_sock(sk); 968 1088 return err; 1089 + } 1090 + 1091 + static ssize_t kcm_sendpage(struct socket *sock, struct page *page, 1092 + int offset, size_t size, int flags) 1093 + 1094 + { 1095 + struct bio_vec bvec; 1096 + struct msghdr msg = { .msg_flags = flags | MSG_SPLICE_PAGES, }; 1097 + 1098 + if (flags & MSG_SENDPAGE_NOTLAST) 1099 + msg.msg_flags |= MSG_MORE; 1100 + 1101 + if (flags & MSG_OOB) 1102 + return -EOPNOTSUPP; 1103 + 1104 + bvec_set_page(&bvec, page, size, offset); 1105 + iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, size); 1106 + return kcm_sendmsg(sock, &msg, size); 969 1107 } 970 1108 971 1109 static int kcm_recvmsg(struct socket *sock, struct msghdr *msg,