Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

sk-filter: Add ability to get socket filter program (v2)

The SO_ATTACH_FILTER option is set-only. I propose to add the ability
to get the filter back by using SO_ATTACH_FILTER in getsockopt. To be
less irritating to the eye, the SO_GET_FILTER alias for it is declared.
This ability is required by the checkpoint-restore project to be able
to save the full state of a socket.

There are two issues with getting filter back.

First, the kernel modifies sock_filter->code on filter load, so in
order to return a filter element to the user we have to decode it back
into user-visible constants. Fortunately, the modification in question
is reversible.

Second, the BPF_S_ALU_DIV_K code modifies the command argument k to
speed up the run-time division by doing kernel_k = reciprocal(user_k).
The bad news is that different user_k values may result in the same
kernel_k, so we can't get the original user_k back. The good news is
that we don't have to. What we need to do is calculate a user2_k such that

reciprocal(user2_k) == reciprocal(user_k) == kernel_k

i.e. if user2_k is loaded back, the recompiled value will be exactly
the same as it was. That said, user2_k can be calculated like this

user2_k = reciprocal(kernel_k)

with one exception: if kernel_k == 0, then user2_k == 1.

The optlen argument is treated like this -- when zero, the kernel
returns the number of sock_filter elements in the filter; otherwise it
must be large enough to hold the whole sock_filter array.

changes since v1:
* Declared SO_GET_FILTER in all arch headers
* Added decode of vlan-tag codes

Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Pavel Emelyanov and committed by
David S. Miller
a8fc9277 96442e42

+153
+1
arch/alpha/include/asm/socket.h
··· 47 47 /* Socket filtering */ 48 48 #define SO_ATTACH_FILTER 26 49 49 #define SO_DETACH_FILTER 27 50 + #define SO_GET_FILTER SO_ATTACH_FILTER 50 51 51 52 #define SO_PEERNAME 28 52 53 #define SO_TIMESTAMP 29
+1
arch/avr32/include/uapi/asm/socket.h
··· 40 40 /* Socket filtering */ 41 41 #define SO_ATTACH_FILTER 26 42 42 #define SO_DETACH_FILTER 27 43 + #define SO_GET_FILTER SO_ATTACH_FILTER 43 44 44 45 #define SO_PEERNAME 28 45 46 #define SO_TIMESTAMP 29
+1
arch/cris/include/asm/socket.h
··· 42 42 /* Socket filtering */ 43 43 #define SO_ATTACH_FILTER 26 44 44 #define SO_DETACH_FILTER 27 45 + #define SO_GET_FILTER SO_ATTACH_FILTER 45 46 46 47 #define SO_PEERNAME 28 47 48 #define SO_TIMESTAMP 29
+1
arch/frv/include/uapi/asm/socket.h
··· 40 40 /* Socket filtering */ 41 41 #define SO_ATTACH_FILTER 26 42 42 #define SO_DETACH_FILTER 27 43 + #define SO_GET_FILTER SO_ATTACH_FILTER 43 44 44 45 #define SO_PEERNAME 28 45 46 #define SO_TIMESTAMP 29
+1
arch/h8300/include/asm/socket.h
··· 40 40 /* Socket filtering */ 41 41 #define SO_ATTACH_FILTER 26 42 42 #define SO_DETACH_FILTER 27 43 + #define SO_GET_FILTER SO_ATTACH_FILTER 43 44 44 45 #define SO_PEERNAME 28 45 46 #define SO_TIMESTAMP 29
+1
arch/ia64/include/uapi/asm/socket.h
··· 49 49 /* Socket filtering */ 50 50 #define SO_ATTACH_FILTER 26 51 51 #define SO_DETACH_FILTER 27 52 + #define SO_GET_FILTER SO_ATTACH_FILTER 52 53 53 54 #define SO_PEERNAME 28 54 55 #define SO_TIMESTAMP 29
+1
arch/m32r/include/asm/socket.h
··· 40 40 /* Socket filtering */ 41 41 #define SO_ATTACH_FILTER 26 42 42 #define SO_DETACH_FILTER 27 43 + #define SO_GET_FILTER SO_ATTACH_FILTER 43 44 44 45 #define SO_PEERNAME 28 45 46 #define SO_TIMESTAMP 29
+1
arch/m68k/include/asm/socket.h
··· 40 40 /* Socket filtering */ 41 41 #define SO_ATTACH_FILTER 26 42 42 #define SO_DETACH_FILTER 27 43 + #define SO_GET_FILTER SO_ATTACH_FILTER 43 44 44 45 #define SO_PEERNAME 28 45 46 #define SO_TIMESTAMP 29
+1
arch/mips/include/uapi/asm/socket.h
··· 63 63 /* Socket filtering */ 64 64 #define SO_ATTACH_FILTER 26 65 65 #define SO_DETACH_FILTER 27 66 + #define SO_GET_FILTER SO_ATTACH_FILTER 66 67 67 68 #define SO_PEERNAME 28 68 69 #define SO_TIMESTAMP 29
+1
arch/mn10300/include/uapi/asm/socket.h
··· 40 40 /* Socket filtering */ 41 41 #define SO_ATTACH_FILTER 26 42 42 #define SO_DETACH_FILTER 27 43 + #define SO_GET_FILTER SO_ATTACH_FILTER 43 44 44 45 #define SO_PEERNAME 28 45 46 #define SO_TIMESTAMP 29
+1
arch/parisc/include/asm/socket.h
··· 48 48 /* Socket filtering */ 49 49 #define SO_ATTACH_FILTER 0x401a 50 50 #define SO_DETACH_FILTER 0x401b 51 + #define SO_GET_FILTER SO_ATTACH_FILTER 51 52 52 53 #define SO_ACCEPTCONN 0x401c 53 54
+1
arch/powerpc/include/uapi/asm/socket.h
··· 47 47 /* Socket filtering */ 48 48 #define SO_ATTACH_FILTER 26 49 49 #define SO_DETACH_FILTER 27 50 + #define SO_GET_FILTER SO_ATTACH_FILTER 50 51 51 52 #define SO_PEERNAME 28 52 53 #define SO_TIMESTAMP 29
+1
arch/s390/include/uapi/asm/socket.h
··· 46 46 /* Socket filtering */ 47 47 #define SO_ATTACH_FILTER 26 48 48 #define SO_DETACH_FILTER 27 49 + #define SO_GET_FILTER SO_ATTACH_FILTER 49 50 50 51 #define SO_PEERNAME 28 51 52 #define SO_TIMESTAMP 29
+1
arch/sparc/include/uapi/asm/socket.h
··· 41 41 42 42 #define SO_ATTACH_FILTER 0x001a 43 43 #define SO_DETACH_FILTER 0x001b 44 + #define SO_GET_FILTER SO_ATTACH_FILTER 44 45 45 46 #define SO_PEERNAME 0x001c 46 47 #define SO_TIMESTAMP 0x001d
+1
arch/xtensa/include/asm/socket.h
··· 52 52 53 53 #define SO_ATTACH_FILTER 26 54 54 #define SO_DETACH_FILTER 27 55 + #define SO_GET_FILTER SO_ATTACH_FILTER 55 56 56 57 #define SO_PEERNAME 28 57 58 #define SO_TIMESTAMP 29
+1
include/linux/filter.h
··· 45 45 extern int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk); 46 46 extern int sk_detach_filter(struct sock *sk); 47 47 extern int sk_chk_filter(struct sock_filter *filter, unsigned int flen); 48 + extern int sk_get_filter(struct sock *sk, struct sock_filter __user *filter, unsigned len); 48 49 49 50 #ifdef CONFIG_BPF_JIT 50 51 extern void bpf_jit_compile(struct sk_filter *fp);
+1
include/uapi/asm-generic/socket.h
··· 43 43 /* Socket filtering */ 44 44 #define SO_ATTACH_FILTER 26 45 45 #define SO_DETACH_FILTER 27 46 + #define SO_GET_FILTER SO_ATTACH_FILTER 46 47 47 48 #define SO_PEERNAME 28 48 49 #define SO_TIMESTAMP 29
+130
net/core/filter.c
··· 760 760 return ret; 761 761 } 762 762 EXPORT_SYMBOL_GPL(sk_detach_filter); 763 + 764 + static void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to) 765 + { 766 + static const u16 decodes[] = { 767 + [BPF_S_ALU_ADD_K] = BPF_ALU|BPF_ADD|BPF_K, 768 + [BPF_S_ALU_ADD_X] = BPF_ALU|BPF_ADD|BPF_X, 769 + [BPF_S_ALU_SUB_K] = BPF_ALU|BPF_SUB|BPF_K, 770 + [BPF_S_ALU_SUB_X] = BPF_ALU|BPF_SUB|BPF_X, 771 + [BPF_S_ALU_MUL_K] = BPF_ALU|BPF_MUL|BPF_K, 772 + [BPF_S_ALU_MUL_X] = BPF_ALU|BPF_MUL|BPF_X, 773 + [BPF_S_ALU_DIV_X] = BPF_ALU|BPF_DIV|BPF_X, 774 + [BPF_S_ALU_MOD_K] = BPF_ALU|BPF_MOD|BPF_K, 775 + [BPF_S_ALU_MOD_X] = BPF_ALU|BPF_MOD|BPF_X, 776 + [BPF_S_ALU_AND_K] = BPF_ALU|BPF_AND|BPF_K, 777 + [BPF_S_ALU_AND_X] = BPF_ALU|BPF_AND|BPF_X, 778 + [BPF_S_ALU_OR_K] = BPF_ALU|BPF_OR|BPF_K, 779 + [BPF_S_ALU_OR_X] = BPF_ALU|BPF_OR|BPF_X, 780 + [BPF_S_ALU_XOR_K] = BPF_ALU|BPF_XOR|BPF_K, 781 + [BPF_S_ALU_XOR_X] = BPF_ALU|BPF_XOR|BPF_X, 782 + [BPF_S_ALU_LSH_K] = BPF_ALU|BPF_LSH|BPF_K, 783 + [BPF_S_ALU_LSH_X] = BPF_ALU|BPF_LSH|BPF_X, 784 + [BPF_S_ALU_RSH_K] = BPF_ALU|BPF_RSH|BPF_K, 785 + [BPF_S_ALU_RSH_X] = BPF_ALU|BPF_RSH|BPF_X, 786 + [BPF_S_ALU_NEG] = BPF_ALU|BPF_NEG, 787 + [BPF_S_LD_W_ABS] = BPF_LD|BPF_W|BPF_ABS, 788 + [BPF_S_LD_H_ABS] = BPF_LD|BPF_H|BPF_ABS, 789 + [BPF_S_LD_B_ABS] = BPF_LD|BPF_B|BPF_ABS, 790 + [BPF_S_ANC_PROTOCOL] = BPF_LD|BPF_B|BPF_ABS, 791 + [BPF_S_ANC_PKTTYPE] = BPF_LD|BPF_B|BPF_ABS, 792 + [BPF_S_ANC_IFINDEX] = BPF_LD|BPF_B|BPF_ABS, 793 + [BPF_S_ANC_NLATTR] = BPF_LD|BPF_B|BPF_ABS, 794 + [BPF_S_ANC_NLATTR_NEST] = BPF_LD|BPF_B|BPF_ABS, 795 + [BPF_S_ANC_MARK] = BPF_LD|BPF_B|BPF_ABS, 796 + [BPF_S_ANC_QUEUE] = BPF_LD|BPF_B|BPF_ABS, 797 + [BPF_S_ANC_HATYPE] = BPF_LD|BPF_B|BPF_ABS, 798 + [BPF_S_ANC_RXHASH] = BPF_LD|BPF_B|BPF_ABS, 799 + [BPF_S_ANC_CPU] = BPF_LD|BPF_B|BPF_ABS, 800 + [BPF_S_ANC_ALU_XOR_X] = BPF_LD|BPF_B|BPF_ABS, 801 + [BPF_S_ANC_SECCOMP_LD_W] = BPF_LD|BPF_B|BPF_ABS, 802 + [BPF_S_ANC_VLAN_TAG] = BPF_LD|BPF_B|BPF_ABS, 803 + 
[BPF_S_ANC_VLAN_TAG_PRESENT] = BPF_LD|BPF_B|BPF_ABS, 804 + [BPF_S_LD_W_LEN] = BPF_LD|BPF_W|BPF_LEN, 805 + [BPF_S_LD_W_IND] = BPF_LD|BPF_W|BPF_IND, 806 + [BPF_S_LD_H_IND] = BPF_LD|BPF_H|BPF_IND, 807 + [BPF_S_LD_B_IND] = BPF_LD|BPF_B|BPF_IND, 808 + [BPF_S_LD_IMM] = BPF_LD|BPF_IMM, 809 + [BPF_S_LDX_W_LEN] = BPF_LDX|BPF_W|BPF_LEN, 810 + [BPF_S_LDX_B_MSH] = BPF_LDX|BPF_B|BPF_MSH, 811 + [BPF_S_LDX_IMM] = BPF_LDX|BPF_IMM, 812 + [BPF_S_MISC_TAX] = BPF_MISC|BPF_TAX, 813 + [BPF_S_MISC_TXA] = BPF_MISC|BPF_TXA, 814 + [BPF_S_RET_K] = BPF_RET|BPF_K, 815 + [BPF_S_RET_A] = BPF_RET|BPF_A, 816 + [BPF_S_ALU_DIV_K] = BPF_ALU|BPF_DIV|BPF_K, 817 + [BPF_S_LD_MEM] = BPF_LD|BPF_MEM, 818 + [BPF_S_LDX_MEM] = BPF_LDX|BPF_MEM, 819 + [BPF_S_ST] = BPF_ST, 820 + [BPF_S_STX] = BPF_STX, 821 + [BPF_S_JMP_JA] = BPF_JMP|BPF_JA, 822 + [BPF_S_JMP_JEQ_K] = BPF_JMP|BPF_JEQ|BPF_K, 823 + [BPF_S_JMP_JEQ_X] = BPF_JMP|BPF_JEQ|BPF_X, 824 + [BPF_S_JMP_JGE_K] = BPF_JMP|BPF_JGE|BPF_K, 825 + [BPF_S_JMP_JGE_X] = BPF_JMP|BPF_JGE|BPF_X, 826 + [BPF_S_JMP_JGT_K] = BPF_JMP|BPF_JGT|BPF_K, 827 + [BPF_S_JMP_JGT_X] = BPF_JMP|BPF_JGT|BPF_X, 828 + [BPF_S_JMP_JSET_K] = BPF_JMP|BPF_JSET|BPF_K, 829 + [BPF_S_JMP_JSET_X] = BPF_JMP|BPF_JSET|BPF_X, 830 + }; 831 + u16 code; 832 + 833 + code = filt->code; 834 + 835 + to->code = decodes[code]; 836 + to->jt = filt->jt; 837 + to->jf = filt->jf; 838 + 839 + if (code == BPF_S_ALU_DIV_K) { 840 + /* 841 + * When loaded this rule user gave us X, which was 842 + * translated into R = r(X). Now we calculate the 843 + * RR = r(R) and report it back. If next time this 844 + * value is loaded and RRR = r(RR) is calculated 845 + * then the R == RRR will be true. 846 + * 847 + * One exception. X == 1 translates into R == 0 and 848 + * we can't calculate RR out of it with r(). 
849 + */ 850 + 851 + if (filt->k == 0) 852 + to->k = 1; 853 + else 854 + to->k = reciprocal_value(filt->k); 855 + 856 + BUG_ON(reciprocal_value(to->k) != filt->k); 857 + } else 858 + to->k = filt->k; 859 + } 860 + 861 + int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf, unsigned int len) 862 + { 863 + struct sk_filter *filter; 864 + int i, ret; 865 + 866 + lock_sock(sk); 867 + filter = rcu_dereference_protected(sk->sk_filter, 868 + sock_owned_by_user(sk)); 869 + ret = 0; 870 + if (!filter) 871 + goto out; 872 + ret = filter->len; 873 + if (!len) 874 + goto out; 875 + ret = -EINVAL; 876 + if (len < filter->len) 877 + goto out; 878 + 879 + ret = -EFAULT; 880 + for (i = 0; i < filter->len; i++) { 881 + struct sock_filter fb; 882 + 883 + sk_decode_filter(&filter->insns[i], &fb); 884 + if (copy_to_user(&ubuf[i], &fb, sizeof(fb))) 885 + goto out; 886 + } 887 + 888 + ret = filter->len; 889 + out: 890 + release_sock(sk); 891 + return ret; 892 + }
+6
net/core/sock.c
··· 1077 1077 case SO_BINDTODEVICE: 1078 1078 v.val = sk->sk_bound_dev_if; 1079 1079 break; 1080 + case SO_GET_FILTER: 1081 + len = sk_get_filter(sk, (struct sock_filter __user *)optval, len); 1082 + if (len < 0) 1083 + return len; 1084 + 1085 + goto lenout; 1080 1086 default: 1081 1087 return -ENOPROTOOPT; 1082 1088 }