Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

vhost: switch to use new message format

We used to have a message like:

struct vhost_msg {
int type;
union {
struct vhost_iotlb_msg iotlb;
__u8 padding[64];
};
};

Unfortunately, there will be a 32-bit hole after the type field on 64-bit
machines because of the alignment. This leads to different formats between
the 32-bit API and the 64-bit API. What's more, it will break 32-bit programs
running on 64-bit machines.

So fix this by introducing a new message type with an explicit
32-bit reserved field after type, like:

struct vhost_msg_v2 {
__u32 type;
__u32 reserved;
union {
struct vhost_iotlb_msg iotlb;
__u8 padding[64];
};
};

We will have a consistent ABI after switching to this. To enable
this capability, introduce a new ioctl (VHOST_SET_BACKEND_FEATURES) for
userspace to enable this feature (VHOST_BACKEND_F_IOTLB_MSG_V2).

Fixes: 6b1e6cc7855b ("vhost: new device IOTLB API")
Signed-off-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Jason Wang and committed by
David S. Miller
429711ae 9c2e955c

+111 -19
+30
drivers/vhost/net.c
··· 78 78 }; 79 79 80 80 enum { 81 + VHOST_NET_BACKEND_FEATURES = (1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2) 82 + }; 83 + 84 + enum { 81 85 VHOST_NET_VQ_RX = 0, 82 86 VHOST_NET_VQ_TX = 1, 83 87 VHOST_NET_VQ_MAX = 2, ··· 1403 1399 return err; 1404 1400 } 1405 1401 1402 + static int vhost_net_set_backend_features(struct vhost_net *n, u64 features) 1403 + { 1404 + int i; 1405 + 1406 + mutex_lock(&n->dev.mutex); 1407 + for (i = 0; i < VHOST_NET_VQ_MAX; ++i) { 1408 + mutex_lock(&n->vqs[i].vq.mutex); 1409 + n->vqs[i].vq.acked_backend_features = features; 1410 + mutex_unlock(&n->vqs[i].vq.mutex); 1411 + } 1412 + mutex_unlock(&n->dev.mutex); 1413 + 1414 + return 0; 1415 + } 1416 + 1406 1417 static int vhost_net_set_features(struct vhost_net *n, u64 features) 1407 1418 { 1408 1419 size_t vhost_hlen, sock_hlen, hdr_len; ··· 1508 1489 if (features & ~VHOST_NET_FEATURES) 1509 1490 return -EOPNOTSUPP; 1510 1491 return vhost_net_set_features(n, features); 1492 + case VHOST_GET_BACKEND_FEATURES: 1493 + features = VHOST_NET_BACKEND_FEATURES; 1494 + if (copy_to_user(featurep, &features, sizeof(features))) 1495 + return -EFAULT; 1496 + return 0; 1497 + case VHOST_SET_BACKEND_FEATURES: 1498 + if (copy_from_user(&features, featurep, sizeof(features))) 1499 + return -EFAULT; 1500 + if (features & ~VHOST_NET_BACKEND_FEATURES) 1501 + return -EOPNOTSUPP; 1502 + return vhost_net_set_backend_features(n, features); 1511 1503 case VHOST_RESET_OWNER: 1512 1504 return vhost_net_reset_owner(n); 1513 1505 case VHOST_SET_OWNER:
+53 -18
drivers/vhost/vhost.c
··· 315 315 vq->log_addr = -1ull; 316 316 vq->private_data = NULL; 317 317 vq->acked_features = 0; 318 + vq->acked_backend_features = 0; 318 319 vq->log_base = NULL; 319 320 vq->error_ctx = NULL; 320 321 vq->kick = NULL; ··· 1028 1027 ssize_t vhost_chr_write_iter(struct vhost_dev *dev, 1029 1028 struct iov_iter *from) 1030 1029 { 1031 - struct vhost_msg_node node; 1032 - unsigned size = sizeof(struct vhost_msg); 1033 - size_t ret; 1034 - int err; 1030 + struct vhost_iotlb_msg msg; 1031 + size_t offset; 1032 + int type, ret; 1035 1033 1036 - if (iov_iter_count(from) < size) 1037 - return 0; 1038 - ret = copy_from_iter(&node.msg, size, from); 1039 - if (ret != size) 1034 + ret = copy_from_iter(&type, sizeof(type), from); 1035 + if (ret != sizeof(type)) 1040 1036 goto done; 1041 1037 1042 - switch (node.msg.type) { 1038 + switch (type) { 1043 1039 case VHOST_IOTLB_MSG: 1044 - err = vhost_process_iotlb_msg(dev, &node.msg.iotlb); 1045 - if (err) 1046 - ret = err; 1040 + /* There maybe a hole after type for V1 message type, 1041 + * so skip it here. 1042 + */ 1043 + offset = offsetof(struct vhost_msg, iotlb) - sizeof(int); 1044 + break; 1045 + case VHOST_IOTLB_MSG_V2: 1046 + offset = sizeof(__u32); 1047 1047 break; 1048 1048 default: 1049 1049 ret = -EINVAL; 1050 - break; 1050 + goto done; 1051 1051 } 1052 1052 1053 + iov_iter_advance(from, offset); 1054 + ret = copy_from_iter(&msg, sizeof(msg), from); 1055 + if (ret != sizeof(msg)) 1056 + goto done; 1057 + if (vhost_process_iotlb_msg(dev, &msg)) { 1058 + ret = -EFAULT; 1059 + goto done; 1060 + } 1061 + 1062 + ret = (type == VHOST_IOTLB_MSG) ? 
sizeof(struct vhost_msg) : 1063 + sizeof(struct vhost_msg_v2); 1053 1064 done: 1054 1065 return ret; 1055 1066 } ··· 1120 1107 finish_wait(&dev->wait, &wait); 1121 1108 1122 1109 if (node) { 1123 - ret = copy_to_iter(&node->msg, size, to); 1110 + struct vhost_iotlb_msg *msg; 1111 + void *start = &node->msg; 1124 1112 1125 - if (ret != size || node->msg.type != VHOST_IOTLB_MISS) { 1113 + switch (node->msg.type) { 1114 + case VHOST_IOTLB_MSG: 1115 + size = sizeof(node->msg); 1116 + msg = &node->msg.iotlb; 1117 + break; 1118 + case VHOST_IOTLB_MSG_V2: 1119 + size = sizeof(node->msg_v2); 1120 + msg = &node->msg_v2.iotlb; 1121 + break; 1122 + default: 1123 + BUG(); 1124 + break; 1125 + } 1126 + 1127 + ret = copy_to_iter(start, size, to); 1128 + if (ret != size || msg->type != VHOST_IOTLB_MISS) { 1126 1129 kfree(node); 1127 1130 return ret; 1128 1131 } 1129 - 1130 1132 vhost_enqueue_msg(dev, &dev->pending_list, node); 1131 1133 } 1132 1134 ··· 1154 1126 struct vhost_dev *dev = vq->dev; 1155 1127 struct vhost_msg_node *node; 1156 1128 struct vhost_iotlb_msg *msg; 1129 + bool v2 = vhost_backend_has_feature(vq, VHOST_BACKEND_F_IOTLB_MSG_V2); 1157 1130 1158 - node = vhost_new_msg(vq, VHOST_IOTLB_MISS); 1131 + node = vhost_new_msg(vq, v2 ? VHOST_IOTLB_MSG_V2 : VHOST_IOTLB_MSG); 1159 1132 if (!node) 1160 1133 return -ENOMEM; 1161 1134 1162 - msg = &node->msg.iotlb; 1135 + if (v2) { 1136 + node->msg_v2.type = VHOST_IOTLB_MSG_V2; 1137 + msg = &node->msg_v2.iotlb; 1138 + } else { 1139 + msg = &node->msg.iotlb; 1140 + } 1141 + 1163 1142 msg->type = VHOST_IOTLB_MISS; 1164 1143 msg->iova = iova; 1165 1144 msg->perm = access;
+10 -1
drivers/vhost/vhost.h
··· 132 132 struct vhost_umem *iotlb; 133 133 void *private_data; 134 134 u64 acked_features; 135 + u64 acked_backend_features; 135 136 /* Log write descriptors */ 136 137 void __user *log_base; 137 138 struct vhost_log *log; ··· 148 147 }; 149 148 150 149 struct vhost_msg_node { 151 - struct vhost_msg msg; 150 + union { 151 + struct vhost_msg msg; 152 + struct vhost_msg_v2 msg_v2; 153 + }; 152 154 struct vhost_virtqueue *vq; 153 155 struct list_head node; 154 156 }; ··· 240 236 static inline bool vhost_has_feature(struct vhost_virtqueue *vq, int bit) 241 237 { 242 238 return vq->acked_features & (1ULL << bit); 239 + } 240 + 241 + static inline bool vhost_backend_has_feature(struct vhost_virtqueue *vq, int bit) 242 + { 243 + return vq->acked_backend_features & (1ULL << bit); 243 244 } 244 245 245 246 #ifdef CONFIG_VHOST_CROSS_ENDIAN_LEGACY
+18
include/uapi/linux/vhost.h
··· 65 65 }; 66 66 67 67 #define VHOST_IOTLB_MSG 0x1 68 + #define VHOST_IOTLB_MSG_V2 0x2 68 69 69 70 struct vhost_msg { 70 71 int type; 72 + union { 73 + struct vhost_iotlb_msg iotlb; 74 + __u8 padding[64]; 75 + }; 76 + }; 77 + 78 + struct vhost_msg_v2 { 79 + __u32 type; 80 + __u32 reserved; 71 81 union { 72 82 struct vhost_iotlb_msg iotlb; 73 83 __u8 padding[64]; ··· 169 159 /* Get busy loop timeout (in us) */ 170 160 #define VHOST_GET_VRING_BUSYLOOP_TIMEOUT _IOW(VHOST_VIRTIO, 0x24, \ 171 161 struct vhost_vring_state) 162 + 163 + /* Set or get vhost backend capability */ 164 + 165 + /* Use message type V2 */ 166 + #define VHOST_BACKEND_F_IOTLB_MSG_V2 0x1 167 + 168 + #define VHOST_SET_BACKEND_FEATURES _IOW(VHOST_VIRTIO, 0x25, __u64) 169 + #define VHOST_GET_BACKEND_FEATURES _IOW(VHOST_VIRTIO, 0x26, __u64) 172 170 173 171 /* VHOST_NET specific defines */ 174 172