um: add shared memory optimisation for time-travel=ext

Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

kernel os linux

With external time travel, a LOT of messages can end up
being exchanged on the socket, taking a significant
amount of time just to do that.

Add a new shared memory optimisation to that, where a
number of changes are made:
- the controller sends a client ID and a shared memory FD
(and a logging FD we don't use) in the ACK message to
the initial START
- the shared memory holds the current time and the
free_until value, so that there's no need to exchange
messages for that
- if the client that's running has shared memory support,
any client (the running one included) can request the
next time it wants to run inside the shared memory,
rather than sending a message, by also updating the
free_until value
- when shared memory is enabled, RUN/WAIT messages no
longer have an ACK, further cutting down on messages

Together, this can reduce the number of messages very
significantly, and reduce overall test/simulation run time.

Co-developed-by: Mordechay Goodstein <mordechay.goodstein@intel.com>
Signed-off-by: Mordechay Goodstein <mordechay.goodstein@intel.com>
Link: https://patch.msgid.link/20240702192118.6ad0a083f574.Ie41206c8ce4507fe26b991937f47e86c24ca7a31@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>

Johannes Berg 2 years ago bfb80d8b e20f9b3c

+283 -26

2 changed files

expand all

arch

kernel

time.c

include

uapi

linux

um_timetravel.h

+118 -12

arch/um/kernel/time.c

··· 31 31 static bool time_travel_start_set; 32 32 static unsigned long long time_travel_start; 33 33 static unsigned long long time_travel_time; 34 + static unsigned long long time_travel_shm_offset; 34 35 static LIST_HEAD(time_travel_events); 35 36 static LIST_HEAD(time_travel_irqs); 36 37 static unsigned long long time_travel_timer_interval; ··· 41 40 static unsigned int time_travel_ext_waiting; 42 41 static bool time_travel_ext_prev_request_valid; 43 42 static unsigned long long time_travel_ext_prev_request; 44 - static bool time_travel_ext_free_until_valid; 45 - static unsigned long long time_travel_ext_free_until; 43 + static unsigned long long *time_travel_ext_free_until; 44 + static unsigned long long _time_travel_ext_free_until; 45 + static u16 time_travel_shm_id; 46 + static struct um_timetravel_schedshm *time_travel_shm; 47 + static union um_timetravel_schedshm_client *time_travel_shm_client; 46 48 47 49 static void time_travel_set_time(unsigned long long ns) 48 50 { ··· 62 58 TTMH_IDLE, 63 59 TTMH_POLL, 64 60 TTMH_READ, 61 + TTMH_READ_START_ACK, 65 62 }; 66 63 67 64 static u64 bc_message; ··· 72 67 { 73 68 time_travel_should_print_bc_msg = 0; 74 69 printk(KERN_INFO "time-travel: received broadcast 0x%llx\n", bc_message); 70 + } 71 + 72 + static void time_travel_setup_shm(int fd, u16 id) 73 + { 74 + u32 len; 75 + 76 + time_travel_shm = os_mmap_rw_shared(fd, sizeof(*time_travel_shm)); 77 + 78 + if (!time_travel_shm) 79 + goto out; 80 + 81 + len = time_travel_shm->len; 82 + 83 + if (time_travel_shm->version != UM_TIMETRAVEL_SCHEDSHM_VERSION || 84 + len < struct_size(time_travel_shm, clients, id + 1)) { 85 + os_unmap_memory(time_travel_shm, sizeof(*time_travel_shm)); 86 + time_travel_shm = NULL; 87 + goto out; 88 + } 89 + 90 + time_travel_shm = os_mremap_rw_shared(time_travel_shm, 91 + sizeof(*time_travel_shm), 92 + len); 93 + if (!time_travel_shm) 94 + goto out; 95 + 96 + time_travel_shm_offset = time_travel_shm->current_time; 97 + time_travel_shm_client = 
&time_travel_shm->clients[id]; 98 + time_travel_shm_client->capa |= UM_TIMETRAVEL_SCHEDSHM_CAP_TIME_SHARE; 99 + time_travel_shm_id = id; 100 + /* always look at that free_until from now on */ 101 + time_travel_ext_free_until = &time_travel_shm->free_until; 102 + out: 103 + os_close_file(fd); 75 104 } 76 105 77 106 static void time_travel_handle_message(struct um_timetravel_msg *msg, ··· 128 89 } 129 90 } 130 91 131 - ret = os_read_file(time_travel_ext_fd, msg, sizeof(*msg)); 92 + if (unlikely(mode == TTMH_READ_START_ACK)) { 93 + int fd[UM_TIMETRAVEL_SHARED_MAX_FDS]; 94 + 95 + ret = os_rcv_fd_msg(time_travel_ext_fd, fd, 96 + ARRAY_SIZE(fd), msg, sizeof(*msg)); 97 + if (ret == sizeof(*msg)) { 98 + time_travel_setup_shm(fd[UM_TIMETRAVEL_SHARED_MEMFD], 99 + msg->time & UM_TIMETRAVEL_START_ACK_ID); 100 + /* we don't use the logging for now */ 101 + os_close_file(fd[UM_TIMETRAVEL_SHARED_LOGFD]); 102 + } 103 + } else { 104 + ret = os_read_file(time_travel_ext_fd, msg, sizeof(*msg)); 105 + } 132 106 133 107 if (ret == 0) 134 108 panic("time-travel external link is broken\n"); ··· 157 105 return; 158 106 case UM_TIMETRAVEL_RUN: 159 107 time_travel_set_time(msg->time); 108 + if (time_travel_shm) { 109 + /* no request right now since we're running */ 110 + time_travel_shm_client->flags &= 111 + ~UM_TIMETRAVEL_SCHEDSHM_FLAGS_REQ_RUN; 112 + /* no ack for shared memory RUN */ 113 + return; 114 + } 160 115 break; 161 116 case UM_TIMETRAVEL_FREE_UNTIL: 162 - time_travel_ext_free_until_valid = true; 163 - time_travel_ext_free_until = msg->time; 117 + /* not supposed to get this with shm, but ignore it */ 118 + if (time_travel_shm) 119 + break; 120 + time_travel_ext_free_until = &_time_travel_ext_free_until; 121 + _time_travel_ext_free_until = msg->time; 164 122 break; 165 123 case UM_TIMETRAVEL_BROADCAST: 166 124 bc_message = msg->time; ··· 211 149 block_signals_hard(); 212 150 os_write_file(time_travel_ext_fd, &msg, sizeof(msg)); 213 151 152 + /* no ACK expected for WAIT in shared 
memory mode */ 153 + if (msg.op == UM_TIMETRAVEL_WAIT && time_travel_shm) 154 + goto done; 155 + 214 156 while (msg.op != UM_TIMETRAVEL_ACK) 215 - time_travel_handle_message(&msg, TTMH_READ); 157 + time_travel_handle_message(&msg, 158 + op == UM_TIMETRAVEL_START ? 159 + TTMH_READ_START_ACK : 160 + TTMH_READ); 216 161 217 162 if (msg.seq != mseq) 218 163 panic("time-travel: ACK message has different seqno! op=%d, seq=%d != %d time=%lld\n", ··· 227 158 228 159 if (op == UM_TIMETRAVEL_GET) 229 160 time_travel_set_time(msg.time); 161 + done: 230 162 unblock_signals_hard(); 231 163 232 164 return msg.time; ··· 263 193 /* 264 194 * if we're running and are allowed to run past the request 265 195 * then we don't need to update it either 196 + * 197 + * Note for shm we ignore FREE_UNTIL messages and leave the pointer 198 + * to shared memory, and for non-shm the offset is 0. 266 199 */ 267 - if (!time_travel_ext_waiting && time_travel_ext_free_until_valid && 268 - time < time_travel_ext_free_until) 200 + if (!time_travel_ext_waiting && time_travel_ext_free_until && 201 + time < (*time_travel_ext_free_until - time_travel_shm_offset)) 269 202 return; 270 203 271 204 time_travel_ext_prev_request = time; 272 205 time_travel_ext_prev_request_valid = true; 206 + 207 + if (time_travel_shm) { 208 + union um_timetravel_schedshm_client *running; 209 + 210 + running = &time_travel_shm->clients[time_travel_shm->running_id]; 211 + 212 + if (running->capa & UM_TIMETRAVEL_SCHEDSHM_CAP_TIME_SHARE) { 213 + time_travel_shm_client->flags |= 214 + UM_TIMETRAVEL_SCHEDSHM_FLAGS_REQ_RUN; 215 + time += time_travel_shm_offset; 216 + time_travel_shm_client->req_time = time; 217 + if (time < time_travel_shm->free_until) 218 + time_travel_shm->free_until = time; 219 + return; 220 + } 221 + } 222 + 273 223 time_travel_ext_req(UM_TIMETRAVEL_REQUEST, time); 274 224 } 275 225 276 226 void __time_travel_propagate_time(void) 277 227 { 278 228 static unsigned long long last_propagated; 229 + 230 + if 
(time_travel_shm) { 231 + if (time_travel_shm->running_id != time_travel_shm_id) 232 + panic("time-travel: setting time while not running\n"); 233 + time_travel_shm->current_time = time_travel_time + 234 + time_travel_shm_offset; 235 + return; 236 + } 279 237 280 238 if (last_propagated == time_travel_time) 281 239 return; ··· 320 222 * If we received an external sync point ("free until") then we 321 223 * don't have to request/wait for anything until then, unless 322 224 * we're already waiting. 225 + * 226 + * Note for shm we ignore FREE_UNTIL messages and leave the pointer 227 + * to shared memory, and for non-shm the offset is 0. 323 228 */ 324 - if (!time_travel_ext_waiting && time_travel_ext_free_until_valid && 325 - time < time_travel_ext_free_until) 229 + if (!time_travel_ext_waiting && time_travel_ext_free_until && 230 + time < (*time_travel_ext_free_until - time_travel_shm_offset)) 326 231 return false; 327 232 328 233 time_travel_ext_update_request(time); ··· 339 238 }; 340 239 341 240 time_travel_ext_prev_request_valid = false; 342 - time_travel_ext_free_until_valid = false; 241 + if (!time_travel_shm) 242 + time_travel_ext_free_until = NULL; 343 243 time_travel_ext_waiting++; 344 244 345 245 time_travel_ext_req(UM_TIMETRAVEL_WAIT, -1); ··· 363 261 364 262 static void time_travel_ext_get_time(void) 365 263 { 366 - time_travel_ext_req(UM_TIMETRAVEL_GET, -1); 264 + if (time_travel_shm) 265 + time_travel_set_time(time_travel_shm->current_time - 266 + time_travel_shm_offset); 267 + else 268 + time_travel_ext_req(UM_TIMETRAVEL_GET, -1); 367 269 } 368 270 369 271 static void __time_travel_update_time(unsigned long long ns, bool idle)

+165 -14

include/uapi/linux/um_timetravel.h

··· 1 + /* SPDX-License-Identifier: BSD-3-Clause */ 1 2 /* 2 - * Permission to use, copy, modify, and/or distribute this software for any 3 - * purpose with or without fee is hereby granted, provided that the above 4 - * copyright notice and this permission notice appear in all copies. 5 - * 6 - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 7 - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 8 - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 9 - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 10 - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 11 - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 12 - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 13 - * 14 - * Copyright (C) 2019 Intel Corporation 3 + * Copyright (C) 2019 - 2023 Intel Corporation 15 4 */ 16 5 #ifndef _UAPI_LINUX_UM_TIMETRAVEL_H 17 6 #define _UAPI_LINUX_UM_TIMETRAVEL_H ··· 39 50 __u64 time; 40 51 }; 41 52 53 + /* max number of file descriptors that can be sent/received in a message */ 54 + #define UM_TIMETRAVEL_MAX_FDS 2 55 + 56 + /** 57 + * enum um_timetravel_shared_mem_fds - fds sent in ACK message for START message 58 + */ 59 + enum um_timetravel_shared_mem_fds { 60 + /** 61 + * @UM_TIMETRAVEL_SHARED_MEMFD: Index of the shared memory file 62 + * descriptor in the control message 63 + */ 64 + UM_TIMETRAVEL_SHARED_MEMFD, 65 + /** 66 + * @UM_TIMETRAVEL_SHARED_LOGFD: Index of the logging file descriptor 67 + * in the control message 68 + */ 69 + UM_TIMETRAVEL_SHARED_LOGFD, 70 + UM_TIMETRAVEL_SHARED_MAX_FDS, 71 + }; 72 + 73 + /** 74 + * enum um_timetravel_start_ack - ack-time mask for start message 75 + */ 76 + enum um_timetravel_start_ack { 77 + /** 78 + * @UM_TIMETRAVEL_START_ACK_ID: client ID that controller allocated. 
79 + */ 80 + UM_TIMETRAVEL_START_ACK_ID = 0xffff, 81 + }; 82 + 42 83 /** 43 84 * enum um_timetravel_ops - Operation codes 44 85 */ ··· 76 57 /** 77 58 * @UM_TIMETRAVEL_ACK: response (ACK) to any previous message, 78 59 * this usually doesn't carry any data in the 'time' field 79 - * unless otherwise specified below 60 + * unless otherwise specified below, note: while using shared 61 + * memory no ACK for WAIT and RUN messages, for more info see 62 + * &struct um_timetravel_schedshm. 80 63 */ 81 64 UM_TIMETRAVEL_ACK = 0, 82 65 ··· 157 136 UM_TIMETRAVEL_BROADCAST = 9, 158 137 }; 159 138 139 + /* version of struct um_timetravel_schedshm */ 140 + #define UM_TIMETRAVEL_SCHEDSHM_VERSION 2 141 + 142 + /** 143 + * enum um_timetravel_schedshm_cap - time travel capabilities of every client 144 + * 145 + * These flags must be set immediately after processing the ACK to 146 + * the START message, before sending any message to the controller. 147 + */ 148 + enum um_timetravel_schedshm_cap { 149 + /** 150 + * @UM_TIMETRAVEL_SCHEDSHM_CAP_TIME_SHARE: client can read current time 151 + * update internal time request to shared memory and read 152 + * free until and send no Ack on RUN and doesn't expect ACK on 153 + * WAIT. 154 + */ 155 + UM_TIMETRAVEL_SCHEDSHM_CAP_TIME_SHARE = 0x1, 156 + }; 157 + 158 + /** 159 + * enum um_timetravel_schedshm_flags - time travel flags of every client 160 + */ 161 + enum um_timetravel_schedshm_flags { 162 + /** 163 + * @UM_TIMETRAVEL_SCHEDSHM_FLAGS_REQ_RUN: client has a request to run. 164 + * It's set by client when it has a request to run, if (and only 165 + * if) the @running_id points to a client that is able to use 166 + * shared memory, i.e. has %UM_TIMETRAVEL_SCHEDSHM_CAP_TIME_SHARE 167 + * (this includes the client itself). Otherwise, a message must 168 + * be used. 
169 + */ 170 + UM_TIMETRAVEL_SCHEDSHM_FLAGS_REQ_RUN = 0x1, 171 + }; 172 + 173 + /** 174 + * DOC: Time travel shared memory overview 175 + * 176 + * The main purpose of the shared memory is to avoid all time travel message 177 + * that don't need any action, for example current time can be held in shared 178 + * memory without the need of any client to send a message UM_TIMETRAVEL_GET 179 + * in order to know what's the time. 180 + * 181 + * Since this is shared memory with all clients and controller and controller 182 + * creates the shared memory space, all time values are absolute to controller 183 + * time. So first time client connects to shared memory mode it should take the 184 + * current_time value in shared memory and keep it internally as a diff to 185 + * shared memory times, and once shared memory is initialized, any interaction 186 + * with the controller must happen in the controller time domain, including any 187 + * messages (for clients that are not using shared memory, the controller will 188 + * handle an offset and make the clients think they start at time zero.) 189 + * 190 + * Along with the shared memory file descriptor is sent to the client a logging 191 + * file descriptor, to have all logs related to shared memory, 192 + * logged into one place. note: to have all logs synced into log file at write, 193 + * file should be flushed (fflush) after writing to it. 194 + * 195 + * To avoid memory corruption, we define below for each field who can write to 196 + * it at what time, defined in the structure fields. 197 + * 198 + * To avoid having to pack this struct, all fields in it must be naturally aligned 199 + * (i.e. aligned to their size). 
200 + */ 201 + 202 + /** 203 + * union um_timetravel_schedshm_client - UM time travel client struct 204 + * 205 + * Every entity using the shared memory including the controller has a place in 206 + * the um_timetravel_schedshm clients array, that holds info related to the client 207 + * using the shared memory, and can be set only by the client after it gets the 208 + * fd memory. 209 + * 210 + * @capa: bit fields with client capabilities see 211 + * &enum um_timetravel_schedshm_cap, set by client once after getting the 212 + * shared memory file descriptor. 213 + * @flags: bit fields for flags see &enum um_timetravel_schedshm_flags for doc. 214 + * @req_time: request time to run, set by client on every request it needs. 215 + * @name: unique id sent to the controller by client with START message. 216 + */ 217 + union um_timetravel_schedshm_client { 218 + struct { 219 + __u32 capa; 220 + __u32 flags; 221 + __u64 req_time; 222 + __u64 name; 223 + }; 224 + char reserve[128]; /* reserved for future usage */ 225 + }; 226 + 227 + /** 228 + * struct um_timetravel_schedshm - UM time travel shared memory struct 229 + * 230 + * @hdr: header fields: 231 + * @version: Current version struct UM_TIMETRAVEL_SCHEDSHM_VERSION, 232 + * set by controller once at init, clients must check this after mapping 233 + * and work without shared memory if they cannot handle the indicated 234 + * version. 235 + * @len: Length of all the memory including header (@hdr), clients should once 236 + * per connection first mmap the header and take the length (@len) to remap the entire size. 237 + * This is done in order to support dynamic struct size letting number of 238 + * clients be dynamic based on controller support. 239 + * @free_until: Stores the next request to run by any client, in order for the 240 + * current client to know how long it can still run. 
A client needs to (at 241 + * least) reload this value immediately after communicating with any other 242 + * client, since the controller will update this field when a new request 243 + * is made by any client. Clients also must update this value when they 244 + * insert/update an own request into the shared memory while not running 245 + * themselves, and the new request is before than the current value. 246 + * current_time: Current time, can only be set by the client in running state 247 + * (indicated by @running_id), though that client may only run until @free_until, 248 + * so it must remain smaller than @free_until. 249 + * @running_id: The current client in state running, set before a client is 250 + * notified that it's now running. 251 + * @max_clients: size of @clients array, set once at init by the controller. 252 + * @clients: clients array see &union um_timetravel_schedshm_client for doc, 253 + * set only by client. 254 + */ 255 + struct um_timetravel_schedshm { 256 + union { 257 + struct { 258 + __u32 version; 259 + __u32 len; 260 + __u64 free_until; 261 + __u64 current_time; 262 + __u16 running_id; 263 + __u16 max_clients; 264 + }; 265 + char hdr[4096]; /* align to 4K page size */ 266 + }; 267 + union um_timetravel_schedshm_client clients[]; 268 + }; 160 269 #endif /* _UAPI_LINUX_UM_TIMETRAVEL_H */