Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

platform/surface: aggregator: Add error injection capabilities

This commit adds error injection hooks to the Surface Serial Hub
communication protocol implementation, to:

- simulate simple serial transmission errors,

- drop packets, requests, and responses, simulating communication
failures and potentially triggering retransmission timeouts, as well as

- inject invalid data into submitted and received packets.

Together with the trace points introduced in the previous commit, these
facilities are intended to aid in testing, validation, and debugging of
the Surface Aggregator communication layer.

Signed-off-by: Maximilian Luz <luzmaximilian@gmail.com>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Acked-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Link: https://lore.kernel.org/r/20201221183959.1186143-6-luzmaximilian@gmail.com
Signed-off-by: Hans de Goede <hdegoede@redhat.com>

authored by

Maximilian Luz and committed by
Hans de Goede
02be44f6 0d21bb85

+375 -1
+14
drivers/platform/surface/aggregator/Kconfig
··· 40 40 Choose m if you want to build the SAM subsystem core and SSH driver as 41 41 module, y if you want to build it into the kernel and n if you don't 42 42 want it at all. 43 + 44 + config SURFACE_AGGREGATOR_ERROR_INJECTION 45 + bool "Surface System Aggregator Module Error Injection Capabilities" 46 + depends on SURFACE_AGGREGATOR 47 + depends on FUNCTION_ERROR_INJECTION 48 + help 49 + Provides error-injection capabilities for the Surface System 50 + Aggregator Module subsystem and Surface Serial Hub driver. 51 + 52 + Specifically, exports error injection hooks to be used with the 53 + kernel's function error injection capabilities to simulate underlying 54 + transport and communication problems, such as invalid data sent to or 55 + received from the EC, dropped data, and communication timeouts. 56 + Intended for development and debugging.
+295 -1
drivers/platform/surface/aggregator/ssh_packet_layer.c
··· 7 7 8 8 #include <asm/unaligned.h> 9 9 #include <linux/atomic.h> 10 + #include <linux/error-injection.h> 10 11 #include <linux/jiffies.h> 11 12 #include <linux/kfifo.h> 12 13 #include <linux/kref.h> ··· 226 225 * SSH_PTL_RX_FIFO_LEN - Fifo input-buffer size in bytes. 227 226 */ 228 227 #define SSH_PTL_RX_FIFO_LEN 4096 228 + 229 + #ifdef CONFIG_SURFACE_AGGREGATOR_ERROR_INJECTION 230 + 231 + /** 232 + * ssh_ptl_should_drop_ack_packet() - Error injection hook to drop ACK packets. 233 + * 234 + * Useful to test detection and handling of automated re-transmits by the EC. 235 + * Specifically of packets that the EC considers not-ACKed but the driver 236 + * already considers ACKed (due to dropped ACK). In this case, the EC 237 + * re-transmits the packet-to-be-ACKed and the driver should detect it as 238 + * duplicate/already handled. Note that the driver should still send an ACK 239 + * for the re-transmitted packet. 240 + */ 241 + static noinline bool ssh_ptl_should_drop_ack_packet(void) 242 + { 243 + return false; 244 + } 245 + ALLOW_ERROR_INJECTION(ssh_ptl_should_drop_ack_packet, TRUE); 246 + 247 + /** 248 + * ssh_ptl_should_drop_nak_packet() - Error injection hook to drop NAK packets. 249 + * 250 + * Useful to test/force automated (timeout-based) re-transmit by the EC. 251 + * Specifically, packets that have not reached the driver completely/with valid 252 + * checksums. Only useful in combination with receival of (injected) bad data. 253 + */ 254 + static noinline bool ssh_ptl_should_drop_nak_packet(void) 255 + { 256 + return false; 257 + } 258 + ALLOW_ERROR_INJECTION(ssh_ptl_should_drop_nak_packet, TRUE); 259 + 260 + /** 261 + * ssh_ptl_should_drop_dsq_packet() - Error injection hook to drop sequenced 262 + * data packet. 263 + * 264 + * Useful to test re-transmit timeout of the driver. 
If the data packet has not 265 + * been ACKed after a certain time, the driver should re-transmit the packet up 266 + * to limited number of times defined in SSH_PTL_MAX_PACKET_TRIES. 267 + */ 268 + static noinline bool ssh_ptl_should_drop_dsq_packet(void) 269 + { 270 + return false; 271 + } 272 + ALLOW_ERROR_INJECTION(ssh_ptl_should_drop_dsq_packet, TRUE); 273 + 274 + /** 275 + * ssh_ptl_should_fail_write() - Error injection hook to make 276 + * serdev_device_write() fail. 277 + * 278 + * Hook to simulate errors in serdev_device_write when transmitting packets. 279 + */ 280 + static noinline int ssh_ptl_should_fail_write(void) 281 + { 282 + return 0; 283 + } 284 + ALLOW_ERROR_INJECTION(ssh_ptl_should_fail_write, ERRNO); 285 + 286 + /** 287 + * ssh_ptl_should_corrupt_tx_data() - Error injection hook to simulate invalid 288 + * data being sent to the EC. 289 + * 290 + * Hook to simulate corrupt/invalid data being sent from host (driver) to EC. 291 + * Causes the packet data to be actively corrupted by overwriting it with 292 + * pre-defined values, such that it becomes invalid, causing the EC to respond 293 + * with a NAK packet. Useful to test handling of NAK packets received by the 294 + * driver. 295 + */ 296 + static noinline bool ssh_ptl_should_corrupt_tx_data(void) 297 + { 298 + return false; 299 + } 300 + ALLOW_ERROR_INJECTION(ssh_ptl_should_corrupt_tx_data, TRUE); 301 + 302 + /** 303 + * ssh_ptl_should_corrupt_rx_syn() - Error injection hook to simulate invalid 304 + * data being sent by the EC. 305 + * 306 + * Hook to simulate invalid SYN bytes, i.e. an invalid start of messages and 307 + * test handling thereof in the driver. 308 + */ 309 + static noinline bool ssh_ptl_should_corrupt_rx_syn(void) 310 + { 311 + return false; 312 + } 313 + ALLOW_ERROR_INJECTION(ssh_ptl_should_corrupt_rx_syn, TRUE); 314 + 315 + /** 316 + * ssh_ptl_should_corrupt_rx_data() - Error injection hook to simulate invalid 317 + * data being sent by the EC. 
318 + * 319 + * Hook to simulate invalid data/checksum of the message frame and test handling 320 + * thereof in the driver. 321 + */ 322 + static noinline bool ssh_ptl_should_corrupt_rx_data(void) 323 + { 324 + return false; 325 + } 326 + ALLOW_ERROR_INJECTION(ssh_ptl_should_corrupt_rx_data, TRUE); 327 + 328 + static bool __ssh_ptl_should_drop_ack_packet(struct ssh_packet *packet) 329 + { 330 + if (likely(!ssh_ptl_should_drop_ack_packet())) 331 + return false; 332 + 333 + trace_ssam_ei_tx_drop_ack_packet(packet); 334 + ptl_info(packet->ptl, "packet error injection: dropping ACK packet %p\n", 335 + packet); 336 + 337 + return true; 338 + } 339 + 340 + static bool __ssh_ptl_should_drop_nak_packet(struct ssh_packet *packet) 341 + { 342 + if (likely(!ssh_ptl_should_drop_nak_packet())) 343 + return false; 344 + 345 + trace_ssam_ei_tx_drop_nak_packet(packet); 346 + ptl_info(packet->ptl, "packet error injection: dropping NAK packet %p\n", 347 + packet); 348 + 349 + return true; 350 + } 351 + 352 + static bool __ssh_ptl_should_drop_dsq_packet(struct ssh_packet *packet) 353 + { 354 + if (likely(!ssh_ptl_should_drop_dsq_packet())) 355 + return false; 356 + 357 + trace_ssam_ei_tx_drop_dsq_packet(packet); 358 + ptl_info(packet->ptl, 359 + "packet error injection: dropping sequenced data packet %p\n", 360 + packet); 361 + 362 + return true; 363 + } 364 + 365 + static bool ssh_ptl_should_drop_packet(struct ssh_packet *packet) 366 + { 367 + /* Ignore packets that don't carry any data (i.e. flush). 
*/ 368 + if (!packet->data.ptr || !packet->data.len) 369 + return false; 370 + 371 + switch (packet->data.ptr[SSH_MSGOFFSET_FRAME(type)]) { 372 + case SSH_FRAME_TYPE_ACK: 373 + return __ssh_ptl_should_drop_ack_packet(packet); 374 + 375 + case SSH_FRAME_TYPE_NAK: 376 + return __ssh_ptl_should_drop_nak_packet(packet); 377 + 378 + case SSH_FRAME_TYPE_DATA_SEQ: 379 + return __ssh_ptl_should_drop_dsq_packet(packet); 380 + 381 + default: 382 + return false; 383 + } 384 + } 385 + 386 + static int ssh_ptl_write_buf(struct ssh_ptl *ptl, struct ssh_packet *packet, 387 + const unsigned char *buf, size_t count) 388 + { 389 + int status; 390 + 391 + status = ssh_ptl_should_fail_write(); 392 + if (unlikely(status)) { 393 + trace_ssam_ei_tx_fail_write(packet, status); 394 + ptl_info(packet->ptl, 395 + "packet error injection: simulating transmit error %d, packet %p\n", 396 + status, packet); 397 + 398 + return status; 399 + } 400 + 401 + return serdev_device_write_buf(ptl->serdev, buf, count); 402 + } 403 + 404 + static void ssh_ptl_tx_inject_invalid_data(struct ssh_packet *packet) 405 + { 406 + /* Ignore packets that don't carry any data (i.e. flush). */ 407 + if (!packet->data.ptr || !packet->data.len) 408 + return; 409 + 410 + /* Only allow sequenced data packets to be modified. */ 411 + if (packet->data.ptr[SSH_MSGOFFSET_FRAME(type)] != SSH_FRAME_TYPE_DATA_SEQ) 412 + return; 413 + 414 + if (likely(!ssh_ptl_should_corrupt_tx_data())) 415 + return; 416 + 417 + trace_ssam_ei_tx_corrupt_data(packet); 418 + ptl_info(packet->ptl, 419 + "packet error injection: simulating invalid transmit data on packet %p\n", 420 + packet); 421 + 422 + /* 423 + * NB: The value 0xb3 has been chosen more or less randomly so that it 424 + * doesn't have any (major) overlap with the SYN bytes (aa 55) and is 425 + * non-trivial (i.e. non-zero, non-0xff). 
426 + */ 427 + memset(packet->data.ptr, 0xb3, packet->data.len); 428 + } 429 + 430 + static void ssh_ptl_rx_inject_invalid_syn(struct ssh_ptl *ptl, 431 + struct ssam_span *data) 432 + { 433 + struct ssam_span frame; 434 + 435 + /* Check if there actually is something to corrupt. */ 436 + if (!sshp_find_syn(data, &frame)) 437 + return; 438 + 439 + if (likely(!ssh_ptl_should_corrupt_rx_syn())) 440 + return; 441 + 442 + trace_ssam_ei_rx_corrupt_syn(data->len); 443 + 444 + data->ptr[1] = 0xb3; /* Set second byte of SYN to "random" value. */ 445 + } 446 + 447 + static void ssh_ptl_rx_inject_invalid_data(struct ssh_ptl *ptl, 448 + struct ssam_span *frame) 449 + { 450 + size_t payload_len, message_len; 451 + struct ssh_frame *sshf; 452 + 453 + /* Ignore incomplete messages, will get handled once it's complete. */ 454 + if (frame->len < SSH_MESSAGE_LENGTH(0)) 455 + return; 456 + 457 + /* Ignore incomplete messages, part 2. */ 458 + payload_len = get_unaligned_le16(&frame->ptr[SSH_MSGOFFSET_FRAME(len)]); 459 + message_len = SSH_MESSAGE_LENGTH(payload_len); 460 + if (frame->len < message_len) 461 + return; 462 + 463 + if (likely(!ssh_ptl_should_corrupt_rx_data())) 464 + return; 465 + 466 + sshf = (struct ssh_frame *)&frame->ptr[SSH_MSGOFFSET_FRAME(type)]; 467 + trace_ssam_ei_rx_corrupt_data(sshf); 468 + 469 + /* 470 + * Flip bits in first byte of payload checksum. This is basically 471 + * equivalent to a payload/frame data error without us having to worry 472 + * about (the, arguably pretty small, probability of) accidental 473 + * checksum collisions. 
474 + */ 475 + frame->ptr[frame->len - 2] = ~frame->ptr[frame->len - 2]; 476 + } 477 + 478 + #else /* CONFIG_SURFACE_AGGREGATOR_ERROR_INJECTION */ 479 + 480 + static inline bool ssh_ptl_should_drop_packet(struct ssh_packet *packet) 481 + { 482 + return false; 483 + } 484 + 485 + static inline int ssh_ptl_write_buf(struct ssh_ptl *ptl, 486 + struct ssh_packet *packet, 487 + const unsigned char *buf, 488 + size_t count) 489 + { 490 + return serdev_device_write_buf(ptl->serdev, buf, count); 491 + } 492 + 493 + static inline void ssh_ptl_tx_inject_invalid_data(struct ssh_packet *packet) 494 + { 495 + } 496 + 497 + static inline void ssh_ptl_rx_inject_invalid_syn(struct ssh_ptl *ptl, 498 + struct ssam_span *data) 499 + { 500 + } 501 + 502 + static inline void ssh_ptl_rx_inject_invalid_data(struct ssh_ptl *ptl, 503 + struct ssam_span *frame) 504 + { 505 + } 506 + 507 + #endif /* CONFIG_SURFACE_AGGREGATOR_ERROR_INJECTION */ 229 508 230 509 static void __ssh_ptl_packet_release(struct kref *kref) 231 510 { ··· 1057 776 if (unlikely(!packet->data.ptr)) 1058 777 return 0; 1059 778 779 + /* Error injection: drop packet to simulate transmission problem. */ 780 + if (ssh_ptl_should_drop_packet(packet)) 781 + return 0; 782 + 783 + /* Error injection: simulate invalid packet data. */ 784 + ssh_ptl_tx_inject_invalid_data(packet); 785 + 1060 786 ptl_dbg(ptl, "tx: sending data (length: %zu)\n", packet->data.len); 1061 787 print_hex_dump_debug("tx: ", DUMP_PREFIX_OFFSET, 16, 1, 1062 788 packet->data.ptr, packet->data.len, false); ··· 1075 787 buf = packet->data.ptr + offset; 1076 788 len = packet->data.len - offset; 1077 789 1078 - status = serdev_device_write_buf(ptl->serdev, buf, len); 790 + status = ssh_ptl_write_buf(ptl, packet, buf, len); 1079 791 if (status < 0) 1080 792 return status; 1081 793 ··· 1688 1400 bool syn_found; 1689 1401 int status; 1690 1402 1403 + /* Error injection: Modify data to simulate corrupt SYN bytes. 
*/ 1404 + ssh_ptl_rx_inject_invalid_syn(ptl, source); 1405 + 1691 1406 /* Find SYN. */ 1692 1407 syn_found = sshp_find_syn(source, &aligned); 1693 1408 ··· 1720 1429 1721 1430 if (unlikely(!syn_found)) 1722 1431 return aligned.ptr - source->ptr; 1432 + 1433 + /* Error injection: Modify data to simulate corruption. */ 1434 + ssh_ptl_rx_inject_invalid_data(ptl, &aligned); 1723 1435 1724 1436 /* Parse and validate frame. */ 1725 1437 status = sshp_parse_frame(&ptl->serdev->dev, &aligned, &frame, &payload,
+35
drivers/platform/surface/aggregator/ssh_request_layer.c
··· 8 8 #include <asm/unaligned.h> 9 9 #include <linux/atomic.h> 10 10 #include <linux/completion.h> 11 + #include <linux/error-injection.h> 11 12 #include <linux/ktime.h> 12 13 #include <linux/limits.h> 13 14 #include <linux/list.h> ··· 58 57 * guess, may be adjusted. 59 58 */ 60 59 #define SSH_RTL_TX_BATCH 10 60 + 61 + #ifdef CONFIG_SURFACE_AGGREGATOR_ERROR_INJECTION 62 + 63 + /** 64 + * ssh_rtl_should_drop_response() - Error injection hook to drop request 65 + * responses. 66 + * 67 + * Useful to cause request transmission timeouts in the driver by dropping the 68 + * response to a request. 69 + */ 70 + static noinline bool ssh_rtl_should_drop_response(void) 71 + { 72 + return false; 73 + } 74 + ALLOW_ERROR_INJECTION(ssh_rtl_should_drop_response, TRUE); 75 + 76 + #else 77 + 78 + static inline bool ssh_rtl_should_drop_response(void) 79 + { 80 + return false; 81 + } 82 + 83 + #endif 61 84 62 85 static u16 ssh_request_get_rqid(struct ssh_request *rqst) 63 86 { ··· 483 458 /* We generally expect requests to be processed in order. */ 484 459 if (unlikely(ssh_request_get_rqid(p) != rqid)) 485 460 continue; 461 + 462 + /* Simulate response timeout. */ 463 + if (ssh_rtl_should_drop_response()) { 464 + spin_unlock(&rtl->pending.lock); 465 + 466 + trace_ssam_ei_rx_drop_response(p); 467 + rtl_info(rtl, "request error injection: dropping response for request %p\n", 468 + &p->packet); 469 + return; 470 + } 486 471 487 472 /* 488 473 * Mark as "response received" and "locked" as we're going to
+31
drivers/platform/surface/aggregator/trace.h
··· 565 565 TP_ARGS(pending) \ 566 566 ) 567 567 568 + DECLARE_EVENT_CLASS(ssam_data_class, 569 + TP_PROTO(size_t length), 570 + 571 + TP_ARGS(length), 572 + 573 + TP_STRUCT__entry( 574 + __field(size_t, length) 575 + ), 576 + 577 + TP_fast_assign( 578 + __entry->length = length; 579 + ), 580 + 581 + TP_printk("length=%zu", __entry->length) 582 + ); 583 + 584 + #define DEFINE_SSAM_DATA_EVENT(name) \ 585 + DEFINE_EVENT(ssam_data_class, ssam_##name, \ 586 + TP_PROTO(size_t length), \ 587 + TP_ARGS(length) \ 588 + ) 589 + 568 590 DEFINE_SSAM_FRAME_EVENT(rx_frame_received); 569 591 DEFINE_SSAM_COMMAND_EVENT(rx_response_received); 570 592 DEFINE_SSAM_COMMAND_EVENT(rx_event_received); ··· 604 582 DEFINE_SSAM_REQUEST_EVENT(request_cancel); 605 583 DEFINE_SSAM_REQUEST_STATUS_EVENT(request_complete); 606 584 DEFINE_SSAM_PENDING_EVENT(rtl_timeout_reap); 585 + 586 + DEFINE_SSAM_PACKET_EVENT(ei_tx_drop_ack_packet); 587 + DEFINE_SSAM_PACKET_EVENT(ei_tx_drop_nak_packet); 588 + DEFINE_SSAM_PACKET_EVENT(ei_tx_drop_dsq_packet); 589 + DEFINE_SSAM_PACKET_STATUS_EVENT(ei_tx_fail_write); 590 + DEFINE_SSAM_PACKET_EVENT(ei_tx_corrupt_data); 591 + DEFINE_SSAM_DATA_EVENT(ei_rx_corrupt_syn); 592 + DEFINE_SSAM_FRAME_EVENT(ei_rx_corrupt_data); 593 + DEFINE_SSAM_REQUEST_EVENT(ei_rx_drop_response); 607 594 608 595 DEFINE_SSAM_ALLOC_EVENT(ctrl_packet_alloc); 609 596 DEFINE_SSAM_FREE_EVENT(ctrl_packet_free);