Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

blktrace: fix accounting of partially completed requests

trace_block_rq_complete does not take into account that a request can
be partially completed, so we can get the following incorrect output
from blkparse:

C R 232 + 240 [0]
C R 240 + 232 [0]
C R 248 + 224 [0]
C R 256 + 216 [0]

but should be:

C R 232 + 8 [0]
C R 240 + 8 [0]
C R 248 + 8 [0]
C R 256 + 8 [0]

Also, the whole output summary statistics of completed requests and
final throughput will be incorrect.

This patch takes into account real completion size of the request and
fixes wrong completion accounting.

Signed-off-by: Roman Pen <r.peniaev@gmail.com>
CC: Steven Rostedt <rostedt@goodmis.org>
CC: Frederic Weisbecker <fweisbec@gmail.com>
CC: Ingo Molnar <mingo@redhat.com>
CC: linux-kernel@vger.kernel.org
Cc: stable@kernel.org
Signed-off-by: Jens Axboe <axboe@fb.com>

Authored by Roman Pen; committed by Jens Axboe (commit af5040da, c46fff2a).

+43 -14
+1 -1
block/blk-core.c
@@ -2354,7 +2354,7 @@
 	if (!req->bio)
 		return false;
 
-	trace_block_rq_complete(req->q, req);
+	trace_block_rq_complete(req->q, req, nr_bytes);
 
 	/*
 	 * For fs requests, rq is just carrier of independent bio's
+1 -1
block/blk-mq.c
@@ -309,7 +309,7 @@
 	struct bio *bio = rq->bio;
 	unsigned int bytes = 0;
 
-	trace_block_rq_complete(rq->q, rq);
+	trace_block_rq_complete(rq->q, rq, blk_rq_bytes(rq));
 
 	while (bio) {
 		struct bio *next = bio->bi_next;
+30 -3
include/trace/events/block.h
@@ -132,6 +132,7 @@
  * block_rq_complete - block IO operation completed by device driver
  * @q: queue containing the block operation request
  * @rq: block operations request
+ * @nr_bytes: number of completed bytes
  *
  * The block_rq_complete tracepoint event indicates that some portion
  * of operation request has been completed by the device driver. If
@@ -139,11 +140,37 @@
  * do for the request. If @rq->bio is non-NULL then there is
  * additional work required to complete the request.
  */
-DEFINE_EVENT(block_rq_with_error, block_rq_complete,
+TRACE_EVENT(block_rq_complete,
 
-	TP_PROTO(struct request_queue *q, struct request *rq),
+	TP_PROTO(struct request_queue *q, struct request *rq,
+		 unsigned int nr_bytes),
 
-	TP_ARGS(q, rq)
+	TP_ARGS(q, rq, nr_bytes),
+
+	TP_STRUCT__entry(
+		__field(  dev_t,	dev			)
+		__field(  sector_t,	sector			)
+		__field(  unsigned int,	nr_sector		)
+		__field(  int,		errors			)
+		__array(  char,		rwbs,	RWBS_LEN	)
+		__dynamic_array( char,	cmd,	blk_cmd_buf_len(rq)	)
+	),
+
+	TP_fast_assign(
+		__entry->dev	   = rq->rq_disk ? disk_devt(rq->rq_disk) : 0;
+		__entry->sector    = blk_rq_pos(rq);
+		__entry->nr_sector = nr_bytes >> 9;
+		__entry->errors    = rq->errors;
+
+		blk_fill_rwbs(__entry->rwbs, rq->cmd_flags, nr_bytes);
+		blk_dump_cmd(__get_str(cmd), rq);
+	),
+
+	TP_printk("%d,%d %s (%s) %llu + %u [%d]",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->rwbs, __get_str(cmd),
+		  (unsigned long long)__entry->sector,
+		  __entry->nr_sector, __entry->errors)
 );
 
 DECLARE_EVENT_CLASS(block_rq,
+11 -9
kernel/trace/blktrace.c
@@ -702,6 +702,7 @@
  * blk_add_trace_rq - Add a trace for a request oriented action
  * @q: queue the io is for
  * @rq: the source request
+ * @nr_bytes: number of completed bytes
  * @what: the action
  *
  * Description:
@@ -709,7 +710,7 @@
  *
  **/
 static void blk_add_trace_rq(struct request_queue *q, struct request *rq,
-			     u32 what)
+			     unsigned int nr_bytes, u32 what)
 {
 	struct blk_trace *bt = q->blk_trace;
 
@@ -718,11 +719,11 @@
 
 	if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {
 		what |= BLK_TC_ACT(BLK_TC_PC);
-		__blk_add_trace(bt, 0, blk_rq_bytes(rq), rq->cmd_flags,
+		__blk_add_trace(bt, 0, nr_bytes, rq->cmd_flags,
 				what, rq->errors, rq->cmd_len, rq->cmd);
 	} else {
 		what |= BLK_TC_ACT(BLK_TC_FS);
-		__blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq),
+		__blk_add_trace(bt, blk_rq_pos(rq), nr_bytes,
 				rq->cmd_flags, what, rq->errors, 0, NULL);
 	}
 }
@@ -730,33 +731,34 @@
 static void blk_add_trace_rq_abort(void *ignore,
 				   struct request_queue *q, struct request *rq)
 {
-	blk_add_trace_rq(q, rq, BLK_TA_ABORT);
+	blk_add_trace_rq(q, rq, blk_rq_bytes(rq), BLK_TA_ABORT);
 }
 
 static void blk_add_trace_rq_insert(void *ignore,
 				    struct request_queue *q, struct request *rq)
 {
-	blk_add_trace_rq(q, rq, BLK_TA_INSERT);
+	blk_add_trace_rq(q, rq, blk_rq_bytes(rq), BLK_TA_INSERT);
 }
 
 static void blk_add_trace_rq_issue(void *ignore,
 				   struct request_queue *q, struct request *rq)
 {
-	blk_add_trace_rq(q, rq, BLK_TA_ISSUE);
+	blk_add_trace_rq(q, rq, blk_rq_bytes(rq), BLK_TA_ISSUE);
 }
 
 static void blk_add_trace_rq_requeue(void *ignore,
 				     struct request_queue *q,
 				     struct request *rq)
 {
-	blk_add_trace_rq(q, rq, BLK_TA_REQUEUE);
+	blk_add_trace_rq(q, rq, blk_rq_bytes(rq), BLK_TA_REQUEUE);
 }
 
 static void blk_add_trace_rq_complete(void *ignore,
 				      struct request_queue *q,
-				      struct request *rq)
+				      struct request *rq,
+				      unsigned int nr_bytes)
 {
-	blk_add_trace_rq(q, rq, BLK_TA_COMPLETE);
+	blk_add_trace_rq(q, rq, nr_bytes, BLK_TA_COMPLETE);
 }
 
 /**