Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

xfs: merge adjacent io completions of the same type

It's possible for pagecache writeback to split up a large amount of work
into smaller pieces for throttling purposes or to reduce the amount of
time a writeback operation is pending. Whatever the reason, XFS can end
up with a bunch of IO completions that call for the same operation to be
performed on a contiguous extent mapping. Since mappings are extent
based in XFS, we'd prefer to run fewer transactions when we can.

When we're processing an ioend on the list of io completions, check to
see if the next items on the list are both adjacent and of the same
type. If so, we can merge the completions to reduce transaction
overhead.

On fast storage this doesn't seem to make much of a difference in
performance, though the number of transactions for an overnight xfstests
run seems to drop by ~5%.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>

+86
+86
fs/xfs/xfs_aops.c
···
237 237   xfs_end_ioend(
238 238       struct xfs_ioend *ioend)
239 239   {
    240 +     struct list_head ioend_list;
240 241       struct xfs_inode *ip = XFS_I(ioend->io_inode);
241 242       xfs_off_t offset = ioend->io_offset;
242 243       size_t size = ioend->io_size;
···
274 273   done:
275 274       if (ioend->io_append_trans)
276 275           error = xfs_setfilesize_ioend(ioend, error);
    276 +     list_replace_init(&ioend->io_list, &ioend_list);
277 277       xfs_destroy_ioend(ioend, error);
    278 +
    279 +     while (!list_empty(&ioend_list)) {
    280 +         ioend = list_first_entry(&ioend_list, struct xfs_ioend,
    281 +                 io_list);
    282 +         list_del_init(&ioend->io_list);
    283 +         xfs_destroy_ioend(ioend, error);
    284 +     }
    285 + }
    286 +
    287 + /*
    288 +  * We can merge two adjacent ioends if they have the same set of work to do.
    289 +  */
    290 + static bool
    291 + xfs_ioend_can_merge(
    292 +     struct xfs_ioend *ioend,
    293 +     int ioend_error,
    294 +     struct xfs_ioend *next)
    295 + {
    296 +     int next_error;
    297 +
    298 +     next_error = blk_status_to_errno(next->io_bio->bi_status);
    299 +     if (ioend_error != next_error)
    300 +         return false;
    301 +     if ((ioend->io_fork == XFS_COW_FORK) ^ (next->io_fork == XFS_COW_FORK))
    302 +         return false;
    303 +     if ((ioend->io_state == XFS_EXT_UNWRITTEN) ^
    304 +         (next->io_state == XFS_EXT_UNWRITTEN))
    305 +         return false;
    306 +     if (ioend->io_offset + ioend->io_size != next->io_offset)
    307 +         return false;
    308 +     if (xfs_ioend_is_append(ioend) != xfs_ioend_is_append(next))
    309 +         return false;
    310 +     return true;
    311 + }
    312 +
    313 + /* Try to merge adjacent completions. */
    314 + STATIC void
    315 + xfs_ioend_try_merge(
    316 +     struct xfs_ioend *ioend,
    317 +     struct list_head *more_ioends)
    318 + {
    319 +     struct xfs_ioend *next_ioend;
    320 +     int ioend_error;
    321 +     int error;
    322 +
    323 +     if (list_empty(more_ioends))
    324 +         return;
    325 +
    326 +     ioend_error = blk_status_to_errno(ioend->io_bio->bi_status);
    327 +
    328 +     while (!list_empty(more_ioends)) {
    329 +         next_ioend = list_first_entry(more_ioends, struct xfs_ioend,
    330 +                 io_list);
    331 +         if (!xfs_ioend_can_merge(ioend, ioend_error, next_ioend))
    332 +             break;
    333 +         list_move_tail(&next_ioend->io_list, &ioend->io_list);
    334 +         ioend->io_size += next_ioend->io_size;
    335 +         if (ioend->io_append_trans) {
    336 +             error = xfs_setfilesize_ioend(next_ioend, 1);
    337 +             ASSERT(error == 1);
    338 +         }
    339 +     }
    340 + }
    341 +
    342 + /* list_sort compare function for ioends */
    343 + static int
    344 + xfs_ioend_compare(
    345 +     void *priv,
    346 +     struct list_head *a,
    347 +     struct list_head *b)
    348 + {
    349 +     struct xfs_ioend *ia;
    350 +     struct xfs_ioend *ib;
    351 +
    352 +     ia = container_of(a, struct xfs_ioend, io_list);
    353 +     ib = container_of(b, struct xfs_ioend, io_list);
    354 +     if (ia->io_offset < ib->io_offset)
    355 +         return -1;
    356 +     else if (ia->io_offset > ib->io_offset)
    357 +         return 1;
    358 +     return 0;
278 359   }
279 360
280 361   /* Finish all pending io completions. */
···
375 292       list_replace_init(&ip->i_ioend_list, &completion_list);
376 293       spin_unlock_irqrestore(&ip->i_ioend_lock, flags);
377 294
    295 +     list_sort(NULL, &completion_list, xfs_ioend_compare);
    296 +
378 297       while (!list_empty(&completion_list)) {
379 298           ioend = list_first_entry(&completion_list, struct xfs_ioend,
380 299                   io_list);
381 300           list_del_init(&ioend->io_list);
    301 +         xfs_ioend_try_merge(ioend, &completion_list);
382 302           xfs_end_ioend(ioend);
383 303       }
384 304   }