1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Copyright (C) 2007 Oracle. All rights reserved.
4 */
5
6#include <linux/bio.h>
7#include <linux/slab.h>
8#include <linux/pagemap.h>
9#include <linux/highmem.h>
10#include <linux/sched/mm.h>
11#include <crypto/hash.h>
12#include "messages.h"
13#include "ctree.h"
14#include "disk-io.h"
15#include "transaction.h"
16#include "bio.h"
17#include "compression.h"
18#include "fs.h"
19#include "accessors.h"
20#include "file-item.h"
21#include "volumes.h"
22
23#define __MAX_CSUM_ITEMS(r, size) ((unsigned long)(((BTRFS_LEAF_DATA_SIZE(r) - \
24 sizeof(struct btrfs_item) * 2) / \
25 size) - 1))
26
27#define MAX_CSUM_ITEMS(r, size) (min_t(u32, __MAX_CSUM_ITEMS(r, size), \
28 PAGE_SIZE))
29
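/*
 * The two macros above bound how many checksums we pack into a single csum
 * item: __MAX_CSUM_ITEMS() is the number of csum entries of @size bytes that
 * fit in one leaf, keeping room for a couple of item headers, and
 * MAX_CSUM_ITEMS() additionally caps that at PAGE_SIZE entries. As a rough
 * example, assuming a 16K nodesize and 4 byte crc32c checksums, this works
 * out to around four thousand checksums per item, i.e. roughly 16M of data
 * covered by one csum item.
 */
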
/*
 * Set the inode's size according to filesystem options.
 *
 * @inode:      inode we want to update the disk_i_size for
 * @new_i_size: i_size we want to set to, 0 if we use i_size
 *
 * With NO_HOLES set this simply sets disk_i_size to whatever i_size_read()
 * returns, as it is perfectly fine for a file to have holes without hole file
 * extent items.
 *
 * However without NO_HOLES we must only use the area that is contiguous from
 * offset 0 of the file. Otherwise we could end up adjusting i_size up to an
 * extent that has a gap in front of it.
 *
 * Finally, new_i_size should only be set in the case of truncate, where we are
 * not yet ready to use i_size_read() as the limiter.
 */
47void btrfs_inode_safe_disk_i_size_write(struct btrfs_inode *inode, u64 new_i_size)
48{
49 u64 start, end, i_size;
50 bool found;
51
52 spin_lock(&inode->lock);
53 i_size = new_i_size ?: i_size_read(&inode->vfs_inode);
54 if (!inode->file_extent_tree) {
55 inode->disk_i_size = i_size;
56 goto out_unlock;
57 }
58
59 found = btrfs_find_contiguous_extent_bit(inode->file_extent_tree, 0, &start,
60 &end, EXTENT_DIRTY);
61 if (found && start == 0)
62 i_size = min(i_size, end + 1);
63 else
64 i_size = 0;
65 inode->disk_i_size = i_size;
66out_unlock:
67 spin_unlock(&inode->lock);
68}
69
/*
 * Mark a range within a file as having a new extent inserted.
 *
 * @inode: inode being modified
 * @start: start file offset of the file extent we've inserted
 * @len:   logical length of the file extent item
 *
 * Call this when inserting a new file extent where there was none before.
 * There is no need to call it when replacing an existing file extent, however
 * it is fine to call it multiple times if unsure.
 *
 * The start and len must match the file extent item, and thus must be
 * sectorsize aligned.
 */
84int btrfs_inode_set_file_extent_range(struct btrfs_inode *inode, u64 start,
85 u64 len)
86{
87 if (!inode->file_extent_tree)
88 return 0;
89
90 if (len == 0)
91 return 0;
92
93 ASSERT(IS_ALIGNED(start + len, inode->root->fs_info->sectorsize));
94
95 return btrfs_set_extent_bit(inode->file_extent_tree, start, start + len - 1,
96 EXTENT_DIRTY, NULL);
97}
98
/*
 * Mark an inode range as not having a backing extent.
 *
 * @inode: inode being modified
 * @start: start file offset of the file extent we've dropped
 * @len:   logical length of the file extent item
 *
 * Called when we drop a file extent, for example when we truncate. It does not
 * need to be called for cases where we're replacing a file extent, like when
 * we've COWed a file extent.
 *
 * The start and len must match the file extent item, and thus must be
 * sectorsize aligned.
 */
113int btrfs_inode_clear_file_extent_range(struct btrfs_inode *inode, u64 start,
114 u64 len)
115{
116 if (!inode->file_extent_tree)
117 return 0;
118
119 if (len == 0)
120 return 0;
121
122 ASSERT(IS_ALIGNED(start + len, inode->root->fs_info->sectorsize) ||
123 len == (u64)-1);
124
125 return btrfs_clear_extent_bit(inode->file_extent_tree, start,
126 start + len - 1, EXTENT_DIRTY, NULL);
127}
128
129static size_t bytes_to_csum_size(const struct btrfs_fs_info *fs_info, u32 bytes)
130{
131 ASSERT(IS_ALIGNED(bytes, fs_info->sectorsize));
132
133 return (bytes >> fs_info->sectorsize_bits) * fs_info->csum_size;
134}
135
136static size_t csum_size_to_bytes(const struct btrfs_fs_info *fs_info, u32 csum_size)
137{
138 ASSERT(IS_ALIGNED(csum_size, fs_info->csum_size));
139
140 return (csum_size / fs_info->csum_size) << fs_info->sectorsize_bits;
141}
142
143static inline u32 max_ordered_sum_bytes(const struct btrfs_fs_info *fs_info)
144{
145 u32 max_csum_size = round_down(PAGE_SIZE - sizeof(struct btrfs_ordered_sum),
146 fs_info->csum_size);
147
148 return csum_size_to_bytes(fs_info, max_csum_size);
149}
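
/*
 * A worked example for the conversion helpers above, assuming the common 4K
 * sectorsize with 4 byte crc32c checksums: 64K of data spans 16 sectors and
 * thus needs 16 * 4 = 64 bytes of checksums, and csum_size_to_bytes() maps
 * those 64 bytes back to 64K. max_ordered_sum_bytes() is then the largest
 * data range whose btrfs_ordered_sum (header plus csum array) still fits in
 * a single page.
 */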
150
151/*
152 * Calculate the total size needed to allocate for an ordered sum structure
153 * spanning @bytes in the file.
154 */
155static int btrfs_ordered_sum_size(const struct btrfs_fs_info *fs_info, unsigned long bytes)
156{
157 return sizeof(struct btrfs_ordered_sum) + bytes_to_csum_size(fs_info, bytes);
158}
159
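/*
 * Insert a file extent item describing a hole: a regular extent whose
 * disk_bytenr and disk_num_bytes are 0, covering @num_bytes at file offset
 * @pos of inode @objectid. This is only needed when the NO_HOLES feature is
 * not enabled, in which case every hole must be backed by an explicit file
 * extent item.
 */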
160int btrfs_insert_hole_extent(struct btrfs_trans_handle *trans,
161 struct btrfs_root *root,
162 u64 objectid, u64 pos, u64 num_bytes)
163{
164 int ret = 0;
165 struct btrfs_file_extent_item *item;
166 struct btrfs_key file_key;
167 BTRFS_PATH_AUTO_FREE(path);
168 struct extent_buffer *leaf;
169
170 path = btrfs_alloc_path();
171 if (!path)
172 return -ENOMEM;
173
174 file_key.objectid = objectid;
175 file_key.type = BTRFS_EXTENT_DATA_KEY;
176 file_key.offset = pos;
177
178 ret = btrfs_insert_empty_item(trans, root, path, &file_key,
179 sizeof(*item));
180 if (ret < 0)
181 return ret;
182 leaf = path->nodes[0];
183 item = btrfs_item_ptr(leaf, path->slots[0],
184 struct btrfs_file_extent_item);
185 btrfs_set_file_extent_disk_bytenr(leaf, item, 0);
186 btrfs_set_file_extent_disk_num_bytes(leaf, item, 0);
187 btrfs_set_file_extent_offset(leaf, item, 0);
188 btrfs_set_file_extent_num_bytes(leaf, item, num_bytes);
189 btrfs_set_file_extent_ram_bytes(leaf, item, num_bytes);
190 btrfs_set_file_extent_generation(leaf, item, trans->transid);
191 btrfs_set_file_extent_type(leaf, item, BTRFS_FILE_EXTENT_REG);
192 btrfs_set_file_extent_compression(leaf, item, 0);
193 btrfs_set_file_extent_encryption(leaf, item, 0);
194 btrfs_set_file_extent_other_encoding(leaf, item, 0);
195
196 return ret;
197}
198
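/*
 * Find the csum item covering @bytenr in the csum tree and return a pointer to
 * the checksum for @bytenr inside that item. @cow decides whether the path is
 * COWed for modification.
 *
 * Returns ERR_PTR(-EFBIG) if @bytenr starts right at the end of an existing
 * item's coverage (the caller may then want to extend that item), and
 * ERR_PTR(-ENOENT) if no covering item exists.
 */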
199static struct btrfs_csum_item *
200btrfs_lookup_csum(struct btrfs_trans_handle *trans,
201 struct btrfs_root *root,
202 struct btrfs_path *path,
203 u64 bytenr, int cow)
204{
205 struct btrfs_fs_info *fs_info = root->fs_info;
206 int ret;
207 struct btrfs_key file_key;
208 struct btrfs_key found_key;
209 struct btrfs_csum_item *item;
210 struct extent_buffer *leaf;
211 u64 csum_offset = 0;
212 const u32 csum_size = fs_info->csum_size;
213 int csums_in_item;
214
215 file_key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
216 file_key.type = BTRFS_EXTENT_CSUM_KEY;
217 file_key.offset = bytenr;
218 ret = btrfs_search_slot(trans, root, &file_key, path, 0, cow);
219 if (ret < 0)
220 goto fail;
221 leaf = path->nodes[0];
222 if (ret > 0) {
223 ret = 1;
224 if (path->slots[0] == 0)
225 goto fail;
226 path->slots[0]--;
227 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
228 if (found_key.type != BTRFS_EXTENT_CSUM_KEY)
229 goto fail;
230
231 csum_offset = (bytenr - found_key.offset) >>
232 fs_info->sectorsize_bits;
233 csums_in_item = btrfs_item_size(leaf, path->slots[0]);
234 csums_in_item /= csum_size;
235
236 if (csum_offset == csums_in_item) {
237 ret = -EFBIG;
238 goto fail;
239 } else if (csum_offset > csums_in_item) {
240 goto fail;
241 }
242 }
243 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item);
244 item = (struct btrfs_csum_item *)((unsigned char *)item +
245 csum_offset * csum_size);
246 return item;
247fail:
248 if (ret > 0)
249 ret = -ENOENT;
250 return ERR_PTR(ret);
251}
252
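/*
 * Search for the file extent item of inode @objectid at file offset @offset.
 * A negative @mod prepares the path for a deletion, and any non-zero @mod COWs
 * the path, mirroring how the @ins_len and @cow arguments of
 * btrfs_search_slot() are derived below.
 */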
253int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
254 struct btrfs_root *root,
255 struct btrfs_path *path, u64 objectid,
256 u64 offset, int mod)
257{
258 struct btrfs_key file_key;
259 int ins_len = mod < 0 ? -1 : 0;
260 int cow = mod != 0;
261
262 file_key.objectid = objectid;
263 file_key.type = BTRFS_EXTENT_DATA_KEY;
264 file_key.offset = offset;
265
266 return btrfs_search_slot(trans, root, &file_key, path, ins_len, cow);
267}
268
/*
 * Find checksums for the logical bytenr range [disk_bytenr, disk_bytenr + len)
 * and store the result in @dst.
 *
 * Return >0 for the number of sectors we found.
 * Return 0 if the range [disk_bytenr, disk_bytenr + sectorsize) has no csum
 * for it. The caller may want to try the next sector until one range is hit.
 * Return <0 for a fatal error.
 */
278static int search_csum_tree(struct btrfs_fs_info *fs_info,
279 struct btrfs_path *path, u64 disk_bytenr,
280 u64 len, u8 *dst)
281{
282 struct btrfs_root *csum_root;
283 struct btrfs_csum_item *item = NULL;
284 struct btrfs_key key;
285 const u32 sectorsize = fs_info->sectorsize;
286 const u32 csum_size = fs_info->csum_size;
287 u32 itemsize;
288 int ret;
289 u64 csum_start;
290 u64 csum_len;
291
292 ASSERT(IS_ALIGNED(disk_bytenr, sectorsize) &&
293 IS_ALIGNED(len, sectorsize));
294
295 /* Check if the current csum item covers disk_bytenr */
296 if (path->nodes[0]) {
297 item = btrfs_item_ptr(path->nodes[0], path->slots[0],
298 struct btrfs_csum_item);
299 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
300 itemsize = btrfs_item_size(path->nodes[0], path->slots[0]);
301
302 csum_start = key.offset;
303 csum_len = (itemsize / csum_size) * sectorsize;
304
305 if (in_range(disk_bytenr, csum_start, csum_len))
306 goto found;
307 }
308
309 /* Current item doesn't contain the desired range, search again */
310 btrfs_release_path(path);
311 csum_root = btrfs_csum_root(fs_info, disk_bytenr);
312 item = btrfs_lookup_csum(NULL, csum_root, path, disk_bytenr, 0);
313 if (IS_ERR(item)) {
314 ret = PTR_ERR(item);
315 goto out;
316 }
317 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
318 itemsize = btrfs_item_size(path->nodes[0], path->slots[0]);
319
320 csum_start = key.offset;
321 csum_len = (itemsize / csum_size) * sectorsize;
322 ASSERT(in_range(disk_bytenr, csum_start, csum_len));
323
324found:
325 ret = (min(csum_start + csum_len, disk_bytenr + len) -
326 disk_bytenr) >> fs_info->sectorsize_bits;
327 read_extent_buffer(path->nodes[0], dst, (unsigned long)item,
328 ret * csum_size);
329out:
330 if (ret == -ENOENT || ret == -EFBIG)
331 ret = 0;
332 return ret;
333}
334
/*
 * Look up the checksums for a read bio in the csum tree.
 *
 * Return 0 on success, or a negative errno on failure (e.g. -ENOMEM if
 * allocating memory fails).
 */
340int btrfs_lookup_bio_sums(struct btrfs_bio *bbio)
341{
342 struct btrfs_inode *inode = bbio->inode;
343 struct btrfs_fs_info *fs_info = inode->root->fs_info;
344 struct bio *bio = &bbio->bio;
345 BTRFS_PATH_AUTO_FREE(path);
346 const u32 sectorsize = fs_info->sectorsize;
347 const u32 csum_size = fs_info->csum_size;
348 u32 orig_len = bio->bi_iter.bi_size;
349 u64 orig_disk_bytenr = bio->bi_iter.bi_sector << SECTOR_SHIFT;
350 const unsigned int nblocks = orig_len >> fs_info->sectorsize_bits;
351 int ret = 0;
352 u32 bio_offset = 0;
353
354 if ((inode->flags & BTRFS_INODE_NODATASUM) ||
355 test_bit(BTRFS_FS_STATE_NO_DATA_CSUMS, &fs_info->fs_state))
356 return 0;
357
	/*
	 * This function is only called for read bios.
	 *
	 * This means two things:
	 * - All our csums should only be in the csum tree
	 *   No ordered extent csums, as ordered extents are only used on the
	 *   write path.
	 * - No need to bother with any other info from the bvecs
	 *   Since we're looking up csums, the only important info is the
	 *   disk_bytenr and the length, which can be extracted from bi_iter
	 *   directly.
	 */
370 ASSERT(bio_op(bio) == REQ_OP_READ);
371 path = btrfs_alloc_path();
372 if (!path)
373 return -ENOMEM;
374
375 if (nblocks * csum_size > BTRFS_BIO_INLINE_CSUM_SIZE) {
376 bbio->csum = kvcalloc(nblocks, csum_size, GFP_NOFS);
377 if (!bbio->csum)
378 return -ENOMEM;
379 } else {
380 bbio->csum = bbio->csum_inline;
381 }
382
	/*
	 * If the requested number of sectors is larger than one leaf can
	 * contain, kick off readahead for the csum tree.
	 */
387 if (nblocks > fs_info->csums_per_leaf)
388 path->reada = READA_FORWARD;
389
390 /*
391 * the free space stuff is only read when it hasn't been
392 * updated in the current transaction. So, we can safely
393 * read from the commit root and sidestep a nasty deadlock
394 * between reading the free space cache and updating the csum tree.
395 */
396 if (btrfs_is_free_space_inode(inode)) {
397 path->search_commit_root = true;
398 path->skip_locking = true;
399 }
400
401 /*
402 * If we are searching for a csum of an extent from a past
403 * transaction, we can search in the commit root and reduce
404 * lock contention on the csum tree extent buffers.
405 *
406 * This is important because that lock is an rwsem which gets
407 * pretty heavy write load under memory pressure and sustained
408 * csum overwrites, unlike the commit_root_sem. (Memory pressure
409 * makes us writeback the nodes multiple times per transaction,
410 * which makes us cow them each time, taking the write lock.)
411 *
412 * Due to how rwsem is implemented, there is a possible
413 * priority inversion where the readers holding the lock don't
414 * get scheduled (say they're in a cgroup stuck in heavy reclaim)
415 * which then blocks writers, including transaction commit. By
416 * using a semaphore with fewer writers (only a commit switching
417 * the roots), we make this issue less likely.
418 *
419 * Note that we don't rely on btrfs_search_slot to lock the
420 * commit root csum. We call search_slot multiple times, which would
421 * create a potential race where a commit comes in between searches
422 * while we are not holding the commit_root_sem, and we get csums
423 * from across transactions.
424 */
425 if (bbio->csum_search_commit_root) {
426 path->search_commit_root = true;
427 path->skip_locking = true;
428 down_read(&fs_info->commit_root_sem);
429 }
430
431 while (bio_offset < orig_len) {
432 int count;
433 u64 cur_disk_bytenr = orig_disk_bytenr + bio_offset;
434 u8 *csum_dst = bbio->csum +
435 (bio_offset >> fs_info->sectorsize_bits) * csum_size;
436
437 count = search_csum_tree(fs_info, path, cur_disk_bytenr,
438 orig_len - bio_offset, csum_dst);
439 if (count < 0) {
440 ret = count;
441 if (bbio->csum != bbio->csum_inline)
442 kvfree(bbio->csum);
443 bbio->csum = NULL;
444 break;
445 }
446
447 /*
448 * We didn't find a csum for this range. We need to make sure
449 * we complain loudly about this, because we are not NODATASUM.
450 *
451 * However for the DATA_RELOC inode we could potentially be
452 * relocating data extents for a NODATASUM inode, so the inode
453 * itself won't be marked with NODATASUM, but the extent we're
454 * copying is in fact NODATASUM. If we don't find a csum we
455 * assume this is the case.
456 */
457 if (count == 0) {
458 memset(csum_dst, 0, csum_size);
459 count = 1;
460
461 if (btrfs_is_data_reloc_root(inode->root)) {
462 u64 file_offset = bbio->file_offset + bio_offset;
463
464 btrfs_set_extent_bit(&inode->io_tree, file_offset,
465 file_offset + sectorsize - 1,
466 EXTENT_NODATASUM, NULL);
467 } else {
468 btrfs_warn_rl(fs_info,
469 "csum hole found for disk bytenr range [%llu, %llu)",
470 cur_disk_bytenr, cur_disk_bytenr + sectorsize);
471 }
472 }
473 bio_offset += count * sectorsize;
474 }
475
476 if (bbio->csum_search_commit_root)
477 up_read(&fs_info->commit_root_sem);
478 return ret;
479}
480
/*
 * Search for checksums for a given logical range.
 *
 * @root:   The root where to look for checksums.
 * @start:  Logical address of target checksum range.
 * @end:    End offset (inclusive) of the target checksum range.
 * @list:   List for adding each checksum that was found.
 *          Can be NULL in case the caller only wants to check if there are
 *          any checksums for the range.
 * @nowait: Indicate if the search must be non-blocking or not.
 *
 * Return < 0 on error, 0 if no checksums were found, or 1 if checksums were
 * found.
 */
495int btrfs_lookup_csums_list(struct btrfs_root *root, u64 start, u64 end,
496 struct list_head *list, bool nowait)
497{
498 struct btrfs_fs_info *fs_info = root->fs_info;
499 struct btrfs_key key;
500 struct btrfs_path *path;
501 struct extent_buffer *leaf;
502 struct btrfs_ordered_sum *sums;
503 struct btrfs_csum_item *item;
504 int ret;
505 bool found_csums = false;
506
507 ASSERT(IS_ALIGNED(start, fs_info->sectorsize) &&
508 IS_ALIGNED(end + 1, fs_info->sectorsize));
509
510 path = btrfs_alloc_path();
511 if (!path)
512 return -ENOMEM;
513
514 path->nowait = nowait;
515
516 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
517 key.type = BTRFS_EXTENT_CSUM_KEY;
518 key.offset = start;
519
520 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
521 if (ret < 0)
522 goto out;
523 if (ret > 0 && path->slots[0] > 0) {
524 leaf = path->nodes[0];
525 btrfs_item_key_to_cpu(leaf, &key, path->slots[0] - 1);
526
527 /*
528 * There are two cases we can hit here for the previous csum
529 * item:
530 *
531 * |<- search range ->|
532 * |<- csum item ->|
533 *
534 * Or
535 * |<- search range ->|
536 * |<- csum item ->|
537 *
538 * Check if the previous csum item covers the leading part of
539 * the search range. If so we have to start from previous csum
540 * item.
541 */
542 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
543 key.type == BTRFS_EXTENT_CSUM_KEY) {
544 if (bytes_to_csum_size(fs_info, start - key.offset) <
545 btrfs_item_size(leaf, path->slots[0] - 1))
546 path->slots[0]--;
547 }
548 }
549
550 while (start <= end) {
551 u64 csum_end;
552
553 leaf = path->nodes[0];
554 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
555 ret = btrfs_next_leaf(root, path);
556 if (ret < 0)
557 goto out;
558 if (ret > 0)
559 break;
560 leaf = path->nodes[0];
561 }
562
563 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
564 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
565 key.type != BTRFS_EXTENT_CSUM_KEY ||
566 key.offset > end)
567 break;
568
569 if (key.offset > start)
570 start = key.offset;
571
572 csum_end = key.offset + csum_size_to_bytes(fs_info,
573 btrfs_item_size(leaf, path->slots[0]));
574 if (csum_end <= start) {
575 path->slots[0]++;
576 continue;
577 }
578
579 found_csums = true;
580 if (!list)
581 goto out;
582
583 csum_end = min(csum_end, end + 1);
584 item = btrfs_item_ptr(path->nodes[0], path->slots[0],
585 struct btrfs_csum_item);
586 while (start < csum_end) {
587 unsigned long offset;
588 size_t size;
589
590 size = min_t(size_t, csum_end - start,
591 max_ordered_sum_bytes(fs_info));
592 sums = kzalloc(btrfs_ordered_sum_size(fs_info, size),
593 GFP_NOFS);
594 if (!sums) {
595 ret = -ENOMEM;
596 goto out;
597 }
598
599 sums->logical = start;
600 sums->len = size;
601
602 offset = bytes_to_csum_size(fs_info, start - key.offset);
603
604 read_extent_buffer(path->nodes[0],
605 sums->sums,
606 ((unsigned long)item) + offset,
607 bytes_to_csum_size(fs_info, size));
608
609 start += size;
610 list_add_tail(&sums->list, list);
611 }
612 path->slots[0]++;
613 }
614out:
615 btrfs_free_path(path);
616 if (ret < 0) {
617 if (list) {
618 struct btrfs_ordered_sum *tmp_sums;
619
620 list_for_each_entry_safe(sums, tmp_sums, list, list)
621 kfree(sums);
622 }
623
624 return ret;
625 }
626
627 return found_csums ? 1 : 0;
628}
629
/*
 * Do the same work as btrfs_lookup_csums_list(); the difference is in how we
 * return the result.
 *
 * This version sets the corresponding bits in @csum_bitmap to indicate that a
 * csum was found.
 * Each bit represents a sector. Thus the caller should ensure the @csum_buf
 * passed in is large enough to contain all csums.
 */
639int btrfs_lookup_csums_bitmap(struct btrfs_root *root, struct btrfs_path *path,
640 u64 start, u64 end, u8 *csum_buf,
641 unsigned long *csum_bitmap)
642{
643 struct btrfs_fs_info *fs_info = root->fs_info;
644 struct btrfs_key key;
645 struct extent_buffer *leaf;
646 struct btrfs_csum_item *item;
647 const u64 orig_start = start;
648 bool free_path = false;
649 int ret;
650
651 ASSERT(IS_ALIGNED(start, fs_info->sectorsize) &&
652 IS_ALIGNED(end + 1, fs_info->sectorsize));
653
654 if (!path) {
655 path = btrfs_alloc_path();
656 if (!path)
657 return -ENOMEM;
658 free_path = true;
659 }
660
661 /* Check if we can reuse the previous path. */
662 if (path->nodes[0]) {
663 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
664
665 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
666 key.type == BTRFS_EXTENT_CSUM_KEY &&
667 key.offset <= start)
668 goto search_forward;
669 btrfs_release_path(path);
670 }
671
672 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
673 key.type = BTRFS_EXTENT_CSUM_KEY;
674 key.offset = start;
675
676 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
677 if (ret < 0)
678 goto fail;
679 if (ret > 0 && path->slots[0] > 0) {
680 leaf = path->nodes[0];
681 btrfs_item_key_to_cpu(leaf, &key, path->slots[0] - 1);
682
683 /*
684 * There are two cases we can hit here for the previous csum
685 * item:
686 *
687 * |<- search range ->|
688 * |<- csum item ->|
689 *
690 * Or
691 * |<- search range ->|
692 * |<- csum item ->|
693 *
694 * Check if the previous csum item covers the leading part of
695 * the search range. If so we have to start from previous csum
696 * item.
697 */
698 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
699 key.type == BTRFS_EXTENT_CSUM_KEY) {
700 if (bytes_to_csum_size(fs_info, start - key.offset) <
701 btrfs_item_size(leaf, path->slots[0] - 1))
702 path->slots[0]--;
703 }
704 }
705
706search_forward:
707 while (start <= end) {
708 u64 csum_end;
709
710 leaf = path->nodes[0];
711 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
712 ret = btrfs_next_leaf(root, path);
713 if (ret < 0)
714 goto fail;
715 if (ret > 0)
716 break;
717 leaf = path->nodes[0];
718 }
719
720 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
721 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
722 key.type != BTRFS_EXTENT_CSUM_KEY ||
723 key.offset > end)
724 break;
725
726 if (key.offset > start)
727 start = key.offset;
728
729 csum_end = key.offset + csum_size_to_bytes(fs_info,
730 btrfs_item_size(leaf, path->slots[0]));
731 if (csum_end <= start) {
732 path->slots[0]++;
733 continue;
734 }
735
736 csum_end = min(csum_end, end + 1);
737 item = btrfs_item_ptr(path->nodes[0], path->slots[0],
738 struct btrfs_csum_item);
739 while (start < csum_end) {
740 unsigned long offset;
741 size_t size;
742 u8 *csum_dest = csum_buf + bytes_to_csum_size(fs_info,
743 start - orig_start);
744
745 size = min_t(size_t, csum_end - start, end + 1 - start);
746
747 offset = bytes_to_csum_size(fs_info, start - key.offset);
748
749 read_extent_buffer(path->nodes[0], csum_dest,
750 ((unsigned long)item) + offset,
751 bytes_to_csum_size(fs_info, size));
752
753 bitmap_set(csum_bitmap,
754 (start - orig_start) >> fs_info->sectorsize_bits,
755 size >> fs_info->sectorsize_bits);
756
757 start += size;
758 }
759 path->slots[0]++;
760 }
761 ret = 0;
762fail:
763 if (free_path)
764 btrfs_free_path(path);
765 return ret;
766}
767
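/*
 * Checksum every block described by @src in the bio and store the results in
 * the bbio's ordered sum array. When the block size is larger than PAGE_SIZE a
 * block spans several pages, so the physical addresses of the pages making up
 * one block are collected first and the block is checksummed once all of them
 * have been gathered.
 */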
768static void csum_one_bio(struct btrfs_bio *bbio, struct bvec_iter *src)
769{
770 struct btrfs_inode *inode = bbio->inode;
771 struct btrfs_fs_info *fs_info = inode->root->fs_info;
772 SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
773 struct bio *bio = &bbio->bio;
774 struct btrfs_ordered_sum *sums = bbio->sums;
775 struct bvec_iter iter = *src;
776 phys_addr_t paddr;
777 const u32 blocksize = fs_info->sectorsize;
778 const u32 step = min(blocksize, PAGE_SIZE);
779 const u32 nr_steps = blocksize / step;
780 phys_addr_t paddrs[BTRFS_MAX_BLOCKSIZE / PAGE_SIZE];
781 u32 offset = 0;
782 int index = 0;
783
784 shash->tfm = fs_info->csum_shash;
785
786 btrfs_bio_for_each_block(paddr, bio, &iter, step) {
787 paddrs[(offset / step) % nr_steps] = paddr;
788 offset += step;
789
790 if (IS_ALIGNED(offset, blocksize)) {
791 btrfs_calculate_block_csum_pages(fs_info, paddrs, sums->sums + index);
792 index += fs_info->csum_size;
793 }
794 }
795}
796
797static void csum_one_bio_work(struct work_struct *work)
798{
799 struct btrfs_bio *bbio = container_of(work, struct btrfs_bio, csum_work);
800
801 ASSERT(btrfs_op(&bbio->bio) == BTRFS_MAP_WRITE);
802 ASSERT(bbio->async_csum == true);
803 csum_one_bio(bbio, &bbio->csum_saved_iter);
804 complete(&bbio->csum_done);
805}
806
807/*
808 * Calculate checksums of the data contained inside a bio.
809 */
810int btrfs_csum_one_bio(struct btrfs_bio *bbio, bool async)
811{
812 struct btrfs_ordered_extent *ordered = bbio->ordered;
813 struct btrfs_inode *inode = bbio->inode;
814 struct btrfs_fs_info *fs_info = inode->root->fs_info;
815 struct bio *bio = &bbio->bio;
816 struct btrfs_ordered_sum *sums;
817 unsigned nofs_flag;
818
819 nofs_flag = memalloc_nofs_save();
820 sums = kvzalloc(btrfs_ordered_sum_size(fs_info, bio->bi_iter.bi_size),
821 GFP_KERNEL);
822 memalloc_nofs_restore(nofs_flag);
823
824 if (!sums)
825 return -ENOMEM;
826
827 sums->logical = bbio->orig_logical;
828 sums->len = bio->bi_iter.bi_size;
829 INIT_LIST_HEAD(&sums->list);
830 bbio->sums = sums;
831 btrfs_add_ordered_sum(ordered, sums);
832
833 if (!async) {
834 csum_one_bio(bbio, &bbio->bio.bi_iter);
835 return 0;
836 }
837 init_completion(&bbio->csum_done);
838 bbio->async_csum = true;
839 bbio->csum_saved_iter = bbio->bio.bi_iter;
840 INIT_WORK(&bbio->csum_work, csum_one_bio_work);
841 schedule_work(&bbio->csum_work);
842 return 0;
843}
844
/*
 * Nodatasum I/O on zoned file systems still requires a btrfs_ordered_sum to
 * record the updated logical address on Zone Append completion.
 * Allocate just the structure with an empty sums array here for that case.
 */
850int btrfs_alloc_dummy_sum(struct btrfs_bio *bbio)
851{
852 bbio->sums = kmalloc(sizeof(*bbio->sums), GFP_NOFS);
853 if (!bbio->sums)
854 return -ENOMEM;
855 bbio->sums->len = bbio->bio.bi_iter.bi_size;
856 bbio->sums->logical = bbio->bio.bi_iter.bi_sector << SECTOR_SHIFT;
857 btrfs_add_ordered_sum(bbio->ordered, bbio->sums);
858 return 0;
859}
860
/*
 * Remove one checksum overlapping a range.
 *
 * This expects the key to describe the csum pointed to by the path, and it
 * expects the csum to overlap the range [bytenr, bytenr + len).
 *
 * The csum should not be entirely contained in the range and the range should
 * not be entirely contained in the csum.
 *
 * This calls btrfs_truncate_item() with the correct args based on the overlap,
 * and fixes up the key as required.
 */
873static noinline void truncate_one_csum(struct btrfs_trans_handle *trans,
874 struct btrfs_path *path,
875 struct btrfs_key *key,
876 u64 bytenr, u64 len)
877{
878 struct btrfs_fs_info *fs_info = trans->fs_info;
879 struct extent_buffer *leaf;
880 const u32 csum_size = fs_info->csum_size;
881 u64 csum_end;
882 u64 end_byte = bytenr + len;
883 u32 blocksize_bits = fs_info->sectorsize_bits;
884
885 leaf = path->nodes[0];
886 csum_end = btrfs_item_size(leaf, path->slots[0]) / csum_size;
887 csum_end <<= blocksize_bits;
888 csum_end += key->offset;
889
890 if (key->offset < bytenr && csum_end <= end_byte) {
891 /*
892 * [ bytenr - len ]
893 * [ ]
894 * [csum ]
895 * A simple truncate off the end of the item
896 */
897 u32 new_size = (bytenr - key->offset) >> blocksize_bits;
898 new_size *= csum_size;
899 btrfs_truncate_item(trans, path, new_size, 1);
900 } else if (key->offset >= bytenr && csum_end > end_byte &&
901 end_byte > key->offset) {
902 /*
903 * [ bytenr - len ]
904 * [ ]
905 * [csum ]
906 * we need to truncate from the beginning of the csum
907 */
908 u32 new_size = (csum_end - end_byte) >> blocksize_bits;
909 new_size *= csum_size;
910
911 btrfs_truncate_item(trans, path, new_size, 0);
912
913 key->offset = end_byte;
914 btrfs_set_item_key_safe(trans, path, key);
915 } else {
916 BUG();
917 }
918}
919
920/*
921 * Delete the csum items from the csum tree for a given range of bytes.
922 */
923int btrfs_del_csums(struct btrfs_trans_handle *trans,
924 struct btrfs_root *root, u64 bytenr, u64 len)
925{
926 struct btrfs_fs_info *fs_info = trans->fs_info;
927 BTRFS_PATH_AUTO_FREE(path);
928 struct btrfs_key key;
929 u64 end_byte = bytenr + len;
930 u64 csum_end;
931 struct extent_buffer *leaf;
932 int ret = 0;
933 const u32 csum_size = fs_info->csum_size;
934 u32 blocksize_bits = fs_info->sectorsize_bits;
935
936 ASSERT(btrfs_root_id(root) == BTRFS_CSUM_TREE_OBJECTID ||
937 btrfs_root_id(root) == BTRFS_TREE_LOG_OBJECTID);
938
939 path = btrfs_alloc_path();
940 if (!path)
941 return -ENOMEM;
942
943 while (1) {
944 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
945 key.type = BTRFS_EXTENT_CSUM_KEY;
946 key.offset = end_byte - 1;
947
948 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
949 if (ret > 0) {
950 ret = 0;
951 if (path->slots[0] == 0)
952 break;
953 path->slots[0]--;
954 } else if (ret < 0) {
955 break;
956 }
957
958 leaf = path->nodes[0];
959 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
960
961 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
962 key.type != BTRFS_EXTENT_CSUM_KEY) {
963 break;
964 }
965
966 if (key.offset >= end_byte)
967 break;
968
969 csum_end = btrfs_item_size(leaf, path->slots[0]) / csum_size;
970 csum_end <<= blocksize_bits;
971 csum_end += key.offset;
972
973 /* this csum ends before we start, we're done */
974 if (csum_end <= bytenr)
975 break;
976
977 /* delete the entire item, it is inside our range */
978 if (key.offset >= bytenr && csum_end <= end_byte) {
979 int del_nr = 1;
980
981 /*
982 * Check how many csum items preceding this one in this
983 * leaf correspond to our range and then delete them all
984 * at once.
985 */
986 if (key.offset > bytenr && path->slots[0] > 0) {
987 int slot = path->slots[0] - 1;
988
989 while (slot >= 0) {
990 struct btrfs_key pk;
991
992 btrfs_item_key_to_cpu(leaf, &pk, slot);
993 if (pk.offset < bytenr ||
994 pk.type != BTRFS_EXTENT_CSUM_KEY ||
995 pk.objectid !=
996 BTRFS_EXTENT_CSUM_OBJECTID)
997 break;
998 path->slots[0] = slot;
999 del_nr++;
1000 key.offset = pk.offset;
1001 slot--;
1002 }
1003 }
1004 ret = btrfs_del_items(trans, root, path,
1005 path->slots[0], del_nr);
1006 if (ret)
1007 break;
1008 if (key.offset == bytenr)
1009 break;
1010 } else if (key.offset < bytenr && csum_end > end_byte) {
1011 unsigned long offset;
1012 unsigned long shift_len;
1013 unsigned long item_offset;
1014 /*
1015 * [ bytenr - len ]
1016 * [csum ]
1017 *
1018 * Our bytes are in the middle of the csum,
1019 * we need to split this item and insert a new one.
1020 *
1021 * But we can't drop the path because the
1022 * csum could change, get removed, extended etc.
1023 *
1024 * The trick here is the max size of a csum item leaves
1025 * enough room in the tree block for a single
1026 * item header. So, we split the item in place,
1027 * adding a new header pointing to the existing
1028 * bytes. Then we loop around again and we have
1029 * a nicely formed csum item that we can neatly
1030 * truncate.
1031 */
1032 offset = (bytenr - key.offset) >> blocksize_bits;
1033 offset *= csum_size;
1034
1035 shift_len = (len >> blocksize_bits) * csum_size;
1036
1037 item_offset = btrfs_item_ptr_offset(leaf,
1038 path->slots[0]);
1039
1040 memzero_extent_buffer(leaf, item_offset + offset,
1041 shift_len);
1042 key.offset = bytenr;
1043
1044 /*
1045 * btrfs_split_item returns -EAGAIN when the
1046 * item changed size or key
1047 */
1048 ret = btrfs_split_item(trans, root, path, &key, offset);
1049 if (unlikely(ret && ret != -EAGAIN)) {
1050 btrfs_abort_transaction(trans, ret);
1051 break;
1052 }
1053 ret = 0;
1054
1055 key.offset = end_byte - 1;
1056 } else {
1057 truncate_one_csum(trans, path, &key, bytenr, len);
1058 if (key.offset < bytenr)
1059 break;
1060 }
1061 btrfs_release_path(path);
1062 }
1063 return ret;
1064}
1065
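/*
 * Look up the key offset of the csum item following the slot @path currently
 * points at, moving to the next leaf if needed. Sets *next_offset to (u64)-1
 * when there is no further csum item.
 */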
1066static int find_next_csum_offset(struct btrfs_root *root,
1067 struct btrfs_path *path,
1068 u64 *next_offset)
1069{
1070 const u32 nritems = btrfs_header_nritems(path->nodes[0]);
1071 struct btrfs_key found_key;
1072 int slot = path->slots[0] + 1;
1073 int ret;
1074
1075 if (nritems == 0 || slot >= nritems) {
1076 ret = btrfs_next_leaf(root, path);
1077 if (ret < 0) {
1078 return ret;
1079 } else if (ret > 0) {
1080 *next_offset = (u64)-1;
1081 return 0;
1082 }
1083 slot = path->slots[0];
1084 }
1085
1086 btrfs_item_key_to_cpu(path->nodes[0], &found_key, slot);
1087
1088 if (found_key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1089 found_key.type != BTRFS_EXTENT_CSUM_KEY)
1090 *next_offset = (u64)-1;
1091 else
1092 *next_offset = found_key.offset;
1093
1094 return 0;
1095}
1096
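/*
 * Insert the checksums described by @sums into the csum tree of @root. An
 * existing csum item ending at the start of the range is extended when there
 * is room for it, otherwise a new item is inserted, looping until the whole
 * sums->len range has been written out.
 */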
1097int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
1098 struct btrfs_root *root,
1099 struct btrfs_ordered_sum *sums)
1100{
1101 struct btrfs_fs_info *fs_info = root->fs_info;
1102 struct btrfs_key file_key;
1103 struct btrfs_key found_key;
1104 BTRFS_PATH_AUTO_FREE(path);
1105 struct btrfs_csum_item *item;
1106 struct btrfs_csum_item *item_end;
1107 struct extent_buffer *leaf = NULL;
1108 u64 next_offset;
1109 u64 total_bytes = 0;
1110 u64 csum_offset;
1111 u64 bytenr;
1112 u32 ins_size;
1113 int index = 0;
1114 int found_next;
1115 int ret;
1116 const u32 csum_size = fs_info->csum_size;
1117
1118 path = btrfs_alloc_path();
1119 if (!path)
1120 return -ENOMEM;
1121again:
1122 next_offset = (u64)-1;
1123 found_next = 0;
1124 bytenr = sums->logical + total_bytes;
1125 file_key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1126 file_key.type = BTRFS_EXTENT_CSUM_KEY;
1127 file_key.offset = bytenr;
1128
1129 item = btrfs_lookup_csum(trans, root, path, bytenr, 1);
1130 if (!IS_ERR(item)) {
1131 ret = 0;
1132 leaf = path->nodes[0];
1133 item_end = btrfs_item_ptr(leaf, path->slots[0],
1134 struct btrfs_csum_item);
1135 item_end = (struct btrfs_csum_item *)((char *)item_end +
1136 btrfs_item_size(leaf, path->slots[0]));
1137 goto found;
1138 }
1139 ret = PTR_ERR(item);
1140 if (ret != -EFBIG && ret != -ENOENT)
1141 goto out;
1142
1143 if (ret == -EFBIG) {
1144 u32 item_size;
1145 /* we found one, but it isn't big enough yet */
1146 leaf = path->nodes[0];
1147 item_size = btrfs_item_size(leaf, path->slots[0]);
1148 if ((item_size / csum_size) >=
1149 MAX_CSUM_ITEMS(fs_info, csum_size)) {
1150 /* already at max size, make a new one */
1151 goto insert;
1152 }
1153 } else {
1154 /* We didn't find a csum item, insert one. */
1155 ret = find_next_csum_offset(root, path, &next_offset);
1156 if (ret < 0)
1157 goto out;
1158 found_next = 1;
1159 goto insert;
1160 }
1161
	/*
	 * At this point, we know the tree has a checksum item that ends at an
	 * offset matching the start of the checksum range we want to insert.
	 * We try to extend that item as much as possible and then add as many
	 * checksums to it as fit.
	 *
	 * First check if the leaf has enough free space for at least one
	 * checksum. If it has, go directly to the item extension code,
	 * otherwise release the path and do a search for insertion before the
	 * extension.
	 */
1172 if (btrfs_leaf_free_space(leaf) >= csum_size) {
1173 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
1174 csum_offset = (bytenr - found_key.offset) >>
1175 fs_info->sectorsize_bits;
1176 goto extend_csum;
1177 }
1178
1179 btrfs_release_path(path);
1180 path->search_for_extension = true;
1181 ret = btrfs_search_slot(trans, root, &file_key, path,
1182 csum_size, 1);
1183 path->search_for_extension = false;
1184 if (ret < 0)
1185 goto out;
1186
1187 if (ret > 0) {
1188 if (path->slots[0] == 0)
1189 goto insert;
1190 path->slots[0]--;
1191 }
1192
1193 leaf = path->nodes[0];
1194 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
1195 csum_offset = (bytenr - found_key.offset) >> fs_info->sectorsize_bits;
1196
1197 if (found_key.type != BTRFS_EXTENT_CSUM_KEY ||
1198 found_key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1199 csum_offset >= MAX_CSUM_ITEMS(fs_info, csum_size)) {
1200 goto insert;
1201 }
1202
1203extend_csum:
1204 if (csum_offset == btrfs_item_size(leaf, path->slots[0]) /
1205 csum_size) {
1206 int extend_nr;
1207 u64 tmp;
1208 u32 diff;
1209
1210 tmp = sums->len - total_bytes;
1211 tmp >>= fs_info->sectorsize_bits;
1212 WARN_ON(tmp < 1);
1213 extend_nr = max_t(int, 1, tmp);
1214
1215 /*
1216 * A log tree can already have checksum items with a subset of
1217 * the checksums we are trying to log. This can happen after
1218 * doing a sequence of partial writes into prealloc extents and
1219 * fsyncs in between, with a full fsync logging a larger subrange
1220 * of an extent for which a previous fast fsync logged a smaller
1221 * subrange. And this happens in particular due to merging file
1222 * extent items when we complete an ordered extent for a range
1223 * covered by a prealloc extent - this is done at
1224 * btrfs_mark_extent_written().
1225 *
1226 * So if we try to extend the previous checksum item, which has
1227 * a range that ends at the start of the range we want to insert,
1228 * make sure we don't extend beyond the start offset of the next
1229 * checksum item. If we are at the last item in the leaf, then
1230 * forget the optimization of extending and add a new checksum
1231 * item - it is not worth the complexity of releasing the path,
1232 * getting the first key for the next leaf, repeat the btree
1233 * search, etc, because log trees are temporary anyway and it
1234 * would only save a few bytes of leaf space.
1235 */
1236 if (btrfs_root_id(root) == BTRFS_TREE_LOG_OBJECTID) {
1237 if (path->slots[0] + 1 >=
1238 btrfs_header_nritems(path->nodes[0])) {
1239 ret = find_next_csum_offset(root, path, &next_offset);
1240 if (ret < 0)
1241 goto out;
1242 found_next = 1;
1243 goto insert;
1244 }
1245
1246 ret = find_next_csum_offset(root, path, &next_offset);
1247 if (ret < 0)
1248 goto out;
1249
1250 tmp = (next_offset - bytenr) >> fs_info->sectorsize_bits;
1251 if (tmp <= INT_MAX)
1252 extend_nr = min_t(int, extend_nr, tmp);
1253 }
1254
1255 diff = (csum_offset + extend_nr) * csum_size;
1256 diff = min(diff,
1257 MAX_CSUM_ITEMS(fs_info, csum_size) * csum_size);
1258
1259 diff = diff - btrfs_item_size(leaf, path->slots[0]);
1260 diff = min_t(u32, btrfs_leaf_free_space(leaf), diff);
1261 diff /= csum_size;
1262 diff *= csum_size;
1263
1264 btrfs_extend_item(trans, path, diff);
1265 ret = 0;
1266 goto csum;
1267 }
1268
1269insert:
1270 btrfs_release_path(path);
1271 csum_offset = 0;
1272 if (found_next) {
1273 u64 tmp;
1274
1275 tmp = sums->len - total_bytes;
1276 tmp >>= fs_info->sectorsize_bits;
1277 tmp = min(tmp, (next_offset - file_key.offset) >>
1278 fs_info->sectorsize_bits);
1279
1280 tmp = max_t(u64, 1, tmp);
1281 tmp = min_t(u64, tmp, MAX_CSUM_ITEMS(fs_info, csum_size));
1282 ins_size = csum_size * tmp;
1283 } else {
1284 ins_size = csum_size;
1285 }
1286 ret = btrfs_insert_empty_item(trans, root, path, &file_key,
1287 ins_size);
1288 if (ret < 0)
1289 goto out;
1290 leaf = path->nodes[0];
1291csum:
1292 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item);
1293 item_end = (struct btrfs_csum_item *)((unsigned char *)item +
1294 btrfs_item_size(leaf, path->slots[0]));
1295 item = (struct btrfs_csum_item *)((unsigned char *)item +
1296 csum_offset * csum_size);
1297found:
1298 ins_size = (u32)(sums->len - total_bytes) >> fs_info->sectorsize_bits;
1299 ins_size *= csum_size;
1300 ins_size = min_t(u32, (unsigned long)item_end - (unsigned long)item,
1301 ins_size);
1302 write_extent_buffer(leaf, sums->sums + index, (unsigned long)item,
1303 ins_size);
1304
1305 index += ins_size;
1306 ins_size /= csum_size;
1307 total_bytes += ins_size * fs_info->sectorsize;
1308
1309 if (total_bytes < sums->len) {
1310 btrfs_release_path(path);
1311 cond_resched();
1312 goto again;
1313 }
1314out:
1315 return ret;
1316}
1317
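/*
 * Fill in the extent_map @em from the file extent item @fi that @path points
 * to: regular and prealloc extents take their disk location and length from
 * the item (a disk_bytenr of 0 marks a hole), while inline extents are mapped
 * to a single sector starting at offset 0.
 */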
1318void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode,
1319 const struct btrfs_path *path,
1320 const struct btrfs_file_extent_item *fi,
1321 struct extent_map *em)
1322{
1323 struct btrfs_fs_info *fs_info = inode->root->fs_info;
1324 struct btrfs_root *root = inode->root;
1325 struct extent_buffer *leaf = path->nodes[0];
1326 const int slot = path->slots[0];
1327 struct btrfs_key key;
1328 u64 extent_start;
1329 u8 type = btrfs_file_extent_type(leaf, fi);
1330 int compress_type = btrfs_file_extent_compression(leaf, fi);
1331
1332 btrfs_item_key_to_cpu(leaf, &key, slot);
1333 extent_start = key.offset;
1334 em->ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
1335 em->generation = btrfs_file_extent_generation(leaf, fi);
1336 if (type == BTRFS_FILE_EXTENT_REG ||
1337 type == BTRFS_FILE_EXTENT_PREALLOC) {
1338 const u64 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
1339
1340 em->start = extent_start;
1341 em->len = btrfs_file_extent_end(path) - extent_start;
1342 if (disk_bytenr == 0) {
1343 em->disk_bytenr = EXTENT_MAP_HOLE;
1344 em->disk_num_bytes = 0;
1345 em->offset = 0;
1346 return;
1347 }
1348 em->disk_bytenr = disk_bytenr;
1349 em->disk_num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
1350 em->offset = btrfs_file_extent_offset(leaf, fi);
1351 if (compress_type != BTRFS_COMPRESS_NONE) {
1352 btrfs_extent_map_set_compression(em, compress_type);
1353 } else {
1354 /*
1355 * Older kernels can create regular non-hole data
1356 * extents with ram_bytes smaller than disk_num_bytes.
1357 * Not a big deal, just always use disk_num_bytes
1358 * for ram_bytes.
1359 */
1360 em->ram_bytes = em->disk_num_bytes;
1361 if (type == BTRFS_FILE_EXTENT_PREALLOC)
1362 em->flags |= EXTENT_FLAG_PREALLOC;
1363 }
1364 } else if (type == BTRFS_FILE_EXTENT_INLINE) {
1365 /* Tree-checker has ensured this. */
1366 ASSERT(extent_start == 0);
1367
1368 em->disk_bytenr = EXTENT_MAP_INLINE;
1369 em->start = 0;
1370 em->len = fs_info->sectorsize;
1371 em->offset = 0;
1372 btrfs_extent_map_set_compression(em, compress_type);
1373 } else {
		btrfs_err(fs_info,
	"unknown file extent item type %d, inode %llu, offset %llu, root %llu",
			  type, btrfs_ino(inode), extent_start, btrfs_root_id(root));
1378 }
1379}
1380
/*
 * Returns the end offset (non-inclusive) of the file extent item the given
 * path points to. If it points to an inline extent, the returned offset is
 * rounded up to the sector size.
 */
1386u64 btrfs_file_extent_end(const struct btrfs_path *path)
1387{
1388 const struct extent_buffer *leaf = path->nodes[0];
1389 const int slot = path->slots[0];
1390 struct btrfs_file_extent_item *fi;
1391 struct btrfs_key key;
1392 u64 end;
1393
1394 btrfs_item_key_to_cpu(leaf, &key, slot);
1395 ASSERT(key.type == BTRFS_EXTENT_DATA_KEY);
1396 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
1397
1398 if (btrfs_file_extent_type(leaf, fi) == BTRFS_FILE_EXTENT_INLINE)
1399 end = leaf->fs_info->sectorsize;
1400 else
1401 end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
1402
1403 return end;
1404}