Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Copyright (C) 2023 Western Digital Corporation or its affiliates.
4 */
5
6#include <linux/btrfs_tree.h>
7#include "ctree.h"
8#include "fs.h"
9#include "accessors.h"
10#include "transaction.h"
11#include "disk-io.h"
12#include "raid-stripe-tree.h"
13#include "volumes.h"
14#include "print-tree.h"
15
16static int btrfs_partially_delete_raid_extent(struct btrfs_trans_handle *trans,
17 struct btrfs_path *path,
18 const struct btrfs_key *oldkey,
19 u64 newlen, u64 frontpad)
20{
21 struct btrfs_root *stripe_root = trans->fs_info->stripe_root;
22 struct btrfs_stripe_extent *extent, AUTO_KFREE(newitem);
23 struct extent_buffer *leaf;
24 int slot;
25 size_t item_size;
26 struct btrfs_key newkey = {
27 .objectid = oldkey->objectid + frontpad,
28 .type = BTRFS_RAID_STRIPE_KEY,
29 .offset = newlen,
30 };
31 int ret;
32
33 ASSERT(newlen > 0);
34 ASSERT(oldkey->type == BTRFS_RAID_STRIPE_KEY);
35
36 leaf = path->nodes[0];
37 slot = path->slots[0];
38 item_size = btrfs_item_size(leaf, slot);
39
40 newitem = kzalloc(item_size, GFP_NOFS);
41 if (!newitem)
42 return -ENOMEM;
43
44 extent = btrfs_item_ptr(leaf, slot, struct btrfs_stripe_extent);
45
46 for (int i = 0; i < btrfs_num_raid_stripes(item_size); i++) {
47 struct btrfs_raid_stride *stride = &extent->strides[i];
48 u64 phys;
49
50 phys = btrfs_raid_stride_physical(leaf, stride) + frontpad;
51 btrfs_set_stack_raid_stride_physical(&newitem->strides[i], phys);
52 }
53
54 ret = btrfs_del_item(trans, stripe_root, path);
55 if (ret)
56 return ret;
57
58 btrfs_release_path(path);
59 return btrfs_insert_item(trans, stripe_root, &newkey, newitem, item_size);
60}
61
62int btrfs_delete_raid_extent(struct btrfs_trans_handle *trans, u64 start, u64 length)
63{
64 struct btrfs_fs_info *fs_info = trans->fs_info;
65 struct btrfs_root *stripe_root = fs_info->stripe_root;
66 BTRFS_PATH_AUTO_FREE(path);
67 struct btrfs_key key;
68 struct extent_buffer *leaf;
69 u64 found_start;
70 u64 found_end;
71 u64 end = start + length;
72 int slot;
73 int ret;
74
75 if (!btrfs_fs_incompat(fs_info, RAID_STRIPE_TREE) || !stripe_root)
76 return 0;
77
78 if (!btrfs_is_testing(fs_info)) {
79 struct btrfs_chunk_map *map;
80 bool use_rst;
81
82 map = btrfs_find_chunk_map(fs_info, start, length);
83 if (!map)
84 return -EINVAL;
85 use_rst = btrfs_need_stripe_tree_update(fs_info, map->type);
86 btrfs_free_chunk_map(map);
87 if (!use_rst)
88 return 0;
89 }
90
91 path = btrfs_alloc_path();
92 if (!path)
93 return -ENOMEM;
94
95 while (1) {
96 key.objectid = start;
97 key.type = BTRFS_RAID_STRIPE_KEY;
98 key.offset = 0;
99
100 ret = btrfs_search_slot(trans, stripe_root, &key, path, -1, 1);
101 if (ret < 0)
102 break;
103
104 if (path->slots[0] == btrfs_header_nritems(path->nodes[0]))
105 path->slots[0]--;
106
107 leaf = path->nodes[0];
108 slot = path->slots[0];
109 btrfs_item_key_to_cpu(leaf, &key, slot);
110 found_start = key.objectid;
111 found_end = found_start + key.offset;
112 ret = 0;
113
114 /*
115 * The stripe extent starts before the range we want to delete,
116 * but the range spans more than one stripe extent:
117 *
118 * |--- RAID Stripe Extent ---||--- RAID Stripe Extent ---|
119 * |--- keep ---|--- drop ---|
120 *
121 * This means we have to get the previous item, truncate its
122 * length and then restart the search.
123 */
124 if (found_start > start) {
125 if (slot == 0) {
126 ret = btrfs_previous_item(stripe_root, path, start,
127 BTRFS_RAID_STRIPE_KEY);
128 if (ret) {
129 if (ret > 0)
130 ret = -ENOENT;
131 break;
132 }
133 } else {
134 path->slots[0]--;
135 }
136
137 leaf = path->nodes[0];
138 slot = path->slots[0];
139 btrfs_item_key_to_cpu(leaf, &key, slot);
140 found_start = key.objectid;
141 found_end = found_start + key.offset;
142 ASSERT(found_start <= start);
143 }
144
145 if (key.type != BTRFS_RAID_STRIPE_KEY)
146 break;
147
148 /* That stripe ends before we start, we're done. */
149 if (found_end <= start)
150 break;
151
152 trace_btrfs_raid_extent_delete(fs_info, start, end,
153 found_start, found_end);
154
155 /*
156 * The stripe extent starts before the range we want to delete
157 * and ends after the range we want to delete, i.e. we're
158 * punching a hole in the stripe extent:
159 *
160 * |--- RAID Stripe Extent ---|
161 * | keep |--- drop ---| keep |
162 *
163 * This means we need to a) truncate the existing item and b)
164 * create a second item for the remaining range.
165 */
166 if (found_start < start && found_end > end) {
167 size_t item_size;
168 u64 diff_start = start - found_start;
169 u64 diff_end = found_end - end;
170 struct btrfs_stripe_extent *extent;
171 struct btrfs_key newkey = {
172 .objectid = end,
173 .type = BTRFS_RAID_STRIPE_KEY,
174 .offset = diff_end,
175 };
176
177 /* The "right" item. */
178 ret = btrfs_duplicate_item(trans, stripe_root, path, &newkey);
179 if (ret)
180 break;
181
182 item_size = btrfs_item_size(leaf, path->slots[0]);
183 extent = btrfs_item_ptr(leaf, path->slots[0],
184 struct btrfs_stripe_extent);
185
186 for (int i = 0; i < btrfs_num_raid_stripes(item_size); i++) {
187 struct btrfs_raid_stride *stride = &extent->strides[i];
188 u64 phys;
189
190 phys = btrfs_raid_stride_physical(leaf, stride);
191 phys += diff_start + length;
192 btrfs_set_raid_stride_physical(leaf, stride, phys);
193 }
194
195 /* The "left" item. */
196 path->slots[0]--;
197 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
198 btrfs_partially_delete_raid_extent(trans, path, &key,
199 diff_start, 0);
200 break;
201 }
202
203 /*
204 * The stripe extent starts before the range we want to delete:
205 *
206 * |--- RAID Stripe Extent ---|
207 * |--- keep ---|--- drop ---|
208 *
209 * This means we have to duplicate the tree item, truncate the
210 * length to the new size and then re-insert the item.
211 */
212 if (found_start < start) {
213 u64 diff_start = start - found_start;
214
215 btrfs_partially_delete_raid_extent(trans, path, &key,
216 diff_start, 0);
217
218 start += (key.offset - diff_start);
219 length -= (key.offset - diff_start);
220 if (length == 0)
221 break;
222
223 btrfs_release_path(path);
224 continue;
225 }
226
227 /*
228 * The stripe extent ends after the range we want to delete:
229 *
230 * |--- RAID Stripe Extent ---|
231 * |--- drop ---|--- keep ---|
232 *
233 * This means we have to duplicate the tree item, truncate the
234 * length to the new size and then re-insert the item.
235 */
236 if (found_end > end) {
237 u64 diff_end = found_end - end;
238
239 btrfs_partially_delete_raid_extent(trans, path, &key,
240 key.offset - length,
241 length);
242 ASSERT(key.offset - diff_end == length);
243 break;
244 }
245
246 /* Finally we can delete the whole item, no more special cases. */
247 ret = btrfs_del_item(trans, stripe_root, path);
248 if (ret)
249 break;
250
251 start += key.offset;
252 length -= key.offset;
253 if (length == 0)
254 break;
255
256 btrfs_release_path(path);
257 }
258
259 return ret;
260}
261
262static int update_raid_extent_item(struct btrfs_trans_handle *trans,
263 struct btrfs_key *key,
264 struct btrfs_stripe_extent *stripe_extent,
265 const size_t item_size)
266{
267 BTRFS_PATH_AUTO_FREE(path);
268 struct extent_buffer *leaf;
269 int ret;
270 int slot;
271
272 path = btrfs_alloc_path();
273 if (!path)
274 return -ENOMEM;
275
276 ret = btrfs_search_slot(trans, trans->fs_info->stripe_root, key, path,
277 0, 1);
278 if (ret)
279 return (ret == 1 ? ret : -EINVAL);
280
281 leaf = path->nodes[0];
282 slot = path->slots[0];
283
284 write_extent_buffer(leaf, stripe_extent, btrfs_item_ptr_offset(leaf, slot),
285 item_size);
286
287 return ret;
288}
289
290EXPORT_FOR_TESTS
291int btrfs_insert_one_raid_extent(struct btrfs_trans_handle *trans,
292 struct btrfs_io_context *bioc)
293{
294 struct btrfs_fs_info *fs_info = trans->fs_info;
295 struct btrfs_key stripe_key;
296 struct btrfs_root *stripe_root = fs_info->stripe_root;
297 const int num_stripes = btrfs_bg_type_to_factor(bioc->map_type);
298 struct btrfs_stripe_extent AUTO_KFREE(stripe_extent);
299 const size_t item_size = struct_size(stripe_extent, strides, num_stripes);
300 int ret;
301
302 stripe_extent = kzalloc(item_size, GFP_NOFS);
303 if (!unlikely(stripe_extent)) {
304 btrfs_abort_transaction(trans, -ENOMEM);
305 btrfs_end_transaction(trans);
306 return -ENOMEM;
307 }
308
309 trace_btrfs_insert_one_raid_extent(fs_info, bioc->logical, bioc->size,
310 num_stripes);
311 for (int i = 0; i < num_stripes; i++) {
312 u64 devid = bioc->stripes[i].dev->devid;
313 u64 physical = bioc->stripes[i].physical;
314 struct btrfs_raid_stride *raid_stride = &stripe_extent->strides[i];
315
316 btrfs_set_stack_raid_stride_devid(raid_stride, devid);
317 btrfs_set_stack_raid_stride_physical(raid_stride, physical);
318 }
319
320 stripe_key.objectid = bioc->logical;
321 stripe_key.type = BTRFS_RAID_STRIPE_KEY;
322 stripe_key.offset = bioc->size;
323
324 ret = btrfs_insert_item(trans, stripe_root, &stripe_key, stripe_extent,
325 item_size);
326 if (ret == -EEXIST) {
327 ret = update_raid_extent_item(trans, &stripe_key, stripe_extent,
328 item_size);
329 if (ret)
330 btrfs_abort_transaction(trans, ret);
331 } else if (ret) {
332 btrfs_abort_transaction(trans, ret);
333 }
334
335 return ret;
336}
337
338int btrfs_insert_raid_extent(struct btrfs_trans_handle *trans,
339 struct btrfs_ordered_extent *ordered_extent)
340{
341 struct btrfs_io_context *bioc;
342 int ret;
343
344 if (!btrfs_fs_incompat(trans->fs_info, RAID_STRIPE_TREE))
345 return 0;
346
347 list_for_each_entry(bioc, &ordered_extent->bioc_list, rst_ordered_entry) {
348 ret = btrfs_insert_one_raid_extent(trans, bioc);
349 if (ret)
350 return ret;
351 }
352
353 while (!list_empty(&ordered_extent->bioc_list)) {
354 bioc = list_first_entry(&ordered_extent->bioc_list,
355 typeof(*bioc), rst_ordered_entry);
356 list_del(&bioc->rst_ordered_entry);
357 btrfs_put_bioc(bioc);
358 }
359
360 return 0;
361}
362
363int btrfs_get_raid_extent_offset(struct btrfs_fs_info *fs_info,
364 u64 logical, u64 *length, u64 map_type,
365 u32 stripe_index, struct btrfs_io_stripe *stripe)
366{
367 struct btrfs_root *stripe_root = fs_info->stripe_root;
368 struct btrfs_stripe_extent *stripe_extent;
369 struct btrfs_key stripe_key;
370 struct btrfs_key found_key;
371 BTRFS_PATH_AUTO_FREE(path);
372 struct extent_buffer *leaf;
373 const u64 end = logical + *length;
374 int num_stripes;
375 u64 offset;
376 u64 found_logical;
377 u64 found_length;
378 u64 found_end;
379 int slot;
380 int ret;
381
382 stripe_key.objectid = logical;
383 stripe_key.type = BTRFS_RAID_STRIPE_KEY;
384 stripe_key.offset = 0;
385
386 path = btrfs_alloc_path();
387 if (!path)
388 return -ENOMEM;
389
390 if (stripe->rst_search_commit_root) {
391 path->skip_locking = true;
392 path->search_commit_root = true;
393 }
394
395 ret = btrfs_search_slot(NULL, stripe_root, &stripe_key, path, 0, 0);
396 if (ret < 0)
397 return ret;
398 if (ret) {
399 if (path->slots[0] != 0)
400 path->slots[0]--;
401 }
402
403 while (1) {
404 leaf = path->nodes[0];
405 slot = path->slots[0];
406
407 btrfs_item_key_to_cpu(leaf, &found_key, slot);
408 found_logical = found_key.objectid;
409 found_length = found_key.offset;
410 found_end = found_logical + found_length;
411
412 if (found_logical > end) {
413 ret = -ENODATA;
414 goto out;
415 }
416
417 if (in_range(logical, found_logical, found_length))
418 break;
419
420 ret = btrfs_next_item(stripe_root, path);
421 if (ret)
422 goto out;
423 }
424
425 offset = logical - found_logical;
426
427 /*
428 * If we have a logically contiguous, but physically non-continuous
429 * range, we need to split the bio. Record the length after which we
430 * must split the bio.
431 */
432 if (end > found_end)
433 *length -= end - found_end;
434
435 num_stripes = btrfs_num_raid_stripes(btrfs_item_size(leaf, slot));
436 stripe_extent = btrfs_item_ptr(leaf, slot, struct btrfs_stripe_extent);
437
438 for (int i = 0; i < num_stripes; i++) {
439 struct btrfs_raid_stride *stride = &stripe_extent->strides[i];
440 u64 devid = btrfs_raid_stride_devid(leaf, stride);
441 u64 physical = btrfs_raid_stride_physical(leaf, stride);
442
443 if (devid != stripe->dev->devid)
444 continue;
445
446 if ((map_type & BTRFS_BLOCK_GROUP_DUP) && stripe_index != i)
447 continue;
448
449 stripe->physical = physical + offset;
450
451 trace_btrfs_get_raid_extent_offset(fs_info, logical, *length,
452 stripe->physical, devid);
453
454 return 0;
455 }
456
457 /* If we're here, we haven't found the requested devid in the stripe. */
458 ret = -ENODATA;
459out:
460 if (ret > 0)
461 ret = -ENODATA;
462 if (ret && ret != -EIO && !stripe->rst_search_commit_root) {
463 btrfs_debug(fs_info,
464 "cannot find raid-stripe for logical [%llu, %llu] devid %llu, profile %s",
465 logical, logical + *length, stripe->dev->devid,
466 btrfs_bg_type_to_raid_name(map_type));
467 }
468
469 return ret;
470}