Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

zonefs: Separate zone information from inode information

In preparation for adding dynamic inode allocation, separate an inode's
zone information from the zonefs inode structure. The new data structure
zonefs_zone is introduced to store in memory information about a zone
that must be kept throughout the lifetime of the device mount.

Linking between a zone file inode and its zone information is done by
setting the inode i_private field to point to a struct zonefs_zone.
Using the i_private pointer avoids the need for adding a pointer in
struct zonefs_inode_info. Besides the vfs inode, this structure is
reduced to a mutex and a write open counter.

One struct zonefs_zone is created per file inode on mount. These
structures are organized in an array using the new struct
zonefs_zone_group data structure to represent zone groups. The
zonefs_zone arrays are indexed per file number (the index of a struct
zonefs_zone in its array directly gives the file number/name for that
zone file inode).

Signed-off-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>

+450 -305
+55 -44
fs/zonefs/file.c
··· 29 29 struct iomap *iomap, struct iomap *srcmap) 30 30 { 31 31 struct zonefs_inode_info *zi = ZONEFS_I(inode); 32 + struct zonefs_zone *z = zonefs_inode_zone(inode); 32 33 struct super_block *sb = inode->i_sb; 33 34 loff_t isize; 34 35 ··· 47 46 iomap->length = length; 48 47 } else { 49 48 iomap->type = IOMAP_MAPPED; 50 - iomap->addr = (zi->i_zsector << SECTOR_SHIFT) + iomap->offset; 49 + iomap->addr = (z->z_sector << SECTOR_SHIFT) + iomap->offset; 51 50 iomap->length = isize - iomap->offset; 52 51 } 53 52 mutex_unlock(&zi->i_truncate_mutex); ··· 66 65 struct iomap *iomap, struct iomap *srcmap) 67 66 { 68 67 struct zonefs_inode_info *zi = ZONEFS_I(inode); 68 + struct zonefs_zone *z = zonefs_inode_zone(inode); 69 69 struct super_block *sb = inode->i_sb; 70 70 loff_t isize; 71 71 72 72 /* All write I/Os should always be within the file maximum size */ 73 - if (WARN_ON_ONCE(offset + length > zi->i_max_size)) 73 + if (WARN_ON_ONCE(offset + length > z->z_capacity)) 74 74 return -EIO; 75 75 76 76 /* ··· 79 77 * checked when writes are issued, so warn if we see a page writeback 80 78 * operation. 
81 79 */ 82 - if (WARN_ON_ONCE(zonefs_zone_is_seq(zi) && !(flags & IOMAP_DIRECT))) 80 + if (WARN_ON_ONCE(zonefs_zone_is_seq(z) && !(flags & IOMAP_DIRECT))) 83 81 return -EIO; 84 82 85 83 /* ··· 90 88 mutex_lock(&zi->i_truncate_mutex); 91 89 iomap->bdev = inode->i_sb->s_bdev; 92 90 iomap->offset = ALIGN_DOWN(offset, sb->s_blocksize); 93 - iomap->addr = (zi->i_zsector << SECTOR_SHIFT) + iomap->offset; 91 + iomap->addr = (z->z_sector << SECTOR_SHIFT) + iomap->offset; 94 92 isize = i_size_read(inode); 95 93 if (iomap->offset >= isize) { 96 94 iomap->type = IOMAP_UNWRITTEN; 97 - iomap->length = zi->i_max_size - iomap->offset; 95 + iomap->length = z->z_capacity - iomap->offset; 98 96 } else { 99 97 iomap->type = IOMAP_MAPPED; 100 98 iomap->length = isize - iomap->offset; ··· 127 125 static int zonefs_write_map_blocks(struct iomap_writepage_ctx *wpc, 128 126 struct inode *inode, loff_t offset) 129 127 { 130 - struct zonefs_inode_info *zi = ZONEFS_I(inode); 128 + struct zonefs_zone *z = zonefs_inode_zone(inode); 131 129 132 - if (WARN_ON_ONCE(zonefs_zone_is_seq(zi))) 130 + if (WARN_ON_ONCE(zonefs_zone_is_seq(z))) 133 131 return -EIO; 134 132 if (WARN_ON_ONCE(offset >= i_size_read(inode))) 135 133 return -EIO; ··· 139 137 offset < wpc->iomap.offset + wpc->iomap.length) 140 138 return 0; 141 139 142 - return zonefs_write_iomap_begin(inode, offset, zi->i_max_size - offset, 140 + return zonefs_write_iomap_begin(inode, offset, 141 + z->z_capacity - offset, 143 142 IOMAP_WRITE, &wpc->iomap, NULL); 144 143 } 145 144 ··· 188 185 int zonefs_file_truncate(struct inode *inode, loff_t isize) 189 186 { 190 187 struct zonefs_inode_info *zi = ZONEFS_I(inode); 188 + struct zonefs_zone *z = zonefs_inode_zone(inode); 191 189 loff_t old_isize; 192 190 enum req_op op; 193 191 int ret = 0; ··· 198 194 * only down to a 0 size, which is equivalent to a zone reset, and to 199 195 * the maximum file size, which is equivalent to a zone finish. 
200 196 */ 201 - if (!zonefs_zone_is_seq(zi)) 197 + if (!zonefs_zone_is_seq(z)) 202 198 return -EPERM; 203 199 204 200 if (!isize) 205 201 op = REQ_OP_ZONE_RESET; 206 - else if (isize == zi->i_max_size) 202 + else if (isize == z->z_capacity) 207 203 op = REQ_OP_ZONE_FINISH; 208 204 else 209 205 return -EPERM; ··· 220 216 if (isize == old_isize) 221 217 goto unlock; 222 218 223 - ret = zonefs_zone_mgmt(inode, op); 219 + ret = zonefs_inode_zone_mgmt(inode, op); 224 220 if (ret) 225 221 goto unlock; 226 222 ··· 228 224 * If the mount option ZONEFS_MNTOPT_EXPLICIT_OPEN is set, 229 225 * take care of open zones. 230 226 */ 231 - if (zi->i_flags & ZONEFS_ZONE_OPEN) { 227 + if (z->z_flags & ZONEFS_ZONE_OPEN) { 232 228 /* 233 229 * Truncating a zone to EMPTY or FULL is the equivalent of 234 230 * closing the zone. For a truncation to 0, we need to ··· 238 234 * the open flag. 239 235 */ 240 236 if (!isize) 241 - ret = zonefs_zone_mgmt(inode, REQ_OP_ZONE_OPEN); 237 + ret = zonefs_inode_zone_mgmt(inode, REQ_OP_ZONE_OPEN); 242 238 else 243 - zi->i_flags &= ~ZONEFS_ZONE_OPEN; 239 + z->z_flags &= ~ZONEFS_ZONE_OPEN; 244 240 } 245 241 246 242 zonefs_update_stats(inode, isize); 247 243 truncate_setsize(inode, isize); 248 - zi->i_wpoffset = isize; 249 - zonefs_account_active(inode); 244 + z->z_wpoffset = isize; 245 + zonefs_inode_account_active(inode); 250 246 251 247 unlock: 252 248 mutex_unlock(&zi->i_truncate_mutex); ··· 353 349 return error; 354 350 } 355 351 356 - if (size && zonefs_zone_is_seq(zi)) { 352 + if (size && zonefs_inode_is_seq(inode)) { 357 353 /* 358 354 * Note that we may be seeing completions out of order, 359 355 * but that is not a problem since a write completed ··· 379 375 static ssize_t zonefs_file_dio_append(struct kiocb *iocb, struct iov_iter *from) 380 376 { 381 377 struct inode *inode = file_inode(iocb->ki_filp); 382 - struct zonefs_inode_info *zi = ZONEFS_I(inode); 378 + struct zonefs_zone *z = zonefs_inode_zone(inode); 383 379 struct block_device 
*bdev = inode->i_sb->s_bdev; 384 380 unsigned int max = bdev_max_zone_append_sectors(bdev); 385 381 struct bio *bio; ··· 396 392 397 393 bio = bio_alloc(bdev, nr_pages, 398 394 REQ_OP_ZONE_APPEND | REQ_SYNC | REQ_IDLE, GFP_NOFS); 399 - bio->bi_iter.bi_sector = zi->i_zsector; 395 + bio->bi_iter.bi_sector = z->z_sector; 400 396 bio->bi_ioprio = iocb->ki_ioprio; 401 397 if (iocb_is_dsync(iocb)) 402 398 bio->bi_opf |= REQ_FUA; ··· 421 417 */ 422 418 if (!ret) { 423 419 sector_t wpsector = 424 - zi->i_zsector + (zi->i_wpoffset >> SECTOR_SHIFT); 420 + z->z_sector + (z->z_wpoffset >> SECTOR_SHIFT); 425 421 426 422 if (bio->bi_iter.bi_sector != wpsector) { 427 423 zonefs_warn(inode->i_sb, 428 424 "Corrupted write pointer %llu for zone at %llu\n", 429 - wpsector, zi->i_zsector); 425 + wpsector, z->z_sector); 430 426 ret = -EIO; 431 427 } 432 428 } ··· 454 450 loff_t count) 455 451 { 456 452 struct inode *inode = file_inode(file); 457 - struct zonefs_inode_info *zi = ZONEFS_I(inode); 453 + struct zonefs_zone *z = zonefs_inode_zone(inode); 458 454 loff_t limit = rlimit(RLIMIT_FSIZE); 459 - loff_t max_size = zi->i_max_size; 455 + loff_t max_size = z->z_capacity; 460 456 461 457 if (limit != RLIM_INFINITY) { 462 458 if (pos >= limit) { ··· 480 476 struct file *file = iocb->ki_filp; 481 477 struct inode *inode = file_inode(file); 482 478 struct zonefs_inode_info *zi = ZONEFS_I(inode); 479 + struct zonefs_zone *z = zonefs_inode_zone(inode); 483 480 loff_t count; 484 481 485 482 if (IS_SWAPFILE(inode)) ··· 493 488 return -EINVAL; 494 489 495 490 if (iocb->ki_flags & IOCB_APPEND) { 496 - if (zonefs_zone_is_cnv(zi)) 491 + if (zonefs_zone_is_cnv(z)) 497 492 return -EINVAL; 498 493 mutex_lock(&zi->i_truncate_mutex); 499 - iocb->ki_pos = zi->i_wpoffset; 494 + iocb->ki_pos = z->z_wpoffset; 500 495 mutex_unlock(&zi->i_truncate_mutex); 501 496 } 502 497 ··· 523 518 { 524 519 struct inode *inode = file_inode(iocb->ki_filp); 525 520 struct zonefs_inode_info *zi = ZONEFS_I(inode); 521 + 
struct zonefs_zone *z = zonefs_inode_zone(inode); 526 522 struct super_block *sb = inode->i_sb; 527 523 bool sync = is_sync_kiocb(iocb); 528 524 bool append = false; ··· 534 528 * as this can cause write reordering (e.g. the first aio gets EAGAIN 535 529 * on the inode lock but the second goes through but is now unaligned). 536 530 */ 537 - if (zonefs_zone_is_seq(zi) && !sync && (iocb->ki_flags & IOCB_NOWAIT)) 531 + if (zonefs_zone_is_seq(z) && !sync && (iocb->ki_flags & IOCB_NOWAIT)) 538 532 return -EOPNOTSUPP; 539 533 540 534 if (iocb->ki_flags & IOCB_NOWAIT) { ··· 556 550 } 557 551 558 552 /* Enforce sequential writes (append only) in sequential zones */ 559 - if (zonefs_zone_is_seq(zi)) { 553 + if (zonefs_zone_is_seq(z)) { 560 554 mutex_lock(&zi->i_truncate_mutex); 561 - if (iocb->ki_pos != zi->i_wpoffset) { 555 + if (iocb->ki_pos != z->z_wpoffset) { 562 556 mutex_unlock(&zi->i_truncate_mutex); 563 557 ret = -EINVAL; 564 558 goto inode_unlock; ··· 572 566 else 573 567 ret = iomap_dio_rw(iocb, from, &zonefs_write_iomap_ops, 574 568 &zonefs_write_dio_ops, 0, NULL, 0); 575 - if (zonefs_zone_is_seq(zi) && 569 + if (zonefs_zone_is_seq(z) && 576 570 (ret > 0 || ret == -EIOCBQUEUED)) { 577 571 if (ret > 0) 578 572 count = ret; ··· 583 577 * will correct it. Also do active seq file accounting. 
584 578 */ 585 579 mutex_lock(&zi->i_truncate_mutex); 586 - zi->i_wpoffset += count; 587 - zonefs_account_active(inode); 580 + z->z_wpoffset += count; 581 + zonefs_inode_account_active(inode); 588 582 mutex_unlock(&zi->i_truncate_mutex); 589 583 } 590 584 ··· 635 629 static ssize_t zonefs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) 636 630 { 637 631 struct inode *inode = file_inode(iocb->ki_filp); 632 + struct zonefs_zone *z = zonefs_inode_zone(inode); 638 633 639 634 if (unlikely(IS_IMMUTABLE(inode))) 640 635 return -EPERM; ··· 643 636 if (sb_rdonly(inode->i_sb)) 644 637 return -EROFS; 645 638 646 - /* Write operations beyond the zone size are not allowed */ 647 - if (iocb->ki_pos >= ZONEFS_I(inode)->i_max_size) 639 + /* Write operations beyond the zone capacity are not allowed */ 640 + if (iocb->ki_pos >= z->z_capacity) 648 641 return -EFBIG; 649 642 650 643 if (iocb->ki_flags & IOCB_DIRECT) { ··· 676 669 { 677 670 struct inode *inode = file_inode(iocb->ki_filp); 678 671 struct zonefs_inode_info *zi = ZONEFS_I(inode); 672 + struct zonefs_zone *z = zonefs_inode_zone(inode); 679 673 struct super_block *sb = inode->i_sb; 680 674 loff_t isize; 681 675 ssize_t ret; ··· 685 677 if (unlikely(IS_IMMUTABLE(inode) && !(inode->i_mode & 0777))) 686 678 return -EPERM; 687 679 688 - if (iocb->ki_pos >= zi->i_max_size) 680 + if (iocb->ki_pos >= z->z_capacity) 689 681 return 0; 690 682 691 683 if (iocb->ki_flags & IOCB_NOWAIT) { ··· 746 738 static int zonefs_seq_file_write_open(struct inode *inode) 747 739 { 748 740 struct zonefs_inode_info *zi = ZONEFS_I(inode); 741 + struct zonefs_zone *z = zonefs_inode_zone(inode); 749 742 int ret = 0; 750 743 751 744 mutex_lock(&zi->i_truncate_mutex); ··· 764 755 goto unlock; 765 756 } 766 757 767 - if (i_size_read(inode) < zi->i_max_size) { 768 - ret = zonefs_zone_mgmt(inode, REQ_OP_ZONE_OPEN); 758 + if (i_size_read(inode) < z->z_capacity) { 759 + ret = zonefs_inode_zone_mgmt(inode, 760 + REQ_OP_ZONE_OPEN); 769 761 if (ret) { 
770 762 atomic_dec(&sbi->s_wro_seq_files); 771 763 goto unlock; 772 764 } 773 - zi->i_flags |= ZONEFS_ZONE_OPEN; 774 - zonefs_account_active(inode); 765 + z->z_flags |= ZONEFS_ZONE_OPEN; 766 + zonefs_inode_account_active(inode); 775 767 } 776 768 } 777 769 } ··· 802 792 static void zonefs_seq_file_write_close(struct inode *inode) 803 793 { 804 794 struct zonefs_inode_info *zi = ZONEFS_I(inode); 795 + struct zonefs_zone *z = zonefs_inode_zone(inode); 805 796 struct super_block *sb = inode->i_sb; 806 797 struct zonefs_sb_info *sbi = ZONEFS_SB(sb); 807 798 int ret = 0; ··· 818 807 * its maximum size or it was fully written). For this case, we only 819 808 * need to decrement the write open count. 820 809 */ 821 - if (zi->i_flags & ZONEFS_ZONE_OPEN) { 822 - ret = zonefs_zone_mgmt(inode, REQ_OP_ZONE_CLOSE); 810 + if (z->z_flags & ZONEFS_ZONE_OPEN) { 811 + ret = zonefs_inode_zone_mgmt(inode, REQ_OP_ZONE_CLOSE); 823 812 if (ret) { 824 813 __zonefs_io_error(inode, false); 825 814 /* ··· 828 817 * exhausted). So take preventive action by remounting 829 818 * read-only. 830 819 */ 831 - if (zi->i_flags & ZONEFS_ZONE_OPEN && 820 + if (z->z_flags & ZONEFS_ZONE_OPEN && 832 821 !(sb->s_flags & SB_RDONLY)) { 833 822 zonefs_warn(sb, 834 823 "closing zone at %llu failed %d\n", 835 - zi->i_zsector, ret); 824 + z->z_sector, ret); 836 825 zonefs_warn(sb, 837 826 "remounting filesystem read-only\n"); 838 827 sb->s_flags |= SB_RDONLY; ··· 840 829 goto unlock; 841 830 } 842 831 843 - zi->i_flags &= ~ZONEFS_ZONE_OPEN; 844 - zonefs_account_active(inode); 832 + z->z_flags &= ~ZONEFS_ZONE_OPEN; 833 + zonefs_inode_account_active(inode); 845 834 } 846 835 847 836 atomic_dec(&sbi->s_wro_seq_files);
+341 -230
fs/zonefs/super.c
··· 28 28 #include "trace.h" 29 29 30 30 /* 31 - * Manage the active zone count. Called with zi->i_truncate_mutex held. 31 + * Get the name of a zone group directory. 32 32 */ 33 - void zonefs_account_active(struct inode *inode) 33 + static const char *zonefs_zgroup_name(enum zonefs_ztype ztype) 34 34 { 35 - struct zonefs_sb_info *sbi = ZONEFS_SB(inode->i_sb); 36 - struct zonefs_inode_info *zi = ZONEFS_I(inode); 35 + switch (ztype) { 36 + case ZONEFS_ZTYPE_CNV: 37 + return "cnv"; 38 + case ZONEFS_ZTYPE_SEQ: 39 + return "seq"; 40 + default: 41 + WARN_ON_ONCE(1); 42 + return "???"; 43 + } 44 + } 37 45 38 - lockdep_assert_held(&zi->i_truncate_mutex); 46 + /* 47 + * Manage the active zone count. 48 + */ 49 + static void zonefs_account_active(struct super_block *sb, 50 + struct zonefs_zone *z) 51 + { 52 + struct zonefs_sb_info *sbi = ZONEFS_SB(sb); 39 53 40 - if (zonefs_zone_is_cnv(zi)) 54 + if (zonefs_zone_is_cnv(z)) 41 55 return; 42 56 43 57 /* 44 58 * For zones that transitioned to the offline or readonly condition, 45 59 * we only need to clear the active state. 46 60 */ 47 - if (zi->i_flags & (ZONEFS_ZONE_OFFLINE | ZONEFS_ZONE_READONLY)) 61 + if (z->z_flags & (ZONEFS_ZONE_OFFLINE | ZONEFS_ZONE_READONLY)) 48 62 goto out; 49 63 50 64 /* 51 65 * If the zone is active, that is, if it is explicitly open or 52 66 * partially written, check if it was already accounted as active. 53 67 */ 54 - if ((zi->i_flags & ZONEFS_ZONE_OPEN) || 55 - (zi->i_wpoffset > 0 && zi->i_wpoffset < zi->i_max_size)) { 56 - if (!(zi->i_flags & ZONEFS_ZONE_ACTIVE)) { 57 - zi->i_flags |= ZONEFS_ZONE_ACTIVE; 68 + if ((z->z_flags & ZONEFS_ZONE_OPEN) || 69 + (z->z_wpoffset > 0 && z->z_wpoffset < z->z_capacity)) { 70 + if (!(z->z_flags & ZONEFS_ZONE_ACTIVE)) { 71 + z->z_flags |= ZONEFS_ZONE_ACTIVE; 58 72 atomic_inc(&sbi->s_active_seq_files); 59 73 } 60 74 return; ··· 76 62 77 63 out: 78 64 /* The zone is not active. 
If it was, update the active count */ 79 - if (zi->i_flags & ZONEFS_ZONE_ACTIVE) { 80 - zi->i_flags &= ~ZONEFS_ZONE_ACTIVE; 65 + if (z->z_flags & ZONEFS_ZONE_ACTIVE) { 66 + z->z_flags &= ~ZONEFS_ZONE_ACTIVE; 81 67 atomic_dec(&sbi->s_active_seq_files); 82 68 } 83 69 } 84 70 85 - int zonefs_zone_mgmt(struct inode *inode, enum req_op op) 71 + /* 72 + * Manage the active zone count. Called with zi->i_truncate_mutex held. 73 + */ 74 + void zonefs_inode_account_active(struct inode *inode) 86 75 { 87 - struct zonefs_inode_info *zi = ZONEFS_I(inode); 88 - int ret; 76 + lockdep_assert_held(&ZONEFS_I(inode)->i_truncate_mutex); 89 77 90 - lockdep_assert_held(&zi->i_truncate_mutex); 78 + return zonefs_account_active(inode->i_sb, zonefs_inode_zone(inode)); 79 + } 80 + 81 + /* 82 + * Execute a zone management operation. 83 + */ 84 + static int zonefs_zone_mgmt(struct super_block *sb, 85 + struct zonefs_zone *z, enum req_op op) 86 + { 87 + int ret; 91 88 92 89 /* 93 90 * With ZNS drives, closing an explicitly open zone that has not been ··· 108 83 * are exceeded, make sure that the zone does not remain active by 109 84 * resetting it. 
110 85 */ 111 - if (op == REQ_OP_ZONE_CLOSE && !zi->i_wpoffset) 86 + if (op == REQ_OP_ZONE_CLOSE && !z->z_wpoffset) 112 87 op = REQ_OP_ZONE_RESET; 113 88 114 - trace_zonefs_zone_mgmt(inode, op); 115 - ret = blkdev_zone_mgmt(inode->i_sb->s_bdev, op, zi->i_zsector, 116 - zi->i_zone_size >> SECTOR_SHIFT, GFP_NOFS); 89 + trace_zonefs_zone_mgmt(sb, z, op); 90 + ret = blkdev_zone_mgmt(sb->s_bdev, op, z->z_sector, 91 + z->z_size >> SECTOR_SHIFT, GFP_NOFS); 117 92 if (ret) { 118 - zonefs_err(inode->i_sb, 93 + zonefs_err(sb, 119 94 "Zone management operation %s at %llu failed %d\n", 120 - blk_op_str(op), zi->i_zsector, ret); 95 + blk_op_str(op), z->z_sector, ret); 121 96 return ret; 122 97 } 123 98 124 99 return 0; 125 100 } 126 101 102 + int zonefs_inode_zone_mgmt(struct inode *inode, enum req_op op) 103 + { 104 + lockdep_assert_held(&ZONEFS_I(inode)->i_truncate_mutex); 105 + 106 + return zonefs_zone_mgmt(inode->i_sb, zonefs_inode_zone(inode), op); 107 + } 108 + 127 109 void zonefs_i_size_write(struct inode *inode, loff_t isize) 128 110 { 129 - struct zonefs_inode_info *zi = ZONEFS_I(inode); 111 + struct zonefs_zone *z = zonefs_inode_zone(inode); 130 112 131 113 i_size_write(inode, isize); 114 + 132 115 /* 133 116 * A full zone is no longer open/active and does not need 134 117 * explicit closing. 135 118 */ 136 - if (isize >= zi->i_max_size) { 119 + if (isize >= z->z_capacity) { 137 120 struct zonefs_sb_info *sbi = ZONEFS_SB(inode->i_sb); 138 121 139 - if (zi->i_flags & ZONEFS_ZONE_ACTIVE) 122 + if (z->z_flags & ZONEFS_ZONE_ACTIVE) 140 123 atomic_dec(&sbi->s_active_seq_files); 141 - zi->i_flags &= ~(ZONEFS_ZONE_OPEN | ZONEFS_ZONE_ACTIVE); 124 + z->z_flags &= ~(ZONEFS_ZONE_OPEN | ZONEFS_ZONE_ACTIVE); 142 125 } 143 126 } 144 127 ··· 183 150 } 184 151 185 152 /* 186 - * Check a zone condition and adjust its file inode access permissions for 187 - * offline and readonly zones. Return the inode size corresponding to the 188 - * amount of readable data in the zone. 
153 + * Check a zone condition. Return the amount of written (and still readable) 154 + * data in the zone. 189 155 */ 190 - static loff_t zonefs_check_zone_condition(struct inode *inode, 156 + static loff_t zonefs_check_zone_condition(struct super_block *sb, 157 + struct zonefs_zone *z, 191 158 struct blk_zone *zone) 192 159 { 193 - struct zonefs_inode_info *zi = ZONEFS_I(inode); 194 - 195 160 switch (zone->cond) { 196 161 case BLK_ZONE_COND_OFFLINE: 197 - zonefs_warn(inode->i_sb, "inode %lu: offline zone\n", 198 - inode->i_ino); 199 - zi->i_flags |= ZONEFS_ZONE_OFFLINE; 162 + zonefs_warn(sb, "Zone %llu: offline zone\n", 163 + z->z_sector); 164 + z->z_flags |= ZONEFS_ZONE_OFFLINE; 200 165 return 0; 201 166 case BLK_ZONE_COND_READONLY: 202 167 /* ··· 205 174 * the inode size as it was when last updated so that the user 206 175 * can recover data. 207 176 */ 208 - zonefs_warn(inode->i_sb, "inode %lu: read-only zone\n", 209 - inode->i_ino); 210 - zi->i_flags |= ZONEFS_ZONE_READONLY; 211 - if (zonefs_zone_is_cnv(zi)) 212 - return zi->i_max_size; 213 - return zi->i_wpoffset; 177 + zonefs_warn(sb, "Zone %llu: read-only zone\n", 178 + z->z_sector); 179 + z->z_flags |= ZONEFS_ZONE_READONLY; 180 + if (zonefs_zone_is_cnv(z)) 181 + return z->z_capacity; 182 + return z->z_wpoffset; 214 183 case BLK_ZONE_COND_FULL: 215 184 /* The write pointer of full zones is invalid. 
*/ 216 - return zi->i_max_size; 185 + return z->z_capacity; 217 186 default: 218 - if (zonefs_zone_is_cnv(zi)) 219 - return zi->i_max_size; 187 + if (zonefs_zone_is_cnv(z)) 188 + return z->z_capacity; 220 189 return (zone->wp - zone->start) << SECTOR_SHIFT; 221 190 } 222 191 } ··· 227 196 */ 228 197 static void zonefs_inode_update_mode(struct inode *inode) 229 198 { 230 - struct zonefs_inode_info *zi = ZONEFS_I(inode); 199 + struct zonefs_zone *z = zonefs_inode_zone(inode); 231 200 232 - if (zi->i_flags & ZONEFS_ZONE_OFFLINE) { 201 + if (z->z_flags & ZONEFS_ZONE_OFFLINE) { 233 202 /* Offline zones cannot be read nor written */ 234 203 inode->i_flags |= S_IMMUTABLE; 235 204 inode->i_mode &= ~0777; 236 - } else if (zi->i_flags & ZONEFS_ZONE_READONLY) { 205 + } else if (z->z_flags & ZONEFS_ZONE_READONLY) { 237 206 /* Readonly zones cannot be written */ 238 207 inode->i_flags |= S_IMMUTABLE; 239 - if (zi->i_flags & ZONEFS_ZONE_INIT_MODE) 208 + if (z->z_flags & ZONEFS_ZONE_INIT_MODE) 240 209 inode->i_mode &= ~0777; 241 210 else 242 211 inode->i_mode &= ~0222; 243 212 } 244 213 245 - zi->i_flags &= ~ZONEFS_ZONE_INIT_MODE; 214 + z->z_flags &= ~ZONEFS_ZONE_INIT_MODE; 246 215 } 247 216 248 217 struct zonefs_ioerr_data { ··· 255 224 { 256 225 struct zonefs_ioerr_data *err = data; 257 226 struct inode *inode = err->inode; 258 - struct zonefs_inode_info *zi = ZONEFS_I(inode); 227 + struct zonefs_zone *z = zonefs_inode_zone(inode); 259 228 struct super_block *sb = inode->i_sb; 260 229 struct zonefs_sb_info *sbi = ZONEFS_SB(sb); 261 230 loff_t isize, data_size; ··· 266 235 * as there is no inconsistency between the inode size and the amount of 267 236 * data writen in the zone (data_size). 
268 237 */ 269 - data_size = zonefs_check_zone_condition(inode, zone); 238 + data_size = zonefs_check_zone_condition(sb, z, zone); 270 239 isize = i_size_read(inode); 271 - if (!(zi->i_flags & (ZONEFS_ZONE_READONLY | ZONEFS_ZONE_OFFLINE)) && 240 + if (!(z->z_flags & (ZONEFS_ZONE_READONLY | ZONEFS_ZONE_OFFLINE)) && 272 241 !err->write && isize == data_size) 273 242 return 0; 274 243 ··· 291 260 * In all cases, warn about inode size inconsistency and handle the 292 261 * IO error according to the zone condition and to the mount options. 293 262 */ 294 - if (zonefs_zone_is_seq(zi) && isize != data_size) 295 - zonefs_warn(sb, "inode %lu: invalid size %lld (should be %lld)\n", 263 + if (zonefs_zone_is_seq(z) && isize != data_size) 264 + zonefs_warn(sb, 265 + "inode %lu: invalid size %lld (should be %lld)\n", 296 266 inode->i_ino, isize, data_size); 297 267 298 268 /* ··· 302 270 * zone condition to read-only and offline respectively, as if the 303 271 * condition was signaled by the hardware. 
304 272 */ 305 - if ((zi->i_flags & ZONEFS_ZONE_OFFLINE) || 273 + if ((z->z_flags & ZONEFS_ZONE_OFFLINE) || 306 274 (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_ZOL)) { 307 275 zonefs_warn(sb, "inode %lu: read/write access disabled\n", 308 276 inode->i_ino); 309 - if (!(zi->i_flags & ZONEFS_ZONE_OFFLINE)) 310 - zi->i_flags |= ZONEFS_ZONE_OFFLINE; 277 + if (!(z->z_flags & ZONEFS_ZONE_OFFLINE)) 278 + z->z_flags |= ZONEFS_ZONE_OFFLINE; 311 279 zonefs_inode_update_mode(inode); 312 280 data_size = 0; 313 - } else if ((zi->i_flags & ZONEFS_ZONE_READONLY) || 281 + } else if ((z->z_flags & ZONEFS_ZONE_READONLY) || 314 282 (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_ZRO)) { 315 283 zonefs_warn(sb, "inode %lu: write access disabled\n", 316 284 inode->i_ino); 317 - if (!(zi->i_flags & ZONEFS_ZONE_READONLY)) 318 - zi->i_flags |= ZONEFS_ZONE_READONLY; 285 + if (!(z->z_flags & ZONEFS_ZONE_READONLY)) 286 + z->z_flags |= ZONEFS_ZONE_READONLY; 319 287 zonefs_inode_update_mode(inode); 320 288 data_size = isize; 321 289 } else if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_RO && ··· 331 299 * close of the zone when the inode file is closed. 
332 300 */ 333 301 if ((sbi->s_mount_opts & ZONEFS_MNTOPT_EXPLICIT_OPEN) && 334 - (zi->i_flags & (ZONEFS_ZONE_READONLY | ZONEFS_ZONE_OFFLINE))) 335 - zi->i_flags &= ~ZONEFS_ZONE_OPEN; 302 + (z->z_flags & (ZONEFS_ZONE_READONLY | ZONEFS_ZONE_OFFLINE))) 303 + z->z_flags &= ~ZONEFS_ZONE_OPEN; 336 304 337 305 /* 338 306 * If error=remount-ro was specified, any error result in remounting ··· 349 317 */ 350 318 zonefs_update_stats(inode, data_size); 351 319 zonefs_i_size_write(inode, data_size); 352 - zi->i_wpoffset = data_size; 353 - zonefs_account_active(inode); 320 + z->z_wpoffset = data_size; 321 + zonefs_inode_account_active(inode); 354 322 355 323 return 0; 356 324 } ··· 364 332 */ 365 333 void __zonefs_io_error(struct inode *inode, bool write) 366 334 { 367 - struct zonefs_inode_info *zi = ZONEFS_I(inode); 335 + struct zonefs_zone *z = zonefs_inode_zone(inode); 368 336 struct super_block *sb = inode->i_sb; 369 337 struct zonefs_sb_info *sbi = ZONEFS_SB(sb); 370 338 unsigned int noio_flag; ··· 380 348 * files with aggregated conventional zones, for which the inode zone 381 349 * size is always larger than the device zone size. 382 350 */ 383 - if (zi->i_zone_size > bdev_zone_sectors(sb->s_bdev)) 384 - nr_zones = zi->i_zone_size >> 351 + if (z->z_size > bdev_zone_sectors(sb->s_bdev)) 352 + nr_zones = z->z_size >> 385 353 (sbi->s_zone_sectors_shift + SECTOR_SHIFT); 386 354 387 355 /* ··· 393 361 * the GFP_NOIO context avoids both problems. 
394 362 */ 395 363 noio_flag = memalloc_noio_save(); 396 - ret = blkdev_report_zones(sb->s_bdev, zi->i_zsector, nr_zones, 364 + ret = blkdev_report_zones(sb->s_bdev, z->z_sector, nr_zones, 397 365 zonefs_io_error_cb, &err); 398 366 if (ret != nr_zones) 399 367 zonefs_err(sb, "Get inode %lu zone information failed %d\n", ··· 413 381 414 382 inode_init_once(&zi->i_vnode); 415 383 mutex_init(&zi->i_truncate_mutex); 416 - zi->i_wpoffset = 0; 417 384 zi->i_wr_refcnt = 0; 418 - zi->i_flags = 0; 419 385 420 386 return &zi->i_vnode; 421 387 } ··· 446 416 buf->f_bavail = buf->f_bfree; 447 417 448 418 for (t = 0; t < ZONEFS_ZTYPE_MAX; t++) { 449 - if (sbi->s_nr_files[t]) 450 - buf->f_files += sbi->s_nr_files[t] + 1; 419 + if (sbi->s_zgroup[t].g_nr_zones) 420 + buf->f_files += sbi->s_zgroup[t].g_nr_zones + 1; 451 421 } 452 422 buf->f_ffree = 0; 453 423 ··· 587 557 }; 588 558 589 559 static void zonefs_init_dir_inode(struct inode *parent, struct inode *inode, 590 - enum zonefs_ztype type) 560 + enum zonefs_ztype ztype) 591 561 { 592 562 struct super_block *sb = parent->i_sb; 593 563 594 - inode->i_ino = bdev_nr_zones(sb->s_bdev) + type + 1; 564 + inode->i_ino = bdev_nr_zones(sb->s_bdev) + ztype + 1; 595 565 inode_init_owner(&init_user_ns, inode, parent, S_IFDIR | 0555); 596 566 inode->i_op = &zonefs_dir_inode_operations; 597 567 inode->i_fop = &simple_dir_operations; ··· 603 573 .setattr = zonefs_inode_setattr, 604 574 }; 605 575 606 - static int zonefs_init_file_inode(struct inode *inode, struct blk_zone *zone, 607 - enum zonefs_ztype type) 576 + static void zonefs_init_file_inode(struct inode *inode, 577 + struct zonefs_zone *z) 608 578 { 609 579 struct super_block *sb = inode->i_sb; 610 580 struct zonefs_sb_info *sbi = ZONEFS_SB(sb); 611 - struct zonefs_inode_info *zi = ZONEFS_I(inode); 612 - int ret = 0; 613 581 614 - inode->i_ino = zone->start >> sbi->s_zone_sectors_shift; 582 + inode->i_private = z; 583 + 584 + inode->i_ino = z->z_sector >> sbi->s_zone_sectors_shift; 615 
585 inode->i_mode = S_IFREG | sbi->s_perm; 616 - 617 - if (type == ZONEFS_ZTYPE_CNV) 618 - zi->i_flags |= ZONEFS_ZONE_CNV; 619 - 620 - zi->i_zsector = zone->start; 621 - zi->i_zone_size = zone->len << SECTOR_SHIFT; 622 - if (zi->i_zone_size > bdev_zone_sectors(sb->s_bdev) << SECTOR_SHIFT && 623 - !(sbi->s_features & ZONEFS_F_AGGRCNV)) { 624 - zonefs_err(sb, 625 - "zone size %llu doesn't match device's zone sectors %llu\n", 626 - zi->i_zone_size, 627 - bdev_zone_sectors(sb->s_bdev) << SECTOR_SHIFT); 628 - return -EINVAL; 629 - } 630 - 631 - zi->i_max_size = min_t(loff_t, MAX_LFS_FILESIZE, 632 - zone->capacity << SECTOR_SHIFT); 633 - zi->i_wpoffset = zonefs_check_zone_condition(inode, zone); 634 - 635 586 inode->i_uid = sbi->s_uid; 636 587 inode->i_gid = sbi->s_gid; 637 - inode->i_size = zi->i_wpoffset; 638 - inode->i_blocks = zi->i_max_size >> SECTOR_SHIFT; 588 + inode->i_size = z->z_wpoffset; 589 + inode->i_blocks = z->z_capacity >> SECTOR_SHIFT; 639 590 640 591 inode->i_op = &zonefs_file_inode_operations; 641 592 inode->i_fop = &zonefs_file_operations; 642 593 inode->i_mapping->a_ops = &zonefs_file_aops; 643 594 644 595 /* Update the inode access rights depending on the zone condition */ 645 - zi->i_flags |= ZONEFS_ZONE_INIT_MODE; 596 + z->z_flags |= ZONEFS_ZONE_INIT_MODE; 646 597 zonefs_inode_update_mode(inode); 647 - 648 - sb->s_maxbytes = max(zi->i_max_size, sb->s_maxbytes); 649 - sbi->s_blocks += zi->i_max_size >> sb->s_blocksize_bits; 650 - sbi->s_used_blocks += zi->i_wpoffset >> sb->s_blocksize_bits; 651 - 652 - mutex_lock(&zi->i_truncate_mutex); 653 - 654 - /* 655 - * For sequential zones, make sure that any open zone is closed first 656 - * to ensure that the initial number of open zones is 0, in sync with 657 - * the open zone accounting done when the mount option 658 - * ZONEFS_MNTOPT_EXPLICIT_OPEN is used. 
659 - */ 660 - if (type == ZONEFS_ZTYPE_SEQ && 661 - (zone->cond == BLK_ZONE_COND_IMP_OPEN || 662 - zone->cond == BLK_ZONE_COND_EXP_OPEN)) { 663 - ret = zonefs_zone_mgmt(inode, REQ_OP_ZONE_CLOSE); 664 - if (ret) 665 - goto unlock; 666 - } 667 - 668 - zonefs_account_active(inode); 669 - 670 - unlock: 671 - mutex_unlock(&zi->i_truncate_mutex); 672 - 673 - return ret; 674 598 } 675 599 676 600 static struct dentry *zonefs_create_inode(struct dentry *parent, 677 - const char *name, struct blk_zone *zone, 678 - enum zonefs_ztype type) 601 + const char *name, 602 + struct zonefs_zone *z, 603 + enum zonefs_ztype ztype) 679 604 { 680 605 struct inode *dir = d_inode(parent); 681 606 struct dentry *dentry; ··· 646 661 goto dput; 647 662 648 663 inode->i_ctime = inode->i_mtime = inode->i_atime = dir->i_ctime; 649 - if (zone) { 650 - ret = zonefs_init_file_inode(inode, zone, type); 651 - if (ret) { 652 - iput(inode); 653 - goto dput; 654 - } 655 - } else { 656 - zonefs_init_dir_inode(dir, inode, type); 657 - } 664 + if (z) 665 + zonefs_init_file_inode(inode, z); 666 + else 667 + zonefs_init_dir_inode(dir, inode, ztype); 658 668 659 669 d_add(dentry, inode); 660 670 dir->i_size++; ··· 665 685 struct zonefs_zone_data { 666 686 struct super_block *sb; 667 687 unsigned int nr_zones[ZONEFS_ZTYPE_MAX]; 688 + sector_t cnv_zone_start; 668 689 struct blk_zone *zones; 669 690 }; 670 691 671 692 /* 672 - * Create a zone group and populate it with zone files. 693 + * Create the inodes for a zone group. 
673 694 */ 674 - static int zonefs_create_zgroup(struct zonefs_zone_data *zd, 675 - enum zonefs_ztype type) 695 + static int zonefs_create_zgroup_inodes(struct super_block *sb, 696 + enum zonefs_ztype ztype) 676 697 { 677 - struct super_block *sb = zd->sb; 678 698 struct zonefs_sb_info *sbi = ZONEFS_SB(sb); 679 - struct blk_zone *zone, *next, *end; 680 - const char *zgroup_name; 681 - char *file_name; 699 + struct zonefs_zone_group *zgroup = &sbi->s_zgroup[ztype]; 682 700 struct dentry *dir, *dent; 683 - unsigned int n = 0; 684 - int ret; 701 + char *file_name; 702 + int i, ret = 0; 703 + 704 + if (!zgroup) 705 + return -ENOMEM; 685 706 686 707 /* If the group is empty, there is nothing to do */ 687 - if (!zd->nr_zones[type]) 708 + if (!zgroup->g_nr_zones) 688 709 return 0; 689 710 690 711 file_name = kmalloc(ZONEFS_NAME_MAX, GFP_KERNEL); 691 712 if (!file_name) 692 713 return -ENOMEM; 693 714 694 - if (type == ZONEFS_ZTYPE_CNV) 695 - zgroup_name = "cnv"; 696 - else 697 - zgroup_name = "seq"; 698 - 699 - dir = zonefs_create_inode(sb->s_root, zgroup_name, NULL, type); 715 + dir = zonefs_create_inode(sb->s_root, zonefs_zgroup_name(ztype), 716 + NULL, ztype); 700 717 if (IS_ERR(dir)) { 701 718 ret = PTR_ERR(dir); 702 719 goto free; 703 720 } 704 721 705 - /* 706 - * The first zone contains the super block: skip it. 707 - */ 708 - end = zd->zones + bdev_nr_zones(sb->s_bdev); 709 - for (zone = &zd->zones[1]; zone < end; zone = next) { 710 - 711 - next = zone + 1; 712 - if (zonefs_zone_type(zone) != type) 713 - continue; 714 - 715 - /* 716 - * For conventional zones, contiguous zones can be aggregated 717 - * together to form larger files. Note that this overwrites the 718 - * length of the first zone of the set of contiguous zones 719 - * aggregated together. If one offline or read-only zone is 720 - * found, assume that all zones aggregated have the same 721 - * condition. 
722 - */ 723 - if (type == ZONEFS_ZTYPE_CNV && 724 - (sbi->s_features & ZONEFS_F_AGGRCNV)) { 725 - for (; next < end; next++) { 726 - if (zonefs_zone_type(next) != type) 727 - break; 728 - zone->len += next->len; 729 - zone->capacity += next->capacity; 730 - if (next->cond == BLK_ZONE_COND_READONLY && 731 - zone->cond != BLK_ZONE_COND_OFFLINE) 732 - zone->cond = BLK_ZONE_COND_READONLY; 733 - else if (next->cond == BLK_ZONE_COND_OFFLINE) 734 - zone->cond = BLK_ZONE_COND_OFFLINE; 735 - } 736 - if (zone->capacity != zone->len) { 737 - zonefs_err(sb, "Invalid conventional zone capacity\n"); 738 - ret = -EINVAL; 739 - goto free; 740 - } 741 - } 742 - 743 - /* 744 - * Use the file number within its group as file name. 745 - */ 746 - snprintf(file_name, ZONEFS_NAME_MAX - 1, "%u", n); 747 - dent = zonefs_create_inode(dir, file_name, zone, type); 722 + for (i = 0; i < zgroup->g_nr_zones; i++) { 723 + /* Use the zone number within its group as the file name */ 724 + snprintf(file_name, ZONEFS_NAME_MAX - 1, "%u", i); 725 + dent = zonefs_create_inode(dir, file_name, 726 + &zgroup->g_zones[i], ztype); 748 727 if (IS_ERR(dent)) { 749 728 ret = PTR_ERR(dent); 750 - goto free; 729 + break; 751 730 } 752 - 753 - n++; 754 731 } 755 - 756 - zonefs_info(sb, "Zone group \"%s\" has %u file%s\n", 757 - zgroup_name, n, n > 1 ? "s" : ""); 758 - 759 - sbi->s_nr_files[type] = n; 760 - ret = 0; 761 732 762 733 free: 763 734 kfree(file_name); ··· 720 789 void *data) 721 790 { 722 791 struct zonefs_zone_data *zd = data; 792 + struct super_block *sb = zd->sb; 793 + struct zonefs_sb_info *sbi = ZONEFS_SB(sb); 723 794 724 795 /* 725 - * Count the number of usable zones: the first zone at index 0 contains 726 - * the super block and is ignored. 796 + * We do not care about the first zone: it contains the super block 797 + * and is not exposed as a file. 798 + */ 799 + if (!idx) 800 + return 0; 801 + 802 + /* 803 + * Count the number of zones that will be exposed as files. 
804 + * For sequential zones, we always have as many files as zones. 805 + For conventional zones, the number of files depends on whether we have 806 + conventional zones aggregation enabled. 727 807 */ 728 808 switch (zone->type) { 729 809 case BLK_ZONE_TYPE_CONVENTIONAL: 730 - zone->wp = zone->start + zone->len; 731 - if (idx) 732 - zd->nr_zones[ZONEFS_ZTYPE_CNV]++; 810 + if (sbi->s_features & ZONEFS_F_AGGRCNV) { 811 + /* One file per set of contiguous conventional zones */ 812 + if (!(sbi->s_zgroup[ZONEFS_ZTYPE_CNV].g_nr_zones) || 813 + zone->start != zd->cnv_zone_start) 814 + sbi->s_zgroup[ZONEFS_ZTYPE_CNV].g_nr_zones++; 815 + zd->cnv_zone_start = zone->start + zone->len; 816 + } else { 817 + /* One file per zone */ 818 + sbi->s_zgroup[ZONEFS_ZTYPE_CNV].g_nr_zones++; 819 + } 733 820 break; 734 821 case BLK_ZONE_TYPE_SEQWRITE_REQ: 735 822 case BLK_ZONE_TYPE_SEQWRITE_PREF: 736 - if (idx) 737 - zd->nr_zones[ZONEFS_ZTYPE_SEQ]++; 823 + sbi->s_zgroup[ZONEFS_ZTYPE_SEQ].g_nr_zones++; 738 824 break; 739 825 default: 740 826 zonefs_err(zd->sb, "Unsupported zone type 0x%x\n", ··· 791 843 return 0; 792 844 } 793 845 794 - static inline void zonefs_cleanup_zone_info(struct zonefs_zone_data *zd) 846 + static inline void zonefs_free_zone_info(struct zonefs_zone_data *zd) 795 847 { 796 848 kvfree(zd->zones); 849 + } 850 + 851 + /* 852 + * Create a zone group and populate it with zone files. 853 + */ 854 + static int zonefs_init_zgroup(struct super_block *sb, 855 + struct zonefs_zone_data *zd, 856 + enum zonefs_ztype ztype) 857 + { 858 + struct zonefs_sb_info *sbi = ZONEFS_SB(sb); 859 + struct zonefs_zone_group *zgroup = &sbi->s_zgroup[ztype]; 860 + struct blk_zone *zone, *next, *end; 861 + struct zonefs_zone *z; 862 + unsigned int n = 0; 863 + int ret; 864 + 865 + /* Allocate the zone group. If it is empty, we have nothing to do. 
 */ 866 + if (!zgroup->g_nr_zones) 867 + return 0; 868 + 869 + zgroup->g_zones = kvcalloc(zgroup->g_nr_zones, 870 + sizeof(struct zonefs_zone), GFP_KERNEL); 871 + if (!zgroup->g_zones) 872 + return -ENOMEM; 873 + 874 + /* 875 + * Initialize the zone groups using the device zone information. 876 + * We always skip the first zone as it contains the super block 877 + * and is not used to back a file. 878 + */ 879 + end = zd->zones + bdev_nr_zones(sb->s_bdev); 880 + for (zone = &zd->zones[1]; zone < end; zone = next) { 881 + 882 + next = zone + 1; 883 + if (zonefs_zone_type(zone) != ztype) 884 + continue; 885 + 886 + if (WARN_ON_ONCE(n >= zgroup->g_nr_zones)) 887 + return -EINVAL; 888 + 889 + /* 890 + * For conventional zones, contiguous zones can be aggregated 891 + * together to form larger files. Note that this overwrites the 892 + * length of the first zone of the set of contiguous zones 893 + * aggregated together. If one offline or read-only zone is 894 + * found, assume that all zones aggregated have the same 895 + * condition. 
896 + */ 897 + if (ztype == ZONEFS_ZTYPE_CNV && 898 + (sbi->s_features & ZONEFS_F_AGGRCNV)) { 899 + for (; next < end; next++) { 900 + if (zonefs_zone_type(next) != ztype) 901 + break; 902 + zone->len += next->len; 903 + zone->capacity += next->capacity; 904 + if (next->cond == BLK_ZONE_COND_READONLY && 905 + zone->cond != BLK_ZONE_COND_OFFLINE) 906 + zone->cond = BLK_ZONE_COND_READONLY; 907 + else if (next->cond == BLK_ZONE_COND_OFFLINE) 908 + zone->cond = BLK_ZONE_COND_OFFLINE; 909 + } 910 + } 911 + 912 + z = &zgroup->g_zones[n]; 913 + if (ztype == ZONEFS_ZTYPE_CNV) 914 + z->z_flags |= ZONEFS_ZONE_CNV; 915 + z->z_sector = zone->start; 916 + z->z_size = zone->len << SECTOR_SHIFT; 917 + if (z->z_size > bdev_zone_sectors(sb->s_bdev) << SECTOR_SHIFT && 918 + !(sbi->s_features & ZONEFS_F_AGGRCNV)) { 919 + zonefs_err(sb, 920 + "Invalid zone size %llu (device zone sectors %llu)\n", 921 + z->z_size, 922 + bdev_zone_sectors(sb->s_bdev) << SECTOR_SHIFT); 923 + return -EINVAL; 924 + } 925 + 926 + z->z_capacity = min_t(loff_t, MAX_LFS_FILESIZE, 927 + zone->capacity << SECTOR_SHIFT); 928 + z->z_wpoffset = zonefs_check_zone_condition(sb, z, zone); 929 + 930 + sb->s_maxbytes = max(z->z_capacity, sb->s_maxbytes); 931 + sbi->s_blocks += z->z_capacity >> sb->s_blocksize_bits; 932 + sbi->s_used_blocks += z->z_wpoffset >> sb->s_blocksize_bits; 933 + 934 + /* 935 + * For sequential zones, make sure that any open zone is closed 936 + * first to ensure that the initial number of open zones is 0, 937 + * in sync with the open zone accounting done when the mount 938 + * option ZONEFS_MNTOPT_EXPLICIT_OPEN is used. 
939 + */ 940 + if (ztype == ZONEFS_ZTYPE_SEQ && 941 + (zone->cond == BLK_ZONE_COND_IMP_OPEN || 942 + zone->cond == BLK_ZONE_COND_EXP_OPEN)) { 943 + ret = zonefs_zone_mgmt(sb, z, REQ_OP_ZONE_CLOSE); 944 + if (ret) 945 + return ret; 946 + } 947 + 948 + zonefs_account_active(sb, z); 949 + 950 + n++; 951 + } 952 + 953 + if (WARN_ON_ONCE(n != zgroup->g_nr_zones)) 954 + return -EINVAL; 955 + 956 + zonefs_info(sb, "Zone group \"%s\" has %u file%s\n", 957 + zonefs_zgroup_name(ztype), 958 + zgroup->g_nr_zones, 959 + zgroup->g_nr_zones > 1 ? "s" : ""); 960 + 961 + return 0; 962 + } 963 + 964 + static void zonefs_free_zgroups(struct super_block *sb) 965 + { 966 + struct zonefs_sb_info *sbi = ZONEFS_SB(sb); 967 + enum zonefs_ztype ztype; 968 + 969 + if (!sbi) 970 + return; 971 + 972 + for (ztype = 0; ztype < ZONEFS_ZTYPE_MAX; ztype++) { 973 + kvfree(sbi->s_zgroup[ztype].g_zones); 974 + sbi->s_zgroup[ztype].g_zones = NULL; 975 + } 976 + } 977 + 978 + /* 979 + * Create a zone group and populate it with zone files. 
980 + */ 981 + static int zonefs_init_zgroups(struct super_block *sb) 982 + { 983 + struct zonefs_zone_data zd; 984 + enum zonefs_ztype ztype; 985 + int ret; 986 + 987 + /* First get the device zone information */ 988 + memset(&zd, 0, sizeof(struct zonefs_zone_data)); 989 + zd.sb = sb; 990 + ret = zonefs_get_zone_info(&zd); 991 + if (ret) 992 + goto cleanup; 993 + 994 + /* Allocate and initialize the zone groups */ 995 + for (ztype = 0; ztype < ZONEFS_ZTYPE_MAX; ztype++) { 996 + ret = zonefs_init_zgroup(sb, &zd, ztype); 997 + if (ret) { 998 + zonefs_info(sb, 999 + "Zone group \"%s\" initialization failed\n", 1000 + zonefs_zgroup_name(ztype)); 1001 + break; 1002 + } 1003 + } 1004 + 1005 + cleanup: 1006 + zonefs_free_zone_info(&zd); 1007 + if (ret) 1008 + zonefs_free_zgroups(sb); 1009 + 1010 + return ret; 797 1011 } 798 1012 799 1013 /* ··· 1055 945 */ 1056 946 static int zonefs_fill_super(struct super_block *sb, void *data, int silent) 1057 947 { 1058 - struct zonefs_zone_data zd; 1059 948 struct zonefs_sb_info *sbi; 1060 949 struct inode *inode; 1061 950 enum zonefs_ztype t; ··· 1107 998 if (ret) 1108 999 return ret; 1109 1000 1110 - memset(&zd, 0, sizeof(struct zonefs_zone_data)); 1111 - zd.sb = sb; 1112 - ret = zonefs_get_zone_info(&zd); 1113 - if (ret) 1114 - goto cleanup; 1115 - 1116 - ret = zonefs_sysfs_register(sb); 1117 - if (ret) 1118 - goto cleanup; 1119 - 1120 1001 zonefs_info(sb, "Mounting %u zones", bdev_nr_zones(sb->s_bdev)); 1121 1002 1122 1003 if (!sbi->s_max_wro_seq_files && ··· 1116 1017 "No open and active zone limits. 
Ignoring explicit_open mount option\n"); 1117 1018 sbi->s_mount_opts &= ~ZONEFS_MNTOPT_EXPLICIT_OPEN; 1118 1019 } 1020 + 1021 + /* Initialize the zone groups */ 1022 + ret = zonefs_init_zgroups(sb); 1023 + if (ret) 1024 + goto cleanup; 1119 1025 1120 1026 /* Create root directory inode */ 1121 1027 ret = -ENOMEM; ··· 1141 1037 1142 1038 /* Create and populate files in zone groups directories */ 1143 1039 for (t = 0; t < ZONEFS_ZTYPE_MAX; t++) { 1144 - ret = zonefs_create_zgroup(&zd, t); 1040 + ret = zonefs_create_zgroup_inodes(sb, t); 1145 1041 if (ret) 1146 - break; 1042 + goto cleanup; 1147 1043 } 1148 1044 1045 + ret = zonefs_sysfs_register(sb); 1046 + if (ret) 1047 + goto cleanup; 1048 + 1049 + return 0; 1050 + 1149 1051 cleanup: 1150 - zonefs_cleanup_zone_info(&zd); 1052 + zonefs_free_zgroups(sb); 1151 1053 1152 1054 return ret; 1153 1055 } ··· 1172 1062 d_genocide(sb->s_root); 1173 1063 1174 1064 zonefs_sysfs_unregister(sb); 1065 + zonefs_free_zgroups(sb); 1175 1066 kill_block_super(sb); 1176 1067 kfree(sbi); 1177 1068 }
+11 -9
fs/zonefs/trace.h
··· 20 20 #define show_dev(dev) MAJOR(dev), MINOR(dev) 21 21 22 22 TRACE_EVENT(zonefs_zone_mgmt, 23 - TP_PROTO(struct inode *inode, enum req_op op), 24 - TP_ARGS(inode, op), 23 + TP_PROTO(struct super_block *sb, struct zonefs_zone *z, 24 + enum req_op op), 25 + TP_ARGS(sb, z, op), 25 26 TP_STRUCT__entry( 26 27 __field(dev_t, dev) 27 28 __field(ino_t, ino) ··· 31 30 __field(sector_t, nr_sectors) 32 31 ), 33 32 TP_fast_assign( 34 - __entry->dev = inode->i_sb->s_dev; 35 - __entry->ino = inode->i_ino; 33 + __entry->dev = sb->s_dev; 34 + __entry->ino = 35 + z->z_sector >> ZONEFS_SB(sb)->s_zone_sectors_shift; 36 36 __entry->op = op; 37 - __entry->sector = ZONEFS_I(inode)->i_zsector; 38 - __entry->nr_sectors = 39 - ZONEFS_I(inode)->i_zone_size >> SECTOR_SHIFT; 37 + __entry->sector = z->z_sector; 38 + __entry->nr_sectors = z->z_size >> SECTOR_SHIFT; 40 39 ), 41 40 TP_printk("bdev=(%d,%d), ino=%lu op=%s, sector=%llu, nr_sectors=%llu", 42 41 show_dev(__entry->dev), (unsigned long)__entry->ino, ··· 59 58 TP_fast_assign( 60 59 __entry->dev = inode->i_sb->s_dev; 61 60 __entry->ino = inode->i_ino; 62 - __entry->sector = ZONEFS_I(inode)->i_zsector; 61 + __entry->sector = zonefs_inode_zone(inode)->z_sector; 63 62 __entry->size = size; 64 - __entry->wpoffset = ZONEFS_I(inode)->i_wpoffset; 63 + __entry->wpoffset = 64 + zonefs_inode_zone(inode)->z_wpoffset; 65 65 __entry->ret = ret; 66 66 ), 67 67 TP_printk("bdev=(%d, %d), ino=%lu, sector=%llu, size=%zu, wpoffset=%llu, ret=%zu",
+43 -22
fs/zonefs/zonefs.h
··· 47 47 #define ZONEFS_ZONE_CNV (1U << 31) 48 48 49 49 /* 50 + * In-memory per-file inode zone data. 51 + */ 52 + struct zonefs_zone { 53 + /* Zone state flags */ 54 + unsigned int z_flags; 55 + 56 + /* Zone start sector (512B unit) */ 57 + sector_t z_sector; 58 + 59 + /* Zone size (bytes) */ 60 + loff_t z_size; 61 + 62 + /* Zone capacity (file maximum size, bytes) */ 63 + loff_t z_capacity; 64 + 65 + /* Write pointer offset in the zone (sequential zones only, bytes) */ 66 + loff_t z_wpoffset; 67 + }; 68 + 69 + /* 70 + * In memory zone group information: all zones of a group are exposed 71 + * as files, one file per zone. 72 + */ 73 + struct zonefs_zone_group { 74 + unsigned int g_nr_zones; 75 + struct zonefs_zone *g_zones; 76 + }; 77 + 78 + /* 50 79 * In-memory inode data. 51 80 */ 52 81 struct zonefs_inode_info { 53 82 struct inode i_vnode; 54 - 55 - /* File zone start sector (512B unit) */ 56 - sector_t i_zsector; 57 - 58 - /* File zone write pointer position (sequential zones only) */ 59 - loff_t i_wpoffset; 60 - 61 - /* File maximum size */ 62 - loff_t i_max_size; 63 - 64 - /* File zone size */ 65 - loff_t i_zone_size; 66 83 67 84 /* 68 85 * To serialise fully against both syscall and mmap based IO and ··· 98 81 99 82 /* guarded by i_truncate_mutex */ 100 83 unsigned int i_wr_refcnt; 101 - unsigned int i_flags; 102 84 }; 103 85 104 86 static inline struct zonefs_inode_info *ZONEFS_I(struct inode *inode) ··· 105 89 return container_of(inode, struct zonefs_inode_info, i_vnode); 106 90 } 107 91 108 - static inline bool zonefs_zone_is_cnv(struct zonefs_inode_info *zi) 92 + static inline bool zonefs_zone_is_cnv(struct zonefs_zone *z) 109 93 { 110 - return zi->i_flags & ZONEFS_ZONE_CNV; 94 + return z->z_flags & ZONEFS_ZONE_CNV; 111 95 } 112 96 113 - static inline bool zonefs_zone_is_seq(struct zonefs_inode_info *zi) 97 + static inline bool zonefs_zone_is_seq(struct zonefs_zone *z) 114 98 { 115 - return !zonefs_zone_is_cnv(zi); 99 + return !zonefs_zone_is_cnv(z); 
100 + } 101 + 102 + static inline struct zonefs_zone *zonefs_inode_zone(struct inode *inode) 103 + { 104 + return inode->i_private; 116 105 } 117 106 118 107 static inline bool zonefs_inode_is_cnv(struct inode *inode) 119 108 { 120 - return zonefs_zone_is_cnv(ZONEFS_I(inode)); 109 + return zonefs_zone_is_cnv(zonefs_inode_zone(inode)); 121 110 } 122 111 123 112 static inline bool zonefs_inode_is_seq(struct inode *inode) 124 113 { 125 - return zonefs_zone_is_seq(ZONEFS_I(inode)); 114 + return zonefs_zone_is_seq(zonefs_inode_zone(inode)); 126 115 } 127 116 128 117 /* ··· 221 200 uuid_t s_uuid; 222 201 unsigned int s_zone_sectors_shift; 223 202 224 - unsigned int s_nr_files[ZONEFS_ZTYPE_MAX]; 203 + struct zonefs_zone_group s_zgroup[ZONEFS_ZTYPE_MAX]; 225 204 226 205 loff_t s_blocks; 227 206 loff_t s_used_blocks; ··· 250 229 pr_warn("zonefs (%s) WARNING: " format, sb->s_id, ## args) 251 230 252 231 /* In super.c */ 253 - void zonefs_account_active(struct inode *inode); 254 - int zonefs_zone_mgmt(struct inode *inode, enum req_op op); 232 + void zonefs_inode_account_active(struct inode *inode); 233 + int zonefs_inode_zone_mgmt(struct inode *inode, enum req_op op); 255 234 void zonefs_i_size_write(struct inode *inode, loff_t isize); 256 235 void zonefs_update_stats(struct inode *inode, loff_t new_isize); 257 236 void __zonefs_io_error(struct inode *inode, bool write);