at v4.8 7.9 kB view raw
1/* 2 * fs/logfs/dev_bdev.c - Device access methods for block devices 3 * 4 * As should be obvious for Linux kernel code, license is GPLv2 5 * 6 * Copyright (c) 2005-2008 Joern Engel <joern@logfs.org> 7 */ 8#include "logfs.h" 9#include <linux/bio.h> 10#include <linux/blkdev.h> 11#include <linux/buffer_head.h> 12#include <linux/gfp.h> 13#include <linux/prefetch.h> 14 15#define PAGE_OFS(ofs) ((ofs) & (PAGE_SIZE-1)) 16 17static int sync_request(struct page *page, struct block_device *bdev, int op) 18{ 19 struct bio bio; 20 struct bio_vec bio_vec; 21 22 bio_init(&bio); 23 bio.bi_max_vecs = 1; 24 bio.bi_io_vec = &bio_vec; 25 bio_vec.bv_page = page; 26 bio_vec.bv_len = PAGE_SIZE; 27 bio_vec.bv_offset = 0; 28 bio.bi_vcnt = 1; 29 bio.bi_bdev = bdev; 30 bio.bi_iter.bi_sector = page->index * (PAGE_SIZE >> 9); 31 bio.bi_iter.bi_size = PAGE_SIZE; 32 bio_set_op_attrs(&bio, op, 0); 33 34 return submit_bio_wait(&bio); 35} 36 37static int bdev_readpage(void *_sb, struct page *page) 38{ 39 struct super_block *sb = _sb; 40 struct block_device *bdev = logfs_super(sb)->s_bdev; 41 int err; 42 43 err = sync_request(page, bdev, READ); 44 if (err) { 45 ClearPageUptodate(page); 46 SetPageError(page); 47 } else { 48 SetPageUptodate(page); 49 ClearPageError(page); 50 } 51 unlock_page(page); 52 return err; 53} 54 55static DECLARE_WAIT_QUEUE_HEAD(wq); 56 57static void writeseg_end_io(struct bio *bio) 58{ 59 struct bio_vec *bvec; 60 int i; 61 struct super_block *sb = bio->bi_private; 62 struct logfs_super *super = logfs_super(sb); 63 64 BUG_ON(bio->bi_error); /* FIXME: Retry io or write elsewhere */ 65 66 bio_for_each_segment_all(bvec, bio, i) { 67 end_page_writeback(bvec->bv_page); 68 put_page(bvec->bv_page); 69 } 70 bio_put(bio); 71 if (atomic_dec_and_test(&super->s_pending_writes)) 72 wake_up(&wq); 73} 74 75static int __bdev_writeseg(struct super_block *sb, u64 ofs, pgoff_t index, 76 size_t nr_pages) 77{ 78 struct logfs_super *super = logfs_super(sb); 79 struct address_space *mapping = super->s_mapping_inode->i_mapping; 80 struct bio *bio; 81 struct page *page; 82 unsigned int max_pages; 83 int i; 84 85 max_pages = min_t(size_t, nr_pages, BIO_MAX_PAGES); 86 87 bio = bio_alloc(GFP_NOFS, max_pages); 88 BUG_ON(!bio); 89 90 for (i = 0; i < nr_pages; i++) { 91 if (i >= max_pages) { 92 /* Block layer cannot split bios :( */ 93 bio->bi_vcnt = i; 94 bio->bi_iter.bi_size = i * PAGE_SIZE; 95 bio->bi_bdev = super->s_bdev; 96 bio->bi_iter.bi_sector = ofs >> 9; 97 bio->bi_private = sb; 98 bio->bi_end_io = writeseg_end_io; 99 bio_set_op_attrs(bio, REQ_OP_WRITE, 0); 100 atomic_inc(&super->s_pending_writes); 101 submit_bio(bio); 102 103 ofs += i * PAGE_SIZE; 104 index += i; 105 nr_pages -= i; 106 i = 0; 107 108 bio = bio_alloc(GFP_NOFS, max_pages); 109 BUG_ON(!bio); 110 } 111 page = find_lock_page(mapping, index + i); 112 BUG_ON(!page); 113 bio->bi_io_vec[i].bv_page = page; 114 bio->bi_io_vec[i].bv_len = PAGE_SIZE; 115 bio->bi_io_vec[i].bv_offset = 0; 116 117 BUG_ON(PageWriteback(page)); 118 set_page_writeback(page); 119 unlock_page(page); 120 } 121 bio->bi_vcnt = nr_pages; 122 bio->bi_iter.bi_size = nr_pages * PAGE_SIZE; 123 bio->bi_bdev = super->s_bdev; 124 bio->bi_iter.bi_sector = ofs >> 9; 125 bio->bi_private = sb; 126 bio->bi_end_io = writeseg_end_io; 127 bio_set_op_attrs(bio, REQ_OP_WRITE, 0); 128 atomic_inc(&super->s_pending_writes); 129 submit_bio(bio); 130 return 0; 131} 132 133static void bdev_writeseg(struct super_block *sb, u64 ofs, size_t len) 134{ 135 struct logfs_super *super = logfs_super(sb); 136 int head; 137 138 BUG_ON(super->s_flags & LOGFS_SB_FLAG_RO); 139 140 if (len == 0) { 141 /* This can happen when the object fit perfectly into a 142 * segment, the segment gets written per sync and subsequently 143 * closed. 144 */ 145 return; 146 } 147 head = ofs & (PAGE_SIZE - 1); 148 if (head) { 149 ofs -= head; 150 len += head; 151 } 152 len = PAGE_ALIGN(len); 153 __bdev_writeseg(sb, ofs, ofs >> PAGE_SHIFT, len >> PAGE_SHIFT); 154} 155 156 157static void erase_end_io(struct bio *bio) 158{ 159 struct super_block *sb = bio->bi_private; 160 struct logfs_super *super = logfs_super(sb); 161 162 BUG_ON(bio->bi_error); /* FIXME: Retry io or write elsewhere */ 163 BUG_ON(bio->bi_vcnt == 0); 164 bio_put(bio); 165 if (atomic_dec_and_test(&super->s_pending_writes)) 166 wake_up(&wq); 167} 168 169static int do_erase(struct super_block *sb, u64 ofs, pgoff_t index, 170 size_t nr_pages) 171{ 172 struct logfs_super *super = logfs_super(sb); 173 struct bio *bio; 174 unsigned int max_pages; 175 int i; 176 177 max_pages = min_t(size_t, nr_pages, BIO_MAX_PAGES); 178 179 bio = bio_alloc(GFP_NOFS, max_pages); 180 BUG_ON(!bio); 181 182 for (i = 0; i < nr_pages; i++) { 183 if (i >= max_pages) { 184 /* Block layer cannot split bios :( */ 185 bio->bi_vcnt = i; 186 bio->bi_iter.bi_size = i * PAGE_SIZE; 187 bio->bi_bdev = super->s_bdev; 188 bio->bi_iter.bi_sector = ofs >> 9; 189 bio->bi_private = sb; 190 bio->bi_end_io = erase_end_io; 191 bio_set_op_attrs(bio, REQ_OP_WRITE, 0); 192 atomic_inc(&super->s_pending_writes); 193 submit_bio(bio); 194 195 ofs += i * PAGE_SIZE; 196 index += i; 197 nr_pages -= i; 198 i = 0; 199 200 bio = bio_alloc(GFP_NOFS, max_pages); 201 BUG_ON(!bio); 202 } 203 bio->bi_io_vec[i].bv_page = super->s_erase_page; 204 bio->bi_io_vec[i].bv_len = PAGE_SIZE; 205 bio->bi_io_vec[i].bv_offset = 0; 206 } 207 bio->bi_vcnt = nr_pages; 208 bio->bi_iter.bi_size = nr_pages * PAGE_SIZE; 209 bio->bi_bdev = super->s_bdev; 210 bio->bi_iter.bi_sector = ofs >> 9; 211 bio->bi_private = sb; 212 bio->bi_end_io = erase_end_io; 213 bio_set_op_attrs(bio, REQ_OP_WRITE, 0); 214 atomic_inc(&super->s_pending_writes); 215 submit_bio(bio); 216 return 0; 217} 218 219static int bdev_erase(struct super_block *sb, loff_t to, size_t len, 220 int ensure_write) 221{ 222 struct logfs_super *super = logfs_super(sb); 223 224 BUG_ON(to & (PAGE_SIZE - 1)); 225 BUG_ON(len & (PAGE_SIZE - 1)); 226 227 if (super->s_flags & LOGFS_SB_FLAG_RO) 228 return -EROFS; 229 230 if (ensure_write) { 231 /* 232 * Object store doesn't care whether erases happen or not. 233 * But for the journal they are required. Otherwise a scan 234 * can find an old commit entry and assume it is the current 235 * one, travelling back in time. 236 */ 237 do_erase(sb, to, to >> PAGE_SHIFT, len >> PAGE_SHIFT); 238 } 239 240 return 0; 241} 242 243static void bdev_sync(struct super_block *sb) 244{ 245 struct logfs_super *super = logfs_super(sb); 246 247 wait_event(wq, atomic_read(&super->s_pending_writes) == 0); 248} 249 250static struct page *bdev_find_first_sb(struct super_block *sb, u64 *ofs) 251{ 252 struct logfs_super *super = logfs_super(sb); 253 struct address_space *mapping = super->s_mapping_inode->i_mapping; 254 filler_t *filler = bdev_readpage; 255 256 *ofs = 0; 257 return read_cache_page(mapping, 0, filler, sb); 258} 259 260static struct page *bdev_find_last_sb(struct super_block *sb, u64 *ofs) 261{ 262 struct logfs_super *super = logfs_super(sb); 263 struct address_space *mapping = super->s_mapping_inode->i_mapping; 264 filler_t *filler = bdev_readpage; 265 u64 pos = (super->s_bdev->bd_inode->i_size & ~0xfffULL) - 0x1000; 266 pgoff_t index = pos >> PAGE_SHIFT; 267 268 *ofs = pos; 269 return read_cache_page(mapping, index, filler, sb); 270} 271 272static int bdev_write_sb(struct super_block *sb, struct page *page) 273{ 274 struct block_device *bdev = logfs_super(sb)->s_bdev; 275 276 /* Nothing special to do for block devices. */ 277 return sync_request(page, bdev, WRITE); 278} 279 280static void bdev_put_device(struct logfs_super *s) 281{ 282 blkdev_put(s->s_bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); 283} 284 285static int bdev_can_write_buf(struct super_block *sb, u64 ofs) 286{ 287 return 0; 288} 289 290static const struct logfs_device_ops bd_devops = { 291 .find_first_sb = bdev_find_first_sb, 292 .find_last_sb = bdev_find_last_sb, 293 .write_sb = bdev_write_sb, 294 .readpage = bdev_readpage, 295 .writeseg = bdev_writeseg, 296 .erase = bdev_erase, 297 .can_write_buf = bdev_can_write_buf, 298 .sync = bdev_sync, 299 .put_device = bdev_put_device, 300}; 301 302int logfs_get_sb_bdev(struct logfs_super *p, struct file_system_type *type, 303 const char *devname) 304{ 305 struct block_device *bdev; 306 307 bdev = blkdev_get_by_path(devname, FMODE_READ|FMODE_WRITE|FMODE_EXCL, 308 type); 309 if (IS_ERR(bdev)) 310 return PTR_ERR(bdev); 311 312 if (MAJOR(bdev->bd_dev) == MTD_BLOCK_MAJOR) { 313 int mtdnr = MINOR(bdev->bd_dev); 314 blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); 315 return logfs_get_sb_mtd(p, mtdnr); 316 } 317 318 p->s_bdev = bdev; 319 p->s_mtd = NULL; 320 p->s_devops = &bd_devops; 321 return 0; 322}