Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

bcachefs: closure per journal buf

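Preparatory work for having multiple journal writes in flight: each journal buffer now has its own closure, replacing the single closure that was embedded in struct journal.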

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>

+41 -23
+12 -6
fs/bcachefs/journal.c
··· 186 186 187 187 if (__bch2_journal_pin_put(j, seq)) 188 188 bch2_journal_reclaim_fast(j); 189 - if (write) 190 - closure_call(&j->io, bch2_journal_write, j->wq, NULL); 189 + if (write) { 190 + struct journal_buf *w = j->buf + (seq & JOURNAL_BUF_MASK); 191 + closure_call(&w->io, bch2_journal_write, j->wq, NULL); 192 + } 191 193 } 192 194 193 195 /* ··· 1276 1274 unsigned nr_bvecs = DIV_ROUND_UP(JOURNAL_ENTRY_SIZE_MAX, PAGE_SIZE); 1277 1275 1278 1276 for (unsigned i = 0; i < ARRAY_SIZE(ja->bio); i++) { 1279 - ja->bio[i] = bio_kmalloc(nr_bvecs, GFP_KERNEL); 1277 + ja->bio[i] = kmalloc(struct_size(ja->bio[i], bio.bi_inline_vecs, 1278 + nr_bvecs), GFP_KERNEL); 1280 1279 if (!ja->bio[i]) 1281 1280 return -BCH_ERR_ENOMEM_dev_journal_init; 1282 - bio_init(ja->bio[i], NULL, ja->bio[i]->bi_inline_vecs, nr_bvecs, 0); 1281 + 1282 + ja->bio[i]->ca = ca; 1283 + ja->bio[i]->buf_idx = i; 1284 + bio_init(&ja->bio[i]->bio, NULL, ja->bio[i]->bio.bi_inline_vecs, nr_bvecs, 0); 1283 1285 } 1284 1286 1285 1287 ja->buckets = kcalloc(ja->nr, sizeof(u64), GFP_KERNEL); ··· 1346 1340 j->buf[i].data = kvpmalloc(j->buf[i].buf_size, GFP_KERNEL); 1347 1341 if (!j->buf[i].data) 1348 1342 return -BCH_ERR_ENOMEM_journal_buf; 1343 + j->buf[i].idx = i; 1349 1344 } 1350 1345 1351 1346 j->pin.front = j->pin.back = 1; ··· 1466 1459 { 1467 1460 struct journal_entry_pin_list *pin_list; 1468 1461 struct journal_entry_pin *pin; 1469 - unsigned i; 1470 1462 1471 1463 spin_lock(&j->lock); 1472 1464 *seq = max(*seq, j->pin.front); ··· 1483 1477 prt_newline(out); 1484 1478 printbuf_indent_add(out, 2); 1485 1479 1486 - for (i = 0; i < ARRAY_SIZE(pin_list->list); i++) 1480 + for (unsigned i = 0; i < ARRAY_SIZE(pin_list->list); i++) 1487 1481 list_for_each_entry(pin, &pin_list->list[i], list) { 1488 1482 prt_printf(out, "\t%px %ps", pin, pin->flush); 1489 1483 prt_newline(out);
+19 -15
fs/bcachefs/journal_io.c
··· 1597 1597 1598 1598 static CLOSURE_CALLBACK(journal_write_done) 1599 1599 { 1600 - closure_type(j, struct journal, io); 1600 + closure_type(w, struct journal_buf, io); 1601 + struct journal *j = container_of(w, struct journal, buf[w->idx]); 1601 1602 struct bch_fs *c = container_of(j, struct bch_fs, journal); 1602 - struct journal_buf *w = journal_last_unwritten_buf(j); 1603 1603 struct bch_replicas_padded replicas; 1604 1604 union journal_res_state old, new; 1605 1605 u64 v, seq; ··· 1676 1676 1677 1677 if (!journal_state_count(new, new.unwritten_idx) && 1678 1678 journal_last_unwritten_seq(j) <= journal_cur_seq(j)) { 1679 + struct journal_buf *w = j->buf + (journal_last_unwritten_seq(j) & JOURNAL_BUF_MASK); 1679 1680 spin_unlock(&j->lock); 1680 - closure_call(&j->io, bch2_journal_write, j->wq, NULL); 1681 + closure_call(&w->io, bch2_journal_write, j->wq, NULL); 1681 1682 } else if (journal_last_unwritten_seq(j) == journal_cur_seq(j) && 1682 1683 new.cur_entry_offset < JOURNAL_ENTRY_CLOSED_VAL) { 1683 1684 struct journal_buf *buf = journal_cur_buf(j); ··· 1699 1698 1700 1699 static void journal_write_endio(struct bio *bio) 1701 1700 { 1702 - struct bch_dev *ca = bio->bi_private; 1701 + struct journal_bio *jbio = container_of(bio, struct journal_bio, bio); 1702 + struct bch_dev *ca = jbio->ca; 1703 1703 struct journal *j = &ca->fs->journal; 1704 - struct journal_buf *w = journal_last_unwritten_buf(j); 1705 - unsigned long flags; 1704 + struct journal_buf *w = j->buf + jbio->buf_idx; 1706 1705 1707 1706 if (bch2_dev_io_err_on(bio->bi_status, ca, BCH_MEMBER_ERROR_write, 1708 1707 "error writing journal entry %llu: %s", 1709 1708 le64_to_cpu(w->data->seq), 1710 1709 bch2_blk_status_to_str(bio->bi_status)) || 1711 1710 bch2_meta_write_fault("journal")) { 1711 + unsigned long flags; 1712 + 1712 1713 spin_lock_irqsave(&j->err_lock, flags); 1713 1714 bch2_dev_list_drop_dev(&w->devs_written, ca->dev_idx); 1714 1715 spin_unlock_irqrestore(&j->err_lock, flags); 1715 1716 
} 1716 1717 1717 - closure_put(&j->io); 1718 + closure_put(&w->io); 1718 1719 percpu_ref_put(&ca->io_ref); 1719 1720 } 1720 1721 1721 1722 static CLOSURE_CALLBACK(do_journal_write) 1722 1723 { 1723 - closure_type(j, struct journal, io); 1724 + closure_type(w, struct journal_buf, io); 1725 + struct journal *j = container_of(w, struct journal, buf[w->idx]); 1724 1726 struct bch_fs *c = container_of(j, struct bch_fs, journal); 1725 - unsigned buf_idx = journal_last_unwritten_seq(j) & JOURNAL_BUF_MASK; 1726 - struct journal_buf *w = j->buf + buf_idx; 1727 1727 unsigned sectors = vstruct_sectors(w->data, c->block_bits); 1728 1728 1729 1729 extent_for_each_ptr(bkey_i_to_s_extent(&w->key), ptr) { ··· 1740 1738 this_cpu_add(ca->io_done->sectors[WRITE][BCH_DATA_journal], 1741 1739 sectors); 1742 1740 1743 - struct bio *bio = ja->bio[buf_idx]; 1741 + struct bio *bio = &ja->bio[w->idx]->bio; 1744 1742 bio_reset(bio, ca->disk_sb.bdev, REQ_OP_WRITE|REQ_SYNC|REQ_META); 1745 1743 bio->bi_iter.bi_sector = ptr->offset; 1746 1744 bio->bi_end_io = journal_write_endio; ··· 1939 1937 1940 1938 CLOSURE_CALLBACK(bch2_journal_write) 1941 1939 { 1942 - closure_type(j, struct journal, io); 1940 + closure_type(w, struct journal_buf, io); 1941 + struct journal *j = container_of(w, struct journal, buf[w->idx]); 1943 1942 struct bch_fs *c = container_of(j, struct bch_fs, journal); 1944 - unsigned buf_idx = journal_last_unwritten_seq(j) & JOURNAL_BUF_MASK; 1945 - struct journal_buf *w = j->buf + buf_idx; 1946 1943 struct bch_replicas_padded replicas; 1947 1944 struct printbuf journal_debug_buf = PRINTBUF; 1948 1945 unsigned nr_rw_members = 0; ··· 2020 2019 if (ret) 2021 2020 goto err; 2022 2021 2022 + if (!JSET_NO_FLUSH(w->data)) 2023 + closure_wait_event(&j->async_wait, j->seq_ondisk + 1 == le64_to_cpu(w->data->seq)); 2024 + 2023 2025 if (!JSET_NO_FLUSH(w->data) && w->separate_flush) { 2024 2026 for_each_rw_member(c, ca) { 2025 2027 percpu_ref_get(&ca->io_ref); 2026 2028 2027 2029 struct 
journal_device *ja = &ca->journal; 2028 - struct bio *bio = ja->bio[buf_idx]; 2030 + struct bio *bio = &ja->bio[w->idx]->bio; 2029 2031 bio_reset(bio, ca->disk_sb.bdev, 2030 2032 REQ_OP_WRITE|REQ_SYNC|REQ_META|REQ_PREFLUSH); 2031 2033 bio->bi_end_io = journal_write_endio;
+10 -2
fs/bcachefs/journal_types.h
··· 18 18 * the journal that are being staged or in flight. 19 19 */ 20 20 struct journal_buf { 21 + struct closure io; 21 22 struct jset *data; 22 23 23 24 __BKEY_PADDED(key, BCH_REPLICAS_MAX); ··· 38 37 bool must_flush; /* something wants a flush */ 39 38 bool separate_flush; 40 39 bool need_flush_to_write_buffer; 40 + u8 idx; 41 41 }; 42 42 43 43 /* ··· 152 150 153 151 typedef DARRAY(u64) darray_u64; 154 152 153 + struct journal_bio { 154 + struct bch_dev *ca; 155 + unsigned buf_idx; 156 + 157 + struct bio bio; 158 + }; 159 + 155 160 /* Embedded in struct bch_fs */ 156 161 struct journal { 157 162 /* Fastpath stuff up front: */ ··· 213 204 wait_queue_head_t wait; 214 205 struct closure_waitlist async_wait; 215 206 216 - struct closure io; 217 207 struct delayed_work write_work; 218 208 struct workqueue_struct *wq; 219 209 ··· 323 315 u64 *buckets; 324 316 325 317 /* Bio for journal reads/writes to this device */ 326 - struct bio *bio[JOURNAL_BUF_NR]; 318 + struct journal_bio *bio[JOURNAL_BUF_NR]; 327 319 328 320 /* for bch_journal_read_device */ 329 321 struct closure read;