···40#include "xfs_rw.h"41#include "xfs_iomap.h"42#include <linux/mpage.h>043#include <linux/writeback.h>4445STATIC void xfs_count_page_state(struct page *, int *, int *, int *);46-STATIC void xfs_convert_page(struct inode *, struct page *, xfs_iomap_t *,47- struct writeback_control *wbc, void *, int, int);4849#if defined(XFS_RW_TRACE)50void···54 int mask)55{56 xfs_inode_t *ip;57- bhv_desc_t *bdp;58 vnode_t *vp = LINVFS_GET_VP(inode);59 loff_t isize = i_size_read(inode);60- loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;61 int delalloc = -1, unmapped = -1, unwritten = -1;6263 if (page_has_buffers(page))64 xfs_count_page_state(page, &delalloc, &unmapped, &unwritten);6566- bdp = vn_bhv_lookup(VN_BHV_HEAD(vp), &xfs_vnodeops);67- ip = XFS_BHVTOI(bdp);68 if (!ip->i_rwtrace)69 return;70···100 queue_work(xfsdatad_workqueue, &ioend->io_work);101}102000000103STATIC void104xfs_destroy_ioend(105 xfs_ioend_t *ioend)106{0000000107 vn_iowake(ioend->io_vnode);108 mempool_free(ioend, xfs_ioend_pool);109}110111/*0000000000000000000000000000112 * Issue transactions to convert a buffer range from unwritten113 * to written extents.114 */···161 vnode_t *vp = ioend->io_vnode;162 xfs_off_t offset = ioend->io_offset;163 size_t size = ioend->io_size;164- struct buffer_head *bh, *next;165 int error;166167 if (ioend->io_uptodate)168 VOP_BMAP(vp, offset, size, BMAPI_UNWRITTEN, NULL, NULL, error);169-170- /* ioend->io_buffer_head is only non-NULL for buffered I/O */171- for (bh = ioend->io_buffer_head; bh; bh = next) {172- next = bh->b_private;173-174- bh->b_end_io = NULL;175- clear_buffer_unwritten(bh);176- end_buffer_async_write(bh, ioend->io_uptodate);177- }178-179 xfs_destroy_ioend(ioend);180}181···176 */177STATIC xfs_ioend_t *178xfs_alloc_ioend(179- struct inode *inode)0180{181 xfs_ioend_t *ioend;182···190 */191 atomic_set(&ioend->io_remaining, 1);192 ioend->io_uptodate = 1; /* cleared if any I/O fails */00193 ioend->io_vnode = LINVFS_GET_VP(inode);194 ioend->io_buffer_head = NULL;0195 atomic_inc(&ioend->io_vnode->v_iocount);196 ioend->io_offset = 0;197 ioend->io_size = 0;198199- INIT_WORK(&ioend->io_work, xfs_end_bio_unwritten, ioend);00000200201 return ioend;202-}203-204-void205-linvfs_unwritten_done(206- struct buffer_head *bh,207- int uptodate)208-{209- xfs_ioend_t *ioend = bh->b_private;210- static spinlock_t unwritten_done_lock = SPIN_LOCK_UNLOCKED;211- unsigned long flags;212-213- ASSERT(buffer_unwritten(bh));214- bh->b_end_io = NULL;215-216- if (!uptodate)217- ioend->io_uptodate = 0;218-219- /*220- * Deep magic here. 
We reuse b_private in the buffer_heads to build221- * a chain for completing the I/O from user context after we've issued222- * a transaction to convert the unwritten extent.223- */224- spin_lock_irqsave(&unwritten_done_lock, flags);225- bh->b_private = ioend->io_buffer_head;226- ioend->io_buffer_head = bh;227- spin_unlock_irqrestore(&unwritten_done_lock, flags);228-229- xfs_finish_ioend(ioend);230}231232STATIC int···226 return -error;227}228229-/*230- * Finds the corresponding mapping in block @map array of the231- * given @offset within a @page.232- */233-STATIC xfs_iomap_t *234-xfs_offset_to_map(235- struct page *page,236 xfs_iomap_t *iomapp,237- unsigned long offset)238{239- loff_t full_offset; /* offset from start of file */00240241- ASSERT(offset < PAGE_CACHE_SIZE);000000000242243- full_offset = page->index; /* NB: using 64bit number */244- full_offset <<= PAGE_CACHE_SHIFT; /* offset from file start */245- full_offset += offset; /* offset from page start */246247- if (full_offset < iomapp->iomap_offset)248- return NULL;249- if (iomapp->iomap_offset + (iomapp->iomap_bsize -1) >= full_offset)250- return iomapp;251- return NULL;00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000252}253254STATIC void255xfs_map_at_offset(256- struct page *page,257 struct buffer_head *bh,258- unsigned long offset,259 int block_bits,260 xfs_iomap_t *iomapp)261{262 xfs_daddr_t bn;263- loff_t delta;264 int sector_shift;265266 ASSERT(!(iomapp->iomap_flags & IOMAP_HOLE));267 ASSERT(!(iomapp->iomap_flags & IOMAP_DELAY));268 ASSERT(iomapp->iomap_bn != IOMAP_DADDR_NULL);269270- delta = page->index;271- delta <<= PAGE_CACHE_SHIFT;272- delta += offset;273- delta -= iomapp->iomap_offset;274- delta >>= block_bits;275-276 sector_shift = block_bits - BBSHIFT;277- bn = iomapp->iomap_bn >> sector_shift;278- bn += delta;279- BUG_ON(!bn && !(iomapp->iomap_flags & IOMAP_REALTIME));0280 ASSERT((bn << sector_shift) >= iomapp->iomap_bn);281282 lock_buffer(bh);283 bh->b_blocknr = bn;284- bh->b_bdev = iomapp->iomap_target->pbr_bdev;285 set_buffer_mapped(bh);286 clear_buffer_delay(bh);0287}288289/*290- * Look for a page at index which is unlocked and contains our291- * unwritten extent flagged buffers at its head. 
Returns page292- * locked and with an extra reference count, and length of the293- * unwritten extent component on this page that we can write,294- * in units of filesystem blocks.295- */296-STATIC struct page *297-xfs_probe_unwritten_page(298- struct address_space *mapping,299- pgoff_t index,300- xfs_iomap_t *iomapp,301- xfs_ioend_t *ioend,302- unsigned long max_offset,303- unsigned long *fsbs,304- unsigned int bbits)305-{306- struct page *page;307-308- page = find_trylock_page(mapping, index);309- if (!page)310- return NULL;311- if (PageWriteback(page))312- goto out;313-314- if (page->mapping && page_has_buffers(page)) {315- struct buffer_head *bh, *head;316- unsigned long p_offset = 0;317-318- *fsbs = 0;319- bh = head = page_buffers(page);320- do {321- if (!buffer_unwritten(bh) || !buffer_uptodate(bh))322- break;323- if (!xfs_offset_to_map(page, iomapp, p_offset))324- break;325- if (p_offset >= max_offset)326- break;327- xfs_map_at_offset(page, bh, p_offset, bbits, iomapp);328- set_buffer_unwritten_io(bh);329- bh->b_private = ioend;330- p_offset += bh->b_size;331- (*fsbs)++;332- } while ((bh = bh->b_this_page) != head);333-334- if (p_offset)335- return page;336- }337-338-out:339- unlock_page(page);340- return NULL;341-}342-343-/*344- * Look for a page at index which is unlocked and not mapped345- * yet - clustering for mmap write case.346 */347STATIC unsigned int348-xfs_probe_unmapped_page(349- struct address_space *mapping,350- pgoff_t index,351- unsigned int pg_offset)352{353- struct page *page;354 int ret = 0;355356- page = find_trylock_page(mapping, index);357- if (!page)358- return 0;359 if (PageWriteback(page))360- goto out;361362 if (page->mapping && PageDirty(page)) {363 if (page_has_buffers(page)) {···487488 bh = head = page_buffers(page);489 do {490- if (buffer_mapped(bh) || !buffer_uptodate(bh))00491 break;492 ret += bh->b_size;493 if (ret >= pg_offset)494 break;495 } while ((bh = bh->b_this_page) != head);496 } else497- ret = PAGE_CACHE_SIZE;498 }499500-out:501- unlock_page(page);502 return ret;503}504505-STATIC unsigned int506-xfs_probe_unmapped_cluster(507 struct inode *inode,508 struct page *startpage,509 struct buffer_head *bh,510- struct buffer_head *head)0511{0512 pgoff_t tindex, tlast, tloff;513- unsigned int pg_offset, len, total = 0;514- struct address_space *mapping = inode->i_mapping;515516 /* First sum forwards in this page */517 do {518- if (buffer_mapped(bh))519- break;520 total += bh->b_size;521 } while ((bh = bh->b_this_page) != head);522523- /* If we reached the end of the page, sum forwards in524- * following pages.525- */526- if (bh == head) {527- tlast = i_size_read(inode) >> PAGE_CACHE_SHIFT;528- /* Prune this back to avoid pathological behavior */529- tloff = min(tlast, startpage->index + 64);530- for (tindex = startpage->index + 1; tindex < tloff; tindex++) {531- len = xfs_probe_unmapped_page(mapping, tindex,532- PAGE_CACHE_SIZE);533- if (!len)534- return total;00000000000000000000000000535 total += len;0536 }537- if (tindex == tlast &&538- (pg_offset = i_size_read(inode) & (PAGE_CACHE_SIZE - 1))) {539- total += xfs_probe_unmapped_page(mapping,540- tindex, pg_offset);541- }542 }0543 return total;544}545546/*547- * Probe for a given page (index) in the inode and test if it is delayed548- * and without unwritten buffers. 
Returns page locked and with an extra549- * reference count.550 */551-STATIC struct page *552-xfs_probe_delalloc_page(553- struct inode *inode,554- pgoff_t index)555{556- struct page *page;557-558- page = find_trylock_page(inode->i_mapping, index);559- if (!page)560- return NULL;561 if (PageWriteback(page))562- goto out;563564 if (page->mapping && page_has_buffers(page)) {565 struct buffer_head *bh, *head;···589590 bh = head = page_buffers(page);591 do {592- if (buffer_unwritten(bh)) {593- acceptable = 0;00000594 break;595- } else if (buffer_delay(bh)) {596- acceptable = 1;597- }598 } while ((bh = bh->b_this_page) != head);599600 if (acceptable)601- return page;602 }603604-out:605- unlock_page(page);606- return NULL;607-}608-609-STATIC int610-xfs_map_unwritten(611- struct inode *inode,612- struct page *start_page,613- struct buffer_head *head,614- struct buffer_head *curr,615- unsigned long p_offset,616- int block_bits,617- xfs_iomap_t *iomapp,618- struct writeback_control *wbc,619- int startio,620- int all_bh)621-{622- struct buffer_head *bh = curr;623- xfs_iomap_t *tmp;624- xfs_ioend_t *ioend;625- loff_t offset;626- unsigned long nblocks = 0;627-628- offset = start_page->index;629- offset <<= PAGE_CACHE_SHIFT;630- offset += p_offset;631-632- ioend = xfs_alloc_ioend(inode);633-634- /* First map forwards in the page consecutive buffers635- * covering this unwritten extent636- */637- do {638- if (!buffer_unwritten(bh))639- break;640- tmp = xfs_offset_to_map(start_page, iomapp, p_offset);641- if (!tmp)642- break;643- xfs_map_at_offset(start_page, bh, p_offset, block_bits, iomapp);644- set_buffer_unwritten_io(bh);645- bh->b_private = ioend;646- p_offset += bh->b_size;647- nblocks++;648- } while ((bh = bh->b_this_page) != head);649-650- atomic_add(nblocks, &ioend->io_remaining);651-652- /* If we reached the end of the page, map forwards in any653- * following pages which are also covered by this extent.654- */655- if (bh == head) {656- struct address_space *mapping = inode->i_mapping;657- pgoff_t tindex, tloff, tlast;658- unsigned long bs;659- unsigned int pg_offset, bbits = inode->i_blkbits;660- struct page *page;661-662- tlast = i_size_read(inode) >> PAGE_CACHE_SHIFT;663- tloff = (iomapp->iomap_offset + iomapp->iomap_bsize) >> PAGE_CACHE_SHIFT;664- tloff = min(tlast, tloff);665- for (tindex = start_page->index + 1; tindex < tloff; tindex++) {666- page = xfs_probe_unwritten_page(mapping,667- tindex, iomapp, ioend,668- PAGE_CACHE_SIZE, &bs, bbits);669- if (!page)670- break;671- nblocks += bs;672- atomic_add(bs, &ioend->io_remaining);673- xfs_convert_page(inode, page, iomapp, wbc, ioend,674- startio, all_bh);675- /* stop if converting the next page might add676- * enough blocks that the corresponding byte677- * count won't fit in our ulong page buf length */678- if (nblocks >= ((ULONG_MAX - PAGE_SIZE) >> block_bits))679- goto enough;680- }681-682- if (tindex == tlast &&683- (pg_offset = (i_size_read(inode) & (PAGE_CACHE_SIZE - 1)))) {684- page = xfs_probe_unwritten_page(mapping,685- tindex, iomapp, ioend,686- pg_offset, &bs, bbits);687- if (page) {688- nblocks += bs;689- atomic_add(bs, &ioend->io_remaining);690- xfs_convert_page(inode, page, iomapp, wbc, ioend,691- startio, all_bh);692- if (nblocks >= ((ULONG_MAX - PAGE_SIZE) >> block_bits))693- goto enough;694- }695- }696- }697-698-enough:699- ioend->io_size = (xfs_off_t)nblocks << block_bits;700- ioend->io_offset = offset;701- xfs_finish_ioend(ioend);702 return 0;703-}704-705-STATIC void706-xfs_submit_page(707- struct page *page,708- struct 
writeback_control *wbc,709- struct buffer_head *bh_arr[],710- int bh_count,711- int probed_page,712- int clear_dirty)713-{714- struct buffer_head *bh;715- int i;716-717- BUG_ON(PageWriteback(page));718- if (bh_count)719- set_page_writeback(page);720- if (clear_dirty)721- clear_page_dirty(page);722- unlock_page(page);723-724- if (bh_count) {725- for (i = 0; i < bh_count; i++) {726- bh = bh_arr[i];727- mark_buffer_async_write(bh);728- if (buffer_unwritten(bh))729- set_buffer_unwritten_io(bh);730- set_buffer_uptodate(bh);731- clear_buffer_dirty(bh);732- }733-734- for (i = 0; i < bh_count; i++)735- submit_bh(WRITE, bh_arr[i]);736-737- if (probed_page && clear_dirty)738- wbc->nr_to_write--; /* Wrote an "extra" page */739- }740}741742/*···612 * delalloc/unwritten pages only, for the original page it is possible613 * that the page has no mapping at all.614 */615-STATIC void616xfs_convert_page(617 struct inode *inode,618 struct page *page,619- xfs_iomap_t *iomapp,00620 struct writeback_control *wbc,621- void *private,622 int startio,623 int all_bh)624{625- struct buffer_head *bh_arr[MAX_BUF_PER_PAGE], *bh, *head;626- xfs_iomap_t *mp = iomapp, *tmp;627- unsigned long offset, end_offset;628- int index = 0;629 int bbits = inode->i_blkbits;630 int len, page_dirty;00631632- end_offset = (i_size_read(inode) & (PAGE_CACHE_SIZE - 1));000000000633634 /*635 * page_dirty is initially a count of buffers on the page before636 * EOF and is decrememted as we move each into a cleanable state.000000000637 */638- len = 1 << inode->i_blkbits;639- end_offset = max(end_offset, PAGE_CACHE_SIZE);640- end_offset = roundup(end_offset, len);641- page_dirty = end_offset / len;642643- offset = 0;00000644 bh = head = page_buffers(page);645 do {646 if (offset >= end_offset)647 break;648- if (!(PageUptodate(page) || buffer_uptodate(bh)))649- continue;650- if (buffer_mapped(bh) && all_bh &&651- !(buffer_unwritten(bh) || buffer_delay(bh))) {652- if (startio) {653- lock_buffer(bh);654- bh_arr[index++] = bh;655- page_dirty--;656- }657 continue;658 }659- tmp = xfs_offset_to_map(page, mp, offset);660- if (!tmp)661- continue;662- ASSERT(!(tmp->iomap_flags & IOMAP_HOLE));663- ASSERT(!(tmp->iomap_flags & IOMAP_DELAY));664665- /* If this is a new unwritten extent buffer (i.e. one666- * that we haven't passed in private data for, we must667- * now map this buffer too.668- */669- if (buffer_unwritten(bh) && !bh->b_end_io) {670- ASSERT(tmp->iomap_flags & IOMAP_UNWRITTEN);671- xfs_map_unwritten(inode, page, head, bh, offset,672- bbits, tmp, wbc, startio, all_bh);673- } else if (! 
(buffer_unwritten(bh) && buffer_locked(bh))) {674- xfs_map_at_offset(page, bh, offset, bbits, tmp);675- if (buffer_unwritten(bh)) {676- set_buffer_unwritten_io(bh);677- bh->b_private = private;678- ASSERT(private);000000000000000000000679 }680 }681- if (startio) {682- bh_arr[index++] = bh;683- } else {684- set_buffer_dirty(bh);685- unlock_buffer(bh);686- mark_buffer_dirty(bh);687- }688- page_dirty--;689 } while (offset += len, (bh = bh->b_this_page) != head);690691- if (startio && index) {692- xfs_submit_page(page, wbc, bh_arr, index, 1, !page_dirty);693- } else {694- unlock_page(page);000000000000695 }000000696}697698/*···750 struct inode *inode,751 pgoff_t tindex,752 xfs_iomap_t *iomapp,0753 struct writeback_control *wbc,754 int startio,755 int all_bh,756 pgoff_t tlast)757{758- struct page *page;0759760- for (; tindex <= tlast; tindex++) {761- page = xfs_probe_delalloc_page(inode, tindex);762- if (!page)00763 break;764- xfs_convert_page(inode, page, iomapp, wbc, NULL,765- startio, all_bh);00000000766 }767}768···805 int startio,806 int unmapped) /* also implies page uptodate */807{808- struct buffer_head *bh_arr[MAX_BUF_PER_PAGE], *bh, *head;809- xfs_iomap_t *iomp, iomap;0810 loff_t offset;811 unsigned long p_offset = 0;0812 __uint64_t end_offset;813 pgoff_t end_index, last_index, tlast;814- int len, err, i, cnt = 0, uptodate = 1;815- int flags;816- int page_dirty;0817818 /* wait for other IO threads? */819- flags = (startio && wbc->sync_mode != WB_SYNC_NONE) ? 0 : BMAPI_TRYLOCK;0820821 /* Is this page beyond the end of the file? */822 offset = i_size_read(inode);···835 }836 }837838- end_offset = min_t(unsigned long long,839- (loff_t)(page->index + 1) << PAGE_CACHE_SHIFT, offset);840- offset = (loff_t)page->index << PAGE_CACHE_SHIFT;841-842 /*843 * page_dirty is initially a count of buffers on the page before844 * EOF and is decrememted as we move each into a cleanable state.845- */00000000000846 len = 1 << inode->i_blkbits;847- p_offset = max(p_offset, PAGE_CACHE_SIZE);848- p_offset = roundup(p_offset, len);0849 page_dirty = p_offset / len;850851- iomp = NULL;852- p_offset = 0;853 bh = head = page_buffers(page);00000854855 do {856 if (offset >= end_offset)857 break;858 if (!buffer_uptodate(bh))859 uptodate = 0;860- if (!(PageUptodate(page) || buffer_uptodate(bh)) && !startio)00000861 continue;862-863- if (iomp) {864- iomp = xfs_offset_to_map(page, &iomap, p_offset);865 }000866867 /*868 * First case, map an unwritten extent and prepare for869 * extent state conversion transaction on completion.870- */871- if (buffer_unwritten(bh)) {872- if (!startio)873- continue;874- if (!iomp) {875- err = xfs_map_blocks(inode, offset, len, &iomap,876- BMAPI_WRITE|BMAPI_IGNSTATE);877- if (err) {878- goto error;879- }880- iomp = xfs_offset_to_map(page, &iomap,881- p_offset);882- }883- if (iomp) {884- if (!bh->b_end_io) {885- err = xfs_map_unwritten(inode, page,886- head, bh, p_offset,887- inode->i_blkbits, iomp,888- wbc, startio, unmapped);889- if (err) {890- goto error;891- }892- } else {893- set_bit(BH_Lock, &bh->b_state);894- }895- BUG_ON(!buffer_locked(bh));896- bh_arr[cnt++] = bh;897- page_dirty--;898- }899- /*900 * Second case, allocate space for a delalloc buffer.901 * We can return EAGAIN here in the release page case.902- */903- } else if (buffer_delay(bh)) {904- if (!iomp) {905- err = xfs_map_blocks(inode, offset, len, &iomap,906- BMAPI_ALLOCATE | flags);907- if (err) {908- goto error;909- }910- iomp = xfs_offset_to_map(page, &iomap,911- p_offset);00000000000000912 }913- if (iomp) {914- 
xfs_map_at_offset(page, bh, p_offset,915- inode->i_blkbits, iomp);000000000000000916 if (startio) {917- bh_arr[cnt++] = bh;00918 } else {919 set_buffer_dirty(bh);920 unlock_buffer(bh);921 mark_buffer_dirty(bh);922 }923 page_dirty--;000000000000000000000000000000924 }925 } else if ((buffer_uptodate(bh) || PageUptodate(page)) &&926 (unmapped || startio)) {927-928- if (!buffer_mapped(bh)) {929- int size;930-931- /*932- * Getting here implies an unmapped buffer933- * was found, and we are in a path where we934- * need to write the whole page out.935- */936- if (!iomp) {937- size = xfs_probe_unmapped_cluster(938- inode, page, bh, head);939- err = xfs_map_blocks(inode, offset,940- size, &iomap,941- BMAPI_WRITE|BMAPI_MMAP);942- if (err) {943- goto error;944- }945- iomp = xfs_offset_to_map(page, &iomap,946- p_offset);947- }948- if (iomp) {949- xfs_map_at_offset(page,950- bh, p_offset,951- inode->i_blkbits, iomp);952- if (startio) {953- bh_arr[cnt++] = bh;954- } else {955- set_buffer_dirty(bh);956- unlock_buffer(bh);957- mark_buffer_dirty(bh);958- }959- page_dirty--;960- }961- } else if (startio) {962- if (buffer_uptodate(bh) &&963- !test_and_set_bit(BH_Lock, &bh->b_state)) {964- bh_arr[cnt++] = bh;965- page_dirty--;966- }967- }968 }969- } while (offset += len, p_offset += len,970- ((bh = bh->b_this_page) != head));000971972 if (uptodate && bh == head)973 SetPageUptodate(page);974975- if (startio) {976- xfs_submit_page(page, wbc, bh_arr, cnt, 0, !page_dirty);977- }978979- if (iomp) {980- offset = (iomp->iomap_offset + iomp->iomap_bsize - 1) >>981 PAGE_CACHE_SHIFT;982 tlast = min_t(pgoff_t, offset, last_index);983- xfs_cluster_write(inode, page->index + 1, iomp, wbc,984- startio, unmapped, tlast);985 }000986987 return page_dirty;988989error:990- for (i = 0; i < cnt; i++) {991- unlock_buffer(bh_arr[i]);992- }993994 /*995 * If it's delalloc and we have nowhere to put it,···1009 * us to try again.1010 */1011 if (err != -EAGAIN) {1012- if (!unmapped) {1013 block_invalidatepage(page, 0);1014- }1015 ClearPageUptodate(page);1016 }1017 return err;···1074 }10751076 /* If this is a realtime file, data might be on a new device */1077- bh_result->b_bdev = iomap.iomap_target->pbr_bdev;10781079 /* If we previously allocated a block out beyond eof and1080 * we are now coming back to use it then we will need to···1186 if (error)1187 return -error;11881189- iocb->private = xfs_alloc_ioend(inode);11901191 ret = blockdev_direct_IO_own_locking(rw, iocb, inode,1192- iomap.iomap_target->pbr_bdev,1193 iov, offset, nr_segs,1194 linvfs_get_blocks_direct,1195 linvfs_end_io_direct);
···40#include "xfs_rw.h"41#include "xfs_iomap.h"42#include <linux/mpage.h>43+#include <linux/pagevec.h>44#include <linux/writeback.h>4546STATIC void xfs_count_page_state(struct page *, int *, int *, int *);004748#if defined(XFS_RW_TRACE)49void···55 int mask)56{57 xfs_inode_t *ip;058 vnode_t *vp = LINVFS_GET_VP(inode);59 loff_t isize = i_size_read(inode);60+ loff_t offset = page_offset(page);61 int delalloc = -1, unmapped = -1, unwritten = -1;6263 if (page_has_buffers(page))64 xfs_count_page_state(page, &delalloc, &unmapped, &unwritten);6566+ ip = xfs_vtoi(vp);067 if (!ip->i_rwtrace)68 return;69···103 queue_work(xfsdatad_workqueue, &ioend->io_work);104}105106+/*107+ * We're now finished for good with this ioend structure.108+ * Update the page state via the associated buffer_heads,109+ * release holds on the inode and bio, and finally free110+ * up memory. Do not use the ioend after this.111+ */112STATIC void113xfs_destroy_ioend(114 xfs_ioend_t *ioend)115{116+ struct buffer_head *bh, *next;117+118+ for (bh = ioend->io_buffer_head; bh; bh = next) {119+ next = bh->b_private;120+ bh->b_end_io(bh, ioend->io_uptodate);121+ }122+123 vn_iowake(ioend->io_vnode);124 mempool_free(ioend, xfs_ioend_pool);125}126127/*128+ * Buffered IO write completion for delayed allocate extents.129+ * TODO: Update ondisk isize now that we know the file data130+ * has been flushed (i.e. the notorious "NULL file" problem).131+ */132+STATIC void133+xfs_end_bio_delalloc(134+ void *data)135+{136+ xfs_ioend_t *ioend = data;137+138+ xfs_destroy_ioend(ioend);139+}140+141+/*142+ * Buffered IO write completion for regular, written extents.143+ */144+STATIC void145+xfs_end_bio_written(146+ void *data)147+{148+ xfs_ioend_t *ioend = data;149+150+ xfs_destroy_ioend(ioend);151+}152+153+/*154+ * IO write completion for unwritten extents.155+ *156 * Issue transactions to convert a buffer range from unwritten157 * to written extents.158 */···123 vnode_t *vp = ioend->io_vnode;124 xfs_off_t offset = ioend->io_offset;125 size_t size = ioend->io_size;0126 int error;127128 if (ioend->io_uptodate)129 VOP_BMAP(vp, offset, size, BMAPI_UNWRITTEN, NULL, NULL, error);0000000000130 xfs_destroy_ioend(ioend);131}132···149 */150STATIC xfs_ioend_t *151xfs_alloc_ioend(152+ struct inode *inode,153+ unsigned int type)154{155 xfs_ioend_t *ioend;156···162 */163 atomic_set(&ioend->io_remaining, 1);164 ioend->io_uptodate = 1; /* cleared if any I/O fails */165+ ioend->io_list = NULL;166+ ioend->io_type = type;167 ioend->io_vnode = LINVFS_GET_VP(inode);168 ioend->io_buffer_head = NULL;169+ ioend->io_buffer_tail = NULL;170 atomic_inc(&ioend->io_vnode->v_iocount);171 ioend->io_offset = 0;172 ioend->io_size = 0;173174+ if (type == IOMAP_UNWRITTEN)175+ INIT_WORK(&ioend->io_work, xfs_end_bio_unwritten, ioend);176+ else if (type == IOMAP_DELAY)177+ INIT_WORK(&ioend->io_work, xfs_end_bio_delalloc, ioend);178+ else179+ INIT_WORK(&ioend->io_work, xfs_end_bio_written, ioend);180181 return ioend;0000000000000000000000000000182}183184STATIC int···218 return -error;219}220221+STATIC inline int222+xfs_iomap_valid(00000223 xfs_iomap_t *iomapp,224+ loff_t offset)225{226+ return offset >= iomapp->iomap_offset &&227+ offset < iomapp->iomap_offset + iomapp->iomap_bsize;228+}229230+/*231+ * BIO completion handler for buffered IO.232+ */233+STATIC int234+xfs_end_bio(235+ struct bio *bio,236+ unsigned int bytes_done,237+ int error)238+{239+ xfs_ioend_t *ioend = bio->bi_private;240241+ if (bio->bi_size)242+ return 1;0243244+ ASSERT(ioend);245+ ASSERT(atomic_read(&bio->bi_cnt) >= 
1);246+247+ /* Toss bio and pass work off to an xfsdatad thread */248+ if (!test_bit(BIO_UPTODATE, &bio->bi_flags))249+ ioend->io_uptodate = 0;250+ bio->bi_private = NULL;251+ bio->bi_end_io = NULL;252+253+ bio_put(bio);254+ xfs_finish_ioend(ioend);255+ return 0;256+}257+258+STATIC void259+xfs_submit_ioend_bio(260+ xfs_ioend_t *ioend,261+ struct bio *bio)262+{263+ atomic_inc(&ioend->io_remaining);264+265+ bio->bi_private = ioend;266+ bio->bi_end_io = xfs_end_bio;267+268+ submit_bio(WRITE, bio);269+ ASSERT(!bio_flagged(bio, BIO_EOPNOTSUPP));270+ bio_put(bio);271+}272+273+STATIC struct bio *274+xfs_alloc_ioend_bio(275+ struct buffer_head *bh)276+{277+ struct bio *bio;278+ int nvecs = bio_get_nr_vecs(bh->b_bdev);279+280+ do {281+ bio = bio_alloc(GFP_NOIO, nvecs);282+ nvecs >>= 1;283+ } while (!bio);284+285+ ASSERT(bio->bi_private == NULL);286+ bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9);287+ bio->bi_bdev = bh->b_bdev;288+ bio_get(bio);289+ return bio;290+}291+292+STATIC void293+xfs_start_buffer_writeback(294+ struct buffer_head *bh)295+{296+ ASSERT(buffer_mapped(bh));297+ ASSERT(buffer_locked(bh));298+ ASSERT(!buffer_delay(bh));299+ ASSERT(!buffer_unwritten(bh));300+301+ mark_buffer_async_write(bh);302+ set_buffer_uptodate(bh);303+ clear_buffer_dirty(bh);304+}305+306+STATIC void307+xfs_start_page_writeback(308+ struct page *page,309+ struct writeback_control *wbc,310+ int clear_dirty,311+ int buffers)312+{313+ ASSERT(PageLocked(page));314+ ASSERT(!PageWriteback(page));315+ set_page_writeback(page);316+ if (clear_dirty)317+ clear_page_dirty(page);318+ unlock_page(page);319+ if (!buffers) {320+ end_page_writeback(page);321+ wbc->pages_skipped++; /* We didn't write this page */322+ }323+}324+325+static inline int bio_add_buffer(struct bio *bio, struct buffer_head *bh)326+{327+ return bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));328+}329+330+/*331+ * Submit all of the bios for all of the ioends we have saved up,332+ * covering the initial writepage page and also any probed pages.333+ */334+STATIC void335+xfs_submit_ioend(336+ xfs_ioend_t *ioend)337+{338+ xfs_ioend_t *next;339+ struct buffer_head *bh;340+ struct bio *bio;341+ sector_t lastblock = 0;342+343+ do {344+ next = ioend->io_list;345+ bio = NULL;346+347+ for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) {348+ xfs_start_buffer_writeback(bh);349+350+ if (!bio) {351+ retry:352+ bio = xfs_alloc_ioend_bio(bh);353+ } else if (bh->b_blocknr != lastblock + 1) {354+ xfs_submit_ioend_bio(ioend, bio);355+ goto retry;356+ }357+358+ if (bio_add_buffer(bio, bh) != bh->b_size) {359+ xfs_submit_ioend_bio(ioend, bio);360+ goto retry;361+ }362+363+ lastblock = bh->b_blocknr;364+ }365+ if (bio)366+ xfs_submit_ioend_bio(ioend, bio);367+ xfs_finish_ioend(ioend);368+ } while ((ioend = next) != NULL);369+}370+371+/*372+ * Cancel submission of all buffer_heads so far in this endio.373+ * Toss the endio too. 
Only ever called for the initial page374+ * in a writepage request, so only ever one page.375+ */376+STATIC void377+xfs_cancel_ioend(378+ xfs_ioend_t *ioend)379+{380+ xfs_ioend_t *next;381+ struct buffer_head *bh, *next_bh;382+383+ do {384+ next = ioend->io_list;385+ bh = ioend->io_buffer_head;386+ do {387+ next_bh = bh->b_private;388+ clear_buffer_async_write(bh);389+ unlock_buffer(bh);390+ } while ((bh = next_bh) != NULL);391+392+ vn_iowake(ioend->io_vnode);393+ mempool_free(ioend, xfs_ioend_pool);394+ } while ((ioend = next) != NULL);395+}396+397+/*398+ * Test to see if we've been building up a completion structure for399+ * earlier buffers -- if so, we try to append to this ioend if we400+ * can, otherwise we finish off any current ioend and start another.401+ * Return true if we've finished the given ioend.402+ */403+STATIC void404+xfs_add_to_ioend(405+ struct inode *inode,406+ struct buffer_head *bh,407+ xfs_off_t offset,408+ unsigned int type,409+ xfs_ioend_t **result,410+ int need_ioend)411+{412+ xfs_ioend_t *ioend = *result;413+414+ if (!ioend || need_ioend || type != ioend->io_type) {415+ xfs_ioend_t *previous = *result;416+417+ ioend = xfs_alloc_ioend(inode, type);418+ ioend->io_offset = offset;419+ ioend->io_buffer_head = bh;420+ ioend->io_buffer_tail = bh;421+ if (previous)422+ previous->io_list = ioend;423+ *result = ioend;424+ } else {425+ ioend->io_buffer_tail->b_private = bh;426+ ioend->io_buffer_tail = bh;427+ }428+429+ bh->b_private = NULL;430+ ioend->io_size += bh->b_size;431}432433STATIC void434xfs_map_at_offset(0435 struct buffer_head *bh,436+ loff_t offset,437 int block_bits,438 xfs_iomap_t *iomapp)439{440 xfs_daddr_t bn;0441 int sector_shift;442443 ASSERT(!(iomapp->iomap_flags & IOMAP_HOLE));444 ASSERT(!(iomapp->iomap_flags & IOMAP_DELAY));445 ASSERT(iomapp->iomap_bn != IOMAP_DADDR_NULL);446000000447 sector_shift = block_bits - BBSHIFT;448+ bn = (iomapp->iomap_bn >> sector_shift) +449+ ((offset - iomapp->iomap_offset) >> block_bits);450+451+ ASSERT(bn || (iomapp->iomap_flags & IOMAP_REALTIME));452 ASSERT((bn << sector_shift) >= iomapp->iomap_bn);453454 lock_buffer(bh);455 bh->b_blocknr = bn;456+ bh->b_bdev = iomapp->iomap_target->bt_bdev;457 set_buffer_mapped(bh);458 clear_buffer_delay(bh);459+ clear_buffer_unwritten(bh);460}461462/*463+ * Look for a page at index that is suitable for clustering.0000000000000000000000000000000000000000000000000000000464 */465STATIC unsigned int466+xfs_probe_page(467+ struct page *page,468+ unsigned int pg_offset,469+ int mapped)470{0471 int ret = 0;472000473 if (PageWriteback(page))474+ return 0;475476 if (page->mapping && PageDirty(page)) {477 if (page_has_buffers(page)) {···357358 bh = head = page_buffers(page);359 do {360+ if (!buffer_uptodate(bh))361+ break;362+ if (mapped != buffer_mapped(bh))363 break;364 ret += bh->b_size;365 if (ret >= pg_offset)366 break;367 } while ((bh = bh->b_this_page) != head);368 } else369+ ret = mapped ? 
0 : PAGE_CACHE_SIZE;370 }37100372 return ret;373}374375+STATIC size_t376+xfs_probe_cluster(377 struct inode *inode,378 struct page *startpage,379 struct buffer_head *bh,380+ struct buffer_head *head,381+ int mapped)382{383+ struct pagevec pvec;384 pgoff_t tindex, tlast, tloff;385+ size_t total = 0;386+ int done = 0, i;387388 /* First sum forwards in this page */389 do {390+ if (mapped != buffer_mapped(bh))391+ return total;392 total += bh->b_size;393 } while ((bh = bh->b_this_page) != head);394395+ /* if we reached the end of the page, sum forwards in following pages */396+ tlast = i_size_read(inode) >> PAGE_CACHE_SHIFT;397+ tindex = startpage->index + 1;398+399+ /* Prune this back to avoid pathological behavior */400+ tloff = min(tlast, startpage->index + 64);401+402+ pagevec_init(&pvec, 0);403+ while (!done && tindex <= tloff) {404+ unsigned len = min_t(pgoff_t, PAGEVEC_SIZE, tlast - tindex + 1);405+406+ if (!pagevec_lookup(&pvec, inode->i_mapping, tindex, len))407+ break;408+409+ for (i = 0; i < pagevec_count(&pvec); i++) {410+ struct page *page = pvec.pages[i];411+ size_t pg_offset, len = 0;412+413+ if (tindex == tlast) {414+ pg_offset =415+ i_size_read(inode) & (PAGE_CACHE_SIZE - 1);416+ if (!pg_offset) {417+ done = 1;418+ break;419+ }420+ } else421+ pg_offset = PAGE_CACHE_SIZE;422+423+ if (page->index == tindex && !TestSetPageLocked(page)) {424+ len = xfs_probe_page(page, pg_offset, mapped);425+ unlock_page(page);426+ }427+428+ if (!len) {429+ done = 1;430+ break;431+ }432+433 total += len;434+ tindex++;435 }436+437+ pagevec_release(&pvec);438+ cond_resched();00439 }440+441 return total;442}443444/*445+ * Test if a given page is suitable for writing as part of an unwritten446+ * or delayed allocate extent.0447 */448+STATIC int449+xfs_is_delayed_page(450+ struct page *page,451+ unsigned int type)452{00000453 if (PageWriteback(page))454+ return 0;455456 if (page->mapping && page_has_buffers(page)) {457 struct buffer_head *bh, *head;···437438 bh = head = page_buffers(page);439 do {440+ if (buffer_unwritten(bh))441+ acceptable = (type == IOMAP_UNWRITTEN);442+ else if (buffer_delay(bh))443+ acceptable = (type == IOMAP_DELAY);444+ else if (buffer_mapped(bh))445+ acceptable = (type == 0);446+ else447 break;000448 } while ((bh = bh->b_this_page) != head);449450 if (acceptable)451+ return 1;452 }45300000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000454 return 0;0000000000000000000000000000000000000455}456457/*···593 * delalloc/unwritten pages only, for the original page it is possible594 * that the page has no mapping at all.595 */596+STATIC int597xfs_convert_page(598 struct inode *inode,599 struct page *page,600+ loff_t tindex,601+ xfs_iomap_t *mp,602+ xfs_ioend_t **ioendp,603 struct writeback_control *wbc,0604 int startio,605 int all_bh)606{607+ struct buffer_head *bh, *head;608+ xfs_off_t end_offset;609+ unsigned long p_offset;610+ unsigned int type;611 int bbits = inode->i_blkbits;612 int len, page_dirty;613+ int count = 0, done = 0, uptodate = 1;614+ xfs_off_t offset = page_offset(page);615616+ if (page->index != tindex)617+ goto fail;618+ if (TestSetPageLocked(page))619+ goto fail;620+ if (PageWriteback(page))621+ goto fail_unlock_page;622+ if (page->mapping != inode->i_mapping)623+ goto fail_unlock_page;624+ if (!xfs_is_delayed_page(page, (*ioendp)->io_type))625+ goto fail_unlock_page;626627 /*628 * page_dirty is initially a count of buffers on the page before629 * EOF and is decrememted as we move each into a cleanable state.630+ *631+ * 
Derivation:632+ *633+ * End offset is the highest offset that this page should represent.634+ * If we are on the last page, (end_offset & (PAGE_CACHE_SIZE - 1))635+ * will evaluate non-zero and be less than PAGE_CACHE_SIZE and636+ * hence give us the correct page_dirty count. On any other page,637+ * it will be zero and in that case we need page_dirty to be the638+ * count of buffers on the page.639 */640+ end_offset = min_t(unsigned long long,641+ (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT,642+ i_size_read(inode));0643644+ len = 1 << inode->i_blkbits;645+ p_offset = min_t(unsigned long, end_offset & (PAGE_CACHE_SIZE - 1),646+ PAGE_CACHE_SIZE);647+ p_offset = p_offset ? roundup(p_offset, len) : PAGE_CACHE_SIZE;648+ page_dirty = p_offset / len;649+650 bh = head = page_buffers(page);651 do {652 if (offset >= end_offset)653 break;654+ if (!buffer_uptodate(bh))655+ uptodate = 0;656+ if (!(PageUptodate(page) || buffer_uptodate(bh))) {657+ done = 1;00000658 continue;659 }00000660661+ if (buffer_unwritten(bh) || buffer_delay(bh)) {662+ if (buffer_unwritten(bh))663+ type = IOMAP_UNWRITTEN;664+ else665+ type = IOMAP_DELAY;666+667+ if (!xfs_iomap_valid(mp, offset)) {668+ done = 1;669+ continue;670+ }671+672+ ASSERT(!(mp->iomap_flags & IOMAP_HOLE));673+ ASSERT(!(mp->iomap_flags & IOMAP_DELAY));674+675+ xfs_map_at_offset(bh, offset, bbits, mp);676+ if (startio) {677+ xfs_add_to_ioend(inode, bh, offset,678+ type, ioendp, done);679+ } else {680+ set_buffer_dirty(bh);681+ unlock_buffer(bh);682+ mark_buffer_dirty(bh);683+ }684+ page_dirty--;685+ count++;686+ } else {687+ type = 0;688+ if (buffer_mapped(bh) && all_bh && startio) {689+ lock_buffer(bh);690+ xfs_add_to_ioend(inode, bh, offset,691+ type, ioendp, done);692+ count++;693+ page_dirty--;694+ } else {695+ done = 1;696 }697 }00000000698 } while (offset += len, (bh = bh->b_this_page) != head);699700+ if (uptodate && bh == head)701+ SetPageUptodate(page);702+703+ if (startio) {704+ if (count) {705+ struct backing_dev_info *bdi;706+707+ bdi = inode->i_mapping->backing_dev_info;708+ if (bdi_write_congested(bdi)) {709+ wbc->encountered_congestion = 1;710+ done = 1;711+ } else if (--wbc->nr_to_write <= 0) {712+ done = 1;713+ }714+ }715+ xfs_start_page_writeback(page, wbc, !page_dirty, count);716 }717+718+ return done;719+ fail_unlock_page:720+ unlock_page(page);721+ fail:722+ return 1;723}724725/*···685 struct inode *inode,686 pgoff_t tindex,687 xfs_iomap_t *iomapp,688+ xfs_ioend_t **ioendp,689 struct writeback_control *wbc,690 int startio,691 int all_bh,692 pgoff_t tlast)693{694+ struct pagevec pvec;695+ int done = 0, i;696697+ pagevec_init(&pvec, 0);698+ while (!done && tindex <= tlast) {699+ unsigned len = min_t(pgoff_t, PAGEVEC_SIZE, tlast - tindex + 1);700+701+ if (!pagevec_lookup(&pvec, inode->i_mapping, tindex, len))702 break;703+704+ for (i = 0; i < pagevec_count(&pvec); i++) {705+ done = xfs_convert_page(inode, pvec.pages[i], tindex++,706+ iomapp, ioendp, wbc, startio, all_bh);707+ if (done)708+ break;709+ }710+711+ pagevec_release(&pvec);712+ cond_resched();713 }714}715···728 int startio,729 int unmapped) /* also implies page uptodate */730{731+ struct buffer_head *bh, *head;732+ xfs_iomap_t iomap;733+ xfs_ioend_t *ioend = NULL, *iohead = NULL;734 loff_t offset;735 unsigned long p_offset = 0;736+ unsigned int type;737 __uint64_t end_offset;738 pgoff_t end_index, last_index, tlast;739+ ssize_t size, len;740+ int flags, err, iomap_valid = 0, uptodate = 1;741+ int page_dirty, count = 0, trylock_flag = 0;742+ int all_bh = unmapped;743744 /* 
wait for other IO threads? */745+ if (startio && (wbc->sync_mode == WB_SYNC_NONE && wbc->nonblocking))746+ trylock_flag |= BMAPI_TRYLOCK;747748 /* Is this page beyond the end of the file? */749 offset = i_size_read(inode);···754 }755 }7560000757 /*758 * page_dirty is initially a count of buffers on the page before759 * EOF and is decrememted as we move each into a cleanable state.760+ *761+ * Derivation:762+ *763+ * End offset is the highest offset that this page should represent.764+ * If we are on the last page, (end_offset & (PAGE_CACHE_SIZE - 1))765+ * will evaluate non-zero and be less than PAGE_CACHE_SIZE and766+ * hence give us the correct page_dirty count. On any other page,767+ * it will be zero and in that case we need page_dirty to be the768+ * count of buffers on the page.769+ */770+ end_offset = min_t(unsigned long long,771+ (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT, offset);772 len = 1 << inode->i_blkbits;773+ p_offset = min_t(unsigned long, end_offset & (PAGE_CACHE_SIZE - 1),774+ PAGE_CACHE_SIZE);775+ p_offset = p_offset ? roundup(p_offset, len) : PAGE_CACHE_SIZE;776 page_dirty = p_offset / len;77700778 bh = head = page_buffers(page);779+ offset = page_offset(page);780+ flags = -1;781+ type = 0;782+783+ /* TODO: cleanup count and page_dirty */784785 do {786 if (offset >= end_offset)787 break;788 if (!buffer_uptodate(bh))789 uptodate = 0;790+ if (!(PageUptodate(page) || buffer_uptodate(bh)) && !startio) {791+ /*792+ * the iomap is actually still valid, but the ioend793+ * isn't. shouldn't happen too often.794+ */795+ iomap_valid = 0;796 continue;000797 }798+799+ if (iomap_valid)800+ iomap_valid = xfs_iomap_valid(&iomap, offset);801802 /*803 * First case, map an unwritten extent and prepare for804 * extent state conversion transaction on completion.805+ *00000000000000000000000000000806 * Second case, allocate space for a delalloc buffer.807 * We can return EAGAIN here in the release page case.808+ *809+ * Third case, an unmapped buffer was found, and we are810+ * in a path where we need to write the whole page out.811+ */812+ if (buffer_unwritten(bh) || buffer_delay(bh) ||813+ ((buffer_uptodate(bh) || PageUptodate(page)) &&814+ !buffer_mapped(bh) && (unmapped || startio))) {815+ /*816+ * Make sure we don't use a read-only iomap817+ */818+ if (flags == BMAPI_READ)819+ iomap_valid = 0;820+821+ if (buffer_unwritten(bh)) {822+ type = IOMAP_UNWRITTEN;823+ flags = BMAPI_WRITE|BMAPI_IGNSTATE;824+ } else if (buffer_delay(bh)) {825+ type = IOMAP_DELAY;826+ flags = BMAPI_ALLOCATE;827+ if (!startio)828+ flags |= trylock_flag;829+ } else {830+ type = IOMAP_NEW;831+ flags = BMAPI_WRITE|BMAPI_MMAP;832 }833+834+ if (!iomap_valid) {835+ if (type == IOMAP_NEW) {836+ size = xfs_probe_cluster(inode,837+ page, bh, head, 0);838+ } else {839+ size = len;840+ }841+842+ err = xfs_map_blocks(inode, offset, size,843+ &iomap, flags);844+ if (err)845+ goto error;846+ iomap_valid = xfs_iomap_valid(&iomap, offset);847+ }848+ if (iomap_valid) {849+ xfs_map_at_offset(bh, offset,850+ inode->i_blkbits, &iomap);851 if (startio) {852+ xfs_add_to_ioend(inode, bh, offset,853+ type, &ioend,854+ !iomap_valid);855 } else {856 set_buffer_dirty(bh);857 unlock_buffer(bh);858 mark_buffer_dirty(bh);859 }860 page_dirty--;861+ count++;862+ }863+ } else if (buffer_uptodate(bh) && startio) {864+ /*865+ * we got here because the buffer is already mapped.866+ * That means it must already have extents allocated867+ * underneath it. 
Map the extent by reading it.868+ */869+ if (!iomap_valid || type != 0) {870+ flags = BMAPI_READ;871+ size = xfs_probe_cluster(inode, page, bh,872+ head, 1);873+ err = xfs_map_blocks(inode, offset, size,874+ &iomap, flags);875+ if (err)876+ goto error;877+ iomap_valid = xfs_iomap_valid(&iomap, offset);878+ }879+880+ type = 0;881+ if (!test_and_set_bit(BH_Lock, &bh->b_state)) {882+ ASSERT(buffer_mapped(bh));883+ if (iomap_valid)884+ all_bh = 1;885+ xfs_add_to_ioend(inode, bh, offset, type,886+ &ioend, !iomap_valid);887+ page_dirty--;888+ count++;889+ } else {890+ iomap_valid = 0;891 }892 } else if ((buffer_uptodate(bh) || PageUptodate(page)) &&893 (unmapped || startio)) {894+ iomap_valid = 0;0000000000000000000000000000000000000000895 }896+897+ if (!iohead)898+ iohead = ioend;899+900+ } while (offset += len, ((bh = bh->b_this_page) != head));901902 if (uptodate && bh == head)903 SetPageUptodate(page);904905+ if (startio)906+ xfs_start_page_writeback(page, wbc, 1, count);0907908+ if (ioend && iomap_valid) {909+ offset = (iomap.iomap_offset + iomap.iomap_bsize - 1) >>910 PAGE_CACHE_SHIFT;911 tlast = min_t(pgoff_t, offset, last_index);912+ xfs_cluster_write(inode, page->index + 1, &iomap, &ioend,913+ wbc, startio, all_bh, tlast);914 }915+916+ if (iohead)917+ xfs_submit_ioend(iohead);918919 return page_dirty;920921error:922+ if (iohead)923+ xfs_cancel_ioend(iohead);0924925 /*926 * If it's delalloc and we have nowhere to put it,···916 * us to try again.917 */918 if (err != -EAGAIN) {919+ if (!unmapped)920 block_invalidatepage(page, 0);0921 ClearPageUptodate(page);922 }923 return err;···982 }983984 /* If this is a realtime file, data might be on a new device */985+ bh_result->b_bdev = iomap.iomap_target->bt_bdev;986987 /* If we previously allocated a block out beyond eof and988 * we are now coming back to use it then we will need to···1094 if (error)1095 return -error;10961097+ iocb->private = xfs_alloc_ioend(inode, IOMAP_UNWRITTEN);10981099 ret = blockdev_direct_IO_own_locking(rw, iocb, inode,1100+ iomap.iomap_target->bt_bdev,1101 iov, offset, nr_segs,1102 linvfs_get_blocks_direct,1103 linvfs_end_io_direct);
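The page_dirty derivation spelled out in the comments above (in xfs_convert_page and xfs_page_state_convert) boils down to a little end-offset arithmetic. Below is a standalone, userspace-only sketch of just that calculation; it assumes 4 KiB pages and a hypothetical page_dirty_count() helper, and is an illustration of the arithmetic, not part of the patch.

/* Standalone illustration of the page_dirty derivation above.
 * Assumptions: 4 KiB pages; the helper name is hypothetical. */
#include <stdio.h>

#define PAGE_CACHE_SHIFT	12
#define PAGE_CACHE_SIZE		(1UL << PAGE_CACHE_SHIFT)

/* Count of buffers on the page that sit before EOF. */
static int page_dirty_count(unsigned long page_index, unsigned long long isize,
			    int blkbits)
{
	unsigned long long end_offset;
	unsigned long p_offset, len = 1UL << blkbits;

	/* Highest file offset this page should represent. */
	end_offset = (unsigned long long)(page_index + 1) << PAGE_CACHE_SHIFT;
	if (end_offset > isize)
		end_offset = isize;

	/*
	 * On the last page this is non-zero and less than PAGE_CACHE_SIZE;
	 * on any other page it is zero and we want a full page's worth.
	 */
	p_offset = end_offset & (PAGE_CACHE_SIZE - 1);
	p_offset = p_offset ? ((p_offset + len - 1) / len) * len : PAGE_CACHE_SIZE;

	return p_offset / len;
}

int main(void)
{
	/* 10000-byte file, 512-byte blocks: page 2 covers bytes 8192..9999 */
	printf("%d\n", page_dirty_count(2, 10000, 9));	/* 4 blocks before EOF */
	printf("%d\n", page_dirty_count(1, 10000, 9));	/* full page: 8 */
	return 0;
}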
fs/xfs/linux-2.6/xfs_aops.h: +10
···
 typedef void (*xfs_ioend_func_t)(void *);
 
+/*
+ * xfs_ioend struct manages large extent writes for XFS.
+ * It can manage several multi-page bio's at once.
+ */
 typedef struct xfs_ioend {
+	struct xfs_ioend	*io_list;	/* next ioend in chain */
+	unsigned int		io_type;	/* delalloc / unwritten */
 	unsigned int		io_uptodate;	/* I/O status register */
 	atomic_t		io_remaining;	/* hold count */
 	struct vnode		*io_vnode;	/* file being written to */
 	struct buffer_head	*io_buffer_head;/* buffer linked list head */
+	struct buffer_head	*io_buffer_tail;/* buffer linked list tail */
 	size_t			io_size;	/* size of the extent */
 	xfs_off_t		io_offset;	/* offset in the file */
 	struct work_struct	io_work;	/* xfsdatad work queue */
 } xfs_ioend_t;
+
+extern struct address_space_operations linvfs_aops;
+extern int linvfs_get_block(struct inode *, sector_t, struct buffer_head *, int);
 
 #endif	/* __XFS_IOPS_H__ */
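The two new list fields above are what let the rewritten aops code batch buffers: buffer_heads are chained through b_private from io_buffer_head to io_buffer_tail, and ioends of differing types are chained through io_list. The sketch below is a standalone userspace model of that append logic, with stand-in struct definitions and a fixed-size pool instead of the mempool; the names echo the patch but none of this is kernel code.

/* Userspace model of ioend/buffer chaining. Types are stand-ins. */
#include <stddef.h>
#include <stdio.h>

struct buf {				/* stand-in for struct buffer_head */
	size_t		size;
	struct buf	*private;	/* next buffer in the ioend chain */
};

struct ioend {				/* stand-in for xfs_ioend_t */
	struct ioend	*io_list;		/* next ioend in chain */
	unsigned int	io_type;
	struct buf	*io_buffer_head;	/* first buffer */
	struct buf	*io_buffer_tail;	/* last buffer, O(1) append */
	size_t		io_size;
};

/* Append bh to *result, or start a new ioend when the type changes. */
static void add_to_ioend(struct buf *bh, unsigned int type,
			 struct ioend *pool, int *used, struct ioend **result)
{
	struct ioend *ioend = *result;

	if (!ioend || type != ioend->io_type) {
		struct ioend *prev = ioend;

		ioend = &pool[(*used)++];
		ioend->io_type = type;
		ioend->io_buffer_head = ioend->io_buffer_tail = bh;
		if (prev)
			prev->io_list = ioend;
		*result = ioend;
	} else {
		ioend->io_buffer_tail->private = bh;
		ioend->io_buffer_tail = bh;
	}
	bh->private = NULL;
	ioend->io_size += bh->size;
}

int main(void)
{
	struct buf bufs[4] = { {512}, {512}, {512}, {512} };
	struct ioend pool[4] = { 0 };
	struct ioend *head = NULL, *cur = NULL;
	int used = 0, types[4] = { 1, 1, 2, 2 };	/* e.g. unwritten, delalloc */

	for (int i = 0; i < 4; i++) {
		add_to_ioend(&bufs[i], types[i], pool, &used, &cur);
		if (!head)
			head = cur;
	}
	for (struct ioend *io = head; io; io = io->io_list)
		printf("ioend type %u, %zu bytes\n", io->io_type, io->io_size);
	return 0;
}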
fs/xfs/linux-2.6/xfs_buf.c: +661, -702
···31#include <linux/kthread.h>32#include "xfs_linux.h"3334-STATIC kmem_cache_t *pagebuf_zone;35-STATIC kmem_shaker_t pagebuf_shake;036STATIC int xfsbufd_wakeup(int, gfp_t);37-STATIC void pagebuf_delwri_queue(xfs_buf_t *, int);3839STATIC struct workqueue_struct *xfslogd_workqueue;40struct workqueue_struct *xfsdatad_workqueue;4142-#ifdef PAGEBUF_TRACE43void44-pagebuf_trace(45- xfs_buf_t *pb,46 char *id,47 void *data,48 void *ra)49{50- ktrace_enter(pagebuf_trace_buf,51- pb, id,52- (void *)(unsigned long)pb->pb_flags,53- (void *)(unsigned long)pb->pb_hold.counter,54- (void *)(unsigned long)pb->pb_sema.count.counter,55 (void *)current,56 data, ra,57- (void *)(unsigned long)((pb->pb_file_offset>>32) & 0xffffffff),58- (void *)(unsigned long)(pb->pb_file_offset & 0xffffffff),59- (void *)(unsigned long)pb->pb_buffer_length,60 NULL, NULL, NULL, NULL, NULL);61}62-ktrace_t *pagebuf_trace_buf;63-#define PAGEBUF_TRACE_SIZE 409664-#define PB_TRACE(pb, id, data) \65- pagebuf_trace(pb, id, (void *)data, (void *)__builtin_return_address(0))66#else67-#define PB_TRACE(pb, id, data) do { } while (0)68#endif6970-#ifdef PAGEBUF_LOCK_TRACKING71-# define PB_SET_OWNER(pb) ((pb)->pb_last_holder = current->pid)72-# define PB_CLEAR_OWNER(pb) ((pb)->pb_last_holder = -1)73-# define PB_GET_OWNER(pb) ((pb)->pb_last_holder)74#else75-# define PB_SET_OWNER(pb) do { } while (0)76-# define PB_CLEAR_OWNER(pb) do { } while (0)77-# define PB_GET_OWNER(pb) do { } while (0)78#endif7980-#define pb_to_gfp(flags) \81- ((((flags) & PBF_READ_AHEAD) ? __GFP_NORETRY : \82- ((flags) & PBF_DONT_BLOCK) ? GFP_NOFS : GFP_KERNEL) | __GFP_NOWARN)8384-#define pb_to_km(flags) \85- (((flags) & PBF_DONT_BLOCK) ? KM_NOFS : KM_SLEEP)8687-#define pagebuf_allocate(flags) \88- kmem_zone_alloc(pagebuf_zone, pb_to_km(flags))89-#define pagebuf_deallocate(pb) \90- kmem_zone_free(pagebuf_zone, (pb));9192/*93- * Page Region interfaces.94 *95- * For pages in filesystems where the blocksize is smaller than the96- * pagesize, we use the page->private field (long) to hold a bitmap97- * of uptodate regions within the page.98 *99- * Each such region is "bytes per page / bits per long" bytes long.100 *101- * NBPPR == number-of-bytes-per-page-region102- * BTOPR == bytes-to-page-region (rounded up)103- * BTOPRT == bytes-to-page-region-truncated (rounded down)104 */105#if (BITS_PER_LONG == 32)106#define PRSHIFT (PAGE_CACHE_SHIFT - 5) /* (32 == 1<<5) */···160}161162/*163- * Mapping of multi-page buffers into contiguous virtual space164 */165166typedef struct a_list {···173STATIC DEFINE_SPINLOCK(as_lock);174175/*176- * Try to batch vunmaps because they are costly.177 */178STATIC void179free_address(···216}217218/*219- * Internal pagebuf object manipulation220 */221222STATIC void223-_pagebuf_initialize(224- xfs_buf_t *pb,225 xfs_buftarg_t *target,226- loff_t range_base,227 size_t range_length,228- page_buf_flags_t flags)229{230 /*231- * We don't want certain flags to appear in pb->pb_flags.232 */233- flags &= ~(PBF_LOCK|PBF_MAPPED|PBF_DONT_BLOCK|PBF_READ_AHEAD);234235- memset(pb, 0, sizeof(xfs_buf_t));236- atomic_set(&pb->pb_hold, 1);237- init_MUTEX_LOCKED(&pb->pb_iodonesema);238- INIT_LIST_HEAD(&pb->pb_list);239- INIT_LIST_HEAD(&pb->pb_hash_list);240- init_MUTEX_LOCKED(&pb->pb_sema); /* held, no waiters */241- PB_SET_OWNER(pb);242- pb->pb_target = target;243- pb->pb_file_offset = range_base;244 /*245 * Set buffer_length and count_desired to the same value initially.246 * I/O routines should use count_desired, which will be the same in247 * most cases but may be reset 
(e.g. XFS recovery).248 */249- pb->pb_buffer_length = pb->pb_count_desired = range_length;250- pb->pb_flags = flags;251- pb->pb_bn = XFS_BUF_DADDR_NULL;252- atomic_set(&pb->pb_pin_count, 0);253- init_waitqueue_head(&pb->pb_waiters);254255- XFS_STATS_INC(pb_create);256- PB_TRACE(pb, "initialize", target);257}258259/*260- * Allocate a page array capable of holding a specified number261- * of pages, and point the page buf at it.262 */263STATIC int264-_pagebuf_get_pages(265- xfs_buf_t *pb,266 int page_count,267- page_buf_flags_t flags)268{269 /* Make sure that we have a page list */270- if (pb->pb_pages == NULL) {271- pb->pb_offset = page_buf_poff(pb->pb_file_offset);272- pb->pb_page_count = page_count;273- if (page_count <= PB_PAGES) {274- pb->pb_pages = pb->pb_page_array;275 } else {276- pb->pb_pages = kmem_alloc(sizeof(struct page *) *277- page_count, pb_to_km(flags));278- if (pb->pb_pages == NULL)279 return -ENOMEM;280 }281- memset(pb->pb_pages, 0, sizeof(struct page *) * page_count);282 }283 return 0;284}285286/*287- * Frees pb_pages if it was malloced.288 */289STATIC void290-_pagebuf_free_pages(291 xfs_buf_t *bp)292{293- if (bp->pb_pages != bp->pb_page_array) {294- kmem_free(bp->pb_pages,295- bp->pb_page_count * sizeof(struct page *));296 }297}298···300 * Releases the specified buffer.301 *302 * The modification state of any associated pages is left unchanged.303- * The buffer most not be on any hash - use pagebuf_rele instead for304 * hashed and refcounted buffers305 */306void307-pagebuf_free(308 xfs_buf_t *bp)309{310- PB_TRACE(bp, "free", 0);311312- ASSERT(list_empty(&bp->pb_hash_list));313314- if (bp->pb_flags & _PBF_PAGE_CACHE) {315 uint i;316317- if ((bp->pb_flags & PBF_MAPPED) && (bp->pb_page_count > 1))318- free_address(bp->pb_addr - bp->pb_offset);319320- for (i = 0; i < bp->pb_page_count; i++)321- page_cache_release(bp->pb_pages[i]);322- _pagebuf_free_pages(bp);323- } else if (bp->pb_flags & _PBF_KMEM_ALLOC) {324 /*325- * XXX(hch): bp->pb_count_desired might be incorrect (see326- * pagebuf_associate_memory for details), but fortunately327 * the Linux version of kmem_free ignores the len argument..328 */329- kmem_free(bp->pb_addr, bp->pb_count_desired);330- _pagebuf_free_pages(bp);331 }332333- pagebuf_deallocate(bp);334}335336/*337 * Finds all pages for buffer in question and builds it's page list.338 */339STATIC int340-_pagebuf_lookup_pages(341 xfs_buf_t *bp,342 uint flags)343{344- struct address_space *mapping = bp->pb_target->pbr_mapping;345- size_t blocksize = bp->pb_target->pbr_bsize;346- size_t size = bp->pb_count_desired;347 size_t nbytes, offset;348- gfp_t gfp_mask = pb_to_gfp(flags);349 unsigned short page_count, i;350 pgoff_t first;351- loff_t end;352 int error;353354- end = bp->pb_file_offset + bp->pb_buffer_length;355- page_count = page_buf_btoc(end) - page_buf_btoct(bp->pb_file_offset);356357- error = _pagebuf_get_pages(bp, page_count, flags);358 if (unlikely(error))359 return error;360- bp->pb_flags |= _PBF_PAGE_CACHE;361362- offset = bp->pb_offset;363- first = bp->pb_file_offset >> PAGE_CACHE_SHIFT;364365- for (i = 0; i < bp->pb_page_count; i++) {366 struct page *page;367 uint retries = 0;368369 retry:370 page = find_or_create_page(mapping, first + i, gfp_mask);371 if (unlikely(page == NULL)) {372- if (flags & PBF_READ_AHEAD) {373- bp->pb_page_count = i;374- for (i = 0; i < bp->pb_page_count; i++)375- unlock_page(bp->pb_pages[i]);376 return -ENOMEM;377 }378···388 "deadlock in %s (mode:0x%x)\n",389 __FUNCTION__, gfp_mask);390391- XFS_STATS_INC(pb_page_retries);392 
xfsbufd_wakeup(0, gfp_mask);393 blk_congestion_wait(WRITE, HZ/50);394 goto retry;395 }396397- XFS_STATS_INC(pb_page_found);398399 nbytes = min_t(size_t, size, PAGE_CACHE_SIZE - offset);400 size -= nbytes;···402 if (!PageUptodate(page)) {403 page_count--;404 if (blocksize >= PAGE_CACHE_SIZE) {405- if (flags & PBF_READ)406- bp->pb_locked = 1;407 } else if (!PagePrivate(page)) {408 if (test_page_region(page, offset, nbytes))409 page_count++;410 }411 }412413- bp->pb_pages[i] = page;414 offset = 0;415 }416417- if (!bp->pb_locked) {418- for (i = 0; i < bp->pb_page_count; i++)419- unlock_page(bp->pb_pages[i]);420 }421422- if (page_count == bp->pb_page_count)423- bp->pb_flags |= PBF_DONE;424425- PB_TRACE(bp, "lookup_pages", (long)page_count);426 return error;427}428···430 * Map buffer into kernel address-space if nessecary.431 */432STATIC int433-_pagebuf_map_pages(434 xfs_buf_t *bp,435 uint flags)436{437 /* A single page buffer is always mappable */438- if (bp->pb_page_count == 1) {439- bp->pb_addr = page_address(bp->pb_pages[0]) + bp->pb_offset;440- bp->pb_flags |= PBF_MAPPED;441- } else if (flags & PBF_MAPPED) {442 if (as_list_len > 64)443 purge_addresses();444- bp->pb_addr = vmap(bp->pb_pages, bp->pb_page_count,445- VM_MAP, PAGE_KERNEL);446- if (unlikely(bp->pb_addr == NULL))447 return -ENOMEM;448- bp->pb_addr += bp->pb_offset;449- bp->pb_flags |= PBF_MAPPED;450 }451452 return 0;···457 */458459/*460- * _pagebuf_find461- *462- * Looks up, and creates if absent, a lockable buffer for463 * a given range of an inode. The buffer is returned464 * locked. If other overlapping buffers exist, they are465 * released before the new buffer is created and locked,···465 * are unlocked. No I/O is implied by this call.466 */467xfs_buf_t *468-_pagebuf_find(469 xfs_buftarg_t *btp, /* block device target */470- loff_t ioff, /* starting offset of range */471 size_t isize, /* length of range */472- page_buf_flags_t flags, /* PBF_TRYLOCK */473- xfs_buf_t *new_pb)/* newly allocated buffer */474{475- loff_t range_base;476 size_t range_length;477 xfs_bufhash_t *hash;478- xfs_buf_t *pb, *n;479480 range_base = (ioff << BBSHIFT);481 range_length = (isize << BBSHIFT);482483 /* Check for IOs smaller than the sector size / not sector aligned */484- ASSERT(!(range_length < (1 << btp->pbr_sshift)));485- ASSERT(!(range_base & (loff_t)btp->pbr_smask));486487 hash = &btp->bt_hash[hash_long((unsigned long)ioff, btp->bt_hashshift)];488489 spin_lock(&hash->bh_lock);490491- list_for_each_entry_safe(pb, n, &hash->bh_list, pb_hash_list) {492- ASSERT(btp == pb->pb_target);493- if (pb->pb_file_offset == range_base &&494- pb->pb_buffer_length == range_length) {495 /*496- * If we look at something bring it to the497 * front of the list for next time.498 */499- atomic_inc(&pb->pb_hold);500- list_move(&pb->pb_hash_list, &hash->bh_list);501 goto found;502 }503 }504505 /* No match found */506- if (new_pb) {507- _pagebuf_initialize(new_pb, btp, range_base,508 range_length, flags);509- new_pb->pb_hash = hash;510- list_add(&new_pb->pb_hash_list, &hash->bh_list);511 } else {512- XFS_STATS_INC(pb_miss_locked);513 }514515 spin_unlock(&hash->bh_lock);516- return new_pb;517518found:519 spin_unlock(&hash->bh_lock);···522 * if this does not work then we need to drop the523 * spinlock and do a hard attempt on the semaphore.524 */525- if (down_trylock(&pb->pb_sema)) {526- if (!(flags & PBF_TRYLOCK)) {527 /* wait for buffer ownership */528- PB_TRACE(pb, "get_lock", 0);529- pagebuf_lock(pb);530- XFS_STATS_INC(pb_get_locked_waited);531 } else {532 /* We 
asked for a trylock and failed, no need533 * to look at file offset and length here, we534- * know that this pagebuf at least overlaps our535- * pagebuf and is locked, therefore our buffer536- * either does not exist, or is this buffer537 */538-539- pagebuf_rele(pb);540- XFS_STATS_INC(pb_busy_locked);541- return (NULL);542 }543 } else {544 /* trylock worked */545- PB_SET_OWNER(pb);546 }547548- if (pb->pb_flags & PBF_STALE) {549- ASSERT((pb->pb_flags & _PBF_DELWRI_Q) == 0);550- pb->pb_flags &= PBF_MAPPED;551 }552- PB_TRACE(pb, "got_lock", 0);553- XFS_STATS_INC(pb_get_locked);554- return (pb);555}556557/*558- * xfs_buf_get_flags assembles a buffer covering the specified range.559- *560 * Storage in memory for all portions of the buffer will be allocated,561 * although backing storage may not be.562 */563xfs_buf_t *564-xfs_buf_get_flags( /* allocate a buffer */565 xfs_buftarg_t *target,/* target for buffer */566- loff_t ioff, /* starting offset of range */567 size_t isize, /* length of range */568- page_buf_flags_t flags) /* PBF_TRYLOCK */569{570- xfs_buf_t *pb, *new_pb;571 int error = 0, i;572573- new_pb = pagebuf_allocate(flags);574- if (unlikely(!new_pb))575 return NULL;576577- pb = _pagebuf_find(target, ioff, isize, flags, new_pb);578- if (pb == new_pb) {579- error = _pagebuf_lookup_pages(pb, flags);580 if (error)581 goto no_buffer;582 } else {583- pagebuf_deallocate(new_pb);584- if (unlikely(pb == NULL))585 return NULL;586 }587588- for (i = 0; i < pb->pb_page_count; i++)589- mark_page_accessed(pb->pb_pages[i]);590591- if (!(pb->pb_flags & PBF_MAPPED)) {592- error = _pagebuf_map_pages(pb, flags);593 if (unlikely(error)) {594 printk(KERN_WARNING "%s: failed to map pages\n",595 __FUNCTION__);···595 }596 }597598- XFS_STATS_INC(pb_get);599600 /*601 * Always fill in the block number now, the mapped cases can do602 * their own overlay of this later.603 */604- pb->pb_bn = ioff;605- pb->pb_count_desired = pb->pb_buffer_length;606607- PB_TRACE(pb, "get", (unsigned long)flags);608- return pb;609610 no_buffer:611- if (flags & (PBF_LOCK | PBF_TRYLOCK))612- pagebuf_unlock(pb);613- pagebuf_rele(pb);614 return NULL;615}616617xfs_buf_t *618xfs_buf_read_flags(619 xfs_buftarg_t *target,620- loff_t ioff,621 size_t isize,622- page_buf_flags_t flags)623{624- xfs_buf_t *pb;625626- flags |= PBF_READ;627628- pb = xfs_buf_get_flags(target, ioff, isize, flags);629- if (pb) {630- if (!XFS_BUF_ISDONE(pb)) {631- PB_TRACE(pb, "read", (unsigned long)flags);632- XFS_STATS_INC(pb_get_read);633- pagebuf_iostart(pb, flags);634- } else if (flags & PBF_ASYNC) {635- PB_TRACE(pb, "read_async", (unsigned long)flags);636 /*637 * Read ahead call which is already satisfied,638 * drop the buffer639 */640 goto no_buffer;641 } else {642- PB_TRACE(pb, "read_done", (unsigned long)flags);643 /* We do not want read in the flags */644- pb->pb_flags &= ~PBF_READ;645 }646 }647648- return pb;649650 no_buffer:651- if (flags & (PBF_LOCK | PBF_TRYLOCK))652- pagebuf_unlock(pb);653- pagebuf_rele(pb);654 return NULL;655}656657/*658- * If we are not low on memory then do the readahead in a deadlock659- * safe manner.660 */661void662-pagebuf_readahead(663 xfs_buftarg_t *target,664- loff_t ioff,665 size_t isize,666- page_buf_flags_t flags)667{668 struct backing_dev_info *bdi;669670- bdi = target->pbr_mapping->backing_dev_info;671 if (bdi_read_congested(bdi))672 return;673674- flags |= (PBF_TRYLOCK|PBF_ASYNC|PBF_READ_AHEAD);675 xfs_buf_read_flags(target, ioff, isize, flags);676}677678xfs_buf_t *679-pagebuf_get_empty(680 size_t len,681 xfs_buftarg_t 
*target)682{683- xfs_buf_t *pb;684685- pb = pagebuf_allocate(0);686- if (pb)687- _pagebuf_initialize(pb, target, 0, len, 0);688- return pb;689}690691static inline struct page *···701}702703int704-pagebuf_associate_memory(705- xfs_buf_t *pb,706 void *mem,707 size_t len)708{···719 page_count++;720721 /* Free any previous set of page pointers */722- if (pb->pb_pages)723- _pagebuf_free_pages(pb);724725- pb->pb_pages = NULL;726- pb->pb_addr = mem;727728- rval = _pagebuf_get_pages(pb, page_count, 0);729 if (rval)730 return rval;731732- pb->pb_offset = offset;733 ptr = (size_t) mem & PAGE_CACHE_MASK;734 end = PAGE_CACHE_ALIGN((size_t) mem + len);735 end_cur = end;736 /* set up first page */737- pb->pb_pages[0] = mem_to_page(mem);738739 ptr += PAGE_CACHE_SIZE;740- pb->pb_page_count = ++i;741 while (ptr < end) {742- pb->pb_pages[i] = mem_to_page((void *)ptr);743- pb->pb_page_count = ++i;744 ptr += PAGE_CACHE_SIZE;745 }746- pb->pb_locked = 0;747748- pb->pb_count_desired = pb->pb_buffer_length = len;749- pb->pb_flags |= PBF_MAPPED;750751 return 0;752}753754xfs_buf_t *755-pagebuf_get_no_daddr(756 size_t len,757 xfs_buftarg_t *target)758{···761 void *data;762 int error;763764- bp = pagebuf_allocate(0);765 if (unlikely(bp == NULL))766 goto fail;767- _pagebuf_initialize(bp, target, 0, len, 0);768769 try_again:770 data = kmem_alloc(malloc_len, KM_SLEEP | KM_MAYFAIL);···773774 /* check whether alignment matches.. */775 if ((__psunsigned_t)data !=776- ((__psunsigned_t)data & ~target->pbr_smask)) {777 /* .. else double the size and try again */778 kmem_free(data, malloc_len);779 malloc_len <<= 1;780 goto try_again;781 }782783- error = pagebuf_associate_memory(bp, data, len);784 if (error)785 goto fail_free_mem;786- bp->pb_flags |= _PBF_KMEM_ALLOC;787788- pagebuf_unlock(bp);789790- PB_TRACE(bp, "no_daddr", data);791 return bp;792 fail_free_mem:793 kmem_free(data, malloc_len);794 fail_free_buf:795- pagebuf_free(bp);796 fail:797 return NULL;798}799800/*801- * pagebuf_hold802- *803 * Increment reference count on buffer, to hold the buffer concurrently804 * with another thread which may release (free) the buffer asynchronously.805- *806 * Must hold the buffer already to call this function.807 */808void809-pagebuf_hold(810- xfs_buf_t *pb)811{812- atomic_inc(&pb->pb_hold);813- PB_TRACE(pb, "hold", 0);814}815816/*817- * pagebuf_rele818- *819- * pagebuf_rele releases a hold on the specified buffer. 
If the820- * the hold count is 1, pagebuf_rele calls pagebuf_free.821 */822void823-pagebuf_rele(824- xfs_buf_t *pb)825{826- xfs_bufhash_t *hash = pb->pb_hash;827828- PB_TRACE(pb, "rele", pb->pb_relse);829830- if (atomic_dec_and_lock(&pb->pb_hold, &hash->bh_lock)) {831- if (pb->pb_relse) {832- atomic_inc(&pb->pb_hold);833 spin_unlock(&hash->bh_lock);834- (*(pb->pb_relse)) (pb);835- } else if (pb->pb_flags & PBF_FS_MANAGED) {836 spin_unlock(&hash->bh_lock);837 } else {838- ASSERT(!(pb->pb_flags & (PBF_DELWRI|_PBF_DELWRI_Q)));839- list_del_init(&pb->pb_hash_list);840 spin_unlock(&hash->bh_lock);841- pagebuf_free(pb);842 }843 } else {844 /*845 * Catch reference count leaks846 */847- ASSERT(atomic_read(&pb->pb_hold) >= 0);848 }849}850···855 */856857/*858- * pagebuf_cond_lock859- *860- * pagebuf_cond_lock locks a buffer object, if it is not already locked.861- * Note that this in no way862- * locks the underlying pages, so it is only useful for synchronizing863- * concurrent use of page buffer objects, not for synchronizing independent864- * access to the underlying pages.865 */866int867-pagebuf_cond_lock( /* lock buffer, if not locked */868- /* returns -EBUSY if locked) */869- xfs_buf_t *pb)870{871 int locked;872873- locked = down_trylock(&pb->pb_sema) == 0;874 if (locked) {875- PB_SET_OWNER(pb);876 }877- PB_TRACE(pb, "cond_lock", (long)locked);878- return(locked ? 0 : -EBUSY);879}880881#if defined(DEBUG) || defined(XFS_BLI_TRACE)882-/*883- * pagebuf_lock_value884- *885- * Return lock value for a pagebuf886- */887int888-pagebuf_lock_value(889- xfs_buf_t *pb)890{891- return(atomic_read(&pb->pb_sema.count));892}893#endif894895/*896- * pagebuf_lock897- *898- * pagebuf_lock locks a buffer object. Note that this in no way899- * locks the underlying pages, so it is only useful for synchronizing900- * concurrent use of page buffer objects, not for synchronizing independent901- * access to the underlying pages.902 */903-int904-pagebuf_lock(905- xfs_buf_t *pb)906{907- PB_TRACE(pb, "lock", 0);908- if (atomic_read(&pb->pb_io_remaining))909- blk_run_address_space(pb->pb_target->pbr_mapping);910- down(&pb->pb_sema);911- PB_SET_OWNER(pb);912- PB_TRACE(pb, "locked", 0);913- return 0;914}915916/*917- * pagebuf_unlock918- *919- * pagebuf_unlock releases the lock on the buffer object created by920- * pagebuf_lock or pagebuf_cond_lock (not any pinning of underlying pages921- * created by pagebuf_pin).922- *923 * If the buffer is marked delwri but is not queued, do so before we924- * unlock the buffer as we need to set flags correctly. We also need to925 * take a reference for the delwri queue because the unlocker is going to926 * drop their's and they don't know we just queued it.927 */928void929-pagebuf_unlock( /* unlock buffer */930- xfs_buf_t *pb) /* buffer to unlock */931{932- if ((pb->pb_flags & (PBF_DELWRI|_PBF_DELWRI_Q)) == PBF_DELWRI) {933- atomic_inc(&pb->pb_hold);934- pb->pb_flags |= PBF_ASYNC;935- pagebuf_delwri_queue(pb, 0);936 }937938- PB_CLEAR_OWNER(pb);939- up(&pb->pb_sema);940- PB_TRACE(pb, "unlock", 0);941}942943944/*945 * Pinning Buffer Storage in Memory946- */947-948-/*949- * pagebuf_pin950- *951- * pagebuf_pin locks all of the memory represented by a buffer in952- * memory. Multiple calls to pagebuf_pin and pagebuf_unpin, for953- * the same or different buffers affecting a given page, will954- * properly count the number of outstanding "pin" requests. 
The955- * buffer may be released after the pagebuf_pin and a different956- * buffer used when calling pagebuf_unpin, if desired.957- * pagebuf_pin should be used by the file system when it wants be958- * assured that no attempt will be made to force the affected959- * memory to disk. It does not assure that a given logical page960- * will not be moved to a different physical page.961 */962void963-pagebuf_pin(964- xfs_buf_t *pb)965{966- atomic_inc(&pb->pb_pin_count);967- PB_TRACE(pb, "pin", (long)pb->pb_pin_count.counter);968}969970-/*971- * pagebuf_unpin972- *973- * pagebuf_unpin reverses the locking of memory performed by974- * pagebuf_pin. Note that both functions affected the logical975- * pages associated with the buffer, not the buffer itself.976- */977void978-pagebuf_unpin(979- xfs_buf_t *pb)980{981- if (atomic_dec_and_test(&pb->pb_pin_count)) {982- wake_up_all(&pb->pb_waiters);983- }984- PB_TRACE(pb, "unpin", (long)pb->pb_pin_count.counter);985}986987int988-pagebuf_ispin(989- xfs_buf_t *pb)990{991- return atomic_read(&pb->pb_pin_count);992}993994-/*995- * pagebuf_wait_unpin996- *997- * pagebuf_wait_unpin waits until all of the memory associated998- * with the buffer is not longer locked in memory. It returns999- * immediately if none of the affected pages are locked.1000- */1001-static inline void1002-_pagebuf_wait_unpin(1003- xfs_buf_t *pb)1004{1005 DECLARE_WAITQUEUE (wait, current);10061007- if (atomic_read(&pb->pb_pin_count) == 0)1008 return;10091010- add_wait_queue(&pb->pb_waiters, &wait);1011 for (;;) {1012 set_current_state(TASK_UNINTERRUPTIBLE);1013- if (atomic_read(&pb->pb_pin_count) == 0)1014 break;1015- if (atomic_read(&pb->pb_io_remaining))1016- blk_run_address_space(pb->pb_target->pbr_mapping);1017 schedule();1018 }1019- remove_wait_queue(&pb->pb_waiters, &wait);1020 set_current_state(TASK_RUNNING);1021}1022···978 * Buffer Utility Routines979 */980981-/*982- * pagebuf_iodone983- *984- * pagebuf_iodone marks a buffer for which I/O is in progress985- * done with respect to that I/O. 
The pb_iodone routine, if986- * present, will be called as a side-effect.987- */988STATIC void989-pagebuf_iodone_work(990 void *v)991{992 xfs_buf_t *bp = (xfs_buf_t *)v;993994- if (bp->pb_iodone)995- (*(bp->pb_iodone))(bp);996- else if (bp->pb_flags & PBF_ASYNC)997 xfs_buf_relse(bp);998}9991000void1001-pagebuf_iodone(1002- xfs_buf_t *pb,1003 int schedule)1004{1005- pb->pb_flags &= ~(PBF_READ | PBF_WRITE);1006- if (pb->pb_error == 0)1007- pb->pb_flags |= PBF_DONE;10081009- PB_TRACE(pb, "iodone", pb->pb_iodone);10101011- if ((pb->pb_iodone) || (pb->pb_flags & PBF_ASYNC)) {1012 if (schedule) {1013- INIT_WORK(&pb->pb_iodone_work, pagebuf_iodone_work, pb);1014- queue_work(xfslogd_workqueue, &pb->pb_iodone_work);1015 } else {1016- pagebuf_iodone_work(pb);1017 }1018 } else {1019- up(&pb->pb_iodonesema);1020 }1021}10221023-/*1024- * pagebuf_ioerror1025- *1026- * pagebuf_ioerror sets the error code for a buffer.1027- */1028void1029-pagebuf_ioerror( /* mark/clear buffer error flag */1030- xfs_buf_t *pb, /* buffer to mark */1031- int error) /* error to store (0 if none) */1032{1033 ASSERT(error >= 0 && error <= 0xffff);1034- pb->pb_error = (unsigned short)error;1035- PB_TRACE(pb, "ioerror", (unsigned long)error);1036}10371038/*1039- * pagebuf_iostart1040- *1041- * pagebuf_iostart initiates I/O on a buffer, based on the flags supplied.1042- * If necessary, it will arrange for any disk space allocation required,1043- * and it will break up the request if the block mappings require it.1044- * The pb_iodone routine in the buffer supplied will only be called1045 * when all of the subsidiary I/O requests, if any, have been completed.1046- * pagebuf_iostart calls the pagebuf_ioinitiate routine or1047- * pagebuf_iorequest, if the former routine is not defined, to start1048- * the I/O on a given low-level request.1049 */1050int1051-pagebuf_iostart( /* start I/O on a buffer */1052- xfs_buf_t *pb, /* buffer to start */1053- page_buf_flags_t flags) /* PBF_LOCK, PBF_ASYNC, PBF_READ, */1054- /* PBF_WRITE, PBF_DELWRI, */1055- /* PBF_DONT_BLOCK */1056{1057 int status = 0;10581059- PB_TRACE(pb, "iostart", (unsigned long)flags);10601061- if (flags & PBF_DELWRI) {1062- pb->pb_flags &= ~(PBF_READ | PBF_WRITE | PBF_ASYNC);1063- pb->pb_flags |= flags & (PBF_DELWRI | PBF_ASYNC);1064- pagebuf_delwri_queue(pb, 1);1065 return status;1066 }10671068- pb->pb_flags &= ~(PBF_READ | PBF_WRITE | PBF_ASYNC | PBF_DELWRI | \1069- PBF_READ_AHEAD | _PBF_RUN_QUEUES);1070- pb->pb_flags |= flags & (PBF_READ | PBF_WRITE | PBF_ASYNC | \1071- PBF_READ_AHEAD | _PBF_RUN_QUEUES);10721073- BUG_ON(pb->pb_bn == XFS_BUF_DADDR_NULL);10741075 /* For writes allow an alternate strategy routine to precede1076 * the actual I/O request (which may not be issued at all in1077 * a shutdown situation, for example).1078 */1079- status = (flags & PBF_WRITE) ?1080- pagebuf_iostrategy(pb) : pagebuf_iorequest(pb);10811082 /* Wait for I/O if we are not an async request.1083 * Note: async I/O request completion will release the buffer,1084 * and that can already be done by this point. 
So using the1085 * buffer pointer from here on, after async I/O, is invalid.1086 */1087- if (!status && !(flags & PBF_ASYNC))1088- status = pagebuf_iowait(pb);10891090 return status;1091}10921093-/*1094- * Helper routine for pagebuf_iorequest1095- */1096-1097STATIC __inline__ int1098-_pagebuf_iolocked(1099- xfs_buf_t *pb)1100{1101- ASSERT(pb->pb_flags & (PBF_READ|PBF_WRITE));1102- if (pb->pb_flags & PBF_READ)1103- return pb->pb_locked;1104 return 0;1105}11061107STATIC __inline__ void1108-_pagebuf_iodone(1109- xfs_buf_t *pb,1110 int schedule)1111{1112- if (atomic_dec_and_test(&pb->pb_io_remaining) == 1) {1113- pb->pb_locked = 0;1114- pagebuf_iodone(pb, schedule);1115 }1116}11171118STATIC int1119-bio_end_io_pagebuf(1120 struct bio *bio,1121 unsigned int bytes_done,1122 int error)1123{1124- xfs_buf_t *pb = (xfs_buf_t *)bio->bi_private;1125- unsigned int blocksize = pb->pb_target->pbr_bsize;1126 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;11271128 if (bio->bi_size)1129 return 1;11301131 if (!test_bit(BIO_UPTODATE, &bio->bi_flags))1132- pb->pb_error = EIO;11331134 do {1135 struct page *page = bvec->bv_page;11361137- if (unlikely(pb->pb_error)) {1138- if (pb->pb_flags & PBF_READ)1139 ClearPageUptodate(page);1140 SetPageError(page);1141- } else if (blocksize == PAGE_CACHE_SIZE) {1142 SetPageUptodate(page);1143 } else if (!PagePrivate(page) &&1144- (pb->pb_flags & _PBF_PAGE_CACHE)) {1145 set_page_region(page, bvec->bv_offset, bvec->bv_len);1146 }11471148 if (--bvec >= bio->bi_io_vec)1149 prefetchw(&bvec->bv_page->flags);11501151- if (_pagebuf_iolocked(pb)) {1152 unlock_page(page);1153 }1154 } while (bvec >= bio->bi_io_vec);11551156- _pagebuf_iodone(pb, 1);1157 bio_put(bio);1158 return 0;1159}11601161STATIC void1162-_pagebuf_ioapply(1163- xfs_buf_t *pb)1164{1165 int i, rw, map_i, total_nr_pages, nr_pages;1166 struct bio *bio;1167- int offset = pb->pb_offset;1168- int size = pb->pb_count_desired;1169- sector_t sector = pb->pb_bn;1170- unsigned int blocksize = pb->pb_target->pbr_bsize;1171- int locking = _pagebuf_iolocked(pb);11721173- total_nr_pages = pb->pb_page_count;1174 map_i = 0;11751176- if (pb->pb_flags & _PBF_RUN_QUEUES) {1177- pb->pb_flags &= ~_PBF_RUN_QUEUES;1178- rw = (pb->pb_flags & PBF_READ) ? READ_SYNC : WRITE_SYNC;1179 } else {1180- rw = (pb->pb_flags & PBF_READ) ? READ : WRITE;1181 }11821183- if (pb->pb_flags & PBF_ORDERED) {1184- ASSERT(!(pb->pb_flags & PBF_READ));1185 rw = WRITE_BARRIER;1186 }11871188- /* Special code path for reading a sub page size pagebuf in --1189 * we populate up the whole page, and hence the other metadata1190 * in the same page. 
This optimization is only valid when the1191- * filesystem block size and the page size are equal.1192 */1193- if ((pb->pb_buffer_length < PAGE_CACHE_SIZE) &&1194- (pb->pb_flags & PBF_READ) && locking &&1195- (blocksize == PAGE_CACHE_SIZE)) {1196 bio = bio_alloc(GFP_NOIO, 1);11971198- bio->bi_bdev = pb->pb_target->pbr_bdev;1199 bio->bi_sector = sector - (offset >> BBSHIFT);1200- bio->bi_end_io = bio_end_io_pagebuf;1201- bio->bi_private = pb;12021203- bio_add_page(bio, pb->pb_pages[0], PAGE_CACHE_SIZE, 0);1204 size = 0;12051206- atomic_inc(&pb->pb_io_remaining);12071208 goto submit_io;1209 }12101211 /* Lock down the pages which we need to for the request */1212- if (locking && (pb->pb_flags & PBF_WRITE) && (pb->pb_locked == 0)) {1213 for (i = 0; size; i++) {1214 int nbytes = PAGE_CACHE_SIZE - offset;1215- struct page *page = pb->pb_pages[i];12161217 if (nbytes > size)1218 nbytes = size;···1197 size -= nbytes;1198 offset = 0;1199 }1200- offset = pb->pb_offset;1201- size = pb->pb_count_desired;1202 }12031204next_chunk:1205- atomic_inc(&pb->pb_io_remaining);1206 nr_pages = BIO_MAX_SECTORS >> (PAGE_SHIFT - BBSHIFT);1207 if (nr_pages > total_nr_pages)1208 nr_pages = total_nr_pages;12091210 bio = bio_alloc(GFP_NOIO, nr_pages);1211- bio->bi_bdev = pb->pb_target->pbr_bdev;1212 bio->bi_sector = sector;1213- bio->bi_end_io = bio_end_io_pagebuf;1214- bio->bi_private = pb;12151216 for (; size && nr_pages; nr_pages--, map_i++) {1217- int nbytes = PAGE_CACHE_SIZE - offset;12181219 if (nbytes > size)1220 nbytes = size;12211222- if (bio_add_page(bio, pb->pb_pages[map_i],1223- nbytes, offset) < nbytes)1224 break;12251226 offset = 0;···1236 goto next_chunk;1237 } else {1238 bio_put(bio);1239- pagebuf_ioerror(pb, EIO);1240 }1241}12421243-/*1244- * pagebuf_iorequest -- the core I/O request routine.1245- */1246int1247-pagebuf_iorequest( /* start real I/O */1248- xfs_buf_t *pb) /* buffer to convey to device */1249{1250- PB_TRACE(pb, "iorequest", 0);12511252- if (pb->pb_flags & PBF_DELWRI) {1253- pagebuf_delwri_queue(pb, 1);1254 return 0;1255 }12561257- if (pb->pb_flags & PBF_WRITE) {1258- _pagebuf_wait_unpin(pb);1259 }12601261- pagebuf_hold(pb);12621263 /* Set the count to 1 initially, this will stop an I/O1264 * completion callout which happens before we have started1265- * all the I/O from calling pagebuf_iodone too early.1266 */1267- atomic_set(&pb->pb_io_remaining, 1);1268- _pagebuf_ioapply(pb);1269- _pagebuf_iodone(pb, 0);12701271- pagebuf_rele(pb);1272 return 0;1273}12741275/*1276- * pagebuf_iowait1277- *1278- * pagebuf_iowait waits for I/O to complete on the buffer supplied.1279- * It returns immediately if no I/O is pending. 
In any case, it returns1280- * the error code, if any, or 0 if there is no error.1281 */1282int1283-pagebuf_iowait(1284- xfs_buf_t *pb)1285{1286- PB_TRACE(pb, "iowait", 0);1287- if (atomic_read(&pb->pb_io_remaining))1288- blk_run_address_space(pb->pb_target->pbr_mapping);1289- down(&pb->pb_iodonesema);1290- PB_TRACE(pb, "iowaited", (long)pb->pb_error);1291- return pb->pb_error;1292}12931294-caddr_t1295-pagebuf_offset(1296- xfs_buf_t *pb,1297 size_t offset)1298{1299 struct page *page;13001301- offset += pb->pb_offset;013021303- page = pb->pb_pages[offset >> PAGE_CACHE_SHIFT];1304- return (caddr_t) page_address(page) + (offset & (PAGE_CACHE_SIZE - 1));01305}13061307/*1308- * pagebuf_iomove1309- *1310 * Move data into or out of a buffer.1311 */1312void1313-pagebuf_iomove(1314- xfs_buf_t *pb, /* buffer to process */1315 size_t boff, /* starting buffer offset */1316 size_t bsize, /* length to copy */1317 caddr_t data, /* data address */1318- page_buf_rw_t mode) /* read/write flag */1319{1320 size_t bend, cpoff, csize;1321 struct page *page;13221323 bend = boff + bsize;1324 while (boff < bend) {1325- page = pb->pb_pages[page_buf_btoct(boff + pb->pb_offset)];1326- cpoff = page_buf_poff(boff + pb->pb_offset);1327 csize = min_t(size_t,1328- PAGE_CACHE_SIZE-cpoff, pb->pb_count_desired-boff);13291330 ASSERT(((csize + cpoff) <= PAGE_CACHE_SIZE));13311332 switch (mode) {1333- case PBRW_ZERO:1334 memset(page_address(page) + cpoff, 0, csize);1335 break;1336- case PBRW_READ:1337 memcpy(data, page_address(page) + cpoff, csize);1338 break;1339- case PBRW_WRITE:1340 memcpy(page_address(page) + cpoff, data, csize);1341 }1342···1341}13421343/*1344- * Handling of buftargs.1345 */13461347/*1348- * Wait for any bufs with callbacks that have been submitted but1349- * have not yet returned... walk the hash list for the target.1350 */1351void1352xfs_wait_buftarg(···1360 hash = &btp->bt_hash[i];1361again:1362 spin_lock(&hash->bh_lock);1363- list_for_each_entry_safe(bp, n, &hash->bh_list, pb_hash_list) {1364- ASSERT(btp == bp->pb_target);1365- if (!(bp->pb_flags & PBF_FS_MANAGED)) {1366 spin_unlock(&hash->bh_lock);1367 /*1368 * Catch superblock reference count leaks1369 * immediately1370 */1371- BUG_ON(bp->pb_bn == 0);1372 delay(100);1373 goto again;1374 }···1378}13791380/*1381- * Allocate buffer hash table for a given target.1382- * For devices containing metadata (i.e. not the log/realtime devices)1383- * we need to allocate a much larger hash table.1384 */1385STATIC void1386xfs_alloc_bufhash(···1403xfs_free_bufhash(1404 xfs_buftarg_t *btp)1405{1406- kmem_free(btp->bt_hash,1407- (1 << btp->bt_hashshift) * sizeof(xfs_bufhash_t));1408 btp->bt_hash = NULL;0000000000000000000000001409}14101411void···1438{1439 xfs_flush_buftarg(btp, 1);1440 if (external)1441- xfs_blkdev_put(btp->pbr_bdev);1442 xfs_free_bufhash(btp);1443- iput(btp->pbr_mapping->host);00000001444 kmem_free(btp, sizeof(*btp));1445}1446···1458 unsigned int sectorsize,1459 int verbose)1460{1461- btp->pbr_bsize = blocksize;1462- btp->pbr_sshift = ffs(sectorsize) - 1;1463- btp->pbr_smask = sectorsize - 1;14641465- if (set_blocksize(btp->pbr_bdev, sectorsize)) {1466 printk(KERN_WARNING1467 "XFS: Cannot set_blocksize to %u on device %s\n",1468 sectorsize, XFS_BUFTARG_NAME(btp));···1482}14831484/*1485-* When allocating the initial buffer target we have not yet1486-* read in the superblock, so don't know what sized sectors1487-* are being used is at this early stage. 
Play safe.1488-*/1489STATIC int1490xfs_setsize_buftarg_early(1491 xfs_buftarg_t *btp,···1533 mapping->a_ops = &mapping_aops;1534 mapping->backing_dev_info = bdi;1535 mapping_set_gfp_mask(mapping, GFP_NOFS);1536- btp->pbr_mapping = mapping;1537 return 0;000000000000000000001538}15391540xfs_buftarg_t *···15661567 btp = kmem_zalloc(sizeof(*btp), KM_SLEEP);15681569- btp->pbr_dev = bdev->bd_dev;1570- btp->pbr_bdev = bdev;1571 if (xfs_setsize_buftarg_early(btp, bdev))1572 goto error;1573 if (xfs_mapping_buftarg(btp, bdev))001574 goto error;1575 xfs_alloc_bufhash(btp, external);1576 return btp;···158415851586/*1587- * Pagebuf delayed write buffer handling1588 */1589-1590-STATIC LIST_HEAD(pbd_delwrite_queue);1591-STATIC DEFINE_SPINLOCK(pbd_delwrite_lock);1592-1593STATIC void1594-pagebuf_delwri_queue(1595- xfs_buf_t *pb,1596 int unlock)1597{1598- PB_TRACE(pb, "delwri_q", (long)unlock);1599- ASSERT((pb->pb_flags & (PBF_DELWRI|PBF_ASYNC)) ==1600- (PBF_DELWRI|PBF_ASYNC));16011602- spin_lock(&pbd_delwrite_lock);0001603 /* If already in the queue, dequeue and place at tail */1604- if (!list_empty(&pb->pb_list)) {1605- ASSERT(pb->pb_flags & _PBF_DELWRI_Q);1606- if (unlock) {1607- atomic_dec(&pb->pb_hold);1608- }1609- list_del(&pb->pb_list);1610 }16111612- pb->pb_flags |= _PBF_DELWRI_Q;1613- list_add_tail(&pb->pb_list, &pbd_delwrite_queue);1614- pb->pb_queuetime = jiffies;1615- spin_unlock(&pbd_delwrite_lock);16161617 if (unlock)1618- pagebuf_unlock(pb);1619}16201621void1622-pagebuf_delwri_dequeue(1623- xfs_buf_t *pb)1624{01625 int dequeued = 0;16261627- spin_lock(&pbd_delwrite_lock);1628- if ((pb->pb_flags & PBF_DELWRI) && !list_empty(&pb->pb_list)) {1629- ASSERT(pb->pb_flags & _PBF_DELWRI_Q);1630- list_del_init(&pb->pb_list);1631 dequeued = 1;1632 }1633- pb->pb_flags &= ~(PBF_DELWRI|_PBF_DELWRI_Q);1634- spin_unlock(&pbd_delwrite_lock);16351636 if (dequeued)1637- pagebuf_rele(pb);16381639- PB_TRACE(pb, "delwri_dq", (long)dequeued);1640}16411642STATIC void1643-pagebuf_runall_queues(1644 struct workqueue_struct *queue)1645{1646 flush_workqueue(queue);1647}1648-1649-/* Defines for pagebuf daemon */1650-STATIC struct task_struct *xfsbufd_task;1651-STATIC int xfsbufd_force_flush;1652-STATIC int xfsbufd_force_sleep;16531654STATIC int1655xfsbufd_wakeup(1656 int priority,1657 gfp_t mask)1658{1659- if (xfsbufd_force_sleep)1660- return 0;1661- xfsbufd_force_flush = 1;1662- barrier();1663- wake_up_process(xfsbufd_task);000001664 return 0;1665}1666···1668{1669 struct list_head tmp;1670 unsigned long age;1671- xfs_buftarg_t *target;1672- xfs_buf_t *pb, *n;0016731674 current->flags |= PF_MEMALLOC;16751676 INIT_LIST_HEAD(&tmp);1677 do {1678 if (unlikely(freezing(current))) {1679- xfsbufd_force_sleep = 1;1680 refrigerator();1681 } else {1682- xfsbufd_force_sleep = 0;1683 }16841685 schedule_timeout_interruptible(1686 xfs_buf_timer_centisecs * msecs_to_jiffies(10));16871688 age = xfs_buf_age_centisecs * msecs_to_jiffies(10);1689- spin_lock(&pbd_delwrite_lock);1690- list_for_each_entry_safe(pb, n, &pbd_delwrite_queue, pb_list) {1691- PB_TRACE(pb, "walkq1", (long)pagebuf_ispin(pb));1692- ASSERT(pb->pb_flags & PBF_DELWRI);16931694- if (!pagebuf_ispin(pb) && !pagebuf_cond_lock(pb)) {1695- if (!xfsbufd_force_flush &&01696 time_before(jiffies,1697- pb->pb_queuetime + age)) {1698- pagebuf_unlock(pb);1699 break;1700 }17011702- pb->pb_flags &= ~(PBF_DELWRI|_PBF_DELWRI_Q);1703- pb->pb_flags |= PBF_WRITE;1704- list_move(&pb->pb_list, &tmp);1705 }1706 }1707- spin_unlock(&pbd_delwrite_lock);17081709 while (!list_empty(&tmp)) {1710- pb 
= list_entry(tmp.next, xfs_buf_t, pb_list);1711- target = pb->pb_target;17121713- list_del_init(&pb->pb_list);1714- pagebuf_iostrategy(pb);17151716- blk_run_address_space(target->pbr_mapping);1717 }17181719 if (as_list_len > 0)1720 purge_addresses();17211722- xfsbufd_force_flush = 0;1723 } while (!kthread_should_stop());17241725 return 0;1726}17271728/*1729- * Go through all incore buffers, and release buffers if they belong to1730- * the given device. This is used in filesystem error handling to1731- * preserve the consistency of its metadata.1732 */1733int1734xfs_flush_buftarg(···1739 int wait)1740{1741 struct list_head tmp;1742- xfs_buf_t *pb, *n;1743 int pincount = 0;0017441745- pagebuf_runall_queues(xfsdatad_workqueue);1746- pagebuf_runall_queues(xfslogd_workqueue);17471748 INIT_LIST_HEAD(&tmp);1749- spin_lock(&pbd_delwrite_lock);1750- list_for_each_entry_safe(pb, n, &pbd_delwrite_queue, pb_list) {1751-1752- if (pb->pb_target != target)1753- continue;1754-1755- ASSERT(pb->pb_flags & (PBF_DELWRI|_PBF_DELWRI_Q));1756- PB_TRACE(pb, "walkq2", (long)pagebuf_ispin(pb));1757- if (pagebuf_ispin(pb)) {1758 pincount++;1759 continue;1760 }17611762- list_move(&pb->pb_list, &tmp);1763 }1764- spin_unlock(&pbd_delwrite_lock);17651766 /*1767 * Dropped the delayed write list lock, now walk the temporary list1768 */1769- list_for_each_entry_safe(pb, n, &tmp, pb_list) {1770- pagebuf_lock(pb);1771- pb->pb_flags &= ~(PBF_DELWRI|_PBF_DELWRI_Q);1772- pb->pb_flags |= PBF_WRITE;1773 if (wait)1774- pb->pb_flags &= ~PBF_ASYNC;1775 else1776- list_del_init(&pb->pb_list);17771778- pagebuf_iostrategy(pb);1779 }17801781 /*1782 * Remaining list items must be flushed before returning1783 */1784 while (!list_empty(&tmp)) {1785- pb = list_entry(tmp.next, xfs_buf_t, pb_list);17861787- list_del_init(&pb->pb_list);1788- xfs_iowait(pb);1789- xfs_buf_relse(pb);1790 }17911792 if (wait)1793- blk_run_address_space(target->pbr_mapping);17941795 return pincount;1796}17971798int __init1799-pagebuf_init(void)1800{1801 int error = -ENOMEM;18021803-#ifdef PAGEBUF_TRACE1804- pagebuf_trace_buf = ktrace_alloc(PAGEBUF_TRACE_SIZE, KM_SLEEP);1805#endif18061807- pagebuf_zone = kmem_zone_init(sizeof(xfs_buf_t), "xfs_buf");1808- if (!pagebuf_zone)1809 goto out_free_trace_buf;18101811 xfslogd_workqueue = create_workqueue("xfslogd");···1815 if (!xfsdatad_workqueue)1816 goto out_destroy_xfslogd_workqueue;18171818- xfsbufd_task = kthread_run(xfsbufd, NULL, "xfsbufd");1819- if (IS_ERR(xfsbufd_task)) {1820- error = PTR_ERR(xfsbufd_task);1821 goto out_destroy_xfsdatad_workqueue;1822- }1823-1824- pagebuf_shake = kmem_shake_register(xfsbufd_wakeup);1825- if (!pagebuf_shake)1826- goto out_stop_xfsbufd;18271828 return 0;18291830- out_stop_xfsbufd:1831- kthread_stop(xfsbufd_task);1832 out_destroy_xfsdatad_workqueue:1833 destroy_workqueue(xfsdatad_workqueue);1834 out_destroy_xfslogd_workqueue:1835 destroy_workqueue(xfslogd_workqueue);1836 out_free_buf_zone:1837- kmem_zone_destroy(pagebuf_zone);1838 out_free_trace_buf:1839-#ifdef PAGEBUF_TRACE1840- ktrace_free(pagebuf_trace_buf);1841#endif1842 return error;1843}18441845void1846-pagebuf_terminate(void)1847{1848- kmem_shake_deregister(pagebuf_shake);1849- kthread_stop(xfsbufd_task);1850 destroy_workqueue(xfsdatad_workqueue);1851 destroy_workqueue(xfslogd_workqueue);1852- kmem_zone_destroy(pagebuf_zone);1853-#ifdef PAGEBUF_TRACE1854- ktrace_free(pagebuf_trace_buf);1855#endif1856}
···31#include <linux/kthread.h>32#include "xfs_linux.h"3334+STATIC kmem_zone_t *xfs_buf_zone;35+STATIC kmem_shaker_t xfs_buf_shake;36+STATIC int xfsbufd(void *);37STATIC int xfsbufd_wakeup(int, gfp_t);38+STATIC void xfs_buf_delwri_queue(xfs_buf_t *, int);3940STATIC struct workqueue_struct *xfslogd_workqueue;41struct workqueue_struct *xfsdatad_workqueue;4243+#ifdef XFS_BUF_TRACE44void45+xfs_buf_trace(46+ xfs_buf_t *bp,47 char *id,48 void *data,49 void *ra)50{51+ ktrace_enter(xfs_buf_trace_buf,52+ bp, id,53+ (void *)(unsigned long)bp->b_flags,54+ (void *)(unsigned long)bp->b_hold.counter,55+ (void *)(unsigned long)bp->b_sema.count.counter,56 (void *)current,57 data, ra,58+ (void *)(unsigned long)((bp->b_file_offset>>32) & 0xffffffff),59+ (void *)(unsigned long)(bp->b_file_offset & 0xffffffff),60+ (void *)(unsigned long)bp->b_buffer_length,61 NULL, NULL, NULL, NULL, NULL);62}63+ktrace_t *xfs_buf_trace_buf;64+#define XFS_BUF_TRACE_SIZE 409665+#define XB_TRACE(bp, id, data) \66+ xfs_buf_trace(bp, id, (void *)data, (void *)__builtin_return_address(0))67#else68+#define XB_TRACE(bp, id, data) do { } while (0)69#endif7071+#ifdef XFS_BUF_LOCK_TRACKING72+# define XB_SET_OWNER(bp) ((bp)->b_last_holder = current->pid)73+# define XB_CLEAR_OWNER(bp) ((bp)->b_last_holder = -1)74+# define XB_GET_OWNER(bp) ((bp)->b_last_holder)75#else76+# define XB_SET_OWNER(bp) do { } while (0)77+# define XB_CLEAR_OWNER(bp) do { } while (0)78+# define XB_GET_OWNER(bp) do { } while (0)79#endif8081+#define xb_to_gfp(flags) \82+ ((((flags) & XBF_READ_AHEAD) ? __GFP_NORETRY : \83+ ((flags) & XBF_DONT_BLOCK) ? GFP_NOFS : GFP_KERNEL) | __GFP_NOWARN)8485+#define xb_to_km(flags) \86+ (((flags) & XBF_DONT_BLOCK) ? KM_NOFS : KM_SLEEP)8788+#define xfs_buf_allocate(flags) \89+ kmem_zone_alloc(xfs_buf_zone, xb_to_km(flags))90+#define xfs_buf_deallocate(bp) \91+ kmem_zone_free(xfs_buf_zone, (bp));9293/*94+ * Page Region interfaces.95 *96+ * For pages in filesystems where the blocksize is smaller than the97+ * pagesize, we use the page->private field (long) to hold a bitmap98+ * of uptodate regions within the page.99 *100+ * Each such region is "bytes per page / bits per long" bytes long.101 *102+ * NBPPR == number-of-bytes-per-page-region103+ * BTOPR == bytes-to-page-region (rounded up)104+ * BTOPRT == bytes-to-page-region-truncated (rounded down)105 */106#if (BITS_PER_LONG == 32)107#define PRSHIFT (PAGE_CACHE_SHIFT - 5) /* (32 == 1<<5) */···159}160161/*162+ * Mapping of multi-page buffers into contiguous virtual space163 */164165typedef struct a_list {···172STATIC DEFINE_SPINLOCK(as_lock);173174/*175+ * Try to batch vunmaps because they are costly.176 */177STATIC void178free_address(···215}216217/*218+ * Internal xfs_buf_t object manipulation219 */220221STATIC void222+_xfs_buf_initialize(223+ xfs_buf_t *bp,224 xfs_buftarg_t *target,225+ xfs_off_t range_base,226 size_t range_length,227+ xfs_buf_flags_t flags)228{229 /*230+ * We don't want certain flags to appear in b_flags.231 */232+ flags &= ~(XBF_LOCK|XBF_MAPPED|XBF_DONT_BLOCK|XBF_READ_AHEAD);233234+ memset(bp, 0, sizeof(xfs_buf_t));235+ atomic_set(&bp->b_hold, 1);236+ init_MUTEX_LOCKED(&bp->b_iodonesema);237+ INIT_LIST_HEAD(&bp->b_list);238+ INIT_LIST_HEAD(&bp->b_hash_list);239+ init_MUTEX_LOCKED(&bp->b_sema); /* held, no waiters */240+ XB_SET_OWNER(bp);241+ bp->b_target = target;242+ bp->b_file_offset = range_base;243 /*244 * Set buffer_length and count_desired to the same value initially.245 * I/O routines should use count_desired, which will be the same in246 * most cases but may 
be reset (e.g. XFS recovery).247 */248+ bp->b_buffer_length = bp->b_count_desired = range_length;249+ bp->b_flags = flags;250+ bp->b_bn = XFS_BUF_DADDR_NULL;251+ atomic_set(&bp->b_pin_count, 0);252+ init_waitqueue_head(&bp->b_waiters);253254+ XFS_STATS_INC(xb_create);255+ XB_TRACE(bp, "initialize", target);256}257258/*259+ * Allocate a page array capable of holding a specified number260+ * of pages, and point the page buf at it.261 */262STATIC int263+_xfs_buf_get_pages(264+ xfs_buf_t *bp,265 int page_count,266+ xfs_buf_flags_t flags)267{268 /* Make sure that we have a page list */269+ if (bp->b_pages == NULL) {270+ bp->b_offset = xfs_buf_poff(bp->b_file_offset);271+ bp->b_page_count = page_count;272+ if (page_count <= XB_PAGES) {273+ bp->b_pages = bp->b_page_array;274 } else {275+ bp->b_pages = kmem_alloc(sizeof(struct page *) *276+ page_count, xb_to_km(flags));277+ if (bp->b_pages == NULL)278 return -ENOMEM;279 }280+ memset(bp->b_pages, 0, sizeof(struct page *) * page_count);281 }282 return 0;283}284285/*286+ * Frees b_pages if it was allocated.287 */288STATIC void289+_xfs_buf_free_pages(290 xfs_buf_t *bp)291{292+ if (bp->b_pages != bp->b_page_array) {293+ kmem_free(bp->b_pages,294+ bp->b_page_count * sizeof(struct page *));295 }296}297···299 * Releases the specified buffer.300 *301 * The modification state of any associated pages is left unchanged.302+ * The buffer most not be on any hash - use xfs_buf_rele instead for303 * hashed and refcounted buffers304 */305void306+xfs_buf_free(307 xfs_buf_t *bp)308{309+ XB_TRACE(bp, "free", 0);310311+ ASSERT(list_empty(&bp->b_hash_list));312313+ if (bp->b_flags & _XBF_PAGE_CACHE) {314 uint i;315316+ if ((bp->b_flags & XBF_MAPPED) && (bp->b_page_count > 1))317+ free_address(bp->b_addr - bp->b_offset);318319+ for (i = 0; i < bp->b_page_count; i++)320+ page_cache_release(bp->b_pages[i]);321+ _xfs_buf_free_pages(bp);322+ } else if (bp->b_flags & _XBF_KMEM_ALLOC) {323 /*324+ * XXX(hch): bp->b_count_desired might be incorrect (see325+ * xfs_buf_associate_memory for details), but fortunately326 * the Linux version of kmem_free ignores the len argument..327 */328+ kmem_free(bp->b_addr, bp->b_count_desired);329+ _xfs_buf_free_pages(bp);330 }331332+ xfs_buf_deallocate(bp);333}334335/*336 * Finds all pages for buffer in question and builds it's page list.337 */338STATIC int339+_xfs_buf_lookup_pages(340 xfs_buf_t *bp,341 uint flags)342{343+ struct address_space *mapping = bp->b_target->bt_mapping;344+ size_t blocksize = bp->b_target->bt_bsize;345+ size_t size = bp->b_count_desired;346 size_t nbytes, offset;347+ gfp_t gfp_mask = xb_to_gfp(flags);348 unsigned short page_count, i;349 pgoff_t first;350+ xfs_off_t end;351 int error;352353+ end = bp->b_file_offset + bp->b_buffer_length;354+ page_count = xfs_buf_btoc(end) - xfs_buf_btoct(bp->b_file_offset);355356+ error = _xfs_buf_get_pages(bp, page_count, flags);357 if (unlikely(error))358 return error;359+ bp->b_flags |= _XBF_PAGE_CACHE;360361+ offset = bp->b_offset;362+ first = bp->b_file_offset >> PAGE_CACHE_SHIFT;363364+ for (i = 0; i < bp->b_page_count; i++) {365 struct page *page;366 uint retries = 0;367368 retry:369 page = find_or_create_page(mapping, first + i, gfp_mask);370 if (unlikely(page == NULL)) {371+ if (flags & XBF_READ_AHEAD) {372+ bp->b_page_count = i;373+ for (i = 0; i < bp->b_page_count; i++)374+ unlock_page(bp->b_pages[i]);375 return -ENOMEM;376 }377···387 "deadlock in %s (mode:0x%x)\n",388 __FUNCTION__, gfp_mask);389390+ XFS_STATS_INC(xb_page_retries);391 xfsbufd_wakeup(0, gfp_mask);392 
blk_congestion_wait(WRITE, HZ/50);393 goto retry;394 }395396+ XFS_STATS_INC(xb_page_found);397398 nbytes = min_t(size_t, size, PAGE_CACHE_SIZE - offset);399 size -= nbytes;···401 if (!PageUptodate(page)) {402 page_count--;403 if (blocksize >= PAGE_CACHE_SIZE) {404+ if (flags & XBF_READ)405+ bp->b_locked = 1;406 } else if (!PagePrivate(page)) {407 if (test_page_region(page, offset, nbytes))408 page_count++;409 }410 }411412+ bp->b_pages[i] = page;413 offset = 0;414 }415416+ if (!bp->b_locked) {417+ for (i = 0; i < bp->b_page_count; i++)418+ unlock_page(bp->b_pages[i]);419 }420421+ if (page_count == bp->b_page_count)422+ bp->b_flags |= XBF_DONE;423424+ XB_TRACE(bp, "lookup_pages", (long)page_count);425 return error;426}427···429 * Map buffer into kernel address-space if nessecary.430 */431STATIC int432+_xfs_buf_map_pages(433 xfs_buf_t *bp,434 uint flags)435{436 /* A single page buffer is always mappable */437+ if (bp->b_page_count == 1) {438+ bp->b_addr = page_address(bp->b_pages[0]) + bp->b_offset;439+ bp->b_flags |= XBF_MAPPED;440+ } else if (flags & XBF_MAPPED) {441 if (as_list_len > 64)442 purge_addresses();443+ bp->b_addr = vmap(bp->b_pages, bp->b_page_count,444+ VM_MAP, PAGE_KERNEL);445+ if (unlikely(bp->b_addr == NULL))446 return -ENOMEM;447+ bp->b_addr += bp->b_offset;448+ bp->b_flags |= XBF_MAPPED;449 }450451 return 0;···456 */457458/*459+ * Look up, and creates if absent, a lockable buffer for00460 * a given range of an inode. The buffer is returned461 * locked. If other overlapping buffers exist, they are462 * released before the new buffer is created and locked,···466 * are unlocked. No I/O is implied by this call.467 */468xfs_buf_t *469+_xfs_buf_find(470 xfs_buftarg_t *btp, /* block device target */471+ xfs_off_t ioff, /* starting offset of range */472 size_t isize, /* length of range */473+ xfs_buf_flags_t flags,474+ xfs_buf_t *new_bp)475{476+ xfs_off_t range_base;477 size_t range_length;478 xfs_bufhash_t *hash;479+ xfs_buf_t *bp, *n;480481 range_base = (ioff << BBSHIFT);482 range_length = (isize << BBSHIFT);483484 /* Check for IOs smaller than the sector size / not sector aligned */485+ ASSERT(!(range_length < (1 << btp->bt_sshift)));486+ ASSERT(!(range_base & (xfs_off_t)btp->bt_smask));487488 hash = &btp->bt_hash[hash_long((unsigned long)ioff, btp->bt_hashshift)];489490 spin_lock(&hash->bh_lock);491492+ list_for_each_entry_safe(bp, n, &hash->bh_list, b_hash_list) {493+ ASSERT(btp == bp->b_target);494+ if (bp->b_file_offset == range_base &&495+ bp->b_buffer_length == range_length) {496 /*497+ * If we look at something, bring it to the498 * front of the list for next time.499 */500+ atomic_inc(&bp->b_hold);501+ list_move(&bp->b_hash_list, &hash->bh_list);502 goto found;503 }504 }505506 /* No match found */507+ if (new_bp) {508+ _xfs_buf_initialize(new_bp, btp, range_base,509 range_length, flags);510+ new_bp->b_hash = hash;511+ list_add(&new_bp->b_hash_list, &hash->bh_list);512 } else {513+ XFS_STATS_INC(xb_miss_locked);514 }515516 spin_unlock(&hash->bh_lock);517+ return new_bp;518519found:520 spin_unlock(&hash->bh_lock);···523 * if this does not work then we need to drop the524 * spinlock and do a hard attempt on the semaphore.525 */526+ if (down_trylock(&bp->b_sema)) {527+ if (!(flags & XBF_TRYLOCK)) {528 /* wait for buffer ownership */529+ XB_TRACE(bp, "get_lock", 0);530+ xfs_buf_lock(bp);531+ XFS_STATS_INC(xb_get_locked_waited);532 } else {533 /* We asked for a trylock and failed, no need534 * to look at file offset and length here, we535+ * know that this buffer at least 
overlaps our536+ * buffer and is locked, therefore our buffer537+ * either does not exist, or is this buffer.538 */539+ xfs_buf_rele(bp);540+ XFS_STATS_INC(xb_busy_locked);541+ return NULL;0542 }543 } else {544 /* trylock worked */545+ XB_SET_OWNER(bp);546 }547548+ if (bp->b_flags & XBF_STALE) {549+ ASSERT((bp->b_flags & _XBF_DELWRI_Q) == 0);550+ bp->b_flags &= XBF_MAPPED;551 }552+ XB_TRACE(bp, "got_lock", 0);553+ XFS_STATS_INC(xb_get_locked);554+ return bp;555}556557/*558+ * Assembles a buffer covering the specified range.0559 * Storage in memory for all portions of the buffer will be allocated,560 * although backing storage may not be.561 */562xfs_buf_t *563+xfs_buf_get_flags(564 xfs_buftarg_t *target,/* target for buffer */565+ xfs_off_t ioff, /* starting offset of range */566 size_t isize, /* length of range */567+ xfs_buf_flags_t flags)568{569+ xfs_buf_t *bp, *new_bp;570 int error = 0, i;571572+ new_bp = xfs_buf_allocate(flags);573+ if (unlikely(!new_bp))574 return NULL;575576+ bp = _xfs_buf_find(target, ioff, isize, flags, new_bp);577+ if (bp == new_bp) {578+ error = _xfs_buf_lookup_pages(bp, flags);579 if (error)580 goto no_buffer;581 } else {582+ xfs_buf_deallocate(new_bp);583+ if (unlikely(bp == NULL))584 return NULL;585 }586587+ for (i = 0; i < bp->b_page_count; i++)588+ mark_page_accessed(bp->b_pages[i]);589590+ if (!(bp->b_flags & XBF_MAPPED)) {591+ error = _xfs_buf_map_pages(bp, flags);592 if (unlikely(error)) {593 printk(KERN_WARNING "%s: failed to map pages\n",594 __FUNCTION__);···598 }599 }600601+ XFS_STATS_INC(xb_get);602603 /*604 * Always fill in the block number now, the mapped cases can do605 * their own overlay of this later.606 */607+ bp->b_bn = ioff;608+ bp->b_count_desired = bp->b_buffer_length;609610+ XB_TRACE(bp, "get", (unsigned long)flags);611+ return bp;612613 no_buffer:614+ if (flags & (XBF_LOCK | XBF_TRYLOCK))615+ xfs_buf_unlock(bp);616+ xfs_buf_rele(bp);617 return NULL;618}619620xfs_buf_t *621xfs_buf_read_flags(622 xfs_buftarg_t *target,623+ xfs_off_t ioff,624 size_t isize,625+ xfs_buf_flags_t flags)626{627+ xfs_buf_t *bp;628629+ flags |= XBF_READ;630631+ bp = xfs_buf_get_flags(target, ioff, isize, flags);632+ if (bp) {633+ if (!XFS_BUF_ISDONE(bp)) {634+ XB_TRACE(bp, "read", (unsigned long)flags);635+ XFS_STATS_INC(xb_get_read);636+ xfs_buf_iostart(bp, flags);637+ } else if (flags & XBF_ASYNC) {638+ XB_TRACE(bp, "read_async", (unsigned long)flags);639 /*640 * Read ahead call which is already satisfied,641 * drop the buffer642 */643 goto no_buffer;644 } else {645+ XB_TRACE(bp, "read_done", (unsigned long)flags);646 /* We do not want read in the flags */647+ bp->b_flags &= ~XBF_READ;648 }649 }650651+ return bp;652653 no_buffer:654+ if (flags & (XBF_LOCK | XBF_TRYLOCK))655+ xfs_buf_unlock(bp);656+ xfs_buf_rele(bp);657 return NULL;658}659660/*661+ * If we are not low on memory then do the readahead in a deadlock662+ * safe manner.663 */664void665+xfs_buf_readahead(666 xfs_buftarg_t *target,667+ xfs_off_t ioff,668 size_t isize,669+ xfs_buf_flags_t flags)670{671 struct backing_dev_info *bdi;672673+ bdi = target->bt_mapping->backing_dev_info;674 if (bdi_read_congested(bdi))675 return;676677+ flags |= (XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD);678 xfs_buf_read_flags(target, ioff, isize, flags);679}680681xfs_buf_t *682+xfs_buf_get_empty(683 size_t len,684 xfs_buftarg_t *target)685{686+ xfs_buf_t *bp;687688+ bp = xfs_buf_allocate(0);689+ if (bp)690+ _xfs_buf_initialize(bp, target, 0, len, 0);691+ return bp;692}693694static inline struct page 
*···704}705706int707+xfs_buf_associate_memory(708+ xfs_buf_t *bp,709 void *mem,710 size_t len)711{···722 page_count++;723724 /* Free any previous set of page pointers */725+ if (bp->b_pages)726+ _xfs_buf_free_pages(bp);727728+ bp->b_pages = NULL;729+ bp->b_addr = mem;730731+ rval = _xfs_buf_get_pages(bp, page_count, 0);732 if (rval)733 return rval;734735+ bp->b_offset = offset;736 ptr = (size_t) mem & PAGE_CACHE_MASK;737 end = PAGE_CACHE_ALIGN((size_t) mem + len);738 end_cur = end;739 /* set up first page */740+ bp->b_pages[0] = mem_to_page(mem);741742 ptr += PAGE_CACHE_SIZE;743+ bp->b_page_count = ++i;744 while (ptr < end) {745+ bp->b_pages[i] = mem_to_page((void *)ptr);746+ bp->b_page_count = ++i;747 ptr += PAGE_CACHE_SIZE;748 }749+ bp->b_locked = 0;750751+ bp->b_count_desired = bp->b_buffer_length = len;752+ bp->b_flags |= XBF_MAPPED;753754 return 0;755}756757xfs_buf_t *758+xfs_buf_get_noaddr(759 size_t len,760 xfs_buftarg_t *target)761{···764 void *data;765 int error;766767+ bp = xfs_buf_allocate(0);768 if (unlikely(bp == NULL))769 goto fail;770+ _xfs_buf_initialize(bp, target, 0, len, 0);771772 try_again:773 data = kmem_alloc(malloc_len, KM_SLEEP | KM_MAYFAIL);···776777 /* check whether alignment matches.. */778 if ((__psunsigned_t)data !=779+ ((__psunsigned_t)data & ~target->bt_smask)) {780 /* .. else double the size and try again */781 kmem_free(data, malloc_len);782 malloc_len <<= 1;783 goto try_again;784 }785786+ error = xfs_buf_associate_memory(bp, data, len);787 if (error)788 goto fail_free_mem;789+ bp->b_flags |= _XBF_KMEM_ALLOC;790791+ xfs_buf_unlock(bp);792793+ XB_TRACE(bp, "no_daddr", data);794 return bp;795 fail_free_mem:796 kmem_free(data, malloc_len);797 fail_free_buf:798+ xfs_buf_free(bp);799 fail:800 return NULL;801}802803/*00804 * Increment reference count on buffer, to hold the buffer concurrently805 * with another thread which may release (free) the buffer asynchronously.0806 * Must hold the buffer already to call this function.807 */808void809+xfs_buf_hold(810+ xfs_buf_t *bp)811{812+ atomic_inc(&bp->b_hold);813+ XB_TRACE(bp, "hold", 0);814}815816/*817+ * Releases a hold on the specified buffer. If the818+ * the hold count is 1, calls xfs_buf_free.00819 */820void821+xfs_buf_rele(822+ xfs_buf_t *bp)823{824+ xfs_bufhash_t *hash = bp->b_hash;825826+ XB_TRACE(bp, "rele", bp->b_relse);827828+ if (atomic_dec_and_lock(&bp->b_hold, &hash->bh_lock)) {829+ if (bp->b_relse) {830+ atomic_inc(&bp->b_hold);831 spin_unlock(&hash->bh_lock);832+ (*(bp->b_relse)) (bp);833+ } else if (bp->b_flags & XBF_FS_MANAGED) {834 spin_unlock(&hash->bh_lock);835 } else {836+ ASSERT(!(bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q)));837+ list_del_init(&bp->b_hash_list);838 spin_unlock(&hash->bh_lock);839+ xfs_buf_free(bp);840 }841 } else {842 /*843 * Catch reference count leaks844 */845+ ASSERT(atomic_read(&bp->b_hold) >= 0);846 }847}848···863 */864865/*866+ * Locks a buffer object, if it is not already locked.867+ * Note that this in no way locks the underlying pages, so it is only868+ * useful for synchronizing concurrent use of buffer objects, not for869+ * synchronizing independent access to the underlying pages.000870 */871int872+xfs_buf_cond_lock(873+ xfs_buf_t *bp)0874{875 int locked;876877+ locked = down_trylock(&bp->b_sema) == 0;878 if (locked) {879+ XB_SET_OWNER(bp);880 }881+ XB_TRACE(bp, "cond_lock", (long)locked);882+ return locked ? 
0 : -EBUSY;883}884885#if defined(DEBUG) || defined(XFS_BLI_TRACE)00000886int887+xfs_buf_lock_value(888+ xfs_buf_t *bp)889{890+ return atomic_read(&bp->b_sema.count);891}892#endif893894/*895+ * Locks a buffer object.896+ * Note that this in no way locks the underlying pages, so it is only897+ * useful for synchronizing concurrent use of buffer objects, not for898+ * synchronizing independent access to the underlying pages.00899 */900+void901+xfs_buf_lock(902+ xfs_buf_t *bp)903{904+ XB_TRACE(bp, "lock", 0);905+ if (atomic_read(&bp->b_io_remaining))906+ blk_run_address_space(bp->b_target->bt_mapping);907+ down(&bp->b_sema);908+ XB_SET_OWNER(bp);909+ XB_TRACE(bp, "locked", 0);0910}911912/*913+ * Releases the lock on the buffer object.00000914 * If the buffer is marked delwri but is not queued, do so before we915+ * unlock the buffer as we need to set flags correctly. We also need to916 * take a reference for the delwri queue because the unlocker is going to917 * drop their's and they don't know we just queued it.918 */919void920+xfs_buf_unlock(921+ xfs_buf_t *bp)922{923+ if ((bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q)) == XBF_DELWRI) {924+ atomic_inc(&bp->b_hold);925+ bp->b_flags |= XBF_ASYNC;926+ xfs_buf_delwri_queue(bp, 0);927 }928929+ XB_CLEAR_OWNER(bp);930+ up(&bp->b_sema);931+ XB_TRACE(bp, "unlock", 0);932}933934935/*936 * Pinning Buffer Storage in Memory937+ * Ensure that no attempt to force a buffer to disk will succeed.00000000000000938 */939void940+xfs_buf_pin(941+ xfs_buf_t *bp)942{943+ atomic_inc(&bp->b_pin_count);944+ XB_TRACE(bp, "pin", (long)bp->b_pin_count.counter);945}9460000000947void948+xfs_buf_unpin(949+ xfs_buf_t *bp)950{951+ if (atomic_dec_and_test(&bp->b_pin_count))952+ wake_up_all(&bp->b_waiters);953+ XB_TRACE(bp, "unpin", (long)bp->b_pin_count.counter);0954}955956int957+xfs_buf_ispin(958+ xfs_buf_t *bp)959{960+ return atomic_read(&bp->b_pin_count);961}962963+STATIC void964+xfs_buf_wait_unpin(965+ xfs_buf_t *bp)0000000966{967 DECLARE_WAITQUEUE (wait, current);968969+ if (atomic_read(&bp->b_pin_count) == 0)970 return;971972+ add_wait_queue(&bp->b_waiters, &wait);973 for (;;) {974 set_current_state(TASK_UNINTERRUPTIBLE);975+ if (atomic_read(&bp->b_pin_count) == 0)976 break;977+ if (atomic_read(&bp->b_io_remaining))978+ blk_run_address_space(bp->b_target->bt_mapping);979 schedule();980 }981+ remove_wait_queue(&bp->b_waiters, &wait);982 set_current_state(TASK_RUNNING);983}984···1032 * Buffer Utility Routines1033 */103400000001035STATIC void1036+xfs_buf_iodone_work(1037 void *v)1038{1039 xfs_buf_t *bp = (xfs_buf_t *)v;10401041+ if (bp->b_iodone)1042+ (*(bp->b_iodone))(bp);1043+ else if (bp->b_flags & XBF_ASYNC)1044 xfs_buf_relse(bp);1045}10461047void1048+xfs_buf_ioend(1049+ xfs_buf_t *bp,1050 int schedule)1051{1052+ bp->b_flags &= ~(XBF_READ | XBF_WRITE);1053+ if (bp->b_error == 0)1054+ bp->b_flags |= XBF_DONE;10551056+ XB_TRACE(bp, "iodone", bp->b_iodone);10571058+ if ((bp->b_iodone) || (bp->b_flags & XBF_ASYNC)) {1059 if (schedule) {1060+ INIT_WORK(&bp->b_iodone_work, xfs_buf_iodone_work, bp);1061+ queue_work(xfslogd_workqueue, &bp->b_iodone_work);1062 } else {1063+ xfs_buf_iodone_work(bp);1064 }1065 } else {1066+ up(&bp->b_iodonesema);1067 }1068}1069000001070void1071+xfs_buf_ioerror(1072+ xfs_buf_t *bp,1073+ int error)1074{1075 ASSERT(error >= 0 && error <= 0xffff);1076+ bp->b_error = (unsigned short)error;1077+ XB_TRACE(bp, "ioerror", (unsigned long)error);1078}10791080/*1081+ * Initiate I/O on a buffer, based on the flags supplied.1082+ * The b_iodone routine in the buffer 
supplied will only be called00001083 * when all of the subsidiary I/O requests, if any, have been completed.0001084 */1085int1086+xfs_buf_iostart(1087+ xfs_buf_t *bp,1088+ xfs_buf_flags_t flags)001089{1090 int status = 0;10911092+ XB_TRACE(bp, "iostart", (unsigned long)flags);10931094+ if (flags & XBF_DELWRI) {1095+ bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_ASYNC);1096+ bp->b_flags |= flags & (XBF_DELWRI | XBF_ASYNC);1097+ xfs_buf_delwri_queue(bp, 1);1098 return status;1099 }11001101+ bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_ASYNC | XBF_DELWRI | \1102+ XBF_READ_AHEAD | _XBF_RUN_QUEUES);1103+ bp->b_flags |= flags & (XBF_READ | XBF_WRITE | XBF_ASYNC | \1104+ XBF_READ_AHEAD | _XBF_RUN_QUEUES);11051106+ BUG_ON(bp->b_bn == XFS_BUF_DADDR_NULL);11071108 /* For writes allow an alternate strategy routine to precede1109 * the actual I/O request (which may not be issued at all in1110 * a shutdown situation, for example).1111 */1112+ status = (flags & XBF_WRITE) ?1113+ xfs_buf_iostrategy(bp) : xfs_buf_iorequest(bp);11141115 /* Wait for I/O if we are not an async request.1116 * Note: async I/O request completion will release the buffer,1117 * and that can already be done by this point. So using the1118 * buffer pointer from here on, after async I/O, is invalid.1119 */1120+ if (!status && !(flags & XBF_ASYNC))1121+ status = xfs_buf_iowait(bp);11221123 return status;1124}112500001126STATIC __inline__ int1127+_xfs_buf_iolocked(1128+ xfs_buf_t *bp)1129{1130+ ASSERT(bp->b_flags & (XBF_READ | XBF_WRITE));1131+ if (bp->b_flags & XBF_READ)1132+ return bp->b_locked;1133 return 0;1134}11351136STATIC __inline__ void1137+_xfs_buf_ioend(1138+ xfs_buf_t *bp,1139 int schedule)1140{1141+ if (atomic_dec_and_test(&bp->b_io_remaining) == 1) {1142+ bp->b_locked = 0;1143+ xfs_buf_ioend(bp, schedule);1144 }1145}11461147STATIC int1148+xfs_buf_bio_end_io(1149 struct bio *bio,1150 unsigned int bytes_done,1151 int error)1152{1153+ xfs_buf_t *bp = (xfs_buf_t *)bio->bi_private;1154+ unsigned int blocksize = bp->b_target->bt_bsize;1155 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;11561157 if (bio->bi_size)1158 return 1;11591160 if (!test_bit(BIO_UPTODATE, &bio->bi_flags))1161+ bp->b_error = EIO;11621163 do {1164 struct page *page = bvec->bv_page;11651166+ if (unlikely(bp->b_error)) {1167+ if (bp->b_flags & XBF_READ)1168 ClearPageUptodate(page);1169 SetPageError(page);1170+ } else if (blocksize >= PAGE_CACHE_SIZE) {1171 SetPageUptodate(page);1172 } else if (!PagePrivate(page) &&1173+ (bp->b_flags & _XBF_PAGE_CACHE)) {1174 set_page_region(page, bvec->bv_offset, bvec->bv_len);1175 }11761177 if (--bvec >= bio->bi_io_vec)1178 prefetchw(&bvec->bv_page->flags);11791180+ if (_xfs_buf_iolocked(bp)) {1181 unlock_page(page);1182 }1183 } while (bvec >= bio->bi_io_vec);11841185+ _xfs_buf_ioend(bp, 1);1186 bio_put(bio);1187 return 0;1188}11891190STATIC void1191+_xfs_buf_ioapply(1192+ xfs_buf_t *bp)1193{1194 int i, rw, map_i, total_nr_pages, nr_pages;1195 struct bio *bio;1196+ int offset = bp->b_offset;1197+ int size = bp->b_count_desired;1198+ sector_t sector = bp->b_bn;1199+ unsigned int blocksize = bp->b_target->bt_bsize;1200+ int locking = _xfs_buf_iolocked(bp);12011202+ total_nr_pages = bp->b_page_count;1203 map_i = 0;12041205+ if (bp->b_flags & _XBF_RUN_QUEUES) {1206+ bp->b_flags &= ~_XBF_RUN_QUEUES;1207+ rw = (bp->b_flags & XBF_READ) ? READ_SYNC : WRITE_SYNC;1208 } else {1209+ rw = (bp->b_flags & XBF_READ) ? 
READ : WRITE;1210 }12111212+ if (bp->b_flags & XBF_ORDERED) {1213+ ASSERT(!(bp->b_flags & XBF_READ));1214 rw = WRITE_BARRIER;1215 }12161217+ /* Special code path for reading a sub page size buffer in --1218 * we populate up the whole page, and hence the other metadata1219 * in the same page. This optimization is only valid when the1220+ * filesystem block size is not smaller than the page size.1221 */1222+ if ((bp->b_buffer_length < PAGE_CACHE_SIZE) &&1223+ (bp->b_flags & XBF_READ) && locking &&1224+ (blocksize >= PAGE_CACHE_SIZE)) {1225 bio = bio_alloc(GFP_NOIO, 1);12261227+ bio->bi_bdev = bp->b_target->bt_bdev;1228 bio->bi_sector = sector - (offset >> BBSHIFT);1229+ bio->bi_end_io = xfs_buf_bio_end_io;1230+ bio->bi_private = bp;12311232+ bio_add_page(bio, bp->b_pages[0], PAGE_CACHE_SIZE, 0);1233 size = 0;12341235+ atomic_inc(&bp->b_io_remaining);12361237 goto submit_io;1238 }12391240 /* Lock down the pages which we need to for the request */1241+ if (locking && (bp->b_flags & XBF_WRITE) && (bp->b_locked == 0)) {1242 for (i = 0; size; i++) {1243 int nbytes = PAGE_CACHE_SIZE - offset;1244+ struct page *page = bp->b_pages[i];12451246 if (nbytes > size)1247 nbytes = size;···1276 size -= nbytes;1277 offset = 0;1278 }1279+ offset = bp->b_offset;1280+ size = bp->b_count_desired;1281 }12821283next_chunk:1284+ atomic_inc(&bp->b_io_remaining);1285 nr_pages = BIO_MAX_SECTORS >> (PAGE_SHIFT - BBSHIFT);1286 if (nr_pages > total_nr_pages)1287 nr_pages = total_nr_pages;12881289 bio = bio_alloc(GFP_NOIO, nr_pages);1290+ bio->bi_bdev = bp->b_target->bt_bdev;1291 bio->bi_sector = sector;1292+ bio->bi_end_io = xfs_buf_bio_end_io;1293+ bio->bi_private = bp;12941295 for (; size && nr_pages; nr_pages--, map_i++) {1296+ int rbytes, nbytes = PAGE_CACHE_SIZE - offset;12971298 if (nbytes > size)1299 nbytes = size;13001301+ rbytes = bio_add_page(bio, bp->b_pages[map_i], nbytes, offset);1302+ if (rbytes < nbytes)1303 break;13041305 offset = 0;···1315 goto next_chunk;1316 } else {1317 bio_put(bio);1318+ xfs_buf_ioerror(bp, EIO);1319 }1320}13210001322int1323+xfs_buf_iorequest(1324+ xfs_buf_t *bp)1325{1326+ XB_TRACE(bp, "iorequest", 0);13271328+ if (bp->b_flags & XBF_DELWRI) {1329+ xfs_buf_delwri_queue(bp, 1);1330 return 0;1331 }13321333+ if (bp->b_flags & XBF_WRITE) {1334+ xfs_buf_wait_unpin(bp);1335 }13361337+ xfs_buf_hold(bp);13381339 /* Set the count to 1 initially, this will stop an I/O1340 * completion callout which happens before we have started1341+ * all the I/O from calling xfs_buf_ioend too early.1342 */1343+ atomic_set(&bp->b_io_remaining, 1);1344+ _xfs_buf_ioapply(bp);1345+ _xfs_buf_ioend(bp, 0);13461347+ xfs_buf_rele(bp);1348 return 0;1349}13501351/*1352+ * Waits for I/O to complete on the buffer supplied.1353+ * It returns immediately if no I/O is pending.1354+ * It returns the I/O error code, if any, or 0 if there was no error.001355 */1356int1357+xfs_buf_iowait(1358+ xfs_buf_t *bp)1359{1360+ XB_TRACE(bp, "iowait", 0);1361+ if (atomic_read(&bp->b_io_remaining))1362+ blk_run_address_space(bp->b_target->bt_mapping);1363+ down(&bp->b_iodonesema);1364+ XB_TRACE(bp, "iowaited", (long)bp->b_error);1365+ return bp->b_error;1366}13671368+xfs_caddr_t1369+xfs_buf_offset(1370+ xfs_buf_t *bp,1371 size_t offset)1372{1373 struct page *page;13741375+ if (bp->b_flags & XBF_MAPPED)1376+ return XFS_BUF_PTR(bp) + offset;13771378+ offset += bp->b_offset;1379+ page = bp->b_pages[offset >> PAGE_CACHE_SHIFT];1380+ return (xfs_caddr_t)page_address(page) + (offset & (PAGE_CACHE_SIZE-1));1381}13821383/*001384 * Move data into 
or out of a buffer.1385 */1386void1387+xfs_buf_iomove(1388+ xfs_buf_t *bp, /* buffer to process */1389 size_t boff, /* starting buffer offset */1390 size_t bsize, /* length to copy */1391 caddr_t data, /* data address */1392+ xfs_buf_rw_t mode) /* read/write/zero flag */1393{1394 size_t bend, cpoff, csize;1395 struct page *page;13961397 bend = boff + bsize;1398 while (boff < bend) {1399+ page = bp->b_pages[xfs_buf_btoct(boff + bp->b_offset)];1400+ cpoff = xfs_buf_poff(boff + bp->b_offset);1401 csize = min_t(size_t,1402+ PAGE_CACHE_SIZE-cpoff, bp->b_count_desired-boff);14031404 ASSERT(((csize + cpoff) <= PAGE_CACHE_SIZE));14051406 switch (mode) {1407+ case XBRW_ZERO:1408 memset(page_address(page) + cpoff, 0, csize);1409 break;1410+ case XBRW_READ:1411 memcpy(data, page_address(page) + cpoff, csize);1412 break;1413+ case XBRW_WRITE:1414 memcpy(page_address(page) + cpoff, data, csize);1415 }1416···1425}14261427/*1428+ * Handling of buffer targets (buftargs).1429 */14301431/*1432+ * Wait for any bufs with callbacks that have been submitted but1433+ * have not yet returned... walk the hash list for the target.1434 */1435void1436xfs_wait_buftarg(···1444 hash = &btp->bt_hash[i];1445again:1446 spin_lock(&hash->bh_lock);1447+ list_for_each_entry_safe(bp, n, &hash->bh_list, b_hash_list) {1448+ ASSERT(btp == bp->b_target);1449+ if (!(bp->b_flags & XBF_FS_MANAGED)) {1450 spin_unlock(&hash->bh_lock);1451 /*1452 * Catch superblock reference count leaks1453 * immediately1454 */1455+ BUG_ON(bp->b_bn == 0);1456 delay(100);1457 goto again;1458 }···1462}14631464/*1465+ * Allocate buffer hash table for a given target.1466+ * For devices containing metadata (i.e. not the log/realtime devices)1467+ * we need to allocate a much larger hash table.1468 */1469STATIC void1470xfs_alloc_bufhash(···1487xfs_free_bufhash(1488 xfs_buftarg_t *btp)1489{1490+ kmem_free(btp->bt_hash, (1<<btp->bt_hashshift) * sizeof(xfs_bufhash_t));01491 btp->bt_hash = NULL;1492+}1493+1494+/*1495+ * buftarg list for delwrite queue processing1496+ */1497+STATIC LIST_HEAD(xfs_buftarg_list);1498+STATIC DEFINE_SPINLOCK(xfs_buftarg_lock);1499+1500+STATIC void1501+xfs_register_buftarg(1502+ xfs_buftarg_t *btp)1503+{1504+ spin_lock(&xfs_buftarg_lock);1505+ list_add(&btp->bt_list, &xfs_buftarg_list);1506+ spin_unlock(&xfs_buftarg_lock);1507+}1508+1509+STATIC void1510+xfs_unregister_buftarg(1511+ xfs_buftarg_t *btp)1512+{1513+ spin_lock(&xfs_buftarg_lock);1514+ list_del(&btp->bt_list);1515+ spin_unlock(&xfs_buftarg_lock);1516}15171518void···1499{1500 xfs_flush_buftarg(btp, 1);1501 if (external)1502+ xfs_blkdev_put(btp->bt_bdev);1503 xfs_free_bufhash(btp);1504+ iput(btp->bt_mapping->host);1505+1506+ /* Unregister the buftarg first so that we don't get a1507+ * wakeup finding a non-existent task1508+ */1509+ xfs_unregister_buftarg(btp);1510+ kthread_stop(btp->bt_task);1511+1512 kmem_free(btp, sizeof(*btp));1513}1514···1512 unsigned int sectorsize,1513 int verbose)1514{1515+ btp->bt_bsize = blocksize;1516+ btp->bt_sshift = ffs(sectorsize) - 1;1517+ btp->bt_smask = sectorsize - 1;15181519+ if (set_blocksize(btp->bt_bdev, sectorsize)) {1520 printk(KERN_WARNING1521 "XFS: Cannot set_blocksize to %u on device %s\n",1522 sectorsize, XFS_BUFTARG_NAME(btp));···1536}15371538/*1539+ * When allocating the initial buffer target we have not yet1540+ * read in the superblock, so don't know what sized sectors1541+ * are being used is at this early stage. 
Play safe.1542+ */1543STATIC int1544xfs_setsize_buftarg_early(1545 xfs_buftarg_t *btp,···1587 mapping->a_ops = &mapping_aops;1588 mapping->backing_dev_info = bdi;1589 mapping_set_gfp_mask(mapping, GFP_NOFS);1590+ btp->bt_mapping = mapping;1591 return 0;1592+}1593+1594+STATIC int1595+xfs_alloc_delwrite_queue(1596+ xfs_buftarg_t *btp)1597+{1598+ int error = 0;1599+1600+ INIT_LIST_HEAD(&btp->bt_list);1601+ INIT_LIST_HEAD(&btp->bt_delwrite_queue);1602+ spinlock_init(&btp->bt_delwrite_lock, "delwri_lock");1603+ btp->bt_flags = 0;1604+ btp->bt_task = kthread_run(xfsbufd, btp, "xfsbufd");1605+ if (IS_ERR(btp->bt_task)) {1606+ error = PTR_ERR(btp->bt_task);1607+ goto out_error;1608+ }1609+ xfs_register_buftarg(btp);1610+out_error:1611+ return error;1612}16131614xfs_buftarg_t *···16001601 btp = kmem_zalloc(sizeof(*btp), KM_SLEEP);16021603+ btp->bt_dev = bdev->bd_dev;1604+ btp->bt_bdev = bdev;1605 if (xfs_setsize_buftarg_early(btp, bdev))1606 goto error;1607 if (xfs_mapping_buftarg(btp, bdev))1608+ goto error;1609+ if (xfs_alloc_delwrite_queue(btp))1610 goto error;1611 xfs_alloc_bufhash(btp, external);1612 return btp;···161616171618/*1619+ * Delayed write buffer handling1620 */00001621STATIC void1622+xfs_buf_delwri_queue(1623+ xfs_buf_t *bp,1624 int unlock)1625{1626+ struct list_head *dwq = &bp->b_target->bt_delwrite_queue;1627+ spinlock_t *dwlk = &bp->b_target->bt_delwrite_lock;016281629+ XB_TRACE(bp, "delwri_q", (long)unlock);1630+ ASSERT((bp->b_flags&(XBF_DELWRI|XBF_ASYNC)) == (XBF_DELWRI|XBF_ASYNC));1631+1632+ spin_lock(dwlk);1633 /* If already in the queue, dequeue and place at tail */1634+ if (!list_empty(&bp->b_list)) {1635+ ASSERT(bp->b_flags & _XBF_DELWRI_Q);1636+ if (unlock)1637+ atomic_dec(&bp->b_hold);1638+ list_del(&bp->b_list);01639 }16401641+ bp->b_flags |= _XBF_DELWRI_Q;1642+ list_add_tail(&bp->b_list, dwq);1643+ bp->b_queuetime = jiffies;1644+ spin_unlock(dwlk);16451646 if (unlock)1647+ xfs_buf_unlock(bp);1648}16491650void1651+xfs_buf_delwri_dequeue(1652+ xfs_buf_t *bp)1653{1654+ spinlock_t *dwlk = &bp->b_target->bt_delwrite_lock;1655 int dequeued = 0;16561657+ spin_lock(dwlk);1658+ if ((bp->b_flags & XBF_DELWRI) && !list_empty(&bp->b_list)) {1659+ ASSERT(bp->b_flags & _XBF_DELWRI_Q);1660+ list_del_init(&bp->b_list);1661 dequeued = 1;1662 }1663+ bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q);1664+ spin_unlock(dwlk);16651666 if (dequeued)1667+ xfs_buf_rele(bp);16681669+ XB_TRACE(bp, "delwri_dq", (long)dequeued);1670}16711672STATIC void1673+xfs_buf_runall_queues(1674 struct workqueue_struct *queue)1675{1676 flush_workqueue(queue);1677}0000016781679STATIC int1680xfsbufd_wakeup(1681 int priority,1682 gfp_t mask)1683{1684+ xfs_buftarg_t *btp;1685+1686+ spin_lock(&xfs_buftarg_lock);1687+ list_for_each_entry(btp, &xfs_buftarg_list, bt_list) {1688+ if (test_bit(XBT_FORCE_SLEEP, &btp->bt_flags))1689+ continue;1690+ set_bit(XBT_FORCE_FLUSH, &btp->bt_flags);1691+ wake_up_process(btp->bt_task);1692+ }1693+ spin_unlock(&xfs_buftarg_lock);1694 return 0;1695}1696···1702{1703 struct list_head tmp;1704 unsigned long age;1705+ xfs_buftarg_t *target = (xfs_buftarg_t *)data;1706+ xfs_buf_t *bp, *n;1707+ struct list_head *dwq = &target->bt_delwrite_queue;1708+ spinlock_t *dwlk = &target->bt_delwrite_lock;17091710 current->flags |= PF_MEMALLOC;17111712 INIT_LIST_HEAD(&tmp);1713 do {1714 if (unlikely(freezing(current))) {1715+ set_bit(XBT_FORCE_SLEEP, &target->bt_flags);1716 refrigerator();1717 } else {1718+ clear_bit(XBT_FORCE_SLEEP, &target->bt_flags);1719 }17201721 schedule_timeout_interruptible(1722 
xfs_buf_timer_centisecs * msecs_to_jiffies(10));17231724 age = xfs_buf_age_centisecs * msecs_to_jiffies(10);1725+ spin_lock(dwlk);1726+ list_for_each_entry_safe(bp, n, dwq, b_list) {1727+ XB_TRACE(bp, "walkq1", (long)xfs_buf_ispin(bp));1728+ ASSERT(bp->b_flags & XBF_DELWRI);17291730+ if (!xfs_buf_ispin(bp) && !xfs_buf_cond_lock(bp)) {1731+ if (!test_bit(XBT_FORCE_FLUSH,1732+ &target->bt_flags) &&1733 time_before(jiffies,1734+ bp->b_queuetime + age)) {1735+ xfs_buf_unlock(bp);1736 break;1737 }17381739+ bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q);1740+ bp->b_flags |= XBF_WRITE;1741+ list_move(&bp->b_list, &tmp);1742 }1743 }1744+ spin_unlock(dwlk);17451746 while (!list_empty(&tmp)) {1747+ bp = list_entry(tmp.next, xfs_buf_t, b_list);1748+ ASSERT(target == bp->b_target);17491750+ list_del_init(&bp->b_list);1751+ xfs_buf_iostrategy(bp);17521753+ blk_run_address_space(target->bt_mapping);1754 }17551756 if (as_list_len > 0)1757 purge_addresses();17581759+ clear_bit(XBT_FORCE_FLUSH, &target->bt_flags);1760 } while (!kthread_should_stop());17611762 return 0;1763}17641765/*1766+ * Go through all incore buffers, and release buffers if they belong to1767+ * the given device. This is used in filesystem error handling to1768+ * preserve the consistency of its metadata.1769 */1770int1771xfs_flush_buftarg(···1770 int wait)1771{1772 struct list_head tmp;1773+ xfs_buf_t *bp, *n;1774 int pincount = 0;1775+ struct list_head *dwq = &target->bt_delwrite_queue;1776+ spinlock_t *dwlk = &target->bt_delwrite_lock;17771778+ xfs_buf_runall_queues(xfsdatad_workqueue);1779+ xfs_buf_runall_queues(xfslogd_workqueue);17801781 INIT_LIST_HEAD(&tmp);1782+ spin_lock(dwlk);1783+ list_for_each_entry_safe(bp, n, dwq, b_list) {1784+ ASSERT(bp->b_target == target);1785+ ASSERT(bp->b_flags & (XBF_DELWRI | _XBF_DELWRI_Q));1786+ XB_TRACE(bp, "walkq2", (long)xfs_buf_ispin(bp));1787+ if (xfs_buf_ispin(bp)) {0001788 pincount++;1789 continue;1790 }17911792+ list_move(&bp->b_list, &tmp);1793 }1794+ spin_unlock(dwlk);17951796 /*1797 * Dropped the delayed write list lock, now walk the temporary list1798 */1799+ list_for_each_entry_safe(bp, n, &tmp, b_list) {1800+ xfs_buf_lock(bp);1801+ bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q);1802+ bp->b_flags |= XBF_WRITE;1803 if (wait)1804+ bp->b_flags &= ~XBF_ASYNC;1805 else1806+ list_del_init(&bp->b_list);18071808+ xfs_buf_iostrategy(bp);1809 }18101811 /*1812 * Remaining list items must be flushed before returning1813 */1814 while (!list_empty(&tmp)) {1815+ bp = list_entry(tmp.next, xfs_buf_t, b_list);18161817+ list_del_init(&bp->b_list);1818+ xfs_iowait(bp);1819+ xfs_buf_relse(bp);1820 }18211822 if (wait)1823+ blk_run_address_space(target->bt_mapping);18241825 return pincount;1826}18271828int __init1829+xfs_buf_init(void)1830{1831 int error = -ENOMEM;18321833+#ifdef XFS_BUF_TRACE1834+ xfs_buf_trace_buf = ktrace_alloc(XFS_BUF_TRACE_SIZE, KM_SLEEP);1835#endif18361837+ xfs_buf_zone = kmem_zone_init(sizeof(xfs_buf_t), "xfs_buf");1838+ if (!xfs_buf_zone)1839 goto out_free_trace_buf;18401841 xfslogd_workqueue = create_workqueue("xfslogd");···1847 if (!xfsdatad_workqueue)1848 goto out_destroy_xfslogd_workqueue;18491850+ xfs_buf_shake = kmem_shake_register(xfsbufd_wakeup);1851+ if (!xfs_buf_shake)01852 goto out_destroy_xfsdatad_workqueue;0000018531854 return 0;1855001856 out_destroy_xfsdatad_workqueue:1857 destroy_workqueue(xfsdatad_workqueue);1858 out_destroy_xfslogd_workqueue:1859 destroy_workqueue(xfslogd_workqueue);1860 out_free_buf_zone:1861+ kmem_zone_destroy(xfs_buf_zone);1862 
out_free_trace_buf:1863+#ifdef XFS_BUF_TRACE1864+ ktrace_free(xfs_buf_trace_buf);1865#endif1866 return error;1867}18681869void1870+xfs_buf_terminate(void)1871{1872+ kmem_shake_deregister(xfs_buf_shake);01873 destroy_workqueue(xfsdatad_workqueue);1874 destroy_workqueue(xfslogd_workqueue);1875+ kmem_zone_destroy(xfs_buf_zone);1876+#ifdef XFS_BUF_TRACE1877+ ktrace_free(xfs_buf_trace_buf);1878#endif1879}
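Note: the xfs_buf.c changes above move the delayed-write queue onto the buffer target and have the per-target xfsbufd thread push only buffers that have sat on the queue longer than xfs_buf_age_centisecs, unless a force flush is pending. The following is a minimal user-space sketch of that ageing test, not the kernel code; the names (queue_time, age_ticks, force_flush) are invented, and the wrap-safe comparison stands in for the kernel's time_before().

/*
 * Sketch of the ageing check xfsbufd applies to each delwri buffer:
 * skip it unless a force flush was requested or it has been queued
 * longer than the configured age.  Illustrative names only.
 */
#include <stdbool.h>
#include <stdio.h>

/* wrap-safe "a is before b", in the spirit of the kernel's time_before() */
static bool before(unsigned long a, unsigned long b)
{
	return (long)(a - b) < 0;
}

static bool delwri_should_push(unsigned long now, unsigned long queue_time,
			       unsigned long age_ticks, bool force_flush)
{
	if (!force_flush && before(now, queue_time + age_ticks))
		return false;		/* too young, leave it on the queue */
	return true;			/* old enough (or forced): write it */
}

int main(void)
{
	printf("%d\n", delwri_should_push(1000, 900, 50, false));	/* 1: aged out */
	printf("%d\n", delwri_should_push(1000, 980, 50, false));	/* 0: still young */
	printf("%d\n", delwri_should_push(1000, 980, 50, true));	/* 1: force flush */
	return 0;
}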
fs/xfs/linux-2.6/xfs_buf.h (+262, -386)
···32 * Base types33 */3435-#define XFS_BUF_DADDR_NULL ((xfs_daddr_t) (-1LL))3637-#define page_buf_ctob(pp) ((pp) * PAGE_CACHE_SIZE)38-#define page_buf_btoc(dd) (((dd) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT)39-#define page_buf_btoct(dd) ((dd) >> PAGE_CACHE_SHIFT)40-#define page_buf_poff(aa) ((aa) & ~PAGE_CACHE_MASK)4142-typedef enum page_buf_rw_e {43- PBRW_READ = 1, /* transfer into target memory */44- PBRW_WRITE = 2, /* transfer from target memory */45- PBRW_ZERO = 3 /* Zero target memory */46-} page_buf_rw_t;4748-49-typedef enum page_buf_flags_e { /* pb_flags values */50- PBF_READ = (1 << 0), /* buffer intended for reading from device */51- PBF_WRITE = (1 << 1), /* buffer intended for writing to device */52- PBF_MAPPED = (1 << 2), /* buffer mapped (pb_addr valid) */53- PBF_ASYNC = (1 << 4), /* initiator will not wait for completion */54- PBF_DONE = (1 << 5), /* all pages in the buffer uptodate */55- PBF_DELWRI = (1 << 6), /* buffer has dirty pages */56- PBF_STALE = (1 << 7), /* buffer has been staled, do not find it */57- PBF_FS_MANAGED = (1 << 8), /* filesystem controls freeing memory */58- PBF_ORDERED = (1 << 11), /* use ordered writes */59- PBF_READ_AHEAD = (1 << 12), /* asynchronous read-ahead */6061 /* flags used only as arguments to access routines */62- PBF_LOCK = (1 << 14), /* lock requested */63- PBF_TRYLOCK = (1 << 15), /* lock requested, but do not wait */64- PBF_DONT_BLOCK = (1 << 16), /* do not block in current thread */6566 /* flags used only internally */67- _PBF_PAGE_CACHE = (1 << 17),/* backed by pagecache */68- _PBF_KMEM_ALLOC = (1 << 18),/* backed by kmem_alloc() */69- _PBF_RUN_QUEUES = (1 << 19),/* run block device task queue */70- _PBF_DELWRI_Q = (1 << 21), /* buffer on delwri queue */71-} page_buf_flags_t;7200007374typedef struct xfs_bufhash {75 struct list_head bh_list;···80} xfs_bufhash_t;8182typedef struct xfs_buftarg {83- dev_t pbr_dev;84- struct block_device *pbr_bdev;85- struct address_space *pbr_mapping;86- unsigned int pbr_bsize;87- unsigned int pbr_sshift;88- size_t pbr_smask;8990- /* per-device buffer hash table */91 uint bt_hashmask;92 uint bt_hashshift;93 xfs_bufhash_t *bt_hash;000000094} xfs_buftarg_t;9596/*97- * xfs_buf_t: Buffer structure for page cache-based buffers98 *99- * This buffer structure is used by the page cache buffer management routines100- * to refer to an assembly of pages forming a logical buffer. The actual I/O101- * is performed with buffer_head structures, as required by drivers.102- * 103- * The buffer structure is used on temporary basis only, and discarded when104- * released. The real data storage is recorded in the page cache. 
Metadata is105 * hashed to the block device on which the file system resides.106 */107108struct xfs_buf;000109110-/* call-back function on I/O completion */111-typedef void (*page_buf_iodone_t)(struct xfs_buf *);112-/* call-back function on I/O completion */113-typedef void (*page_buf_relse_t)(struct xfs_buf *);114-/* pre-write function */115-typedef int (*page_buf_bdstrat_t)(struct xfs_buf *);116-117-#define PB_PAGES 2118119typedef struct xfs_buf {120- struct semaphore pb_sema; /* semaphore for lockables */121- unsigned long pb_queuetime; /* time buffer was queued */122- atomic_t pb_pin_count; /* pin count */123- wait_queue_head_t pb_waiters; /* unpin waiters */124- struct list_head pb_list;125- page_buf_flags_t pb_flags; /* status flags */126- struct list_head pb_hash_list; /* hash table list */127- xfs_bufhash_t *pb_hash; /* hash table list start */128- xfs_buftarg_t *pb_target; /* buffer target (device) */129- atomic_t pb_hold; /* reference count */130- xfs_daddr_t pb_bn; /* block number for I/O */131- loff_t pb_file_offset; /* offset in file */132- size_t pb_buffer_length; /* size of buffer in bytes */133- size_t pb_count_desired; /* desired transfer size */134- void *pb_addr; /* virtual address of buffer */135- struct work_struct pb_iodone_work;136- atomic_t pb_io_remaining;/* #outstanding I/O requests */137- page_buf_iodone_t pb_iodone; /* I/O completion function */138- page_buf_relse_t pb_relse; /* releasing function */139- page_buf_bdstrat_t pb_strat; /* pre-write function */140- struct semaphore pb_iodonesema; /* Semaphore for I/O waiters */141- void *pb_fspriv;142- void *pb_fspriv2;143- void *pb_fspriv3;144- unsigned short pb_error; /* error code on I/O */145- unsigned short pb_locked; /* page array is locked */146- unsigned int pb_page_count; /* size of page array */147- unsigned int pb_offset; /* page offset in first page */148- struct page **pb_pages; /* array of page pointers */149- struct page *pb_page_array[PB_PAGES]; /* inline pages */150-#ifdef PAGEBUF_LOCK_TRACKING151- int pb_last_holder;152#endif153} xfs_buf_t;154155156/* Finding and Reading Buffers */157-158-extern xfs_buf_t *_pagebuf_find( /* find buffer for block if */159- /* the block is in memory */160- xfs_buftarg_t *, /* inode for block */161- loff_t, /* starting offset of range */162- size_t, /* length of range */163- page_buf_flags_t, /* PBF_LOCK */164- xfs_buf_t *); /* newly allocated buffer */165-166#define xfs_incore(buftarg,blkno,len,lockit) \167- _pagebuf_find(buftarg, blkno ,len, lockit, NULL)168169-extern xfs_buf_t *xfs_buf_get_flags( /* allocate a buffer */170- xfs_buftarg_t *, /* inode for buffer */171- loff_t, /* starting offset of range */172- size_t, /* length of range */173- page_buf_flags_t); /* PBF_LOCK, PBF_READ, */174- /* PBF_ASYNC */175-176#define xfs_buf_get(target, blkno, len, flags) \177- xfs_buf_get_flags((target), (blkno), (len), PBF_LOCK | PBF_MAPPED)178179-extern xfs_buf_t *xfs_buf_read_flags( /* allocate and read a buffer */180- xfs_buftarg_t *, /* inode for buffer */181- loff_t, /* starting offset of range */182- size_t, /* length of range */183- page_buf_flags_t); /* PBF_LOCK, PBF_ASYNC */184-185#define xfs_buf_read(target, blkno, len, flags) \186- xfs_buf_read_flags((target), (blkno), (len), PBF_LOCK | PBF_MAPPED)187188-extern xfs_buf_t *pagebuf_get_empty( /* allocate pagebuf struct with */189- /* no memory or disk address */190- size_t len,191- xfs_buftarg_t *); /* mount point "fake" inode */192-193-extern xfs_buf_t *pagebuf_get_no_daddr(/* allocate pagebuf struct */194- /* 
without disk address */195- size_t len,196- xfs_buftarg_t *); /* mount point "fake" inode */197-198-extern int pagebuf_associate_memory(199- xfs_buf_t *,200- void *,201- size_t);202-203-extern void pagebuf_hold( /* increment reference count */204- xfs_buf_t *); /* buffer to hold */205-206-extern void pagebuf_readahead( /* read ahead into cache */207- xfs_buftarg_t *, /* target for buffer (or NULL) */208- loff_t, /* starting offset of range */209- size_t, /* length of range */210- page_buf_flags_t); /* additional read flags */211212/* Releasing Buffers */213-214-extern void pagebuf_free( /* deallocate a buffer */215- xfs_buf_t *); /* buffer to deallocate */216-217-extern void pagebuf_rele( /* release hold on a buffer */218- xfs_buf_t *); /* buffer to release */219220/* Locking and Unlocking Buffers */221-222-extern int pagebuf_cond_lock( /* lock buffer, if not locked */223- /* (returns -EBUSY if locked) */224- xfs_buf_t *); /* buffer to lock */225-226-extern int pagebuf_lock_value( /* return count on lock */227- xfs_buf_t *); /* buffer to check */228-229-extern int pagebuf_lock( /* lock buffer */230- xfs_buf_t *); /* buffer to lock */231-232-extern void pagebuf_unlock( /* unlock buffer */233- xfs_buf_t *); /* buffer to unlock */234235/* Buffer Read and Write Routines */0000000236237-extern void pagebuf_iodone( /* mark buffer I/O complete */238- xfs_buf_t *, /* buffer to mark */239- int); /* run completion locally, or in240- * a helper thread. */241-242-extern void pagebuf_ioerror( /* mark buffer in error (or not) */243- xfs_buf_t *, /* buffer to mark */244- int); /* error to store (0 if none) */245-246-extern int pagebuf_iostart( /* start I/O on a buffer */247- xfs_buf_t *, /* buffer to start */248- page_buf_flags_t); /* PBF_LOCK, PBF_ASYNC, */249- /* PBF_READ, PBF_WRITE, */250- /* PBF_DELWRI */251-252-extern int pagebuf_iorequest( /* start real I/O */253- xfs_buf_t *); /* buffer to convey to device */254-255-extern int pagebuf_iowait( /* wait for buffer I/O done */256- xfs_buf_t *); /* buffer to wait on */257-258-extern void pagebuf_iomove( /* move data in/out of pagebuf */259- xfs_buf_t *, /* buffer to manipulate */260- size_t, /* starting buffer offset */261- size_t, /* length in buffer */262- caddr_t, /* data pointer */263- page_buf_rw_t); /* direction */264-265-static inline int pagebuf_iostrategy(xfs_buf_t *pb)266{267- return pb->pb_strat ? pb->pb_strat(pb) : pagebuf_iorequest(pb);268}269270-static inline int pagebuf_geterror(xfs_buf_t *pb)271{272- return pb ? 
pb->pb_error : ENOMEM;273}274275/* Buffer Utility Routines */276-277-extern caddr_t pagebuf_offset( /* pointer at offset in buffer */278- xfs_buf_t *, /* buffer to offset into */279- size_t); /* offset */280281/* Pinning Buffer Storage in Memory */282-283-extern void pagebuf_pin( /* pin buffer in memory */284- xfs_buf_t *); /* buffer to pin */285-286-extern void pagebuf_unpin( /* unpin buffered data */287- xfs_buf_t *); /* buffer to unpin */288-289-extern int pagebuf_ispin( /* check if buffer is pinned */290- xfs_buf_t *); /* buffer to check */291292/* Delayed Write Buffer Routines */293-294-extern void pagebuf_delwri_dequeue(xfs_buf_t *);295296/* Buffer Daemon Setup Routines */00297298-extern int pagebuf_init(void);299-extern void pagebuf_terminate(void);300-301-302-#ifdef PAGEBUF_TRACE303-extern ktrace_t *pagebuf_trace_buf;304-extern void pagebuf_trace(305- xfs_buf_t *, /* buffer being traced */306- char *, /* description of operation */307- void *, /* arbitrary diagnostic value */308- void *); /* return address */309#else310-# define pagebuf_trace(pb, id, ptr, ra) do { } while (0)311#endif312313-#define pagebuf_target_name(target) \314- ({ char __b[BDEVNAME_SIZE]; bdevname((target)->pbr_bdev, __b); __b; })31531600000317318-/* These are just for xfs_syncsub... it sets an internal variable319- * then passes it to VOP_FLUSH_PAGES or adds the flags to a newly gotten buf_t320- */321-#define XFS_B_ASYNC PBF_ASYNC322-#define XFS_B_DELWRI PBF_DELWRI323-#define XFS_B_READ PBF_READ324-#define XFS_B_WRITE PBF_WRITE325-#define XFS_B_STALE PBF_STALE326327-#define XFS_BUF_TRYLOCK PBF_TRYLOCK328-#define XFS_INCORE_TRYLOCK PBF_TRYLOCK329-#define XFS_BUF_LOCK PBF_LOCK330-#define XFS_BUF_MAPPED PBF_MAPPED331332-#define BUF_BUSY PBF_DONT_BLOCK00333334-#define XFS_BUF_BFLAGS(x) ((x)->pb_flags)335-#define XFS_BUF_ZEROFLAGS(x) \336- ((x)->pb_flags &= ~(PBF_READ|PBF_WRITE|PBF_ASYNC|PBF_DELWRI))337-338-#define XFS_BUF_STALE(x) ((x)->pb_flags |= XFS_B_STALE)339-#define XFS_BUF_UNSTALE(x) ((x)->pb_flags &= ~XFS_B_STALE)340-#define XFS_BUF_ISSTALE(x) ((x)->pb_flags & XFS_B_STALE)341-#define XFS_BUF_SUPER_STALE(x) do { \342- XFS_BUF_STALE(x); \343- pagebuf_delwri_dequeue(x); \344- XFS_BUF_DONE(x); \345 } while (0)346347-#define XFS_BUF_MANAGE PBF_FS_MANAGED348-#define XFS_BUF_UNMANAGE(x) ((x)->pb_flags &= ~PBF_FS_MANAGED)349350-#define XFS_BUF_DELAYWRITE(x) ((x)->pb_flags |= PBF_DELWRI)351-#define XFS_BUF_UNDELAYWRITE(x) pagebuf_delwri_dequeue(x)352-#define XFS_BUF_ISDELAYWRITE(x) ((x)->pb_flags & PBF_DELWRI)353354-#define XFS_BUF_ERROR(x,no) pagebuf_ioerror(x,no)355-#define XFS_BUF_GETERROR(x) pagebuf_geterror(x)356-#define XFS_BUF_ISERROR(x) (pagebuf_geterror(x)?1:0)357358-#define XFS_BUF_DONE(x) ((x)->pb_flags |= PBF_DONE)359-#define XFS_BUF_UNDONE(x) ((x)->pb_flags &= ~PBF_DONE)360-#define XFS_BUF_ISDONE(x) ((x)->pb_flags & PBF_DONE)361362-#define XFS_BUF_BUSY(x) do { } while (0)363-#define XFS_BUF_UNBUSY(x) do { } while (0)364-#define XFS_BUF_ISBUSY(x) (1)365366-#define XFS_BUF_ASYNC(x) ((x)->pb_flags |= PBF_ASYNC)367-#define XFS_BUF_UNASYNC(x) ((x)->pb_flags &= ~PBF_ASYNC)368-#define XFS_BUF_ISASYNC(x) ((x)->pb_flags & PBF_ASYNC)369370-#define XFS_BUF_ORDERED(x) ((x)->pb_flags |= PBF_ORDERED)371-#define XFS_BUF_UNORDERED(x) ((x)->pb_flags &= ~PBF_ORDERED)372-#define XFS_BUF_ISORDERED(x) ((x)->pb_flags & PBF_ORDERED)373374-#define XFS_BUF_SHUT(x) printk("XFS_BUF_SHUT not implemented yet\n")375-#define XFS_BUF_UNSHUT(x) printk("XFS_BUF_UNSHUT not implemented yet\n")376-#define XFS_BUF_ISSHUT(x) 
(0)377378-#define XFS_BUF_HOLD(x) pagebuf_hold(x)379-#define XFS_BUF_READ(x) ((x)->pb_flags |= PBF_READ)380-#define XFS_BUF_UNREAD(x) ((x)->pb_flags &= ~PBF_READ)381-#define XFS_BUF_ISREAD(x) ((x)->pb_flags & PBF_READ)382383-#define XFS_BUF_WRITE(x) ((x)->pb_flags |= PBF_WRITE)384-#define XFS_BUF_UNWRITE(x) ((x)->pb_flags &= ~PBF_WRITE)385-#define XFS_BUF_ISWRITE(x) ((x)->pb_flags & PBF_WRITE)386387-#define XFS_BUF_ISUNINITIAL(x) (0)388-#define XFS_BUF_UNUNINITIAL(x) (0)389390-#define XFS_BUF_BP_ISMAPPED(bp) 1391392-#define XFS_BUF_IODONE_FUNC(buf) (buf)->pb_iodone393-#define XFS_BUF_SET_IODONE_FUNC(buf, func) \394- (buf)->pb_iodone = (func)395-#define XFS_BUF_CLR_IODONE_FUNC(buf) \396- (buf)->pb_iodone = NULL397-#define XFS_BUF_SET_BDSTRAT_FUNC(buf, func) \398- (buf)->pb_strat = (func)399-#define XFS_BUF_CLR_BDSTRAT_FUNC(buf) \400- (buf)->pb_strat = NULL401402-#define XFS_BUF_FSPRIVATE(buf, type) \403- ((type)(buf)->pb_fspriv)404-#define XFS_BUF_SET_FSPRIVATE(buf, value) \405- (buf)->pb_fspriv = (void *)(value)406-#define XFS_BUF_FSPRIVATE2(buf, type) \407- ((type)(buf)->pb_fspriv2)408-#define XFS_BUF_SET_FSPRIVATE2(buf, value) \409- (buf)->pb_fspriv2 = (void *)(value)410-#define XFS_BUF_FSPRIVATE3(buf, type) \411- ((type)(buf)->pb_fspriv3)412-#define XFS_BUF_SET_FSPRIVATE3(buf, value) \413- (buf)->pb_fspriv3 = (void *)(value)414-#define XFS_BUF_SET_START(buf)415416-#define XFS_BUF_SET_BRELSE_FUNC(buf, value) \417- (buf)->pb_relse = (value)00000000418419-#define XFS_BUF_PTR(bp) (xfs_caddr_t)((bp)->pb_addr)00420421-static inline xfs_caddr_t xfs_buf_offset(xfs_buf_t *bp, size_t offset)000000000000422{423- if (bp->pb_flags & PBF_MAPPED)424- return XFS_BUF_PTR(bp) + offset;425- return (xfs_caddr_t) pagebuf_offset(bp, offset);0426}427428-#define XFS_BUF_SET_PTR(bp, val, count) \429- pagebuf_associate_memory(bp, val, count)430-#define XFS_BUF_ADDR(bp) ((bp)->pb_bn)431-#define XFS_BUF_SET_ADDR(bp, blk) \432- ((bp)->pb_bn = (xfs_daddr_t)(blk))433-#define XFS_BUF_OFFSET(bp) ((bp)->pb_file_offset)434-#define XFS_BUF_SET_OFFSET(bp, off) \435- ((bp)->pb_file_offset = (off))436-#define XFS_BUF_COUNT(bp) ((bp)->pb_count_desired)437-#define XFS_BUF_SET_COUNT(bp, cnt) \438- ((bp)->pb_count_desired = (cnt))439-#define XFS_BUF_SIZE(bp) ((bp)->pb_buffer_length)440-#define XFS_BUF_SET_SIZE(bp, cnt) \441- ((bp)->pb_buffer_length = (cnt))442-#define XFS_BUF_SET_VTYPE_REF(bp, type, ref)443-#define XFS_BUF_SET_VTYPE(bp, type)444-#define XFS_BUF_SET_REF(bp, ref)445-446-#define XFS_BUF_ISPINNED(bp) pagebuf_ispin(bp)447-448-#define XFS_BUF_VALUSEMA(bp) pagebuf_lock_value(bp)449-#define XFS_BUF_CPSEMA(bp) (pagebuf_cond_lock(bp) == 0)450-#define XFS_BUF_VSEMA(bp) pagebuf_unlock(bp)451-#define XFS_BUF_PSEMA(bp,x) pagebuf_lock(bp)452-#define XFS_BUF_V_IODONESEMA(bp) up(&bp->pb_iodonesema);453-454-/* setup the buffer target from a buftarg structure */455-#define XFS_BUF_SET_TARGET(bp, target) \456- (bp)->pb_target = (target)457-#define XFS_BUF_TARGET(bp) ((bp)->pb_target)458-#define XFS_BUFTARG_NAME(target) \459- pagebuf_target_name(target)460-461-#define XFS_BUF_SET_VTYPE_REF(bp, type, ref)462-#define XFS_BUF_SET_VTYPE(bp, type)463-#define XFS_BUF_SET_REF(bp, ref)464-465-static inline int xfs_bawrite(void *mp, xfs_buf_t *bp)466{467- bp->pb_fspriv3 = mp;468- bp->pb_strat = xfs_bdstrat_cb;469- pagebuf_delwri_dequeue(bp);470- return pagebuf_iostart(bp, PBF_WRITE | PBF_ASYNC | _PBF_RUN_QUEUES);471}472473-static inline void xfs_buf_relse(xfs_buf_t *bp)474-{475- if (!bp->pb_relse)476- pagebuf_unlock(bp);477- 
pagebuf_rele(bp);478-}479-480-#define xfs_bpin(bp) pagebuf_pin(bp)481-#define xfs_bunpin(bp) pagebuf_unpin(bp)482483#define xfs_buftrace(id, bp) \484- pagebuf_trace(bp, id, NULL, (void *)__builtin_return_address(0))485486-#define xfs_biodone(pb) \487- pagebuf_iodone(pb, 0)488489-#define xfs_biomove(pb, off, len, data, rw) \490- pagebuf_iomove((pb), (off), (len), (data), \491- ((rw) == XFS_B_WRITE) ? PBRW_WRITE : PBRW_READ)492493-#define xfs_biozero(pb, off, len) \494- pagebuf_iomove((pb), (off), (len), NULL, PBRW_ZERO)495496497-static inline int XFS_bwrite(xfs_buf_t *pb)498{499- int iowait = (pb->pb_flags & PBF_ASYNC) == 0;500 int error = 0;501502 if (!iowait)503- pb->pb_flags |= _PBF_RUN_QUEUES;504505- pagebuf_delwri_dequeue(pb);506- pagebuf_iostrategy(pb);507 if (iowait) {508- error = pagebuf_iowait(pb);509- xfs_buf_relse(pb);510 }511 return error;512}513514-#define XFS_bdwrite(pb) \515- pagebuf_iostart(pb, PBF_DELWRI | PBF_ASYNC)516517static inline int xfs_bdwrite(void *mp, xfs_buf_t *bp)518{519- bp->pb_strat = xfs_bdstrat_cb;520- bp->pb_fspriv3 = mp;521-522- return pagebuf_iostart(bp, PBF_DELWRI | PBF_ASYNC);523}524525-#define XFS_bdstrat(bp) pagebuf_iorequest(bp)526527-#define xfs_iowait(pb) pagebuf_iowait(pb)528529#define xfs_baread(target, rablkno, ralen) \530- pagebuf_readahead((target), (rablkno), (ralen), PBF_DONT_BLOCK)531-532-#define xfs_buf_get_empty(len, target) pagebuf_get_empty((len), (target))533-#define xfs_buf_get_noaddr(len, target) pagebuf_get_no_daddr((len), (target))534-#define xfs_buf_free(bp) pagebuf_free(bp)535536537/*538 * Handling of buftargs.539 */540-541extern xfs_buftarg_t *xfs_alloc_buftarg(struct block_device *, int);542extern void xfs_free_buftarg(xfs_buftarg_t *, int);543extern void xfs_wait_buftarg(xfs_buftarg_t *);544extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int);545extern int xfs_flush_buftarg(xfs_buftarg_t *, int);546547-#define xfs_getsize_buftarg(buftarg) \548- block_size((buftarg)->pbr_bdev)549-#define xfs_readonly_buftarg(buftarg) \550- bdev_read_only((buftarg)->pbr_bdev)551-#define xfs_binval(buftarg) \552- xfs_flush_buftarg(buftarg, 1)553-#define XFS_bflush(buftarg) \554- xfs_flush_buftarg(buftarg, 1)555556#endif /* __XFS_BUF_H__ */
···32 * Base types33 */3435+#define XFS_BUF_DADDR_NULL ((xfs_daddr_t) (-1LL))3637+#define xfs_buf_ctob(pp) ((pp) * PAGE_CACHE_SIZE)38+#define xfs_buf_btoc(dd) (((dd) + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT)39+#define xfs_buf_btoct(dd) ((dd) >> PAGE_CACHE_SHIFT)40+#define xfs_buf_poff(aa) ((aa) & ~PAGE_CACHE_MASK)4142+typedef enum {43+ XBRW_READ = 1, /* transfer into target memory */44+ XBRW_WRITE = 2, /* transfer from target memory */45+ XBRW_ZERO = 3, /* Zero target memory */46+} xfs_buf_rw_t;4748+typedef enum {49+ XBF_READ = (1 << 0), /* buffer intended for reading from device */50+ XBF_WRITE = (1 << 1), /* buffer intended for writing to device */51+ XBF_MAPPED = (1 << 2), /* buffer mapped (b_addr valid) */52+ XBF_ASYNC = (1 << 4), /* initiator will not wait for completion */53+ XBF_DONE = (1 << 5), /* all pages in the buffer uptodate */54+ XBF_DELWRI = (1 << 6), /* buffer has dirty pages */55+ XBF_STALE = (1 << 7), /* buffer has been staled, do not find it */56+ XBF_FS_MANAGED = (1 << 8), /* filesystem controls freeing memory */57+ XBF_ORDERED = (1 << 11), /* use ordered writes */58+ XBF_READ_AHEAD = (1 << 12), /* asynchronous read-ahead */05960 /* flags used only as arguments to access routines */61+ XBF_LOCK = (1 << 14), /* lock requested */62+ XBF_TRYLOCK = (1 << 15), /* lock requested, but do not wait */63+ XBF_DONT_BLOCK = (1 << 16), /* do not block in current thread */6465 /* flags used only internally */66+ _XBF_PAGE_CACHE = (1 << 17),/* backed by pagecache */67+ _XBF_KMEM_ALLOC = (1 << 18),/* backed by kmem_alloc() */68+ _XBF_RUN_QUEUES = (1 << 19),/* run block device task queue */69+ _XBF_DELWRI_Q = (1 << 21), /* buffer on delwri queue */70+} xfs_buf_flags_t;7172+typedef enum {73+ XBT_FORCE_SLEEP = (0 << 1),74+ XBT_FORCE_FLUSH = (1 << 1),75+} xfs_buftarg_flags_t;7677typedef struct xfs_bufhash {78 struct list_head bh_list;···77} xfs_bufhash_t;7879typedef struct xfs_buftarg {80+ dev_t bt_dev;81+ struct block_device *bt_bdev;82+ struct address_space *bt_mapping;83+ unsigned int bt_bsize;84+ unsigned int bt_sshift;85+ size_t bt_smask;8687+ /* per device buffer hash table */88 uint bt_hashmask;89 uint bt_hashshift;90 xfs_bufhash_t *bt_hash;91+92+ /* per device delwri queue */93+ struct task_struct *bt_task;94+ struct list_head bt_list;95+ struct list_head bt_delwrite_queue;96+ spinlock_t bt_delwrite_lock;97+ unsigned long bt_flags;98} xfs_buftarg_t;99100/*101+ * xfs_buf_t: Buffer structure for pagecache-based buffers102 *103+ * This buffer structure is used by the pagecache buffer management routines104+ * to refer to an assembly of pages forming a logical buffer.105+ *106+ * The buffer structure is used on a temporary basis only, and discarded when107+ * released. The real data storage is recorded in the pagecache. 
Buffers are0108 * hashed to the block device on which the file system resides.109 */110111struct xfs_buf;112+typedef void (*xfs_buf_iodone_t)(struct xfs_buf *);113+typedef void (*xfs_buf_relse_t)(struct xfs_buf *);114+typedef int (*xfs_buf_bdstrat_t)(struct xfs_buf *);115116+#define XB_PAGES 20000000117118typedef struct xfs_buf {119+ struct semaphore b_sema; /* semaphore for lockables */120+ unsigned long b_queuetime; /* time buffer was queued */121+ atomic_t b_pin_count; /* pin count */122+ wait_queue_head_t b_waiters; /* unpin waiters */123+ struct list_head b_list;124+ xfs_buf_flags_t b_flags; /* status flags */125+ struct list_head b_hash_list; /* hash table list */126+ xfs_bufhash_t *b_hash; /* hash table list start */127+ xfs_buftarg_t *b_target; /* buffer target (device) */128+ atomic_t b_hold; /* reference count */129+ xfs_daddr_t b_bn; /* block number for I/O */130+ xfs_off_t b_file_offset; /* offset in file */131+ size_t b_buffer_length;/* size of buffer in bytes */132+ size_t b_count_desired;/* desired transfer size */133+ void *b_addr; /* virtual address of buffer */134+ struct work_struct b_iodone_work;135+ atomic_t b_io_remaining; /* #outstanding I/O requests */136+ xfs_buf_iodone_t b_iodone; /* I/O completion function */137+ xfs_buf_relse_t b_relse; /* releasing function */138+ xfs_buf_bdstrat_t b_strat; /* pre-write function */139+ struct semaphore b_iodonesema; /* Semaphore for I/O waiters */140+ void *b_fspriv;141+ void *b_fspriv2;142+ void *b_fspriv3;143+ unsigned short b_error; /* error code on I/O */144+ unsigned short b_locked; /* page array is locked */145+ unsigned int b_page_count; /* size of page array */146+ unsigned int b_offset; /* page offset in first page */147+ struct page **b_pages; /* array of page pointers */148+ struct page *b_page_array[XB_PAGES]; /* inline pages */149+#ifdef XFS_BUF_LOCK_TRACKING150+ int b_last_holder;151#endif152} xfs_buf_t;153154155/* Finding and Reading Buffers */156+extern xfs_buf_t *_xfs_buf_find(xfs_buftarg_t *, xfs_off_t, size_t,157+ xfs_buf_flags_t, xfs_buf_t *);0000000158#define xfs_incore(buftarg,blkno,len,lockit) \159+ _xfs_buf_find(buftarg, blkno ,len, lockit, NULL)160161+extern xfs_buf_t *xfs_buf_get_flags(xfs_buftarg_t *, xfs_off_t, size_t,162+ xfs_buf_flags_t);00000163#define xfs_buf_get(target, blkno, len, flags) \164+ xfs_buf_get_flags((target), (blkno), (len), XBF_LOCK | XBF_MAPPED)165166+extern xfs_buf_t *xfs_buf_read_flags(xfs_buftarg_t *, xfs_off_t, size_t,167+ xfs_buf_flags_t);0000168#define xfs_buf_read(target, blkno, len, flags) \169+ xfs_buf_read_flags((target), (blkno), (len), XBF_LOCK | XBF_MAPPED)170171+extern xfs_buf_t *xfs_buf_get_empty(size_t, xfs_buftarg_t *);172+extern xfs_buf_t *xfs_buf_get_noaddr(size_t, xfs_buftarg_t *);173+extern int xfs_buf_associate_memory(xfs_buf_t *, void *, size_t);174+extern void xfs_buf_hold(xfs_buf_t *);175+extern void xfs_buf_readahead(xfs_buftarg_t *, xfs_off_t, size_t,176+ xfs_buf_flags_t);00000000000000000177178/* Releasing Buffers */179+extern void xfs_buf_free(xfs_buf_t *);180+extern void xfs_buf_rele(xfs_buf_t *);0000181182/* Locking and Unlocking Buffers */183+extern int xfs_buf_cond_lock(xfs_buf_t *);184+extern int xfs_buf_lock_value(xfs_buf_t *);185+extern void xfs_buf_lock(xfs_buf_t *);186+extern void xfs_buf_unlock(xfs_buf_t *);000000000187188/* Buffer Read and Write Routines */189+extern void xfs_buf_ioend(xfs_buf_t *, int);190+extern void xfs_buf_ioerror(xfs_buf_t *, int);191+extern int xfs_buf_iostart(xfs_buf_t *, xfs_buf_flags_t);192+extern int 
xfs_buf_iorequest(xfs_buf_t *);193+extern int xfs_buf_iowait(xfs_buf_t *);194+extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, xfs_caddr_t,195+ xfs_buf_rw_t);196197+static inline int xfs_buf_iostrategy(xfs_buf_t *bp)0000000000000000000000000000198{199+ return bp->b_strat ? bp->b_strat(bp) : xfs_buf_iorequest(bp);200}201202+static inline int xfs_buf_geterror(xfs_buf_t *bp)203{204+ return bp ? bp->b_error : ENOMEM;205}206207/* Buffer Utility Routines */208+extern xfs_caddr_t xfs_buf_offset(xfs_buf_t *, size_t);000209210/* Pinning Buffer Storage in Memory */211+extern void xfs_buf_pin(xfs_buf_t *);212+extern void xfs_buf_unpin(xfs_buf_t *);213+extern int xfs_buf_ispin(xfs_buf_t *);000000214215/* Delayed Write Buffer Routines */216+extern void xfs_buf_delwri_dequeue(xfs_buf_t *);0217218/* Buffer Daemon Setup Routines */219+extern int xfs_buf_init(void);220+extern void xfs_buf_terminate(void);221222+#ifdef XFS_BUF_TRACE223+extern ktrace_t *xfs_buf_trace_buf;224+extern void xfs_buf_trace(xfs_buf_t *, char *, void *, void *);00000000225#else226+#define xfs_buf_trace(bp,id,ptr,ra) do { } while (0)227#endif228229+#define xfs_buf_target_name(target) \230+ ({ char __b[BDEVNAME_SIZE]; bdevname((target)->bt_bdev, __b); __b; })231232233+#define XFS_B_ASYNC XBF_ASYNC234+#define XFS_B_DELWRI XBF_DELWRI235+#define XFS_B_READ XBF_READ236+#define XFS_B_WRITE XBF_WRITE237+#define XFS_B_STALE XBF_STALE238239+#define XFS_BUF_TRYLOCK XBF_TRYLOCK240+#define XFS_INCORE_TRYLOCK XBF_TRYLOCK241+#define XFS_BUF_LOCK XBF_LOCK242+#define XFS_BUF_MAPPED XBF_MAPPED0000243244+#define BUF_BUSY XBF_DONT_BLOCK000245246+#define XFS_BUF_BFLAGS(bp) ((bp)->b_flags)247+#define XFS_BUF_ZEROFLAGS(bp) \248+ ((bp)->b_flags &= ~(XBF_READ|XBF_WRITE|XBF_ASYNC|XBF_DELWRI))249250+#define XFS_BUF_STALE(bp) ((bp)->b_flags |= XFS_B_STALE)251+#define XFS_BUF_UNSTALE(bp) ((bp)->b_flags &= ~XFS_B_STALE)252+#define XFS_BUF_ISSTALE(bp) ((bp)->b_flags & XFS_B_STALE)253+#define XFS_BUF_SUPER_STALE(bp) do { \254+ XFS_BUF_STALE(bp); \255+ xfs_buf_delwri_dequeue(bp); \256+ XFS_BUF_DONE(bp); \0000257 } while (0)258259+#define XFS_BUF_MANAGE XBF_FS_MANAGED260+#define XFS_BUF_UNMANAGE(bp) ((bp)->b_flags &= ~XBF_FS_MANAGED)261262+#define XFS_BUF_DELAYWRITE(bp) ((bp)->b_flags |= XBF_DELWRI)263+#define XFS_BUF_UNDELAYWRITE(bp) xfs_buf_delwri_dequeue(bp)264+#define XFS_BUF_ISDELAYWRITE(bp) ((bp)->b_flags & XBF_DELWRI)265266+#define XFS_BUF_ERROR(bp,no) xfs_buf_ioerror(bp,no)267+#define XFS_BUF_GETERROR(bp) xfs_buf_geterror(bp)268+#define XFS_BUF_ISERROR(bp) (xfs_buf_geterror(bp) ? 
1 : 0)269270+#define XFS_BUF_DONE(bp) ((bp)->b_flags |= XBF_DONE)271+#define XFS_BUF_UNDONE(bp) ((bp)->b_flags &= ~XBF_DONE)272+#define XFS_BUF_ISDONE(bp) ((bp)->b_flags & XBF_DONE)273274+#define XFS_BUF_BUSY(bp) do { } while (0)275+#define XFS_BUF_UNBUSY(bp) do { } while (0)276+#define XFS_BUF_ISBUSY(bp) (1)277278+#define XFS_BUF_ASYNC(bp) ((bp)->b_flags |= XBF_ASYNC)279+#define XFS_BUF_UNASYNC(bp) ((bp)->b_flags &= ~XBF_ASYNC)280+#define XFS_BUF_ISASYNC(bp) ((bp)->b_flags & XBF_ASYNC)281282+#define XFS_BUF_ORDERED(bp) ((bp)->b_flags |= XBF_ORDERED)283+#define XFS_BUF_UNORDERED(bp) ((bp)->b_flags &= ~XBF_ORDERED)284+#define XFS_BUF_ISORDERED(bp) ((bp)->b_flags & XBF_ORDERED)285286+#define XFS_BUF_SHUT(bp) do { } while (0)287+#define XFS_BUF_UNSHUT(bp) do { } while (0)288+#define XFS_BUF_ISSHUT(bp) (0)289290+#define XFS_BUF_HOLD(bp) xfs_buf_hold(bp)291+#define XFS_BUF_READ(bp) ((bp)->b_flags |= XBF_READ)292+#define XFS_BUF_UNREAD(bp) ((bp)->b_flags &= ~XBF_READ)293+#define XFS_BUF_ISREAD(bp) ((bp)->b_flags & XBF_READ)294295+#define XFS_BUF_WRITE(bp) ((bp)->b_flags |= XBF_WRITE)296+#define XFS_BUF_UNWRITE(bp) ((bp)->b_flags &= ~XBF_WRITE)297+#define XFS_BUF_ISWRITE(bp) ((bp)->b_flags & XBF_WRITE)298299+#define XFS_BUF_ISUNINITIAL(bp) (0)300+#define XFS_BUF_UNUNINITIAL(bp) (0)301302+#define XFS_BUF_BP_ISMAPPED(bp) (1)303304+#define XFS_BUF_IODONE_FUNC(bp) ((bp)->b_iodone)305+#define XFS_BUF_SET_IODONE_FUNC(bp, func) ((bp)->b_iodone = (func))306+#define XFS_BUF_CLR_IODONE_FUNC(bp) ((bp)->b_iodone = NULL)307+#define XFS_BUF_SET_BDSTRAT_FUNC(bp, func) ((bp)->b_strat = (func))308+#define XFS_BUF_CLR_BDSTRAT_FUNC(bp) ((bp)->b_strat = NULL)0000309310+#define XFS_BUF_FSPRIVATE(bp, type) ((type)(bp)->b_fspriv)311+#define XFS_BUF_SET_FSPRIVATE(bp, val) ((bp)->b_fspriv = (void*)(val))312+#define XFS_BUF_FSPRIVATE2(bp, type) ((type)(bp)->b_fspriv2)313+#define XFS_BUF_SET_FSPRIVATE2(bp, val) ((bp)->b_fspriv2 = (void*)(val))314+#define XFS_BUF_FSPRIVATE3(bp, type) ((type)(bp)->b_fspriv3)315+#define XFS_BUF_SET_FSPRIVATE3(bp, val) ((bp)->b_fspriv3 = (void*)(val))316+#define XFS_BUF_SET_START(bp) do { } while (0)317+#define XFS_BUF_SET_BRELSE_FUNC(bp, func) ((bp)->b_relse = (func))00000318319+#define XFS_BUF_PTR(bp) (xfs_caddr_t)((bp)->b_addr)320+#define XFS_BUF_SET_PTR(bp, val, cnt) xfs_buf_associate_memory(bp, val, cnt)321+#define XFS_BUF_ADDR(bp) ((bp)->b_bn)322+#define XFS_BUF_SET_ADDR(bp, bno) ((bp)->b_bn = (xfs_daddr_t)(bno))323+#define XFS_BUF_OFFSET(bp) ((bp)->b_file_offset)324+#define XFS_BUF_SET_OFFSET(bp, off) ((bp)->b_file_offset = (off))325+#define XFS_BUF_COUNT(bp) ((bp)->b_count_desired)326+#define XFS_BUF_SET_COUNT(bp, cnt) ((bp)->b_count_desired = (cnt))327+#define XFS_BUF_SIZE(bp) ((bp)->b_buffer_length)328+#define XFS_BUF_SET_SIZE(bp, cnt) ((bp)->b_buffer_length = (cnt))329330+#define XFS_BUF_SET_VTYPE_REF(bp, type, ref) do { } while (0)331+#define XFS_BUF_SET_VTYPE(bp, type) do { } while (0)332+#define XFS_BUF_SET_REF(bp, ref) do { } while (0)333334+#define XFS_BUF_ISPINNED(bp) xfs_buf_ispin(bp)335+336+#define XFS_BUF_VALUSEMA(bp) xfs_buf_lock_value(bp)337+#define XFS_BUF_CPSEMA(bp) (xfs_buf_cond_lock(bp) == 0)338+#define XFS_BUF_VSEMA(bp) xfs_buf_unlock(bp)339+#define XFS_BUF_PSEMA(bp,x) xfs_buf_lock(bp)340+#define XFS_BUF_V_IODONESEMA(bp) up(&bp->b_iodonesema);341+342+#define XFS_BUF_SET_TARGET(bp, target) ((bp)->b_target = (target))343+#define XFS_BUF_TARGET(bp) ((bp)->b_target)344+#define XFS_BUFTARG_NAME(target) xfs_buf_target_name(target)345+346+static inline int xfs_bawrite(void 
*mp, xfs_buf_t *bp)347{348+ bp->b_fspriv3 = mp;349+ bp->b_strat = xfs_bdstrat_cb;350+ xfs_buf_delwri_dequeue(bp);351+ return xfs_buf_iostart(bp, XBF_WRITE | XBF_ASYNC | _XBF_RUN_QUEUES);352}353354+static inline void xfs_buf_relse(xfs_buf_t *bp)0000000000000000000000000000000000000355{356+ if (!bp->b_relse)357+ xfs_buf_unlock(bp);358+ xfs_buf_rele(bp);0359}360361+#define xfs_bpin(bp) xfs_buf_pin(bp)362+#define xfs_bunpin(bp) xfs_buf_unpin(bp)0000000363364#define xfs_buftrace(id, bp) \365+ xfs_buf_trace(bp, id, NULL, (void *)__builtin_return_address(0))366367+#define xfs_biodone(bp) xfs_buf_ioend(bp, 0)0368369+#define xfs_biomove(bp, off, len, data, rw) \370+ xfs_buf_iomove((bp), (off), (len), (data), \371+ ((rw) == XFS_B_WRITE) ? XBRW_WRITE : XBRW_READ)372373+#define xfs_biozero(bp, off, len) \374+ xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO)375376377+static inline int XFS_bwrite(xfs_buf_t *bp)378{379+ int iowait = (bp->b_flags & XBF_ASYNC) == 0;380 int error = 0;381382 if (!iowait)383+ bp->b_flags |= _XBF_RUN_QUEUES;384385+ xfs_buf_delwri_dequeue(bp);386+ xfs_buf_iostrategy(bp);387 if (iowait) {388+ error = xfs_buf_iowait(bp);389+ xfs_buf_relse(bp);390 }391 return error;392}393394+#define XFS_bdwrite(bp) xfs_buf_iostart(bp, XBF_DELWRI | XBF_ASYNC)0395396static inline int xfs_bdwrite(void *mp, xfs_buf_t *bp)397{398+ bp->b_strat = xfs_bdstrat_cb;399+ bp->b_fspriv3 = mp;400+ return xfs_buf_iostart(bp, XBF_DELWRI | XBF_ASYNC);0401}402403+#define XFS_bdstrat(bp) xfs_buf_iorequest(bp)404405+#define xfs_iowait(bp) xfs_buf_iowait(bp)406407#define xfs_baread(target, rablkno, ralen) \408+ xfs_buf_readahead((target), (rablkno), (ralen), XBF_DONT_BLOCK)0000409410411/*412 * Handling of buftargs.413 */0414extern xfs_buftarg_t *xfs_alloc_buftarg(struct block_device *, int);415extern void xfs_free_buftarg(xfs_buftarg_t *, int);416extern void xfs_wait_buftarg(xfs_buftarg_t *);417extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int);418extern int xfs_flush_buftarg(xfs_buftarg_t *, int);419420+#define xfs_getsize_buftarg(buftarg) block_size((buftarg)->bt_bdev)421+#define xfs_readonly_buftarg(buftarg) bdev_read_only((buftarg)->bt_bdev)422+423+#define xfs_binval(buftarg) xfs_flush_buftarg(buftarg, 1)424+#define XFS_bflush(buftarg) xfs_flush_buftarg(buftarg, 1)000425426#endif /* __XFS_BUF_H__ */
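Note: the header above renames the page_buf_* interfaces to xfs_buf_* and replaces the PBF_* values with the XBF_* flag enum, keeping the old XFS_B_*/XFS_BUF_* macro names as thin aliases so callers compile unchanged. A small stand-alone sketch of the or-able flag-enum idiom follows; the DEMO_* names and bit positions are invented for the example, only the pattern mirrors the header.

/*
 * Illustrative or-able flag enum in the style of xfs_buf_flags_t,
 * plus a ZEROFLAGS-style mask clear.  Demo names only.
 */
#include <stdio.h>

typedef enum {
	DEMO_READ   = (1 << 0),
	DEMO_WRITE  = (1 << 1),
	DEMO_ASYNC  = (1 << 4),
	DEMO_DELWRI = (1 << 6),
} demo_buf_flags_t;

#define DEMO_ZEROFLAGS(f) \
	((f) &= ~(DEMO_READ | DEMO_WRITE | DEMO_ASYNC | DEMO_DELWRI))

int main(void)
{
	unsigned int flags = DEMO_DELWRI | DEMO_ASYNC;

	printf("delayed write? %s\n", (flags & DEMO_DELWRI) ? "yes" : "no");
	DEMO_ZEROFLAGS(flags);
	printf("after clear: %#x\n", flags);
	return 0;
}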
···509 vnode_t *vp = LINVFS_GET_VP(inode);510 xfs_mount_t *mp = XFS_VFSTOM(vp->v_vfsp);511 int error = 0;0512 xfs_inode_t *ip;513514 if (vp->v_vfsp->vfs_flag & VFS_DMI) {515+ ip = xfs_vtoi(vp);516+ if (!ip) {517 error = -EINVAL;518 goto open_exec_out;519 }0520 if (DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_READ)) {521 error = -XFS_SEND_DATA(mp, DM_EVENT_READ, vp,522 0, 0, 0, NULL);
fs/xfs/linux-2.6/xfs_ioctl.c (+3, -7)
···146147 if (cmd != XFS_IOC_PATH_TO_FSHANDLE) {148 xfs_inode_t *ip;149- bhv_desc_t *bhv;150 int lock_mode;151152 /* need to get access to the xfs_inode to read the generation */153- bhv = vn_bhv_lookup_unlocked(VN_BHV_HEAD(vp), &xfs_vnodeops);154- ASSERT(bhv);155- ip = XFS_BHVTOI(bhv);156 ASSERT(ip);157 lock_mode = xfs_ilock_map_shared(ip);158···748 (ip->i_d.di_flags & XFS_DIFLAG_REALTIME) ?749 mp->m_rtdev_targp : mp->m_ddev_targp;750751- da.d_mem = da.d_miniosz = 1 << target->pbr_sshift;752- /* The size dio will do in one go */753- da.d_maxiosz = 64 * PAGE_CACHE_SIZE;754755 if (copy_to_user(arg, &da, sizeof(da)))756 return -XFS_ERROR(EFAULT);
···146147 if (cmd != XFS_IOC_PATH_TO_FSHANDLE) {148 xfs_inode_t *ip;0149 int lock_mode;150151 /* need to get access to the xfs_inode to read the generation */152+ ip = xfs_vtoi(vp);00153 ASSERT(ip);154 lock_mode = xfs_ilock_map_shared(ip);155···751 (ip->i_d.di_flags & XFS_DIFLAG_REALTIME) ?752 mp->m_rtdev_targp : mp->m_ddev_targp;753754+ da.d_mem = da.d_miniosz = 1 << target->bt_sshift;755+ da.d_maxiosz = INT_MAX & ~(da.d_miniosz - 1);0756757 if (copy_to_user(arg, &da, sizeof(da)))758 return -XFS_ERROR(EFAULT);
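Note: the XFS_IOC_DIOINFO hunk above reports d_maxiosz as INT_MAX rounded down to a multiple of d_miniosz instead of a fixed 64 pages. The rounding only works because d_miniosz is a power of two (1 << bt_sshift). A tiny sketch of that arithmetic; the sector shift used here is an assumption for the demo.

/*
 * Sketch of the new d_maxiosz calculation: clamp the maximum direct
 * I/O size to INT_MAX rounded down to a multiple of the minimum
 * direct I/O size, which is a power of two.
 */
#include <limits.h>
#include <stdio.h>

int main(void)
{
	unsigned int sshift = 9;		/* assume 512-byte sectors */
	int miniosz = 1 << sshift;
	int maxiosz = INT_MAX & ~(miniosz - 1);

	printf("miniosz = %d, maxiosz = %d\n", miniosz, maxiosz);
	printf("maxiosz %% miniosz = %d\n", maxiosz % miniosz);
	return 0;
}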
fs/xfs/linux-2.6/xfs_iops.c (+81, -40)
···54#include <linux/capability.h>55#include <linux/xattr.h>56#include <linux/namei.h>05758#define IS_NOATIME(inode) ((inode->i_sb->s_flags & MS_NOATIME) || \59 (S_ISDIR(inode->i_mode) && inode->i_sb->s_flags & MS_NODIRATIME))00000000000000000000000000000000006061/*62 * Change the requested timestamp in the given inode.···111{112 struct inode *inode = LINVFS_GET_IP(XFS_ITOV(ip));113 timespec_t tv;114-115- /*116- * We're not supposed to change timestamps in readonly-mounted117- * filesystems. Throw it away if anyone asks us.118- */119- if (unlikely(IS_RDONLY(inode)))120- return;121-122- /*123- * Don't update access timestamps on reads if mounted "noatime".124- * Throw it away if anyone asks us.125- */126- if (unlikely(127- (ip->i_mount->m_flags & XFS_MOUNT_NOATIME || IS_NOATIME(inode)) &&128- (flags & (XFS_ICHGTIME_ACC|XFS_ICHGTIME_MOD|XFS_ICHGTIME_CHG)) ==129- XFS_ICHGTIME_ACC))130- return;131132 nanotime(&tv);133 if (flags & XFS_ICHGTIME_MOD) {···148 * Variant on the above which avoids querying the system clock149 * in situations where we know the Linux inode timestamps have150 * just been updated (and so we can update our inode cheaply).151- * We also skip the readonly and noatime checks here, they are152- * also catered for already.153 */154void155xfs_ichgtime_fast(···158 timespec_t *tvp;159160 /*000000161 * We're not supposed to change timestamps in readonly-mounted162 * filesystems. Throw it away if anyone asks us.163 */164 if (unlikely(IS_RDONLY(inode)))165 return;166167- /*168- * Don't update access timestamps on reads if mounted "noatime".169- * Throw it away if anyone asks us.170- */171- if (unlikely(172- (ip->i_mount->m_flags & XFS_MOUNT_NOATIME || IS_NOATIME(inode)) &&173- ((flags & (XFS_ICHGTIME_ACC|XFS_ICHGTIME_MOD|XFS_ICHGTIME_CHG)) ==174- XFS_ICHGTIME_ACC)))175- return;176-177 if (flags & XFS_ICHGTIME_MOD) {178 tvp = &inode->i_mtime;179 ip->i_d.di_mtime.t_sec = (__int32_t)tvp->tv_sec;180 ip->i_d.di_mtime.t_nsec = (__int32_t)tvp->tv_nsec;181- }182- if (flags & XFS_ICHGTIME_ACC) {183- tvp = &inode->i_atime;184- ip->i_d.di_atime.t_sec = (__int32_t)tvp->tv_sec;185- ip->i_d.di_atime.t_nsec = (__int32_t)tvp->tv_nsec;186 }187 if (flags & XFS_ICHGTIME_CHG) {188 tvp = &inode->i_ctime;···218 if (i_size_read(ip) != va.va_size)219 i_size_write(ip, va.va_size);220 }000000000000000000000000000000000221}222223/*···318 break;319 }320000321 if (default_acl) {322 if (!error) {323 error = _ACL_INHERIT(vp, &va, default_acl);···337 teardown.d_inode = ip = LINVFS_GET_IP(vp);338 teardown.d_name = dentry->d_name;339340- vn_mark_bad(vp);341-342 if (S_ISDIR(mode))343 VOP_RMDIR(dvp, &teardown, NULL, err2);344 else···547 ASSERT(dentry);548 ASSERT(nd);549550- link = (char *)kmalloc(MAXNAMELEN+1, GFP_KERNEL);551 if (!link) {552 nd_set_link(nd, ERR_PTR(-ENOMEM));553 return NULL;···563 vp = LINVFS_GET_VP(dentry->d_inode);564565 iov.iov_base = link;566- iov.iov_len = MAXNAMELEN;567568 uio->uio_iov = &iov;569 uio->uio_offset = 0;570 uio->uio_segflg = UIO_SYSSPACE;571- uio->uio_resid = MAXNAMELEN;572 uio->uio_iovcnt = 1;573574 VOP_READLINK(vp, uio, 0, NULL, error);···576 kfree(link);577 link = ERR_PTR(-error);578 } else {579- link[MAXNAMELEN - uio->uio_resid] = '\0';580 }581 kfree(uio);582
···54#include <linux/capability.h>55#include <linux/xattr.h>56#include <linux/namei.h>57+#include <linux/security.h>5859#define IS_NOATIME(inode) ((inode->i_sb->s_flags & MS_NOATIME) || \60 (S_ISDIR(inode->i_mode) && inode->i_sb->s_flags & MS_NODIRATIME))61+62+/*63+ * Get a XFS inode from a given vnode.64+ */65+xfs_inode_t *66+xfs_vtoi(67+ struct vnode *vp)68+{69+ bhv_desc_t *bdp;70+71+ bdp = bhv_lookup_range(VN_BHV_HEAD(vp),72+ VNODE_POSITION_XFS, VNODE_POSITION_XFS);73+ if (unlikely(bdp == NULL))74+ return NULL;75+ return XFS_BHVTOI(bdp);76+}77+78+/*79+ * Bring the atime in the XFS inode uptodate.80+ * Used before logging the inode to disk or when the Linux inode goes away.81+ */82+void83+xfs_synchronize_atime(84+ xfs_inode_t *ip)85+{86+ vnode_t *vp;87+88+ vp = XFS_ITOV_NULL(ip);89+ if (vp) {90+ struct inode *inode = &vp->v_inode;91+ ip->i_d.di_atime.t_sec = (__int32_t)inode->i_atime.tv_sec;92+ ip->i_d.di_atime.t_nsec = (__int32_t)inode->i_atime.tv_nsec;93+ }94+}9596/*97 * Change the requested timestamp in the given inode.···76{77 struct inode *inode = LINVFS_GET_IP(XFS_ITOV(ip));78 timespec_t tv;000000000000000007980 nanotime(&tv);81 if (flags & XFS_ICHGTIME_MOD) {···130 * Variant on the above which avoids querying the system clock131 * in situations where we know the Linux inode timestamps have132 * just been updated (and so we can update our inode cheaply).00133 */134void135xfs_ichgtime_fast(···142 timespec_t *tvp;143144 /*145+ * Atime updates for read() & friends are handled lazily now, and146+ * explicit updates must go through xfs_ichgtime()147+ */148+ ASSERT((flags & XFS_ICHGTIME_ACC) == 0);149+150+ /*151 * We're not supposed to change timestamps in readonly-mounted152 * filesystems. Throw it away if anyone asks us.153 */154 if (unlikely(IS_RDONLY(inode)))155 return;1560000000000157 if (flags & XFS_ICHGTIME_MOD) {158 tvp = &inode->i_mtime;159 ip->i_d.di_mtime.t_sec = (__int32_t)tvp->tv_sec;160 ip->i_d.di_mtime.t_nsec = (__int32_t)tvp->tv_nsec;00000161 }162 if (flags & XFS_ICHGTIME_CHG) {163 tvp = &inode->i_ctime;···211 if (i_size_read(ip) != va.va_size)212 i_size_write(ip, va.va_size);213 }214+}215+216+/*217+ * Hook in SELinux. 
This is not quite correct yet, what we really need218+ * here (as we do for default ACLs) is a mechanism by which creation of219+ * these attrs can be journalled at inode creation time (along with the220+ * inode, of course, such that log replay can't cause these to be lost).221+ */222+STATIC int223+linvfs_init_security(224+ struct vnode *vp,225+ struct inode *dir)226+{227+ struct inode *ip = LINVFS_GET_IP(vp);228+ size_t length;229+ void *value;230+ char *name;231+ int error;232+233+ error = security_inode_init_security(ip, dir, &name, &value, &length);234+ if (error) {235+ if (error == -EOPNOTSUPP)236+ return 0;237+ return -error;238+ }239+240+ VOP_ATTR_SET(vp, name, value, length, ATTR_SECURE, NULL, error);241+ if (!error)242+ VMODIFY(vp);243+244+ kfree(name);245+ kfree(value);246+ return error;247}248249/*···278 break;279 }280281+ if (!error)282+ error = linvfs_init_security(vp, dir);283+284 if (default_acl) {285 if (!error) {286 error = _ACL_INHERIT(vp, &va, default_acl);···294 teardown.d_inode = ip = LINVFS_GET_IP(vp);295 teardown.d_name = dentry->d_name;29600297 if (S_ISDIR(mode))298 VOP_RMDIR(dvp, &teardown, NULL, err2);299 else···506 ASSERT(dentry);507 ASSERT(nd);508509+ link = (char *)kmalloc(MAXPATHLEN+1, GFP_KERNEL);510 if (!link) {511 nd_set_link(nd, ERR_PTR(-ENOMEM));512 return NULL;···522 vp = LINVFS_GET_VP(dentry->d_inode);523524 iov.iov_base = link;525+ iov.iov_len = MAXPATHLEN;526527 uio->uio_iov = &iov;528 uio->uio_offset = 0;529 uio->uio_segflg = UIO_SYSSPACE;530+ uio->uio_resid = MAXPATHLEN;531 uio->uio_iovcnt = 1;532533 VOP_READLINK(vp, uio, 0, NULL, error);···535 kfree(link);536 link = ERR_PTR(-error);537 } else {538+ link[MAXPATHLEN - uio->uio_resid] = '\0';539 }540 kfree(uio);541
···27 mutex_init(&uuid_monitor);28}29000000000030/*31 * uuid_getnodeuniq - obtain the node unique fields of a UUID.32 *···46void47uuid_getnodeuniq(uuid_t *uuid, int fsid [2])48{49- char *uu = (char *)uuid;5051- /* on IRIX, this function assumes big-endian fields within52- * the uuid, so we use INT_GET to get the same result on53- * little-endian systems54- */55-56- fsid[0] = (INT_GET(*(u_int16_t*)(uu+8), ARCH_CONVERT) << 16) +57- INT_GET(*(u_int16_t*)(uu+4), ARCH_CONVERT);58- fsid[1] = INT_GET(*(u_int32_t*)(uu ), ARCH_CONVERT);59}6061void
···27 mutex_init(&uuid_monitor);28}2930+31+/* IRIX interpretation of an uuid_t */32+typedef struct {33+ __be32 uu_timelow;34+ __be16 uu_timemid;35+ __be16 uu_timehi;36+ __be16 uu_clockseq;37+ __be16 uu_node[3];38+} xfs_uu_t;39+40/*41 * uuid_getnodeuniq - obtain the node unique fields of a UUID.42 *···36void37uuid_getnodeuniq(uuid_t *uuid, int fsid [2])38{39+ xfs_uu_t *uup = (xfs_uu_t *)uuid;4041+ fsid[0] = (be16_to_cpu(uup->uu_clockseq) << 16) |42+ be16_to_cpu(uup->uu_timemid);43+ fsid[1] = be16_to_cpu(uup->uu_timelow);0000044}4546void
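Note: uuid_getnodeuniq() above now decodes the UUID through an explicit xfs_uu_t layout of big-endian fields instead of casting raw byte offsets. A portable user-space sketch of that decoding follows; the be16/be32 helpers and the sample bytes are invented for the demo, and the 32-bit read of time_low mirrors what the original INT_GET of a u_int32_t did.

/*
 * Decode the IRIX-layout UUID fields as explicit big-endian values,
 * independent of host endianness.  Demo helpers and data only.
 */
#include <stdint.h>
#include <stdio.h>

static uint16_t be16(const unsigned char *p)
{
	return (uint16_t)((p[0] << 8) | p[1]);
}

static uint32_t be32(const unsigned char *p)
{
	return ((uint32_t)p[0] << 24) | ((uint32_t)p[1] << 16) |
	       ((uint32_t)p[2] << 8)  |  (uint32_t)p[3];
}

int main(void)
{
	/* layout: time_low(4) time_mid(2) time_hi(2) clock_seq(2) node(6) */
	unsigned char uu[16] = {
		0x12, 0x34, 0x56, 0x78,  0x9a, 0xbc,  0xde, 0xf0,
		0x11, 0x22,  0x33, 0x44, 0x55, 0x66, 0x77, 0x88,
	};
	int fsid[2];

	fsid[0] = (be16(&uu[8]) << 16) | be16(&uu[4]);	/* clock_seq:time_mid */
	fsid[1] = (int)be32(&uu[0]);			/* time_low */

	printf("fsid = { %#x, %#x }\n", fsid[0], fsid[1]);
	return 0;
}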
···128 return (offset >= minforkoff) ? minforkoff : 0;129 }130131- if (unlikely(mp->m_flags & XFS_MOUNT_COMPAT_ATTR)) {132 if (bytes <= XFS_IFORK_ASIZE(dp))133 return mp->m_attroffset >> 3;134 return 0;···157{158 unsigned long s;159160- if (!(mp->m_flags & XFS_MOUNT_COMPAT_ATTR) &&161 !(XFS_SB_VERSION_HASATTR2(&mp->m_sb))) {162 s = XFS_SB_LOCK(mp);163 if (!XFS_SB_VERSION_HASATTR2(&mp->m_sb)) {···311 */312 totsize -= size;313 if (totsize == sizeof(xfs_attr_sf_hdr_t) && !args->addname &&314- !(mp->m_flags & XFS_MOUNT_COMPAT_ATTR)) {315 /*316 * Last attribute now removed, revert to original317 * inode format making all literal area available···330 dp->i_d.di_forkoff = xfs_attr_shortform_bytesfit(dp, totsize);331 ASSERT(dp->i_d.di_forkoff);332 ASSERT(totsize > sizeof(xfs_attr_sf_hdr_t) || args->addname ||333- (mp->m_flags & XFS_MOUNT_COMPAT_ATTR));334 dp->i_afp->if_ext_max =335 XFS_IFORK_ASIZE(dp) / (uint)sizeof(xfs_bmbt_rec_t);336 dp->i_df.if_ext_max =···739 + name_loc->namelen740 + INT_GET(name_loc->valuelen, ARCH_CONVERT);741 }742- if (!(dp->i_mount->m_flags & XFS_MOUNT_COMPAT_ATTR) &&743 (bytes == sizeof(struct xfs_attr_sf_hdr)))744 return(-1);745 return(xfs_attr_shortform_bytesfit(dp, bytes));···778 goto out;779780 if (forkoff == -1) {781- ASSERT(!(dp->i_mount->m_flags & XFS_MOUNT_COMPAT_ATTR));782783 /*784 * Last attribute was removed, revert to original
···128 return (offset >= minforkoff) ? minforkoff : 0;129 }130131+ if (!(mp->m_flags & XFS_MOUNT_ATTR2)) {132 if (bytes <= XFS_IFORK_ASIZE(dp))133 return mp->m_attroffset >> 3;134 return 0;···157{158 unsigned long s;159160+ if ((mp->m_flags & XFS_MOUNT_ATTR2) &&161 !(XFS_SB_VERSION_HASATTR2(&mp->m_sb))) {162 s = XFS_SB_LOCK(mp);163 if (!XFS_SB_VERSION_HASATTR2(&mp->m_sb)) {···311 */312 totsize -= size;313 if (totsize == sizeof(xfs_attr_sf_hdr_t) && !args->addname &&314+ (mp->m_flags & XFS_MOUNT_ATTR2)) {315 /*316 * Last attribute now removed, revert to original317 * inode format making all literal area available···330 dp->i_d.di_forkoff = xfs_attr_shortform_bytesfit(dp, totsize);331 ASSERT(dp->i_d.di_forkoff);332 ASSERT(totsize > sizeof(xfs_attr_sf_hdr_t) || args->addname ||333+ !(mp->m_flags & XFS_MOUNT_ATTR2));334 dp->i_afp->if_ext_max =335 XFS_IFORK_ASIZE(dp) / (uint)sizeof(xfs_bmbt_rec_t);336 dp->i_df.if_ext_max =···739 + name_loc->namelen740 + INT_GET(name_loc->valuelen, ARCH_CONVERT);741 }742+ if ((dp->i_mount->m_flags & XFS_MOUNT_ATTR2) &&743 (bytes == sizeof(struct xfs_attr_sf_hdr)))744 return(-1);745 return(xfs_attr_shortform_bytesfit(dp, bytes));···778 goto out;779780 if (forkoff == -1) {781+ ASSERT(dp->i_mount->m_flags & XFS_MOUNT_ATTR2);782783 /*784 * Last attribute was removed, revert to original
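Note: the xfs_attr.c hunks above flip the sense of the mount flag: every test of !(m_flags & XFS_MOUNT_COMPAT_ATTR) becomes (m_flags & XFS_MOUNT_ATTR2), and vice versa. A trivial sketch of that polarity flip; the bit values here are invented for the demo.

/*
 * Flag inversion sketch: "compat attr not set" in the old code means
 * the same thing as "ATTR2 set" in the new code.
 */
#include <stdio.h>

#define DEMO_MOUNT_COMPAT_ATTR	(1 << 8)	/* old: force attr1 format */
#define DEMO_MOUNT_ATTR2	(1 << 8)	/* new: enable attr2 format */

static int old_wants_attr2(unsigned int m_flags)
{
	return !(m_flags & DEMO_MOUNT_COMPAT_ATTR);
}

static int new_wants_attr2(unsigned int m_flags)
{
	return (m_flags & DEMO_MOUNT_ATTR2) != 0;
}

int main(void)
{
	/* the same question, asked with opposite flag polarity */
	printf("old, flag clear: %d\n", old_wants_attr2(0));
	printf("new, flag set:   %d\n", new_wants_attr2(DEMO_MOUNT_ATTR2));
	return 0;
}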
fs/xfs/xfs_attr_leaf.h (+43, -36)
···63 * the leaf_entry. The namespaces are independent only because we also look64 * at the namespace bit when we are looking for a matching attribute name.65 *66- * We also store a "incomplete" bit in the leaf_entry. It shows that an67 * attribute is in the middle of being created and should not be shown to68 * the user if we crash during the time that the bit is set. We clear the69 * bit when we have finished setting up the attribute. We do this because···72 */73#define XFS_ATTR_LEAF_MAPSIZE 3 /* how many freespace slots */7400000000000000000000000000000000000075typedef struct xfs_attr_leafblock {76- struct xfs_attr_leaf_hdr { /* constant-structure header block */77- xfs_da_blkinfo_t info; /* block type, links, etc. */78- __uint16_t count; /* count of active leaf_entry's */79- __uint16_t usedbytes; /* num bytes of names/values stored */80- __uint16_t firstused; /* first used byte in name area */81- __uint8_t holes; /* != 0 if blk needs compaction */82- __uint8_t pad1;83- struct xfs_attr_leaf_map { /* RLE map of free bytes */84- __uint16_t base; /* base of free region */85- __uint16_t size; /* length of free region */86- } freemap[XFS_ATTR_LEAF_MAPSIZE]; /* N largest free regions */87- } hdr;88- struct xfs_attr_leaf_entry { /* sorted on key, not name */89- xfs_dahash_t hashval; /* hash value of name */90- __uint16_t nameidx; /* index into buffer of name/value */91- __uint8_t flags; /* LOCAL/ROOT/SECURE/INCOMPLETE flag */92- __uint8_t pad2; /* unused pad byte */93- } entries[1]; /* variable sized array */94- struct xfs_attr_leaf_name_local {95- __uint16_t valuelen; /* number of bytes in value */96- __uint8_t namelen; /* length of name bytes */97- __uint8_t nameval[1]; /* name/value bytes */98- } namelist; /* grows from bottom of buf */99- struct xfs_attr_leaf_name_remote {100- xfs_dablk_t valueblk; /* block number of value bytes */101- __uint32_t valuelen; /* number of bytes in value */102- __uint8_t namelen; /* length of name bytes */103- __uint8_t name[1]; /* name bytes */104- } valuelist; /* grows from bottom of buf */105} xfs_attr_leafblock_t;106-typedef struct xfs_attr_leaf_hdr xfs_attr_leaf_hdr_t;107-typedef struct xfs_attr_leaf_map xfs_attr_leaf_map_t;108-typedef struct xfs_attr_leaf_entry xfs_attr_leaf_entry_t;109-typedef struct xfs_attr_leaf_name_local xfs_attr_leaf_name_local_t;110-typedef struct xfs_attr_leaf_name_remote xfs_attr_leaf_name_remote_t;111112/*113 * Flags used in the leaf_entry[i].flags field.···156 (leafp))[INT_GET((leafp)->entries[idx].nameidx, ARCH_CONVERT)];157}158159-#define XFS_ATTR_LEAF_NAME(leafp,idx) xfs_attr_leaf_name(leafp,idx)0160static inline char *xfs_attr_leaf_name(xfs_attr_leafblock_t *leafp, int idx)161{162 return (&((char *)
···63 * the leaf_entry. The namespaces are independent only because we also look64 * at the namespace bit when we are looking for a matching attribute name.65 *66+ * We also store an "incomplete" bit in the leaf_entry. It shows that an67 * attribute is in the middle of being created and should not be shown to68 * the user if we crash during the time that the bit is set. We clear the69 * bit when we have finished setting up the attribute. We do this because···72 */73#define XFS_ATTR_LEAF_MAPSIZE 3 /* how many freespace slots */7475+typedef struct xfs_attr_leaf_map { /* RLE map of free bytes */76+ __uint16_t base; /* base of free region */77+ __uint16_t size; /* length of free region */78+} xfs_attr_leaf_map_t;79+80+typedef struct xfs_attr_leaf_hdr { /* constant-structure header block */81+ xfs_da_blkinfo_t info; /* block type, links, etc. */82+ __uint16_t count; /* count of active leaf_entry's */83+ __uint16_t usedbytes; /* num bytes of names/values stored */84+ __uint16_t firstused; /* first used byte in name area */85+ __uint8_t holes; /* != 0 if blk needs compaction */86+ __uint8_t pad1;87+ xfs_attr_leaf_map_t freemap[XFS_ATTR_LEAF_MAPSIZE];88+ /* N largest free regions */89+} xfs_attr_leaf_hdr_t;90+91+typedef struct xfs_attr_leaf_entry { /* sorted on key, not name */92+ xfs_dahash_t hashval; /* hash value of name */93+ __uint16_t nameidx; /* index into buffer of name/value */94+ __uint8_t flags; /* LOCAL/ROOT/SECURE/INCOMPLETE flag */95+ __uint8_t pad2; /* unused pad byte */96+} xfs_attr_leaf_entry_t;97+98+typedef struct xfs_attr_leaf_name_local {99+ __uint16_t valuelen; /* number of bytes in value */100+ __uint8_t namelen; /* length of name bytes */101+ __uint8_t nameval[1]; /* name/value bytes */102+} xfs_attr_leaf_name_local_t;103+104+typedef struct xfs_attr_leaf_name_remote {105+ xfs_dablk_t valueblk; /* block number of value bytes */106+ __uint32_t valuelen; /* number of bytes in value */107+ __uint8_t namelen; /* length of name bytes */108+ __uint8_t name[1]; /* name bytes */109+} xfs_attr_leaf_name_remote_t;110+111typedef struct xfs_attr_leafblock {112+ xfs_attr_leaf_hdr_t hdr; /* constant-structure header block */113+ xfs_attr_leaf_entry_t entries[1]; /* sorted on key, not name */114+ xfs_attr_leaf_name_local_t namelist; /* grows from bottom of buf */115+ xfs_attr_leaf_name_remote_t valuelist; /* grows from bottom of buf */0000000000000000000000000116} xfs_attr_leafblock_t;00000117118/*119 * Flags used in the leaf_entry[i].flags field.···150 (leafp))[INT_GET((leafp)->entries[idx].nameidx, ARCH_CONVERT)];151}152153+#define XFS_ATTR_LEAF_NAME(leafp,idx) \154+ xfs_attr_leaf_name(leafp,idx)155static inline char *xfs_attr_leaf_name(xfs_attr_leafblock_t *leafp, int idx)156{157 return (&((char *)
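Note: the xfs_attr_leaf.h change above flattens the nested leaf-block definitions into standalone typedefs with the same fields. A small stand-alone sketch of an entry-like struct follows, just to show the kind of sizeof/offsetof reasoning code does against such a layout; the demo type and values are invented and do not claim to match the on-disk format.

/*
 * Demo of reasoning about a packed-style entry layout by name.
 */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

typedef struct demo_attr_leaf_entry {
	uint32_t hashval;	/* hash value of name */
	uint16_t nameidx;	/* index into buffer of name/value */
	uint8_t  flags;		/* local/root/secure/incomplete style flags */
	uint8_t  pad2;		/* unused pad byte */
} demo_attr_leaf_entry_t;

int main(void)
{
	printf("entry size = %zu, nameidx offset = %zu\n",
	       sizeof(demo_attr_leaf_entry_t),
	       offsetof(demo_attr_leaf_entry_t, nameidx));
	return 0;
}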
fs/xfs/xfs_bmap.c  +250 -160
···2146 return 0; /* keep gcc quite */2147}214800000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000002149#define XFS_ALLOC_GAP_UNITS 421502151/*2152 * xfs_bmap_alloc is called by xfs_bmapi to allocate an extent for a file.2153 * It figures out where to ask the underlying allocator to put the new extent.2154 */2155-STATIC int /* error */2156xfs_bmap_alloc(2157 xfs_bmalloca_t *ap) /* bmap alloc argument struct */2158{···2326 xfs_mount_t *mp; /* mount point structure */2327 int nullfb; /* true if ap->firstblock isn't set */2328 int rt; /* true if inode is realtime */2329-#ifdef __KERNEL__2330- xfs_extlen_t prod=0; /* product factor for allocators */2331- xfs_extlen_t ralen=0; /* realtime allocation length */2332-#endif23332334#define ISVALID(x,y) \2335 (rt ? \···2345 nullfb = ap->firstblock == NULLFSBLOCK;2346 rt = XFS_IS_REALTIME_INODE(ap->ip) && ap->userdata;2347 fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, ap->firstblock);2348-#ifdef __KERNEL__2349 if (rt) {2350- xfs_extlen_t extsz; /* file extent size for rt */2351- xfs_fileoff_t nexto; /* next file offset */2352- xfs_extlen_t orig_alen; /* original ap->alen */2353- xfs_fileoff_t orig_end; /* original off+len */2354- xfs_fileoff_t orig_off; /* original ap->off */2355- xfs_extlen_t mod_off; /* modulus calculations */2356- xfs_fileoff_t prevo; /* previous file offset */2357- xfs_rtblock_t rtx; /* realtime extent number */2358- xfs_extlen_t temp; /* temp for rt calculations */23592360- /*2361- * Set prod to match the realtime extent size.2362- */2363- if (!(extsz = ap->ip->i_d.di_extsize))2364- extsz = mp->m_sb.sb_rextsize;2365- prod = extsz / mp->m_sb.sb_rextsize;2366- orig_off = ap->off;2367- orig_alen = ap->alen;2368- orig_end = orig_off + orig_alen;2369- /*2370- * If the file offset is unaligned vs. the extent size2371- * we need to align it. This will be possible unless2372- * the file was previously written with a kernel that didn't2373- * perform this alignment.2374- */2375- mod_off = do_mod(orig_off, extsz);2376- if (mod_off) {2377- ap->alen += mod_off;2378- ap->off -= mod_off;2379- }2380- /*2381- * Same adjustment for the end of the requested area.2382- */2383- if ((temp = (ap->alen % extsz)))2384- ap->alen += extsz - temp;2385- /*2386- * If the previous block overlaps with this proposed allocation2387- * then move the start forward without adjusting the length.2388- */2389- prevo =2390- ap->prevp->br_startoff == NULLFILEOFF ?2391- 0 :2392- (ap->prevp->br_startoff +2393- ap->prevp->br_blockcount);2394- if (ap->off != orig_off && ap->off < prevo)2395- ap->off = prevo;2396- /*2397- * If the next block overlaps with this proposed allocation2398- * then move the start back without adjusting the length,2399- * but not before offset 0.2400- * This may of course make the start overlap previous block,2401- * and if we hit the offset 0 limit then the next block2402- * can still overlap too.2403- */2404- nexto = (ap->eof || ap->gotp->br_startoff == NULLFILEOFF) ?2405- NULLFILEOFF : ap->gotp->br_startoff;2406- if (!ap->eof &&2407- ap->off + ap->alen != orig_end &&2408- ap->off + ap->alen > nexto)2409- ap->off = nexto > ap->alen ? nexto - ap->alen : 0;2410- /*2411- * If we're now overlapping the next or previous extent that2412- * means we can't fit an extsz piece in this hole. 
Just move2413- * the start forward to the first valid spot and set2414- * the length so we hit the end.2415- */2416- if ((ap->off != orig_off && ap->off < prevo) ||2417- (ap->off + ap->alen != orig_end &&2418- ap->off + ap->alen > nexto)) {2419- ap->off = prevo;2420- ap->alen = nexto - prevo;2421- }2422- /*2423- * If the result isn't a multiple of rtextents we need to2424- * remove blocks until it is.2425- */2426- if ((temp = (ap->alen % mp->m_sb.sb_rextsize))) {2427- /*2428- * We're not covering the original request, or2429- * we won't be able to once we fix the length.2430- */2431- if (orig_off < ap->off ||2432- orig_end > ap->off + ap->alen ||2433- ap->alen - temp < orig_alen)2434- return XFS_ERROR(EINVAL);2435- /*2436- * Try to fix it by moving the start up.2437- */2438- if (ap->off + temp <= orig_off) {2439- ap->alen -= temp;2440- ap->off += temp;2441- }2442- /*2443- * Try to fix it by moving the end in.2444- */2445- else if (ap->off + ap->alen - temp >= orig_end)2446- ap->alen -= temp;2447- /*2448- * Set the start to the minimum then trim the length.2449- */2450- else {2451- ap->alen -= orig_off - ap->off;2452- ap->off = orig_off;2453- ap->alen -= ap->alen % mp->m_sb.sb_rextsize;2454- }2455- /*2456- * Result doesn't cover the request, fail it.2457- */2458- if (orig_off < ap->off || orig_end > ap->off + ap->alen)2459- return XFS_ERROR(EINVAL);2460- }2461 ASSERT(ap->alen % mp->m_sb.sb_rextsize == 0);02462 /*2463 * If the offset & length are not perfectly aligned2464 * then kill prod, it will just get us in trouble.2465 */2466- if (do_mod(ap->off, extsz) || ap->alen % extsz)2467 prod = 1;2468 /*2469 * Set ralen to be the actual requested length in rtextents.···2389 ap->rval = rtx * mp->m_sb.sb_rextsize;2390 } else2391 ap->rval = 0;00000000000000002392 }2393-#else2394- if (rt)2395- ap->rval = 0;2396-#endif /* __KERNEL__ */2397- else if (nullfb)2398- ap->rval = XFS_INO_TO_FSB(mp, ap->ip->i_ino);2399- else2400- ap->rval = ap->firstblock;2401 /*2402 * If allocating at eof, and there's a previous real block,2403 * try to use it's last block as our starting point.···2670 args.total = ap->total;2671 args.minlen = ap->minlen;2672 }2673- if (ap->ip->i_d.di_extsize) {02674 args.prod = ap->ip->i_d.di_extsize;2675 if ((args.mod = (xfs_extlen_t)do_mod(ap->off, args.prod)))2676 args.mod = (xfs_extlen_t)(args.prod - args.mod);2677- } else if (mp->m_sb.sb_blocksize >= NBPP) {2678 args.prod = 1;2679 args.mod = 0;2680 } else {···36533654 ep = xfs_bmap_do_search_extents(base, lastx, nextents, bno, eofp,3655 lastxp, gotp, prevp);3656- rt = ip->i_d.di_flags & XFS_DIFLAG_REALTIME;3657- if(!rt && !gotp->br_startblock && (*lastxp != NULLEXTNUM)) {3658 cmn_err(CE_PANIC,"Access to block zero: fs: <%s> inode: %lld "3659 "start_block : %llx start_off : %llx blkcnt : %llx "3660 "extent-state : %x \n",3661- (ip->i_mount)->m_fsname,(long long)ip->i_ino,3662- gotp->br_startblock, gotp->br_startoff,3663- gotp->br_blockcount,gotp->br_state);003664 }3665 return ep;3666}···3950 ip->i_d.di_forkoff = xfs_attr_shortform_bytesfit(ip, size);3951 if (!ip->i_d.di_forkoff)3952 ip->i_d.di_forkoff = mp->m_attroffset >> 3;3953- else if (!(mp->m_flags & XFS_MOUNT_COMPAT_ATTR))3954 version = 2;3955 break;3956 default:···4098 */4099 if (whichfork == XFS_DATA_FORK) {4100 maxleafents = MAXEXTNUM;4101- sz = (mp->m_flags & XFS_MOUNT_COMPAT_ATTR) ?4102- mp->m_attroffset : XFS_BMDR_SPACE_CALC(MINDBTPTRS);4103 } else {4104 maxleafents = MAXAEXTNUM;4105- sz = (mp->m_flags & XFS_MOUNT_COMPAT_ATTR) ?4106- mp->m_sb.sb_inodesize - mp->m_attroffset 
:4107- XFS_BMDR_SPACE_CALC(MINABTPTRS);4108 }4109 maxrootrecs = (int)XFS_BTREE_BLOCK_MAXRECS(sz, xfs_bmdr, 0);4110 minleafrecs = mp->m_bmap_dmnr[0];···4493 num_recs = be16_to_cpu(block->bb_numrecs);4494 if (unlikely(i + num_recs > room)) {4495 ASSERT(i + num_recs <= room);4496- xfs_fs_cmn_err(CE_WARN, ip->i_mount,4497- "corrupt dinode %Lu, (btree extents). Unmount and run xfs_repair.",4498 (unsigned long long) ip->i_ino);4499 XFS_ERROR_REPORT("xfs_bmap_read_extents(1)",4500 XFS_ERRLEVEL_LOW,···4665 char contig; /* allocation must be one extent */4666 char delay; /* this request is for delayed alloc */4667 char exact; /* don't do all of wasdelayed extent */04668 xfs_bmbt_rec_t *ep; /* extent list entry pointer */4669 int error; /* error return */4670 xfs_bmbt_irec_t got; /* current extent list record */···4719 }4720 if (XFS_FORCED_SHUTDOWN(mp))4721 return XFS_ERROR(EIO);4722- rt = XFS_IS_REALTIME_INODE(ip);4723 ifp = XFS_IFORK_PTR(ip, whichfork);4724 ASSERT(ifp->if_ext_max ==4725 XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t));···4730 delay = (flags & XFS_BMAPI_DELAY) != 0;4731 trim = (flags & XFS_BMAPI_ENTIRE) == 0;4732 userdata = (flags & XFS_BMAPI_METADATA) == 0;04733 exact = (flags & XFS_BMAPI_EXACT) != 0;4734 rsvd = (flags & XFS_BMAPI_RSVBLOCKS) != 0;4735 contig = (flags & XFS_BMAPI_CONTIG) != 0;···4825 }4826 minlen = contig ? alen : 1;4827 if (delay) {4828- xfs_extlen_t extsz = 0;48294830 /* Figure out the extent size, adjust alen */4831 if (rt) {4832 if (!(extsz = ip->i_d.di_extsize))4833 extsz = mp->m_sb.sb_rextsize;4834- alen = roundup(alen, extsz);4835- extsz = alen / mp->m_sb.sb_rextsize;4836 }000000000048374838 /*4839 * Make a transaction-less quota reservation for···4872 xfs_bmap_worst_indlen(ip, alen);4873 ASSERT(indlen > 0);48744875- if (rt)4876 error = xfs_mod_incore_sb(mp,4877 XFS_SBS_FREXTENTS,4878 -(extsz), rsvd);4879- else4880 error = xfs_mod_incore_sb(mp,4881 XFS_SBS_FDBLOCKS,4882 -(alen), rsvd);04883 if (!error) {4884 error = xfs_mod_incore_sb(mp,4885 XFS_SBS_FDBLOCKS,4886 -(indlen), rsvd);4887- if (error && rt) {4888- xfs_mod_incore_sb(ip->i_mount,4889 XFS_SBS_FREXTENTS,4890 extsz, rsvd);4891- } else if (error) {4892- xfs_mod_incore_sb(ip->i_mount,4893 XFS_SBS_FDBLOCKS,4894 alen, rsvd);4895- }4896 }48974898 if (error) {4899- if (XFS_IS_QUOTA_ON(ip->i_mount))4900 /* unreserve the blocks now */04901 XFS_TRANS_UNRESERVE_QUOTA_NBLKS(4902 mp, NULL, ip,4903 (long)alen, 0, rt ?···4937 bma.firstblock = *firstblock;4938 bma.alen = alen;4939 bma.off = aoff;04940 bma.wasdel = wasdelay;4941 bma.minlen = minlen;4942 bma.low = flist->xbf_low;···5359 return 0;5360 }5361 XFS_STATS_INC(xs_blk_unmap);5362- isrt = (whichfork == XFS_DATA_FORK) &&5363- (ip->i_d.di_flags & XFS_DIFLAG_REALTIME);5364 start = bno;5365 bno = start + len - 1;5366 ep = xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got,···5531 }5532 if (wasdel) {5533 ASSERT(STARTBLOCKVAL(del.br_startblock) > 0);5534- /* Update realtim/data freespace, unreserve quota */5535 if (isrt) {5536 xfs_filblks_t rtexts;5537···5539 do_div(rtexts, mp->m_sb.sb_rextsize);5540 xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS,5541 (int)rtexts, rsvd);5542- XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, NULL, ip,5543- -((long)del.br_blockcount), 0,5544 XFS_QMOPT_RES_RTBLKS);5545 } else {5546 xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS,5547 (int)del.br_blockcount, rsvd);5548- XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, NULL, ip,5549- -((long)del.br_blockcount), 0,5550 XFS_QMOPT_RES_REGBLKS);5551 }5552 ip->i_delayed_blks -= del.br_blockcount;···5740 
ip->i_d.di_format != XFS_DINODE_FMT_LOCAL)5741 return XFS_ERROR(EINVAL);5742 if (whichfork == XFS_DATA_FORK) {5743- if (ip->i_d.di_flags & XFS_DIFLAG_PREALLOC) {005744 prealloced = 1;5745 fixlen = XFS_MAXIOFFSET(mp);5746 } else {
···2146 return 0; /* keep gcc quite */2147}21482149+/*2150+ * Adjust the size of the new extent based on di_extsize and rt extsize.2151+ */2152+STATIC int2153+xfs_bmap_extsize_align(2154+ xfs_mount_t *mp,2155+ xfs_bmbt_irec_t *gotp, /* next extent pointer */2156+ xfs_bmbt_irec_t *prevp, /* previous extent pointer */2157+ xfs_extlen_t extsz, /* align to this extent size */2158+ int rt, /* is this a realtime inode? */2159+ int eof, /* is extent at end-of-file? */2160+ int delay, /* creating delalloc extent? */2161+ int convert, /* overwriting unwritten extent? */2162+ xfs_fileoff_t *offp, /* in/out: aligned offset */2163+ xfs_extlen_t *lenp) /* in/out: aligned length */2164+{2165+ xfs_fileoff_t orig_off; /* original offset */2166+ xfs_extlen_t orig_alen; /* original length */2167+ xfs_fileoff_t orig_end; /* original off+len */2168+ xfs_fileoff_t nexto; /* next file offset */2169+ xfs_fileoff_t prevo; /* previous file offset */2170+ xfs_fileoff_t align_off; /* temp for offset */2171+ xfs_extlen_t align_alen; /* temp for length */2172+ xfs_extlen_t temp; /* temp for calculations */2173+2174+ if (convert)2175+ return 0;2176+2177+ orig_off = align_off = *offp;2178+ orig_alen = align_alen = *lenp;2179+ orig_end = orig_off + orig_alen;2180+2181+ /*2182+ * If this request overlaps an existing extent, then don't2183+ * attempt to perform any additional alignment.2184+ */2185+ if (!delay && !eof &&2186+ (orig_off >= gotp->br_startoff) &&2187+ (orig_end <= gotp->br_startoff + gotp->br_blockcount)) {2188+ return 0;2189+ }2190+2191+ /*2192+ * If the file offset is unaligned vs. the extent size2193+ * we need to align it. This will be possible unless2194+ * the file was previously written with a kernel that didn't2195+ * perform this alignment, or if a truncate shot us in the2196+ * foot.2197+ */2198+ temp = do_mod(orig_off, extsz);2199+ if (temp) {2200+ align_alen += temp;2201+ align_off -= temp;2202+ }2203+ /*2204+ * Same adjustment for the end of the requested area.2205+ */2206+ if ((temp = (align_alen % extsz))) {2207+ align_alen += extsz - temp;2208+ }2209+ /*2210+ * If the previous block overlaps with this proposed allocation2211+ * then move the start forward without adjusting the length.2212+ */2213+ if (prevp->br_startoff != NULLFILEOFF) {2214+ if (prevp->br_startblock == HOLESTARTBLOCK)2215+ prevo = prevp->br_startoff;2216+ else2217+ prevo = prevp->br_startoff + prevp->br_blockcount;2218+ } else2219+ prevo = 0;2220+ if (align_off != orig_off && align_off < prevo)2221+ align_off = prevo;2222+ /*2223+ * If the next block overlaps with this proposed allocation2224+ * then move the start back without adjusting the length,2225+ * but not before offset 0.2226+ * This may of course make the start overlap previous block,2227+ * and if we hit the offset 0 limit then the next block2228+ * can still overlap too.2229+ */2230+ if (!eof && gotp->br_startoff != NULLFILEOFF) {2231+ if ((delay && gotp->br_startblock == HOLESTARTBLOCK) ||2232+ (!delay && gotp->br_startblock == DELAYSTARTBLOCK))2233+ nexto = gotp->br_startoff + gotp->br_blockcount;2234+ else2235+ nexto = gotp->br_startoff;2236+ } else2237+ nexto = NULLFILEOFF;2238+ if (!eof &&2239+ align_off + align_alen != orig_end &&2240+ align_off + align_alen > nexto)2241+ align_off = nexto > align_alen ? nexto - align_alen : 0;2242+ /*2243+ * If we're now overlapping the next or previous extent that2244+ * means we can't fit an extsz piece in this hole. 
Just move2245+ * the start forward to the first valid spot and set2246+ * the length so we hit the end.2247+ */2248+ if (align_off != orig_off && align_off < prevo)2249+ align_off = prevo;2250+ if (align_off + align_alen != orig_end &&2251+ align_off + align_alen > nexto &&2252+ nexto != NULLFILEOFF) {2253+ ASSERT(nexto > prevo);2254+ align_alen = nexto - align_off;2255+ }2256+2257+ /*2258+ * If realtime, and the result isn't a multiple of the realtime2259+ * extent size we need to remove blocks until it is.2260+ */2261+ if (rt && (temp = (align_alen % mp->m_sb.sb_rextsize))) {2262+ /*2263+ * We're not covering the original request, or2264+ * we won't be able to once we fix the length.2265+ */2266+ if (orig_off < align_off ||2267+ orig_end > align_off + align_alen ||2268+ align_alen - temp < orig_alen)2269+ return XFS_ERROR(EINVAL);2270+ /*2271+ * Try to fix it by moving the start up.2272+ */2273+ if (align_off + temp <= orig_off) {2274+ align_alen -= temp;2275+ align_off += temp;2276+ }2277+ /*2278+ * Try to fix it by moving the end in.2279+ */2280+ else if (align_off + align_alen - temp >= orig_end)2281+ align_alen -= temp;2282+ /*2283+ * Set the start to the minimum then trim the length.2284+ */2285+ else {2286+ align_alen -= orig_off - align_off;2287+ align_off = orig_off;2288+ align_alen -= align_alen % mp->m_sb.sb_rextsize;2289+ }2290+ /*2291+ * Result doesn't cover the request, fail it.2292+ */2293+ if (orig_off < align_off || orig_end > align_off + align_alen)2294+ return XFS_ERROR(EINVAL);2295+ } else {2296+ ASSERT(orig_off >= align_off);2297+ ASSERT(orig_end <= align_off + align_alen);2298+ }2299+2300+#ifdef DEBUG2301+ if (!eof && gotp->br_startoff != NULLFILEOFF)2302+ ASSERT(align_off + align_alen <= gotp->br_startoff);2303+ if (prevp->br_startoff != NULLFILEOFF)2304+ ASSERT(align_off >= prevp->br_startoff + prevp->br_blockcount);2305+#endif2306+2307+ *lenp = align_alen;2308+ *offp = align_off;2309+ return 0;2310+}2311+2312#define XFS_ALLOC_GAP_UNITS 423132314/*2315 * xfs_bmap_alloc is called by xfs_bmapi to allocate an extent for a file.2316 * It figures out where to ask the underlying allocator to put the new extent.2317 */2318+STATIC int2319xfs_bmap_alloc(2320 xfs_bmalloca_t *ap) /* bmap alloc argument struct */2321{···2163 xfs_mount_t *mp; /* mount point structure */2164 int nullfb; /* true if ap->firstblock isn't set */2165 int rt; /* true if inode is realtime */2166+ xfs_extlen_t prod = 0; /* product factor for allocators */2167+ xfs_extlen_t ralen = 0; /* realtime allocation length */2168+ xfs_extlen_t align; /* minimum allocation alignment */2169+ xfs_rtblock_t rtx;21702171#define ISVALID(x,y) \2172 (rt ? \···2182 nullfb = ap->firstblock == NULLFSBLOCK;2183 rt = XFS_IS_REALTIME_INODE(ap->ip) && ap->userdata;2184 fb_agno = nullfb ? 
NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, ap->firstblock);02185 if (rt) {2186+ align = ap->ip->i_d.di_extsize ?2187+ ap->ip->i_d.di_extsize : mp->m_sb.sb_rextsize;2188+ /* Set prod to match the extent size */2189+ prod = align / mp->m_sb.sb_rextsize;0000021902191+ error = xfs_bmap_extsize_align(mp, ap->gotp, ap->prevp,2192+ align, rt, ap->eof, 0,2193+ ap->conv, &ap->off, &ap->alen);2194+ if (error)2195+ return error;2196+ ASSERT(ap->alen);000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000002197 ASSERT(ap->alen % mp->m_sb.sb_rextsize == 0);2198+2199 /*2200 * If the offset & length are not perfectly aligned2201 * then kill prod, it will just get us in trouble.2202 */2203+ if (do_mod(ap->off, align) || ap->alen % align)2204 prod = 1;2205 /*2206 * Set ralen to be the actual requested length in rtextents.···2326 ap->rval = rtx * mp->m_sb.sb_rextsize;2327 } else2328 ap->rval = 0;2329+ } else {2330+ align = (ap->userdata && ap->ip->i_d.di_extsize &&2331+ (ap->ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE)) ?2332+ ap->ip->i_d.di_extsize : 0;2333+ if (unlikely(align)) {2334+ error = xfs_bmap_extsize_align(mp, ap->gotp, ap->prevp,2335+ align, rt,2336+ ap->eof, 0, ap->conv,2337+ &ap->off, &ap->alen);2338+ ASSERT(!error);2339+ ASSERT(ap->alen);2340+ }2341+ if (nullfb)2342+ ap->rval = XFS_INO_TO_FSB(mp, ap->ip->i_ino);2343+ else2344+ ap->rval = ap->firstblock;2345 }2346+00000002347 /*2348 * If allocating at eof, and there's a previous real block,2349 * try to use it's last block as our starting point.···2598 args.total = ap->total;2599 args.minlen = ap->minlen;2600 }2601+ if (unlikely(ap->userdata && ap->ip->i_d.di_extsize &&2602+ (ap->ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE))) {2603 args.prod = ap->ip->i_d.di_extsize;2604 if ((args.mod = (xfs_extlen_t)do_mod(ap->off, args.prod)))2605 args.mod = (xfs_extlen_t)(args.prod - args.mod);2606+ } else if (unlikely(mp->m_sb.sb_blocksize >= NBPP)) {2607 args.prod = 1;2608 args.mod = 0;2609 } else {···35803581 ep = xfs_bmap_do_search_extents(base, lastx, nextents, bno, eofp,3582 lastxp, gotp, prevp);3583+ rt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);3584+ if (unlikely(!rt && !gotp->br_startblock && (*lastxp != NULLEXTNUM))) {3585 cmn_err(CE_PANIC,"Access to block zero: fs: <%s> inode: %lld "3586 "start_block : %llx start_off : %llx blkcnt : %llx "3587 "extent-state : %x \n",3588+ (ip->i_mount)->m_fsname, (long long)ip->i_ino,3589+ (unsigned long long)gotp->br_startblock,3590+ (unsigned long long)gotp->br_startoff,3591+ (unsigned long long)gotp->br_blockcount,3592+ gotp->br_state);3593 }3594 return ep;3595}···3875 ip->i_d.di_forkoff = xfs_attr_shortform_bytesfit(ip, size);3876 if (!ip->i_d.di_forkoff)3877 ip->i_d.di_forkoff = mp->m_attroffset >> 3;3878+ else if (mp->m_flags & XFS_MOUNT_ATTR2)3879 version = 2;3880 break;3881 default:···4023 */4024 if (whichfork == XFS_DATA_FORK) {4025 maxleafents = MAXEXTNUM;4026+ sz = (mp->m_flags & XFS_MOUNT_ATTR2) ?4027+ XFS_BMDR_SPACE_CALC(MINDBTPTRS) : mp->m_attroffset;4028 } else {4029 maxleafents = MAXAEXTNUM;4030+ sz = (mp->m_flags & XFS_MOUNT_ATTR2) ?4031+ XFS_BMDR_SPACE_CALC(MINABTPTRS) :4032+ mp->m_sb.sb_inodesize - mp->m_attroffset;4033 }4034 maxrootrecs = (int)XFS_BTREE_BLOCK_MAXRECS(sz, xfs_bmdr, 0);4035 minleafrecs = mp->m_bmap_dmnr[0];···4418 num_recs = be16_to_cpu(block->bb_numrecs);4419 if (unlikely(i + num_recs > room)) {4420 ASSERT(i + num_recs <= room);4421+ xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,4422+ "corrupt dinode %Lu, (btree extents).",4423 (unsigned long 
long) ip->i_ino);4424 XFS_ERROR_REPORT("xfs_bmap_read_extents(1)",4425 XFS_ERRLEVEL_LOW,···4590 char contig; /* allocation must be one extent */4591 char delay; /* this request is for delayed alloc */4592 char exact; /* don't do all of wasdelayed extent */4593+ char convert; /* unwritten extent I/O completion */4594 xfs_bmbt_rec_t *ep; /* extent list entry pointer */4595 int error; /* error return */4596 xfs_bmbt_irec_t got; /* current extent list record */···4643 }4644 if (XFS_FORCED_SHUTDOWN(mp))4645 return XFS_ERROR(EIO);4646+ rt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);4647 ifp = XFS_IFORK_PTR(ip, whichfork);4648 ASSERT(ifp->if_ext_max ==4649 XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t));···4654 delay = (flags & XFS_BMAPI_DELAY) != 0;4655 trim = (flags & XFS_BMAPI_ENTIRE) == 0;4656 userdata = (flags & XFS_BMAPI_METADATA) == 0;4657+ convert = (flags & XFS_BMAPI_CONVERT) != 0;4658 exact = (flags & XFS_BMAPI_EXACT) != 0;4659 rsvd = (flags & XFS_BMAPI_RSVBLOCKS) != 0;4660 contig = (flags & XFS_BMAPI_CONTIG) != 0;···4748 }4749 minlen = contig ? alen : 1;4750 if (delay) {4751+ xfs_extlen_t extsz;47524753 /* Figure out the extent size, adjust alen */4754 if (rt) {4755 if (!(extsz = ip->i_d.di_extsize))4756 extsz = mp->m_sb.sb_rextsize;4757+ } else {4758+ extsz = ip->i_d.di_extsize;4759 }4760+ if (extsz) {4761+ error = xfs_bmap_extsize_align(mp,4762+ &got, &prev, extsz,4763+ rt, eof, delay, convert,4764+ &aoff, &alen);4765+ ASSERT(!error);4766+ }4767+4768+ if (rt)4769+ extsz = alen / mp->m_sb.sb_rextsize;47704771 /*4772 * Make a transaction-less quota reservation for···4785 xfs_bmap_worst_indlen(ip, alen);4786 ASSERT(indlen > 0);47874788+ if (rt) {4789 error = xfs_mod_incore_sb(mp,4790 XFS_SBS_FREXTENTS,4791 -(extsz), rsvd);4792+ } else {4793 error = xfs_mod_incore_sb(mp,4794 XFS_SBS_FDBLOCKS,4795 -(alen), rsvd);4796+ }4797 if (!error) {4798 error = xfs_mod_incore_sb(mp,4799 XFS_SBS_FDBLOCKS,4800 -(indlen), rsvd);4801+ if (error && rt)4802+ xfs_mod_incore_sb(mp,4803 XFS_SBS_FREXTENTS,4804 extsz, rsvd);4805+ else if (error)4806+ xfs_mod_incore_sb(mp,4807 XFS_SBS_FDBLOCKS,4808 alen, rsvd);04809 }48104811 if (error) {4812+ if (XFS_IS_QUOTA_ON(mp))4813 /* unreserve the blocks now */4814+ (void)4815 XFS_TRANS_UNRESERVE_QUOTA_NBLKS(4816 mp, NULL, ip,4817 (long)alen, 0, rt ?···4849 bma.firstblock = *firstblock;4850 bma.alen = alen;4851 bma.off = aoff;4852+ bma.conv = convert;4853 bma.wasdel = wasdelay;4854 bma.minlen = minlen;4855 bma.low = flist->xbf_low;···5270 return 0;5271 }5272 XFS_STATS_INC(xs_blk_unmap);5273+ isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);05274 start = bno;5275 bno = start + len - 1;5276 ep = xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got,···5443 }5444 if (wasdel) {5445 ASSERT(STARTBLOCKVAL(del.br_startblock) > 0);5446+ /* Update realtime/data freespace, unreserve quota */5447 if (isrt) {5448 xfs_filblks_t rtexts;5449···5451 do_div(rtexts, mp->m_sb.sb_rextsize);5452 xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS,5453 (int)rtexts, rsvd);5454+ (void)XFS_TRANS_RESERVE_QUOTA_NBLKS(mp,5455+ NULL, ip, -((long)del.br_blockcount), 0,5456 XFS_QMOPT_RES_RTBLKS);5457 } else {5458 xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS,5459 (int)del.br_blockcount, rsvd);5460+ (void)XFS_TRANS_RESERVE_QUOTA_NBLKS(mp,5461+ NULL, ip, -((long)del.br_blockcount), 0,5462 XFS_QMOPT_RES_REGBLKS);5463 }5464 ip->i_delayed_blks -= del.br_blockcount;···5652 ip->i_d.di_format != XFS_DINODE_FMT_LOCAL)5653 return XFS_ERROR(EINVAL);5654 if (whichfork == 
XFS_DATA_FORK) {5655+ if ((ip->i_d.di_extsize && (ip->i_d.di_flags &5656+ (XFS_DIFLAG_REALTIME|XFS_DIFLAG_EXTSIZE))) ||5657+ ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC|XFS_DIFLAG_APPEND)){5658 prealloced = 1;5659 fixlen = XFS_MAXIOFFSET(mp);5660 } else {
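Note: xfs_bmap_extsize_align() above replaces the realtime-only alignment that used to be open-coded in xfs_bmap_alloc(), and is now also used for the new per-inode extent size hint and by the delalloc path in xfs_bmapi(). Its first step is plain rounding: pull the start offset back to an extsz boundary and pad the length up to whole extsz units; the rest of the function then clamps the result against the neighbouring extents and the realtime extent size. A standalone sketch of just that rounding step (simplified; it ignores the overlap and realtime trimming the real function performs):

#include <stdint.h>
#include <stdio.h>

static void
align_to_extsz(uint64_t *off, uint64_t *len, uint64_t extsz)
{
	uint64_t head = *off % extsz;	/* do_mod(orig_off, extsz) */
	uint64_t tail;

	*off -= head;			/* move start back to a boundary */
	*len += head;			/* keep covering the original range */
	tail = *len % extsz;
	if (tail)
		*len += extsz - tail;	/* pad length to whole units */
}

int
main(void)
{
	uint64_t off = 13, len = 5;

	align_to_extsz(&off, &len, 8);
	printf("off=%llu len=%llu\n",
	       (unsigned long long)off, (unsigned long long)len);
	/* prints off=8 len=16: the request now spans two whole 8-block units */
	return 0;
}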
fs/xfs/xfs_bmap.h  +6 -1
···
 62 #define XFS_BMAPI_IGSTATE	0x200	/* Ignore state - */
 63 					/* combine contig. space */
 64 #define XFS_BMAPI_CONTIG	0x400	/* must allocate only one extent */
 65 
 66 #define XFS_BMAPI_AFLAG(w)	xfs_bmapi_aflag(w)
 67 static inline int xfs_bmapi_aflag(int w)
···
105 	char	wasdel;		/* replacing a delayed allocation */
106 	char	userdata;	/* set if is user data */
107 	char	low;		/* low on space, using seq'l ags */
108-	char	aeof;		/* allocated space at eof */
109 } xfs_bmalloca_t;
110 
111 #ifdef __KERNEL__
···
 62 #define XFS_BMAPI_IGSTATE	0x200	/* Ignore state - */
 63 					/* combine contig. space */
 64 #define XFS_BMAPI_CONTIG	0x400	/* must allocate only one extent */
 65+/*	XFS_BMAPI_DIRECT_IO	0x800 */
 66+#define XFS_BMAPI_CONVERT	0x1000	/* unwritten extent conversion - */
 67+					/* need write cache flushing and no */
 68+					/* additional allocation alignments */
 69 
 70 #define XFS_BMAPI_AFLAG(w)	xfs_bmapi_aflag(w)
 71 static inline int xfs_bmapi_aflag(int w)
···
101 	char	wasdel;		/* replacing a delayed allocation */
102 	char	userdata;	/* set if is user data */
103 	char	low;		/* low on space, using seq'l ags */
104+	char	aeof;		/* allocated space at eof */
105+	char	conv;		/* overwriting unwritten extents */
106 } xfs_bmalloca_t;
107 
108 #ifdef __KERNEL__
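Note: XFS_BMAPI_CONVERT is consumed in two places in this patch, collected here for reference. xfs_iomap_write_unwritten() now passes it alongside XFS_BMAPI_WRITE, and xfs_bmapi() decodes it into the new conv field of xfs_bmalloca_t so xfs_bmap_extsize_align() can skip alignment when overwriting an already-allocated unwritten extent:

	/* caller, from the xfs_iomap.c hunk below */
	error = xfs_bmapi(tp, ip, offset_fsb, count_fsb,
			XFS_BMAPI_WRITE|XFS_BMAPI_CONVERT, &firstfsb,
			1, &imap, &nimaps, &free_list);

	/* inside xfs_bmapi(), from the xfs_bmap.c hunk above */
	convert = (flags & XFS_BMAPI_CONVERT) != 0;
	bma.conv = convert;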
fs/xfs/xfs_clnt.h  +1 -1
···
 57 /*
 58  * XFS mount option flags -- args->flags1
 59  */
 60-#define XFSMNT_COMPAT_ATTR	0x00000001	/* do not use ATTR2 format */
 61 #define XFSMNT_WSYNC		0x00000002	/* safe mode nfs mount
 62 						 * compatible */
 63 #define XFSMNT_INO64		0x00000004	/* move inode numbers up
···
 57 /*
 58  * XFS mount option flags -- args->flags1
 59  */
 60+#define XFSMNT_ATTR2		0x00000001	/* allow ATTR2 EA format */
 61 #define XFSMNT_WSYNC		0x00000002	/* safe mode nfs mount
 62 						 * compatible */
 63 #define XFSMNT_INO64		0x00000004	/* move inode numbers up
···
 72 	struct uio	*uio;		/* uio control structure */
 73 } xfs_dir2_put_args_t;
 74 
 75-#define	XFS_DIR_IS_V2(mp)	((mp)->m_dirversion == 2)
 76-extern xfs_dirops_t	xfsv2_dirops;
 77-
 78 /*
 79  * Other interfaces used by the rest of the dir v2 code.
 80  */
···
 72 	struct uio	*uio;		/* uio control structure */
 73 } xfs_dir2_put_args_t;
 74 
 75 /*
 76  * Other interfaces used by the rest of the dir v2 code.
 77  */
fs/xfs/xfs_dir_leaf.h  +34 -30
···67 */68#define XFS_DIR_LEAF_MAPSIZE 3 /* how many freespace slots */6900000000000000000000000000070typedef struct xfs_dir_leafblock {71- struct xfs_dir_leaf_hdr { /* constant-structure header block */72- xfs_da_blkinfo_t info; /* block type, links, etc. */73- __uint16_t count; /* count of active leaf_entry's */74- __uint16_t namebytes; /* num bytes of name strings stored */75- __uint16_t firstused; /* first used byte in name area */76- __uint8_t holes; /* != 0 if blk needs compaction */77- __uint8_t pad1;78- struct xfs_dir_leaf_map {/* RLE map of free bytes */79- __uint16_t base; /* base of free region */80- __uint16_t size; /* run length of free region */81- } freemap[XFS_DIR_LEAF_MAPSIZE]; /* N largest free regions */82- } hdr;83- struct xfs_dir_leaf_entry { /* sorted on key, not name */84- xfs_dahash_t hashval; /* hash value of name */85- __uint16_t nameidx; /* index into buffer of name */86- __uint8_t namelen; /* length of name string */87- __uint8_t pad2;88- } entries[1]; /* var sized array */89- struct xfs_dir_leaf_name {90- xfs_dir_ino_t inumber; /* inode number for this key */91- __uint8_t name[1]; /* name string itself */92- } namelist[1]; /* grows from bottom of buf */93} xfs_dir_leafblock_t;94-typedef struct xfs_dir_leaf_hdr xfs_dir_leaf_hdr_t;95-typedef struct xfs_dir_leaf_map xfs_dir_leaf_map_t;96-typedef struct xfs_dir_leaf_entry xfs_dir_leaf_entry_t;97-typedef struct xfs_dir_leaf_name xfs_dir_leaf_name_t;9899/*100 * Length of name for which a 512-byte block filesystem···130#define XFS_PUT_COOKIE(c,mp,bno,entry,hash) \131 ((c).s.be = XFS_DA_MAKE_BNOENTRY(mp, bno, entry), (c).s.h = (hash))132133-typedef struct xfs_dir_put_args134-{135 xfs_dircook_t cook; /* cookie of (next) entry */136 xfs_intino_t ino; /* inode number */137- struct xfs_dirent *dbp; /* buffer pointer */138 char *name; /* directory entry name */139 int namelen; /* length of name */140 int done; /* output: set if value was stored */···141 struct uio *uio; /* uio control structure */142} xfs_dir_put_args_t;143144-#define XFS_DIR_LEAF_ENTSIZE_BYNAME(len) xfs_dir_leaf_entsize_byname(len)0145static inline int xfs_dir_leaf_entsize_byname(int len)146{147 return (uint)sizeof(xfs_dir_leaf_name_t)-1 + len;
···67 */68#define XFS_DIR_LEAF_MAPSIZE 3 /* how many freespace slots */6970+typedef struct xfs_dir_leaf_map { /* RLE map of free bytes */71+ __uint16_t base; /* base of free region */72+ __uint16_t size; /* run length of free region */73+} xfs_dir_leaf_map_t;74+75+typedef struct xfs_dir_leaf_hdr { /* constant-structure header block */76+ xfs_da_blkinfo_t info; /* block type, links, etc. */77+ __uint16_t count; /* count of active leaf_entry's */78+ __uint16_t namebytes; /* num bytes of name strings stored */79+ __uint16_t firstused; /* first used byte in name area */80+ __uint8_t holes; /* != 0 if blk needs compaction */81+ __uint8_t pad1;82+ xfs_dir_leaf_map_t freemap[XFS_DIR_LEAF_MAPSIZE];83+} xfs_dir_leaf_hdr_t;84+85+typedef struct xfs_dir_leaf_entry { /* sorted on key, not name */86+ xfs_dahash_t hashval; /* hash value of name */87+ __uint16_t nameidx; /* index into buffer of name */88+ __uint8_t namelen; /* length of name string */89+ __uint8_t pad2;90+} xfs_dir_leaf_entry_t;91+92+typedef struct xfs_dir_leaf_name {93+ xfs_dir_ino_t inumber; /* inode number for this key */94+ __uint8_t name[1]; /* name string itself */95+} xfs_dir_leaf_name_t;96+97typedef struct xfs_dir_leafblock {98+ xfs_dir_leaf_hdr_t hdr; /* constant-structure header block */99+ xfs_dir_leaf_entry_t entries[1]; /* var sized array */100+ xfs_dir_leaf_name_t namelist[1]; /* grows from bottom of buf */0000000000000000000101} xfs_dir_leafblock_t;0000102103/*104 * Length of name for which a 512-byte block filesystem···126#define XFS_PUT_COOKIE(c,mp,bno,entry,hash) \127 ((c).s.be = XFS_DA_MAKE_BNOENTRY(mp, bno, entry), (c).s.h = (hash))128129+typedef struct xfs_dir_put_args {0130 xfs_dircook_t cook; /* cookie of (next) entry */131 xfs_intino_t ino; /* inode number */132+ struct xfs_dirent *dbp; /* buffer pointer */133 char *name; /* directory entry name */134 int namelen; /* length of name */135 int done; /* output: set if value was stored */···138 struct uio *uio; /* uio control structure */139} xfs_dir_put_args_t;140141+#define XFS_DIR_LEAF_ENTSIZE_BYNAME(len) \142+ xfs_dir_leaf_entsize_byname(len)143static inline int xfs_dir_leaf_entsize_byname(int len)144{145 return (uint)sizeof(xfs_dir_leaf_name_t)-1 + len;
fs/xfs/xfs_error.c  -1
···
 54 		if (e != xfs_etrap[i])
 55 			continue;
 56 		cmn_err(CE_NOTE, "xfs_error_trap: error %d", e);
 57-		debug_stop_all_cpus((void *)-1LL);
 58 		BUG();
 59 		break;
 60 	}
···
 54 		if (e != xfs_etrap[i])
 55 			continue;
 56 		cmn_err(CE_NOTE, "xfs_error_trap: error %d", e);
 57 		BUG();
 58 		break;
 59 	}
fs/xfs/xfs_error.h  +4 -4
···
 18 #ifndef __XFS_ERROR_H__
 19 #define __XFS_ERROR_H__
 20 
 21-#define prdev(fmt,targ,args...) \
 22-	printk("XFS: device %s - " fmt "\n", XFS_BUFTARG_NAME(targ), ## args)
 23-
 24 #define XFS_ERECOVER	1	/* Failure to recover log */
 25 #define XFS_ELOGSTAT	2	/* Failure to stat log in user space */
 26 #define XFS_ENOLOGSPACE	3	/* Reservation too large */
···
179 struct xfs_mount;
180 /* PRINTFLIKE4 */
181 extern void xfs_cmn_err(int panic_tag, int level, struct xfs_mount *mp,
182-			char *fmt, ...);
183 /* PRINTFLIKE3 */
184 extern void xfs_fs_cmn_err(int level, struct xfs_mount *mp, char *fmt, ...);
185 
186 #endif	/* __XFS_ERROR_H__ */
···
 18 #ifndef __XFS_ERROR_H__
 19 #define __XFS_ERROR_H__
 20 
 21 #define XFS_ERECOVER	1	/* Failure to recover log */
 22 #define XFS_ELOGSTAT	2	/* Failure to stat log in user space */
 23 #define XFS_ENOLOGSPACE	3	/* Reservation too large */
···
182 struct xfs_mount;
183 /* PRINTFLIKE4 */
184 extern void xfs_cmn_err(int panic_tag, int level, struct xfs_mount *mp,
185+			char *fmt, ...);
186 /* PRINTFLIKE3 */
187 extern void xfs_fs_cmn_err(int level, struct xfs_mount *mp, char *fmt, ...);
188+
189+#define xfs_fs_repair_cmn_err(level, mp, fmt, args...) \
190+	xfs_fs_cmn_err(level, mp, fmt " Unmount and run xfs_repair.", ## args)
191 
192 #endif	/* __XFS_ERROR_H__ */
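Note: with the wrapper in place, callers only supply the corruption-specific text; for example, the xfs_bmap_read_extents() hunk earlier in this patch becomes:

	xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
		"corrupt dinode %Lu, (btree extents).",
		(unsigned long long) ip->i_ino);
	/* which expands to xfs_fs_cmn_err() with
	 * " Unmount and run xfs_repair." appended to the format string */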
fs/xfs/xfs_fs.h  +6 -4
···
  3  * All Rights Reserved.
  4  *
  5  * This program is free software; you can redistribute it and/or
  6- * modify it under the terms of the GNU General Public License as
  7- * published by the Free Software Foundation.
  8  *
  9  * This program is distributed in the hope that it would be useful,
 10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 12- * GNU General Public License for more details.
 13  *
 14- * You should have received a copy of the GNU General Public License
 15  * along with this program; if not, write the Free Software Foundation,
 16  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 17  */
···
 65 #define XFS_XFLAG_RTINHERIT	0x00000100	/* create with rt bit set */
 66 #define XFS_XFLAG_PROJINHERIT	0x00000200	/* create with parents projid */
 67 #define XFS_XFLAG_NOSYMLINKS	0x00000400	/* disallow symlink creation */
 68 #define XFS_XFLAG_HASATTR	0x80000000	/* no DIFLAG for this */
 69 
 70 /*
···
  3  * All Rights Reserved.
  4  *
  5  * This program is free software; you can redistribute it and/or
  6+ * modify it under the terms of the GNU Lesser General Public License
  7+ * as published by the Free Software Foundation.
  8  *
  9  * This program is distributed in the hope that it would be useful,
 10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 12+ * GNU Lesser General Public License for more details.
 13  *
 14+ * You should have received a copy of the GNU Lesser General Public License
 15  * along with this program; if not, write the Free Software Foundation,
 16  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 17  */
···
 65 #define XFS_XFLAG_RTINHERIT	0x00000100	/* create with rt bit set */
 66 #define XFS_XFLAG_PROJINHERIT	0x00000200	/* create with parents projid */
 67 #define XFS_XFLAG_NOSYMLINKS	0x00000400	/* disallow symlink creation */
 68+#define XFS_XFLAG_EXTSIZE	0x00000800	/* extent size allocator hint */
 69+#define XFS_XFLAG_EXTSZINHERIT	0x00001000	/* inherit inode extent size */
 70 #define XFS_XFLAG_HASATTR	0x80000000	/* no DIFLAG for this */
 71 
 72 /*
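Note: XFS_XFLAG_EXTSIZE and XFS_XFLAG_EXTSZINHERIT expose the per-inode extent size hint through the fsxattr interface. A hedged userspace sketch of setting the hint follows; it is not part of this patch, it assumes struct fsxattr and the XFS_IOC_FSGETXATTR/XFS_IOC_FSSETXATTR ioctls from this header are available via xfsprogs' <xfs/xfs_fs.h>, and error handling is kept minimal:

#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <xfs/xfs_fs.h>

int
set_extsize_hint(const char *path, unsigned int extsize_bytes)
{
	struct fsxattr	fsx;
	int		fd, err;

	fd = open(path, O_RDONLY);
	if (fd < 0)
		return -1;
	err = ioctl(fd, XFS_IOC_FSGETXATTR, &fsx);
	if (!err) {
		fsx.fsx_extsize = extsize_bytes;	/* hint, in bytes */
		fsx.fsx_xflags |= XFS_XFLAG_EXTSIZE;	/* flag added above */
		err = ioctl(fd, XFS_IOC_FSSETXATTR, &fsx);
	}
	close(fd);
	return err;
}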
···271 if (ip->i_update_size)272 ip->i_update_size = 0;27300000274 vecp->i_addr = (xfs_caddr_t)&ip->i_d;275 vecp->i_len = sizeof(xfs_dinode_core_t);276 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_ICORE);···608 if (iip->ili_pushbuf_flag == 0) {609 iip->ili_pushbuf_flag = 1;610#ifdef DEBUG611- iip->ili_push_owner = get_thread_id();612#endif613 /*614 * Inode is left locked in shared mode.···787 * trying to duplicate our effort.788 */789 ASSERT(iip->ili_pushbuf_flag != 0);790- ASSERT(iip->ili_push_owner == get_thread_id());791792 /*793 * If flushlock isn't locked anymore, chances are that the
···271 if (ip->i_update_size)272 ip->i_update_size = 0;273274+ /*275+ * Make sure to get the latest atime from the Linux inode.276+ */277+ xfs_synchronize_atime(ip);278+279 vecp->i_addr = (xfs_caddr_t)&ip->i_d;280 vecp->i_len = sizeof(xfs_dinode_core_t);281 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_ICORE);···603 if (iip->ili_pushbuf_flag == 0) {604 iip->ili_pushbuf_flag = 1;605#ifdef DEBUG606+ iip->ili_push_owner = current_pid();607#endif608 /*609 * Inode is left locked in shared mode.···782 * trying to duplicate our effort.783 */784 ASSERT(iip->ili_pushbuf_flag != 0);785+ ASSERT(iip->ili_push_owner == current_pid());786787 /*788 * If flushlock isn't locked anymore, chances are that the
fs/xfs/xfs_iomap.c  +236 -193
···262 case BMAPI_WRITE:263 /* If we found an extent, return it */264 if (nimaps &&265- (imap.br_startblock != HOLESTARTBLOCK) && 266 (imap.br_startblock != DELAYSTARTBLOCK)) {267 xfs_iomap_map_trace(XFS_IOMAP_WRITE_MAP, io,268 offset, count, iomapp, &imap, flags);···317}318319STATIC int0000000000000000000000000000000000000000000000000000320xfs_flush_space(321 xfs_inode_t *ip,322 int *fsynced,···414 xfs_iocore_t *io = &ip->i_iocore;415 xfs_fileoff_t offset_fsb;416 xfs_fileoff_t last_fsb;417- xfs_filblks_t count_fsb;418 xfs_fsblock_t firstfsb;00419 int nimaps;420- int error;421 int bmapi_flag;422 int quota_flag;423 int rt;424 xfs_trans_t *tp;425 xfs_bmbt_irec_t imap;426 xfs_bmap_free_t free_list;427- xfs_filblks_t qblocks, resblks;428 int committed;429- int resrtextents;430431 /*432 * Make sure that the dquots are there. This doesn't hold···437 if (error)438 return XFS_ERROR(error);439440- offset_fsb = XFS_B_TO_FSBT(mp, offset);441- last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));442- count_fsb = last_fsb - offset_fsb;443- if (found && (ret_imap->br_startblock == HOLESTARTBLOCK)) {444- xfs_fileoff_t map_last_fsb;445-446- map_last_fsb = ret_imap->br_blockcount + ret_imap->br_startoff;447- if (map_last_fsb < last_fsb) {448- last_fsb = map_last_fsb;449- count_fsb = last_fsb - offset_fsb;450- }451- ASSERT(count_fsb > 0);452- }453-454- /*455- * Determine if reserving space on the data or realtime partition.456- */457- if ((rt = XFS_IS_REALTIME_INODE(ip))) {458- xfs_extlen_t extsz;459-460 if (!(extsz = ip->i_d.di_extsize))461 extsz = mp->m_sb.sb_rextsize;462- resrtextents = qblocks = (count_fsb + extsz - 1);463- do_div(resrtextents, mp->m_sb.sb_rextsize);464- resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);465- quota_flag = XFS_QMOPT_RES_RTBLKS;466 } else {467- resrtextents = 0;468- resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, count_fsb);469- quota_flag = XFS_QMOPT_RES_REGBLKS;470 }000000000000000000000000000000000000000471472 /*473 * Allocate and setup the transaction···493 XFS_WRITE_LOG_RES(mp), resrtextents,494 XFS_TRANS_PERM_LOG_RES,495 XFS_WRITE_LOG_COUNT);496-497 /*498 * Check for running out of space, note: need lock to return499 */···502 if (error)503 goto error_out;504505- if (XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip, qblocks, 0, quota_flag)) {506- error = (EDQUOT);0507 goto error1;508- }509510- bmapi_flag = XFS_BMAPI_WRITE;511 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);512 xfs_trans_ihold(tp, ip);513514- if (!(flags & BMAPI_MMAP) && (offset < ip->i_d.di_size || rt))0515 bmapi_flag |= XFS_BMAPI_PREALLOC;516517 /*518- * Issue the bmapi() call to allocate the blocks519 */520 XFS_BMAP_INIT(&free_list, &firstfsb);521 nimaps = 1;···550 "extent-state : %x \n",551 (ip->i_mount)->m_fsname,552 (long long)ip->i_ino,553- ret_imap->br_startblock, ret_imap->br_startoff,554- ret_imap->br_blockcount,ret_imap->br_state);00555 }556 return 0;557···569 return XFS_ERROR(error);570}571000000000000000000000000000000000000000000000000000000000572int573xfs_iomap_write_delay(574 xfs_inode_t *ip,···639 xfs_iocore_t *io = &ip->i_iocore;640 xfs_fileoff_t offset_fsb;641 xfs_fileoff_t last_fsb;642- xfs_fsize_t isize;0643 xfs_fsblock_t firstblock;00644 int nimaps;645- int error;646 xfs_bmbt_irec_t imap[XFS_WRITE_IMAPS];647- int aeof;648- int fsynced = 0;649650 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE) != 0);651···655 * Make sure that the dquots are there. 
This doesn't hold656 * the ilock across a disk read.657 */658-659 error = XFS_QM_DQATTACH(mp, ip, XFS_QMOPT_ILOCKED);660 if (error)661 return XFS_ERROR(error);662000000000663retry:664 isize = ip->i_d.di_size;665- if (io->io_new_size > isize) {666 isize = io->io_new_size;667- }668669- aeof = 0;670- offset_fsb = XFS_B_TO_FSBT(mp, offset);671- last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));672- /*673- * If the caller is doing a write at the end of the file,674- * then extend the allocation (and the buffer used for the write)675- * out to the file system's write iosize. We clean up any extra676- * space left over when the file is closed in xfs_inactive().677- *678- * For sync writes, we are flushing delayed allocate space to679- * try to make additional space available for allocation near680- * the filesystem full boundary - preallocation hurts in that681- * situation, of course.682- */683- if (!(ioflag & BMAPI_SYNC) && ((offset + count) > ip->i_d.di_size)) {684- xfs_off_t aligned_offset;685- xfs_filblks_t count_fsb;686- unsigned int iosize;687- xfs_fileoff_t ioalign;688- int n;689- xfs_fileoff_t start_fsb;690691- /*692- * If there are any real blocks past eof, then don't693- * do any speculative allocation.694- */695- start_fsb = XFS_B_TO_FSBT(mp,696- ((xfs_ufsize_t)(offset + count - 1)));697- count_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp));698- while (count_fsb > 0) {699- nimaps = XFS_WRITE_IMAPS;700- error = XFS_BMAPI(mp, NULL, io, start_fsb, count_fsb,701- 0, &firstblock, 0, imap, &nimaps, NULL);702- if (error) {703- return error;704- }705- for (n = 0; n < nimaps; n++) {706- if ( !(io->io_flags & XFS_IOCORE_RT) && 707- !imap[n].br_startblock) {708- cmn_err(CE_PANIC,"Access to block "709- "zero: fs <%s> inode: %lld "710- "start_block : %llx start_off "711- ": %llx blkcnt : %llx "712- "extent-state : %x \n",713- (ip->i_mount)->m_fsname,714- (long long)ip->i_ino,715- imap[n].br_startblock,716- imap[n].br_startoff,717- imap[n].br_blockcount,718- imap[n].br_state);719- }720- if ((imap[n].br_startblock != HOLESTARTBLOCK) &&721- (imap[n].br_startblock != DELAYSTARTBLOCK)) {722- goto write_map;723- }724- start_fsb += imap[n].br_blockcount;725- count_fsb -= imap[n].br_blockcount;726- }727- }728- iosize = mp->m_writeio_blocks;729 aligned_offset = XFS_WRITEIO_ALIGN(mp, (offset + count - 1));730 ioalign = XFS_B_TO_FSBT(mp, aligned_offset);731- last_fsb = ioalign + iosize;732- aeof = 1;0733 }734-write_map:0000000735 nimaps = XFS_WRITE_IMAPS;736 firstblock = NULLFSBLOCK;737-738- /*739- * If mounted with the "-o swalloc" option, roundup the allocation740- * request to a stripe width boundary if the file size is >=741- * stripe width and we are allocating past the allocation eof.742- */743- if (!(io->io_flags & XFS_IOCORE_RT) && mp->m_swidth 744- && (mp->m_flags & XFS_MOUNT_SWALLOC)745- && (isize >= XFS_FSB_TO_B(mp, mp->m_swidth)) && aeof) {746- int eof;747- xfs_fileoff_t new_last_fsb;748-749- new_last_fsb = roundup_64(last_fsb, mp->m_swidth);750- error = xfs_bmap_eof(ip, new_last_fsb, XFS_DATA_FORK, &eof);751- if (error) {752- return error;753- }754- if (eof) {755- last_fsb = new_last_fsb;756- }757- /*758- * Roundup the allocation request to a stripe unit (m_dalign) boundary759- * if the file size is >= stripe unit size, and we are allocating past760- * the allocation eof.761- */762- } else if (!(io->io_flags & XFS_IOCORE_RT) && mp->m_dalign &&763- (isize >= XFS_FSB_TO_B(mp, mp->m_dalign)) && aeof) {764- int eof;765- xfs_fileoff_t new_last_fsb;766- new_last_fsb = 
roundup_64(last_fsb, mp->m_dalign);767- error = xfs_bmap_eof(ip, new_last_fsb, XFS_DATA_FORK, &eof);768- if (error) {769- return error;770- }771- if (eof) {772- last_fsb = new_last_fsb;773- }774- /*775- * Round up the allocation request to a real-time extent boundary776- * if the file is on the real-time subvolume.777- */778- } else if (io->io_flags & XFS_IOCORE_RT && aeof) {779- int eof;780- xfs_fileoff_t new_last_fsb;781-782- new_last_fsb = roundup_64(last_fsb, mp->m_sb.sb_rextsize);783- error = XFS_BMAP_EOF(mp, io, new_last_fsb, XFS_DATA_FORK, &eof);784- if (error) {785- return error;786- }787- if (eof)788- last_fsb = new_last_fsb;789- }790 error = xfs_bmapi(NULL, ip, offset_fsb,791 (xfs_filblks_t)(last_fsb - offset_fsb),792 XFS_BMAPI_DELAY | XFS_BMAPI_WRITE |793 XFS_BMAPI_ENTIRE, &firstblock, 1, imap,794 &nimaps, NULL);795- /*796- * This can be EDQUOT, if nimaps == 0797- */798- if (error && (error != ENOSPC)) {799 return XFS_ERROR(error);800- }801 /*802 * If bmapi returned us nothing, and if we didn't get back EDQUOT,803- * then we must have run out of space.804 */805 if (nimaps == 0) {806 xfs_iomap_enter_trace(XFS_IOMAP_WRITE_NOSPACE,···717 goto retry;718 }719720- *ret_imap = imap[0];721- *nmaps = 1;722- if ( !(io->io_flags & XFS_IOCORE_RT) && !ret_imap->br_startblock) {723 cmn_err(CE_PANIC,"Access to block zero: fs <%s> inode: %lld "724 "start_block : %llx start_off : %llx blkcnt : %llx "725 "extent-state : %x \n",726 (ip->i_mount)->m_fsname,727 (long long)ip->i_ino,728- ret_imap->br_startblock, ret_imap->br_startoff,729- ret_imap->br_blockcount,ret_imap->br_state);00730 }0000731 return 0;732}733···857 */858859 for (i = 0; i < nimaps; i++) {860- if ( !(io->io_flags & XFS_IOCORE_RT) && 861- !imap[i].br_startblock) {862 cmn_err(CE_PANIC,"Access to block zero: "863 "fs <%s> inode: %lld "864- "start_block : %llx start_off : %llx " 865 "blkcnt : %llx extent-state : %x \n",866 (ip->i_mount)->m_fsname,867 (long long)ip->i_ino,868- imap[i].br_startblock,869- imap[i].br_startoff,870- imap[i].br_blockcount,imap[i].br_state);0000871 }872 if ((offset_fsb >= imap[i].br_startoff) &&873 (offset_fsb < (imap[i].br_startoff +···908{909 xfs_mount_t *mp = ip->i_mount;910 xfs_iocore_t *io = &ip->i_iocore;911- xfs_trans_t *tp;912 xfs_fileoff_t offset_fsb;913 xfs_filblks_t count_fsb;914 xfs_filblks_t numblks_fsb;915- xfs_bmbt_irec_t imap;00000916 int committed;917 int error;918- int nres;919- int nimaps;920- xfs_fsblock_t firstfsb;921- xfs_bmap_free_t free_list;922923 xfs_iomap_enter_trace(XFS_IOMAP_UNWRITTEN,924 &ip->i_iocore, offset, count);···927 count_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);928 count_fsb = (xfs_filblks_t)(count_fsb - offset_fsb);929930- do {931- nres = XFS_DIOSTRAT_SPACE_RES(mp, 0);9320933 /*934 * set up a transaction to convert the range of extents935 * from unwritten to real. 
Do allocations in a loop until···937 */938939 tp = xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE);940- error = xfs_trans_reserve(tp, nres,941 XFS_WRITE_LOG_RES(mp), 0,942 XFS_TRANS_PERM_LOG_RES,943 XFS_WRITE_LOG_COUNT);···956 XFS_BMAP_INIT(&free_list, &firstfsb);957 nimaps = 1;958 error = xfs_bmapi(tp, ip, offset_fsb, count_fsb,959- XFS_BMAPI_WRITE, &firstfsb,960 1, &imap, &nimaps, &free_list);961 if (error)962 goto error_on_bmapi_transaction;···970 xfs_iunlock(ip, XFS_ILOCK_EXCL);971 if (error)972 goto error0;973-974 if ( !(io->io_flags & XFS_IOCORE_RT) && !imap.br_startblock) {975 cmn_err(CE_PANIC,"Access to block zero: fs <%s> "976 "inode: %lld start_block : %llx start_off : "977 "%llx blkcnt : %llx extent-state : %x \n",978 (ip->i_mount)->m_fsname,979 (long long)ip->i_ino,980- imap.br_startblock,imap.br_startoff,981- imap.br_blockcount,imap.br_state);00982 }983984 if ((numblks_fsb = imap.br_blockcount) == 0) {
···262 case BMAPI_WRITE:263 /* If we found an extent, return it */264 if (nimaps &&265+ (imap.br_startblock != HOLESTARTBLOCK) &&266 (imap.br_startblock != DELAYSTARTBLOCK)) {267 xfs_iomap_map_trace(XFS_IOMAP_WRITE_MAP, io,268 offset, count, iomapp, &imap, flags);···317}318319STATIC int320+xfs_iomap_eof_align_last_fsb(321+ xfs_mount_t *mp,322+ xfs_iocore_t *io,323+ xfs_fsize_t isize,324+ xfs_extlen_t extsize,325+ xfs_fileoff_t *last_fsb)326+{327+ xfs_fileoff_t new_last_fsb = 0;328+ xfs_extlen_t align;329+ int eof, error;330+331+ if (io->io_flags & XFS_IOCORE_RT)332+ ;333+ /*334+ * If mounted with the "-o swalloc" option, roundup the allocation335+ * request to a stripe width boundary if the file size is >=336+ * stripe width and we are allocating past the allocation eof.337+ */338+ else if (mp->m_swidth && (mp->m_flags & XFS_MOUNT_SWALLOC) &&339+ (isize >= XFS_FSB_TO_B(mp, mp->m_swidth)))340+ new_last_fsb = roundup_64(*last_fsb, mp->m_swidth);341+ /*342+ * Roundup the allocation request to a stripe unit (m_dalign) boundary343+ * if the file size is >= stripe unit size, and we are allocating past344+ * the allocation eof.345+ */346+ else if (mp->m_dalign && (isize >= XFS_FSB_TO_B(mp, mp->m_dalign)))347+ new_last_fsb = roundup_64(*last_fsb, mp->m_dalign);348+349+ /*350+ * Always round up the allocation request to an extent boundary351+ * (when file on a real-time subvolume or has di_extsize hint).352+ */353+ if (extsize) {354+ if (new_last_fsb)355+ align = roundup_64(new_last_fsb, extsize);356+ else357+ align = extsize;358+ new_last_fsb = roundup_64(*last_fsb, align);359+ }360+361+ if (new_last_fsb) {362+ error = XFS_BMAP_EOF(mp, io, new_last_fsb, XFS_DATA_FORK, &eof);363+ if (error)364+ return error;365+ if (eof)366+ *last_fsb = new_last_fsb;367+ }368+ return 0;369+}370+371+STATIC int372xfs_flush_space(373 xfs_inode_t *ip,374 int *fsynced,···362 xfs_iocore_t *io = &ip->i_iocore;363 xfs_fileoff_t offset_fsb;364 xfs_fileoff_t last_fsb;365+ xfs_filblks_t count_fsb, resaligned;366 xfs_fsblock_t firstfsb;367+ xfs_extlen_t extsz, temp;368+ xfs_fsize_t isize;369 int nimaps;0370 int bmapi_flag;371 int quota_flag;372 int rt;373 xfs_trans_t *tp;374 xfs_bmbt_irec_t imap;375 xfs_bmap_free_t free_list;376+ uint qblocks, resblks, resrtextents;377 int committed;378+ int error;379380 /*381 * Make sure that the dquots are there. 
This doesn't hold···384 if (error)385 return XFS_ERROR(error);386387+ rt = XFS_IS_REALTIME_INODE(ip);388+ if (unlikely(rt)) {000000000000000000389 if (!(extsz = ip->i_d.di_extsize))390 extsz = mp->m_sb.sb_rextsize;0000391 } else {392+ extsz = ip->i_d.di_extsize;00393 }394+395+ isize = ip->i_d.di_size;396+ if (io->io_new_size > isize)397+ isize = io->io_new_size;398+399+ offset_fsb = XFS_B_TO_FSBT(mp, offset);400+ last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));401+ if ((offset + count) > isize) {402+ error = xfs_iomap_eof_align_last_fsb(mp, io, isize, extsz,403+ &last_fsb);404+ if (error)405+ goto error_out;406+ } else {407+ if (found && (ret_imap->br_startblock == HOLESTARTBLOCK))408+ last_fsb = MIN(last_fsb, (xfs_fileoff_t)409+ ret_imap->br_blockcount +410+ ret_imap->br_startoff);411+ }412+ count_fsb = last_fsb - offset_fsb;413+ ASSERT(count_fsb > 0);414+415+ resaligned = count_fsb;416+ if (unlikely(extsz)) {417+ if ((temp = do_mod(offset_fsb, extsz)))418+ resaligned += temp;419+ if ((temp = do_mod(resaligned, extsz)))420+ resaligned += extsz - temp;421+ }422+423+ if (unlikely(rt)) {424+ resrtextents = qblocks = resaligned;425+ resrtextents /= mp->m_sb.sb_rextsize;426+ resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);427+ quota_flag = XFS_QMOPT_RES_RTBLKS;428+ } else {429+ resrtextents = 0;430+ resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, resaligned);431+ quota_flag = XFS_QMOPT_RES_REGBLKS;432+ }433434 /*435 * Allocate and setup the transaction···425 XFS_WRITE_LOG_RES(mp), resrtextents,426 XFS_TRANS_PERM_LOG_RES,427 XFS_WRITE_LOG_COUNT);0428 /*429 * Check for running out of space, note: need lock to return430 */···435 if (error)436 goto error_out;437438+ error = XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip,439+ qblocks, 0, quota_flag);440+ if (error)441 goto error1;04420443 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);444 xfs_trans_ihold(tp, ip);445446+ bmapi_flag = XFS_BMAPI_WRITE;447+ if ((flags & BMAPI_DIRECT) && (offset < ip->i_d.di_size || extsz))448 bmapi_flag |= XFS_BMAPI_PREALLOC;449450 /*451+ * Issue the xfs_bmapi() call to allocate the blocks452 */453 XFS_BMAP_INIT(&free_list, &firstfsb);454 nimaps = 1;···483 "extent-state : %x \n",484 (ip->i_mount)->m_fsname,485 (long long)ip->i_ino,486+ (unsigned long long)ret_imap->br_startblock,487+ (unsigned long long)ret_imap->br_startoff,488+ (unsigned long long)ret_imap->br_blockcount,489+ ret_imap->br_state);490 }491 return 0;492···500 return XFS_ERROR(error);501}502503+/*504+ * If the caller is doing a write at the end of the file,505+ * then extend the allocation out to the file system's write506+ * iosize. 
We clean up any extra space left over when the
+ * file is closed in xfs_inactive().
+ *
+ * For sync writes, we are flushing delayed allocate space to
+ * try to make additional space available for allocation near
+ * the filesystem full boundary - preallocation hurts in that
+ * situation, of course.
+ */
+STATIC int
+xfs_iomap_eof_want_preallocate(
+	xfs_mount_t	*mp,
+	xfs_iocore_t	*io,
+	xfs_fsize_t	isize,
+	xfs_off_t	offset,
+	size_t		count,
+	int		ioflag,
+	xfs_bmbt_irec_t	*imap,
+	int		nimaps,
+	int		*prealloc)
+{
+	xfs_fileoff_t	start_fsb;
+	xfs_filblks_t	count_fsb;
+	xfs_fsblock_t	firstblock;
+	int		n, error, imaps;
+
+	*prealloc = 0;
+	if ((ioflag & BMAPI_SYNC) || (offset + count) <= isize)
+		return 0;
+
+	/*
+	 * If there are any real blocks past eof, then don't
+	 * do any speculative allocation.
+	 */
+	start_fsb = XFS_B_TO_FSBT(mp, ((xfs_ufsize_t)(offset + count - 1)));
+	count_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp));
+	while (count_fsb > 0) {
+		imaps = nimaps;
+		firstblock = NULLFSBLOCK;
+		error = XFS_BMAPI(mp, NULL, io, start_fsb, count_fsb,
+				  0, &firstblock, 0, imap, &imaps, NULL);
+		if (error)
+			return error;
+		for (n = 0; n < imaps; n++) {
+			if ((imap[n].br_startblock != HOLESTARTBLOCK) &&
+			    (imap[n].br_startblock != DELAYSTARTBLOCK))
+				return 0;
+			start_fsb += imap[n].br_blockcount;
+			count_fsb -= imap[n].br_blockcount;
+		}
+	}
+	*prealloc = 1;
+	return 0;
+}
+
 int
 xfs_iomap_write_delay(
 	xfs_inode_t	*ip,
···
 	xfs_iocore_t	*io = &ip->i_iocore;
 	xfs_fileoff_t	offset_fsb;
 	xfs_fileoff_t	last_fsb;
+	xfs_off_t	aligned_offset;
+	xfs_fileoff_t	ioalign;
 	xfs_fsblock_t	firstblock;
+	xfs_extlen_t	extsz;
+	xfs_fsize_t	isize;
 	int		nimaps;
 	xfs_bmbt_irec_t imap[XFS_WRITE_IMAPS];
+	int		prealloc, fsynced = 0;
+	int		error;

 	ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE) != 0);

···
 	 * Make sure that the dquots are there.  This doesn't hold
 	 * the ilock across a disk read.
 	 */
 	error = XFS_QM_DQATTACH(mp, ip, XFS_QMOPT_ILOCKED);
 	if (error)
 		return XFS_ERROR(error);

+	if (XFS_IS_REALTIME_INODE(ip)) {
+		if (!(extsz = ip->i_d.di_extsize))
+			extsz = mp->m_sb.sb_rextsize;
+	} else {
+		extsz = ip->i_d.di_extsize;
+	}
+
+	offset_fsb = XFS_B_TO_FSBT(mp, offset);
+
 retry:
 	isize = ip->i_d.di_size;
+	if (io->io_new_size > isize)
 		isize = io->io_new_size;

+	error = xfs_iomap_eof_want_preallocate(mp, io, isize, offset, count,
+				ioflag, imap, XFS_WRITE_IMAPS, &prealloc);
+	if (error)
+		return error;

+	if (prealloc) {
 		aligned_offset = XFS_WRITEIO_ALIGN(mp, (offset + count - 1));
 		ioalign = XFS_B_TO_FSBT(mp, aligned_offset);
+		last_fsb = ioalign + mp->m_writeio_blocks;
+	} else {
+		last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));
 	}
+
+	if (prealloc || extsz) {
+		error = xfs_iomap_eof_align_last_fsb(mp, io, isize, extsz,
+							&last_fsb);
+		if (error)
+			return error;
+	}
+
 	nimaps = XFS_WRITE_IMAPS;
 	firstblock = NULLFSBLOCK;
 	error = xfs_bmapi(NULL, ip, offset_fsb,
 			  (xfs_filblks_t)(last_fsb - offset_fsb),
 			  XFS_BMAPI_DELAY | XFS_BMAPI_WRITE |
 			  XFS_BMAPI_ENTIRE, &firstblock, 1, imap,
 			  &nimaps, NULL);
+	if (error && (error != ENOSPC))
 		return XFS_ERROR(error);
+
 	/*
 	 * If bmapi returned us nothing, and if we didn't get back EDQUOT,
+	 * then we must have run out of space - flush delalloc, and retry..
 	 */
 	if (nimaps == 0) {
 		xfs_iomap_enter_trace(XFS_IOMAP_WRITE_NOSPACE,
···
 		goto retry;
 	}

+	if (!(io->io_flags & XFS_IOCORE_RT) && !ret_imap->br_startblock) {
 		cmn_err(CE_PANIC,"Access to block zero: fs <%s> inode: %lld "
 			"start_block : %llx start_off : %llx blkcnt : %llx "
 			"extent-state : %x \n",
 			(ip->i_mount)->m_fsname,
 			(long long)ip->i_ino,
+			(unsigned long long)ret_imap->br_startblock,
+			(unsigned long long)ret_imap->br_startoff,
+			(unsigned long long)ret_imap->br_blockcount,
+			ret_imap->br_state);
 	}
+
+	*ret_imap = imap[0];
+	*nmaps = 1;
+
 	return 0;
 }

···
 	 */

 	for (i = 0; i < nimaps; i++) {
+		if (!(io->io_flags & XFS_IOCORE_RT)  &&
+		    !imap[i].br_startblock) {
 			cmn_err(CE_PANIC,"Access to block zero: "
 				"fs <%s> inode: %lld "
+				"start_block : %llx start_off : %llx "
 				"blkcnt : %llx extent-state : %x \n",
 				(ip->i_mount)->m_fsname,
 				(long long)ip->i_ino,
+				(unsigned long long)
+					imap[i].br_startblock,
+				(unsigned long long)
+					imap[i].br_startoff,
+				(unsigned long long)
+					imap[i].br_blockcount,
+				imap[i].br_state);
 		}
 		if ((offset_fsb >= imap[i].br_startoff) &&
 		    (offset_fsb < (imap[i].br_startoff +
···
 {
 	xfs_mount_t	*mp = ip->i_mount;
 	xfs_iocore_t	*io = &ip->i_iocore;
 	xfs_fileoff_t	offset_fsb;
 	xfs_filblks_t	count_fsb;
 	xfs_filblks_t	numblks_fsb;
+	xfs_fsblock_t	firstfsb;
+	int		nimaps;
+	xfs_trans_t	*tp;
+	xfs_bmbt_irec_t	imap;
+	xfs_bmap_free_t	free_list;
+	uint		resblks;
 	int		committed;
 	int		error;

 	xfs_iomap_enter_trace(XFS_IOMAP_UNWRITTEN,
 				&ip->i_iocore, offset, count);
···
 	count_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
 	count_fsb = (xfs_filblks_t)(count_fsb - offset_fsb);

+	resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0) << 1;

+	do {
 		/*
 		 * set up a transaction to convert the range of extents
 		 * from unwritten to real.  Do allocations in a loop until
···
 		 */

 		tp = xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE);
+		error = xfs_trans_reserve(tp, resblks,
 				XFS_WRITE_LOG_RES(mp), 0,
 				XFS_TRANS_PERM_LOG_RES,
 				XFS_WRITE_LOG_COUNT);
···
 		XFS_BMAP_INIT(&free_list, &firstfsb);
 		nimaps = 1;
 		error = xfs_bmapi(tp, ip, offset_fsb, count_fsb,
+				  XFS_BMAPI_WRITE|XFS_BMAPI_CONVERT, &firstfsb,
 				  1, &imap, &nimaps, &free_list);
 		if (error)
 			goto error_on_bmapi_transaction;
···
 		xfs_iunlock(ip, XFS_ILOCK_EXCL);
 		if (error)
 			goto error0;
+
 		if ( !(io->io_flags & XFS_IOCORE_RT)  && !imap.br_startblock) {
 			cmn_err(CE_PANIC,"Access to block zero:  fs <%s> "
 				"inode: %lld start_block : %llx start_off : "
 				"%llx blkcnt : %llx extent-state : %x \n",
 				(ip->i_mount)->m_fsname,
 				(long long)ip->i_ino,
+				(unsigned long long)imap.br_startblock,
+				(unsigned long long)imap.br_startoff,
+				(unsigned long long)imap.br_blockcount,
+				imap.br_state);
 		}

 		if ((numblks_fsb = imap.br_blockcount) == 0) {
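The xfs_iomap_eof_want_preallocate() hunk above boils down to a simple policy: never preallocate for sync writes or for writes that end inside the current EOF, and otherwise preallocate only while every mapping from the end of the write out to the maximum file offset is still a hole or delayed allocation. The following is a minimal user-space sketch of that decision; the names (struct mapping, HOLE, DELALLOC, want_eof_preallocate) are illustrative stand-ins, not XFS types or functions.

/*
 * Sketch only: models the speculative-preallocation decision above
 * on a pre-fetched array of mappings instead of calling the bmap code.
 */
#include <stdbool.h>
#include <stdio.h>

#define HOLE      (-1LL)	/* stand-in for HOLESTARTBLOCK */
#define DELALLOC  (-2LL)	/* stand-in for DELAYSTARTBLOCK */

struct mapping {
	long long startblock;	/* HOLE, DELALLOC, or a real block number */
	long long blockcount;
};

/*
 * Mirror of the logic above: skip sync writes and writes inside EOF,
 * and refuse to preallocate if any real blocks already exist past EOF.
 */
static bool want_eof_preallocate(const struct mapping *map, int nmaps,
				 long long write_end, long long isize,
				 bool sync_write)
{
	int n;

	if (sync_write || write_end <= isize)
		return false;

	for (n = 0; n < nmaps; n++)
		if (map[n].startblock != HOLE &&
		    map[n].startblock != DELALLOC)
			return false;	/* real blocks past EOF already */

	return true;
}

int main(void)
{
	struct mapping past_eof[] = {
		{ DELALLOC, 16 },
		{ HOLE,     64 },
	};

	printf("preallocate? %d\n",
	       want_eof_preallocate(past_eof, 2, 8192, 4096, false)); /* 1 */
	printf("preallocate? %d\n",
	       want_eof_preallocate(past_eof, 2, 2048, 4096, false)); /* 0 */
	return 0;
}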
···
 #define xlog_trace_iclog(iclog,state)
 #endif /* XFS_LOG_TRACE */

+
+static void
+xlog_ins_ticketq(struct xlog_ticket **qp, struct xlog_ticket *tic)
+{
+	if (*qp) {
+		tic->t_next = (*qp);
+		tic->t_prev = (*qp)->t_prev;
+		(*qp)->t_prev->t_next = tic;
+		(*qp)->t_prev = tic;
+	} else {
+		tic->t_prev = tic->t_next = tic;
+		*qp = tic;
+	}
+
+	tic->t_flags |= XLOG_TIC_IN_Q;
+}
+
+static void
+xlog_del_ticketq(struct xlog_ticket **qp, struct xlog_ticket *tic)
+{
+	if (tic == tic->t_next) {
+		*qp = NULL;
+	} else {
+		*qp = tic->t_next;
+		tic->t_next->t_prev = tic->t_prev;
+		tic->t_prev->t_next = tic->t_next;
+	}
+
+	tic->t_next = tic->t_prev = NULL;
+	tic->t_flags &= ~XLOG_TIC_IN_Q;
+}
+
+static void
+xlog_grant_sub_space(struct log *log, int bytes)
+{
+	log->l_grant_write_bytes -= bytes;
+	if (log->l_grant_write_bytes < 0) {
+		log->l_grant_write_bytes += log->l_logsize;
+		log->l_grant_write_cycle--;
+	}
+
+	log->l_grant_reserve_bytes -= bytes;
+	if ((log)->l_grant_reserve_bytes < 0) {
+		log->l_grant_reserve_bytes += log->l_logsize;
+		log->l_grant_reserve_cycle--;
+	}
+
+}
+
+static void
+xlog_grant_add_space_write(struct log *log, int bytes)
+{
+	log->l_grant_write_bytes += bytes;
+	if (log->l_grant_write_bytes > log->l_logsize) {
+		log->l_grant_write_bytes -= log->l_logsize;
+		log->l_grant_write_cycle++;
+	}
+}
+
+static void
+xlog_grant_add_space_reserve(struct log *log, int bytes)
+{
+	log->l_grant_reserve_bytes += bytes;
+	if (log->l_grant_reserve_bytes > log->l_logsize) {
+		log->l_grant_reserve_bytes -= log->l_logsize;
+		log->l_grant_reserve_cycle++;
+	}
+}
+
+static inline void
+xlog_grant_add_space(struct log *log, int bytes)
+{
+	xlog_grant_add_space_write(log, bytes);
+	xlog_grant_add_space_reserve(log, bytes);
+}
+
+
 /*
  * NOTES:
  *
···
 	if (readonly)
 		vfsp->vfs_flag &= ~VFS_RDONLY;

-	error = xlog_recover(mp->m_log, readonly);
+	error = xlog_recover(mp->m_log);

 	if (readonly)
 		vfsp->vfs_flag |= VFS_RDONLY;
···

 	/* move grant heads by roundoff in sync */
 	s = GRANT_LOCK(log);
-	XLOG_GRANT_ADD_SPACE(log, roundoff, 'w');
-	XLOG_GRANT_ADD_SPACE(log, roundoff, 'r');
+	xlog_grant_add_space(log, roundoff);
 	GRANT_UNLOCK(log, s);

 	/* put cycle number in every block */
···
  * print out info relating to regions written which consume
  * the reservation
  */
-#if defined(XFS_LOG_RES_DEBUG)
 STATIC void
 xlog_print_tic_res(xfs_mount_t *mp, xlog_ticket_t *ticket)
 {
···
 		ticket->t_res_arr_sum, ticket->t_res_o_flow,
 		ticket->t_res_num_ophdrs, ophdr_spc,
 		ticket->t_res_arr_sum +
-			ticket->t_res_o_flow + ophdr_spc,
+			ticket->t_res_o_flow + ophdr_spc,
 		ticket->t_res_num);

 	for (i = 0; i < ticket->t_res_num; i++) {
-		uint r_type = ticket->t_res_arr[i].r_type;
+		uint r_type = ticket->t_res_arr[i].r_type;
 		cmn_err(CE_WARN,
 			    "region[%u]: %s - %u bytes\n",
 			    i,
···
 			    ticket->t_res_arr[i].r_len);
 	}
 }
-#else
-#define xlog_print_tic_res(mp, ticket)
-#endif

 /*
  * Write some region out to in-core log
···

 	/* something is already sleeping; insert new transaction at end */
 	if (log->l_reserve_headq) {
-		XLOG_INS_TICKETQ(log->l_reserve_headq, tic);
+		xlog_ins_ticketq(&log->l_reserve_headq, tic);
 		xlog_trace_loggrant(log, tic,
 				    "xlog_grant_log_space: sleep 1");
 		/*
···
 			     log->l_grant_reserve_bytes);
 	if (free_bytes < need_bytes) {
 		if ((tic->t_flags & XLOG_TIC_IN_Q) == 0)
-			XLOG_INS_TICKETQ(log->l_reserve_headq, tic);
+			xlog_ins_ticketq(&log->l_reserve_headq, tic);
 		xlog_trace_loggrant(log, tic,
 				    "xlog_grant_log_space: sleep 2");
 		XFS_STATS_INC(xs_sleep_logspace);
···
 		s = GRANT_LOCK(log);
 		goto redo;
 	} else if (tic->t_flags & XLOG_TIC_IN_Q)
-		XLOG_DEL_TICKETQ(log->l_reserve_headq, tic);
+		xlog_del_ticketq(&log->l_reserve_headq, tic);

 	/* we've got enough space */
-	XLOG_GRANT_ADD_SPACE(log, need_bytes, 'w');
-	XLOG_GRANT_ADD_SPACE(log, need_bytes, 'r');
+	xlog_grant_add_space(log, need_bytes);
 #ifdef DEBUG
 	tail_lsn = log->l_tail_lsn;
 	/*
···

  error_return:
 	if (tic->t_flags & XLOG_TIC_IN_Q)
-		XLOG_DEL_TICKETQ(log->l_reserve_headq, tic);
+		xlog_del_ticketq(&log->l_reserve_headq, tic);
 	xlog_trace_loggrant(log, tic, "xlog_grant_log_space: err_ret");
 	/*
 	 * If we are failing, make sure the ticket doesn't have any
···

 	if (ntic != log->l_write_headq) {
 		if ((tic->t_flags & XLOG_TIC_IN_Q) == 0)
-			XLOG_INS_TICKETQ(log->l_write_headq, tic);
+			xlog_ins_ticketq(&log->l_write_headq, tic);

 		xlog_trace_loggrant(log, tic,
 				    "xlog_regrant_write_log_space: sleep 1");
···
 			     log->l_grant_write_bytes);
 	if (free_bytes < need_bytes) {
 		if ((tic->t_flags & XLOG_TIC_IN_Q) == 0)
-			XLOG_INS_TICKETQ(log->l_write_headq, tic);
+			xlog_ins_ticketq(&log->l_write_headq, tic);
 		XFS_STATS_INC(xs_sleep_logspace);
 		sv_wait(&tic->t_sema, PINOD|PLTWAIT, &log->l_grant_lock, s);

···
 		s = GRANT_LOCK(log);
 		goto redo;
 	} else if (tic->t_flags & XLOG_TIC_IN_Q)
-		XLOG_DEL_TICKETQ(log->l_write_headq, tic);
+		xlog_del_ticketq(&log->l_write_headq, tic);

-	XLOG_GRANT_ADD_SPACE(log, need_bytes, 'w'); /* we've got enough space */
+	/* we've got enough space */
+	xlog_grant_add_space_write(log, need_bytes);
 #ifdef DEBUG
 	tail_lsn = log->l_tail_lsn;
 	if (CYCLE_LSN(tail_lsn) != log->l_grant_write_cycle) {
···

  error_return:
 	if (tic->t_flags & XLOG_TIC_IN_Q)
-		XLOG_DEL_TICKETQ(log->l_reserve_headq, tic);
+		xlog_del_ticketq(&log->l_reserve_headq, tic);
 	xlog_trace_loggrant(log, tic, "xlog_regrant_write_log_space: err_ret");
 	/*
 	 * If we are failing, make sure the ticket doesn't have any
···
 		ticket->t_cnt--;

 	s = GRANT_LOCK(log);
-	XLOG_GRANT_SUB_SPACE(log, ticket->t_curr_res, 'w');
-	XLOG_GRANT_SUB_SPACE(log, ticket->t_curr_res, 'r');
+	xlog_grant_sub_space(log, ticket->t_curr_res);
 	ticket->t_curr_res = ticket->t_unit_res;
 	XLOG_TIC_RESET_RES(ticket);
 	xlog_trace_loggrant(log, ticket,
···
 		return;
 	}

-	XLOG_GRANT_ADD_SPACE(log, ticket->t_unit_res, 'r');
+	xlog_grant_add_space_reserve(log, ticket->t_unit_res);
 	xlog_trace_loggrant(log, ticket,
 		"xlog_regrant_reserve_log_space: exit");
 	xlog_verify_grant_head(log, 0);
···
 	s = GRANT_LOCK(log);
 	xlog_trace_loggrant(log, ticket, "xlog_ungrant_log_space: enter");

-	XLOG_GRANT_SUB_SPACE(log, ticket->t_curr_res, 'w');
-	XLOG_GRANT_SUB_SPACE(log, ticket->t_curr_res, 'r');
+	xlog_grant_sub_space(log, ticket->t_curr_res);

 	xlog_trace_loggrant(log, ticket, "xlog_ungrant_log_space: sub current");

···
 	 */
 	if (ticket->t_cnt > 0) {
 		ASSERT(ticket->t_flags & XLOG_TIC_PERM_RESERV);
-		XLOG_GRANT_SUB_SPACE(log, ticket->t_unit_res*ticket->t_cnt,'w');
-		XLOG_GRANT_SUB_SPACE(log, ticket->t_unit_res*ticket->t_cnt,'r');
+		xlog_grant_sub_space(log, ticket->t_unit_res*ticket->t_cnt);
 	}

 	xlog_trace_loggrant(log, ticket, "xlog_ungrant_log_space: exit");
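The xlog_grant_*_space() helpers introduced above replace the old XLOG_GRANT_ADD_SPACE/XLOG_GRANT_SUB_SPACE character-flag macros. The grant heads are byte offsets into a circular log, so every add or subtract has to wrap the byte count around the log size and bump or drop the matching cycle counter. Below is a stand-alone sketch of just that arithmetic; struct fake_log and the helper names are assumptions for illustration, not the XFS symbols, but the wraparound logic mirrors the functions in the hunk.

/*
 * Sketch only: circular grant-head accounting with a cycle counter.
 */
#include <stdio.h>

struct fake_log {
	int logsize;		/* size of the circular log, in bytes */
	int grant_bytes;	/* byte offset of the grant head */
	int grant_cycle;	/* how many times the head has wrapped */
};

/* Advance the head, bumping the cycle when it wraps past the end. */
static void grant_add_space(struct fake_log *log, int bytes)
{
	log->grant_bytes += bytes;
	if (log->grant_bytes > log->logsize) {
		log->grant_bytes -= log->logsize;
		log->grant_cycle++;
	}
}

/* Move the head back, borrowing from the previous cycle if needed. */
static void grant_sub_space(struct fake_log *log, int bytes)
{
	log->grant_bytes -= bytes;
	if (log->grant_bytes < 0) {
		log->grant_bytes += log->logsize;
		log->grant_cycle--;
	}
}

int main(void)
{
	struct fake_log log = { .logsize = 1024, .grant_bytes = 900,
				.grant_cycle = 1 };

	grant_add_space(&log, 300);	/* 1200 wraps to 176, cycle 2 */
	printf("cycle %d, bytes %d\n", log.grant_cycle, log.grant_bytes);

	grant_sub_space(&log, 300);	/* unwinds back to cycle 1, byte 900 */
	printf("cycle %d, bytes %d\n", log.grant_cycle, log.grant_bytes);
	return 0;
}

Having the reserve and write heads move through separate add_space_write()/add_space_reserve() helpers, with a combined wrapper for the common case, keeps each call site from repeating the wraparound bookkeeping that the old 'w'/'r' macro arguments selected.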
fs/xfs/xfs_log.h (+1/-10)
···

 /* Region types for iovec's i_type */
-#if defined(XFS_LOG_RES_DEBUG)
 #define XLOG_REG_TYPE_BFORMAT		1
 #define XLOG_REG_TYPE_BCHUNK		2
 #define XLOG_REG_TYPE_EFI_FORMAT	3
···
 #define XLOG_REG_TYPE_COMMIT		18
 #define XLOG_REG_TYPE_TRANSHDR		19
 #define XLOG_REG_TYPE_MAX		19
-#endif

-#if defined(XFS_LOG_RES_DEBUG)
 #define XLOG_VEC_SET_TYPE(vecp, t) ((vecp)->i_type = (t))
-#else
-#define XLOG_VEC_SET_TYPE(vecp, t)
-#endif
-

 typedef struct xfs_log_iovec {
 	xfs_caddr_t	i_addr;		/* beginning address of region */
 	int		i_len;		/* length in bytes of region */
-#if defined(XFS_LOG_RES_DEBUG)
-	uint		i_type;		/* type of region */
-#endif
+	uint		i_type;		/* type of region */
 } xfs_log_iovec_t;

 typedef void* xfs_log_ticket_t;
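With the XFS_LOG_RES_DEBUG conditionals gone, every log iovec now always carries an i_type tag, so the ticket's reservation accounting can attribute consumed space to a region kind in all builds. A tiny sketch of that tagging, using simplified stand-ins for xfs_log_iovec_t and the XLOG_REG_TYPE_* constants rather than the real definitions:

/*
 * Sketch only: tag each log region with a type before it is written.
 */
#include <stdio.h>

enum region_type { REG_BFORMAT = 1, REG_BCHUNK = 2, REG_COMMIT = 18 };

struct log_iovec {
	void		*addr;	/* start of the region */
	int		 len;	/* length in bytes */
	unsigned int	 type;	/* what kind of region this is */
};

#define VEC_SET_TYPE(vecp, t)	((vecp)->type = (t))

int main(void)
{
	static char fmt[64], data[512];
	struct log_iovec vec[2] = {
		{ fmt,  sizeof(fmt),  0 },
		{ data, sizeof(data), 0 },
	};

	VEC_SET_TYPE(&vec[0], REG_BFORMAT);
	VEC_SET_TYPE(&vec[1], REG_BCHUNK);

	printf("region 0: type %u, %d bytes\n", vec[0].type, vec[0].len);
	printf("region 1: type %u, %d bytes\n", vec[1].type, vec[1].len);
	return 0;
}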
···

 /* Ticket reservation region accounting */
 #define XLOG_TIC_LEN_MAX	15
 #define XLOG_TIC_RESET_RES(t) ((t)->t_res_num = \
 				(t)->t_res_arr_sum = (t)->t_res_num_ophdrs = 0)
···
  * we don't care about.
  */
 typedef struct xlog_res {
+	uint	r_len;	/* region length		:4 */
+	uint	r_type;	/* region's transaction type	:4 */
 } xlog_res_t;

 typedef struct xlog_ticket {
 	sv_t		t_sema;		/* sleep on this semaphore	: 20 */
···
 	char		t_flags;	/* properties of reservation	: 1  */
 	uint		t_trans_type;	/* transaction type		: 4  */

 	/* reservation array fields */
 	uint		t_res_num;		/* num in array		: 4 */
 	uint		t_res_num_ophdrs;	/* num op hdrs		: 4 */
 	uint		t_res_arr_sum;		/* array sum		: 4 */
 	uint		t_res_o_flow;		/* sum overflow		: 4 */
+	xlog_res_t	t_res_arr[XLOG_TIC_LEN_MAX]; /* array of res : 8 * 15 */
 } xlog_ticket_t;

 #endif
···

 #define XLOG_FORCED_SHUTDOWN(log)	((log)->l_flags & XLOG_IO_ERROR)

 /* common routines */
 extern xfs_lsn_t xlog_assign_tail_lsn(struct xfs_mount *mp);
 extern int	 xlog_find_tail(xlog_t	*log,
 				xfs_daddr_t *head_blk,
+				xfs_daddr_t *tail_blk);
+extern int	 xlog_recover(xlog_t *log);
 extern int	 xlog_recover_finish(xlog_t *log, int mfsi_flags);
 extern void	 xlog_pack_data(xlog_t *log, xlog_in_core_t *iclog, int);
 extern void	 xlog_recover_process_iunlinks(xlog_t *log);
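The ticket now embeds a fixed array of up to XLOG_TIC_LEN_MAX region records plus running counters (array sum, op-header count, overflow), which is what xlog_print_tic_res() dumps when a reservation is overrun. The sketch below shows one plausible way such a bounded accounting array can be filled; the record_region() policy here (keep only the total once the array is full) is an assumption for illustration, not the exact XFS behaviour.

/*
 * Sketch only: bounded per-ticket region accounting.
 */
#include <stdio.h>

#define TIC_LEN_MAX 15

struct res {
	unsigned int r_len;	/* region length */
	unsigned int r_type;	/* region type tag */
};

struct ticket {
	unsigned int res_num;		/* regions actually recorded */
	unsigned int res_arr_sum;	/* sum of recorded lengths */
	unsigned int res_o_flow;	/* lengths that no longer fit */
	struct res   res_arr[TIC_LEN_MAX];
};

static void record_region(struct ticket *tic, unsigned int len,
			  unsigned int type)
{
	if (tic->res_num < TIC_LEN_MAX) {
		tic->res_arr[tic->res_num].r_len = len;
		tic->res_arr[tic->res_num].r_type = type;
		tic->res_num++;
		tic->res_arr_sum += len;
	} else {
		tic->res_o_flow += len;	/* array full: keep the total only */
	}
}

int main(void)
{
	struct ticket tic = { 0 };
	unsigned int i;

	for (i = 0; i < 20; i++)
		record_region(&tic, 128, i % 4);

	printf("recorded %u regions, sum %u, overflow %u\n",
	       tic.res_num, tic.res_arr_sum, tic.res_o_flow);
	return 0;
}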
fs/xfs/xfs_log_recover.c (+6/-6)
···
 xlog_find_tail(
 	xlog_t			*log,
 	xfs_daddr_t		*head_blk,
-	xfs_daddr_t		*tail_blk,
-	int			readonly)
+	xfs_daddr_t		*tail_blk)
 {
 	xlog_rec_header_t	*rhead;
 	xlog_op_header_t	*op_head;
···

 	/*
 	 * The logitem format's flag tells us if this was user quotaoff,
-	 * group quotaoff or both.
+	 * group/project quotaoff or both.
 	 */
 	if (qoff_f->qf_flags & XFS_UQUOTA_ACCT)
 		log->l_quotaoffs_flag |= XFS_DQ_USER;
+	if (qoff_f->qf_flags & XFS_PQUOTA_ACCT)
+		log->l_quotaoffs_flag |= XFS_DQ_PROJ;
 	if (qoff_f->qf_flags & XFS_GQUOTA_ACCT)
 		log->l_quotaoffs_flag |= XFS_DQ_GROUP;

···
  */
 int
 xlog_recover(
-	xlog_t		*log,
-	int		readonly)
+	xlog_t		*log)
 {
 	xfs_daddr_t	head_blk, tail_blk;
 	int		error;

 	/* find the tail of the log */
-	if ((error = xlog_find_tail(log, &head_blk, &tail_blk, readonly)))
+	if ((error = xlog_find_tail(log, &head_blk, &tail_blk)))
 		return error;

 	if (tail_blk != head_blk) {
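The quotaoff hunk above extends replay to project quotas: each accounting flag in the logged quotaoff item sets the matching dquot-type bit in l_quotaoffs_flag, which recovery later consults to skip updates to dquots whose quota type was switched off. A hedged sketch of that translation and check, using illustrative flag values rather than the real XFS_*QUOTA_ACCT and XFS_DQ_* constants:

/*
 * Sketch only: map quotaoff accounting flags to dquot-type bits and use
 * them to decide whether a replayed dquot change should be ignored.
 */
#include <stdbool.h>
#include <stdio.h>

#define ACCT_USER  0x1		/* stand-ins for XFS_[UPG]QUOTA_ACCT */
#define ACCT_PROJ  0x2
#define ACCT_GROUP 0x4

#define DQ_USER    0x1		/* stand-ins for XFS_DQ_USER/PROJ/GROUP */
#define DQ_PROJ    0x2
#define DQ_GROUP   0x4

/* One accounting bit sets one dquot-type bit, as in the hunk above. */
static unsigned int quotaoff_to_dqtype(unsigned int qf_flags)
{
	unsigned int type = 0;

	if (qf_flags & ACCT_USER)
		type |= DQ_USER;
	if (qf_flags & ACCT_PROJ)
		type |= DQ_PROJ;
	if (qf_flags & ACCT_GROUP)
		type |= DQ_GROUP;
	return type;
}

/* During replay, changes to a quota'd-off dquot type can be skipped. */
static bool skip_dquot(unsigned int quotaoffs_flag, unsigned int dq_type)
{
	return (quotaoffs_flag & dq_type) != 0;
}

int main(void)
{
	unsigned int off = quotaoff_to_dqtype(ACCT_USER | ACCT_PROJ);

	printf("skip user dquot?  %d\n", skip_dquot(off, DQ_USER));  /* 1 */
	printf("skip group dquot? %d\n", skip_dquot(off, DQ_GROUP)); /* 0 */
	return 0;
}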
fs/xfs/xfs_mount.c (+2/-3)
···
 STATIC void	xfs_uuid_unmount(xfs_mount_t *mp);
 STATIC void	xfs_unmountfs_wait(xfs_mount_t *);

-static struct {
+static const struct {
 	short offset;
 	short type;	/* 0 = integer
 			 * 1 = binary / string (no translation)
···

 	xfs_iflush_all(mp);

-	XFS_QM_DQPURGEALL(mp,
-		XFS_QMOPT_UQUOTA | XFS_QMOPT_GQUOTA | XFS_QMOPT_UMOUNTING);
+	XFS_QM_DQPURGEALL(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_UMOUNTING);

 	/*
 	 * Flush out the log synchronously so that we know for sure
fs/xfs/xfs_mount.h (+1/-2)
···
 	xfs_buftarg_t		*m_ddev_targp;	/* saves taking the address */
 	xfs_buftarg_t		*m_logdev_targp;/* ptr to log device */
 	xfs_buftarg_t		*m_rtdev_targp;	/* ptr to rt device */
-#define m_dev		m_ddev_targp->pbr_dev
 	__uint8_t		m_dircook_elog;	/* log d-cookie entry bits */
 	__uint8_t		m_blkbit_log;	/* blocklog + NBBY */
 	__uint8_t		m_blkbb_log;	/* blocklog - BBSHIFT */
···
 						   user */
 #define XFS_MOUNT_NOALIGN	(1ULL << 7)	/* turn off stripe alignment
 						   allocations */
-#define XFS_MOUNT_COMPAT_ATTR	(1ULL << 8)	/* do not use attr2 format */
+#define XFS_MOUNT_ATTR2		(1ULL << 8)	/* allow use of attr2 format */
 			     /*	(1ULL << 9)	-- currently unused */
 #define XFS_MOUNT_NORECOVERY	(1ULL << 10)	/* no recovery - dirty fs */
 #define XFS_MOUNT_SHARED	(1ULL << 11)	/* shared mount */
fs/xfs/xfs_rename.c (+2/-5)
···
 	xfs_inode_t	*inodes[4];
 	int		target_ip_dropped = 0;	/* dropped target_ip link? */
 	vnode_t		*src_dir_vp;
-	bhv_desc_t	*target_dir_bdp;
 	int		spaceres;
 	int		target_link_zero = 0;
 	int		num_inodes;
···
 	 * Find the XFS behavior descriptor for the target directory
 	 * vnode since it was not handed to us.
 	 */
-	target_dir_bdp = vn_bhv_lookup_unlocked(VN_BHV_HEAD(target_dir_vp),
-						&xfs_vnodeops);
-	if (target_dir_bdp == NULL) {
+	target_dp = xfs_vtoi(target_dir_vp);
+	if (target_dp == NULL) {
 		return XFS_ERROR(EXDEV);
 	}

 	src_dp = XFS_BHVTOI(src_dir_bdp);
-	target_dp = XFS_BHVTOI(target_dir_bdp);
 	mp = src_dp->i_mount;

 	if (DM_EVENT_ENABLED(src_dir_vp->v_vfsp, src_dp, DM_EVENT_RENAME) ||
fs/xfs/xfs_rw.c (+4/-5)
···
 	}
 	return (EIO);
 }
+
 /*
  * Prints out an ALERT message about I/O error.
  */
···
 		 "I/O error in filesystem (\"%s\") meta-data dev %s block 0x%llx"
 		 " (\"%s\") error %d buf count %zd",
 		 (!mp || !mp->m_fsname) ? "(fs name not set)" : mp->m_fsname,
-		 XFS_BUFTARG_NAME(bp->pb_target),
-		 (__uint64_t)blkno,
-		 func,
-		 XFS_BUF_GETERROR(bp),
-		 XFS_BUF_COUNT(bp));
+		 XFS_BUFTARG_NAME(XFS_BUF_TARGET(bp)),
+		 (__uint64_t)blkno, func,
+		 XFS_BUF_GETERROR(bp), XFS_BUF_COUNT(bp));
 }

 /*
fs/xfs/xfs_sb.h
···
 	(XFS_SB_VERSION_NUMBITS | \
 	 XFS_SB_VERSION_OKREALFBITS | \
 	 XFS_SB_VERSION_OKSASHFBITS)
···

 /*
  * There are two words to hold XFS "feature" bits: the original
···
 #define	XFS_SB_VERSION2_OKREALBITS	\
 	(XFS_SB_VERSION2_OKREALFBITS |	\
 	 XFS_SB_VERSION2_OKSASHFBITS )
···

 typedef struct xfs_sb
 {
fs/xfs/xfs_trans.c (+8/-6)
···
 	xfs_log_item_t		*lip;
 	int			i;
 #endif
+	xfs_mount_t		*mp = tp->t_mountp;

 	/*
 	 * See if the caller is being too lazy to figure out if
···
 	 * filesystem.  This happens in paths where we detect
 	 * corruption and decide to give up.
 	 */
-	if ((tp->t_flags & XFS_TRANS_DIRTY) &&
-	    !XFS_FORCED_SHUTDOWN(tp->t_mountp))
-		xfs_force_shutdown(tp->t_mountp, XFS_CORRUPT_INCORE);
+	if ((tp->t_flags & XFS_TRANS_DIRTY) && !XFS_FORCED_SHUTDOWN(mp)) {
+		XFS_ERROR_REPORT("xfs_trans_cancel", XFS_ERRLEVEL_LOW, mp);
+		xfs_force_shutdown(mp, XFS_CORRUPT_INCORE);
+	}
 #ifdef DEBUG
 	if (!(flags & XFS_TRANS_ABORT)) {
 		licp = &(tp->t_items);
···
 		}

 		lip = lidp->lid_item;
-		if (!XFS_FORCED_SHUTDOWN(tp->t_mountp))
+		if (!XFS_FORCED_SHUTDOWN(mp))
 			ASSERT(!(lip->li_type == XFS_LI_EFD));
 		}
 		licp = licp->lic_next;
···
 	}
 #endif
 	xfs_trans_unreserve_and_mod_sb(tp);
-	XFS_TRANS_UNRESERVE_AND_MOD_DQUOTS(tp->t_mountp, tp);
+	XFS_TRANS_UNRESERVE_AND_MOD_DQUOTS(mp, tp);

 	if (tp->t_ticket) {
 		if (flags & XFS_TRANS_RELEASE_LOG_RES) {
···
 		} else {
 			log_flags = 0;
 		}
-		xfs_log_done(tp->t_mountp, tp->t_ticket, NULL, log_flags);
+		xfs_log_done(mp, tp->t_ticket, NULL, log_flags);
 	}

 	/* mark this thread as no longer being in a transaction */
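The xfs_trans.c hunks cache the mount pointer in a local and, when a transaction that has already dirtied metadata is cancelled on a still-healthy filesystem, report the error before forcing a shutdown. The toy model below illustrates that cancel-path behaviour; the types and the report/shutdown helpers are simplified stand-ins, not the XFS implementations.

/*
 * Sketch only: cancelling a dirty transaction reports once and forces
 * a filesystem shutdown, mirroring the shape of the hunk above.
 */
#include <stdbool.h>
#include <stdio.h>

#define TRANS_DIRTY 0x1

struct mount {
	const char *name;
	bool forced_shutdown;
};

struct trans {
	struct mount *mountp;
	unsigned int flags;
};

static void error_report(const char *where, struct mount *mp)
{
	fprintf(stderr, "internal error %s on %s\n", where, mp->name);
}

static void force_shutdown(struct mount *mp)
{
	mp->forced_shutdown = true;
}

static void trans_cancel(struct trans *tp)
{
	struct mount *mp = tp->mountp;	/* cache the mount, as in the patch */

	if ((tp->flags & TRANS_DIRTY) && !mp->forced_shutdown) {
		error_report("trans_cancel", mp);
		force_shutdown(mp);
	}
	/* ... unreserve log and quota reservations, free the transaction ... */
}

int main(void)
{
	struct mount mp = { .name = "testfs", .forced_shutdown = false };
	struct trans tp = { .mountp = &mp, .flags = TRANS_DIRTY };

	trans_cancel(&tp);
	printf("shutdown forced: %d\n", mp.forced_shutdown);	/* 1 */
	return 0;
}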