1// SPDX-License-Identifier: GPL-2.0-or-later
2/*
3 * Copyright (C) International Business Machines Corp., 2000-2005
4 */
5/*
6 * jfs_xtree.c: extent allocation descriptor B+-tree manager
7 */
8
9#include <linux/fs.h>
10#include <linux/module.h>
11#include <linux/quotaops.h>
12#include <linux/seq_file.h>
13#include "jfs_incore.h"
14#include "jfs_filsys.h"
15#include "jfs_metapage.h"
16#include "jfs_dmap.h"
17#include "jfs_dinode.h"
18#include "jfs_superblock.h"
19#include "jfs_debug.h"
20
21/*
22 * xtree local flag
23 */
24#define XT_INSERT 0x00000001
25
26/*
27 * xtree key/entry comparison: extent offset
28 *
29 * return:
30 * -1: k < start of extent
31 * 0: start_of_extent <= k <= end_of_extent
32 * 1: k > end_of_extent
33 */
34#define XT_CMP(CMP, K, X, OFFSET64)\
35{\
36 OFFSET64 = offsetXAD(X);\
37 (CMP) = ((K) >= OFFSET64 + lengthXAD(X)) ? 1 :\
38 ((K) < OFFSET64) ? -1 : 0;\
39}
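
/*
 * Illustrative sketch (editorial addition, not part of the original
 * source): how XT_CMP classifies a search key against one xad entry.
 * Assume a hypothetical entry X with offsetXAD(X) == 100 and
 * lengthXAD(X) == 8, i.e. covering offsets [100, 107]:
 *
 *	int cmp;
 *	s64 off;
 *
 *	XT_CMP(cmp, 99, X, off);	// cmp == -1: key below extent
 *	XT_CMP(cmp, 100, X, off);	// cmp ==  0: first covered offset
 *	XT_CMP(cmp, 107, X, off);	// cmp ==  0: last covered offset
 *	XT_CMP(cmp, 108, X, off);	// cmp ==  1: key past extent end
 */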
40
41/* write a xad entry */
42#define XT_PUTENTRY(XAD, FLAG, OFF, LEN, ADDR)\
43{\
44 (XAD)->flag = (FLAG);\
45 XADoffset((XAD), (OFF));\
46 XADlength((XAD), (LEN));\
47 XADaddress((XAD), (ADDR));\
48}
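
/*
 * Illustrative sketch (editorial addition): a typical XT_PUTENTRY
 * call, writing a new 16-block extent at logical offset 0x40 backed
 * by physical block 0x1234 (p, index and all values hypothetical):
 *
 *	xad_t *xad = &p->xad[index];
 *
 *	XT_PUTENTRY(xad, XAD_NEW, 0x40, 16, 0x1234);
 */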
49
50#define XT_PAGE(IP, MP) BT_PAGE(IP, MP, xtpage_t, i_xtroot)
51
52/* for consistency */
53#define XT_PUTPAGE(MP) BT_PUTPAGE(MP)
54
55#define XT_GETSEARCH(IP, LEAF, BN, MP, P, INDEX) \
56 BT_GETSEARCH(IP, LEAF, BN, MP, xtpage_t, P, INDEX, i_xtroot)
57/* xtree entry parameter descriptor */
58struct xtsplit {
59 struct metapage *mp;
60 s16 index;
61 u8 flag;
62 s64 off;
63 s64 addr;
64 int len;
65 struct pxdlist *pxdlist;
66};
67
68
69/*
70 * statistics
71 */
72#ifdef CONFIG_JFS_STATISTICS
73static struct {
74 uint search;
75 uint fastSearch;
76 uint split;
77} xtStat;
78#endif
79
80
81/*
82 * forward references
83 */
84static int xtSearch(struct inode *ip, s64 xoff, s64 *next, int *cmpp,
85 struct btstack * btstack, int flag);
86
87static int xtSplitUp(tid_t tid,
88 struct inode *ip,
89 struct xtsplit * split, struct btstack * btstack);
90
91static int xtSplitPage(tid_t tid, struct inode *ip, struct xtsplit * split,
92 struct metapage ** rmpp, s64 * rbnp);
93
94static int xtSplitRoot(tid_t tid, struct inode *ip,
95 struct xtsplit * split, struct metapage ** rmpp);
96
97/*
98 * xt_getpage()
99 *
100 * function: get the page buffer for a specified block address.
101 *
102 * parameters:
103 * ip - pointer to the inode
104 * bn - block number (s64) of the xtree page to be retrieved;
105 * mp - pointer to a metapage pointer where the page buffer is returned;
106 *
107 * returns:
108 * A pointer to the xtree page (xtpage_t) on success; an ERR_PTR()-encoded errno (e.g. -EIO) on error.
109 */
110
111static inline xtpage_t *xt_getpage(struct inode *ip, s64 bn, struct metapage **mp)
112{
113 xtpage_t *p;
114 int rc;
115
116 BT_GETPAGE(ip, bn, *mp, xtpage_t, PSIZE, p, rc, i_xtroot);
117
118 if (rc)
119 return ERR_PTR(rc);
120 if ((le16_to_cpu(p->header.nextindex) < XTENTRYSTART) ||
121 (le16_to_cpu(p->header.nextindex) >
122 le16_to_cpu(p->header.maxentry)) ||
123 (le16_to_cpu(p->header.maxentry) >
124 ((bn == 0) ? XTROOTMAXSLOT : PSIZE >> L2XTSLOTSIZE))) {
125 jfs_error(ip->i_sb, "xt_getpage: xtree page corrupt\n");
126 BT_PUTPAGE(*mp);
127 *mp = NULL;
128 return ERR_PTR(-EIO);
129 }
130 return p;
131}
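
/*
 * Illustrative sketch (editorial addition): the calling convention
 * xt_getpage() expects. The page comes back pinned, so every
 * successful call must eventually be paired with XT_PUTPAGE():
 *
 *	struct metapage *mp;
 *	xtpage_t *p;
 *
 *	p = xt_getpage(ip, bn, &mp);
 *	if (IS_ERR(p))
 *		return PTR_ERR(p);
 *	// ... use p ...
 *	XT_PUTPAGE(mp);
 */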
132
133/*
134 * xtLookup()
135 *
136 * function: map a single page into a physical extent;
137 */
138int xtLookup(struct inode *ip, s64 lstart,
139 s64 llen, int *pflag, s64 * paddr, s32 * plen, int no_check)
140{
141 int rc = 0;
142 struct btstack btstack;
143 int cmp;
144 s64 bn;
145 struct metapage *mp;
146 xtpage_t *p;
147 int index;
148 xad_t *xad;
149 s64 next, size, xoff, xend;
150 int xlen;
151 s64 xaddr;
152
153 *paddr = 0;
154 *plen = llen;
155
156 if (!no_check) {
157 /* is lookup offset beyond eof ? */
158 size = ((u64) ip->i_size + (JFS_SBI(ip->i_sb)->bsize - 1)) >>
159 JFS_SBI(ip->i_sb)->l2bsize;
160 if (lstart >= size)
161 return 0;
162 }
163
164 /*
165 * search for the xad entry covering the logical extent
166 */
167//search:
168 if ((rc = xtSearch(ip, lstart, &next, &cmp, &btstack, 0))) {
169 jfs_err("xtLookup: xtSearch returned %d", rc);
170 return rc;
171 }
172
173 /*
174 * compute the physical extent covering logical extent
175 *
176 * N.B. search may have failed (e.g., hole in sparse file),
177 * and returned the index of the next entry.
178 */
179 /* retrieve search result */
180 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
181
182 /* is xad found covering start of logical extent ?
183 * lstart is a page start address,
184 * i.e., lstart cannot start in a hole;
185 */
186 if (cmp) {
187 if (next)
188 *plen = min(next - lstart, llen);
189 goto out;
190 }
191
192 /*
193 * lxd covered by xad
194 */
195 xad = &p->xad[index];
196 xoff = offsetXAD(xad);
197 xlen = lengthXAD(xad);
198 xend = xoff + xlen;
199 xaddr = addressXAD(xad);
200
201 /* initialize new pxd */
202 *pflag = xad->flag;
203 *paddr = xaddr + (lstart - xoff);
204 /* a page must be fully covered by an xad */
205 *plen = min(xend - lstart, llen);
206
207 out:
208 XT_PUTPAGE(mp);
209
210 return rc;
211}
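
/*
 * Illustrative sketch (editorial addition): a hypothetical caller
 * mapping one logical block with xtLookup(). Since *paddr is
 * initialized to 0 and only set on a hit, rc == 0 with xaddr == 0
 * indicates the block falls in a hole:
 *
 *	int xflag;
 *	s64 xaddr;
 *	s32 xlen;
 *	int rc;
 *
 *	rc = xtLookup(ip, lblkno, 1, &xflag, &xaddr, &xlen, 0);
 *	if (rc == 0 && xaddr)
 *		use_mapping(xaddr, xlen);	// hypothetical consumer
 */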
212
213/*
214 * xtSearch()
215 *
216 * function: search for the xad entry covering specified offset.
217 *
218 * parameters:
219 * ip - file object;
220 * xoff - extent offset;
221 * nextp - address of next extent (if any) for search miss
222 * cmpp - comparison result:
223 * btstack - traverse stack;
224 * flag - search process flag (XT_INSERT);
225 *
226 * returns:
227 * btstack contains (bn, index) of search path traversed to the entry.
228 * *cmpp is set to result of comparison with the entry returned.
229 * the page containing the entry is pinned at exit.
230 */
231static int xtSearch(struct inode *ip, s64 xoff, s64 *nextp,
232 int *cmpp, struct btstack * btstack, int flag)
233{
234 struct jfs_inode_info *jfs_ip = JFS_IP(ip);
235 int cmp = 1; /* init for empty page */
236 s64 bn; /* block number */
237 struct metapage *mp; /* page buffer */
238 xtpage_t *p; /* page */
239 xad_t *xad;
240 int base, index, lim, btindex;
241 struct btframe *btsp;
242 int nsplit = 0; /* number of pages to split */
243 s64 t64;
244 s64 next = 0;
245
246 INCREMENT(xtStat.search);
247
248 BT_CLR(btstack);
249
250 btstack->nsplit = 0;
251
252 /*
253 * search down tree from root:
254 *
255 * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of
256 * internal page, child page Pi contains entry with k, Ki <= K < Kj.
257 *
258 * if entry with search key K is not found
259 * internal page search find the entry with largest key Ki
260 * less than K which point to the child page to search;
261 * leaf page search find the entry with smallest key Kj
262 * greater than K so that the returned index is the position of
263 * the entry to be shifted right for insertion of new entry.
264 * for empty tree, search key is greater than any key of the tree.
265 *
266 * by convention, root bn = 0.
267 */
268 for (bn = 0;;) {
269 /* get/pin the page to search */
270 p = xt_getpage(ip, bn, &mp);
271 if (IS_ERR(p))
272 return PTR_ERR(p);
273
274 /* try sequential access heuristics with the previous
275 * access entry in target leaf page:
276 * once search narrowed down into the target leaf,
277 * key must either match an entry in the leaf or
278 * key entry does not exist in the tree;
279 */
280//fastSearch:
281 if ((jfs_ip->btorder & BT_SEQUENTIAL) &&
282 (p->header.flag & BT_LEAF) &&
283 (index = jfs_ip->btindex) <
284 le16_to_cpu(p->header.nextindex)) {
285 xad = &p->xad[index];
286 t64 = offsetXAD(xad);
287 if (xoff < t64 + lengthXAD(xad)) {
288 if (xoff >= t64) {
289 *cmpp = 0;
290 goto out;
291 }
292
293 /* stop sequential access heuristics */
294 goto binarySearch;
295 } else { /* (t64 + lengthXAD(xad)) <= xoff */
296
297 /* try next sequential entry */
298 index++;
299 if (index <
300 le16_to_cpu(p->header.nextindex)) {
301 xad++;
302 t64 = offsetXAD(xad);
303 if (xoff < t64 + lengthXAD(xad)) {
304 if (xoff >= t64) {
305 *cmpp = 0;
306 goto out;
307 }
308
309 /* miss: key falls between
310 * previous and this entry
311 */
312 *cmpp = 1;
313 next = t64;
314 goto out;
315 }
316
317 /* (xoff >= t64 + lengthXAD(xad));
318 * matching entry may be further out:
319 * stop heuristic search
320 */
321 /* stop sequential access heuristics */
322 goto binarySearch;
323 }
324
325 /* (index == p->header.nextindex);
326 * miss: key entry does not exist in
327 * the target leaf/tree
328 */
329 *cmpp = 1;
330 goto out;
331 }
332
333 /*
334 * if hit, return index of the entry found, and
335 * if miss, where new entry with search key is
336 * to be inserted;
337 */
338 out:
339 /* compute number of pages to split */
340 if (flag & XT_INSERT) {
341 if (p->header.nextindex == /* little-endian */
342 p->header.maxentry)
343 nsplit++;
344 else
345 nsplit = 0;
346 btstack->nsplit = nsplit;
347 }
348
349 /* save search result */
350 btsp = btstack->top;
351 btsp->bn = bn;
352 btsp->index = index;
353 btsp->mp = mp;
354
355 /* update sequential access heuristics */
356 jfs_ip->btindex = index;
357
358 if (nextp)
359 *nextp = next;
360
361 INCREMENT(xtStat.fastSearch);
362 return 0;
363 }
364
365 /* well, ... full search now */
366 binarySearch:
367 lim = le16_to_cpu(p->header.nextindex) - XTENTRYSTART;
368
369 /*
370 * binary search with search key K on the current page
371 */
372 for (base = XTENTRYSTART; lim; lim >>= 1) {
373 index = base + (lim >> 1);
374
375 XT_CMP(cmp, xoff, &p->xad[index], t64);
376 if (cmp == 0) {
377 /*
378 * search hit
379 */
380 /* search hit - leaf page:
381 * return the entry found
382 */
383 if (p->header.flag & BT_LEAF) {
384 *cmpp = cmp;
385
386 /* compute number of pages to split */
387 if (flag & XT_INSERT) {
388 if (p->header.nextindex ==
389 p->header.maxentry)
390 nsplit++;
391 else
392 nsplit = 0;
393 btstack->nsplit = nsplit;
394 }
395
396 /* save search result */
397 btsp = btstack->top;
398 btsp->bn = bn;
399 btsp->index = index;
400 btsp->mp = mp;
401
402 /* init sequential access heuristics */
403 btindex = jfs_ip->btindex;
404 if (index == btindex ||
405 index == btindex + 1)
406 jfs_ip->btorder = BT_SEQUENTIAL;
407 else
408 jfs_ip->btorder = BT_RANDOM;
409 jfs_ip->btindex = index;
410
411 return 0;
412 }
413 /* search hit - internal page:
414 * descend/search its child page
415 */
416 if (index < le16_to_cpu(p->header.nextindex)-1)
417 next = offsetXAD(&p->xad[index + 1]);
418 goto next;
419 }
420
421 if (cmp > 0) {
422 base = index + 1;
423 --lim;
424 }
425 }
426
427 /*
428 * search miss
429 *
430 * base is the smallest index with key (Kj) greater than
431 * search key (K) and may be zero or maxentry index.
432 */
433 if (base < le16_to_cpu(p->header.nextindex))
434 next = offsetXAD(&p->xad[base]);
435 /*
436 * search miss - leaf page:
437 *
438 * return location of entry (base) where new entry with
439 * search key K is to be inserted.
440 */
441 if (p->header.flag & BT_LEAF) {
442 *cmpp = cmp;
443
444 /* compute number of pages to split */
445 if (flag & XT_INSERT) {
446 if (p->header.nextindex ==
447 p->header.maxentry)
448 nsplit++;
449 else
450 nsplit = 0;
451 btstack->nsplit = nsplit;
452 }
453
454 /* save search result */
455 btsp = btstack->top;
456 btsp->bn = bn;
457 btsp->index = base;
458 btsp->mp = mp;
459
460 /* init sequential access heuristics */
461 btindex = jfs_ip->btindex;
462 if (base == btindex || base == btindex + 1)
463 jfs_ip->btorder = BT_SEQUENTIAL;
464 else
465 jfs_ip->btorder = BT_RANDOM;
466 jfs_ip->btindex = base;
467
468 if (nextp)
469 *nextp = next;
470
471 return 0;
472 }
473
474 /*
475 * search miss - non-leaf page:
476 *
477 * if base is non-zero, decrement base by one to get the parent
478 * entry of the child page to search.
479 */
480 index = base ? base - 1 : base;
481
482 /*
483 * go down to child page
484 */
485 next:
486 /* update number of pages to split */
487 if (p->header.nextindex == p->header.maxentry)
488 nsplit++;
489 else
490 nsplit = 0;
491
492 /* push (bn, index) of the parent page/entry */
493 if (BT_STACK_FULL(btstack)) {
494 jfs_error(ip->i_sb, "stack overrun!\n");
495 XT_PUTPAGE(mp);
496 return -EIO;
497 }
498 BT_PUSH(btstack, bn, index);
499
500 /* get the child page block number */
501 bn = addressXAD(&p->xad[index]);
502
503 /* unpin the parent page */
504 XT_PUTPAGE(mp);
505 }
506}
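
/*
 * Illustrative sketch (editorial addition): the shape of the uniform
 * binary search used above, shown standalone over a sorted array a[]
 * of starting offsets. lim counts the remaining candidates and base
 * is the smallest index not yet ruled out; on a miss, base lands on
 * the smallest entry greater than the key, which is exactly what the
 * leaf-miss code above relies on:
 *
 *	for (base = 0; lim; lim >>= 1) {
 *		index = base + (lim >> 1);
 *		if (key == a[index])
 *			return index;		// hit
 *		if (key > a[index]) {
 *			base = index + 1;	// discard lower half
 *			--lim;
 *		}
 *	}
 *	// miss: base is the insertion point
 */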
507
508/*
509 * xtInsert()
510 *
511 * function: insert an xad entry for a new extent; if *xaddrp is 0, allocate the data extent as well;
512 *
513 * parameter:
514 * tid - transaction id;
515 * ip - file object;
516 * xflag - extent flag (XAD_NOTRECORDED):
517 * xoff - extent offset;
518 * xlen - extent length;
519 * xaddrp - extent address pointer (in/out):
520 * if (*xaddrp)
521 * caller allocated data extent at *xaddrp;
522 * else
523 * allocate data extent and return its xaddr;
524 * flag -
525 *
526 * return:
527 */
528int xtInsert(tid_t tid, /* transaction id */
529 struct inode *ip, int xflag, s64 xoff, s32 xlen, s64 * xaddrp,
530 int flag)
531{
532 int rc = 0;
533 s64 xaddr, hint;
534 struct metapage *mp; /* meta-page buffer */
535 xtpage_t *p; /* base B+-tree index page */
536 s64 bn;
537 int index, nextindex;
538 struct btstack btstack; /* traverse stack */
539 struct xtsplit split; /* split information */
540 xad_t *xad;
541 int cmp;
542 s64 next;
543 struct tlock *tlck;
544 struct xtlock *xtlck;
545
546 jfs_info("xtInsert: nxoff:0x%lx nxlen:0x%x", (ulong) xoff, xlen);
547
548 /*
549 * search for the entry location at which to insert:
550 *
551 * xtFastSearch() and xtSearch() both return (leaf page
552 * pinned, index at which to insert).
553 * n.b. xtSearch() may return index of maxentry of
554 * the full page.
555 */
556 if ((rc = xtSearch(ip, xoff, &next, &cmp, &btstack, XT_INSERT)))
557 return rc;
558
559 /* retrieve search result */
560 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
561
562 /* This test must follow XT_GETSEARCH since mp must be valid if
563 * we branch to out: */
564 if ((cmp == 0) || (next && (xlen > next - xoff))) {
565 rc = -EEXIST;
566 goto out;
567 }
568
569 /*
570 * allocate data extent requested
571 *
572 * allocation hint: last xad
573 */
574 if ((xaddr = *xaddrp) == 0) {
575 if (index > XTENTRYSTART) {
576 xad = &p->xad[index - 1];
577 hint = addressXAD(xad) + lengthXAD(xad) - 1;
578 } else
579 hint = 0;
580 if ((rc = dquot_alloc_block(ip, xlen)))
581 goto out;
582 if ((rc = dbAlloc(ip, hint, (s64) xlen, &xaddr))) {
583 dquot_free_block(ip, xlen);
584 goto out;
585 }
586 }
587
588 /*
589 * insert entry for new extent
590 */
591 xflag |= XAD_NEW;
592
593 /*
594 * if the leaf page is full, split the page and
595 * propagate up the router entry for the new page from split
596 *
597 * The xtSplitUp() will insert the entry and unpin the leaf page.
598 */
599 nextindex = le16_to_cpu(p->header.nextindex);
600 if (nextindex == le16_to_cpu(p->header.maxentry)) {
601 split.mp = mp;
602 split.index = index;
603 split.flag = xflag;
604 split.off = xoff;
605 split.len = xlen;
606 split.addr = xaddr;
607 split.pxdlist = NULL;
608 if ((rc = xtSplitUp(tid, ip, &split, &btstack))) {
609 /* undo data extent allocation */
610 if (*xaddrp == 0) {
611 dbFree(ip, xaddr, (s64) xlen);
612 dquot_free_block(ip, xlen);
613 }
614 return rc;
615 }
616
617 *xaddrp = xaddr;
618 return 0;
619 }
620
621 /*
622 * insert the new entry into the leaf page
623 */
624 /*
625 * acquire a transaction lock on the leaf page;
626 *
627 * action: xad insertion/extension;
628 */
629 BT_MARK_DIRTY(mp, ip);
630
631 /* if insert into middle, shift right remaining entries. */
632 if (index < nextindex)
633 memmove(&p->xad[index + 1], &p->xad[index],
634 (nextindex - index) * sizeof(xad_t));
635
636 /* insert the new entry: mark the entry NEW */
637 xad = &p->xad[index];
638 XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr);
639
640 /* advance next available entry index */
641 le16_add_cpu(&p->header.nextindex, 1);
642
643 /* Don't log it if there are no links to the file */
644 if (!test_cflag(COMMIT_Nolink, ip)) {
645 tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
646 xtlck = (struct xtlock *) & tlck->lock;
647 xtlck->lwm.offset =
648 (xtlck->lwm.offset) ? min(index,
649 (int)xtlck->lwm.offset) : index;
650 xtlck->lwm.length =
651 le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset;
652 }
653
654 *xaddrp = xaddr;
655
656 out:
657 /* unpin the leaf page */
658 XT_PUTPAGE(mp);
659
660 return rc;
661}
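
/*
 * Illustrative sketch (editorial addition): the two allocation modes
 * of xtInsert(), selected by the in/out xaddr argument (all values
 * hypothetical):
 *
 *	s64 xaddr;
 *	int rc;
 *
 *	xaddr = 0;	// 0: have xtInsert() call dbAlloc() itself
 *	rc = xtInsert(tid, ip, 0, xoff, xlen, &xaddr, 0);
 *	// on success, xaddr now holds the allocated extent address
 *
 *	xaddr = prealloc;	// non-0: caller already allocated
 *	rc = xtInsert(tid, ip, 0, xoff, xlen, &xaddr, 0);
 */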
662
663
664/*
665 * xtSplitUp()
666 *
667 * function:
668 * split full pages as propagating insertion up the tree
669 *
670 * parameter:
671 * tid - transaction id;
672 * ip - file object;
673 * split - entry parameter descriptor;
674 * btstack - traverse stack from xtSearch()
675 *
676 * return:
677 */
678static int
679xtSplitUp(tid_t tid,
680 struct inode *ip, struct xtsplit * split, struct btstack * btstack)
681{
682 int rc = 0;
683 struct metapage *smp;
684 xtpage_t *sp; /* split page */
685 struct metapage *rmp;
686 s64 rbn; /* new right page block number */
687 struct metapage *rcmp;
688 xtpage_t *rcp; /* right child page */
689 s64 rcbn; /* right child page block number */
690 int skip; /* index of entry of insertion */
691 int nextindex; /* next available entry index of p */
692 struct btframe *parent; /* parent page entry on traverse stack */
693 xad_t *xad;
694 s64 xaddr;
695 int xlen;
696 int nsplit; /* number of pages split */
697 struct pxdlist pxdlist;
698 pxd_t *pxd;
699 struct tlock *tlck;
700 struct xtlock *xtlck;
701
702 smp = split->mp;
703 sp = XT_PAGE(ip, smp);
704
705 /* is inode xtree root extension/inline EA area free ? */
706 if ((sp->header.flag & BT_ROOT) && (!S_ISDIR(ip->i_mode)) &&
707 (le16_to_cpu(sp->header.maxentry) < XTROOTMAXSLOT) &&
708 (JFS_IP(ip)->mode2 & INLINEEA)) {
709 sp->header.maxentry = cpu_to_le16(XTROOTMAXSLOT);
710 JFS_IP(ip)->mode2 &= ~INLINEEA;
711
712 BT_MARK_DIRTY(smp, ip);
713 /*
714 * acquire a transaction lock on the leaf page;
715 *
716 * action: xad insertion/extension;
717 */
718
719 /* if insert into middle, shift right remaining entries. */
720 skip = split->index;
721 nextindex = le16_to_cpu(sp->header.nextindex);
722 if (skip < nextindex)
723 memmove(&sp->xad[skip + 1], &sp->xad[skip],
724 (nextindex - skip) * sizeof(xad_t));
725
726 /* insert the new entry: mark the entry NEW */
727 xad = &sp->xad[skip];
728 XT_PUTENTRY(xad, split->flag, split->off, split->len,
729 split->addr);
730
731 /* advance next available entry index */
732 le16_add_cpu(&sp->header.nextindex, 1);
733
734 /* Don't log it if there are no links to the file */
735 if (!test_cflag(COMMIT_Nolink, ip)) {
736 tlck = txLock(tid, ip, smp, tlckXTREE | tlckGROW);
737 xtlck = (struct xtlock *) & tlck->lock;
738 xtlck->lwm.offset = (xtlck->lwm.offset) ?
739 min(skip, (int)xtlck->lwm.offset) : skip;
740 xtlck->lwm.length =
741 le16_to_cpu(sp->header.nextindex) -
742 xtlck->lwm.offset;
743 }
744
745 return 0;
746 }
747
748 /*
749 * allocate new index blocks to cover index page split(s)
750 *
751 * allocation hint: ?
752 */
753 if (split->pxdlist == NULL) {
754 nsplit = btstack->nsplit;
755 split->pxdlist = &pxdlist;
756 pxdlist.maxnpxd = pxdlist.npxd = 0;
757 pxd = &pxdlist.pxd[0];
758 xlen = JFS_SBI(ip->i_sb)->nbperpage;
759 for (; nsplit > 0; nsplit--, pxd++) {
760 if ((rc = dbAlloc(ip, (s64) 0, (s64) xlen, &xaddr))
761 == 0) {
762 PXDaddress(pxd, xaddr);
763 PXDlength(pxd, xlen);
764
765 pxdlist.maxnpxd++;
766
767 continue;
768 }
769
770 /* undo allocation */
771
772 XT_PUTPAGE(smp);
773 return rc;
774 }
775 }
776
777 /*
778 * Split leaf page <sp> into <sp> and a new right page <rp>.
779 *
780 * The split routines insert the new entry into the leaf page,
781 * and acquire txLock as appropriate.
782 * return <rp> pinned and its block number <rpbn>.
783 */
784 rc = (sp->header.flag & BT_ROOT) ?
785 xtSplitRoot(tid, ip, split, &rmp) :
786 xtSplitPage(tid, ip, split, &rmp, &rbn);
787
788 XT_PUTPAGE(smp);
789
790 if (rc)
791 return -EIO;
792 /*
793 * propagate up the router entry for the leaf page just split
794 *
795 * insert a router entry for the new page into the parent page,
796 * propagate the insert/split up the tree by walking back the stack
797 * of (bn of parent page, index of child page entry in parent page)
798 * that were traversed during the search for the page that split.
799 *
800 * the propagation of insert/split up the tree stops if the root
801 * splits or the page inserted into doesn't have to split to hold
802 * the new entry.
803 *
804 * the parent entry for the split page remains the same, and
805 * a new entry is inserted at its right with the first key and
806 * block number of the new right page.
807 *
808 * There are a maximum of 3 pages pinned at any time:
809 * right child, left parent and right parent (when the parent splits)
810 * to keep the child page pinned while working on the parent.
811 * make sure that all pins are released at exit.
812 */
813 while ((parent = BT_POP(btstack)) != NULL) {
814 /* parent page specified by stack frame <parent> */
815
816 /* keep current child pages <rcp> pinned */
817 rcmp = rmp;
818 rcbn = rbn;
819 rcp = XT_PAGE(ip, rcmp);
820
821 /*
822 * insert router entry in parent for new right child page <rp>
823 */
824 /* get/pin the parent page <sp> */
825 sp = xt_getpage(ip, parent->bn, &smp);
826 if (IS_ERR(sp)) {
827 XT_PUTPAGE(rcmp);
828 return PTR_ERR(sp);
829 }
830
831 /*
832 * The new key entry goes ONE AFTER the index of parent entry,
833 * because the split was to the right.
834 */
835 skip = parent->index + 1;
836
837 /*
838 * split or shift right remaining entries of the parent page
839 */
840 nextindex = le16_to_cpu(sp->header.nextindex);
841 /*
842 * parent page is full - split the parent page
843 */
844 if (nextindex == le16_to_cpu(sp->header.maxentry)) {
845 /* init for parent page split */
846 split->mp = smp;
847 split->index = skip; /* index at insert */
848 split->flag = XAD_NEW;
849 split->off = offsetXAD(&rcp->xad[XTENTRYSTART]);
850 split->len = JFS_SBI(ip->i_sb)->nbperpage;
851 split->addr = rcbn;
852
853 /* unpin previous right child page */
854 XT_PUTPAGE(rcmp);
855
856 /* The split routines insert the new entry,
857 * and acquire txLock as appropriate.
858 * return <rp> pinned and its block number <rpbn>.
859 */
860 rc = (sp->header.flag & BT_ROOT) ?
861 xtSplitRoot(tid, ip, split, &rmp) :
862 xtSplitPage(tid, ip, split, &rmp, &rbn);
863 if (rc) {
864 XT_PUTPAGE(smp);
865 return rc;
866 }
867
868 XT_PUTPAGE(smp);
869 /* keep new child page <rp> pinned */
870 }
871 /*
872 * parent page is not full - insert in parent page
873 */
874 else {
875 /*
876 * insert router entry in parent for the right child
877 * page from the first entry of the right child page:
878 */
879 /*
880 * acquire a transaction lock on the parent page;
881 *
882 * action: router xad insertion;
883 */
884 BT_MARK_DIRTY(smp, ip);
885
886 /*
887 * if insert into middle, shift right remaining entries
888 */
889 if (skip < nextindex)
890 memmove(&sp->xad[skip + 1], &sp->xad[skip],
891 (nextindex -
892 skip) << L2XTSLOTSIZE);
893
894 /* insert the router entry */
895 xad = &sp->xad[skip];
896 XT_PUTENTRY(xad, XAD_NEW,
897 offsetXAD(&rcp->xad[XTENTRYSTART]),
898 JFS_SBI(ip->i_sb)->nbperpage, rcbn);
899
900 /* advance next available entry index. */
901 le16_add_cpu(&sp->header.nextindex, 1);
902
903 /* Don't log it if there are no links to the file */
904 if (!test_cflag(COMMIT_Nolink, ip)) {
905 tlck = txLock(tid, ip, smp,
906 tlckXTREE | tlckGROW);
907 xtlck = (struct xtlock *) & tlck->lock;
908 xtlck->lwm.offset = (xtlck->lwm.offset) ?
909 min(skip, (int)xtlck->lwm.offset) : skip;
910 xtlck->lwm.length =
911 le16_to_cpu(sp->header.nextindex) -
912 xtlck->lwm.offset;
913 }
914
915 /* unpin parent page */
916 XT_PUTPAGE(smp);
917
918 /* exit propagate up */
919 break;
920 }
921 }
922
923 /* unpin current right page */
924 XT_PUTPAGE(rmp);
925
926 return 0;
927}
928
929
930/*
931 * xtSplitPage()
932 *
933 * function:
934 * split a full non-root page into
935 * original/split/left page and new right page
936 * i.e., the original/split page remains as left page.
937 *
938 * parameter:
939 * int tid,
940 * struct inode *ip,
941 * struct xtsplit *split,
942 * struct metapage **rmpp,
943 * s64 *rbnp,
944 *
945 * return:
946 * 0 for success; a negative errno on error. On success, *rmpp returns the pinned new right page and *rbnp its block number.
947 */
948static int
949xtSplitPage(tid_t tid, struct inode *ip,
950 struct xtsplit * split, struct metapage ** rmpp, s64 * rbnp)
951{
952 int rc = 0;
953 struct metapage *smp;
954 xtpage_t *sp;
955 struct metapage *rmp;
956 xtpage_t *rp; /* new right page allocated */
957 s64 rbn; /* new right page block number */
958 struct metapage *mp;
959 xtpage_t *p;
960 s64 nextbn;
961 int skip, maxentry, middle, righthalf, n;
962 xad_t *xad;
963 struct pxdlist *pxdlist;
964 pxd_t *pxd;
965 struct tlock *tlck;
966 struct xtlock *sxtlck = NULL, *rxtlck = NULL;
967 int quota_allocation = 0;
968
969 smp = split->mp;
970 sp = XT_PAGE(ip, smp);
971
972 INCREMENT(xtStat.split);
973
974 pxdlist = split->pxdlist;
975 pxd = &pxdlist->pxd[pxdlist->npxd];
976 pxdlist->npxd++;
977 rbn = addressPXD(pxd);
978
979 /* Allocate blocks to quota. */
980 rc = dquot_alloc_block(ip, lengthPXD(pxd));
981 if (rc)
982 goto clean_up;
983
984 quota_allocation += lengthPXD(pxd);
985
986 /*
987 * allocate the new right page for the split
988 */
989 rmp = get_metapage(ip, rbn, PSIZE, 1);
990 if (rmp == NULL) {
991 rc = -EIO;
992 goto clean_up;
993 }
994
995 jfs_info("xtSplitPage: ip:0x%p smp:0x%p rmp:0x%p", ip, smp, rmp);
996
997 BT_MARK_DIRTY(rmp, ip);
998 /*
999 * action: new page;
1000 */
1001
1002 rp = (xtpage_t *) rmp->data;
1003 rp->header.self = *pxd;
1004 rp->header.flag = sp->header.flag & BT_TYPE;
1005 rp->header.maxentry = sp->header.maxentry; /* little-endian */
1006 rp->header.nextindex = cpu_to_le16(XTENTRYSTART);
1007
1008 BT_MARK_DIRTY(smp, ip);
1009 /* Don't log it if there are no links to the file */
1010 if (!test_cflag(COMMIT_Nolink, ip)) {
1011 /*
1012 * acquire a transaction lock on the new right page;
1013 */
1014 tlck = txLock(tid, ip, rmp, tlckXTREE | tlckNEW);
1015 rxtlck = (struct xtlock *) & tlck->lock;
1016 rxtlck->lwm.offset = XTENTRYSTART;
1017 /*
1018 * acquire a transaction lock on the split page
1019 */
1020 tlck = txLock(tid, ip, smp, tlckXTREE | tlckGROW);
1021 sxtlck = (struct xtlock *) & tlck->lock;
1022 }
1023
1024 /*
1025 * initialize/update sibling pointers of <sp> and <rp>
1026 */
1027 nextbn = le64_to_cpu(sp->header.next);
1028 rp->header.next = cpu_to_le64(nextbn);
1029 rp->header.prev = cpu_to_le64(addressPXD(&sp->header.self));
1030 sp->header.next = cpu_to_le64(rbn);
1031
1032 skip = split->index;
1033
1034 /*
1035 * sequential append at tail (after last entry of last page)
1036 *
1037 * if splitting the last page on a level because of appending
1038 * an entry to it (skip is maxentry), it's likely that the access is
1039 * sequential. adding an empty page on the side of the level is less
1040 * work and can push the fill factor much higher than normal.
1041 * if we're wrong it's no big deal - we will do the split the right
1042 * way next time.
1043 * (it may look like it's equally easy to do a similar hack for
1044 * reverse sorted data, that is, split the tree left, but it's not.
1045 * Be my guest.)
1046 */
1047 if (nextbn == 0 && skip == le16_to_cpu(sp->header.maxentry)) {
1048 /*
1049 * acquire a transaction lock on the new/right page;
1050 *
1051 * action: xad insertion;
1052 */
1053 /* insert entry at the first entry of the new right page */
1054 xad = &rp->xad[XTENTRYSTART];
1055 XT_PUTENTRY(xad, split->flag, split->off, split->len,
1056 split->addr);
1057
1058 rp->header.nextindex = cpu_to_le16(XTENTRYSTART + 1);
1059
1060 if (!test_cflag(COMMIT_Nolink, ip)) {
1061 /* rxtlck->lwm.offset = XTENTRYSTART; */
1062 rxtlck->lwm.length = 1;
1063 }
1064
1065 *rmpp = rmp;
1066 *rbnp = rbn;
1067
1068 jfs_info("xtSplitPage: sp:0x%p rp:0x%p", sp, rp);
1069 return 0;
1070 }
1071
1072 /*
1073 * non-sequential insert (at possibly middle page)
1074 */
1075
1076 /*
1077 * update previous pointer of old next/right page of <sp>
1078 */
1079 if (nextbn != 0) {
1080 p = xt_getpage(ip, nextbn, &mp);
1081 if (IS_ERR(p)) {
1082 XT_PUTPAGE(rmp);
1083 return PTR_ERR(p);
1084 }
1085
1086 BT_MARK_DIRTY(mp, ip);
1087 /*
1088 * acquire a transaction lock on the next page;
1089 *
1090 * action:sibling pointer update;
1091 */
1092 if (!test_cflag(COMMIT_Nolink, ip))
1093 tlck = txLock(tid, ip, mp, tlckXTREE | tlckRELINK);
1094
1095 p->header.prev = cpu_to_le64(rbn);
1096
1097 /* sibling page may have been updated previously, or
1098 * it may be updated later;
1099 */
1100
1101 XT_PUTPAGE(mp);
1102 }
1103
1104 /*
1105 * split the data between the split and new/right pages
1106 */
1107 maxentry = le16_to_cpu(sp->header.maxentry);
1108 middle = maxentry >> 1;
1109 righthalf = maxentry - middle;
1110
1111 /*
1112 * skip index in old split/left page - insert into left page:
1113 */
1114 if (skip <= middle) {
1115 /* move right half of split page to the new right page */
1116 memmove(&rp->xad[XTENTRYSTART], &sp->xad[middle],
1117 righthalf << L2XTSLOTSIZE);
1118
1119 /* shift right tail of left half to make room for new entry */
1120 if (skip < middle)
1121 memmove(&sp->xad[skip + 1], &sp->xad[skip],
1122 (middle - skip) << L2XTSLOTSIZE);
1123
1124 /* insert new entry */
1125 xad = &sp->xad[skip];
1126 XT_PUTENTRY(xad, split->flag, split->off, split->len,
1127 split->addr);
1128
1129 /* update page header */
1130 sp->header.nextindex = cpu_to_le16(middle + 1);
1131 if (!test_cflag(COMMIT_Nolink, ip)) {
1132 sxtlck->lwm.offset = (sxtlck->lwm.offset) ?
1133 min(skip, (int)sxtlck->lwm.offset) : skip;
1134 }
1135
1136 rp->header.nextindex =
1137 cpu_to_le16(XTENTRYSTART + righthalf);
1138 }
1139 /*
1140 * skip index in new right page - insert into right page:
1141 */
1142 else {
1143 /* move left head of right half to right page */
1144 n = skip - middle;
1145 memmove(&rp->xad[XTENTRYSTART], &sp->xad[middle],
1146 n << L2XTSLOTSIZE);
1147
1148 /* insert new entry */
1149 n += XTENTRYSTART;
1150 xad = &rp->xad[n];
1151 XT_PUTENTRY(xad, split->flag, split->off, split->len,
1152 split->addr);
1153
1154 /* move right tail of right half to right page */
1155 if (skip < maxentry)
1156 memmove(&rp->xad[n + 1], &sp->xad[skip],
1157 (maxentry - skip) << L2XTSLOTSIZE);
1158
1159 /* update page header */
1160 sp->header.nextindex = cpu_to_le16(middle);
1161 if (!test_cflag(COMMIT_Nolink, ip)) {
1162 sxtlck->lwm.offset = (sxtlck->lwm.offset) ?
1163 min(middle, (int)sxtlck->lwm.offset) : middle;
1164 }
1165
1166 rp->header.nextindex = cpu_to_le16(XTENTRYSTART +
1167 righthalf + 1);
1168 }
1169
1170 if (!test_cflag(COMMIT_Nolink, ip)) {
1171 sxtlck->lwm.length = le16_to_cpu(sp->header.nextindex) -
1172 sxtlck->lwm.offset;
1173
1174 /* rxtlck->lwm.offset = XTENTRYSTART; */
1175 rxtlck->lwm.length = le16_to_cpu(rp->header.nextindex) -
1176 XTENTRYSTART;
1177 }
1178
1179 *rmpp = rmp;
1180 *rbnp = rbn;
1181
1182 jfs_info("xtSplitPage: sp:0x%p rp:0x%p", sp, rp);
1183 return rc;
1184
1185 clean_up:
1186
1187 /* Rollback quota allocation. */
1188 if (quota_allocation)
1189 dquot_free_block(ip, quota_allocation);
1190
1191 return (rc);
1192}
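
/*
 * Illustrative sketch (editorial addition): the split arithmetic
 * above for a hypothetical full page with maxentry == 256 and an
 * insertion index (skip) of 100:
 *
 *	middle    = 256 >> 1;	// 128
 *	righthalf = 256 - 128;	// 128 slots move to the right page
 *
 *	// skip (100) <= middle (128), so xad[128..255] move right and
 *	// the new entry is inserted into the left (original) page.
 */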
1193
1194
1195/*
1196 * xtSplitRoot()
1197 *
1198 * function:
1199 * split the full root page into original/root/split page and new
1200 * right page
1201 * i.e., root remains fixed in tree anchor (inode) and the root is
1202 * copied to a single new right child page since root page <<
1203 * non-root page, and the split root page contains a single entry
1204 * for the new right child page.
1205 *
1206 * parameter:
1207 * int tid,
1208 * struct inode *ip,
1209 * struct xtsplit *split,
1210 * struct metapage **rmpp)
1211 *
1212 * return:
1213 * 0 for success; a negative errno on error. On success, *rmpp returns the pinned new right child page.
1214 */
1215static int
1216xtSplitRoot(tid_t tid,
1217 struct inode *ip, struct xtsplit * split, struct metapage ** rmpp)
1218{
1219 xtpage_t *sp;
1220 struct metapage *rmp;
1221 xtpage_t *rp;
1222 s64 rbn;
1223 int skip, nextindex;
1224 xad_t *xad;
1225 pxd_t *pxd;
1226 struct pxdlist *pxdlist;
1227 struct tlock *tlck;
1228 struct xtlock *xtlck;
1229 int rc;
1230
1231 sp = (xtpage_t *) &JFS_IP(ip)->i_xtroot;
1232
1233 INCREMENT(xtStat.split);
1234
1235 /*
1236 * allocate a single (right) child page
1237 */
1238 pxdlist = split->pxdlist;
1239 pxd = &pxdlist->pxd[pxdlist->npxd];
1240 pxdlist->npxd++;
1241 rbn = addressPXD(pxd);
1242 rmp = get_metapage(ip, rbn, PSIZE, 1);
1243 if (rmp == NULL)
1244 return -EIO;
1245
1246 /* Allocate blocks to quota. */
1247 rc = dquot_alloc_block(ip, lengthPXD(pxd));
1248 if (rc) {
1249 release_metapage(rmp);
1250 return rc;
1251 }
1252
1253 jfs_info("xtSplitRoot: ip:0x%p rmp:0x%p", ip, rmp);
1254
1255 /*
1256 * acquire a transaction lock on the new right page;
1257 *
1258 * action: new page;
1259 */
1260 BT_MARK_DIRTY(rmp, ip);
1261
1262 rp = (xtpage_t *) rmp->data;
1263 rp->header.flag =
1264 (sp->header.flag & BT_LEAF) ? BT_LEAF : BT_INTERNAL;
1265 rp->header.self = *pxd;
1266 rp->header.nextindex = cpu_to_le16(XTENTRYSTART);
1267 rp->header.maxentry = cpu_to_le16(PSIZE >> L2XTSLOTSIZE);
1268
1269 /* initialize sibling pointers */
1270 rp->header.next = 0;
1271 rp->header.prev = 0;
1272
1273 /*
1274 * copy the in-line root page into new right page extent
1275 */
1276 nextindex = le16_to_cpu(sp->header.maxentry);
1277 memmove(&rp->xad[XTENTRYSTART], &sp->xad[XTENTRYSTART],
1278 (nextindex - XTENTRYSTART) << L2XTSLOTSIZE);
1279
1280 /*
1281 * insert the new entry into the new right/child page
1282 * (skip index in the new right page will not change)
1283 */
1284 skip = split->index;
1285 /* if insert into middle, shift right remaining entries */
1286 if (skip != nextindex)
1287 memmove(&rp->xad[skip + 1], &rp->xad[skip],
1288 (nextindex - skip) * sizeof(xad_t));
1289
1290 xad = &rp->xad[skip];
1291 XT_PUTENTRY(xad, split->flag, split->off, split->len, split->addr);
1292
1293 /* update page header */
1294 rp->header.nextindex = cpu_to_le16(nextindex + 1);
1295
1296 if (!test_cflag(COMMIT_Nolink, ip)) {
1297 tlck = txLock(tid, ip, rmp, tlckXTREE | tlckNEW);
1298 xtlck = (struct xtlock *) & tlck->lock;
1299 xtlck->lwm.offset = XTENTRYSTART;
1300 xtlck->lwm.length = le16_to_cpu(rp->header.nextindex) -
1301 XTENTRYSTART;
1302 }
1303
1304 /*
1305 * reset the root
1306 *
1307 * init root with the single entry for the new right page
1308 * set the 1st entry offset to 0, which force the left-most key
1309 * at any level of the tree to be less than any search key.
1310 */
1311 /*
1312 * acquire a transaction lock on the root page (in-memory inode);
1313 *
1314 * action: root split;
1315 */
1316 BT_MARK_DIRTY(split->mp, ip);
1317
1318 xad = &sp->xad[XTENTRYSTART];
1319 XT_PUTENTRY(xad, XAD_NEW, 0, JFS_SBI(ip->i_sb)->nbperpage, rbn);
1320
1321 /* update page header of root */
1322 sp->header.flag &= ~BT_LEAF;
1323 sp->header.flag |= BT_INTERNAL;
1324
1325 sp->header.nextindex = cpu_to_le16(XTENTRYSTART + 1);
1326
1327 if (!test_cflag(COMMIT_Nolink, ip)) {
1328 tlck = txLock(tid, ip, split->mp, tlckXTREE | tlckGROW);
1329 xtlck = (struct xtlock *) & tlck->lock;
1330 xtlck->lwm.offset = XTENTRYSTART;
1331 xtlck->lwm.length = 1;
1332 }
1333
1334 *rmpp = rmp;
1335
1336 jfs_info("xtSplitRoot: sp:0x%p rp:0x%p", sp, rp);
1337 return 0;
1338}
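
/*
 * Illustrative sketch (editorial addition): the tree shape after
 * xtSplitRoot(). The in-inode root keeps exactly one router entry at
 * offset 0 pointing at the new child, so the tree grows one level:
 *
 *	before:	root (BT_LEAF, full)
 *	after:	root (BT_INTERNAL): xad[XTENTRYSTART] = {off 0 -> rbn}
 *		child at rbn: old root entries + the new entry
 */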
1339
1340
1341/*
1342 * xtExtend()
1343 *
1344 * function: extend in-place;
1345 *
1346 * note: existing extent may or may not have been committed.
1347 * caller is responsible for pager buffer cache update, and
1348 * working block allocation map update;
1349 * update pmap: alloc whole extended extent;
1350 */
1351int xtExtend(tid_t tid, /* transaction id */
1352 struct inode *ip, s64 xoff, /* delta extent offset */
1353 s32 xlen, /* delta extent length */
1354 int flag)
1355{
1356 int rc = 0;
1357 int cmp;
1358 struct metapage *mp; /* meta-page buffer */
1359 xtpage_t *p; /* base B+-tree index page */
1360 s64 bn;
1361 int index, nextindex, len;
1362 struct btstack btstack; /* traverse stack */
1363 struct xtsplit split; /* split information */
1364 xad_t *xad;
1365 s64 xaddr;
1366 struct tlock *tlck;
1367 struct xtlock *xtlck = NULL;
1368
1369 jfs_info("xtExtend: nxoff:0x%lx nxlen:0x%x", (ulong) xoff, xlen);
1370
1371 /* there must exist extent to be extended */
1372 if ((rc = xtSearch(ip, xoff - 1, NULL, &cmp, &btstack, XT_INSERT)))
1373 return rc;
1374
1375 /* retrieve search result */
1376 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
1377
1378 if (cmp != 0) {
1379 XT_PUTPAGE(mp);
1380 jfs_error(ip->i_sb, "xtSearch did not find extent\n");
1381 return -EIO;
1382 }
1383
1384 /* extension must be contiguous */
1385 xad = &p->xad[index];
1386 if ((offsetXAD(xad) + lengthXAD(xad)) != xoff) {
1387 XT_PUTPAGE(mp);
1388 jfs_error(ip->i_sb, "extension is not contiguous\n");
1389 return -EIO;
1390 }
1391
1392 /*
1393 * acquire a transaction lock on the leaf page;
1394 *
1395 * action: xad insertion/extension;
1396 */
1397 BT_MARK_DIRTY(mp, ip);
1398 if (!test_cflag(COMMIT_Nolink, ip)) {
1399 tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
1400 xtlck = (struct xtlock *) & tlck->lock;
1401 }
1402
1403 /* extend will overflow extent ? */
1404 xlen = lengthXAD(xad) + xlen;
1405 if ((len = xlen - MAXXLEN) <= 0)
1406 goto extendOld;
1407
1408 /*
1409 * extent overflow: insert entry for new extent
1410 */
1411//insertNew:
1412 xoff = offsetXAD(xad) + MAXXLEN;
1413 xaddr = addressXAD(xad) + MAXXLEN;
1414 nextindex = le16_to_cpu(p->header.nextindex);
1415
1416 /*
1417 * if the leaf page is full, insert the new entry and
1418 * propagate up the router entry for the new page from split
1419 *
1420 * The xtSplitUp() will insert the entry and unpin the leaf page.
1421 */
1422 if (nextindex == le16_to_cpu(p->header.maxentry)) {
1423 /* xtSplitUp() unpins leaf pages */
1424 split.mp = mp;
1425 split.index = index + 1;
1426 split.flag = XAD_NEW;
1427 split.off = xoff; /* split offset */
1428 split.len = len;
1429 split.addr = xaddr;
1430 split.pxdlist = NULL;
1431 if ((rc = xtSplitUp(tid, ip, &split, &btstack)))
1432 return rc;
1433
1434 /* get back old page */
1435 p = xt_getpage(ip, bn, &mp);
1436 if (IS_ERR(p))
1437 return PTR_ERR(p);
1438 /*
1439 * if leaf root has been split, original root has been
1440 * copied to new child page, i.e., original entry now
1441 * resides on the new child page;
1442 */
1443 if (p->header.flag & BT_INTERNAL) {
1444 ASSERT(p->header.nextindex ==
1445 cpu_to_le16(XTENTRYSTART + 1));
1446 xad = &p->xad[XTENTRYSTART];
1447 bn = addressXAD(xad);
1448 XT_PUTPAGE(mp);
1449
1450 /* get new child page */
1451 p = xt_getpage(ip, bn, &mp);
1452 if (IS_ERR(p))
1453 return PTR_ERR(p);
1454
1455 BT_MARK_DIRTY(mp, ip);
1456 if (!test_cflag(COMMIT_Nolink, ip)) {
1457 tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
1458 xtlck = (struct xtlock *) & tlck->lock;
1459 }
1460 }
1461 }
1462 /*
1463 * insert the new entry into the leaf page
1464 */
1465 else {
1466 /* insert the new entry: mark the entry NEW */
1467 xad = &p->xad[index + 1];
1468 XT_PUTENTRY(xad, XAD_NEW, xoff, len, xaddr);
1469
1470 /* advance next available entry index */
1471 le16_add_cpu(&p->header.nextindex, 1);
1472 }
1473
1474 /* get back old entry */
1475 xad = &p->xad[index];
1476 xlen = MAXXLEN;
1477
1478 /*
1479 * extend old extent
1480 */
1481 extendOld:
1482 XADlength(xad, xlen);
1483 if (!(xad->flag & XAD_NEW))
1484 xad->flag |= XAD_EXTENDED;
1485
1486 if (!test_cflag(COMMIT_Nolink, ip)) {
1487 xtlck->lwm.offset =
1488 (xtlck->lwm.offset) ? min(index,
1489 (int)xtlck->lwm.offset) : index;
1490 xtlck->lwm.length =
1491 le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset;
1492 }
1493
1494 /* unpin the leaf page */
1495 XT_PUTPAGE(mp);
1496
1497 return rc;
1498}
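
/*
 * Illustrative sketch (editorial addition): the overflow split in
 * xtExtend(), assuming MAXXLEN is the 24-bit xad length limit
 * (0xffffff). Extending an extent of length 0xfffff0 by 0x20 blocks:
 *
 *	xlen = 0xfffff0 + 0x20;		// 0x1000010, too long for one xad
 *	len  = xlen - MAXXLEN;		// 0x11 blocks overflow
 *
 *	// the old xad keeps the first MAXXLEN blocks; a new XAD_NEW
 *	// entry of length len is inserted for the remainder.
 */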
1499
1500/*
1501 * xtUpdate()
1502 *
1503 * function: update XAD;
1504 *
1505 * update extent for allocated_but_not_recorded or
1506 * compressed extent;
1507 *
1508 * parameter:
1509 * nxad - new XAD;
1510 * logical extent of the specified XAD must be completely
1511 * contained by an existing XAD;
1512 */
1513int xtUpdate(tid_t tid, struct inode *ip, xad_t * nxad)
1514{ /* new XAD */
1515 int rc = 0;
1516 int cmp;
1517 struct metapage *mp; /* meta-page buffer */
1518 xtpage_t *p; /* base B+-tree index page */
1519 s64 bn;
1520 int index0, index, newindex, nextindex;
1521 struct btstack btstack; /* traverse stack */
1522 struct xtsplit split; /* split information */
1523 xad_t *xad, *lxad, *rxad;
1524 int xflag;
1525 s64 nxoff, xoff;
1526 int nxlen, xlen, lxlen, rxlen;
1527 s64 nxaddr, xaddr;
1528 struct tlock *tlck;
1529 struct xtlock *xtlck = NULL;
1530 int newpage = 0;
1531
1532 /* there must exist extent to be tailgated */
1533 nxoff = offsetXAD(nxad);
1534 nxlen = lengthXAD(nxad);
1535 nxaddr = addressXAD(nxad);
1536
1537 if ((rc = xtSearch(ip, nxoff, NULL, &cmp, &btstack, XT_INSERT)))
1538 return rc;
1539
1540 /* retrieve search result */
1541 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index0);
1542
1543 if (cmp != 0) {
1544 XT_PUTPAGE(mp);
1545 jfs_error(ip->i_sb, "Could not find extent\n");
1546 return -EIO;
1547 }
1548
1549 BT_MARK_DIRTY(mp, ip);
1550 /*
1551 * acquire tlock of the leaf page containing original entry
1552 */
1553 if (!test_cflag(COMMIT_Nolink, ip)) {
1554 tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
1555 xtlck = (struct xtlock *) & tlck->lock;
1556 }
1557
1558 xad = &p->xad[index0];
1559 xflag = xad->flag;
1560 xoff = offsetXAD(xad);
1561 xlen = lengthXAD(xad);
1562 xaddr = addressXAD(xad);
1563
1564 /* nXAD must be completely contained within XAD */
1565 if ((xoff > nxoff) ||
1566 (nxoff + nxlen > xoff + xlen)) {
1567 XT_PUTPAGE(mp);
1568 jfs_error(ip->i_sb,
1569 "nXAD in not completely contained within XAD\n");
1570 return -EIO;
1571 }
1572
1573 index = index0;
1574 newindex = index + 1;
1575 nextindex = le16_to_cpu(p->header.nextindex);
1576
1577 if (xoff < nxoff)
1578 goto coalesceRight;
1579
1580 /*
1581 * coalesce with left XAD
1582 */
1583 /* is XAD first entry of page ? */
1584 if (index == XTENTRYSTART)
1585 goto replace;
1586
1587 /* is nXAD logically and physically contiguous with lXAD ? */
1588 lxad = &p->xad[index - 1];
1589 lxlen = lengthXAD(lxad);
1590 if (!(lxad->flag & XAD_NOTRECORDED) &&
1591 (nxoff == offsetXAD(lxad) + lxlen) &&
1592 (nxaddr == addressXAD(lxad) + lxlen) &&
1593 (lxlen + nxlen < MAXXLEN)) {
1594 /* extend right lXAD */
1595 index0 = index - 1;
1596 XADlength(lxad, lxlen + nxlen);
1597
1598 /* If we just merged two extents together, need to make sure the
1599 * right extent gets logged. If the left one is marked XAD_NEW,
1600 * then we know it will be logged. Otherwise, mark as
1601 * XAD_EXTENDED
1602 */
1603 if (!(lxad->flag & XAD_NEW))
1604 lxad->flag |= XAD_EXTENDED;
1605
1606 if (xlen > nxlen) {
1607 /* truncate XAD */
1608 XADoffset(xad, xoff + nxlen);
1609 XADlength(xad, xlen - nxlen);
1610 XADaddress(xad, xaddr + nxlen);
1611 goto out;
1612 } else { /* (xlen == nxlen) */
1613
1614 /* remove XAD */
1615 if (index < nextindex - 1)
1616 memmove(&p->xad[index], &p->xad[index + 1],
1617 (nextindex - index -
1618 1) << L2XTSLOTSIZE);
1619
1620 p->header.nextindex =
1621 cpu_to_le16(le16_to_cpu(p->header.nextindex) -
1622 1);
1623
1624 index = index0;
1625 newindex = index + 1;
1626 nextindex = le16_to_cpu(p->header.nextindex);
1627 xoff = nxoff = offsetXAD(lxad);
1628 xlen = nxlen = lxlen + nxlen;
1629 xaddr = nxaddr = addressXAD(lxad);
1630 goto coalesceRight;
1631 }
1632 }
1633
1634 /*
1635 * replace XAD with nXAD
1636 */
1637 replace: /* (nxoff == xoff) */
1638 if (nxlen == xlen) {
1639 /* replace XAD with nXAD:recorded */
1640 *xad = *nxad;
1641 xad->flag = xflag & ~XAD_NOTRECORDED;
1642
1643 goto coalesceRight;
1644 } else /* (nxlen < xlen) */
1645 goto updateLeft;
1646
1647 /*
1648 * coalesce with right XAD
1649 */
1650 coalesceRight: /* (xoff <= nxoff) */
1651 /* is XAD last entry of page ? */
1652 if (newindex == nextindex) {
1653 if (xoff == nxoff)
1654 goto out;
1655 goto updateRight;
1656 }
1657
1658 /* is nXAD logically and physically contiguous with rXAD ? */
1659 rxad = &p->xad[index + 1];
1660 rxlen = lengthXAD(rxad);
1661 if (!(rxad->flag & XAD_NOTRECORDED) &&
1662 (nxoff + nxlen == offsetXAD(rxad)) &&
1663 (nxaddr + nxlen == addressXAD(rxad)) &&
1664 (rxlen + nxlen < MAXXLEN)) {
1665 /* extend left rXAD */
1666 XADoffset(rxad, nxoff);
1667 XADlength(rxad, rxlen + nxlen);
1668 XADaddress(rxad, nxaddr);
1669
1670 /* If we just merged two extents together, need to make sure
1671 * the left extent gets logged. If the right one is marked
1672 * XAD_NEW, then we know it will be logged. Otherwise, mark as
1673 * XAD_EXTENDED
1674 */
1675 if (!(rxad->flag & XAD_NEW))
1676 rxad->flag |= XAD_EXTENDED;
1677
1678 if (xlen > nxlen)
1679 /* truncate XAD */
1680 XADlength(xad, xlen - nxlen);
1681 else { /* (xlen == nxlen) */
1682
1683 /* remove XAD */
1684 memmove(&p->xad[index], &p->xad[index + 1],
1685 (nextindex - index - 1) << L2XTSLOTSIZE);
1686
1687 p->header.nextindex =
1688 cpu_to_le16(le16_to_cpu(p->header.nextindex) -
1689 1);
1690 }
1691
1692 goto out;
1693 } else if (xoff == nxoff)
1694 goto out;
1695
1696 if (xoff >= nxoff) {
1697 XT_PUTPAGE(mp);
1698 jfs_error(ip->i_sb, "xoff >= nxoff\n");
1699 return -EIO;
1700 }
1701
1702 /*
1703 * split XAD into (lXAD, nXAD):
1704 *
1705 * |---nXAD--->
1706 * --|----------XAD----------|--
1707 * |-lXAD-|
1708 */
1709 updateRight: /* (xoff < nxoff) */
1710 /* truncate old XAD as lXAD:not_recorded */
1711 xad = &p->xad[index];
1712 XADlength(xad, nxoff - xoff);
1713
1714 /* insert nXAD:recorded */
1715 if (nextindex == le16_to_cpu(p->header.maxentry)) {
1716
1717 /* xtSplitUp() unpins leaf pages */
1718 split.mp = mp;
1719 split.index = newindex;
1720 split.flag = xflag & ~XAD_NOTRECORDED;
1721 split.off = nxoff;
1722 split.len = nxlen;
1723 split.addr = nxaddr;
1724 split.pxdlist = NULL;
1725 if ((rc = xtSplitUp(tid, ip, &split, &btstack)))
1726 return rc;
1727
1728 /* get back old page */
1729 p = xt_getpage(ip, bn, &mp);
1730 if (IS_ERR(p))
1731 return PTR_ERR(p);
1732 /*
1733 * if leaf root has been split, original root has been
1734 * copied to new child page, i.e., original entry now
1735 * resides on the new child page;
1736 */
1737 if (p->header.flag & BT_INTERNAL) {
1738 ASSERT(p->header.nextindex ==
1739 cpu_to_le16(XTENTRYSTART + 1));
1740 xad = &p->xad[XTENTRYSTART];
1741 bn = addressXAD(xad);
1742 XT_PUTPAGE(mp);
1743
1744 /* get new child page */
1745 p = xt_getpage(ip, bn, &mp);
1746 if (IS_ERR(p))
1747 return PTR_ERR(p);
1748
1749 BT_MARK_DIRTY(mp, ip);
1750 if (!test_cflag(COMMIT_Nolink, ip)) {
1751 tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
1752 xtlck = (struct xtlock *) & tlck->lock;
1753 }
1754 } else {
1755 /* is nXAD on new page ? */
1756 if (newindex >
1757 (le16_to_cpu(p->header.maxentry) >> 1)) {
1758 newindex =
1759 newindex -
1760 le16_to_cpu(p->header.nextindex) +
1761 XTENTRYSTART;
1762 newpage = 1;
1763 }
1764 }
1765 } else {
1766 /* if insert into middle, shift right remaining entries */
1767 if (newindex < nextindex)
1768 memmove(&p->xad[newindex + 1], &p->xad[newindex],
1769 (nextindex - newindex) << L2XTSLOTSIZE);
1770
1771 /* insert the entry */
1772 xad = &p->xad[newindex];
1773 *xad = *nxad;
1774 xad->flag = xflag & ~XAD_NOTRECORDED;
1775
1776 /* advance next available entry index. */
1777 p->header.nextindex =
1778 cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1);
1779 }
1780
1781 /*
1782 * does nXAD force 3-way split ?
1783 *
1784 * |---nXAD--->|
1785 * --|----------XAD-------------|--
1786 * |-lXAD-| |-rXAD -|
1787 */
1788 if (nxoff + nxlen == xoff + xlen)
1789 goto out;
1790
1791 /* reorient nXAD as XAD for further split XAD into (nXAD, rXAD) */
1792 if (newpage) {
1793 /* close out old page */
1794 if (!test_cflag(COMMIT_Nolink, ip)) {
1795 xtlck->lwm.offset = (xtlck->lwm.offset) ?
1796 min(index0, (int)xtlck->lwm.offset) : index0;
1797 xtlck->lwm.length =
1798 le16_to_cpu(p->header.nextindex) -
1799 xtlck->lwm.offset;
1800 }
1801
1802 bn = le64_to_cpu(p->header.next);
1803 XT_PUTPAGE(mp);
1804
1805 /* get new right page */
1806 p = xt_getpage(ip, bn, &mp);
1807 if (IS_ERR(p))
1808 return PTR_ERR(p);
1809
1810 BT_MARK_DIRTY(mp, ip);
1811 if (!test_cflag(COMMIT_Nolink, ip)) {
1812 tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
1813 xtlck = (struct xtlock *) & tlck->lock;
1814 }
1815
1816 index0 = index = newindex;
1817 } else
1818 index++;
1819
1820 newindex = index + 1;
1821 nextindex = le16_to_cpu(p->header.nextindex);
1822 xlen = xlen - (nxoff - xoff);
1823 xoff = nxoff;
1824 xaddr = nxaddr;
1825
1826 /* recompute split pages */
1827 if (nextindex == le16_to_cpu(p->header.maxentry)) {
1828 XT_PUTPAGE(mp);
1829
1830 if ((rc = xtSearch(ip, nxoff, NULL, &cmp, &btstack, XT_INSERT)))
1831 return rc;
1832
1833 /* retrieve search result */
1834 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index0);
1835
1836 if (cmp != 0) {
1837 XT_PUTPAGE(mp);
1838 jfs_error(ip->i_sb, "xtSearch failed\n");
1839 return -EIO;
1840 }
1841
1842 if (index0 != index) {
1843 XT_PUTPAGE(mp);
1844 jfs_error(ip->i_sb, "unexpected value of index\n");
1845 return -EIO;
1846 }
1847 }
1848
1849 /*
1850 * split XAD into (nXAD, rXAD)
1851 *
1852 * ---nXAD---|
1853 * --|----------XAD----------|--
1854 * |-rXAD-|
1855 */
1856 updateLeft: /* (nxoff == xoff) && (nxlen < xlen) */
1857 /* update old XAD with nXAD:recorded */
1858 xad = &p->xad[index];
1859 *xad = *nxad;
1860 xad->flag = xflag & ~XAD_NOTRECORDED;
1861
1862 /* insert rXAD:not_recorded */
1863 xoff = xoff + nxlen;
1864 xlen = xlen - nxlen;
1865 xaddr = xaddr + nxlen;
1866 if (nextindex == le16_to_cpu(p->header.maxentry)) {
1867/*
1868printf("xtUpdate.updateLeft.split p:0x%p\n", p);
1869*/
1870 /* xtSplitUp() unpins leaf pages */
1871 split.mp = mp;
1872 split.index = newindex;
1873 split.flag = xflag;
1874 split.off = xoff;
1875 split.len = xlen;
1876 split.addr = xaddr;
1877 split.pxdlist = NULL;
1878 if ((rc = xtSplitUp(tid, ip, &split, &btstack)))
1879 return rc;
1880
1881 /* get back old page */
1882 p = xt_getpage(ip, bn, &mp);
1883 if (IS_ERR(p))
1884 return PTR_ERR(p);
1885
1886 /*
1887 * if leaf root has been split, original root has been
1888 * copied to new child page, i.e., original entry now
1889 * resides on the new child page;
1890 */
1891 if (p->header.flag & BT_INTERNAL) {
1892 ASSERT(p->header.nextindex ==
1893 cpu_to_le16(XTENTRYSTART + 1));
1894 xad = &p->xad[XTENTRYSTART];
1895 bn = addressXAD(xad);
1896 XT_PUTPAGE(mp);
1897
1898 /* get new child page */
1899 p = xt_getpage(ip, bn, &mp);
1900 if (IS_ERR(p))
1901 return PTR_ERR(p);
1902
1903 BT_MARK_DIRTY(mp, ip);
1904 if (!test_cflag(COMMIT_Nolink, ip)) {
1905 tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
1906 xtlck = (struct xtlock *) & tlck->lock;
1907 }
1908 }
1909 } else {
1910 /* if insert into middle, shift right remaining entries */
1911 if (newindex < nextindex)
1912 memmove(&p->xad[newindex + 1], &p->xad[newindex],
1913 (nextindex - newindex) << L2XTSLOTSIZE);
1914
1915 /* insert the entry */
1916 xad = &p->xad[newindex];
1917 XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr);
1918
1919 /* advance next available entry index. */
1920 p->header.nextindex =
1921 cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1);
1922 }
1923
1924 out:
1925 if (!test_cflag(COMMIT_Nolink, ip)) {
1926 xtlck->lwm.offset = (xtlck->lwm.offset) ?
1927 min(index0, (int)xtlck->lwm.offset) : index0;
1928 xtlck->lwm.length = le16_to_cpu(p->header.nextindex) -
1929 xtlck->lwm.offset;
1930 }
1931
1932 /* unpin the leaf page */
1933 XT_PUTPAGE(mp);
1934
1935 return rc;
1936}
1937
1938
1939/*
1940 * xtAppend()
1941 *
1942 * function: grow in append mode from the specified contiguous region;
1943 *
1944 * parameter:
1945 * tid - transaction id;
1946 * ip - file object;
1947 * xflag - extent flag:
1948 * xoff - extent offset;
1949 * maxblocks - max extent length;
1950 * xlen - extent length (in/out);
1951 * xaddrp - extent address pointer (in/out):
1952 * flag -
1953 *
1954 * return:
1955 */
1956int xtAppend(tid_t tid, /* transaction id */
1957 struct inode *ip, int xflag, s64 xoff, s32 maxblocks,
1958 s32 * xlenp, /* (in/out) */
1959 s64 * xaddrp, /* (in/out) */
1960 int flag)
1961{
1962 int rc = 0;
1963 struct metapage *mp; /* meta-page buffer */
1964 xtpage_t *p; /* base B+-tree index page */
1965 s64 bn, xaddr;
1966 int index, nextindex;
1967 struct btstack btstack; /* traverse stack */
1968 struct xtsplit split; /* split information */
1969 xad_t *xad;
1970 int cmp;
1971 struct tlock *tlck;
1972 struct xtlock *xtlck;
1973 int nsplit, nblocks, xlen;
1974 struct pxdlist pxdlist;
1975 pxd_t *pxd;
1976 s64 next;
1977
1978 xaddr = *xaddrp;
1979 xlen = *xlenp;
1980 jfs_info("xtAppend: xoff:0x%lx maxblocks:%d xlen:%d xaddr:0x%lx",
1981 (ulong) xoff, maxblocks, xlen, (ulong) xaddr);
1982
1983 /*
1984 * search for the entry location at which to insert:
1985 *
1986 * xtFastSearch() and xtSearch() both return (leaf page
1987 * pinned, index at which to insert).
1988 * n.b. xtSearch() may return index of maxentry of
1989 * the full page.
1990 */
1991 if ((rc = xtSearch(ip, xoff, &next, &cmp, &btstack, XT_INSERT)))
1992 return rc;
1993
1994 /* retrieve search result */
1995 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
1996
1997 if (cmp == 0) {
1998 rc = -EEXIST;
1999 goto out;
2000 }
2001
2002 if (next)
2003 xlen = min(xlen, (int)(next - xoff));
2004//insert:
2005 /*
2006 * insert entry for new extent
2007 */
2008 xflag |= XAD_NEW;
2009
2010 /*
2011 * if the leaf page is full, split the page and
2012 * propagate up the router entry for the new page from split
2013 *
2014 * The xtSplitUp() will insert the entry and unpin the leaf page.
2015 */
2016 nextindex = le16_to_cpu(p->header.nextindex);
2017 if (nextindex < le16_to_cpu(p->header.maxentry))
2018 goto insertLeaf;
2019
2020 /*
2021 * allocate new index blocks to cover index page split(s)
2022 */
2023 nsplit = btstack.nsplit;
2024 split.pxdlist = &pxdlist;
2025 pxdlist.maxnpxd = pxdlist.npxd = 0;
2026 pxd = &pxdlist.pxd[0];
2027 nblocks = JFS_SBI(ip->i_sb)->nbperpage;
2028 for (; nsplit > 0; nsplit--, pxd++, xaddr += nblocks, maxblocks -= nblocks) {
2029 if ((rc = dbAllocBottomUp(ip, xaddr, (s64) nblocks)) == 0) {
2030 PXDaddress(pxd, xaddr);
2031 PXDlength(pxd, nblocks);
2032
2033 pxdlist.maxnpxd++;
2034
2035 continue;
2036 }
2037
2038 /* undo allocation */
2039
2040 goto out;
2041 }
2042
2043 xlen = min(xlen, maxblocks);
2044
2045 /*
2046 * allocate data extent requested
2047 */
2048 if ((rc = dbAllocBottomUp(ip, xaddr, (s64) xlen)))
2049 goto out;
2050
2051 split.mp = mp;
2052 split.index = index;
2053 split.flag = xflag;
2054 split.off = xoff;
2055 split.len = xlen;
2056 split.addr = xaddr;
2057 if ((rc = xtSplitUp(tid, ip, &split, &btstack))) {
2058 /* undo data extent allocation */
2059 dbFree(ip, *xaddrp, (s64) * xlenp);
2060
2061 return rc;
2062 }
2063
2064 *xaddrp = xaddr;
2065 *xlenp = xlen;
2066 return 0;
2067
2068 /*
2069 * insert the new entry into the leaf page
2070 */
2071 insertLeaf:
2072 /*
2073 * allocate data extent requested
2074 */
2075 if ((rc = dbAllocBottomUp(ip, xaddr, (s64) xlen)))
2076 goto out;
2077
2078 BT_MARK_DIRTY(mp, ip);
2079 /*
2080 * acquire a transaction lock on the leaf page;
2081 *
2082 * action: xad insertion/extension;
2083 */
2084 tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
2085 xtlck = (struct xtlock *) & tlck->lock;
2086
2087 /* insert the new entry: mark the entry NEW */
2088 xad = &p->xad[index];
2089 XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr);
2090
2091 /* advance next available entry index */
2092 le16_add_cpu(&p->header.nextindex, 1);
2093
2094 xtlck->lwm.offset =
2095 (xtlck->lwm.offset) ? min(index,(int) xtlck->lwm.offset) : index;
2096 xtlck->lwm.length = le16_to_cpu(p->header.nextindex) -
2097 xtlck->lwm.offset;
2098
2099 *xaddrp = xaddr;
2100 *xlenp = xlen;
2101
2102 out:
2103 /* unpin the leaf page */
2104 XT_PUTPAGE(mp);
2105
2106 return rc;
2107}
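
/*
 * Illustrative sketch (editorial addition): xtAppend() consumes the
 * caller's contiguous region front to back, index pages for the
 * split first, then the data extent. For a hypothetical nsplit == 2,
 * nbperpage == 8, maxblocks == 64:
 *
 *	pxd[0]: xaddr,       8 blocks	// new index page
 *	pxd[1]: xaddr + 8,   8 blocks	// new index page
 *	data:   xaddr + 16,  min(xlen, 48) blocks
 */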
2108
2109/*
2110 * xtInitRoot()
2111 *
2112 * initialize file root (inline in inode)
2113 */
2114void xtInitRoot(tid_t tid, struct inode *ip)
2115{
2116 xtroot_t *p;
2117
2118 /*
2119 * acquire a transaction lock on the root
2120 *
2121 * action:
2122 */
2123 txLock(tid, ip, (struct metapage *) &JFS_IP(ip)->bxflag,
2124 tlckXTREE | tlckNEW);
2125 p = &JFS_IP(ip)->i_xtroot;
2126
2127 p->header.flag = DXD_INDEX | BT_ROOT | BT_LEAF;
2128 p->header.nextindex = cpu_to_le16(XTENTRYSTART);
2129
2130 if (S_ISDIR(ip->i_mode))
2131 p->header.maxentry = cpu_to_le16(XTROOTINITSLOT_DIR);
2132 else {
2133 p->header.maxentry = cpu_to_le16(XTROOTINITSLOT);
2134 ip->i_size = 0;
2135 }
2136
2137
2138 return;
2139}
2140
2141
2142/*
2143 * We can run into a deadlock truncating a file with a large number of
2144 * xtree pages (large fragmented file). A robust fix would entail a
2145 * reservation system where we would reserve a number of metadata pages
2146 * and tlocks that we would be guaranteed to obtain without deadlock.
2147 * Without this, a partial fix is to limit the number of metadata pages
2148 * we will lock in a single transaction: currently we truncate the file
2149 * so that no more than 50 leaf pages are locked. The caller of
2150 * xtTruncate is responsible for committing the current transaction and
2151 * for creating subsequent transactions to truncate the file further if
2152 * needed; see the caller-loop sketch after xtTruncate() below.
2153 */
2154#define MAX_TRUNCATE_LEAVES 50
2155
2156/*
2157 * xtTruncate()
2158 *
2159 * function:
2160 *	traverse the tree backward, bottom up, logging the truncation;
2161 *	terminate at the last extent entry of the current subtree
2162 *	root page that covers the new (smaller) size;
2163 *	truncation may occur within the last extent entry.
2164 *
2165 * parameter:
2166 *	tid_t tid,
2167 *	struct inode *ip,
2168 *	s64 newsize,
2169 *	int flag) {PWMAP, PMAP, WMAP; DELETE, TRUNCATE}
2170 *
2171 * return: actual new size; larger than the requested newsize if the
2172 *	transaction-size limit was hit (the caller must then truncate again)
2173 * note:
2174 * PWMAP:
2175 * 1. truncate (non-COMMIT_NOLINK file)
2176 * by jfs_truncate() or jfs_open(O_TRUNC):
2177 * xtree is updated;
2178 * 2. truncate index table of directory when last entry removed
2179 * map update via tlock at commit time;
2180 * PMAP:
2181 * Call xtTruncate_pmap instead
2182 * WMAP:
2183 * 1. remove (free zero link count) on last reference release
2184 * (pmap has been freed at commit zero link count);
2185 * 2. truncate (COMMIT_NOLINK file, i.e., tmp file):
2186 * xtree is updated;
2187 * map update directly at truncation time;
2188 *
2189 * if (DELETE)
2190 * no LOG_NOREDOPAGE is required (NOREDOFILE is sufficient);
2191 * else if (TRUNCATE)
2192 * must write LOG_NOREDOPAGE for deleted index page;
2193 *
2194 * pages may already have been tlocked by anonymous transactions
2195 * during file growth (i.e., write) before truncation;
2196 *
2197 * except for the last truncated entry, deleted entries remain as is
2198 * in the page (only nextindex is updated) for other uses
2199 * (e.g., logging/updating the allocation map): this avoids copying
2200 * the page info but delays the freeing of pages;
2201 *
2202 */
2203s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag)
2204{
2205 s64 teof;
2206 struct metapage *mp;
2207 xtpage_t *p;
2208 s64 bn;
2209 int index, nextindex;
2210 xad_t *xad;
2211 s64 xoff, xaddr;
2212 int xlen, len, freexlen;
2213 struct btstack btstack;
2214 struct btframe *parent;
2215 struct tblock *tblk = NULL;
2216 struct tlock *tlck = NULL;
2217 struct xtlock *xtlck = NULL;
2218 struct xdlistlock xadlock; /* maplock for COMMIT_WMAP */
2219 struct pxd_lock *pxdlock; /* maplock for COMMIT_WMAP */
2220 s64 nfreed;
2221 int freed, log;
2222 int locked_leaves = 0;
2223
2224 /* save object truncation type */
2225 if (tid) {
2226 tblk = tid_to_tblock(tid);
2227 tblk->xflag |= flag;
2228 }
2229
2230 nfreed = 0;
2231
2232 flag &= COMMIT_MAP;
2233 assert(flag != COMMIT_PMAP);
2234
2235 if (flag == COMMIT_PWMAP)
2236 log = 1;
2237 else {
2238 log = 0;
2239 xadlock.flag = mlckFREEXADLIST;
2240 xadlock.index = 1;
2241 }
2242
2243 /*
2244 * if the newsize is not an integral number of pages,
2245 * the file between newsize and next page boundary will
2246 * be cleared.
2247 * if truncating into a file hole, it will cause
2248 * a full block to be allocated for the logical block.
2249 */
2250
2251 /*
2252 * release page blocks of truncated region <teof, eof>
2253 *
2254 * free the data blocks from the leaf index blocks.
2255 * delete the parent index entries corresponding to
2256 * the freed child data/index blocks.
2257 * free the index blocks themselves which aren't needed
2258 * in new sized file.
2259 *
2260 * index blocks are updated only if the blocks are to be
2261 * retained in the new sized file.
2262 * if type is PMAP, the data and index pages are NOT
2263 * freed, and the data and index blocks are NOT freed
2264 * from working map.
2265 * (this will allow continued access of data/index of
2266 * temporary file (zerolink count file truncated to zero-length)).
2267 */
2268 teof = (newsize + (JFS_SBI(ip->i_sb)->bsize - 1)) >>
2269 JFS_SBI(ip->i_sb)->l2bsize;
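
	/*
	 * e.g., with 4K blocks (bsize 4096, l2bsize 12): newsize 10000
	 * rounds up to teof = 3 blocks, newsize 8192 gives teof = 2, and
	 * newsize 0 gives teof = 0 (free everything)
	 */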
2270
2271 /* clear stack */
2272 BT_CLR(&btstack);
2273
2274 /*
2275 * start with root
2276 *
2277 * root resides in the inode
2278 */
2279 bn = 0;
2280
2281 /*
2282 * first access of each page:
2283 */
2284 getPage:
2285 p = xt_getpage(ip, bn, &mp);
2286 if (IS_ERR(p))
2287 return PTR_ERR(p);
2288
2289 /* process entries backward from last index */
2290 index = le16_to_cpu(p->header.nextindex) - 1;
2291
2292
2293 /* Since this is the rightmost page at this level, and we may have
2294 * already freed a page that was formerly to the right, let's make
2295 * sure that the next pointer is zero.
2296 */
2297 if (p->header.next) {
2298 if (log)
2299 /*
2300 * Make sure this change to the header is logged.
2301 * If we really truncate this leaf, the flag
2302 * will be changed to tlckTRUNCATE
2303 */
2304 tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
2305 BT_MARK_DIRTY(mp, ip);
2306 p->header.next = 0;
2307 }
2308
2309 if (p->header.flag & BT_INTERNAL)
2310 goto getChild;
2311
2312 /*
2313 * leaf page
2314 */
2315 freed = 0;
2316
2317	/* does the region covered by the leaf page precede teof ? */
2318 xad = &p->xad[index];
2319 xoff = offsetXAD(xad);
2320 xlen = lengthXAD(xad);
2321 if (teof >= xoff + xlen) {
2322 XT_PUTPAGE(mp);
2323 goto getParent;
2324 }
2325
2326 /* (re)acquire tlock of the leaf page */
2327 if (log) {
2328 if (++locked_leaves > MAX_TRUNCATE_LEAVES) {
2329 /*
2330 * We need to limit the size of the transaction
2331 * to avoid exhausting pagecache & tlocks
2332 */
2333 XT_PUTPAGE(mp);
2334 newsize = (xoff + xlen) << JFS_SBI(ip->i_sb)->l2bsize;
2335 goto getParent;
2336 }
2337 tlck = txLock(tid, ip, mp, tlckXTREE);
2338 tlck->type = tlckXTREE | tlckTRUNCATE;
2339 xtlck = (struct xtlock *) & tlck->lock;
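		/* hwm (high water mark): last entry index whose image is logged */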
2340 xtlck->hwm.offset = le16_to_cpu(p->header.nextindex) - 1;
2341 }
2342 BT_MARK_DIRTY(mp, ip);
2343
2344 /*
2345 * scan backward leaf page entries
2346 */
2347 for (; index >= XTENTRYSTART; index--) {
2348 xad = &p->xad[index];
2349 xoff = offsetXAD(xad);
2350 xlen = lengthXAD(xad);
2351 xaddr = addressXAD(xad);
2352
2353 /*
2354 * The "data" for a directory is indexed by the block
2355 * device's address space. This metadata must be invalidated
2356		 * here.
2357 */
2358 if (S_ISDIR(ip->i_mode) && (teof == 0))
2359 invalidate_xad_metapages(ip, *xad);
2360 /*
2361 * entry beyond eof: continue scan of current page
2362 * xad
2363 * ---|---=======------->
2364 * eof
2365 */
2366 if (teof < xoff) {
2367 nfreed += xlen;
2368 continue;
2369 }
2370
2371 /*
2372 * (xoff <= teof): last entry to be deleted from page;
2373 * If other entries remain in page: keep and update the page.
2374 */
2375
2376 /*
2377 * eof == entry_start: delete the entry
2378 * xad
2379 * -------|=======------->
2380 * eof
2381 *
2382 */
2383 if (teof == xoff) {
2384 nfreed += xlen;
2385
2386 if (index == XTENTRYSTART)
2387 break;
2388
2389 nextindex = index;
2390 }
2391 /*
2392 * eof within the entry: truncate the entry.
2393 * xad
2394 * -------===|===------->
2395 * eof
2396 */
2397 else if (teof < xoff + xlen) {
2398 /* update truncated entry */
2399 len = teof - xoff;
2400 freexlen = xlen - len;
2401 XADlength(xad, len);
2402
2403 /* save pxd of truncated extent in tlck */
2404 xaddr += len;
2405 if (log) { /* COMMIT_PWMAP */
2406 xtlck->lwm.offset = (xtlck->lwm.offset) ?
2407 min(index, (int)xtlck->lwm.offset) : index;
2408 xtlck->lwm.length = index + 1 -
2409 xtlck->lwm.offset;
2410 xtlck->twm.offset = index;
2411 pxdlock = (struct pxd_lock *) & xtlck->pxdlock;
2412 pxdlock->flag = mlckFREEPXD;
2413 PXDaddress(&pxdlock->pxd, xaddr);
2414 PXDlength(&pxdlock->pxd, freexlen);
2415 }
2416 /* free truncated extent */
2417 else { /* COMMIT_WMAP */
2418
2419 pxdlock = (struct pxd_lock *) & xadlock;
2420 pxdlock->flag = mlckFREEPXD;
2421 PXDaddress(&pxdlock->pxd, xaddr);
2422 PXDlength(&pxdlock->pxd, freexlen);
2423 txFreeMap(ip, pxdlock, NULL, COMMIT_WMAP);
2424
2425 /* reset map lock */
2426 xadlock.flag = mlckFREEXADLIST;
2427 }
2428
2429 /* current entry is new last entry; */
2430 nextindex = index + 1;
2431
2432 nfreed += freexlen;
2433 }
2434 /*
2435 * eof beyond the entry:
2436 * xad
2437 * -------=======---|--->
2438 * eof
2439 */
2440 else { /* (xoff + xlen < teof) */
2441
2442 nextindex = index + 1;
2443 }
2444
2445 if (nextindex < le16_to_cpu(p->header.nextindex)) {
2446			if (!log) {	/* COMMIT_WMAP */
2447 xadlock.xdlist = &p->xad[nextindex];
2448 xadlock.count =
2449 le16_to_cpu(p->header.nextindex) -
2450 nextindex;
2451 txFreeMap(ip, (struct maplock *) & xadlock,
2452 NULL, COMMIT_WMAP);
2453 }
2454 p->header.nextindex = cpu_to_le16(nextindex);
2455 }
2456
2457 XT_PUTPAGE(mp);
2458
2459 /* assert(freed == 0); */
2460 goto getParent;
2461 } /* end scan of leaf page entries */
2462
2463 freed = 1;
2464
2465 /*
2466	 * leaf page becomes empty: free the page if type != PMAP
2467 */
2468 if (log) { /* COMMIT_PWMAP */
2469 /* txCommit() with tlckFREE:
2470		 * free data extents covered by leaf [XTENTRYSTART, hwm];
2471 * invalidate leaf if COMMIT_PWMAP;
2472 * if (TRUNCATE), will write LOG_NOREDOPAGE;
2473 */
2474 tlck->type = tlckXTREE | tlckFREE;
2475	} else {	/* COMMIT_WMAP */
2476
2477 /* free data extents covered by leaf */
2478 xadlock.xdlist = &p->xad[XTENTRYSTART];
2479 xadlock.count =
2480 le16_to_cpu(p->header.nextindex) - XTENTRYSTART;
2481 txFreeMap(ip, (struct maplock *) & xadlock, NULL, COMMIT_WMAP);
2482 }
2483
2484 if (p->header.flag & BT_ROOT) {
2485 p->header.flag &= ~BT_INTERNAL;
2486 p->header.flag |= BT_LEAF;
2487 p->header.nextindex = cpu_to_le16(XTENTRYSTART);
2488
2489 XT_PUTPAGE(mp); /* debug */
2490 goto out;
2491 } else {
2492 if (log) { /* COMMIT_PWMAP */
2493 /* page will be invalidated at tx completion
2494 */
2495 XT_PUTPAGE(mp);
2496 } else { /* COMMIT_WMAP */
2497
2498 if (mp->lid)
2499 lid_to_tlock(mp->lid)->flag |= tlckFREELOCK;
2500
2501 /* invalidate empty leaf page */
2502 discard_metapage(mp);
2503 }
2504 }
2505
2506 /*
2507	 * the leaf page has become empty: delete the parent entry
2508 * for the leaf page if the parent page is to be kept
2509 * in the new sized file.
2510 */
2511
2512 /*
2513 * go back up to the parent page
2514 */
2515 getParent:
2516 /* pop/restore parent entry for the current child page */
2517 if ((parent = BT_POP(&btstack)) == NULL)
2518 /* current page must have been root */
2519 goto out;
2520
2521 /* get back the parent page */
2522 bn = parent->bn;
2523 p = xt_getpage(ip, bn, &mp);
2524 if (IS_ERR(p))
2525 return PTR_ERR(p);
2526
2527 index = parent->index;
2528
2529 /*
2530 * child page was not empty:
2531 */
2532 if (freed == 0) {
2533 /* has any entry deleted from parent ? */
2534 if (index < le16_to_cpu(p->header.nextindex) - 1) {
2535 /* (re)acquire tlock on the parent page */
2536 if (log) { /* COMMIT_PWMAP */
2537 /* txCommit() with tlckTRUNCATE:
2538				 * free child extents covered by parent entries [index + 1, nextindex);
2539 */
2540 tlck = txLock(tid, ip, mp, tlckXTREE);
2541 xtlck = (struct xtlock *) & tlck->lock;
2542 if (!(tlck->type & tlckTRUNCATE)) {
2543 xtlck->hwm.offset =
2544 le16_to_cpu(p->header.
2545 nextindex) - 1;
2546 tlck->type =
2547 tlckXTREE | tlckTRUNCATE;
2548 }
2549 } else { /* COMMIT_WMAP */
2550
2551 /* free child extents covered by parent */
2552 xadlock.xdlist = &p->xad[index + 1];
2553 xadlock.count =
2554 le16_to_cpu(p->header.nextindex) -
2555 index - 1;
2556 txFreeMap(ip, (struct maplock *) & xadlock,
2557 NULL, COMMIT_WMAP);
2558 }
2559 BT_MARK_DIRTY(mp, ip);
2560
2561 p->header.nextindex = cpu_to_le16(index + 1);
2562 }
2563 XT_PUTPAGE(mp);
2564 goto getParent;
2565 }
2566
2567 /*
2568 * child page was empty:
2569 */
2570 nfreed += lengthXAD(&p->xad[index]);
2571
2572 /*
2573 * During working map update, child page's tlock must be handled
2574 * before parent's. This is because the parent's tlock will cause
2575 * the child's disk space to be marked available in the wmap, so
2576 * it's important that the child page be released by that time.
2577 *
2578	 * ToDo: tlocks should be on a doubly-linked list, so we can
2579	 * quickly remove one and add it to the end.
2580 */
2581
2582 /*
2583 * Move parent page's tlock to the end of the tid's tlock list
2584 */
2585 if (log && mp->lid && (tblk->last != mp->lid) &&
2586 lid_to_tlock(mp->lid)->tid) {
2587 lid_t lid = mp->lid;
2588 struct tlock *prev;
2589
2590 tlck = lid_to_tlock(lid);
2591
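		/* unlink lid from its current place in the tid's tlock list */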
2592 if (tblk->next == lid)
2593 tblk->next = tlck->next;
2594 else {
2595 for (prev = lid_to_tlock(tblk->next);
2596 prev->next != lid;
2597 prev = lid_to_tlock(prev->next)) {
2598 assert(prev->next);
2599 }
2600 prev->next = tlck->next;
2601 }
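		/* re-append lid at the tail so the parent tlock is last */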
2602 lid_to_tlock(tblk->last)->next = lid;
2603 tlck->next = 0;
2604 tblk->last = lid;
2605 }
2606
2607 /*
2608	 * parent page becomes empty: free the page
2609 */
2610 if (index == XTENTRYSTART) {
2611 if (log) { /* COMMIT_PWMAP */
2612 /* txCommit() with tlckFREE:
2613 * free child extents covered by parent;
2614 * invalidate parent if COMMIT_PWMAP;
2615 */
2616 tlck = txLock(tid, ip, mp, tlckXTREE);
2617 xtlck = (struct xtlock *) & tlck->lock;
2618 xtlck->hwm.offset =
2619 le16_to_cpu(p->header.nextindex) - 1;
2620 tlck->type = tlckXTREE | tlckFREE;
2621 } else { /* COMMIT_WMAP */
2622
2623 /* free child extents covered by parent */
2624 xadlock.xdlist = &p->xad[XTENTRYSTART];
2625 xadlock.count =
2626 le16_to_cpu(p->header.nextindex) -
2627 XTENTRYSTART;
2628 txFreeMap(ip, (struct maplock *) & xadlock, NULL,
2629 COMMIT_WMAP);
2630 }
2631 BT_MARK_DIRTY(mp, ip);
2632
2633 if (p->header.flag & BT_ROOT) {
2634 p->header.flag &= ~BT_INTERNAL;
2635 p->header.flag |= BT_LEAF;
2636 p->header.nextindex = cpu_to_le16(XTENTRYSTART);
2637 if (le16_to_cpu(p->header.maxentry) == XTROOTMAXSLOT) {
2638 /*
2639 * Shrink root down to allow inline
2640 * EA (otherwise fsck complains)
2641 */
2642 p->header.maxentry =
2643 cpu_to_le16(XTROOTINITSLOT);
2644 JFS_IP(ip)->mode2 |= INLINEEA;
2645 }
2646
2647 XT_PUTPAGE(mp); /* debug */
2648 goto out;
2649 } else {
2650 if (log) { /* COMMIT_PWMAP */
2651 /* page will be invalidated at tx completion
2652 */
2653 XT_PUTPAGE(mp);
2654 } else { /* COMMIT_WMAP */
2655
2656 if (mp->lid)
2657 lid_to_tlock(mp->lid)->flag |=
2658 tlckFREELOCK;
2659
2660 /* invalidate parent page */
2661 discard_metapage(mp);
2662 }
2663
2664 /* parent has become empty and freed:
2665 * go back up to its parent page
2666 */
2667 /* freed = 1; */
2668 goto getParent;
2669 }
2670 }
2671 /*
2672 * parent page still has entries for front region;
2673 */
2674 else {
2675 /* try truncate region covered by preceding entry
2676 * (process backward)
2677 */
2678 index--;
2679
2680 /* go back down to the child page corresponding
2681 * to the entry
2682 */
2683 goto getChild;
2684 }
2685
2686 /*
2687 * internal page: go down to child page of current entry
2688 */
2689 getChild:
2690 /* save current parent entry for the child page */
2691 if (BT_STACK_FULL(&btstack)) {
2692 jfs_error(ip->i_sb, "stack overrun!\n");
2693 XT_PUTPAGE(mp);
2694 return -EIO;
2695 }
2696 BT_PUSH(&btstack, bn, index);
2697
2698 /* get child page */
2699 xad = &p->xad[index];
2700 bn = addressXAD(xad);
2701
2702 /*
2703 * first access of each internal entry:
2704 */
2705 /* release parent page */
2706 XT_PUTPAGE(mp);
2707
2708 /* process the child page */
2709 goto getPage;
2710
2711 out:
2712 /*
2713 * update file resource stat
2714 */
2715 /* set size
2716 */
2717 if (S_ISDIR(ip->i_mode) && !newsize)
2718 ip->i_size = 1; /* fsck hates zero-length directories */
2719 else
2720 ip->i_size = newsize;
2721
2722 /* update quota allocation to reflect freed blocks */
2723 dquot_free_block(ip, nfreed);
2724
2725 /*
2726 * free tlock of invalidated pages
2727 */
2728 if (flag == COMMIT_WMAP)
2729 txFreelock(ip);
2730
2731 return newsize;
2732}
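
/*
 * Illustrative caller loop for xtTruncate() (a sketch in the spirit of
 * the truncate path in inode.c; not part of this file).  Because
 * xtTruncate() stops after locking MAX_TRUNCATE_LEAVES leaf pages and
 * returns the size it actually reached, the caller commits and retries
 * until the requested size is reported.  The helper name and the
 * minimal error handling are assumptions.
 */
static int __maybe_unused example_truncate_loop(struct inode *ip, s64 length)
{
	s64 newsize;
	tid_t tid;
	int rc = 0;

	do {
		tid = txBegin(ip->i_sb, 0);
		newsize = xtTruncate(tid, ip, length,
				     COMMIT_TRUNCATE | COMMIT_PWMAP);
		if (newsize < 0) {
			txEnd(tid);
			return newsize;	/* error from xtTruncate */
		}
		rc = txCommit(tid, 1, &ip, 0);
		txEnd(tid);
	} while (!rc && newsize > length);

	return rc;
}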
2733
2734
2735/*
2736 * xtTruncate_pmap()
2737 *
2738 * function:
2739 * Perform truncate to zero length for deleted file, leaving the
2740 * xtree and working map untouched. This allows the file to
2741 * be accessed via open file handles, while the delete of the file
2742 * is committed to disk.
2743 *
2744 * parameter:
2745 * tid_t tid,
2746 * struct inode *ip,
2747 * s64 committed_size)
2748 *
2749 * return: new committed size
2750 *
2751 * note:
2752 *
2753 * To avoid deadlock by holding too many transaction locks, the
2754 * truncation may be broken up into multiple transactions.
2755 * The committed_size keeps track of how much of the file has been
2756 * freed from the pmaps.
2757 */
2758s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size)
2759{
2760 s64 bn;
2761 struct btstack btstack;
2762 int cmp;
2763 int index;
2764 int locked_leaves = 0;
2765 struct metapage *mp;
2766 xtpage_t *p;
2767 struct btframe *parent;
2768 int rc;
2769 struct tblock *tblk;
2770 struct tlock *tlck = NULL;
2771 xad_t *xad;
2772 int xlen;
2773 s64 xoff;
2774 struct xtlock *xtlck = NULL;
2775
2776 /* save object truncation type */
2777 tblk = tid_to_tblock(tid);
2778 tblk->xflag |= COMMIT_PMAP;
2779
2780 /* clear stack */
2781 BT_CLR(&btstack);
2782
2783 if (committed_size) {
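		/*
		 * pick up where the previous transaction left off: find
		 * the extent containing the last block already committed
		 */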
2784 xoff = (committed_size >> JFS_SBI(ip->i_sb)->l2bsize) - 1;
2785 rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0);
2786 if (rc)
2787 return rc;
2788
2789 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
2790
2791 if (cmp != 0) {
2792 XT_PUTPAGE(mp);
2793 jfs_error(ip->i_sb, "did not find extent\n");
2794 return -EIO;
2795 }
2796 } else {
2797 /*
2798 * start with root
2799 *
2800 * root resides in the inode
2801 */
2802 bn = 0;
2803
2804 /*
2805 * first access of each page:
2806 */
2807 getPage:
2808 p = xt_getpage(ip, bn, &mp);
2809 if (IS_ERR(p))
2810 return PTR_ERR(p);
2811
2812 /* process entries backward from last index */
2813 index = le16_to_cpu(p->header.nextindex) - 1;
2814
2815 if (p->header.flag & BT_INTERNAL)
2816 goto getChild;
2817 }
2818
2819 /*
2820 * leaf page
2821 */
2822
2823 if (++locked_leaves > MAX_TRUNCATE_LEAVES) {
2824 /*
2825 * We need to limit the size of the transaction
2826 * to avoid exhausting pagecache & tlocks
2827 */
2828 xad = &p->xad[index];
2829 xoff = offsetXAD(xad);
2830 xlen = lengthXAD(xad);
2831 XT_PUTPAGE(mp);
2832 return (xoff + xlen) << JFS_SBI(ip->i_sb)->l2bsize;
2833 }
2834 tlck = txLock(tid, ip, mp, tlckXTREE);
2835 tlck->type = tlckXTREE | tlckFREE;
2836 xtlck = (struct xtlock *) & tlck->lock;
2837 xtlck->hwm.offset = index;
2838
2840 XT_PUTPAGE(mp);
2841
2842 /*
2843 * go back up to the parent page
2844 */
2845 getParent:
2846 /* pop/restore parent entry for the current child page */
2847 if ((parent = BT_POP(&btstack)) == NULL)
2848 /* current page must have been root */
2849 goto out;
2850
2851 /* get back the parent page */
2852 bn = parent->bn;
2853 p = xt_getpage(ip, bn, &mp);
2854 if (IS_ERR(p))
2855 return PTR_ERR(p);
2856
2857 index = parent->index;
2858
2859 /*
2860	 * parent page becomes empty: free the page
2861 */
2862 if (index == XTENTRYSTART) {
2863 /* txCommit() with tlckFREE:
2864 * free child extents covered by parent;
2865 * invalidate parent if COMMIT_PWMAP;
2866 */
2867 tlck = txLock(tid, ip, mp, tlckXTREE);
2868 xtlck = (struct xtlock *) & tlck->lock;
2869 xtlck->hwm.offset = le16_to_cpu(p->header.nextindex) - 1;
2870 tlck->type = tlckXTREE | tlckFREE;
2871
2872		/* test for root before unpinning the page */
2873		if (p->header.flag & BT_ROOT) {
2874			XT_PUTPAGE(mp);
2875			goto out;
2876		} else {
2877			XT_PUTPAGE(mp);
2878			goto getParent;
2879		}
2880 }
2881 /*
2882 * parent page still has entries for front region;
2883 */
2884 else
2885 index--;
2886 /*
2887 * internal page: go down to child page of current entry
2888 */
2889 getChild:
2890 /* save current parent entry for the child page */
2891 if (BT_STACK_FULL(&btstack)) {
2892 jfs_error(ip->i_sb, "stack overrun!\n");
2893 XT_PUTPAGE(mp);
2894 return -EIO;
2895 }
2896 BT_PUSH(&btstack, bn, index);
2897
2898 /* get child page */
2899 xad = &p->xad[index];
2900 bn = addressXAD(xad);
2901
2902 /*
2903 * first access of each internal entry:
2904 */
2905 /* release parent page */
2906 XT_PUTPAGE(mp);
2907
2908 /* process the child page */
2909 goto getPage;
2910
2911 out:
2912
2913 return 0;
2914}
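
/*
 * Illustrative caller loop for xtTruncate_pmap() (a sketch in the
 * spirit of the delete path in namei.c; not part of this file).  A
 * positive return value is the committed size to feed back in on the
 * next pass; zero means the whole file has been freed from the pmap.
 * The helper name and the minimal error handling are assumptions.
 */
static int __maybe_unused example_pmap_truncate(struct inode *ip)
{
	s64 new_size = 0;
	tid_t tid;
	int rc;

	do {
		tid = txBegin(ip->i_sb, 0);
		new_size = xtTruncate_pmap(tid, ip, new_size);
		if (new_size < 0) {
			txAbort(tid, 1);	/* dirty=1: mark fs dirty */
			rc = new_size;
		} else
			rc = txCommit(tid, 1, &ip, COMMIT_SYNC);
		txEnd(tid);
	} while (new_size > 0);

	return rc;
}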
2915
2916#ifdef CONFIG_JFS_STATISTICS
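/* read-only counters shown via procfs; the /proc wiring lives in jfs_debug.c */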
2917int jfs_xtstat_proc_show(struct seq_file *m, void *v)
2918{
2919 seq_printf(m,
2920 "JFS Xtree statistics\n"
2921 "====================\n"
2922 "searches = %d\n"
2923 "fast searches = %d\n"
2924 "splits = %d\n",
2925 xtStat.search,
2926 xtStat.fastSearch,
2927 xtStat.split);
2928 return 0;
2929}
2930#endif