Merge branch 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jlbec/ocfs2

* 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jlbec/ocfs2: (36 commits)
Ocfs2: Move ocfs2 ioctl definitions from ocfs2_fs.h to newly added ocfs2_ioctl.h
ocfs2: send SIGXFSZ if new filesize exceeds limit -v2
ocfs2/userdlm: Add tracing in userdlm
ocfs2: Use a separate masklog for AST and BASTs
dlm: allow dlm to do recovery during shutdown
ocfs2: Only bug out in direct io write for reflinked extent.
ocfs2: fix warning in ocfs2_file_aio_write()
ocfs2_dlmfs: Enable the use of user cluster stacks.
ocfs2_dlmfs: Use the stackglue.
ocfs2_dlmfs: Don't honor truncate. The size of a dlmfs file is LVB_LEN
ocfs2: Pass the locking protocol into ocfs2_cluster_connect().
ocfs2: Remove the ast pointers from ocfs2_stack_plugins
ocfs2: Hang the locking proto on the cluster conn and use it in asts.
ocfs2: Attach the connection to the lksb
ocfs2: Pass lksbs back from stackglue ast/bast functions.
ocfs2_dlmfs: Move to its own directory
ocfs2_dlmfs: Use poll() to signify BASTs.
ocfs2_dlmfs: Add capabilities parameter.
ocfs2: Handle errors while setting external xattr values.
ocfs2: Set inline xattr entries with ocfs2_xa_set()
...

+2059 -1637
+1
fs/ocfs2/Makefile
···
 ocfs2_stack_o2cb-objs := stack_o2cb.o
 ocfs2_stack_user-objs := stack_user.o
 
+obj-$(CONFIG_OCFS2_FS) += dlmfs/
 # cluster/ is always needed when OCFS2_FS for masklog support
 obj-$(CONFIG_OCFS2_FS) += cluster/
 obj-$(CONFIG_OCFS2_FS_O2CB) += dlm/
+3 -2
fs/ocfs2/alloc.c
···
 	strcpy(eb->h_signature, OCFS2_EXTENT_BLOCK_SIGNATURE);
 	eb->h_blkno = cpu_to_le64(first_blkno);
 	eb->h_fs_generation = cpu_to_le32(osb->fs_generation);
-	eb->h_suballoc_slot = cpu_to_le16(osb->slot_num);
+	eb->h_suballoc_slot =
+		cpu_to_le16(meta_ac->ac_alloc_slot);
 	eb->h_suballoc_bit = cpu_to_le16(suballoc_bit_start);
 	eb->h_list.l_count =
 		cpu_to_le16(ocfs2_extent_recs_per_eb(osb->sb));
···
 	if (status < 0)
 		mlog_errno(status);
 	else
-		ocfs2_init_inode_steal_slot(osb);
+		ocfs2_init_steal_slots(osb);
 
 	mlog_exit(status);
 }
+3 -2
fs/ocfs2/aops.c
···
 		goto bail;
 	}
 
-	/* We should already CoW the refcounted extent. */
-	BUG_ON(ext_flags & OCFS2_EXT_REFCOUNTED);
+	/* We should already CoW the refcounted extent in case of create. */
+	BUG_ON(create && (ext_flags & OCFS2_EXT_REFCOUNTED));
+
 	/*
 	 * get_more_blocks() expects us to describe a hole by clearing
 	 * the mapped bit on bh_result().
+1
fs/ocfs2/cluster/masklog.c
···
 	define_mask(XATTR),
 	define_mask(QUOTA),
 	define_mask(REFCOUNT),
+	define_mask(BASTS),
 	define_mask(ERROR),
 	define_mask(NOTICE),
 	define_mask(KTHREAD),
+4 -3
fs/ocfs2/cluster/masklog.h
···
 #define ML_XATTR	0x0000000020000000ULL /* ocfs2 extended attributes */
 #define ML_QUOTA	0x0000000040000000ULL /* ocfs2 quota operations */
 #define ML_REFCOUNT	0x0000000080000000ULL /* refcount tree operations */
+#define ML_BASTS	0x0000001000000000ULL /* dlmglue asts and basts */
 /* bits that are infrequently given and frequently matched in the high word */
 #define ML_ERROR	0x0000000100000000ULL /* sent to KERN_ERR */
 #define ML_NOTICE	0x0000000200000000ULL /* setn to KERN_NOTICE */
···
  * previous token if args expands to nothing.
  */
 #define __mlog_printk(level, fmt, args...)				\
-	printk(level "(%u,%lu):%s:%d " fmt, task_pid_nr(current),	\
-	       __mlog_cpu_guess, __PRETTY_FUNCTION__, __LINE__ ,	\
-	       ##args)
+	printk(level "(%s,%u,%lu):%s:%d " fmt, current->comm,		\
+	       task_pid_nr(current), __mlog_cpu_guess,			\
+	       __PRETTY_FUNCTION__, __LINE__ , ##args)
 
 #define mlog(mask, fmt, args...) do {					\
 	u64 __m = MLOG_MASK_PREFIX | (mask);				\
+1 -1
fs/ocfs2/dir.c
···
 	dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data;
 	memset(dx_root, 0, osb->sb->s_blocksize);
 	strcpy(dx_root->dr_signature, OCFS2_DX_ROOT_SIGNATURE);
-	dx_root->dr_suballoc_slot = cpu_to_le16(osb->slot_num);
+	dx_root->dr_suballoc_slot = cpu_to_le16(meta_ac->ac_alloc_slot);
 	dx_root->dr_suballoc_bit = cpu_to_le16(dr_suballoc_bit);
 	dx_root->dr_fs_generation = cpu_to_le32(osb->fs_generation);
 	dx_root->dr_blkno = cpu_to_le64(dr_blkno);
+1 -2
fs/ocfs2/dlm/Makefile
···
 EXTRA_CFLAGS += -Ifs/ocfs2
 
-obj-$(CONFIG_OCFS2_FS_O2CB) += ocfs2_dlm.o ocfs2_dlmfs.o
+obj-$(CONFIG_OCFS2_FS_O2CB) += ocfs2_dlm.o
 
 ocfs2_dlm-objs := dlmdomain.o dlmdebug.o dlmthread.o dlmrecovery.o \
 	dlmmaster.o dlmast.o dlmconvert.o dlmlock.o dlmunlock.o dlmver.o
-
-ocfs2_dlmfs-objs := userdlm.o dlmfs.o dlmfsver.o
+95 -32
fs/ocfs2/dlm/dlmfs.c → fs/ocfs2/dlmfs/dlmfs.c
···
 #include <linux/init.h>
 #include <linux/string.h>
 #include <linux/backing-dev.h>
+#include <linux/poll.h>
 
 #include <asm/uaccess.h>
 
-
-#include "cluster/nodemanager.h"
-#include "cluster/heartbeat.h"
-#include "cluster/tcp.h"
-
-#include "dlmapi.h"
-
+#include "stackglue.h"
 #include "userdlm.h"
-
 #include "dlmfsver.h"
 
 #define MLOG_MASK_PREFIX ML_DLMFS
 #include "cluster/masklog.h"
 
-#include "ocfs2_lockingver.h"
 
 static const struct super_operations dlmfs_ops;
 static const struct file_operations dlmfs_file_operations;
···
 
 struct workqueue_struct *user_dlm_worker;
 
+
+
 /*
- * This is the userdlmfs locking protocol version.
+ * These are the ABI capabilities of dlmfs.
  *
- * See fs/ocfs2/dlmglue.c for more details on locking versions.
+ * Over time, dlmfs has added some features that were not part of the
+ * initial ABI.  Unfortunately, some of these features are not detectable
+ * via standard usage.  For example, Linux's default poll always returns
+ * POLLIN, so there is no way for a caller of poll(2) to know when dlmfs
+ * added poll support.  Instead, we provide this list of new capabilities.
+ *
+ * Capabilities is a read-only attribute.  We do it as a module parameter
+ * so we can discover it whether dlmfs is built in, loaded, or even not
+ * loaded.
+ *
+ * The ABI features are local to this machine's dlmfs mount.  This is
+ * distinct from the locking protocol, which is concerned with inter-node
+ * interaction.
+ *
+ * Capabilities:
+ * - bast	: POLLIN against the file descriptor of a held lock
+ *		  signifies a bast fired on the lock.
  */
-static const struct dlm_protocol_version user_locking_protocol = {
-	.pv_major = OCFS2_LOCKING_PROTOCOL_MAJOR,
-	.pv_minor = OCFS2_LOCKING_PROTOCOL_MINOR,
-};
+#define DLMFS_CAPABILITIES "bast stackglue"
+extern int param_set_dlmfs_capabilities(const char *val,
+					struct kernel_param *kp)
+{
+	printk(KERN_ERR "%s: readonly parameter\n", kp->name);
+	return -EINVAL;
+}
+static int param_get_dlmfs_capabilities(char *buffer,
+					struct kernel_param *kp)
+{
+	return strlcpy(buffer, DLMFS_CAPABILITIES,
+		       strlen(DLMFS_CAPABILITIES) + 1);
+}
+module_param_call(capabilities, param_set_dlmfs_capabilities,
+		  param_get_dlmfs_capabilities, NULL, 0444);
+MODULE_PARM_DESC(capabilities, DLMFS_CAPABILITIES);
+
 
 /*
  * decodes a set of open flags into a valid lock level and a set of flags.
···
 	return 0;
 }
 
+/*
+ * We do ->setattr() just to override size changes.  Our size is the size
+ * of the LVB and nothing else.
+ */
+static int dlmfs_file_setattr(struct dentry *dentry, struct iattr *attr)
+{
+	int error;
+	struct inode *inode = dentry->d_inode;
+
+	attr->ia_valid &= ~ATTR_SIZE;
+	error = inode_change_ok(inode, attr);
+	if (!error)
+		error = inode_setattr(inode, attr);
+
+	return error;
+}
+
+static unsigned int dlmfs_file_poll(struct file *file, poll_table *wait)
+{
+	int event = 0;
+	struct inode *inode = file->f_path.dentry->d_inode;
+	struct dlmfs_inode_private *ip = DLMFS_I(inode);
+
+	poll_wait(file, &ip->ip_lockres.l_event, wait);
+
+	spin_lock(&ip->ip_lockres.l_lock);
+	if (ip->ip_lockres.l_flags & USER_LOCK_BLOCKED)
+		event = POLLIN | POLLRDNORM;
+	spin_unlock(&ip->ip_lockres.l_lock);
+
+	return event;
+}
+
 static ssize_t dlmfs_file_read(struct file *filp,
 			       char __user *buf,
 			       size_t count,
 			       loff_t *ppos)
 {
 	int bytes_left;
-	ssize_t readlen;
+	ssize_t readlen, got;
 	char *lvb_buf;
 	struct inode *inode = filp->f_path.dentry->d_inode;
 
···
 	if (!lvb_buf)
 		return -ENOMEM;
 
-	user_dlm_read_lvb(inode, lvb_buf, readlen);
-	bytes_left = __copy_to_user(buf, lvb_buf, readlen);
-	readlen -= bytes_left;
+	got = user_dlm_read_lvb(inode, lvb_buf, readlen);
+	if (got) {
+		BUG_ON(got != readlen);
+		bytes_left = __copy_to_user(buf, lvb_buf, readlen);
+		readlen -= bytes_left;
+	} else
+		readlen = 0;
 
 	kfree(lvb_buf);
 
···
 	struct dlmfs_inode_private *ip =
 		(struct dlmfs_inode_private *) foo;
 
-	ip->ip_dlm = NULL;
+	ip->ip_conn = NULL;
 	ip->ip_parent = NULL;
 
 	inode_init_once(&ip->ip_vfs_inode);
···
 		goto clear_fields;
 	}
 
-	mlog(0, "we're a directory, ip->ip_dlm = 0x%p\n", ip->ip_dlm);
+	mlog(0, "we're a directory, ip->ip_conn = 0x%p\n", ip->ip_conn);
 	/* we must be a directory.  If required, lets unregister the
 	 * dlm context now. */
-	if (ip->ip_dlm)
-		user_dlm_unregister_context(ip->ip_dlm);
+	if (ip->ip_conn)
+		user_dlm_unregister(ip->ip_conn);
 clear_fields:
 	ip->ip_parent = NULL;
-	ip->ip_dlm = NULL;
+	ip->ip_conn = NULL;
 }
 
 static struct backing_dev_info dlmfs_backing_dev_info = {
···
 	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
 
 	ip = DLMFS_I(inode);
-	ip->ip_dlm = DLMFS_I(parent)->ip_dlm;
+	ip->ip_conn = DLMFS_I(parent)->ip_conn;
 
 	switch (mode & S_IFMT) {
 	default:
···
 	struct inode *inode = NULL;
 	struct qstr *domain = &dentry->d_name;
 	struct dlmfs_inode_private *ip;
-	struct dlm_ctxt *dlm;
-	struct dlm_protocol_version proto = user_locking_protocol;
+	struct ocfs2_cluster_connection *conn;
 
 	mlog(0, "mkdir %.*s\n", domain->len, domain->name);
 
 	/* verify that we have a proper domain */
-	if (domain->len >= O2NM_MAX_NAME_LEN) {
+	if (domain->len >= GROUP_NAME_MAX) {
 		status = -EINVAL;
 		mlog(ML_ERROR, "invalid domain name for directory.\n");
 		goto bail;
···
 
 	ip = DLMFS_I(inode);
 
-	dlm = user_dlm_register_context(domain, &proto);
-	if (IS_ERR(dlm)) {
-		status = PTR_ERR(dlm);
+	conn = user_dlm_register(domain);
+	if (IS_ERR(conn)) {
+		status = PTR_ERR(conn);
 		mlog(ML_ERROR, "Error %d could not register domain \"%.*s\"\n",
 		     status, domain->len, domain->name);
 		goto bail;
 	}
-	ip->ip_dlm = dlm;
+	ip->ip_conn = conn;
 
 	inc_nlink(dir);
 	d_instantiate(dentry, inode);
···
 static const struct file_operations dlmfs_file_operations = {
 	.open		= dlmfs_file_open,
 	.release	= dlmfs_file_release,
+	.poll		= dlmfs_file_poll,
 	.read		= dlmfs_file_read,
 	.write		= dlmfs_file_write,
 };
···
 
 static const struct inode_operations dlmfs_file_inode_operations = {
 	.getattr	= simple_getattr,
+	.setattr	= dlmfs_file_setattr,
 };
 
 static int dlmfs_get_sb(struct file_system_type *fs_type,
···
 	}
 	cleanup_worker = 1;
 
+	user_dlm_set_locking_protocol();
 	status = register_filesystem(&dlmfs_fs_type);
 bail:
 	if (status) {
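
As an aside, a minimal userspace sketch of the new "bast" capability (my illustration, not part of this merge). It assumes dlmfs is mounted at /dlm, that the domain and lock names are made up, and that a read-only open takes the lock at a shared level per dlmfs's open-flag decoding; POLLIN on the held lock's descriptor signifies a fired bast, exactly as the capabilities comment above documents:

#include <stdio.h>
#include <fcntl.h>
#include <poll.h>
#include <unistd.h>

int main(void)
{
	/* Opening the lock file takes the lock ("mydomain" and
	 * "mylock" are hypothetical names). */
	int fd = open("/dlm/mydomain/mylock", O_RDONLY);
	struct pollfd pfd = { .fd = fd, .events = POLLIN };

	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* POLLIN on a held dlmfs lock means a bast fired: another
	 * node wants a conflicting level. */
	if (poll(&pfd, 1, -1) > 0 && (pfd.revents & POLLIN))
		printf("bast received; releasing the lock\n");

	close(fd);	/* closing the file drops the lock */
	return 0;
}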
fs/ocfs2/dlm/dlmfsver.c → fs/ocfs2/dlmfs/dlmfsver.c
fs/ocfs2/dlm/dlmfsver.h → fs/ocfs2/dlmfs/dlmfsver.h
+1 -1
fs/ocfs2/dlm/dlmrecovery.c
···
 	mlog(0, "dlm thread running for %s...\n", dlm->name);
 
 	while (!kthread_should_stop()) {
-		if (dlm_joined(dlm)) {
+		if (dlm_domain_fully_joined(dlm)) {
 			status = dlm_do_recovery(dlm);
 			if (status == -EAGAIN) {
 				/* do not sleep, recheck immediately. */
+164 -152
fs/ocfs2/dlm/userdlm.c → fs/ocfs2/dlmfs/userdlm.c
···
 #include <linux/types.h>
 #include <linux/crc32.h>
 
-
-#include "cluster/nodemanager.h"
-#include "cluster/heartbeat.h"
-#include "cluster/tcp.h"
-
-#include "dlmapi.h"
-
+#include "ocfs2_lockingver.h"
+#include "stackglue.h"
 #include "userdlm.h"
 
 #define MLOG_MASK_PREFIX ML_DLMFS
 #include "cluster/masklog.h"
+
+
+static inline struct user_lock_res *user_lksb_to_lock_res(struct ocfs2_dlm_lksb *lksb)
+{
+	return container_of(lksb, struct user_lock_res, l_lksb);
+}
 
 static inline int user_check_wait_flag(struct user_lock_res *lockres,
 				       int flag)
···
 }
 
 /* I heart container_of... */
-static inline struct dlm_ctxt *
-dlm_ctxt_from_user_lockres(struct user_lock_res *lockres)
+static inline struct ocfs2_cluster_connection *
+cluster_connection_from_user_lockres(struct user_lock_res *lockres)
 {
 	struct dlmfs_inode_private *ip;
 
 	ip = container_of(lockres,
 			  struct dlmfs_inode_private,
 			  ip_lockres);
-	return ip->ip_dlm;
+	return ip->ip_conn;
 }
 
 static struct inode *
···
 }
 
 #define user_log_dlm_error(_func, _stat, _lockres) do {		\
-	mlog(ML_ERROR, "Dlm error \"%s\" while calling %s on "		\
-		"resource %.*s: %s\n", dlm_errname(_stat), _func,	\
-		_lockres->l_namelen, _lockres->l_name, dlm_errmsg(_stat)); \
+	mlog(ML_ERROR, "Dlm error %d while calling %s on "		\
+		"resource %.*s\n", _stat, _func,			\
+		_lockres->l_namelen, _lockres->l_name);			\
 } while (0)
 
 /* WARNING: This function lives in a world where the only three lock
···
  * lock types are added. */
 static inline int user_highest_compat_lock_level(int level)
 {
-	int new_level = LKM_EXMODE;
+	int new_level = DLM_LOCK_EX;
 
-	if (level == LKM_EXMODE)
-		new_level = LKM_NLMODE;
-	else if (level == LKM_PRMODE)
-		new_level = LKM_PRMODE;
+	if (level == DLM_LOCK_EX)
+		new_level = DLM_LOCK_NL;
+	else if (level == DLM_LOCK_PR)
+		new_level = DLM_LOCK_PR;
 	return new_level;
 }
 
-static void user_ast(void *opaque)
+static void user_ast(struct ocfs2_dlm_lksb *lksb)
 {
-	struct user_lock_res *lockres = opaque;
-	struct dlm_lockstatus *lksb;
+	struct user_lock_res *lockres = user_lksb_to_lock_res(lksb);
+	int status;
 
-	mlog(0, "AST fired for lockres %.*s\n", lockres->l_namelen,
-	     lockres->l_name);
+	mlog(ML_BASTS, "AST fired for lockres %.*s, level %d => %d\n",
+	     lockres->l_namelen, lockres->l_name, lockres->l_level,
+	     lockres->l_requested);
 
 	spin_lock(&lockres->l_lock);
 
-	lksb = &(lockres->l_lksb);
-	if (lksb->status != DLM_NORMAL) {
+	status = ocfs2_dlm_lock_status(&lockres->l_lksb);
+	if (status) {
 		mlog(ML_ERROR, "lksb status value of %u on lockres %.*s\n",
-		     lksb->status, lockres->l_namelen, lockres->l_name);
+		     status, lockres->l_namelen, lockres->l_name);
 		spin_unlock(&lockres->l_lock);
 		return;
 	}
 
-	mlog_bug_on_msg(lockres->l_requested == LKM_IVMODE,
+	mlog_bug_on_msg(lockres->l_requested == DLM_LOCK_IV,
 			"Lockres %.*s, requested ivmode. flags 0x%x\n",
 			lockres->l_namelen, lockres->l_name, lockres->l_flags);
 
···
 	if (lockres->l_requested < lockres->l_level) {
 		if (lockres->l_requested <=
 		    user_highest_compat_lock_level(lockres->l_blocking)) {
-			lockres->l_blocking = LKM_NLMODE;
+			lockres->l_blocking = DLM_LOCK_NL;
 			lockres->l_flags &= ~USER_LOCK_BLOCKED;
 		}
 	}
 
 	lockres->l_level = lockres->l_requested;
-	lockres->l_requested = LKM_IVMODE;
+	lockres->l_requested = DLM_LOCK_IV;
 	lockres->l_flags |= USER_LOCK_ATTACHED;
 	lockres->l_flags &= ~USER_LOCK_BUSY;
 
···
 		return;
 
 	switch (lockres->l_blocking) {
-	case LKM_EXMODE:
+	case DLM_LOCK_EX:
 		if (!lockres->l_ex_holders && !lockres->l_ro_holders)
 			queue = 1;
 		break;
-	case LKM_PRMODE:
+	case DLM_LOCK_PR:
 		if (!lockres->l_ex_holders)
 			queue = 1;
 		break;
···
 		__user_dlm_queue_lockres(lockres);
 }
 
-static void user_bast(void *opaque, int level)
+static void user_bast(struct ocfs2_dlm_lksb *lksb, int level)
 {
-	struct user_lock_res *lockres = opaque;
+	struct user_lock_res *lockres = user_lksb_to_lock_res(lksb);
 
-	mlog(0, "Blocking AST fired for lockres %.*s. Blocking level %d\n",
-	     lockres->l_namelen, lockres->l_name, level);
+	mlog(ML_BASTS, "BAST fired for lockres %.*s, blocking %d, level %d\n",
+	     lockres->l_namelen, lockres->l_name, level, lockres->l_level);
 
 	spin_lock(&lockres->l_lock);
 	lockres->l_flags |= USER_LOCK_BLOCKED;
···
 	wake_up(&lockres->l_event);
 }
 
-static void user_unlock_ast(void *opaque, enum dlm_status status)
+static void user_unlock_ast(struct ocfs2_dlm_lksb *lksb, int status)
 {
-	struct user_lock_res *lockres = opaque;
+	struct user_lock_res *lockres = user_lksb_to_lock_res(lksb);
 
-	mlog(0, "UNLOCK AST called on lock %.*s\n", lockres->l_namelen,
-	     lockres->l_name);
+	mlog(ML_BASTS, "UNLOCK AST fired for lockres %.*s, flags 0x%x\n",
+	     lockres->l_namelen, lockres->l_name, lockres->l_flags);
 
-	if (status != DLM_NORMAL && status != DLM_CANCELGRANT)
-		mlog(ML_ERROR, "Dlm returns status %d\n", status);
+	if (status)
+		mlog(ML_ERROR, "dlm returns status %d\n", status);
 
 	spin_lock(&lockres->l_lock);
 	/* The teardown flag gets set early during the unlock process,
···
 	 * for a concurrent cancel. */
 	if (lockres->l_flags & USER_LOCK_IN_TEARDOWN
 	    && !(lockres->l_flags & USER_LOCK_IN_CANCEL)) {
-		lockres->l_level = LKM_IVMODE;
+		lockres->l_level = DLM_LOCK_IV;
 	} else if (status == DLM_CANCELGRANT) {
 		/* We tried to cancel a convert request, but it was
 		 * already granted. Don't clear the busy flag - the
···
 	} else {
 		BUG_ON(!(lockres->l_flags & USER_LOCK_IN_CANCEL));
 		/* Cancel succeeded, we want to re-queue */
-		lockres->l_requested = LKM_IVMODE; /* cancel an
+		lockres->l_requested = DLM_LOCK_IV; /* cancel an
 						    * upconvert
 						    * request. */
 		lockres->l_flags &= ~USER_LOCK_IN_CANCEL;
···
 	wake_up(&lockres->l_event);
 }
 
+/*
+ * This is the userdlmfs locking protocol version.
+ *
+ * See fs/ocfs2/dlmglue.c for more details on locking versions.
+ */
+static struct ocfs2_locking_protocol user_dlm_lproto = {
+	.lp_max_version = {
+		.pv_major = OCFS2_LOCKING_PROTOCOL_MAJOR,
+		.pv_minor = OCFS2_LOCKING_PROTOCOL_MINOR,
+	},
+	.lp_lock_ast		= user_ast,
+	.lp_blocking_ast	= user_bast,
+	.lp_unlock_ast		= user_unlock_ast,
+};
+
 static inline void user_dlm_drop_inode_ref(struct user_lock_res *lockres)
 {
 	struct inode *inode;
···
 	int new_level, status;
 	struct user_lock_res *lockres =
 		container_of(work, struct user_lock_res, l_work);
-	struct dlm_ctxt *dlm = dlm_ctxt_from_user_lockres(lockres);
+	struct ocfs2_cluster_connection *conn =
+		cluster_connection_from_user_lockres(lockres);
 
-	mlog(0, "processing lockres %.*s\n", lockres->l_namelen,
-	     lockres->l_name);
+	mlog(0, "lockres %.*s\n", lockres->l_namelen, lockres->l_name);
 
 	spin_lock(&lockres->l_lock);
 
···
 	 * flag, and finally we might get another bast which re-queues
 	 * us before our ast for the downconvert is called. */
 	if (!(lockres->l_flags & USER_LOCK_BLOCKED)) {
+		mlog(ML_BASTS, "lockres %.*s USER_LOCK_BLOCKED\n",
+		     lockres->l_namelen, lockres->l_name);
 		spin_unlock(&lockres->l_lock);
 		goto drop_ref;
 	}
 
 	if (lockres->l_flags & USER_LOCK_IN_TEARDOWN) {
+		mlog(ML_BASTS, "lockres %.*s USER_LOCK_IN_TEARDOWN\n",
+		     lockres->l_namelen, lockres->l_name);
 		spin_unlock(&lockres->l_lock);
 		goto drop_ref;
 	}
 
 	if (lockres->l_flags & USER_LOCK_BUSY) {
 		if (lockres->l_flags & USER_LOCK_IN_CANCEL) {
+			mlog(ML_BASTS, "lockres %.*s USER_LOCK_IN_CANCEL\n",
+			     lockres->l_namelen, lockres->l_name);
 			spin_unlock(&lockres->l_lock);
 			goto drop_ref;
 		}
···
 		lockres->l_flags |= USER_LOCK_IN_CANCEL;
 		spin_unlock(&lockres->l_lock);
 
-		status = dlmunlock(dlm,
-				   &lockres->l_lksb,
-				   LKM_CANCEL,
-				   user_unlock_ast,
-				   lockres);
-		if (status != DLM_NORMAL)
-			user_log_dlm_error("dlmunlock", status, lockres);
+		status = ocfs2_dlm_unlock(conn, &lockres->l_lksb,
+					  DLM_LKF_CANCEL);
+		if (status)
+			user_log_dlm_error("ocfs2_dlm_unlock", status, lockres);
 		goto drop_ref;
 	}
 
 	/* If there are still incompat holders, we can exit safely
 	 * without worrying about re-queueing this lock as that will
 	 * happen on the last call to user_cluster_unlock. */
-	if ((lockres->l_blocking == LKM_EXMODE)
+	if ((lockres->l_blocking == DLM_LOCK_EX)
 	    && (lockres->l_ex_holders || lockres->l_ro_holders)) {
 		spin_unlock(&lockres->l_lock);
-		mlog(0, "can't downconvert for ex: ro = %u, ex = %u\n",
-		     lockres->l_ro_holders, lockres->l_ex_holders);
+		mlog(ML_BASTS, "lockres %.*s, EX/PR Holders %u,%u\n",
+		     lockres->l_namelen, lockres->l_name,
+		     lockres->l_ex_holders, lockres->l_ro_holders);
 		goto drop_ref;
 	}
 
-	if ((lockres->l_blocking == LKM_PRMODE)
+	if ((lockres->l_blocking == DLM_LOCK_PR)
 	    && lockres->l_ex_holders) {
 		spin_unlock(&lockres->l_lock);
-		mlog(0, "can't downconvert for pr: ex = %u\n",
-		     lockres->l_ex_holders);
+		mlog(ML_BASTS, "lockres %.*s, EX Holders %u\n",
+		     lockres->l_namelen, lockres->l_name,
+		     lockres->l_ex_holders);
 		goto drop_ref;
 	}
 
···
 	new_level = user_highest_compat_lock_level(lockres->l_blocking);
 	lockres->l_requested = new_level;
 	lockres->l_flags |= USER_LOCK_BUSY;
-	mlog(0, "Downconvert lock from %d to %d\n",
-	     lockres->l_level, new_level);
+	mlog(ML_BASTS, "lockres %.*s, downconvert %d => %d\n",
+	     lockres->l_namelen, lockres->l_name, lockres->l_level, new_level);
 	spin_unlock(&lockres->l_lock);
 
 	/* need lock downconvert request now... */
-	status = dlmlock(dlm,
-			 new_level,
-			 &lockres->l_lksb,
-			 LKM_CONVERT|LKM_VALBLK,
-			 lockres->l_name,
-			 lockres->l_namelen,
-			 user_ast,
-			 lockres,
-			 user_bast);
-	if (status != DLM_NORMAL) {
-		user_log_dlm_error("dlmlock", status, lockres);
+	status = ocfs2_dlm_lock(conn, new_level, &lockres->l_lksb,
+				DLM_LKF_CONVERT|DLM_LKF_VALBLK,
+				lockres->l_name,
+				lockres->l_namelen);
+	if (status) {
+		user_log_dlm_error("ocfs2_dlm_lock", status, lockres);
 		user_recover_from_dlm_error(lockres);
 	}
 
···
 			     int level)
 {
 	switch(level) {
-	case LKM_EXMODE:
+	case DLM_LOCK_EX:
 		lockres->l_ex_holders++;
 		break;
-	case LKM_PRMODE:
+	case DLM_LOCK_PR:
 		lockres->l_ro_holders++;
 		break;
 	default:
···
 			  int lkm_flags)
 {
 	int status, local_flags;
-	struct dlm_ctxt *dlm = dlm_ctxt_from_user_lockres(lockres);
+	struct ocfs2_cluster_connection *conn =
+		cluster_connection_from_user_lockres(lockres);
 
-	if (level != LKM_EXMODE &&
-	    level != LKM_PRMODE) {
+	if (level != DLM_LOCK_EX &&
+	    level != DLM_LOCK_PR) {
 		mlog(ML_ERROR, "lockres %.*s: invalid request!\n",
 		     lockres->l_namelen, lockres->l_name);
 		status = -EINVAL;
 		goto bail;
 	}
 
-	mlog(0, "lockres %.*s: asking for %s lock, passed flags = 0x%x\n",
-	     lockres->l_namelen, lockres->l_name,
-	     (level == LKM_EXMODE) ? "LKM_EXMODE" : "LKM_PRMODE",
-	     lkm_flags);
+	mlog(ML_BASTS, "lockres %.*s, level %d, flags = 0x%x\n",
+	     lockres->l_namelen, lockres->l_name, level, lkm_flags);
 
 again:
 	if (signal_pending(current)) {
···
 	}
 
 	if (level > lockres->l_level) {
-		local_flags = lkm_flags | LKM_VALBLK;
-		if (lockres->l_level != LKM_IVMODE)
-			local_flags |= LKM_CONVERT;
+		local_flags = lkm_flags | DLM_LKF_VALBLK;
+		if (lockres->l_level != DLM_LOCK_IV)
+			local_flags |= DLM_LKF_CONVERT;
 
 		lockres->l_requested = level;
 		lockres->l_flags |= USER_LOCK_BUSY;
 		spin_unlock(&lockres->l_lock);
 
-		BUG_ON(level == LKM_IVMODE);
-		BUG_ON(level == LKM_NLMODE);
+		BUG_ON(level == DLM_LOCK_IV);
+		BUG_ON(level == DLM_LOCK_NL);
 
 		/* call dlm_lock to upgrade lock now */
-		status = dlmlock(dlm,
-				 level,
-				 &lockres->l_lksb,
-				 local_flags,
-				 lockres->l_name,
-				 lockres->l_namelen,
-				 user_ast,
-				 lockres,
-				 user_bast);
-		if (status != DLM_NORMAL) {
-			if ((lkm_flags & LKM_NOQUEUE) &&
-			    (status == DLM_NOTQUEUED))
-				status = -EAGAIN;
-			else {
-				user_log_dlm_error("dlmlock", status, lockres);
-				status = -EINVAL;
-			}
+		status = ocfs2_dlm_lock(conn, level, &lockres->l_lksb,
+					local_flags, lockres->l_name,
+					lockres->l_namelen);
+		if (status) {
+			if ((lkm_flags & DLM_LKF_NOQUEUE) &&
+			    (status != -EAGAIN))
+				user_log_dlm_error("ocfs2_dlm_lock",
+						   status, lockres);
 			user_recover_from_dlm_error(lockres);
 			goto bail;
 		}
···
 			     int level)
 {
 	switch(level) {
-	case LKM_EXMODE:
+	case DLM_LOCK_EX:
 		BUG_ON(!lockres->l_ex_holders);
 		lockres->l_ex_holders--;
 		break;
-	case LKM_PRMODE:
+	case DLM_LOCK_PR:
 		BUG_ON(!lockres->l_ro_holders);
 		lockres->l_ro_holders--;
 		break;
···
 void user_dlm_cluster_unlock(struct user_lock_res *lockres,
 			     int level)
 {
-	if (level != LKM_EXMODE &&
-	    level != LKM_PRMODE) {
+	if (level != DLM_LOCK_EX &&
+	    level != DLM_LOCK_PR) {
 		mlog(ML_ERROR, "lockres %.*s: invalid request!\n",
 		     lockres->l_namelen, lockres->l_name);
 		return;
···
 			unsigned int len)
 {
 	struct user_lock_res *lockres = &DLMFS_I(inode)->ip_lockres;
-	char *lvb = lockres->l_lksb.lvb;
+	char *lvb;
 
 	BUG_ON(len > DLM_LVB_LEN);
 
 	spin_lock(&lockres->l_lock);
 
-	BUG_ON(lockres->l_level < LKM_EXMODE);
+	BUG_ON(lockres->l_level < DLM_LOCK_EX);
+	lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
 	memcpy(lvb, val, len);
 
 	spin_unlock(&lockres->l_lock);
 }
 
-void user_dlm_read_lvb(struct inode *inode,
-		       char *val,
-		       unsigned int len)
+ssize_t user_dlm_read_lvb(struct inode *inode,
+			  char *val,
+			  unsigned int len)
 {
 	struct user_lock_res *lockres = &DLMFS_I(inode)->ip_lockres;
-	char *lvb = lockres->l_lksb.lvb;
+	char *lvb;
+	ssize_t ret = len;
 
 	BUG_ON(len > DLM_LVB_LEN);
 
 	spin_lock(&lockres->l_lock);
 
-	BUG_ON(lockres->l_level < LKM_PRMODE);
-	memcpy(val, lvb, len);
+	BUG_ON(lockres->l_level < DLM_LOCK_PR);
+	if (ocfs2_dlm_lvb_valid(&lockres->l_lksb)) {
+		lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
+		memcpy(val, lvb, len);
+	} else
+		ret = 0;
 
 	spin_unlock(&lockres->l_lock);
+	return ret;
 }
 
 void user_dlm_lock_res_init(struct user_lock_res *lockres,
···
 
 	spin_lock_init(&lockres->l_lock);
 	init_waitqueue_head(&lockres->l_event);
-	lockres->l_level = LKM_IVMODE;
-	lockres->l_requested = LKM_IVMODE;
-	lockres->l_blocking = LKM_IVMODE;
+	lockres->l_level = DLM_LOCK_IV;
+	lockres->l_requested = DLM_LOCK_IV;
+	lockres->l_blocking = DLM_LOCK_IV;
 
 	/* should have been checked before getting here. */
 	BUG_ON(dentry->d_name.len >= USER_DLM_LOCK_ID_MAX_LEN);
···
 int user_dlm_destroy_lock(struct user_lock_res *lockres)
 {
 	int status = -EBUSY;
-	struct dlm_ctxt *dlm = dlm_ctxt_from_user_lockres(lockres);
+	struct ocfs2_cluster_connection *conn =
+		cluster_connection_from_user_lockres(lockres);
 
-	mlog(0, "asked to destroy %.*s\n", lockres->l_namelen, lockres->l_name);
+	mlog(ML_BASTS, "lockres %.*s\n", lockres->l_namelen, lockres->l_name);
 
 	spin_lock(&lockres->l_lock);
 	if (lockres->l_flags & USER_LOCK_IN_TEARDOWN) {
···
 	lockres->l_flags |= USER_LOCK_BUSY;
 	spin_unlock(&lockres->l_lock);
 
-	status = dlmunlock(dlm,
-			   &lockres->l_lksb,
-			   LKM_VALBLK,
-			   user_unlock_ast,
-			   lockres);
-	if (status != DLM_NORMAL) {
-		user_log_dlm_error("dlmunlock", status, lockres);
-		status = -EINVAL;
+	status = ocfs2_dlm_unlock(conn, &lockres->l_lksb, DLM_LKF_VALBLK);
+	if (status) {
+		user_log_dlm_error("ocfs2_dlm_unlock", status, lockres);
 		goto bail;
 	}
 
···
 	return status;
 }
 
-struct dlm_ctxt *user_dlm_register_context(struct qstr *name,
-					   struct dlm_protocol_version *proto)
+static void user_dlm_recovery_handler_noop(int node_num,
+					   void *recovery_data)
 {
-	struct dlm_ctxt *dlm;
-	u32 dlm_key;
-	char *domain;
-
-	domain = kmalloc(name->len + 1, GFP_NOFS);
-	if (!domain) {
-		mlog_errno(-ENOMEM);
-		return ERR_PTR(-ENOMEM);
-	}
-
-	dlm_key = crc32_le(0, name->name, name->len);
-
-	snprintf(domain, name->len + 1, "%.*s", name->len, name->name);
-
-	dlm = dlm_register_domain(domain, dlm_key, proto);
-	if (IS_ERR(dlm))
-		mlog_errno(PTR_ERR(dlm));
-
-	kfree(domain);
-	return dlm;
+	/* We ignore recovery events */
+	return;
 }
 
-void user_dlm_unregister_context(struct dlm_ctxt *dlm)
+void user_dlm_set_locking_protocol(void)
 {
-	dlm_unregister_domain(dlm);
+	ocfs2_stack_glue_set_max_proto_version(&user_dlm_lproto.lp_max_version);
+}
+
+struct ocfs2_cluster_connection *user_dlm_register(struct qstr *name)
+{
+	int rc;
+	struct ocfs2_cluster_connection *conn;
+
+	rc = ocfs2_cluster_connect_agnostic(name->name, name->len,
+					    &user_dlm_lproto,
+					    user_dlm_recovery_handler_noop,
+					    NULL, &conn);
+	if (rc)
+		mlog_errno(rc);
+
+	return rc ? ERR_PTR(rc) : conn;
+}
+
+void user_dlm_unregister(struct ocfs2_cluster_connection *conn)
+{
+	ocfs2_cluster_disconnect(conn, 0);
 }
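
The move from o2dlm's opaque callback argument to stackglue callbacks that receive the lksb leans on container_of(): user_lksb_to_lock_res() recovers the enclosing user_lock_res from the embedded ocfs2_dlm_lksb. A standalone sketch of that pattern, with toy struct names, for readers unfamiliar with the idiom:

#include <stdio.h>
#include <stddef.h>

/* Same recipe as the kernel macro: subtract the member offset from
 * the member pointer to get back to the containing object. */
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct lksb { int status; };

struct lock_res {
	const char *name;
	struct lksb l_lksb;	/* embedded, like user_lock_res.l_lksb */
};

static struct lock_res *lksb_to_lock_res(struct lksb *lksb)
{
	return container_of(lksb, struct lock_res, l_lksb);
}

int main(void)
{
	struct lock_res res = { .name = "example" };

	/* The DLM hands the callback only &res.l_lksb; we recover res. */
	printf("%s\n", lksb_to_lock_res(&res.l_lksb)->name);
	return 0;
}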
+8 -8
fs/ocfs2/dlm/userdlm.h → fs/ocfs2/dlmfs/userdlm.h
···
 	int l_level;
 	unsigned int l_ro_holders;
 	unsigned int l_ex_holders;
-	struct dlm_lockstatus l_lksb;
+	struct ocfs2_dlm_lksb l_lksb;
 
 	int l_requested;
 	int l_blocking;
···
 void user_dlm_write_lvb(struct inode *inode,
 			const char *val,
 			unsigned int len);
-void user_dlm_read_lvb(struct inode *inode,
-		       char *val,
-		       unsigned int len);
-struct dlm_ctxt *user_dlm_register_context(struct qstr *name,
-					   struct dlm_protocol_version *proto);
-void user_dlm_unregister_context(struct dlm_ctxt *dlm);
+ssize_t user_dlm_read_lvb(struct inode *inode,
+			  char *val,
+			  unsigned int len);
+struct ocfs2_cluster_connection *user_dlm_register(struct qstr *name);
+void user_dlm_unregister(struct ocfs2_cluster_connection *conn);
+void user_dlm_set_locking_protocol(void);
 
 struct dlmfs_inode_private {
-	struct dlm_ctxt		*ip_dlm;
+	struct ocfs2_cluster_connection	*ip_conn;
 
 	struct user_lock_res	ip_lockres; /* unused for directories. */
 	struct inode		*ip_parent;
+5
fs/ocfs2/dlmfs/Makefile
···
+EXTRA_CFLAGS += -Ifs/ocfs2
+
+obj-$(CONFIG_OCFS2_FS) += ocfs2_dlmfs.o
+
+ocfs2_dlmfs-objs := userdlm.o dlmfs.o dlmfsver.o
+161 -123
fs/ocfs2/dlmglue.c
···
 		lockres->l_type == OCFS2_LOCK_TYPE_OPEN;
 }
 
+static inline struct ocfs2_lock_res *ocfs2_lksb_to_lock_res(struct ocfs2_dlm_lksb *lksb)
+{
+	return container_of(lksb, struct ocfs2_lock_res, l_lksb);
+}
+
 static inline struct inode *ocfs2_lock_res_inode(struct ocfs2_lock_res *lockres)
 {
 	BUG_ON(!ocfs2_is_inode_lock(lockres));
···
 		lockres->l_blocking = level;
 	}
 
+	mlog(ML_BASTS, "lockres %s, block %d, level %d, l_block %d, dwn %d\n",
+	     lockres->l_name, level, lockres->l_level, lockres->l_blocking,
+	     needs_downconvert);
+
 	if (needs_downconvert)
 		lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED);
 
···
 	return lockres->l_pending_gen;
 }
 
-
-static void ocfs2_blocking_ast(void *opaque, int level)
+static void ocfs2_blocking_ast(struct ocfs2_dlm_lksb *lksb, int level)
 {
-	struct ocfs2_lock_res *lockres = opaque;
+	struct ocfs2_lock_res *lockres = ocfs2_lksb_to_lock_res(lksb);
 	struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres);
 	int needs_downconvert;
 	unsigned long flags;
 
 	BUG_ON(level <= DLM_LOCK_NL);
 
-	mlog(0, "BAST fired for lockres %s, blocking %d, level %d type %s\n",
-	     lockres->l_name, level, lockres->l_level,
+	mlog(ML_BASTS, "BAST fired for lockres %s, blocking %d, level %d, "
+	     "type %s\n", lockres->l_name, level, lockres->l_level,
 	     ocfs2_lock_type_string(lockres->l_type));
 
 	/*
···
 	ocfs2_wake_downconvert_thread(osb);
 }
 
-static void ocfs2_locking_ast(void *opaque)
+static void ocfs2_locking_ast(struct ocfs2_dlm_lksb *lksb)
 {
-	struct ocfs2_lock_res *lockres = opaque;
+	struct ocfs2_lock_res *lockres = ocfs2_lksb_to_lock_res(lksb);
 	struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres);
 	unsigned long flags;
 	int status;
···
 		return;
 	}
 
+	mlog(ML_BASTS, "AST fired for lockres %s, action %d, unlock %d, "
+	     "level %d => %d\n", lockres->l_name, lockres->l_action,
+	     lockres->l_unlock_action, lockres->l_level, lockres->l_requested);
+
 	switch(lockres->l_action) {
 	case OCFS2_AST_ATTACH:
 		ocfs2_generic_handle_attach_action(lockres);
···
 		ocfs2_generic_handle_downconvert_action(lockres);
 		break;
 	default:
-		mlog(ML_ERROR, "lockres %s: ast fired with invalid action: %u "
-		     "lockres flags = 0x%lx, unlock action: %u\n",
+		mlog(ML_ERROR, "lockres %s: AST fired with invalid action: %u, "
+		     "flags 0x%lx, unlock: %u\n",
 		     lockres->l_name, lockres->l_action, lockres->l_flags,
 		     lockres->l_unlock_action);
 		BUG();
···
 
 	wake_up(&lockres->l_event);
 	spin_unlock_irqrestore(&lockres->l_lock, flags);
+}
+
+static void ocfs2_unlock_ast(struct ocfs2_dlm_lksb *lksb, int error)
+{
+	struct ocfs2_lock_res *lockres = ocfs2_lksb_to_lock_res(lksb);
+	unsigned long flags;
+
+	mlog_entry_void();
+
+	mlog(ML_BASTS, "UNLOCK AST fired for lockres %s, action = %d\n",
+	     lockres->l_name, lockres->l_unlock_action);
+
+	spin_lock_irqsave(&lockres->l_lock, flags);
+	if (error) {
+		mlog(ML_ERROR, "Dlm passes error %d for lock %s, "
+		     "unlock_action %d\n", error, lockres->l_name,
+		     lockres->l_unlock_action);
+		spin_unlock_irqrestore(&lockres->l_lock, flags);
+		mlog_exit_void();
+		return;
+	}
+
+	switch(lockres->l_unlock_action) {
+	case OCFS2_UNLOCK_CANCEL_CONVERT:
+		mlog(0, "Cancel convert success for %s\n", lockres->l_name);
+		lockres->l_action = OCFS2_AST_INVALID;
+		/* Downconvert thread may have requeued this lock, we
+		 * need to wake it. */
+		if (lockres->l_flags & OCFS2_LOCK_BLOCKED)
+			ocfs2_wake_downconvert_thread(ocfs2_get_lockres_osb(lockres));
+		break;
+	case OCFS2_UNLOCK_DROP_LOCK:
+		lockres->l_level = DLM_LOCK_IV;
+		break;
+	default:
+		BUG();
+	}
+
+	lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
+	lockres->l_unlock_action = OCFS2_UNLOCK_INVALID;
+	wake_up(&lockres->l_event);
+	spin_unlock_irqrestore(&lockres->l_lock, flags);
+
+	mlog_exit_void();
+}
+
+/*
+ * This is the filesystem locking protocol.  It provides the lock handling
+ * hooks for the underlying DLM.  It has a maximum version number.
+ * The version number allows interoperability with systems running at
+ * the same major number and an equal or smaller minor number.
+ *
+ * Whenever the filesystem does new things with locks (adds or removes a
+ * lock, orders them differently, does different things underneath a lock),
+ * the version must be changed.  The protocol is negotiated when joining
+ * the dlm domain.  A node may join the domain if its major version is
+ * identical to all other nodes and its minor version is greater than
+ * or equal to all other nodes.  When its minor version is greater than
+ * the other nodes, it will run at the minor version specified by the
+ * other nodes.
+ *
+ * If a locking change is made that will not be compatible with older
+ * versions, the major number must be increased and the minor version set
+ * to zero.  If a change merely adds a behavior that can be disabled when
+ * speaking to older versions, the minor version must be increased.  If a
+ * change adds a fully backwards compatible change (eg, LVB changes that
+ * are just ignored by older versions), the version does not need to be
+ * updated.
+ */
+static struct ocfs2_locking_protocol lproto = {
+	.lp_max_version = {
+		.pv_major = OCFS2_LOCKING_PROTOCOL_MAJOR,
+		.pv_minor = OCFS2_LOCKING_PROTOCOL_MINOR,
+	},
+	.lp_lock_ast		= ocfs2_locking_ast,
+	.lp_blocking_ast	= ocfs2_blocking_ast,
+	.lp_unlock_ast		= ocfs2_unlock_ast,
+};
+
+void ocfs2_set_locking_protocol(void)
+{
+	ocfs2_stack_glue_set_max_proto_version(&lproto.lp_max_version);
 }
 
 static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres,
···
 			     &lockres->l_lksb,
 			     dlm_flags,
 			     lockres->l_name,
-			     OCFS2_LOCK_ID_MAX_LEN - 1,
-			     lockres);
+			     OCFS2_LOCK_ID_MAX_LEN - 1);
 	lockres_clear_pending(lockres, gen, osb);
 	if (ret) {
 		ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres);
···
 	BUG_ON(level == DLM_LOCK_IV);
 	BUG_ON(level == DLM_LOCK_NL);
 
-	mlog(0, "lock %s, convert from %d to level = %d\n",
+	mlog(ML_BASTS, "lockres %s, convert from %d to %d\n",
 	     lockres->l_name, lockres->l_level, level);
 
 	/* call dlm_lock to upgrade lock now */
···
 			     &lockres->l_lksb,
 			     lkm_flags,
 			     lockres->l_name,
-			     OCFS2_LOCK_ID_MAX_LEN - 1,
-			     lockres);
+			     OCFS2_LOCK_ID_MAX_LEN - 1);
 	lockres_clear_pending(lockres, gen, osb);
 	if (ret) {
 		if (!(lkm_flags & DLM_LKF_NOQUEUE) ||
···
 	spin_unlock_irqrestore(&lockres->l_lock, flags);
 
 	ret = ocfs2_dlm_lock(osb->cconn, level, &lockres->l_lksb, lkm_flags,
-			     lockres->l_name, OCFS2_LOCK_ID_MAX_LEN - 1,
-			     lockres);
+			     lockres->l_name, OCFS2_LOCK_ID_MAX_LEN - 1);
 	if (ret) {
 		if (!trylock || (ret != -EAGAIN)) {
 			ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres);
···
 	status = ocfs2_cluster_connect(osb->osb_cluster_stack,
 				       osb->uuid_str,
 				       strlen(osb->uuid_str),
-				       ocfs2_do_node_down, osb,
+				       &lproto, ocfs2_do_node_down, osb,
 				       &conn);
 	if (status) {
 		mlog_errno(status);
···
 	osb->cconn = NULL;
 
 	ocfs2_dlm_shutdown_debug(osb);
-
-	mlog_exit_void();
-}
-
-static void ocfs2_unlock_ast(void *opaque, int error)
-{
-	struct ocfs2_lock_res *lockres = opaque;
-	unsigned long flags;
-
-	mlog_entry_void();
-
-	mlog(0, "UNLOCK AST called on lock %s, action = %d\n", lockres->l_name,
-	     lockres->l_unlock_action);
-
-	spin_lock_irqsave(&lockres->l_lock, flags);
-	if (error) {
-		mlog(ML_ERROR, "Dlm passes error %d for lock %s, "
-		     "unlock_action %d\n", error, lockres->l_name,
-		     lockres->l_unlock_action);
-		spin_unlock_irqrestore(&lockres->l_lock, flags);
-		mlog_exit_void();
-		return;
-	}
-
-	switch(lockres->l_unlock_action) {
-	case OCFS2_UNLOCK_CANCEL_CONVERT:
-		mlog(0, "Cancel convert success for %s\n", lockres->l_name);
-		lockres->l_action = OCFS2_AST_INVALID;
-		/* Downconvert thread may have requeued this lock, we
-		 * need to wake it. */
-		if (lockres->l_flags & OCFS2_LOCK_BLOCKED)
-			ocfs2_wake_downconvert_thread(ocfs2_get_lockres_osb(lockres));
-		break;
-	case OCFS2_UNLOCK_DROP_LOCK:
-		lockres->l_level = DLM_LOCK_IV;
-		break;
-	default:
-		BUG();
-	}
-
-	lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
-	lockres->l_unlock_action = OCFS2_UNLOCK_INVALID;
-	wake_up(&lockres->l_event);
-	spin_unlock_irqrestore(&lockres->l_lock, flags);
 
 	mlog_exit_void();
 }
···
 
 	mlog(0, "lock %s\n", lockres->l_name);
 
-	ret = ocfs2_dlm_unlock(osb->cconn, &lockres->l_lksb, lkm_flags,
-			       lockres);
+	ret = ocfs2_dlm_unlock(osb->cconn, &lockres->l_lksb, lkm_flags);
 	if (ret) {
 		ocfs2_log_dlm_error("ocfs2_dlm_unlock", ret, lockres);
 		mlog(ML_ERROR, "lockres flags: %lu\n", lockres->l_flags);
···
 	BUG_ON(lockres->l_blocking <= DLM_LOCK_NL);
 
 	if (lockres->l_level <= new_level) {
-		mlog(ML_ERROR, "lockres->l_level (%d) <= new_level (%d)\n",
-		     lockres->l_level, new_level);
+		mlog(ML_ERROR, "lockres %s, lvl %d <= %d, blcklst %d, mask %d, "
+		     "type %d, flags 0x%lx, hold %d %d, act %d %d, req %d, "
+		     "block %d, pgen %d\n", lockres->l_name, lockres->l_level,
+		     new_level, list_empty(&lockres->l_blocked_list),
+		     list_empty(&lockres->l_mask_waiters), lockres->l_type,
+		     lockres->l_flags, lockres->l_ro_holders,
+		     lockres->l_ex_holders, lockres->l_action,
+		     lockres->l_unlock_action, lockres->l_requested,
+		     lockres->l_blocking, lockres->l_pending_gen);
 		BUG();
 	}
 
-	mlog(0, "lock %s, new_level = %d, l_blocking = %d\n",
-	     lockres->l_name, new_level, lockres->l_blocking);
+	mlog(ML_BASTS, "lockres %s, level %d => %d, blocking %d\n",
+	     lockres->l_name, lockres->l_level, new_level, lockres->l_blocking);
 
 	lockres->l_action = OCFS2_AST_DOWNCONVERT;
 	lockres->l_requested = new_level;
···
 
 	mlog_entry_void();
 
+	mlog(ML_BASTS, "lockres %s, level %d => %d\n", lockres->l_name,
+	     lockres->l_level, new_level);
+
 	if (lvb)
 		dlm_flags |= DLM_LKF_VALBLK;
 
···
 			     &lockres->l_lksb,
 			     dlm_flags,
 			     lockres->l_name,
-			     OCFS2_LOCK_ID_MAX_LEN - 1,
-			     lockres);
+			     OCFS2_LOCK_ID_MAX_LEN - 1);
 	lockres_clear_pending(lockres, generation, osb);
 	if (ret) {
 		ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres);
···
 	assert_spin_locked(&lockres->l_lock);
 
 	mlog_entry_void();
-	mlog(0, "lock %s\n", lockres->l_name);
 
 	if (lockres->l_unlock_action == OCFS2_UNLOCK_CANCEL_CONVERT) {
 		/* If we're already trying to cancel a lock conversion
 		 * then just drop the spinlock and allow the caller to
 		 * requeue this lock. */
-
-		mlog(0, "Lockres %s, skip convert\n", lockres->l_name);
+		mlog(ML_BASTS, "lockres %s, skip convert\n", lockres->l_name);
 		return 0;
 	}
 
···
 			"lock %s, invalid flags: 0x%lx\n",
 			lockres->l_name, lockres->l_flags);
 
+	mlog(ML_BASTS, "lockres %s\n", lockres->l_name);
+
 	return 1;
 }
 
···
 	int ret;
 
 	mlog_entry_void();
-	mlog(0, "lock %s\n", lockres->l_name);
 
 	ret = ocfs2_dlm_unlock(osb->cconn, &lockres->l_lksb,
-			       DLM_LKF_CANCEL, lockres);
+			       DLM_LKF_CANCEL);
 	if (ret) {
 		ocfs2_log_dlm_error("ocfs2_dlm_unlock", ret, lockres);
 		ocfs2_recover_from_dlm_error(lockres, 0);
 	}
 
-	mlog(0, "lock %s return from ocfs2_dlm_unlock\n", lockres->l_name);
+	mlog(ML_BASTS, "lockres %s\n", lockres->l_name);
 
 	mlog_exit(ret);
 	return ret;
···
 	 * at the same time they set OCFS2_DLM_BUSY.  They must
 	 * clear OCFS2_DLM_PENDING after dlm_lock() returns.
 	 */
-	if (lockres->l_flags & OCFS2_LOCK_PENDING)
+	if (lockres->l_flags & OCFS2_LOCK_PENDING) {
+		mlog(ML_BASTS, "lockres %s, ReQ: Pending\n",
+		     lockres->l_name);
 		goto leave_requeue;
+	}
 
 	ctl->requeue = 1;
 	ret = ocfs2_prepare_cancel_convert(osb, lockres);
···
 	 */
 	if (lockres->l_level == DLM_LOCK_NL) {
 		BUG_ON(lockres->l_ex_holders || lockres->l_ro_holders);
+		mlog(ML_BASTS, "lockres %s, Aborting dc\n", lockres->l_name);
 		lockres->l_blocking = DLM_LOCK_NL;
 		lockres_clear_flags(lockres, OCFS2_LOCK_BLOCKED);
 		spin_unlock_irqrestore(&lockres->l_lock, flags);
···
 	/* if we're blocking an exclusive and we have *any* holders,
 	 * then requeue. */
 	if ((lockres->l_blocking == DLM_LOCK_EX)
-	    && (lockres->l_ex_holders || lockres->l_ro_holders))
+	    && (lockres->l_ex_holders || lockres->l_ro_holders)) {
+		mlog(ML_BASTS, "lockres %s, ReQ: EX/PR Holders %u,%u\n",
+		     lockres->l_name, lockres->l_ex_holders,
+		     lockres->l_ro_holders);
 		goto leave_requeue;
+	}
 
 	/* If it's a PR we're blocking, then only
 	 * requeue if we've got any EX holders */
 	if (lockres->l_blocking == DLM_LOCK_PR &&
-	    lockres->l_ex_holders)
+	    lockres->l_ex_holders) {
+		mlog(ML_BASTS, "lockres %s, ReQ: EX Holders %u\n",
+		     lockres->l_name, lockres->l_ex_holders);
 		goto leave_requeue;
+	}
 
 	/*
 	 * Can we get a lock in this state if the holder counts are
 	 * zero? The meta data unblock code used to check this.
 	 */
 	if ((lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH)
-	    && (lockres->l_flags & OCFS2_LOCK_REFRESHING))
+	    && (lockres->l_flags & OCFS2_LOCK_REFRESHING)) {
+		mlog(ML_BASTS, "lockres %s, ReQ: Lock Refreshing\n",
+		     lockres->l_name);
 		goto leave_requeue;
+	}
 
 	new_level = ocfs2_highest_compat_lock_level(lockres->l_blocking);
 
 	if (lockres->l_ops->check_downconvert
-	    && !lockres->l_ops->check_downconvert(lockres, new_level))
+	    && !lockres->l_ops->check_downconvert(lockres, new_level)) {
+		mlog(ML_BASTS, "lockres %s, ReQ: Checkpointing\n",
+		     lockres->l_name);
 		goto leave_requeue;
+	}
 
 	/* If we get here, then we know that there are no more
 	 * incompatible holders (and anyone asking for an incompatible
···
 
 	ctl->unblock_action = lockres->l_ops->downconvert_worker(lockres, blocking);
 
-	if (ctl->unblock_action == UNBLOCK_STOP_POST)
+	if (ctl->unblock_action == UNBLOCK_STOP_POST) {
+		mlog(ML_BASTS, "lockres %s, UNBLOCK_STOP_POST\n",
+		     lockres->l_name);
 		goto leave;
+	}
 
 	spin_lock_irqsave(&lockres->l_lock, flags);
 	if ((blocking != lockres->l_blocking) || (level != lockres->l_level)) {
 		/* If this changed underneath us, then we can't drop
 		 * it just yet. */
+		mlog(ML_BASTS, "lockres %s, block=%d:%d, level=%d:%d, "
+		     "Recheck\n", lockres->l_name, blocking,
+		     lockres->l_blocking, level, lockres->l_level);
 		goto recheck;
 	}
 
···
 	ocfs2_cluster_unlock(osb, lockres, level);
 }
 
-/*
- * This is the filesystem locking protocol.  It provides the lock handling
- * hooks for the underlying DLM.  It has a maximum version number.
- * The version number allows interoperability with systems running at
- * the same major number and an equal or smaller minor number.
- *
- * Whenever the filesystem does new things with locks (adds or removes a
- * lock, orders them differently, does different things underneath a lock),
- * the version must be changed.  The protocol is negotiated when joining
- * the dlm domain.  A node may join the domain if its major version is
- * identical to all other nodes and its minor version is greater than
- * or equal to all other nodes.  When its minor version is greater than
- * the other nodes, it will run at the minor version specified by the
- * other nodes.
- *
- * If a locking change is made that will not be compatible with older
- * versions, the major number must be increased and the minor version set
- * to zero.  If a change merely adds a behavior that can be disabled when
- * speaking to older versions, the minor version must be increased.  If a
- * change adds a fully backwards compatible change (eg, LVB changes that
- * are just ignored by older versions), the version does not need to be
- * updated.
- */
-static struct ocfs2_locking_protocol lproto = {
-	.lp_max_version = {
-		.pv_major = OCFS2_LOCKING_PROTOCOL_MAJOR,
-		.pv_minor = OCFS2_LOCKING_PROTOCOL_MINOR,
-	},
-	.lp_lock_ast		= ocfs2_locking_ast,
-	.lp_blocking_ast	= ocfs2_blocking_ast,
-	.lp_unlock_ast		= ocfs2_unlock_ast,
-};
-
-void ocfs2_set_locking_protocol(void)
-{
-	ocfs2_stack_glue_set_locking_protocol(&lproto);
-}
-
-
 static void ocfs2_process_blocked_lock(struct ocfs2_super *osb,
 				       struct ocfs2_lock_res *lockres)
 {
···
 	BUG_ON(!lockres);
 	BUG_ON(!lockres->l_ops);
 
-	mlog(0, "lockres %s blocked.\n", lockres->l_name);
+	mlog(ML_BASTS, "lockres %s blocked\n", lockres->l_name);
 
 	/* Detect whether a lock has been marked as going away while
 	 * the downconvert thread was processing other things.  A lock can
···
 	} else
 		ocfs2_schedule_blocked_lock(osb, lockres);
 
-	mlog(0, "lockres %s, requeue = %s.\n", lockres->l_name,
+	mlog(ML_BASTS, "lockres %s, requeue = %s.\n", lockres->l_name,
 	     ctl.requeue ? "yes" : "no");
 	spin_unlock_irqrestore(&lockres->l_lock, flags);
 
···
 		/* Do not schedule a lock for downconvert when it's on
 		 * the way to destruction - any nodes wanting access
 		 * to the resource will get it soon. */
-		mlog(0, "Lockres %s won't be scheduled: flags 0x%lx\n",
+		mlog(ML_BASTS, "lockres %s won't be scheduled: flags 0x%lx\n",
 		     lockres->l_name, lockres->l_flags);
 		return;
 	}
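
The relocated protocol comment states the join rule in prose; as an illustrative sketch (my own, not code from this merge), the negotiation it describes amounts to:

struct proto_version {
	unsigned char pv_major;
	unsigned char pv_minor;
};

/* Returns 1 and fills *running if a node may join the domain. */
static int proto_may_join(const struct proto_version *domain,
			  const struct proto_version *joiner,
			  struct proto_version *running)
{
	if (joiner->pv_major != domain->pv_major)
		return 0;	/* majors must be identical */
	if (joiner->pv_minor < domain->pv_minor)
		return 0;	/* joiner may not be behind the domain */

	/* A joiner with a newer minor runs at the domain's minor. */
	*running = *domain;
	return 1;
}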
+5 -8
fs/ocfs2/file.c
···
 	}
 
 	if (size_change && attr->ia_size != i_size_read(inode)) {
-		if (attr->ia_size > sb->s_maxbytes) {
-			status = -EFBIG;
+		status = inode_newsize_ok(inode, attr->ia_size);
+		if (status)
 			goto bail_unlock;
-		}
 
 		if (i_size_read(inode) > attr->ia_size) {
 			if (ocfs2_should_order_data(inode)) {
···
 						       &meta_level);
 		if (has_refcount)
 			*has_refcount = 1;
+		if (direct_io)
+			*direct_io = 0;
 	}
 
 	if (ret < 0) {
···
 			break;
 		}
 
-		if (has_refcount && *has_refcount == 1) {
-			*direct_io = 0;
-			break;
-		}
 		/*
 		 * Allowing concurrent direct writes means
 		 * i_size changes wouldn't be synchronized, so
···
 	 * async dio is going to do it in the future or an end_io after an
 	 * error has already done it.
 	 */
-	if (ret == -EIOCBQUEUED || !ocfs2_iocb_is_rw_locked(iocb)) {
+	if ((ret == -EIOCBQUEUED) || (!ocfs2_iocb_is_rw_locked(iocb))) {
 		rw_level = -1;
 		have_alloc_sem = 0;
 	}
+3 -3
fs/ocfs2/ioctl.h
···
  *
  */
 
-#ifndef OCFS2_IOCTL_H
-#define OCFS2_IOCTL_H
+#ifndef OCFS2_IOCTL_PROTO_H
+#define OCFS2_IOCTL_PROTO_H
 
 long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
 long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg);
 
-#endif /* OCFS2_IOCTL_H */
+#endif /* OCFS2_IOCTL_PROTO_H */
+1 -1
fs/ocfs2/localalloc.c
···
 
 out:
 	if (!status)
-		ocfs2_init_inode_steal_slot(osb);
+		ocfs2_init_steal_slots(osb);
 	mlog_exit(status);
 	return status;
 }
+4 -28
fs/ocfs2/ocfs2.h
···
 
 #include "ocfs2_fs.h"
 #include "ocfs2_lockid.h"
+#include "ocfs2_ioctl.h"
 
 /* For struct ocfs2_blockcheck_stats */
 #include "blockcheck.h"
···
 	int l_level;
 	unsigned int l_ro_holders;
 	unsigned int l_ex_holders;
-	union ocfs2_dlm_lksb l_lksb;
+	struct ocfs2_dlm_lksb l_lksb;
 
 	/* used from AST/BAST funcs. */
 	enum ocfs2_ast_action l_action;
···
 	u32 s_next_generation;
 	unsigned long osb_flags;
 	s16 s_inode_steal_slot;
+	s16 s_meta_steal_slot;
 	atomic_t s_num_inodes_stolen;
+	atomic_t s_num_meta_stolen;
 
 	unsigned long s_mount_opt;
 	unsigned int s_atime_quantum;
···
 	BUILD_BUG_ON(OCFS2_MAX_CLUSTERSIZE > 1048576);
 
 	return megs << (20 - OCFS2_SB(sb)->s_clustersize_bits);
-}
-
-static inline void ocfs2_init_inode_steal_slot(struct ocfs2_super *osb)
-{
-	spin_lock(&osb->osb_lock);
-	osb->s_inode_steal_slot = OCFS2_INVALID_SLOT;
-	spin_unlock(&osb->osb_lock);
-	atomic_set(&osb->s_num_inodes_stolen, 0);
-}
-
-static inline void ocfs2_set_inode_steal_slot(struct ocfs2_super *osb,
-					      s16 slot)
-{
-	spin_lock(&osb->osb_lock);
-	osb->s_inode_steal_slot = slot;
-	spin_unlock(&osb->osb_lock);
-}
-
-static inline s16 ocfs2_get_inode_steal_slot(struct ocfs2_super *osb)
-{
-	s16 slot;
-
-	spin_lock(&osb->osb_lock);
-	slot = osb->s_inode_steal_slot;
-	spin_unlock(&osb->osb_lock);
-
-	return slot;
 }
 
 #define ocfs2_set_bit ext2_set_bit
-57
fs/ocfs2/ocfs2_fs.h
···
 						 refcount tree */
 
 /*
- * ioctl commands
- */
-#define OCFS2_IOC_GETFLAGS	_IOR('f', 1, long)
-#define OCFS2_IOC_SETFLAGS	_IOW('f', 2, long)
-#define OCFS2_IOC32_GETFLAGS	_IOR('f', 1, int)
-#define OCFS2_IOC32_SETFLAGS	_IOW('f', 2, int)
-
-/*
- * Space reservation / allocation / free ioctls and argument structure
- * are designed to be compatible with XFS.
- *
- * ALLOCSP* and FREESP* are not and will never be supported, but are
- * included here for completeness.
- */
-struct ocfs2_space_resv {
-	__s16		l_type;
-	__s16		l_whence;
-	__s64		l_start;
-	__s64		l_len;		/* len == 0 means until end of file */
-	__s32		l_sysid;
-	__u32		l_pid;
-	__s32		l_pad[4];	/* reserve area */
-};
-
-#define OCFS2_IOC_ALLOCSP	_IOW ('X', 10, struct ocfs2_space_resv)
-#define OCFS2_IOC_FREESP	_IOW ('X', 11, struct ocfs2_space_resv)
-#define OCFS2_IOC_RESVSP	_IOW ('X', 40, struct ocfs2_space_resv)
-#define OCFS2_IOC_UNRESVSP	_IOW ('X', 41, struct ocfs2_space_resv)
-#define OCFS2_IOC_ALLOCSP64	_IOW ('X', 36, struct ocfs2_space_resv)
-#define OCFS2_IOC_FREESP64	_IOW ('X', 37, struct ocfs2_space_resv)
-#define OCFS2_IOC_RESVSP64	_IOW ('X', 42, struct ocfs2_space_resv)
-#define OCFS2_IOC_UNRESVSP64	_IOW ('X', 43, struct ocfs2_space_resv)
-
-/* Used to pass group descriptor data when online resize is done */
-struct ocfs2_new_group_input {
-	__u64 group;		/* Group descriptor's blkno. */
-	__u32 clusters;		/* Total number of clusters in this group */
-	__u32 frees;		/* Total free clusters in this group */
-	__u16 chain;		/* Chain for this group */
-	__u16 reserved1;
-	__u32 reserved2;
-};
-
-#define OCFS2_IOC_GROUP_EXTEND	_IOW('o', 1, int)
-#define OCFS2_IOC_GROUP_ADD	_IOW('o', 2, struct ocfs2_new_group_input)
-#define OCFS2_IOC_GROUP_ADD64	_IOW('o', 3, struct ocfs2_new_group_input)
-
-/* Used to pass 2 file names to reflink. */
-struct reflink_arguments {
-	__u64 old_path;
-	__u64 new_path;
-	__u64 preserve;
-};
-#define OCFS2_IOC_REFLINK	_IOW('o', 4, struct reflink_arguments)
-
-
-/*
  * Journal Flags (ocfs2_dinode.id1.journal1.i_flags)
  */
 #define OCFS2_JOURNAL_DIRTY_FL	(0x00000001)	/* Journal needs recovery */
+79
fs/ocfs2/ocfs2_ioctl.h
··· 1 + /* -*- mode: c; c-basic-offset: 8; -*- 2 + * vim: noexpandtab sw=8 ts=8 sts=0: 3 + * 4 + * ocfs2_ioctl.h 5 + * 6 + * Defines OCFS2 ioctls. 7 + * 8 + * Copyright (C) 2010 Oracle. All rights reserved. 9 + * 10 + * This program is free software; you can redistribute it and/or 11 + * modify it under the terms of the GNU General Public 12 + * License, version 2, as published by the Free Software Foundation. 13 + * 14 + * This program is distributed in the hope that it will be useful, 15 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 16 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 17 + * General Public License for more details. 18 + */ 19 + 20 + #ifndef OCFS2_IOCTL_H 21 + #define OCFS2_IOCTL_H 22 + 23 + /* 24 + * ioctl commands 25 + */ 26 + #define OCFS2_IOC_GETFLAGS _IOR('f', 1, long) 27 + #define OCFS2_IOC_SETFLAGS _IOW('f', 2, long) 28 + #define OCFS2_IOC32_GETFLAGS _IOR('f', 1, int) 29 + #define OCFS2_IOC32_SETFLAGS _IOW('f', 2, int) 30 + 31 + /* 32 + * Space reservation / allocation / free ioctls and argument structure 33 + * are designed to be compatible with XFS. 34 + * 35 + * ALLOCSP* and FREESP* are not and will never be supported, but are 36 + * included here for completeness. 37 + */ 38 + struct ocfs2_space_resv { 39 + __s16 l_type; 40 + __s16 l_whence; 41 + __s64 l_start; 42 + __s64 l_len; /* len == 0 means until end of file */ 43 + __s32 l_sysid; 44 + __u32 l_pid; 45 + __s32 l_pad[4]; /* reserve area */ 46 + }; 47 + 48 + #define OCFS2_IOC_ALLOCSP _IOW ('X', 10, struct ocfs2_space_resv) 49 + #define OCFS2_IOC_FREESP _IOW ('X', 11, struct ocfs2_space_resv) 50 + #define OCFS2_IOC_RESVSP _IOW ('X', 40, struct ocfs2_space_resv) 51 + #define OCFS2_IOC_UNRESVSP _IOW ('X', 41, struct ocfs2_space_resv) 52 + #define OCFS2_IOC_ALLOCSP64 _IOW ('X', 36, struct ocfs2_space_resv) 53 + #define OCFS2_IOC_FREESP64 _IOW ('X', 37, struct ocfs2_space_resv) 54 + #define OCFS2_IOC_RESVSP64 _IOW ('X', 42, struct ocfs2_space_resv) 55 + #define OCFS2_IOC_UNRESVSP64 _IOW ('X', 43, struct ocfs2_space_resv) 56 + 57 + /* Used to pass group descriptor data when online resize is done */ 58 + struct ocfs2_new_group_input { 59 + __u64 group; /* Group descriptor's blkno. */ 60 + __u32 clusters; /* Total number of clusters in this group */ 61 + __u32 frees; /* Total free clusters in this group */ 62 + __u16 chain; /* Chain for this group */ 63 + __u16 reserved1; 64 + __u32 reserved2; 65 + }; 66 + 67 + #define OCFS2_IOC_GROUP_EXTEND _IOW('o', 1, int) 68 + #define OCFS2_IOC_GROUP_ADD _IOW('o', 2,struct ocfs2_new_group_input) 69 + #define OCFS2_IOC_GROUP_ADD64 _IOW('o', 3,struct ocfs2_new_group_input) 70 + 71 + /* Used to pass 2 file names to reflink. */ 72 + struct reflink_arguments { 73 + __u64 old_path; 74 + __u64 new_path; 75 + __u64 preserve; 76 + }; 77 + #define OCFS2_IOC_REFLINK _IOW('o', 4, struct reflink_arguments) 78 + 79 + #endif /* OCFS2_IOCTL_H */
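With the ioctl definitions now in a header of their own, a userspace program can carry them directly. A hedged sketch of driving OCFS2_IOC_REFLINK (the mount point and file names are made up, and it assumes this header and its __u64 types are visible to the program; the kernel side treats old_path and new_path as user pointers packed into __u64):

#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/types.h>
#include "ocfs2_ioctl.h"

int main(void)
{
	struct reflink_arguments args;
	int fd = open("/mnt/ocfs2", O_RDONLY);	/* illustrative: an fd on the volume */

	if (fd < 0)
		return 1;

	memset(&args, 0, sizeof(args));
	args.old_path = (__u64)(unsigned long)"/mnt/ocfs2/orig";
	args.new_path = (__u64)(unsigned long)"/mnt/ocfs2/clone";
	args.preserve = 1;	/* ask for attributes to be preserved */

	if (ioctl(fd, OCFS2_IOC_REFLINK, &args) < 0)
		perror("OCFS2_IOC_REFLINK");
	close(fd);
	return 0;
}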
+2
fs/ocfs2/ocfs2_lockingver.h
··· 23 23 /* 24 24 * The protocol version for ocfs2 cluster locking. See dlmglue.c for 25 25 * more details. 26 + * 27 + * 1.0 - Initial locking version from ocfs2 1.4. 26 28 */ 27 29 #define OCFS2_LOCKING_PROTOCOL_MAJOR 1 28 30 #define OCFS2_LOCKING_PROTOCOL_MINOR 0
+3 -3
fs/ocfs2/refcounttree.c
··· 626 626 rb = (struct ocfs2_refcount_block *)new_bh->b_data; 627 627 memset(rb, 0, inode->i_sb->s_blocksize); 628 628 strcpy((void *)rb, OCFS2_REFCOUNT_BLOCK_SIGNATURE); 629 - rb->rf_suballoc_slot = cpu_to_le16(osb->slot_num); 629 + rb->rf_suballoc_slot = cpu_to_le16(meta_ac->ac_alloc_slot); 630 630 rb->rf_suballoc_bit = cpu_to_le16(suballoc_bit_start); 631 631 rb->rf_fs_generation = cpu_to_le32(osb->fs_generation); 632 632 rb->rf_blkno = cpu_to_le64(first_blkno); ··· 1330 1330 memcpy(new_bh->b_data, ref_root_bh->b_data, sb->s_blocksize); 1331 1331 1332 1332 new_rb = (struct ocfs2_refcount_block *)new_bh->b_data; 1333 - new_rb->rf_suballoc_slot = cpu_to_le16(OCFS2_SB(sb)->slot_num); 1333 + new_rb->rf_suballoc_slot = cpu_to_le16(meta_ac->ac_alloc_slot); 1334 1334 new_rb->rf_suballoc_bit = cpu_to_le16(suballoc_bit_start); 1335 1335 new_rb->rf_blkno = cpu_to_le64(blkno); 1336 1336 new_rb->rf_cpos = cpu_to_le32(0); ··· 1576 1576 new_rb = (struct ocfs2_refcount_block *)new_bh->b_data; 1577 1577 memset(new_rb, 0, sb->s_blocksize); 1578 1578 strcpy((void *)new_rb, OCFS2_REFCOUNT_BLOCK_SIGNATURE); 1579 - new_rb->rf_suballoc_slot = cpu_to_le16(OCFS2_SB(sb)->slot_num); 1579 + new_rb->rf_suballoc_slot = cpu_to_le16(meta_ac->ac_alloc_slot); 1580 1580 new_rb->rf_suballoc_bit = cpu_to_le16(suballoc_bit_start); 1581 1581 new_rb->rf_fs_generation = cpu_to_le32(OCFS2_SB(sb)->fs_generation); 1582 1582 new_rb->rf_blkno = cpu_to_le64(blkno);
+17 -20
fs/ocfs2/stack_o2cb.c
··· 161 161 162 162 static void o2dlm_lock_ast_wrapper(void *astarg) 163 163 { 164 - BUG_ON(o2cb_stack.sp_proto == NULL); 164 + struct ocfs2_dlm_lksb *lksb = astarg; 165 165 166 - o2cb_stack.sp_proto->lp_lock_ast(astarg); 166 + lksb->lksb_conn->cc_proto->lp_lock_ast(lksb); 167 167 } 168 168 169 169 static void o2dlm_blocking_ast_wrapper(void *astarg, int level) 170 170 { 171 - BUG_ON(o2cb_stack.sp_proto == NULL); 171 + struct ocfs2_dlm_lksb *lksb = astarg; 172 172 173 - o2cb_stack.sp_proto->lp_blocking_ast(astarg, level); 173 + lksb->lksb_conn->cc_proto->lp_blocking_ast(lksb, level); 174 174 } 175 175 176 176 static void o2dlm_unlock_ast_wrapper(void *astarg, enum dlm_status status) 177 177 { 178 + struct ocfs2_dlm_lksb *lksb = astarg; 178 179 int error = dlm_status_to_errno(status); 179 - 180 - BUG_ON(o2cb_stack.sp_proto == NULL); 181 180 182 181 /* 183 182 * In o2dlm, you can get both the lock_ast() for the lock being ··· 192 193 if (status == DLM_CANCELGRANT) 193 194 return; 194 195 195 - o2cb_stack.sp_proto->lp_unlock_ast(astarg, error); 196 + lksb->lksb_conn->cc_proto->lp_unlock_ast(lksb, error); 196 197 } 197 198 198 199 static int o2cb_dlm_lock(struct ocfs2_cluster_connection *conn, 199 200 int mode, 200 - union ocfs2_dlm_lksb *lksb, 201 + struct ocfs2_dlm_lksb *lksb, 201 202 u32 flags, 202 203 void *name, 203 - unsigned int namelen, 204 - void *astarg) 204 + unsigned int namelen) 205 205 { 206 206 enum dlm_status status; 207 207 int o2dlm_mode = mode_to_o2dlm(mode); ··· 209 211 210 212 status = dlmlock(conn->cc_lockspace, o2dlm_mode, &lksb->lksb_o2dlm, 211 213 o2dlm_flags, name, namelen, 212 - o2dlm_lock_ast_wrapper, astarg, 214 + o2dlm_lock_ast_wrapper, lksb, 213 215 o2dlm_blocking_ast_wrapper); 214 216 ret = dlm_status_to_errno(status); 215 217 return ret; 216 218 } 217 219 218 220 static int o2cb_dlm_unlock(struct ocfs2_cluster_connection *conn, 219 - union ocfs2_dlm_lksb *lksb, 220 - u32 flags, 221 - void *astarg) 221 + struct ocfs2_dlm_lksb *lksb, 222 + u32 flags) 222 223 { 223 224 enum dlm_status status; 224 225 int o2dlm_flags = flags_to_o2dlm(flags); 225 226 int ret; 226 227 227 228 status = dlmunlock(conn->cc_lockspace, &lksb->lksb_o2dlm, 228 - o2dlm_flags, o2dlm_unlock_ast_wrapper, astarg); 229 + o2dlm_flags, o2dlm_unlock_ast_wrapper, lksb); 229 230 ret = dlm_status_to_errno(status); 230 231 return ret; 231 232 } 232 233 233 - static int o2cb_dlm_lock_status(union ocfs2_dlm_lksb *lksb) 234 + static int o2cb_dlm_lock_status(struct ocfs2_dlm_lksb *lksb) 234 235 { 235 236 return dlm_status_to_errno(lksb->lksb_o2dlm.status); 236 237 } ··· 239 242 * contents, it will zero out the LVB. Thus the caller can always trust 240 243 * the contents. 
241 244 */ 242 - static int o2cb_dlm_lvb_valid(union ocfs2_dlm_lksb *lksb) 245 + static int o2cb_dlm_lvb_valid(struct ocfs2_dlm_lksb *lksb) 243 246 { 244 247 return 1; 245 248 } 246 249 247 - static void *o2cb_dlm_lvb(union ocfs2_dlm_lksb *lksb) 250 + static void *o2cb_dlm_lvb(struct ocfs2_dlm_lksb *lksb) 248 251 { 249 252 return (void *)(lksb->lksb_o2dlm.lvb); 250 253 } 251 254 252 - static void o2cb_dump_lksb(union ocfs2_dlm_lksb *lksb) 255 + static void o2cb_dump_lksb(struct ocfs2_dlm_lksb *lksb) 253 256 { 254 257 dlm_print_one_lock(lksb->lksb_o2dlm.lockid); 255 258 } ··· 277 280 struct dlm_protocol_version fs_version; 278 281 279 282 BUG_ON(conn == NULL); 280 - BUG_ON(o2cb_stack.sp_proto == NULL); 283 + BUG_ON(conn->cc_proto == NULL); 281 284 282 285 /* for now we only have one cluster/node, make sure we see it 283 286 * in the heartbeat universe */
+19 -30
fs/ocfs2/stack_user.c
··· 25 25 #include <linux/reboot.h> 26 26 #include <asm/uaccess.h> 27 27 28 - #include "ocfs2.h" /* For struct ocfs2_lock_res */ 29 28 #include "stackglue.h" 30 29 31 30 #include <linux/dlm_plock.h> ··· 62 63 * negotiated by the client. The client negotiates based on the maximum 63 64 * version advertised in /sys/fs/ocfs2/max_locking_protocol. The major 64 65 * number from the "SETV" message must match 65 - * ocfs2_user_plugin.sp_proto->lp_max_version.pv_major, and the minor number 66 + * ocfs2_user_plugin.sp_max_proto.pv_major, and the minor number 66 - * must be less than or equal to ...->lp_max_version.pv_minor. 67 + * must be less than or equal to ...sp_max_proto.pv_minor. 67 68 * 68 69 * Once this information has been set, mounts will be allowed. From this 69 70 * point on, the "DOWN" message can be sent for node down notification. ··· 400 401 char *ptr = NULL; 401 402 struct ocfs2_control_private *p = file->private_data; 402 403 struct ocfs2_protocol_version *max = 403 - &ocfs2_user_plugin.sp_proto->lp_max_version; 404 + &ocfs2_user_plugin.sp_max_proto; 404 405 405 406 if (ocfs2_control_get_handshake_state(file) != 406 407 OCFS2_CONTROL_HANDSHAKE_PROTOCOL) ··· 663 664 -rc); 664 665 } 665 666 666 - static struct dlm_lksb *fsdlm_astarg_to_lksb(void *astarg) 667 - { 668 - struct ocfs2_lock_res *res = astarg; 669 - return &res->l_lksb.lksb_fsdlm; 670 - } 671 - 672 667 static void fsdlm_lock_ast_wrapper(void *astarg) 673 668 { 674 - struct dlm_lksb *lksb = fsdlm_astarg_to_lksb(astarg); 675 - int status = lksb->sb_status; 676 - 677 - BUG_ON(ocfs2_user_plugin.sp_proto == NULL); 669 + struct ocfs2_dlm_lksb *lksb = astarg; 670 + int status = lksb->lksb_fsdlm.sb_status; 678 671 679 672 /* 680 673 * For now we're punting on the issue of other non-standard errors ··· 679 688 */ 680 689 681 690 if (status == -DLM_EUNLOCK || status == -DLM_ECANCEL) 682 - ocfs2_user_plugin.sp_proto->lp_unlock_ast(astarg, 0); 691 + lksb->lksb_conn->cc_proto->lp_unlock_ast(lksb, 0); 683 692 else 684 - ocfs2_user_plugin.sp_proto->lp_lock_ast(astarg); 693 + lksb->lksb_conn->cc_proto->lp_lock_ast(lksb); 685 694 } 686 695 687 696 static void fsdlm_blocking_ast_wrapper(void *astarg, int level) 688 697 { 689 - BUG_ON(ocfs2_user_plugin.sp_proto == NULL); 698 + struct ocfs2_dlm_lksb *lksb = astarg; 690 699 691 - ocfs2_user_plugin.sp_proto->lp_blocking_ast(astarg, level); 700 + lksb->lksb_conn->cc_proto->lp_blocking_ast(lksb, level); 692 701 } 693 702 694 703 static int user_dlm_lock(struct ocfs2_cluster_connection *conn, 695 704 int mode, 696 - union ocfs2_dlm_lksb *lksb, 705 + struct ocfs2_dlm_lksb *lksb, 697 706 u32 flags, 698 707 void *name, 699 - unsigned int namelen, 700 - void *astarg) 708 + unsigned int namelen) 701 709 { 702 710 int ret; 703 711 ··· 706 716 707 717 ret = dlm_lock(conn->cc_lockspace, mode, &lksb->lksb_fsdlm, 708 718 flags|DLM_LKF_NODLCKWT, name, namelen, 0, 709 - fsdlm_lock_ast_wrapper, astarg, 719 + fsdlm_lock_ast_wrapper, lksb, 710 720 fsdlm_blocking_ast_wrapper); 711 721 return ret; 712 722 } 713 723 714 724 static int user_dlm_unlock(struct ocfs2_cluster_connection *conn, 715 - union ocfs2_dlm_lksb *lksb, 716 - u32 flags, 717 - void *astarg) 725 + struct ocfs2_dlm_lksb *lksb, 726 + u32 flags) 718 727 { 719 728 int ret; 720 729 721 730 ret = dlm_unlock(conn->cc_lockspace, lksb->lksb_fsdlm.sb_lkid, 722 731 flags, &lksb->lksb_fsdlm, lksb); 723 732 return ret; 724 733 } 725 734 726 - static int user_dlm_lock_status(union ocfs2_dlm_lksb *lksb) 735 + static int 
user_dlm_lock_status(struct ocfs2_dlm_lksb *lksb) 727 736 { 728 737 return lksb->lksb_fsdlm.sb_status; 729 738 } 730 739 731 - static int user_dlm_lvb_valid(union ocfs2_dlm_lksb *lksb) 740 + static int user_dlm_lvb_valid(struct ocfs2_dlm_lksb *lksb) 732 741 { 733 742 int invalid = lksb->lksb_fsdlm.sb_flags & DLM_SBF_VALNOTVALID; 734 743 735 744 return !invalid; 736 745 } 737 746 738 - static void *user_dlm_lvb(union ocfs2_dlm_lksb *lksb) 747 + static void *user_dlm_lvb(struct ocfs2_dlm_lksb *lksb) 739 748 { 740 749 if (!lksb->lksb_fsdlm.sb_lvbptr) 741 750 lksb->lksb_fsdlm.sb_lvbptr = (char *)lksb + ··· 742 753 return (void *)(lksb->lksb_fsdlm.sb_lvbptr); 743 754 } 744 755 745 - static void user_dlm_dump_lksb(union ocfs2_dlm_lksb *lksb) 756 + static void user_dlm_dump_lksb(struct ocfs2_dlm_lksb *lksb) 746 757 { 747 758 } 748 759
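The lazy sb_lvbptr assignment in user_dlm_lvb() above leans on the fsdlm_lksb_plus_lvb padding member: because that member sizes the lksb union for an lksb plus an LVB, the LVB bytes legitimately live right after the dlm_lksb inside the same allocation. A standalone sketch of the layout trick, with stand-in types (the real ones come from <linux/dlm.h> and stackglue.h):

#include <stdio.h>

struct fake_lksb {			/* stand-in for struct dlm_lksb */
	int	status;
	char	*lvbptr;
};

struct fake_lksb_plus_lvb {		/* stand-in for fsdlm_lksb_plus_lvb */
	struct fake_lksb	lksb;
	char			lvb[64];
};

union fake_padded_lksb {		/* stand-in for the lksb union */
	struct fake_lksb		lksb;
	struct fake_lksb_plus_lvb	padding;
};

int main(void)
{
	union fake_padded_lksb u;

	/* Point the lvb just past the lksb, still inside the union. */
	u.lksb.lvbptr = (char *)&u + sizeof(struct fake_lksb);
	printf("union is %zu bytes; lvb starts at offset %zu\n",
	       sizeof(u), (size_t)(u.lksb.lvbptr - (char *)&u));
	return 0;
}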
+63 -35
fs/ocfs2/stackglue.c
··· 36 36 #define OCFS2_STACK_PLUGIN_USER "user" 37 37 #define OCFS2_MAX_HB_CTL_PATH 256 38 38 39 - static struct ocfs2_locking_protocol *lproto; 39 + static struct ocfs2_protocol_version locking_max_version; 40 40 static DEFINE_SPINLOCK(ocfs2_stack_lock); 41 41 static LIST_HEAD(ocfs2_stack_list); 42 42 static char cluster_stack_name[OCFS2_STACK_LABEL_LEN + 1]; ··· 176 176 spin_lock(&ocfs2_stack_lock); 177 177 if (!ocfs2_stack_lookup(plugin->sp_name)) { 178 178 plugin->sp_count = 0; 179 - plugin->sp_proto = lproto; 179 + plugin->sp_max_proto = locking_max_version; 180 180 list_add(&plugin->sp_list, &ocfs2_stack_list); 181 181 printk(KERN_INFO "ocfs2: Registered cluster interface %s\n", 182 182 plugin->sp_name); ··· 213 213 } 214 214 EXPORT_SYMBOL_GPL(ocfs2_stack_glue_unregister); 215 215 216 - void ocfs2_stack_glue_set_locking_protocol(struct ocfs2_locking_protocol *proto) 216 + void ocfs2_stack_glue_set_max_proto_version(struct ocfs2_protocol_version *max_proto) 217 217 { 218 218 struct ocfs2_stack_plugin *p; 219 219 220 - BUG_ON(proto == NULL); 221 - 222 220 spin_lock(&ocfs2_stack_lock); 223 - BUG_ON(active_stack != NULL); 221 + if (memcmp(max_proto, &locking_max_version, 222 + sizeof(struct ocfs2_protocol_version))) { 223 + BUG_ON(locking_max_version.pv_major != 0); 224 224 225 - lproto = proto; 226 - list_for_each_entry(p, &ocfs2_stack_list, sp_list) { 227 - p->sp_proto = lproto; 225 + locking_max_version = *max_proto; 226 + list_for_each_entry(p, &ocfs2_stack_list, sp_list) { 227 + p->sp_max_proto = locking_max_version; 228 + } 228 229 } 229 - 230 230 spin_unlock(&ocfs2_stack_lock); 231 231 } 232 - EXPORT_SYMBOL_GPL(ocfs2_stack_glue_set_locking_protocol); 232 + EXPORT_SYMBOL_GPL(ocfs2_stack_glue_set_max_proto_version); 233 233 234 234 235 235 /* 236 - * The ocfs2_dlm_lock() and ocfs2_dlm_unlock() functions take 237 - * "struct ocfs2_lock_res *astarg" instead of "void *astarg" because the 238 - * underlying stack plugins need to pilfer the lksb off of the lock_res. 239 - * If some other structure needs to be passed as an astarg, the plugins 240 - * will need to be given a different avenue to the lksb. 236 + * The ocfs2_dlm_lock() and ocfs2_dlm_unlock() functions take no argument 237 + * for the ast and bast functions. They will pass the lksb to the ast 238 + * and bast. The caller can wrap the lksb with their own structure to 239 + * get more information. 
241 240 */ 242 241 int ocfs2_dlm_lock(struct ocfs2_cluster_connection *conn, 243 242 int mode, 244 - union ocfs2_dlm_lksb *lksb, 243 + struct ocfs2_dlm_lksb *lksb, 245 244 u32 flags, 246 245 void *name, 247 - unsigned int namelen, 248 - struct ocfs2_lock_res *astarg) 246 + unsigned int namelen) 249 247 { 250 - BUG_ON(lproto == NULL); 251 - 248 + if (!lksb->lksb_conn) 249 + lksb->lksb_conn = conn; 250 + else 251 + BUG_ON(lksb->lksb_conn != conn); 252 252 return active_stack->sp_ops->dlm_lock(conn, mode, lksb, flags, 253 - name, namelen, astarg); 253 + name, namelen); 254 254 } 255 255 EXPORT_SYMBOL_GPL(ocfs2_dlm_lock); 256 256 257 257 int ocfs2_dlm_unlock(struct ocfs2_cluster_connection *conn, 258 - union ocfs2_dlm_lksb *lksb, 259 - u32 flags, 260 - struct ocfs2_lock_res *astarg) 258 + struct ocfs2_dlm_lksb *lksb, 259 + u32 flags) 261 260 { 262 - BUG_ON(lproto == NULL); 261 + BUG_ON(lksb->lksb_conn == NULL); 263 262 264 - return active_stack->sp_ops->dlm_unlock(conn, lksb, flags, astarg); 263 + return active_stack->sp_ops->dlm_unlock(conn, lksb, flags); 265 264 } 266 265 EXPORT_SYMBOL_GPL(ocfs2_dlm_unlock); 267 266 268 - int ocfs2_dlm_lock_status(union ocfs2_dlm_lksb *lksb) 267 + int ocfs2_dlm_lock_status(struct ocfs2_dlm_lksb *lksb) 269 268 { 270 269 return active_stack->sp_ops->lock_status(lksb); 271 270 } 272 271 EXPORT_SYMBOL_GPL(ocfs2_dlm_lock_status); 273 272 274 - int ocfs2_dlm_lvb_valid(union ocfs2_dlm_lksb *lksb) 273 + int ocfs2_dlm_lvb_valid(struct ocfs2_dlm_lksb *lksb) 275 274 { 276 275 return active_stack->sp_ops->lvb_valid(lksb); 277 276 } 278 277 EXPORT_SYMBOL_GPL(ocfs2_dlm_lvb_valid); 279 278 280 - void *ocfs2_dlm_lvb(union ocfs2_dlm_lksb *lksb) 279 + void *ocfs2_dlm_lvb(struct ocfs2_dlm_lksb *lksb) 281 280 { 282 281 return active_stack->sp_ops->lock_lvb(lksb); 283 282 } 284 283 EXPORT_SYMBOL_GPL(ocfs2_dlm_lvb); 285 284 286 - void ocfs2_dlm_dump_lksb(union ocfs2_dlm_lksb *lksb) 285 + void ocfs2_dlm_dump_lksb(struct ocfs2_dlm_lksb *lksb) 287 286 { 288 287 active_stack->sp_ops->dump_lksb(lksb); 289 288 } ··· 311 312 int ocfs2_cluster_connect(const char *stack_name, 312 313 const char *group, 313 314 int grouplen, 315 + struct ocfs2_locking_protocol *lproto, 314 316 void (*recovery_handler)(int node_num, 315 317 void *recovery_data), 316 318 void *recovery_data, ··· 329 329 goto out; 330 330 } 331 331 332 + if (memcmp(&lproto->lp_max_version, &locking_max_version, 333 + sizeof(struct ocfs2_protocol_version))) { 334 + rc = -EINVAL; 335 + goto out; 336 + } 337 + 332 338 new_conn = kzalloc(sizeof(struct ocfs2_cluster_connection), 333 339 GFP_KERNEL); 334 340 if (!new_conn) { ··· 347 341 new_conn->cc_recovery_handler = recovery_handler; 348 342 new_conn->cc_recovery_data = recovery_data; 349 343 344 + new_conn->cc_proto = lproto; 350 345 /* Start the new connection at our maximum compatibility level */ 351 346 new_conn->cc_version = lproto->lp_max_version; 352 347 ··· 372 365 return rc; 373 366 } 374 367 EXPORT_SYMBOL_GPL(ocfs2_cluster_connect); 368 + 369 + /* The caller will ensure all nodes have the same cluster stack */ 370 + int ocfs2_cluster_connect_agnostic(const char *group, 371 + int grouplen, 372 + struct ocfs2_locking_protocol *lproto, 373 + void (*recovery_handler)(int node_num, 374 + void *recovery_data), 375 + void *recovery_data, 376 + struct ocfs2_cluster_connection **conn) 377 + { 378 + char *stack_name = NULL; 379 + 380 + if (cluster_stack_name[0]) 381 + stack_name = cluster_stack_name; 382 + return ocfs2_cluster_connect(stack_name, group, grouplen, lproto, 383 + 
recovery_handler, recovery_data, conn); 384 + } 385 + EXPORT_SYMBOL_GPL(ocfs2_cluster_connect_agnostic); 375 386 376 387 /* If hangup_pending is 0, the stack driver will be dropped */ 377 388 int ocfs2_cluster_disconnect(struct ocfs2_cluster_connection *conn, ··· 478 453 ssize_t ret = 0; 479 454 480 455 spin_lock(&ocfs2_stack_lock); 481 - if (lproto) 456 + if (locking_max_version.pv_major) 482 457 ret = snprintf(buf, PAGE_SIZE, "%u.%u\n", 483 - lproto->lp_max_version.pv_major, 484 - lproto->lp_max_version.pv_minor); 458 + locking_max_version.pv_major, 459 + locking_max_version.pv_minor); 485 460 spin_unlock(&ocfs2_stack_lock); 486 461 487 462 return ret; ··· 710 685 711 686 static void __exit ocfs2_stack_glue_exit(void) 712 687 { 713 - lproto = NULL; 688 + memset(&locking_max_version, 0, 689 + sizeof(struct ocfs2_protocol_version)); 690 + locking_max_version.pv_major = 0; 691 + locking_max_version.pv_minor = 0; 714 692 ocfs2_sysfs_exit(); 715 693 if (ocfs2_table_header) 716 694 unregister_sysctl_table(ocfs2_table_header);
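Putting the new plumbing together: a caller registers its maximum protocol version, then connects with the protocol attached, and the memcmp() in ocfs2_cluster_connect() rejects any lp_max_version that was never registered. A hedged sketch with hypothetical my_* names (ocfs2_dlmfs is the real in-tree user of the agnostic connect):

static void my_lock_ast(struct ocfs2_dlm_lksb *lksb);
static void my_blocking_ast(struct ocfs2_dlm_lksb *lksb, int level);
static void my_unlock_ast(struct ocfs2_dlm_lksb *lksb, int error);

static struct ocfs2_locking_protocol my_proto = {
	.lp_max_version	= {
		.pv_major	= OCFS2_LOCKING_PROTOCOL_MAJOR,
		.pv_minor	= OCFS2_LOCKING_PROTOCOL_MINOR,
	},
	.lp_lock_ast	 = my_lock_ast,
	.lp_blocking_ast = my_blocking_ast,
	.lp_unlock_ast	 = my_unlock_ast,
};

/* Callers that don't track node death can pass a no-op handler. */
static void my_recovery_noop(int node_num, void *data)
{
}

static int my_connect(struct ocfs2_cluster_connection **conn)
{
	ocfs2_stack_glue_set_max_proto_version(&my_proto.lp_max_version);
	return ocfs2_cluster_connect_agnostic("mygroup", strlen("mygroup"),
					      &my_proto, my_recovery_noop,
					      NULL, conn);
}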
+56 -39
fs/ocfs2/stackglue.h
··· 56 56 }; 57 57 58 58 /* 59 - * The ocfs2_locking_protocol defines the handlers called on ocfs2's behalf. 60 - */ 61 - struct ocfs2_locking_protocol { 62 - struct ocfs2_protocol_version lp_max_version; 63 - void (*lp_lock_ast)(void *astarg); 64 - void (*lp_blocking_ast)(void *astarg, int level); 65 - void (*lp_unlock_ast)(void *astarg, int error); 66 - }; 67 - 68 - 69 - /* 70 59 * The dlm_lockstatus struct includes lvb space, but the dlm_lksb struct only 71 60 * has a pointer to separately allocated lvb space. This struct exists only to 72 61 * include in the lksb union to make space for a combined dlm_lksb and lvb. ··· 70 81 * size of the union is known. Lock status structures are embedded in 71 82 * ocfs2 inodes. 72 83 */ 73 - union ocfs2_dlm_lksb { 74 - struct dlm_lockstatus lksb_o2dlm; 75 - struct dlm_lksb lksb_fsdlm; 76 - struct fsdlm_lksb_plus_lvb padding; 84 + struct ocfs2_cluster_connection; 85 + struct ocfs2_dlm_lksb { 86 + union { 87 + struct dlm_lockstatus lksb_o2dlm; 88 + struct dlm_lksb lksb_fsdlm; 89 + struct fsdlm_lksb_plus_lvb padding; 90 + }; 91 + struct ocfs2_cluster_connection *lksb_conn; 77 92 }; 93 + 94 + /* 95 + * The ocfs2_locking_protocol defines the handlers called on ocfs2's behalf. 96 + */ 97 + struct ocfs2_locking_protocol { 98 + struct ocfs2_protocol_version lp_max_version; 99 + void (*lp_lock_ast)(struct ocfs2_dlm_lksb *lksb); 100 + void (*lp_blocking_ast)(struct ocfs2_dlm_lksb *lksb, int level); 101 + void (*lp_unlock_ast)(struct ocfs2_dlm_lksb *lksb, int error); 102 + }; 103 + 78 104 79 105 /* 80 106 * A cluster connection. Mostly opaque to ocfs2, the connection holds ··· 100 96 char cc_name[GROUP_NAME_MAX]; 101 97 int cc_namelen; 102 98 struct ocfs2_protocol_version cc_version; 99 + struct ocfs2_locking_protocol *cc_proto; 103 100 void (*cc_recovery_handler)(int node_num, void *recovery_data); 104 101 void *cc_recovery_data; 105 102 void *cc_lockspace; ··· 160 155 * 161 156 * ast and bast functions are not part of the call because the 162 157 * stack will likely want to wrap ast and bast calls before passing 163 - * them to stack->sp_proto. 158 + * them to stack->sp_proto. There is no astarg. The lksb will 159 + * be passed back to the ast and bast functions. The caller can 160 + * use this to find their object. 164 161 */ 165 162 int (*dlm_lock)(struct ocfs2_cluster_connection *conn, 166 163 int mode, 167 - union ocfs2_dlm_lksb *lksb, 164 + struct ocfs2_dlm_lksb *lksb, 168 165 u32 flags, 169 166 void *name, 170 - unsigned int namelen, 171 - void *astarg); 167 + unsigned int namelen); 172 168 173 169 /* 174 170 * Call the underlying dlm unlock function. The ->dlm_unlock() 175 171 * function should convert the flags as appropriate. 176 172 * 177 173 * The unlock ast is not passed, as the stack will want to wrap 178 - * it before calling stack->sp_proto->lp_unlock_ast(). 174 + * it before calling stack->sp_proto->lp_unlock_ast(). There is 175 + * no astarg. The lksb will be passed back to the unlock ast 176 + * function. The caller can use this to find their object. 179 177 */ 180 178 int (*dlm_unlock)(struct ocfs2_cluster_connection *conn, 181 - union ocfs2_dlm_lksb *lksb, 182 - u32 flags, 183 - void *astarg); 179 + struct ocfs2_dlm_lksb *lksb, 180 + u32 flags); 184 181 185 182 /* 186 183 * Return the status of the current lock status block. The fs ··· 190 183 * callback pulls out the stack-specific lksb, converts the status 191 184 * to a proper errno, and returns it. 
192 185 */ 193 - int (*lock_status)(union ocfs2_dlm_lksb *lksb); 186 + int (*lock_status)(struct ocfs2_dlm_lksb *lksb); 194 187 195 188 /* 196 189 * Return non-zero if the LVB is valid. 197 190 */ 198 - int (*lvb_valid)(union ocfs2_dlm_lksb *lksb); 191 + int (*lvb_valid)(struct ocfs2_dlm_lksb *lksb); 199 192 200 193 /* 201 194 * Pull the lvb pointer off of the stack-specific lksb. 202 195 */ 203 - void *(*lock_lvb)(union ocfs2_dlm_lksb *lksb); 196 + void *(*lock_lvb)(struct ocfs2_dlm_lksb *lksb); 204 197 205 198 /* 206 199 * Cluster-aware posix locks 207 200 ··· 217 210 * This is an optional debugging hook. If provided, the 218 211 * stack can dump debugging information about this lock. 219 212 */ 220 - void (*dump_lksb)(union ocfs2_dlm_lksb *lksb); 213 + void (*dump_lksb)(struct ocfs2_dlm_lksb *lksb); 221 214 }; 222 215 223 216 /* ··· 233 226 /* These are managed by the stackglue code. */ 234 227 struct list_head sp_list; 235 228 unsigned int sp_count; 236 - struct ocfs2_locking_protocol *sp_proto; 229 + struct ocfs2_protocol_version sp_max_proto; 237 230 }; 238 231 239 232 ··· 241 234 int ocfs2_cluster_connect(const char *stack_name, 242 235 const char *group, 243 236 int grouplen, 237 + struct ocfs2_locking_protocol *lproto, 244 238 void (*recovery_handler)(int node_num, 245 239 void *recovery_data), 246 240 void *recovery_data, 247 241 struct ocfs2_cluster_connection **conn); 242 + /* 243 + * Used by callers that don't store their stack name. They must ensure 244 + * all nodes have the same stack. 245 + */ 246 + int ocfs2_cluster_connect_agnostic(const char *group, 247 + int grouplen, 248 + struct ocfs2_locking_protocol *lproto, 249 + void (*recovery_handler)(int node_num, 250 + void *recovery_data), 251 + void *recovery_data, 252 + struct ocfs2_cluster_connection **conn); 248 253 int ocfs2_cluster_disconnect(struct ocfs2_cluster_connection *conn, 249 254 int hangup_pending); 250 255 void ocfs2_cluster_hangup(const char *group, int grouplen); ··· 265 246 struct ocfs2_lock_res; 266 247 int ocfs2_dlm_lock(struct ocfs2_cluster_connection *conn, 267 248 int mode, 268 - union ocfs2_dlm_lksb *lksb, 249 + struct ocfs2_dlm_lksb *lksb, 269 250 u32 flags, 270 251 void *name, 271 - unsigned int namelen, 272 - struct ocfs2_lock_res *astarg); 252 + unsigned int namelen); 273 253 int ocfs2_dlm_unlock(struct ocfs2_cluster_connection *conn, 274 - union ocfs2_dlm_lksb *lksb, 275 - u32 flags, 276 - struct ocfs2_lock_res *astarg); 254 + struct ocfs2_dlm_lksb *lksb, 255 + u32 flags); 277 256 278 - int ocfs2_dlm_lock_status(union ocfs2_dlm_lksb *lksb); 279 - int ocfs2_dlm_lvb_valid(union ocfs2_dlm_lksb *lksb); 280 - void *ocfs2_dlm_lvb(union ocfs2_dlm_lksb *lksb); 281 - void ocfs2_dlm_dump_lksb(union ocfs2_dlm_lksb *lksb); 257 + int ocfs2_dlm_lock_status(struct ocfs2_dlm_lksb *lksb); 258 + int ocfs2_dlm_lvb_valid(struct ocfs2_dlm_lksb *lksb); 259 + void *ocfs2_dlm_lvb(struct ocfs2_dlm_lksb *lksb); 260 + void ocfs2_dlm_dump_lksb(struct ocfs2_dlm_lksb *lksb); 282 261 283 262 int ocfs2_stack_supports_plocks(void); 284 263 int ocfs2_plock(struct ocfs2_cluster_connection *conn, u64 ino, 285 264 struct file *file, int cmd, struct file_lock *fl); 286 265 287 - void ocfs2_stack_glue_set_locking_protocol(struct ocfs2_locking_protocol *proto); 266 + void ocfs2_stack_glue_set_max_proto_version(struct ocfs2_protocol_version *max_proto); 288 267 289 268 290 269 /* Used by stack plugins */
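Since the lksb rather than an opaque astarg now comes back to every ast and bast, a caller embeds struct ocfs2_dlm_lksb in its own lock object and recovers that object with container_of(). A minimal sketch with a hypothetical my_lock_res wrapper (dlmglue's struct ocfs2_lock_res is the real equivalent):

struct my_lock_res {
	char			name[32];
	int			granted;
	struct ocfs2_dlm_lksb	lksb;	/* embedded, not a pointer */
};

static void my_lock_ast(struct ocfs2_dlm_lksb *lksb)
{
	struct my_lock_res *res =
		container_of(lksb, struct my_lock_res, lksb);

	/* The stack-neutral accessor hides which union member is live. */
	if (!ocfs2_dlm_lock_status(lksb))
		res->granted = 1;
}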
+131 -40
fs/ocfs2/suballoc.c
··· 51 51 #define ALLOC_NEW_GROUP 0x1 52 52 #define ALLOC_GROUPS_FROM_GLOBAL 0x2 53 53 54 - #define OCFS2_MAX_INODES_TO_STEAL 1024 54 + #define OCFS2_MAX_TO_STEAL 1024 55 55 56 56 static inline void ocfs2_debug_bg(struct ocfs2_group_desc *bg); 57 57 static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe); ··· 637 637 return status; 638 638 } 639 639 640 + static void ocfs2_init_inode_steal_slot(struct ocfs2_super *osb) 641 + { 642 + spin_lock(&osb->osb_lock); 643 + osb->s_inode_steal_slot = OCFS2_INVALID_SLOT; 644 + spin_unlock(&osb->osb_lock); 645 + atomic_set(&osb->s_num_inodes_stolen, 0); 646 + } 647 + 648 + static void ocfs2_init_meta_steal_slot(struct ocfs2_super *osb) 649 + { 650 + spin_lock(&osb->osb_lock); 651 + osb->s_meta_steal_slot = OCFS2_INVALID_SLOT; 652 + spin_unlock(&osb->osb_lock); 653 + atomic_set(&osb->s_num_meta_stolen, 0); 654 + } 655 + 656 + void ocfs2_init_steal_slots(struct ocfs2_super *osb) 657 + { 658 + ocfs2_init_inode_steal_slot(osb); 659 + ocfs2_init_meta_steal_slot(osb); 660 + } 661 + 662 + static void __ocfs2_set_steal_slot(struct ocfs2_super *osb, int slot, int type) 663 + { 664 + spin_lock(&osb->osb_lock); 665 + if (type == INODE_ALLOC_SYSTEM_INODE) 666 + osb->s_inode_steal_slot = slot; 667 + else if (type == EXTENT_ALLOC_SYSTEM_INODE) 668 + osb->s_meta_steal_slot = slot; 669 + spin_unlock(&osb->osb_lock); 670 + } 671 + 672 + static int __ocfs2_get_steal_slot(struct ocfs2_super *osb, int type) 673 + { 674 + int slot = OCFS2_INVALID_SLOT; 675 + 676 + spin_lock(&osb->osb_lock); 677 + if (type == INODE_ALLOC_SYSTEM_INODE) 678 + slot = osb->s_inode_steal_slot; 679 + else if (type == EXTENT_ALLOC_SYSTEM_INODE) 680 + slot = osb->s_meta_steal_slot; 681 + spin_unlock(&osb->osb_lock); 682 + 683 + return slot; 684 + } 685 + 686 + static int ocfs2_get_inode_steal_slot(struct ocfs2_super *osb) 687 + { 688 + return __ocfs2_get_steal_slot(osb, INODE_ALLOC_SYSTEM_INODE); 689 + } 690 + 691 + static int ocfs2_get_meta_steal_slot(struct ocfs2_super *osb) 692 + { 693 + return __ocfs2_get_steal_slot(osb, EXTENT_ALLOC_SYSTEM_INODE); 694 + } 695 + 696 + static int ocfs2_steal_resource(struct ocfs2_super *osb, 697 + struct ocfs2_alloc_context *ac, 698 + int type) 699 + { 700 + int i, status = -ENOSPC; 701 + int slot = __ocfs2_get_steal_slot(osb, type); 702 + 703 + /* Start to steal resource from the first slot after ours. 
*/ 704 + if (slot == OCFS2_INVALID_SLOT) 705 + slot = osb->slot_num + 1; 706 + 707 + for (i = 0; i < osb->max_slots; i++, slot++) { 708 + if (slot == osb->max_slots) 709 + slot = 0; 710 + 711 + if (slot == osb->slot_num) 712 + continue; 713 + 714 + status = ocfs2_reserve_suballoc_bits(osb, ac, 715 + type, 716 + (u32)slot, NULL, 717 + NOT_ALLOC_NEW_GROUP); 718 + if (status >= 0) { 719 + __ocfs2_set_steal_slot(osb, slot, type); 720 + break; 721 + } 722 + 723 + ocfs2_free_ac_resource(ac); 724 + } 725 + 726 + return status; 727 + } 728 + 729 + static int ocfs2_steal_inode(struct ocfs2_super *osb, 730 + struct ocfs2_alloc_context *ac) 731 + { 732 + return ocfs2_steal_resource(osb, ac, INODE_ALLOC_SYSTEM_INODE); 733 + } 734 + 735 + static int ocfs2_steal_meta(struct ocfs2_super *osb, 736 + struct ocfs2_alloc_context *ac) 737 + { 738 + return ocfs2_steal_resource(osb, ac, EXTENT_ALLOC_SYSTEM_INODE); 739 + } 740 + 640 741 int ocfs2_reserve_new_metadata_blocks(struct ocfs2_super *osb, 641 742 int blocks, 642 743 struct ocfs2_alloc_context **ac) 643 744 { 644 745 int status; 645 - u32 slot; 746 + int slot = ocfs2_get_meta_steal_slot(osb); 646 747 647 748 *ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL); 648 749 if (!(*ac)) { ··· 754 653 755 654 (*ac)->ac_bits_wanted = blocks; 756 655 (*ac)->ac_which = OCFS2_AC_USE_META; 757 - slot = osb->slot_num; 758 656 (*ac)->ac_group_search = ocfs2_block_group_search; 759 657 658 + if (slot != OCFS2_INVALID_SLOT && 659 + atomic_read(&osb->s_num_meta_stolen) < OCFS2_MAX_TO_STEAL) 660 + goto extent_steal; 661 + 662 + atomic_set(&osb->s_num_meta_stolen, 0); 760 663 status = ocfs2_reserve_suballoc_bits(osb, (*ac), 761 664 EXTENT_ALLOC_SYSTEM_INODE, 762 - slot, NULL, ALLOC_NEW_GROUP); 665 + (u32)osb->slot_num, NULL, 666 + ALLOC_NEW_GROUP); 667 + 668 + 669 + if (status >= 0) { 670 + status = 0; 671 + if (slot != OCFS2_INVALID_SLOT) 672 + ocfs2_init_meta_steal_slot(osb); 673 + goto bail; 674 + } else if (status < 0 && status != -ENOSPC) { 675 + mlog_errno(status); 676 + goto bail; 677 + } 678 + 679 + ocfs2_free_ac_resource(*ac); 680 + 681 + extent_steal: 682 + status = ocfs2_steal_meta(osb, *ac); 683 + atomic_inc(&osb->s_num_meta_stolen); 763 684 if (status < 0) { 764 685 if (status != -ENOSPC) 765 686 mlog_errno(status); ··· 808 685 ac); 809 686 } 810 687 811 - static int ocfs2_steal_inode_from_other_nodes(struct ocfs2_super *osb, 812 - struct ocfs2_alloc_context *ac) 813 - { 814 - int i, status = -ENOSPC; 815 - s16 slot = ocfs2_get_inode_steal_slot(osb); 816 - 817 - /* Start to steal inodes from the first slot after ours. 
*/ 818 - if (slot == OCFS2_INVALID_SLOT) 819 - slot = osb->slot_num + 1; 820 - 821 - for (i = 0; i < osb->max_slots; i++, slot++) { 822 - if (slot == osb->max_slots) 823 - slot = 0; 824 - 825 - if (slot == osb->slot_num) 826 - continue; 827 - 828 - status = ocfs2_reserve_suballoc_bits(osb, ac, 829 - INODE_ALLOC_SYSTEM_INODE, 830 - slot, NULL, 831 - NOT_ALLOC_NEW_GROUP); 832 - if (status >= 0) { 833 - ocfs2_set_inode_steal_slot(osb, slot); 834 - break; 835 - } 836 - 837 - ocfs2_free_ac_resource(ac); 838 - } 839 - 840 - return status; 841 - } 842 - 843 688 int ocfs2_reserve_new_inode(struct ocfs2_super *osb, 844 689 struct ocfs2_alloc_context **ac) 845 690 { 846 691 int status; 847 - s16 slot = ocfs2_get_inode_steal_slot(osb); 692 + int slot = ocfs2_get_inode_steal_slot(osb); 848 693 u64 alloc_group; 849 694 850 695 *ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL); ··· 845 754 * need to check our slots to see whether there is some space for us. 846 755 */ 847 756 if (slot != OCFS2_INVALID_SLOT && 848 - atomic_read(&osb->s_num_inodes_stolen) < OCFS2_MAX_INODES_TO_STEAL) 757 + atomic_read(&osb->s_num_inodes_stolen) < OCFS2_MAX_TO_STEAL) 849 758 goto inode_steal; 850 759 851 760 atomic_set(&osb->s_num_inodes_stolen, 0); 852 761 alloc_group = osb->osb_inode_alloc_group; 853 762 status = ocfs2_reserve_suballoc_bits(osb, *ac, 854 763 INODE_ALLOC_SYSTEM_INODE, 855 - osb->slot_num, 764 + (u32)osb->slot_num, 856 765 &alloc_group, 857 766 ALLOC_NEW_GROUP | 858 767 ALLOC_GROUPS_FROM_GLOBAL); ··· 880 789 ocfs2_free_ac_resource(*ac); 881 790 882 791 inode_steal: 883 - status = ocfs2_steal_inode_from_other_nodes(osb, *ac); 792 + status = ocfs2_steal_inode(osb, *ac); 884 793 atomic_inc(&osb->s_num_inodes_stolen); 885 794 if (status < 0) { 886 795 if (status != -ENOSPC)
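The walk in ocfs2_steal_resource() is easiest to see with concrete numbers: start one slot past our own, wrap at max_slots, and never try our own slot. A standalone sketch of just the iteration order (slot values made up):

#include <stdio.h>

int main(void)
{
	int max_slots = 4, our_slot = 2;
	int i, slot = our_slot + 1;	/* first candidate to steal from */

	for (i = 0; i < max_slots; i++, slot++) {
		if (slot == max_slots)
			slot = 0;	/* wrap around */
		if (slot == our_slot)
			continue;	/* never steal from ourselves */
		printf("try slot %d\n", slot);	/* prints 3, 0, 1 */
	}
	return 0;
}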
+1
fs/ocfs2/suballoc.h
··· 56 56 is the same as ~0 - unlimited */ 57 57 }; 58 58 59 + void ocfs2_init_steal_slots(struct ocfs2_super *osb); 59 60 void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac); 60 61 static inline int ocfs2_alloc_context_bits_left(struct ocfs2_alloc_context *ac) 61 62 {
+7 -3
fs/ocfs2/super.c
··· 69 69 #include "xattr.h" 70 70 #include "quota.h" 71 71 #include "refcounttree.h" 72 + #include "suballoc.h" 72 73 73 74 #include "buffer_head_io.h" 74 75 ··· 302 301 303 302 spin_lock(&osb->osb_lock); 304 303 out += snprintf(buf + out, len - out, 305 - "%10s => Slot: %d NumStolen: %d\n", "Steal", 304 + "%10s => InodeSlot: %d StolenInodes: %d, " 305 + "MetaSlot: %d StolenMeta: %d\n", "Steal", 306 306 osb->s_inode_steal_slot, 307 - atomic_read(&osb->s_num_inodes_stolen)); 307 + atomic_read(&osb->s_num_inodes_stolen), 308 + osb->s_meta_steal_slot, 309 + atomic_read(&osb->s_num_meta_stolen)); 308 310 spin_unlock(&osb->osb_lock); 309 311 310 312 out += snprintf(buf + out, len - out, "OrphanScan => "); ··· 2001 1997 osb->blocked_lock_count = 0; 2002 1998 spin_lock_init(&osb->osb_lock); 2003 1999 spin_lock_init(&osb->osb_xattr_lock); 2004 - ocfs2_init_inode_steal_slot(osb); 2000 + ocfs2_init_steal_slots(osb); 2005 2001 2006 2002 atomic_set(&osb->alloc_stats.moves, 0); 2007 2003 atomic_set(&osb->alloc_stats.local_data, 0);
+1220 -1044
fs/ocfs2/xattr.c
··· 116 116 }; 117 117 118 118 struct ocfs2_xattr_info { 119 - int name_index; 120 - const char *name; 121 - const void *value; 122 - size_t value_len; 119 + int xi_name_index; 120 + const char *xi_name; 121 + int xi_name_len; 122 + const void *xi_value; 123 + size_t xi_value_len; 123 124 }; 124 125 125 126 struct ocfs2_xattr_search { ··· 137 136 struct ocfs2_xattr_entry *here; 138 137 int not_found; 139 138 }; 139 + 140 + /* Operations on struct ocfs2_xa_entry */ 141 + struct ocfs2_xa_loc; 142 + struct ocfs2_xa_loc_operations { 143 + /* 144 + * Journal functions 145 + */ 146 + int (*xlo_journal_access)(handle_t *handle, struct ocfs2_xa_loc *loc, 147 + int type); 148 + void (*xlo_journal_dirty)(handle_t *handle, struct ocfs2_xa_loc *loc); 149 + 150 + /* 151 + * Return a pointer to the appropriate buffer in loc->xl_storage 152 + * at the given offset from loc->xl_header. 153 + */ 154 + void *(*xlo_offset_pointer)(struct ocfs2_xa_loc *loc, int offset); 155 + 156 + /* Can we reuse the existing entry for the new value? */ 157 + int (*xlo_can_reuse)(struct ocfs2_xa_loc *loc, 158 + struct ocfs2_xattr_info *xi); 159 + 160 + /* How much space is needed for the new value? */ 161 + int (*xlo_check_space)(struct ocfs2_xa_loc *loc, 162 + struct ocfs2_xattr_info *xi); 163 + 164 + /* 165 + * Return the offset of the first name+value pair. This is 166 + * the start of our downward-filling free space. 167 + */ 168 + int (*xlo_get_free_start)(struct ocfs2_xa_loc *loc); 169 + 170 + /* 171 + * Remove the name+value at this location. Do whatever is 172 + * appropriate with the remaining name+value pairs. 173 + */ 174 + void (*xlo_wipe_namevalue)(struct ocfs2_xa_loc *loc); 175 + 176 + /* Fill xl_entry with a new entry */ 177 + void (*xlo_add_entry)(struct ocfs2_xa_loc *loc, u32 name_hash); 178 + 179 + /* Add name+value storage to an entry */ 180 + void (*xlo_add_namevalue)(struct ocfs2_xa_loc *loc, int size); 181 + 182 + /* 183 + * Initialize the value buf's access and bh fields for this entry. 184 + * ocfs2_xa_fill_value_buf() will handle the xv pointer. 185 + */ 186 + void (*xlo_fill_value_buf)(struct ocfs2_xa_loc *loc, 187 + struct ocfs2_xattr_value_buf *vb); 188 + }; 189 + 190 + /* 191 + * Describes an xattr entry location. This is a memory structure 192 + * tracking the on-disk structure. 193 + */ 194 + struct ocfs2_xa_loc { 195 + /* This xattr belongs to this inode */ 196 + struct inode *xl_inode; 197 + 198 + /* The ocfs2_xattr_header inside the on-disk storage. Not NULL. */ 199 + struct ocfs2_xattr_header *xl_header; 200 + 201 + /* Bytes from xl_header to the end of the storage */ 202 + int xl_size; 203 + 204 + /* 205 + * The ocfs2_xattr_entry this location describes. If this is 206 + * NULL, this location describes the on-disk structure where it 207 + * would have been. 
208 + */ 209 + struct ocfs2_xattr_entry *xl_entry; 210 + 211 + /* 212 + * Internal housekeeping 213 + */ 214 + 215 + /* Buffer(s) containing this entry */ 216 + void *xl_storage; 217 + 218 + /* Operations on the storage backing this location */ 219 + const struct ocfs2_xa_loc_operations *xl_ops; 220 + }; 221 + 222 + /* 223 + * Convenience functions to calculate how much space is needed for a 224 + * given name+value pair 225 + */ 226 + static int namevalue_size(int name_len, uint64_t value_len) 227 + { 228 + if (value_len > OCFS2_XATTR_INLINE_SIZE) 229 + return OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE; 230 + else 231 + return OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_SIZE(value_len); 232 + } 233 + 234 + static int namevalue_size_xi(struct ocfs2_xattr_info *xi) 235 + { 236 + return namevalue_size(xi->xi_name_len, xi->xi_value_len); 237 + } 238 + 239 + static int namevalue_size_xe(struct ocfs2_xattr_entry *xe) 240 + { 241 + u64 value_len = le64_to_cpu(xe->xe_value_size); 242 + 243 + BUG_ON((value_len > OCFS2_XATTR_INLINE_SIZE) && 244 + ocfs2_xattr_is_local(xe)); 245 + return namevalue_size(xe->xe_name_len, value_len); 246 + } 247 + 140 248 141 249 static int ocfs2_xattr_bucket_get_name_value(struct super_block *sb, 142 250 struct ocfs2_xattr_header *xh, ··· 320 210 static inline u16 ocfs2_blocks_per_xattr_bucket(struct super_block *sb) 321 211 { 322 212 return OCFS2_XATTR_BUCKET_SIZE / (1 << sb->s_blocksize_bits); 323 - } 324 - 325 - static inline u16 ocfs2_xattr_max_xe_in_bucket(struct super_block *sb) 326 - { 327 - u16 len = sb->s_blocksize - 328 - offsetof(struct ocfs2_xattr_header, xh_entries); 329 - 330 - return len / sizeof(struct ocfs2_xattr_entry); 331 213 } 332 214 333 215 #define bucket_blkno(_b) ((_b)->bu_bhs[0]->b_blocknr) ··· 565 463 return hash; 566 464 } 567 465 568 - /* 569 - * ocfs2_xattr_hash_entry() 570 - * 571 - * Compute the hash of an extended attribute. 
572 - */ 573 - static void ocfs2_xattr_hash_entry(struct inode *inode, 574 - struct ocfs2_xattr_header *header, 575 - struct ocfs2_xattr_entry *entry) 576 - { 577 - u32 hash = 0; 578 - char *name = (char *)header + le16_to_cpu(entry->xe_name_offset); 579 - 580 - hash = ocfs2_xattr_name_hash(inode, name, entry->xe_name_len); 581 - entry->xe_name_hash = cpu_to_le32(hash); 582 - 583 - return; 584 - } 585 - 586 466 static int ocfs2_xattr_entry_real_size(int name_len, size_t value_len) 587 467 { 588 - int size = 0; 468 + return namevalue_size(name_len, value_len) + 469 + sizeof(struct ocfs2_xattr_entry); 470 + } 589 471 590 - if (value_len <= OCFS2_XATTR_INLINE_SIZE) 591 - size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_SIZE(value_len); 592 - else 593 - size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE; 594 - size += sizeof(struct ocfs2_xattr_entry); 472 + static int ocfs2_xi_entry_usage(struct ocfs2_xattr_info *xi) 473 + { 474 + return namevalue_size_xi(xi) + 475 + sizeof(struct ocfs2_xattr_entry); 476 + } 595 477 596 - return size; 478 + static int ocfs2_xe_entry_usage(struct ocfs2_xattr_entry *xe) 479 + { 480 + return namevalue_size_xe(xe) + 481 + sizeof(struct ocfs2_xattr_entry); 597 482 } 598 483 599 484 int ocfs2_calc_security_init(struct inode *dir, ··· 1397 1308 return ret; 1398 1309 } 1399 1310 1400 - static int ocfs2_xattr_cleanup(struct inode *inode, 1401 - handle_t *handle, 1402 - struct ocfs2_xattr_info *xi, 1403 - struct ocfs2_xattr_search *xs, 1404 - struct ocfs2_xattr_value_buf *vb, 1405 - size_t offs) 1311 + static int ocfs2_xa_check_space_helper(int needed_space, int free_start, 1312 + int num_entries) 1406 1313 { 1407 - int ret = 0; 1408 - size_t name_len = strlen(xi->name); 1409 - void *val = xs->base + offs; 1410 - size_t size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE; 1314 + int free_space; 1411 1315 1412 - ret = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh, 1413 - OCFS2_JOURNAL_ACCESS_WRITE); 1414 - if (ret) { 1415 - mlog_errno(ret); 1416 - goto out; 1417 - } 1418 - /* Decrease xattr count */ 1419 - le16_add_cpu(&xs->header->xh_count, -1); 1420 - /* Remove the xattr entry and tree root which has already be set*/ 1421 - memset((void *)xs->here, 0, sizeof(struct ocfs2_xattr_entry)); 1422 - memset(val, 0, size); 1316 + if (!needed_space) 1317 + return 0; 1423 1318 1424 - ret = ocfs2_journal_dirty(handle, vb->vb_bh); 1425 - if (ret < 0) 1426 - mlog_errno(ret); 1427 - out: 1428 - return ret; 1429 - } 1430 - 1431 - static int ocfs2_xattr_update_entry(struct inode *inode, 1432 - handle_t *handle, 1433 - struct ocfs2_xattr_info *xi, 1434 - struct ocfs2_xattr_search *xs, 1435 - struct ocfs2_xattr_value_buf *vb, 1436 - size_t offs) 1437 - { 1438 - int ret; 1439 - 1440 - ret = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh, 1441 - OCFS2_JOURNAL_ACCESS_WRITE); 1442 - if (ret) { 1443 - mlog_errno(ret); 1444 - goto out; 1445 - } 1446 - 1447 - xs->here->xe_name_offset = cpu_to_le16(offs); 1448 - xs->here->xe_value_size = cpu_to_le64(xi->value_len); 1449 - if (xi->value_len <= OCFS2_XATTR_INLINE_SIZE) 1450 - ocfs2_xattr_set_local(xs->here, 1); 1451 - else 1452 - ocfs2_xattr_set_local(xs->here, 0); 1453 - ocfs2_xattr_hash_entry(inode, xs->header, xs->here); 1454 - 1455 - ret = ocfs2_journal_dirty(handle, vb->vb_bh); 1456 - if (ret < 0) 1457 - mlog_errno(ret); 1458 - out: 1459 - return ret; 1460 - } 1461 - 1462 - /* 1463 - * ocfs2_xattr_set_value_outside() 1464 - * 1465 - * Set large size value in B tree. 
1466 - */ 1467 - static int ocfs2_xattr_set_value_outside(struct inode *inode, 1468 - struct ocfs2_xattr_info *xi, 1469 - struct ocfs2_xattr_search *xs, 1470 - struct ocfs2_xattr_set_ctxt *ctxt, 1471 - struct ocfs2_xattr_value_buf *vb, 1472 - size_t offs) 1473 - { 1474 - size_t name_len = strlen(xi->name); 1475 - void *val = xs->base + offs; 1476 - struct ocfs2_xattr_value_root *xv = NULL; 1477 - size_t size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE; 1478 - int ret = 0; 1479 - 1480 - memset(val, 0, size); 1481 - memcpy(val, xi->name, name_len); 1482 - xv = (struct ocfs2_xattr_value_root *) 1483 - (val + OCFS2_XATTR_SIZE(name_len)); 1484 - xv->xr_clusters = 0; 1485 - xv->xr_last_eb_blk = 0; 1486 - xv->xr_list.l_tree_depth = 0; 1487 - xv->xr_list.l_count = cpu_to_le16(1); 1488 - xv->xr_list.l_next_free_rec = 0; 1489 - vb->vb_xv = xv; 1490 - 1491 - ret = ocfs2_xattr_value_truncate(inode, vb, xi->value_len, ctxt); 1492 - if (ret < 0) { 1493 - mlog_errno(ret); 1494 - return ret; 1495 - } 1496 - ret = ocfs2_xattr_update_entry(inode, ctxt->handle, xi, xs, vb, offs); 1497 - if (ret < 0) { 1498 - mlog_errno(ret); 1499 - return ret; 1500 - } 1501 - ret = __ocfs2_xattr_set_value_outside(inode, ctxt->handle, vb, 1502 - xi->value, xi->value_len); 1503 - if (ret < 0) 1504 - mlog_errno(ret); 1505 - 1506 - return ret; 1507 - } 1508 - 1509 - /* 1510 - * ocfs2_xattr_set_entry_local() 1511 - * 1512 - * Set, replace or remove extended attribute in local. 1513 - */ 1514 - static void ocfs2_xattr_set_entry_local(struct inode *inode, 1515 - struct ocfs2_xattr_info *xi, 1516 - struct ocfs2_xattr_search *xs, 1517 - struct ocfs2_xattr_entry *last, 1518 - size_t min_offs) 1519 - { 1520 - size_t name_len = strlen(xi->name); 1521 - int i; 1522 - 1523 - if (xi->value && xs->not_found) { 1524 - /* Insert the new xattr entry. */ 1525 - le16_add_cpu(&xs->header->xh_count, 1); 1526 - ocfs2_xattr_set_type(last, xi->name_index); 1527 - ocfs2_xattr_set_local(last, 1); 1528 - last->xe_name_len = name_len; 1529 - } else { 1530 - void *first_val; 1531 - void *val; 1532 - size_t offs, size; 1533 - 1534 - first_val = xs->base + min_offs; 1535 - offs = le16_to_cpu(xs->here->xe_name_offset); 1536 - val = xs->base + offs; 1537 - 1538 - if (le64_to_cpu(xs->here->xe_value_size) > 1539 - OCFS2_XATTR_INLINE_SIZE) 1540 - size = OCFS2_XATTR_SIZE(name_len) + 1541 - OCFS2_XATTR_ROOT_SIZE; 1542 - else 1543 - size = OCFS2_XATTR_SIZE(name_len) + 1544 - OCFS2_XATTR_SIZE(le64_to_cpu(xs->here->xe_value_size)); 1545 - 1546 - if (xi->value && size == OCFS2_XATTR_SIZE(name_len) + 1547 - OCFS2_XATTR_SIZE(xi->value_len)) { 1548 - /* The old and the new value have the 1549 - same size. Just replace the value. */ 1550 - ocfs2_xattr_set_local(xs->here, 1); 1551 - xs->here->xe_value_size = cpu_to_le64(xi->value_len); 1552 - /* Clear value bytes. */ 1553 - memset(val + OCFS2_XATTR_SIZE(name_len), 1554 - 0, 1555 - OCFS2_XATTR_SIZE(xi->value_len)); 1556 - memcpy(val + OCFS2_XATTR_SIZE(name_len), 1557 - xi->value, 1558 - xi->value_len); 1559 - return; 1560 - } 1561 - /* Remove the old name+value. */ 1562 - memmove(first_val + size, first_val, val - first_val); 1563 - memset(first_val, 0, size); 1564 - xs->here->xe_name_hash = 0; 1565 - xs->here->xe_name_offset = 0; 1566 - ocfs2_xattr_set_local(xs->here, 1); 1567 - xs->here->xe_value_size = 0; 1568 - 1569 - min_offs += size; 1570 - 1571 - /* Adjust all value offsets. 
*/ 1572 - last = xs->header->xh_entries; 1573 - for (i = 0 ; i < le16_to_cpu(xs->header->xh_count); i++) { 1574 - size_t o = le16_to_cpu(last->xe_name_offset); 1575 - 1576 - if (o < offs) 1577 - last->xe_name_offset = cpu_to_le16(o + size); 1578 - last += 1; 1579 - } 1580 - 1581 - if (!xi->value) { 1582 - /* Remove the old entry. */ 1583 - last -= 1; 1584 - memmove(xs->here, xs->here + 1, 1585 - (void *)last - (void *)xs->here); 1586 - memset(last, 0, sizeof(struct ocfs2_xattr_entry)); 1587 - le16_add_cpu(&xs->header->xh_count, -1); 1588 - } 1589 - } 1590 - if (xi->value) { 1591 - /* Insert the new name+value. */ 1592 - size_t size = OCFS2_XATTR_SIZE(name_len) + 1593 - OCFS2_XATTR_SIZE(xi->value_len); 1594 - void *val = xs->base + min_offs - size; 1595 - 1596 - xs->here->xe_name_offset = cpu_to_le16(min_offs - size); 1597 - memset(val, 0, size); 1598 - memcpy(val, xi->name, name_len); 1599 - memcpy(val + OCFS2_XATTR_SIZE(name_len), 1600 - xi->value, 1601 - xi->value_len); 1602 - xs->here->xe_value_size = cpu_to_le64(xi->value_len); 1603 - ocfs2_xattr_set_local(xs->here, 1); 1604 - ocfs2_xattr_hash_entry(inode, xs->header, xs->here); 1605 - } 1606 - 1607 - return; 1608 - } 1609 - 1610 - /* 1611 - * ocfs2_xattr_set_entry() 1612 - * 1613 - * Set extended attribute entry into inode or block. 1614 - * 1615 - * If extended attribute value size > OCFS2_XATTR_INLINE_SIZE, 1616 - * We first insert tree root(ocfs2_xattr_value_root) with set_entry_local(), 1617 - * then set value in B tree with set_value_outside(). 1618 - */ 1619 - static int ocfs2_xattr_set_entry(struct inode *inode, 1620 - struct ocfs2_xattr_info *xi, 1621 - struct ocfs2_xattr_search *xs, 1622 - struct ocfs2_xattr_set_ctxt *ctxt, 1623 - int flag) 1624 - { 1625 - struct ocfs2_xattr_entry *last; 1626 - struct ocfs2_inode_info *oi = OCFS2_I(inode); 1627 - struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data; 1628 - size_t min_offs = xs->end - xs->base, name_len = strlen(xi->name); 1629 - size_t size_l = 0; 1630 - handle_t *handle = ctxt->handle; 1631 - int free, i, ret; 1632 - struct ocfs2_xattr_info xi_l = { 1633 - .name_index = xi->name_index, 1634 - .name = xi->name, 1635 - .value = xi->value, 1636 - .value_len = xi->value_len, 1637 - }; 1638 - struct ocfs2_xattr_value_buf vb = { 1639 - .vb_bh = xs->xattr_bh, 1640 - .vb_access = ocfs2_journal_access_di, 1641 - }; 1642 - 1643 - if (!(flag & OCFS2_INLINE_XATTR_FL)) { 1644 - BUG_ON(xs->xattr_bh == xs->inode_bh); 1645 - vb.vb_access = ocfs2_journal_access_xb; 1646 - } else 1647 - BUG_ON(xs->xattr_bh != xs->inode_bh); 1648 - 1649 - /* Compute min_offs, last and free space. 
*/ 1650 - last = xs->header->xh_entries; 1651 - 1652 - for (i = 0 ; i < le16_to_cpu(xs->header->xh_count); i++) { 1653 - size_t offs = le16_to_cpu(last->xe_name_offset); 1654 - if (offs < min_offs) 1655 - min_offs = offs; 1656 - last += 1; 1657 - } 1658 - 1659 - free = min_offs - ((void *)last - xs->base) - OCFS2_XATTR_HEADER_GAP; 1660 - if (free < 0) 1319 + free_space = free_start - 1320 + sizeof(struct ocfs2_xattr_header) - 1321 + (num_entries * sizeof(struct ocfs2_xattr_entry)) - 1322 + OCFS2_XATTR_HEADER_GAP; 1323 + if (free_space < 0) 1661 1324 return -EIO; 1325 + if (free_space < needed_space) 1326 + return -ENOSPC; 1662 1327 1663 - if (!xs->not_found) { 1664 - size_t size = 0; 1665 - if (ocfs2_xattr_is_local(xs->here)) 1666 - size = OCFS2_XATTR_SIZE(name_len) + 1667 - OCFS2_XATTR_SIZE(le64_to_cpu(xs->here->xe_value_size)); 1328 + return 0; 1329 + } 1330 + 1331 + static int ocfs2_xa_journal_access(handle_t *handle, struct ocfs2_xa_loc *loc, 1332 + int type) 1333 + { 1334 + return loc->xl_ops->xlo_journal_access(handle, loc, type); 1335 + } 1336 + 1337 + static void ocfs2_xa_journal_dirty(handle_t *handle, struct ocfs2_xa_loc *loc) 1338 + { 1339 + loc->xl_ops->xlo_journal_dirty(handle, loc); 1340 + } 1341 + 1342 + /* Give a pointer into the storage for the given offset */ 1343 + static void *ocfs2_xa_offset_pointer(struct ocfs2_xa_loc *loc, int offset) 1344 + { 1345 + BUG_ON(offset >= loc->xl_size); 1346 + return loc->xl_ops->xlo_offset_pointer(loc, offset); 1347 + } 1348 + 1349 + /* 1350 + * Wipe the name+value pair and allow the storage to reclaim it. This 1351 + * must be followed by either removal of the entry or a call to 1352 + * ocfs2_xa_add_namevalue(). 1353 + */ 1354 + static void ocfs2_xa_wipe_namevalue(struct ocfs2_xa_loc *loc) 1355 + { 1356 + loc->xl_ops->xlo_wipe_namevalue(loc); 1357 + } 1358 + 1359 + /* 1360 + * Find lowest offset to a name+value pair. This is the start of our 1361 + * downward-growing free space. 1362 + */ 1363 + static int ocfs2_xa_get_free_start(struct ocfs2_xa_loc *loc) 1364 + { 1365 + return loc->xl_ops->xlo_get_free_start(loc); 1366 + } 1367 + 1368 + /* Can we reuse loc->xl_entry for xi? */ 1369 + static int ocfs2_xa_can_reuse_entry(struct ocfs2_xa_loc *loc, 1370 + struct ocfs2_xattr_info *xi) 1371 + { 1372 + return loc->xl_ops->xlo_can_reuse(loc, xi); 1373 + } 1374 + 1375 + /* How much free space is needed to set the new value */ 1376 + static int ocfs2_xa_check_space(struct ocfs2_xa_loc *loc, 1377 + struct ocfs2_xattr_info *xi) 1378 + { 1379 + return loc->xl_ops->xlo_check_space(loc, xi); 1380 + } 1381 + 1382 + static void ocfs2_xa_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash) 1383 + { 1384 + loc->xl_ops->xlo_add_entry(loc, name_hash); 1385 + loc->xl_entry->xe_name_hash = cpu_to_le32(name_hash); 1386 + /* 1387 + * We can't leave the new entry's xe_name_offset at zero or 1388 + * add_namevalue() will go nuts. We set it to the size of our 1389 + * storage so that it can never be less than any other entry. 
1390 + */ 1391 + loc->xl_entry->xe_name_offset = cpu_to_le16(loc->xl_size); 1392 + } 1393 + 1394 + static void ocfs2_xa_add_namevalue(struct ocfs2_xa_loc *loc, 1395 + struct ocfs2_xattr_info *xi) 1396 + { 1397 + int size = namevalue_size_xi(xi); 1398 + int nameval_offset; 1399 + char *nameval_buf; 1400 + 1401 + loc->xl_ops->xlo_add_namevalue(loc, size); 1402 + loc->xl_entry->xe_value_size = cpu_to_le64(xi->xi_value_len); 1403 + loc->xl_entry->xe_name_len = xi->xi_name_len; 1404 + ocfs2_xattr_set_type(loc->xl_entry, xi->xi_name_index); 1405 + ocfs2_xattr_set_local(loc->xl_entry, 1406 + xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE); 1407 + 1408 + nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset); 1409 + nameval_buf = ocfs2_xa_offset_pointer(loc, nameval_offset); 1410 + memset(nameval_buf, 0, size); 1411 + memcpy(nameval_buf, xi->xi_name, xi->xi_name_len); 1412 + } 1413 + 1414 + static void ocfs2_xa_fill_value_buf(struct ocfs2_xa_loc *loc, 1415 + struct ocfs2_xattr_value_buf *vb) 1416 + { 1417 + int nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset); 1418 + int name_size = OCFS2_XATTR_SIZE(loc->xl_entry->xe_name_len); 1419 + 1420 + /* Value bufs are for value trees */ 1421 + BUG_ON(ocfs2_xattr_is_local(loc->xl_entry)); 1422 + BUG_ON(namevalue_size_xe(loc->xl_entry) != 1423 + (name_size + OCFS2_XATTR_ROOT_SIZE)); 1424 + 1425 + loc->xl_ops->xlo_fill_value_buf(loc, vb); 1426 + vb->vb_xv = 1427 + (struct ocfs2_xattr_value_root *)ocfs2_xa_offset_pointer(loc, 1428 + nameval_offset + 1429 + name_size); 1430 + } 1431 + 1432 + static int ocfs2_xa_block_journal_access(handle_t *handle, 1433 + struct ocfs2_xa_loc *loc, int type) 1434 + { 1435 + struct buffer_head *bh = loc->xl_storage; 1436 + ocfs2_journal_access_func access; 1437 + 1438 + if (loc->xl_size == (bh->b_size - 1439 + offsetof(struct ocfs2_xattr_block, 1440 + xb_attrs.xb_header))) 1441 + access = ocfs2_journal_access_xb; 1442 + else 1443 + access = ocfs2_journal_access_di; 1444 + return access(handle, INODE_CACHE(loc->xl_inode), bh, type); 1445 + } 1446 + 1447 + static void ocfs2_xa_block_journal_dirty(handle_t *handle, 1448 + struct ocfs2_xa_loc *loc) 1449 + { 1450 + struct buffer_head *bh = loc->xl_storage; 1451 + 1452 + ocfs2_journal_dirty(handle, bh); 1453 + } 1454 + 1455 + static void *ocfs2_xa_block_offset_pointer(struct ocfs2_xa_loc *loc, 1456 + int offset) 1457 + { 1458 + return (char *)loc->xl_header + offset; 1459 + } 1460 + 1461 + static int ocfs2_xa_block_can_reuse(struct ocfs2_xa_loc *loc, 1462 + struct ocfs2_xattr_info *xi) 1463 + { 1464 + /* 1465 + * Block storage is strict. If the sizes aren't exact, we will 1466 + * remove the old one and reinsert the new. 
1467 + */ 1468 + return namevalue_size_xe(loc->xl_entry) == 1469 + namevalue_size_xi(xi); 1470 + } 1471 + 1472 + static int ocfs2_xa_block_get_free_start(struct ocfs2_xa_loc *loc) 1473 + { 1474 + struct ocfs2_xattr_header *xh = loc->xl_header; 1475 + int i, count = le16_to_cpu(xh->xh_count); 1476 + int offset, free_start = loc->xl_size; 1477 + 1478 + for (i = 0; i < count; i++) { 1479 + offset = le16_to_cpu(xh->xh_entries[i].xe_name_offset); 1480 + if (offset < free_start) 1481 + free_start = offset; 1482 + } 1483 + 1484 + return free_start; 1485 + } 1486 + 1487 + static int ocfs2_xa_block_check_space(struct ocfs2_xa_loc *loc, 1488 + struct ocfs2_xattr_info *xi) 1489 + { 1490 + int count = le16_to_cpu(loc->xl_header->xh_count); 1491 + int free_start = ocfs2_xa_get_free_start(loc); 1492 + int needed_space = ocfs2_xi_entry_usage(xi); 1493 + 1494 + /* 1495 + * Block storage will reclaim the original entry before inserting 1496 + * the new value, so we only need the difference. If the new 1497 + * entry is smaller than the old one, we don't need anything. 1498 + */ 1499 + if (loc->xl_entry) { 1500 + /* Don't need space if we're reusing! */ 1501 + if (ocfs2_xa_can_reuse_entry(loc, xi)) 1502 + needed_space = 0; 1668 1503 else 1669 - size = OCFS2_XATTR_SIZE(name_len) + 1670 - OCFS2_XATTR_ROOT_SIZE; 1671 - free += (size + sizeof(struct ocfs2_xattr_entry)); 1504 + needed_space -= ocfs2_xe_entry_usage(loc->xl_entry); 1672 1505 } 1673 - /* Check free space in inode or block */ 1674 - if (xi->value && xi->value_len > OCFS2_XATTR_INLINE_SIZE) { 1675 - if (free < sizeof(struct ocfs2_xattr_entry) + 1676 - OCFS2_XATTR_SIZE(name_len) + 1677 - OCFS2_XATTR_ROOT_SIZE) { 1678 - ret = -ENOSPC; 1679 - goto out; 1680 - } 1681 - size_l = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE; 1682 - xi_l.value = (void *)&def_xv; 1683 - xi_l.value_len = OCFS2_XATTR_ROOT_SIZE; 1684 - } else if (xi->value) { 1685 - if (free < sizeof(struct ocfs2_xattr_entry) + 1686 - OCFS2_XATTR_SIZE(name_len) + 1687 - OCFS2_XATTR_SIZE(xi->value_len)) { 1688 - ret = -ENOSPC; 1689 - goto out; 1506 + if (needed_space < 0) 1507 + needed_space = 0; 1508 + return ocfs2_xa_check_space_helper(needed_space, free_start, count); 1509 + } 1510 + 1511 + /* 1512 + * Block storage for xattrs keeps the name+value pairs compacted. When 1513 + * we remove one, we have to shift any that preceded it towards the end. 
1514 + */ 1515 + static void ocfs2_xa_block_wipe_namevalue(struct ocfs2_xa_loc *loc) 1516 + { 1517 + int i, offset; 1518 + int namevalue_offset, first_namevalue_offset, namevalue_size; 1519 + struct ocfs2_xattr_entry *entry = loc->xl_entry; 1520 + struct ocfs2_xattr_header *xh = loc->xl_header; 1521 + int count = le16_to_cpu(xh->xh_count); 1522 + 1523 + namevalue_offset = le16_to_cpu(entry->xe_name_offset); 1524 + namevalue_size = namevalue_size_xe(entry); 1525 + first_namevalue_offset = ocfs2_xa_get_free_start(loc); 1526 + 1527 + /* Shift the name+value pairs */ 1528 + memmove((char *)xh + first_namevalue_offset + namevalue_size, 1529 + (char *)xh + first_namevalue_offset, 1530 + namevalue_offset - first_namevalue_offset); 1531 + memset((char *)xh + first_namevalue_offset, 0, namevalue_size); 1532 + 1533 + /* Now tell xh->xh_entries about it */ 1534 + for (i = 0; i < count; i++) { 1535 + offset = le16_to_cpu(xh->xh_entries[i].xe_name_offset); 1536 + if (offset < namevalue_offset) 1537 + le16_add_cpu(&xh->xh_entries[i].xe_name_offset, 1538 + namevalue_size); 1539 + } 1540 + 1541 + /* 1542 + * Note that we don't update xh_free_start or xh_name_value_len 1543 + * because they're not used in block-stored xattrs. 1544 + */ 1545 + } 1546 + 1547 + static void ocfs2_xa_block_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash) 1548 + { 1549 + int count = le16_to_cpu(loc->xl_header->xh_count); 1550 + loc->xl_entry = &(loc->xl_header->xh_entries[count]); 1551 + le16_add_cpu(&loc->xl_header->xh_count, 1); 1552 + memset(loc->xl_entry, 0, sizeof(struct ocfs2_xattr_entry)); 1553 + } 1554 + 1555 + static void ocfs2_xa_block_add_namevalue(struct ocfs2_xa_loc *loc, int size) 1556 + { 1557 + int free_start = ocfs2_xa_get_free_start(loc); 1558 + 1559 + loc->xl_entry->xe_name_offset = cpu_to_le16(free_start - size); 1560 + } 1561 + 1562 + static void ocfs2_xa_block_fill_value_buf(struct ocfs2_xa_loc *loc, 1563 + struct ocfs2_xattr_value_buf *vb) 1564 + { 1565 + struct buffer_head *bh = loc->xl_storage; 1566 + 1567 + if (loc->xl_size == (bh->b_size - 1568 + offsetof(struct ocfs2_xattr_block, 1569 + xb_attrs.xb_header))) 1570 + vb->vb_access = ocfs2_journal_access_xb; 1571 + else 1572 + vb->vb_access = ocfs2_journal_access_di; 1573 + vb->vb_bh = bh; 1574 + } 1575 + 1576 + /* 1577 + * Operations for xattrs stored in blocks. This includes inline inode 1578 + * storage and unindexed ocfs2_xattr_blocks. 
1579 + */ 1580 + static const struct ocfs2_xa_loc_operations ocfs2_xa_block_loc_ops = { 1581 + .xlo_journal_access = ocfs2_xa_block_journal_access, 1582 + .xlo_journal_dirty = ocfs2_xa_block_journal_dirty, 1583 + .xlo_offset_pointer = ocfs2_xa_block_offset_pointer, 1584 + .xlo_check_space = ocfs2_xa_block_check_space, 1585 + .xlo_can_reuse = ocfs2_xa_block_can_reuse, 1586 + .xlo_get_free_start = ocfs2_xa_block_get_free_start, 1587 + .xlo_wipe_namevalue = ocfs2_xa_block_wipe_namevalue, 1588 + .xlo_add_entry = ocfs2_xa_block_add_entry, 1589 + .xlo_add_namevalue = ocfs2_xa_block_add_namevalue, 1590 + .xlo_fill_value_buf = ocfs2_xa_block_fill_value_buf, 1591 + }; 1592 + 1593 + static int ocfs2_xa_bucket_journal_access(handle_t *handle, 1594 + struct ocfs2_xa_loc *loc, int type) 1595 + { 1596 + struct ocfs2_xattr_bucket *bucket = loc->xl_storage; 1597 + 1598 + return ocfs2_xattr_bucket_journal_access(handle, bucket, type); 1599 + } 1600 + 1601 + static void ocfs2_xa_bucket_journal_dirty(handle_t *handle, 1602 + struct ocfs2_xa_loc *loc) 1603 + { 1604 + struct ocfs2_xattr_bucket *bucket = loc->xl_storage; 1605 + 1606 + ocfs2_xattr_bucket_journal_dirty(handle, bucket); 1607 + } 1608 + 1609 + static void *ocfs2_xa_bucket_offset_pointer(struct ocfs2_xa_loc *loc, 1610 + int offset) 1611 + { 1612 + struct ocfs2_xattr_bucket *bucket = loc->xl_storage; 1613 + int block, block_offset; 1614 + 1615 + /* The header is at the front of the bucket */ 1616 + block = offset >> loc->xl_inode->i_sb->s_blocksize_bits; 1617 + block_offset = offset % loc->xl_inode->i_sb->s_blocksize; 1618 + 1619 + return bucket_block(bucket, block) + block_offset; 1620 + } 1621 + 1622 + static int ocfs2_xa_bucket_can_reuse(struct ocfs2_xa_loc *loc, 1623 + struct ocfs2_xattr_info *xi) 1624 + { 1625 + return namevalue_size_xe(loc->xl_entry) >= 1626 + namevalue_size_xi(xi); 1627 + } 1628 + 1629 + static int ocfs2_xa_bucket_get_free_start(struct ocfs2_xa_loc *loc) 1630 + { 1631 + struct ocfs2_xattr_bucket *bucket = loc->xl_storage; 1632 + return le16_to_cpu(bucket_xh(bucket)->xh_free_start); 1633 + } 1634 + 1635 + static int ocfs2_bucket_align_free_start(struct super_block *sb, 1636 + int free_start, int size) 1637 + { 1638 + /* 1639 + * We need to make sure that the name+value pair fits within 1640 + * one block. 1641 + */ 1642 + if (((free_start - size) >> sb->s_blocksize_bits) != 1643 + ((free_start - 1) >> sb->s_blocksize_bits)) 1644 + free_start -= free_start % sb->s_blocksize; 1645 + 1646 + return free_start; 1647 + } 1648 + 1649 + static int ocfs2_xa_bucket_check_space(struct ocfs2_xa_loc *loc, 1650 + struct ocfs2_xattr_info *xi) 1651 + { 1652 + int rc; 1653 + int count = le16_to_cpu(loc->xl_header->xh_count); 1654 + int free_start = ocfs2_xa_get_free_start(loc); 1655 + int needed_space = ocfs2_xi_entry_usage(xi); 1656 + int size = namevalue_size_xi(xi); 1657 + struct super_block *sb = loc->xl_inode->i_sb; 1658 + 1659 + /* 1660 + * Bucket storage does not reclaim name+value pairs it cannot 1661 + * reuse. They live as holes until the bucket fills, and then 1662 + * the bucket is defragmented. However, the bucket can reclaim 1663 + * the ocfs2_xattr_entry. 1664 + */ 1665 + if (loc->xl_entry) { 1666 + /* Don't need space if we're reusing! 
*/ 1667 + if (ocfs2_xa_can_reuse_entry(loc, xi)) 1668 + needed_space = 0; 1669 + else 1670 + needed_space -= sizeof(struct ocfs2_xattr_entry); 1671 + } 1672 + BUG_ON(needed_space < 0); 1673 + 1674 + if (free_start < size) { 1675 + if (needed_space) 1676 + return -ENOSPC; 1677 + } else { 1678 + /* 1679 + * First we check if it would fit in the first place. 1680 + * Below, we align the free start to a block. This may 1681 + * slide us below the minimum gap. By checking unaligned 1682 + * first, we avoid that error. 1683 + */ 1684 + rc = ocfs2_xa_check_space_helper(needed_space, free_start, 1685 + count); 1686 + if (rc) 1687 + return rc; 1688 + free_start = ocfs2_bucket_align_free_start(sb, free_start, 1689 + size); 1690 + } 1691 + return ocfs2_xa_check_space_helper(needed_space, free_start, count); 1692 + } 1693 + 1694 + static void ocfs2_xa_bucket_wipe_namevalue(struct ocfs2_xa_loc *loc) 1695 + { 1696 + le16_add_cpu(&loc->xl_header->xh_name_value_len, 1697 + -namevalue_size_xe(loc->xl_entry)); 1698 + } 1699 + 1700 + static void ocfs2_xa_bucket_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash) 1701 + { 1702 + struct ocfs2_xattr_header *xh = loc->xl_header; 1703 + int count = le16_to_cpu(xh->xh_count); 1704 + int low = 0, high = count - 1, tmp; 1705 + struct ocfs2_xattr_entry *tmp_xe; 1706 + 1707 + /* 1708 + * We keep buckets sorted by name_hash, so we need to find 1709 + * our insert place. 1710 + */ 1711 + while (low <= high && count) { 1712 + tmp = (low + high) / 2; 1713 + tmp_xe = &xh->xh_entries[tmp]; 1714 + 1715 + if (name_hash > le32_to_cpu(tmp_xe->xe_name_hash)) 1716 + low = tmp + 1; 1717 + else if (name_hash < le32_to_cpu(tmp_xe->xe_name_hash)) 1718 + high = tmp - 1; 1719 + else { 1720 + low = tmp; 1721 + break; 1690 1722 } 1691 1723 } 1692 1724 1693 - if (!xs->not_found) { 1694 - /* For existing extended attribute */ 1695 - size_t size = OCFS2_XATTR_SIZE(name_len) + 1696 - OCFS2_XATTR_SIZE(le64_to_cpu(xs->here->xe_value_size)); 1697 - size_t offs = le16_to_cpu(xs->here->xe_name_offset); 1698 - void *val = xs->base + offs; 1725 + if (low != count) 1726 + memmove(&xh->xh_entries[low + 1], 1727 + &xh->xh_entries[low], 1728 + ((count - low) * sizeof(struct ocfs2_xattr_entry))); 1699 1729 1700 - if (ocfs2_xattr_is_local(xs->here) && size == size_l) { 1701 - /* Replace existing local xattr with tree root */ 1702 - ret = ocfs2_xattr_set_value_outside(inode, xi, xs, 1703 - ctxt, &vb, offs); 1704 - if (ret < 0) 1705 - mlog_errno(ret); 1706 - goto out; 1707 - } else if (!ocfs2_xattr_is_local(xs->here)) { 1708 - /* For existing xattr which has value outside */ 1709 - vb.vb_xv = (struct ocfs2_xattr_value_root *) 1710 - (val + OCFS2_XATTR_SIZE(name_len)); 1730 + le16_add_cpu(&xh->xh_count, 1); 1731 + loc->xl_entry = &xh->xh_entries[low]; 1732 + memset(loc->xl_entry, 0, sizeof(struct ocfs2_xattr_entry)); 1733 + } 1711 1734 1712 - if (xi->value_len > OCFS2_XATTR_INLINE_SIZE) { 1713 - /* 1714 - * If new value need set outside also, 1715 - * first truncate old value to new value, 1716 - * then set new value with set_value_outside(). 
1717 - */ 1718 - ret = ocfs2_xattr_value_truncate(inode, 1719 - &vb, 1720 - xi->value_len, 1721 - ctxt); 1722 - if (ret < 0) { 1723 - mlog_errno(ret); 1724 - goto out; 1725 - } 1735 + static void ocfs2_xa_bucket_add_namevalue(struct ocfs2_xa_loc *loc, int size) 1736 + { 1737 + int free_start = ocfs2_xa_get_free_start(loc); 1738 + struct ocfs2_xattr_header *xh = loc->xl_header; 1739 + struct super_block *sb = loc->xl_inode->i_sb; 1740 + int nameval_offset; 1726 1741 1727 - ret = ocfs2_xattr_update_entry(inode, 1728 - handle, 1729 - xi, 1730 - xs, 1731 - &vb, 1732 - offs); 1733 - if (ret < 0) { 1734 - mlog_errno(ret); 1735 - goto out; 1736 - } 1742 + free_start = ocfs2_bucket_align_free_start(sb, free_start, size); 1743 + nameval_offset = free_start - size; 1744 + loc->xl_entry->xe_name_offset = cpu_to_le16(nameval_offset); 1745 + xh->xh_free_start = cpu_to_le16(nameval_offset); 1746 + le16_add_cpu(&xh->xh_name_value_len, size); 1737 1747 1738 - ret = __ocfs2_xattr_set_value_outside(inode, 1739 - handle, 1740 - &vb, 1741 - xi->value, 1742 - xi->value_len); 1743 - if (ret < 0) 1744 - mlog_errno(ret); 1748 + } 1749 + 1750 + static void ocfs2_xa_bucket_fill_value_buf(struct ocfs2_xa_loc *loc, 1751 + struct ocfs2_xattr_value_buf *vb) 1752 + { 1753 + struct ocfs2_xattr_bucket *bucket = loc->xl_storage; 1754 + struct super_block *sb = loc->xl_inode->i_sb; 1755 + int nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset); 1756 + int size = namevalue_size_xe(loc->xl_entry); 1757 + int block_offset = nameval_offset >> sb->s_blocksize_bits; 1758 + 1759 + /* Values are not allowed to straddle block boundaries */ 1760 + BUG_ON(block_offset != 1761 + ((nameval_offset + size - 1) >> sb->s_blocksize_bits)); 1762 + /* We expect the bucket to be filled in */ 1763 + BUG_ON(!bucket->bu_bhs[block_offset]); 1764 + 1765 + vb->vb_access = ocfs2_journal_access; 1766 + vb->vb_bh = bucket->bu_bhs[block_offset]; 1767 + } 1768 + 1769 + /* Operations for xattrs stored in buckets. */ 1770 + static const struct ocfs2_xa_loc_operations ocfs2_xa_bucket_loc_ops = { 1771 + .xlo_journal_access = ocfs2_xa_bucket_journal_access, 1772 + .xlo_journal_dirty = ocfs2_xa_bucket_journal_dirty, 1773 + .xlo_offset_pointer = ocfs2_xa_bucket_offset_pointer, 1774 + .xlo_check_space = ocfs2_xa_bucket_check_space, 1775 + .xlo_can_reuse = ocfs2_xa_bucket_can_reuse, 1776 + .xlo_get_free_start = ocfs2_xa_bucket_get_free_start, 1777 + .xlo_wipe_namevalue = ocfs2_xa_bucket_wipe_namevalue, 1778 + .xlo_add_entry = ocfs2_xa_bucket_add_entry, 1779 + .xlo_add_namevalue = ocfs2_xa_bucket_add_namevalue, 1780 + .xlo_fill_value_buf = ocfs2_xa_bucket_fill_value_buf, 1781 + }; 1782 + 1783 + static unsigned int ocfs2_xa_value_clusters(struct ocfs2_xa_loc *loc) 1784 + { 1785 + struct ocfs2_xattr_value_buf vb; 1786 + 1787 + if (ocfs2_xattr_is_local(loc->xl_entry)) 1788 + return 0; 1789 + 1790 + ocfs2_xa_fill_value_buf(loc, &vb); 1791 + return le32_to_cpu(vb.vb_xv->xr_clusters); 1792 + } 1793 + 1794 + static int ocfs2_xa_value_truncate(struct ocfs2_xa_loc *loc, u64 bytes, 1795 + struct ocfs2_xattr_set_ctxt *ctxt) 1796 + { 1797 + int trunc_rc, access_rc; 1798 + struct ocfs2_xattr_value_buf vb; 1799 + 1800 + ocfs2_xa_fill_value_buf(loc, &vb); 1801 + trunc_rc = ocfs2_xattr_value_truncate(loc->xl_inode, &vb, bytes, 1802 + ctxt); 1803 + 1804 + /* 1805 + * The caller of ocfs2_xa_value_truncate() has already called 1806 + * ocfs2_xa_journal_access on the loc. However, the truncate code 1807 + * calls ocfs2_extend_trans(). 
This may commit the previous 1808 + * transaction and open a new one. If this is a bucket, truncate 1809 + * could leave only vb->vb_bh set up for journaling. Meanwhile, 1810 + * the caller is expecting to dirty the entire bucket. So we must 1811 + * reset the journal work. We do this even if truncate has failed, 1812 + * as it could have failed after committing the extend. 1813 + */ 1814 + access_rc = ocfs2_xa_journal_access(ctxt->handle, loc, 1815 + OCFS2_JOURNAL_ACCESS_WRITE); 1816 + 1817 + /* Errors in truncate take precedence */ 1818 + return trunc_rc ? trunc_rc : access_rc; 1819 + } 1820 + 1821 + static void ocfs2_xa_remove_entry(struct ocfs2_xa_loc *loc) 1822 + { 1823 + int index, count; 1824 + struct ocfs2_xattr_header *xh = loc->xl_header; 1825 + struct ocfs2_xattr_entry *entry = loc->xl_entry; 1826 + 1827 + ocfs2_xa_wipe_namevalue(loc); 1828 + loc->xl_entry = NULL; 1829 + 1830 + le16_add_cpu(&xh->xh_count, -1); 1831 + count = le16_to_cpu(xh->xh_count); 1832 + 1833 + /* 1834 + * Only zero out the entry if there are more remaining. This is 1835 + * important for an empty bucket, as it keeps track of the 1836 + * bucket's hash value. It doesn't hurt empty block storage. 1837 + */ 1838 + if (count) { 1839 + index = ((char *)entry - (char *)&xh->xh_entries) / 1840 + sizeof(struct ocfs2_xattr_entry); 1841 + memmove(&xh->xh_entries[index], &xh->xh_entries[index + 1], 1842 + (count - index) * sizeof(struct ocfs2_xattr_entry)); 1843 + memset(&xh->xh_entries[count], 0, 1844 + sizeof(struct ocfs2_xattr_entry)); 1845 + } 1846 + } 1847 + 1848 + /* 1849 + * If we have a problem adjusting the size of an external value during 1850 + * ocfs2_xa_prepare_entry() or ocfs2_xa_remove(), we may have an xattr 1851 + * in an intermediate state. For example, the value may be partially 1852 + * truncated. 1853 + * 1854 + * If the value tree hasn't changed, the extend/truncate went nowhere. 1855 + * We have nothing to do. The caller can treat it as a straight error. 1856 + * 1857 + * If the value tree got partially truncated, we now have a corrupted 1858 + * extended attribute. We're going to wipe its entry and leak the 1859 + * clusters. Better to leak some storage than leave a corrupt entry. 1860 + * 1861 + * If the value tree grew, it obviously didn't grow enough for the 1862 + * new entry. We're not going to try and reclaim those clusters either. 1863 + * If there was already an external value there (orig_clusters != 0), 1864 + * the new clusters are attached safely and we can just leave the old 1865 + * value in place. If there was no external value there, we remove 1866 + * the entry. 1867 + * 1868 + * This way, the xattr block we store in the journal will be consistent. 1869 + * If the size change broke because of the journal, no changes will hit 1870 + * disk anyway. 1871 + */ 1872 + static void ocfs2_xa_cleanup_value_truncate(struct ocfs2_xa_loc *loc, 1873 + const char *what, 1874 + unsigned int orig_clusters) 1875 + { 1876 + unsigned int new_clusters = ocfs2_xa_value_clusters(loc); 1877 + char *nameval_buf = ocfs2_xa_offset_pointer(loc, 1878 + le16_to_cpu(loc->xl_entry->xe_name_offset)); 1879 + 1880 + if (new_clusters < orig_clusters) { 1881 + mlog(ML_ERROR, 1882 + "Partial truncate while %s xattr %.*s. 
Leaking " 1883 + "%u clusters and removing the entry\n", 1884 + what, loc->xl_entry->xe_name_len, nameval_buf, 1885 + orig_clusters - new_clusters); 1886 + ocfs2_xa_remove_entry(loc); 1887 + } else if (!orig_clusters) { 1888 + mlog(ML_ERROR, 1889 + "Unable to allocate an external value for xattr " 1890 + "%.*s safely. Leaking %u clusters and removing the " 1891 + "entry\n", 1892 + loc->xl_entry->xe_name_len, nameval_buf, 1893 + new_clusters - orig_clusters); 1894 + ocfs2_xa_remove_entry(loc); 1895 + } else if (new_clusters > orig_clusters) 1896 + mlog(ML_ERROR, 1897 + "Unable to grow xattr %.*s safely. %u new clusters " 1898 + "have been added, but the value will not be " 1899 + "modified\n", 1900 + loc->xl_entry->xe_name_len, nameval_buf, 1901 + new_clusters - orig_clusters); 1902 + } 1903 + 1904 + static int ocfs2_xa_remove(struct ocfs2_xa_loc *loc, 1905 + struct ocfs2_xattr_set_ctxt *ctxt) 1906 + { 1907 + int rc = 0; 1908 + unsigned int orig_clusters; 1909 + 1910 + if (!ocfs2_xattr_is_local(loc->xl_entry)) { 1911 + orig_clusters = ocfs2_xa_value_clusters(loc); 1912 + rc = ocfs2_xa_value_truncate(loc, 0, ctxt); 1913 + if (rc) { 1914 + mlog_errno(rc); 1915 + /* 1916 + * Since this is remove, we can return 0 if 1917 + * ocfs2_xa_cleanup_value_truncate() is going to 1918 + * wipe the entry anyway. So we check the 1919 + * cluster count as well. 1920 + */ 1921 + if (orig_clusters != ocfs2_xa_value_clusters(loc)) 1922 + rc = 0; 1923 + ocfs2_xa_cleanup_value_truncate(loc, "removing", 1924 + orig_clusters); 1925 + if (rc) 1745 1926 goto out; 1746 - } else { 1747 - /* 1748 - * If new value need set in local, 1749 - * just trucate old value to zero. 1750 - */ 1751 - ret = ocfs2_xattr_value_truncate(inode, 1752 - &vb, 1753 - 0, 1754 - ctxt); 1755 - if (ret < 0) 1756 - mlog_errno(ret); 1927 + } 1928 + } 1929 + 1930 + ocfs2_xa_remove_entry(loc); 1931 + 1932 + out: 1933 + return rc; 1934 + } 1935 + 1936 + static void ocfs2_xa_install_value_root(struct ocfs2_xa_loc *loc) 1937 + { 1938 + int name_size = OCFS2_XATTR_SIZE(loc->xl_entry->xe_name_len); 1939 + char *nameval_buf; 1940 + 1941 + nameval_buf = ocfs2_xa_offset_pointer(loc, 1942 + le16_to_cpu(loc->xl_entry->xe_name_offset)); 1943 + memcpy(nameval_buf + name_size, &def_xv, OCFS2_XATTR_ROOT_SIZE); 1944 + } 1945 + 1946 + /* 1947 + * Take an existing entry and make it ready for the new value. This 1948 + * won't allocate space, but it may free space. It should be ready for 1949 + * ocfs2_xa_prepare_entry() to finish the work. 
1950 + */ 1951 + static int ocfs2_xa_reuse_entry(struct ocfs2_xa_loc *loc, 1952 + struct ocfs2_xattr_info *xi, 1953 + struct ocfs2_xattr_set_ctxt *ctxt) 1954 + { 1955 + int rc = 0; 1956 + int name_size = OCFS2_XATTR_SIZE(xi->xi_name_len); 1957 + unsigned int orig_clusters; 1958 + char *nameval_buf; 1959 + int xe_local = ocfs2_xattr_is_local(loc->xl_entry); 1960 + int xi_local = xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE; 1961 + 1962 + BUG_ON(OCFS2_XATTR_SIZE(loc->xl_entry->xe_name_len) != 1963 + name_size); 1964 + 1965 + nameval_buf = ocfs2_xa_offset_pointer(loc, 1966 + le16_to_cpu(loc->xl_entry->xe_name_offset)); 1967 + if (xe_local) { 1968 + memset(nameval_buf + name_size, 0, 1969 + namevalue_size_xe(loc->xl_entry) - name_size); 1970 + if (!xi_local) 1971 + ocfs2_xa_install_value_root(loc); 1972 + } else { 1973 + orig_clusters = ocfs2_xa_value_clusters(loc); 1974 + if (xi_local) { 1975 + rc = ocfs2_xa_value_truncate(loc, 0, ctxt); 1976 + if (rc < 0) 1977 + mlog_errno(rc); 1978 + else 1979 + memset(nameval_buf + name_size, 0, 1980 + namevalue_size_xe(loc->xl_entry) - 1981 + name_size); 1982 + } else if (le64_to_cpu(loc->xl_entry->xe_value_size) > 1983 + xi->xi_value_len) { 1984 + rc = ocfs2_xa_value_truncate(loc, xi->xi_value_len, 1985 + ctxt); 1986 + if (rc < 0) 1987 + mlog_errno(rc); 1988 + } 1989 + 1990 + if (rc) { 1991 + ocfs2_xa_cleanup_value_truncate(loc, "reusing", 1992 + orig_clusters); 1993 + goto out; 1994 + } 1995 + } 1996 + 1997 + loc->xl_entry->xe_value_size = cpu_to_le64(xi->xi_value_len); 1998 + ocfs2_xattr_set_local(loc->xl_entry, xi_local); 1999 + 2000 + out: 2001 + return rc; 2002 + } 2003 + 2004 + /* 2005 + * Prepares loc->xl_entry to receive the new xattr. This includes 2006 + * properly setting up the name+value pair region. If loc->xl_entry 2007 + * already exists, it will take care of modifying it appropriately. 2008 + * 2009 + * Note that this modifies the data. You did journal_access already, 2010 + * right? 2011 + */ 2012 + static int ocfs2_xa_prepare_entry(struct ocfs2_xa_loc *loc, 2013 + struct ocfs2_xattr_info *xi, 2014 + u32 name_hash, 2015 + struct ocfs2_xattr_set_ctxt *ctxt) 2016 + { 2017 + int rc = 0; 2018 + unsigned int orig_clusters; 2019 + __le64 orig_value_size = 0; 2020 + 2021 + rc = ocfs2_xa_check_space(loc, xi); 2022 + if (rc) 2023 + goto out; 2024 + 2025 + if (loc->xl_entry) { 2026 + if (ocfs2_xa_can_reuse_entry(loc, xi)) { 2027 + orig_value_size = loc->xl_entry->xe_value_size; 2028 + rc = ocfs2_xa_reuse_entry(loc, xi, ctxt); 2029 + if (rc) 2030 + goto out; 2031 + goto alloc_value; 2032 + } 2033 + 2034 + if (!ocfs2_xattr_is_local(loc->xl_entry)) { 2035 + orig_clusters = ocfs2_xa_value_clusters(loc); 2036 + rc = ocfs2_xa_value_truncate(loc, 0, ctxt); 2037 + if (rc) { 2038 + mlog_errno(rc); 2039 + ocfs2_xa_cleanup_value_truncate(loc, 2040 + "overwriting", 2041 + orig_clusters); 2042 + goto out; 1757 2043 } 1758 2044 } 2045 + ocfs2_xa_wipe_namevalue(loc); 2046 + } else 2047 + ocfs2_xa_add_entry(loc, name_hash); 2048 + 2049 + /* 2050 + * If we get here, we have a blank entry. Fill it. We grow our 2051 + * name+value pair back from the end. 
2052 + */ 2053 + ocfs2_xa_add_namevalue(loc, xi); 2054 + if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) 2055 + ocfs2_xa_install_value_root(loc); 2056 + 2057 + alloc_value: 2058 + if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) { 2059 + orig_clusters = ocfs2_xa_value_clusters(loc); 2060 + rc = ocfs2_xa_value_truncate(loc, xi->xi_value_len, ctxt); 2061 + if (rc < 0) { 2062 + /* 2063 + * If we tried to grow an existing external value, 2064 + * ocfs2_xa_cleanup_value_truncate() is going to 2065 + * let it stand. We have to restore its original 2066 + * value size. 2067 + */ 2068 + loc->xl_entry->xe_value_size = orig_value_size; 2069 + ocfs2_xa_cleanup_value_truncate(loc, "growing", 2070 + orig_clusters); 2071 + mlog_errno(rc); 2072 + } 1759 2073 } 1760 2074 1761 - ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), xs->inode_bh, 2075 + out: 2076 + return rc; 2077 + } 2078 + 2079 + /* 2080 + * Store the value portion of the name+value pair. This will skip 2081 + * values that are stored externally. Their tree roots were set up 2082 + * by ocfs2_xa_prepare_entry(). 2083 + */ 2084 + static int ocfs2_xa_store_value(struct ocfs2_xa_loc *loc, 2085 + struct ocfs2_xattr_info *xi, 2086 + struct ocfs2_xattr_set_ctxt *ctxt) 2087 + { 2088 + int rc = 0; 2089 + int nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset); 2090 + int name_size = OCFS2_XATTR_SIZE(xi->xi_name_len); 2091 + char *nameval_buf; 2092 + struct ocfs2_xattr_value_buf vb; 2093 + 2094 + nameval_buf = ocfs2_xa_offset_pointer(loc, nameval_offset); 2095 + if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) { 2096 + ocfs2_xa_fill_value_buf(loc, &vb); 2097 + rc = __ocfs2_xattr_set_value_outside(loc->xl_inode, 2098 + ctxt->handle, &vb, 2099 + xi->xi_value, 2100 + xi->xi_value_len); 2101 + } else 2102 + memcpy(nameval_buf + name_size, xi->xi_value, xi->xi_value_len); 2103 + 2104 + return rc; 2105 + } 2106 + 2107 + static int ocfs2_xa_set(struct ocfs2_xa_loc *loc, 2108 + struct ocfs2_xattr_info *xi, 2109 + struct ocfs2_xattr_set_ctxt *ctxt) 2110 + { 2111 + int ret; 2112 + u32 name_hash = ocfs2_xattr_name_hash(loc->xl_inode, xi->xi_name, 2113 + xi->xi_name_len); 2114 + 2115 + ret = ocfs2_xa_journal_access(ctxt->handle, loc, 1762 2116 OCFS2_JOURNAL_ACCESS_WRITE); 1763 2117 if (ret) { 1764 2118 mlog_errno(ret); 1765 2119 goto out; 1766 2120 } 1767 2121 1768 - if (!(flag & OCFS2_INLINE_XATTR_FL)) { 1769 - ret = vb.vb_access(handle, INODE_CACHE(inode), vb.vb_bh, 1770 - OCFS2_JOURNAL_ACCESS_WRITE); 1771 - if (ret) { 1772 - mlog_errno(ret); 1773 - goto out; 1774 - } 1775 - } 1776 - 1777 2122 /* 1778 - * Set value in local, include set tree root in local. 1779 - * This is the first step for value size >INLINE_SIZE. 2123 + * From here on out, everything is going to modify the buffer a 2124 + * little. Errors are going to leave the xattr header in a 2125 + * sane state. Thus, even with errors we dirty the sucker. 
1780 2126 */ 1781 - ocfs2_xattr_set_entry_local(inode, &xi_l, xs, last, min_offs); 1782 2127 1783 - if (!(flag & OCFS2_INLINE_XATTR_FL)) { 1784 - ret = ocfs2_journal_dirty(handle, xs->xattr_bh); 1785 - if (ret < 0) { 2128 + /* Don't worry, we are never called with !xi_value and !xl_entry */ 2129 + if (!xi->xi_value) { 2130 + ret = ocfs2_xa_remove(loc, ctxt); 2131 + goto out_dirty; 2132 + } 2133 + 2134 + ret = ocfs2_xa_prepare_entry(loc, xi, name_hash, ctxt); 2135 + if (ret) { 2136 + if (ret != -ENOSPC) 1786 2137 mlog_errno(ret); 1787 - goto out; 1788 - } 2138 + goto out_dirty; 1789 2139 } 1790 2140 1791 - if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) && 1792 - (flag & OCFS2_INLINE_XATTR_FL)) { 1793 - struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1794 - unsigned int xattrsize = osb->s_xattr_inline_size; 1795 - 1796 - /* 1797 - * Adjust extent record count or inline data size 1798 - * to reserve space for extended attribute. 1799 - */ 1800 - if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) { 1801 - struct ocfs2_inline_data *idata = &di->id2.i_data; 1802 - le16_add_cpu(&idata->id_count, -xattrsize); 1803 - } else if (!(ocfs2_inode_is_fast_symlink(inode))) { 1804 - struct ocfs2_extent_list *el = &di->id2.i_list; 1805 - le16_add_cpu(&el->l_count, -(xattrsize / 1806 - sizeof(struct ocfs2_extent_rec))); 1807 - } 1808 - di->i_xattr_inline_size = cpu_to_le16(xattrsize); 1809 - } 1810 - /* Update xattr flag */ 1811 - spin_lock(&oi->ip_lock); 1812 - oi->ip_dyn_features |= flag; 1813 - di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features); 1814 - spin_unlock(&oi->ip_lock); 1815 - 1816 - ret = ocfs2_journal_dirty(handle, xs->inode_bh); 1817 - if (ret < 0) 2141 + ret = ocfs2_xa_store_value(loc, xi, ctxt); 2142 + if (ret) 1818 2143 mlog_errno(ret); 1819 2144 1820 - if (!ret && xi->value_len > OCFS2_XATTR_INLINE_SIZE) { 1821 - /* 1822 - * Set value outside in B tree. 1823 - * This is the second step for value size > INLINE_SIZE. 1824 - */ 1825 - size_t offs = le16_to_cpu(xs->here->xe_name_offset); 1826 - ret = ocfs2_xattr_set_value_outside(inode, xi, xs, ctxt, 1827 - &vb, offs); 1828 - if (ret < 0) { 1829 - int ret2; 2145 + out_dirty: 2146 + ocfs2_xa_journal_dirty(ctxt->handle, loc); 1830 2147 1831 - mlog_errno(ret); 1832 - /* 1833 - * If set value outside failed, we have to clean 1834 - * the junk tree root we have already set in local. 
1835 - */ 1836 - ret2 = ocfs2_xattr_cleanup(inode, ctxt->handle, 1837 - xi, xs, &vb, offs); 1838 - if (ret2 < 0) 1839 - mlog_errno(ret2); 1840 - } 1841 - } 1842 2148 out: 1843 2149 return ret; 2150 + } 2151 + 2152 + static void ocfs2_init_dinode_xa_loc(struct ocfs2_xa_loc *loc, 2153 + struct inode *inode, 2154 + struct buffer_head *bh, 2155 + struct ocfs2_xattr_entry *entry) 2156 + { 2157 + struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data; 2158 + 2159 + BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_XATTR_FL)); 2160 + 2161 + loc->xl_inode = inode; 2162 + loc->xl_ops = &ocfs2_xa_block_loc_ops; 2163 + loc->xl_storage = bh; 2164 + loc->xl_entry = entry; 2165 + loc->xl_size = le16_to_cpu(di->i_xattr_inline_size); 2166 + loc->xl_header = 2167 + (struct ocfs2_xattr_header *)(bh->b_data + bh->b_size - 2168 + loc->xl_size); 2169 + } 2170 + 2171 + static void ocfs2_init_xattr_block_xa_loc(struct ocfs2_xa_loc *loc, 2172 + struct inode *inode, 2173 + struct buffer_head *bh, 2174 + struct ocfs2_xattr_entry *entry) 2175 + { 2176 + struct ocfs2_xattr_block *xb = 2177 + (struct ocfs2_xattr_block *)bh->b_data; 2178 + 2179 + BUG_ON(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED); 2180 + 2181 + loc->xl_inode = inode; 2182 + loc->xl_ops = &ocfs2_xa_block_loc_ops; 2183 + loc->xl_storage = bh; 2184 + loc->xl_header = &(xb->xb_attrs.xb_header); 2185 + loc->xl_entry = entry; 2186 + loc->xl_size = bh->b_size - offsetof(struct ocfs2_xattr_block, 2187 + xb_attrs.xb_header); 2188 + } 2189 + 2190 + static void ocfs2_init_xattr_bucket_xa_loc(struct ocfs2_xa_loc *loc, 2191 + struct ocfs2_xattr_bucket *bucket, 2192 + struct ocfs2_xattr_entry *entry) 2193 + { 2194 + loc->xl_inode = bucket->bu_inode; 2195 + loc->xl_ops = &ocfs2_xa_bucket_loc_ops; 2196 + loc->xl_storage = bucket; 2197 + loc->xl_header = bucket_xh(bucket); 2198 + loc->xl_entry = entry; 2199 + loc->xl_size = OCFS2_XATTR_BUCKET_SIZE; 1844 2200 } 1845 2201 1846 2202 /* ··· 2683 2149 return 0; 2684 2150 } 2685 2151 2152 + static int ocfs2_xattr_ibody_init(struct inode *inode, 2153 + struct buffer_head *di_bh, 2154 + struct ocfs2_xattr_set_ctxt *ctxt) 2155 + { 2156 + int ret; 2157 + struct ocfs2_inode_info *oi = OCFS2_I(inode); 2158 + struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; 2159 + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 2160 + unsigned int xattrsize = osb->s_xattr_inline_size; 2161 + 2162 + if (!ocfs2_xattr_has_space_inline(inode, di)) { 2163 + ret = -ENOSPC; 2164 + goto out; 2165 + } 2166 + 2167 + ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode), di_bh, 2168 + OCFS2_JOURNAL_ACCESS_WRITE); 2169 + if (ret) { 2170 + mlog_errno(ret); 2171 + goto out; 2172 + } 2173 + 2174 + /* 2175 + * Adjust extent record count or inline data size 2176 + * to reserve space for extended attribute. 
2177 + */ 2178 + if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) { 2179 + struct ocfs2_inline_data *idata = &di->id2.i_data; 2180 + le16_add_cpu(&idata->id_count, -xattrsize); 2181 + } else if (!(ocfs2_inode_is_fast_symlink(inode))) { 2182 + struct ocfs2_extent_list *el = &di->id2.i_list; 2183 + le16_add_cpu(&el->l_count, -(xattrsize / 2184 + sizeof(struct ocfs2_extent_rec))); 2185 + } 2186 + di->i_xattr_inline_size = cpu_to_le16(xattrsize); 2187 + 2188 + spin_lock(&oi->ip_lock); 2189 + oi->ip_dyn_features |= OCFS2_INLINE_XATTR_FL|OCFS2_HAS_XATTR_FL; 2190 + di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features); 2191 + spin_unlock(&oi->ip_lock); 2192 + 2193 + ret = ocfs2_journal_dirty(ctxt->handle, di_bh); 2194 + if (ret < 0) 2195 + mlog_errno(ret); 2196 + 2197 + out: 2198 + return ret; 2199 + } 2200 + 2686 2201 /* 2687 2202 * ocfs2_xattr_ibody_set() 2688 2203 * ··· 2743 2160 struct ocfs2_xattr_search *xs, 2744 2161 struct ocfs2_xattr_set_ctxt *ctxt) 2745 2162 { 2163 + int ret; 2746 2164 struct ocfs2_inode_info *oi = OCFS2_I(inode); 2747 2165 struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data; 2748 - int ret; 2166 + struct ocfs2_xa_loc loc; 2749 2167 2750 2168 if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE) 2751 2169 return -ENOSPC; ··· 2759 2175 } 2760 2176 } 2761 2177 2762 - ret = ocfs2_xattr_set_entry(inode, xi, xs, ctxt, 2763 - (OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL)); 2178 + if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) { 2179 + ret = ocfs2_xattr_ibody_init(inode, xs->inode_bh, ctxt); 2180 + if (ret) { 2181 + if (ret != -ENOSPC) 2182 + mlog_errno(ret); 2183 + goto out; 2184 + } 2185 + } 2186 + 2187 + ocfs2_init_dinode_xa_loc(&loc, inode, xs->inode_bh, 2188 + xs->not_found ? NULL : xs->here); 2189 + ret = ocfs2_xa_set(&loc, xi, ctxt); 2190 + if (ret) { 2191 + if (ret != -ENOSPC) 2192 + mlog_errno(ret); 2193 + goto out; 2194 + } 2195 + xs->here = loc.xl_entry; 2196 + 2764 2197 out: 2765 2198 up_write(&oi->ip_alloc_sem); 2766 2199 ··· 2837 2236 return ret; 2838 2237 } 2839 2238 2840 - static int ocfs2_create_xattr_block(handle_t *handle, 2841 - struct inode *inode, 2239 + static int ocfs2_create_xattr_block(struct inode *inode, 2842 2240 struct buffer_head *inode_bh, 2843 - struct ocfs2_alloc_context *meta_ac, 2844 - struct buffer_head **ret_bh, 2845 - int indexed) 2241 + struct ocfs2_xattr_set_ctxt *ctxt, 2242 + int indexed, 2243 + struct buffer_head **ret_bh) 2846 2244 { 2847 2245 int ret; 2848 2246 u16 suballoc_bit_start; ··· 2852 2252 struct buffer_head *new_bh = NULL; 2853 2253 struct ocfs2_xattr_block *xblk; 2854 2254 2855 - ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), inode_bh, 2856 - OCFS2_JOURNAL_ACCESS_CREATE); 2255 + ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode), 2256 + inode_bh, OCFS2_JOURNAL_ACCESS_CREATE); 2857 2257 if (ret < 0) { 2858 2258 mlog_errno(ret); 2859 2259 goto end; 2860 2260 } 2861 2261 2862 - ret = ocfs2_claim_metadata(osb, handle, meta_ac, 1, 2262 + ret = ocfs2_claim_metadata(osb, ctxt->handle, ctxt->meta_ac, 1, 2863 2263 &suballoc_bit_start, &num_got, 2864 2264 &first_blkno); 2865 2265 if (ret < 0) { ··· 2870 2270 new_bh = sb_getblk(inode->i_sb, first_blkno); 2871 2271 ocfs2_set_new_buffer_uptodate(INODE_CACHE(inode), new_bh); 2872 2272 2873 - ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), 2273 + ret = ocfs2_journal_access_xb(ctxt->handle, INODE_CACHE(inode), 2874 2274 new_bh, 2875 2275 OCFS2_JOURNAL_ACCESS_CREATE); 2876 2276 if (ret < 0) { ··· 2882 2282 xblk = (struct 
ocfs2_xattr_block *)new_bh->b_data; 2883 2283 memset(xblk, 0, inode->i_sb->s_blocksize); 2884 2284 strcpy((void *)xblk, OCFS2_XATTR_BLOCK_SIGNATURE); 2885 - xblk->xb_suballoc_slot = cpu_to_le16(osb->slot_num); 2285 + xblk->xb_suballoc_slot = cpu_to_le16(ctxt->meta_ac->ac_alloc_slot); 2886 2286 xblk->xb_suballoc_bit = cpu_to_le16(suballoc_bit_start); 2887 2287 xblk->xb_fs_generation = cpu_to_le32(osb->fs_generation); 2888 2288 xblk->xb_blkno = cpu_to_le64(first_blkno); 2889 - 2890 2289 if (indexed) { 2891 2290 struct ocfs2_xattr_tree_root *xr = &xblk->xb_attrs.xb_root; 2892 2291 xr->xt_clusters = cpu_to_le32(1); ··· 2896 2297 xr->xt_list.l_next_free_rec = cpu_to_le16(1); 2897 2298 xblk->xb_flags = cpu_to_le16(OCFS2_XATTR_INDEXED); 2898 2299 } 2300 + ocfs2_journal_dirty(ctxt->handle, new_bh); 2899 2301 2900 - ret = ocfs2_journal_dirty(handle, new_bh); 2901 - if (ret < 0) { 2902 - mlog_errno(ret); 2903 - goto end; 2904 - } 2302 + /* Add it to the inode */ 2905 2303 di->i_xattr_loc = cpu_to_le64(first_blkno); 2906 - ocfs2_journal_dirty(handle, inode_bh); 2304 + 2305 + spin_lock(&OCFS2_I(inode)->ip_lock); 2306 + OCFS2_I(inode)->ip_dyn_features |= OCFS2_HAS_XATTR_FL; 2307 + di->i_dyn_features = cpu_to_le16(OCFS2_I(inode)->ip_dyn_features); 2308 + spin_unlock(&OCFS2_I(inode)->ip_lock); 2309 + 2310 + ocfs2_journal_dirty(ctxt->handle, inode_bh); 2907 2311 2908 2312 *ret_bh = new_bh; 2909 2313 new_bh = NULL; ··· 2928 2326 struct ocfs2_xattr_set_ctxt *ctxt) 2929 2327 { 2930 2328 struct buffer_head *new_bh = NULL; 2931 - handle_t *handle = ctxt->handle; 2932 2329 struct ocfs2_xattr_block *xblk = NULL; 2933 2330 int ret; 2331 + struct ocfs2_xa_loc loc; 2934 2332 2935 2333 if (!xs->xattr_bh) { 2936 - ret = ocfs2_create_xattr_block(handle, inode, xs->inode_bh, 2937 - ctxt->meta_ac, &new_bh, 0); 2334 + ret = ocfs2_create_xattr_block(inode, xs->inode_bh, ctxt, 2335 + 0, &new_bh); 2938 2336 if (ret) { 2939 2337 mlog_errno(ret); 2940 2338 goto end; ··· 2950 2348 xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data; 2951 2349 2952 2350 if (!(le16_to_cpu(xblk->xb_flags) & OCFS2_XATTR_INDEXED)) { 2953 - /* Set extended attribute into external block */ 2954 - ret = ocfs2_xattr_set_entry(inode, xi, xs, ctxt, 2955 - OCFS2_HAS_XATTR_FL); 2956 - if (!ret || ret != -ENOSPC) 2957 - goto end; 2351 + ocfs2_init_xattr_block_xa_loc(&loc, inode, xs->xattr_bh, 2352 + xs->not_found ? 
NULL : xs->here); 2958 2353 2959 - ret = ocfs2_xattr_create_index_block(inode, xs, ctxt); 2960 - if (ret) 2354 + ret = ocfs2_xa_set(&loc, xi, ctxt); 2355 + if (!ret) 2356 + xs->here = loc.xl_entry; 2357 + else if (ret != -ENOSPC) 2961 2358 goto end; 2359 + else { 2360 + ret = ocfs2_xattr_create_index_block(inode, xs, ctxt); 2361 + if (ret) 2362 + goto end; 2363 + } 2962 2364 } 2963 2365 2964 - ret = ocfs2_xattr_set_entry_index_block(inode, xi, xs, ctxt); 2366 + if (le16_to_cpu(xblk->xb_flags) & OCFS2_XATTR_INDEXED) 2367 + ret = ocfs2_xattr_set_entry_index_block(inode, xi, xs, ctxt); 2965 2368 2966 2369 end: 2967 - 2968 2370 return ret; 2969 2371 } 2970 2372 ··· 2977 2371 struct ocfs2_xattr_info *xi, 2978 2372 struct ocfs2_xattr_search *xs) 2979 2373 { 2980 - u64 value_size; 2981 2374 struct ocfs2_xattr_entry *last; 2982 2375 int free, i; 2983 2376 size_t min_offs = xs->end - xs->base; ··· 2999 2394 3000 2395 BUG_ON(!xs->not_found); 3001 2396 3002 - if (xi->value_len > OCFS2_XATTR_INLINE_SIZE) 3003 - value_size = OCFS2_XATTR_ROOT_SIZE; 3004 - else 3005 - value_size = OCFS2_XATTR_SIZE(xi->value_len); 3006 - 3007 - if (free >= sizeof(struct ocfs2_xattr_entry) + 3008 - OCFS2_XATTR_SIZE(strlen(xi->name)) + value_size) 2397 + if (free >= (sizeof(struct ocfs2_xattr_entry) + namevalue_size_xi(xi))) 3009 2398 return 1; 3010 2399 3011 2400 return 0; ··· 3023 2424 char *base = NULL; 3024 2425 int name_offset, name_len = 0; 3025 2426 u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb, 3026 - xi->value_len); 2427 + xi->xi_value_len); 3027 2428 u64 value_size; 3028 2429 3029 2430 /* ··· 3031 2432 * No matter whether we replace an old one or add a new one, 3032 2433 * we need this for writing. 3033 2434 */ 3034 - if (xi->value_len > OCFS2_XATTR_INLINE_SIZE) 2435 + if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) 3035 2436 credits += new_clusters * 3036 2437 ocfs2_clusters_to_blocks(inode->i_sb, 1); 3037 2438 3038 2439 if (xis->not_found && xbs->not_found) { 3039 2440 credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb); 3040 2441 3041 - if (xi->value_len > OCFS2_XATTR_INLINE_SIZE) { 2442 + if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) { 3042 2443 clusters_add += new_clusters; 3043 2444 credits += ocfs2_calc_extend_credits(inode->i_sb, 3044 2445 &def_xv.xv.xr_list, ··· 3083 2484 * The credits for removing the value tree will be extended 3084 2485 * by ocfs2_remove_extent itself. 3085 2486 */ 3086 - if (!xi->value) { 2487 + if (!xi->xi_value) { 3087 2488 if (!ocfs2_xattr_is_local(xe)) 3088 2489 credits += ocfs2_remove_extent_credits(inode->i_sb); 3089 2490 ··· 3113 2514 } 3114 2515 } 3115 2516 3116 - if (xi->value_len > OCFS2_XATTR_INLINE_SIZE) { 2517 + if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) { 3117 2518 /* the new values will be stored outside. */ 3118 2519 u32 old_clusters = 0; 3119 2520 ··· 3146 2547 * value, we don't need any allocation, otherwise we have 3147 2548 * to guess metadata allocation. 
3148 2549 */ 3149 - if ((ocfs2_xattr_is_local(xe) && value_size >= xi->value_len) || 2550 + if ((ocfs2_xattr_is_local(xe) && 2551 + (value_size >= xi->xi_value_len)) || 3150 2552 (!ocfs2_xattr_is_local(xe) && 3151 - OCFS2_XATTR_ROOT_SIZE >= xi->value_len)) 2553 + OCFS2_XATTR_ROOT_SIZE >= xi->xi_value_len)) 3152 2554 goto out; 3153 2555 } 3154 2556 ··· 3239 2639 3240 2640 meta_add += extra_meta; 3241 2641 mlog(0, "Set xattr %s, reserve meta blocks = %d, clusters = %d, " 3242 - "credits = %d\n", xi->name, meta_add, clusters_add, *credits); 2642 + "credits = %d\n", xi->xi_name, meta_add, clusters_add, *credits); 3243 2643 3244 2644 if (meta_add) { 3245 2645 ret = ocfs2_reserve_new_metadata_blocks(osb, meta_add, ··· 3279 2679 { 3280 2680 int ret = 0, credits, old_found; 3281 2681 3282 - if (!xi->value) { 2682 + if (!xi->xi_value) { 3283 2683 /* Remove existing extended attribute */ 3284 2684 if (!xis->not_found) 3285 2685 ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt); ··· 3293 2693 * If succeed and that extended attribute existing in 3294 2694 * external block, then we will remove it. 3295 2695 */ 3296 - xi->value = NULL; 3297 - xi->value_len = 0; 2696 + xi->xi_value = NULL; 2697 + xi->xi_value_len = 0; 3298 2698 3299 2699 old_found = xis->not_found; 3300 2700 xis->not_found = -ENODATA; ··· 3322 2722 } else if (ret == -ENOSPC) { 3323 2723 if (di->i_xattr_loc && !xbs->xattr_bh) { 3324 2724 ret = ocfs2_xattr_block_find(inode, 3325 - xi->name_index, 3326 - xi->name, xbs); 2725 + xi->xi_name_index, 2726 + xi->xi_name, xbs); 3327 2727 if (ret) 3328 2728 goto out; 3329 2729 ··· 3362 2762 * If succeed and that extended attribute 3363 2763 * existing in inode, we will remove it. 3364 2764 */ 3365 - xi->value = NULL; 3366 - xi->value_len = 0; 2765 + xi->xi_value = NULL; 2766 + xi->xi_value_len = 0; 3367 2767 xbs->not_found = -ENODATA; 3368 2768 ret = ocfs2_calc_xattr_set_need(inode, 3369 2769 di, ··· 3429 2829 int ret; 3430 2830 3431 2831 struct ocfs2_xattr_info xi = { 3432 - .name_index = name_index, 3433 - .name = name, 3434 - .value = value, 3435 - .value_len = value_len, 2832 + .xi_name_index = name_index, 2833 + .xi_name = name, 2834 + .xi_name_len = strlen(name), 2835 + .xi_value = value, 2836 + .xi_value_len = value_len, 3436 2837 }; 3437 2838 3438 2839 struct ocfs2_xattr_search xis = { ··· 3513 2912 struct ocfs2_refcount_tree *ref_tree = NULL; 3514 2913 3515 2914 struct ocfs2_xattr_info xi = { 3516 - .name_index = name_index, 3517 - .name = name, 3518 - .value = value, 3519 - .value_len = value_len, 2915 + .xi_name_index = name_index, 2916 + .xi_name = name, 2917 + .xi_name_len = strlen(name), 2918 + .xi_value = value, 2919 + .xi_value_len = value_len, 3520 2920 }; 3521 2921 3522 2922 struct ocfs2_xattr_search xis = { ··· 4361 3759 struct ocfs2_xattr_bucket *bucket) 4362 3760 { 4363 3761 int ret, i; 4364 - size_t end, offset, len, value_len; 3762 + size_t end, offset, len; 4365 3763 struct ocfs2_xattr_header *xh; 4366 3764 char *entries, *buf, *bucket_buf = NULL; 4367 3765 u64 blkno = bucket_blkno(bucket); ··· 4415 3813 end = OCFS2_XATTR_BUCKET_SIZE; 4416 3814 for (i = 0; i < le16_to_cpu(xh->xh_count); i++, xe++) { 4417 3815 offset = le16_to_cpu(xe->xe_name_offset); 4418 - if (ocfs2_xattr_is_local(xe)) 4419 - value_len = OCFS2_XATTR_SIZE( 4420 - le64_to_cpu(xe->xe_value_size)); 4421 - else 4422 - value_len = OCFS2_XATTR_ROOT_SIZE; 4423 - len = OCFS2_XATTR_SIZE(xe->xe_name_len) + value_len; 3816 + len = namevalue_size_xe(xe); 4424 3817 4425 3818 /* 4426 3819 * We must make sure that the 
name/value pair ··· 4604 4007 int new_bucket_head) 4605 4008 { 4606 4009 int ret, i; 4607 - int count, start, len, name_value_len = 0, xe_len, name_offset = 0; 4010 + int count, start, len, name_value_len = 0, name_offset = 0; 4608 4011 struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL; 4609 4012 struct ocfs2_xattr_header *xh; 4610 4013 struct ocfs2_xattr_entry *xe; ··· 4695 4098 name_value_len = 0; 4696 4099 for (i = 0; i < start; i++) { 4697 4100 xe = &xh->xh_entries[i]; 4698 - xe_len = OCFS2_XATTR_SIZE(xe->xe_name_len); 4699 - if (ocfs2_xattr_is_local(xe)) 4700 - xe_len += 4701 - OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size)); 4702 - else 4703 - xe_len += OCFS2_XATTR_ROOT_SIZE; 4704 - name_value_len += xe_len; 4101 + name_value_len += namevalue_size_xe(xe); 4705 4102 if (le16_to_cpu(xe->xe_name_offset) < name_offset) 4706 4103 name_offset = le16_to_cpu(xe->xe_name_offset); 4707 4104 } ··· 4725 4134 xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE); 4726 4135 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) { 4727 4136 xe = &xh->xh_entries[i]; 4728 - xe_len = OCFS2_XATTR_SIZE(xe->xe_name_len); 4729 - if (ocfs2_xattr_is_local(xe)) 4730 - xe_len += 4731 - OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size)); 4732 - else 4733 - xe_len += OCFS2_XATTR_ROOT_SIZE; 4734 4137 if (le16_to_cpu(xe->xe_name_offset) < 4735 4138 le16_to_cpu(xh->xh_free_start)) 4736 4139 xh->xh_free_start = xe->xe_name_offset; ··· 5336 4751 } 5337 4752 5338 4753 /* 5339 - * Handle the normal xattr set, including replace, delete and new. 5340 - * 5341 - * Note: "local" indicates the real data's locality. So we can't 5342 - * just its bucket locality by its length. 5343 - */ 5344 - static void ocfs2_xattr_set_entry_normal(struct inode *inode, 5345 - struct ocfs2_xattr_info *xi, 5346 - struct ocfs2_xattr_search *xs, 5347 - u32 name_hash, 5348 - int local) 5349 - { 5350 - struct ocfs2_xattr_entry *last, *xe; 5351 - int name_len = strlen(xi->name); 5352 - struct ocfs2_xattr_header *xh = xs->header; 5353 - u16 count = le16_to_cpu(xh->xh_count), start; 5354 - size_t blocksize = inode->i_sb->s_blocksize; 5355 - char *val; 5356 - size_t offs, size, new_size; 5357 - 5358 - last = &xh->xh_entries[count]; 5359 - if (!xs->not_found) { 5360 - xe = xs->here; 5361 - offs = le16_to_cpu(xe->xe_name_offset); 5362 - if (ocfs2_xattr_is_local(xe)) 5363 - size = OCFS2_XATTR_SIZE(name_len) + 5364 - OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size)); 5365 - else 5366 - size = OCFS2_XATTR_SIZE(name_len) + 5367 - OCFS2_XATTR_SIZE(OCFS2_XATTR_ROOT_SIZE); 5368 - 5369 - /* 5370 - * If the new value will be stored outside, xi->value has been 5371 - * initalized as an empty ocfs2_xattr_value_root, and the same 5372 - * goes with xi->value_len, so we can set new_size safely here. 5373 - * See ocfs2_xattr_set_in_bucket. 5374 - */ 5375 - new_size = OCFS2_XATTR_SIZE(name_len) + 5376 - OCFS2_XATTR_SIZE(xi->value_len); 5377 - 5378 - le16_add_cpu(&xh->xh_name_value_len, -size); 5379 - if (xi->value) { 5380 - if (new_size > size) 5381 - goto set_new_name_value; 5382 - 5383 - /* Now replace the old value with new one. 
*/ 5384 - if (local) 5385 - xe->xe_value_size = cpu_to_le64(xi->value_len); 5386 - else 5387 - xe->xe_value_size = 0; 5388 - 5389 - val = ocfs2_xattr_bucket_get_val(inode, 5390 - xs->bucket, offs); 5391 - memset(val + OCFS2_XATTR_SIZE(name_len), 0, 5392 - size - OCFS2_XATTR_SIZE(name_len)); 5393 - if (OCFS2_XATTR_SIZE(xi->value_len) > 0) 5394 - memcpy(val + OCFS2_XATTR_SIZE(name_len), 5395 - xi->value, xi->value_len); 5396 - 5397 - le16_add_cpu(&xh->xh_name_value_len, new_size); 5398 - ocfs2_xattr_set_local(xe, local); 5399 - return; 5400 - } else { 5401 - /* 5402 - * Remove the old entry if there is more than one. 5403 - * We don't remove the last entry so that we can 5404 - * use it to indicate the hash value of the empty 5405 - * bucket. 5406 - */ 5407 - last -= 1; 5408 - le16_add_cpu(&xh->xh_count, -1); 5409 - if (xh->xh_count) { 5410 - memmove(xe, xe + 1, 5411 - (void *)last - (void *)xe); 5412 - memset(last, 0, 5413 - sizeof(struct ocfs2_xattr_entry)); 5414 - } else 5415 - xh->xh_free_start = 5416 - cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE); 5417 - 5418 - return; 5419 - } 5420 - } else { 5421 - /* find a new entry for insert. */ 5422 - int low = 0, high = count - 1, tmp; 5423 - struct ocfs2_xattr_entry *tmp_xe; 5424 - 5425 - while (low <= high && count) { 5426 - tmp = (low + high) / 2; 5427 - tmp_xe = &xh->xh_entries[tmp]; 5428 - 5429 - if (name_hash > le32_to_cpu(tmp_xe->xe_name_hash)) 5430 - low = tmp + 1; 5431 - else if (name_hash < 5432 - le32_to_cpu(tmp_xe->xe_name_hash)) 5433 - high = tmp - 1; 5434 - else { 5435 - low = tmp; 5436 - break; 5437 - } 5438 - } 5439 - 5440 - xe = &xh->xh_entries[low]; 5441 - if (low != count) 5442 - memmove(xe + 1, xe, (void *)last - (void *)xe); 5443 - 5444 - le16_add_cpu(&xh->xh_count, 1); 5445 - memset(xe, 0, sizeof(struct ocfs2_xattr_entry)); 5446 - xe->xe_name_hash = cpu_to_le32(name_hash); 5447 - xe->xe_name_len = name_len; 5448 - ocfs2_xattr_set_type(xe, xi->name_index); 5449 - } 5450 - 5451 - set_new_name_value: 5452 - /* Insert the new name+value. */ 5453 - size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_SIZE(xi->value_len); 5454 - 5455 - /* 5456 - * We must make sure that the name/value pair 5457 - * exists in the same block. 5458 - */ 5459 - offs = le16_to_cpu(xh->xh_free_start); 5460 - start = offs - size; 5461 - 5462 - if (start >> inode->i_sb->s_blocksize_bits != 5463 - (offs - 1) >> inode->i_sb->s_blocksize_bits) { 5464 - offs = offs - offs % blocksize; 5465 - xh->xh_free_start = cpu_to_le16(offs); 5466 - } 5467 - 5468 - val = ocfs2_xattr_bucket_get_val(inode, xs->bucket, offs - size); 5469 - xe->xe_name_offset = cpu_to_le16(offs - size); 5470 - 5471 - memset(val, 0, size); 5472 - memcpy(val, xi->name, name_len); 5473 - memcpy(val + OCFS2_XATTR_SIZE(name_len), xi->value, xi->value_len); 5474 - 5475 - xe->xe_value_size = cpu_to_le64(xi->value_len); 5476 - ocfs2_xattr_set_local(xe, local); 5477 - xs->here = xe; 5478 - le16_add_cpu(&xh->xh_free_start, -size); 5479 - le16_add_cpu(&xh->xh_name_value_len, size); 5480 - 5481 - return; 5482 - } 5483 - 5484 - /* 5485 - * Set the xattr entry in the specified bucket. 5486 - * The bucket is indicated by xs->bucket and it should have the enough 5487 - * space for the xattr insertion. 
5488 - */ 5489 - static int ocfs2_xattr_set_entry_in_bucket(struct inode *inode, 5490 - handle_t *handle, 5491 - struct ocfs2_xattr_info *xi, 5492 - struct ocfs2_xattr_search *xs, 5493 - u32 name_hash, 5494 - int local) 5495 - { 5496 - int ret; 5497 - u64 blkno; 5498 - 5499 - mlog(0, "Set xattr entry len = %lu index = %d in bucket %llu\n", 5500 - (unsigned long)xi->value_len, xi->name_index, 5501 - (unsigned long long)bucket_blkno(xs->bucket)); 5502 - 5503 - if (!xs->bucket->bu_bhs[1]) { 5504 - blkno = bucket_blkno(xs->bucket); 5505 - ocfs2_xattr_bucket_relse(xs->bucket); 5506 - ret = ocfs2_read_xattr_bucket(xs->bucket, blkno); 5507 - if (ret) { 5508 - mlog_errno(ret); 5509 - goto out; 5510 - } 5511 - } 5512 - 5513 - ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket, 5514 - OCFS2_JOURNAL_ACCESS_WRITE); 5515 - if (ret < 0) { 5516 - mlog_errno(ret); 5517 - goto out; 5518 - } 5519 - 5520 - ocfs2_xattr_set_entry_normal(inode, xi, xs, name_hash, local); 5521 - ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket); 5522 - 5523 - out: 5524 - return ret; 5525 - } 5526 - 5527 - /* 5528 4754 * Truncate the specified xe_off entry in xattr bucket. 5529 4755 * bucket is indicated by header_bh and len is the new length. 5530 4756 * Both the ocfs2_xattr_value_root and the entry will be updated here. ··· 5401 5005 5402 5006 ocfs2_xattr_bucket_journal_dirty(ctxt->handle, bucket); 5403 5007 5404 - out: 5405 - return ret; 5406 - } 5407 - 5408 - static int ocfs2_xattr_bucket_value_truncate_xs(struct inode *inode, 5409 - struct ocfs2_xattr_search *xs, 5410 - int len, 5411 - struct ocfs2_xattr_set_ctxt *ctxt) 5412 - { 5413 - int ret, offset; 5414 - struct ocfs2_xattr_entry *xe = xs->here; 5415 - struct ocfs2_xattr_header *xh = (struct ocfs2_xattr_header *)xs->base; 5416 - 5417 - BUG_ON(!xs->bucket->bu_bhs[0] || !xe || ocfs2_xattr_is_local(xe)); 5418 - 5419 - offset = xe - xh->xh_entries; 5420 - ret = ocfs2_xattr_bucket_value_truncate(inode, xs->bucket, 5421 - offset, len, ctxt); 5422 - if (ret) 5423 - mlog_errno(ret); 5424 - 5425 - return ret; 5426 - } 5427 - 5428 - static int ocfs2_xattr_bucket_set_value_outside(struct inode *inode, 5429 - handle_t *handle, 5430 - struct ocfs2_xattr_search *xs, 5431 - char *val, 5432 - int value_len) 5433 - { 5434 - int ret, offset, block_off; 5435 - struct ocfs2_xattr_value_root *xv; 5436 - struct ocfs2_xattr_entry *xe = xs->here; 5437 - struct ocfs2_xattr_header *xh = bucket_xh(xs->bucket); 5438 - void *base; 5439 - struct ocfs2_xattr_value_buf vb = { 5440 - .vb_access = ocfs2_journal_access, 5441 - }; 5442 - 5443 - BUG_ON(!xs->base || !xe || ocfs2_xattr_is_local(xe)); 5444 - 5445 - ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb, xh, 5446 - xe - xh->xh_entries, 5447 - &block_off, 5448 - &offset); 5449 - if (ret) { 5450 - mlog_errno(ret); 5451 - goto out; 5452 - } 5453 - 5454 - base = bucket_block(xs->bucket, block_off); 5455 - xv = (struct ocfs2_xattr_value_root *)(base + offset + 5456 - OCFS2_XATTR_SIZE(xe->xe_name_len)); 5457 - 5458 - vb.vb_xv = xv; 5459 - vb.vb_bh = xs->bucket->bu_bhs[block_off]; 5460 - ret = __ocfs2_xattr_set_value_outside(inode, handle, 5461 - &vb, val, value_len); 5462 - if (ret) 5463 - mlog_errno(ret); 5464 5008 out: 5465 5009 return ret; 5466 5010 } ··· 5503 5167 return ret; 5504 5168 } 5505 5169 5506 - static void ocfs2_xattr_bucket_remove_xs(struct inode *inode, 5507 - handle_t *handle, 5508 - struct ocfs2_xattr_search *xs) 5509 - { 5510 - struct ocfs2_xattr_header *xh = bucket_xh(xs->bucket); 5511 - struct ocfs2_xattr_entry *last = 
&xh->xh_entries[
5512 - le16_to_cpu(xh->xh_count) - 1];
5513 - int ret = 0;
5514 -
5515 - ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket,
5516 - OCFS2_JOURNAL_ACCESS_WRITE);
5517 - if (ret) {
5518 - mlog_errno(ret);
5519 - return;
5520 - }
5521 -
5522 - /* Remove the old entry. */
5523 - memmove(xs->here, xs->here + 1,
5524 - (void *)last - (void *)xs->here);
5525 - memset(last, 0, sizeof(struct ocfs2_xattr_entry));
5526 - le16_add_cpu(&xh->xh_count, -1);
5527 -
5528 - ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket);
5529 - }
5530 -
5531 - /*
5532 - * Set the xattr name/value in the bucket specified in xs.
5533 - *
5534 - * As the new value in xi may be stored in the bucket or in an outside cluster,
5535 - * we divide the whole process into 3 steps:
5536 - * 1. insert name/value in the bucket(ocfs2_xattr_set_entry_in_bucket)
5537 - * 2. truncate of the outside cluster(ocfs2_xattr_bucket_value_truncate_xs)
5538 - * 3. Set the value to the outside cluster(ocfs2_xattr_bucket_set_value_outside)
5539 - * 4. If the clusters for the new outside value can't be allocated, we need
5540 - * to free the xattr we allocated in set.
5541 - */
5542 - static int ocfs2_xattr_set_in_bucket(struct inode *inode,
5543 - struct ocfs2_xattr_info *xi,
5544 - struct ocfs2_xattr_search *xs,
5545 - struct ocfs2_xattr_set_ctxt *ctxt)
5546 - {
5547 - int ret, local = 1;
5548 - size_t value_len;
5549 - char *val = (char *)xi->value;
5550 - struct ocfs2_xattr_entry *xe = xs->here;
5551 - u32 name_hash = ocfs2_xattr_name_hash(inode, xi->name,
5552 - strlen(xi->name));
5553 -
5554 - if (!xs->not_found && !ocfs2_xattr_is_local(xe)) {
5555 - /*
5556 - * We need to truncate the xattr storage first.
5557 - *
5558 - * If both the old and new value are stored to
5559 - * outside block, we only need to truncate
5560 - * the storage and then set the value outside.
5561 - *
5562 - * If the new value should be stored within block,
5563 - * we should free all the outside block first and
5564 - * the modification to the xattr block will be done
5565 - * by following steps.
5566 - */
5567 - if (xi->value_len > OCFS2_XATTR_INLINE_SIZE)
5568 - value_len = xi->value_len;
5569 - else
5570 - value_len = 0;
5571 -
5572 - ret = ocfs2_xattr_bucket_value_truncate_xs(inode, xs,
5573 - value_len,
5574 - ctxt);
5575 - if (ret)
5576 - goto out;
5577 -
5578 - if (value_len)
5579 - goto set_value_outside;
5580 - }
5581 -
5582 - value_len = xi->value_len;
5583 - /* So we have to handle the inside block change now. */
5584 - if (value_len > OCFS2_XATTR_INLINE_SIZE) {
5585 - /*
5586 - * If the new value will be stored outside of block,
5587 - * initalize a new empty value root and insert it first.
5588 - */
5589 - local = 0;
5590 - xi->value = &def_xv;
5591 - xi->value_len = OCFS2_XATTR_ROOT_SIZE;
5592 - }
5593 -
5594 - ret = ocfs2_xattr_set_entry_in_bucket(inode, ctxt->handle, xi, xs,
5595 - name_hash, local);
5596 - if (ret) {
5597 - mlog_errno(ret);
5598 - goto out;
5599 - }
5600 -
5601 - if (value_len <= OCFS2_XATTR_INLINE_SIZE)
5602 - goto out;
5603 -
5604 - /* allocate the space now for the outside block storage. */
5605 - ret = ocfs2_xattr_bucket_value_truncate_xs(inode, xs,
5606 - value_len, ctxt);
5607 - if (ret) {
5608 - mlog_errno(ret);
5609 -
5610 - if (xs->not_found) {
5611 - /*
5612 - * We can't allocate enough clusters for outside
5613 - * storage and we have allocated xattr already,
5614 - * so need to remove it.
5615 - */
5616 - ocfs2_xattr_bucket_remove_xs(inode, ctxt->handle, xs);
5617 - }
5618 - goto out;
5619 - }
5620 -
5621 - set_value_outside:
5622 - ret = ocfs2_xattr_bucket_set_value_outside(inode, ctxt->handle,
5623 - xs, val, value_len);
5624 - out:
5625 - return ret;
5626 - }
5627 -
5628 5170 /*
5629 5171 * check whether the xattr bucket is filled up with the same hash value.
5630 5172 * If we want to insert the xattr with the same hash, return -ENOSPC.
··· 5531 5317 return 0;
5532 5318 }
5533 5319
5320 + /*
5321 + * Try to set the entry in the current bucket. If we fail, the caller
5322 + * will handle getting us another bucket.
5323 + */
5324 + static int ocfs2_xattr_set_entry_bucket(struct inode *inode,
5325 + struct ocfs2_xattr_info *xi,
5326 + struct ocfs2_xattr_search *xs,
5327 + struct ocfs2_xattr_set_ctxt *ctxt)
5328 + {
5329 + int ret;
5330 + struct ocfs2_xa_loc loc;
5331 +
5332 + mlog_entry("Set xattr %s in xattr bucket\n", xi->xi_name);
5333 +
5334 + ocfs2_init_xattr_bucket_xa_loc(&loc, xs->bucket,
5335 + xs->not_found ? NULL : xs->here);
5336 + ret = ocfs2_xa_set(&loc, xi, ctxt);
5337 + if (!ret) {
5338 + xs->here = loc.xl_entry;
5339 + goto out;
5340 + }
5341 + if (ret != -ENOSPC) {
5342 + mlog_errno(ret);
5343 + goto out;
5344 + }
5345 +
5346 + /* Ok, we need space. Let's try defragmenting the bucket. */
5347 + ret = ocfs2_defrag_xattr_bucket(inode, ctxt->handle,
5348 + xs->bucket);
5349 + if (ret) {
5350 + mlog_errno(ret);
5351 + goto out;
5352 + }
5353 +
5354 + ret = ocfs2_xa_set(&loc, xi, ctxt);
5355 + if (!ret) {
5356 + xs->here = loc.xl_entry;
5357 + goto out;
5358 + }
5359 + if (ret != -ENOSPC)
5360 + mlog_errno(ret);
5361 +
5362 +
5363 + out:
5364 + mlog_exit(ret);
5365 + return ret;
5366 + }
5367 +
5534 5368 static int ocfs2_xattr_set_entry_index_block(struct inode *inode,
5535 5369 struct ocfs2_xattr_info *xi,
5536 5370 struct ocfs2_xattr_search *xs,
5537 5371 struct ocfs2_xattr_set_ctxt *ctxt)
5538 5372 {
5539 - struct ocfs2_xattr_header *xh;
5540 - struct ocfs2_xattr_entry *xe;
5541 - u16 count, header_size, xh_free_start;
5542 - int free, max_free, need, old;
5543 - size_t value_size = 0, name_len = strlen(xi->name);
5544 - size_t blocksize = inode->i_sb->s_blocksize;
5545 - int ret, allocation = 0;
5373 + int ret;
5546 5374
5547 - mlog_entry("Set xattr %s in xattr index block\n", xi->name);
5375 + mlog_entry("Set xattr %s in xattr index block\n", xi->xi_name);
5548 5376
5549 - try_again:
5550 - xh = xs->header;
5551 - count = le16_to_cpu(xh->xh_count);
5552 - xh_free_start = le16_to_cpu(xh->xh_free_start);
5553 - header_size = sizeof(struct ocfs2_xattr_header) +
5554 - count * sizeof(struct ocfs2_xattr_entry);
5555 - max_free = OCFS2_XATTR_BUCKET_SIZE - header_size -
5556 - le16_to_cpu(xh->xh_name_value_len) - OCFS2_XATTR_HEADER_GAP;
5557 -
5558 - mlog_bug_on_msg(header_size > blocksize, "bucket %llu has header size "
5559 - "of %u which exceed block size\n",
5560 - (unsigned long long)bucket_blkno(xs->bucket),
5561 - header_size);
5562 -
5563 - if (xi->value && xi->value_len > OCFS2_XATTR_INLINE_SIZE)
5564 - value_size = OCFS2_XATTR_ROOT_SIZE;
5565 - else if (xi->value)
5566 - value_size = OCFS2_XATTR_SIZE(xi->value_len);
5567 -
5568 - if (xs->not_found)
5569 - need = sizeof(struct ocfs2_xattr_entry) +
5570 - OCFS2_XATTR_SIZE(name_len) + value_size;
5571 - else {
5572 - need = value_size + OCFS2_XATTR_SIZE(name_len);
5573 -
5574 - /*
5575 - * We only replace the old value if the new length is smaller
5576 - * than the old one. Otherwise we will allocate new space in the
5577 - * bucket to store it.
5578 - */
5579 - xe = xs->here;
5580 - if (ocfs2_xattr_is_local(xe))
5581 - old = OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size));
5582 - else
5583 - old = OCFS2_XATTR_SIZE(OCFS2_XATTR_ROOT_SIZE);
5584 -
5585 - if (old >= value_size)
5586 - need = 0;
5377 + ret = ocfs2_xattr_set_entry_bucket(inode, xi, xs, ctxt);
5378 + if (!ret)
5379 + goto out;
5380 + if (ret != -ENOSPC) {
5381 + mlog_errno(ret);
5382 + goto out;
5587 5383 }
5588 5384
5589 - free = xh_free_start - header_size - OCFS2_XATTR_HEADER_GAP;
5385 + /* Ack, need more space. Let's try to get another bucket! */
5386 +
5590 5387 /*
5591 - * We need to make sure the new name/value pair
5592 - * can exist in the same block.
5388 + * We do not allow for overlapping ranges between buckets. And
5389 + * the maximum number of collisions we will allow for then is
5390 + * one bucket's worth, so check it here whether we need to
5391 + * add a new bucket for the insert.
5593 5392 */
5594 - if (xh_free_start % blocksize < need)
5595 - free -= xh_free_start % blocksize;
5596 -
5597 - mlog(0, "xs->not_found = %d, in xattr bucket %llu: free = %d, "
5598 - "need = %d, max_free = %d, xh_free_start = %u, xh_name_value_len ="
5599 - " %u\n", xs->not_found,
5600 - (unsigned long long)bucket_blkno(xs->bucket),
5601 - free, need, max_free, le16_to_cpu(xh->xh_free_start),
5602 - le16_to_cpu(xh->xh_name_value_len));
5603 -
5604 - if (free < need ||
5605 - (xs->not_found &&
5606 - count == ocfs2_xattr_max_xe_in_bucket(inode->i_sb))) {
5607 - if (need <= max_free &&
5608 - count < ocfs2_xattr_max_xe_in_bucket(inode->i_sb)) {
5609 - /*
5610 - * We can create the space by defragment. Since only the
5611 - * name/value will be moved, the xe shouldn't be changed
5612 - * in xs.
5613 - */
5614 - ret = ocfs2_defrag_xattr_bucket(inode, ctxt->handle,
5615 - xs->bucket);
5616 - if (ret) {
5617 - mlog_errno(ret);
5618 - goto out;
5619 - }
5620 -
5621 - xh_free_start = le16_to_cpu(xh->xh_free_start);
5622 - free = xh_free_start - header_size
5623 - - OCFS2_XATTR_HEADER_GAP;
5624 - if (xh_free_start % blocksize < need)
5625 - free -= xh_free_start % blocksize;
5626 -
5627 - if (free >= need)
5628 - goto xattr_set;
5629 -
5630 - mlog(0, "Can't get enough space for xattr insert by "
5631 - "defragment. Need %u bytes, but we have %d, so "
5632 - "allocate new bucket for it.\n", need, free);
5633 - }
5634 -
5635 - /*
5636 - * We have to add new buckets or clusters and one
5637 - * allocation should leave us enough space for insert.
5638 - */
5639 - BUG_ON(allocation);
5640 -
5641 - /*
5642 - * We do not allow for overlapping ranges between buckets. And
5643 - * the maximum number of collisions we will allow for then is
5644 - * one bucket's worth, so check it here whether we need to
5645 - * add a new bucket for the insert.
5646 - */
5647 - ret = ocfs2_check_xattr_bucket_collision(inode,
5648 - xs->bucket,
5649 - xi->name);
5650 - if (ret) {
5651 - mlog_errno(ret);
5652 - goto out;
5653 - }
5654 -
5655 - ret = ocfs2_add_new_xattr_bucket(inode,
5656 - xs->xattr_bh,
5393 + ret = ocfs2_check_xattr_bucket_collision(inode,
5657 5394 xs->bucket,
5658 - ctxt);
5659 - if (ret) {
5660 - mlog_errno(ret);
5661 - goto out;
5662 - }
5663 -
5664 - /*
5665 - * ocfs2_add_new_xattr_bucket() will have updated
5666 - * xs->bucket if it moved, but it will not have updated
5667 - * any of the other search fields. Thus, we drop it and
5668 - * re-search. Everything should be cached, so it'll be
5669 - * quick.
5670 - */
5671 - ocfs2_xattr_bucket_relse(xs->bucket);
5672 - ret = ocfs2_xattr_index_block_find(inode, xs->xattr_bh,
5673 - xi->name_index,
5674 - xi->name, xs);
5675 - if (ret && ret != -ENODATA)
5676 - goto out;
5677 - xs->not_found = ret;
5678 - allocation = 1;
5679 - goto try_again;
5395 + xi->xi_name);
5396 + if (ret) {
5397 + mlog_errno(ret);
5398 + goto out;
5680 5399 }
5681 -
5682 - xattr_set:
5683 - ret = ocfs2_xattr_set_in_bucket(inode, xi, xs, ctxt);
5400 +
5401 + ret = ocfs2_add_new_xattr_bucket(inode,
5402 + xs->xattr_bh,
5403 + xs->bucket,
5404 + ctxt);
5405 + if (ret) {
5406 + mlog_errno(ret);
5407 + goto out;
5408 + }
5409 +
5410 + /*
5411 + * ocfs2_add_new_xattr_bucket() will have updated
5412 + * xs->bucket if it moved, but it will not have updated
5413 + * any of the other search fields. Thus, we drop it and
5414 + * re-search. Everything should be cached, so it'll be
5415 + * quick.
5416 + */
5417 + ocfs2_xattr_bucket_relse(xs->bucket);
5418 + ret = ocfs2_xattr_index_block_find(inode, xs->xattr_bh,
5419 + xi->xi_name_index,
5420 + xi->xi_name, xs);
5421 + if (ret && ret != -ENODATA)
5422 + goto out;
5423 + xs->not_found = ret;
5424 +
5425 + /* Ok, we have a new bucket, let's try again */
5426 + ret = ocfs2_xattr_set_entry_bucket(inode, xi, xs, ctxt);
5427 + if (ret && (ret != -ENOSPC))
5428 + mlog_errno(ret);
5429 +
5684 5430 out:
5685 5431 mlog_exit(ret);
5686 5432 return ret;
··· 5852 5678 * refcount tree, and make the original extent become 3. So we will need
5853 5679 * 2 * cluster more extent recs at most.
5854 5680 */
5855 - if (!xi->value || xi->value_len <= OCFS2_XATTR_INLINE_SIZE) {
5681 + if (!xi->xi_value || xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE) {
5856 5682
5857 5683 ret = ocfs2_refcounted_xattr_delete_need(inode,
5858 5684 &(*ref_tree)->rf_ci,
··· 6528 6354 int indexed)
6529 6355 {
6530 6356 int ret;
6531 - handle_t *handle;
6532 6357 struct ocfs2_alloc_context *meta_ac;
6533 6358 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
6359 + struct ocfs2_xattr_set_ctxt ctxt = {
6360 + .meta_ac = meta_ac,
6361 + };
6534 6362
6535 6363 ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &meta_ac);
6536 6364 if (ret < 0) {
··· 6540 6364 return ret;
6541 6365 }
6542 6366
6543 - handle = ocfs2_start_trans(osb, OCFS2_XATTR_BLOCK_CREATE_CREDITS);
6544 - if (IS_ERR(handle)) {
6545 - ret = PTR_ERR(handle);
6367 + ctxt.handle = ocfs2_start_trans(osb, OCFS2_XATTR_BLOCK_CREATE_CREDITS);
6368 + if (IS_ERR(ctxt.handle)) {
6369 + ret = PTR_ERR(ctxt.handle);
6546 6370 mlog_errno(ret);
6547 6371 goto out;
6548 6372 }
6549 6373
6550 6374 mlog(0, "create new xattr block for inode %llu, index = %d\n",
6551 6375 (unsigned long long)fe_bh->b_blocknr, indexed);
6552 - ret = ocfs2_create_xattr_block(handle, inode, fe_bh,
6553 - meta_ac, ret_bh, indexed);
6376 + ret = ocfs2_create_xattr_block(inode, fe_bh, &ctxt, indexed,
6377 + ret_bh);
6554 6378 if (ret)
6555 6379 mlog_errno(ret);
6556 6380
6557 - ocfs2_commit_trans(osb, handle);
6381 + ocfs2_commit_trans(osb, ctxt.handle);
6558 6382 out:
6559 6383 ocfs2_free_alloc_context(meta_ac);
6560 6384 return ret;