+19-20
fs/ocfs2/dlm/dlmdomain.c
@@ -77,26 +77,26 @@
 
 void __dlm_unhash_lockres(struct dlm_lock_resource *lockres)
 {
-	list_del_init(&lockres->list);
+	hlist_del_init(&lockres->hash_node);
 	dlm_lockres_put(lockres);
 }
 
 void __dlm_insert_lockres(struct dlm_ctxt *dlm,
 			  struct dlm_lock_resource *res)
 {
-	struct list_head *bucket;
+	struct hlist_head *bucket;
 	struct qstr *q;
 
 	assert_spin_locked(&dlm->spinlock);
 
 	q = &res->lockname;
 	q->hash = full_name_hash(q->name, q->len);
-	bucket = &(dlm->resources[q->hash & DLM_HASH_MASK]);
+	bucket = &(dlm->lockres_hash[q->hash % DLM_HASH_BUCKETS]);
 
 	/* get a reference for our hashtable */
 	dlm_lockres_get(res);
 
-	list_add_tail(&res->list, bucket);
+	hlist_add_head(&res->hash_node, bucket);
 }
 
 struct dlm_lock_resource * __dlm_lookup_lockres(struct dlm_ctxt *dlm,
@@ -104,9 +104,9 @@
 					 unsigned int len)
 {
 	unsigned int hash;
-	struct list_head *iter;
+	struct hlist_node *iter;
 	struct dlm_lock_resource *tmpres=NULL;
-	struct list_head *bucket;
+	struct hlist_head *bucket;
 
 	mlog_entry("%.*s\n", len, name);
 
@@ -114,11 +114,11 @@
 
 	hash = full_name_hash(name, len);
 
-	bucket = &(dlm->resources[hash & DLM_HASH_MASK]);
+	bucket = &(dlm->lockres_hash[hash % DLM_HASH_BUCKETS]);
 
 	/* check for pre-existing lock */
-	list_for_each(iter, bucket) {
-		tmpres = list_entry(iter, struct dlm_lock_resource, list);
+	hlist_for_each(iter, bucket) {
+		tmpres = hlist_entry(iter, struct dlm_lock_resource, hash_node);
 		if (tmpres->lockname.len == len &&
 		    memcmp(tmpres->lockname.name, name, len) == 0) {
 			dlm_lockres_get(tmpres);
@@ -193,8 +193,8 @@
 
 static void dlm_free_ctxt_mem(struct dlm_ctxt *dlm)
 {
-	if (dlm->resources)
-		free_page((unsigned long) dlm->resources);
+	if (dlm->lockres_hash)
+		free_page((unsigned long) dlm->lockres_hash);
 
 	if (dlm->name)
 		kfree(dlm->name);
@@ -303,10 +303,10 @@
 	mlog(0, "Migrating locks from domain %s\n", dlm->name);
 restart:
 	spin_lock(&dlm->spinlock);
-	for (i=0; i<DLM_HASH_SIZE; i++) {
-		while (!list_empty(&dlm->resources[i])) {
-			res = list_entry(dlm->resources[i].next,
-				     struct dlm_lock_resource, list);
+	for (i = 0; i < DLM_HASH_BUCKETS; i++) {
+		while (!hlist_empty(&dlm->lockres_hash[i])) {
+			res = hlist_entry(dlm->lockres_hash[i].first,
+					  struct dlm_lock_resource, hash_node);
 			/* need reference when manually grabbing lockres */
 			dlm_lockres_get(res);
 			/* this should unhash the lockres
@@ -1191,18 +1191,17 @@
 		goto leave;
 	}
 
-	dlm->resources = (struct list_head *) __get_free_page(GFP_KERNEL);
-	if (!dlm->resources) {
+	dlm->lockres_hash = (struct hlist_head *) __get_free_page(GFP_KERNEL);
+	if (!dlm->lockres_hash) {
 		mlog_errno(-ENOMEM);
 		kfree(dlm->name);
 		kfree(dlm);
 		dlm = NULL;
 		goto leave;
 	}
-	memset(dlm->resources, 0, PAGE_SIZE);
 
-	for (i=0; i<DLM_HASH_SIZE; i++)
-		INIT_LIST_HEAD(&dlm->resources[i]);
+	for (i=0; i<DLM_HASH_BUCKETS; i++)
+		INIT_HLIST_HEAD(&dlm->lockres_hash[i]);
 
 	strcpy(dlm->name, domain);
 	dlm->key = key;
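This file carries the mechanical part of the conversion: the lockres hash moves from struct list_head buckets to struct hlist_head, shrinking each bucket head from two pointers to one, so the single page backing the table indexes twice as many chains, and the memset() becomes unnecessary because an empty hlist head is all-NULL anyway. Below is a userspace model of that arithmetic; DLM_HASH_BUCKETS is defined in a header this patch does not touch and is assumed here to be PAGE_SIZE / sizeof(struct hlist_head).

/* Userspace sketch of the bucket-table sizing above; not kernel code. */
#include <stdio.h>

struct list_head { struct list_head *next, *prev; };
struct hlist_head { struct hlist_node *first; };
struct hlist_node { struct hlist_node *next, **pprev; };

#define MODEL_PAGE_SIZE 4096UL	/* stand-in for the kernel's PAGE_SIZE */

int main(void)
{
	unsigned long list_buckets = MODEL_PAGE_SIZE / sizeof(struct list_head);
	unsigned long hlist_buckets = MODEL_PAGE_SIZE / sizeof(struct hlist_head);
	unsigned int hash = 0xdeadbeef;

	/* one page of heads: hlist doubles the bucket count */
	printf("list_head buckets per page:  %lu\n", list_buckets);
	printf("hlist_head buckets per page: %lu\n", hlist_buckets);

	/* bucket selection: '%' is correct for any bucket count; when the
	 * count happens to be a power of two the compiler reduces it to
	 * the old '& mask' form anyway */
	printf("hash %#x -> bucket %lu\n", hash, hash % hlist_buckets);
	return 0;
}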
+2-2
fs/ocfs2/dlm/dlmmaster.c
@@ -564,7 +564,7 @@
 
 	/* By the time we're ready to blow this guy away, we shouldn't
	 * be on any lists. */
-	BUG_ON(!list_empty(&res->list));
+	BUG_ON(!hlist_unhashed(&res->hash_node));
 	BUG_ON(!list_empty(&res->granted));
 	BUG_ON(!list_empty(&res->converting));
 	BUG_ON(!list_empty(&res->blocked));
@@ -605,7 +605,7 @@
 
 	init_waitqueue_head(&res->wq);
 	spin_lock_init(&res->spinlock);
-	INIT_LIST_HEAD(&res->list);
+	INIT_HLIST_NODE(&res->hash_node);
 	INIT_LIST_HEAD(&res->granted);
 	INIT_LIST_HEAD(&res->converting);
 	INIT_LIST_HEAD(&res->blocked);
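list_empty() only means "not on a list" when the node was initialized to point at itself; the hlist equivalent is hlist_unhashed(), which reports true whenever pprev is NULL, i.e. after INIT_HLIST_NODE() or hlist_del_init(). The userspace re-implementation below shows just the invariant the new BUG_ON relies on; it mirrors my reading of <linux/list.h> semantics rather than quoting kernel source.

#include <assert.h>
#include <stddef.h>

struct hlist_head { struct hlist_node *first; };
struct hlist_node { struct hlist_node *next, **pprev; };

static void INIT_HLIST_NODE(struct hlist_node *h)
{
	h->next = NULL;
	h->pprev = NULL;	/* NULL pprev == not on any chain */
}

static int hlist_unhashed(const struct hlist_node *h)
{
	return !h->pprev;
}

static void hlist_add_head(struct hlist_node *n, struct hlist_head *h)
{
	struct hlist_node *first = h->first;

	n->next = first;
	if (first)
		first->pprev = &n->next;
	h->first = n;
	n->pprev = &h->first;
}

static void hlist_del_init(struct hlist_node *n)
{
	if (hlist_unhashed(n))
		return;
	*n->pprev = n->next;
	if (n->next)
		n->next->pprev = n->pprev;
	INIT_HLIST_NODE(n);
}

int main(void)
{
	struct hlist_head bucket = { NULL };
	struct hlist_node node;

	INIT_HLIST_NODE(&node);
	assert(hlist_unhashed(&node));	/* freshly initialized: off-chain */

	hlist_add_head(&node, &bucket);
	assert(!hlist_unhashed(&node));	/* hashed in */

	hlist_del_init(&node);
	assert(hlist_unhashed(&node));	/* safe to free, as the BUG_ON checks */
	return 0;
}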
+12-11
fs/ocfs2/dlm/dlmrecovery.c
@@ -1693,7 +1693,10 @@
 					      u8 dead_node, u8 new_master)
 {
 	int i;
-	struct list_head *iter, *iter2, *bucket;
+	struct list_head *iter, *iter2;
+	struct hlist_node *hash_iter;
+	struct hlist_head *bucket;
+
 	struct dlm_lock_resource *res;
 
 	mlog_entry_void();
@@ -1720,10 +1717,9 @@
 	 * for now we need to run the whole hash, clear
 	 * the RECOVERING state and set the owner
 	 * if necessary */
-	for (i=0; i<DLM_HASH_SIZE; i++) {
-		bucket = &(dlm->resources[i]);
-		list_for_each(iter, bucket) {
-			res = list_entry (iter, struct dlm_lock_resource, list);
+	for (i = 0; i < DLM_HASH_BUCKETS; i++) {
+		bucket = &(dlm->lockres_hash[i]);
+		hlist_for_each_entry(res, hash_iter, bucket, hash_node) {
 			if (res->state & DLM_LOCK_RES_RECOVERING) {
 				if (res->owner == dead_node) {
 					mlog(0, "(this=%u) res %.*s owner=%u "
@@ -1854,9 +1852,9 @@
 
 static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node)
 {
-	struct list_head *iter;
+	struct hlist_node *iter;
 	struct dlm_lock_resource *res;
 	int i;
-	struct list_head *bucket;
+	struct hlist_head *bucket;
 	struct dlm_lock *lock;
 
@@ -1878,10 +1876,9 @@
 	 * can be kicked again to see if any ASTs or BASTs
 	 * need to be fired as a result.
 	 */
-	for (i=0; i<DLM_HASH_SIZE; i++) {
-		bucket = &(dlm->resources[i]);
-		list_for_each(iter, bucket) {
-			res = list_entry (iter, struct dlm_lock_resource, list);
+	for (i = 0; i < DLM_HASH_BUCKETS; i++) {
+		bucket = &(dlm->lockres_hash[i]);
+		hlist_for_each_entry(res, iter, bucket, hash_node) {
 		/* always prune any $RECOVERY entries for dead nodes,
 		 * otherwise hangs can occur during later recovery */
 		if (dlm_is_recovery_lock(res->lockname.name,
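The hlist_for_each_entry() of this kernel era takes four arguments, threading a separate struct hlist_node cursor (hash_iter/iter above) alongside the typed entry pointer. A self-contained sketch of that macro shape follows; the lockres type is a hypothetical stand-in for dlm_lock_resource, and typeof is the GNU C extension the kernel itself uses.

#include <stdio.h>
#include <stddef.h>

struct hlist_head { struct hlist_node *first; };
struct hlist_node { struct hlist_node *next, **pprev; };

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))
#define hlist_entry(ptr, type, member) container_of(ptr, type, member)

/* four-argument form: (entry, node cursor, head, member) */
#define hlist_for_each_entry(pos, node, head, member)			\
	for (node = (head)->first;					\
	     node && (pos = hlist_entry(node, typeof(*pos), member), 1); \
	     node = node->next)

struct lockres {
	int id;
	struct hlist_node hash_node;
};

int main(void)
{
	struct lockres a = { .id = 1 }, b = { .id = 2 };
	struct hlist_head bucket = { .first = &a.hash_node };
	struct hlist_node *iter;
	struct lockres *res;

	/* hand-link b after a for the demo */
	a.hash_node.next = &b.hash_node;
	b.hash_node.next = NULL;

	hlist_for_each_entry(res, iter, &bucket, hash_node)
		printf("lockres %d\n", res->id);
	return 0;
}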
+36-2
fs/ocfs2/extent_map.c
@@ -181,6 +181,12 @@
 		ret = -EBADR;
 		if (rec_end > OCFS2_I(inode)->ip_clusters) {
 			mlog_errno(ret);
+			ocfs2_error(inode->i_sb,
+				    "Extent %d at e_blkno %"MLFu64" of inode %"MLFu64" goes past ip_clusters of %u\n",
+				    i,
+				    le64_to_cpu(rec->e_blkno),
+				    OCFS2_I(inode)->ip_blkno,
+				    OCFS2_I(inode)->ip_clusters);
 			goto out_free;
 		}
 
@@ -232,6 +226,12 @@
 	ret = -EBADR;
 	if (blkno) {
 		mlog_errno(ret);
+		ocfs2_error(inode->i_sb,
+			    "Multiple extents for (cpos = %u, clusters = %u) on inode %"MLFu64"; e_blkno %"MLFu64" and rec %d at e_blkno %"MLFu64"\n",
+			    cpos, clusters,
+			    OCFS2_I(inode)->ip_blkno,
+			    blkno, i,
+			    le64_to_cpu(rec->e_blkno));
 		goto out_free;
 	}
 
@@ -250,6 +238,10 @@
 	 */
 	ret = -EBADR;
 	if (!blkno) {
+		ocfs2_error(inode->i_sb,
+			    "No record found for (cpos = %u, clusters = %u) on inode %"MLFu64"\n",
+			    cpos, clusters,
+			    OCFS2_I(inode)->ip_blkno);
 		mlog_errno(ret);
 		goto out_free;
 	}
@@ -282,6 +266,20 @@
 
 	for (i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) {
 		rec = &el->l_recs[i];
+
+		if ((le32_to_cpu(rec->e_cpos) + le32_to_cpu(rec->e_clusters)) >
+		    OCFS2_I(inode)->ip_clusters) {
+			ret = -EBADR;
+			mlog_errno(ret);
+			ocfs2_error(inode->i_sb,
+				    "Extent %d at e_blkno %"MLFu64" of inode %"MLFu64" goes past ip_clusters of %u\n",
+				    i,
+				    le64_to_cpu(rec->e_blkno),
+				    OCFS2_I(inode)->ip_blkno,
+				    OCFS2_I(inode)->ip_clusters);
+			return ret;
+		}
+
 		ret = ocfs2_extent_map_insert(inode, rec,
 					      le16_to_cpu(el->l_tree_depth));
 		if (ret) {
@@ -556,6 +526,10 @@
 	    OCFS2_I(inode)->ip_map.em_clusters) {
 		ret = -EBADR;
 		mlog_errno(ret);
+		ocfs2_error(inode->i_sb,
+			    "Zero e_clusters on non-tail extent record at e_blkno %"MLFu64" on inode %"MLFu64"\n",
+			    le64_to_cpu(rec->e_blkno),
+			    OCFS2_I(inode)->ip_blkno);
 		return ret;
 	}
 
@@ -622,12 +588,12 @@
  * Existing record in the extent map:
  *
  *	cpos = 10, len = 10
- * 	|---------|
+ *	|---------|
  *
  * New Record:
  *
  *	cpos = 10, len = 20
- * 	|------------------|
+ *	|------------------|
 *
 * The passed record is the new on-disk record.  The new_clusters value
 * is how many clusters were added to the file.  If the append is a
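All of these hunks follow one pattern: on spotting an impossible on-disk value, log the errno as before, but also call ocfs2_error() so the corruption is reported against the superblock (and handled per the errors= mount policy) rather than failing silently. The standalone sketch below shows the shape of the new in-loop bounds check; the types and the helper name are illustrative, not ocfs2 API, and the real code reads the on-disk fields through le32_to_cpu()/le64_to_cpu().

#include <stdio.h>
#include <stdint.h>

struct extent_rec {
	uint32_t e_cpos;	/* first logical cluster covered */
	uint32_t e_clusters;	/* number of clusters in the extent */
	uint64_t e_blkno;	/* disk block of the extent's data */
};

/* 0 if the record stays inside the inode's cluster count,
 * -1 (standing in for -EBADR) if it runs past the end */
static int check_extent_in_bounds(const struct extent_rec *rec,
				  uint32_t ip_clusters)
{
	if (rec->e_cpos + rec->e_clusters > ip_clusters) {
		fprintf(stderr,
			"Extent at e_blkno %llu goes past ip_clusters of %u\n",
			(unsigned long long)rec->e_blkno, ip_clusters);
		return -1;
	}
	return 0;
}

int main(void)
{
	struct extent_rec ok = { .e_cpos = 0, .e_clusters = 8, .e_blkno = 100 };
	struct extent_rec bad = { .e_cpos = 4, .e_clusters = 8, .e_blkno = 200 };

	printf("ok:  %d\n", check_extent_in_bounds(&ok, 10));
	printf("bad: %d\n", check_extent_in_bounds(&bad, 10));
	return 0;
}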
+1-50
fs/ocfs2/file.c
@@ -933,9 +933,6 @@
 	struct file *filp = iocb->ki_filp;
 	struct inode *inode = filp->f_dentry->d_inode;
 	loff_t newsize, saved_pos;
-#ifdef OCFS2_ORACORE_WORKAROUNDS
-	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
-#endif
 
 	mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", filp, buf,
 		   (unsigned int)count,
@@ -947,14 +944,6 @@
 		mlog(0, "bad inode\n");
 		return -EIO;
 	}
-
-#ifdef OCFS2_ORACORE_WORKAROUNDS
-	/* ugh, work around some applications which open everything O_DIRECT +
-	 * O_APPEND and really don't mean to use O_DIRECT. */
-	if (osb->s_mount_opt & OCFS2_MOUNT_COMPAT_OCFS &&
-	    (filp->f_flags & O_APPEND) && (filp->f_flags & O_DIRECT))
-		filp->f_flags &= ~O_DIRECT;
-#endif
 
 	mutex_lock(&inode->i_mutex);
 	/* to match setattr's i_mutex -> i_alloc_sem -> rw_lock ordering */
@@ -1068,27 +1057,8 @@
 	/* communicate with ocfs2_dio_end_io */
 	ocfs2_iocb_set_rw_locked(iocb);
 
-#ifdef OCFS2_ORACORE_WORKAROUNDS
-	if (osb->s_mount_opt & OCFS2_MOUNT_COMPAT_OCFS &&
-	    filp->f_flags & O_DIRECT) {
-		unsigned int saved_flags = filp->f_flags;
-		int sector_size = 1 << osb->s_sectsize_bits;
-
-		if ((saved_pos & (sector_size - 1)) ||
-		    (count & (sector_size - 1)) ||
-		    ((unsigned long)buf & (sector_size - 1))) {
-			filp->f_flags |= O_SYNC;
-			filp->f_flags &= ~O_DIRECT;
-		}
-
-		ret = generic_file_aio_write_nolock(iocb, &local_iov, 1,
-						    &iocb->ki_pos);
-
-		filp->f_flags = saved_flags;
-	} else
-#endif
-		ret = generic_file_aio_write_nolock(iocb, &local_iov, 1,
-						    &iocb->ki_pos);
+	ret = generic_file_aio_write_nolock(iocb, &local_iov, 1,
+					    &iocb->ki_pos);
 
 	/* buffered aio wouldn't have proper lock coverage today */
 	BUG_ON(ret == -EIOCBQUEUED && !(filp->f_flags & O_DIRECT));
@@ -1109,9 +1098,6 @@
 	int ret = 0, rw_level = -1, have_alloc_sem = 0;
 	struct file *filp = iocb->ki_filp;
 	struct inode *inode = filp->f_dentry->d_inode;
-#ifdef OCFS2_ORACORE_WORKAROUNDS
-	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
-#endif
 
 	mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", filp, buf,
 		   (unsigned int)count,
@@ -1120,21 +1106,6 @@
 		mlog_errno(ret);
 		goto bail;
 	}
-
-#ifdef OCFS2_ORACORE_WORKAROUNDS
-	if (osb->s_mount_opt & OCFS2_MOUNT_COMPAT_OCFS) {
-		if (filp->f_flags & O_DIRECT) {
-			int sector_size = 1 << osb->s_sectsize_bits;
-
-			if ((pos & (sector_size - 1)) ||
-			    (count & (sector_size - 1)) ||
-			    ((unsigned long)buf & (sector_size - 1)) ||
-			    (i_size_read(inode) & (sector_size -1))) {
-				filp->f_flags &= ~O_DIRECT;
-			}
-		}
-	}
-#endif
 
 	/*
	 * buffered reads protect themselves in ->readpage().  O_DIRECT reads
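The deleted compat code silently downgraded O_DIRECT to buffered (O_SYNC) I/O whenever the request was not sector-aligned. For reference, the alignment test it applied looked like the predicate below; this is an illustrative reconstruction, not an ocfs2 function.

#include <stdio.h>

/* O_DIRECT requires the file position, the transfer length and the user
 * buffer address to all be sector-aligned; the removed workaround fell
 * back to buffered O_SYNC writes when any of them was not. */
static int odirect_aligned(unsigned long long pos, unsigned long count,
			   const void *buf, unsigned int sector_size)
{
	unsigned int mask = sector_size - 1;	/* sector_size is a power of two */

	return !((pos & mask) || (count & mask) ||
		 ((unsigned long)buf & mask));
}

int main(void)
{
	static char page[4096] __attribute__((aligned(512)));

	printf("aligned:    %d\n", odirect_aligned(0, 4096, page, 512));
	printf("bad offset: %d\n", odirect_aligned(3, 4096, page, 512));
	return 0;
}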
+45-1
fs/ocfs2/inode.c
@@ -41,6 +41,7 @@
 #include "dlmglue.h"
 #include "extent_map.h"
 #include "file.h"
+#include "heartbeat.h"
 #include "inode.h"
 #include "journal.h"
 #include "namei.h"
@@ -545,6 +544,42 @@
 	return status;
 }
 
+/*
+ * Serialize with orphan dir recovery. If the process doing
+ * recovery on this orphan dir does an iget() with the dir
+ * i_mutex held, we'll deadlock here. Instead we detect this
+ * and exit early - recovery will wipe this inode for us.
+ */
+static int ocfs2_check_orphan_recovery_state(struct ocfs2_super *osb,
+					     int slot)
+{
+	int ret = 0;
+
+	spin_lock(&osb->osb_lock);
+	if (ocfs2_node_map_test_bit(osb, &osb->osb_recovering_orphan_dirs, slot)) {
+		mlog(0, "Recovery is happening on orphan dir %d, will skip "
+		     "this inode\n", slot);
+		ret = -EDEADLK;
+		goto out;
+	}
+	/* This signals to the orphan recovery process that it should
+	 * wait for us to handle the wipe. */
+	osb->osb_orphan_wipes[slot]++;
+out:
+	spin_unlock(&osb->osb_lock);
+	return ret;
+}
+
+static void ocfs2_signal_wipe_completion(struct ocfs2_super *osb,
+					 int slot)
+{
+	spin_lock(&osb->osb_lock);
+	osb->osb_orphan_wipes[slot]--;
+	spin_unlock(&osb->osb_lock);
+
+	wake_up(&osb->osb_wipe_event);
+}
+
 static int ocfs2_wipe_inode(struct inode *inode,
 			    struct buffer_head *di_bh)
 {
@@ -592,6 +555,11 @@
 	/* We've already voted on this so it should be readonly - no
	 * spinlock needed. */
 	orphaned_slot = OCFS2_I(inode)->ip_orphaned_slot;
+
+	status = ocfs2_check_orphan_recovery_state(osb, orphaned_slot);
+	if (status)
+		return status;
+
 	orphan_dir_inode = ocfs2_get_system_file_inode(osb,
 						       ORPHAN_DIR_SYSTEM_INODE,
 						       orphaned_slot);
@@ -639,6 +597,7 @@
 	brelse(orphan_dir_bh);
 bail:
 	iput(orphan_dir_inode);
+	ocfs2_signal_wipe_completion(osb, orphaned_slot);
 
 	return status;
 }
@@ -865,7 +822,8 @@
 
 	status = ocfs2_wipe_inode(inode, di_bh);
 	if (status < 0) {
-		mlog_errno(status);
+		if (status != -EDEADLK)
+			mlog_errno(status);
 		goto bail_unlock_inode;
 	}
 
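These two helpers form one half of a handshake with the orphan recovery code added in journal.c: a wipe in progress bumps osb_orphan_wipes[slot] under osb_lock, and recovery refuses to start until that count drains, while a slot already marked recovering makes the wiper back off with -EDEADLK. Below is a userspace pthread model of that protocol, a sketch only: the names mirror the patch, but the primitives do not (a condition variable stands in for osb_wipe_event and its wait_event_interruptible()).

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t osb_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t wipe_event = PTHREAD_COND_INITIALIZER;
static int recovering;		/* models the osb_recovering_orphan_dirs bit */
static int orphan_wipes;	/* models osb->osb_orphan_wipes[slot] */

/* delete_inode() side: back off if recovery owns the orphan dir */
static int check_orphan_recovery_state(void)
{
	int ret = 0;

	pthread_mutex_lock(&osb_lock);
	if (recovering)
		ret = -1;	/* models -EDEADLK: recovery will wipe it */
	else
		orphan_wipes++;	/* recovery must now wait for us */
	pthread_mutex_unlock(&osb_lock);
	return ret;
}

static void signal_wipe_completion(void)
{
	pthread_mutex_lock(&osb_lock);
	orphan_wipes--;
	pthread_cond_broadcast(&wipe_event);	/* models wake_up() */
	pthread_mutex_unlock(&osb_lock);
}

/* recovery side: advertise the slot, then drain in-flight wipes */
static void mark_recovering_orphan_dir(void)
{
	pthread_mutex_lock(&osb_lock);
	recovering = 1;
	while (orphan_wipes)
		pthread_cond_wait(&wipe_event, &osb_lock);
	pthread_mutex_unlock(&osb_lock);
}

static void *wiper(void *arg)
{
	(void)arg;
	if (!check_orphan_recovery_state()) {
		puts("wiping inode");
		signal_wipe_completion();
	} else {
		puts("recovery in progress, skipping wipe");
	}
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, wiper, NULL);
	pthread_join(t, NULL);

	mark_recovering_orphan_dir();	/* returns once no wipes in flight */
	puts("recovery may lock the orphan dir now");
	return 0;
}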
+93-31
fs/ocfs2/journal.c
@@ -1408,21 +1408,17 @@
 	return status;
 }
 
-static int ocfs2_recover_orphans(struct ocfs2_super *osb,
-				 int slot)
+static int ocfs2_queue_orphans(struct ocfs2_super *osb,
+			       int slot,
+			       struct inode **head)
 {
-	int status = 0;
-	int have_disk_lock = 0;
-	struct inode *inode = NULL;
-	struct inode *iter;
+	int status;
 	struct inode *orphan_dir_inode = NULL;
+	struct inode *iter;
 	unsigned long offset, blk, local;
 	struct buffer_head *bh = NULL;
 	struct ocfs2_dir_entry *de;
 	struct super_block *sb = osb->sb;
-	struct ocfs2_inode_info *oi;
-
-	mlog(0, "Recover inodes from orphan dir in slot %d\n", slot);
 
 	orphan_dir_inode = ocfs2_get_system_file_inode(osb,
 						       ORPHAN_DIR_SYSTEM_INODE,
@@ -1426,17 +1430,15 @@
 	if (!orphan_dir_inode) {
 		status = -ENOENT;
 		mlog_errno(status);
-		goto out;
-	}
+		return status;
+	}
 
 	mutex_lock(&orphan_dir_inode->i_mutex);
 	status = ocfs2_meta_lock(orphan_dir_inode, NULL, NULL, 0);
 	if (status < 0) {
-		mutex_unlock(&orphan_dir_inode->i_mutex);
 		mlog_errno(status);
 		goto out;
 	}
-	have_disk_lock = 1;
 
 	offset = 0;
 	iter = NULL;
@@ -1445,9 +1451,8 @@
 		if (!bh)
 			status = -EINVAL;
 		if (status < 0) {
-			mutex_unlock(&orphan_dir_inode->i_mutex);
 			if (bh)
 				brelse(bh);
 			mlog_errno(status);
-			goto out;
+			goto out_unlock;
 		}
@@ -1458,11 +1465,10 @@
 
 			if (!ocfs2_check_dir_entry(orphan_dir_inode,
 						  de, bh, local)) {
-				mutex_unlock(&orphan_dir_inode->i_mutex);
 				status = -EINVAL;
 				mlog_errno(status);
 				brelse(bh);
-				goto out;
+				goto out_unlock;
 			}
 
 			local += le16_to_cpu(de->rec_len);
@@ -1496,18 +1504,95 @@
 
 			mlog(0, "queue orphan %"MLFu64"\n",
 			     OCFS2_I(iter)->ip_blkno);
-			OCFS2_I(iter)->ip_next_orphan = inode;
-			inode = iter;
+			/* No locking is required for the next_orphan
+			 * queue as there is only ever a single
+			 * process doing orphan recovery. */
+			OCFS2_I(iter)->ip_next_orphan = *head;
+			*head = iter;
 		}
 		brelse(bh);
 	}
-	mutex_unlock(&orphan_dir_inode->i_mutex);
 
+out_unlock:
 	ocfs2_meta_unlock(orphan_dir_inode, 0);
-	have_disk_lock = 0;
-
+out:
+	mutex_unlock(&orphan_dir_inode->i_mutex);
 	iput(orphan_dir_inode);
-	orphan_dir_inode = NULL;
+	return status;
+}
+
+static int ocfs2_orphan_recovery_can_continue(struct ocfs2_super *osb,
+					      int slot)
+{
+	int ret;
+
+	spin_lock(&osb->osb_lock);
+	ret = !osb->osb_orphan_wipes[slot];
+	spin_unlock(&osb->osb_lock);
+	return ret;
+}
+
+static void ocfs2_mark_recovering_orphan_dir(struct ocfs2_super *osb,
+					     int slot)
+{
+	spin_lock(&osb->osb_lock);
+	/* Mark ourselves such that new processes in delete_inode()
+	 * know to quit early. */
+	ocfs2_node_map_set_bit(osb, &osb->osb_recovering_orphan_dirs, slot);
+	while (osb->osb_orphan_wipes[slot]) {
+		/* If any processes are already in the middle of an
+		 * orphan wipe on this dir, then we need to wait for
+		 * them. */
+		spin_unlock(&osb->osb_lock);
+		wait_event_interruptible(osb->osb_wipe_event,
+					 ocfs2_orphan_recovery_can_continue(osb, slot));
+		spin_lock(&osb->osb_lock);
+	}
+	spin_unlock(&osb->osb_lock);
+}
+
+static void ocfs2_clear_recovering_orphan_dir(struct ocfs2_super *osb,
+					      int slot)
+{
+	ocfs2_node_map_clear_bit(osb, &osb->osb_recovering_orphan_dirs, slot);
+}
+
+/*
+ * Orphan recovery. Each mounted node has it's own orphan dir which we
+ * must run during recovery. Our strategy here is to build a list of
+ * the inodes in the orphan dir and iget/iput them. The VFS does
+ * (most) of the rest of the work.
+ *
+ * Orphan recovery can happen at any time, not just mount so we have a
+ * couple of extra considerations.
+ *
+ * - We grab as many inodes as we can under the orphan dir lock -
+ *   doing iget() outside the orphan dir risks getting a reference on
+ *   an invalid inode.
+ * - We must be sure not to deadlock with other processes on the
+ *   system wanting to run delete_inode(). This can happen when they go
+ *   to lock the orphan dir and the orphan recovery process attempts to
+ *   iget() inside the orphan dir lock. This can be avoided by
+ *   advertising our state to ocfs2_delete_inode().
+ */
+static int ocfs2_recover_orphans(struct ocfs2_super *osb,
+				 int slot)
+{
+	int ret = 0;
+	struct inode *inode = NULL;
+	struct inode *iter;
+	struct ocfs2_inode_info *oi;
+
+	mlog(0, "Recover inodes from orphan dir in slot %d\n", slot);
+
+	ocfs2_mark_recovering_orphan_dir(osb, slot);
+	ret = ocfs2_queue_orphans(osb, slot, &inode);
+	ocfs2_clear_recovering_orphan_dir(osb, slot);
+
+	/* Error here should be noted, but we want to continue with as
+	 * many queued inodes as we've got. */
+	if (ret)
+		mlog_errno(ret);
 
 	while (inode) {
 		oi = OCFS2_I(inode);
@@ -1610,14 +1541,7 @@
 		inode = iter;
 	}
 
-out:
-	if (have_disk_lock)
-		ocfs2_meta_unlock(orphan_dir_inode, 0);
-
-	if (orphan_dir_inode)
-		iput(orphan_dir_inode);
-
-	return status;
+	return ret;
 }
 
 static int ocfs2_wait_on_mount(struct ocfs2_super *osb)
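ocfs2_queue_orphans() threads the orphan inodes into a singly-linked list through ip_next_orphan while the orphan dir is locked, and ocfs2_recover_orphans() drains that list with iget()/iput() afterwards. The shape of that queue-then-drain pass, as a sketch with hypothetical types:

#include <stdio.h>
#include <stdlib.h>

struct fake_inode {
	unsigned long long blkno;
	struct fake_inode *next_orphan;	/* models ip_next_orphan */
};

/* queue phase: push onto *head; no locking needed in the real code
 * because only one process runs orphan recovery at a time */
static void queue_orphan(struct fake_inode *inode, struct fake_inode **head)
{
	inode->next_orphan = *head;
	*head = inode;
}

int main(void)
{
	struct fake_inode *head = NULL, *iter;
	unsigned long long blknos[] = { 42, 77, 1031 };

	for (int i = 0; i < 3; i++) {
		struct fake_inode *in = malloc(sizeof(*in));

		in->blkno = blknos[i];
		queue_orphan(in, &head);
	}

	/* drain phase, mirroring the while (inode) loop in the patch */
	while (head) {
		iter = head->next_orphan;
		printf("recovering orphan %llu\n", head->blkno);
		free(head);	/* stands in for iput() */
		head = iter;
	}
	return 0;
}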
+11-0
fs/ocfs2/super.c
@@ -1325,6 +1325,16 @@
 	}
 	mlog(ML_NOTICE, "max_slots for this device: %u\n", osb->max_slots);
 
+	init_waitqueue_head(&osb->osb_wipe_event);
+	osb->osb_orphan_wipes = kcalloc(osb->max_slots,
+					sizeof(*osb->osb_orphan_wipes),
+					GFP_KERNEL);
+	if (!osb->osb_orphan_wipes) {
+		status = -ENOMEM;
+		mlog_errno(status);
+		goto bail;
+	}
+
 	osb->s_feature_compat =
 		le32_to_cpu(OCFS2_RAW_SB(di)->s_feature_compat);
 	osb->s_feature_ro_compat =
@@ -1648,6 +1638,7 @@
 	if (osb->slot_info)
 		ocfs2_free_slot_info(osb->slot_info);
 
+	kfree(osb->osb_orphan_wipes);
 	/* FIXME
	 * This belongs in journal shutdown, but because we have to
	 * allocate osb->journal at the start of ocfs2_initalize_osb(),
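This allocation pairs with the handshake above: one in-flight-wipe counter per slot, sized with the sizeof(*pointer) idiom so the element type is spelled in exactly one place. A trivial userspace rendering, with calloc() standing in for kcalloc(..., GFP_KERNEL):

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	unsigned int max_slots = 8;	/* models osb->max_slots */
	unsigned int *orphan_wipes;

	/* zeroed array, element type taken from the pointer itself */
	orphan_wipes = calloc(max_slots, sizeof(*orphan_wipes));
	if (!orphan_wipes)
		return 1;	/* -ENOMEM in the kernel code */

	orphan_wipes[3]++;	/* one wipe in flight on slot 3 */
	printf("slot 3 wipes: %u\n", orphan_wipes[3]);

	free(orphan_wipes);	/* kfree() on the teardown path above */
	return 0;
}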