commit a5eba3f66f812cbc076a1170b3f888ad63f850b2

+2

Documentation/filesystems/00-INDEX

··· 36 - info about directory notification in Linux. 37 ecryptfs.txt 38 - docs on eCryptfs: stacked cryptographic filesystem for Linux. 39 ext2.txt 40 - info, mount options and specifications for the Ext2 filesystem. 41 ext3.txt

··· 36 - info about directory notification in Linux. 37 ecryptfs.txt 38 - docs on eCryptfs: stacked cryptographic filesystem for Linux. 39 + exofs.txt 40 + - info, usage, mount options, design about EXOFS. 41 ext2.txt 42 - info, mount options and specifications for the Ext2 filesystem. 43 ext3.txt

+12 -11

Documentation/filesystems/exofs.txt

··· 60 61 mkfs.exofs --pid=65536 --format /dev/osd0 62 63 - The --format is optional if not specified no OSD_FORMAT will be 64 - preformed and a clean file system will be created in the specified pid, 65 in the available space of the target. (Use --format=size_in_meg to limit 66 the total LUN space available) 67 68 - If pid already exist it will be deleted and a new one will be created in it's 69 - place. Be careful. 70 71 An exofs lives inside a single OSD partition. You can create multiple exofs 72 filesystems on the same device using multiple pids. ··· 81 82 7. For reference (See do-exofs example script): 83 do-exofs start - an example of how to perform the above steps. 84 - do-exofs stop - an example of how to unmount the file system. 85 do-exofs format - an example of how to format and mkfs a new exofs. 86 87 8. Extra compilation flags (uncomment in fs/exofs/Kbuild): ··· 104 exofs specific options: Options are separated by commas (,) 105 pid=<integer> - The partition number to mount/create as 106 container of the filesystem. 107 - This option is mandatory 108 - to=<integer> - Timeout in ticks for a single command 109 default is (60 * HZ) [for debugging only] 110 111 =============================================================================== ··· 116 with a special ID (defined in common.h). 117 Information included in the file system control block is used to fill the 118 in-memory superblock structure at mount time. This object is created before 119 - the file system is used by mkexofs.c It contains information such as: 120 - The file system's magic number 121 - The next inode number to be allocated 122 ··· 134 attributes. This applies to both regular files and other types (directories, 135 device files, symlinks, etc.). 136 137 - * Credentials are generated per object (inode and superblock) when they is 138 - created in memory (read off disk or created). The credential works for all 139 operations and is used as long as the object remains in memory. 
140 141 * Async OSD operations are used whenever possible, but the target may execute ··· 145 from executing in reverse order: 146 - The following are handled with the OBJ_CREATED and OBJ_2BCREATED 147 flags. OBJ_CREATED is set when we know the object exists on the OSD - 148 - in create's callback function, and when we successfully do a read_inode. 149 OBJ_2BCREATED is set in the beginning of the create function, so we 150 know that we should wait. 151 - create/delete: delete should wait until the object is created

··· 60 61 mkfs.exofs --pid=65536 --format /dev/osd0 62 63 + The --format is optional. If not specified, no OSD_FORMAT will be 64 + performed and a clean file system will be created in the specified pid, 65 in the available space of the target. (Use --format=size_in_meg to limit 66 the total LUN space available) 67 68 + If pid already exists, it will be deleted and a new one will be created in 69 + its place. Be careful. 70 71 An exofs lives inside a single OSD partition. You can create multiple exofs 72 filesystems on the same device using multiple pids. ··· 81 82 7. For reference (See do-exofs example script): 83 do-exofs start - an example of how to perform the above steps. 84 + do-exofs stop - an example of how to unmount the file system. 85 do-exofs format - an example of how to format and mkfs a new exofs. 86 87 8. Extra compilation flags (uncomment in fs/exofs/Kbuild): ··· 104 exofs specific options: Options are separated by commas (,) 105 pid=<integer> - The partition number to mount/create as 106 container of the filesystem. 107 + This option is mandatory. 108 + to=<integer> - Timeout in ticks for a single command. 109 default is (60 * HZ) [for debugging only] 110 111 =============================================================================== ··· 116 with a special ID (defined in common.h). 117 Information included in the file system control block is used to fill the 118 in-memory superblock structure at mount time. This object is created before 119 + the file system is used by mkexofs.c. It contains information such as: 120 - The file system's magic number 121 - The next inode number to be allocated 122 ··· 134 attributes. This applies to both regular files and other types (directories, 135 device files, symlinks, etc.). 136 137 + * Credentials are generated per object (inode and superblock) when they are 138 + created in memory (read from disk or created). The credential works for all 139 operations and is used as long as the object remains in memory. 
140 141 * Async OSD operations are used whenever possible, but the target may execute ··· 145 from executing in reverse order: 146 - The following are handled with the OBJ_CREATED and OBJ_2BCREATED 147 flags. OBJ_CREATED is set when we know the object exists on the OSD - 148 + in create's callback function, and when we successfully do a 149 + read_inode. 150 OBJ_2BCREATED is set in the beginning of the create function, so we 151 know that we should wait. 152 - create/delete: delete should wait until the object is created

+1 -1

fs/exofs/Kbuild

··· 12 # Kbuild - Gets included from the Kernels Makefile and build system 13 # 14 15 - exofs-y := osd.o inode.o file.o symlink.o namei.o dir.o super.o 16 obj-$(CONFIG_EXOFS_FS) += exofs.o

··· 12 # Kbuild - Gets included from the Kernels Makefile and build system 13 # 14 15 + exofs-y := ios.o inode.o file.o symlink.o namei.o dir.o super.o 16 obj-$(CONFIG_EXOFS_FS) += exofs.o

+57 -24

fs/exofs/common.h

··· 49 #define EXOFS_MIN_PID 0x10000 /* Smallest partition ID */ 50 #define EXOFS_OBJ_OFF 0x10000 /* offset for objects */ 51 #define EXOFS_SUPER_ID 0x10000 /* object ID for on-disk superblock */ 52 #define EXOFS_ROOT_ID 0x10002 /* object ID for root directory */ 53 54 /* exofs Application specific page/attribute */ ··· 79 #define EXOFS_SUPER_MAGIC 0x5DF5 80 81 /* 82 - * The file system control block - stored in an object's data (mainly, the one 83 - * with ID EXOFS_SUPER_ID). This is where the in-memory superblock is stored 84 - * on disk. Right now it just has a magic value, which is basically a sanity 85 - * check on our ability to communicate with the object store. 86 */ 87 struct exofs_fscb { 88 __le64 s_nextid; /* Highest object ID used */ 89 - __le32 s_numfiles; /* Number of files on fs */ 90 __le16 s_magic; /* Magic signature */ 91 __le16 s_newfs; /* Non-zero if this is a new fs */ 92 - }; 93 94 /**************************************************************************** 95 * inode-related things ··· 205 #define EXOFS_DIR_REC_LEN(name_len) \ 206 (((name_len) + offsetof(struct exofs_dir_entry, name) + \ 207 EXOFS_DIR_ROUND) & ~EXOFS_DIR_ROUND) 208 - 209 - /************************* 210 - * function declarations * 211 - *************************/ 212 - /* osd.c */ 213 - void exofs_make_credential(u8 cred_a[OSD_CAP_LEN], 214 - const struct osd_obj_id *obj); 215 - 216 - int exofs_check_ok_resid(struct osd_request *or, u64 *in_resid, u64 *out_resid); 217 - static inline int exofs_check_ok(struct osd_request *or) 218 - { 219 - return exofs_check_ok_resid(or, NULL, NULL); 220 - } 221 - int exofs_sync_op(struct osd_request *or, int timeout, u8 *cred); 222 - int exofs_async_op(struct osd_request *or, 223 - osd_req_done_fn *async_done, void *caller_context, u8 *cred); 224 - 225 - int extract_attr_from_req(struct osd_request *or, struct osd_attr *attr); 226 227 #endif /*ifndef __EXOFS_COM_H__*/

··· 49 #define EXOFS_MIN_PID 0x10000 /* Smallest partition ID */ 50 #define EXOFS_OBJ_OFF 0x10000 /* offset for objects */ 51 #define EXOFS_SUPER_ID 0x10000 /* object ID for on-disk superblock */ 52 + #define EXOFS_DEVTABLE_ID 0x10001 /* object ID for on-disk device table */ 53 #define EXOFS_ROOT_ID 0x10002 /* object ID for root directory */ 54 55 /* exofs Application specific page/attribute */ ··· 78 #define EXOFS_SUPER_MAGIC 0x5DF5 79 80 /* 81 + * The file system control block - stored in object EXOFS_SUPER_ID's data. 82 + * This is where the in-memory superblock is stored on disk. 83 */ 84 + enum {EXOFS_FSCB_VER = 1, EXOFS_DT_VER = 1}; 85 struct exofs_fscb { 86 __le64 s_nextid; /* Highest object ID used */ 87 + __le64 s_numfiles; /* Number of files on fs */ 88 + __le32 s_version; /* == EXOFS_FSCB_VER */ 89 __le16 s_magic; /* Magic signature */ 90 __le16 s_newfs; /* Non-zero if this is a new fs */ 91 + 92 + /* From here on it's a static part, only written by mkexofs */ 93 + __le64 s_dev_table_oid; /* Reserved, not used */ 94 + __le64 s_dev_table_count; /* == 0 means no dev_table */ 95 + } __packed; 96 + 97 + /* 98 + * Describes the raid used in the FS. It is part of the device table. 99 + * This here is taken from the pNFS-objects definition. In exofs we 100 + * use one raid policy throughout the filesystem. (NOTE: the funny 101 + * alignment at beginning. We take care of it at exofs_device_table.) 102 + */ 103 + struct exofs_dt_data_map { 104 + __le32 cb_num_comps; 105 + __le64 cb_stripe_unit; 106 + __le32 cb_group_width; 107 + __le32 cb_group_depth; 108 + __le32 cb_mirror_cnt; 109 + __le32 cb_raid_algorithm; 110 + } __packed; 111 + 112 + /* 113 + * This is an osd device information descriptor. It is a single entry in 114 + * the exofs device table. It describes an osd target lun which 115 + * contains data belonging to this FS. 
(Same partition_id on all devices) 116 + */ 117 + struct exofs_dt_device_info { 118 + __le32 systemid_len; 119 + u8 systemid[OSD_SYSTEMID_LEN]; 120 + __le64 long_name_offset; /* If !0 then offset-in-file */ 121 + __le32 osdname_len; /* */ 122 + u8 osdname[44]; /* Embedded, usually an ASCII uuid */ 123 + } __packed; 124 + 125 + /* 126 + * The EXOFS device table - stored in object EXOFS_DEVTABLE_ID's data. 127 + * It contains the raid used for this multi-device FS and an array of 128 + * participating devices. 129 + */ 130 + struct exofs_device_table { 131 + __le32 dt_version; /* == EXOFS_DT_VER */ 132 + struct exofs_dt_data_map dt_data_map; /* Raid policy to use */ 133 + 134 + /* Reserved space for future use. Total including this: 135 + * (8 * sizeof(le64)) 136 + */ 137 + __le64 __Resurved[4]; 138 + 139 + __le64 dt_num_devices; /* Array size */ 140 + struct exofs_dt_device_info dt_dev_table[]; /* Array of devices */ 141 + } __packed; 142 143 /**************************************************************************** 144 * inode-related things ··· 154 #define EXOFS_DIR_REC_LEN(name_len) \ 155 (((name_len) + offsetof(struct exofs_dir_entry, name) + \ 156 EXOFS_DIR_ROUND) & ~EXOFS_DIR_ROUND) 157 158 #endif /*ifndef __EXOFS_COM_H__*/

+93 -4

fs/exofs/exofs.h

··· 30 * along with exofs; if not, write to the Free Software 31 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 32 */ 33 34 #include <linux/fs.h> 35 #include <linux/time.h> 36 #include "common.h" 37 38 - #ifndef __EXOFS_H__ 39 - #define __EXOFS_H__ 40 41 #define EXOFS_ERR(fmt, a...) printk(KERN_ERR "exofs: " fmt, ##a) 42 ··· 59 * our extension to the in-memory superblock 60 */ 61 struct exofs_sb_info { 62 - struct osd_dev *s_dev; /* returned by get_osd_dev */ 63 osd_id s_pid; /* partition ID of file system*/ 64 int s_timeout; /* timeout for OSD operations */ 65 uint64_t s_nextid; /* highest object ID used */ ··· 67 spinlock_t s_next_gen_lock; /* spinlock for gen # update */ 68 u32 s_next_generation; /* next gen # to use */ 69 atomic_t s_curr_pending; /* number of pending commands */ 70 - uint8_t s_cred[OSD_CAP_LEN]; /* all-powerful credential */ 71 }; 72 73 /* ··· 86 uint8_t i_cred[OSD_CAP_LEN];/* all-powerful credential */ 87 struct inode vfs_inode; /* normal in-memory inode */ 88 }; 89 90 /* 91 * our inode flags ··· 182 /************************* 183 * function declarations * 184 *************************/ 185 /* inode.c */ 186 void exofs_truncate(struct inode *inode); 187 int exofs_setattr(struct dentry *, struct iattr *); ··· 257 258 /* inode.c */ 259 extern const struct address_space_operations exofs_aops; 260 261 /* namei.c */ 262 extern const struct inode_operations exofs_dir_inode_operations;

··· 30 * along with exofs; if not, write to the Free Software 31 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 32 */ 33 + #ifndef __EXOFS_H__ 34 + #define __EXOFS_H__ 35 36 #include <linux/fs.h> 37 #include <linux/time.h> 38 #include "common.h" 39 40 + /* FIXME: Remove once pnfs hits mainline 41 + * #include <linux/exportfs/pnfs_osd_xdr.h> 42 + */ 43 + #include "pnfs.h" 44 45 #define EXOFS_ERR(fmt, a...) printk(KERN_ERR "exofs: " fmt, ##a) 46 ··· 55 * our extension to the in-memory superblock 56 */ 57 struct exofs_sb_info { 58 + struct exofs_fscb s_fscb; /* Written often, pre-allocate*/ 59 osd_id s_pid; /* partition ID of file system*/ 60 int s_timeout; /* timeout for OSD operations */ 61 uint64_t s_nextid; /* highest object ID used */ ··· 63 spinlock_t s_next_gen_lock; /* spinlock for gen # update */ 64 u32 s_next_generation; /* next gen # to use */ 65 atomic_t s_curr_pending; /* number of pending commands */ 66 + uint8_t s_cred[OSD_CAP_LEN]; /* credential for the fscb */ 67 + 68 + struct pnfs_osd_data_map data_map; /* Default raid to use */ 69 + unsigned s_numdevs; /* Num of devices in array */ 70 + struct osd_dev *s_ods[1]; /* Variable length, minimum 1 */ 71 }; 72 73 /* ··· 78 uint8_t i_cred[OSD_CAP_LEN];/* all-powerful credential */ 79 struct inode vfs_inode; /* normal in-memory inode */ 80 }; 81 + 82 + static inline osd_id exofs_oi_objno(struct exofs_i_info *oi) 83 + { 84 + return oi->vfs_inode.i_ino + EXOFS_OBJ_OFF; 85 + } 86 + 87 + struct exofs_io_state; 88 + typedef void (*exofs_io_done_fn)(struct exofs_io_state *or, void *private); 89 + 90 + struct exofs_io_state { 91 + struct kref kref; 92 + 93 + void *private; 94 + exofs_io_done_fn done; 95 + 96 + struct exofs_sb_info *sbi; 97 + struct osd_obj_id obj; 98 + u8 *cred; 99 + 100 + /* Global read/write IO*/ 101 + loff_t offset; 102 + unsigned long length; 103 + void *kern_buff; 104 + struct bio *bio; 105 + 106 + /* Attributes */ 107 + unsigned in_attr_len; 108 + struct osd_attr 
*in_attr; 109 + unsigned out_attr_len; 110 + struct osd_attr *out_attr; 111 + 112 + /* Variable array of size numdevs */ 113 + unsigned numdevs; 114 + struct exofs_per_dev_state { 115 + struct osd_request *or; 116 + struct bio *bio; 117 + } per_dev[]; 118 + }; 119 + 120 + static inline unsigned exofs_io_state_size(unsigned numdevs) 121 + { 122 + return sizeof(struct exofs_io_state) + 123 + sizeof(struct exofs_per_dev_state) * numdevs; 124 + } 125 126 /* 127 * our inode flags ··· 130 /************************* 131 * function declarations * 132 *************************/ 133 + 134 + /* ios.c */ 135 + void exofs_make_credential(u8 cred_a[OSD_CAP_LEN], 136 + const struct osd_obj_id *obj); 137 + int exofs_read_kern(struct osd_dev *od, u8 *cred, struct osd_obj_id *obj, 138 + u64 offset, void *p, unsigned length); 139 + 140 + int exofs_get_io_state(struct exofs_sb_info *sbi, struct exofs_io_state** ios); 141 + void exofs_put_io_state(struct exofs_io_state *ios); 142 + 143 + int exofs_check_io(struct exofs_io_state *ios, u64 *resid); 144 + 145 + int exofs_sbi_create(struct exofs_io_state *ios); 146 + int exofs_sbi_remove(struct exofs_io_state *ios); 147 + int exofs_sbi_write(struct exofs_io_state *ios); 148 + int exofs_sbi_read(struct exofs_io_state *ios); 149 + 150 + int extract_attr_from_ios(struct exofs_io_state *ios, struct osd_attr *attr); 151 + 152 + int exofs_oi_truncate(struct exofs_i_info *oi, u64 new_len); 153 + static inline int exofs_oi_write(struct exofs_i_info *oi, 154 + struct exofs_io_state *ios) 155 + { 156 + ios->obj.id = exofs_oi_objno(oi); 157 + ios->cred = oi->i_cred; 158 + return exofs_sbi_write(ios); 159 + } 160 + 161 + static inline int exofs_oi_read(struct exofs_i_info *oi, 162 + struct exofs_io_state *ios) 163 + { 164 + ios->obj.id = exofs_oi_objno(oi); 165 + ios->cred = oi->i_cred; 166 + return exofs_sbi_read(ios); 167 + } 168 + 169 /* inode.c */ 170 void exofs_truncate(struct inode *inode); 171 int exofs_setattr(struct dentry *, struct iattr *); 
··· 169 170 /* inode.c */ 171 extern const struct address_space_operations exofs_aops; 172 + extern const struct osd_attr g_attr_logical_length; 173 174 /* namei.c */ 175 extern const struct inode_operations exofs_dir_inode_operations;

+202 -207

fs/exofs/inode.c

··· 37 38 #include "exofs.h" 39 40 - #ifdef CONFIG_EXOFS_DEBUG 41 - # define EXOFS_DEBUG_OBJ_ISIZE 1 42 - #endif 43 44 struct page_collect { 45 struct exofs_sb_info *sbi; 46 struct request_queue *req_q; 47 struct inode *inode; 48 unsigned expected_pages; 49 50 struct bio *bio; 51 unsigned nr_pages; ··· 57 }; 58 59 static void _pcol_init(struct page_collect *pcol, unsigned expected_pages, 60 - struct inode *inode) 61 { 62 struct exofs_sb_info *sbi = inode->i_sb->s_fs_info; 63 64 pcol->sbi = sbi; 65 - pcol->req_q = osd_request_queue(sbi->s_dev); 66 pcol->inode = inode; 67 pcol->expected_pages = expected_pages; 68 69 pcol->bio = NULL; 70 pcol->nr_pages = 0; 71 pcol->length = 0; 72 pcol->pg_first = -1; 73 - 74 - EXOFS_DBGMSG("_pcol_init ino=0x%lx expected_pages=%u\n", inode->i_ino, 75 - expected_pages); 76 } 77 78 static void _pcol_reset(struct page_collect *pcol) ··· 84 pcol->nr_pages = 0; 85 pcol->length = 0; 86 pcol->pg_first = -1; 87 - EXOFS_DBGMSG("_pcol_reset ino=0x%lx expected_pages=%u\n", 88 - pcol->inode->i_ino, pcol->expected_pages); 89 90 /* this is probably the end of the loop but in writes 91 * it might not end here. don't be left with nothing 92 */ 93 if (!pcol->expected_pages) 94 - pcol->expected_pages = 128; 95 } 96 97 static int pcol_try_alloc(struct page_collect *pcol) 98 { 99 - int pages = min_t(unsigned, pcol->expected_pages, BIO_MAX_PAGES); 100 101 for (; pages; pages >>= 1) { 102 - pcol->bio = bio_alloc(GFP_KERNEL, pages); 103 if (likely(pcol->bio)) 104 return 0; 105 } 106 107 - EXOFS_ERR("Failed to kcalloc expected_pages=%u\n", 108 pcol->expected_pages); 109 return -ENOMEM; 110 } 111 112 static void pcol_free(struct page_collect *pcol) 113 { 114 - bio_put(pcol->bio); 115 - pcol->bio = NULL; 116 } 117 118 static int pcol_add_page(struct page_collect *pcol, struct page *page, ··· 179 /* Called at the end of reads, to optionally unlock pages and update their 180 * status. 
181 */ 182 - static int __readpages_done(struct osd_request *or, struct page_collect *pcol, 183 - bool do_unlock) 184 { 185 struct bio_vec *bvec; 186 int i; 187 u64 resid; 188 u64 good_bytes; 189 u64 length = 0; 190 - int ret = exofs_check_ok_resid(or, &resid, NULL); 191 - 192 - osd_end_request(or); 193 194 if (likely(!ret)) 195 good_bytes = pcol->length; 196 - else if (!resid) 197 - good_bytes = 0; 198 else 199 good_bytes = pcol->length - resid; 200 ··· 211 else 212 page_stat = ret; 213 214 - EXOFS_DBGMSG(" readpages_done(0x%lx, 0x%lx) %s\n", 215 inode->i_ino, page->index, 216 page_stat ? "bad_bytes" : "good_bytes"); 217 ··· 227 } 228 229 /* callback of async reads */ 230 - static void readpages_done(struct osd_request *or, void *p) 231 { 232 struct page_collect *pcol = p; 233 234 - __readpages_done(or, pcol, true); 235 atomic_dec(&pcol->sbi->s_curr_pending); 236 - kfree(p); 237 } 238 239 static void _unlock_pcol_pages(struct page_collect *pcol, int ret, int rw) ··· 251 252 unlock_page(page); 253 } 254 - pcol_free(pcol); 255 } 256 257 static int read_exec(struct page_collect *pcol, bool is_sync) 258 { 259 struct exofs_i_info *oi = exofs_i(pcol->inode); 260 - struct osd_obj_id obj = {pcol->sbi->s_pid, 261 - pcol->inode->i_ino + EXOFS_OBJ_OFF}; 262 - struct osd_request *or = NULL; 263 struct page_collect *pcol_copy = NULL; 264 - loff_t i_start = pcol->pg_first << PAGE_CACHE_SHIFT; 265 int ret; 266 267 if (!pcol->bio) ··· 266 /* see comment in _readpage() about sync reads */ 267 WARN_ON(is_sync && (pcol->nr_pages != 1)); 268 269 - or = osd_start_request(pcol->sbi->s_dev, GFP_KERNEL); 270 - if (unlikely(!or)) { 271 - ret = -ENOMEM; 272 - goto err; 273 - } 274 - 275 - osd_req_read(or, &obj, i_start, pcol->bio, pcol->length); 276 277 if (is_sync) { 278 - exofs_sync_op(or, pcol->sbi->s_timeout, oi->i_cred); 279 - return __readpages_done(or, pcol, false); 280 } 281 282 pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL); ··· 282 } 283 284 *pcol_copy = *pcol; 285 - ret = 
exofs_async_op(or, readpages_done, pcol_copy, oi->i_cred); 286 if (unlikely(ret)) 287 goto err; 288 289 atomic_inc(&pcol->sbi->s_curr_pending); 290 291 EXOFS_DBGMSG("read_exec obj=0x%llx start=0x%llx length=0x%lx\n", 292 - obj.id, _LLU(i_start), pcol->length); 293 294 /* pages ownership was passed to pcol_copy */ 295 _pcol_reset(pcol); ··· 300 err: 301 if (!is_sync) 302 _unlock_pcol_pages(pcol, ret, READ); 303 - else /* Pages unlocked by caller in sync mode only free bio */ 304 - pcol_free(pcol); 305 306 kfree(pcol_copy); 307 - if (or) 308 - osd_end_request(or); 309 return ret; 310 } 311 ··· 375 if (len != PAGE_CACHE_SIZE) 376 zero_user(page, len, PAGE_CACHE_SIZE - len); 377 378 - EXOFS_DBGMSG(" readpage_strip(0x%lx, 0x%lx) len=0x%zx\n", 379 inode->i_ino, page->index, len); 380 381 ret = pcol_add_page(pcol, page, len); 382 if (ret) { 383 - EXOFS_DBGMSG("Failed pcol_add_page pages[i]=%p " 384 "this_len=0x%zx nr_pages=%u length=0x%lx\n", 385 page, len, pcol->nr_pages, pcol->length); 386 ··· 424 425 _pcol_init(&pcol, 1, page->mapping->host); 426 427 - /* readpage_strip might call read_exec(,async) inside at several places 428 - * but this is safe for is_async=0 since read_exec will not do anything 429 - * when we have a single page. 430 */ 431 ret = readpage_strip(&pcol, page); 432 if (ret) { ··· 444 return _readpage(page, false); 445 } 446 447 - /* Callback for osd_write. 
All writes are asynchronouse */ 448 - static void writepages_done(struct osd_request *or, void *p) 449 { 450 struct page_collect *pcol = p; 451 struct bio_vec *bvec; ··· 453 u64 resid; 454 u64 good_bytes; 455 u64 length = 0; 456 457 - int ret = exofs_check_ok_resid(or, NULL, &resid); 458 - 459 - osd_end_request(or); 460 atomic_dec(&pcol->sbi->s_curr_pending); 461 462 if (likely(!ret)) 463 good_bytes = pcol->length; 464 - else if (!resid) 465 - good_bytes = 0; 466 else 467 good_bytes = pcol->length - resid; 468 ··· 482 483 update_write_page(page, page_stat); 484 unlock_page(page); 485 - EXOFS_DBGMSG(" writepages_done(0x%lx, 0x%lx) status=%d\n", 486 inode->i_ino, page->index, page_stat); 487 488 length += bvec->bv_len; ··· 496 static int write_exec(struct page_collect *pcol) 497 { 498 struct exofs_i_info *oi = exofs_i(pcol->inode); 499 - struct osd_obj_id obj = {pcol->sbi->s_pid, 500 - pcol->inode->i_ino + EXOFS_OBJ_OFF}; 501 - struct osd_request *or = NULL; 502 struct page_collect *pcol_copy = NULL; 503 - loff_t i_start = pcol->pg_first << PAGE_CACHE_SHIFT; 504 int ret; 505 506 if (!pcol->bio) 507 return 0; 508 - 509 - or = osd_start_request(pcol->sbi->s_dev, GFP_KERNEL); 510 - if (unlikely(!or)) { 511 - EXOFS_ERR("write_exec: Faild to osd_start_request()\n"); 512 - ret = -ENOMEM; 513 - goto err; 514 - } 515 516 pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL); 517 if (!pcol_copy) { ··· 513 *pcol_copy = *pcol; 514 515 pcol_copy->bio->bi_rw |= (1 << BIO_RW); /* FIXME: bio_set_dir() */ 516 - osd_req_write(or, &obj, i_start, pcol_copy->bio, pcol_copy->length); 517 - ret = exofs_async_op(or, writepages_done, pcol_copy, oi->i_cred); 518 if (unlikely(ret)) { 519 - EXOFS_ERR("write_exec: exofs_async_op() Faild\n"); 520 goto err; 521 } 522 523 atomic_inc(&pcol->sbi->s_curr_pending); 524 EXOFS_DBGMSG("write_exec(0x%lx, 0x%llx) start=0x%llx length=0x%lx\n", 525 - pcol->inode->i_ino, pcol->pg_first, _LLU(i_start), 526 pcol->length); 527 /* pages ownership was passed to 
pcol_copy */ 528 _pcol_reset(pcol); ··· 536 537 err: 538 _unlock_pcol_pages(pcol, ret, WRITE); 539 kfree(pcol_copy); 540 - if (or) 541 - osd_end_request(or); 542 return ret; 543 } 544 ··· 582 if (PageError(page)) 583 ClearPageError(page); 584 unlock_page(page); 585 return 0; 586 } 587 } ··· 599 ret = write_exec(pcol); 600 if (unlikely(ret)) 601 goto fail; 602 goto try_again; 603 } 604 ··· 611 goto fail; 612 } 613 614 - EXOFS_DBGMSG(" writepage_strip(0x%lx, 0x%lx) len=0x%zx\n", 615 inode->i_ino, page->index, len); 616 617 ret = pcol_add_page(pcol, page, len); ··· 636 return 0; 637 638 fail: 639 set_bit(AS_EIO, &page->mapping->flags); 640 unlock_page(page); 641 return ret; ··· 656 wbc->range_end >> PAGE_CACHE_SHIFT; 657 658 if (start || end) 659 - expected_pages = min(end - start + 1, 32L); 660 else 661 expected_pages = mapping->nrpages; 662 663 - EXOFS_DBGMSG("inode(0x%lx) wbc->start=0x%llx wbc->end=0x%llx" 664 - " m->nrpages=%lu start=0x%lx end=0x%lx\n", 665 mapping->host->i_ino, wbc->range_start, wbc->range_end, 666 - mapping->nrpages, start, end); 667 668 _pcol_init(&pcol, expected_pages, mapping->host); 669 ··· 778 const struct osd_attr g_attr_logical_length = ATTR_DEF( 779 OSD_APAGE_OBJECT_INFORMATION, OSD_ATTR_OI_LOGICAL_LENGTH, 8); 780 781 /* 782 * Truncate a file to the specified size - all we have to do is set the size 783 * attribute. We make sure the object exists first. 
784 */ 785 void exofs_truncate(struct inode *inode) 786 { 787 - struct exofs_sb_info *sbi = inode->i_sb->s_fs_info; 788 struct exofs_i_info *oi = exofs_i(inode); 789 - struct osd_obj_id obj = {sbi->s_pid, inode->i_ino + EXOFS_OBJ_OFF}; 790 - struct osd_request *or; 791 - struct osd_attr attr; 792 - loff_t isize = i_size_read(inode); 793 - __be64 newsize; 794 int ret; 795 796 if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ··· 809 return; 810 if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) 811 return; 812 - inode->i_mtime = inode->i_ctime = CURRENT_TIME; 813 - 814 - nobh_truncate_page(inode->i_mapping, isize, exofs_get_block); 815 - 816 - or = osd_start_request(sbi->s_dev, GFP_KERNEL); 817 - if (unlikely(!or)) { 818 - EXOFS_ERR("ERROR: exofs_truncate: osd_start_request failed\n"); 819 - goto fail; 820 - } 821 - 822 - osd_req_set_attributes(or, &obj); 823 - 824 - newsize = cpu_to_be64((u64)isize); 825 - attr = g_attr_logical_length; 826 - attr.val_ptr = &newsize; 827 - osd_req_add_set_attr_list(or, &attr, 1); 828 829 /* if we are about to truncate an object, and it hasn't been 830 * created yet, wait ··· 816 if (unlikely(wait_obj_created(oi))) 817 goto fail; 818 819 - ret = exofs_sync_op(or, sbi->s_timeout, oi->i_cred); 820 - osd_end_request(or); 821 if (ret) 822 goto fail; 823 ··· 846 847 /* 848 * Read an inode from the OSD, and return it as is. We also return the size 849 - * attribute in the 'sanity' argument if we got compiled with debugging turned 850 - * on. 
851 */ 852 static int exofs_get_inode(struct super_block *sb, struct exofs_i_info *oi, 853 - struct exofs_fcb *inode, uint64_t *sanity) 854 { 855 struct exofs_sb_info *sbi = sb->s_fs_info; 856 - struct osd_request *or; 857 - struct osd_attr attr; 858 - struct osd_obj_id obj = {sbi->s_pid, 859 - oi->vfs_inode.i_ino + EXOFS_OBJ_OFF}; 860 int ret; 861 862 - exofs_make_credential(oi->i_cred, &obj); 863 - 864 - or = osd_start_request(sbi->s_dev, GFP_KERNEL); 865 - if (unlikely(!or)) { 866 - EXOFS_ERR("exofs_get_inode: osd_start_request failed.\n"); 867 - return -ENOMEM; 868 } 869 - osd_req_get_attributes(or, &obj); 870 871 - /* we need the inode attribute */ 872 - osd_req_add_get_attr_list(or, &g_attr_inode_data, 1); 873 874 - #ifdef EXOFS_DEBUG_OBJ_ISIZE 875 - /* we get the size attributes to do a sanity check */ 876 - osd_req_add_get_attr_list(or, &g_attr_logical_length, 1); 877 - #endif 878 879 - ret = exofs_sync_op(or, sbi->s_timeout, oi->i_cred); 880 if (ret) 881 goto out; 882 883 - attr = g_attr_inode_data; 884 - ret = extract_attr_from_req(or, &attr); 885 if (ret) { 886 - EXOFS_ERR("exofs_get_inode: extract_attr_from_req failed\n"); 887 goto out; 888 } 889 890 - WARN_ON(attr.len != EXOFS_INO_ATTR_SIZE); 891 - memcpy(inode, attr.val_ptr, EXOFS_INO_ATTR_SIZE); 892 - 893 - #ifdef EXOFS_DEBUG_OBJ_ISIZE 894 - attr = g_attr_logical_length; 895 - ret = extract_attr_from_req(or, &attr); 896 if (ret) { 897 - EXOFS_ERR("ERROR: extract attr from or failed\n"); 898 goto out; 899 } 900 - *sanity = get_unaligned_be64(attr.val_ptr); 901 - #endif 902 903 out: 904 - osd_end_request(or); 905 return ret; 906 } 907 908 /* 909 * Fill in an inode read from the OSD and set it up for use 910 */ ··· 910 struct exofs_i_info *oi; 911 struct exofs_fcb fcb; 912 struct inode *inode; 913 - uint64_t uninitialized_var(sanity); 914 int ret; 915 916 inode = iget_locked(sb, ino); ··· 919 if (!(inode->i_state & I_NEW)) 920 return inode; 921 oi = exofs_i(inode); 922 923 /* read the inode from the osd 
*/ 924 - ret = exofs_get_inode(sb, oi, &fcb, &sanity); 925 if (ret) 926 goto bad_inode; 927 928 - init_waitqueue_head(&oi->i_wq); 929 set_obj_created(oi); 930 931 /* copy stuff from on-disk struct to in-memory struct */ ··· 943 inode->i_blkbits = EXOFS_BLKSHIFT; 944 inode->i_generation = le32_to_cpu(fcb.i_generation); 945 946 - #ifdef EXOFS_DEBUG_OBJ_ISIZE 947 - if ((inode->i_size != sanity) && 948 (!exofs_inode_is_fast_symlink(inode))) { 949 - EXOFS_ERR("WARNING: Size of object from inode and " 950 - "attributes differ (%lld != %llu)\n", 951 - inode->i_size, _LLU(sanity)); 952 } 953 - #endif 954 955 oi->i_dir_start_lookup = 0; 956 ··· 1014 * set the obj_created flag so that other methods know that the object exists on 1015 * the OSD. 1016 */ 1017 - static void create_done(struct osd_request *or, void *p) 1018 { 1019 struct inode *inode = p; 1020 struct exofs_i_info *oi = exofs_i(inode); 1021 struct exofs_sb_info *sbi = inode->i_sb->s_fs_info; 1022 int ret; 1023 1024 - ret = exofs_check_ok(or); 1025 - osd_end_request(or); 1026 atomic_dec(&sbi->s_curr_pending); 1027 1028 if (unlikely(ret)) { 1029 EXOFS_ERR("object=0x%llx creation faild in pid=0x%llx", 1030 - _LLU(sbi->s_pid), _LLU(inode->i_ino + EXOFS_OBJ_OFF)); 1031 - make_bad_inode(inode); 1032 - } else 1033 - set_obj_created(oi); 1034 1035 atomic_dec(&inode->i_count); 1036 wake_up(&oi->i_wq); ··· 1052 struct inode *inode; 1053 struct exofs_i_info *oi; 1054 struct exofs_sb_info *sbi; 1055 - struct osd_request *or; 1056 - struct osd_obj_id obj; 1057 int ret; 1058 1059 sb = dir->i_sb; ··· 1061 return ERR_PTR(-ENOMEM); 1062 1063 oi = exofs_i(inode); 1064 1065 - init_waitqueue_head(&oi->i_wq); 1066 set_obj_2bcreated(oi); 1067 1068 sbi = sb->s_fs_info; ··· 1089 1090 mark_inode_dirty(inode); 1091 1092 - obj.partition = sbi->s_pid; 1093 - obj.id = inode->i_ino + EXOFS_OBJ_OFF; 1094 - exofs_make_credential(oi->i_cred, &obj); 1095 - 1096 - or = osd_start_request(sbi->s_dev, GFP_KERNEL); 1097 - if (unlikely(!or)) { 1098 - 
EXOFS_ERR("exofs_new_inode: osd_start_request failed\n"); 1099 - return ERR_PTR(-ENOMEM); 1100 } 1101 1102 - osd_req_create_object(or, &obj); 1103 1104 /* increment the refcount so that the inode will still be around when we 1105 * reach the callback 1106 */ 1107 atomic_inc(&inode->i_count); 1108 1109 - ret = exofs_async_op(or, create_done, inode, oi->i_cred); 1110 if (ret) { 1111 atomic_dec(&inode->i_count); 1112 - osd_end_request(or); 1113 - return ERR_PTR(-EIO); 1114 } 1115 atomic_inc(&sbi->s_curr_pending); 1116 ··· 1128 /* 1129 * Callback function from exofs_update_inode(). 1130 */ 1131 - static void updatei_done(struct osd_request *or, void *p) 1132 { 1133 struct updatei_args *args = p; 1134 1135 - osd_end_request(or); 1136 1137 atomic_dec(&args->sbi->s_curr_pending); 1138 ··· 1148 struct exofs_i_info *oi = exofs_i(inode); 1149 struct super_block *sb = inode->i_sb; 1150 struct exofs_sb_info *sbi = sb->s_fs_info; 1151 - struct osd_obj_id obj = {sbi->s_pid, inode->i_ino + EXOFS_OBJ_OFF}; 1152 - struct osd_request *or; 1153 struct osd_attr attr; 1154 struct exofs_fcb *fcb; 1155 struct updatei_args *args; ··· 1185 } else 1186 memcpy(fcb->i_data, oi->i_data, sizeof(fcb->i_data)); 1187 1188 - or = osd_start_request(sbi->s_dev, GFP_KERNEL); 1189 - if (unlikely(!or)) { 1190 - EXOFS_ERR("exofs_update_inode: osd_start_request failed.\n"); 1191 - ret = -ENOMEM; 1192 goto free_args; 1193 } 1194 1195 - osd_req_set_attributes(or, &obj); 1196 - 1197 attr = g_attr_inode_data; 1198 attr.val_ptr = fcb; 1199 - osd_req_add_set_attr_list(or, &attr, 1); 1200 1201 if (!obj_created(oi)) { 1202 EXOFS_DBGMSG("!obj_created\n"); ··· 1203 EXOFS_DBGMSG("wait_event done\n"); 1204 } 1205 1206 - if (do_sync) { 1207 - ret = exofs_sync_op(or, sbi->s_timeout, oi->i_cred); 1208 - osd_end_request(or); 1209 - goto free_args; 1210 - } else { 1211 args->sbi = sbi; 1212 1213 - ret = exofs_async_op(or, updatei_done, args, oi->i_cred); 1214 - if (ret) { 1215 - osd_end_request(or); 1216 - goto free_args; 
1217 - } 1218 atomic_inc(&sbi->s_curr_pending); 1219 goto out; /* deallocation in updatei_done */ 1220 } 1221 1222 free_args: 1223 kfree(args); 1224 out: ··· 1232 * Callback function from exofs_delete_inode() - don't have much cleaning up to 1233 * do. 1234 */ 1235 - static void delete_done(struct osd_request *or, void *p) 1236 { 1237 - struct exofs_sb_info *sbi; 1238 - osd_end_request(or); 1239 - sbi = p; 1240 atomic_dec(&sbi->s_curr_pending); 1241 } 1242 ··· 1251 struct exofs_i_info *oi = exofs_i(inode); 1252 struct super_block *sb = inode->i_sb; 1253 struct exofs_sb_info *sbi = sb->s_fs_info; 1254 - struct osd_obj_id obj = {sbi->s_pid, inode->i_ino + EXOFS_OBJ_OFF}; 1255 - struct osd_request *or; 1256 int ret; 1257 1258 truncate_inode_pages(&inode->i_data, 0); ··· 1268 1269 clear_inode(inode); 1270 1271 - or = osd_start_request(sbi->s_dev, GFP_KERNEL); 1272 - if (unlikely(!or)) { 1273 - EXOFS_ERR("exofs_delete_inode: osd_start_request failed\n"); 1274 return; 1275 } 1276 - 1277 - osd_req_remove_object(or, &obj); 1278 1279 /* if we are deleting an obj that hasn't been created yet, wait */ 1280 if (!obj_created(oi)) { ··· 1280 wait_event(oi->i_wq, obj_created(oi)); 1281 } 1282 1283 - ret = exofs_async_op(or, delete_done, sbi, oi->i_cred); 1284 if (ret) { 1285 - EXOFS_ERR( 1286 - "ERROR: @exofs_delete_inode exofs_async_op failed\n"); 1287 - osd_end_request(or); 1288 return; 1289 } 1290 atomic_inc(&sbi->s_curr_pending);

··· 37 38 #include "exofs.h" 39 40 + #define EXOFS_DBGMSG2(M...) do {} while (0) 41 + 42 + enum { BIO_MAX_PAGES_KMALLOC = 43 + (PAGE_SIZE - sizeof(struct bio)) / sizeof(struct bio_vec), 44 + }; 45 46 struct page_collect { 47 struct exofs_sb_info *sbi; 48 struct request_queue *req_q; 49 struct inode *inode; 50 unsigned expected_pages; 51 + struct exofs_io_state *ios; 52 53 struct bio *bio; 54 unsigned nr_pages; ··· 54 }; 55 56 static void _pcol_init(struct page_collect *pcol, unsigned expected_pages, 57 + struct inode *inode) 58 { 59 struct exofs_sb_info *sbi = inode->i_sb->s_fs_info; 60 61 pcol->sbi = sbi; 62 + /* Create master bios on first Q, later on cloning, each clone will be 63 + * allocated on it's destination Q 64 + */ 65 + pcol->req_q = osd_request_queue(sbi->s_ods[0]); 66 pcol->inode = inode; 67 pcol->expected_pages = expected_pages; 68 69 + pcol->ios = NULL; 70 pcol->bio = NULL; 71 pcol->nr_pages = 0; 72 pcol->length = 0; 73 pcol->pg_first = -1; 74 } 75 76 static void _pcol_reset(struct page_collect *pcol) ··· 80 pcol->nr_pages = 0; 81 pcol->length = 0; 82 pcol->pg_first = -1; 83 + pcol->ios = NULL; 84 85 /* this is probably the end of the loop but in writes 86 * it might not end here. 
don't be left with nothing 87 */ 88 if (!pcol->expected_pages) 89 + pcol->expected_pages = BIO_MAX_PAGES_KMALLOC; 90 } 91 92 static int pcol_try_alloc(struct page_collect *pcol) 93 { 94 + int pages = min_t(unsigned, pcol->expected_pages, 95 + BIO_MAX_PAGES_KMALLOC); 96 + 97 + if (!pcol->ios) { /* First time allocate io_state */ 98 + int ret = exofs_get_io_state(pcol->sbi, &pcol->ios); 99 + 100 + if (ret) 101 + return ret; 102 + } 103 104 for (; pages; pages >>= 1) { 105 + pcol->bio = bio_kmalloc(GFP_KERNEL, pages); 106 if (likely(pcol->bio)) 107 return 0; 108 } 109 110 + EXOFS_ERR("Failed to bio_kmalloc expected_pages=%u\n", 111 pcol->expected_pages); 112 return -ENOMEM; 113 } 114 115 static void pcol_free(struct page_collect *pcol) 116 { 117 + if (pcol->bio) { 118 + bio_put(pcol->bio); 119 + pcol->bio = NULL; 120 + } 121 + 122 + if (pcol->ios) { 123 + exofs_put_io_state(pcol->ios); 124 + pcol->ios = NULL; 125 + } 126 } 127 128 static int pcol_add_page(struct page_collect *pcol, struct page *page, ··· 161 /* Called at the end of reads, to optionally unlock pages and update their 162 * status. 163 */ 164 + static int __readpages_done(struct page_collect *pcol, bool do_unlock) 165 { 166 struct bio_vec *bvec; 167 int i; 168 u64 resid; 169 u64 good_bytes; 170 u64 length = 0; 171 + int ret = exofs_check_io(pcol->ios, &resid); 172 173 if (likely(!ret)) 174 good_bytes = pcol->length; 175 else 176 good_bytes = pcol->length - resid; 177 ··· 198 else 199 page_stat = ret; 200 201 + EXOFS_DBGMSG2(" readpages_done(0x%lx, 0x%lx) %s\n", 202 inode->i_ino, page->index, 203 page_stat ? 
"bad_bytes" : "good_bytes"); 204 ··· 214 } 215 216 /* callback of async reads */ 217 + static void readpages_done(struct exofs_io_state *ios, void *p) 218 { 219 struct page_collect *pcol = p; 220 221 + __readpages_done(pcol, true); 222 atomic_dec(&pcol->sbi->s_curr_pending); 223 + kfree(pcol); 224 } 225 226 static void _unlock_pcol_pages(struct page_collect *pcol, int ret, int rw) ··· 238 239 unlock_page(page); 240 } 241 } 242 243 static int read_exec(struct page_collect *pcol, bool is_sync) 244 { 245 struct exofs_i_info *oi = exofs_i(pcol->inode); 246 + struct exofs_io_state *ios = pcol->ios; 247 struct page_collect *pcol_copy = NULL; 248 int ret; 249 250 if (!pcol->bio) ··· 257 /* see comment in _readpage() about sync reads */ 258 WARN_ON(is_sync && (pcol->nr_pages != 1)); 259 260 + ios->bio = pcol->bio; 261 + ios->length = pcol->length; 262 + ios->offset = pcol->pg_first << PAGE_CACHE_SHIFT; 263 264 if (is_sync) { 265 + exofs_oi_read(oi, pcol->ios); 266 + return __readpages_done(pcol, false); 267 } 268 269 pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL); ··· 277 } 278 279 *pcol_copy = *pcol; 280 + ios->done = readpages_done; 281 + ios->private = pcol_copy; 282 + ret = exofs_oi_read(oi, ios); 283 if (unlikely(ret)) 284 goto err; 285 286 atomic_inc(&pcol->sbi->s_curr_pending); 287 288 EXOFS_DBGMSG("read_exec obj=0x%llx start=0x%llx length=0x%lx\n", 289 + ios->obj.id, _LLU(ios->offset), pcol->length); 290 291 /* pages ownership was passed to pcol_copy */ 292 _pcol_reset(pcol); ··· 293 err: 294 if (!is_sync) 295 _unlock_pcol_pages(pcol, ret, READ); 296 + 297 + pcol_free(pcol); 298 299 kfree(pcol_copy); 300 return ret; 301 } 302 ··· 370 if (len != PAGE_CACHE_SIZE) 371 zero_user(page, len, PAGE_CACHE_SIZE - len); 372 373 + EXOFS_DBGMSG2(" readpage_strip(0x%lx, 0x%lx) len=0x%zx\n", 374 inode->i_ino, page->index, len); 375 376 ret = pcol_add_page(pcol, page, len); 377 if (ret) { 378 + EXOFS_DBGMSG2("Failed pcol_add_page pages[i]=%p " 379 "this_len=0x%zx nr_pages=%u 
length=0x%lx\n", 380 page, len, pcol->nr_pages, pcol->length); 381 ··· 419 420 _pcol_init(&pcol, 1, page->mapping->host); 421 422 + /* readpage_strip might call read_exec(,is_sync==false) at several 423 + * places but not if we have a single page. 424 */ 425 ret = readpage_strip(&pcol, page); 426 if (ret) { ··· 440 return _readpage(page, false); 441 } 442 443 + /* Callback for osd_write. All writes are asynchronous */ 444 + static void writepages_done(struct exofs_io_state *ios, void *p) 445 { 446 struct page_collect *pcol = p; 447 struct bio_vec *bvec; ··· 449 u64 resid; 450 u64 good_bytes; 451 u64 length = 0; 452 + int ret = exofs_check_io(ios, &resid); 453 454 atomic_dec(&pcol->sbi->s_curr_pending); 455 456 if (likely(!ret)) 457 good_bytes = pcol->length; 458 else 459 good_bytes = pcol->length - resid; 460 ··· 482 483 update_write_page(page, page_stat); 484 unlock_page(page); 485 + EXOFS_DBGMSG2(" writepages_done(0x%lx, 0x%lx) status=%d\n", 486 inode->i_ino, page->index, page_stat); 487 488 length += bvec->bv_len; ··· 496 static int write_exec(struct page_collect *pcol) 497 { 498 struct exofs_i_info *oi = exofs_i(pcol->inode); 499 + struct exofs_io_state *ios = pcol->ios; 500 struct page_collect *pcol_copy = NULL; 501 int ret; 502 503 if (!pcol->bio) 504 return 0; 505 506 pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL); 507 if (!pcol_copy) { ··· 523 *pcol_copy = *pcol; 524 525 pcol_copy->bio->bi_rw |= (1 << BIO_RW); /* FIXME: bio_set_dir() */ 526 + 527 + ios->bio = pcol_copy->bio; 528 + ios->offset = pcol_copy->pg_first << PAGE_CACHE_SHIFT; 529 + ios->length = pcol_copy->length; 530 + ios->done = writepages_done; 531 + ios->private = pcol_copy; 532 + 533 + ret = exofs_oi_write(oi, ios); 534 if (unlikely(ret)) { 535 + EXOFS_ERR("write_exec: exofs_oi_write() Faild\n"); 536 goto err; 537 } 538 539 atomic_inc(&pcol->sbi->s_curr_pending); 540 EXOFS_DBGMSG("write_exec(0x%lx, 0x%llx) start=0x%llx length=0x%lx\n", 541 + pcol->inode->i_ino, pcol->pg_first, 
_LLU(ios->offset), 542 pcol->length); 543 /* pages ownership was passed to pcol_copy */ 544 _pcol_reset(pcol); ··· 540 541 err: 542 _unlock_pcol_pages(pcol, ret, WRITE); 543 + pcol_free(pcol); 544 kfree(pcol_copy); 545 + 546 return ret; 547 } 548 ··· 586 if (PageError(page)) 587 ClearPageError(page); 588 unlock_page(page); 589 + EXOFS_DBGMSG("writepage_strip(0x%lx, 0x%lx) " 590 + "outside the limits\n", 591 + inode->i_ino, page->index); 592 return 0; 593 } 594 } ··· 600 ret = write_exec(pcol); 601 if (unlikely(ret)) 602 goto fail; 603 + 604 + EXOFS_DBGMSG("writepage_strip(0x%lx, 0x%lx) Discontinuity\n", 605 + inode->i_ino, page->index); 606 goto try_again; 607 } 608 ··· 609 goto fail; 610 } 611 612 + EXOFS_DBGMSG2(" writepage_strip(0x%lx, 0x%lx) len=0x%zx\n", 613 inode->i_ino, page->index, len); 614 615 ret = pcol_add_page(pcol, page, len); ··· 634 return 0; 635 636 fail: 637 + EXOFS_DBGMSG("Error: writepage_strip(0x%lx, 0x%lx)=>%d\n", 638 + inode->i_ino, page->index, ret); 639 set_bit(AS_EIO, &page->mapping->flags); 640 unlock_page(page); 641 return ret; ··· 652 wbc->range_end >> PAGE_CACHE_SHIFT; 653 654 if (start || end) 655 + expected_pages = end - start + 1; 656 else 657 expected_pages = mapping->nrpages; 658 659 + if (expected_pages < 32L) 660 + expected_pages = 32L; 661 + 662 + EXOFS_DBGMSG("inode(0x%lx) wbc->start=0x%llx wbc->end=0x%llx " 663 + "nrpages=%lu start=0x%lx end=0x%lx expected_pages=%ld\n", 664 mapping->host->i_ino, wbc->range_start, wbc->range_end, 665 + mapping->nrpages, start, end, expected_pages); 666 667 _pcol_init(&pcol, expected_pages, mapping->host); 668 ··· 771 const struct osd_attr g_attr_logical_length = ATTR_DEF( 772 OSD_APAGE_OBJECT_INFORMATION, OSD_ATTR_OI_LOGICAL_LENGTH, 8); 773 774 + static int _do_truncate(struct inode *inode) 775 + { 776 + struct exofs_i_info *oi = exofs_i(inode); 777 + loff_t isize = i_size_read(inode); 778 + int ret; 779 + 780 + inode->i_mtime = inode->i_ctime = CURRENT_TIME; 781 + 782 + 
nobh_truncate_page(inode->i_mapping, isize, exofs_get_block); 783 + 784 + ret = exofs_oi_truncate(oi, (u64)isize); 785 + EXOFS_DBGMSG("(0x%lx) size=0x%llx\n", inode->i_ino, isize); 786 + return ret; 787 + } 788 + 789 /* 790 * Truncate a file to the specified size - all we have to do is set the size 791 * attribute. We make sure the object exists first. 792 */ 793 void exofs_truncate(struct inode *inode) 794 { 795 struct exofs_i_info *oi = exofs_i(inode); 796 int ret; 797 798 if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ··· 793 return; 794 if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) 795 return; 796 797 /* if we are about to truncate an object, and it hasn't been 798 * created yet, wait ··· 816 if (unlikely(wait_obj_created(oi))) 817 goto fail; 818 819 + ret = _do_truncate(inode); 820 if (ret) 821 goto fail; 822 ··· 847 848 /* 849 * Read an inode from the OSD, and return it as is. We also return the size 850 + * attribute in the 'obj_size' argument. 851 */ 852 static int exofs_get_inode(struct super_block *sb, struct exofs_i_info *oi, 853 + struct exofs_fcb *inode, uint64_t *obj_size) 854 { 855 struct exofs_sb_info *sbi = sb->s_fs_info; 856 + struct osd_attr attrs[2]; 857 + struct exofs_io_state *ios; 858 int ret; 859 860 + *obj_size = ~0; 861 + ret = exofs_get_io_state(sbi, &ios); 862 + if (unlikely(ret)) { 863 + EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__); 864 + return ret; 865 } 866 867 + ios->obj.id = exofs_oi_objno(oi); 868 + exofs_make_credential(oi->i_cred, &ios->obj); 869 + ios->cred = oi->i_cred; 870 871 + attrs[0] = g_attr_inode_data; 872 + attrs[1] = g_attr_logical_length; 873 + ios->in_attr = attrs; 874 + ios->in_attr_len = ARRAY_SIZE(attrs); 875 876 + ret = exofs_sbi_read(ios); 877 if (ret) 878 goto out; 879 880 + ret = extract_attr_from_ios(ios, &attrs[0]); 881 if (ret) { 882 + EXOFS_ERR("%s: extract_attr of inode_data failed\n", __func__); 883 goto out; 884 } 885 + WARN_ON(attrs[0].len != EXOFS_INO_ATTR_SIZE); 886 + memcpy(inode, 
attrs[0].val_ptr, EXOFS_INO_ATTR_SIZE); 887 888 + ret = extract_attr_from_ios(ios, &attrs[1]); 889 if (ret) { 890 + EXOFS_ERR("%s: extract_attr of logical_length failed\n", 891 + __func__); 892 goto out; 893 } 894 + *obj_size = get_unaligned_be64(attrs[1].val_ptr); 895 896 out: 897 + exofs_put_io_state(ios); 898 return ret; 899 } 900 901 + static void __oi_init(struct exofs_i_info *oi) 902 + { 903 + init_waitqueue_head(&oi->i_wq); 904 + oi->i_flags = 0; 905 + } 906 /* 907 * Fill in an inode read from the OSD and set it up for use 908 */ ··· 914 struct exofs_i_info *oi; 915 struct exofs_fcb fcb; 916 struct inode *inode; 917 + uint64_t obj_size; 918 int ret; 919 920 inode = iget_locked(sb, ino); ··· 923 if (!(inode->i_state & I_NEW)) 924 return inode; 925 oi = exofs_i(inode); 926 + __oi_init(oi); 927 928 /* read the inode from the osd */ 929 + ret = exofs_get_inode(sb, oi, &fcb, &obj_size); 930 if (ret) 931 goto bad_inode; 932 933 set_obj_created(oi); 934 935 /* copy stuff from on-disk struct to in-memory struct */ ··· 947 inode->i_blkbits = EXOFS_BLKSHIFT; 948 inode->i_generation = le32_to_cpu(fcb.i_generation); 949 950 + if ((inode->i_size != obj_size) && 951 (!exofs_inode_is_fast_symlink(inode))) { 952 + EXOFS_ERR("WARNING: Size of inode=%llu != object=%llu\n", 953 + inode->i_size, _LLU(obj_size)); 954 + /* FIXME: call exofs_inode_recovery() */ 955 } 956 957 oi->i_dir_start_lookup = 0; 958 ··· 1020 * set the obj_created flag so that other methods know that the object exists on 1021 * the OSD. 
1022 */ 1023 + static void create_done(struct exofs_io_state *ios, void *p) 1024 { 1025 struct inode *inode = p; 1026 struct exofs_i_info *oi = exofs_i(inode); 1027 struct exofs_sb_info *sbi = inode->i_sb->s_fs_info; 1028 int ret; 1029 1030 + ret = exofs_check_io(ios, NULL); 1031 + exofs_put_io_state(ios); 1032 + 1033 atomic_dec(&sbi->s_curr_pending); 1034 1035 if (unlikely(ret)) { 1036 EXOFS_ERR("object=0x%llx creation faild in pid=0x%llx", 1037 + _LLU(exofs_oi_objno(oi)), _LLU(sbi->s_pid)); 1038 + /*TODO: When FS is corrupted creation can fail, object already 1039 + * exist. Get rid of this asynchronous creation, if exist 1040 + * increment the obj counter and try the next object. Until we 1041 + * succeed. All these dangling objects will be made into lost 1042 + * files by chkfs.exofs 1043 + */ 1044 + } 1045 + 1046 + set_obj_created(oi); 1047 1048 atomic_dec(&inode->i_count); 1049 wake_up(&oi->i_wq); ··· 1051 struct inode *inode; 1052 struct exofs_i_info *oi; 1053 struct exofs_sb_info *sbi; 1054 + struct exofs_io_state *ios; 1055 int ret; 1056 1057 sb = dir->i_sb; ··· 1061 return ERR_PTR(-ENOMEM); 1062 1063 oi = exofs_i(inode); 1064 + __oi_init(oi); 1065 1066 set_obj_2bcreated(oi); 1067 1068 sbi = sb->s_fs_info; ··· 1089 1090 mark_inode_dirty(inode); 1091 1092 + ret = exofs_get_io_state(sbi, &ios); 1093 + if (unlikely(ret)) { 1094 + EXOFS_ERR("exofs_new_inode: exofs_get_io_state failed\n"); 1095 + return ERR_PTR(ret); 1096 } 1097 1098 + ios->obj.id = exofs_oi_objno(oi); 1099 + exofs_make_credential(oi->i_cred, &ios->obj); 1100 1101 /* increment the refcount so that the inode will still be around when we 1102 * reach the callback 1103 */ 1104 atomic_inc(&inode->i_count); 1105 1106 + ios->done = create_done; 1107 + ios->private = inode; 1108 + ios->cred = oi->i_cred; 1109 + ret = exofs_sbi_create(ios); 1110 if (ret) { 1111 atomic_dec(&inode->i_count); 1112 + exofs_put_io_state(ios); 1113 + return ERR_PTR(ret); 1114 } 1115 atomic_inc(&sbi->s_curr_pending); 1116 ··· 
1128 /* 1129 * Callback function from exofs_update_inode(). 1130 */ 1131 + static void updatei_done(struct exofs_io_state *ios, void *p) 1132 { 1133 struct updatei_args *args = p; 1134 1135 + exofs_put_io_state(ios); 1136 1137 atomic_dec(&args->sbi->s_curr_pending); 1138 ··· 1148 struct exofs_i_info *oi = exofs_i(inode); 1149 struct super_block *sb = inode->i_sb; 1150 struct exofs_sb_info *sbi = sb->s_fs_info; 1151 + struct exofs_io_state *ios; 1152 struct osd_attr attr; 1153 struct exofs_fcb *fcb; 1154 struct updatei_args *args; ··· 1186 } else 1187 memcpy(fcb->i_data, oi->i_data, sizeof(fcb->i_data)); 1188 1189 + ret = exofs_get_io_state(sbi, &ios); 1190 + if (unlikely(ret)) { 1191 + EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__); 1192 goto free_args; 1193 } 1194 1195 attr = g_attr_inode_data; 1196 attr.val_ptr = fcb; 1197 + ios->out_attr_len = 1; 1198 + ios->out_attr = &attr; 1199 1200 if (!obj_created(oi)) { 1201 EXOFS_DBGMSG("!obj_created\n"); ··· 1206 EXOFS_DBGMSG("wait_event done\n"); 1207 } 1208 1209 + if (!do_sync) { 1210 args->sbi = sbi; 1211 + ios->done = updatei_done; 1212 + ios->private = args; 1213 + } 1214 1215 + ret = exofs_oi_write(oi, ios); 1216 + if (!do_sync && !ret) { 1217 atomic_inc(&sbi->s_curr_pending); 1218 goto out; /* deallocation in updatei_done */ 1219 } 1220 1221 + exofs_put_io_state(ios); 1222 free_args: 1223 kfree(args); 1224 out: ··· 1238 * Callback function from exofs_delete_inode() - don't have much cleaning up to 1239 * do. 
1240 */ 1241 + static void delete_done(struct exofs_io_state *ios, void *p) 1242 { 1243 + struct exofs_sb_info *sbi = p; 1244 + 1245 + exofs_put_io_state(ios); 1246 + 1247 atomic_dec(&sbi->s_curr_pending); 1248 } 1249 ··· 1256 struct exofs_i_info *oi = exofs_i(inode); 1257 struct super_block *sb = inode->i_sb; 1258 struct exofs_sb_info *sbi = sb->s_fs_info; 1259 + struct exofs_io_state *ios; 1260 int ret; 1261 1262 truncate_inode_pages(&inode->i_data, 0); ··· 1274 1275 clear_inode(inode); 1276 1277 + ret = exofs_get_io_state(sbi, &ios); 1278 + if (unlikely(ret)) { 1279 + EXOFS_ERR("%s: exofs_get_io_state failed\n", __func__); 1280 return; 1281 } 1282 1283 /* if we are deleting an obj that hasn't been created yet, wait */ 1284 if (!obj_created(oi)) { ··· 1288 wait_event(oi->i_wq, obj_created(oi)); 1289 } 1290 1291 + ios->obj.id = exofs_oi_objno(oi); 1292 + ios->done = delete_done; 1293 + ios->private = sbi; 1294 + ios->cred = oi->i_cred; 1295 + ret = exofs_sbi_remove(ios); 1296 if (ret) { 1297 + EXOFS_ERR("%s: exofs_sbi_remove failed\n", __func__); 1298 + exofs_put_io_state(ios); 1299 return; 1300 } 1301 atomic_inc(&sbi->s_curr_pending);

+421

fs/exofs/ios.c

···

··· 1 + /* 2 + * Copyright (C) 2005, 2006 3 + * Avishay Traeger (avishay@gmail.com) 4 + * Copyright (C) 2008, 2009 5 + * Boaz Harrosh <bharrosh@panasas.com> 6 + * 7 + * This file is part of exofs. 8 + * 9 + * exofs is free software; you can redistribute it and/or modify 10 + * it under the terms of the GNU General Public License as published by 11 + * the Free Software Foundation. Since it is based on ext2, and the only 12 + * valid version of GPL for the Linux kernel is version 2, the only valid 13 + * version of GPL for exofs is version 2. 14 + * 15 + * exofs is distributed in the hope that it will be useful, 16 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 + * GNU General Public License for more details. 19 + * 20 + * You should have received a copy of the GNU General Public License 21 + * along with exofs; if not, write to the Free Software 22 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 23 + */ 24 + 25 + #include <scsi/scsi_device.h> 26 + 27 + #include "exofs.h" 28 + 29 + void exofs_make_credential(u8 cred_a[OSD_CAP_LEN], const struct osd_obj_id *obj) 30 + { 31 + osd_sec_init_nosec_doall_caps(cred_a, obj, false, true); 32 + } 33 + 34 + int exofs_read_kern(struct osd_dev *od, u8 *cred, struct osd_obj_id *obj, 35 + u64 offset, void *p, unsigned length) 36 + { 37 + struct osd_request *or = osd_start_request(od, GFP_KERNEL); 38 + /* struct osd_sense_info osi = {.key = 0};*/ 39 + int ret; 40 + 41 + if (unlikely(!or)) { 42 + EXOFS_DBGMSG("%s: osd_start_request failed.\n", __func__); 43 + return -ENOMEM; 44 + } 45 + ret = osd_req_read_kern(or, obj, offset, p, length); 46 + if (unlikely(ret)) { 47 + EXOFS_DBGMSG("%s: osd_req_read_kern failed.\n", __func__); 48 + goto out; 49 + } 50 + 51 + ret = osd_finalize_request(or, 0, cred, NULL); 52 + if (unlikely(ret)) { 53 + EXOFS_DBGMSG("Faild to osd_finalize_request() => %d\n", ret); 54 + goto out; 55 + 
} 56 + 57 + ret = osd_execute_request(or); 58 + if (unlikely(ret)) 59 + EXOFS_DBGMSG("osd_execute_request() => %d\n", ret); 60 + /* osd_req_decode_sense(or, ret); */ 61 + 62 + out: 63 + osd_end_request(or); 64 + return ret; 65 + } 66 + 67 + int exofs_get_io_state(struct exofs_sb_info *sbi, struct exofs_io_state** pios) 68 + { 69 + struct exofs_io_state *ios; 70 + 71 + /*TODO: Maybe use kmem_cach per sbi of size 72 + * exofs_io_state_size(sbi->s_numdevs) 73 + */ 74 + ios = kzalloc(exofs_io_state_size(sbi->s_numdevs), GFP_KERNEL); 75 + if (unlikely(!ios)) { 76 + *pios = NULL; 77 + return -ENOMEM; 78 + } 79 + 80 + ios->sbi = sbi; 81 + ios->obj.partition = sbi->s_pid; 82 + *pios = ios; 83 + return 0; 84 + } 85 + 86 + void exofs_put_io_state(struct exofs_io_state *ios) 87 + { 88 + if (ios) { 89 + unsigned i; 90 + 91 + for (i = 0; i < ios->numdevs; i++) { 92 + struct exofs_per_dev_state *per_dev = &ios->per_dev[i]; 93 + 94 + if (per_dev->or) 95 + osd_end_request(per_dev->or); 96 + if (per_dev->bio) 97 + bio_put(per_dev->bio); 98 + } 99 + 100 + kfree(ios); 101 + } 102 + } 103 + 104 + static void _sync_done(struct exofs_io_state *ios, void *p) 105 + { 106 + struct completion *waiting = p; 107 + 108 + complete(waiting); 109 + } 110 + 111 + static void _last_io(struct kref *kref) 112 + { 113 + struct exofs_io_state *ios = container_of( 114 + kref, struct exofs_io_state, kref); 115 + 116 + ios->done(ios, ios->private); 117 + } 118 + 119 + static void _done_io(struct osd_request *or, void *p) 120 + { 121 + struct exofs_io_state *ios = p; 122 + 123 + kref_put(&ios->kref, _last_io); 124 + } 125 + 126 + static int exofs_io_execute(struct exofs_io_state *ios) 127 + { 128 + DECLARE_COMPLETION_ONSTACK(wait); 129 + bool sync = (ios->done == NULL); 130 + int i, ret; 131 + 132 + if (sync) { 133 + ios->done = _sync_done; 134 + ios->private = &wait; 135 + } 136 + 137 + for (i = 0; i < ios->numdevs; i++) { 138 + struct osd_request *or = ios->per_dev[i].or; 139 + if (unlikely(!or)) 140 + 
continue; 141 + 142 + ret = osd_finalize_request(or, 0, ios->cred, NULL); 143 + if (unlikely(ret)) { 144 + EXOFS_DBGMSG("Faild to osd_finalize_request() => %d\n", 145 + ret); 146 + return ret; 147 + } 148 + } 149 + 150 + kref_init(&ios->kref); 151 + 152 + for (i = 0; i < ios->numdevs; i++) { 153 + struct osd_request *or = ios->per_dev[i].or; 154 + if (unlikely(!or)) 155 + continue; 156 + 157 + kref_get(&ios->kref); 158 + osd_execute_request_async(or, _done_io, ios); 159 + } 160 + 161 + kref_put(&ios->kref, _last_io); 162 + ret = 0; 163 + 164 + if (sync) { 165 + wait_for_completion(&wait); 166 + ret = exofs_check_io(ios, NULL); 167 + } 168 + return ret; 169 + } 170 + 171 + int exofs_check_io(struct exofs_io_state *ios, u64 *resid) 172 + { 173 + enum osd_err_priority acumulated_osd_err = 0; 174 + int acumulated_lin_err = 0; 175 + int i; 176 + 177 + for (i = 0; i < ios->numdevs; i++) { 178 + struct osd_sense_info osi; 179 + int ret = osd_req_decode_sense(ios->per_dev[i].or, &osi); 180 + 181 + if (likely(!ret)) 182 + continue; 183 + 184 + if (unlikely(ret == -EFAULT)) { 185 + EXOFS_DBGMSG("%s: EFAULT Need page clear\n", __func__); 186 + /*FIXME: All the pages in this device range should: 187 + * clear_highpage(page); 188 + */ 189 + } 190 + 191 + if (osi.osd_err_pri >= acumulated_osd_err) { 192 + acumulated_osd_err = osi.osd_err_pri; 193 + acumulated_lin_err = ret; 194 + } 195 + } 196 + 197 + /* TODO: raid specific residual calculations */ 198 + if (resid) { 199 + if (likely(!acumulated_lin_err)) 200 + *resid = 0; 201 + else 202 + *resid = ios->length; 203 + } 204 + 205 + return acumulated_lin_err; 206 + } 207 + 208 + int exofs_sbi_create(struct exofs_io_state *ios) 209 + { 210 + int i, ret; 211 + 212 + for (i = 0; i < ios->sbi->s_numdevs; i++) { 213 + struct osd_request *or; 214 + 215 + or = osd_start_request(ios->sbi->s_ods[i], GFP_KERNEL); 216 + if (unlikely(!or)) { 217 + EXOFS_ERR("%s: osd_start_request failed\n", __func__); 218 + ret = -ENOMEM; 219 + goto out; 220 
+ } 221 + ios->per_dev[i].or = or; 222 + ios->numdevs++; 223 + 224 + osd_req_create_object(or, &ios->obj); 225 + } 226 + ret = exofs_io_execute(ios); 227 + 228 + out: 229 + return ret; 230 + } 231 + 232 + int exofs_sbi_remove(struct exofs_io_state *ios) 233 + { 234 + int i, ret; 235 + 236 + for (i = 0; i < ios->sbi->s_numdevs; i++) { 237 + struct osd_request *or; 238 + 239 + or = osd_start_request(ios->sbi->s_ods[i], GFP_KERNEL); 240 + if (unlikely(!or)) { 241 + EXOFS_ERR("%s: osd_start_request failed\n", __func__); 242 + ret = -ENOMEM; 243 + goto out; 244 + } 245 + ios->per_dev[i].or = or; 246 + ios->numdevs++; 247 + 248 + osd_req_remove_object(or, &ios->obj); 249 + } 250 + ret = exofs_io_execute(ios); 251 + 252 + out: 253 + return ret; 254 + } 255 + 256 + int exofs_sbi_write(struct exofs_io_state *ios) 257 + { 258 + int i, ret; 259 + 260 + for (i = 0; i < ios->sbi->s_numdevs; i++) { 261 + struct osd_request *or; 262 + 263 + or = osd_start_request(ios->sbi->s_ods[i], GFP_KERNEL); 264 + if (unlikely(!or)) { 265 + EXOFS_ERR("%s: osd_start_request failed\n", __func__); 266 + ret = -ENOMEM; 267 + goto out; 268 + } 269 + ios->per_dev[i].or = or; 270 + ios->numdevs++; 271 + 272 + if (ios->bio) { 273 + struct bio *bio; 274 + 275 + if (i != 0) { 276 + bio = bio_kmalloc(GFP_KERNEL, 277 + ios->bio->bi_max_vecs); 278 + if (unlikely(!bio)) { 279 + ret = -ENOMEM; 280 + goto out; 281 + } 282 + 283 + __bio_clone(bio, ios->bio); 284 + bio->bi_bdev = NULL; 285 + bio->bi_next = NULL; 286 + ios->per_dev[i].bio = bio; 287 + } else { 288 + bio = ios->bio; 289 + } 290 + 291 + osd_req_write(or, &ios->obj, ios->offset, bio, 292 + ios->length); 293 + /* EXOFS_DBGMSG("write sync=%d\n", sync);*/ 294 + } else if (ios->kern_buff) { 295 + osd_req_write_kern(or, &ios->obj, ios->offset, 296 + ios->kern_buff, ios->length); 297 + /* EXOFS_DBGMSG("write_kern sync=%d\n", sync);*/ 298 + } else { 299 + osd_req_set_attributes(or, &ios->obj); 300 + /* EXOFS_DBGMSG("set_attributes sync=%d\n", sync);*/ 
301 + } 302 + 303 + if (ios->out_attr) 304 + osd_req_add_set_attr_list(or, ios->out_attr, 305 + ios->out_attr_len); 306 + 307 + if (ios->in_attr) 308 + osd_req_add_get_attr_list(or, ios->in_attr, 309 + ios->in_attr_len); 310 + } 311 + ret = exofs_io_execute(ios); 312 + 313 + out: 314 + return ret; 315 + } 316 + 317 + int exofs_sbi_read(struct exofs_io_state *ios) 318 + { 319 + int i, ret; 320 + 321 + for (i = 0; i < 1; i++) { 322 + struct osd_request *or; 323 + unsigned first_dev = (unsigned)ios->obj.id; 324 + 325 + first_dev %= ios->sbi->s_numdevs; 326 + or = osd_start_request(ios->sbi->s_ods[first_dev], GFP_KERNEL); 327 + if (unlikely(!or)) { 328 + EXOFS_ERR("%s: osd_start_request failed\n", __func__); 329 + ret = -ENOMEM; 330 + goto out; 331 + } 332 + ios->per_dev[i].or = or; 333 + ios->numdevs++; 334 + 335 + if (ios->bio) { 336 + osd_req_read(or, &ios->obj, ios->offset, ios->bio, 337 + ios->length); 338 + /* EXOFS_DBGMSG("read sync=%d\n", sync);*/ 339 + } else if (ios->kern_buff) { 340 + osd_req_read_kern(or, &ios->obj, ios->offset, 341 + ios->kern_buff, ios->length); 342 + /* EXOFS_DBGMSG("read_kern sync=%d\n", sync);*/ 343 + } else { 344 + osd_req_get_attributes(or, &ios->obj); 345 + /* EXOFS_DBGMSG("get_attributes sync=%d\n", sync);*/ 346 + } 347 + 348 + if (ios->out_attr) 349 + osd_req_add_set_attr_list(or, ios->out_attr, 350 + ios->out_attr_len); 351 + 352 + if (ios->in_attr) 353 + osd_req_add_get_attr_list(or, ios->in_attr, 354 + ios->in_attr_len); 355 + } 356 + ret = exofs_io_execute(ios); 357 + 358 + out: 359 + return ret; 360 + } 361 + 362 + int extract_attr_from_ios(struct exofs_io_state *ios, struct osd_attr *attr) 363 + { 364 + struct osd_attr cur_attr = {.attr_page = 0}; /* start with zeros */ 365 + void *iter = NULL; 366 + int nelem; 367 + 368 + do { 369 + nelem = 1; 370 + osd_req_decode_get_attr_list(ios->per_dev[0].or, 371 + &cur_attr, &nelem, &iter); 372 + if ((cur_attr.attr_page == attr->attr_page) && 373 + (cur_attr.attr_id == attr->attr_id)) 
{ 374 + attr->len = cur_attr.len; 375 + attr->val_ptr = cur_attr.val_ptr; 376 + return 0; 377 + } 378 + } while (iter); 379 + 380 + return -EIO; 381 + } 382 + 383 + int exofs_oi_truncate(struct exofs_i_info *oi, u64 size) 384 + { 385 + struct exofs_sb_info *sbi = oi->vfs_inode.i_sb->s_fs_info; 386 + struct exofs_io_state *ios; 387 + struct osd_attr attr; 388 + __be64 newsize; 389 + int i, ret; 390 + 391 + if (exofs_get_io_state(sbi, &ios)) 392 + return -ENOMEM; 393 + 394 + ios->obj.id = exofs_oi_objno(oi); 395 + ios->cred = oi->i_cred; 396 + 397 + newsize = cpu_to_be64(size); 398 + attr = g_attr_logical_length; 399 + attr.val_ptr = &newsize; 400 + 401 + for (i = 0; i < sbi->s_numdevs; i++) { 402 + struct osd_request *or; 403 + 404 + or = osd_start_request(sbi->s_ods[i], GFP_KERNEL); 405 + if (unlikely(!or)) { 406 + EXOFS_ERR("%s: osd_start_request failed\n", __func__); 407 + ret = -ENOMEM; 408 + goto out; 409 + } 410 + ios->per_dev[i].or = or; 411 + ios->numdevs++; 412 + 413 + osd_req_set_attributes(or, &ios->obj); 414 + osd_req_add_set_attr_list(or, &attr, 1); 415 + } 416 + ret = exofs_io_execute(ios); 417 + 418 + out: 419 + exofs_put_io_state(ios); 420 + return ret; 421 + }

-125

fs/exofs/osd.c

··· 1 - /* 2 - * Copyright (C) 2005, 2006 3 - * Avishay Traeger (avishay@gmail.com) 4 - * Copyright (C) 2008, 2009 5 - * Boaz Harrosh <bharrosh@panasas.com> 6 - * 7 - * This file is part of exofs. 8 - * 9 - * exofs is free software; you can redistribute it and/or modify 10 - * it under the terms of the GNU General Public License as published by 11 - * the Free Software Foundation. Since it is based on ext2, and the only 12 - * valid version of GPL for the Linux kernel is version 2, the only valid 13 - * version of GPL for exofs is version 2. 14 - * 15 - * exofs is distributed in the hope that it will be useful, 16 - * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 - * GNU General Public License for more details. 19 - * 20 - * You should have received a copy of the GNU General Public License 21 - * along with exofs; if not, write to the Free Software 22 - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 23 - */ 24 - 25 - #include <scsi/scsi_device.h> 26 - #include <scsi/osd_sense.h> 27 - 28 - #include "exofs.h" 29 - 30 - int exofs_check_ok_resid(struct osd_request *or, u64 *in_resid, u64 *out_resid) 31 - { 32 - struct osd_sense_info osi; 33 - int ret = osd_req_decode_sense(or, &osi); 34 - 35 - if (ret) { /* translate to Linux codes */ 36 - if (osi.additional_code == scsi_invalid_field_in_cdb) { 37 - if (osi.cdb_field_offset == OSD_CFO_STARTING_BYTE) 38 - ret = -EFAULT; 39 - if (osi.cdb_field_offset == OSD_CFO_OBJECT_ID) 40 - ret = -ENOENT; 41 - else 42 - ret = -EINVAL; 43 - } else if (osi.additional_code == osd_quota_error) 44 - ret = -ENOSPC; 45 - else 46 - ret = -EIO; 47 - } 48 - 49 - /* FIXME: should be include in osd_sense_info */ 50 - if (in_resid) 51 - *in_resid = or->in.req ? or->in.req->resid_len : 0; 52 - 53 - if (out_resid) 54 - *out_resid = or->out.req ? 
or->out.req->resid_len : 0; 55 - 56 - return ret; 57 - } 58 - 59 - void exofs_make_credential(u8 cred_a[OSD_CAP_LEN], const struct osd_obj_id *obj) 60 - { 61 - osd_sec_init_nosec_doall_caps(cred_a, obj, false, true); 62 - } 63 - 64 - /* 65 - * Perform a synchronous OSD operation. 66 - */ 67 - int exofs_sync_op(struct osd_request *or, int timeout, uint8_t *credential) 68 - { 69 - int ret; 70 - 71 - or->timeout = timeout; 72 - ret = osd_finalize_request(or, 0, credential, NULL); 73 - if (ret) { 74 - EXOFS_DBGMSG("Faild to osd_finalize_request() => %d\n", ret); 75 - return ret; 76 - } 77 - 78 - ret = osd_execute_request(or); 79 - 80 - if (ret) 81 - EXOFS_DBGMSG("osd_execute_request() => %d\n", ret); 82 - /* osd_req_decode_sense(or, ret); */ 83 - return ret; 84 - } 85 - 86 - /* 87 - * Perform an asynchronous OSD operation. 88 - */ 89 - int exofs_async_op(struct osd_request *or, osd_req_done_fn *async_done, 90 - void *caller_context, u8 *cred) 91 - { 92 - int ret; 93 - 94 - ret = osd_finalize_request(or, 0, cred, NULL); 95 - if (ret) { 96 - EXOFS_DBGMSG("Faild to osd_finalize_request() => %d\n", ret); 97 - return ret; 98 - } 99 - 100 - ret = osd_execute_request_async(or, async_done, caller_context); 101 - 102 - if (ret) 103 - EXOFS_DBGMSG("osd_execute_request_async() => %d\n", ret); 104 - return ret; 105 - } 106 - 107 - int extract_attr_from_req(struct osd_request *or, struct osd_attr *attr) 108 - { 109 - struct osd_attr cur_attr = {.attr_page = 0}; /* start with zeros */ 110 - void *iter = NULL; 111 - int nelem; 112 - 113 - do { 114 - nelem = 1; 115 - osd_req_decode_get_attr_list(or, &cur_attr, &nelem, &iter); 116 - if ((cur_attr.attr_page == attr->attr_page) && 117 - (cur_attr.attr_id == attr->attr_id)) { 118 - attr->len = cur_attr.len; 119 - attr->val_ptr = cur_attr.val_ptr; 120 - return 0; 121 - } 122 - } while (iter); 123 - 124 - return -EIO; 125 - }

···

+51

fs/exofs/pnfs.h

···

··· 1 + /* 2 + * Copyright (C) 2008, 2009 3 + * Boaz Harrosh <bharrosh@panasas.com> 4 + * 5 + * This file is part of exofs. 6 + * 7 + * exofs is free software; you can redistribute it and/or modify it under the 8 + * terms of the GNU General Public License version 2 as published by the Free 9 + * Software Foundation. 10 + * 11 + */ 12 + 13 + /* FIXME: Remove this file once pnfs hits mainline */ 14 + 15 + #ifndef __EXOFS_PNFS_H__ 16 + #define __EXOFS_PNFS_H__ 17 + 18 + #if defined(CONFIG_PNFS) 19 + 20 + 21 + /* FIXME: move this file to: linux/exportfs/pnfs_osd_xdr.h */ 22 + #include "../nfs/objlayout/pnfs_osd_xdr.h" 23 + 24 + #else /* defined(CONFIG_PNFS) */ 25 + 26 + enum pnfs_iomode { 27 + IOMODE_READ = 1, 28 + IOMODE_RW = 2, 29 + IOMODE_ANY = 3, 30 + }; 31 + 32 + /* Layout Structure */ 33 + enum pnfs_osd_raid_algorithm4 { 34 + PNFS_OSD_RAID_0 = 1, 35 + PNFS_OSD_RAID_4 = 2, 36 + PNFS_OSD_RAID_5 = 3, 37 + PNFS_OSD_RAID_PQ = 4 /* Reed-Solomon P+Q */ 38 + }; 39 + 40 + struct pnfs_osd_data_map { 41 + u32 odm_num_comps; 42 + u64 odm_stripe_unit; 43 + u32 odm_group_width; 44 + u32 odm_group_depth; 45 + u32 odm_mirror_cnt; 46 + u32 odm_raid_algorithm; 47 + }; 48 + 49 + #endif /* else defined(CONFIG_PNFS) */ 50 + 51 + #endif /* __EXOFS_PNFS_H__ */

+268 -85

fs/exofs/super.c

··· 203 { 204 struct exofs_sb_info *sbi; 205 struct exofs_fscb *fscb; 206 - struct osd_request *or; 207 - struct osd_obj_id obj; 208 int ret = -ENOMEM; 209 - 210 - fscb = kzalloc(sizeof(struct exofs_fscb), GFP_KERNEL); 211 - if (!fscb) { 212 - EXOFS_ERR("exofs_write_super: memory allocation failed.\n"); 213 - return -ENOMEM; 214 - } 215 216 lock_super(sb); 217 sbi = sb->s_fs_info; 218 fscb->s_nextid = cpu_to_le64(sbi->s_nextid); 219 fscb->s_numfiles = cpu_to_le32(sbi->s_numfiles); 220 fscb->s_magic = cpu_to_le16(sb->s_magic); 221 fscb->s_newfs = 0; 222 223 - or = osd_start_request(sbi->s_dev, GFP_KERNEL); 224 - if (unlikely(!or)) { 225 - EXOFS_ERR("exofs_write_super: osd_start_request failed.\n"); 226 - goto out; 227 - } 228 229 - obj.partition = sbi->s_pid; 230 - obj.id = EXOFS_SUPER_ID; 231 - ret = osd_req_write_kern(or, &obj, 0, fscb, sizeof(*fscb)); 232 if (unlikely(ret)) { 233 - EXOFS_ERR("exofs_write_super: osd_req_write_kern failed.\n"); 234 - goto out; 235 - } 236 - 237 - ret = exofs_sync_op(or, sbi->s_timeout, sbi->s_cred); 238 - if (unlikely(ret)) { 239 - EXOFS_ERR("exofs_write_super: exofs_sync_op failed.\n"); 240 goto out; 241 } 242 sb->s_dirt = 0; 243 244 out: 245 - if (or) 246 - osd_end_request(or); 247 unlock_super(sb); 248 - kfree(fscb); 249 return ret; 250 } 251 ··· 251 exofs_sync_fs(sb, 1); 252 else 253 sb->s_dirt = 0; 254 } 255 256 /* ··· 298 msecs_to_jiffies(100)); 299 } 300 301 - osduld_put_device(sbi->s_dev); 302 - kfree(sb->s_fs_info); 303 sb->s_fs_info = NULL; 304 } 305 306 /* ··· 482 struct inode *root; 483 struct exofs_mountopt *opts = data; 484 struct exofs_sb_info *sbi; /*extended info */ 485 struct exofs_fscb fscb; /*on-disk superblock info */ 486 - struct osd_request *or = NULL; 487 struct osd_obj_id obj; 488 int ret; 489 490 sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); 491 if (!sbi) 492 return -ENOMEM; 493 - sb->s_fs_info = sbi; 494 495 /* use mount options to fill superblock */ 496 - sbi->s_dev = osduld_path_lookup(opts->dev_name); 497 - 
if (IS_ERR(sbi->s_dev)) { 498 - ret = PTR_ERR(sbi->s_dev); 499 - sbi->s_dev = NULL; 500 goto free_sbi; 501 } 502 503 sbi->s_pid = opts->pid; 504 sbi->s_timeout = opts->timeout; 505 ··· 514 sb->s_bdev = NULL; 515 sb->s_dev = 0; 516 517 - /* read data from on-disk superblock object */ 518 obj.partition = sbi->s_pid; 519 obj.id = EXOFS_SUPER_ID; 520 exofs_make_credential(sbi->s_cred, &obj); 521 522 - or = osd_start_request(sbi->s_dev, GFP_KERNEL); 523 - if (unlikely(!or)) { 524 - if (!silent) 525 - EXOFS_ERR( 526 - "exofs_fill_super: osd_start_request failed.\n"); 527 - ret = -ENOMEM; 528 goto free_sbi; 529 - } 530 - ret = osd_req_read_kern(or, &obj, 0, &fscb, sizeof(fscb)); 531 - if (unlikely(ret)) { 532 - if (!silent) 533 - EXOFS_ERR( 534 - "exofs_fill_super: osd_req_read_kern failed.\n"); 535 - ret = -ENOMEM; 536 - goto free_sbi; 537 - } 538 - 539 - ret = exofs_sync_op(or, sbi->s_timeout, sbi->s_cred); 540 - if (unlikely(ret)) { 541 - if (!silent) 542 - EXOFS_ERR("exofs_fill_super: exofs_sync_op failed.\n"); 543 - ret = -EIO; 544 - goto free_sbi; 545 - } 546 547 sb->s_magic = le16_to_cpu(fscb.s_magic); 548 sbi->s_nextid = le64_to_cpu(fscb.s_nextid); ··· 533 ret = -EINVAL; 534 goto free_sbi; 535 } 536 537 /* start generation numbers from a random point */ 538 get_random_bytes(&sbi->s_next_generation, sizeof(u32)); 539 spin_lock_init(&sbi->s_next_gen_lock); 540 541 /* set up operation vectors */ 542 sb->s_op = &exofs_sops; 543 sb->s_export_op = &exofs_export_ops; 544 root = exofs_iget(sb, EXOFS_ROOT_ID - EXOFS_OBJ_OFF); ··· 578 goto free_sbi; 579 } 580 581 - ret = 0; 582 - out: 583 - if (or) 584 - osd_end_request(or); 585 - return ret; 586 587 free_sbi: 588 - osduld_put_device(sbi->s_dev); /* NULL safe */ 589 - kfree(sbi); 590 - goto out; 591 } 592 593 /* ··· 615 { 616 struct super_block *sb = dentry->d_sb; 617 struct exofs_sb_info *sbi = sb->s_fs_info; 618 - struct osd_obj_id obj = {sbi->s_pid, 0}; 619 struct osd_attr attrs[] = { 620 
ATTR_DEF(OSD_APAGE_PARTITION_QUOTAS, 621 OSD_ATTR_PQ_CAPACITY_QUOTA, sizeof(__be64)), ··· 624 }; 625 uint64_t capacity = ULLONG_MAX; 626 uint64_t used = ULLONG_MAX; 627 - struct osd_request *or; 628 uint8_t cred_a[OSD_CAP_LEN]; 629 int ret; 630 631 - /* get used/capacity attributes */ 632 - exofs_make_credential(cred_a, &obj); 633 - 634 - or = osd_start_request(sbi->s_dev, GFP_KERNEL); 635 - if (unlikely(!or)) { 636 - EXOFS_DBGMSG("exofs_statfs: osd_start_request failed.\n"); 637 - return -ENOMEM; 638 } 639 640 - osd_req_get_attributes(or, &obj); 641 - osd_req_add_get_attr_list(or, attrs, ARRAY_SIZE(attrs)); 642 - ret = exofs_sync_op(or, sbi->s_timeout, cred_a); 643 if (unlikely(ret)) 644 goto out; 645 646 - ret = extract_attr_from_req(or, &attrs[0]); 647 - if (likely(!ret)) 648 capacity = get_unaligned_be64(attrs[0].val_ptr); 649 - else 650 EXOFS_DBGMSG("exofs_statfs: get capacity failed.\n"); 651 652 - ret = extract_attr_from_req(or, &attrs[1]); 653 if (likely(!ret)) 654 used = get_unaligned_be64(attrs[1].val_ptr); 655 else ··· 659 /* fill in the stats buffer */ 660 buf->f_type = EXOFS_SUPER_MAGIC; 661 buf->f_bsize = EXOFS_BLKSIZE; 662 - buf->f_blocks = (capacity >> EXOFS_BLKSHIFT); 663 - buf->f_bfree = ((capacity - used) >> EXOFS_BLKSHIFT); 664 buf->f_bavail = buf->f_bfree; 665 buf->f_files = sbi->s_numfiles; 666 buf->f_ffree = EXOFS_MAX_ID - sbi->s_numfiles; 667 buf->f_namelen = EXOFS_NAME_LEN; 668 669 out: 670 - osd_end_request(or); 671 return ret; 672 } 673

··· 203 { 204 struct exofs_sb_info *sbi; 205 struct exofs_fscb *fscb; 206 + struct exofs_io_state *ios; 207 int ret = -ENOMEM; 208 209 lock_super(sb); 210 sbi = sb->s_fs_info; 211 + fscb = &sbi->s_fscb; 212 + 213 + ret = exofs_get_io_state(sbi, &ios); 214 + if (ret) 215 + goto out; 216 + 217 + /* Note: We only write the changing part of the fscb. .i.e upto the 218 + * the fscb->s_dev_table_oid member. There is no read-modify-write 219 + * here. 220 + */ 221 + ios->length = offsetof(struct exofs_fscb, s_dev_table_oid); 222 + memset(fscb, 0, ios->length); 223 fscb->s_nextid = cpu_to_le64(sbi->s_nextid); 224 fscb->s_numfiles = cpu_to_le32(sbi->s_numfiles); 225 fscb->s_magic = cpu_to_le16(sb->s_magic); 226 fscb->s_newfs = 0; 227 + fscb->s_version = EXOFS_FSCB_VER; 228 229 + ios->obj.id = EXOFS_SUPER_ID; 230 + ios->offset = 0; 231 + ios->kern_buff = fscb; 232 + ios->cred = sbi->s_cred; 233 234 + ret = exofs_sbi_write(ios); 235 if (unlikely(ret)) { 236 + EXOFS_ERR("%s: exofs_sbi_write failed.\n", __func__); 237 goto out; 238 } 239 sb->s_dirt = 0; 240 241 out: 242 + EXOFS_DBGMSG("s_nextid=0x%llx ret=%d\n", _LLU(sbi->s_nextid), ret); 243 + exofs_put_io_state(ios); 244 unlock_super(sb); 245 return ret; 246 } 247 ··· 255 exofs_sync_fs(sb, 1); 256 else 257 sb->s_dirt = 0; 258 + } 259 + 260 + static void _exofs_print_device(const char *msg, const char *dev_path, 261 + struct osd_dev *od, u64 pid) 262 + { 263 + const struct osd_dev_info *odi = osduld_device_info(od); 264 + 265 + printk(KERN_NOTICE "exofs: %s %s osd_name-%s pid-0x%llx\n", 266 + msg, dev_path ?: "", odi->osdname, _LLU(pid)); 267 + } 268 + 269 + void exofs_free_sbi(struct exofs_sb_info *sbi) 270 + { 271 + while (sbi->s_numdevs) { 272 + int i = --sbi->s_numdevs; 273 + struct osd_dev *od = sbi->s_ods[i]; 274 + 275 + if (od) { 276 + sbi->s_ods[i] = NULL; 277 + osduld_put_device(od); 278 + } 279 + } 280 + kfree(sbi); 281 } 282 283 /* ··· 279 msecs_to_jiffies(100)); 280 } 281 282 + _exofs_print_device("Unmounting", 
NULL, sbi->s_ods[0], sbi->s_pid); 283 + 284 + exofs_free_sbi(sbi); 285 sb->s_fs_info = NULL; 286 + } 287 + 288 + static int _read_and_match_data_map(struct exofs_sb_info *sbi, unsigned numdevs, 289 + struct exofs_device_table *dt) 290 + { 291 + sbi->data_map.odm_num_comps = 292 + le32_to_cpu(dt->dt_data_map.cb_num_comps); 293 + sbi->data_map.odm_stripe_unit = 294 + le64_to_cpu(dt->dt_data_map.cb_stripe_unit); 295 + sbi->data_map.odm_group_width = 296 + le32_to_cpu(dt->dt_data_map.cb_group_width); 297 + sbi->data_map.odm_group_depth = 298 + le32_to_cpu(dt->dt_data_map.cb_group_depth); 299 + sbi->data_map.odm_mirror_cnt = 300 + le32_to_cpu(dt->dt_data_map.cb_mirror_cnt); 301 + sbi->data_map.odm_raid_algorithm = 302 + le32_to_cpu(dt->dt_data_map.cb_raid_algorithm); 303 + 304 + /* FIXME: Hard coded mirror only for now. if not so do not mount */ 305 + if ((sbi->data_map.odm_num_comps != numdevs) || 306 + (sbi->data_map.odm_stripe_unit != EXOFS_BLKSIZE) || 307 + (sbi->data_map.odm_raid_algorithm != PNFS_OSD_RAID_0) || 308 + (sbi->data_map.odm_mirror_cnt != (numdevs - 1))) 309 + return -EINVAL; 310 + else 311 + return 0; 312 + } 313 + 314 + /* @odi is valid only as long as @fscb_dev is valid */ 315 + static int exofs_devs_2_odi(struct exofs_dt_device_info *dt_dev, 316 + struct osd_dev_info *odi) 317 + { 318 + odi->systemid_len = le32_to_cpu(dt_dev->systemid_len); 319 + memcpy(odi->systemid, dt_dev->systemid, odi->systemid_len); 320 + 321 + odi->osdname_len = le32_to_cpu(dt_dev->osdname_len); 322 + odi->osdname = dt_dev->osdname; 323 + 324 + /* FIXME support long names. Will need a _put function */ 325 + if (dt_dev->long_name_offset) 326 + return -EINVAL; 327 + 328 + /* Make sure osdname is printable! 329 + * mkexofs should give us space for a null-terminator else the 330 + * device-table is invalid. 
331 + */ 332 + if (unlikely(odi->osdname_len >= sizeof(dt_dev->osdname))) 333 + odi->osdname_len = sizeof(dt_dev->osdname) - 1; 334 + dt_dev->osdname[odi->osdname_len] = 0; 335 + 336 + /* If it's all zeros something is bad we read past end-of-obj */ 337 + return !(odi->systemid_len || odi->osdname_len); 338 + } 339 + 340 + static int exofs_read_lookup_dev_table(struct exofs_sb_info **psbi, 341 + unsigned table_count) 342 + { 343 + struct exofs_sb_info *sbi = *psbi; 344 + struct osd_dev *fscb_od; 345 + struct osd_obj_id obj = {.partition = sbi->s_pid, 346 + .id = EXOFS_DEVTABLE_ID}; 347 + struct exofs_device_table *dt; 348 + unsigned table_bytes = table_count * sizeof(dt->dt_dev_table[0]) + 349 + sizeof(*dt); 350 + unsigned numdevs, i; 351 + int ret; 352 + 353 + dt = kmalloc(table_bytes, GFP_KERNEL); 354 + if (unlikely(!dt)) { 355 + EXOFS_ERR("ERROR: allocating %x bytes for device table\n", 356 + table_bytes); 357 + return -ENOMEM; 358 + } 359 + 360 + fscb_od = sbi->s_ods[0]; 361 + sbi->s_ods[0] = NULL; 362 + sbi->s_numdevs = 0; 363 + ret = exofs_read_kern(fscb_od, sbi->s_cred, &obj, 0, dt, table_bytes); 364 + if (unlikely(ret)) { 365 + EXOFS_ERR("ERROR: reading device table\n"); 366 + goto out; 367 + } 368 + 369 + numdevs = le64_to_cpu(dt->dt_num_devices); 370 + if (unlikely(!numdevs)) { 371 + ret = -EINVAL; 372 + goto out; 373 + } 374 + WARN_ON(table_count != numdevs); 375 + 376 + ret = _read_and_match_data_map(sbi, numdevs, dt); 377 + if (unlikely(ret)) 378 + goto out; 379 + 380 + if (likely(numdevs > 1)) { 381 + unsigned size = numdevs * sizeof(sbi->s_ods[0]); 382 + 383 + sbi = krealloc(sbi, sizeof(*sbi) + size, GFP_KERNEL); 384 + if (unlikely(!sbi)) { 385 + ret = -ENOMEM; 386 + goto out; 387 + } 388 + memset(&sbi->s_ods[1], 0, size - sizeof(sbi->s_ods[0])); 389 + *psbi = sbi; 390 + } 391 + 392 + for (i = 0; i < numdevs; i++) { 393 + struct exofs_fscb fscb; 394 + struct osd_dev_info odi; 395 + struct osd_dev *od; 396 + 397 + if 
(exofs_devs_2_odi(&dt->dt_dev_table[i], &odi)) { 398 + EXOFS_ERR("ERROR: Read all-zeros device entry\n"); 399 + ret = -EINVAL; 400 + goto out; 401 + } 402 + 403 + printk(KERN_NOTICE "Add device[%d]: osd_name-%s\n", 404 + i, odi.osdname); 405 + 406 + /* On all devices the device table is identical. The user can 407 + * specify any one of the participating devices on the command 408 + * line. We always keep them in device-table order. 409 + */ 410 + if (fscb_od && osduld_device_same(fscb_od, &odi)) { 411 + sbi->s_ods[i] = fscb_od; 412 + ++sbi->s_numdevs; 413 + fscb_od = NULL; 414 + continue; 415 + } 416 + 417 + od = osduld_info_lookup(&odi); 418 + if (unlikely(IS_ERR(od))) { 419 + ret = PTR_ERR(od); 420 + EXOFS_ERR("ERROR: device requested is not found " 421 + "osd_name-%s =>%d\n", odi.osdname, ret); 422 + goto out; 423 + } 424 + 425 + sbi->s_ods[i] = od; 426 + ++sbi->s_numdevs; 427 + 428 + /* Read the fscb of the other devices to make sure the FS 429 + * partition is there. 430 + */ 431 + ret = exofs_read_kern(od, sbi->s_cred, &obj, 0, &fscb, 432 + sizeof(fscb)); 433 + if (unlikely(ret)) { 434 + EXOFS_ERR("ERROR: Malformed participating device " 435 + "error reading fscb osd_name-%s\n", 436 + odi.osdname); 437 + goto out; 438 + } 439 + 440 + /* TODO: verify other information is correct and FS-uuid 441 + * matches. Benny what did you say about device table 442 + * generation and old devices? 
443 + */ 444 + } 445 + 446 + out: 447 + kfree(dt); 448 + if (unlikely(!ret && fscb_od)) { 449 + EXOFS_ERR( 450 + "ERROR: Bad device-table container device not present\n"); 451 + osduld_put_device(fscb_od); 452 + ret = -EINVAL; 453 + } 454 + 455 + return ret; 456 } 457 458 /* ··· 292 struct inode *root; 293 struct exofs_mountopt *opts = data; 294 struct exofs_sb_info *sbi; /*extended info */ 295 + struct osd_dev *od; /* Master device */ 296 struct exofs_fscb fscb; /*on-disk superblock info */ 297 struct osd_obj_id obj; 298 + unsigned table_count; 299 int ret; 300 301 sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); 302 if (!sbi) 303 return -ENOMEM; 304 305 /* use mount options to fill superblock */ 306 + od = osduld_path_lookup(opts->dev_name); 307 + if (IS_ERR(od)) { 308 + ret = PTR_ERR(od); 309 goto free_sbi; 310 } 311 312 + sbi->s_ods[0] = od; 313 + sbi->s_numdevs = 1; 314 sbi->s_pid = opts->pid; 315 sbi->s_timeout = opts->timeout; 316 ··· 323 sb->s_bdev = NULL; 324 sb->s_dev = 0; 325 326 obj.partition = sbi->s_pid; 327 obj.id = EXOFS_SUPER_ID; 328 exofs_make_credential(sbi->s_cred, &obj); 329 330 + ret = exofs_read_kern(od, sbi->s_cred, &obj, 0, &fscb, sizeof(fscb)); 331 + if (unlikely(ret)) 332 goto free_sbi; 333 334 sb->s_magic = le16_to_cpu(fscb.s_magic); 335 sbi->s_nextid = le64_to_cpu(fscb.s_nextid); ··· 364 ret = -EINVAL; 365 goto free_sbi; 366 } 367 + if (le32_to_cpu(fscb.s_version) != EXOFS_FSCB_VER) { 368 + EXOFS_ERR("ERROR: Bad FSCB version expected-%d got-%d\n", 369 + EXOFS_FSCB_VER, le32_to_cpu(fscb.s_version)); 370 + ret = -EINVAL; 371 + goto free_sbi; 372 + } 373 374 /* start generation numbers from a random point */ 375 get_random_bytes(&sbi->s_next_generation, sizeof(u32)); 376 spin_lock_init(&sbi->s_next_gen_lock); 377 378 + table_count = le64_to_cpu(fscb.s_dev_table_count); 379 + if (table_count) { 380 + ret = exofs_read_lookup_dev_table(&sbi, table_count); 381 + if (unlikely(ret)) 382 + goto free_sbi; 383 + } 384 + 385 /* set up operation vectors */ 
386 + sb->s_fs_info = sbi; 387 sb->s_op = &exofs_sops; 388 sb->s_export_op = &exofs_export_ops; 389 root = exofs_iget(sb, EXOFS_ROOT_ID - EXOFS_OBJ_OFF); ··· 395 goto free_sbi; 396 } 397 398 + _exofs_print_device("Mounting", opts->dev_name, sbi->s_ods[0], 399 + sbi->s_pid); 400 + return 0; 401 402 free_sbi: 403 + EXOFS_ERR("Unable to mount exofs on %s pid=0x%llx err=%d\n", 404 + opts->dev_name, sbi->s_pid, ret); 405 + exofs_free_sbi(sbi); 406 + return ret; 407 } 408 409 /* ··· 433 { 434 struct super_block *sb = dentry->d_sb; 435 struct exofs_sb_info *sbi = sb->s_fs_info; 436 + struct exofs_io_state *ios; 437 struct osd_attr attrs[] = { 438 ATTR_DEF(OSD_APAGE_PARTITION_QUOTAS, 439 OSD_ATTR_PQ_CAPACITY_QUOTA, sizeof(__be64)), ··· 442 }; 443 uint64_t capacity = ULLONG_MAX; 444 uint64_t used = ULLONG_MAX; 445 uint8_t cred_a[OSD_CAP_LEN]; 446 int ret; 447 448 + ret = exofs_get_io_state(sbi, &ios); 449 + if (ret) { 450 + EXOFS_DBGMSG("exofs_get_io_state failed.\n"); 451 + return ret; 452 } 453 454 + exofs_make_credential(cred_a, &ios->obj); 455 + ios->cred = sbi->s_cred; 456 + ios->in_attr = attrs; 457 + ios->in_attr_len = ARRAY_SIZE(attrs); 458 + 459 + ret = exofs_sbi_read(ios); 460 if (unlikely(ret)) 461 goto out; 462 463 + ret = extract_attr_from_ios(ios, &attrs[0]); 464 + if (likely(!ret)) { 465 capacity = get_unaligned_be64(attrs[0].val_ptr); 466 + if (unlikely(!capacity)) 467 + capacity = ULLONG_MAX; 468 + } else 469 EXOFS_DBGMSG("exofs_statfs: get capacity failed.\n"); 470 471 + ret = extract_attr_from_ios(ios, &attrs[1]); 472 if (likely(!ret)) 473 used = get_unaligned_be64(attrs[1].val_ptr); 474 else ··· 476 /* fill in the stats buffer */ 477 buf->f_type = EXOFS_SUPER_MAGIC; 478 buf->f_bsize = EXOFS_BLKSIZE; 479 + buf->f_blocks = capacity >> 9; 480 + buf->f_bfree = (capacity - used) >> 9; 481 buf->f_bavail = buf->f_bfree; 482 buf->f_files = sbi->s_numfiles; 483 buf->f_ffree = EXOFS_MAX_ID - sbi->s_numfiles; 484 buf->f_namelen = EXOFS_NAME_LEN; 485 486 out: 487 + 
exofs_put_io_state(ios); 488 return ret; 489 } 490