/* Viewer banner: at v2.6.30, 15 kB (view raw) */
/*
 * 2007+ Copyright (c) Evgeniy Polyakov <johnpol@2ka.mipt.ru>
 * All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 */

/*
 * DST definitions.
 *
 * The first part of this header (up to the __KERNEL__ guard) describes the
 * control structures and the on-wire command header; the second part holds
 * the kernel-only data structures, inline helpers and prototypes.
 */

#ifndef __DST_H
#define __DST_H

#include <linux/types.h>
#include <linux/connector.h>
/* __cpu_to_be32()/__cpu_to_be64() used by dst_convert_cmd() below */
#include <asm/byteorder.h>

#define DST_NAMELEN		32
#define DST_NAME		"dst"

/* Control commands, carried in dst_ctl.cmd */
enum {
	/* Remove node with given id from storage */
	DST_DEL_NODE	= 0,
	/* Add remote node with given id to the storage */
	DST_ADD_REMOTE,
	/*
	 * Add local node with given id to the storage
	 * to be exported and used by remote peers
	 */
	DST_ADD_EXPORT,
	/*
	 * Crypto initialization command
	 * (hash/cipher used to protect the connection)
	 */
	DST_CRYPTO,
	/* Security attributes for given connection (permissions for example) */
	DST_SECURITY,
	/* Register given node in the block layer subsystem */
	DST_START,
	/* Number of control commands, not a command itself */
	DST_CMD_MAX
};

/* Control message header */
struct dst_ctl
{
	/* Storage name */
	char			name[DST_NAMELEN];
	/* Command flags */
	__u32			flags;
	/* Command itself (one of the DST_* control commands above) */
	__u32			cmd;
	/* Maximum number of pages per single request in this device */
	__u32			max_pages;
	/* Stale/error transaction scanning timeout in milliseconds */
	__u32			trans_scan_timeout;
	/* Maximum number of retry sends before completing transaction as broken */
	__u32			trans_max_retries;
	/* Storage size */
	__u64			size;
};

/* Reply command carries completion status */
struct dst_ctl_ack
{
	struct cn_msg		msg;
	int			error;
	int			unused[3];
};

/*
 * Unfortunately the socket address structure is not exported to userspace
 * and is redefined there.
 */
#define SADDR_MAX_DATA	128

struct saddr {
	/* address family, AF_xxx */
	unsigned short		sa_family;
	/* Protocol address, up to SADDR_MAX_DATA bytes */
	char			sa_data[SADDR_MAX_DATA];
	/* Number of bytes used in sa_data */
	unsigned short		sa_data_len;
};

/* Address structure */
struct dst_network_ctl
{
	/* Socket type: datagram, stream...*/
	unsigned int		type;
	/* Let me guess, is it a Jupiter diameter? */
	unsigned int		proto;
	/* Peer's address */
	struct saddr		addr;
};

struct dst_crypto_ctl
{
	/* Cipher and hash names */
	char			cipher_algo[DST_NAMELEN];
	char			hash_algo[DST_NAMELEN];

	/* Key sizes. Can be zero for digest for example */
	unsigned int		cipher_keysize, hash_keysize;
	/* Alignment. Calculated by the DST itself. */
	unsigned int		crypto_attached_size;
	/* Number of threads to perform crypto operations */
	int			thread_num;
};

/* Export security attributes have these bits checked when a client connects */
#define DST_PERM_READ		(1<<0)
#define DST_PERM_WRITE		(1<<1)

/*
 * Right now it is a simple model, where each remote address
 * is assigned a set of permissions it is allowed to perform.
 * In the real world a block device does not know anything but
 * reading and writing, so it should be more than enough.
 */
struct dst_secure_user
{
	/* Bitmask of DST_PERM_* */
	unsigned int		permissions;
	struct saddr		addr;
};

/*
 * Export control command: device to export and network address to accept
 * clients to work with given device
 */
struct dst_export_ctl
{
	char			device[DST_NAMELEN];
	struct dst_network_ctl	ctl;
};

/* Network commands, carried in dst_cmd.cmd */
enum {
	DST_CFG	= 1, 		/* Request remote configuration */
	DST_IO,			/* IO command */
	DST_IO_RESPONSE,	/* IO response */
	DST_PING,		/* Keepalive message */
	DST_NCMD_MAX,		/* Upper bound, not a command itself */
};

/* Network command header */
struct dst_cmd
{
	/* Network command itself, see above */
	__u32			cmd;
	/*
	 * Size of the attached data
	 * (in most cases, for READ command it means how many bytes were requested)
	 */
	__u32			size;
	/* Crypto size: number of attached bytes with digest/hmac */
	__u32			csize;
	/* Here we can carry secret data */
	__u32			reserved;
	/* Read/write bits, see how they are encoded in bio structure */
	__u64			rw;
	/* BIO flags */
	__u64			flags;
	/* Unique command id (like transaction ID) */
	__u64			id;
	/* Sector to start IO from */
	__u64			sector;
	/* Hash data is placed after this header */
	__u8			hash[0];
};

/*
 * Convert command to/from network byte order.
 * We do not use hton*() functions, since there is
 * no 64-bit implementation.
 *
 * The conversion is done in place. The 'reserved' field and the trailing
 * hash bytes are left untouched.
 */
static inline void dst_convert_cmd(struct dst_cmd *c)
{
	c->cmd = __cpu_to_be32(c->cmd);
	c->csize = __cpu_to_be32(c->csize);
	c->size = __cpu_to_be32(c->size);
	c->sector = __cpu_to_be64(c->sector);
	c->id = __cpu_to_be64(c->id);
	c->flags = __cpu_to_be64(c->flags);
	c->rw = __cpu_to_be64(c->rw);
}

/* Transaction id */
typedef __u64 dst_gen_t;

#ifdef __KERNEL__

#include <linux/blkdev.h>
#include <linux/bio.h>
#include <linux/device.h>
#include <linux/mempool.h>
#include <linux/net.h>
#include <linux/poll.h>
#include <linux/rbtree.h>

/*
 * Debug printing: a real printk() when CONFIG_DST_DEBUG is set, otherwise
 * an empty inline function that still gets printf-style format checking.
 */
#ifdef CONFIG_DST_DEBUG
#define dprintk(f, a...) printk(KERN_NOTICE f, ##a)
#else
static inline void __attribute__ ((format (printf, 1, 2)))
	dprintk(const char *fmt, ...) {}
#endif

struct dst_node;

struct dst_trans
{
	/* DST node we are working with */
	struct dst_node		*n;

	/* Entry inside transaction tree */
	struct rb_node		trans_entry;

	/* Merlin kills this transaction when this memory cell equals zero */
	atomic_t		refcnt;

	/* How this transaction should be processed by crypto engine */
	short			enc;
	/* How many times this transaction was resent */
	short			retries;
	/* Completion status */
	int			error;

	/* When did we send it to the remote peer */
	long			send_time;

	/*
	 * My name is...
	 * Well, computers do not speak, they have a unique id instead
	 */
	dst_gen_t		gen;

	/* Block IO we are working with */
	struct bio		*bio;

	/* Network command for above block IO request */
	struct dst_cmd		cmd;
};

struct dst_crypto_engine
{
	/* What should we do with all block requests */
	struct crypto_hash	*hash;
	struct crypto_ablkcipher	*cipher;

	/* Pool of pages used to encrypt data into before sending */
	int			page_num;
	struct page		**pages;

	/* What to do with current request */
	int			enc;
	/* Who we are and where do we go */
	struct scatterlist	*src, *dst;

	/* Maximum timeout waiting for encryption to be completed */
	long			timeout;
	/* IV is a 64-bit sequential counter */
	u64			iv;

	/* Secret data */
	void			*private;

	/* Cached temporary data lives here */
	int			size;
	void			*data;
};

struct dst_state
{
	/* The main state protection */
	struct mutex		state_lock;

	/* Polling machinery for sockets */
	wait_queue_t		wait;
	wait_queue_head_t	*whead;
	/* Most of events are being waited here */
	wait_queue_head_t	thread_wait;

	/* Who owns this? */
	struct dst_node		*node;

	/* Network address for this state */
	struct dst_network_ctl	ctl;

	/* Permissions to work with: read-only or rw connection */
	u32			permissions;

	/* Called when we need to clean private data */
	void			(* cleanup)(struct dst_state *st);

	/* Used by the server: BIO completion queues BIOs here */
	struct list_head	request_list;
	spinlock_t		request_lock;

	/* Guess what? No, it is not number of planets */
	atomic_t		refcnt;

	/* This flag is set when connection should be dropped */
	int			need_exit;

	/*
	 * Socket to work with. Second pointer is used for
	 * lockless check if socket was changed before performing
	 * next action (like working with cached polling result)
	 */
	struct socket		*socket, *read_socket;

	/* Cached preallocated data */
	void			*data;
	unsigned int		size;

	/* Currently processed command */
	struct dst_cmd		cmd;
};

struct dst_info
{
	/* Device size */
	u64			size;

	/* Local device name for export devices */
	char			local[DST_NAMELEN];

	/* Network setup */
	struct dst_network_ctl	net;

	/* Sysfs bits use this */
	struct device		device;
};

struct dst_node
{
	struct list_head	node_entry;

	/* Hi, my name is stored here */
	char			name[DST_NAMELEN];
	/* My cache name is stored here */
	char			cache_name[DST_NAMELEN];

	/*
	 * Block device attached to given node.
	 * Only valid for exporting nodes
	 */
	struct block_device 	*bdev;
	/* Network state machine for given peer */
	struct dst_state	*state;

	/* Block IO machinery */
	struct request_queue	*queue;
	struct gendisk		*disk;

	/* Number of threads in processing pool */
	int			thread_num;
	/* Maximum number of pages in single IO */
	int			max_pages;

	/* I'm that big in bytes */
	loff_t			size;

	/* Exported to userspace node information */
	struct dst_info		*info;

	/*
	 * Security attribute list.
	 * Used only by exporting node currently.
	 */
	struct list_head	security_list;
	struct mutex		security_lock;

	/*
	 * When this underflows below zero, the universe collapses.
	 * But this will not happen, since node will be freed,
	 * when reference counter reaches zero.
	 */
	atomic_t		refcnt;

	/* How precisely should I be started? */
	int 			(*start)(struct dst_node *);

	/* Crypto capabilities */
	struct dst_crypto_ctl	crypto;
	u8			*hash_key;
	u8			*cipher_key;

	/* Pool of processing thread */
	struct thread_pool	*pool;

	/* Transaction IDs live here */
	atomic_long_t		gen;

	/*
	 * How frequently and how many times transaction
	 * tree should be scanned to drop stale objects.
	 */
	long			trans_scan_timeout;
	int			trans_max_retries;

	/* Small gnomes live here */
	struct rb_root		trans_root;
	struct mutex		trans_lock;

	/*
	 * Transaction cache/memory pool.
	 * It is big enough to contain not only transaction
	 * itself, but additional crypto data (digest/hmac).
	 */
	struct kmem_cache	*trans_cache;
	mempool_t		*trans_pool;

	/* This entity scans transaction tree */
	struct delayed_work 	trans_work;

	wait_queue_head_t	wait;
};

/* Kernel representation of the security attribute */
struct dst_secure
{
	struct list_head	sec_entry;
	struct dst_secure_user	sec;
};

int dst_process_bio(struct dst_node *n, struct bio *bio);

int dst_node_init_connected(struct dst_node *n, struct dst_network_ctl *r);
int dst_node_init_listened(struct dst_node *n, struct dst_export_ctl *le);

/*
 * Take one more reference on the state and return it.
 * Triggers BUG_ON() if the counter is already zero at the time of the call.
 */
static inline struct dst_state *dst_state_get(struct dst_state *st)
{
	BUG_ON(atomic_read(&st->refcnt) == 0);
	atomic_inc(&st->refcnt);
	return st;
}

void dst_state_put(struct dst_state *st);

struct dst_state *dst_state_alloc(struct dst_node *n);
int dst_state_socket_create(struct dst_state *st);
void dst_state_socket_release(struct dst_state *st);

void dst_state_exit_connected(struct dst_state *st);

int dst_state_schedule_receiver(struct dst_state *st);

void dst_dump_addr(struct socket *sk, struct sockaddr *sa, char *str);

/* Acquire the main state mutex */
static inline void dst_state_lock(struct dst_state *st)
{
	mutex_lock(&st->state_lock);
}

/* Release the main state mutex */
static inline void dst_state_unlock(struct dst_state *st)
{
	mutex_unlock(&st->state_lock);
}

void dst_poll_exit(struct dst_state *st);
int dst_poll_init(struct dst_state *st);

/*
 * Query the poll mask of the state's socket under the state lock.
 * The socket's poll callback is invoked with a NULL file and a NULL
 * poll table. When no socket is attached POLLHUP | POLLERR is returned.
 */
static inline unsigned int dst_state_poll(struct dst_state *st)
{
	unsigned int revents = POLLHUP | POLLERR;

	dst_state_lock(st);
	if (st->socket)
		revents = st->socket->ops->poll(NULL, st->socket, NULL);
	dst_state_unlock(st);

	return revents;
}

/* No-op setup callback: ignores both arguments and returns 0 */
static inline int dst_thread_setup(void *private, void *data)
{
	return 0;
}

void dst_node_put(struct dst_node *n);

/*
 * Take one more reference on the node and return it.
 * NOTE(review): unlike dst_state_get() there is no check for a zero
 * counter here - confirm whether that asymmetry is intended.
 */
static inline struct dst_node *dst_node_get(struct dst_node *n)
{
	atomic_inc(&n->refcnt);
	return n;
}

int dst_data_recv(struct dst_state *st, void *data, unsigned int size);
int dst_recv_cdata(struct dst_state *st, void *cdata);
int dst_data_send_header(struct socket *sock,
		void *data, unsigned int size, int more);

int dst_send_bio(struct dst_state *st, struct dst_cmd *cmd, struct bio *bio);

int dst_process_io(struct dst_state *st);
int dst_export_crypto(struct dst_node *n, struct bio *bio);
int dst_export_send_bio(struct bio *bio);
int dst_start_export(struct dst_node *n);

int __init dst_export_init(void);
void dst_export_exit(void);

/* Private structure for export block IO requests */
struct dst_export_priv
{
	struct list_head		request_entry;
	struct dst_state		*state;
	struct bio			*bio;
	struct dst_cmd			cmd;
};

/* Take one more reference on the transaction */
static inline void dst_trans_get(struct dst_trans *t)
{
	atomic_inc(&t->refcnt);
}

struct dst_trans *dst_trans_search(struct dst_node *node, dst_gen_t gen);
int dst_trans_remove(struct dst_trans *t);
int dst_trans_remove_nolock(struct dst_trans *t);
void dst_trans_put(struct dst_trans *t);

/*
 * Convert bio into network command.
 *
 * Fills cmd, flags, rw, size, csize (always 0), id and sector from
 * the bio and the given arguments. The top BIO_POOL_BITS bits of
 * bio->bi_flags are masked out of cmd->flags.
 *
 * NOTE(review): cmd->reserved is not written here, so it keeps whatever
 * value the caller's buffer already held - confirm callers initialize it
 * before the command is sent.
 */
static inline void dst_bio_to_cmd(struct bio *bio, struct dst_cmd *cmd,
		u32 command, u64 id)
{
	cmd->cmd = command;
	cmd->flags = (bio->bi_flags << BIO_POOL_BITS) >> BIO_POOL_BITS;
	cmd->rw = bio->bi_rw;
	cmd->size = bio->bi_size;
	cmd->csize = 0;
	cmd->id = id;
	cmd->sector = bio->bi_sector;
}

int dst_trans_send(struct dst_trans *t);
int dst_trans_crypto(struct dst_trans *t);

int dst_node_crypto_init(struct dst_node *n, struct dst_crypto_ctl *ctl);
void dst_node_crypto_exit(struct dst_node *n);

/*
 * Returns non-zero when a hash or a cipher algorithm name is set
 * for the node, i.e. when either name starts with a non-NUL byte.
 */
static inline int dst_need_crypto(struct dst_node *n)
{
	struct dst_crypto_ctl *c = &n->crypto;
	/*
	 * Logical OR (||) is what is meant here, but the bitwise one
	 * produces more optimal code, so it is used instead.
	 */
	return (c->hash_algo[0] | c->cipher_algo[0]);
}

int dst_node_trans_init(struct dst_node *n, unsigned int size);
void dst_node_trans_exit(struct dst_node *n);

/*
 * Pool of threads.
 * Ready list contains threads currently free to be used,
 * active one contains threads with some work scheduled for them.
 * Caller can wait in given queue when thread is ready.
 */
struct thread_pool
{
	int			thread_num;
	struct mutex		thread_lock;
	struct list_head	ready_list, active_list;

	wait_queue_head_t	wait;
};

void thread_pool_del_worker(struct thread_pool *p);
void thread_pool_del_worker_id(struct thread_pool *p, unsigned int id);
int thread_pool_add_worker(struct thread_pool *p,
		char *name,
		unsigned int id,
		void *(* init)(void *data),
		void (* cleanup)(void *data),
		void *data);

void thread_pool_destroy(struct thread_pool *p);
struct thread_pool *thread_pool_create(int num, char *name,
		void *(* init)(void *data),
		void (* cleanup)(void *data),
		void *data);

int thread_pool_schedule(struct thread_pool *p,
		int (* setup)(void *stored_private, void *setup_data),
		int (* action)(void *stored_private, void *setup_data),
		void *setup_data, long timeout);
int thread_pool_schedule_private(struct thread_pool *p,
		int (* setup)(void *private, void *data),
		int (* action)(void *private, void *data),
		void *data, long timeout, void *id);

#endif /* __KERNEL__ */
#endif /* __DST_H */