Merge branch 'for-linus' of git://git.kernel.dk/linux-2.6-block

* 'for-linus' of git://git.kernel.dk/linux-2.6-block: (27 commits)
block: make blk_init_free_list and elevator_init idempotent
block: avoid unconditionally freeing previously allocated request_queue
pipe: change /proc/sys/fs/pipe-max-pages to byte sized interface
pipe: change the privilege required for growing a pipe beyond system max
pipe: adjust minimum pipe size to 1 page
block: disable preemption before using sched_clock()
cciss: call BUG() earlier
Preparing 8.3.8rc2
drbd: Reduce verbosity
drbd: use drbd specific ratelimit instead of global printk_ratelimit
drbd: fix hang on local read errors while disconnected
drbd: Removed the now empty w_io_error() function
drbd: removed duplicated #includes
drbd: improve usage of MSG_MORE
drbd: need to set socket bufsize early to take effect
drbd: improve network latency, TCP_QUICKACK
drbd: Revert "drbd: Create new current UUID as late as possible"
brd: support discard
Revert "writeback: fix WB_SYNC_NONE writeback from umount"
Revert "writeback: ensure that WB_SYNC_NONE writeback with sb pinned is sync"
...

+312 -265
+14 -6
block/blk-core.c
···
467 {
468     struct request_list *rl = &q->rq;
469 
470     rl->count[BLK_RW_SYNC] = rl->count[BLK_RW_ASYNC] = 0;
471     rl->starved[BLK_RW_SYNC] = rl->starved[BLK_RW_ASYNC] = 0;
472     rl->elvpriv = 0;
···
573 struct request_queue *
574 blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
575 {
576 -   struct request_queue *q = blk_alloc_queue_node(GFP_KERNEL, node_id);
577 
578 -   return blk_init_allocated_queue_node(q, rfn, lock, node_id);
579 }
580 EXPORT_SYMBOL(blk_init_queue_node);
581 
···
603         return NULL;
604 
605     q->node = node_id;
606 -   if (blk_init_free_list(q)) {
607 -       kmem_cache_free(blk_requestq_cachep, q);
608         return NULL;
609 -   }
610 
611     q->request_fn = rfn;
612     q->prep_rq_fn = NULL;
···
627         return q;
628     }
629 
630 -   blk_put_queue(q);
631     return NULL;
632 }
633 EXPORT_SYMBOL(blk_init_allocated_queue_node);
···
467 {
468     struct request_list *rl = &q->rq;
469 
470 +   if (unlikely(rl->rq_pool))
471 +       return 0;
472 +
473     rl->count[BLK_RW_SYNC] = rl->count[BLK_RW_ASYNC] = 0;
474     rl->starved[BLK_RW_SYNC] = rl->starved[BLK_RW_ASYNC] = 0;
475     rl->elvpriv = 0;
···
570 struct request_queue *
571 blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
572 {
573 +   struct request_queue *uninit_q, *q;
574 
575 +   uninit_q = blk_alloc_queue_node(GFP_KERNEL, node_id);
576 +   if (!uninit_q)
577 +       return NULL;
578 +
579 +   q = blk_init_allocated_queue_node(uninit_q, rfn, lock, node_id);
580 +   if (!q)
581 +       blk_cleanup_queue(uninit_q);
582 +
583 +   return q;
584 }
585 EXPORT_SYMBOL(blk_init_queue_node);
586 
···
592         return NULL;
593 
594     q->node = node_id;
595 +   if (blk_init_free_list(q))
596         return NULL;
597 
598     q->request_fn = rfn;
599     q->prep_rq_fn = NULL;
···
618         return q;
619     }
620 
621     return NULL;
622 }
623 EXPORT_SYMBOL(blk_init_allocated_queue_node);
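With the rework above, blk_init_queue_node() tears down its own partially initialized queue via blk_cleanup_queue() when elevator or free-list setup fails, so a caller only has to check for NULL and must not free anything itself. A minimal, hypothetical caller sketch (the driver name, lock and request function are invented for illustration):

#include <linux/blkdev.h>
#include <linux/spinlock.h>

static DEFINE_SPINLOCK(mydrv_lock);

static void mydrv_request_fn(struct request_queue *q)
{
    /* fetch requests with blk_fetch_request() and complete them */
}

static struct request_queue *mydrv_create_queue(int node)
{
    struct request_queue *q;

    q = blk_init_queue_node(mydrv_request_fn, &mydrv_lock, node);
    if (!q)     /* the partially set up queue was already cleaned up */
        return NULL;

    blk_queue_max_hw_sectors(q, 1024);  /* illustrative limit */
    return q;
}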
+79 -22
block/cfq-iosched.c
··· 64 static struct completion *ioc_gone; 65 static DEFINE_SPINLOCK(ioc_gone_lock); 66 67 #define CFQ_PRIO_LISTS IOPRIO_BE_NR 68 #define cfq_class_idle(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_IDLE) 69 #define cfq_class_rt(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_RT) ··· 274 unsigned int cfq_latency; 275 unsigned int cfq_group_isolation; 276 277 struct list_head cic_list; 278 279 /* ··· 432 struct cfq_queue *cfqq, bool is_sync) 433 { 434 cic->cfqq[is_sync] = cfqq; 435 } 436 437 /* ··· 2532 static void cic_free_func(struct io_context *ioc, struct cfq_io_context *cic) 2533 { 2534 unsigned long flags; 2535 2536 - BUG_ON(!cic->dead_key); 2537 2538 spin_lock_irqsave(&ioc->lock, flags); 2539 - radix_tree_delete(&ioc->radix_root, cic->dead_key); 2540 hlist_del_rcu(&cic->cic_list); 2541 spin_unlock_irqrestore(&ioc->lock, flags); 2542 ··· 2560 __call_for_each_cic(ioc, cic_free_func); 2561 } 2562 2563 - static void cfq_exit_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq) 2564 { 2565 struct cfq_queue *__cfqq, *next; 2566 - 2567 - if (unlikely(cfqq == cfqd->active_queue)) { 2568 - __cfq_slice_expired(cfqd, cfqq, 0); 2569 - cfq_schedule_dispatch(cfqd); 2570 - } 2571 2572 /* 2573 * If this queue was scheduled to merge with another queue, be ··· 2579 cfq_put_queue(__cfqq); 2580 __cfqq = next; 2581 } 2582 2583 cfq_put_queue(cfqq); 2584 } ··· 2601 list_del_init(&cic->queue_list); 2602 2603 /* 2604 - * Make sure key == NULL is seen for dead queues 2605 */ 2606 smp_wmb(); 2607 - cic->dead_key = (unsigned long) cic->key; 2608 - cic->key = NULL; 2609 2610 if (ioc->ioc_data == cic) 2611 rcu_assign_pointer(ioc->ioc_data, NULL); ··· 2623 static void cfq_exit_single_io_context(struct io_context *ioc, 2624 struct cfq_io_context *cic) 2625 { 2626 - struct cfq_data *cfqd = cic->key; 2627 2628 if (cfqd) { 2629 struct request_queue *q = cfqd->queue; ··· 2636 * race between exiting task and queue 2637 */ 2638 smp_read_barrier_depends(); 2639 - if (cic->key) 2640 __cfq_exit_single_io_context(cfqd, cic); 2641 2642 spin_unlock_irqrestore(q->queue_lock, flags); ··· 2716 2717 static void changed_ioprio(struct io_context *ioc, struct cfq_io_context *cic) 2718 { 2719 - struct cfq_data *cfqd = cic->key; 2720 struct cfq_queue *cfqq; 2721 unsigned long flags; 2722 ··· 2773 static void changed_cgroup(struct io_context *ioc, struct cfq_io_context *cic) 2774 { 2775 struct cfq_queue *sync_cfqq = cic_to_cfqq(cic, 1); 2776 - struct cfq_data *cfqd = cic->key; 2777 unsigned long flags; 2778 struct request_queue *q; 2779 ··· 2910 unsigned long flags; 2911 2912 WARN_ON(!list_empty(&cic->queue_list)); 2913 2914 spin_lock_irqsave(&ioc->lock, flags); 2915 2916 BUG_ON(ioc->ioc_data == cic); 2917 2918 - radix_tree_delete(&ioc->radix_root, (unsigned long) cfqd); 2919 hlist_del_rcu(&cic->cic_list); 2920 spin_unlock_irqrestore(&ioc->lock, flags); 2921 ··· 2928 { 2929 struct cfq_io_context *cic; 2930 unsigned long flags; 2931 - void *k; 2932 2933 if (unlikely(!ioc)) 2934 return NULL; ··· 2944 } 2945 2946 do { 2947 - cic = radix_tree_lookup(&ioc->radix_root, (unsigned long) cfqd); 2948 rcu_read_unlock(); 2949 if (!cic) 2950 break; 2951 - /* ->key must be copied to avoid race with cfq_exit_queue() */ 2952 - k = cic->key; 2953 - if (unlikely(!k)) { 2954 cfq_drop_dead_cic(cfqd, ioc, cic); 2955 rcu_read_lock(); 2956 continue; ··· 2981 2982 spin_lock_irqsave(&ioc->lock, flags); 2983 ret = radix_tree_insert(&ioc->radix_root, 2984 - (unsigned long) cfqd, cic); 2985 if (!ret) 2986 hlist_add_head_rcu(&cic->cic_list, &ioc->cic_list); 2987 
spin_unlock_irqrestore(&ioc->lock, flags); ··· 3541 } 3542 3543 cic_set_cfqq(cic, NULL, 1); 3544 cfq_put_queue(cfqq); 3545 return NULL; 3546 } ··· 3736 3737 cfq_shutdown_timer_wq(cfqd); 3738 3739 /* Wait for cfqg->blkg->key accessors to exit their grace periods. */ 3740 call_rcu(&cfqd->rcu, cfq_cfqd_free); 3741 } 3742 3743 static void *cfq_init_queue(struct request_queue *q) ··· 3769 struct cfq_group *cfqg; 3770 struct cfq_rb_root *st; 3771 3772 cfqd = kmalloc_node(sizeof(*cfqd), GFP_KERNEL | __GFP_ZERO, q->node); 3773 if (!cfqd) 3774 return NULL; 3775 3776 /* Init root service tree */ 3777 cfqd->grp_service_tree = CFQ_RB_ROOT; ··· 4040 */ 4041 if (elv_ioc_count_read(cfq_ioc_count)) 4042 wait_for_completion(&all_gone); 4043 cfq_slab_kill(); 4044 } 4045
··· 64 static struct completion *ioc_gone; 65 static DEFINE_SPINLOCK(ioc_gone_lock); 66 67 + static DEFINE_SPINLOCK(cic_index_lock); 68 + static DEFINE_IDA(cic_index_ida); 69 + 70 #define CFQ_PRIO_LISTS IOPRIO_BE_NR 71 #define cfq_class_idle(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_IDLE) 72 #define cfq_class_rt(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_RT) ··· 271 unsigned int cfq_latency; 272 unsigned int cfq_group_isolation; 273 274 + unsigned int cic_index; 275 struct list_head cic_list; 276 277 /* ··· 428 struct cfq_queue *cfqq, bool is_sync) 429 { 430 cic->cfqq[is_sync] = cfqq; 431 + } 432 + 433 + #define CIC_DEAD_KEY 1ul 434 + #define CIC_DEAD_INDEX_SHIFT 1 435 + 436 + static inline void *cfqd_dead_key(struct cfq_data *cfqd) 437 + { 438 + return (void *)(cfqd->cic_index << CIC_DEAD_INDEX_SHIFT | CIC_DEAD_KEY); 439 + } 440 + 441 + static inline struct cfq_data *cic_to_cfqd(struct cfq_io_context *cic) 442 + { 443 + struct cfq_data *cfqd = cic->key; 444 + 445 + if (unlikely((unsigned long) cfqd & CIC_DEAD_KEY)) 446 + return NULL; 447 + 448 + return cfqd; 449 } 450 451 /* ··· 2510 static void cic_free_func(struct io_context *ioc, struct cfq_io_context *cic) 2511 { 2512 unsigned long flags; 2513 + unsigned long dead_key = (unsigned long) cic->key; 2514 2515 + BUG_ON(!(dead_key & CIC_DEAD_KEY)); 2516 2517 spin_lock_irqsave(&ioc->lock, flags); 2518 + radix_tree_delete(&ioc->radix_root, dead_key >> CIC_DEAD_INDEX_SHIFT); 2519 hlist_del_rcu(&cic->cic_list); 2520 spin_unlock_irqrestore(&ioc->lock, flags); 2521 ··· 2537 __call_for_each_cic(ioc, cic_free_func); 2538 } 2539 2540 + static void cfq_put_cooperator(struct cfq_queue *cfqq) 2541 { 2542 struct cfq_queue *__cfqq, *next; 2543 2544 /* 2545 * If this queue was scheduled to merge with another queue, be ··· 2561 cfq_put_queue(__cfqq); 2562 __cfqq = next; 2563 } 2564 + } 2565 + 2566 + static void cfq_exit_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq) 2567 + { 2568 + if (unlikely(cfqq == cfqd->active_queue)) { 2569 + __cfq_slice_expired(cfqd, cfqq, 0); 2570 + cfq_schedule_dispatch(cfqd); 2571 + } 2572 + 2573 + cfq_put_cooperator(cfqq); 2574 2575 cfq_put_queue(cfqq); 2576 } ··· 2573 list_del_init(&cic->queue_list); 2574 2575 /* 2576 + * Make sure dead mark is seen for dead queues 2577 */ 2578 smp_wmb(); 2579 + cic->key = cfqd_dead_key(cfqd); 2580 2581 if (ioc->ioc_data == cic) 2582 rcu_assign_pointer(ioc->ioc_data, NULL); ··· 2596 static void cfq_exit_single_io_context(struct io_context *ioc, 2597 struct cfq_io_context *cic) 2598 { 2599 + struct cfq_data *cfqd = cic_to_cfqd(cic); 2600 2601 if (cfqd) { 2602 struct request_queue *q = cfqd->queue; ··· 2609 * race between exiting task and queue 2610 */ 2611 smp_read_barrier_depends(); 2612 + if (cic->key == cfqd) 2613 __cfq_exit_single_io_context(cfqd, cic); 2614 2615 spin_unlock_irqrestore(q->queue_lock, flags); ··· 2689 2690 static void changed_ioprio(struct io_context *ioc, struct cfq_io_context *cic) 2691 { 2692 + struct cfq_data *cfqd = cic_to_cfqd(cic); 2693 struct cfq_queue *cfqq; 2694 unsigned long flags; 2695 ··· 2746 static void changed_cgroup(struct io_context *ioc, struct cfq_io_context *cic) 2747 { 2748 struct cfq_queue *sync_cfqq = cic_to_cfqq(cic, 1); 2749 + struct cfq_data *cfqd = cic_to_cfqd(cic); 2750 unsigned long flags; 2751 struct request_queue *q; 2752 ··· 2883 unsigned long flags; 2884 2885 WARN_ON(!list_empty(&cic->queue_list)); 2886 + BUG_ON(cic->key != cfqd_dead_key(cfqd)); 2887 2888 spin_lock_irqsave(&ioc->lock, flags); 2889 2890 BUG_ON(ioc->ioc_data == cic); 
2891 2892 + radix_tree_delete(&ioc->radix_root, cfqd->cic_index); 2893 hlist_del_rcu(&cic->cic_list); 2894 spin_unlock_irqrestore(&ioc->lock, flags); 2895 ··· 2900 { 2901 struct cfq_io_context *cic; 2902 unsigned long flags; 2903 2904 if (unlikely(!ioc)) 2905 return NULL; ··· 2917 } 2918 2919 do { 2920 + cic = radix_tree_lookup(&ioc->radix_root, cfqd->cic_index); 2921 rcu_read_unlock(); 2922 if (!cic) 2923 break; 2924 + if (unlikely(cic->key != cfqd)) { 2925 cfq_drop_dead_cic(cfqd, ioc, cic); 2926 rcu_read_lock(); 2927 continue; ··· 2956 2957 spin_lock_irqsave(&ioc->lock, flags); 2958 ret = radix_tree_insert(&ioc->radix_root, 2959 + cfqd->cic_index, cic); 2960 if (!ret) 2961 hlist_add_head_rcu(&cic->cic_list, &ioc->cic_list); 2962 spin_unlock_irqrestore(&ioc->lock, flags); ··· 3516 } 3517 3518 cic_set_cfqq(cic, NULL, 1); 3519 + 3520 + cfq_put_cooperator(cfqq); 3521 + 3522 cfq_put_queue(cfqq); 3523 return NULL; 3524 } ··· 3708 3709 cfq_shutdown_timer_wq(cfqd); 3710 3711 + spin_lock(&cic_index_lock); 3712 + ida_remove(&cic_index_ida, cfqd->cic_index); 3713 + spin_unlock(&cic_index_lock); 3714 + 3715 /* Wait for cfqg->blkg->key accessors to exit their grace periods. */ 3716 call_rcu(&cfqd->rcu, cfq_cfqd_free); 3717 + } 3718 + 3719 + static int cfq_alloc_cic_index(void) 3720 + { 3721 + int index, error; 3722 + 3723 + do { 3724 + if (!ida_pre_get(&cic_index_ida, GFP_KERNEL)) 3725 + return -ENOMEM; 3726 + 3727 + spin_lock(&cic_index_lock); 3728 + error = ida_get_new(&cic_index_ida, &index); 3729 + spin_unlock(&cic_index_lock); 3730 + if (error && error != -EAGAIN) 3731 + return error; 3732 + } while (error); 3733 + 3734 + return index; 3735 } 3736 3737 static void *cfq_init_queue(struct request_queue *q) ··· 3719 struct cfq_group *cfqg; 3720 struct cfq_rb_root *st; 3721 3722 + i = cfq_alloc_cic_index(); 3723 + if (i < 0) 3724 + return NULL; 3725 + 3726 cfqd = kmalloc_node(sizeof(*cfqd), GFP_KERNEL | __GFP_ZERO, q->node); 3727 if (!cfqd) 3728 return NULL; 3729 + 3730 + cfqd->cic_index = i; 3731 3732 /* Init root service tree */ 3733 cfqd->grp_service_tree = CFQ_RB_ROOT; ··· 3984 */ 3985 if (elv_ioc_count_read(cfq_ioc_count)) 3986 wait_for_completion(&all_gone); 3987 + ida_destroy(&cic_index_ida); 3988 cfq_slab_kill(); 3989 } 3990
+5 -3
block/elevator.c
···
242 {
243     struct elevator_type *e = NULL;
244     struct elevator_queue *eq;
245 -   int ret = 0;
246     void *data;
247 
248     INIT_LIST_HEAD(&q->queue_head);
249     q->last_merge = NULL;
···
286     }
287 
288     elevator_attach(q, eq, data);
289 -   return ret;
290 }
291 EXPORT_SYMBOL(elevator_init);
292 
···
1099    struct elevator_type *__e;
1100    int len = 0;
1101 
1102 -  if (!q->elevator)
1103        return sprintf(name, "none\n");
1104 
1105    elv = e->elevator_type;
···
242 {
243     struct elevator_type *e = NULL;
244     struct elevator_queue *eq;
245     void *data;
246 +
247 +   if (unlikely(q->elevator))
248 +       return 0;
249 
250     INIT_LIST_HEAD(&q->queue_head);
251     q->last_merge = NULL;
···
284     }
285 
286     elevator_attach(q, eq, data);
287 +   return 0;
288 }
289 EXPORT_SYMBOL(elevator_init);
290 
···
1097    struct elevator_type *__e;
1098    int len = 0;
1099 
1100 +  if (!q->elevator || !blk_queue_stackable(q))
1101        return sprintf(name, "none\n");
1102 
1103    elv = e->elevator_type;
+52 -1
drivers/block/brd.c
···
133         return page;
134 }
135 
136 /*
137  * Free all backing store pages and radix tree. This must only be called when
138  * there are no other users of the device.
···
209             return -ENOMEM;
210         }
211     return 0;
212 }
213 
214 /*
···
340             get_capacity(bdev->bd_disk))
341         goto out;
342 
343     rw = bio_rw(bio);
344     if (rw == READA)
345         rw = READ;
···
366 }
367 
368 #ifdef CONFIG_BLK_DEV_XIP
369 -static int brd_direct_access (struct block_device *bdev, sector_t sector,
370             void **kaddr, unsigned long *pfn)
371 {
372     struct brd_device *brd = bdev->bd_disk->private_data;
···
482     blk_queue_ordered(brd->brd_queue, QUEUE_ORDERED_TAG, NULL);
483     blk_queue_max_hw_sectors(brd->brd_queue, 1024);
484     blk_queue_bounce_limit(brd->brd_queue, BLK_BOUNCE_ANY);
485 
486     disk = brd->brd_disk = alloc_disk(1 << part_shift);
487     if (!disk)
···
133         return page;
134 }
135 
136 +static void brd_free_page(struct brd_device *brd, sector_t sector)
137 +{
138 +   struct page *page;
139 +   pgoff_t idx;
140 +
141 +   spin_lock(&brd->brd_lock);
142 +   idx = sector >> PAGE_SECTORS_SHIFT;
143 +   page = radix_tree_delete(&brd->brd_pages, idx);
144 +   spin_unlock(&brd->brd_lock);
145 +   if (page)
146 +       __free_page(page);
147 +}
148 +
149 +static void brd_zero_page(struct brd_device *brd, sector_t sector)
150 +{
151 +   struct page *page;
152 +
153 +   page = brd_lookup_page(brd, sector);
154 +   if (page)
155 +       clear_highpage(page);
156 +}
157 +
158 /*
159  * Free all backing store pages and radix tree. This must only be called when
160  * there are no other users of the device.
···
187             return -ENOMEM;
188         }
189     return 0;
190 +}
191 +
192 +static void discard_from_brd(struct brd_device *brd,
193 +           sector_t sector, size_t n)
194 +{
195 +   while (n >= PAGE_SIZE) {
196 +       /*
197 +        * Don't want to actually discard pages here because
198 +        * re-allocating the pages can result in writeback
199 +        * deadlocks under heavy load.
200 +        */
201 +       if (0)
202 +           brd_free_page(brd, sector);
203 +       else
204 +           brd_zero_page(brd, sector);
205 +       sector += PAGE_SIZE >> SECTOR_SHIFT;
206 +       n -= PAGE_SIZE;
207 +   }
208 }
209 
210 /*
···
300             get_capacity(bdev->bd_disk))
301         goto out;
302 
303 +   if (unlikely(bio_rw_flagged(bio, BIO_RW_DISCARD))) {
304 +       err = 0;
305 +       discard_from_brd(brd, sector, bio->bi_size);
306 +       goto out;
307 +   }
308 +
309     rw = bio_rw(bio);
310     if (rw == READA)
311         rw = READ;
···
320 }
321 
322 #ifdef CONFIG_BLK_DEV_XIP
323 +static int brd_direct_access(struct block_device *bdev, sector_t sector,
324             void **kaddr, unsigned long *pfn)
325 {
326     struct brd_device *brd = bdev->bd_disk->private_data;
···
436     blk_queue_ordered(brd->brd_queue, QUEUE_ORDERED_TAG, NULL);
437     blk_queue_max_hw_sectors(brd->brd_queue, 1024);
438     blk_queue_bounce_limit(brd->brd_queue, BLK_BOUNCE_ANY);
439 +
440 +   brd->brd_queue->limits.discard_granularity = PAGE_SIZE;
441 +   brd->brd_queue->limits.max_discard_sectors = UINT_MAX;
442 +   brd->brd_queue->limits.discard_zeroes_data = 1;
443 +   queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, brd->brd_queue);
444 
445     disk = brd->brd_disk = alloc_disk(1 << part_shift);
446     if (!disk)
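With the brd queue flagged for discard as above, discard requests reaching the RAM disk are handled by zeroing the backing pages rather than freeing them. One way to exercise this path from user space is the BLKDISCARD ioctl; a minimal sketch, assuming a RAM disk at /dev/ram0 (device path and range are illustrative):

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/fs.h>       /* BLKDISCARD */

int main(void)
{
    /* Discard (here: zero) the first 1 MiB of the RAM disk. */
    uint64_t range[2] = { 0, 1024 * 1024 }; /* offset, length in bytes */
    int fd = open("/dev/ram0", O_WRONLY);

    if (fd < 0) {
        perror("open");
        return 1;
    }
    if (ioctl(fd, BLKDISCARD, range) < 0)
        perror("BLKDISCARD");
    close(fd);
    return 0;
}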
+1 -1
drivers/block/cciss_scsi.c
···
188 
189     sa = h->scsi_ctlr;
190     stk = &sa->cmd_stack;
191     if (stk->top >= CMD_STACK_SIZE) {
192         printk("cciss: scsi_cmd_free called too many times.\n");
193         BUG();
194     }
195 -   stk->top++;
196     stk->elem[stk->top] = (struct cciss_scsi_cmd_stack_elem_t *) cmd;
197 }
198 
···
188 
189     sa = h->scsi_ctlr;
190     stk = &sa->cmd_stack;
191 +   stk->top++;
192     if (stk->top >= CMD_STACK_SIZE) {
193         printk("cciss: scsi_cmd_free called too many times.\n");
194         BUG();
195     }
196     stk->elem[stk->top] = (struct cciss_scsi_cmd_stack_elem_t *) cmd;
197 }
198 
+3 -11
drivers/block/drbd/drbd_int.h
··· 943 struct drbd_work resync_work, 944 unplug_work, 945 md_sync_work, 946 - delay_probe_work, 947 - uuid_work; 948 struct timer_list resync_timer; 949 struct timer_list md_sync_timer; 950 struct timer_list delay_probe_timer; ··· 1068 struct timeval dps_time; /* delay-probes-start-time */ 1069 unsigned int dp_volume_last; /* send_cnt of last delay probe */ 1070 int c_sync_rate; /* current resync rate after delay_probe magic */ 1071 - atomic_t new_c_uuid; 1072 }; 1073 1074 static inline struct drbd_conf *minor_to_mdev(unsigned int minor) ··· 1474 extern int w_ov_finished(struct drbd_conf *, struct drbd_work *, int); 1475 extern int w_resync_inactive(struct drbd_conf *, struct drbd_work *, int); 1476 extern int w_resume_next_sg(struct drbd_conf *, struct drbd_work *, int); 1477 - extern int w_io_error(struct drbd_conf *, struct drbd_work *, int); 1478 extern int w_send_write_hint(struct drbd_conf *, struct drbd_work *, int); 1479 extern int w_make_resync_request(struct drbd_conf *, struct drbd_work *, int); 1480 extern int w_send_dblock(struct drbd_conf *, struct drbd_work *, int); ··· 1539 1540 static inline void drbd_tcp_quickack(struct socket *sock) 1541 { 1542 - int __user val = 1; 1543 (void) drbd_setsockopt(sock, SOL_TCP, TCP_QUICKACK, 1544 (char __user *)&val, sizeof(val)); 1545 } ··· 1725 switch (mdev->ldev->dc.on_io_error) { 1726 case EP_PASS_ON: 1727 if (!forcedetach) { 1728 - if (printk_ratelimit()) 1729 dev_err(DEV, "Local IO failed in %s." 1730 "Passing error on...\n", where); 1731 break; ··· 2216 return 0; 2217 if (test_bit(BITMAP_IO, &mdev->flags)) 2218 return 0; 2219 - if (atomic_read(&mdev->new_c_uuid)) 2220 - return 0; 2221 return 1; 2222 } 2223 ··· 2235 * 2236 * to avoid races with the reconnect code, 2237 * we need to atomic_inc within the spinlock. */ 2238 - 2239 - if (atomic_read(&mdev->new_c_uuid) && atomic_add_unless(&mdev->new_c_uuid, -1, 1)) 2240 - drbd_queue_work_front(&mdev->data.work, &mdev->uuid_work); 2241 2242 spin_lock_irq(&mdev->req_lock); 2243 while (!__inc_ap_bio_cond(mdev)) {
··· 943 struct drbd_work resync_work, 944 unplug_work, 945 md_sync_work, 946 + delay_probe_work; 947 struct timer_list resync_timer; 948 struct timer_list md_sync_timer; 949 struct timer_list delay_probe_timer; ··· 1069 struct timeval dps_time; /* delay-probes-start-time */ 1070 unsigned int dp_volume_last; /* send_cnt of last delay probe */ 1071 int c_sync_rate; /* current resync rate after delay_probe magic */ 1072 }; 1073 1074 static inline struct drbd_conf *minor_to_mdev(unsigned int minor) ··· 1476 extern int w_ov_finished(struct drbd_conf *, struct drbd_work *, int); 1477 extern int w_resync_inactive(struct drbd_conf *, struct drbd_work *, int); 1478 extern int w_resume_next_sg(struct drbd_conf *, struct drbd_work *, int); 1479 extern int w_send_write_hint(struct drbd_conf *, struct drbd_work *, int); 1480 extern int w_make_resync_request(struct drbd_conf *, struct drbd_work *, int); 1481 extern int w_send_dblock(struct drbd_conf *, struct drbd_work *, int); ··· 1542 1543 static inline void drbd_tcp_quickack(struct socket *sock) 1544 { 1545 + int __user val = 2; 1546 (void) drbd_setsockopt(sock, SOL_TCP, TCP_QUICKACK, 1547 (char __user *)&val, sizeof(val)); 1548 } ··· 1728 switch (mdev->ldev->dc.on_io_error) { 1729 case EP_PASS_ON: 1730 if (!forcedetach) { 1731 + if (__ratelimit(&drbd_ratelimit_state)) 1732 dev_err(DEV, "Local IO failed in %s." 1733 "Passing error on...\n", where); 1734 break; ··· 2219 return 0; 2220 if (test_bit(BITMAP_IO, &mdev->flags)) 2221 return 0; 2222 return 1; 2223 } 2224 ··· 2240 * 2241 * to avoid races with the reconnect code, 2242 * we need to atomic_inc within the spinlock. */ 2243 2244 spin_lock_irq(&mdev->req_lock); 2245 while (!__inc_ap_bio_cond(mdev)) {
+26 -42
drivers/block/drbd/drbd_main.c
··· 1215 ns.pdsk == D_OUTDATED)) { 1216 if (get_ldev(mdev)) { 1217 if ((ns.role == R_PRIMARY || ns.peer == R_PRIMARY) && 1218 - mdev->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE && 1219 - !atomic_read(&mdev->new_c_uuid)) 1220 - atomic_set(&mdev->new_c_uuid, 2); 1221 put_ldev(mdev); 1222 } 1223 } 1224 1225 if (ns.pdsk < D_INCONSISTENT && get_ldev(mdev)) { 1226 - /* Diskless peer becomes primary or got connected do diskless, primary peer. */ 1227 - if (ns.peer == R_PRIMARY && mdev->ldev->md.uuid[UI_BITMAP] == 0 && 1228 - !atomic_read(&mdev->new_c_uuid)) 1229 - atomic_set(&mdev->new_c_uuid, 2); 1230 1231 /* D_DISKLESS Peer becomes secondary */ 1232 if (os.peer == R_PRIMARY && ns.peer == R_SECONDARY) ··· 1349 drbd_md_sync(mdev); 1350 } 1351 1352 - static int w_new_current_uuid(struct drbd_conf *mdev, struct drbd_work *w, int cancel) 1353 - { 1354 - if (get_ldev(mdev)) { 1355 - if (mdev->ldev->md.uuid[UI_BITMAP] == 0) { 1356 - drbd_uuid_new_current(mdev); 1357 - if (get_net_conf(mdev)) { 1358 - drbd_send_uuids(mdev); 1359 - put_net_conf(mdev); 1360 - } 1361 - drbd_md_sync(mdev); 1362 - } 1363 - put_ldev(mdev); 1364 - } 1365 - atomic_dec(&mdev->new_c_uuid); 1366 - wake_up(&mdev->misc_wait); 1367 - 1368 - return 1; 1369 - } 1370 1371 static int drbd_thread_setup(void *arg) 1372 { ··· 2272 * with page_count == 0 or PageSlab. 2273 */ 2274 static int _drbd_no_send_page(struct drbd_conf *mdev, struct page *page, 2275 - int offset, size_t size) 2276 { 2277 - int sent = drbd_send(mdev, mdev->data.socket, kmap(page) + offset, size, 0); 2278 kunmap(page); 2279 if (sent == size) 2280 mdev->send_cnt += size>>9; ··· 2282 } 2283 2284 static int _drbd_send_page(struct drbd_conf *mdev, struct page *page, 2285 - int offset, size_t size) 2286 { 2287 mm_segment_t oldfs = get_fs(); 2288 int sent, ok; ··· 2295 * __page_cache_release a page that would actually still be referenced 2296 * by someone, leading to some obscure delayed Oops somewhere else. 
*/ 2297 if (disable_sendpage || (page_count(page) < 1) || PageSlab(page)) 2298 - return _drbd_no_send_page(mdev, page, offset, size); 2299 2300 drbd_update_congested(mdev); 2301 set_fs(KERNEL_DS); 2302 do { 2303 sent = mdev->data.socket->ops->sendpage(mdev->data.socket, page, 2304 offset, len, 2305 - MSG_NOSIGNAL); 2306 if (sent == -EAGAIN) { 2307 if (we_should_drop_the_connection(mdev, 2308 mdev->data.socket)) ··· 2332 { 2333 struct bio_vec *bvec; 2334 int i; 2335 __bio_for_each_segment(bvec, bio, i, 0) { 2336 if (!_drbd_no_send_page(mdev, bvec->bv_page, 2337 - bvec->bv_offset, bvec->bv_len)) 2338 return 0; 2339 } 2340 return 1; ··· 2346 { 2347 struct bio_vec *bvec; 2348 int i; 2349 __bio_for_each_segment(bvec, bio, i, 0) { 2350 if (!_drbd_send_page(mdev, bvec->bv_page, 2351 - bvec->bv_offset, bvec->bv_len)) 2352 return 0; 2353 } 2354 - 2355 return 1; 2356 } 2357 ··· 2360 { 2361 struct page *page = e->pages; 2362 unsigned len = e->size; 2363 page_chain_for_each(page) { 2364 unsigned l = min_t(unsigned, len, PAGE_SIZE); 2365 - if (!_drbd_send_page(mdev, page, 0, l)) 2366 return 0; 2367 len -= l; 2368 } ··· 2444 p.dp_flags = cpu_to_be32(dp_flags); 2445 set_bit(UNPLUG_REMOTE, &mdev->flags); 2446 ok = (sizeof(p) == 2447 - drbd_send(mdev, mdev->data.socket, &p, sizeof(p), MSG_MORE)); 2448 if (ok && dgs) { 2449 dgb = mdev->int_dig_out; 2450 drbd_csum_bio(mdev, mdev->integrity_w_tfm, req->master_bio, dgb); 2451 - ok = drbd_send(mdev, mdev->data.socket, dgb, dgs, MSG_MORE); 2452 } 2453 if (ok) { 2454 if (mdev->net_conf->wire_protocol == DRBD_PROT_A) ··· 2497 return 0; 2498 2499 ok = sizeof(p) == drbd_send(mdev, mdev->data.socket, &p, 2500 - sizeof(p), MSG_MORE); 2501 if (ok && dgs) { 2502 dgb = mdev->int_dig_out; 2503 drbd_csum_ee(mdev, mdev->integrity_w_tfm, e, dgb); 2504 - ok = drbd_send(mdev, mdev->data.socket, dgb, dgs, MSG_MORE); 2505 } 2506 if (ok) 2507 ok = _drbd_send_zc_ee(mdev, e); ··· 2695 atomic_set(&mdev->net_cnt, 0); 2696 atomic_set(&mdev->packet_seq, 0); 2697 atomic_set(&mdev->pp_in_use, 0); 2698 - atomic_set(&mdev->new_c_uuid, 0); 2699 2700 mutex_init(&mdev->md_io_mutex); 2701 mutex_init(&mdev->data.mutex); ··· 2725 INIT_LIST_HEAD(&mdev->bm_io_work.w.list); 2726 INIT_LIST_HEAD(&mdev->delay_probes); 2727 INIT_LIST_HEAD(&mdev->delay_probe_work.list); 2728 - INIT_LIST_HEAD(&mdev->uuid_work.list); 2729 2730 mdev->resync_work.cb = w_resync_inactive; 2731 mdev->unplug_work.cb = w_send_write_hint; 2732 mdev->md_sync_work.cb = w_md_sync; 2733 mdev->bm_io_work.w.cb = w_bitmap_io; 2734 mdev->delay_probe_work.cb = w_delay_probes; 2735 - mdev->uuid_work.cb = w_new_current_uuid; 2736 init_timer(&mdev->resync_timer); 2737 init_timer(&mdev->md_sync_timer); 2738 init_timer(&mdev->delay_probe_timer); ··· 3783 if (ret) { 3784 fault_count++; 3785 3786 - if (printk_ratelimit()) 3787 dev_warn(DEV, "***Simulating %s failure\n", 3788 _drbd_fault_str(type)); 3789 }
··· 1215 ns.pdsk == D_OUTDATED)) { 1216 if (get_ldev(mdev)) { 1217 if ((ns.role == R_PRIMARY || ns.peer == R_PRIMARY) && 1218 + mdev->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) { 1219 + drbd_uuid_new_current(mdev); 1220 + drbd_send_uuids(mdev); 1221 + } 1222 put_ldev(mdev); 1223 } 1224 } 1225 1226 if (ns.pdsk < D_INCONSISTENT && get_ldev(mdev)) { 1227 + if (ns.peer == R_PRIMARY && mdev->ldev->md.uuid[UI_BITMAP] == 0) 1228 + drbd_uuid_new_current(mdev); 1229 1230 /* D_DISKLESS Peer becomes secondary */ 1231 if (os.peer == R_PRIMARY && ns.peer == R_SECONDARY) ··· 1350 drbd_md_sync(mdev); 1351 } 1352 1353 1354 static int drbd_thread_setup(void *arg) 1355 { ··· 2291 * with page_count == 0 or PageSlab. 2292 */ 2293 static int _drbd_no_send_page(struct drbd_conf *mdev, struct page *page, 2294 + int offset, size_t size, unsigned msg_flags) 2295 { 2296 + int sent = drbd_send(mdev, mdev->data.socket, kmap(page) + offset, size, msg_flags); 2297 kunmap(page); 2298 if (sent == size) 2299 mdev->send_cnt += size>>9; ··· 2301 } 2302 2303 static int _drbd_send_page(struct drbd_conf *mdev, struct page *page, 2304 + int offset, size_t size, unsigned msg_flags) 2305 { 2306 mm_segment_t oldfs = get_fs(); 2307 int sent, ok; ··· 2314 * __page_cache_release a page that would actually still be referenced 2315 * by someone, leading to some obscure delayed Oops somewhere else. */ 2316 if (disable_sendpage || (page_count(page) < 1) || PageSlab(page)) 2317 + return _drbd_no_send_page(mdev, page, offset, size, msg_flags); 2318 2319 + msg_flags |= MSG_NOSIGNAL; 2320 drbd_update_congested(mdev); 2321 set_fs(KERNEL_DS); 2322 do { 2323 sent = mdev->data.socket->ops->sendpage(mdev->data.socket, page, 2324 offset, len, 2325 + msg_flags); 2326 if (sent == -EAGAIN) { 2327 if (we_should_drop_the_connection(mdev, 2328 mdev->data.socket)) ··· 2350 { 2351 struct bio_vec *bvec; 2352 int i; 2353 + /* hint all but last page with MSG_MORE */ 2354 __bio_for_each_segment(bvec, bio, i, 0) { 2355 if (!_drbd_no_send_page(mdev, bvec->bv_page, 2356 + bvec->bv_offset, bvec->bv_len, 2357 + i == bio->bi_vcnt -1 ? 0 : MSG_MORE)) 2358 return 0; 2359 } 2360 return 1; ··· 2362 { 2363 struct bio_vec *bvec; 2364 int i; 2365 + /* hint all but last page with MSG_MORE */ 2366 __bio_for_each_segment(bvec, bio, i, 0) { 2367 if (!_drbd_send_page(mdev, bvec->bv_page, 2368 + bvec->bv_offset, bvec->bv_len, 2369 + i == bio->bi_vcnt -1 ? 0 : MSG_MORE)) 2370 return 0; 2371 } 2372 return 1; 2373 } 2374 ··· 2375 { 2376 struct page *page = e->pages; 2377 unsigned len = e->size; 2378 + /* hint all but last page with MSG_MORE */ 2379 page_chain_for_each(page) { 2380 unsigned l = min_t(unsigned, len, PAGE_SIZE); 2381 + if (!_drbd_send_page(mdev, page, 0, l, 2382 + page_chain_next(page) ? MSG_MORE : 0)) 2383 return 0; 2384 len -= l; 2385 } ··· 2457 p.dp_flags = cpu_to_be32(dp_flags); 2458 set_bit(UNPLUG_REMOTE, &mdev->flags); 2459 ok = (sizeof(p) == 2460 + drbd_send(mdev, mdev->data.socket, &p, sizeof(p), dgs ? MSG_MORE : 0)); 2461 if (ok && dgs) { 2462 dgb = mdev->int_dig_out; 2463 drbd_csum_bio(mdev, mdev->integrity_w_tfm, req->master_bio, dgb); 2464 + ok = drbd_send(mdev, mdev->data.socket, dgb, dgs, 0); 2465 } 2466 if (ok) { 2467 if (mdev->net_conf->wire_protocol == DRBD_PROT_A) ··· 2510 return 0; 2511 2512 ok = sizeof(p) == drbd_send(mdev, mdev->data.socket, &p, 2513 + sizeof(p), dgs ? 
MSG_MORE : 0); 2514 if (ok && dgs) { 2515 dgb = mdev->int_dig_out; 2516 drbd_csum_ee(mdev, mdev->integrity_w_tfm, e, dgb); 2517 + ok = drbd_send(mdev, mdev->data.socket, dgb, dgs, 0); 2518 } 2519 if (ok) 2520 ok = _drbd_send_zc_ee(mdev, e); ··· 2708 atomic_set(&mdev->net_cnt, 0); 2709 atomic_set(&mdev->packet_seq, 0); 2710 atomic_set(&mdev->pp_in_use, 0); 2711 2712 mutex_init(&mdev->md_io_mutex); 2713 mutex_init(&mdev->data.mutex); ··· 2739 INIT_LIST_HEAD(&mdev->bm_io_work.w.list); 2740 INIT_LIST_HEAD(&mdev->delay_probes); 2741 INIT_LIST_HEAD(&mdev->delay_probe_work.list); 2742 2743 mdev->resync_work.cb = w_resync_inactive; 2744 mdev->unplug_work.cb = w_send_write_hint; 2745 mdev->md_sync_work.cb = w_md_sync; 2746 mdev->bm_io_work.w.cb = w_bitmap_io; 2747 mdev->delay_probe_work.cb = w_delay_probes; 2748 init_timer(&mdev->resync_timer); 2749 init_timer(&mdev->md_sync_timer); 2750 init_timer(&mdev->delay_probe_timer); ··· 3799 if (ret) { 3800 fault_count++; 3801 3802 + if (__ratelimit(&drbd_ratelimit_state)) 3803 dev_warn(DEV, "***Simulating %s failure\n", 3804 _drbd_fault_str(type)); 3805 }
+23 -22
drivers/block/drbd/drbd_receiver.c
··· 42 #include <linux/unistd.h> 43 #include <linux/vmalloc.h> 44 #include <linux/random.h> 45 - #include <linux/mm.h> 46 #include <linux/string.h> 47 #include <linux/scatterlist.h> 48 #include "drbd_int.h" ··· 570 return rv; 571 } 572 573 static struct socket *drbd_try_connect(struct drbd_conf *mdev) 574 { 575 const char *what; ··· 610 611 sock->sk->sk_rcvtimeo = 612 sock->sk->sk_sndtimeo = mdev->net_conf->try_connect_int*HZ; 613 614 /* explicitly bind to the configured IP as source IP 615 * for the outgoing connections. ··· 690 s_listen->sk->sk_reuse = 1; /* SO_REUSEADDR */ 691 s_listen->sk->sk_rcvtimeo = timeo; 692 s_listen->sk->sk_sndtimeo = timeo; 693 694 what = "bind before listen"; 695 err = s_listen->ops->bind(s_listen, ··· 877 878 sock->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK; 879 msock->sk->sk_priority = TC_PRIO_INTERACTIVE; 880 - 881 - if (mdev->net_conf->sndbuf_size) { 882 - sock->sk->sk_sndbuf = mdev->net_conf->sndbuf_size; 883 - sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK; 884 - } 885 - 886 - if (mdev->net_conf->rcvbuf_size) { 887 - sock->sk->sk_rcvbuf = mdev->net_conf->rcvbuf_size; 888 - sock->sk->sk_userlocks |= SOCK_RCVBUF_LOCK; 889 - } 890 891 /* NOT YET ... 892 * sock->sk->sk_sndtimeo = mdev->net_conf->timeout*HZ/10; ··· 1165 unsigned ds = e->size; 1166 unsigned n_bios = 0; 1167 unsigned nr_pages = (ds + PAGE_SIZE -1) >> PAGE_SHIFT; 1168 - 1169 - if (atomic_read(&mdev->new_c_uuid)) { 1170 - if (atomic_add_unless(&mdev->new_c_uuid, -1, 1)) { 1171 - drbd_uuid_new_current(mdev); 1172 - drbd_md_sync(mdev); 1173 - 1174 - atomic_dec(&mdev->new_c_uuid); 1175 - wake_up(&mdev->misc_wait); 1176 - } 1177 - wait_event(mdev->misc_wait, !atomic_read(&mdev->new_c_uuid)); 1178 - } 1179 1180 /* In most cases, we will only need one bio. But in case the lower 1181 * level restrictions happen to be different at this offset on this
··· 42 #include <linux/unistd.h> 43 #include <linux/vmalloc.h> 44 #include <linux/random.h> 45 #include <linux/string.h> 46 #include <linux/scatterlist.h> 47 #include "drbd_int.h" ··· 571 return rv; 572 } 573 574 + /* quoting tcp(7): 575 + * On individual connections, the socket buffer size must be set prior to the 576 + * listen(2) or connect(2) calls in order to have it take effect. 577 + * This is our wrapper to do so. 578 + */ 579 + static void drbd_setbufsize(struct socket *sock, unsigned int snd, 580 + unsigned int rcv) 581 + { 582 + /* open coded SO_SNDBUF, SO_RCVBUF */ 583 + if (snd) { 584 + sock->sk->sk_sndbuf = snd; 585 + sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK; 586 + } 587 + if (rcv) { 588 + sock->sk->sk_rcvbuf = rcv; 589 + sock->sk->sk_userlocks |= SOCK_RCVBUF_LOCK; 590 + } 591 + } 592 + 593 static struct socket *drbd_try_connect(struct drbd_conf *mdev) 594 { 595 const char *what; ··· 592 593 sock->sk->sk_rcvtimeo = 594 sock->sk->sk_sndtimeo = mdev->net_conf->try_connect_int*HZ; 595 + drbd_setbufsize(sock, mdev->net_conf->sndbuf_size, 596 + mdev->net_conf->rcvbuf_size); 597 598 /* explicitly bind to the configured IP as source IP 599 * for the outgoing connections. ··· 670 s_listen->sk->sk_reuse = 1; /* SO_REUSEADDR */ 671 s_listen->sk->sk_rcvtimeo = timeo; 672 s_listen->sk->sk_sndtimeo = timeo; 673 + drbd_setbufsize(s_listen, mdev->net_conf->sndbuf_size, 674 + mdev->net_conf->rcvbuf_size); 675 676 what = "bind before listen"; 677 err = s_listen->ops->bind(s_listen, ··· 855 856 sock->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK; 857 msock->sk->sk_priority = TC_PRIO_INTERACTIVE; 858 859 /* NOT YET ... 860 * sock->sk->sk_sndtimeo = mdev->net_conf->timeout*HZ/10; ··· 1153 unsigned ds = e->size; 1154 unsigned n_bios = 0; 1155 unsigned nr_pages = (ds + PAGE_SIZE -1) >> PAGE_SHIFT; 1156 1157 /* In most cases, we will only need one bio. But in case the lower 1158 * level restrictions happen to be different at this offset on this
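The drbd_setbufsize() helper added above applies the configured send/receive buffer sizes before connect() and listen(), since tcp(7) notes that socket buffer sizes must be set prior to those calls in order to take effect. The same ordering applies to ordinary user-space sockets; a minimal sketch (address, port and sizes are illustrative, error handling trimmed):

#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
    int fd = socket(AF_INET, SOCK_STREAM, 0);
    int sndbuf = 128 * 1024, rcvbuf = 128 * 1024;
    struct sockaddr_in addr;

    /* Set SO_SNDBUF/SO_RCVBUF before connect(), as tcp(7) advises,
     * so the sizes influence the negotiated TCP window. */
    setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &sndbuf, sizeof(sndbuf));
    setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &rcvbuf, sizeof(rcvbuf));

    memset(&addr, 0, sizeof(addr));
    addr.sin_family = AF_INET;
    addr.sin_port = htons(7789);                    /* illustrative port */
    inet_pton(AF_INET, "192.0.2.1", &addr.sin_addr);    /* example address */

    if (connect(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0)
        perror("connect");
    close(fd);
    return 0;
}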
+14 -40
drivers/block/drbd/drbd_req.c
··· 102 } 103 } 104 105 - /* if it was a local io error, we want to notify our 106 - * peer about that, and see if we need to 107 - * detach the disk and stuff. 108 - * to avoid allocating some special work 109 - * struct, reuse the request. */ 110 - 111 - /* THINK 112 - * why do we do this not when we detect the error, 113 - * but delay it until it is "done", i.e. possibly 114 - * until the next barrier ack? */ 115 - 116 - if (rw == WRITE && 117 - ((s & RQ_LOCAL_MASK) && !(s & RQ_LOCAL_OK))) { 118 - if (!(req->w.list.next == LIST_POISON1 || 119 - list_empty(&req->w.list))) { 120 - /* DEBUG ASSERT only; if this triggers, we 121 - * probably corrupt the worker list here */ 122 - dev_err(DEV, "req->w.list.next = %p\n", req->w.list.next); 123 - dev_err(DEV, "req->w.list.prev = %p\n", req->w.list.prev); 124 - } 125 - req->w.cb = w_io_error; 126 - drbd_queue_work(&mdev->data.work, &req->w); 127 - /* drbd_req_free() is done in w_io_error */ 128 - } else { 129 - drbd_req_free(req); 130 - } 131 } 132 133 static void queue_barrier(struct drbd_conf *mdev) ··· 428 req->rq_state |= RQ_LOCAL_COMPLETED; 429 req->rq_state &= ~RQ_LOCAL_PENDING; 430 431 - dev_alert(DEV, "Local WRITE failed sec=%llus size=%u\n", 432 - (unsigned long long)req->sector, req->size); 433 - /* and now: check how to handle local io error. */ 434 __drbd_chk_io_error(mdev, FALSE); 435 _req_may_be_done(req, m); 436 put_ldev(mdev); ··· 447 req->rq_state |= RQ_LOCAL_COMPLETED; 448 req->rq_state &= ~RQ_LOCAL_PENDING; 449 450 - dev_alert(DEV, "Local READ failed sec=%llus size=%u\n", 451 - (unsigned long long)req->sector, req->size); 452 - /* _req_mod(req,to_be_send); oops, recursion... */ 453 D_ASSERT(!(req->rq_state & RQ_NET_MASK)); 454 - req->rq_state |= RQ_NET_PENDING; 455 - inc_ap_pending(mdev); 456 457 __drbd_chk_io_error(mdev, FALSE); 458 put_ldev(mdev); 459 - /* NOTE: if we have no connection, 460 - * or know the peer has no good data either, 461 - * then we don't actually need to "queue_for_net_read", 462 - * but we do so anyways, since the drbd_io_error() 463 - * and the potential state change to "Diskless" 464 - * needs to be done from process context */ 465 466 /* fall through: _req_mod(req,queue_for_net_read); */ 467 468 case queue_for_net_read: ··· 571 _req_may_be_done(req, m); 572 break; 573 574 case connection_lost_while_pending: 575 /* transfer log cleanup after connection loss */ 576 /* assert something? */
··· 102 } 103 } 104 105 + drbd_req_free(req); 106 } 107 108 static void queue_barrier(struct drbd_conf *mdev) ··· 453 req->rq_state |= RQ_LOCAL_COMPLETED; 454 req->rq_state &= ~RQ_LOCAL_PENDING; 455 456 __drbd_chk_io_error(mdev, FALSE); 457 _req_may_be_done(req, m); 458 put_ldev(mdev); ··· 475 req->rq_state |= RQ_LOCAL_COMPLETED; 476 req->rq_state &= ~RQ_LOCAL_PENDING; 477 478 D_ASSERT(!(req->rq_state & RQ_NET_MASK)); 479 480 __drbd_chk_io_error(mdev, FALSE); 481 put_ldev(mdev); 482 483 + /* no point in retrying if there is no good remote data, 484 + * or we have no connection. */ 485 + if (mdev->state.pdsk != D_UP_TO_DATE) { 486 + _req_may_be_done(req, m); 487 + break; 488 + } 489 + 490 + /* _req_mod(req,to_be_send); oops, recursion... */ 491 + req->rq_state |= RQ_NET_PENDING; 492 + inc_ap_pending(mdev); 493 /* fall through: _req_mod(req,queue_for_net_read); */ 494 495 case queue_for_net_read: ··· 600 _req_may_be_done(req, m); 601 break; 602 603 + case read_retry_remote_canceled: 604 + req->rq_state &= ~RQ_NET_QUEUED; 605 + /* fall through, in case we raced with drbd_disconnect */ 606 case connection_lost_while_pending: 607 /* transfer log cleanup after connection loss */ 608 /* assert something? */
+1
drivers/block/drbd/drbd_req.h
···
91      send_failed,
92      handed_over_to_network,
93      connection_lost_while_pending,
94      recv_acked_by_peer,
95      write_acked_by_peer,
96      write_acked_by_peer_and_sis, /* and set_in_sync */
···
91      send_failed,
92      handed_over_to_network,
93      connection_lost_while_pending,
94 +    read_retry_remote_canceled,
95      recv_acked_by_peer,
96      write_acked_by_peer,
97      write_acked_by_peer_and_sis, /* and set_in_sync */
+2 -22
drivers/block/drbd/drbd_worker.c
··· 224 enum drbd_req_event what; 225 int uptodate = bio_flagged(bio, BIO_UPTODATE); 226 227 - if (error) 228 - dev_warn(DEV, "p %s: error=%d\n", 229 - bio_data_dir(bio) == WRITE ? "write" : "read", error); 230 if (!error && !uptodate) { 231 dev_warn(DEV, "p %s: setting error to -EIO\n", 232 bio_data_dir(bio) == WRITE ? "write" : "read"); ··· 254 complete_master_bio(mdev, &m); 255 } 256 257 - int w_io_error(struct drbd_conf *mdev, struct drbd_work *w, int cancel) 258 - { 259 - struct drbd_request *req = container_of(w, struct drbd_request, w); 260 - 261 - /* NOTE: mdev->ldev can be NULL by the time we get here! */ 262 - /* D_ASSERT(mdev->ldev->dc.on_io_error != EP_PASS_ON); */ 263 - 264 - /* the only way this callback is scheduled is from _req_may_be_done, 265 - * when it is done and had a local write error, see comments there */ 266 - drbd_req_free(req); 267 - 268 - return TRUE; 269 - } 270 - 271 int w_read_retry_remote(struct drbd_conf *mdev, struct drbd_work *w, int cancel) 272 { 273 struct drbd_request *req = container_of(w, struct drbd_request, w); ··· 263 * to give the disk the chance to relocate that block */ 264 265 spin_lock_irq(&mdev->req_lock); 266 - if (cancel || 267 - mdev->state.conn < C_CONNECTED || 268 - mdev->state.pdsk <= D_INCONSISTENT) { 269 - _req_mod(req, send_canceled); 270 spin_unlock_irq(&mdev->req_lock); 271 - dev_alert(DEV, "WE ARE LOST. Local IO failure, no peer.\n"); 272 return 1; 273 } 274 spin_unlock_irq(&mdev->req_lock);
··· 224 enum drbd_req_event what; 225 int uptodate = bio_flagged(bio, BIO_UPTODATE); 226 227 if (!error && !uptodate) { 228 dev_warn(DEV, "p %s: setting error to -EIO\n", 229 bio_data_dir(bio) == WRITE ? "write" : "read"); ··· 257 complete_master_bio(mdev, &m); 258 } 259 260 int w_read_retry_remote(struct drbd_conf *mdev, struct drbd_work *w, int cancel) 261 { 262 struct drbd_request *req = container_of(w, struct drbd_request, w); ··· 280 * to give the disk the chance to relocate that block */ 281 282 spin_lock_irq(&mdev->req_lock); 283 + if (cancel || mdev->state.pdsk != D_UP_TO_DATE) { 284 + _req_mod(req, read_retry_remote_canceled); 285 spin_unlock_irq(&mdev->req_lock); 286 return 1; 287 } 288 spin_unlock_irq(&mdev->req_lock);
+16 -48
fs/fs-writeback.c
··· 45 unsigned int for_kupdate:1; 46 unsigned int range_cyclic:1; 47 unsigned int for_background:1; 48 - unsigned int sb_pinned:1; 49 }; 50 51 /* ··· 192 } 193 194 static void bdi_alloc_queue_work(struct backing_dev_info *bdi, 195 - struct wb_writeback_args *args, 196 - int wait) 197 { 198 struct bdi_work *work; 199 ··· 204 if (work) { 205 bdi_work_init(work, args); 206 bdi_queue_work(bdi, work); 207 - if (wait) 208 - bdi_wait_on_work_clear(work); 209 } else { 210 struct bdi_writeback *wb = &bdi->wb; 211 ··· 230 .sync_mode = WB_SYNC_ALL, 231 .nr_pages = LONG_MAX, 232 .range_cyclic = 0, 233 - /* 234 - * Setting sb_pinned is not necessary for WB_SYNC_ALL, but 235 - * lets make it explicitly clear. 236 - */ 237 - .sb_pinned = 1, 238 }; 239 struct bdi_work work; 240 ··· 245 * @bdi: the backing device to write from 246 * @sb: write inodes from this super_block 247 * @nr_pages: the number of pages to write 248 - * @sb_locked: caller already holds sb umount sem. 249 * 250 * Description: 251 * This does WB_SYNC_NONE opportunistic writeback. The IO is only 252 * started when this function returns, we make no guarentees on 253 - * completion. Caller specifies whether sb umount sem is held already or not. 254 * 255 */ 256 void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb, 257 - long nr_pages, int sb_locked) 258 { 259 struct wb_writeback_args args = { 260 .sb = sb, 261 .sync_mode = WB_SYNC_NONE, 262 .nr_pages = nr_pages, 263 .range_cyclic = 1, 264 - .sb_pinned = sb_locked, 265 }; 266 267 /* ··· 271 args.for_background = 1; 272 } 273 274 - bdi_alloc_queue_work(bdi, &args, sb_locked); 275 } 276 277 /* ··· 584 /* 585 * Caller must already hold the ref for this 586 */ 587 - if (wbc->sync_mode == WB_SYNC_ALL || wbc->sb_pinned) { 588 WARN_ON(!rwsem_is_locked(&sb->s_umount)); 589 return SB_NOT_PINNED; 590 } ··· 758 .for_kupdate = args->for_kupdate, 759 .for_background = args->for_background, 760 .range_cyclic = args->range_cyclic, 761 - .sb_pinned = args->sb_pinned, 762 }; 763 unsigned long oldest_jif; 764 long wrote = 0; ··· 900 901 while ((work = get_next_work_item(bdi, wb)) != NULL) { 902 struct wb_writeback_args args = work->args; 903 - int post_clear; 904 905 /* 906 * Override sync mode, in case we must wait for completion ··· 907 if (force_wait) 908 work->args.sync_mode = args.sync_mode = WB_SYNC_ALL; 909 910 - post_clear = WB_SYNC_ALL || args.sb_pinned; 911 - 912 /* 913 * If this isn't a data integrity operation, just notify 914 * that we have seen this work and we are now starting it. 915 */ 916 - if (!post_clear) 917 wb_clear_pending(wb, work); 918 919 wrote += wb_writeback(wb, &args); ··· 920 * This is a data integrity writeback, so only do the 921 * notification when we have completed the work. 
922 */ 923 - if (post_clear) 924 wb_clear_pending(wb, work); 925 } 926 ··· 996 if (!bdi_has_dirty_io(bdi)) 997 continue; 998 999 - bdi_alloc_queue_work(bdi, &args, 0); 1000 } 1001 1002 rcu_read_unlock(); ··· 1205 iput(old_inode); 1206 } 1207 1208 - static void __writeback_inodes_sb(struct super_block *sb, int sb_locked) 1209 - { 1210 - unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY); 1211 - unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS); 1212 - long nr_to_write; 1213 - 1214 - nr_to_write = nr_dirty + nr_unstable + 1215 - (inodes_stat.nr_inodes - inodes_stat.nr_unused); 1216 - 1217 - bdi_start_writeback(sb->s_bdi, sb, nr_to_write, sb_locked); 1218 - } 1219 - 1220 /** 1221 * writeback_inodes_sb - writeback dirty inodes from given super_block 1222 * @sb: the superblock ··· 1216 */ 1217 void writeback_inodes_sb(struct super_block *sb) 1218 { 1219 - __writeback_inodes_sb(sb, 0); 1220 } 1221 EXPORT_SYMBOL(writeback_inodes_sb); 1222 - 1223 - /** 1224 - * writeback_inodes_sb_locked - writeback dirty inodes from given super_block 1225 - * @sb: the superblock 1226 - * 1227 - * Like writeback_inodes_sb(), except the caller already holds the 1228 - * sb umount sem. 1229 - */ 1230 - void writeback_inodes_sb_locked(struct super_block *sb) 1231 - { 1232 - __writeback_inodes_sb(sb, 1); 1233 - } 1234 1235 /** 1236 * writeback_inodes_sb_if_idle - start writeback if none underway
··· 45 unsigned int for_kupdate:1; 46 unsigned int range_cyclic:1; 47 unsigned int for_background:1; 48 }; 49 50 /* ··· 193 } 194 195 static void bdi_alloc_queue_work(struct backing_dev_info *bdi, 196 + struct wb_writeback_args *args) 197 { 198 struct bdi_work *work; 199 ··· 206 if (work) { 207 bdi_work_init(work, args); 208 bdi_queue_work(bdi, work); 209 } else { 210 struct bdi_writeback *wb = &bdi->wb; 211 ··· 234 .sync_mode = WB_SYNC_ALL, 235 .nr_pages = LONG_MAX, 236 .range_cyclic = 0, 237 }; 238 struct bdi_work work; 239 ··· 254 * @bdi: the backing device to write from 255 * @sb: write inodes from this super_block 256 * @nr_pages: the number of pages to write 257 * 258 * Description: 259 * This does WB_SYNC_NONE opportunistic writeback. The IO is only 260 * started when this function returns, we make no guarentees on 261 + * completion. Caller need not hold sb s_umount semaphore. 262 * 263 */ 264 void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb, 265 + long nr_pages) 266 { 267 struct wb_writeback_args args = { 268 .sb = sb, 269 .sync_mode = WB_SYNC_NONE, 270 .nr_pages = nr_pages, 271 .range_cyclic = 1, 272 }; 273 274 /* ··· 282 args.for_background = 1; 283 } 284 285 + bdi_alloc_queue_work(bdi, &args); 286 } 287 288 /* ··· 595 /* 596 * Caller must already hold the ref for this 597 */ 598 + if (wbc->sync_mode == WB_SYNC_ALL) { 599 WARN_ON(!rwsem_is_locked(&sb->s_umount)); 600 return SB_NOT_PINNED; 601 } ··· 769 .for_kupdate = args->for_kupdate, 770 .for_background = args->for_background, 771 .range_cyclic = args->range_cyclic, 772 }; 773 unsigned long oldest_jif; 774 long wrote = 0; ··· 912 913 while ((work = get_next_work_item(bdi, wb)) != NULL) { 914 struct wb_writeback_args args = work->args; 915 916 /* 917 * Override sync mode, in case we must wait for completion ··· 920 if (force_wait) 921 work->args.sync_mode = args.sync_mode = WB_SYNC_ALL; 922 923 /* 924 * If this isn't a data integrity operation, just notify 925 * that we have seen this work and we are now starting it. 926 */ 927 + if (args.sync_mode == WB_SYNC_NONE) 928 wb_clear_pending(wb, work); 929 930 wrote += wb_writeback(wb, &args); ··· 935 * This is a data integrity writeback, so only do the 936 * notification when we have completed the work. 937 */ 938 + if (args.sync_mode == WB_SYNC_ALL) 939 wb_clear_pending(wb, work); 940 } 941 ··· 1011 if (!bdi_has_dirty_io(bdi)) 1012 continue; 1013 1014 + bdi_alloc_queue_work(bdi, &args); 1015 } 1016 1017 rcu_read_unlock(); ··· 1220 iput(old_inode); 1221 } 1222 1223 /** 1224 * writeback_inodes_sb - writeback dirty inodes from given super_block 1225 * @sb: the superblock ··· 1243 */ 1244 void writeback_inodes_sb(struct super_block *sb) 1245 { 1246 + unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY); 1247 + unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS); 1248 + long nr_to_write; 1249 + 1250 + nr_to_write = nr_dirty + nr_unstable + 1251 + (inodes_stat.nr_inodes - inodes_stat.nr_unused); 1252 + 1253 + bdi_start_writeback(sb->s_bdi, sb, nr_to_write); 1254 } 1255 EXPORT_SYMBOL(writeback_inodes_sb); 1256 1257 /** 1258 * writeback_inodes_sb_if_idle - start writeback if none underway
+54 -25
fs/pipe.c
··· 26 27 /* 28 * The max size that a non-root user is allowed to grow the pipe. Can 29 - * be set by root in /proc/sys/fs/pipe-max-pages 30 */ 31 - unsigned int pipe_max_pages = PIPE_DEF_BUFFERS * 16; 32 33 /* 34 * We use a start+len construction, which provides full use of the ··· 1123 * Allocate a new array of pipe buffers and copy the info over. Returns the 1124 * pipe size if successful, or return -ERROR on error. 1125 */ 1126 - static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg) 1127 { 1128 struct pipe_buffer *bufs; 1129 - 1130 - /* 1131 - * Must be a power-of-2 currently 1132 - */ 1133 - if (!is_power_of_2(arg)) 1134 - return -EINVAL; 1135 1136 /* 1137 * We can shrink the pipe, if arg >= pipe->nrbufs. Since we don't ··· 1133 * again like we would do for growing. If the pipe currently 1134 * contains more buffers than arg, then return busy. 1135 */ 1136 - if (arg < pipe->nrbufs) 1137 return -EBUSY; 1138 1139 - bufs = kcalloc(arg, sizeof(struct pipe_buffer), GFP_KERNEL); 1140 if (unlikely(!bufs)) 1141 return -ENOMEM; 1142 ··· 1157 pipe->curbuf = 0; 1158 kfree(pipe->bufs); 1159 pipe->bufs = bufs; 1160 - pipe->buffers = arg; 1161 - return arg; 1162 } 1163 1164 long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg) ··· 1202 mutex_lock(&pipe->inode->i_mutex); 1203 1204 switch (cmd) { 1205 - case F_SETPIPE_SZ: 1206 - if (!capable(CAP_SYS_ADMIN) && arg > pipe_max_pages) { 1207 ret = -EINVAL; 1208 goto out; 1209 } 1210 - /* 1211 - * The pipe needs to be at least 2 pages large to 1212 - * guarantee POSIX behaviour. 1213 - */ 1214 - if (arg < 2) { 1215 - ret = -EINVAL; 1216 - goto out; 1217 - } 1218 - ret = pipe_set_size(pipe, arg); 1219 break; 1220 case F_GETPIPE_SZ: 1221 - ret = pipe->buffers; 1222 break; 1223 default: 1224 ret = -EINVAL;
··· 26 27 /* 28 * The max size that a non-root user is allowed to grow the pipe. Can 29 + * be set by root in /proc/sys/fs/pipe-max-size 30 */ 31 + unsigned int pipe_max_size = 1048576; 32 + 33 + /* 34 + * Minimum pipe size, as required by POSIX 35 + */ 36 + unsigned int pipe_min_size = PAGE_SIZE; 37 38 /* 39 * We use a start+len construction, which provides full use of the ··· 1118 * Allocate a new array of pipe buffers and copy the info over. Returns the 1119 * pipe size if successful, or return -ERROR on error. 1120 */ 1121 + static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long nr_pages) 1122 { 1123 struct pipe_buffer *bufs; 1124 1125 /* 1126 * We can shrink the pipe, if arg >= pipe->nrbufs. Since we don't ··· 1134 * again like we would do for growing. If the pipe currently 1135 * contains more buffers than arg, then return busy. 1136 */ 1137 + if (nr_pages < pipe->nrbufs) 1138 return -EBUSY; 1139 1140 + bufs = kcalloc(nr_pages, sizeof(struct pipe_buffer), GFP_KERNEL); 1141 if (unlikely(!bufs)) 1142 return -ENOMEM; 1143 ··· 1158 pipe->curbuf = 0; 1159 kfree(pipe->bufs); 1160 pipe->bufs = bufs; 1161 + pipe->buffers = nr_pages; 1162 + return nr_pages * PAGE_SIZE; 1163 + } 1164 + 1165 + /* 1166 + * Currently we rely on the pipe array holding a power-of-2 number 1167 + * of pages. 1168 + */ 1169 + static inline unsigned int round_pipe_size(unsigned int size) 1170 + { 1171 + unsigned long nr_pages; 1172 + 1173 + nr_pages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; 1174 + return roundup_pow_of_two(nr_pages) << PAGE_SHIFT; 1175 + } 1176 + 1177 + /* 1178 + * This should work even if CONFIG_PROC_FS isn't set, as proc_dointvec_minmax 1179 + * will return an error. 1180 + */ 1181 + int pipe_proc_fn(struct ctl_table *table, int write, void __user *buf, 1182 + size_t *lenp, loff_t *ppos) 1183 + { 1184 + int ret; 1185 + 1186 + ret = proc_dointvec_minmax(table, write, buf, lenp, ppos); 1187 + if (ret < 0 || !write) 1188 + return ret; 1189 + 1190 + pipe_max_size = round_pipe_size(pipe_max_size); 1191 + return ret; 1192 } 1193 1194 long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg) ··· 1174 mutex_lock(&pipe->inode->i_mutex); 1175 1176 switch (cmd) { 1177 + case F_SETPIPE_SZ: { 1178 + unsigned int size, nr_pages; 1179 + 1180 + size = round_pipe_size(arg); 1181 + nr_pages = size >> PAGE_SHIFT; 1182 + 1183 + if (!capable(CAP_SYS_RESOURCE) && size > pipe_max_size) { 1184 + ret = -EPERM; 1185 + goto out; 1186 + } else if (nr_pages < PAGE_SIZE) { 1187 ret = -EINVAL; 1188 goto out; 1189 } 1190 + ret = pipe_set_size(pipe, nr_pages); 1191 break; 1192 + } 1193 case F_GETPIPE_SZ: 1194 + ret = pipe->buffers * PAGE_SIZE; 1195 break; 1196 default: 1197 ret = -EINVAL;
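The fcntl interface above now operates in bytes rather than pages: F_SETPIPE_SZ rounds the requested size up to a power-of-two number of pages and returns the resulting capacity, F_GETPIPE_SZ reports the capacity in bytes, and requests beyond /proc/sys/fs/pipe-max-size require CAP_SYS_RESOURCE. A minimal user-space sketch (1 MiB is an arbitrary request, error handling trimmed):

#define _GNU_SOURCE     /* F_SETPIPE_SZ, F_GETPIPE_SZ */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
    int fds[2];
    int newsz;

    if (pipe(fds) < 0)
        return 1;

    /* Ask for a 1 MiB pipe; the kernel rounds up to a power-of-two
     * number of pages and returns the resulting size in bytes. */
    newsz = fcntl(fds[1], F_SETPIPE_SZ, 1024 * 1024);
    if (newsz < 0)
        perror("F_SETPIPE_SZ");     /* EPERM if above pipe-max-size */

    printf("pipe capacity: %d bytes\n", fcntl(fds[1], F_GETPIPE_SZ));
    close(fds[0]);
    close(fds[1]);
    return 0;
}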
+1 -1
fs/splice.c
···
354             break;
355 
356         error = add_to_page_cache_lru(page, mapping, index,
357 -               mapping_gfp_mask(mapping));
358         if (unlikely(error)) {
359             page_cache_release(page);
360             if (error == -EEXIST)
···
354             break;
355 
356         error = add_to_page_cache_lru(page, mapping, index,
357 +               GFP_KERNEL);
358         if (unlikely(error)) {
359             page_cache_release(page);
360             if (error == -EEXIST)
+1 -1
fs/sync.c
···
42      if (wait)
43          sync_inodes_sb(sb);
44      else
45 -        writeback_inodes_sb_locked(sb);
46 
47      if (sb->s_op->sync_fs)
48          sb->s_op->sync_fs(sb, wait);
···
42      if (wait)
43          sync_inodes_sb(sb);
44      else
45 +        writeback_inodes_sb(sb);
46 
47      if (sb->s_op->sync_fs)
48          sb->s_op->sync_fs(sb, wait);
+1 -1
include/linux/backing-dev.h
···
106 void bdi_unregister(struct backing_dev_info *bdi);
107 int bdi_setup_and_register(struct backing_dev_info *, char *, unsigned int);
108 void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb,
109 -           long nr_pages, int sb_locked);
110 int bdi_writeback_task(struct bdi_writeback *wb);
111 int bdi_has_dirty_io(struct backing_dev_info *bdi);
112 void bdi_arm_supers_timer(void);
···
106 void bdi_unregister(struct backing_dev_info *bdi);
107 int bdi_setup_and_register(struct backing_dev_info *, char *, unsigned int);
108 void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb,
109 +           long nr_pages);
110 int bdi_writeback_task(struct bdi_writeback *wb);
111 int bdi_has_dirty_io(struct backing_dev_info *bdi);
112 void bdi_arm_supers_timer(void);
+9
include/linux/blkdev.h
···
1211 int kblockd_schedule_work(struct request_queue *q, struct work_struct *work);
1212 
1213 #ifdef CONFIG_BLK_CGROUP
1214 static inline void set_start_time_ns(struct request *req)
1215 {
1216     req->start_time_ns = sched_clock();
1217 }
1218 
1219 static inline void set_io_start_time_ns(struct request *req)
1220 {
1221     req->io_start_time_ns = sched_clock();
1222 }
1223 
1224 static inline uint64_t rq_start_time_ns(struct request *req)
···
1211 int kblockd_schedule_work(struct request_queue *q, struct work_struct *work);
1212 
1213 #ifdef CONFIG_BLK_CGROUP
1214 +/*
1215 + * This should not be using sched_clock(). A real patch is in progress
1216 + * to fix this up, until that is in place we need to disable preemption
1217 + * around sched_clock() in this function and set_io_start_time_ns().
1218 + */
1219 static inline void set_start_time_ns(struct request *req)
1220 {
1221 +   preempt_disable();
1222     req->start_time_ns = sched_clock();
1223 +   preempt_enable();
1224 }
1225 
1226 static inline void set_io_start_time_ns(struct request *req)
1227 {
1228 +   preempt_disable();
1229     req->io_start_time_ns = sched_clock();
1230 +   preempt_enable();
1231 }
1232 
1233 static inline uint64_t rq_start_time_ns(struct request *req)
+1 -1
include/linux/drbd.h
···
53 
54 
55 extern const char *drbd_buildtag(void);
56 -#define REL_VERSION "8.3.8rc1"
57 #define API_VERSION 88
58 #define PRO_VERSION_MIN 86
59 #define PRO_VERSION_MAX 94
···
53 
54 
55 extern const char *drbd_buildtag(void);
56 +#define REL_VERSION "8.3.8rc2"
57 #define API_VERSION 88
58 #define PRO_VERSION_MIN 86
59 #define PRO_VERSION_MAX 94
-1
include/linux/iocontext.h
···
7  struct cfq_queue;
8  struct cfq_io_context {
9      void *key;
10 -   unsigned long dead_key;
11 
12     struct cfq_queue *cfqq[2];
13 
···
7  struct cfq_queue;
8  struct cfq_io_context {
9      void *key;
10 
11     struct cfq_queue *cfqq[2];
12 
+3 -1
include/linux/pipe_fs_i.h
···
139 void pipe_unlock(struct pipe_inode_info *);
140 void pipe_double_lock(struct pipe_inode_info *, struct pipe_inode_info *);
141 
142 -extern unsigned int pipe_max_pages;
143 
144 /* Drop the inode semaphore and wait for a pipe event, atomically */
145 void pipe_wait(struct pipe_inode_info *pipe);
···
139 void pipe_unlock(struct pipe_inode_info *);
140 void pipe_double_lock(struct pipe_inode_info *, struct pipe_inode_info *);
141 
142 +extern unsigned int pipe_max_size, pipe_min_size;
143 +int pipe_proc_fn(struct ctl_table *, int, void __user *, size_t *, loff_t *);
144 +
145 
146 /* Drop the inode semaphore and wait for a pipe event, atomically */
147 void pipe_wait(struct pipe_inode_info *pipe);
-10
include/linux/writeback.h
···
65   * so we use a single control to update them
66   */
67      unsigned no_nrwrite_index_update:1;
68 -
69 -   /*
70 -    * For WB_SYNC_ALL, the sb must always be pinned. For WB_SYNC_NONE,
71 -    * the writeback code will pin the sb for the caller. However,
72 -    * for eg umount, the caller does WB_SYNC_NONE but already has
73 -    * the sb pinned. If the below is set, caller already has the
74 -    * sb pinned.
75 -    */
76 -   unsigned sb_pinned:1;
77  };
78 
79  /*
···
73  struct bdi_writeback;
74  int inode_wait(void *);
75  void writeback_inodes_sb(struct super_block *);
76 -void writeback_inodes_sb_locked(struct super_block *);
77  int writeback_inodes_sb_if_idle(struct super_block *);
78  void sync_inodes_sb(struct super_block *);
79  void writeback_inodes_wbc(struct writeback_control *wbc);
···
65   * so we use a single control to update them
66   */
67      unsigned no_nrwrite_index_update:1;
68  };
69 
70  /*
···
82  struct bdi_writeback;
83  int inode_wait(void *);
84  void writeback_inodes_sb(struct super_block *);
85  int writeback_inodes_sb_if_idle(struct super_block *);
86  void sync_inodes_sb(struct super_block *);
87  void writeback_inodes_wbc(struct writeback_control *wbc);
+4 -4
kernel/sysctl.c
···
1471    },
1472 #endif
1473    {
1474 -      .procname = "pipe-max-pages",
1475 -      .data = &pipe_max_pages,
1476        .maxlen = sizeof(int),
1477        .mode = 0644,
1478 -      .proc_handler = &proc_dointvec_minmax,
1479 -      .extra1 = &two,
1480    },
1481    /*
1482     * NOTE: do not add new entries to this table unless you have read
···
1471    },
1472 #endif
1473    {
1474 +      .procname = "pipe-max-size",
1475 +      .data = &pipe_max_size,
1476        .maxlen = sizeof(int),
1477        .mode = 0644,
1478 +      .proc_handler = &pipe_proc_fn,
1479 +      .extra1 = &pipe_min_size,
1480    },
1481    /*
1482     * NOTE: do not add new entries to this table unless you have read
+2 -2
mm/page-writeback.c
···
597     (!laptop_mode && ((global_page_state(NR_FILE_DIRTY)
598             + global_page_state(NR_UNSTABLE_NFS))
599                 > background_thresh)))
600 -       bdi_start_writeback(bdi, NULL, 0, 0);
601 }
602 
603 void set_page_dirty_balance(struct page *page, int page_mkwrite)
···
707  */
708 
709     if (bdi_has_dirty_io(&q->backing_dev_info))
710 -       bdi_start_writeback(&q->backing_dev_info, NULL, nr_pages, 0);
711 }
712 
713 /*
···
597     (!laptop_mode && ((global_page_state(NR_FILE_DIRTY)
598             + global_page_state(NR_UNSTABLE_NFS))
599                 > background_thresh)))
600 +       bdi_start_writeback(bdi, NULL, 0);
601 }
602 
603 void set_page_dirty_balance(struct page *page, int page_mkwrite)
···
707  */
708 
709     if (bdi_has_dirty_io(&q->backing_dev_info))
710 +       bdi_start_writeback(&q->backing_dev_info, NULL, nr_pages);
711 }
712 
713 /*