Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

rbd: retry watch re-registration periodically

Revamp watch code to support retrying watch re-registration:

- add rbd_dev->watch_state for more robust errcb handling
- store watch cookie separately to avoid dereferencing watch_handle,
  which is set to NULL on unwatch
- move re-register code into a delayed work and retry re-registration
  every second, unless the client is blacklisted

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Mike Christie <mchristi@redhat.com>
Tested-by: Mike Christie <mchristi@redhat.com>

+110 -29
+109 -29
drivers/block/rbd.c
··· 114 114 115 115 #define RBD_OBJ_PREFIX_LEN_MAX 64 116 116 117 + #define RBD_RETRY_DELAY msecs_to_jiffies(1000) 118 + 117 119 /* Feature bits */ 118 120 119 121 #define RBD_FEATURE_LAYERING (1<<0) ··· 321 319 #define for_each_obj_request_safe(ireq, oreq, n) \ 322 320 list_for_each_entry_safe_reverse(oreq, n, &(ireq)->obj_requests, links) 323 321 322 + enum rbd_watch_state { 323 + RBD_WATCH_STATE_UNREGISTERED, 324 + RBD_WATCH_STATE_REGISTERED, 325 + RBD_WATCH_STATE_ERROR, 326 + }; 327 + 324 328 struct rbd_mapping { 325 329 u64 size; 326 330 u64 features; ··· 360 352 361 353 struct ceph_file_layout layout; /* used for all rbd requests */ 362 354 355 + struct mutex watch_mutex; 356 + enum rbd_watch_state watch_state; 363 357 struct ceph_osd_linger_request *watch_handle; 358 + u64 watch_cookie; 359 + struct delayed_work watch_dwork; 364 360 365 361 struct workqueue_struct *task_wq; 366 362 ··· 3095 3083 obj_request_done_set(obj_request); 3096 3084 } 3097 3085 3098 - static int rbd_dev_header_watch_sync(struct rbd_device *rbd_dev); 3099 - static void __rbd_dev_header_unwatch_sync(struct rbd_device *rbd_dev); 3100 - 3101 3086 static void rbd_watch_cb(void *arg, u64 notify_id, u64 cookie, 3102 3087 u64 notifier_id, void *data, size_t data_len) 3103 3088 { ··· 3122 3113 rbd_warn(rbd_dev, "notify_ack ret %d", ret); 3123 3114 } 3124 3115 3116 + static void __rbd_unregister_watch(struct rbd_device *rbd_dev); 3117 + 3125 3118 static void rbd_watch_errcb(void *arg, u64 cookie, int err) 3126 3119 { 3127 3120 struct rbd_device *rbd_dev = arg; 3128 - int ret; 3129 3121 3130 3122 rbd_warn(rbd_dev, "encountered watch error: %d", err); 3131 3123 3132 - __rbd_dev_header_unwatch_sync(rbd_dev); 3124 + mutex_lock(&rbd_dev->watch_mutex); 3125 + if (rbd_dev->watch_state == RBD_WATCH_STATE_REGISTERED) { 3126 + __rbd_unregister_watch(rbd_dev); 3127 + rbd_dev->watch_state = RBD_WATCH_STATE_ERROR; 3133 3128 3134 - ret = rbd_dev_header_watch_sync(rbd_dev); 3135 - if (ret) { 3136 - 
rbd_warn(rbd_dev, "failed to reregister watch: %d", ret); 3137 - return; 3129 + queue_delayed_work(rbd_dev->task_wq, &rbd_dev->watch_dwork, 0); 3138 3130 } 3139 - 3140 - ret = rbd_dev_refresh(rbd_dev); 3141 - if (ret) 3142 - rbd_warn(rbd_dev, "reregisteration refresh failed: %d", ret); 3131 + mutex_unlock(&rbd_dev->watch_mutex); 3143 3132 } 3144 3133 3145 3134 /* 3146 - * Initiate a watch request, synchronously. 3135 + * watch_mutex must be locked 3147 3136 */ 3148 - static int rbd_dev_header_watch_sync(struct rbd_device *rbd_dev) 3137 + static int __rbd_register_watch(struct rbd_device *rbd_dev) 3149 3138 { 3150 3139 struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; 3151 3140 struct ceph_osd_linger_request *handle; 3152 3141 3153 3142 rbd_assert(!rbd_dev->watch_handle); 3143 + dout("%s rbd_dev %p\n", __func__, rbd_dev); 3154 3144 3155 3145 handle = ceph_osdc_watch(osdc, &rbd_dev->header_oid, 3156 3146 &rbd_dev->header_oloc, rbd_watch_cb, ··· 3161 3153 return 0; 3162 3154 } 3163 3155 3164 - static void __rbd_dev_header_unwatch_sync(struct rbd_device *rbd_dev) 3156 + /* 3157 + * watch_mutex must be locked 3158 + */ 3159 + static void __rbd_unregister_watch(struct rbd_device *rbd_dev) 3165 3160 { 3166 3161 struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; 3167 3162 int ret; 3168 3163 3169 - if (!rbd_dev->watch_handle) 3170 - return; 3164 + rbd_assert(rbd_dev->watch_handle); 3165 + dout("%s rbd_dev %p\n", __func__, rbd_dev); 3171 3166 3172 3167 ret = ceph_osdc_unwatch(osdc, rbd_dev->watch_handle); 3173 3168 if (ret) ··· 3179 3168 rbd_dev->watch_handle = NULL; 3180 3169 } 3181 3170 3182 - /* 3183 - * Tear down a watch request, synchronously. 
3184 - */ 3185 - static void rbd_dev_header_unwatch_sync(struct rbd_device *rbd_dev) 3171 + static int rbd_register_watch(struct rbd_device *rbd_dev) 3186 3172 { 3187 - __rbd_dev_header_unwatch_sync(rbd_dev); 3173 + int ret; 3188 3174 3189 - dout("%s flushing notifies\n", __func__); 3175 + mutex_lock(&rbd_dev->watch_mutex); 3176 + rbd_assert(rbd_dev->watch_state == RBD_WATCH_STATE_UNREGISTERED); 3177 + ret = __rbd_register_watch(rbd_dev); 3178 + if (ret) 3179 + goto out; 3180 + 3181 + rbd_dev->watch_state = RBD_WATCH_STATE_REGISTERED; 3182 + rbd_dev->watch_cookie = rbd_dev->watch_handle->linger_id; 3183 + 3184 + out: 3185 + mutex_unlock(&rbd_dev->watch_mutex); 3186 + return ret; 3187 + } 3188 + 3189 + static void cancel_tasks_sync(struct rbd_device *rbd_dev) 3190 + { 3191 + dout("%s rbd_dev %p\n", __func__, rbd_dev); 3192 + 3193 + cancel_delayed_work_sync(&rbd_dev->watch_dwork); 3194 + } 3195 + 3196 + static void rbd_unregister_watch(struct rbd_device *rbd_dev) 3197 + { 3198 + cancel_tasks_sync(rbd_dev); 3199 + 3200 + mutex_lock(&rbd_dev->watch_mutex); 3201 + if (rbd_dev->watch_state == RBD_WATCH_STATE_REGISTERED) 3202 + __rbd_unregister_watch(rbd_dev); 3203 + rbd_dev->watch_state = RBD_WATCH_STATE_UNREGISTERED; 3204 + mutex_unlock(&rbd_dev->watch_mutex); 3205 + 3190 3206 ceph_osdc_flush_notifies(&rbd_dev->rbd_client->client->osdc); 3207 + } 3208 + 3209 + static void rbd_reregister_watch(struct work_struct *work) 3210 + { 3211 + struct rbd_device *rbd_dev = container_of(to_delayed_work(work), 3212 + struct rbd_device, watch_dwork); 3213 + int ret; 3214 + 3215 + dout("%s rbd_dev %p\n", __func__, rbd_dev); 3216 + 3217 + mutex_lock(&rbd_dev->watch_mutex); 3218 + if (rbd_dev->watch_state != RBD_WATCH_STATE_ERROR) 3219 + goto fail_unlock; 3220 + 3221 + ret = __rbd_register_watch(rbd_dev); 3222 + if (ret) { 3223 + rbd_warn(rbd_dev, "failed to reregister watch: %d", ret); 3224 + if (ret != -EBLACKLISTED) 3225 + queue_delayed_work(rbd_dev->task_wq, 3226 + 
&rbd_dev->watch_dwork, 3227 + RBD_RETRY_DELAY); 3228 + goto fail_unlock; 3229 + } 3230 + 3231 + rbd_dev->watch_state = RBD_WATCH_STATE_REGISTERED; 3232 + rbd_dev->watch_cookie = rbd_dev->watch_handle->linger_id; 3233 + mutex_unlock(&rbd_dev->watch_mutex); 3234 + 3235 + ret = rbd_dev_refresh(rbd_dev); 3236 + if (ret) 3237 + rbd_warn(rbd_dev, "reregisteration refresh failed: %d", ret); 3238 + 3239 + return; 3240 + 3241 + fail_unlock: 3242 + mutex_unlock(&rbd_dev->watch_mutex); 3191 3243 } 3192 3244 3193 3245 /* ··· 4019 3945 4020 3946 static void rbd_dev_free(struct rbd_device *rbd_dev) 4021 3947 { 3948 + WARN_ON(rbd_dev->watch_state != RBD_WATCH_STATE_UNREGISTERED); 3949 + 4022 3950 ceph_oid_destroy(&rbd_dev->header_oid); 4023 3951 ceph_oloc_destroy(&rbd_dev->header_oloc); 4024 3952 ··· 4066 3990 4067 3991 ceph_oid_init(&rbd_dev->header_oid); 4068 3992 ceph_oloc_init(&rbd_dev->header_oloc); 3993 + 3994 + mutex_init(&rbd_dev->watch_mutex); 3995 + rbd_dev->watch_state = RBD_WATCH_STATE_UNREGISTERED; 3996 + INIT_DELAYED_WORK(&rbd_dev->watch_dwork, rbd_reregister_watch); 4069 3997 4070 3998 rbd_dev->dev.bus = &rbd_bus_type; 4071 3999 rbd_dev->dev.type = &rbd_device_type; ··· 5302 5222 goto err_out_format; 5303 5223 5304 5224 if (!depth) { 5305 - ret = rbd_dev_header_watch_sync(rbd_dev); 5225 + ret = rbd_register_watch(rbd_dev); 5306 5226 if (ret) { 5307 5227 if (ret == -ENOENT) 5308 5228 pr_info("image %s/%s does not exist\n", ··· 5361 5281 rbd_dev_unprobe(rbd_dev); 5362 5282 err_out_watch: 5363 5283 if (!depth) 5364 - rbd_dev_header_unwatch_sync(rbd_dev); 5284 + rbd_unregister_watch(rbd_dev); 5365 5285 err_out_format: 5366 5286 rbd_dev->image_format = 0; 5367 5287 kfree(rbd_dev->spec->image_id); ··· 5428 5348 rc = rbd_dev_device_setup(rbd_dev); 5429 5349 if (rc) { 5430 5350 /* 5431 - * rbd_dev_header_unwatch_sync() can't be moved into 5351 + * rbd_unregister_watch() can't be moved into 5432 5352 * rbd_dev_image_release() without refactoring, see 5433 5353 * commit 
1f3ef78861ac. 5434 5354 */ 5435 - rbd_dev_header_unwatch_sync(rbd_dev); 5355 + rbd_unregister_watch(rbd_dev); 5436 5356 rbd_dev_image_release(rbd_dev); 5437 5357 goto out; 5438 5358 } ··· 5553 5473 if (ret < 0 || already) 5554 5474 return ret; 5555 5475 5556 - rbd_dev_header_unwatch_sync(rbd_dev); 5476 + rbd_unregister_watch(rbd_dev); 5557 5477 5558 5478 /* 5559 5479 * Don't free anything from rbd_dev->disk until after all
+1
net/ceph/osd_client.c
··· 4014 4014 */ 4015 4015 void ceph_osdc_flush_notifies(struct ceph_osd_client *osdc) 4016 4016 { 4017 + dout("%s osdc %p\n", __func__, osdc); 4017 4018 flush_workqueue(osdc->notify_wq); 4018 4019 } 4019 4020 EXPORT_SYMBOL(ceph_osdc_flush_notifies);