Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

libceph: support the RADOS copy-from operation

Add support for performing remote object copies using the 'copy-from'
operation.

[ Add COPY_FROM to get_num_data_items(). ]

Signed-off-by: Luis Henriques <lhenriques@suse.com>
Reviewed-by: Ilya Dryomov <idryomov@gmail.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>

Authored by Luis Henriques and committed by Ilya Dryomov
23ddf9be 2ee9dd95

+135
+17
include/linux/ceph/osd_client.h
··· 136 136 u64 expected_object_size; 137 137 u64 expected_write_size; 138 138 } alloc_hint; 139 + struct { 140 + u64 snapid; 141 + u64 src_version; 142 + u8 flags; 143 + u32 src_fadvise_flags; 144 + struct ceph_osd_data osd_data; 145 + } copy_from; 139 146 }; 140 147 }; 141 148 ··· 516 509 u32 truncate_seq, u64 truncate_size, 517 510 struct timespec64 *mtime, 518 511 struct page **pages, int nr_pages); 512 + 513 + int ceph_osdc_copy_from(struct ceph_osd_client *osdc, 514 + u64 src_snapid, u64 src_version, 515 + struct ceph_object_id *src_oid, 516 + struct ceph_object_locator *src_oloc, 517 + u32 src_fadvise_flags, 518 + struct ceph_object_id *dst_oid, 519 + struct ceph_object_locator *dst_oloc, 520 + u32 dst_fadvise_flags, 521 + u8 copy_from_flags); 519 522 520 523 /* watch/notify */ 521 524 struct ceph_osd_linger_request *
+28
include/linux/ceph/rados.h
··· 410 410 enum { 411 411 CEPH_OSD_OP_FLAG_EXCL = 1, /* EXCL object create */ 412 412 CEPH_OSD_OP_FLAG_FAILOK = 2, /* continue despite failure */ 413 + CEPH_OSD_OP_FLAG_FADVISE_RANDOM = 0x4, /* the op is random */ 414 + CEPH_OSD_OP_FLAG_FADVISE_SEQUENTIAL = 0x8, /* the op is sequential */ 415 + CEPH_OSD_OP_FLAG_FADVISE_WILLNEED = 0x10,/* data will be accessed in 416 + the near future */ 417 + CEPH_OSD_OP_FLAG_FADVISE_DONTNEED = 0x20,/* data will not be accessed 418 + in the near future */ 419 + CEPH_OSD_OP_FLAG_FADVISE_NOCACHE = 0x40,/* data will be accessed only 420 + once by this client */ 413 421 }; 414 422 415 423 #define EOLDSNAPC ERESTART /* ORDERSNAP flag set; writer has old snapc*/ ··· 437 429 enum { 438 430 CEPH_OSD_CMPXATTR_MODE_STRING = 1, 439 431 CEPH_OSD_CMPXATTR_MODE_U64 = 2 432 + }; 433 + 434 + enum { 435 + CEPH_OSD_COPY_FROM_FLAG_FLUSH = 1, /* part of a flush operation */ 436 + CEPH_OSD_COPY_FROM_FLAG_IGNORE_OVERLAY = 2, /* ignore pool overlay */ 437 + CEPH_OSD_COPY_FROM_FLAG_IGNORE_CACHE = 4, /* ignore osd cache logic */ 438 + CEPH_OSD_COPY_FROM_FLAG_MAP_SNAP_CLONE = 8, /* map snap direct to 439 + * cloneid */ 440 + CEPH_OSD_COPY_FROM_FLAG_RWORDERED = 16, /* order with write */ 440 441 }; 441 442 442 443 enum { ··· 514 497 __le64 expected_object_size; 515 498 __le64 expected_write_size; 516 499 } __attribute__ ((packed)) alloc_hint; 500 + struct { 501 + __le64 snapid; 502 + __le64 src_version; 503 + __u8 flags; /* CEPH_OSD_COPY_FROM_FLAG_* */ 504 + /* 505 + * CEPH_OSD_OP_FLAG_FADVISE_*: fadvise flags 506 + * for src object, flags for dest object are in 507 + * ceph_osd_op::flags. 508 + */ 509 + __le32 src_fadvise_flags; 510 + } __attribute__ ((packed)) copy_from; 517 511 }; 518 512 __le32 payload_len; 519 513 } __attribute__ ((packed));
+90
net/ceph/osd_client.c
··· 410 410 case CEPH_OSD_OP_LIST_WATCHERS: 411 411 ceph_osd_data_release(&op->list_watchers.response_data); 412 412 break; 413 + case CEPH_OSD_OP_COPY_FROM: 414 + ceph_osd_data_release(&op->copy_from.osd_data); 415 + break; 413 416 default: 414 417 break; 415 418 } ··· 705 702 case CEPH_OSD_OP_SETXATTR: 706 703 case CEPH_OSD_OP_CMPXATTR: 707 704 case CEPH_OSD_OP_NOTIFY_ACK: 705 + case CEPH_OSD_OP_COPY_FROM: 708 706 *num_request_data_items += 1; 709 707 break; 710 708 ··· 1019 1015 break; 1020 1016 case CEPH_OSD_OP_CREATE: 1021 1017 case CEPH_OSD_OP_DELETE: 1018 + break; 1019 + case CEPH_OSD_OP_COPY_FROM: 1020 + dst->copy_from.snapid = cpu_to_le64(src->copy_from.snapid); 1021 + dst->copy_from.src_version = 1022 + cpu_to_le64(src->copy_from.src_version); 1023 + dst->copy_from.flags = src->copy_from.flags; 1024 + dst->copy_from.src_fadvise_flags = 1025 + cpu_to_le32(src->copy_from.src_fadvise_flags); 1022 1026 break; 1023 1027 default: 1024 1028 pr_err("unsupported osd opcode %s\n", ··· 1958 1946 case CEPH_OSD_OP_NOTIFY_ACK: 1959 1947 ceph_osdc_msg_data_add(request_msg, 1960 1948 &op->notify_ack.request_data); 1949 + break; 1950 + case CEPH_OSD_OP_COPY_FROM: 1951 + ceph_osdc_msg_data_add(request_msg, 1952 + &op->copy_from.osd_data); 1961 1953 break; 1962 1954 1963 1955 /* reply */ ··· 5270 5254 return rc; 5271 5255 } 5272 5256 EXPORT_SYMBOL(ceph_osdc_writepages); 5257 + 5258 + static int osd_req_op_copy_from_init(struct ceph_osd_request *req, 5259 + u64 src_snapid, u64 src_version, 5260 + struct ceph_object_id *src_oid, 5261 + struct ceph_object_locator *src_oloc, 5262 + u32 src_fadvise_flags, 5263 + u32 dst_fadvise_flags, 5264 + u8 copy_from_flags) 5265 + { 5266 + struct ceph_osd_req_op *op; 5267 + struct page **pages; 5268 + void *p, *end; 5269 + 5270 + pages = ceph_alloc_page_vector(1, GFP_KERNEL); 5271 + if (IS_ERR(pages)) 5272 + return PTR_ERR(pages); 5273 + 5274 + op = _osd_req_op_init(req, 0, CEPH_OSD_OP_COPY_FROM, dst_fadvise_flags); 5275 + 
op->copy_from.snapid = src_snapid; 5276 + op->copy_from.src_version = src_version; 5277 + op->copy_from.flags = copy_from_flags; 5278 + op->copy_from.src_fadvise_flags = src_fadvise_flags; 5279 + 5280 + p = page_address(pages[0]); 5281 + end = p + PAGE_SIZE; 5282 + ceph_encode_string(&p, end, src_oid->name, src_oid->name_len); 5283 + encode_oloc(&p, end, src_oloc); 5284 + op->indata_len = PAGE_SIZE - (end - p); 5285 + 5286 + ceph_osd_data_pages_init(&op->copy_from.osd_data, pages, 5287 + op->indata_len, 0, false, true); 5288 + return 0; 5289 + } 5290 + 5291 + int ceph_osdc_copy_from(struct ceph_osd_client *osdc, 5292 + u64 src_snapid, u64 src_version, 5293 + struct ceph_object_id *src_oid, 5294 + struct ceph_object_locator *src_oloc, 5295 + u32 src_fadvise_flags, 5296 + struct ceph_object_id *dst_oid, 5297 + struct ceph_object_locator *dst_oloc, 5298 + u32 dst_fadvise_flags, 5299 + u8 copy_from_flags) 5300 + { 5301 + struct ceph_osd_request *req; 5302 + int ret; 5303 + 5304 + req = ceph_osdc_alloc_request(osdc, NULL, 1, false, GFP_KERNEL); 5305 + if (!req) 5306 + return -ENOMEM; 5307 + 5308 + req->r_flags = CEPH_OSD_FLAG_WRITE; 5309 + 5310 + ceph_oloc_copy(&req->r_t.base_oloc, dst_oloc); 5311 + ceph_oid_copy(&req->r_t.base_oid, dst_oid); 5312 + 5313 + ret = osd_req_op_copy_from_init(req, src_snapid, src_version, src_oid, 5314 + src_oloc, src_fadvise_flags, 5315 + dst_fadvise_flags, copy_from_flags); 5316 + if (ret) 5317 + goto out; 5318 + 5319 + ret = ceph_osdc_alloc_messages(req, GFP_KERNEL); 5320 + if (ret) 5321 + goto out; 5322 + 5323 + ceph_osdc_start_request(osdc, req, false); 5324 + ret = ceph_osdc_wait_request(osdc, req); 5325 + 5326 + out: 5327 + ceph_osdc_put_request(req); 5328 + return ret; 5329 + } 5330 + EXPORT_SYMBOL(ceph_osdc_copy_from); 5273 5331 5274 5332 int __init ceph_osdc_setup(void) 5275 5333 {