Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

dm btree: introduce cursor api

This uses prefetching to speed up iteration through a btree.

Signed-off-by: Joe Thornber <ejt@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>

authored by

Joe Thornber and committed by
Mike Snitzer
7d111c81 9d1b404c

+197
+162
drivers/md/persistent-data/dm-btree.c
··· 994 994 return walk_node(info, root, fn, context); 995 995 } 996 996 EXPORT_SYMBOL_GPL(dm_btree_walk); 997 + 998 + /*----------------------------------------------------------------*/ 999 + 1000 + static void prefetch_values(struct dm_btree_cursor *c) 1001 + { 1002 + unsigned i, nr; 1003 + __le64 value_le; 1004 + struct cursor_node *n = c->nodes + c->depth - 1; 1005 + struct btree_node *bn = dm_block_data(n->b); 1006 + struct dm_block_manager *bm = dm_tm_get_bm(c->info->tm); 1007 + 1008 + BUG_ON(c->info->value_type.size != sizeof(value_le)); 1009 + 1010 + nr = le32_to_cpu(bn->header.nr_entries); 1011 + for (i = 0; i < nr; i++) { 1012 + memcpy(&value_le, value_ptr(bn, i), sizeof(value_le)); 1013 + dm_bm_prefetch(bm, le64_to_cpu(value_le)); 1014 + } 1015 + } 1016 + 1017 + static bool leaf_node(struct dm_btree_cursor *c) 1018 + { 1019 + struct cursor_node *n = c->nodes + c->depth - 1; 1020 + struct btree_node *bn = dm_block_data(n->b); 1021 + 1022 + return le32_to_cpu(bn->header.flags) & LEAF_NODE; 1023 + } 1024 + 1025 + static int push_node(struct dm_btree_cursor *c, dm_block_t b) 1026 + { 1027 + int r; 1028 + struct cursor_node *n = c->nodes + c->depth; 1029 + 1030 + if (c->depth >= DM_BTREE_CURSOR_MAX_DEPTH - 1) { 1031 + DMERR("couldn't push cursor node, stack depth too high"); 1032 + return -EINVAL; 1033 + } 1034 + 1035 + r = bn_read_lock(c->info, b, &n->b); 1036 + if (r) 1037 + return r; 1038 + 1039 + n->index = 0; 1040 + c->depth++; 1041 + 1042 + if (c->prefetch_leaves || !leaf_node(c)) 1043 + prefetch_values(c); 1044 + 1045 + return 0; 1046 + } 1047 + 1048 + static void pop_node(struct dm_btree_cursor *c) 1049 + { 1050 + c->depth--; 1051 + unlock_block(c->info, c->nodes[c->depth].b); 1052 + } 1053 + 1054 + static int inc_or_backtrack(struct dm_btree_cursor *c) 1055 + { 1056 + struct cursor_node *n; 1057 + struct btree_node *bn; 1058 + 1059 + for (;;) { 1060 + if (!c->depth) 1061 + return -ENODATA; 1062 + 1063 + n = c->nodes + c->depth - 1; 1064 + bn = dm_block_data(n->b); 1065 + 1066 + n->index++; 1067 + if (n->index < le32_to_cpu(bn->header.nr_entries)) 1068 + break; 1069 + 1070 + pop_node(c); 1071 + } 1072 + 1073 + return 0; 1074 + } 1075 + 1076 + static int find_leaf(struct dm_btree_cursor *c) 1077 + { 1078 + int r = 0; 1079 + struct cursor_node *n; 1080 + struct btree_node *bn; 1081 + __le64 value_le; 1082 + 1083 + for (;;) { 1084 + n = c->nodes + c->depth - 1; 1085 + bn = dm_block_data(n->b); 1086 + 1087 + if (le32_to_cpu(bn->header.flags) & LEAF_NODE) 1088 + break; 1089 + 1090 + memcpy(&value_le, value_ptr(bn, n->index), sizeof(value_le)); 1091 + r = push_node(c, le64_to_cpu(value_le)); 1092 + if (r) { 1093 + DMERR("push_node failed"); 1094 + break; 1095 + } 1096 + } 1097 + 1098 + if (!r && (le32_to_cpu(bn->header.nr_entries) == 0)) 1099 + return -ENODATA; 1100 + 1101 + return r; 1102 + } 1103 + 1104 + int dm_btree_cursor_begin(struct dm_btree_info *info, dm_block_t root, 1105 + bool prefetch_leaves, struct dm_btree_cursor *c) 1106 + { 1107 + int r; 1108 + 1109 + c->info = info; 1110 + c->root = root; 1111 + c->depth = 0; 1112 + c->prefetch_leaves = prefetch_leaves; 1113 + 1114 + r = push_node(c, root); 1115 + if (r) 1116 + return r; 1117 + 1118 + return find_leaf(c); 1119 + } 1120 + EXPORT_SYMBOL_GPL(dm_btree_cursor_begin); 1121 + 1122 + void dm_btree_cursor_end(struct dm_btree_cursor *c) 1123 + { 1124 + while (c->depth) 1125 + pop_node(c); 1126 + } 1127 + EXPORT_SYMBOL_GPL(dm_btree_cursor_end); 1128 + 1129 + int dm_btree_cursor_next(struct dm_btree_cursor *c) 1130 + { 1131 + int r = inc_or_backtrack(c); 1132 + if (!r) { 1133 + r = find_leaf(c); 1134 + if (r) 1135 + DMERR("find_leaf failed"); 1136 + } 1137 + 1138 + return r; 1139 + } 1140 + EXPORT_SYMBOL_GPL(dm_btree_cursor_next); 1141 + 1142 + int dm_btree_cursor_get_value(struct dm_btree_cursor *c, uint64_t *key, void *value_le) 1143 + { 1144 + if (c->depth) { 1145 + struct cursor_node *n = c->nodes + c->depth - 1; 1146 + struct btree_node *bn = dm_block_data(n->b); 1147 + 1148 + if (le32_to_cpu(bn->header.flags) & INTERNAL_NODE) 1149 + return -EINVAL; 1150 + 1151 + *key = le64_to_cpu(*key_ptr(bn, n->index)); 1152 + memcpy(value_le, value_ptr(bn, n->index), c->info->value_type.size); 1153 + return 0; 1154 + 1155 + } else 1156 + return -ENODATA; 1157 + } 1158 + EXPORT_SYMBOL_GPL(dm_btree_cursor_get_value);
+35
drivers/md/persistent-data/dm-btree.h
··· 176 176 int (*fn)(void *context, uint64_t *keys, void *leaf), 177 177 void *context); 178 178 179 + 180 + /*----------------------------------------------------------------*/ 181 + 182 + /* 183 + * Cursor API. This does not follow the rolling lock convention. Since we 184 + * know the order that values are required we can issue prefetches to speed 185 + * up iteration. Use on a single level btree only. 186 + */ 187 + #define DM_BTREE_CURSOR_MAX_DEPTH 16 188 + 189 + struct cursor_node { 190 + struct dm_block *b; 191 + unsigned index; 192 + }; 193 + 194 + struct dm_btree_cursor { 195 + struct dm_btree_info *info; 196 + dm_block_t root; 197 + 198 + bool prefetch_leaves; 199 + unsigned depth; 200 + struct cursor_node nodes[DM_BTREE_CURSOR_MAX_DEPTH]; 201 + }; 202 + 203 + /* 204 + * Creates a fresh cursor. If prefetch_leaves is set then it is assumed 205 + * the btree contains block indexes that will be prefetched. The cursor is 206 + * quite large, so you probably don't want to put it on the stack. 207 + */ 208 + int dm_btree_cursor_begin(struct dm_btree_info *info, dm_block_t root, 209 + bool prefetch_leaves, struct dm_btree_cursor *c); 210 + void dm_btree_cursor_end(struct dm_btree_cursor *c); 211 + int dm_btree_cursor_next(struct dm_btree_cursor *c); 212 + int dm_btree_cursor_get_value(struct dm_btree_cursor *c, uint64_t *key, void *value_le); 213 + 179 214 #endif /* _LINUX_DM_BTREE_H */