Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

[SCSI] hpsa: complete the ioaccel raidmap code

Load balance across members of an N-way mirror set, and
handle the meta-RAID levels: R10, R50, R60.

Signed-off-by: Scott Teel <scott.teel@hp.com>
Signed-off-by: Stephen M. Cameron <scameron@beardog.cce.hp.com>
Signed-off-by: James Bottomley <JBottomley@Parallels.com>

Authored by Scott Teel and committed by James Bottomley
6b80b18f 9fb0de2d

+168 -6
+168 -6
drivers/scsi/hpsa.c
··· 423 423 static const char *raid_label[] = { "0", "4", "1(1+0)", "5", "5+1", "ADG", 424 424 "1(ADM)", "UNKNOWN" 425 425 }; 426 + #define HPSA_RAID_0 0 427 + #define HPSA_RAID_4 1 428 + #define HPSA_RAID_1 2 /* also used for RAID 10 */ 429 + #define HPSA_RAID_5 3 /* also used for RAID 50 */ 430 + #define HPSA_RAID_51 4 431 + #define HPSA_RAID_6 5 /* also used for RAID 60 */ 432 + #define HPSA_RAID_ADM 6 /* also used for RAID 1+0 ADM */ 426 433 #define RAID_UNKNOWN (ARRAY_SIZE(raid_label) - 1) 427 434 428 435 static ssize_t raid_level_show(struct device *dev, ··· 2948 2941 cdb, cdb_len, scsi3addr); 2949 2942 } 2950 2943 2944 + static void raid_map_helper(struct raid_map_data *map, 2945 + int offload_to_mirror, u32 *map_index, u32 *current_group) 2946 + { 2947 + if (offload_to_mirror == 0) { 2948 + /* use physical disk in the first mirrored group. */ 2949 + *map_index %= map->data_disks_per_row; 2950 + return; 2951 + } 2952 + do { 2953 + /* determine mirror group that *map_index indicates */ 2954 + *current_group = *map_index / map->data_disks_per_row; 2955 + if (offload_to_mirror == *current_group) 2956 + continue; 2957 + if (*current_group < (map->layout_map_count - 1)) { 2958 + /* select map index from next group */ 2959 + *map_index += map->data_disks_per_row; 2960 + (*current_group)++; 2961 + } else { 2962 + /* select map index from first group */ 2963 + *map_index %= map->data_disks_per_row; 2964 + *current_group = 0; 2965 + } 2966 + } while (offload_to_mirror != *current_group); 2967 + } 2968 + 2951 2969 /* 2952 2970 * Attempt to perform offload RAID mapping for a logical volume I/O. 
2953 2971 */ ··· 2991 2959 u64 first_row, last_row; 2992 2960 u32 first_row_offset, last_row_offset; 2993 2961 u32 first_column, last_column; 2962 + u64 r0_first_row, r0_last_row; 2963 + u32 r5or6_blocks_per_row; 2964 + u64 r5or6_first_row, r5or6_last_row; 2965 + u32 r5or6_first_row_offset, r5or6_last_row_offset; 2966 + u32 r5or6_first_column, r5or6_last_column; 2967 + u32 total_disks_per_row; 2968 + u32 stripesize; 2969 + u32 first_group, last_group, current_group; 2994 2970 u32 map_row; 2995 2971 u32 disk_handle; 2996 2972 u64 disk_block; ··· 3008 2968 #if BITS_PER_LONG == 32 3009 2969 u64 tmpdiv; 3010 2970 #endif 2971 + int offload_to_mirror; 3011 2972 3012 2973 BUG_ON(!(dev->offload_config && dev->offload_enabled)); 3013 2974 ··· 3111 3070 return IO_ACCEL_INELIGIBLE; 3112 3071 3113 3072 /* proceeding with driver mapping */ 3073 + total_disks_per_row = map->data_disks_per_row + 3074 + map->metadata_disks_per_row; 3114 3075 map_row = ((u32)(first_row >> map->parity_rotation_shift)) % 3115 3076 map->row_cnt; 3116 - map_index = (map_row * (map->data_disks_per_row + 3117 - map->metadata_disks_per_row)) + first_column; 3118 - if (dev->raid_level == 2) { 3119 - /* simple round-robin balancing of RAID 1+0 reads across 3120 - * primary and mirror members. this is appropriate for SSD 3121 - * but not optimal for HDD. 3077 + map_index = (map_row * total_disks_per_row) + first_column; 3078 + 3079 + switch (dev->raid_level) { 3080 + case HPSA_RAID_0: 3081 + break; /* nothing special to do */ 3082 + case HPSA_RAID_1: 3083 + /* Handles load balance across RAID 1 members. 3084 + * (2-drive R1 and R10 with even # of drives.) 
3085 + * Appropriate for SSDs, not optimal for HDDs 3122 3086 */ 3087 + BUG_ON(map->layout_map_count != 2); 3123 3088 if (dev->offload_to_mirror) 3124 3089 map_index += map->data_disks_per_row; 3125 3090 dev->offload_to_mirror = !dev->offload_to_mirror; 3091 + break; 3092 + case HPSA_RAID_ADM: 3093 + /* Handles N-way mirrors (R1-ADM) 3094 + * and R10 with # of drives divisible by 3.) 3095 + */ 3096 + BUG_ON(map->layout_map_count != 3); 3097 + 3098 + offload_to_mirror = dev->offload_to_mirror; 3099 + raid_map_helper(map, offload_to_mirror, 3100 + &map_index, &current_group); 3101 + /* set mirror group to use next time */ 3102 + offload_to_mirror = 3103 + (offload_to_mirror >= map->layout_map_count - 1) 3104 + ? 0 : offload_to_mirror + 1; 3105 + /* FIXME: remove after debug/dev */ 3106 + BUG_ON(offload_to_mirror >= map->layout_map_count); 3107 + dev_warn(&h->pdev->dev, 3108 + "DEBUG: Using physical disk map index %d from mirror group %d\n", 3109 + map_index, offload_to_mirror); 3110 + dev->offload_to_mirror = offload_to_mirror; 3111 + /* Avoid direct use of dev->offload_to_mirror within this 3112 + * function since multiple threads might simultaneously 3113 + * increment it beyond the range of dev->layout_map_count -1. 
3114 + */ 3115 + break; 3116 + case HPSA_RAID_5: 3117 + case HPSA_RAID_6: 3118 + if (map->layout_map_count <= 1) 3119 + break; 3120 + 3121 + /* Verify first and last block are in same RAID group */ 3122 + r5or6_blocks_per_row = 3123 + map->strip_size * map->data_disks_per_row; 3124 + BUG_ON(r5or6_blocks_per_row == 0); 3125 + stripesize = r5or6_blocks_per_row * map->layout_map_count; 3126 + #if BITS_PER_LONG == 32 3127 + tmpdiv = first_block; 3128 + first_group = do_div(tmpdiv, stripesize); 3129 + tmpdiv = first_group; 3130 + (void) do_div(tmpdiv, r5or6_blocks_per_row); 3131 + first_group = tmpdiv; 3132 + tmpdiv = last_block; 3133 + last_group = do_div(tmpdiv, stripesize); 3134 + tmpdiv = last_group; 3135 + (void) do_div(tmpdiv, r5or6_blocks_per_row); 3136 + last_group = tmpdiv; 3137 + #else 3138 + first_group = (first_block % stripesize) / r5or6_blocks_per_row; 3139 + last_group = (last_block % stripesize) / r5or6_blocks_per_row; 3140 + if (first_group != last_group) 3141 + #endif 3142 + return IO_ACCEL_INELIGIBLE; 3143 + 3144 + /* Verify request is in a single row of RAID 5/6 */ 3145 + #if BITS_PER_LONG == 32 3146 + tmpdiv = first_block; 3147 + (void) do_div(tmpdiv, stripesize); 3148 + first_row = r5or6_first_row = r0_first_row = tmpdiv; 3149 + tmpdiv = last_block; 3150 + (void) do_div(tmpdiv, stripesize); 3151 + r5or6_last_row = r0_last_row = tmpdiv; 3152 + #else 3153 + first_row = r5or6_first_row = r0_first_row = 3154 + first_block / stripesize; 3155 + r5or6_last_row = r0_last_row = last_block / stripesize; 3156 + #endif 3157 + if (r5or6_first_row != r5or6_last_row) 3158 + return IO_ACCEL_INELIGIBLE; 3159 + 3160 + 3161 + /* Verify request is in a single column */ 3162 + #if BITS_PER_LONG == 32 3163 + tmpdiv = first_block; 3164 + first_row_offset = do_div(tmpdiv, stripesize); 3165 + tmpdiv = first_row_offset; 3166 + first_row_offset = (u32) do_div(tmpdiv, r5or6_blocks_per_row); 3167 + r5or6_first_row_offset = first_row_offset; 3168 + tmpdiv = last_block; 3169 + 
r5or6_last_row_offset = do_div(tmpdiv, stripesize); 3170 + tmpdiv = r5or6_last_row_offset; 3171 + r5or6_last_row_offset = do_div(tmpdiv, r5or6_blocks_per_row); 3172 + tmpdiv = r5or6_first_row_offset; 3173 + (void) do_div(tmpdiv, map->strip_size); 3174 + first_column = r5or6_first_column = tmpdiv; 3175 + tmpdiv = r5or6_last_row_offset; 3176 + (void) do_div(tmpdiv, map->strip_size); 3177 + r5or6_last_column = tmpdiv; 3178 + #else 3179 + first_row_offset = r5or6_first_row_offset = 3180 + (u32)((first_block % stripesize) % 3181 + r5or6_blocks_per_row); 3182 + 3183 + r5or6_last_row_offset = 3184 + (u32)((last_block % stripesize) % 3185 + r5or6_blocks_per_row); 3186 + 3187 + first_column = r5or6_first_column = 3188 + r5or6_first_row_offset / map->strip_size; 3189 + r5or6_last_column = 3190 + r5or6_last_row_offset / map->strip_size; 3191 + #endif 3192 + if (r5or6_first_column != r5or6_last_column) 3193 + return IO_ACCEL_INELIGIBLE; 3194 + 3195 + /* Request is eligible */ 3196 + map_row = ((u32)(first_row >> map->parity_rotation_shift)) % 3197 + map->row_cnt; 3198 + 3199 + map_index = (first_group * 3200 + (map->row_cnt * total_disks_per_row)) + 3201 + (map_row * total_disks_per_row) + first_column; 3202 + break; 3203 + default: 3204 + return IO_ACCEL_INELIGIBLE; 3126 3205 } 3206 + 3127 3207 disk_handle = dd[map_index].ioaccel_handle; 3128 3208 disk_block = map->disk_starting_blk + (first_row * map->strip_size) + 3129 3209 (first_row_offset - (first_column * map->strip_size));