/* Snapshot of this header at v3.2 (13 kB). */
1/* 2 * Generic EDAC defs 3 * 4 * Author: Dave Jiang <djiang@mvista.com> 5 * 6 * 2006-2008 (c) MontaVista Software, Inc. This file is licensed under 7 * the terms of the GNU General Public License version 2. This program 8 * is licensed "as is" without any warranty of any kind, whether express 9 * or implied. 10 * 11 */ 12#ifndef _LINUX_EDAC_H_ 13#define _LINUX_EDAC_H_ 14 15#include <linux/atomic.h> 16#include <linux/sysdev.h> 17 18#define EDAC_OPSTATE_INVAL -1 19#define EDAC_OPSTATE_POLL 0 20#define EDAC_OPSTATE_NMI 1 21#define EDAC_OPSTATE_INT 2 22 23extern int edac_op_state; 24extern int edac_err_assert; 25extern atomic_t edac_handlers; 26extern struct sysdev_class edac_class; 27 28extern int edac_handler_set(void); 29extern void edac_atomic_assert_error(void); 30extern struct sysdev_class *edac_get_sysfs_class(void); 31extern void edac_put_sysfs_class(void); 32 33static inline void opstate_init(void) 34{ 35 switch (edac_op_state) { 36 case EDAC_OPSTATE_POLL: 37 case EDAC_OPSTATE_NMI: 38 break; 39 default: 40 edac_op_state = EDAC_OPSTATE_POLL; 41 } 42 return; 43} 44 45#define EDAC_MC_LABEL_LEN 31 46#define MC_PROC_NAME_MAX_LEN 7 47 48/* memory devices */ 49enum dev_type { 50 DEV_UNKNOWN = 0, 51 DEV_X1, 52 DEV_X2, 53 DEV_X4, 54 DEV_X8, 55 DEV_X16, 56 DEV_X32, /* Do these parts exist? */ 57 DEV_X64 /* Do these parts exist? 
*/ 58}; 59 60#define DEV_FLAG_UNKNOWN BIT(DEV_UNKNOWN) 61#define DEV_FLAG_X1 BIT(DEV_X1) 62#define DEV_FLAG_X2 BIT(DEV_X2) 63#define DEV_FLAG_X4 BIT(DEV_X4) 64#define DEV_FLAG_X8 BIT(DEV_X8) 65#define DEV_FLAG_X16 BIT(DEV_X16) 66#define DEV_FLAG_X32 BIT(DEV_X32) 67#define DEV_FLAG_X64 BIT(DEV_X64) 68 69/* memory types */ 70enum mem_type { 71 MEM_EMPTY = 0, /* Empty csrow */ 72 MEM_RESERVED, /* Reserved csrow type */ 73 MEM_UNKNOWN, /* Unknown csrow type */ 74 MEM_FPM, /* Fast page mode */ 75 MEM_EDO, /* Extended data out */ 76 MEM_BEDO, /* Burst Extended data out */ 77 MEM_SDR, /* Single data rate SDRAM */ 78 MEM_RDR, /* Registered single data rate SDRAM */ 79 MEM_DDR, /* Double data rate SDRAM */ 80 MEM_RDDR, /* Registered Double data rate SDRAM */ 81 MEM_RMBS, /* Rambus DRAM */ 82 MEM_DDR2, /* DDR2 RAM */ 83 MEM_FB_DDR2, /* fully buffered DDR2 */ 84 MEM_RDDR2, /* Registered DDR2 RAM */ 85 MEM_XDR, /* Rambus XDR */ 86 MEM_DDR3, /* DDR3 RAM */ 87 MEM_RDDR3, /* Registered DDR3 RAM */ 88}; 89 90#define MEM_FLAG_EMPTY BIT(MEM_EMPTY) 91#define MEM_FLAG_RESERVED BIT(MEM_RESERVED) 92#define MEM_FLAG_UNKNOWN BIT(MEM_UNKNOWN) 93#define MEM_FLAG_FPM BIT(MEM_FPM) 94#define MEM_FLAG_EDO BIT(MEM_EDO) 95#define MEM_FLAG_BEDO BIT(MEM_BEDO) 96#define MEM_FLAG_SDR BIT(MEM_SDR) 97#define MEM_FLAG_RDR BIT(MEM_RDR) 98#define MEM_FLAG_DDR BIT(MEM_DDR) 99#define MEM_FLAG_RDDR BIT(MEM_RDDR) 100#define MEM_FLAG_RMBS BIT(MEM_RMBS) 101#define MEM_FLAG_DDR2 BIT(MEM_DDR2) 102#define MEM_FLAG_FB_DDR2 BIT(MEM_FB_DDR2) 103#define MEM_FLAG_RDDR2 BIT(MEM_RDDR2) 104#define MEM_FLAG_XDR BIT(MEM_XDR) 105#define MEM_FLAG_DDR3 BIT(MEM_DDR3) 106#define MEM_FLAG_RDDR3 BIT(MEM_RDDR3) 107 108/* chipset Error Detection and Correction capabilities and mode */ 109enum edac_type { 110 EDAC_UNKNOWN = 0, /* Unknown if ECC is available */ 111 EDAC_NONE, /* Doesn't support ECC */ 112 EDAC_RESERVED, /* Reserved ECC type */ 113 EDAC_PARITY, /* Detects parity errors */ 114 EDAC_EC, /* Error Checking - no correction 
*/ 115 EDAC_SECDED, /* Single bit error correction, Double detection */ 116 EDAC_S2ECD2ED, /* Chipkill x2 devices - do these exist? */ 117 EDAC_S4ECD4ED, /* Chipkill x4 devices */ 118 EDAC_S8ECD8ED, /* Chipkill x8 devices */ 119 EDAC_S16ECD16ED, /* Chipkill x16 devices */ 120}; 121 122#define EDAC_FLAG_UNKNOWN BIT(EDAC_UNKNOWN) 123#define EDAC_FLAG_NONE BIT(EDAC_NONE) 124#define EDAC_FLAG_PARITY BIT(EDAC_PARITY) 125#define EDAC_FLAG_EC BIT(EDAC_EC) 126#define EDAC_FLAG_SECDED BIT(EDAC_SECDED) 127#define EDAC_FLAG_S2ECD2ED BIT(EDAC_S2ECD2ED) 128#define EDAC_FLAG_S4ECD4ED BIT(EDAC_S4ECD4ED) 129#define EDAC_FLAG_S8ECD8ED BIT(EDAC_S8ECD8ED) 130#define EDAC_FLAG_S16ECD16ED BIT(EDAC_S16ECD16ED) 131 132/* scrubbing capabilities */ 133enum scrub_type { 134 SCRUB_UNKNOWN = 0, /* Unknown if scrubber is available */ 135 SCRUB_NONE, /* No scrubber */ 136 SCRUB_SW_PROG, /* SW progressive (sequential) scrubbing */ 137 SCRUB_SW_SRC, /* Software scrub only errors */ 138 SCRUB_SW_PROG_SRC, /* Progressive software scrub from an error */ 139 SCRUB_SW_TUNABLE, /* Software scrub frequency is tunable */ 140 SCRUB_HW_PROG, /* HW progressive (sequential) scrubbing */ 141 SCRUB_HW_SRC, /* Hardware scrub only errors */ 142 SCRUB_HW_PROG_SRC, /* Progressive hardware scrub from an error */ 143 SCRUB_HW_TUNABLE /* Hardware scrub frequency is tunable */ 144}; 145 146#define SCRUB_FLAG_SW_PROG BIT(SCRUB_SW_PROG) 147#define SCRUB_FLAG_SW_SRC BIT(SCRUB_SW_SRC) 148#define SCRUB_FLAG_SW_PROG_SRC BIT(SCRUB_SW_PROG_SRC) 149#define SCRUB_FLAG_SW_TUN BIT(SCRUB_SW_SCRUB_TUNABLE) 150#define SCRUB_FLAG_HW_PROG BIT(SCRUB_HW_PROG) 151#define SCRUB_FLAG_HW_SRC BIT(SCRUB_HW_SRC) 152#define SCRUB_FLAG_HW_PROG_SRC BIT(SCRUB_HW_PROG_SRC) 153#define SCRUB_FLAG_HW_TUN BIT(SCRUB_HW_TUNABLE) 154 155/* FIXME - should have notify capabilities: NMI, LOG, PROC, etc */ 156 157/* EDAC internal operation states */ 158#define OP_ALLOC 0x100 159#define OP_RUNNING_POLL 0x201 160#define OP_RUNNING_INTERRUPT 0x202 161#define 
OP_RUNNING_POLL_INTR 0x203 162#define OP_OFFLINE 0x300 163 164/* 165 * There are several things to be aware of that aren't at all obvious: 166 * 167 * 168 * SOCKETS, SOCKET SETS, BANKS, ROWS, CHIP-SELECT ROWS, CHANNELS, etc.. 169 * 170 * These are some of the many terms that are thrown about that don't always 171 * mean what people think they mean (Inconceivable!). In the interest of 172 * creating a common ground for discussion, terms and their definitions 173 * will be established. 174 * 175 * Memory devices: The individual chip on a memory stick. These devices 176 * commonly output 4 and 8 bits each. Grouping several 177 * of these in parallel provides 64 bits which is common 178 * for a memory stick. 179 * 180 * Memory Stick: A printed circuit board that aggregates multiple 181 * memory devices in parallel. This is the atomic 182 * memory component that is purchaseable by Joe consumer 183 * and loaded into a memory socket. 184 * 185 * Socket: A physical connector on the motherboard that accepts 186 * a single memory stick. 187 * 188 * Channel: Set of memory devices on a memory stick that must be 189 * grouped in parallel with one or more additional 190 * channels from other memory sticks. This parallel 191 * grouping of the output from multiple channels are 192 * necessary for the smallest granularity of memory access. 193 * Some memory controllers are capable of single channel - 194 * which means that memory sticks can be loaded 195 * individually. Other memory controllers are only 196 * capable of dual channel - which means that memory 197 * sticks must be loaded as pairs (see "socket set"). 198 * 199 * Chip-select row: All of the memory devices that are selected together. 200 * for a single, minimum grain of memory access. 201 * This selects all of the parallel memory devices across 202 * all of the parallel channels. Common chip-select rows 203 * for single channel are 64 bits, for dual channel 128 204 * bits. 
205 * 206 * Single-Ranked stick: A Single-ranked stick has 1 chip-select row of memory. 207 * Motherboards commonly drive two chip-select pins to 208 * a memory stick. A single-ranked stick, will occupy 209 * only one of those rows. The other will be unused. 210 * 211 * Double-Ranked stick: A double-ranked stick has two chip-select rows which 212 * access different sets of memory devices. The two 213 * rows cannot be accessed concurrently. 214 * 215 * Double-sided stick: DEPRECATED TERM, see Double-Ranked stick. 216 * A double-sided stick has two chip-select rows which 217 * access different sets of memory devices. The two 218 * rows cannot be accessed concurrently. "Double-sided" 219 * is irrespective of the memory devices being mounted 220 * on both sides of the memory stick. 221 * 222 * Socket set: All of the memory sticks that are required for 223 * a single memory access or all of the memory sticks 224 * spanned by a chip-select row. A single socket set 225 * has two chip-select rows and if double-sided sticks 226 * are used these will occupy those chip-select rows. 227 * 228 * Bank: This term is avoided because it is unclear when 229 * needing to distinguish between chip-select rows and 230 * socket sets. 231 * 232 * Controller pages: 233 * 234 * Physical pages: 235 * 236 * Virtual pages: 237 * 238 * 239 * STRUCTURE ORGANIZATION AND CHOICES 240 * 241 * 242 * 243 * PS - I enjoyed writing all that about as much as you enjoyed reading it. 
244 */ 245 246struct channel_info { 247 int chan_idx; /* channel index */ 248 u32 ce_count; /* Correctable Errors for this CHANNEL */ 249 char label[EDAC_MC_LABEL_LEN + 1]; /* DIMM label on motherboard */ 250 struct csrow_info *csrow; /* the parent */ 251}; 252 253struct csrow_info { 254 unsigned long first_page; /* first page number in dimm */ 255 unsigned long last_page; /* last page number in dimm */ 256 unsigned long page_mask; /* used for interleaving - 257 * 0UL for non intlv 258 */ 259 u32 nr_pages; /* number of pages in csrow */ 260 u32 grain; /* granularity of reported error in bytes */ 261 int csrow_idx; /* the chip-select row */ 262 enum dev_type dtype; /* memory device type */ 263 u32 ue_count; /* Uncorrectable Errors for this csrow */ 264 u32 ce_count; /* Correctable Errors for this csrow */ 265 enum mem_type mtype; /* memory csrow type */ 266 enum edac_type edac_mode; /* EDAC mode for this csrow */ 267 struct mem_ctl_info *mci; /* the parent */ 268 269 struct kobject kobj; /* sysfs kobject for this csrow */ 270 271 /* channel information for this csrow */ 272 u32 nr_channels; 273 struct channel_info *channels; 274}; 275 276struct mcidev_sysfs_group { 277 const char *name; /* group name */ 278 const struct mcidev_sysfs_attribute *mcidev_attr; /* group attributes */ 279}; 280 281struct mcidev_sysfs_group_kobj { 282 struct list_head list; /* list for all instances within a mc */ 283 284 struct kobject kobj; /* kobj for the group */ 285 286 const struct mcidev_sysfs_group *grp; /* group description table */ 287 struct mem_ctl_info *mci; /* the parent */ 288}; 289 290/* mcidev_sysfs_attribute structure 291 * used for driver sysfs attributes and in mem_ctl_info 292 * sysfs top level entries 293 */ 294struct mcidev_sysfs_attribute { 295 /* It should use either attr or grp */ 296 struct attribute attr; 297 const struct mcidev_sysfs_group *grp; /* Points to a group of attributes */ 298 299 /* Ops for show/store values at the attribute - not used on group */ 
300 ssize_t (*show)(struct mem_ctl_info *,char *); 301 ssize_t (*store)(struct mem_ctl_info *, const char *,size_t); 302}; 303 304/* MEMORY controller information structure 305 */ 306struct mem_ctl_info { 307 struct list_head link; /* for global list of mem_ctl_info structs */ 308 309 struct module *owner; /* Module owner of this control struct */ 310 311 unsigned long mtype_cap; /* memory types supported by mc */ 312 unsigned long edac_ctl_cap; /* Mem controller EDAC capabilities */ 313 unsigned long edac_cap; /* configuration capabilities - this is 314 * closely related to edac_ctl_cap. The 315 * difference is that the controller may be 316 * capable of s4ecd4ed which would be listed 317 * in edac_ctl_cap, but if channels aren't 318 * capable of s4ecd4ed then the edac_cap would 319 * not have that capability. 320 */ 321 unsigned long scrub_cap; /* chipset scrub capabilities */ 322 enum scrub_type scrub_mode; /* current scrub mode */ 323 324 /* Translates sdram memory scrub rate given in bytes/sec to the 325 internal representation and configures whatever else needs 326 to be configured. 327 */ 328 int (*set_sdram_scrub_rate) (struct mem_ctl_info * mci, u32 bw); 329 330 /* Get the current sdram memory scrub rate from the internal 331 representation and converts it to the closest matching 332 bandwidth in bytes/sec. 333 */ 334 int (*get_sdram_scrub_rate) (struct mem_ctl_info * mci); 335 336 337 /* pointer to edac checking routine */ 338 void (*edac_check) (struct mem_ctl_info * mci); 339 340 /* 341 * Remaps memory pages: controller pages to physical pages. 342 * For most MC's, this will be NULL. 343 */ 344 /* FIXME - why not send the phys page to begin with? */ 345 unsigned long (*ctl_page_to_phys) (struct mem_ctl_info * mci, 346 unsigned long page); 347 int mc_idx; 348 int nr_csrows; 349 struct csrow_info *csrows; 350 /* 351 * FIXME - what about controllers on other busses? - IDs must be 352 * unique. 
dev pointer should be sufficiently unique, but 353 * BUS:SLOT.FUNC numbers may not be unique. 354 */ 355 struct device *dev; 356 const char *mod_name; 357 const char *mod_ver; 358 const char *ctl_name; 359 const char *dev_name; 360 char proc_name[MC_PROC_NAME_MAX_LEN + 1]; 361 void *pvt_info; 362 u32 ue_noinfo_count; /* Uncorrectable Errors w/o info */ 363 u32 ce_noinfo_count; /* Correctable Errors w/o info */ 364 u32 ue_count; /* Total Uncorrectable Errors for this MC */ 365 u32 ce_count; /* Total Correctable Errors for this MC */ 366 unsigned long start_time; /* mci load start time (in jiffies) */ 367 368 struct completion complete; 369 370 /* edac sysfs device control */ 371 struct kobject edac_mci_kobj; 372 373 /* list for all grp instances within a mc */ 374 struct list_head grp_kobj_list; 375 376 /* Additional top controller level attributes, but specified 377 * by the low level driver. 378 * 379 * Set by the low level driver to provide attributes at the 380 * controller level, same level as 'ue_count' and 'ce_count' above. 381 * An array of structures, NULL terminated 382 * 383 * If attributes are desired, then set to array of attributes 384 * If no attributes are desired, leave NULL 385 */ 386 const struct mcidev_sysfs_attribute *mc_driver_sysfs_attributes; 387 388 /* work struct for this MC */ 389 struct delayed_work work; 390 391 /* the internal state of this controller instance */ 392 int op_state; 393}; 394 395#endif