[IA64] Cache error recovery

Similar to memory error recovery, when a cache error is consumed
by a user process, terminate the user process instead of crashing the system.

Signed-off-by: Russ Anderson (rja@sgi.com)
Acked-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>

authored by Russ Anderson and committed by Tony Luck 396e8e76 618b206f

+11 -21
+11 -21
arch/ia64/kernel/mca_drv.c
··· 602 default: 603 break; 604 } 605 } 606 607 return status; ··· 647 * Return value: 648 * 1 on Success / 0 on Failure 649 */ 650 - /* 651 - * Later we try to recover when below all conditions are satisfied. 652 - * 1. Only one processor error section is exist. 653 - * 2. BUS_CHECK is exist and the others are not exist.(Except TLB_CHECK) 654 - * 3. The entry of BUS_CHECK_INFO is 1. 655 - * 4. "External bus error" flag is set and the others are not set. 656 - */ 657 658 static int 659 recover_from_processor_error(int platform, slidx_table_t *slidx, ··· 682 /* 683 * The cache check and bus check bits have four possible states 684 * cc bc 685 - * 0 0 Weird record, not recovered 686 - * 1 0 Cache error, not recovered 687 - * 0 1 I/O error, attempt recovery 688 * 1 1 Memory error, attempt recovery 689 */ 690 - if (psp->bc == 0 || pbci == NULL) 691 - return fatal_mca("No bus check"); 692 693 /* 694 - * Sorry, we cannot handle so many. 695 */ 696 if (peidx_bus_check_num(peidx) > 1) 697 return fatal_mca("Too many bus checks"); 698 - /* 699 - * Well, here is only one bus error. 700 - */ 701 if (pbci->ib) 702 return fatal_mca("Internal Bus error"); 703 - if (pbci->cc) 704 - return fatal_mca("Cache-cache error"); 705 if (pbci->eb && pbci->bsi > 0) 706 return fatal_mca("External bus check fatal status"); 707 708 /* 709 - * This is a local MCA and estimated as recoverble external bus error. 710 - * (e.g. a load from poisoned memory) 711 - * This means "there are some platform errors". 712 */ 713 if (platform) 714 return recover_from_platform_error(slidx, peidx, pbci, sos); 715 /* 716 * On account of strange SAL error record, we cannot recover. 717 */
··· 602 default: 603 break; 604 } 605 + } else if (psp->cc && !psp->bc) { /* Cache error */ 606 + status = recover_from_read_error(slidx, peidx, pbci, sos); 607 } 608 609 return status; ··· 645 * Return value: 646 * 1 on Success / 0 on Failure 647 */ 648 649 static int 650 recover_from_processor_error(int platform, slidx_table_t *slidx, ··· 687 /* 688 * The cache check and bus check bits have four possible states 689 * cc bc 690 * 1 1 Memory error, attempt recovery 691 + * 1 0 Cache error, attempt recovery 692 + * 0 1 I/O error, attempt recovery 693 + * 0 0 Other error type, not recovered 694 */ 695 + if (psp->cc == 0 && (psp->bc == 0 || pbci == NULL)) 696 + return fatal_mca("No cache or bus check"); 697 698 /* 699 + * Cannot handle more than one bus check. 700 */ 701 if (peidx_bus_check_num(peidx) > 1) 702 return fatal_mca("Too many bus checks"); 703 + 704 if (pbci->ib) 705 return fatal_mca("Internal Bus error"); 706 if (pbci->eb && pbci->bsi > 0) 707 return fatal_mca("External bus check fatal status"); 708 709 /* 710 + * This is a local MCA and estimated as a recoverble error. 711 */ 712 if (platform) 713 return recover_from_platform_error(slidx, peidx, pbci, sos); 714 + 715 /* 716 * On account of strange SAL error record, we cannot recover. 717 */