EDAC/synopsys: Fix ECC status and IRQ control race condition

The race condition around the ECCCLR register access happens in the IRQ
disable method called in the device remove() procedure and in the ECC IRQ
handler:

1. Enable IRQ:
a. ECCCLR = EN_CE | EN_UE
2. Disable IRQ:
a. ECCCLR = 0
3. IRQ handler:
a. ECCCLR = CLR_CE | CLR_CE_CNT | CLR_UE | CLR_UE_CNT
b. ECCCLR = 0
c. ECCCLR = EN_CE | EN_UE

So if the IRQ disabling procedure is called concurrently with the IRQ
handler method, the IRQ might actually be left enabled due to
statement 3c.

The root cause of the problem is that the ECCCLR register (which has
been called ECCCTL since v3.10a) intermixes the ECC status data clear
flags with the IRQ enable/disable flags. Thus the IRQ disabling (clear EN
flags) and handling (write 1 to clear ECC status data) procedures must
be serialised around the ECCCTL register modification to prevent the
race.

So fix the problem described above by adding a spin-lock around the
ECCCLR modifications and preventing the IRQ handler from modifying the
IRQ enable flags (there is no point in disabling the IRQ and then
re-enabling it again within a single IRQ handler call, see the
statements 3a/3b and 3c above).

Fixes: f7824ded4149 ("EDAC/synopsys: Add support for version 3 of the Synopsys EDAC DDR")
Signed-off-by: Serge Semin <fancer.lancer@gmail.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Link: https://lore.kernel.org/r/20240222181324.28242-2-fancer.lancer@gmail.com

authored by Serge Semin and committed by Borislav Petkov (AMD) 591c9466 dd5a440a

Changed files
+37 -13
drivers
+37 -13
drivers/edac/synopsys_edac.c
··· 9 9 #include <linux/edac.h> 10 10 #include <linux/module.h> 11 11 #include <linux/platform_device.h> 12 + #include <linux/spinlock.h> 12 13 #include <linux/interrupt.h> 13 14 #include <linux/of.h> 14 15 ··· 300 299 /** 301 300 * struct synps_edac_priv - DDR memory controller private instance data. 302 301 * @baseaddr: Base address of the DDR controller. 302 + * @reglock: Concurrent CSRs access lock. 303 303 * @message: Buffer for framing the event specific info. 304 304 * @stat: ECC status information. 305 305 * @p_data: Platform data. ··· 315 313 */ 316 314 struct synps_edac_priv { 317 315 void __iomem *baseaddr; 316 + spinlock_t reglock; 318 317 char message[SYNPS_EDAC_MSG_SIZE]; 319 318 struct synps_ecc_status stat; 320 319 const struct synps_platform_data *p_data; ··· 411 408 static int zynqmp_get_error_info(struct synps_edac_priv *priv) 412 409 { 413 410 struct synps_ecc_status *p; 414 - u32 regval, clearval = 0; 411 + u32 regval, clearval; 412 + unsigned long flags; 415 413 void __iomem *base; 416 414 417 415 base = priv->baseaddr; ··· 456 452 p->ueinfo.blknr = (regval & ECC_CEADDR1_BLKNR_MASK); 457 453 p->ueinfo.data = readl(base + ECC_UESYND0_OFST); 458 454 out: 459 - clearval = ECC_CTRL_CLR_CE_ERR | ECC_CTRL_CLR_CE_ERRCNT; 460 - clearval |= ECC_CTRL_CLR_UE_ERR | ECC_CTRL_CLR_UE_ERRCNT; 455 + spin_lock_irqsave(&priv->reglock, flags); 456 + 457 + clearval = readl(base + ECC_CLR_OFST) | 458 + ECC_CTRL_CLR_CE_ERR | ECC_CTRL_CLR_CE_ERRCNT | 459 + ECC_CTRL_CLR_UE_ERR | ECC_CTRL_CLR_UE_ERRCNT; 461 460 writel(clearval, base + ECC_CLR_OFST); 462 - writel(0x0, base + ECC_CLR_OFST); 461 + 462 + spin_unlock_irqrestore(&priv->reglock, flags); 463 463 464 464 return 0; 465 465 } ··· 523 515 524 516 static void enable_intr(struct synps_edac_priv *priv) 525 517 { 518 + unsigned long flags; 519 + 526 520 /* Enable UE/CE Interrupts */ 527 - if (priv->p_data->quirks & DDR_ECC_INTR_SELF_CLEAR) 528 - writel(DDR_UE_MASK | DDR_CE_MASK, 529 - priv->baseaddr + ECC_CLR_OFST); 
530 - else 521 + if (!(priv->p_data->quirks & DDR_ECC_INTR_SELF_CLEAR)) { 531 522 writel(DDR_QOSUE_MASK | DDR_QOSCE_MASK, 532 523 priv->baseaddr + DDR_QOS_IRQ_EN_OFST); 533 524 525 + return; 526 + } 527 + 528 + spin_lock_irqsave(&priv->reglock, flags); 529 + 530 + writel(DDR_UE_MASK | DDR_CE_MASK, 531 + priv->baseaddr + ECC_CLR_OFST); 532 + 533 + spin_unlock_irqrestore(&priv->reglock, flags); 534 534 } 535 535 536 536 static void disable_intr(struct synps_edac_priv *priv) 537 537 { 538 + unsigned long flags; 539 + 538 540 /* Disable UE/CE Interrupts */ 539 - if (priv->p_data->quirks & DDR_ECC_INTR_SELF_CLEAR) 540 - writel(0x0, priv->baseaddr + ECC_CLR_OFST); 541 - else 541 + if (!(priv->p_data->quirks & DDR_ECC_INTR_SELF_CLEAR)) { 542 542 writel(DDR_QOSUE_MASK | DDR_QOSCE_MASK, 543 543 priv->baseaddr + DDR_QOS_IRQ_DB_OFST); 544 + 545 + return; 546 + } 547 + 548 + spin_lock_irqsave(&priv->reglock, flags); 549 + 550 + writel(0, priv->baseaddr + ECC_CLR_OFST); 551 + 552 + spin_unlock_irqrestore(&priv->reglock, flags); 544 553 } 545 554 546 555 /** ··· 601 576 /* v3.0 of the controller does not have this register */ 602 577 if (!(priv->p_data->quirks & DDR_ECC_INTR_SELF_CLEAR)) 603 578 writel(regval, priv->baseaddr + DDR_QOS_IRQ_STAT_OFST); 604 - else 605 - enable_intr(priv); 606 579 607 580 return IRQ_HANDLED; 608 581 } ··· 1380 1357 priv = mci->pvt_info; 1381 1358 priv->baseaddr = baseaddr; 1382 1359 priv->p_data = p_data; 1360 + spin_lock_init(&priv->reglock); 1383 1361 1384 1362 mc_init(mci, pdev); 1385 1363