[linux-yocto] [PATCH 02/13] MIPS: OCTEON: Fix L1 dcache parity for OCTEON3
Chandrakala Chavva
cchavva.cavm at gmail.com
Mon Jan 26 22:04:08 PST 2015
From: Abhishek Paliwal <abhishek.paliwal at aricent.com>
From: David Daney <david.daney at cavium.com>
Use COP0_ERRCTL for L1 Dcache parity and TLB parity checks for OCTEON3.
For OCTEON II CPUs, add a Bus Error handler for the Write Buffer
parity condition, and log these errors through the EDAC system.
Signed-off-by: David Daney <david.daney at cavium.com>
Signed-off-by: Leonid Rosenboim <lrosenboim at caviumnetworks.com>
Signed-off-by: Chandrakala Chavva <cchavva at caviumnetworks.com>
Signed-off-by: Abhishek Paliwal <abhishek.paliwal at aricent.com>
---
arch/mips/cavium-octeon/setup.c | 12 ++-
arch/mips/include/asm/mipsregs.h | 4 +
arch/mips/include/asm/octeon/octeon.h | 12 +++
arch/mips/include/asm/traps.h | 9 ++
arch/mips/kernel/traps.c | 11 ++-
arch/mips/mm/c-octeon.c | 170 ++++++++++++++++++++++++++++++++--
arch/mips/mm/cex-oct.S | 37 ++++++++
drivers/edac/octeon_edac-pc.c | 34 ++++---
8 files changed, 267 insertions(+), 22 deletions(-)
diff --git a/arch/mips/cavium-octeon/setup.c b/arch/mips/cavium-octeon/setup.c
index fc0b0d2..7027044 100644
--- a/arch/mips/cavium-octeon/setup.c
+++ b/arch/mips/cavium-octeon/setup.c
@@ -381,7 +381,10 @@ void octeon_check_cpu_bist(void)
pr_err("Core%d BIST Failure: CacheErr(icache) = 0x%llx\n",
coreid, bist_val);
- bist_val = read_octeon_c0_dcacheerr();
+ if (current_cpu_type() == CPU_CAVIUM_OCTEON3)
+ bist_val = read_octeon_c0_errctl();
+ else
+ bist_val = read_octeon_c0_dcacheerr();
if (bist_val & 1)
pr_err("Core%d L1 Dcache parity error: "
"CacheErr(dcache) = 0x%llx\n",
@@ -392,7 +395,9 @@ void octeon_check_cpu_bist(void)
if (bist_val & mask)
pr_err("Core%d BIST Failure: COP0_CVM_MEM_CTL = 0x%llx\n",
coreid, bist_val);
-
+ if (current_cpu_type() == CPU_CAVIUM_OCTEON3)
+ write_octeon_c0_errctl(1);
+ else
write_octeon_c0_dcacheerr(0);
}
@@ -583,7 +588,8 @@ void octeon_user_io_init(void)
cvmmemctl.s.cvmsegenau = 0;
/* Enable TLB parity error reporting on OCTEON II */
- if (current_cpu_type() == CPU_CAVIUM_OCTEON2)
+ if (current_cpu_type() == CPU_CAVIUM_OCTEON2 ||
+ current_cpu_type() == CPU_CAVIUM_OCTEON3)
cvmmemctl.s.tlbperrena = 1;
write_c0_cvmmemctl(cvmmemctl.u64);
diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h
index 327f989..b7feb87 100644
--- a/arch/mips/include/asm/mipsregs.h
+++ b/arch/mips/include/asm/mipsregs.h
@@ -69,6 +69,7 @@
#define CP0_DEPC $24
#define CP0_PERFORMANCE $25
#define CP0_ECC $26
+#define CP0_ERRCTL $26
#define CP0_CACHEERR $27
#define CP0_TAGLO $28
#define CP0_TAGHI $29
@@ -1203,6 +1204,9 @@ do { \
#define read_octeon_c0_dcacheerr() __read_64bit_c0_register($27, 1)
#define write_octeon_c0_dcacheerr(val) __write_64bit_c0_register($27, 1, val)
+#define read_octeon_c0_errctl() __read_64bit_c0_register($26, 0)
+#define write_octeon_c0_errctl(val) __write_64bit_c0_register($26, 0, val)
+
/* BMIPS3300 */
#define read_c0_brcm_config_0() __read_32bit_c0_register($22, 0)
#define write_c0_brcm_config_0(val) __write_32bit_c0_register($22, 0, val)
diff --git a/arch/mips/include/asm/octeon/octeon.h b/arch/mips/include/asm/octeon/octeon.h
index c809f86..58e8beb 100644
--- a/arch/mips/include/asm/octeon/octeon.h
+++ b/arch/mips/include/asm/octeon/octeon.h
@@ -8,7 +8,10 @@
#ifndef __ASM_OCTEON_OCTEON_H
#define __ASM_OCTEON_OCTEON_H
+#include <linux/irqflags.h>
+#include <linux/notifier.h>
#include <asm/octeon/cvmx.h>
+#include <linux/irq.h>
extern uint64_t octeon_bootmem_alloc_range_phys(uint64_t size,
uint64_t alignment,
@@ -265,4 +268,13 @@ void octeon_irq_set_ip4_handler(octeon_irq_ip4_handler_t);
extern void octeon_fixup_irqs(void);
+int register_co_cache_error_notifier(struct notifier_block *nb);
+int unregister_co_cache_error_notifier(struct notifier_block *nb);
+#define CO_CACHE_ERROR_RECOVERABLE 0
+#define CO_CACHE_ERROR_UNRECOVERABLE 1
+#define CO_CACHE_ERROR_WB_PARITY 2
+#define CO_CACHE_ERROR_TLB_PARITY 3
+
+extern unsigned long long cache_err_dcache[NR_CPUS];
+
#endif /* __ASM_OCTEON_OCTEON_H */
diff --git a/arch/mips/include/asm/traps.h b/arch/mips/include/asm/traps.h
index f41cf3e..6b896c7 100644
--- a/arch/mips/include/asm/traps.h
+++ b/arch/mips/include/asm/traps.h
@@ -21,6 +21,15 @@
extern void (*board_be_init)(void);
extern int (*board_be_handler)(struct pt_regs *regs, int is_fixup);
+/*
+ * Possible status responses for a board_mcheck_handler backend.
+ */
+#define MIPS_MC_DISCARD 0 /* return with no action */
+#define MIPS_MC_NOT_HANDLED 1 /* default handling */
+#define MIPS_MC_FATAL 2 /* treat as an unrecoverable error */
+
+extern int (*board_mcheck_handler)(struct pt_regs *regs);
+
extern void (*board_nmi_handler_setup)(void);
extern void (*board_ejtag_handler_setup)(void);
extern void (*board_bind_eic_interrupt)(int irq, int regset);
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index 81e6ae0..321657d 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -88,6 +88,7 @@ extern asmlinkage void handle_reserved(void);
void (*board_be_init)(void);
int (*board_be_handler)(struct pt_regs *regs, int is_fixup);
+int (*board_mcheck_handler)(struct pt_regs *regs);
void (*board_nmi_handler_setup)(void);
void (*board_ejtag_handler_setup)(void);
void (*board_bind_eic_interrupt)(int irq, int regset);
@@ -1232,7 +1233,15 @@ asmlinkage void do_mcheck(struct pt_regs *regs)
enum ctx_state prev_state;
prev_state = exception_enter();
- show_regs(regs);
+ if (board_mcheck_handler) {
+ int resp = board_mcheck_handler(regs);
+ if (resp == MIPS_MC_DISCARD)
+ return;
+ if (resp == MIPS_MC_FATAL)
+ multi_match = 0;
+ }
+
+ show_registers(regs);
if (multi_match) {
printk("Index : %0x\n", read_c0_index());
diff --git a/arch/mips/mm/c-octeon.c b/arch/mips/mm/c-octeon.c
index f41a5c5..14d91ee 100644
--- a/arch/mips/mm/c-octeon.c
+++ b/arch/mips/mm/c-octeon.c
@@ -31,6 +31,8 @@
unsigned long long cache_err_dcache[NR_CPUS];
EXPORT_SYMBOL_GPL(cache_err_dcache);
+static RAW_NOTIFIER_HEAD(co_cache_error_chain);
+
/**
* Octeon automatically flushes the dcache on tlb changes, so
* from Linux's viewpoint it acts much like a physically
@@ -176,6 +178,148 @@ static void octeon_flush_kernel_vmap_range(unsigned long vaddr, int size)
BUG();
}
+/*
+ * OCTEON II bus error handler: write buffer parity errors trigger bus
+ * errors and are fatal, since the write buffer holds the only copy of the
+ * data. Other bus errors return MIPS_BE_FIXUP if is_fixup, else MIPS_BE_FATAL.
+ */
+static int octeon2_be_handler(struct pt_regs *regs, int is_fixup)
+{
+ u64 dcache_err;
+ u64 wbfperr_mask = 1ULL << 1; /* write buffer parity flag tested in CacheErr (Dcache) */
+
+ dcache_err = read_octeon_c0_dcacheerr();
+ if (dcache_err & wbfperr_mask) {
+ int rv = raw_notifier_call_chain(&co_cache_error_chain,
+ CO_CACHE_ERROR_WB_PARITY,
+ NULL);
+ if ((rv & ~NOTIFY_STOP_MASK) != NOTIFY_OK) { /* no notifier answered NOTIFY_OK: log here */
+ unsigned int coreid = cvmx_get_core_num();
+
+ pr_err("Core%u: Write buffer parity error:\n", coreid);
+ pr_err("CacheErr (Dcache) == %llx\n", dcache_err);
+ }
+
+ write_octeon_c0_dcacheerr(wbfperr_mask); /* write the flag back; presumably write-1-to-clear */
+ return MIPS_BE_FATAL;
+ }
+ if (is_fixup)
+ return MIPS_BE_FIXUP;
+ else
+ return MIPS_BE_FATAL;
+}
+
+/*
+ * OCTEON II MachineCheck handler, as TLB parity errors trigger MachineCheck
+ * errors. Returns MIPS_MC_NOT_HANDLED when the TLB parity flag is not set.
+ */
+static int octeon2_mcheck_handler(struct pt_regs *regs)
+{
+ u64 dcache_err;
+ u64 tlbperr_mask = 1ULL << 5; /* TLB parity flag tested in CacheErr (Dcache) */
+ dcache_err = read_octeon_c0_dcacheerr();
+ if (dcache_err & tlbperr_mask) {
+ int rv;
+ union octeon_cvmemctl cvmmemctl;
+
+ /* Clear the indicator by writing the flag bit back */
+ write_octeon_c0_dcacheerr(tlbperr_mask);
+ /*
+ * Blow everything away to (hopefully) write good
+ * parity to all TLB entries
+ */
+ local_flush_tlb_all();
+ /* Re-enable TLB parity error reporting in CvmMemCtl. */
+ cvmmemctl.u64 = read_c0_cvmmemctl();
+ cvmmemctl.s.tlbperrena = 1;
+ write_c0_cvmmemctl(cvmmemctl.u64);
+
+ rv = raw_notifier_call_chain(&co_cache_error_chain,
+ CO_CACHE_ERROR_TLB_PARITY,
+ NULL);
+ if ((rv & ~NOTIFY_STOP_MASK) != NOTIFY_OK) { /* fatal unless a notifier answered NOTIFY_OK */
+ unsigned int coreid = cvmx_get_core_num();
+
+ pr_err("Core%u: TLB parity error:\n", coreid);
+ return MIPS_MC_FATAL;
+ }
+
+ return MIPS_MC_DISCARD;
+ }
+ return MIPS_MC_NOT_HANDLED;
+}
+
+/*
+ * OCTEON3 bus error handler (status read from ErrCtl): write buffer parity
+ * errors trigger bus errors and are fatal, since the write buffer holds the
+ * only copy of the data. Otherwise MIPS_BE_FIXUP if is_fixup, else MIPS_BE_FATAL.
+ */
+static int octeon3_be_handler(struct pt_regs *regs, int is_fixup)
+{
+ u64 dcache_err;
+ u64 wbfperr_mask = 1ULL << 9; /* write buffer parity flag tested in ErrCtl */
+
+ dcache_err = read_octeon_c0_errctl();
+ if (dcache_err & wbfperr_mask) {
+ int rv = raw_notifier_call_chain(&co_cache_error_chain,
+ CO_CACHE_ERROR_WB_PARITY,
+ NULL);
+ if ((rv & ~NOTIFY_STOP_MASK) != NOTIFY_OK) { /* no notifier answered NOTIFY_OK: log here */
+ unsigned int coreid = cvmx_get_core_num();
+
+ pr_err("Core%u: Write buffer parity error:\n", coreid);
+ pr_err("CacheErr (Dcache) == %llx\n", dcache_err); /* NOTE(review): value printed is ErrCtl */
+ }
+
+ write_octeon_c0_errctl(wbfperr_mask); /* write the flag back; presumably write-1-to-clear */
+ return MIPS_BE_FATAL;
+ }
+ if (is_fixup)
+ return MIPS_BE_FIXUP;
+ else
+ return MIPS_BE_FATAL;
+}
+
+/*
+ * OCTEON3 MachineCheck handler (status read from ErrCtl), as TLB parity errors
+ * trigger MachineCheck errors. Returns MIPS_MC_NOT_HANDLED when the flag is not set.
+ */
+static int octeon3_mcheck_handler(struct pt_regs *regs)
+{
+ u64 dcache_err;
+ u64 tlbperr_mask = 1ULL << 14; /* TLB parity flag tested in ErrCtl */
+ dcache_err = read_octeon_c0_errctl();
+ if (dcache_err & tlbperr_mask) {
+ int rv;
+ union octeon_cvmemctl cvmmemctl;
+
+ /* Clear the indicator by writing the flag bit back */
+ write_octeon_c0_errctl(tlbperr_mask);
+ /*
+ * Blow everything away to (hopefully) write good
+ * parity to all TLB entries
+ */
+ local_flush_tlb_all();
+ /* Re-enable TLB parity error reporting in CvmMemCtl. */
+ cvmmemctl.u64 = read_c0_cvmmemctl();
+ cvmmemctl.s.tlbperrena = 1;
+ write_c0_cvmmemctl(cvmmemctl.u64);
+
+ rv = raw_notifier_call_chain(&co_cache_error_chain,
+ CO_CACHE_ERROR_TLB_PARITY,
+ NULL);
+ if ((rv & ~NOTIFY_STOP_MASK) != NOTIFY_OK) { /* fatal unless a notifier answered NOTIFY_OK */
+ unsigned int coreid = cvmx_get_core_num();
+
+ pr_err("Core%u: TLB parity error:\n", coreid);
+ return MIPS_MC_FATAL;
+ }
+
+ return MIPS_MC_DISCARD;
+ }
+ return MIPS_MC_NOT_HANDLED;
+}
+
/**
* Probe Octeon's caches
*
@@ -223,6 +367,9 @@ static void probe_octeon(void)
c->dcache.sets = 8;
dcache_size = c->dcache.sets * c->dcache.ways * c->dcache.linesz;
c->options |= MIPS_CPU_PREFETCH;
+
+ board_be_handler = octeon2_be_handler;
+ board_mcheck_handler = octeon2_mcheck_handler;
break;
case CPU_CAVIUM_OCTEON3:
@@ -237,6 +384,9 @@ static void probe_octeon(void)
c->dcache.sets = 8;
dcache_size = c->dcache.sets * c->dcache.ways * c->dcache.linesz;
c->options |= MIPS_CPU_PREFETCH;
+
+ board_be_handler = octeon3_be_handler;
+ board_mcheck_handler = octeon3_mcheck_handler;
break;
default:
@@ -268,8 +418,13 @@ static void probe_octeon(void)
static void octeon_cache_error_setup(void)
{
- extern char except_vec2_octeon;
- set_handler(0x100, &except_vec2_octeon, 0x80);
+ if (current_cpu_type() == CPU_CAVIUM_OCTEON3) {
+ extern char except_vec2_octeon3;
+ set_handler(0x100, &except_vec2_octeon3, 0x80);
+ } else {
+ extern char except_vec2_octeon;
+ set_handler(0x100, &except_vec2_octeon, 0x80);
+ }
}
/**
@@ -304,8 +459,6 @@ void octeon_cache_init(void)
/*
* Handle a cache error exception
*/
-static RAW_NOTIFIER_HEAD(co_cache_error_chain);
-
int register_co_cache_error_notifier(struct notifier_block *nb)
{
return raw_notifier_chain_register(&co_cache_error_chain, nb);
@@ -330,7 +483,10 @@ static void co_cache_error_call_notifiers(unsigned long val)
dcache_err = cache_err_dcache[coreid];
cache_err_dcache[coreid] = 0;
} else {
- dcache_err = read_octeon_c0_dcacheerr();
+ if (current_cpu_type() == CPU_CAVIUM_OCTEON3)
+ dcache_err = read_octeon_c0_errctl();
+ else
+ dcache_err = read_octeon_c0_dcacheerr();
}
pr_err("Core%lu: Cache error exception:\n", coreid);
@@ -353,7 +509,7 @@ static void co_cache_error_call_notifiers(unsigned long val)
asmlinkage void cache_parity_error_octeon_recoverable(void)
{
- co_cache_error_call_notifiers(0);
+ co_cache_error_call_notifiers(CO_CACHE_ERROR_RECOVERABLE);
}
/**
@@ -362,6 +518,6 @@ asmlinkage void cache_parity_error_octeon_recoverable(void)
asmlinkage void cache_parity_error_octeon_non_recoverable(void)
{
- co_cache_error_call_notifiers(1);
+ co_cache_error_call_notifiers(CO_CACHE_ERROR_UNRECOVERABLE);
panic("Can't handle cache error: nested exception");
}
diff --git a/arch/mips/mm/cex-oct.S b/arch/mips/mm/cex-oct.S
index 9029092..e73fec3 100644
--- a/arch/mips/mm/cex-oct.S
+++ b/arch/mips/mm/cex-oct.S
@@ -51,6 +51,43 @@
.set pop
END(except_vec2_octeon)
+ /* Cache error exception handler for OCTEON3. */
+ LEAF(except_vec2_octeon3)
+
+ .set push
+ .set mips64r2
+ .set noreorder
+ .set noat
+
+
+ /* due to an erratum we need to read COP0 ErrCtl (used on OCTEON3 in
+ * place of CacheErr (Dcache)) before any cache/DRAM access */
+
+ rdhwr k0, $0 /* get core_id */
+ PTR_LA k1, cache_err_dcache
+ sll k0, k0, 3 /* scale core_id by 8: one 64-bit slot per core */
+ PTR_ADDU k1, k0, k1 /* k1 = &cache_err_dcache[core_id] */
+
+ dmfc0 k0, CP0_ERRCTL
+ sd k0, (k1) /* stash the raw ErrCtl value in cache_err_dcache[core_id] */
+ andi k0, 1 /* keep only bit 0; write 1 to clear Dcache parity error */
+ dmtc0 k0, CP0_ERRCTL
+
+ /* check whether this is a nested exception (Status.EXL already set) */
+ mfc0 k1, CP0_STATUS
+ andi k1, k1, ST0_EXL
+ beqz k1, 2f
+ nop
+ j cache_parity_error_octeon_non_recoverable
+ nop
+
+ /* exception is recoverable: continue in handle_cache_err */
+2 : j handle_cache_err
+ nop
+
+ .set pop
+ END(except_vec2_octeon3)
+
/* We need to jump to handle_cache_err so that the previous handler
* can fit within 0x80 bytes. We also move from 0xFFFFFFFFAXXXXXXX
* space (uncached) to the 0xFFFFFFFF8XXXXXXX space (cached). */
diff --git a/drivers/edac/octeon_edac-pc.c b/drivers/edac/octeon_edac-pc.c
index 380cbe2..5f41e2f 100644
--- a/drivers/edac/octeon_edac-pc.c
+++ b/drivers/edac/octeon_edac-pc.c
@@ -19,13 +19,9 @@
#include "edac_module.h"
#include <asm/octeon/cvmx.h>
+#include <asm/octeon/octeon.h>
#include <asm/mipsregs.h>
-extern int register_co_cache_error_notifier(struct notifier_block *nb);
-extern int unregister_co_cache_error_notifier(struct notifier_block *nb);
-
-extern unsigned long long cache_err_dcache[NR_CPUS];
-
struct co_cache_error {
struct notifier_block notifier;
struct edac_device_ctl_info *ed;
@@ -47,11 +43,26 @@ static int co_cache_error_event(struct notifier_block *this,
u64 icache_err = read_octeon_c0_icacheerr();
u64 dcache_err;
- if (event) {
+ switch (event) {
+ case CO_CACHE_ERROR_UNRECOVERABLE:
dcache_err = cache_err_dcache[core];
cache_err_dcache[core] = 0;
- } else {
- dcache_err = read_octeon_c0_dcacheerr();
+ break;
+ case CO_CACHE_ERROR_RECOVERABLE:
+ if (current_cpu_type() == CPU_CAVIUM_OCTEON3)
+ dcache_err = read_octeon_c0_errctl();
+ else
+ dcache_err = read_octeon_c0_dcacheerr();
+ break;
+ case CO_CACHE_ERROR_WB_PARITY:
+ edac_device_printk(p->ed, KERN_ERR,
+ "CacheErr (WB Parity): core %d/cpu %d\n",
+ core, cpu);
+ edac_device_handle_ue(p->ed, cpu, 2, "write-buffer");
+ return NOTIFY_STOP;
+ default:
+ WARN(1, "Unknown event: %lu\n", event);
+ return NOTIFY_BAD;
}
if (icache_err & 1) {
@@ -73,12 +84,13 @@ static int co_cache_error_event(struct notifier_block *this,
edac_device_handle_ce(p->ed, cpu, 0, "dcache");
/* Clear the error indication */
- if (current_cpu_type() == CPU_CAVIUM_OCTEON2)
+ if (current_cpu_type() == CPU_CAVIUM_OCTEON3)
+ write_octeon_c0_errctl(1);
+ else if (current_cpu_type() == CPU_CAVIUM_OCTEON2)
write_octeon_c0_dcacheerr(1);
else
write_octeon_c0_dcacheerr(0);
}
-
return NOTIFY_STOP;
}
@@ -93,7 +105,7 @@ static int co_cache_error_probe(struct platform_device *pdev)
platform_set_drvdata(pdev, p);
p->ed = edac_device_alloc_ctl_info(0, "cpu", num_possible_cpus(),
- "cache", 2, 0, NULL, 0,
+ "cache", 3, 0, NULL, 0,
edac_device_alloc_index());
if (!p->ed)
goto err;
--
1.8.1.4
More information about the linux-yocto
mailing list