[linux-yocto] [PATCH 02/13] MIPS: OCTEON: Fix L1 dcache parity for OCTEON3

Chandrakala Chavva cchavva.cavm at gmail.com
Mon Jan 26 22:04:08 PST 2015


From: Abhishek Paliwal <abhishek.paliwal at aricent.com>

From: David Daney <david.daney at cavium.com>

Use COP0_ERRCTL for L1 Dcache parity and TLB parity checks for OCTEON3.

For OCTEON II and OCTEON3 CPUs, add a Bus Error handler for the Write Buffer
parity condition and a Machine Check handler for TLB parity errors.  Write
Buffer parity errors are then logged with the EDAC system.

Signed-off-by: David Daney <david.daney at cavium.com>
Signed-off-by: Leonid Rosenboim <lrosenboim at caviumnetworks.com>
Signed-off-by: Chandrakala Chavva <cchavva at caviumnetworks.com>
Signed-off-by: Abhishek Paliwal <abhishek.paliwal at aricent.com>
---
 arch/mips/cavium-octeon/setup.c       |  12 ++-
 arch/mips/include/asm/mipsregs.h      |   4 +
 arch/mips/include/asm/octeon/octeon.h |  12 +++
 arch/mips/include/asm/traps.h         |   9 ++
 arch/mips/kernel/traps.c              |  11 ++-
 arch/mips/mm/c-octeon.c               | 170 ++++++++++++++++++++++++++++++++--
 arch/mips/mm/cex-oct.S                |  37 ++++++++
 drivers/edac/octeon_edac-pc.c         |  34 ++++---
 8 files changed, 267 insertions(+), 22 deletions(-)

diff --git a/arch/mips/cavium-octeon/setup.c b/arch/mips/cavium-octeon/setup.c
index fc0b0d2..7027044 100644
--- a/arch/mips/cavium-octeon/setup.c
+++ b/arch/mips/cavium-octeon/setup.c
@@ -381,7 +381,10 @@ void octeon_check_cpu_bist(void)
 		pr_err("Core%d BIST Failure: CacheErr(icache) = 0x%llx\n",
 		       coreid, bist_val);
 
-	bist_val = read_octeon_c0_dcacheerr();
+	if (current_cpu_type() == CPU_CAVIUM_OCTEON3)
+		bist_val = read_octeon_c0_errctl();
+	else
+		bist_val = read_octeon_c0_dcacheerr();
 	if (bist_val & 1)
 		pr_err("Core%d L1 Dcache parity error: "
 		       "CacheErr(dcache) = 0x%llx\n",
@@ -392,7 +395,9 @@ void octeon_check_cpu_bist(void)
 	if (bist_val & mask)
 		pr_err("Core%d BIST Failure: COP0_CVM_MEM_CTL = 0x%llx\n",
 		       coreid, bist_val);
-
+	if (current_cpu_type() == CPU_CAVIUM_OCTEON3)
+		write_octeon_c0_errctl(1);
+	else
 		write_octeon_c0_dcacheerr(0);
 }
 
@@ -583,7 +588,8 @@ void octeon_user_io_init(void)
 	cvmmemctl.s.cvmsegenau = 0;
 
 	/* Enable TLB parity error reporting on OCTEON II */
-	if (current_cpu_type() == CPU_CAVIUM_OCTEON2)
+	if (current_cpu_type() == CPU_CAVIUM_OCTEON2 ||
+	    current_cpu_type() == CPU_CAVIUM_OCTEON3)
 		cvmmemctl.s.tlbperrena = 1;
 
 	write_c0_cvmmemctl(cvmmemctl.u64);
diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h
index 327f989..b7feb87 100644
--- a/arch/mips/include/asm/mipsregs.h
+++ b/arch/mips/include/asm/mipsregs.h
@@ -69,6 +69,7 @@
 #define CP0_DEPC $24
 #define CP0_PERFORMANCE $25
 #define CP0_ECC $26
+#define CP0_ERRCTL $26
 #define CP0_CACHEERR $27
 #define CP0_TAGLO $28
 #define CP0_TAGHI $29
@@ -1203,6 +1204,9 @@ do {									\
 #define read_octeon_c0_dcacheerr()	__read_64bit_c0_register($27, 1)
 #define write_octeon_c0_dcacheerr(val)	__write_64bit_c0_register($27, 1, val)
 
+#define read_octeon_c0_errctl()                __read_64bit_c0_register($26, 0)
+#define write_octeon_c0_errctl(val)    __write_64bit_c0_register($26, 0, val)
+
 /* BMIPS3300 */
 #define read_c0_brcm_config_0()		__read_32bit_c0_register($22, 0)
 #define write_c0_brcm_config_0(val)	__write_32bit_c0_register($22, 0, val)
diff --git a/arch/mips/include/asm/octeon/octeon.h b/arch/mips/include/asm/octeon/octeon.h
index c809f86..58e8beb 100644
--- a/arch/mips/include/asm/octeon/octeon.h
+++ b/arch/mips/include/asm/octeon/octeon.h
@@ -8,7 +8,10 @@
 #ifndef __ASM_OCTEON_OCTEON_H
 #define __ASM_OCTEON_OCTEON_H
 
+#include <linux/irqflags.h>
+#include <linux/notifier.h>
 #include <asm/octeon/cvmx.h>
+#include <linux/irq.h>
 
 extern uint64_t octeon_bootmem_alloc_range_phys(uint64_t size,
 						uint64_t alignment,
@@ -265,4 +268,13 @@ void octeon_irq_set_ip4_handler(octeon_irq_ip4_handler_t);
 
 extern void octeon_fixup_irqs(void);
 
+int register_co_cache_error_notifier(struct notifier_block *nb);
+int unregister_co_cache_error_notifier(struct notifier_block *nb);
+#define CO_CACHE_ERROR_RECOVERABLE 0
+#define CO_CACHE_ERROR_UNRECOVERABLE 1
+#define CO_CACHE_ERROR_WB_PARITY 2
+#define CO_CACHE_ERROR_TLB_PARITY 3
+
+extern unsigned long long cache_err_dcache[NR_CPUS];
+
 #endif /* __ASM_OCTEON_OCTEON_H */
diff --git a/arch/mips/include/asm/traps.h b/arch/mips/include/asm/traps.h
index f41cf3e..6b896c7 100644
--- a/arch/mips/include/asm/traps.h
+++ b/arch/mips/include/asm/traps.h
@@ -21,6 +21,15 @@
 extern void (*board_be_init)(void);
 extern int (*board_be_handler)(struct pt_regs *regs, int is_fixup);
 
+/*
+ * Possible status responses for a board_mcheck_handler backend.
+ */
+#define MIPS_MC_DISCARD	0		/* return with no action */
+#define MIPS_MC_NOT_HANDLED	1	/* default handling */
+#define MIPS_MC_FATAL	2		/* treat as an unrecoverable error */
+
+extern int (*board_mcheck_handler)(struct pt_regs *regs);
+
 extern void (*board_nmi_handler_setup)(void);
 extern void (*board_ejtag_handler_setup)(void);
 extern void (*board_bind_eic_interrupt)(int irq, int regset);
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index 81e6ae0..321657d 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -88,6 +88,7 @@ extern asmlinkage void handle_reserved(void);
 
 void (*board_be_init)(void);
 int (*board_be_handler)(struct pt_regs *regs, int is_fixup);
+int (*board_mcheck_handler)(struct pt_regs *regs);
 void (*board_nmi_handler_setup)(void);
 void (*board_ejtag_handler_setup)(void);
 void (*board_bind_eic_interrupt)(int irq, int regset);
@@ -1232,7 +1233,15 @@ asmlinkage void do_mcheck(struct pt_regs *regs)
 	enum ctx_state prev_state;
 
 	prev_state = exception_enter();
-	show_regs(regs);
+	if (board_mcheck_handler) {
+		int resp = board_mcheck_handler(regs);
+		if (resp == MIPS_MC_DISCARD)
+			return;
+		if (resp == MIPS_MC_FATAL)
+			multi_match = 0;
+	}
+
+	show_registers(regs);
 
 	if (multi_match) {
 		printk("Index	: %0x\n", read_c0_index());
diff --git a/arch/mips/mm/c-octeon.c b/arch/mips/mm/c-octeon.c
index f41a5c5..14d91ee 100644
--- a/arch/mips/mm/c-octeon.c
+++ b/arch/mips/mm/c-octeon.c
@@ -31,6 +31,8 @@
 unsigned long long cache_err_dcache[NR_CPUS];
 EXPORT_SYMBOL_GPL(cache_err_dcache);
 
+static RAW_NOTIFIER_HEAD(co_cache_error_chain);
+
 /**
  * Octeon automatically flushes the dcache on tlb changes, so
  * from Linux's viewpoint it acts much like a physically
@@ -176,6 +178,148 @@ static void octeon_flush_kernel_vmap_range(unsigned long vaddr, int size)
 	BUG();
 }
 
+/*
+ * OCTEON II bus error handler.  Write buffer parity errors (bit 1 of CacheErr
+ * (Dcache)) trigger bus errors and are fatal since the write buffer held the
+ * only copy of the data; any other bus error is fixed up only if is_fixup.
+ */
+static int octeon2_be_handler(struct pt_regs *regs, int is_fixup)
+{
+	u64 dcache_err;
+	u64 wbfperr_mask = 1ULL << 1;
+
+	dcache_err = read_octeon_c0_dcacheerr();
+	if (dcache_err & wbfperr_mask) {
+		int rv = raw_notifier_call_chain(&co_cache_error_chain,
+						CO_CACHE_ERROR_WB_PARITY,
+						NULL);
+		if ((rv & ~NOTIFY_STOP_MASK) != NOTIFY_OK) {
+			unsigned int coreid = cvmx_get_core_num();
+
+			pr_err("Core%u: Write buffer parity error:\n", coreid);
+			pr_err("CacheErr (Dcache) == %llx\n", dcache_err);
+		}
+
+		write_octeon_c0_dcacheerr(wbfperr_mask); /* clear indicator */
+		return MIPS_BE_FATAL;
+	}
+	if (is_fixup)
+		return MIPS_BE_FIXUP;
+	else
+		return MIPS_BE_FATAL;
+}
+
+/*
+ * OCTEON II MachineCheck handler for TLB parity errors (CacheErr (Dcache)
+ * bit 5): discard if a notifier returns NOTIFY_OK, otherwise treat as fatal.
+ */
+static int octeon2_mcheck_handler(struct pt_regs *regs)
+{
+	u64 dcache_err;
+	u64 tlbperr_mask = 1ULL << 5;
+	dcache_err = read_octeon_c0_dcacheerr();
+	if (dcache_err & tlbperr_mask) {
+		int rv;
+		union octeon_cvmemctl cvmmemctl;
+
+		/* Clear the indicator */
+		write_octeon_c0_dcacheerr(tlbperr_mask);
+		/*
+		 * Blow everything away to (hopefully) write good
+		 * parity to all TLB entries
+		 */
+		local_flush_tlb_all();
+		/* Reenable TLB parity error reporting. */
+		cvmmemctl.u64 = read_c0_cvmmemctl();
+		cvmmemctl.s.tlbperrena = 1;
+		write_c0_cvmmemctl(cvmmemctl.u64);
+
+		rv = raw_notifier_call_chain(&co_cache_error_chain,
+						CO_CACHE_ERROR_TLB_PARITY,
+						NULL);
+		if ((rv & ~NOTIFY_STOP_MASK) != NOTIFY_OK) {
+			unsigned int coreid = cvmx_get_core_num();
+
+			pr_err("Core%u: TLB parity error:\n", coreid);
+			return MIPS_MC_FATAL;	/* no notifier accepted it */
+		}
+
+		return MIPS_MC_DISCARD;	/* a notifier returned NOTIFY_OK */
+	}
+	return MIPS_MC_NOT_HANDLED;	/* TLB parity bit was not set */
+}
+
+/*
+ * OCTEON3 bus error handler.  Write buffer parity errors (ErrCtl bit 9) are
+ * fatal since the write buffer held the only copy; other bus errors are fixed
+ * up only if is_fixup.  NOTE(review): the log still labels ErrCtl "CacheErr".
+ */
+static int octeon3_be_handler(struct pt_regs *regs, int is_fixup)
+{
+	u64 dcache_err;
+	u64 wbfperr_mask = 1ULL << 9;
+
+	dcache_err = read_octeon_c0_errctl();
+	if (dcache_err & wbfperr_mask) {
+		int rv = raw_notifier_call_chain(&co_cache_error_chain,
+						CO_CACHE_ERROR_WB_PARITY,
+						NULL);
+		if ((rv & ~NOTIFY_STOP_MASK) != NOTIFY_OK) {
+			unsigned int coreid = cvmx_get_core_num();
+
+			pr_err("Core%u: Write buffer parity error:\n", coreid);
+			pr_err("CacheErr (Dcache) == %llx\n", dcache_err);
+		}
+
+		write_octeon_c0_errctl(wbfperr_mask);	/* clear indicator */
+		return MIPS_BE_FATAL;
+	}
+	if (is_fixup)
+		return MIPS_BE_FIXUP;
+	else
+		return MIPS_BE_FATAL;
+}
+
+/*
+ * OCTEON3 MachineCheck handler for TLB parity errors (ErrCtl bit 14):
+ * discard if a notifier returns NOTIFY_OK, otherwise treat as fatal.
+ */
+static int octeon3_mcheck_handler(struct pt_regs *regs)
+{
+	u64 dcache_err;
+	u64 tlbperr_mask = 1ULL << 14;
+	dcache_err = read_octeon_c0_errctl();
+	if (dcache_err & tlbperr_mask) {
+		int rv;
+		union octeon_cvmemctl cvmmemctl;
+
+		/* Clear the indicator */
+		write_octeon_c0_errctl(tlbperr_mask);
+		/*
+		 * Blow everything away to (hopefully) write good
+		 * parity to all TLB entries
+		 */
+		local_flush_tlb_all();
+		/* Reenable TLB parity error reporting. */
+		cvmmemctl.u64 = read_c0_cvmmemctl();
+		cvmmemctl.s.tlbperrena = 1;
+		write_c0_cvmmemctl(cvmmemctl.u64);
+
+		rv = raw_notifier_call_chain(&co_cache_error_chain,
+						CO_CACHE_ERROR_TLB_PARITY,
+						NULL);
+		if ((rv & ~NOTIFY_STOP_MASK) != NOTIFY_OK) {
+			unsigned int coreid = cvmx_get_core_num();
+
+			pr_err("Core%u: TLB parity error:\n", coreid);
+			return MIPS_MC_FATAL;	/* no notifier accepted it */
+		}
+
+		return MIPS_MC_DISCARD;	/* a notifier returned NOTIFY_OK */
+	}
+	return MIPS_MC_NOT_HANDLED;	/* TLB parity bit was not set */
+}
+
 /**
  * Probe Octeon's caches
  *
@@ -223,6 +367,9 @@ static void probe_octeon(void)
 		c->dcache.sets = 8;
 		dcache_size = c->dcache.sets * c->dcache.ways * c->dcache.linesz;
 		c->options |= MIPS_CPU_PREFETCH;
+
+		board_be_handler = octeon2_be_handler;
+		board_mcheck_handler = octeon2_mcheck_handler;
 		break;
 
 	case CPU_CAVIUM_OCTEON3:
@@ -237,6 +384,9 @@ static void probe_octeon(void)
 		c->dcache.sets = 8;
 		dcache_size = c->dcache.sets * c->dcache.ways * c->dcache.linesz;
 		c->options |= MIPS_CPU_PREFETCH;
+
+		board_be_handler = octeon3_be_handler;
+		board_mcheck_handler = octeon3_mcheck_handler;
 		break;
 
 	default:
@@ -268,8 +418,13 @@ static void probe_octeon(void)
 
 static void  octeon_cache_error_setup(void)
 {
-	extern char except_vec2_octeon;
-	set_handler(0x100, &except_vec2_octeon, 0x80);
+	if (current_cpu_type() == CPU_CAVIUM_OCTEON3) {
+		extern char except_vec2_octeon3;
+		set_handler(0x100, &except_vec2_octeon3, 0x80);
+	} else {
+		extern char except_vec2_octeon;
+		set_handler(0x100, &except_vec2_octeon, 0x80);
+	}
 }
 
 /**
@@ -304,8 +459,6 @@ void octeon_cache_init(void)
 /*
  * Handle a cache error exception
  */
-static RAW_NOTIFIER_HEAD(co_cache_error_chain);
-
 int register_co_cache_error_notifier(struct notifier_block *nb)
 {
 	return raw_notifier_chain_register(&co_cache_error_chain, nb);
@@ -330,7 +483,10 @@ static void co_cache_error_call_notifiers(unsigned long val)
 			dcache_err = cache_err_dcache[coreid];
 			cache_err_dcache[coreid] = 0;
 		} else {
-			dcache_err = read_octeon_c0_dcacheerr();
+			if (current_cpu_type() == CPU_CAVIUM_OCTEON3)
+				dcache_err = read_octeon_c0_errctl();
+			else
+				dcache_err = read_octeon_c0_dcacheerr();
 		}
 
 		pr_err("Core%lu: Cache error exception:\n", coreid);
@@ -353,7 +509,7 @@ static void co_cache_error_call_notifiers(unsigned long val)
 
 asmlinkage void cache_parity_error_octeon_recoverable(void)
 {
-	co_cache_error_call_notifiers(0);
+	co_cache_error_call_notifiers(CO_CACHE_ERROR_RECOVERABLE);
 }
 
 /**
@@ -362,6 +518,6 @@ asmlinkage void cache_parity_error_octeon_recoverable(void)
 
 asmlinkage void cache_parity_error_octeon_non_recoverable(void)
 {
-	co_cache_error_call_notifiers(1);
+	co_cache_error_call_notifiers(CO_CACHE_ERROR_UNRECOVERABLE);
 	panic("Can't handle cache error: nested exception");
 }
diff --git a/arch/mips/mm/cex-oct.S b/arch/mips/mm/cex-oct.S
index 9029092..e73fec3 100644
--- a/arch/mips/mm/cex-oct.S
+++ b/arch/mips/mm/cex-oct.S
@@ -51,6 +51,43 @@
 	.set	pop
 	END(except_vec2_octeon)
 
+	/* Handle cache error for OCTEON3. */
+	LEAF(except_vec2_octeon3)
+
+	.set    push
+	.set    mips64r2
+	.set    noreorder
+	.set    noat
+
+
+	/* Due to an erratum we need to read COP0 ErrCtl (Dcache parity
+	 * status on OCTEON3) before any cache/DRAM access. */
+
+	rdhwr   k0, $0        /* get core_id */
+	PTR_LA  k1, cache_err_dcache
+	sll     k0, k0, 3
+	PTR_ADDU k1, k0, k1    /* k1 = &cache_err_dcache[core_id] */
+
+	dmfc0   k0, CP0_ERRCTL
+	sd      k0, (k1)        /* save raw ErrCtl for the C handlers */
+	andi    k0, 1           /* keep bit 0: writing 1 clears Dcache parity error */
+	dmtc0   k0, CP0_ERRCTL
+
+	/* check whether this is a nested exception */
+	mfc0    k1, CP0_STATUS
+	andi    k1, k1, ST0_EXL
+	beqz    k1, 2f
+	nop
+	j       cache_parity_error_octeon_non_recoverable
+	nop
+
+	/* exception is recoverable */
+2 :	j       handle_cache_err
+	nop
+
+	.set    pop
+	END(except_vec2_octeon3)
+
  /* We need to jump to handle_cache_err so that the previous handler
   * can fit within 0x80 bytes. We also move from 0xFFFFFFFFAXXXXXXX
   * space (uncached) to the 0xFFFFFFFF8XXXXXXX space (cached).	*/
diff --git a/drivers/edac/octeon_edac-pc.c b/drivers/edac/octeon_edac-pc.c
index 380cbe2..5f41e2f 100644
--- a/drivers/edac/octeon_edac-pc.c
+++ b/drivers/edac/octeon_edac-pc.c
@@ -19,13 +19,9 @@
 #include "edac_module.h"
 
 #include <asm/octeon/cvmx.h>
+#include <asm/octeon/octeon.h>
 #include <asm/mipsregs.h>
 
-extern int register_co_cache_error_notifier(struct notifier_block *nb);
-extern int unregister_co_cache_error_notifier(struct notifier_block *nb);
-
-extern unsigned long long cache_err_dcache[NR_CPUS];
-
 struct co_cache_error {
 	struct notifier_block notifier;
 	struct edac_device_ctl_info *ed;
@@ -47,11 +43,26 @@ static int  co_cache_error_event(struct notifier_block *this,
 	u64 icache_err = read_octeon_c0_icacheerr();
 	u64 dcache_err;
 
-	if (event) {
+	switch (event) {
+	case CO_CACHE_ERROR_UNRECOVERABLE:
 		dcache_err = cache_err_dcache[core];
 		cache_err_dcache[core] = 0;
-	} else {
-		dcache_err = read_octeon_c0_dcacheerr();
+		break;
+	case CO_CACHE_ERROR_RECOVERABLE:
+		if (current_cpu_type() == CPU_CAVIUM_OCTEON3)
+			dcache_err = read_octeon_c0_errctl();
+		else
+			dcache_err = read_octeon_c0_dcacheerr();
+		break;
+	case CO_CACHE_ERROR_WB_PARITY:
+		edac_device_printk(p->ed, KERN_ERR,
+					"CacheErr (WB Parity): core %d/cpu %d\n",
+					core, cpu);
+		edac_device_handle_ue(p->ed, cpu, 2, "write-buffer");
+		return NOTIFY_STOP;
+	default:
+		WARN(1, "Unknown event: %lu\n", event);
+		return NOTIFY_BAD;
 	}
 
 	if (icache_err & 1) {
@@ -73,12 +84,13 @@ static int  co_cache_error_event(struct notifier_block *this,
 			edac_device_handle_ce(p->ed, cpu, 0, "dcache");
 
 		/* Clear the error indication */
-		if (current_cpu_type() == CPU_CAVIUM_OCTEON2)
+		if (current_cpu_type() == CPU_CAVIUM_OCTEON3)
+			write_octeon_c0_errctl(1);
+		else if (current_cpu_type() == CPU_CAVIUM_OCTEON2)
 			write_octeon_c0_dcacheerr(1);
 		else
 			write_octeon_c0_dcacheerr(0);
 	}
-
 	return NOTIFY_STOP;
 }
 
@@ -93,7 +105,7 @@ static int co_cache_error_probe(struct platform_device *pdev)
 	platform_set_drvdata(pdev, p);
 
 	p->ed = edac_device_alloc_ctl_info(0, "cpu", num_possible_cpus(),
-					   "cache", 2, 0, NULL, 0,
+					   "cache", 3, 0, NULL, 0,
 					   edac_device_alloc_index());
 	if (!p->ed)
 		goto err;
-- 
1.8.1.4



More information about the linux-yocto mailing list