[linux-yocto] [PATCH 05/13] MIPS: Handle CPU_CAVIUM_OCTEON3 like CPU_CAVIUM_OCTEON2 in clear_page.

Chandrakala Chavva cchavva.cavm at gmail.com
Mon Jan 26 22:04:11 PST 2015


From: Abhishek Paliwal <abhishek.paliwal at aricent.com>

From: David Daney <ddaney at caviumnetworks.com>

MIPS: Add Octeon2 optimizations to clear_page.

Use the ZCBT instruction for Octeon2.
Reduce the number of instructions generated by pg_addiu() when possible.

Both OCTEON3 and OCTEON2 use the same instructions for this.

Signed-off-by: David Daney <ddaney at caviumnetworks.com>
Signed-off-by: Leonid Rosenboim <lrosenboim at caviumnetworks.com>
Signed-off-by: Abhishek Paliwal <abhishek.paliwal at aricent.com>
---
 arch/mips/mm/page.c | 146 ++++++++++++++++++++++++++++++++++++----------------
 1 file changed, 101 insertions(+), 45 deletions(-)

diff --git a/arch/mips/mm/page.c b/arch/mips/mm/page.c
index 58033c4..d4f78ee 100644
--- a/arch/mips/mm/page.c
+++ b/arch/mips/mm/page.c
@@ -25,6 +25,7 @@
 #include <asm/prefetch.h>
 #include <asm/bootinfo.h>
 #include <asm/mipsregs.h>
+#include <asm/sections.h>
 #include <asm/mmu_context.h>
 #include <asm/cpu.h>
 #include <asm/war.h>
@@ -37,6 +38,28 @@
 
 #include <asm/uasm.h>
 
+#ifdef CONFIG_MAPPED_KERNEL
+/* Initialized so it is not clobbered when .bss is zeroed.  */
+unsigned long phys_to_kernel_offset = 1;
+unsigned long kernel_image_end = 1;
+#endif
+
+#ifdef CONFIG_64BIT
+unsigned long __phys_addr(unsigned long x)
+{
+#ifdef CONFIG_MAPPED_KERNEL
+	if ((char *)x >= _text && (char *)x < _end)
+		return x - phys_to_kernel_offset;
+#endif
+	if (x < CKSEG0)
+		return XPHYSADDR(x);
+	if (x < CKSSEG)
+		return CPHYSADDR(x);
+	BUG();
+}
+EXPORT_SYMBOL(__phys_addr);
+#endif /* CONFIG_64BIT */
+
 /* Registers used in the assembled routines. */
 #define ZERO 0
 #define AT 2
@@ -100,8 +123,13 @@ pg_addiu(u32 **buf, unsigned int reg1, unsigned int reg2, unsigned int off)
 		uasm_i_daddu(buf, reg1, reg2, T9);
 	} else {
 		if (off > 0x7fff) {
-			uasm_i_lui(buf, T9, uasm_rel_hi(off));
-			uasm_i_addiu(buf, T9, T9, uasm_rel_lo(off));
+			if (off == 0x8000) {
+				uasm_i_ori(buf, T9, ZERO, 0x8000);
+			} else {
+				uasm_i_lui(buf, T9, uasm_rel_hi(off));
+				if (uasm_rel_lo(off) != 0)
+					uasm_i_addiu(buf, T9, T9, uasm_rel_lo(off));
+			}
 			UASM_i_ADDU(buf, reg1, reg2, T9);
 		} else
 			UASM_i_ADDIU(buf, reg1, reg2, off);
@@ -256,66 +284,94 @@ void build_clear_page(void)
 	memset(labels, 0, sizeof(labels));
 	memset(relocs, 0, sizeof(relocs));
 
-	set_prefetch_parameters();
+	if (current_cpu_data.cputype == CPU_CAVIUM_OCTEON2 ||
+	    current_cpu_data.cputype == CPU_CAVIUM_OCTEON3) {
+		const unsigned int wb_nudge = 26;
+
+		pg_addiu(&buf, T0, A0, PAGE_SIZE);
+
+		UASM_i_ADDIU(&buf, A1, A0, 128);
+		uasm_l_clear_pref(&l, buf);
+		uasm_i_zcbt(&buf, A0);
+		UASM_i_ADDIU(&buf, A0, A0, 256);
+		uasm_i_zcbt(&buf, A1);
+		UASM_i_ADDIU(&buf, A1, A1, 256);
+		uasm_i_zcbt(&buf, A0);
+		UASM_i_ADDIU(&buf, A0, A0, 256);
+		uasm_i_zcbt(&buf, A1);
+		uasm_i_pref(&buf, wb_nudge, 0, A1);
+		UASM_i_ADDIU(&buf, A1, A1, 256);
+		uasm_i_zcbt(&buf, A0);
+		UASM_i_ADDIU(&buf, A0, A0, 256);
+		uasm_i_zcbt(&buf, A1);
+		UASM_i_ADDIU(&buf, A1, A1, 256);
+		uasm_i_zcbt(&buf, A0);
+		UASM_i_ADDIU(&buf, A0, A0, 256);
+		uasm_i_zcbt(&buf, A1);
+		uasm_i_pref(&buf, wb_nudge, 0, A1);
+		uasm_il_bne(&buf, &r, A0, T0, label_clear_pref);
+		UASM_i_ADDIU(&buf, A1, A1, 256);
+	} else {
+		set_prefetch_parameters();
 
-	/*
-	 * This algorithm makes the following assumptions:
-	 *   - The prefetch bias is a multiple of 2 words.
-	 *   - The prefetch bias is less than one page.
-	 */
-	BUG_ON(pref_bias_clear_store % (2 * clear_word_size));
-	BUG_ON(PAGE_SIZE < pref_bias_clear_store);
+		/*
+		 * This algorithm makes the following assumptions:
+		 *   - The prefetch bias is a multiple of 2 words.
+		 *   - The prefetch bias is less than one page.
+		 */
+		BUG_ON(pref_bias_clear_store % (2 * clear_word_size));
+		BUG_ON(PAGE_SIZE < pref_bias_clear_store);
 
-	off = PAGE_SIZE - pref_bias_clear_store;
-	if (off > 0xffff || !pref_bias_clear_store)
-		pg_addiu(&buf, A2, A0, off);
-	else
-		uasm_i_ori(&buf, A2, A0, off);
+		off = PAGE_SIZE - pref_bias_clear_store;
+		if (off > 0xffff || !pref_bias_clear_store)
+			pg_addiu(&buf, A2, A0, off);
+		else
+			uasm_i_ori(&buf, A2, A0, off);
 
-	if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x())
-		uasm_i_lui(&buf, AT, 0xa000);
+		if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x())
+			uasm_i_lui(&buf, AT, 0xa000);
 
-	off = cache_line_size ? min(8, pref_bias_clear_store / cache_line_size)
+		off = cache_line_size ? min(8, pref_bias_clear_store / cache_line_size)
 				* cache_line_size : 0;
-	while (off) {
-		build_clear_pref(&buf, -off);
-		off -= cache_line_size;
-	}
-	uasm_l_clear_pref(&l, buf);
-	do {
-		build_clear_pref(&buf, off);
-		build_clear_store(&buf, off);
-		off += clear_word_size;
-	} while (off < half_clear_loop_size);
-	pg_addiu(&buf, A0, A0, 2 * off);
-	off = -off;
-	do {
-		build_clear_pref(&buf, off);
-		if (off == -clear_word_size)
-			uasm_il_bne(&buf, &r, A0, A2, label_clear_pref);
-		build_clear_store(&buf, off);
-		off += clear_word_size;
-	} while (off < 0);
-
-	if (pref_bias_clear_store) {
-		pg_addiu(&buf, A2, A0, pref_bias_clear_store);
-		uasm_l_clear_nopref(&l, buf);
-		off = 0;
+		while (off) {
+			build_clear_pref(&buf, -off);
+			off -= cache_line_size;
+		}
+		uasm_l_clear_pref(&l, buf);
 		do {
+			build_clear_pref(&buf, off);
 			build_clear_store(&buf, off);
 			off += clear_word_size;
 		} while (off < half_clear_loop_size);
 		pg_addiu(&buf, A0, A0, 2 * off);
 		off = -off;
 		do {
+			build_clear_pref(&buf, off);
 			if (off == -clear_word_size)
-				uasm_il_bne(&buf, &r, A0, A2,
-					    label_clear_nopref);
+				uasm_il_bne(&buf, &r, A0, A2, label_clear_pref);
 			build_clear_store(&buf, off);
 			off += clear_word_size;
 		} while (off < 0);
-	}
 
+		if (pref_bias_clear_store) {
+			pg_addiu(&buf, A2, A0, pref_bias_clear_store);
+			uasm_l_clear_nopref(&l, buf);
+			off = 0;
+			do {
+				build_clear_store(&buf, off);
+				off += clear_word_size;
+			} while (off < half_clear_loop_size);
+			pg_addiu(&buf, A0, A0, 2 * off);
+			off = -off;
+			do {
+				if (off == -clear_word_size)
+					uasm_il_bne(&buf, &r, A0, A2,
+						    label_clear_nopref);
+				build_clear_store(&buf, off);
+				off += clear_word_size;
+			} while (off < 0);
+		}
+	}
 	uasm_i_jr(&buf, RA);
 	uasm_i_nop(&buf);
 
-- 
1.8.1.4



More information about the linux-yocto mailing list