[linux-yocto] [PATCH 05/13] MIPS: Handle CPU_CAVIUM_OCTEON3 like CPU_CAVIUM_OCTEON2 in clear_page.
Chandrakala Chavva
cchavva.cavm at gmail.com
Mon Jan 26 22:04:11 PST 2015
From: Abhishek Paliwal <abhishek.paliwal at aricent.com>
From: David Daney <ddaney at caviumnetworks.com>
MIPS: Add Octeon2 optimizations to clear_page.
Use the ZCBT instruction for Octeon2.
Reduce the number of generated instructions when possible.
Both OCTEON3 and OCTEON2 use the same instructions for this.
Signed-off-by: David Daney <ddaney at caviumnetworks.com>
Signed-off-by: Leonid Rosenboim <lrosenboim at caviumnetworks.com>
Signed-off-by: Abhishek Paliwal <abhishek.paliwal at aricent.com>
---
arch/mips/mm/page.c | 146 ++++++++++++++++++++++++++++++++++++----------------
1 file changed, 101 insertions(+), 45 deletions(-)
diff --git a/arch/mips/mm/page.c b/arch/mips/mm/page.c
index 58033c4..d4f78ee 100644
--- a/arch/mips/mm/page.c
+++ b/arch/mips/mm/page.c
@@ -25,6 +25,7 @@
#include <asm/prefetch.h>
#include <asm/bootinfo.h>
#include <asm/mipsregs.h>
+#include <asm/sections.h>
#include <asm/mmu_context.h>
#include <asm/cpu.h>
#include <asm/war.h>
@@ -37,6 +38,28 @@
#include <asm/uasm.h>
+#ifdef CONFIG_MAPPED_KERNEL
+/* Initialized so it is not clobbered when .bss is zeroed. */
+unsigned long phys_to_kernel_offset = 1;
+unsigned long kernel_image_end = 1;
+#endif
+
+#ifdef CONFIG_64BIT
+unsigned long __phys_addr(unsigned long x) /* Translate x by address range; presumably virtual -> physical (confirm against callers). */
+{
+#ifdef CONFIG_MAPPED_KERNEL /* extra range check, only built for mapped kernels */
+ if ((char *)x >= _text && (char *)x < _end) /* x lies in [_text, _end) */
+ return x - phys_to_kernel_offset; /* subtract the mapped-kernel offset */
+#endif
+ if (x < CKSEG0) /* everything below CKSEG0 goes through XPHYSADDR() */
+ return XPHYSADDR(x);
+ if (x < CKSSEG) /* CKSEG0 <= x < CKSSEG goes through CPHYSADDR() */
+ return CPHYSADDR(x);
+ BUG(); /* x >= CKSSEG and not matched above: no translation is provided */
+}
+EXPORT_SYMBOL(__phys_addr);
+#endif /* CONFIG_64BIT */
+
/* Registers used in the assembled routines. */
#define ZERO 0
#define AT 2
@@ -100,8 +123,13 @@ pg_addiu(u32 **buf, unsigned int reg1, unsigned int reg2, unsigned int off)
uasm_i_daddu(buf, reg1, reg2, T9);
} else {
if (off > 0x7fff) {
- uasm_i_lui(buf, T9, uasm_rel_hi(off));
- uasm_i_addiu(buf, T9, T9, uasm_rel_lo(off));
+ if (off == 0x8000) {
+ uasm_i_ori(buf, T9, ZERO, 0x8000);
+ } else {
+ uasm_i_lui(buf, T9, uasm_rel_hi(off));
+ if (uasm_rel_lo(off) != 0)
+ uasm_i_addiu(buf, T9, T9, uasm_rel_lo(off));
+ }
UASM_i_ADDU(buf, reg1, reg2, T9);
} else
UASM_i_ADDIU(buf, reg1, reg2, off);
@@ -256,66 +284,94 @@ void build_clear_page(void)
memset(labels, 0, sizeof(labels));
memset(relocs, 0, sizeof(relocs));
- set_prefetch_parameters();
+ if (current_cpu_data.cputype == CPU_CAVIUM_OCTEON2 ||
+ current_cpu_data.cputype == CPU_CAVIUM_OCTEON3) {
+ const unsigned int wb_nudge = 26;
+
+ pg_addiu(&buf, T0, A0, PAGE_SIZE);
+
+ UASM_i_ADDIU(&buf, A1, A0, 128);
+ uasm_l_clear_pref(&l, buf);
+ uasm_i_zcbt(&buf, A0);
+ UASM_i_ADDIU(&buf, A0, A0, 256);
+ uasm_i_zcbt(&buf, A1);
+ UASM_i_ADDIU(&buf, A1, A1, 256);
+ uasm_i_zcbt(&buf, A0);
+ UASM_i_ADDIU(&buf, A0, A0, 256);
+ uasm_i_zcbt(&buf, A1);
+ uasm_i_pref(&buf, wb_nudge, 0, A1);
+ UASM_i_ADDIU(&buf, A1, A1, 256);
+ uasm_i_zcbt(&buf, A0);
+ UASM_i_ADDIU(&buf, A0, A0, 256);
+ uasm_i_zcbt(&buf, A1);
+ UASM_i_ADDIU(&buf, A1, A1, 256);
+ uasm_i_zcbt(&buf, A0);
+ UASM_i_ADDIU(&buf, A0, A0, 256);
+ uasm_i_zcbt(&buf, A1);
+ uasm_i_pref(&buf, wb_nudge, 0, A1);
+ uasm_il_bne(&buf, &r, A0, T0, label_clear_pref);
+ UASM_i_ADDIU(&buf, A1, A1, 256);
+ } else {
+ set_prefetch_parameters();
- /*
- * This algorithm makes the following assumptions:
- * - The prefetch bias is a multiple of 2 words.
- * - The prefetch bias is less than one page.
- */
- BUG_ON(pref_bias_clear_store % (2 * clear_word_size));
- BUG_ON(PAGE_SIZE < pref_bias_clear_store);
+ /*
+ * This algorithm makes the following assumptions:
+ * - The prefetch bias is a multiple of 2 words.
+ * - The prefetch bias is less than one page.
+ */
+ BUG_ON(pref_bias_clear_store % (2 * clear_word_size));
+ BUG_ON(PAGE_SIZE < pref_bias_clear_store);
- off = PAGE_SIZE - pref_bias_clear_store;
- if (off > 0xffff || !pref_bias_clear_store)
- pg_addiu(&buf, A2, A0, off);
- else
- uasm_i_ori(&buf, A2, A0, off);
+ off = PAGE_SIZE - pref_bias_clear_store;
+ if (off > 0xffff || !pref_bias_clear_store)
+ pg_addiu(&buf, A2, A0, off);
+ else
+ uasm_i_ori(&buf, A2, A0, off);
- if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x())
- uasm_i_lui(&buf, AT, 0xa000);
+ if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x())
+ uasm_i_lui(&buf, AT, 0xa000);
- off = cache_line_size ? min(8, pref_bias_clear_store / cache_line_size)
+ off = cache_line_size ? min(8, pref_bias_clear_store / cache_line_size)
* cache_line_size : 0;
- while (off) {
- build_clear_pref(&buf, -off);
- off -= cache_line_size;
- }
- uasm_l_clear_pref(&l, buf);
- do {
- build_clear_pref(&buf, off);
- build_clear_store(&buf, off);
- off += clear_word_size;
- } while (off < half_clear_loop_size);
- pg_addiu(&buf, A0, A0, 2 * off);
- off = -off;
- do {
- build_clear_pref(&buf, off);
- if (off == -clear_word_size)
- uasm_il_bne(&buf, &r, A0, A2, label_clear_pref);
- build_clear_store(&buf, off);
- off += clear_word_size;
- } while (off < 0);
-
- if (pref_bias_clear_store) {
- pg_addiu(&buf, A2, A0, pref_bias_clear_store);
- uasm_l_clear_nopref(&l, buf);
- off = 0;
+ while (off) {
+ build_clear_pref(&buf, -off);
+ off -= cache_line_size;
+ }
+ uasm_l_clear_pref(&l, buf);
do {
+ build_clear_pref(&buf, off);
build_clear_store(&buf, off);
off += clear_word_size;
} while (off < half_clear_loop_size);
pg_addiu(&buf, A0, A0, 2 * off);
off = -off;
do {
+ build_clear_pref(&buf, off);
if (off == -clear_word_size)
- uasm_il_bne(&buf, &r, A0, A2,
- label_clear_nopref);
+ uasm_il_bne(&buf, &r, A0, A2, label_clear_pref);
build_clear_store(&buf, off);
off += clear_word_size;
} while (off < 0);
- }
+ if (pref_bias_clear_store) {
+ pg_addiu(&buf, A2, A0, pref_bias_clear_store);
+ uasm_l_clear_nopref(&l, buf);
+ off = 0;
+ do {
+ build_clear_store(&buf, off);
+ off += clear_word_size;
+ } while (off < half_clear_loop_size);
+ pg_addiu(&buf, A0, A0, 2 * off);
+ off = -off;
+ do {
+ if (off == -clear_word_size)
+ uasm_il_bne(&buf, &r, A0, A2,
+ label_clear_nopref);
+ build_clear_store(&buf, off);
+ off += clear_word_size;
+ } while (off < 0);
+ }
+ }
uasm_i_jr(&buf, RA);
uasm_i_nop(&buf);
--
1.8.1.4
More information about the linux-yocto
mailing list