+2018-04-08 Monk Chiang <sh.chiang04@gmail.com>
+
+ * config/nds32/constants.md (unspec_volatile_element): Add values for
+ TLB operation and data prefetch.
+ * config/nds32/nds32-intrinsic.c: Add implementation of intrinsic
+ functions for TLB operation and data prefetch.
+ * config/nds32/nds32-intrinsic.md: Likewise.
+ * config/nds32/nds32_intrinsic.h: Likewise.
+ * config/nds32/nds32.c (nds32_dpref_names): Likewise.
+ (nds32_print_operand): Likewise.
+ * config/nds32/nds32.h (nds32_builtins): Likewise.
+
2018-04-07 Thomas Koenig <tkoenig@gcc.gnu.org>
 Andrew Pinski <pinskia@gcc.gnu.org>
UNSPEC_VOLATILE_CCTL_VA_WBINVAL_LA
UNSPEC_VOLATILE_CCTL_IDX_WBINVAL
UNSPEC_VOLATILE_CCTL_VA_LCK
+ UNSPEC_VOLATILE_DPREF_QW
+ UNSPEC_VOLATILE_DPREF_HW
+ UNSPEC_VOLATILE_DPREF_W
+ UNSPEC_VOLATILE_DPREF_DW
+ UNSPEC_VOLATILE_TLBOP_TRD
+ UNSPEC_VOLATILE_TLBOP_TWR
+ UNSPEC_VOLATILE_TLBOP_RWR
+ UNSPEC_VOLATILE_TLBOP_RWLK
+ UNSPEC_VOLATILE_TLBOP_UNLK
+ UNSPEC_VOLATILE_TLBOP_PB
+ UNSPEC_VOLATILE_TLBOP_INV
+ UNSPEC_VOLATILE_TLBOP_FLUA
UNSPEC_VOLATILE_RELAX_GROUP
UNSPEC_VOLATILE_POP25_RETURN
])
return target;
}
+/* Expand builtins that take three operands where the third operand must
+   be an integer constant (immediate).  ICODE is the insn pattern to emit
+   and EXP the CALL_EXPR being expanded.  TARGET is a suggested result
+   rtx; it is only used when RETURN_P is true, i.e. when the pattern
+   produces a value in operand 0.  NAME is the user-visible builtin name,
+   used in the diagnostic when the third argument is not constant.
+   Returns the result rtx, or NULL_RTX on failure.  */
+static rtx
+nds32_expand_triopimm_builtin (enum insn_code icode, tree exp, rtx target,
+ bool return_p, const char *name)
+{
+ rtx pat;
+ rtx op0 = nds32_read_argument (exp, 0);
+ rtx op1 = nds32_read_argument (exp, 1);
+ rtx op2 = nds32_read_argument (exp, 2);
+ /* When the insn returns a value, pattern operand 0 is the result, so
+    the call arguments map to pattern operands shifted up by one.  */
+ int op0_num = return_p ? 1 : 0;
+ int op1_num = return_p ? 2 : 1;
+ int op2_num = return_p ? 3 : 2;
+
+ if (return_p)
+ target = nds32_legitimize_target (icode, target);
+
+ /* Reject (with a diagnostic) a non-constant third argument before
+    trying to legitimize it.  */
+ if (!nds32_check_constant_argument (icode, op2_num, op2, name))
+ return NULL_RTX;
+
+ op0 = nds32_legitimize_argument (icode, op0_num, op0);
+ op1 = nds32_legitimize_argument (icode, op1_num, op1);
+ op2 = nds32_legitimize_argument (icode, op2_num, op2);
+
+ /* Emit and return the new instruction.  */
+ if (return_p)
+ pat = GEN_FCN (icode) (target, op0, op1, op2);
+ else
+ pat = GEN_FCN (icode) (op0, op1, op2);
+
+ if (! pat)
+ return NULL_RTX;
+
+ emit_insn (pat);
+ return target;
+}
+
/* Expand builtins for load. */
static rtx
nds32_expand_builtin_load (enum insn_code icode, tree exp, rtx target)
NDS32_BUILTIN(clzsi2, "clz", CLZ)
NDS32_BUILTIN(unspec_clo, "clo", CLO)
NDS32_BUILTIN(unspec_wsbh, "wsbh", WSBH)
+ NDS32_BUILTIN(unspec_tlbop_pb, "tlbop_pb",TLBOP_PB)
NDS32_BUILTIN(unaligned_load_hw, "unaligned_load_hw", UALOAD_HW)
NDS32_BUILTIN(unaligned_loadsi, "unaligned_load_w", UALOAD_W)
NDS32_BUILTIN(unaligned_loaddi, "unaligned_load_dw", UALOAD_DW)
NDS32_NO_TARGET_BUILTIN(unspec_jral_ton, "jral_ton", JRAL_TON)
NDS32_NO_TARGET_BUILTIN(unspec_ret_toff, "ret_toff", RET_TOFF)
NDS32_NO_TARGET_BUILTIN(unspec_jral_iton, "jral_iton",JRAL_ITON)
+ NDS32_NO_TARGET_BUILTIN(unspec_tlbop_trd, "tlbop_trd", TLBOP_TRD)
+ NDS32_NO_TARGET_BUILTIN(unspec_tlbop_twr, "tlbop_twr", TLBOP_TWR)
+ NDS32_NO_TARGET_BUILTIN(unspec_tlbop_rwr, "tlbop_rwr", TLBOP_RWR)
+ NDS32_NO_TARGET_BUILTIN(unspec_tlbop_rwlk, "tlbop_rwlk", TLBOP_RWLK)
+ NDS32_NO_TARGET_BUILTIN(unspec_tlbop_unlk, "tlbop_unlk", TLBOP_UNLK)
+ NDS32_NO_TARGET_BUILTIN(unspec_tlbop_inv, "tlbop_inv", TLBOP_INV)
NDS32_NO_TARGET_BUILTIN(unspec_ret_itoff, "ret_itoff", RET_ITOFF)
NDS32_NO_TARGET_BUILTIN(unspec_set_current_sp,
"set_current_sp", SET_CURRENT_SP)
NDS32_NO_TARGET_BUILTIN(bsp, "bsp", BSP)
};
+/* Three-argument intrinsics with an immediate third argument; these are
+   dispatched to nds32_expand_triopimm_builtin, which verifies that the
+   last argument is a compile-time constant.  */
+static struct builtin_description bdesc_3argimm[] =
+{
+  NDS32_NO_TARGET_BUILTIN(prefetch_qw, "prefetch_qw", DPREF_QW)
+  NDS32_NO_TARGET_BUILTIN(prefetch_hw, "prefetch_hw", DPREF_HW)
+  NDS32_NO_TARGET_BUILTIN(prefetch_w, "prefetch_w", DPREF_W)
+  NDS32_NO_TARGET_BUILTIN(prefetch_dw, "prefetch_dw", DPREF_DW)
+};
+
/* Intrinsics that load a value. */
static struct builtin_description bdesc_load[] =
{
case NDS32_BUILTIN_SCHE_BARRIER:
emit_insn (gen_blockage ());
return target;
+ case NDS32_BUILTIN_TLBOP_FLUA:
+ emit_insn (gen_unspec_tlbop_flua ());
+ return target;
case NDS32_BUILTIN_SCW:
return nds32_expand_scw_builtin (CODE_FOR_unspec_volatile_scw,
exp, target);
if (d->code == fcode)
return nds32_expand_triop_builtin (d->icode, exp, target, d->return_p);
+ for (i = 0, d = bdesc_3argimm; i < ARRAY_SIZE (bdesc_3argimm); i++, d++)
+ if (d->code == fcode)
+ return nds32_expand_triopimm_builtin (d->icode, exp, target,
+ d->return_p, d->name);
+
for (i = 0, d = bdesc_load; i < ARRAY_SIZE (bdesc_load); i++, d++)
if (d->code == fcode)
return nds32_expand_builtin_load (d->icode, exp, target);
ADD_NDS32_BUILTIN3 ("cctl_idx_write", void, integer, unsigned, unsigned,
CCTL_IDX_WRITE);
+ /* PREFETCH */
+ ADD_NDS32_BUILTIN3 ("dpref_qw", void, ptr_uchar, unsigned, integer, DPREF_QW);
+ ADD_NDS32_BUILTIN3 ("dpref_hw", void, ptr_ushort, unsigned, integer,
+ DPREF_HW);
+ ADD_NDS32_BUILTIN3 ("dpref_w", void, ptr_uint, unsigned, integer, DPREF_W);
+ ADD_NDS32_BUILTIN3 ("dpref_dw", void, ptr_ulong, unsigned, integer, DPREF_DW);
+
/* Performance Extension */
ADD_NDS32_BUILTIN1 ("pe_abs", integer, integer, ABS);
ADD_NDS32_BUILTIN2 ("pe_ave", integer, integer, integer, AVE);
/* Schedule Barrier */
ADD_NDS32_BUILTIN0 ("schedule_barrier", void, SCHE_BARRIER);
+
+ /* TLBOP */
+ ADD_NDS32_BUILTIN1 ("tlbop_trd", void, unsigned, TLBOP_TRD);
+ ADD_NDS32_BUILTIN1 ("tlbop_twr", void, unsigned, TLBOP_TWR);
+ ADD_NDS32_BUILTIN1 ("tlbop_rwr", void, unsigned, TLBOP_RWR);
+ ADD_NDS32_BUILTIN1 ("tlbop_rwlk", void, unsigned, TLBOP_RWLK);
+ ADD_NDS32_BUILTIN1 ("tlbop_unlk", void, unsigned, TLBOP_UNLK);
+ ADD_NDS32_BUILTIN1 ("tlbop_pb", unsigned, unsigned, TLBOP_PB);
+ ADD_NDS32_BUILTIN1 ("tlbop_inv", void, unsigned, TLBOP_INV);
+ ADD_NDS32_BUILTIN0 ("tlbop_flua", void, TLBOP_FLUA);
+
/* Unaligned Load/Store */
ADD_NDS32_BUILTIN1 ("unaligned_load_hw", short_unsigned, ptr_ushort,
UALOAD_HW);
[(set_attr "type" "mmu")]
)
+;; PREFETCH
+;; Data-prefetch patterns.  Operand 0 is the base register, operand 1 the
+;; offset, operand 2 the immediate DPREF subtype printed via the %Z
+;; operand modifier (see nds32_dpref_names in nds32.c).
+
+;; Byte prefetch: register offset, no scaling.
+(define_insn "prefetch_qw"
+ [(unspec_volatile:QI [(match_operand:SI 0 "register_operand" "r")
+ (match_operand:SI 1 "nonmemory_operand" "r")
+ (match_operand:SI 2 "immediate_operand" "i")] UNSPEC_VOLATILE_DPREF_QW)]
+ ""
+ "dpref\t%Z2, [%0 + %1]"
+ [(set_attr "type" "misc")]
+)
+
+;; Halfword prefetch: register offset scaled by 2 (%1<<1).
+(define_insn "prefetch_hw"
+ [(unspec_volatile:HI [(match_operand:SI 0 "register_operand" "r")
+ (match_operand:SI 1 "nonmemory_operand" "r")
+ (match_operand:SI 2 "immediate_operand" "i")] UNSPEC_VOLATILE_DPREF_HW)]
+ ""
+ "dpref\t%Z2, [%0 + (%1<<1)]"
+ [(set_attr "type" "misc")]
+)
+
+;; Word prefetch: immediate (Is15) offsets use dprefi.w unscaled;
+;; register offsets are scaled by 4 (%1<<2).
+(define_insn "prefetch_w"
+ [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" " r, r")
+ (match_operand:SI 1 "nonmemory_operand" "Is15, r")
+ (match_operand:SI 2 "immediate_operand" " i, i")] UNSPEC_VOLATILE_DPREF_W)]
+ ""
+ "@
+ dprefi.w\t%Z2, [%0 + %1]
+ dpref\t%Z2, [%0 + (%1<<2)]"
+ [(set_attr "type" "misc")]
+)
+
+;; Doubleword prefetch: immediate (Is15) offsets use dprefi.d unscaled;
+;; register offsets are scaled by 8 (%1<<3).
+(define_insn "prefetch_dw"
+ [(unspec_volatile:DI [(match_operand:SI 0 "register_operand" " r, r")
+ (match_operand:SI 1 "nonmemory_operand" "Is15, r")
+ (match_operand:SI 2 "immediate_operand" " i, i")] UNSPEC_VOLATILE_DPREF_DW)]
+ ""
+ "@
+ dprefi.d\t%Z2, [%0 + %1]
+ dpref\t%Z2, [%0 + (%1<<3)]"
+ [(set_attr "type" "misc")]
+)
;; Performance Extension
[(set_attr "type" "alu")
(set_attr "length" "4")]
)
+
+;; TLBOP Intrinsic
+;; Each pattern emits a "tlbop" instruction with a fixed sub-operation
+;; mnemonic; all are unspec_volatile so the optimizers cannot delete or
+;; reorder them.  Sub-op meanings (TRD/TWR = target read/write,
+;; RWR/RWLK = random write (and lock), UNLK = unlock, PB = probe,
+;; INV = invalidate, FLUA = flush all) follow Andes ISA naming --
+;; NOTE(review): confirm against the NDS32 architecture manual.
+
+(define_insn "unspec_tlbop_trd"
+ [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")] UNSPEC_VOLATILE_TLBOP_TRD)]
+ ""
+ "tlbop\t%0, TRD"
+ [(set_attr "type" "mmu")]
+)
+
+;; Emit "tlbop %0, TWR".
+(define_insn "unspec_tlbop_twr"
+ [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")] UNSPEC_VOLATILE_TLBOP_TWR)]
+ ""
+ "tlbop\t%0, TWR"
+ [(set_attr "type" "mmu")]
+)
+
+;; Emit "tlbop %0, RWR".
+(define_insn "unspec_tlbop_rwr"
+ [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")] UNSPEC_VOLATILE_TLBOP_RWR)]
+ ""
+ "tlbop\t%0, RWR"
+ [(set_attr "type" "mmu")]
+)
+
+;; Emit "tlbop %0, RWLK".
+(define_insn "unspec_tlbop_rwlk"
+ [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")] UNSPEC_VOLATILE_TLBOP_RWLK)]
+ ""
+ "tlbop\t%0, RWLK"
+ [(set_attr "type" "mmu")]
+)
+
+;; Emit "tlbop %0, UNLK".
+(define_insn "unspec_tlbop_unlk"
+ [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")] UNSPEC_VOLATILE_TLBOP_UNLK)]
+ ""
+ "tlbop\t%0, UNLK"
+ [(set_attr "type" "mmu")]
+)
+
+;; Emit "tlbop %0, %1, PB".  Unlike the other TLBOP patterns this one
+;; produces a result (a set of operand 0), so its builtin has a target.
+(define_insn "unspec_tlbop_pb"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec_volatile:SI [(match_operand:SI 1 "register_operand" "r")] UNSPEC_VOLATILE_TLBOP_PB))]
+ ""
+ "tlbop\t%0, %1, PB"
+ [(set_attr "type" "mmu")]
+)
+
+;; Emit "tlbop %0, INV".
+(define_insn "unspec_tlbop_inv"
+ [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")] UNSPEC_VOLATILE_TLBOP_INV)]
+ ""
+ "tlbop\t%0, INV"
+ [(set_attr "type" "mmu")]
+)
+
+;; Emit "tlbop FLUA".  Takes no register operand; the (const_int 0)
+;; merely keeps the unspec_volatile well-formed.
+(define_insn "unspec_tlbop_flua"
+ [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_TLBOP_FLUA)]
+ ""
+ "tlbop\tFLUA"
+ [(set_attr "type" "mmu")]
+)
+
;;Unaligned Load/Store
(define_expand "unaligned_load_hw"
"L1I_IX_WWD"
};
+/* Textual names of the DPREF subtypes, indexed by the immediate subtype
+   value of the prefetch intrinsics; printed by the 'Z' operand modifier
+   in nds32_print_operand.  Must stay in sync with enum nds32_dpref in
+   nds32_intrinsic.h.  */
+static const char * const nds32_dpref_names[] =
+{
+  "SRD",
+  "MRD",
+  "SWR",
+  "MWR",
+  "PTE",
+  "CLWR"
+};
+
/* Defining register allocation order for performance.
We want to allocate callee-saved registers after others.
It may be used by nds32_adjust_reg_alloc_order(). */
fprintf (stream, "%s", nds32_cctl_names[op_value + 16]);
return;
+ case 'Z': /* dpref */
+ fprintf (stream, "%s", nds32_dpref_names[op_value]);
+ return;
+
default :
/* Unknown flag. */
output_operand_lossage ("invalid operand output code");
NDS32_BUILTIN_CCTL_L1D_INVALALL,
NDS32_BUILTIN_CCTL_L1D_WBALL_ALVL,
NDS32_BUILTIN_CCTL_L1D_WBALL_ONE_LVL,
+ NDS32_BUILTIN_DPREF_QW,
+ NDS32_BUILTIN_DPREF_HW,
+ NDS32_BUILTIN_DPREF_W,
+ NDS32_BUILTIN_DPREF_DW,
+ NDS32_BUILTIN_TLBOP_TRD,
+ NDS32_BUILTIN_TLBOP_TWR,
+ NDS32_BUILTIN_TLBOP_RWR,
+ NDS32_BUILTIN_TLBOP_RWLK,
+ NDS32_BUILTIN_TLBOP_UNLK,
+ NDS32_BUILTIN_TLBOP_PB,
+ NDS32_BUILTIN_TLBOP_INV,
+ NDS32_BUILTIN_TLBOP_FLUA,
NDS32_BUILTIN_UALOAD_HW,
NDS32_BUILTIN_UALOAD_W,
NDS32_BUILTIN_UALOAD_DW,
__NDS32_CCTL_L1I_IX_WWD__
};
+/* Subtype argument values for the __nds32__dpref_* data-prefetch
+   intrinsics.  Order must match nds32_dpref_names in nds32.c.  */
+enum nds32_dpref
+{
+  __NDS32_DPREF_SRD__,
+  __NDS32_DPREF_MRD__,
+  __NDS32_DPREF_SWR__,
+  __NDS32_DPREF_MWR__,
+  __NDS32_DPREF_PTE__,
+  __NDS32_DPREF_CLWR__
+};
+
/* ------------------------------------------------------------------------ */
/* Define intrinsic register name macro for compatibility. */
#define NDS32_CCTL_L1D_IX_WWD __NDS32_CCTL_L1D_IX_WWD__
#define NDS32_CCTL_L1I_IX_WTAG __NDS32_CCTL_L1I_IX_WTAG__
#define NDS32_CCTL_L1I_IX_WWD __NDS32_CCTL_L1I_IX_WWD__
+
+/* Compatibility aliases for the enum nds32_dpref values; pass one of
+   these as the subtype argument of the __nds32__dpref_* intrinsics.  */
+#define NDS32_DPREF_SRD __NDS32_DPREF_SRD__
+#define NDS32_DPREF_MRD __NDS32_DPREF_MRD__
+#define NDS32_DPREF_SWR __NDS32_DPREF_SWR__
+#define NDS32_DPREF_MWR __NDS32_DPREF_MWR__
+#define NDS32_DPREF_PTE __NDS32_DPREF_PTE__
+#define NDS32_DPREF_CLWR __NDS32_DPREF_CLWR__
+
/* ------------------------------------------------------------------------ */
(__builtin_nds32_svs ((a), (b)))
#define __nds32__sva(a, b) \
(__builtin_nds32_sva ((a), (b)))
+/* Data-prefetch intrinsics: A is the base address, B the offset, and
+   SUBTYPE one of the NDS32_DPREF_* codes (must be a compile-time
+   constant).  */
+#define __nds32__dpref_qw(a, b, subtype) \
+ (__builtin_nds32_dpref_qw ((a), (b), (subtype)))
+#define __nds32__dpref_hw(a, b, subtype) \
+ (__builtin_nds32_dpref_hw ((a), (b), (subtype)))
+#define __nds32__dpref_w(a, b, subtype) \
+ (__builtin_nds32_dpref_w ((a), (b), (subtype)))
+#define __nds32__dpref_dw(a, b, subtype) \
+ (__builtin_nds32_dpref_dw ((a), (b), (subtype)))
#define __nds32__teqz(a, swid) \
(__builtin_nds32_teqz ((a), (swid)))
#define __nds32__fmfcfg() \
(__builtin_nds32_fmfcfg())
+/* TLB maintenance intrinsics; each maps onto a __builtin_nds32_tlbop_*
+   builtin that expands to a "tlbop" instruction.  */
+#define __nds32__tlbop_trd(a) \
+ (__builtin_nds32_tlbop_trd ((a)))
+#define __nds32__tlbop_twr(a) \
+ (__builtin_nds32_tlbop_twr ((a)))
+#define __nds32__tlbop_rwr(a) \
+ (__builtin_nds32_tlbop_rwr ((a)))
+#define __nds32__tlbop_rwlk(a) \
+ (__builtin_nds32_tlbop_rwlk ((a)))
+#define __nds32__tlbop_unlk(a) \
+ (__builtin_nds32_tlbop_unlk ((a)))
+#define __nds32__tlbop_pb(a) \
+ (__builtin_nds32_tlbop_pb ((a)))
+#define __nds32__tlbop_inv(a) \
+ (__builtin_nds32_tlbop_inv ((a)))
+#define __nds32__tlbop_flua() \
+ (__builtin_nds32_tlbop_flua())
+
#endif /* nds32_intrinsic.h */