From 6cf538daaead38a5bc09a79bfb26c34c83fec91e Mon Sep 17 00:00:00 2001 From: Robert Suchanek Date: Mon, 9 May 2016 12:04:09 +0000 Subject: [PATCH] Add support for MIPS SIMD Architecture (MSA). gcc/ * config.gcc: Add MSA header file for mips*-*-* target. * config/mips/constraints.md (YI, YC, YZ, Unv5, Uuv5, Usv5, Uuv6) (Ubv8i, Urv8): New constraints. * config/mips/mips-ftypes.def: Add function types for MSA builtins. * config/mips/mips-modes.def (V16QI, V8HI, V4SI, V2DI, V4SF) (V2DF, V32QI, V16HI, V8SI, V4DI, V8SF, V4DF): New modes. * config/mips/mips-msa.md: New file. * config/mips/mips-protos.h (mips_split_128bit_const_insns): New prototype. (mips_msa_idiv_insns): Likewise. (mips_split_128bit_move): Likewise. (mips_split_128bit_move_p): Likewise. (mips_split_msa_copy_d): Likewise. (mips_split_msa_insert_d): Likewise. (mips_split_msa_fill_d): Likewise. (mips_expand_msa_branch): Likewise. (mips_const_vector_same_val_p): Likewise. (mips_const_vector_same_bytes_p): Likewise. (mips_const_vector_same_int_p): Likewise. (mips_const_vector_shuffle_set_p): Likewise. (mips_const_vector_bitimm_set_p): Likewise. (mips_const_vector_bitimm_clr_p): Likewise. (mips_msa_vec_parallel_const_half): Likewise. (mips_msa_output_division): Likewise. (mips_ldst_scaled_shift): Likewise. (mips_expand_vec_cond_expr): Likewise. * config/mips/mips.c (enum mips_builtin_type): Add MIPS_BUILTIN_MSA_TEST_BRANCH. (mips_gen_const_int_vector_shuffle): New prototype. (mips_const_vector_bitimm_set_p): New function. (mips_const_vector_bitimm_clr_p): Likewise. (mips_const_vector_same_val_p): Likewise. (mips_const_vector_same_bytes_p): Likewise. (mips_const_vector_same_int_p): Likewise. (mips_const_vector_shuffle_set_p): Likewise. (mips_symbol_insns): Forbid loading symbols via immediate for MSA. (mips_valid_offset_p): Limit offset to 10-bit for MSA loads and stores. (mips_valid_lo_sum_p): Forbid loading symbols via %lo(base) for MSA. (mips_lx_address_p): Add support for load indexed address for MSA. 
(mips_address_insns): Add calculation of instructions needed for stores and loads for MSA. (mips_const_insns): Move CONST_DOUBLE below CONST_VECTOR. Handle CONST_VECTOR for MSA and let it fall through. (mips_ldst_scaled_shift): New function. (mips_subword_at_byte): Likewise. (mips_msa_idiv_insns): Likewise. (mips_legitimize_move): Validate MSA moves. (mips_rtx_costs): Add UNGE, UNGT, UNLE, UNLT cases. Add calculation of costs for MSA division. (mips_split_move_p): Check if MSA moves need splitting. (mips_split_move): Split MSA moves if necessary. (mips_split_128bit_move_p): New function. (mips_split_128bit_move): Likewise. (mips_split_msa_copy_d): Likewise. (mips_split_msa_insert_d): Likewise. (mips_split_msa_fill_d): Likewise. (mips_output_move): Handle MSA moves. (mips_expand_msa_branch): New function. (mips_print_operand): Add 'E', 'B', 'w', 'v' and 'V' modifiers. Reinstate 'y' modifier. (mips_file_start): Add MSA .gnu_attribute. (mips_hard_regno_mode_ok_p): Allow TImode and 128-bit vectors in FPRs. (mips_hard_regno_nregs): Always return 1 for MSA supported mode. (mips_class_max_nregs): Add register size for MSA supported mode. (mips_cannot_change_mode_class): Allow conversion between MSA vector modes and TImode. (mips_mode_ok_for_mov_fmt_p): Allow MSA to use move.v instruction. (mips_secondary_reload_class): Force MSA loads/stores via memory. (mips_preferred_simd_mode): Add preferred modes for MSA. (mips_vector_mode_supported_p): Add MSA supported modes. (mips_autovectorize_vector_sizes): New function. (mips_msa_output_division): Likewise. (MSA_BUILTIN, MIPS_BUILTIN_DIRECT_NO_TARGET) (MSA_NO_TARGET_BUILTIN, MSA_BUILTIN_TEST_BRANCH): New macros. 
(CODE_FOR_msa_adds_s_b, CODE_FOR_msa_adds_s_h) (CODE_FOR_msa_adds_s_w, CODE_FOR_msa_adds_s_d) (CODE_FOR_msa_adds_u_b, CODE_FOR_msa_adds_u_h) (CODE_FOR_msa_adds_u_w, CODE_FOR_msa_adds_u_d) (CODE_FOR_msa_addv_b, CODE_FOR_msa_addv_h, CODE_FOR_msa_addv_w) (CODE_FOR_msa_addv_d, CODE_FOR_msa_and_v, CODE_FOR_msa_bmnz_v) (CODE_FOR_msa_bmnzi_b, CODE_FOR_msa_bmz_v, CODE_FOR_msa_bmzi_b) (CODE_FOR_msa_bnz_v, CODE_FOR_msa_bz_v, CODE_FOR_msa_bsel_v) (CODE_FOR_msa_bseli_b, CODE_FOR_msa_ceqi_h, CODE_FOR_msa_ceqi_w) (CODE_FOR_msa_ceqi_d, CODE_FOR_msa_clti_s_b) (CODE_FOR_msa_clti_s_h, CODE_FOR_msa_clti_s_w) (CODE_FOR_msa_clti_s_d, CODE_FOR_msa_clti_u_b) (CODE_FOR_msa_clti_u_h, CODE_FOR_msa_clti_u_w) (CODE_FOR_msa_clti_u_d, CODE_FOR_msa_clei_s_b) (CODE_FOR_msa_clei_s_h, CODE_FOR_msa_clei_s_w) (CODE_FOR_msa_clei_s_d, CODE_FOR_msa_clei_u_b) (CODE_FOR_msa_clei_u_h, CODE_FOR_msa_clei_u_w) (CODE_FOR_msa_clei_u_d, CODE_FOR_msa_div_s_b) (CODE_FOR_msa_div_s_h, CODE_FOR_msa_div_s_w) (CODE_FOR_msa_div_s_d, CODE_FOR_msa_div_u_b) (CODE_FOR_msa_div_u_h, CODE_FOR_msa_div_u_w) (CODE_FOR_msa_div_u_d, CODE_FOR_msa_fadd_w, CODE_FOR_msa_fadd_d) (CODE_FOR_msa_fexdo_w, CODE_FOR_msa_ftrunc_s_w) (CODE_FOR_msa_ftrunc_s_d, CODE_FOR_msa_ftrunc_u_w) (CODE_FOR_msa_ftrunc_u_d, CODE_FOR_msa_ffint_s_w) (CODE_FOR_msa_ffint_s_d, CODE_FOR_msa_ffint_u_w) (CODE_FOR_msa_ffint_u_d, CODE_FOR_msa_fsub_w) (CODE_FOR_msa_fsub_d, CODE_FOR_msa_fmsub_d, CODE_FOR_msa_fmadd_w) (CODE_FOR_msa_fmadd_d, CODE_FOR_msa_fmsub_w, CODE_FOR_msa_fmul_w) (CODE_FOR_msa_fmul_d, CODE_FOR_msa_fdiv_w, CODE_FOR_msa_fdiv_d) (CODE_FOR_msa_fmax_w, CODE_FOR_msa_fmax_d, CODE_FOR_msa_fmax_a_w) (CODE_FOR_msa_fmax_a_d, CODE_FOR_msa_fmin_w, CODE_FOR_msa_fmin_d) (CODE_FOR_msa_fmin_a_w, CODE_FOR_msa_fmin_a_d) (CODE_FOR_msa_fsqrt_w, CODE_FOR_msa_fsqrt_d) (CODE_FOR_msa_max_s_b, CODE_FOR_msa_max_s_h) (CODE_FOR_msa_max_s_w, CODE_FOR_msa_max_s_d) (CODE_FOR_msa_max_u_b, CODE_FOR_msa_max_u_h) (CODE_FOR_msa_max_u_w, CODE_FOR_msa_max_u_d) (CODE_FOR_msa_min_s_b, 
CODE_FOR_msa_min_s_h) (CODE_FOR_msa_min_s_w, CODE_FOR_msa_min_s_d) (CODE_FOR_msa_min_u_b, CODE_FOR_msa_min_u_h) (CODE_FOR_msa_min_u_w, CODE_FOR_msa_min_u_d) (CODE_FOR_msa_mod_s_b, CODE_FOR_msa_mod_s_h) (CODE_FOR_msa_mod_s_w, CODE_FOR_msa_mod_s_d) (CODE_FOR_msa_mod_u_b, CODE_FOR_msa_mod_u_h) (CODE_FOR_msa_mod_u_w, CODE_FOR_msa_mod_u_d) (CODE_FOR_msa_mod_s_b, CODE_FOR_msa_mod_s_h) (CODE_FOR_msa_mod_s_w, CODE_FOR_msa_mod_s_d) (CODE_FOR_msa_mod_u_b, CODE_FOR_msa_mod_u_h) (CODE_FOR_msa_mod_u_w, CODE_FOR_msa_mod_u_d) (CODE_FOR_msa_mulv_b, CODE_FOR_msa_mulv_h, CODE_FOR_msa_mulv_w) (CODE_FOR_msa_mulv_d, CODE_FOR_msa_nlzc_b, CODE_FOR_msa_nlzc_h) (CODE_FOR_msa_nlzc_w, CODE_FOR_msa_nlzc_d, CODE_FOR_msa_nor_v) (CODE_FOR_msa_or_v, CODE_FOR_msa_ori_b, CODE_FOR_msa_nori_b) (CODE_FOR_msa_pcnt_b, CODE_FOR_msa_pcnt_h, CODE_FOR_msa_pcnt_w) (CODE_FOR_msa_pcnt_d, CODE_FOR_msa_xor_v, CODE_FOR_msa_xori_b) (CODE_FOR_msa_sll_b, CODE_FOR_msa_sll_h, CODE_FOR_msa_sll_w) (CODE_FOR_msa_sll_d, CODE_FOR_msa_slli_b, CODE_FOR_msa_slli_h) (CODE_FOR_msa_slli_w, CODE_FOR_msa_slli_d, CODE_FOR_msa_sra_b) (CODE_FOR_msa_sra_h, CODE_FOR_msa_sra_w, CODE_FOR_msa_sra_d) (CODE_FOR_msa_srai_b, CODE_FOR_msa_srai_h, CODE_FOR_msa_srai_w) (CODE_FOR_msa_srai_d, CODE_FOR_msa_srl_b, CODE_FOR_msa_srl_h) (CODE_FOR_msa_srl_w, CODE_FOR_msa_srl_d, CODE_FOR_msa_srli_b) (CODE_FOR_msa_srli_h, CODE_FOR_msa_srli_w, CODE_FOR_msa_srli_d) (CODE_FOR_msa_subv_b, CODE_FOR_msa_subv_h, CODE_FOR_msa_subv_w) (CODE_FOR_msa_subv_d, CODE_FOR_msa_subvi_b, CODE_FOR_msa_subvi_h) (CODE_FOR_msa_subvi_w, CODE_FOR_msa_subvi_d, CODE_FOR_msa_move_v) (CODE_FOR_msa_vshf_b, CODE_FOR_msa_vshf_h, CODE_FOR_msa_vshf_w) (CODE_FOR_msa_vshf_d, CODE_FOR_msa_ilvod_d, CODE_FOR_msa_ilvev_d) (CODE_FOR_msa_pckod_d, CODE_FOR_msa_pckdev_d, CODE_FOR_msa_ldi_b) (CODE_FOR_msa_ldi_hi, CODE_FOR_msa_ldi_w) (CODE_FOR_msa_ldi_d): New code_aliasing macros. 
(mips_builtins): Add MSA sll_b, sll_h, sll_w, sll_d, slli_b, slli_h, slli_w, slli_d, sra_b, sra_h, sra_w, sra_d, srai_b, srai_h, srai_w, srai_d, srar_b, srar_h, srar_w, srar_d, srari_b, srari_h, srari_w, srari_d, srl_b, srl_h, srl_w, srl_d, srli_b, srli_h, srli_w, srli_d, srlr_b, srlr_h, srlr_w, srlr_d, srlri_b, srlri_h, srlri_w, srlri_d, bclr_b, bclr_h, bclr_w, bclr_d, bclri_b, bclri_h, bclri_w, bclri_d, bset_b, bset_h, bset_w, bset_d, bseti_b, bseti_h, bseti_w, bseti_d, bneg_b, bneg_h, bneg_w, bneg_d, bnegi_b, bnegi_h, bnegi_w, bnegi_d, binsl_b, binsl_h, binsl_w, binsl_d, binsli_b, binsli_h, binsli_w, binsli_d, binsr_b, binsr_h, binsr_w, binsr_d, binsri_b, binsri_h, binsri_w, binsri_d, addv_b, addv_h, addv_w, addv_d, addvi_b, addvi_h, addvi_w, addvi_d, subv_b, subv_h, subv_w, subv_d, subvi_b, subvi_h, subvi_w, subvi_d, max_s_b, max_s_h, max_s_w, max_s_d, maxi_s_b, maxi_s_h, maxi_s_w, maxi_s_d, max_u_b, max_u_h, max_u_w, max_u_d, maxi_u_b, maxi_u_h, maxi_u_w, maxi_u_d, min_s_b, min_s_h, min_s_w, min_s_d, mini_s_b, mini_s_h, mini_s_w, mini_s_d, min_u_b, min_u_h, min_u_w, min_u_d, mini_u_b, mini_u_h, mini_u_w, mini_u_d, max_a_b, max_a_h, max_a_w, max_a_d, min_a_b, min_a_h, min_a_w, min_a_d, ceq_b, ceq_h, ceq_w, ceq_d, ceqi_b, ceqi_h, ceqi_w, ceqi_d, clt_s_b, clt_s_h, clt_s_w, clt_s_d, clti_s_b, clti_s_h, clti_s_w, clti_s_d, clt_u_b, clt_u_h, clt_u_w, clt_u_d, clti_u_b, clti_u_h, clti_u_w, clti_u_d, cle_s_b, cle_s_h, cle_s_w, cle_s_d, clei_s_b, clei_s_h, clei_s_w, clei_s_d, cle_u_b, cle_u_h, cle_u_w, cle_u_d, clei_u_b, clei_u_h, clei_u_w, clei_u_d, ld_b, ld_h, ld_w, ld_d, st_b, st_h, st_w, st_d, sat_s_b, sat_s_h, sat_s_w, sat_s_d, sat_u_b, sat_u_h, sat_u_w, sat_u_d, add_a_b, add_a_h, add_a_w, add_a_d, adds_a_b, adds_a_h, adds_a_w, adds_a_d, adds_s_b, adds_s_h, adds_s_w, adds_s_d, adds_u_b, adds_u_h, adds_u_w, adds_u_d, ave_s_b, ave_s_h, ave_s_w, ave_s_d, ave_u_b, ave_u_h, ave_u_w, ave_u_d, aver_s_b, aver_s_h, aver_s_w, aver_s_d, aver_u_b, aver_u_h, aver_u_w, 
aver_u_d, subs_s_b, subs_s_h, subs_s_w, subs_s_d, subs_u_b, subs_u_h, subs_u_w, subs_u_d, subsuu_s_b, subsuu_s_h, subsuu_s_w, subsuu_s_d, subsus_u_b, subsus_u_h, subsus_u_w, subsus_u_d, asub_s_b, asub_s_h, asub_s_w, asub_s_d, asub_u_b, asub_u_h, asub_u_w, asub_u_d, mulv_b, mulv_h, mulv_w, mulv_d, maddv_b, maddv_h, maddv_w, maddv_d, msubv_b, msubv_h, msubv_w, msubv_d, div_s_b, div_s_h, div_s_w, div_s_d, div_u_b, div_u_h, div_u_w, div_u_d, hadd_s_h, hadd_s_w, hadd_s_d, hadd_u_h, hadd_u_w, hadd_u_d, hsub_s_h, hsub_s_w, hsub_s_d, hsub_u_h, hsub_u_w, hsub_u_d, mod_s_b, mod_s_h, mod_s_w, mod_s_d, mod_u_b, mod_u_h, mod_u_w, mod_u_d, dotp_s_h, dotp_s_w, dotp_s_d, dotp_u_h, dotp_u_w, dotp_u_d, dpadd_s_h, dpadd_s_w, dpadd_s_d, dpadd_u_h, dpadd_u_w, dpadd_u_d, dpsub_s_h, dpsub_s_w, dpsub_s_d, dpsub_u_h, dpsub_u_w, dpsub_u_d, sld_b, sld_h, sld_w, sld_d, sldi_b, sldi_h, sldi_w, sldi_d, splat_b, splat_h, splat_w, splat_d, splati_b, splati_h, splati_w, splati_d, pckev_b, pckev_h, pckev_w, pckev_d, pckod_b, pckod_h, pckod_w, pckod_d, ilvl_b, ilvl_h, ilvl_w, ilvl_d, ilvr_b, ilvr_h, ilvr_w, ilvr_d, ilvev_b, ilvev_h, ilvev_w, ilvev_d, ilvod_b, ilvod_h, ilvod_w, ilvod_d, vshf_b, vshf_h, vshf_w, vshf_d, and_v, andi_b, or_v, ori_b, nor_v, nori_b, xor_v, xori_b, bmnz_v, bmnzi_b, bmz_v, bmzi_b, bsel_v, bseli_b, shf_b, shf_h, shf_w, bnz_v, bz_v, fill_b, fill_h, fill_w, fill_d, pcnt_b, pcnt_h, pcnt_w, pcnt_d, nloc_b, nloc_h, nloc_w, nloc_d, nlzc_b, nlzc_h, nlzc_w, nlzc_d, copy_s_b, copy_s_h, copy_s_w, copy_s_d, copy_u_b, copy_u_h, copy_u_w, copy_u_d, insert_b, insert_h, insert_w, insert_d, insve_b, insve_h, insve_w, insve_d, bnz_b, bnz_h, bnz_w, bnz_d, bz_b, bz_h, bz_w, bz_d, ldi_b, ldi_h, ldi_w, ldi_d, fcaf_w, fcaf_d, fcor_w, fcor_d, fcun_w, fcun_d, fcune_w, fcune_d, fcueq_w, fcueq_d, fceq_w, fceq_d, fcne_w, fcne_d, fclt_w, fclt_d, fcult_w, fcult_d, fcle_w, fcle_d, fcule_w, fcule_d, fsaf_w, fsaf_d, fsor_w, fsor_d, fsun_w, fsun_d, fsune_w, fsune_d, fsueq_w, fsueq_d, fseq_w, fseq_d, fsne_w, 
fsne_d, fslt_w, fslt_d, fsult_w, fsult_d, fsle_w, fsle_d, fsule_w, fsule_d, fadd_w, fadd_d, fsub_w, fsub_d, fmul_w, fmul_d, fdiv_w, fdiv_d, fmadd_w, fmadd_d, fmsub_w, fmsub_d, fexp2_w, fexp2_d, fexdo_h, fexdo_w, ftq_h, ftq_w, fmin_w, fmin_d, fmin_a_w, fmin_a_d, fmax_w, fmax_d, fmax_a_w, fmax_a_d, mul_q_h, mul_q_w, mulr_q_h, mulr_q_w, madd_q_h, madd_q_w, maddr_q_h, maddr_q_w, msub_q_h, msub_q_w, msubr_q_h, msubr_q_w, fclass_w, fclass_d, fsqrt_w, fsqrt_d, frcp_w, frcp_d, frint_w, frint_d, frsqrt_w, frsqrt_d, flog2_w, flog2_d, fexupl_w, fexupl_d, fexupr_w, fexupr_d, ffql_w, ffql_d, ffqr_w, ffqr_d, ftint_s_w, ftint_s_d, ftint_u_w, ftint_u_d, ftrunc_s_w, ftrunc_s_d, ftrunc_u_w, ftrunc_u_d, ffint_s_w, ffint_s_d, ffint_u_w, ffint_u_d, ctcmsa, cfcmsa, move_v builtins. (mips_get_builtin_decl_index): New array. (MIPS_ATYPE_QI, MIPS_ATYPE_HI, MIPS_ATYPE_V2DI, MIPS_ATYPE_V4SI) (MIPS_ATYPE_V8HI, MIPS_ATYPE_V16QI, MIPS_ATYPE_V2DF) (MIPS_ATYPE_V4SF, MIPS_ATYPE_UV2DI, MIPS_ATYPE_UV4SI) (MIPS_ATYPE_UV8HI, MIPS_ATYPE_UV16QI): New. (mips_init_builtins): Initialize mips_get_builtin_decl_index array. (TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION): Define target hook. 
(mips_expand_builtin_insn): Prepare operands for CODE_FOR_msa_addvi_b, CODE_FOR_msa_addvi_h, CODE_FOR_msa_addvi_w, CODE_FOR_msa_addvi_d, CODE_FOR_msa_clti_u_b, CODE_FOR_msa_clti_u_h, CODE_FOR_msa_clti_u_w, CODE_FOR_msa_clti_u_d, CODE_FOR_msa_clei_u_b, CODE_FOR_msa_clei_u_h, CODE_FOR_msa_clei_u_w, CODE_FOR_msa_clei_u_d, CODE_FOR_msa_maxi_u_b, CODE_FOR_msa_maxi_u_h, CODE_FOR_msa_maxi_u_w, CODE_FOR_msa_maxi_u_d, CODE_FOR_msa_mini_u_b, CODE_FOR_msa_mini_u_h, CODE_FOR_msa_mini_u_w, CODE_FOR_msa_mini_u_d, CODE_FOR_msa_subvi_b, CODE_FOR_msa_subvi_h, CODE_FOR_msa_subvi_w, CODE_FOR_msa_subvi_d, CODE_FOR_msa_ceqi_b, CODE_FOR_msa_ceqi_h, CODE_FOR_msa_ceqi_w, CODE_FOR_msa_ceqi_d, CODE_FOR_msa_clti_s_b, CODE_FOR_msa_clti_s_h, CODE_FOR_msa_clti_s_w, CODE_FOR_msa_clti_s_d, CODE_FOR_msa_clei_s_b, CODE_FOR_msa_clei_s_h, CODE_FOR_msa_clei_s_w, CODE_FOR_msa_clei_s_d, CODE_FOR_msa_maxi_s_b, CODE_FOR_msa_maxi_s_h, CODE_FOR_msa_maxi_s_w, CODE_FOR_msa_maxi_s_d, CODE_FOR_msa_mini_s_b, CODE_FOR_msa_mini_s_h, CODE_FOR_msa_mini_s_w, CODE_FOR_msa_mini_s_d, CODE_FOR_msa_andi_b, CODE_FOR_msa_ori_b, CODE_FOR_msa_nori_b, CODE_FOR_msa_xori_b, CODE_FOR_msa_bmzi_b, CODE_FOR_msa_bmnzi_b, CODE_FOR_msa_bseli_b, CODE_FOR_msa_fill_b, CODE_FOR_msa_fill_h, CODE_FOR_msa_fill_w, CODE_FOR_msa_fill_d, CODE_FOR_msa_ilvl_b, CODE_FOR_msa_ilvl_h, CODE_FOR_msa_ilvl_w, CODE_FOR_msa_ilvl_d, CODE_FOR_msa_ilvr_b, CODE_FOR_msa_ilvr_h, CODE_FOR_msa_ilvr_w, CODE_FOR_msa_ilvr_d, CODE_FOR_msa_ilvev_b, CODE_FOR_msa_ilvev_h, CODE_FOR_msa_ilvev_w, CODE_FOR_msa_ilvod_b, CODE_FOR_msa_ilvod_h, CODE_FOR_msa_ilvod_w, CODE_FOR_msa_pckev_b, CODE_FOR_msa_pckev_h, CODE_FOR_msa_pckev_w, CODE_FOR_msa_pckod_b, CODE_FOR_msa_pckod_h, CODE_FOR_msa_pckod_w, CODE_FOR_msa_slli_b, CODE_FOR_msa_slli_h, CODE_FOR_msa_slli_w, CODE_FOR_msa_slli_d, CODE_FOR_msa_srai_b, CODE_FOR_msa_srai_h, CODE_FOR_msa_srai_w, CODE_FOR_msa_srai_d, CODE_FOR_msa_srli_b, CODE_FOR_msa_srli_h, CODE_FOR_msa_srli_w, CODE_FOR_msa_srli_d, CODE_FOR_msa_insert_b, 
CODE_FOR_msa_insert_h, CODE_FOR_msa_insert_w, CODE_FOR_msa_insert_d, CODE_FOR_msa_insve_b, CODE_FOR_msa_insve_h, CODE_FOR_msa_insve_w, CODE_FOR_msa_insve_d, CODE_FOR_msa_shf_b, CODE_FOR_msa_shf_h, CODE_FOR_msa_shf_w, CODE_FOR_msa_shf_w_f, CODE_FOR_msa_vshf_b, CODE_FOR_msa_vshf_h, CODE_FOR_msa_vshf_w, CODE_FOR_msa_vshf_d. (mips_expand_builtin): Add case for MIPS_BUILTIN_MSA_TEST_BRANCH. (mips_set_compression_mode): Disallow MSA with MIPS16 code. (mips_option_override): -mmsa requires -mfp64 and -mhard-float. These are set implicitly and an error is reported if overridden. (mips_expand_builtin_msa_test_branch): New function. (mips_expand_msa_shuffle): Likewise. (MAX_VECT_LEN): Increase maximum length of a vector to 16 bytes. (TARGET_SCHED_REASSOCIATION_WIDTH): Define target hook. (TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES): Likewise. (mips_expand_vec_unpack): Add support for MSA. (mips_expand_vector_init): Likewise. (mips_expand_vi_constant): Use CONST0_RTX (element_mode) instead of const0_rtx. (mips_msa_vec_parallel_const_half): New function. (mips_gen_const_int_vector): Likewise. (mips_gen_const_int_vector_shuffle): Likewise. (mips_expand_msa_cmp): Likewise. (mips_expand_vec_cond_expr): Likewise. * config/mips/mips.h (TARGET_CPU_CPP_BUILTINS): Add __mips_msa and __mips_msa_width. (OPTION_DEFAULT_SPECS): Ignore --with-fp-32 if -mmsa is specified. (ASM_SPEC): Pass mmsa and mno-msa to the assembler. (ISA_HAS_MSA): New macro. (UNITS_PER_MSA_REG): Likewise. (BITS_PER_MSA_REG): Likewise. (BIGGEST_ALIGNMENT): Redefine using ISA_HAS_MSA. (MSA_REG_FIRST): New macro. (MSA_REG_LAST): Likewise. (MSA_REG_NUM): Likewise. (MSA_REG_P): Likewise. (MSA_REG_RTX_P): Likewise. (MSA_SUPPORTED_MODE_P): Likewise. (HARD_REGNO_CALL_PART_CLOBBERED): Redefine using TARGET_MSA. (ADDITIONAL_REGISTER_NAMES): Add named registers $w0-$w31. * config/mips/mips.md: Include mips-msa.md. (alu_type): Add simd_add. (mode): Add V2DI, V4SI, V8HI, V16QI, V2DF, V4SF. 
(type): Add simd_div, simd_fclass, simd_flog2, simd_fadd, simd_fcvt, simd_fmul, simd_fmadd, simd_fdiv, simd_bitins, simd_bitmov, simd_insert, simd_sld, simd_mul, simd_fcmp, simd_fexp2, simd_int_arith, simd_bit, simd_shift, simd_splat, simd_fill, simd_permute, simd_shf, simd_sat, simd_pcnt, simd_copy, simd_branch, simd_cmsa, simd_fminmax, simd_logic, simd_move, simd_load, simd_store. Choose "multi" for moves for "qword_mode". (qword_mode): New attribute. (insn_count): Add instruction count for quad moves. Increase the count for MIPS SIMD division. (UNITMODE): Add UNITMODEs for vector types. (addsub): New code iterator. * config/mips/mips.opt (mmsa): New option. * config/mips/msa.h: New file. * config/mips/mti-elf.h: Don't infer -mfpxx if -mmsa is specified. * config/mips/mti-linux.h: Likewise. * config/mips/predicates.md (const_msa_branch_operand): New constraint. (const_uimm3_operand): Likewise. (const_uimm4_operand): Likewise. (const_uimm5_operand): Likewise. (const_uimm8_operand): Likewise. (const_imm5_operand): Likewise. (aq10b_operand): Likewise. (aq10h_operand): Likewise. (aq10w_operand): Likewise. (aq10d_operand): Likewise. (const_m1_operand): Likewise. (reg_or_m1_operand): Likewise. (const_exp_2_operand): Likewise. (const_exp_4_operand): Likewise. (const_exp_8_operand): Likewise. (const_exp_16_operand): Likewise. (const_vector_same_val_operand): Likewise. (const_vector_same_simm5_operand): Likewise. (const_vector_same_uimm5_operand): Likewise. (const_vector_same_uimm6_operand): Likewise. (const_vector_same_uimm8_operand): Likewise. (par_const_vector_shf_set_operand): Likewise. (reg_or_vector_same_val_operand): Likewise. (reg_or_vector_same_simm5_operand): Likewise. (reg_or_vector_same_uimm6_operand): Likewise. * doc/extend.texi (MIPS SIMD Architecture Functions): New section. * doc/invoke.texi (-mmsa): Document new option. 
Co-Authored-By: Chao-ying Fu Co-Authored-By: Graham Stott Co-Authored-By: Matthew Fortune Co-Authored-By: Sameera Deshpande From-SVN: r236030 --- gcc/ChangeLog | 373 +++++ gcc/config.gcc | 2 +- gcc/config/mips/constraints.md | 55 + gcc/config/mips/mips-ftypes.def | 157 ++ gcc/config/mips/mips-modes.def | 14 +- gcc/config/mips/mips-msa.md | 2736 +++++++++++++++++++++++++++++++ gcc/config/mips/mips-protos.h | 20 +- gcc/config/mips/mips.c | 2229 ++++++++++++++++++++++++- gcc/config/mips/mips.h | 86 +- gcc/config/mips/mips.md | 45 +- gcc/config/mips/mips.opt | 4 + gcc/config/mips/msa.h | 582 +++++++ gcc/config/mips/mti-elf.h | 4 +- gcc/config/mips/mti-linux.h | 6 +- gcc/config/mips/predicates.md | 129 +- gcc/doc/extend.texi | 789 +++++++++ gcc/doc/invoke.texi | 1 + gcc/doc/sourcebuild.texi | 3 + 18 files changed, 7166 insertions(+), 69 deletions(-) create mode 100644 gcc/config/mips/mips-msa.md create mode 100644 gcc/config/mips/msa.h diff --git a/gcc/ChangeLog b/gcc/ChangeLog index b00dc71cb1a..6928d0a8c19 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,376 @@ +2016-05-09 Robert Suchanek + Sameera Deshpande + Matthew Fortune + Graham Stott + Chao-ying Fu + + * config.gcc: Add MSA header file for mips*-*-* target. + * config/mips/constraints.md (YI, YC, YZ, Unv5, Uuv5, Usv5, Uuv6) + (Ubv8i, Urv8): New constraints. + * config/mips/mips-ftypes.def: Add function types for MSA + builtins. + * config/mips/mips-modes.def (V16QI, V8HI, V4SI, V2DI, V4SF) + (V2DF, V32QI, V16HI, V8SI, V4DI, V8SF, V4DF): New modes. + * config/mips/mips-msa.md: New file. + * config/mips/mips-protos.h + (mips_split_128bit_const_insns): New prototype. + (mips_msa_idiv_insns): Likewise. + (mips_split_128bit_move): Likewise. + (mips_split_128bit_move_p): Likewise. + (mips_split_msa_copy_d): Likewise. + (mips_split_msa_insert_d): Likewise. + (mips_split_msa_fill_d): Likewise. + (mips_expand_msa_branch): Likewise. + (mips_const_vector_same_val_p): Likewise. 
+ (mips_const_vector_same_bytes_p): Likewise. + (mips_const_vector_same_int_p): Likewise. + (mips_const_vector_shuffle_set_p): Likewise. + (mips_const_vector_bitimm_set_p): Likewise. + (mips_const_vector_bitimm_clr_p): Likewise. + (mips_msa_vec_parallel_const_half): Likewise. + (mips_msa_output_division): Likewise. + (mips_ldst_scaled_shift): Likewise. + (mips_expand_vec_cond_expr): Likewise. + * config/mips/mips.c (enum mips_builtin_type): Add + MIPS_BUILTIN_MSA_TEST_BRANCH. + (mips_gen_const_int_vector_shuffle): New prototype. + (mips_const_vector_bitimm_set_p): New function. + (mips_const_vector_bitimm_clr_p): Likewise. + (mips_const_vector_same_val_p): Likewise. + (mips_const_vector_same_bytes_p): Likewise. + (mips_const_vector_same_int_p): Likewise. + (mips_const_vector_shuffle_set_p): Likewise. + (mips_symbol_insns): Forbid loading symbols via immediate for + MSA. + (mips_valid_offset_p): Limit offset to 10-bit for MSA loads and + stores. + (mips_valid_lo_sum_p): Forbid loadings symbols via %lo(base) for + MSA. + (mips_lx_address_p): Add support load indexed address for MSA. + (mips_address_insns): Add calculation of instructions needed for + stores and loads for MSA. + (mips_const_insns): Move CONST_DOUBLE below CONST_VECTOR. Handle + CONST_VECTOR for MSA and let it fall through. + (mips_ldst_scaled_shift): New function. + (mips_subword_at_byte): Likewise. + (mips_msa_idiv_insns): Likewise. + (mips_legitimize_move): Validate MSA moves. + (mips_rtx_costs): Add UNGE, UNGT, UNLE, UNLT cases. Add + calculation of costs for MSA division. + (mips_split_move_p): Check if MSA moves need splitting. + (mips_split_move): Split MSA moves if necessary. + (mips_split_128bit_move_p): New function. + (mips_split_128bit_move): Likewise. + (mips_split_msa_copy_d): Likewise. + (mips_split_msa_insert_d): Likewise. + (mips_split_msa_fill_d): Likewise. + (mips_output_move): Handle MSA moves. + (mips_expand_msa_branch): New function. 
+ (mips_print_operand): Add 'E', 'B', 'w', 'v' and 'V' modifiers. + Reinstate 'y' modifier. + (mips_file_start): Add MSA .gnu_attribute. + (mips_hard_regno_mode_ok_p): Allow TImode and 128-bit vectors in + FPRs. + (mips_hard_regno_nregs): Always return 1 for MSA supported mode. + (mips_class_max_nregs): Add register size for MSA supported mode. + (mips_cannot_change_mode_class): Allow conversion between MSA + vector modes and TImode. + (mips_mode_ok_for_mov_fmt_p): Allow MSA to use move.v + instruction. + (mips_secondary_reload_class): Force MSA loads/stores via memory. + (mips_preferred_simd_mode): Add preffered modes for MSA. + (mips_vector_mode_supported_p): Add MSA supported modes. + (mips_autovectorize_vector_sizes): New function. + (mips_msa_output_division): Likewise. + (MSA_BUILTIN, MIPS_BUILTIN_DIRECT_NO_TARGET) + (MSA_NO_TARGET_BUILTIN, MSA_BUILTIN_TEST_BRANCH): New macros. + (CODE_FOR_msa_adds_s_b, CODE_FOR_msa_adds_s_h) + (CODE_FOR_msa_adds_s_w, CODE_FOR_msa_adds_s_d) + (CODE_FOR_msa_adds_u_b, CODE_FOR_msa_adds_u_h) + (CODE_FOR_msa_adds_u_w, CODE_FOR_msa_adds_u_du + (CODE_FOR_msa_addv_b, CODE_FOR_msa_addv_h, CODE_FOR_msa_addv_w) + (CODE_FOR_msa_addv_d, CODE_FOR_msa_and_v, CODE_FOR_msa_bmnz_v) + (CODE_FOR_msa_bmnzi_b, CODE_FOR_msa_bmz_v, CODE_FOR_msa_bmzi_b) + (CODE_FOR_msa_bnz_v, CODE_FOR_msa_bz_v, CODE_FOR_msa_bsel_v) + (CODE_FOR_msa_bseli_b, CODE_FOR_msa_ceqi_h, CODE_FOR_msa_ceqi_w) + (CODE_FOR_msa_ceqi_d, CODE_FOR_msa_clti_s_b) + (CODE_FOR_msa_clti_s_h, CODE_FOR_msa_clti_s_w) + (CODE_FOR_msa_clti_s_d, CODE_FOR_msa_clti_u_b) + (CODE_FOR_msa_clti_u_h, CODE_FOR_msa_clti_u_w) + (CODE_FOR_msa_clti_u_d, CODE_FOR_msa_clei_s_b) + (CODE_FOR_msa_clei_s_h, CODE_FOR_msa_clei_s_w) + (CODE_FOR_msa_clei_s_d, CODE_FOR_msa_clei_u_b) + (CODE_FOR_msa_clei_u_h, CODE_FOR_msa_clei_u_w) + (CODE_FOR_msa_clei_u_d, CODE_FOR_msa_div_s_b) + (CODE_FOR_msa_div_s_h, CODE_FOR_msa_div_s_w) + (CODE_FOR_msa_div_s_d, CODE_FOR_msa_div_u_b) + (CODE_FOR_msa_div_u_h, CODE_FOR_msa_div_u_w) 
+ (CODE_FOR_msa_div_u_d, CODE_FOR_msa_fadd_w, CODE_FOR_msa_fadd_d) + (CODE_FOR_msa_fexdo_w, CODE_FOR_msa_ftrunc_s_w) + (CODE_FOR_msa_ftrunc_s_d, CODE_FOR_msa_ftrunc_u_w) + (CODE_FOR_msa_ftrunc_u_d, CODE_FOR_msa_ffint_s_w) + (CODE_FOR_msa_ffint_s_d, CODE_FOR_msa_ffint_u_w) + (CODE_FOR_msa_ffint_u_d, CODE_FOR_msa_fsub_w) + (CODE_FOR_msa_fsub_d, CODE_FOR_msa_fmsub_d, CODE_FOR_msa_fmadd_w) + (CODE_FOR_msa_fmadd_d, CODE_FOR_msa_fmsub_w, CODE_FOR_msa_fmul_w) + (CODE_FOR_msa_fmul_d, CODE_FOR_msa_fdiv_w, CODE_FOR_msa_fdiv_d) + (CODE_FOR_msa_fmax_w, CODE_FOR_msa_fmax_d, CODE_FOR_msa_fmax_a_w) + (CODE_FOR_msa_fmax_a_d, CODE_FOR_msa_fmin_w, CODE_FOR_msa_fmin_d) + (CODE_FOR_msa_fmin_a_w, CODE_FOR_msa_fmin_a_d) + (CODE_FOR_msa_fsqrt_w, CODE_FOR_msa_fsqrt_d) + (CODE_FOR_msa_max_s_b, CODE_FOR_msa_max_s_h) + (CODE_FOR_msa_max_s_w, CODE_FOR_msa_max_s_d) + (CODE_FOR_msa_max_u_b, CODE_FOR_msa_max_u_h) + (CODE_FOR_msa_max_u_w, CODE_FOR_msa_max_u_d) + (CODE_FOR_msa_min_s_b, CODE_FOR_msa_min_s_h) + (CODE_FOR_msa_min_s_w, CODE_FOR_msa_min_s_d) + (CODE_FOR_msa_min_u_b, CODE_FOR_msa_min_u_h) + (CODE_FOR_msa_min_u_w, CODE_FOR_msa_min_u_d) + (CODE_FOR_msa_mod_s_b, CODE_FOR_msa_mod_s_h) + (CODE_FOR_msa_mod_s_w, CODE_FOR_msa_mod_s_d) + (CODE_FOR_msa_mod_u_b, CODE_FOR_msa_mod_u_h) + (CODE_FOR_msa_mod_u_w, CODE_FOR_msa_mod_u_d) + (CODE_FOR_msa_mod_s_b, CODE_FOR_msa_mod_s_h) + (CODE_FOR_msa_mod_s_w, CODE_FOR_msa_mod_s_d) + (CODE_FOR_msa_mod_u_b, CODE_FOR_msa_mod_u_h) + (CODE_FOR_msa_mod_u_w, CODE_FOR_msa_mod_u_d) + (CODE_FOR_msa_mulv_b, CODE_FOR_msa_mulv_h, CODE_FOR_msa_mulv_w) + (CODE_FOR_msa_mulv_d, CODE_FOR_msa_nlzc_b, CODE_FOR_msa_nlzc_h) + (CODE_FOR_msa_nlzc_w, CODE_FOR_msa_nlzc_d, CODE_FOR_msa_nor_v) + (CODE_FOR_msa_or_v, CODE_FOR_msa_ori_b, CODE_FOR_msa_nori_b) + (CODE_FOR_msa_pcnt_b, CODE_FOR_msa_pcnt_h, CODE_FOR_msa_pcnt_w) + (CODE_FOR_msa_pcnt_d, CODE_FOR_msa_xor_v, CODE_FOR_msa_xori_b) + (CODE_FOR_msa_sll_b, CODE_FOR_msa_sll_h, CODE_FOR_msa_sll_w) + (CODE_FOR_msa_sll_d, 
CODE_FOR_msa_slli_b, CODE_FOR_msa_slli_h) + (CODE_FOR_msa_slli_w, CODE_FOR_msa_slli_d, CODE_FOR_msa_sra_b) + (CODE_FOR_msa_sra_h, CODE_FOR_msa_sra_w, CODE_FOR_msa_sra_d) + (CODE_FOR_msa_srai_b, CODE_FOR_msa_srai_h, CODE_FOR_msa_srai_w) + (CODE_FOR_msa_srai_d, CODE_FOR_msa_srl_b, CODE_FOR_msa_srl_h) + (CODE_FOR_msa_srl_w, CODE_FOR_msa_srl_d, CODE_FOR_msa_srli_b) + (CODE_FOR_msa_srli_h, CODE_FOR_msa_srli_w, CODE_FOR_msa_srli_d) + (CODE_FOR_msa_subv_b, CODE_FOR_msa_subv_h, CODE_FOR_msa_subv_w) + (CODE_FOR_msa_subv_d, CODE_FOR_msa_subvi_b, CODE_FOR_msa_subvi_h) + (CODE_FOR_msa_subvi_w, CODE_FOR_msa_subvi_d, CODE_FOR_msa_move_v) + (CODE_FOR_msa_vshf_b, CODE_FOR_msa_vshf_h, CODE_FOR_msa_vshf_w) + (CODE_FOR_msa_vshf_d, CODE_FOR_msa_ilvod_d, CODE_FOR_msa_ilvev_d) + (CODE_FOR_msa_pckod_d, CODE_FOR_msa_pckdev_d, CODE_FOR_msa_ldi_b) + (CODE_FOR_msa_ldi_hi, CODE_FOR_msa_ldi_w) + (CODE_FOR_msa_ldi_d): New code_aliasing macros. + (mips_builtins): Add MSA sll_b, sll_h, sll_w, sll_d, slli_b, + slli_h, slli_w, slli_d, sra_b, sra_h, sra_w, sra_d, srai_b, + srai_h, srai_w, srai_d, srar_b, srar_h, srar_w, srar_d, srari_b, + srari_h, srari_w, srari_d, srl_b, srl_h, srl_w, srl_d, srli_b, + srli_h, srli_w, srli_d, srlr_b, srlr_h, srlr_w, srlr_d, srlri_b, + srlri_h, srlri_w, srlri_d, bclr_b, bclr_h, bclr_w, bclr_d, + bclri_b, bclri_h, bclri_w, bclri_d, bset_b, bset_h, bset_w, + bset_d, bseti_b, bseti_h, bseti_w, bseti_d, bneg_b, bneg_h, + bneg_w, bneg_d, bnegi_b, bnegi_h, bnegi_w, bnegi_d, binsl_b, + binsl_h, binsl_w, binsl_d, binsli_b, binsli_h, binsli_w, + binsli_d, binsr_b, binsr_h, binsr_w, binsr_d, binsri_b, binsri_h, + binsri_w, binsri_d, addv_b, addv_h, addv_w, addv_d, addvi_b, + addvi_h, addvi_w, addvi_d, subv_b, subv_h, subv_w, subv_d, + subvi_b, subvi_h, subvi_w, subvi_d, max_s_b, max_s_h, max_s_w, + max_s_d, maxi_s_b, maxi_s_h, maxi_s_w, maxi_s_d, max_u_b, + max_u_h, max_u_w, max_u_d, maxi_u_b, maxi_u_h, maxi_u_w, + maxi_u_d, min_s_b, min_s_h, min_s_w, min_s_d, mini_s_b, 
mini_s_h, + mini_s_w, mini_s_d, min_u_b, min_u_h, min_u_w, min_u_d, mini_u_b, + mini_u_h, mini_u_w, mini_u_d, max_a_b, max_a_h, max_a_w, max_a_d, + min_a_b, min_a_h, min_a_w, min_a_d, ceq_b, ceq_h, ceq_w, ceq_d, + ceqi_b, ceqi_h, ceqi_w, ceqi_d, clt_s_b, clt_s_h, clt_s_w, + clt_s_d, clti_s_b, clti_s_h, clti_s_w, clti_s_d, clt_u_b, + clt_u_h, clt_u_w, clt_u_d, clti_u_b, clti_u_h, clti_u_w, + clti_u_d, cle_s_b, cle_s_h, cle_s_w, cle_s_d, clei_s_b, clei_s_h, + clei_s_w, clei_s_d, cle_u_b, cle_u_h, cle_u_w, cle_u_d, clei_u_b, + clei_u_h, clei_u_w, clei_u_d, ld_b, ld_h, ld_w, ld_d, st_b, st_h, + st_w, st_d, sat_s_b, sat_s_h, sat_s_w, sat_s_d, sat_u_b, sat_u_h, + sat_u_w, sat_u_d, add_a_b, add_a_h, add_a_w, add_a_d, adds_a_b, + adds_a_h, adds_a_w, adds_a_d, adds_s_b, adds_s_h, adds_s_w, + adds_s_d, adds_u_b, adds_u_h, adds_u_w, adds_u_d, ave_s_b, + ave_s_h, ave_s_w, ave_s_d, ave_u_b, ave_u_h, ave_u_w, ave_u_d, + aver_s_b, aver_s_h, aver_s_w, aver_s_d, aver_u_b, aver_u_h, + aver_u_w, aver_u_d, subs_s_b, subs_s_h, subs_s_w, subs_s_d, + subs_u_b, subs_u_h, subs_u_w, subs_u_d, subsuu_s_b, subsuu_s_h, + subsuu_s_w, subsuu_s_d, subsus_u_b, subsus_u_h, subsus_u_w, + subsus_u_d, asub_s_b, asub_s_h, asub_s_w, asub_s_d, asub_u_b, + asub_u_h, asub_u_w, asub_u_d, mulv_b, mulv_h, mulv_w, mulv_d, + maddv_b, maddv_h, maddv_w, maddv_d, msubv_b, msubv_h, msubv_w, + msubv_d, div_s_b, div_s_h, div_s_w, div_s_d, div_u_b, div_u_h, + div_u_w, div_u_d, hadd_s_h, hadd_s_w, hadd_s_d, hadd_u_h, + hadd_u_w, hadd_u_d, hsub_s_h, hsub_s_w, hsub_s_d, hsub_u_h, + hsub_u_w, hsub_u_d, mod_s_b, mod_s_h, mod_s_w, mod_s_d, mod_u_b, + mod_u_h, mod_u_w, mod_u_d, dotp_s_h, dotp_s_w, dotp_s_d, + dotp_u_h, dotp_u_w, dotp_u_d, dpadd_s_h, dpadd_s_w, dpadd_s_d, + dpadd_u_h, dpadd_u_w, dpadd_u_d, dpsub_s_h, dpsub_s_w, dpsub_s_d, + dpsub_u_h, dpsub_u_w, dpsub_u_d, sld_b, sld_h, sld_w, sld_d, + sldi_b, sldi_h, sldi_w, sldi_d, splat_b, splat_h, splat_w, + splat_d, splati_b, splati_h, splati_w, splati_d, pckev_b, + 
pckev_h, pckev_w, pckev_d, pckod_b, pckod_h, pckod_w, pckod_d, + ilvl_b, ilvl_h, ilvl_w, ilvl_d, ilvr_b, ilvr_h, ilvr_w, ilvr_d, + ilvev_b, ilvev_h, ilvev_w, ilvev_d, ilvod_b, ilvod_h, ilvod_w, + ilvod_d, vshf_b, vshf_h, vshf_w, vshf_d, and_v, andi_b, or_v, + ori_b, nor_v, nori_b, xor_v, xori_b, bmnz_v, bmnzi_b, bmz_v, + bmzi_b, bsel_v, bseli_b, shf_b, shf_h, shf_w, bnz_v, bz_v, + fill_b, fill_h, fill_w, fill_d, pcnt_b, pcnt_h, pcnt_w, + pcnt_d, nloc_b, nloc_h, nloc_w, nloc_d, nlzc_b, nlzc_h, nlzc_w, + nlzc_d, copy_s_b, copy_s_h, copy_s_w, copy_s_d, copy_u_b, + copy_u_h, copy_u_w, copy_u_d, insert_b, insert_h, insert_w, + insert_d, insve_b, insve_h, insve_w, insve_d, bnz_b, bnz_h, + bnz_w, bnz_d, bz_b, bz_h, bz_w, bz_d, ldi_b, ldi_h, ldi_w, ldi_d, + fcaf_w, fcaf_d, fcor_w, fcor_d, fcun_w, fcun_d, fcune_w, fcune_d, + fcueq_w, fcueq_d, fceq_w, fceq_d, fcne_w, fcne_d, fclt_w, fclt_d, + fcult_w, fcult_d, fcle_w, fcle_d, fcule_w, fcule_d, fsaf_w, + fsaf_d, fsor_w, fsor_d, fsun_w, fsun_d, fsune_w, fsune_d, + fsueq_w, fsueq_d, fseq_w, fseq_d, fsne_w, fsne_d, fslt_w, + fslt_d, fsult_w, fsult_d, fsle_w, fsle_d, fsule_w, fsule_d, + fadd_w, fadd_d, fsub_w, fsub_d, fmul_w, fmul_d, fdiv_w, fdiv_d, + fmadd_w, fmadd_d, fmsub_w, fmsub_d, fexp2_w, fexp2_d, fexdo_h, + fexdo_w, ftq_h, ftq_w, fmin_w, fmin_d, fmin_a_w, fmin_a_d, + fmax_w, fmax_d, fmax_a_w, fmax_a_d, mul_q_h, mul_q_w, mulr_q_h, + mulr_q_w, madd_q_h, madd_q_w, maddr_q_h, maddr_q_w, msub_q_h, + msub_q_w, msubr_q_h, msubr_q_w, fclass_w, fclass_d, fsqrt_w, + fsqrt_d, frcp_w, frcp_d, frint_w, frint_d, frsqrt_w, frsqrt_d, + flog2_w, flog2_d, fexupl_w, fexupl_d, fexupr_w, fexupr_d, ffql_w, + ffql_d, ffqr_w, ffqr_d, ftint_s_w, ftint_s_d, ftint_u_w, + ftint_u_d, ftrunc_s_w, ftrunc_s_d, ftrunc_u_w, ftrunc_u_d, + ffint_s_w, ffint_s_d, ffint_u_w, ffint_u_d, ctcmsa, cfcmsa, + move_v builtins. + (mips_get_builtin_decl_index): New array. 
+ (MIPS_ATYPE_QI, MIPS_ATYPE_HI, MIPS_ATYPE_V2DI, MIPS_ATYPE_V4SI) + (MIPS_ATYPE_V8HI, MIPS_ATYPE_V16QI, MIPS_ATYPE_V2DF) + (MIPS_ATYPE_V4SF, MIPS_ATYPE_UV2DI, MIPS_ATYPE_UV4SI) + (MIPS_ATYPE_UV8HI, MIPS_ATYPE_UV16QI): New. + (mips_init_builtins): Initialize mips_get_builtin_decl_index + array. + (TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION): Define target + hook. + (mips_expand_builtin_insn): Prepare operands for + CODE_FOR_msa_addvi_b, CODE_FOR_msa_addvi_h, CODE_FOR_msa_addvi_w, + CODE_FOR_msa_addvi_d, CODE_FOR_msa_clti_u_b, + CODE_FOR_msa_clti_u_h, CODE_FOR_msa_clti_u_w, + CODE_FOR_msa_clti_u_d, CODE_FOR_msa_clei_u_b, + CODE_FOR_msa_clei_u_h, CODE_FOR_msa_clei_u_w, + CODE_FOR_msa_clei_u_d, CODE_FOR_msa_maxi_u_b, + CODE_FOR_msa_maxi_u_h, CODE_FOR_msa_maxi_u_w, + CODE_FOR_msa_maxi_u_d, CODE_FOR_msa_mini_u_b, + CODE_FOR_msa_mini_u_h, CODE_FOR_msa_mini_u_w, + CODE_FOR_msa_mini_u_d, CODE_FOR_msa_subvi_b, + CODE_FOR_msa_subvi_h, CODE_FOR_msa_subvi_w, CODE_FOR_msa_subvi_d, + CODE_FOR_msa_ceqi_b, CODE_FOR_msa_ceqi_h, CODE_FOR_msa_ceqi_w, + CODE_FOR_msa_ceqi_d, CODE_FOR_msa_clti_s_b, + CODE_FOR_msa_clti_s_h, CODE_FOR_msa_clti_s_w, + CODE_FOR_msa_clti_s_d, CODE_FOR_msa_clei_s_b, + CODE_FOR_msa_clei_s_h, CODE_FOR_msa_clei_s_w, + CODE_FOR_msa_clei_s_d, CODE_FOR_msa_maxi_s_b, + CODE_FOR_msa_maxi_s_h, CODE_FOR_msa_maxi_s_w, + CODE_FOR_msa_maxi_s_d, CODE_FOR_msa_mini_s_b, + CODE_FOR_msa_mini_s_h, CODE_FOR_msa_mini_s_w, + CODE_FOR_msa_mini_s_d, CODE_FOR_msa_andi_b, CODE_FOR_msa_ori_b, + CODE_FOR_msa_nori_b, CODE_FOR_msa_xori_b, CODE_FOR_msa_bmzi_b, + CODE_FOR_msa_bmnzi_b, CODE_FOR_msa_bseli_b, CODE_FOR_msa_fill_b, + CODE_FOR_msa_fill_h, CODE_FOR_msa_fill_w, CODE_FOR_msa_fill_d, + CODE_FOR_msa_ilvl_b, CODE_FOR_msa_ilvl_h, CODE_FOR_msa_ilvl_w, + CODE_FOR_msa_ilvl_d, CODE_FOR_msa_ilvr_b, CODE_FOR_msa_ilvr_h, + CODE_FOR_msa_ilvr_w, CODE_FOR_msa_ilvr_d, CODE_FOR_msa_ilvev_b, + CODE_FOR_msa_ilvev_h, CODE_FOR_msa_ilvev_w, CODE_FOR_msa_ilvod_b, + CODE_FOR_msa_ilvod_h, 
CODE_FOR_msa_ilvod_w, CODE_FOR_msa_pckev_b, + CODE_FOR_msa_pckev_h, CODE_FOR_msa_pckev_w, CODE_FOR_msa_pckod_b, + CODE_FOR_msa_pckod_h, CODE_FOR_msa_pckod_w, CODE_FOR_msa_slli_b, + CODE_FOR_msa_slli_h, CODE_FOR_msa_slli_w, CODE_FOR_msa_slli_d, + CODE_FOR_msa_srai_b, CODE_FOR_msa_srai_h, CODE_FOR_msa_srai_w, + CODE_FOR_msa_srai_d, CODE_FOR_msa_srli_b, CODE_FOR_msa_srli_h, + CODE_FOR_msa_srli_w, CODE_FOR_msa_srli_d, CODE_FOR_msa_insert_b, + CODE_FOR_msa_insert_h, CODE_FOR_msa_insert_w, + CODE_FOR_msa_insert_d, CODE_FOR_msa_insve_b, + CODE_FOR_msa_insve_h, CODE_FOR_msa_insve_w, CODE_FOR_msa_insve_d, + CODE_FOR_msa_shf_b, CODE_FOR_msa_shf_h, CODE_FOR_msa_shf_w, + CODE_FOR_msa_shf_w_f, CODE_FOR_msa_vshf_b, CODE_FOR_msa_vshf_h, + CODE_FOR_msa_vshf_w, CODE_FOR_msa_vshf_d. + (mips_expand_builtin): Add case for MIPS_BUILTIN_MSA_TEST_BRANCH. + (mips_set_compression_mode): Disallow MSA with MIPS16 code. + (mips_option_override): -mmsa requires -mfp64 and -mhard-float. + These are set implicitly and an error is reported if overridden. + (mips_expand_builtin_msa_test_branch): New function. + (mips_expand_msa_shuffle): Likewise. + (MAX_VECT_LEN): Increase maximum length of a vector to 16 bytes. + (TARGET_SCHED_REASSOCIATION_WIDTH): Define target hook. + (TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES): Likewise. + (mips_expand_vec_unpack): Add support for MSA. + (mips_expand_vector_init): Likewise. + (mips_expand_vi_constant): Use CONST0_RTX (element_mode) + instead of const0_rtx. + (mips_msa_vec_parallel_const_half): New function. + (mips_gen_const_int_vector): Likewise. + (mips_gen_const_int_vector_shuffle): Likewise. + (mips_expand_msa_cmp): Likewise. + (mips_expand_vec_cond_expr): Likewise. + * config/mips/mips.h + (TARGET_CPU_CPP_BUILTINS): Add __mips_msa and __mips_msa_width. + (OPTION_DEFAULT_SPECS): Ignore --with-fp-32 if -mmsa is + specified. + (ASM_SPEC): Pass mmsa and mno-msa to the assembler. + (ISA_HAS_MSA): New macro. + (UNITS_PER_MSA_REG): Likewise. 
+ (BITS_PER_MSA_REG): Likewise. + (BIGGEST_ALIGNMENT): Redefine using ISA_HAS_MSA. + (MSA_REG_FIRST): New macro. + (MSA_REG_LAST): Likewise. + (MSA_REG_NUM): Likewise. + (MSA_REG_P): Likewise. + (MSA_REG_RTX_P): Likewise. + (MSA_SUPPORTED_MODE_P): Likewise. + (HARD_REGNO_CALL_PART_CLOBBERED): Redefine using TARGET_MSA. + (ADDITIONAL_REGISTER_NAMES): Add named registers $w0-$w31. + * config/mips/mips.md: Include mips-msa.md. + (alu_type): Add simd_add. + (mode): Add V2DI, V4SI, V8HI, V16QI, V2DF, V4SF. + (type): Add simd_div, simd_fclass, simd_flog2, simd_fadd, + simd_fcvt, simd_fmul, simd_fmadd, simd_fdiv, simd_bitins, + simd_bitmov, simd_insert, simd_sld, simd_mul, simd_fcmp, + simd_fexp2, simd_int_arith, simd_bit, simd_shift, simd_splat, + simd_fill, simd_permute, simd_shf, simd_sat, simd_pcnt, + simd_copy, simd_branch, simd_cmsa, simd_fminmax, simd_logic, + simd_move, simd_load, simd_store. Choose "multi" for moves + for "qword_mode". + (qword_mode): New attribute. + (insn_count): Add instruction count for quad moves. + Increase the count for MIPS SIMD division. + (UNITMODE): Add UNITMODEs for vector types. + (addsub): New code iterator. + * config/mips/mips.opt (mmsa): New option. + * config/mips/msa.h: New file. + * config/mips/mti-elf.h: Don't infer -mfpxx if -mmsa is + specified. + * config/mips/mti-linux.h: Likewise. + * config/mips/predicates.md + (const_msa_branch_operand): New constraint. + (const_uimm3_operand): Likewise. + (const_uimm4_operand): Likewise. + (const_uimm5_operand): Likewise. + (const_uimm8_operand): Likewise. + (const_imm5_operand): Likewise. + (aq10b_operand): Likewise. + (aq10h_operand): Likewise. + (aq10w_operand): Likewise. + (aq10d_operand): Likewise. + (const_m1_operand): Likewise. + (reg_or_m1_operand): Likewise. + (const_exp_2_operand): Likewise. + (const_exp_4_operand): Likewise. + (const_exp_8_operand): Likewise. + (const_exp_16_operand): Likewise. + (const_vector_same_val_operand): Likewise. 
+ (const_vector_same_simm5_operand): Likewise. + (const_vector_same_uimm5_operand): Likewise. + (const_vector_same_uimm6_operand): Likewise. + (const_vector_same_uimm8_operand): Likewise. + (par_const_vector_shf_set_operand): Likewise. + (reg_or_vector_same_val_operand): Likewise. + (reg_or_vector_same_simm5_operand): Likewise. + (reg_or_vector_same_uimm6_operand): Likewise. + * doc/extend.texi (MIPS SIMD Architecture Functions): New + section. + * doc/invoke.texi (-mmsa): Document new option. + 2016-05-09 Rainer Orth * configure.ac (enable_vtable_verify): Handle --enable-vtable-verify. diff --git a/gcc/config.gcc b/gcc/config.gcc index 74e9f4e4e83..51af122aafb 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -427,7 +427,7 @@ microblaze*-*-*) ;; mips*-*-*) cpu_type=mips - extra_headers="loongson.h" + extra_headers="loongson.h msa.h" extra_objs="frame-header-opt.o" extra_options="${extra_options} g.opt fused-madd.opt mips/mips-tables.opt" ;; diff --git a/gcc/config/mips/constraints.md b/gcc/config/mips/constraints.md index 133e346a8fc..56b363e699b 100644 --- a/gcc/config/mips/constraints.md +++ b/gcc/config/mips/constraints.md @@ -308,6 +308,61 @@ "@internal" (match_operand 0 "low_bitmask_operand")) +(define_constraint "YI" + "@internal + A replicated vector const in which the replicated value is in the range + [-512,511]." + (and (match_code "const_vector") + (match_test "mips_const_vector_same_int_p (op, mode, -512, 511)"))) + +(define_constraint "YC" + "@internal + A replicated vector const in which the replicated value has a single + bit set." + (and (match_code "const_vector") + (match_test "mips_const_vector_bitimm_set_p (op, mode)"))) + +(define_constraint "YZ" + "@internal + A replicated vector const in which the replicated value has a single + bit clear." 
+ (and (match_code "const_vector") + (match_test "mips_const_vector_bitimm_clr_p (op, mode)"))) + +(define_constraint "Unv5" + "@internal + A replicated vector const in which the replicated value is in the range + [-31,0]." + (and (match_code "const_vector") + (match_test "mips_const_vector_same_int_p (op, mode, -31, 0)"))) + +(define_constraint "Uuv5" + "@internal + A replicated vector const in which the replicated value is in the range + [0,31]." + (and (match_code "const_vector") + (match_test "mips_const_vector_same_int_p (op, mode, 0, 31)"))) + +(define_constraint "Usv5" + "@internal + A replicated vector const in which the replicated value is in the range + [-16,15]." + (and (match_code "const_vector") + (match_test "mips_const_vector_same_int_p (op, mode, -16, 15)"))) + +(define_constraint "Uuv6" + "@internal + A replicated vector const in which the replicated value is in the range + [0,63]." + (and (match_code "const_vector") + (match_test "mips_const_vector_same_int_p (op, mode, 0, 63)"))) + +(define_constraint "Urv8" + "@internal + A replicated vector const with replicated byte values as well as elements" + (and (match_code "const_vector") + (match_test "mips_const_vector_same_bytes_p (op, mode)"))) + (define_memory_constraint "ZC" "A memory operand whose address is formed by a base register and offset that is suitable for use in instructions with the same addressing mode diff --git a/gcc/config/mips/mips-ftypes.def b/gcc/config/mips/mips-ftypes.def index 7fe1c06eb00..69cf4379ed1 100644 --- a/gcc/config/mips/mips-ftypes.def +++ b/gcc/config/mips/mips-ftypes.def @@ -35,6 +35,7 @@ along with GCC; see the file COPYING3. If not see Please keep this list lexicographically sorted by the LIST argument. 
*/ DEF_MIPS_FTYPE (1, (DF, DF)) DEF_MIPS_FTYPE (2, (DF, DF, DF)) +DEF_MIPS_FTYPE (1, (DF, V2DF)) DEF_MIPS_FTYPE (2, (DI, DI, DI)) DEF_MIPS_FTYPE (2, (DI, DI, SI)) @@ -45,6 +46,7 @@ DEF_MIPS_FTYPE (3, (DI, DI, V4QI, V4QI)) DEF_MIPS_FTYPE (2, (DI, POINTER, SI)) DEF_MIPS_FTYPE (2, (DI, SI, SI)) DEF_MIPS_FTYPE (2, (DI, USI, USI)) +DEF_MIPS_FTYPE (2, (DI, V2DI, UQI)) DEF_MIPS_FTYPE (2, (INT, DF, DF)) DEF_MIPS_FTYPE (2, (INT, SF, SF)) @@ -54,23 +56,51 @@ DEF_MIPS_FTYPE (4, (INT, V2SF, V2SF, V2SF, V2SF)) DEF_MIPS_FTYPE (1, (SF, SF)) DEF_MIPS_FTYPE (2, (SF, SF, SF)) DEF_MIPS_FTYPE (1, (SF, V2SF)) +DEF_MIPS_FTYPE (1, (SF, V4SF)) DEF_MIPS_FTYPE (2, (SI, DI, SI)) DEF_MIPS_FTYPE (2, (SI, POINTER, SI)) DEF_MIPS_FTYPE (1, (SI, SI)) DEF_MIPS_FTYPE (2, (SI, SI, SI)) DEF_MIPS_FTYPE (3, (SI, SI, SI, SI)) +DEF_MIPS_FTYPE (1, (SI, UQI)) +DEF_MIPS_FTYPE (1, (SI, UV16QI)) +DEF_MIPS_FTYPE (1, (SI, UV2DI)) +DEF_MIPS_FTYPE (1, (SI, UV4SI)) +DEF_MIPS_FTYPE (1, (SI, UV8HI)) +DEF_MIPS_FTYPE (2, (SI, V16QI, UQI)) DEF_MIPS_FTYPE (1, (SI, V2HI)) DEF_MIPS_FTYPE (2, (SI, V2HI, V2HI)) DEF_MIPS_FTYPE (1, (SI, V4QI)) DEF_MIPS_FTYPE (2, (SI, V4QI, V4QI)) +DEF_MIPS_FTYPE (2, (SI, V4SI, UQI)) +DEF_MIPS_FTYPE (2, (SI, V8HI, UQI)) DEF_MIPS_FTYPE (1, (SI, VOID)) DEF_MIPS_FTYPE (2, (UDI, UDI, UDI)) DEF_MIPS_FTYPE (2, (UDI, UV2SI, UV2SI)) +DEF_MIPS_FTYPE (2, (UDI, V2DI, UQI)) +DEF_MIPS_FTYPE (2, (USI, V16QI, UQI)) +DEF_MIPS_FTYPE (2, (USI, V4SI, UQI)) +DEF_MIPS_FTYPE (2, (USI, V8HI, UQI)) DEF_MIPS_FTYPE (1, (USI, VOID)) +DEF_MIPS_FTYPE (2, (UV16QI, UV16QI, UQI)) +DEF_MIPS_FTYPE (2, (UV16QI, UV16QI, UV16QI)) +DEF_MIPS_FTYPE (3, (UV16QI, UV16QI, UV16QI, UQI)) +DEF_MIPS_FTYPE (3, (UV16QI, UV16QI, UV16QI, UV16QI)) +DEF_MIPS_FTYPE (2, (UV16QI, UV16QI, V16QI)) + +DEF_MIPS_FTYPE (2, (UV2DI, UV2DI, UQI)) +DEF_MIPS_FTYPE (2, (UV2DI, UV2DI, UV2DI)) +DEF_MIPS_FTYPE (3, (UV2DI, UV2DI, UV2DI, UQI)) +DEF_MIPS_FTYPE (3, (UV2DI, UV2DI, UV2DI, UV2DI)) +DEF_MIPS_FTYPE (3, (UV2DI, UV2DI, UV4SI, UV4SI)) +DEF_MIPS_FTYPE (2, 
(UV2DI, UV2DI, V2DI)) +DEF_MIPS_FTYPE (2, (UV2DI, UV4SI, UV4SI)) +DEF_MIPS_FTYPE (1, (UV2DI, V2DF)) + DEF_MIPS_FTYPE (2, (UV2SI, UV2SI, UQI)) DEF_MIPS_FTYPE (2, (UV2SI, UV2SI, UV2SI)) @@ -82,10 +112,75 @@ DEF_MIPS_FTYPE (3, (UV4HI, UV4HI, UV4HI, USI)) DEF_MIPS_FTYPE (1, (UV4HI, UV8QI)) DEF_MIPS_FTYPE (2, (UV4HI, UV8QI, UV8QI)) +DEF_MIPS_FTYPE (2, (UV4SI, UV4SI, UQI)) +DEF_MIPS_FTYPE (2, (UV4SI, UV4SI, UV4SI)) +DEF_MIPS_FTYPE (3, (UV4SI, UV4SI, UV4SI, UQI)) +DEF_MIPS_FTYPE (3, (UV4SI, UV4SI, UV4SI, UV4SI)) +DEF_MIPS_FTYPE (3, (UV4SI, UV4SI, UV8HI, UV8HI)) +DEF_MIPS_FTYPE (2, (UV4SI, UV4SI, V4SI)) +DEF_MIPS_FTYPE (2, (UV4SI, UV8HI, UV8HI)) +DEF_MIPS_FTYPE (1, (UV4SI, V4SF)) + +DEF_MIPS_FTYPE (2, (UV8HI, UV16QI, UV16QI)) +DEF_MIPS_FTYPE (2, (UV8HI, UV8HI, UQI)) +DEF_MIPS_FTYPE (3, (UV8HI, UV8HI, UV16QI, UV16QI)) +DEF_MIPS_FTYPE (2, (UV8HI, UV8HI, UV8HI)) +DEF_MIPS_FTYPE (3, (UV8HI, UV8HI, UV8HI, UQI)) +DEF_MIPS_FTYPE (3, (UV8HI, UV8HI, UV8HI, UV8HI)) +DEF_MIPS_FTYPE (2, (UV8HI, UV8HI, V8HI)) + DEF_MIPS_FTYPE (2, (UV8QI, UV4HI, UV4HI)) DEF_MIPS_FTYPE (1, (UV8QI, UV8QI)) DEF_MIPS_FTYPE (2, (UV8QI, UV8QI, UV8QI)) +DEF_MIPS_FTYPE (2, (V16QI, CVPOINTER, SI)) +DEF_MIPS_FTYPE (1, (V16QI, HI)) +DEF_MIPS_FTYPE (1, (V16QI, SI)) +DEF_MIPS_FTYPE (2, (V16QI, UV16QI, UQI)) +DEF_MIPS_FTYPE (2, (V16QI, UV16QI, UV16QI)) +DEF_MIPS_FTYPE (1, (V16QI, V16QI)) +DEF_MIPS_FTYPE (2, (V16QI, V16QI, QI)) +DEF_MIPS_FTYPE (2, (V16QI, V16QI, SI)) +DEF_MIPS_FTYPE (2, (V16QI, V16QI, UQI)) +DEF_MIPS_FTYPE (3, (V16QI, V16QI, UQI, SI)) +DEF_MIPS_FTYPE (3, (V16QI, V16QI, UQI, V16QI)) +DEF_MIPS_FTYPE (2, (V16QI, V16QI, V16QI)) +DEF_MIPS_FTYPE (3, (V16QI, V16QI, V16QI, SI)) +DEF_MIPS_FTYPE (3, (V16QI, V16QI, V16QI, UQI)) +DEF_MIPS_FTYPE (3, (V16QI, V16QI, V16QI, V16QI)) + +DEF_MIPS_FTYPE (1, (V2DF, DF)) +DEF_MIPS_FTYPE (1, (V2DF, UV2DI)) +DEF_MIPS_FTYPE (1, (V2DF, V2DF)) +DEF_MIPS_FTYPE (2, (V2DF, V2DF, V2DF)) +DEF_MIPS_FTYPE (3, (V2DF, V2DF, V2DF, V2DF)) +DEF_MIPS_FTYPE (2, (V2DF, V2DF, V2DI)) 
+DEF_MIPS_FTYPE (1, (V2DF, V2DI)) +DEF_MIPS_FTYPE (1, (V2DF, V4SF)) +DEF_MIPS_FTYPE (1, (V2DF, V4SI)) + +DEF_MIPS_FTYPE (2, (V2DI, CVPOINTER, SI)) +DEF_MIPS_FTYPE (1, (V2DI, DI)) +DEF_MIPS_FTYPE (1, (V2DI, HI)) +DEF_MIPS_FTYPE (2, (V2DI, UV2DI, UQI)) +DEF_MIPS_FTYPE (2, (V2DI, UV2DI, UV2DI)) +DEF_MIPS_FTYPE (2, (V2DI, UV4SI, UV4SI)) +DEF_MIPS_FTYPE (1, (V2DI, V2DF)) +DEF_MIPS_FTYPE (2, (V2DI, V2DF, V2DF)) +DEF_MIPS_FTYPE (1, (V2DI, V2DI)) +DEF_MIPS_FTYPE (2, (V2DI, V2DI, QI)) +DEF_MIPS_FTYPE (2, (V2DI, V2DI, SI)) +DEF_MIPS_FTYPE (2, (V2DI, V2DI, UQI)) +DEF_MIPS_FTYPE (3, (V2DI, V2DI, UQI, DI)) +DEF_MIPS_FTYPE (3, (V2DI, V2DI, UQI, V2DI)) +DEF_MIPS_FTYPE (3, (V2DI, V2DI, UV4SI, UV4SI)) +DEF_MIPS_FTYPE (2, (V2DI, V2DI, V2DI)) +DEF_MIPS_FTYPE (3, (V2DI, V2DI, V2DI, SI)) +DEF_MIPS_FTYPE (3, (V2DI, V2DI, V2DI, UQI)) +DEF_MIPS_FTYPE (3, (V2DI, V2DI, V2DI, V2DI)) +DEF_MIPS_FTYPE (3, (V2DI, V2DI, V4SI, V4SI)) +DEF_MIPS_FTYPE (2, (V2DI, V4SI, V4SI)) + DEF_MIPS_FTYPE (1, (V2HI, SI)) DEF_MIPS_FTYPE (2, (V2HI, SI, SI)) DEF_MIPS_FTYPE (3, (V2HI, SI, SI, SI)) @@ -118,12 +213,74 @@ DEF_MIPS_FTYPE (1, (V4QI, V4QI)) DEF_MIPS_FTYPE (2, (V4QI, V4QI, SI)) DEF_MIPS_FTYPE (2, (V4QI, V4QI, V4QI)) +DEF_MIPS_FTYPE (1, (V4SF, SF)) +DEF_MIPS_FTYPE (1, (V4SF, UV4SI)) +DEF_MIPS_FTYPE (2, (V4SF, V2DF, V2DF)) +DEF_MIPS_FTYPE (1, (V4SF, V4SF)) +DEF_MIPS_FTYPE (2, (V4SF, V4SF, V4SF)) +DEF_MIPS_FTYPE (3, (V4SF, V4SF, V4SF, V4SF)) +DEF_MIPS_FTYPE (2, (V4SF, V4SF, V4SI)) +DEF_MIPS_FTYPE (1, (V4SF, V4SI)) +DEF_MIPS_FTYPE (1, (V4SF, V8HI)) + +DEF_MIPS_FTYPE (2, (V4SI, CVPOINTER, SI)) +DEF_MIPS_FTYPE (1, (V4SI, HI)) +DEF_MIPS_FTYPE (1, (V4SI, SI)) +DEF_MIPS_FTYPE (2, (V4SI, UV4SI, UQI)) +DEF_MIPS_FTYPE (2, (V4SI, UV4SI, UV4SI)) +DEF_MIPS_FTYPE (2, (V4SI, UV8HI, UV8HI)) +DEF_MIPS_FTYPE (2, (V4SI, V2DF, V2DF)) +DEF_MIPS_FTYPE (1, (V4SI, V4SF)) +DEF_MIPS_FTYPE (2, (V4SI, V4SF, V4SF)) +DEF_MIPS_FTYPE (1, (V4SI, V4SI)) +DEF_MIPS_FTYPE (2, (V4SI, V4SI, QI)) +DEF_MIPS_FTYPE (2, (V4SI, V4SI, SI)) 
+DEF_MIPS_FTYPE (2, (V4SI, V4SI, UQI)) +DEF_MIPS_FTYPE (3, (V4SI, V4SI, UQI, SI)) +DEF_MIPS_FTYPE (3, (V4SI, V4SI, UQI, V4SI)) +DEF_MIPS_FTYPE (3, (V4SI, V4SI, UV8HI, UV8HI)) +DEF_MIPS_FTYPE (2, (V4SI, V4SI, V4SI)) +DEF_MIPS_FTYPE (3, (V4SI, V4SI, V4SI, SI)) +DEF_MIPS_FTYPE (3, (V4SI, V4SI, V4SI, UQI)) +DEF_MIPS_FTYPE (3, (V4SI, V4SI, V4SI, V4SI)) +DEF_MIPS_FTYPE (3, (V4SI, V4SI, V8HI, V8HI)) +DEF_MIPS_FTYPE (2, (V4SI, V8HI, V8HI)) + +DEF_MIPS_FTYPE (2, (V8HI, CVPOINTER, SI)) +DEF_MIPS_FTYPE (1, (V8HI, HI)) +DEF_MIPS_FTYPE (1, (V8HI, SI)) +DEF_MIPS_FTYPE (2, (V8HI, UV16QI, UV16QI)) +DEF_MIPS_FTYPE (2, (V8HI, UV8HI, UQI)) +DEF_MIPS_FTYPE (2, (V8HI, UV8HI, UV8HI)) +DEF_MIPS_FTYPE (2, (V8HI, V16QI, V16QI)) +DEF_MIPS_FTYPE (2, (V8HI, V4SF, V4SF)) +DEF_MIPS_FTYPE (1, (V8HI, V8HI)) +DEF_MIPS_FTYPE (2, (V8HI, V8HI, QI)) +DEF_MIPS_FTYPE (2, (V8HI, V8HI, SI)) +DEF_MIPS_FTYPE (3, (V8HI, V8HI, SI, UQI)) +DEF_MIPS_FTYPE (2, (V8HI, V8HI, UQI)) +DEF_MIPS_FTYPE (3, (V8HI, V8HI, UQI, SI)) +DEF_MIPS_FTYPE (3, (V8HI, V8HI, UQI, V8HI)) +DEF_MIPS_FTYPE (3, (V8HI, V8HI, UV16QI, UV16QI)) +DEF_MIPS_FTYPE (3, (V8HI, V8HI, V16QI, V16QI)) +DEF_MIPS_FTYPE (2, (V8HI, V8HI, V8HI)) +DEF_MIPS_FTYPE (3, (V8HI, V8HI, V8HI, SI)) +DEF_MIPS_FTYPE (3, (V8HI, V8HI, V8HI, UQI)) +DEF_MIPS_FTYPE (3, (V8HI, V8HI, V8HI, V8HI)) + DEF_MIPS_FTYPE (2, (V8QI, V4HI, V4HI)) DEF_MIPS_FTYPE (1, (V8QI, V8QI)) DEF_MIPS_FTYPE (2, (V8QI, V8QI, V8QI)) DEF_MIPS_FTYPE (2, (VOID, SI, CVPOINTER)) DEF_MIPS_FTYPE (2, (VOID, SI, SI)) +DEF_MIPS_FTYPE (2, (VOID, UQI, SI)) DEF_MIPS_FTYPE (1, (VOID, USI)) +DEF_MIPS_FTYPE (3, (VOID, V16QI, CVPOINTER, SI)) +DEF_MIPS_FTYPE (3, (VOID, V2DF, POINTER, SI)) +DEF_MIPS_FTYPE (3, (VOID, V2DI, CVPOINTER, SI)) DEF_MIPS_FTYPE (2, (VOID, V2HI, V2HI)) DEF_MIPS_FTYPE (2, (VOID, V4QI, V4QI)) +DEF_MIPS_FTYPE (3, (VOID, V4SF, POINTER, SI)) +DEF_MIPS_FTYPE (3, (VOID, V4SI, CVPOINTER, SI)) +DEF_MIPS_FTYPE (3, (VOID, V8HI, CVPOINTER, SI)) diff --git a/gcc/config/mips/mips-modes.def 
b/gcc/config/mips/mips-modes.def index 08d713243d9..b21f5d16c95 100644 --- a/gcc/config/mips/mips-modes.def +++ b/gcc/config/mips/mips-modes.def @@ -24,11 +24,17 @@ VECTOR_MODES (INT, 4); /* V4QI V2HI */ VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI */ VECTOR_MODES (FLOAT, 8); /* V4HF V2SF */ +/* For MIPS MSA 128 bits. */ +VECTOR_MODES (INT, 16); /* V16QI V8HI V4SI V2DI */ +VECTOR_MODES (FLOAT, 16); /* V4SF V2DF */ + /* Double-sized vector modes for vec_concat. */ -VECTOR_MODE (INT, QI, 16); /* V16QI */ -VECTOR_MODE (INT, HI, 8); /* V8HI */ -VECTOR_MODE (INT, SI, 4); /* V4SI */ -VECTOR_MODE (FLOAT, SF, 4); /* V4SF */ +VECTOR_MODE (INT, QI, 32); /* V32QI */ +VECTOR_MODE (INT, HI, 16); /* V16HI */ +VECTOR_MODE (INT, SI, 8); /* V8SI */ +VECTOR_MODE (INT, DI, 4); /* V4DI */ +VECTOR_MODE (FLOAT, SF, 8); /* V8SF */ +VECTOR_MODE (FLOAT, DF, 4); /* V4DF */ VECTOR_MODES (FRACT, 4); /* V4QQ V2HQ */ VECTOR_MODES (UFRACT, 4); /* V4UQQ V2UHQ */ diff --git a/gcc/config/mips/mips-msa.md b/gcc/config/mips/mips-msa.md new file mode 100644 index 00000000000..1082856dd98 --- /dev/null +++ b/gcc/config/mips/mips-msa.md @@ -0,0 +1,2736 @@ +;; Machine Description for MIPS MSA ASE +;; Based on the MIPS MSA spec Revision 1.11 8/4/2014 +;; +;; Copyright (C) 2015 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . 
+;; + +(define_c_enum "unspec" [ + UNSPEC_MSA_ASUB_S + UNSPEC_MSA_ASUB_U + UNSPEC_MSA_AVE_S + UNSPEC_MSA_AVE_U + UNSPEC_MSA_AVER_S + UNSPEC_MSA_AVER_U + UNSPEC_MSA_BCLR + UNSPEC_MSA_BCLRI + UNSPEC_MSA_BINSL + UNSPEC_MSA_BINSLI + UNSPEC_MSA_BINSR + UNSPEC_MSA_BINSRI + UNSPEC_MSA_BNEG + UNSPEC_MSA_BNEGI + UNSPEC_MSA_BSET + UNSPEC_MSA_BSETI + UNSPEC_MSA_BRANCH_V + UNSPEC_MSA_BRANCH + UNSPEC_MSA_CFCMSA + UNSPEC_MSA_CTCMSA + UNSPEC_MSA_FCAF + UNSPEC_MSA_FCLASS + UNSPEC_MSA_FCUNE + UNSPEC_MSA_FEXDO + UNSPEC_MSA_FEXP2 + UNSPEC_MSA_FEXUPL + UNSPEC_MSA_FEXUPR + UNSPEC_MSA_FFQL + UNSPEC_MSA_FFQR + UNSPEC_MSA_FLOG2 + UNSPEC_MSA_FRCP + UNSPEC_MSA_FRINT + UNSPEC_MSA_FRSQRT + UNSPEC_MSA_FSAF + UNSPEC_MSA_FSEQ + UNSPEC_MSA_FSLE + UNSPEC_MSA_FSLT + UNSPEC_MSA_FSNE + UNSPEC_MSA_FSOR + UNSPEC_MSA_FSUEQ + UNSPEC_MSA_FSULE + UNSPEC_MSA_FSULT + UNSPEC_MSA_FSUN + UNSPEC_MSA_FSUNE + UNSPEC_MSA_FTINT_S + UNSPEC_MSA_FTINT_U + UNSPEC_MSA_FTQ + UNSPEC_MSA_MADD_Q + UNSPEC_MSA_MADDR_Q + UNSPEC_MSA_MSUB_Q + UNSPEC_MSA_MSUBR_Q + UNSPEC_MSA_MUL_Q + UNSPEC_MSA_MULR_Q + UNSPEC_MSA_NLOC + UNSPEC_MSA_SAT_S + UNSPEC_MSA_SAT_U + UNSPEC_MSA_SLD + UNSPEC_MSA_SLDI + UNSPEC_MSA_SPLAT + UNSPEC_MSA_SPLATI + UNSPEC_MSA_SRAR + UNSPEC_MSA_SRARI + UNSPEC_MSA_SRLR + UNSPEC_MSA_SRLRI + UNSPEC_MSA_SUBS_S + UNSPEC_MSA_SUBS_U + UNSPEC_MSA_SUBSUU_S + UNSPEC_MSA_SUBSUS_U + UNSPEC_MSA_VSHF +]) + +;; All vector modes with 128 bits. +(define_mode_iterator MSA [V2DF V4SF V2DI V4SI V8HI V16QI]) + +;; Same as MSA. Used by vcond to iterate two modes. +(define_mode_iterator MSA_2 [V2DF V4SF V2DI V4SI V8HI V16QI]) + +;; Only used for splitting insert_d and copy_{u,s}.d. +(define_mode_iterator MSA_D [V2DI V2DF]) + +;; Only used for copy_{u,s}.w. +(define_mode_iterator MSA_W [V4SI V4SF]) + +;; Only integer modes. +(define_mode_iterator IMSA [V2DI V4SI V8HI V16QI]) + +;; As IMSA but excludes V16QI. +(define_mode_iterator IMSA_DWH [V2DI V4SI V8HI]) + +;; As IMSA but excludes V2DI. 
+(define_mode_iterator IMSA_WHB [V4SI V8HI V16QI]) + +;; Only integer modes equal or larger than a word. +(define_mode_iterator IMSA_DW [V2DI V4SI]) + +;; Only integer modes smaller than a word. +(define_mode_iterator IMSA_HB [V8HI V16QI]) + +;; Only integer modes for fixed-point madd_q/maddr_q. +(define_mode_iterator IMSA_WH [V4SI V8HI]) + +;; Only floating-point modes. +(define_mode_iterator FMSA [V2DF V4SF]) + +;; Only used for immediate set shuffle elements instruction. +(define_mode_iterator MSA_WHB_W [V4SI V8HI V16QI V4SF]) + +;; The attribute gives the integer vector mode with same size. +(define_mode_attr VIMODE + [(V2DF "V2DI") + (V4SF "V4SI") + (V2DI "V2DI") + (V4SI "V4SI") + (V8HI "V8HI") + (V16QI "V16QI")]) + +;; The attribute gives half modes for vector modes. +(define_mode_attr VHMODE + [(V8HI "V16QI") + (V4SI "V8HI") + (V2DI "V4SI")]) + +;; The attribute gives double modes for vector modes. +(define_mode_attr VDMODE + [(V4SI "V2DI") + (V8HI "V4SI") + (V16QI "V8HI")]) + +;; The attribute gives half modes with same number of elements for vector modes. +(define_mode_attr VTRUNCMODE + [(V8HI "V8QI") + (V4SI "V4HI") + (V2DI "V2SI")]) + +;; This attribute gives the mode of the result for "copy_s_b, copy_u_b" etc. +(define_mode_attr VRES + [(V2DF "DF") + (V4SF "SF") + (V2DI "DI") + (V4SI "SI") + (V8HI "SI") + (V16QI "SI")]) + +;; Only used with MSA_D iterator. +(define_mode_attr msa_d + [(V2DI "reg_or_0") + (V2DF "register")]) + +;; This attribute gives the integer vector mode with same size. +(define_mode_attr mode_i + [(V2DF "v2di") + (V4SF "v4si") + (V2DI "v2di") + (V4SI "v4si") + (V8HI "v8hi") + (V16QI "v16qi")]) + +;; This attribute gives suffix for MSA instructions. +(define_mode_attr msafmt + [(V2DF "d") + (V4SF "w") + (V2DI "d") + (V4SI "w") + (V8HI "h") + (V16QI "b")]) + +;; This attribute gives suffix for integers in VHMODE. 
+(define_mode_attr hmsafmt + [(V2DI "w") + (V4SI "h") + (V8HI "b")]) + +;; This attribute gives define_insn suffix for MSA instructions that need +;; distinction between integer and floating point. +(define_mode_attr msafmt_f + [(V2DF "d_f") + (V4SF "w_f") + (V2DI "d") + (V4SI "w") + (V8HI "h") + (V16QI "b")]) + +;; This is used to form an immediate operand constraint using +;; "const__operand". +(define_mode_attr indeximm + [(V2DF "0_or_1") + (V4SF "0_to_3") + (V2DI "0_or_1") + (V4SI "0_to_3") + (V8HI "uimm3") + (V16QI "uimm4")]) + +;; This attribute represents bitmask needed for vec_merge using +;; "const__operand". +(define_mode_attr bitmask + [(V2DF "exp_2") + (V4SF "exp_4") + (V2DI "exp_2") + (V4SI "exp_4") + (V8HI "exp_8") + (V16QI "exp_16")]) + +;; This attribute is used to form an immediate operand constraint using +;; "const__operand". +(define_mode_attr bitimm + [(V16QI "uimm3") + (V8HI "uimm4") + (V4SI "uimm5") + (V2DI "uimm6")]) + +(define_expand "vec_init" + [(match_operand:MSA 0 "register_operand") + (match_operand:MSA 1 "")] + "ISA_HAS_MSA" +{ + mips_expand_vector_init (operands[0], operands[1]); + DONE; +}) + +;; pckev pattern with implicit type conversion. 
+(define_insn "vec_pack_trunc_" + [(set (match_operand: 0 "register_operand" "=f") + (vec_concat: + (truncate: + (match_operand:IMSA_DWH 1 "register_operand" "f")) + (truncate: + (match_operand:IMSA_DWH 2 "register_operand" "f"))))] + "ISA_HAS_MSA" + "pckev.\t%w0,%w2,%w1" + [(set_attr "type" "simd_permute") + (set_attr "mode" "")]) + +(define_expand "vec_unpacks_hi_v4sf" + [(set (match_operand:V2DF 0 "register_operand" "=f") + (float_extend:V2DF + (vec_select:V2SF + (match_operand:V4SF 1 "register_operand" "f") + (match_dup 2))))] + "ISA_HAS_MSA" +{ + operands[2] = mips_msa_vec_parallel_const_half (V4SFmode, true/*high_p*/); +}) + +(define_expand "vec_unpacks_lo_v4sf" + [(set (match_operand:V2DF 0 "register_operand" "=f") + (float_extend:V2DF + (vec_select:V2SF + (match_operand:V4SF 1 "register_operand" "f") + (match_dup 2))))] + "ISA_HAS_MSA" +{ + operands[2] = mips_msa_vec_parallel_const_half (V4SFmode, false/*high_p*/); +}) + +(define_expand "vec_unpacks_hi_" + [(match_operand: 0 "register_operand") + (match_operand:IMSA_WHB 1 "register_operand")] + "ISA_HAS_MSA" +{ + mips_expand_vec_unpack (operands, false/*unsigned_p*/, true/*high_p*/); + DONE; +}) + +(define_expand "vec_unpacks_lo_" + [(match_operand: 0 "register_operand") + (match_operand:IMSA_WHB 1 "register_operand")] + "ISA_HAS_MSA" +{ + mips_expand_vec_unpack (operands, false/*unsigned_p*/, false/*high_p*/); + DONE; +}) + +(define_expand "vec_unpacku_hi_" + [(match_operand: 0 "register_operand") + (match_operand:IMSA_WHB 1 "register_operand")] + "ISA_HAS_MSA" +{ + mips_expand_vec_unpack (operands, true/*unsigned_p*/, true/*high_p*/); + DONE; +}) + +(define_expand "vec_unpacku_lo_" + [(match_operand: 0 "register_operand") + (match_operand:IMSA_WHB 1 "register_operand")] + "ISA_HAS_MSA" +{ + mips_expand_vec_unpack (operands, true/*unsigned_p*/, false/*high_p*/); + DONE; +}) + +(define_expand "vec_extract" + [(match_operand: 0 "register_operand") + (match_operand:IMSA 1 "register_operand") + (match_operand 
2 "const__operand")] + "ISA_HAS_MSA" +{ + if (mode == QImode || mode == HImode) + { + rtx dest1 = gen_reg_rtx (SImode); + emit_insn (gen_msa_copy_s_ (dest1, operands[1], operands[2])); + emit_move_insn (operands[0], + gen_lowpart (mode, dest1)); + } + else + emit_insn (gen_msa_copy_s_ (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_expand "vec_extract" + [(match_operand: 0 "register_operand") + (match_operand:FMSA 1 "register_operand") + (match_operand 2 "const__operand")] + "ISA_HAS_MSA" +{ + rtx temp; + HOST_WIDE_INT val = INTVAL (operands[2]); + + if (val == 0) + temp = operands[1]; + else + { + /* We need to do the SLDI operation in V16QImode and adjust + operands[2] accordingly. */ + rtx wd = gen_reg_rtx (V16QImode); + rtx ws = gen_reg_rtx (V16QImode); + emit_move_insn (ws, gen_rtx_SUBREG (V16QImode, operands[1], 0)); + rtx n = GEN_INT (val * GET_MODE_SIZE (mode)); + gcc_assert (INTVAL (n) < GET_MODE_NUNITS (V16QImode)); + emit_insn (gen_msa_sldi_b (wd, ws, ws, n)); + temp = gen_reg_rtx (mode); + emit_move_insn (temp, gen_rtx_SUBREG (mode, wd, 0)); + } + emit_insn (gen_msa_vec_extract_ (operands[0], temp)); + DONE; +}) + +(define_insn_and_split "msa_vec_extract_" + [(set (match_operand: 0 "register_operand" "=f") + (vec_select: + (match_operand:FMSA 1 "register_operand" "f") + (parallel [(const_int 0)])))] + "ISA_HAS_MSA" + "#" + "&& reload_completed" + [(set (match_dup 0) (match_dup 1))] + "operands[1] = gen_rtx_REG (mode, REGNO (operands[1]));" + [(set_attr "move_type" "fmove") + (set_attr "mode" "")]) + +(define_expand "vec_set" + [(match_operand:IMSA 0 "register_operand") + (match_operand: 1 "reg_or_0_operand") + (match_operand 2 "const__operand")] + "ISA_HAS_MSA" +{ + rtx index = GEN_INT (1 << INTVAL (operands[2])); + emit_insn (gen_msa_insert_ (operands[0], operands[1], + operands[0], index)); + DONE; +}) + +(define_expand "vec_set" + [(match_operand:FMSA 0 "register_operand") + (match_operand: 1 "register_operand") + (match_operand 2 
"const__operand")] + "ISA_HAS_MSA" +{ + rtx index = GEN_INT (1 << INTVAL (operands[2])); + emit_insn (gen_msa_insve__scalar (operands[0], operands[1], + operands[0], index)); + DONE; +}) + +(define_expand "vcondu" + [(match_operand:MSA 0 "register_operand") + (match_operand:MSA 1 "reg_or_m1_operand") + (match_operand:MSA 2 "reg_or_0_operand") + (match_operator 3 "" + [(match_operand:IMSA 4 "register_operand") + (match_operand:IMSA 5 "register_operand")])] + "ISA_HAS_MSA + && (GET_MODE_NUNITS (mode) == GET_MODE_NUNITS (mode))" +{ + mips_expand_vec_cond_expr (mode, mode, operands); + DONE; +}) + +(define_expand "vcond" + [(match_operand:MSA 0 "register_operand") + (match_operand:MSA 1 "reg_or_m1_operand") + (match_operand:MSA 2 "reg_or_0_operand") + (match_operator 3 "" + [(match_operand:MSA_2 4 "register_operand") + (match_operand:MSA_2 5 "register_operand")])] + "ISA_HAS_MSA + && (GET_MODE_NUNITS (mode) == GET_MODE_NUNITS (mode))" +{ + mips_expand_vec_cond_expr (mode, mode, operands); + DONE; +}) + +(define_insn "msa_insert_" + [(set (match_operand:MSA 0 "register_operand" "=f") + (vec_merge:MSA + (vec_duplicate:MSA + (match_operand: 1 "reg_or_0_operand" "dJ")) + (match_operand:MSA 2 "register_operand" "0") + (match_operand 3 "const__operand" "")))] + "ISA_HAS_MSA" +{ + if (!TARGET_64BIT && (mode == V2DImode || mode == V2DFmode)) + return "#"; + else + return "insert.\t%w0[%y3],%z1"; +} + [(set_attr "type" "simd_insert") + (set_attr "mode" "")]) + +(define_split + [(set (match_operand:MSA_D 0 "register_operand") + (vec_merge:MSA_D + (vec_duplicate:MSA_D + (match_operand: 1 "_operand")) + (match_operand:MSA_D 2 "register_operand") + (match_operand 3 "const__operand")))] + "reload_completed && ISA_HAS_MSA && !TARGET_64BIT" + [(const_int 0)] +{ + mips_split_msa_insert_d (operands[0], operands[2], operands[3], operands[1]); + DONE; +}) + +(define_insn "msa_insve_" + [(set (match_operand:MSA 0 "register_operand" "=f") + (vec_merge:MSA + (vec_duplicate:MSA + (vec_select: 
+ (match_operand:MSA 1 "register_operand" "f") + (parallel [(const_int 0)]))) + (match_operand:MSA 2 "register_operand" "0") + (match_operand 3 "const__operand" "")))] + "ISA_HAS_MSA" + "insve.\t%w0[%y3],%w1[0]" + [(set_attr "type" "simd_insert") + (set_attr "mode" "")]) + +;; Operand 3 is a scalar. +(define_insn "msa_insve__scalar" + [(set (match_operand:FMSA 0 "register_operand" "=f") + (vec_merge:FMSA + (vec_duplicate:FMSA + (match_operand: 1 "register_operand" "f")) + (match_operand:FMSA 2 "register_operand" "0") + (match_operand 3 "const__operand" "")))] + "ISA_HAS_MSA" + "insve.\t%w0[%y3],%w1[0]" + [(set_attr "type" "simd_insert") + (set_attr "mode" "")]) + +(define_insn "msa_copy__" + [(set (match_operand: 0 "register_operand" "=d") + (any_extend: + (vec_select: + (match_operand:IMSA_HB 1 "register_operand" "f") + (parallel [(match_operand 2 "const__operand" "")]))))] + "ISA_HAS_MSA" + "copy_.\t%0,%w1[%2]" + [(set_attr "type" "simd_copy") + (set_attr "mode" "")]) + +(define_insn "msa_copy_u_w" + [(set (match_operand:DI 0 "register_operand" "=d") + (zero_extend:DI + (vec_select:SI + (match_operand:V4SI 1 "register_operand" "f") + (parallel [(match_operand 2 "const_0_to_3_operand" "")]))))] + "ISA_HAS_MSA && TARGET_64BIT" + "copy_u.w\t%0,%w1[%2]" + [(set_attr "type" "simd_copy") + (set_attr "mode" "V4SI")]) + +(define_insn "msa_copy_s__64bit" + [(set (match_operand:DI 0 "register_operand" "=d") + (sign_extend:DI + (vec_select: + (match_operand:MSA_W 1 "register_operand" "f") + (parallel [(match_operand 2 "const__operand" "")]))))] + "ISA_HAS_MSA && TARGET_64BIT" + "copy_s.\t%0,%w1[%2]" + [(set_attr "type" "simd_copy") + (set_attr "mode" "")]) + +(define_insn "msa_copy_s_" + [(set (match_operand: 0 "register_operand" "=d") + (vec_select: + (match_operand:MSA_W 1 "register_operand" "f") + (parallel [(match_operand 2 "const__operand" "")])))] + "ISA_HAS_MSA" + "copy_s.\t%0,%w1[%2]" + [(set_attr "type" "simd_copy") + (set_attr "mode" "")]) + 
+(define_insn_and_split "msa_copy_s_" + [(set (match_operand: 0 "register_operand" "=d") + (vec_select: + (match_operand:MSA_D 1 "register_operand" "f") + (parallel [(match_operand 2 "const__operand" "")])))] + "ISA_HAS_MSA" +{ + if (TARGET_64BIT) + return "copy_s.\t%0,%w1[%2]"; + else + return "#"; +} + "reload_completed && ISA_HAS_MSA && !TARGET_64BIT" + [(const_int 0)] +{ + mips_split_msa_copy_d (operands[0], operands[1], operands[2], + gen_msa_copy_s_w); + DONE; +} + [(set_attr "type" "simd_copy") + (set_attr "mode" "")]) + +(define_expand "vec_perm_const" + [(match_operand:MSA 0 "register_operand") + (match_operand:MSA 1 "register_operand") + (match_operand:MSA 2 "register_operand") + (match_operand: 3 "")] + "ISA_HAS_MSA" +{ + if (mips_expand_vec_perm_const (operands)) + DONE; + else + FAIL; +}) + +(define_expand "abs2" + [(match_operand:IMSA 0 "register_operand" "=f") + (abs:IMSA (match_operand:IMSA 1 "register_operand" "f"))] + "ISA_HAS_MSA" +{ + rtx reg = gen_reg_rtx (mode); + emit_move_insn (reg, CONST0_RTX (mode)); + emit_insn (gen_msa_add_a_ (operands[0], operands[1], reg)); + DONE; +}) + +(define_expand "neg2" + [(set (match_operand:MSA 0 "register_operand") + (minus:MSA (match_dup 2) + (match_operand:MSA 1 "register_operand")))] + "ISA_HAS_MSA" +{ + rtx reg = gen_reg_rtx (mode); + emit_move_insn (reg, CONST0_RTX (mode)); + operands[2] = reg; +}) + +(define_expand "msa_ldi" + [(match_operand:IMSA 0 "register_operand") + (match_operand 1 "const_imm10_operand")] + "ISA_HAS_MSA" +{ + if (mode == V16QImode) + operands[1] = GEN_INT (trunc_int_for_mode (INTVAL (operands[1]), + mode)); + emit_move_insn (operands[0], + mips_gen_const_int_vector (mode, INTVAL (operands[1]))); + DONE; +}) + +(define_insn "vec_perm" + [(set (match_operand:MSA 0 "register_operand" "=f") + (unspec:MSA [(match_operand:MSA 1 "register_operand" "f") + (match_operand:MSA 2 "register_operand" "f") + (match_operand: 3 "register_operand" "0")] + UNSPEC_MSA_VSHF))] + "ISA_HAS_MSA" + 
"vshf.\t%w0,%w2,%w1" + [(set_attr "type" "simd_sld") + (set_attr "mode" "")]) + +(define_expand "mov" + [(set (match_operand:MSA 0) + (match_operand:MSA 1))] + "ISA_HAS_MSA" +{ + if (mips_legitimize_move (mode, operands[0], operands[1])) + DONE; +}) + +(define_expand "movmisalign" + [(set (match_operand:MSA 0) + (match_operand:MSA 1))] + "ISA_HAS_MSA" +{ + if (mips_legitimize_move (mode, operands[0], operands[1])) + DONE; +}) + +;; 128-bit MSA modes can only exist in MSA registers or memory. An exception +;; is allowing MSA modes for GP registers for arguments and return values. +(define_insn "mov_msa" + [(set (match_operand:MSA 0 "nonimmediate_operand" "=f,f,R,*d,*f") + (match_operand:MSA 1 "move_operand" "fYGYI,R,f,*f,*d"))] + "ISA_HAS_MSA" + { return mips_output_move (operands[0], operands[1]); } + [(set_attr "type" "simd_move,simd_load,simd_store,simd_copy,simd_insert") + (set_attr "mode" "")]) + +(define_split + [(set (match_operand:MSA 0 "nonimmediate_operand") + (match_operand:MSA 1 "move_operand"))] + "reload_completed && ISA_HAS_MSA + && mips_split_move_insn_p (operands[0], operands[1], insn)" + [(const_int 0)] +{ + mips_split_move_insn (operands[0], operands[1], curr_insn); + DONE; +}) + +;; Offset load +(define_expand "msa_ld_" + [(match_operand:MSA 0 "register_operand") + (match_operand 1 "pmode_register_operand") + (match_operand 2 "aq10_operand")] + "ISA_HAS_MSA" +{ + rtx addr = plus_constant (GET_MODE (operands[1]), operands[1], + INTVAL (operands[2])); + mips_emit_move (operands[0], gen_rtx_MEM (mode, addr)); + DONE; +}) + +;; Offset store +(define_expand "msa_st_" + [(match_operand:MSA 0 "register_operand") + (match_operand 1 "pmode_register_operand") + (match_operand 2 "aq10_operand")] + "ISA_HAS_MSA" +{ + rtx addr = plus_constant (GET_MODE (operands[1]), operands[1], + INTVAL (operands[2])); + mips_emit_move (gen_rtx_MEM (mode, addr), operands[0]); + DONE; +}) + +;; Integer operations +(define_insn "add3" + [(set (match_operand:IMSA 0 
"register_operand" "=f,f,f") + (plus:IMSA + (match_operand:IMSA 1 "register_operand" "f,f,f") + (match_operand:IMSA 2 "reg_or_vector_same_ximm5_operand" "f,Unv5,Uuv5")))] + "ISA_HAS_MSA" +{ + switch (which_alternative) + { + case 0: + return "addv.\t%w0,%w1,%w2"; + case 1: + { + HOST_WIDE_INT val = INTVAL (CONST_VECTOR_ELT (operands[2], 0)); + + operands[2] = GEN_INT (-val); + return "subvi.\t%w0,%w1,%d2"; + } + case 2: + return "addvi.\t%w0,%w1,%E2"; + default: + gcc_unreachable (); + } +} + [(set_attr "alu_type" "simd_add") + (set_attr "type" "simd_int_arith") + (set_attr "mode" "")]) + +(define_insn "sub3" + [(set (match_operand:IMSA 0 "register_operand" "=f,f") + (minus:IMSA + (match_operand:IMSA 1 "register_operand" "f,f") + (match_operand:IMSA 2 "reg_or_vector_same_uimm5_operand" "f,Uuv5")))] + "ISA_HAS_MSA" + "@ + subv.\t%w0,%w1,%w2 + subvi.\t%w0,%w1,%E2" + [(set_attr "alu_type" "simd_add") + (set_attr "type" "simd_int_arith") + (set_attr "mode" "")]) + +(define_insn "mul3" + [(set (match_operand:IMSA 0 "register_operand" "=f") + (mult:IMSA (match_operand:IMSA 1 "register_operand" "f") + (match_operand:IMSA 2 "register_operand" "f")))] + "ISA_HAS_MSA" + "mulv.\t%w0,%w1,%w2" + [(set_attr "type" "simd_mul") + (set_attr "mode" "")]) + +(define_insn "msa_maddv_" + [(set (match_operand:IMSA 0 "register_operand" "=f") + (plus:IMSA (mult:IMSA (match_operand:IMSA 1 "register_operand" "f") + (match_operand:IMSA 2 "register_operand" "f")) + (match_operand:IMSA 3 "register_operand" "0")))] + "ISA_HAS_MSA" + "maddv.\t%w0,%w1,%w2" + [(set_attr "type" "simd_mul") + (set_attr "mode" "")]) + +(define_insn "msa_msubv_" + [(set (match_operand:IMSA 0 "register_operand" "=f") + (minus:IMSA (match_operand:IMSA 1 "register_operand" "0") + (mult:IMSA (match_operand:IMSA 2 "register_operand" "f") + (match_operand:IMSA 3 "register_operand" "f"))))] + "ISA_HAS_MSA" + "msubv.\t%w0,%w2,%w3" + [(set_attr "type" "simd_mul") + (set_attr "mode" "")]) + +(define_insn "div3" + [(set 
(match_operand:IMSA 0 "register_operand" "=f") + (div:IMSA (match_operand:IMSA 1 "register_operand" "f") + (match_operand:IMSA 2 "register_operand" "f")))] + "ISA_HAS_MSA" + { return mips_msa_output_division ("div_s.\t%w0,%w1,%w2", operands); } + [(set_attr "type" "simd_div") + (set_attr "mode" "")]) + +(define_insn "udiv3" + [(set (match_operand:IMSA 0 "register_operand" "=f") + (udiv:IMSA (match_operand:IMSA 1 "register_operand" "f") + (match_operand:IMSA 2 "register_operand" "f")))] + "ISA_HAS_MSA" + { return mips_msa_output_division ("div_u.\t%w0,%w1,%w2", operands); } + [(set_attr "type" "simd_div") + (set_attr "mode" "")]) + +(define_insn "mod3" + [(set (match_operand:IMSA 0 "register_operand" "=f") + (mod:IMSA (match_operand:IMSA 1 "register_operand" "f") + (match_operand:IMSA 2 "register_operand" "f")))] + "ISA_HAS_MSA" + { return mips_msa_output_division ("mod_s.\t%w0,%w1,%w2", operands); } + [(set_attr "type" "simd_div") + (set_attr "mode" "")]) + +(define_insn "umod3" + [(set (match_operand:IMSA 0 "register_operand" "=f") + (umod:IMSA (match_operand:IMSA 1 "register_operand" "f") + (match_operand:IMSA 2 "register_operand" "f")))] + "ISA_HAS_MSA" + { return mips_msa_output_division ("mod_u.\t%w0,%w1,%w2", operands); } + [(set_attr "type" "simd_div") + (set_attr "mode" "")]) + +(define_insn "xor3" + [(set (match_operand:IMSA 0 "register_operand" "=f,f,f") + (xor:IMSA + (match_operand:IMSA 1 "register_operand" "f,f,f") + (match_operand:IMSA 2 "reg_or_vector_same_val_operand" "f,YC,Urv8")))] + "ISA_HAS_MSA" + "@ + xor.v\t%w0,%w1,%w2 + bnegi.%v0\t%w0,%w1,%V2 + xori.b\t%w0,%w1,%B2" + [(set_attr "type" "simd_logic,simd_bit,simd_logic") + (set_attr "mode" "")]) + +(define_insn "ior3" + [(set (match_operand:IMSA 0 "register_operand" "=f,f,f") + (ior:IMSA + (match_operand:IMSA 1 "register_operand" "f,f,f") + (match_operand:IMSA 2 "reg_or_vector_same_val_operand" "f,YC,Urv8")))] + "ISA_HAS_MSA" + "@ + or.v\t%w0,%w1,%w2 + bseti.%v0\t%w0,%w1,%V2 + ori.b\t%w0,%w1,%B2" 
+ [(set_attr "type" "simd_logic,simd_bit,simd_logic") + (set_attr "mode" "")]) + +(define_insn "and3" + [(set (match_operand:IMSA 0 "register_operand" "=f,f,f") + (and:IMSA + (match_operand:IMSA 1 "register_operand" "f,f,f") + (match_operand:IMSA 2 "reg_or_vector_same_val_operand" "f,YZ,Urv8")))] + "ISA_HAS_MSA" +{ + switch (which_alternative) + { + case 0: + return "and.v\t%w0,%w1,%w2"; + case 1: + { + rtx elt0 = CONST_VECTOR_ELT (operands[2], 0); + unsigned HOST_WIDE_INT val = ~UINTVAL (elt0); + operands[2] = mips_gen_const_int_vector (mode, val & (-val)); + return "bclri.%v0\t%w0,%w1,%V2"; + } + case 2: + return "andi.b\t%w0,%w1,%B2"; + default: + gcc_unreachable (); + } +} + [(set_attr "type" "simd_logic,simd_bit,simd_logic") + (set_attr "mode" "")]) + +(define_insn "one_cmpl2" + [(set (match_operand:IMSA 0 "register_operand" "=f") + (not:IMSA (match_operand:IMSA 1 "register_operand" "f")))] + "ISA_HAS_MSA" + "nor.v\t%w0,%w1,%w1" + [(set_attr "type" "simd_logic") + (set_attr "mode" "TI")]) + +(define_insn "vlshr3" + [(set (match_operand:IMSA 0 "register_operand" "=f,f") + (lshiftrt:IMSA + (match_operand:IMSA 1 "register_operand" "f,f") + (match_operand:IMSA 2 "reg_or_vector_same_uimm6_operand" "f,Uuv6")))] + "ISA_HAS_MSA" + "@ + srl.\t%w0,%w1,%w2 + srli.\t%w0,%w1,%E2" + [(set_attr "type" "simd_shift") + (set_attr "mode" "")]) + +(define_insn "vashr3" + [(set (match_operand:IMSA 0 "register_operand" "=f,f") + (ashiftrt:IMSA + (match_operand:IMSA 1 "register_operand" "f,f") + (match_operand:IMSA 2 "reg_or_vector_same_uimm6_operand" "f,Uuv6")))] + "ISA_HAS_MSA" + "@ + sra.\t%w0,%w1,%w2 + srai.\t%w0,%w1,%E2" + [(set_attr "type" "simd_shift") + (set_attr "mode" "")]) + +(define_insn "vashl3" + [(set (match_operand:IMSA 0 "register_operand" "=f,f") + (ashift:IMSA + (match_operand:IMSA 1 "register_operand" "f,f") + (match_operand:IMSA 2 "reg_or_vector_same_uimm6_operand" "f,Uuv6")))] + "ISA_HAS_MSA" + "@ + sll.\t%w0,%w1,%w2 + slli.\t%w0,%w1,%E2" + [(set_attr "type" 
"simd_shift") + (set_attr "mode" "")]) + +;; Floating-point operations +(define_insn "add3" + [(set (match_operand:FMSA 0 "register_operand" "=f") + (plus:FMSA (match_operand:FMSA 1 "register_operand" "f") + (match_operand:FMSA 2 "register_operand" "f")))] + "ISA_HAS_MSA" + "fadd.\t%w0,%w1,%w2" + [(set_attr "type" "simd_fadd") + (set_attr "mode" "")]) + +(define_insn "sub3" + [(set (match_operand:FMSA 0 "register_operand" "=f") + (minus:FMSA (match_operand:FMSA 1 "register_operand" "f") + (match_operand:FMSA 2 "register_operand" "f")))] + "ISA_HAS_MSA" + "fsub.\t%w0,%w1,%w2" + [(set_attr "type" "simd_fadd") + (set_attr "mode" "")]) + +(define_insn "mul3" + [(set (match_operand:FMSA 0 "register_operand" "=f") + (mult:FMSA (match_operand:FMSA 1 "register_operand" "f") + (match_operand:FMSA 2 "register_operand" "f")))] + "ISA_HAS_MSA" + "fmul.\t%w0,%w1,%w2" + [(set_attr "type" "simd_fmul") + (set_attr "mode" "")]) + +(define_insn "div3" + [(set (match_operand:FMSA 0 "register_operand" "=f") + (div:FMSA (match_operand:FMSA 1 "register_operand" "f") + (match_operand:FMSA 2 "register_operand" "f")))] + "ISA_HAS_MSA" + "fdiv.\t%w0,%w1,%w2" + [(set_attr "type" "simd_fdiv") + (set_attr "mode" "")]) + +(define_insn "fma4" + [(set (match_operand:FMSA 0 "register_operand" "=f") + (fma:FMSA (match_operand:FMSA 1 "register_operand" "f") + (match_operand:FMSA 2 "register_operand" "f") + (match_operand:FMSA 3 "register_operand" "0")))] + "ISA_HAS_MSA" + "fmadd.\t%w0,%w1,%w2" + [(set_attr "type" "simd_fmadd") + (set_attr "mode" "")]) + +(define_insn "fnma4" + [(set (match_operand:FMSA 0 "register_operand" "=f") + (fma:FMSA (neg:FMSA (match_operand:FMSA 1 "register_operand" "f")) + (match_operand:FMSA 2 "register_operand" "f") + (match_operand:FMSA 3 "register_operand" "0")))] + "ISA_HAS_MSA" + "fmsub.\t%w0,%w1,%w2" + [(set_attr "type" "simd_fmadd") + (set_attr "mode" "")]) + +(define_insn "sqrt2" + [(set (match_operand:FMSA 0 "register_operand" "=f") + (sqrt:FMSA 
(match_operand:FMSA 1 "register_operand" "f")))] + "ISA_HAS_MSA" + "fsqrt.\t%w0,%w1" + [(set_attr "type" "simd_fdiv") + (set_attr "mode" "")]) + +;; Built-in functions +(define_insn "msa_add_a_" + [(set (match_operand:IMSA 0 "register_operand" "=f") + (plus:IMSA (abs:IMSA (match_operand:IMSA 1 "register_operand" "f")) + (abs:IMSA (match_operand:IMSA 2 "register_operand" "f"))))] + "ISA_HAS_MSA" + "add_a.\t%w0,%w1,%w2" + [(set_attr "type" "simd_int_arith") + (set_attr "mode" "")]) + +(define_insn "msa_adds_a_" + [(set (match_operand:IMSA 0 "register_operand" "=f") + (ss_plus:IMSA + (abs:IMSA (match_operand:IMSA 1 "register_operand" "f")) + (abs:IMSA (match_operand:IMSA 2 "register_operand" "f"))))] + "ISA_HAS_MSA" + "adds_a.\t%w0,%w1,%w2" + [(set_attr "type" "simd_int_arith") + (set_attr "mode" "")]) + +(define_insn "ssadd3" + [(set (match_operand:IMSA 0 "register_operand" "=f") + (ss_plus:IMSA (match_operand:IMSA 1 "register_operand" "f") + (match_operand:IMSA 2 "register_operand" "f")))] + "ISA_HAS_MSA" + "adds_s.\t%w0,%w1,%w2" + [(set_attr "type" "simd_int_arith") + (set_attr "mode" "")]) + +(define_insn "usadd3" + [(set (match_operand:IMSA 0 "register_operand" "=f") + (us_plus:IMSA (match_operand:IMSA 1 "register_operand" "f") + (match_operand:IMSA 2 "register_operand" "f")))] + "ISA_HAS_MSA" + "adds_u.\t%w0,%w1,%w2" + [(set_attr "type" "simd_int_arith") + (set_attr "mode" "")]) + +(define_insn "msa_asub_s_" + [(set (match_operand:IMSA 0 "register_operand" "=f") + (unspec:IMSA [(match_operand:IMSA 1 "register_operand" "f") + (match_operand:IMSA 2 "register_operand" "f")] + UNSPEC_MSA_ASUB_S))] + "ISA_HAS_MSA" + "asub_s.\t%w0,%w1,%w2" + [(set_attr "type" "simd_int_arith") + (set_attr "mode" "")]) + +(define_insn "msa_asub_u_" + [(set (match_operand:IMSA 0 "register_operand" "=f") + (unspec:IMSA [(match_operand:IMSA 1 "register_operand" "f") + (match_operand:IMSA 2 "register_operand" "f")] + UNSPEC_MSA_ASUB_U))] + "ISA_HAS_MSA" + "asub_u.\t%w0,%w1,%w2" + 
[(set_attr "type" "simd_int_arith") + (set_attr "mode" "")]) + +(define_insn "msa_ave_s_" + [(set (match_operand:IMSA 0 "register_operand" "=f") + (unspec:IMSA [(match_operand:IMSA 1 "register_operand" "f") + (match_operand:IMSA 2 "register_operand" "f")] + UNSPEC_MSA_AVE_S))] + "ISA_HAS_MSA" + "ave_s.\t%w0,%w1,%w2" + [(set_attr "type" "simd_int_arith") + (set_attr "mode" "")]) + +(define_insn "msa_ave_u_" + [(set (match_operand:IMSA 0 "register_operand" "=f") + (unspec:IMSA [(match_operand:IMSA 1 "register_operand" "f") + (match_operand:IMSA 2 "register_operand" "f")] + UNSPEC_MSA_AVE_U))] + "ISA_HAS_MSA" + "ave_u.\t%w0,%w1,%w2" + [(set_attr "type" "simd_int_arith") + (set_attr "mode" "")]) + +(define_insn "msa_aver_s_" + [(set (match_operand:IMSA 0 "register_operand" "=f") + (unspec:IMSA [(match_operand:IMSA 1 "register_operand" "f") + (match_operand:IMSA 2 "register_operand" "f")] + UNSPEC_MSA_AVER_S))] + "ISA_HAS_MSA" + "aver_s.\t%w0,%w1,%w2" + [(set_attr "type" "simd_int_arith") + (set_attr "mode" "")]) + +(define_insn "msa_aver_u_" + [(set (match_operand:IMSA 0 "register_operand" "=f") + (unspec:IMSA [(match_operand:IMSA 1 "register_operand" "f") + (match_operand:IMSA 2 "register_operand" "f")] + UNSPEC_MSA_AVER_U))] + "ISA_HAS_MSA" + "aver_u.\t%w0,%w1,%w2" + [(set_attr "type" "simd_int_arith") + (set_attr "mode" "")]) + +(define_insn "msa_bclr_" + [(set (match_operand:IMSA 0 "register_operand" "=f") + (unspec:IMSA [(match_operand:IMSA 1 "register_operand" "f") + (match_operand:IMSA 2 "register_operand" "f")] + UNSPEC_MSA_BCLR))] + "ISA_HAS_MSA" + "bclr.\t%w0,%w1,%w2" + [(set_attr "type" "simd_bit") + (set_attr "mode" "")]) + +(define_insn "msa_bclri_" + [(set (match_operand:IMSA 0 "register_operand" "=f") + (unspec:IMSA [(match_operand:IMSA 1 "register_operand" "f") + (match_operand 2 "const__operand" "")] + UNSPEC_MSA_BCLRI))] + "ISA_HAS_MSA" + "bclri.\t%w0,%w1,%2" + [(set_attr "type" "simd_bit") + (set_attr "mode" "")]) + +(define_insn "msa_binsl_" + [(set 
(match_operand:IMSA 0 "register_operand" "=f") + (unspec:IMSA [(match_operand:IMSA 1 "register_operand" "0") + (match_operand:IMSA 2 "register_operand" "f") + (match_operand:IMSA 3 "register_operand" "f")] + UNSPEC_MSA_BINSL))] + "ISA_HAS_MSA" + "binsl.\t%w0,%w2,%w3" + [(set_attr "type" "simd_bitins") + (set_attr "mode" "")]) + +(define_insn "msa_binsli_" + [(set (match_operand:IMSA 0 "register_operand" "=f") + (unspec:IMSA [(match_operand:IMSA 1 "register_operand" "0") + (match_operand:IMSA 2 "register_operand" "f") + (match_operand 3 "const__operand" "")] + UNSPEC_MSA_BINSLI))] + "ISA_HAS_MSA" + "binsli.\t%w0,%w2,%3" + [(set_attr "type" "simd_bitins") + (set_attr "mode" "")]) + +(define_insn "msa_binsr_" + [(set (match_operand:IMSA 0 "register_operand" "=f") + (unspec:IMSA [(match_operand:IMSA 1 "register_operand" "0") + (match_operand:IMSA 2 "register_operand" "f") + (match_operand:IMSA 3 "register_operand" "f")] + UNSPEC_MSA_BINSR))] + "ISA_HAS_MSA" + "binsr.\t%w0,%w2,%w3" + [(set_attr "type" "simd_bitins") + (set_attr "mode" "")]) + +(define_insn "msa_binsri_" + [(set (match_operand:IMSA 0 "register_operand" "=f") + (unspec:IMSA [(match_operand:IMSA 1 "register_operand" "0") + (match_operand:IMSA 2 "register_operand" "f") + (match_operand 3 "const__operand" "")] + UNSPEC_MSA_BINSRI))] + "ISA_HAS_MSA" + "binsri.\t%w0,%w2,%3" + [(set_attr "type" "simd_bitins") + (set_attr "mode" "")]) + +(define_insn "msa_bmnz_" + [(set (match_operand:IMSA 0 "register_operand" "=f,f") + (ior:IMSA (and:IMSA (match_operand:IMSA 2 "register_operand" "f,f") + (match_operand:IMSA 3 "reg_or_vector_same_val_operand" "f,Urv8")) + (and:IMSA (not:IMSA (match_dup 3)) + (match_operand:IMSA 1 "register_operand" "0,0"))))] + "ISA_HAS_MSA" + "@ + bmnz.v\t%w0,%w2,%w3 + bmnzi.b\t%w0,%w2,%B3" + [(set_attr "type" "simd_bitmov") + (set_attr "mode" "")]) + +(define_insn "msa_bmz_" + [(set (match_operand:IMSA 0 "register_operand" "=f,f") + (ior:IMSA (and:IMSA (not:IMSA + (match_operand:IMSA 3 
"reg_or_vector_same_val_operand" "f,Urv8")) + (match_operand:IMSA 2 "register_operand" "f,f")) + (and:IMSA (match_operand:IMSA 1 "register_operand" "0,0") + (match_dup 3))))] + "ISA_HAS_MSA" + "@ + bmz.v\t%w0,%w2,%w3 + bmzi.b\t%w0,%w2,%B3" + [(set_attr "type" "simd_bitmov") + (set_attr "mode" "")]) + +(define_insn "msa_bneg_" + [(set (match_operand:IMSA 0 "register_operand" "=f") + (unspec:IMSA [(match_operand:IMSA 1 "register_operand" "f") + (match_operand:IMSA 2 "register_operand" "f")] + UNSPEC_MSA_BNEG))] + "ISA_HAS_MSA" + "bneg.\t%w0,%w1,%w2" + [(set_attr "type" "simd_bit") + (set_attr "mode" "")]) + +(define_insn "msa_bnegi_" + [(set (match_operand:IMSA 0 "register_operand" "=f") + (unspec:IMSA [(match_operand:IMSA 1 "register_operand" "f") + (match_operand 2 "const_msa_branch_operand" "")] + UNSPEC_MSA_BNEGI))] + "ISA_HAS_MSA" + "bnegi.\t%w0,%w1,%2" + [(set_attr "type" "simd_bit") + (set_attr "mode" "")]) + +(define_insn "msa_bsel_" + [(set (match_operand:IMSA 0 "register_operand" "=f,f") + (ior:IMSA (and:IMSA (not:IMSA + (match_operand:IMSA 1 "register_operand" "0,0")) + (match_operand:IMSA 2 "register_operand" "f,f")) + (and:IMSA (match_dup 1) + (match_operand:IMSA 3 "reg_or_vector_same_val_operand" "f,Urv8"))))] + "ISA_HAS_MSA" + "@ + bsel.v\t%w0,%w2,%w3 + bseli.b\t%w0,%w2,%B3" + [(set_attr "type" "simd_bitmov") + (set_attr "mode" "")]) + +(define_insn "msa_bset_" + [(set (match_operand:IMSA 0 "register_operand" "=f") + (unspec:IMSA [(match_operand:IMSA 1 "register_operand" "f") + (match_operand:IMSA 2 "register_operand" "f")] + UNSPEC_MSA_BSET))] + "ISA_HAS_MSA" + "bset.\t%w0,%w1,%w2" + [(set_attr "type" "simd_bit") + (set_attr "mode" "")]) + +(define_insn "msa_bseti_" + [(set (match_operand:IMSA 0 "register_operand" "=f") + (unspec:IMSA [(match_operand:IMSA 1 "register_operand" "f") + (match_operand 2 "const__operand" "")] + UNSPEC_MSA_BSETI))] + "ISA_HAS_MSA" + "bseti.\t%w0,%w1,%2" + [(set_attr "type" "simd_bit") + (set_attr "mode" "")]) + 
+(define_code_iterator ICC [eq le leu lt ltu]) + +(define_code_attr icc + [(eq "eq") + (le "le_s") + (leu "le_u") + (lt "lt_s") + (ltu "lt_u")]) + +(define_code_attr icci + [(eq "eqi") + (le "lei_s") + (leu "lei_u") + (lt "lti_s") + (ltu "lti_u")]) + +(define_code_attr cmpi + [(eq "s") + (le "s") + (leu "u") + (lt "s") + (ltu "u")]) + +(define_insn "msa_c_" + [(set (match_operand:IMSA 0 "register_operand" "=f,f") + (ICC:IMSA + (match_operand:IMSA 1 "register_operand" "f,f") + (match_operand:IMSA 2 "reg_or_vector_same_imm5_operand" "f,Uv5")))] + "ISA_HAS_MSA" + "@ + c.\t%w0,%w1,%w2 + c.\t%w0,%w1,%E2" + [(set_attr "type" "simd_int_arith") + (set_attr "mode" "")]) + +(define_insn "msa_dotp__d" + [(set (match_operand:V2DI 0 "register_operand" "=f") + (plus:V2DI + (mult:V2DI + (any_extend:V2DI + (vec_select:V2SI + (match_operand:V4SI 1 "register_operand" "%f") + (parallel [(const_int 0) (const_int 2)]))) + (any_extend:V2DI + (vec_select:V2SI + (match_operand:V4SI 2 "register_operand" "f") + (parallel [(const_int 0) (const_int 2)])))) + (mult:V2DI + (any_extend:V2DI + (vec_select:V4SI (match_dup 1) + (parallel [(const_int 1) (const_int 3)]))) + (any_extend:V2DI + (vec_select:V4SI (match_dup 2) + (parallel [(const_int 1) (const_int 3)]))))))] + "ISA_HAS_MSA" + "dotp_.d\t%w0,%w1,%w2" + [(set_attr "type" "simd_mul") + (set_attr "mode" "V2DI")]) + +(define_insn "msa_dotp__w" + [(set (match_operand:V4SI 0 "register_operand" "=f") + (plus:V4SI + (mult:V4SI + (any_extend:V4SI + (vec_select:V4HI + (match_operand:V8HI 1 "register_operand" "%f") + (parallel [(const_int 0) (const_int 2) + (const_int 4) (const_int 6)]))) + (any_extend:V4SI + (vec_select:V4HI + (match_operand:V8HI 2 "register_operand" "f") + (parallel [(const_int 0) (const_int 2) + (const_int 4) (const_int 6)])))) + (mult:V4SI + (any_extend:V4SI + (vec_select:V4HI (match_dup 1) + (parallel [(const_int 1) (const_int 3) + (const_int 5) (const_int 7)]))) + (any_extend:V4SI + (vec_select:V4HI (match_dup 2) + (parallel 
[(const_int 1) (const_int 3) + (const_int 5) (const_int 7)]))))))] + "ISA_HAS_MSA" + "dotp_.w\t%w0,%w1,%w2" + [(set_attr "type" "simd_mul") + (set_attr "mode" "V4SI")]) + +(define_insn "msa_dotp__h" + [(set (match_operand:V8HI 0 "register_operand" "=f") + (plus:V8HI + (mult:V8HI + (any_extend:V8HI + (vec_select:V8QI + (match_operand:V16QI 1 "register_operand" "%f") + (parallel [(const_int 0) (const_int 2) + (const_int 4) (const_int 6) + (const_int 8) (const_int 10) + (const_int 12) (const_int 14)]))) + (any_extend:V8HI + (vec_select:V8QI + (match_operand:V16QI 2 "register_operand" "f") + (parallel [(const_int 0) (const_int 2) + (const_int 4) (const_int 6) + (const_int 8) (const_int 10) + (const_int 12) (const_int 14)])))) + (mult:V8HI + (any_extend:V8HI + (vec_select:V8QI (match_dup 1) + (parallel [(const_int 1) (const_int 3) + (const_int 5) (const_int 7) + (const_int 9) (const_int 11) + (const_int 13) (const_int 15)]))) + (any_extend:V8HI + (vec_select:V8QI (match_dup 2) + (parallel [(const_int 1) (const_int 3) + (const_int 5) (const_int 7) + (const_int 9) (const_int 11) + (const_int 13) (const_int 15)]))))))] + "ISA_HAS_MSA" + "dotp_.h\t%w0,%w1,%w2" + [(set_attr "type" "simd_mul") + (set_attr "mode" "V8HI")]) + +(define_insn "msa_dpadd__d" + [(set (match_operand:V2DI 0 "register_operand" "=f") + (plus:V2DI + (plus:V2DI + (mult:V2DI + (any_extend:V2DI + (vec_select:V2SI + (match_operand:V4SI 2 "register_operand" "%f") + (parallel [(const_int 0) (const_int 2)]))) + (any_extend:V2DI + (vec_select:V2SI + (match_operand:V4SI 3 "register_operand" "f") + (parallel [(const_int 0) (const_int 2)])))) + (mult:V2DI + (any_extend:V2DI + (vec_select:V4SI (match_dup 2) + (parallel [(const_int 1) (const_int 3)]))) + (any_extend:V2DI + (vec_select:V4SI (match_dup 3) + (parallel [(const_int 1) (const_int 3)]))))) + (match_operand:V2DI 1 "register_operand" "0")))] + "ISA_HAS_MSA" + "dpadd_.d\t%w0,%w2,%w3" + [(set_attr "type" "simd_mul") + (set_attr "mode" "V2DI")]) + +(define_insn 
"msa_dpadd__w" + [(set (match_operand:V4SI 0 "register_operand" "=f") + (plus:V4SI + (plus:V4SI + (mult:V4SI + (any_extend:V4SI + (vec_select:V4HI + (match_operand:V8HI 2 "register_operand" "%f") + (parallel [(const_int 0) (const_int 2) + (const_int 4) (const_int 6)]))) + (any_extend:V4SI + (vec_select:V4HI + (match_operand:V8HI 3 "register_operand" "f") + (parallel [(const_int 0) (const_int 2) + (const_int 4) (const_int 6)])))) + (mult:V4SI + (any_extend:V4SI + (vec_select:V4HI (match_dup 2) + (parallel [(const_int 1) (const_int 3) + (const_int 5) (const_int 7)]))) + (any_extend:V4SI + (vec_select:V4HI (match_dup 3) + (parallel [(const_int 1) (const_int 3) + (const_int 5) (const_int 7)]))))) + (match_operand:V4SI 1 "register_operand" "0")))] + "ISA_HAS_MSA" + "dpadd_.w\t%w0,%w2,%w3" + [(set_attr "type" "simd_mul") + (set_attr "mode" "V4SI")]) + +(define_insn "msa_dpadd__h" + [(set (match_operand:V8HI 0 "register_operand" "=f") + (plus:V8HI + (plus:V8HI + (mult:V8HI + (any_extend:V8HI + (vec_select:V8QI + (match_operand:V16QI 2 "register_operand" "%f") + (parallel [(const_int 0) (const_int 2) + (const_int 4) (const_int 6) + (const_int 8) (const_int 10) + (const_int 12) (const_int 14)]))) + (any_extend:V8HI + (vec_select:V8QI + (match_operand:V16QI 3 "register_operand" "f") + (parallel [(const_int 0) (const_int 2) + (const_int 4) (const_int 6) + (const_int 8) (const_int 10) + (const_int 12) (const_int 14)])))) + (mult:V8HI + (any_extend:V8HI + (vec_select:V8QI (match_dup 2) + (parallel [(const_int 1) (const_int 3) + (const_int 5) (const_int 7) + (const_int 9) (const_int 11) + (const_int 13) (const_int 15)]))) + (any_extend:V8HI + (vec_select:V8QI (match_dup 3) + (parallel [(const_int 1) (const_int 3) + (const_int 5) (const_int 7) + (const_int 9) (const_int 11) + (const_int 13) (const_int 15)]))))) + (match_operand:V8HI 1 "register_operand" "0")))] + "ISA_HAS_MSA" + "dpadd_.h\t%w0,%w2,%w3" + [(set_attr "type" "simd_mul") + (set_attr "mode" "V8HI")]) + +(define_insn 
"msa_dpsub__d" + [(set (match_operand:V2DI 0 "register_operand" "=f") + (minus:V2DI + (match_operand:V2DI 1 "register_operand" "0") + (plus:V2DI + (mult:V2DI + (any_extend:V2DI + (vec_select:V2SI + (match_operand:V4SI 2 "register_operand" "%f") + (parallel [(const_int 0) (const_int 2)]))) + (any_extend:V2DI + (vec_select:V2SI + (match_operand:V4SI 3 "register_operand" "f") + (parallel [(const_int 0) (const_int 2)])))) + (mult:V2DI + (any_extend:V2DI + (vec_select:V4SI (match_dup 2) + (parallel [(const_int 1) (const_int 3)]))) + (any_extend:V2DI + (vec_select:V4SI (match_dup 3) + (parallel [(const_int 1) (const_int 3)])))))))] + "ISA_HAS_MSA" + "dpsub_.d\t%w0,%w2,%w3" + [(set_attr "type" "simd_mul") + (set_attr "mode" "V2DI")]) + +(define_insn "msa_dpsub__w" + [(set (match_operand:V4SI 0 "register_operand" "=f") + (minus:V4SI + (match_operand:V4SI 1 "register_operand" "0") + (plus:V4SI + (mult:V4SI + (any_extend:V4SI + (vec_select:V4HI + (match_operand:V8HI 2 "register_operand" "%f") + (parallel [(const_int 0) (const_int 2) + (const_int 4) (const_int 6)]))) + (any_extend:V4SI + (vec_select:V4HI + (match_operand:V8HI 3 "register_operand" "f") + (parallel [(const_int 0) (const_int 2) + (const_int 4) (const_int 6)])))) + (mult:V4SI + (any_extend:V4SI + (vec_select:V4HI (match_dup 2) + (parallel [(const_int 1) (const_int 3) + (const_int 5) (const_int 7)]))) + (any_extend:V4SI + (vec_select:V4HI (match_dup 3) + (parallel [(const_int 1) (const_int 3) + (const_int 5) (const_int 7)])))))))] + "ISA_HAS_MSA" + "dpsub_.w\t%w0,%w2,%w3" + [(set_attr "type" "simd_mul") + (set_attr "mode" "V4SI")]) + +(define_insn "msa_dpsub__h" + [(set (match_operand:V8HI 0 "register_operand" "=f") + (minus:V8HI + (match_operand:V8HI 1 "register_operand" "0") + (plus:V8HI + (mult:V8HI + (any_extend:V8HI + (vec_select:V8QI + (match_operand:V16QI 2 "register_operand" "%f") + (parallel [(const_int 0) (const_int 2) + (const_int 4) (const_int 6) + (const_int 8) (const_int 10) + (const_int 12) 
(const_int 14)]))) + (any_extend:V8HI + (vec_select:V8QI + (match_operand:V16QI 3 "register_operand" "f") + (parallel [(const_int 0) (const_int 2) + (const_int 4) (const_int 6) + (const_int 8) (const_int 10) + (const_int 12) (const_int 14)])))) + (mult:V8HI + (any_extend:V8HI + (vec_select:V8QI (match_dup 2) + (parallel [(const_int 1) (const_int 3) + (const_int 5) (const_int 7) + (const_int 9) (const_int 11) + (const_int 13) (const_int 15)]))) + (any_extend:V8HI + (vec_select:V8QI (match_dup 3) + (parallel [(const_int 1) (const_int 3) + (const_int 5) (const_int 7) + (const_int 9) (const_int 11) + (const_int 13) (const_int 15)])))))))] + "ISA_HAS_MSA" + "dpsub_.h\t%w0,%w2,%w3" + [(set_attr "type" "simd_mul") + (set_attr "mode" "V8HI")]) + +(define_insn "msa_fclass_" + [(set (match_operand: 0 "register_operand" "=f") + (unspec: [(match_operand:FMSA 1 "register_operand" "f")] + UNSPEC_MSA_FCLASS))] + "ISA_HAS_MSA" + "fclass.\t%w0,%w1" + [(set_attr "type" "simd_fclass") + (set_attr "mode" "")]) + +(define_insn "msa_fcaf_" + [(set (match_operand: 0 "register_operand" "=f") + (unspec: [(match_operand:FMSA 1 "register_operand" "f") + (match_operand:FMSA 2 "register_operand" "f")] + UNSPEC_MSA_FCAF))] + "ISA_HAS_MSA" + "fcaf.\t%w0,%w1,%w2" + [(set_attr "type" "simd_fcmp") + (set_attr "mode" "")]) + +(define_insn "msa_fcune_" + [(set (match_operand: 0 "register_operand" "=f") + (unspec: [(match_operand:FMSA 1 "register_operand" "f") + (match_operand:FMSA 2 "register_operand" "f")] + UNSPEC_MSA_FCUNE))] + "ISA_HAS_MSA" + "fcune.\t%w0,%w1,%w2" + [(set_attr "type" "simd_fcmp") + (set_attr "mode" "")]) + +(define_code_iterator FCC [unordered ordered eq ne le lt uneq unle unlt]) + +(define_code_attr fcc + [(unordered "fcun") + (ordered "fcor") + (eq "fceq") + (ne "fcne") + (uneq "fcueq") + (unle "fcule") + (unlt "fcult") + (le "fcle") + (lt "fclt")]) + +(define_int_iterator FSC_UNS [UNSPEC_MSA_FSAF UNSPEC_MSA_FSUN UNSPEC_MSA_FSOR + UNSPEC_MSA_FSEQ UNSPEC_MSA_FSNE 
UNSPEC_MSA_FSUEQ + UNSPEC_MSA_FSUNE UNSPEC_MSA_FSULE UNSPEC_MSA_FSULT + UNSPEC_MSA_FSLE UNSPEC_MSA_FSLT]) + +(define_int_attr fsc + [(UNSPEC_MSA_FSAF "fsaf") + (UNSPEC_MSA_FSUN "fsun") + (UNSPEC_MSA_FSOR "fsor") + (UNSPEC_MSA_FSEQ "fseq") + (UNSPEC_MSA_FSNE "fsne") + (UNSPEC_MSA_FSUEQ "fsueq") + (UNSPEC_MSA_FSUNE "fsune") + (UNSPEC_MSA_FSULE "fsule") + (UNSPEC_MSA_FSULT "fsult") + (UNSPEC_MSA_FSLE "fsle") + (UNSPEC_MSA_FSLT "fslt")]) + +(define_insn "msa__" + [(set (match_operand: 0 "register_operand" "=f") + (FCC: (match_operand:FMSA 1 "register_operand" "f") + (match_operand:FMSA 2 "register_operand" "f")))] + "ISA_HAS_MSA" + ".\t%w0,%w1,%w2" + [(set_attr "type" "simd_fcmp") + (set_attr "mode" "")]) + +(define_insn "msa__" + [(set (match_operand: 0 "register_operand" "=f") + (unspec: [(match_operand:FMSA 1 "register_operand" "f") + (match_operand:FMSA 2 "register_operand" "f")] + FSC_UNS))] + "ISA_HAS_MSA" + ".\t%w0,%w1,%w2" + [(set_attr "type" "simd_fcmp") + (set_attr "mode" "")]) + +(define_insn "msa_fexp2_" + [(set (match_operand:FMSA 0 "register_operand" "=f") + (unspec:FMSA [(match_operand:FMSA 1 "register_operand" "f") + (match_operand: 2 "register_operand" "f")] + UNSPEC_MSA_FEXP2))] + "ISA_HAS_MSA" + "fexp2.\t%w0,%w1,%w2" + [(set_attr "type" "simd_fexp2") + (set_attr "mode" "")]) + +(define_mode_attr fint + [(V4SF "v4si") + (V2DF "v2di")]) + +(define_mode_attr FQ + [(V4SF "V8HI") + (V2DF "V4SI")]) + +(define_mode_attr FINTCNV + [(V4SF "I2S") + (V2DF "I2D")]) + +(define_mode_attr FINTCNV_2 + [(V4SF "S2I") + (V2DF "D2I")]) + +(define_insn "float2" + [(set (match_operand:FMSA 0 "register_operand" "=f") + (float:FMSA (match_operand: 1 "register_operand" "f")))] + "ISA_HAS_MSA" + "ffint_s.\t%w0,%w1" + [(set_attr "type" "simd_fcvt") + (set_attr "cnv_mode" "") + (set_attr "mode" "")]) + +(define_insn "floatuns2" + [(set (match_operand:FMSA 0 "register_operand" "=f") + (unsigned_float:FMSA + (match_operand: 1 "register_operand" "f")))] + "ISA_HAS_MSA" + 
"ffint_u.\t%w0,%w1" + [(set_attr "type" "simd_fcvt") + (set_attr "cnv_mode" "") + (set_attr "mode" "")]) + +(define_mode_attr FFQ + [(V4SF "V8HI") + (V2DF "V4SI")]) + +(define_insn "msa_ffql_" + [(set (match_operand:FMSA 0 "register_operand" "=f") + (unspec:FMSA [(match_operand: 1 "register_operand" "f")] + UNSPEC_MSA_FFQL))] + "ISA_HAS_MSA" + "ffql.\t%w0,%w1" + [(set_attr "type" "simd_fcvt") + (set_attr "cnv_mode" "") + (set_attr "mode" "")]) + +(define_insn "msa_ffqr_" + [(set (match_operand:FMSA 0 "register_operand" "=f") + (unspec:FMSA [(match_operand: 1 "register_operand" "f")] + UNSPEC_MSA_FFQR))] + "ISA_HAS_MSA" + "ffqr.\t%w0,%w1" + [(set_attr "type" "simd_fcvt") + (set_attr "cnv_mode" "") + (set_attr "mode" "")]) + +(define_insn "msa_fill_" + [(set (match_operand:MSA 0 "register_operand" "=f,f") + (vec_duplicate:MSA + (match_operand: 1 "reg_or_0_operand" "d,J")))] + "ISA_HAS_MSA" +{ + if (which_alternative == 1) + return "ldi.\t%w0,0"; + + if (!TARGET_64BIT && (mode == V2DImode || mode == V2DFmode)) + return "#"; + else + return "fill.\t%w0,%z1"; +} + [(set_attr "type" "simd_fill") + (set_attr "mode" "")]) + +(define_split + [(set (match_operand:MSA_D 0 "register_operand") + (vec_duplicate:MSA_D + (match_operand: 1 "register_operand")))] + "reload_completed && ISA_HAS_MSA && !TARGET_64BIT" + [(const_int 0)] +{ + mips_split_msa_fill_d (operands[0], operands[1]); + DONE; +}) + +(define_insn "msa_flog2_" + [(set (match_operand:FMSA 0 "register_operand" "=f") + (unspec:FMSA [(match_operand:FMSA 1 "register_operand" "f")] + UNSPEC_MSA_FLOG2))] + "ISA_HAS_MSA" + "flog2.\t%w0,%w1" + [(set_attr "type" "simd_flog2") + (set_attr "mode" "")]) + +(define_insn "smax3" + [(set (match_operand:FMSA 0 "register_operand" "=f") + (smax:FMSA (match_operand:FMSA 1 "register_operand" "f") + (match_operand:FMSA 2 "register_operand" "f")))] + "ISA_HAS_MSA" + "fmax.\t%w0,%w1,%w2" + [(set_attr "type" "simd_fminmax") + (set_attr "mode" "")]) + +(define_insn "msa_fmax_a_" + [(set 
(match_operand:FMSA 0 "register_operand" "=f") + (if_then_else + (gt (abs:FMSA (match_operand:FMSA 1 "register_operand" "f")) + (abs:FMSA (match_operand:FMSA 2 "register_operand" "f"))) + (match_dup 1) + (match_dup 2)))] + "ISA_HAS_MSA" + "fmax_a.\t%w0,%w1,%w2" + [(set_attr "type" "simd_fminmax") + (set_attr "mode" "")]) + +(define_insn "smin3" + [(set (match_operand:FMSA 0 "register_operand" "=f") + (smin:FMSA (match_operand:FMSA 1 "register_operand" "f") + (match_operand:FMSA 2 "register_operand" "f")))] + "ISA_HAS_MSA" + "fmin.\t%w0,%w1,%w2" + [(set_attr "type" "simd_fminmax") + (set_attr "mode" "")]) + +(define_insn "msa_fmin_a_" + [(set (match_operand:FMSA 0 "register_operand" "=f") + (if_then_else + (lt (abs:FMSA (match_operand:FMSA 1 "register_operand" "f")) + (abs:FMSA (match_operand:FMSA 2 "register_operand" "f"))) + (match_dup 1) + (match_dup 2)))] + "ISA_HAS_MSA" + "fmin_a.\t%w0,%w1,%w2" + [(set_attr "type" "simd_fminmax") + (set_attr "mode" "")]) + +(define_insn "msa_frcp_" + [(set (match_operand:FMSA 0 "register_operand" "=f") + (unspec:FMSA [(match_operand:FMSA 1 "register_operand" "f")] + UNSPEC_MSA_FRCP))] + "ISA_HAS_MSA" + "frcp.\t%w0,%w1" + [(set_attr "type" "simd_fdiv") + (set_attr "mode" "")]) + +(define_insn "msa_frint_" + [(set (match_operand:FMSA 0 "register_operand" "=f") + (unspec:FMSA [(match_operand:FMSA 1 "register_operand" "f")] + UNSPEC_MSA_FRINT))] + "ISA_HAS_MSA" + "frint.\t%w0,%w1" + [(set_attr "type" "simd_fcvt") + (set_attr "mode" "")]) + +(define_insn "msa_frsqrt_" + [(set (match_operand:FMSA 0 "register_operand" "=f") + (unspec:FMSA [(match_operand:FMSA 1 "register_operand" "f")] + UNSPEC_MSA_FRSQRT))] + "ISA_HAS_MSA" + "frsqrt.\t%w0,%w1" + [(set_attr "type" "simd_fdiv") + (set_attr "mode" "")]) + +(define_insn "msa_ftint_s_" + [(set (match_operand: 0 "register_operand" "=f") + (unspec: [(match_operand:FMSA 1 "register_operand" "f")] + UNSPEC_MSA_FTINT_S))] + "ISA_HAS_MSA" + "ftint_s.\t%w0,%w1" + [(set_attr "type" "simd_fcvt") + 
(set_attr "cnv_mode" "") + (set_attr "mode" "")]) + +(define_insn "msa_ftint_u_" + [(set (match_operand: 0 "register_operand" "=f") + (unspec: [(match_operand:FMSA 1 "register_operand" "f")] + UNSPEC_MSA_FTINT_U))] + "ISA_HAS_MSA" + "ftint_u.\t%w0,%w1" + [(set_attr "type" "simd_fcvt") + (set_attr "cnv_mode" "") + (set_attr "mode" "")]) + +(define_insn "fix_trunc2" + [(set (match_operand: 0 "register_operand" "=f") + (fix: (match_operand:FMSA 1 "register_operand" "f")))] + "ISA_HAS_MSA" + "ftrunc_s.\t%w0,%w1" + [(set_attr "type" "simd_fcvt") + (set_attr "cnv_mode" "") + (set_attr "mode" "")]) + +(define_insn "fixuns_trunc2" + [(set (match_operand: 0 "register_operand" "=f") + (unsigned_fix: (match_operand:FMSA 1 "register_operand" "f")))] + "ISA_HAS_MSA" + "ftrunc_u.\t%w0,%w1" + [(set_attr "type" "simd_fcvt") + (set_attr "cnv_mode" "") + (set_attr "mode" "")]) + +(define_insn "msa_ftq_h" + [(set (match_operand:V8HI 0 "register_operand" "=f") + (unspec:V8HI [(match_operand:V4SF 1 "register_operand" "f") + (match_operand:V4SF 2 "register_operand" "f")] + UNSPEC_MSA_FTQ))] + "ISA_HAS_MSA" + "ftq.h\t%w0,%w1,%w2" + [(set_attr "type" "simd_fcvt") + (set_attr "cnv_mode" "S2I") + (set_attr "mode" "V4SF")]) + +(define_insn "msa_ftq_w" + [(set (match_operand:V4SI 0 "register_operand" "=f") + (unspec:V4SI [(match_operand:V2DF 1 "register_operand" "f") + (match_operand:V2DF 2 "register_operand" "f")] + UNSPEC_MSA_FTQ))] + "ISA_HAS_MSA" + "ftq.w\t%w0,%w1,%w2" + [(set_attr "type" "simd_fcvt") + (set_attr "cnv_mode" "D2I") + (set_attr "mode" "V2DF")]) + +(define_insn "msa_h__h" + [(set (match_operand:V8HI 0 "register_operand" "=f") + (addsub:V8HI + (any_extend:V8HI + (vec_select:V8QI + (match_operand:V16QI 1 "register_operand" "f") + (parallel [(const_int 1) (const_int 3) + (const_int 5) (const_int 7) + (const_int 9) (const_int 11) + (const_int 13) (const_int 15)]))) + (any_extend:V8HI + (vec_select:V8QI + (match_operand:V16QI 2 "register_operand" "f") + (parallel [(const_int 0) 
(const_int 2) + (const_int 4) (const_int 6) + (const_int 8) (const_int 10) + (const_int 12) (const_int 14)])))))] + "ISA_HAS_MSA" + "h_.h\t%w0,%w1,%w2" + [(set_attr "type" "simd_int_arith") + (set_attr "mode" "V8HI")]) + +(define_insn "msa_h__w" + [(set (match_operand:V4SI 0 "register_operand" "=f") + (addsub:V4SI + (any_extend:V4SI + (vec_select:V4HI + (match_operand:V8HI 1 "register_operand" "f") + (parallel [(const_int 1) (const_int 3) + (const_int 5) (const_int 7)]))) + (any_extend:V4SI + (vec_select:V4HI + (match_operand:V8HI 2 "register_operand" "f") + (parallel [(const_int 0) (const_int 2) + (const_int 4) (const_int 6)])))))] + "ISA_HAS_MSA" + "h_.w\t%w0,%w1,%w2" + [(set_attr "type" "simd_int_arith") + (set_attr "mode" "V4SI")]) + +(define_insn "msa_h__d" + [(set (match_operand:V2DI 0 "register_operand" "=f") + (addsub:V2DI + (any_extend:V2DI + (vec_select:V2SI + (match_operand:V4SI 1 "register_operand" "f") + (parallel [(const_int 1) (const_int 3)]))) + (any_extend:V2DI + (vec_select:V2SI + (match_operand:V4SI 2 "register_operand" "f") + (parallel [(const_int 0) (const_int 2)])))))] + "ISA_HAS_MSA" + "h_.d\t%w0,%w1,%w2" + [(set_attr "type" "simd_int_arith") + (set_attr "mode" "V2DI")]) + +(define_insn "msa_ilvev_b" + [(set (match_operand:V16QI 0 "register_operand" "=f") + (vec_select:V16QI + (vec_concat:V32QI + (match_operand:V16QI 1 "register_operand" "f") + (match_operand:V16QI 2 "register_operand" "f")) + (parallel [(const_int 0) (const_int 16) + (const_int 2) (const_int 18) + (const_int 4) (const_int 20) + (const_int 6) (const_int 22) + (const_int 8) (const_int 24) + (const_int 10) (const_int 26) + (const_int 12) (const_int 28) + (const_int 14) (const_int 30)])))] + "ISA_HAS_MSA" + "ilvev.b\t%w0,%w2,%w1" + [(set_attr "type" "simd_permute") + (set_attr "mode" "V16QI")]) + +(define_insn "msa_ilvev_h" + [(set (match_operand:V8HI 0 "register_operand" "=f") + (vec_select:V8HI + (vec_concat:V16HI + (match_operand:V8HI 1 "register_operand" "f") + 
(match_operand:V8HI 2 "register_operand" "f")) + (parallel [(const_int 0) (const_int 8) + (const_int 2) (const_int 10) + (const_int 4) (const_int 12) + (const_int 6) (const_int 14)])))] + "ISA_HAS_MSA" + "ilvev.h\t%w0,%w2,%w1" + [(set_attr "type" "simd_permute") + (set_attr "mode" "V8HI")]) + +(define_insn "msa_ilvev_w" + [(set (match_operand:V4SI 0 "register_operand" "=f") + (vec_select:V4SI + (vec_concat:V8SI + (match_operand:V4SI 1 "register_operand" "f") + (match_operand:V4SI 2 "register_operand" "f")) + (parallel [(const_int 0) (const_int 4) + (const_int 2) (const_int 6)])))] + "ISA_HAS_MSA" + "ilvev.w\t%w0,%w2,%w1" + [(set_attr "type" "simd_permute") + (set_attr "mode" "V4SI")]) + +(define_insn "msa_ilvev_w_f" + [(set (match_operand:V4SF 0 "register_operand" "=f") + (vec_select:V4SF + (vec_concat:V8SF + (match_operand:V4SF 1 "register_operand" "f") + (match_operand:V4SF 2 "register_operand" "f")) + (parallel [(const_int 0) (const_int 4) + (const_int 2) (const_int 6)])))] + "ISA_HAS_MSA" + "ilvev.w\t%w0,%w2,%w1" + [(set_attr "type" "simd_permute") + (set_attr "mode" "V4SF")]) + +(define_insn "msa_ilvl_b" + [(set (match_operand:V16QI 0 "register_operand" "=f") + (vec_select:V16QI + (vec_concat:V32QI + (match_operand:V16QI 1 "register_operand" "f") + (match_operand:V16QI 2 "register_operand" "f")) + (parallel [(const_int 8) (const_int 24) + (const_int 9) (const_int 25) + (const_int 10) (const_int 26) + (const_int 11) (const_int 27) + (const_int 12) (const_int 28) + (const_int 13) (const_int 29) + (const_int 14) (const_int 30) + (const_int 15) (const_int 31)])))] + "ISA_HAS_MSA" + "ilvl.b\t%w0,%w2,%w1" + [(set_attr "type" "simd_permute") + (set_attr "mode" "V16QI")]) + +(define_insn "msa_ilvl_h" + [(set (match_operand:V8HI 0 "register_operand" "=f") + (vec_select:V8HI + (vec_concat:V16HI + (match_operand:V8HI 1 "register_operand" "f") + (match_operand:V8HI 2 "register_operand" "f")) + (parallel [(const_int 4) (const_int 12) + (const_int 5) (const_int 13) + 
(const_int 6) (const_int 14) + (const_int 7) (const_int 15)])))] + "ISA_HAS_MSA" + "ilvl.h\t%w0,%w2,%w1" + [(set_attr "type" "simd_permute") + (set_attr "mode" "V8HI")]) + +(define_insn "msa_ilvl_w" + [(set (match_operand:V4SI 0 "register_operand" "=f") + (vec_select:V4SI + (vec_concat:V8SI + (match_operand:V4SI 1 "register_operand" "f") + (match_operand:V4SI 2 "register_operand" "f")) + (parallel [(const_int 2) (const_int 6) + (const_int 3) (const_int 7)])))] + "ISA_HAS_MSA" + "ilvl.w\t%w0,%w2,%w1" + [(set_attr "type" "simd_permute") + (set_attr "mode" "V4SI")]) + +(define_insn "msa_ilvl_w_f" + [(set (match_operand:V4SF 0 "register_operand" "=f") + (vec_select:V4SF + (vec_concat:V8SF + (match_operand:V4SF 1 "register_operand" "f") + (match_operand:V4SF 2 "register_operand" "f")) + (parallel [(const_int 2) (const_int 6) + (const_int 3) (const_int 7)])))] + "ISA_HAS_MSA" + "ilvl.w\t%w0,%w2,%w1" + [(set_attr "type" "simd_permute") + (set_attr "mode" "V4SF")]) + +(define_insn "msa_ilvl_d" + [(set (match_operand:V2DI 0 "register_operand" "=f") + (vec_select:V2DI + (vec_concat:V4DI + (match_operand:V2DI 1 "register_operand" "f") + (match_operand:V2DI 2 "register_operand" "f")) + (parallel [(const_int 1) (const_int 3)])))] + "ISA_HAS_MSA" + "ilvl.d\t%w0,%w2,%w1" + [(set_attr "type" "simd_permute") + (set_attr "mode" "V2DI")]) + +(define_insn "msa_ilvl_d_f" + [(set (match_operand:V2DF 0 "register_operand" "=f") + (vec_select:V2DF + (vec_concat:V4DF + (match_operand:V2DF 1 "register_operand" "f") + (match_operand:V2DF 2 "register_operand" "f")) + (parallel [(const_int 1) (const_int 3)])))] + "ISA_HAS_MSA" + "ilvl.d\t%w0,%w2,%w1" + [(set_attr "type" "simd_permute") + (set_attr "mode" "V2DF")]) + +(define_insn "msa_ilvod_b" + [(set (match_operand:V16QI 0 "register_operand" "=f") + (vec_select:V16QI + (vec_concat:V32QI + (match_operand:V16QI 1 "register_operand" "f") + (match_operand:V16QI 2 "register_operand" "f")) + (parallel [(const_int 1) (const_int 17) + (const_int 3) 
(const_int 19) + (const_int 5) (const_int 21) + (const_int 7) (const_int 23) + (const_int 9) (const_int 25) + (const_int 11) (const_int 27) + (const_int 13) (const_int 29) + (const_int 15) (const_int 31)])))] + "ISA_HAS_MSA" + "ilvod.b\t%w0,%w2,%w1" + [(set_attr "type" "simd_permute") + (set_attr "mode" "V16QI")]) + +(define_insn "msa_ilvod_h" + [(set (match_operand:V8HI 0 "register_operand" "=f") + (vec_select:V8HI + (vec_concat:V16HI + (match_operand:V8HI 1 "register_operand" "f") + (match_operand:V8HI 2 "register_operand" "f")) + (parallel [(const_int 1) (const_int 9) + (const_int 3) (const_int 11) + (const_int 5) (const_int 13) + (const_int 7) (const_int 15)])))] + "ISA_HAS_MSA" + "ilvod.h\t%w0,%w2,%w1" + [(set_attr "type" "simd_permute") + (set_attr "mode" "V8HI")]) + +(define_insn "msa_ilvod_w" + [(set (match_operand:V4SI 0 "register_operand" "=f") + (vec_select:V4SI + (vec_concat:V8SI + (match_operand:V4SI 1 "register_operand" "f") + (match_operand:V4SI 2 "register_operand" "f")) + (parallel [(const_int 1) (const_int 5) + (const_int 3) (const_int 7)])))] + "ISA_HAS_MSA" + "ilvod.w\t%w0,%w2,%w1" + [(set_attr "type" "simd_permute") + (set_attr "mode" "V4SI")]) + +(define_insn "msa_ilvod_w_f" + [(set (match_operand:V4SF 0 "register_operand" "=f") + (vec_select:V4SF + (vec_concat:V8SF + (match_operand:V4SF 1 "register_operand" "f") + (match_operand:V4SF 2 "register_operand" "f")) + (parallel [(const_int 1) (const_int 5) + (const_int 3) (const_int 7)])))] + "ISA_HAS_MSA" + "ilvod.w\t%w0,%w2,%w1" + [(set_attr "type" "simd_permute") + (set_attr "mode" "V4SF")]) + +(define_insn "msa_ilvr_b" + [(set (match_operand:V16QI 0 "register_operand" "=f") + (vec_select:V16QI + (vec_concat:V32QI + (match_operand:V16QI 1 "register_operand" "f") + (match_operand:V16QI 2 "register_operand" "f")) + (parallel [(const_int 0) (const_int 16) + (const_int 1) (const_int 17) + (const_int 2) (const_int 18) + (const_int 3) (const_int 19) + (const_int 4) (const_int 20) + (const_int 5) 
(const_int 21) + (const_int 6) (const_int 22) + (const_int 7) (const_int 23)])))] + "ISA_HAS_MSA" + "ilvr.b\t%w0,%w2,%w1" + [(set_attr "type" "simd_permute") + (set_attr "mode" "V16QI")]) + +(define_insn "msa_ilvr_h" + [(set (match_operand:V8HI 0 "register_operand" "=f") + (vec_select:V8HI + (vec_concat:V16HI + (match_operand:V8HI 1 "register_operand" "f") + (match_operand:V8HI 2 "register_operand" "f")) + (parallel [(const_int 0) (const_int 8) + (const_int 1) (const_int 9) + (const_int 2) (const_int 10) + (const_int 3) (const_int 11)])))] + "ISA_HAS_MSA" + "ilvr.h\t%w0,%w2,%w1" + [(set_attr "type" "simd_permute") + (set_attr "mode" "V8HI")]) + +(define_insn "msa_ilvr_w" + [(set (match_operand:V4SI 0 "register_operand" "=f") + (vec_select:V4SI + (vec_concat:V8SI + (match_operand:V4SI 1 "register_operand" "f") + (match_operand:V4SI 2 "register_operand" "f")) + (parallel [(const_int 0) (const_int 4) + (const_int 1) (const_int 5)])))] + "ISA_HAS_MSA" + "ilvr.w\t%w0,%w2,%w1" + [(set_attr "type" "simd_permute") + (set_attr "mode" "V4SI")]) + +(define_insn "msa_ilvr_w_f" + [(set (match_operand:V4SF 0 "register_operand" "=f") + (vec_select:V4SF + (vec_concat:V8SF + (match_operand:V4SF 1 "register_operand" "f") + (match_operand:V4SF 2 "register_operand" "f")) + (parallel [(const_int 0) (const_int 4) + (const_int 1) (const_int 5)])))] + "ISA_HAS_MSA" + "ilvr.w\t%w0,%w2,%w1" + [(set_attr "type" "simd_permute") + (set_attr "mode" "V4SF")]) + +(define_insn "msa_ilvr_d" + [(set (match_operand:V2DI 0 "register_operand" "=f") + (vec_select:V2DI + (vec_concat:V4DI + (match_operand:V2DI 1 "register_operand" "f") + (match_operand:V2DI 2 "register_operand" "f")) + (parallel [(const_int 0) (const_int 2)])))] + "ISA_HAS_MSA" + "ilvr.d\t%w0,%w2,%w1" + [(set_attr "type" "simd_permute") + (set_attr "mode" "V2DI")]) + +(define_insn "msa_ilvr_d_f" + [(set (match_operand:V2DF 0 "register_operand" "=f") + (vec_select:V2DF + (vec_concat:V4DF + (match_operand:V2DF 1 "register_operand" "f") + 
(match_operand:V2DF 2 "register_operand" "f")) + (parallel [(const_int 0) (const_int 2)])))] + "ISA_HAS_MSA" + "ilvr.d\t%w0,%w2,%w1" + [(set_attr "type" "simd_permute") + (set_attr "mode" "V2DF")]) + +(define_insn "msa_madd_q_" + [(set (match_operand:IMSA_WH 0 "register_operand" "=f") + (unspec:IMSA_WH [(match_operand:IMSA_WH 1 "register_operand" "0") + (match_operand:IMSA_WH 2 "register_operand" "f") + (match_operand:IMSA_WH 3 "register_operand" "f")] + UNSPEC_MSA_MADD_Q))] + "ISA_HAS_MSA" + "madd_q.\t%w0,%w2,%w3" + [(set_attr "type" "simd_mul") + (set_attr "mode" "")]) + +(define_insn "msa_maddr_q_" + [(set (match_operand:IMSA_WH 0 "register_operand" "=f") + (unspec:IMSA_WH [(match_operand:IMSA_WH 1 "register_operand" "0") + (match_operand:IMSA_WH 2 "register_operand" "f") + (match_operand:IMSA_WH 3 "register_operand" "f")] + UNSPEC_MSA_MADDR_Q))] + "ISA_HAS_MSA" + "maddr_q.\t%w0,%w2,%w3" + [(set_attr "type" "simd_mul") + (set_attr "mode" "")]) + +(define_insn "msa_max_a_" + [(set (match_operand:IMSA 0 "register_operand" "=f") + (if_then_else + (gt (abs:IMSA (match_operand:IMSA 1 "register_operand" "f")) + (abs:IMSA (match_operand:IMSA 2 "register_operand" "f"))) + (match_dup 1) + (match_dup 2)))] + "ISA_HAS_MSA" + "max_a.\t%w0,%w1,%w2" + [(set_attr "type" "simd_int_arith") + (set_attr "mode" "")]) + +(define_insn "smax3" + [(set (match_operand:IMSA 0 "register_operand" "=f,f") + (smax:IMSA (match_operand:IMSA 1 "register_operand" "f,f") + (match_operand:IMSA 2 "reg_or_vector_same_simm5_operand" "f,Usv5")))] + "ISA_HAS_MSA" + "@ + max_s.\t%w0,%w1,%w2 + maxi_s.\t%w0,%w1,%B2" + [(set_attr "type" "simd_int_arith") + (set_attr "mode" "")]) + +(define_insn "umax3" + [(set (match_operand:IMSA 0 "register_operand" "=f,f") + (umax:IMSA (match_operand:IMSA 1 "register_operand" "f,f") + (match_operand:IMSA 2 "reg_or_vector_same_uimm5_operand" "f,Uuv5")))] + "ISA_HAS_MSA" + "@ + max_u.\t%w0,%w1,%w2 + maxi_u.\t%w0,%w1,%B2" + [(set_attr "type" "simd_int_arith") + (set_attr 
"mode" "")]) + +(define_insn "msa_min_a_" + [(set (match_operand:IMSA 0 "register_operand" "=f") + (if_then_else + (lt (abs:IMSA (match_operand:IMSA 1 "register_operand" "f")) + (abs:IMSA (match_operand:IMSA 2 "register_operand" "f"))) + (match_dup 1) + (match_dup 2)))] + "ISA_HAS_MSA" + "min_a.\t%w0,%w1,%w2" + [(set_attr "type" "simd_int_arith") + (set_attr "mode" "")]) + +(define_insn "smin3" + [(set (match_operand:IMSA 0 "register_operand" "=f,f") + (smin:IMSA (match_operand:IMSA 1 "register_operand" "f,f") + (match_operand:IMSA 2 "reg_or_vector_same_simm5_operand" "f,Usv5")))] + "ISA_HAS_MSA" + "@ + min_s.\t%w0,%w1,%w2 + mini_s.\t%w0,%w1,%B2" + [(set_attr "type" "simd_int_arith") + (set_attr "mode" "")]) + +(define_insn "umin3" + [(set (match_operand:IMSA 0 "register_operand" "=f,f") + (umin:IMSA (match_operand:IMSA 1 "register_operand" "f,f") + (match_operand:IMSA 2 "reg_or_vector_same_uimm5_operand" "f,Uuv5")))] + "ISA_HAS_MSA" + "@ + min_u.\t%w0,%w1,%w2 + mini_u.\t%w0,%w1,%B2" + [(set_attr "type" "simd_int_arith") + (set_attr "mode" "")]) + +(define_insn "msa_msub_q_" + [(set (match_operand:IMSA_WH 0 "register_operand" "=f") + (unspec:IMSA_WH [(match_operand:IMSA_WH 1 "register_operand" "0") + (match_operand:IMSA_WH 2 "register_operand" "f") + (match_operand:IMSA_WH 3 "register_operand" "f")] + UNSPEC_MSA_MSUB_Q))] + "ISA_HAS_MSA" + "msub_q.\t%w0,%w2,%w3" + [(set_attr "type" "simd_mul") + (set_attr "mode" "")]) + +(define_insn "msa_msubr_q_" + [(set (match_operand:IMSA_WH 0 "register_operand" "=f") + (unspec:IMSA_WH [(match_operand:IMSA_WH 1 "register_operand" "0") + (match_operand:IMSA_WH 2 "register_operand" "f") + (match_operand:IMSA_WH 3 "register_operand" "f")] + UNSPEC_MSA_MSUBR_Q))] + "ISA_HAS_MSA" + "msubr_q.\t%w0,%w2,%w3" + [(set_attr "type" "simd_mul") + (set_attr "mode" "")]) + +(define_insn "msa_mul_q_" + [(set (match_operand:IMSA_WH 0 "register_operand" "=f") + (unspec:IMSA_WH [(match_operand:IMSA_WH 1 "register_operand" "f") + 
(match_operand:IMSA_WH 2 "register_operand" "f")] + UNSPEC_MSA_MUL_Q))] + "ISA_HAS_MSA" + "mul_q.\t%w0,%w1,%w2" + [(set_attr "type" "simd_mul") + (set_attr "mode" "")]) + +(define_insn "msa_mulr_q_" + [(set (match_operand:IMSA_WH 0 "register_operand" "=f") + (unspec:IMSA_WH [(match_operand:IMSA_WH 1 "register_operand" "f") + (match_operand:IMSA_WH 2 "register_operand" "f")] + UNSPEC_MSA_MULR_Q))] + "ISA_HAS_MSA" + "mulr_q.\t%w0,%w1,%w2" + [(set_attr "type" "simd_mul") + (set_attr "mode" "")]) + +(define_insn "msa_nloc_" + [(set (match_operand:IMSA 0 "register_operand" "=f") + (unspec:IMSA [(match_operand:IMSA 1 "register_operand" "f")] + UNSPEC_MSA_NLOC))] + "ISA_HAS_MSA" + "nloc.\t%w0,%w1" + [(set_attr "type" "simd_bit") + (set_attr "mode" "")]) + +(define_insn "clz2" + [(set (match_operand:IMSA 0 "register_operand" "=f") + (clz:IMSA (match_operand:IMSA 1 "register_operand" "f")))] + "ISA_HAS_MSA" + "nlzc.\t%w0,%w1" + [(set_attr "type" "simd_bit") + (set_attr "mode" "")]) + +(define_insn "msa_nor_" + [(set (match_operand:IMSA 0 "register_operand" "=f,f") + (and:IMSA (not:IMSA (match_operand:IMSA 1 "register_operand" "f,f")) + (not:IMSA (match_operand:IMSA 2 "reg_or_vector_same_val_operand" "f,Urv8"))))] + "ISA_HAS_MSA" + "@ + nor.v\t%w0,%w1,%w2 + nori.b\t%w0,%w1,%B2" + [(set_attr "type" "simd_logic") + (set_attr "mode" "")]) + +(define_insn "msa_pckev_b" +[(set (match_operand:V16QI 0 "register_operand" "=f") + (vec_select:V16QI + (vec_concat:V32QI + (match_operand:V16QI 1 "register_operand" "f") + (match_operand:V16QI 2 "register_operand" "f")) + (parallel [(const_int 0) (const_int 2) + (const_int 4) (const_int 6) + (const_int 8) (const_int 10) + (const_int 12) (const_int 14) + (const_int 16) (const_int 18) + (const_int 20) (const_int 22) + (const_int 24) (const_int 26) + (const_int 28) (const_int 30)])))] + "ISA_HAS_MSA" + "pckev.b\t%w0,%w2,%w1" + [(set_attr "type" "simd_permute") + (set_attr "mode" "V16QI")]) + +(define_insn "msa_pckev_h" +[(set 
(match_operand:V8HI 0 "register_operand" "=f") + (vec_select:V8HI + (vec_concat:V16HI + (match_operand:V8HI 1 "register_operand" "f") + (match_operand:V8HI 2 "register_operand" "f")) + (parallel [(const_int 0) (const_int 2) + (const_int 4) (const_int 6) + (const_int 8) (const_int 10) + (const_int 12) (const_int 14)])))] + "ISA_HAS_MSA" + "pckev.h\t%w0,%w2,%w1" + [(set_attr "type" "simd_permute") + (set_attr "mode" "V8HI")]) + +(define_insn "msa_pckev_w" +[(set (match_operand:V4SI 0 "register_operand" "=f") + (vec_select:V4SI + (vec_concat:V8SI + (match_operand:V4SI 1 "register_operand" "f") + (match_operand:V4SI 2 "register_operand" "f")) + (parallel [(const_int 0) (const_int 2) + (const_int 4) (const_int 6)])))] + "ISA_HAS_MSA" + "pckev.w\t%w0,%w2,%w1" + [(set_attr "type" "simd_permute") + (set_attr "mode" "V4SI")]) + +(define_insn "msa_pckev_w_f" +[(set (match_operand:V4SF 0 "register_operand" "=f") + (vec_select:V4SF + (vec_concat:V8SF + (match_operand:V4SF 1 "register_operand" "f") + (match_operand:V4SF 2 "register_operand" "f")) + (parallel [(const_int 0) (const_int 2) + (const_int 4) (const_int 6)])))] + "ISA_HAS_MSA" + "pckev.w\t%w0,%w2,%w1" + [(set_attr "type" "simd_permute") + (set_attr "mode" "V4SF")]) + +(define_insn "msa_pckod_b" +[(set (match_operand:V16QI 0 "register_operand" "=f") + (vec_select:V16QI + (vec_concat:V32QI + (match_operand:V16QI 1 "register_operand" "f") + (match_operand:V16QI 2 "register_operand" "f")) + (parallel [(const_int 1) (const_int 3) + (const_int 5) (const_int 7) + (const_int 9) (const_int 11) + (const_int 13) (const_int 15) + (const_int 17) (const_int 19) + (const_int 21) (const_int 23) + (const_int 25) (const_int 27) + (const_int 29) (const_int 31)])))] + "ISA_HAS_MSA" + "pckod.b\t%w0,%w2,%w1" + [(set_attr "type" "simd_permute") + (set_attr "mode" "V16QI")]) + +(define_insn "msa_pckod_h" +[(set (match_operand:V8HI 0 "register_operand" "=f") + (vec_select:V8HI + (vec_concat:V16HI + (match_operand:V8HI 1 "register_operand" 
"f") + (match_operand:V8HI 2 "register_operand" "f")) + (parallel [(const_int 1) (const_int 3) + (const_int 5) (const_int 7) + (const_int 9) (const_int 11) + (const_int 13) (const_int 15)])))] + "ISA_HAS_MSA" + "pckod.h\t%w0,%w2,%w1" + [(set_attr "type" "simd_permute") + (set_attr "mode" "V8HI")]) + +(define_insn "msa_pckod_w" +[(set (match_operand:V4SI 0 "register_operand" "=f") + (vec_select:V4SI + (vec_concat:V8SI + (match_operand:V4SI 1 "register_operand" "f") + (match_operand:V4SI 2 "register_operand" "f")) + (parallel [(const_int 1) (const_int 3) + (const_int 5) (const_int 7)])))] + "ISA_HAS_MSA" + "pckod.w\t%w0,%w2,%w1" + [(set_attr "type" "simd_permute") + (set_attr "mode" "V4SI")]) + +(define_insn "msa_pckod_w_f" +[(set (match_operand:V4SF 0 "register_operand" "=f") + (vec_select:V4SF + (vec_concat:V8SF + (match_operand:V4SF 1 "register_operand" "f") + (match_operand:V4SF 2 "register_operand" "f")) + (parallel [(const_int 1) (const_int 3) + (const_int 5) (const_int 7)])))] + "ISA_HAS_MSA" + "pckod.w\t%w0,%w2,%w1" + [(set_attr "type" "simd_permute") + (set_attr "mode" "V4SF")]) + +(define_insn "popcount2" + [(set (match_operand:IMSA 0 "register_operand" "=f") + (popcount:IMSA (match_operand:IMSA 1 "register_operand" "f")))] + "ISA_HAS_MSA" + "pcnt.\t%w0,%w1" + [(set_attr "type" "simd_pcnt") + (set_attr "mode" "")]) + +(define_insn "msa_sat_s_" + [(set (match_operand:IMSA 0 "register_operand" "=f") + (unspec:IMSA [(match_operand:IMSA 1 "register_operand" "f") + (match_operand 2 "const__operand" "")] + UNSPEC_MSA_SAT_S))] + "ISA_HAS_MSA" + "sat_s.\t%w0,%w1,%2" + [(set_attr "type" "simd_sat") + (set_attr "mode" "")]) + +(define_insn "msa_sat_u_" + [(set (match_operand:IMSA 0 "register_operand" "=f") + (unspec:IMSA [(match_operand:IMSA 1 "register_operand" "f") + (match_operand 2 "const__operand" "")] + UNSPEC_MSA_SAT_U))] + "ISA_HAS_MSA" + "sat_u.\t%w0,%w1,%2" + [(set_attr "type" "simd_sat") + (set_attr "mode" "")]) + +(define_insn "msa_shf_" + [(set 
(match_operand:MSA_WHB_W 0 "register_operand" "=f") + (vec_select:MSA_WHB_W + (match_operand:MSA_WHB_W 1 "register_operand" "f") + (match_operand 2 "par_const_vector_shf_set_operand" "")))] + "ISA_HAS_MSA" +{ + HOST_WIDE_INT val = 0; + unsigned int i; + + /* We convert the selection to an immediate. */ + for (i = 0; i < 4; i++) + val |= INTVAL (XVECEXP (operands[2], 0, i)) << (2 * i); + + operands[2] = GEN_INT (val); + return "shf.\t%w0,%w1,%X2"; +} + [(set_attr "type" "simd_shf") + (set_attr "mode" "")]) + +(define_insn "msa_srar_" + [(set (match_operand:IMSA 0 "register_operand" "=f") + (unspec:IMSA [(match_operand:IMSA 1 "register_operand" "f") + (match_operand:IMSA 2 "register_operand" "f")] + UNSPEC_MSA_SRAR))] + "ISA_HAS_MSA" + "srar.\t%w0,%w1,%w2" + [(set_attr "type" "simd_shift") + (set_attr "mode" "")]) + +(define_insn "msa_srari_" + [(set (match_operand:IMSA 0 "register_operand" "=f") + (unspec:IMSA [(match_operand:IMSA 1 "register_operand" "f") + (match_operand 2 "const__operand" "")] + UNSPEC_MSA_SRARI))] + "ISA_HAS_MSA" + "srari.\t%w0,%w1,%2" + [(set_attr "type" "simd_shift") + (set_attr "mode" "")]) + +(define_insn "msa_srlr_" + [(set (match_operand:IMSA 0 "register_operand" "=f") + (unspec:IMSA [(match_operand:IMSA 1 "register_operand" "f") + (match_operand:IMSA 2 "register_operand" "f")] + UNSPEC_MSA_SRLR))] + "ISA_HAS_MSA" + "srlr.\t%w0,%w1,%w2" + [(set_attr "type" "simd_shift") + (set_attr "mode" "")]) + +(define_insn "msa_srlri_" + [(set (match_operand:IMSA 0 "register_operand" "=f") + (unspec:IMSA [(match_operand:IMSA 1 "register_operand" "f") + (match_operand 2 "const__operand" "")] + UNSPEC_MSA_SRLRI))] + "ISA_HAS_MSA" + "srlri.\t%w0,%w1,%2" + [(set_attr "type" "simd_shift") + (set_attr "mode" "")]) + +(define_insn "msa_subs_s_" + [(set (match_operand:IMSA 0 "register_operand" "=f") + (unspec:IMSA [(match_operand:IMSA 1 "register_operand" "f") + (match_operand:IMSA 2 "register_operand" "f")] + UNSPEC_MSA_SUBS_S))] + "ISA_HAS_MSA" + 
"subs_s.\t%w0,%w1,%w2" + [(set_attr "type" "simd_int_arith") + (set_attr "mode" "")]) + +(define_insn "msa_subs_u_" + [(set (match_operand:IMSA 0 "register_operand" "=f") + (unspec:IMSA [(match_operand:IMSA 1 "register_operand" "f") + (match_operand:IMSA 2 "register_operand" "f")] + UNSPEC_MSA_SUBS_U))] + "ISA_HAS_MSA" + "subs_u.\t%w0,%w1,%w2" + [(set_attr "type" "simd_int_arith") + (set_attr "mode" "")]) + +(define_insn "msa_subsuu_s_" + [(set (match_operand:IMSA 0 "register_operand" "=f") + (unspec:IMSA [(match_operand:IMSA 1 "register_operand" "f") + (match_operand:IMSA 2 "register_operand" "f")] + UNSPEC_MSA_SUBSUU_S))] + "ISA_HAS_MSA" + "subsuu_s.\t%w0,%w1,%w2" + [(set_attr "type" "simd_int_arith") + (set_attr "mode" "")]) + +(define_insn "msa_subsus_u_" + [(set (match_operand:IMSA 0 "register_operand" "=f") + (unspec:IMSA [(match_operand:IMSA 1 "register_operand" "f") + (match_operand:IMSA 2 "register_operand" "f")] + UNSPEC_MSA_SUBSUS_U))] + "ISA_HAS_MSA" + "subsus_u.\t%w0,%w1,%w2" + [(set_attr "type" "simd_int_arith") + (set_attr "mode" "")]) + +(define_insn "msa_sld_" + [(set (match_operand:MSA 0 "register_operand" "=f") + (unspec:MSA [(match_operand:MSA 1 "register_operand" "0") + (match_operand:MSA 2 "register_operand" "f") + (match_operand:SI 3 "reg_or_0_operand" "dJ")] + UNSPEC_MSA_SLD))] + "ISA_HAS_MSA" + "sld.\t%w0,%w2[%z3]" + [(set_attr "type" "simd_sld") + (set_attr "mode" "")]) + +(define_insn "msa_sldi_" + [(set (match_operand:MSA 0 "register_operand" "=f") + (unspec:MSA [(match_operand:MSA 1 "register_operand" "0") + (match_operand:MSA 2 "register_operand" "f") + (match_operand 3 "const__operand" "")] + UNSPEC_MSA_SLDI))] + "ISA_HAS_MSA" + "sldi.\t%w0,%w2[%3]" + [(set_attr "type" "simd_sld") + (set_attr "mode" "")]) + +(define_insn "msa_splat_" + [(set (match_operand:MSA 0 "register_operand" "=f") + (unspec:MSA [(match_operand:MSA 1 "register_operand" "f") + (match_operand:SI 2 "register_operand" "d")] + UNSPEC_MSA_SPLAT))] + "ISA_HAS_MSA" + 
"splat.\t%w0,%w1[%z2]" + [(set_attr "type" "simd_splat") + (set_attr "mode" "")]) + +(define_insn "msa_splati_" + [(set (match_operand:MSA 0 "register_operand" "=f") + (vec_duplicate:MSA + (vec_select: + (match_operand:MSA 1 "register_operand" "f") + (parallel [(match_operand 2 "const__operand" "")]))))] + "ISA_HAS_MSA" + "splati.\t%w0,%w1[%2]" + [(set_attr "type" "simd_splat") + (set_attr "mode" "")]) + +(define_insn "msa_splati__scalar" + [(set (match_operand:FMSA 0 "register_operand" "=f") + (unspec:FMSA [(match_operand: 1 "register_operand" "f")] + UNSPEC_MSA_SPLATI))] + "ISA_HAS_MSA" + "splati.\t%w0,%w1[0]" + [(set_attr "type" "simd_splat") + (set_attr "mode" "")]) + +(define_insn "msa_cfcmsa" + [(set (match_operand:SI 0 "register_operand" "=d") + (unspec_volatile:SI [(match_operand 1 "const_uimm5_operand" "")] + UNSPEC_MSA_CFCMSA))] + "ISA_HAS_MSA" + "cfcmsa\t%0,$%1" + [(set_attr "type" "simd_cmsa") + (set_attr "mode" "SI")]) + +(define_insn "msa_ctcmsa" + [(unspec_volatile [(match_operand 0 "const_uimm5_operand" "") + (match_operand:SI 1 "register_operand" "d")] + UNSPEC_MSA_CTCMSA)] + "ISA_HAS_MSA" + "ctcmsa\t$%0,%1" + [(set_attr "type" "simd_cmsa") + (set_attr "mode" "SI")]) + +(define_insn "msa_fexdo_h" + [(set (match_operand:V8HI 0 "register_operand" "=f") + (unspec:V8HI [(match_operand:V4SF 1 "register_operand" "f") + (match_operand:V4SF 2 "register_operand" "f")] + UNSPEC_MSA_FEXDO))] + "ISA_HAS_MSA" + "fexdo.h\t%w0,%w1,%w2" + [(set_attr "type" "simd_fcvt") + (set_attr "mode" "V8HI")]) + +(define_insn "vec_pack_trunc_v2df" + [(set (match_operand:V4SF 0 "register_operand" "=f") + (vec_concat:V4SF + (float_truncate:V2SF (match_operand:V2DF 1 "register_operand" "f")) + (float_truncate:V2SF (match_operand:V2DF 2 "register_operand" "f"))))] + "ISA_HAS_MSA" + "fexdo.w\t%w0,%w2,%w1" + [(set_attr "type" "simd_fcvt") + (set_attr "mode" "V4SF")]) + +(define_insn "msa_fexupl_w" + [(set (match_operand:V4SF 0 "register_operand" "=f") + (unspec:V4SF 
[(match_operand:V8HI 1 "register_operand" "f")] + UNSPEC_MSA_FEXUPL))] + "ISA_HAS_MSA" + "fexupl.w\t%w0,%w1" + [(set_attr "type" "simd_fcvt") + (set_attr "mode" "V4SF")]) + +(define_insn "msa_fexupl_d" + [(set (match_operand:V2DF 0 "register_operand" "=f") + (float_extend:V2DF + (vec_select:V2SF + (match_operand:V4SF 1 "register_operand" "f") + (parallel [(const_int 2) (const_int 3)]))))] + "ISA_HAS_MSA" + "fexupl.d\t%w0,%w1" + [(set_attr "type" "simd_fcvt") + (set_attr "mode" "V2DF")]) + +(define_insn "msa_fexupr_w" + [(set (match_operand:V4SF 0 "register_operand" "=f") + (unspec:V4SF [(match_operand:V8HI 1 "register_operand" "f")] + UNSPEC_MSA_FEXUPR))] + "ISA_HAS_MSA" + "fexupr.w\t%w0,%w1" + [(set_attr "type" "simd_fcvt") + (set_attr "mode" "V4SF")]) + +(define_insn "msa_fexupr_d" + [(set (match_operand:V2DF 0 "register_operand" "=f") + (float_extend:V2DF + (vec_select:V2SF + (match_operand:V4SF 1 "register_operand" "f") + (parallel [(const_int 0) (const_int 1)]))))] + "ISA_HAS_MSA" + "fexupr.d\t%w0,%w1" + [(set_attr "type" "simd_fcvt") + (set_attr "mode" "V2DF")]) + +(define_code_attr msabr + [(eq "bz") + (ne "bnz")]) + +(define_code_attr msabr_neg + [(eq "bnz") + (ne "bz")]) + +(define_insn "msa__" + [(set (pc) (if_then_else + (equality_op + (unspec:SI [(match_operand:MSA 1 "register_operand" "f")] + UNSPEC_MSA_BRANCH) + (match_operand:SI 2 "const_0_operand")) + (label_ref (match_operand 0)) + (pc)))] + "ISA_HAS_MSA" +{ + return mips_output_conditional_branch (insn, operands, + MIPS_BRANCH (".", + "%w1,%0"), + MIPS_BRANCH (".", + "%w1,%0")); +} + [(set_attr "type" "simd_branch") + (set_attr "mode" "") + (set_attr "compact_form" "never")]) + +(define_insn "msa__v_" + [(set (pc) (if_then_else + (equality_op + (unspec:SI [(match_operand:MSA 1 "register_operand" "f")] + UNSPEC_MSA_BRANCH_V) + (match_operand:SI 2 "const_0_operand")) + (label_ref (match_operand 0)) + (pc)))] + "ISA_HAS_MSA" +{ + return mips_output_conditional_branch (insn, operands, + MIPS_BRANCH 
(".v", "%w1,%0"), + MIPS_BRANCH (".v", + "%w1,%0")); +} + [(set_attr "type" "simd_branch") + (set_attr "mode" "TI") + (set_attr "compact_form" "never")]) diff --git a/gcc/config/mips/mips-protos.h b/gcc/config/mips/mips-protos.h index 01aad8295b3..09cf6626e27 100644 --- a/gcc/config/mips/mips-protos.h +++ b/gcc/config/mips/mips-protos.h @@ -197,8 +197,9 @@ extern bool mips_stack_address_p (rtx, machine_mode); extern int mips_address_insns (rtx, machine_mode, bool); extern int mips_const_insns (rtx); extern int mips_split_const_insns (rtx); +extern int mips_split_128bit_const_insns (rtx); extern int mips_load_store_insns (rtx, rtx_insn *); -extern int mips_idiv_insns (void); +extern int mips_idiv_insns (machine_mode); extern rtx_insn *mips_emit_move (rtx, rtx); #ifdef RTX_CODE extern void mips_emit_binary (enum rtx_code, rtx, rtx, rtx); @@ -216,6 +217,11 @@ extern bool mips_split_move_p (rtx, rtx, enum mips_split_type); extern void mips_split_move (rtx, rtx, enum mips_split_type); extern bool mips_split_move_insn_p (rtx, rtx, rtx); extern void mips_split_move_insn (rtx, rtx, rtx); +extern void mips_split_128bit_move (rtx, rtx); +extern bool mips_split_128bit_move_p (rtx, rtx); +extern void mips_split_msa_copy_d (rtx, rtx, rtx, rtx (*)(rtx, rtx, rtx)); +extern void mips_split_msa_insert_d (rtx, rtx, rtx, rtx); +extern void mips_split_msa_fill_d (rtx, rtx); extern const char *mips_output_move (rtx, rtx); extern bool mips_cfun_has_cprestore_slot_p (void); extern bool mips_cprestore_address_p (rtx, bool); @@ -278,6 +284,15 @@ extern void mips_expand_before_return (void); extern void mips_expand_epilogue (bool); extern bool mips_can_use_return_insn (void); +extern bool mips_const_vector_same_val_p (rtx, machine_mode); +extern bool mips_const_vector_same_bytes_p (rtx, machine_mode); +extern bool mips_const_vector_same_int_p (rtx, machine_mode, HOST_WIDE_INT, + HOST_WIDE_INT); +extern bool mips_const_vector_shuffle_set_p (rtx, machine_mode); +extern bool 
mips_const_vector_bitimm_set_p (rtx, machine_mode); +extern bool mips_const_vector_bitimm_clr_p (rtx, machine_mode); +extern rtx mips_msa_vec_parallel_const_half (machine_mode, bool); +extern rtx mips_gen_const_int_vector (machine_mode, int); extern bool mips_secondary_memory_needed (enum reg_class, enum reg_class, machine_mode); extern bool mips_cannot_change_mode_class (machine_mode, @@ -305,6 +320,7 @@ extern const char *mips_output_sync (void); extern const char *mips_output_sync_loop (rtx_insn *, rtx *); extern unsigned int mips_sync_loop_insns (rtx_insn *, rtx *); extern const char *mips_output_division (const char *, rtx *); +extern const char *mips_msa_output_division (const char *, rtx *); extern const char *mips_output_probe_stack_range (rtx, rtx); extern bool mips_hard_regno_rename_ok (unsigned int, unsigned int); extern unsigned int mips_hard_regno_nregs (int, machine_mode); @@ -343,6 +359,7 @@ extern void mips_expand_vec_reduc (rtx, rtx, rtx (*)(rtx, rtx, rtx)); extern void mips_expand_vec_minmax (rtx, rtx, rtx, rtx (*) (rtx, rtx, rtx), bool); +extern int mips_ldst_scaled_shift (machine_mode); extern bool mips_signed_immediate_p (unsigned HOST_WIDE_INT, int, int); extern bool mips_unsigned_immediate_p (unsigned HOST_WIDE_INT, int, int); extern const char *umips_output_save_restore (bool, rtx); @@ -372,5 +389,6 @@ extern mulsidi3_gen_fn mips_mulsidi3_gen_fn (enum rtx_code); #endif extern void mips_register_frame_header_opt (void); +extern void mips_expand_vec_cond_expr (machine_mode, machine_mode, rtx *); #endif /* ! GCC_MIPS_PROTOS_H */ diff --git a/gcc/config/mips/mips.c b/gcc/config/mips/mips.c index 399f231791d..06acd30ec25 100644 --- a/gcc/config/mips/mips.c +++ b/gcc/config/mips/mips.c @@ -244,6 +244,10 @@ enum mips_builtin_type { /* As above, but the instruction only sets a single $fcc register. */ MIPS_BUILTIN_CMP_SINGLE, + /* The function corresponds to an MSA conditional branch instruction + combined with a compare instruction. 
*/ + MIPS_BUILTIN_MSA_TEST_BRANCH, + /* For generating bposge32 branch instructions in MIPS32 DSP ASE. */ MIPS_BUILTIN_BPOSGE32 }; @@ -1126,6 +1130,7 @@ static int mips_register_move_cost (machine_mode, reg_class_t, reg_class_t); static unsigned int mips_function_arg_boundary (machine_mode, const_tree); static machine_mode mips_get_reg_raw_mode (int regno); +static rtx mips_gen_const_int_vector_shuffle (machine_mode, int); /* This hash table keeps track of implicit "mips16" and "nomips16" attributes for -mflip_mips16. It maps decl names onto a boolean mode setting. */ @@ -1835,6 +1840,140 @@ mips_symbol_binds_local_p (const_rtx x) : SYMBOL_REF_LOCAL_P (x)); } +/* Return true if OP is a constant vector with the number of units in MODE, + and each unit has the same bit set. */ + +bool +mips_const_vector_bitimm_set_p (rtx op, machine_mode mode) +{ + if (GET_CODE (op) == CONST_VECTOR && op != CONST0_RTX (mode)) + { + unsigned HOST_WIDE_INT val = UINTVAL (CONST_VECTOR_ELT (op, 0)); + int vlog2 = exact_log2 (val & GET_MODE_MASK (GET_MODE_INNER (mode))); + + if (vlog2 != -1) + { + gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT); + gcc_assert (vlog2 >= 0 && vlog2 <= GET_MODE_UNIT_BITSIZE (mode) - 1); + return mips_const_vector_same_val_p (op, mode); + } + } + + return false; +} + +/* Return true if OP is a constant vector with the number of units in MODE, + and each unit has the same bit clear. 
*/ + +bool +mips_const_vector_bitimm_clr_p (rtx op, machine_mode mode) +{ + if (GET_CODE (op) == CONST_VECTOR && op != CONSTM1_RTX (mode)) + { + unsigned HOST_WIDE_INT val = ~UINTVAL (CONST_VECTOR_ELT (op, 0)); + int vlog2 = exact_log2 (val & GET_MODE_MASK (GET_MODE_INNER (mode))); + + if (vlog2 != -1) + { + gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT); + gcc_assert (vlog2 >= 0 && vlog2 <= GET_MODE_UNIT_BITSIZE (mode) - 1); + return mips_const_vector_same_val_p (op, mode); + } + } + + return false; +} + +/* Return true if OP is a constant vector with the number of units in MODE, + and each unit has the same value. */ + +bool +mips_const_vector_same_val_p (rtx op, machine_mode mode) +{ + int i, nunits = GET_MODE_NUNITS (mode); + rtx first; + + if (GET_CODE (op) != CONST_VECTOR || GET_MODE (op) != mode) + return false; + + first = CONST_VECTOR_ELT (op, 0); + for (i = 1; i < nunits; i++) + if (!rtx_equal_p (first, CONST_VECTOR_ELT (op, i))) + return false; + + return true; +} + +/* Return true if OP is a constant vector with the number of units in MODE, + and each unit has the same value as well as replicated bytes in the value. +*/ + +bool +mips_const_vector_same_bytes_p (rtx op, machine_mode mode) +{ + int i, bytes; + HOST_WIDE_INT val, first_byte; + rtx first; + + if (!mips_const_vector_same_val_p (op, mode)) + return false; + + first = CONST_VECTOR_ELT (op, 0); + bytes = GET_MODE_UNIT_SIZE (mode); + val = INTVAL (first); + first_byte = val & 0xff; + for (i = 1; i < bytes; i++) + { + val >>= 8; + if ((val & 0xff) != first_byte) + return false; + } + + return true; +} + +/* Return true if OP is a constant vector with the number of units in MODE, + and each unit has the same integer value in the range [LOW, HIGH]. 
*/ + +bool +mips_const_vector_same_int_p (rtx op, machine_mode mode, HOST_WIDE_INT low, + HOST_WIDE_INT high) +{ + HOST_WIDE_INT value; + rtx elem0; + + if (!mips_const_vector_same_val_p (op, mode)) + return false; + + elem0 = CONST_VECTOR_ELT (op, 0); + if (!CONST_INT_P (elem0)) + return false; + + value = INTVAL (elem0); + return (value >= low && value <= high); +} + +/* Return true if OP is a constant vector with repeated 4-element sets + in mode MODE. */ + +bool +mips_const_vector_shuffle_set_p (rtx op, machine_mode mode) +{ + int nunits = GET_MODE_NUNITS (mode); + int nsets = nunits / 4; + int set = 0; + int i, j; + + /* Check if we have the same 4-element sets. */ + for (j = 0; j < nsets; j++, set = 4 * j) + for (i = 0; i < 4; i++) + if ((INTVAL (XVECEXP (op, 0, i)) + != (INTVAL (XVECEXP (op, 0, set + i)) - set)) + || !IN_RANGE (INTVAL (XVECEXP (op, 0, set + i)), 0, set + 3)) + return false; + return true; +} + /* Return true if rtx constants of mode MODE should be put into a small data section. */ @@ -2206,6 +2345,11 @@ mips_symbol_insns_1 (enum mips_symbol_type type, machine_mode mode) static int mips_symbol_insns (enum mips_symbol_type type, machine_mode mode) { + /* MSA LD.* and ST.* cannot support loading symbols via an immediate + operand. */ + if (MSA_SUPPORTED_MODE_P (mode)) + return 0; + return mips_symbol_insns_1 (type, mode) * (TARGET_MIPS16 ? 2 : 1); } @@ -2325,6 +2469,12 @@ mips_valid_offset_p (rtx x, machine_mode mode) && !SMALL_OPERAND (INTVAL (x) + GET_MODE_SIZE (mode) - UNITS_PER_WORD)) return false; + /* MSA LD.* and ST.* supports 10-bit signed offsets. */ + if (MSA_SUPPORTED_MODE_P (mode) + && !mips_signed_immediate_p (INTVAL (x), 10, + mips_ldst_scaled_shift (mode))) + return false; + return true; } @@ -2351,6 +2501,10 @@ mips_valid_lo_sum_p (enum mips_symbol_type symbol_type, machine_mode mode) && GET_MODE_BITSIZE (mode) > GET_MODE_ALIGNMENT (mode)) return false; + /* MSA LD.* and ST.* cannot support loading symbols via %lo($base). 
*/ + if (MSA_SUPPORTED_MODE_P (mode)) + return false; + return true; } @@ -2480,6 +2634,8 @@ mips_lx_address_p (rtx addr, machine_mode mode) return true; if (ISA_HAS_LDX && mode == DImode) return true; + if (MSA_SUPPORTED_MODE_P (mode)) + return true; return false; } @@ -2517,6 +2673,7 @@ mips_address_insns (rtx x, machine_mode mode, bool might_split_p) { struct mips_address_info addr; int factor; + bool msa_p = (!might_split_p && MSA_SUPPORTED_MODE_P (mode)); /* BLKmode is used for single unaligned loads and stores and should not count as a multiword mode. (GET_MODE_SIZE (BLKmode) is pretty @@ -2531,6 +2688,15 @@ mips_address_insns (rtx x, machine_mode mode, bool might_split_p) switch (addr.type) { case ADDRESS_REG: + if (msa_p) + { + /* MSA LD.* and ST.* supports 10-bit signed offsets. */ + if (mips_signed_immediate_p (INTVAL (addr.offset), 10, + mips_ldst_scaled_shift (mode))) + return 1; + else + return 0; + } if (TARGET_MIPS16 && !mips16_unextended_reference_p (mode, addr.reg, UINTVAL (addr.offset))) @@ -2538,13 +2704,13 @@ mips_address_insns (rtx x, machine_mode mode, bool might_split_p) return factor; case ADDRESS_LO_SUM: - return TARGET_MIPS16 ? factor * 2 : factor; + return msa_p ? 0 : TARGET_MIPS16 ? factor * 2 : factor; case ADDRESS_CONST_INT: - return factor; + return msa_p ? 0 : factor; case ADDRESS_SYMBOLIC: - return factor * mips_symbol_insns (addr.symbol_type, mode); + return msa_p ? 0 : factor * mips_symbol_insns (addr.symbol_type, mode); } return 0; } @@ -2568,6 +2734,19 @@ mips_signed_immediate_p (unsigned HOST_WIDE_INT x, int bits, int shift = 0) return mips_unsigned_immediate_p (x, bits, shift); } +/* Return the scale shift that applied to MSA LD/ST address offset. 
*/ + +int +mips_ldst_scaled_shift (machine_mode mode) +{ + int shift = exact_log2 (GET_MODE_UNIT_SIZE (mode)); + + if (shift < 0 || shift > 8) + gcc_unreachable (); + + return shift; +} + /* Return true if X is legitimate for accessing values of mode MODE, if it is based on a MIPS16 register, and if the offset satisfies OFFSET_PREDICATE. */ @@ -2663,8 +2842,12 @@ mips_const_insns (rtx x) return mips_build_integer (codes, INTVAL (x)); - case CONST_DOUBLE: case CONST_VECTOR: + if (ISA_HAS_MSA + && mips_const_vector_same_int_p (x, GET_MODE (x), -512, 511)) + return 1; + /* Fall through. */ + case CONST_DOUBLE: /* Allow zeros for normal mode, where we can use $0. */ return !TARGET_MIPS16 && x == CONST0_RTX (GET_MODE (x)) ? 1 : 0; @@ -2724,6 +2907,26 @@ mips_split_const_insns (rtx x) return low + high; } +/* Return one word of 128-bit value OP, taking into account the fixed + endianness of certain registers. BYTE selects from the byte address. */ + +rtx +mips_subword_at_byte (rtx op, unsigned int byte) +{ + machine_mode mode; + + mode = GET_MODE (op); + if (mode == VOIDmode) + mode = TImode; + + gcc_assert (!FP_REG_RTX_P (op)); + + if (MEM_P (op)) + return mips_rewrite_small_data (adjust_address (op, word_mode, byte)); + + return simplify_gen_subreg (word_mode, op, mode, byte); +} + /* Return the number of instructions needed to implement INSN, given that it loads from or stores to MEM. Assume that BASE_INSN_LENGTH is the length of one instruction. */ @@ -2754,14 +2957,14 @@ mips_load_store_insns (rtx mem, rtx_insn *insn) assuming that BASE_INSN_LENGTH is the length of one instruction. */ int -mips_idiv_insns (void) +mips_idiv_insns (machine_mode mode) { int count; count = 1; if (TARGET_CHECK_ZERO_DIV) { - if (GENERATE_DIVIDE_TRAPS) + if (GENERATE_DIVIDE_TRAPS && !MSA_SUPPORTED_MODE_P (mode)) count++; else count += 2; @@ -2771,6 +2974,7 @@ mips_idiv_insns (void) count++; return count; } + /* Emit a move from SRC to DEST. 
Assume that the move expanders can handle all moves if !can_create_pseudo_p (). The distinction is @@ -3478,7 +3682,14 @@ mips_legitimize_const_move (machine_mode mode, rtx dest, rtx src) bool mips_legitimize_move (machine_mode mode, rtx dest, rtx src) { - if (!register_operand (dest, mode) && !reg_or_0_operand (src, mode)) + /* Both src and dest are non-registers; one special case is supported where + the source is (const_int 0) and the store can source the zero register. + MIPS16 and MSA are never able to source the zero register directly in + memory operations. */ + if (!register_operand (dest, mode) + && !register_operand (src, mode) + && (TARGET_MIPS16 || !const_0_operand (src, mode) + || MSA_SUPPORTED_MODE_P (mode))) { mips_emit_move (dest, force_reg (mode, src)); return true; @@ -4044,6 +4255,10 @@ mips_rtx_costs (rtx x, machine_mode mode, int outer_code, case NE: case UNORDERED: case LTGT: + case UNGE: + case UNGT: + case UNLE: + case UNLT: /* Branch comparisons have VOIDmode, so use the first operand's mode instead. */ mode = GET_MODE (XEXP (x, 0)); @@ -4208,7 +4423,7 @@ mips_rtx_costs (rtx x, machine_mode mode, int outer_code, *total += set_src_cost (XEXP (x, 0), mode, speed); return true; } - *total = COSTS_N_INSNS (mips_idiv_insns ()); + *total = COSTS_N_INSNS (mips_idiv_insns (mode)); } else if (mode == DImode) *total = mips_cost->int_div_di; @@ -4514,6 +4729,10 @@ mips_split_move_p (rtx dest, rtx src, enum mips_split_type split_type) return false; } + /* Check if MSA moves need splitting. */ + if (MSA_SUPPORTED_MODE_P (GET_MODE (dest))) + return mips_split_128bit_move_p (dest, src); + /* Otherwise split all multiword moves. 
*/ return size > UNITS_PER_WORD; } @@ -4527,7 +4746,9 @@ mips_split_move (rtx dest, rtx src, enum mips_split_type split_type) rtx low_dest; gcc_checking_assert (mips_split_move_p (dest, src, split_type)); - if (FP_REG_RTX_P (dest) || FP_REG_RTX_P (src)) + if (MSA_SUPPORTED_MODE_P (GET_MODE (dest))) + mips_split_128bit_move (dest, src); + else if (FP_REG_RTX_P (dest) || FP_REG_RTX_P (src)) { if (!TARGET_64BIT && GET_MODE (dest) == DImode) emit_insn (gen_move_doubleword_fprdi (dest, src)); @@ -4600,6 +4821,199 @@ mips_insn_split_type (rtx insn) return SPLIT_IF_NECESSARY; } +/* Return true if a 128-bit move from SRC to DEST should be split. */ + +bool +mips_split_128bit_move_p (rtx dest, rtx src) +{ + /* MSA-to-MSA moves can be done in a single instruction. */ + if (FP_REG_RTX_P (src) && FP_REG_RTX_P (dest)) + return false; + + /* Check for MSA loads and stores. */ + if (FP_REG_RTX_P (dest) && MEM_P (src)) + return false; + if (FP_REG_RTX_P (src) && MEM_P (dest)) + return false; + + /* Check for MSA set to an immediate const vector with valid replicated + element. */ + if (FP_REG_RTX_P (dest) + && mips_const_vector_same_int_p (src, GET_MODE (src), -512, 511)) + return false; + + /* Check for MSA load zero immediate. */ + if (FP_REG_RTX_P (dest) && src == CONST0_RTX (GET_MODE (src))) + return false; + + return true; +} + +/* Split a 128-bit move from SRC to DEST. 
*/ + +void +mips_split_128bit_move (rtx dest, rtx src) +{ + int byte, index; + rtx low_dest, low_src, d, s; + + if (FP_REG_RTX_P (dest)) + { + gcc_assert (!MEM_P (src)); + + rtx new_dest = dest; + if (!TARGET_64BIT) + { + if (GET_MODE (dest) != V4SImode) + new_dest = simplify_gen_subreg (V4SImode, dest, GET_MODE (dest), 0); + } + else + { + if (GET_MODE (dest) != V2DImode) + new_dest = simplify_gen_subreg (V2DImode, dest, GET_MODE (dest), 0); + } + + for (byte = 0, index = 0; byte < GET_MODE_SIZE (TImode); + byte += UNITS_PER_WORD, index++) + { + s = mips_subword_at_byte (src, byte); + if (!TARGET_64BIT) + emit_insn (gen_msa_insert_w (new_dest, s, new_dest, + GEN_INT (1 << index))); + else + emit_insn (gen_msa_insert_d (new_dest, s, new_dest, + GEN_INT (1 << index))); + } + } + else if (FP_REG_RTX_P (src)) + { + gcc_assert (!MEM_P (dest)); + + rtx new_src = src; + if (!TARGET_64BIT) + { + if (GET_MODE (src) != V4SImode) + new_src = simplify_gen_subreg (V4SImode, src, GET_MODE (src), 0); + } + else + { + if (GET_MODE (src) != V2DImode) + new_src = simplify_gen_subreg (V2DImode, src, GET_MODE (src), 0); + } + + for (byte = 0, index = 0; byte < GET_MODE_SIZE (TImode); + byte += UNITS_PER_WORD, index++) + { + d = mips_subword_at_byte (dest, byte); + if (!TARGET_64BIT) + emit_insn (gen_msa_copy_s_w (d, new_src, GEN_INT (index))); + else + emit_insn (gen_msa_copy_s_d (d, new_src, GEN_INT (index))); + } + } + else + { + low_dest = mips_subword_at_byte (dest, 0); + low_src = mips_subword_at_byte (src, 0); + gcc_assert (REG_P (low_dest) && REG_P (low_src)); + /* Make sure the source register is not written before reading. 
*/ + if (REGNO (low_dest) <= REGNO (low_src)) + { + for (byte = 0; byte < GET_MODE_SIZE (TImode); + byte += UNITS_PER_WORD) + { + d = mips_subword_at_byte (dest, byte); + s = mips_subword_at_byte (src, byte); + mips_emit_move (d, s); + } + } + else + { + for (byte = GET_MODE_SIZE (TImode) - UNITS_PER_WORD; byte >= 0; + byte -= UNITS_PER_WORD) + { + d = mips_subword_at_byte (dest, byte); + s = mips_subword_at_byte (src, byte); + mips_emit_move (d, s); + } + } + } +} + +/* Split a COPY_S.D with operands DEST, SRC and INDEX. GEN is a function + used to generate subregs. */ + +void +mips_split_msa_copy_d (rtx dest, rtx src, rtx index, + rtx (*gen_fn)(rtx, rtx, rtx)) +{ + gcc_assert ((GET_MODE (src) == V2DImode && GET_MODE (dest) == DImode) + || (GET_MODE (src) == V2DFmode && GET_MODE (dest) == DFmode)); + + /* Note that low is always from the lower index, and high is always + from the higher index. */ + rtx low = mips_subword (dest, false); + rtx high = mips_subword (dest, true); + rtx new_src = simplify_gen_subreg (V4SImode, src, GET_MODE (src), 0); + + emit_insn (gen_fn (low, new_src, GEN_INT (INTVAL (index) * 2))); + emit_insn (gen_fn (high, new_src, GEN_INT (INTVAL (index) * 2 + 1))); +} + +/* Split a INSERT.D with operand DEST, SRC1.INDEX and SRC2. */ + +void +mips_split_msa_insert_d (rtx dest, rtx src1, rtx index, rtx src2) +{ + int i; + gcc_assert (GET_MODE (dest) == GET_MODE (src1)); + gcc_assert ((GET_MODE (dest) == V2DImode + && (GET_MODE (src2) == DImode || src2 == const0_rtx)) + || (GET_MODE (dest) == V2DFmode && GET_MODE (src2) == DFmode)); + + /* Note that low is always from the lower index, and high is always + from the higher index. 
*/ + rtx low = mips_subword (src2, false); + rtx high = mips_subword (src2, true); + rtx new_dest = simplify_gen_subreg (V4SImode, dest, GET_MODE (dest), 0); + rtx new_src1 = simplify_gen_subreg (V4SImode, src1, GET_MODE (src1), 0); + i = exact_log2 (INTVAL (index)); + gcc_assert (i != -1); + + emit_insn (gen_msa_insert_w (new_dest, low, new_src1, + GEN_INT (1 << (i * 2)))); + emit_insn (gen_msa_insert_w (new_dest, high, new_dest, + GEN_INT (1 << (i * 2 + 1)))); +} + +/* Split FILL.D. */ + +void +mips_split_msa_fill_d (rtx dest, rtx src) +{ + gcc_assert ((GET_MODE (dest) == V2DImode + && (GET_MODE (src) == DImode || src == const0_rtx)) + || (GET_MODE (dest) == V2DFmode && GET_MODE (src) == DFmode)); + + /* Note that low is always from the lower index, and high is always + from the higher index. */ + rtx low, high; + if (src == const0_rtx) + { + low = src; + high = src; + } + else + { + low = mips_subword (src, false); + high = mips_subword (src, true); + } + rtx new_dest = simplify_gen_subreg (V4SImode, dest, GET_MODE (dest), 0); + emit_insn (gen_msa_fill_w (new_dest, low)); + emit_insn (gen_msa_insert_w (new_dest, high, new_dest, GEN_INT (1 << 1))); + emit_insn (gen_msa_insert_w (new_dest, high, new_dest, GEN_INT (1 << 3))); +} + /* Return true if a move from SRC to DEST in INSN should be split. 
*/ bool @@ -4623,19 +5037,25 @@ mips_split_move_insn (rtx dest, rtx src, rtx insn) const char * mips_output_move (rtx dest, rtx src) { - enum rtx_code dest_code, src_code; - machine_mode mode; + enum rtx_code dest_code = GET_CODE (dest); + enum rtx_code src_code = GET_CODE (src); + machine_mode mode = GET_MODE (dest); + bool dbl_p = (GET_MODE_SIZE (mode) == 8); + bool msa_p = MSA_SUPPORTED_MODE_P (mode); enum mips_symbol_type symbol_type; - bool dbl_p; - - dest_code = GET_CODE (dest); - src_code = GET_CODE (src); - mode = GET_MODE (dest); - dbl_p = (GET_MODE_SIZE (mode) == 8); if (mips_split_move_p (dest, src, SPLIT_IF_NECESSARY)) return "#"; + if (msa_p + && dest_code == REG && FP_REG_P (REGNO (dest)) + && src_code == CONST_VECTOR + && CONST_INT_P (CONST_VECTOR_ELT (src, 0))) + { + gcc_assert (mips_const_vector_same_int_p (src, mode, -512, 511)); + return "ldi.%v0\t%w0,%E1"; + } + if ((src_code == REG && GP_REG_P (REGNO (src))) || (!TARGET_MIPS16 && src == CONST0_RTX (mode))) { @@ -4666,7 +5086,15 @@ mips_output_move (rtx dest, rtx src) } if (FP_REG_P (REGNO (dest))) - return dbl_p ? "dmtc1\t%z1,%0" : "mtc1\t%z1,%0"; + { + if (msa_p) + { + gcc_assert (src == CONST0_RTX (GET_MODE (src))); + return "ldi.%v0\t%w0,0"; + } + + return dbl_p ? "dmtc1\t%z1,%0" : "mtc1\t%z1,%0"; + } if (ALL_COP_REG_P (REGNO (dest))) { @@ -4683,6 +5111,7 @@ mips_output_move (rtx dest, rtx src) case 2: return "sh\t%z1,%0"; case 4: return "sw\t%z1,%0"; case 8: return "sd\t%z1,%0"; + default: gcc_unreachable (); } } if (dest_code == REG && GP_REG_P (REGNO (dest))) @@ -4711,7 +5140,10 @@ mips_output_move (rtx dest, rtx src) } if (FP_REG_P (REGNO (src))) - return dbl_p ? "dmfc1\t%0,%1" : "mfc1\t%0,%1"; + { + gcc_assert (!msa_p); + return dbl_p ? 
"dmfc1\t%0,%1" : "mfc1\t%0,%1"; + } if (ALL_COP_REG_P (REGNO (src))) { @@ -4729,6 +5161,7 @@ mips_output_move (rtx dest, rtx src) case 2: return "lhu\t%0,%1"; case 4: return "lw\t%0,%1"; case 8: return "ld\t%0,%1"; + default: gcc_unreachable (); } if (src_code == CONST_INT) @@ -4775,17 +5208,29 @@ mips_output_move (rtx dest, rtx src) { if (GET_MODE (dest) == V2SFmode) return "mov.ps\t%0,%1"; + else if (msa_p) + return "move.v\t%w0,%w1"; else return dbl_p ? "mov.d\t%0,%1" : "mov.s\t%0,%1"; } if (dest_code == MEM) - return dbl_p ? "sdc1\t%1,%0" : "swc1\t%1,%0"; + { + if (msa_p) + return "st.%v1\t%w1,%0"; + + return dbl_p ? "sdc1\t%1,%0" : "swc1\t%1,%0"; + } } if (dest_code == REG && FP_REG_P (REGNO (dest))) { if (src_code == MEM) - return dbl_p ? "ldc1\t%0,%1" : "lwc1\t%0,%1"; + { + if (msa_p) + return "ld.%v0\t%w0,%1"; + + return dbl_p ? "ldc1\t%0,%1" : "lwc1\t%0,%1"; + } } if (dest_code == REG && ALL_COP_REG_P (REGNO (dest)) && src_code == MEM) { @@ -8455,10 +8900,14 @@ mips_print_operand_punct_valid_p (unsigned char code) /* Implement TARGET_PRINT_OPERAND. The MIPS-specific operand codes are: + 'E' Print CONST_INT OP element 0 of a replicated CONST_VECTOR in decimal. 'X' Print CONST_INT OP in hexadecimal format. 'x' Print the low 16 bits of CONST_INT OP in hexadecimal format. 'd' Print CONST_INT OP in decimal. + 'B' Print CONST_INT OP element 0 of a replicated CONST_VECTOR + as an unsigned byte [0..255]. 'm' Print one less than CONST_INT OP in decimal. + 'y' Print exact log2 of CONST_INT OP in decimal. 'h' Print the high-part relocation associated with OP, after stripping any outermost HIGH. 'R' Print the low-part relocation associated with OP. @@ -8466,6 +8915,7 @@ mips_print_operand_punct_valid_p (unsigned char code) 'N' Print the inverse of the integer branch condition for comparison OP. 'F' Print the FPU branch condition for comparison OP. 'W' Print the inverse of the FPU branch condition for comparison OP. + 'w' Print a MSA register. 
'T' Print 'f' for (eq:CC ...), 't' for (ne:CC ...), 'z' for (eq:?I ...), 'n' for (ne:?I ...). 't' Like 'T', but with the EQ/NE cases reversed @@ -8476,7 +8926,11 @@ mips_print_operand_punct_valid_p (unsigned char code) 'L' Print the low-order register in a double-word register operand. 'M' Print high-order register in a double-word register operand. 'z' Print $0 if OP is zero, otherwise print OP normally. - 'b' Print the address of a memory operand, without offset. */ + 'b' Print the address of a memory operand, without offset. + 'v' Print the insn size suffix b, h, w or d for vector modes V16QI, V8HI, + V4SI, V2SI, and w, d for vector modes V4SF, V2DF respectively. + 'V' Print exact log2 of CONST_INT OP element 0 of a replicated + CONST_VECTOR in decimal. */ static void mips_print_operand (FILE *file, rtx op, int letter) @@ -8494,6 +8948,18 @@ mips_print_operand (FILE *file, rtx op, int letter) switch (letter) { + case 'E': + if (GET_CODE (op) == CONST_VECTOR) + { + gcc_assert (mips_const_vector_same_val_p (op, GET_MODE (op))); + op = CONST_VECTOR_ELT (op, 0); + gcc_assert (CONST_INT_P (op)); + fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (op)); + } + else + output_operand_lossage ("invalid use of '%%%c'", letter); + break; + case 'X': if (CONST_INT_P (op)) fprintf (file, HOST_WIDE_INT_PRINT_HEX, INTVAL (op)); @@ -8515,6 +8981,19 @@ mips_print_operand (FILE *file, rtx op, int letter) output_operand_lossage ("invalid use of '%%%c'", letter); break; + case 'B': + if (GET_CODE (op) == CONST_VECTOR) + { + gcc_assert (mips_const_vector_same_val_p (op, GET_MODE (op))); + op = CONST_VECTOR_ELT (op, 0); + gcc_assert (CONST_INT_P (op)); + unsigned HOST_WIDE_INT val8 = UINTVAL (op) & GET_MODE_MASK (QImode); + fprintf (file, HOST_WIDE_INT_PRINT_UNSIGNED, val8); + } + else + output_operand_lossage ("invalid use of '%%%c'", letter); + break; + case 'm': if (CONST_INT_P (op)) fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (op) - 1); @@ -8522,6 +9001,34 @@ mips_print_operand 
(FILE *file, rtx op, int letter) output_operand_lossage ("invalid use of '%%%c'", letter); break; + case 'y': + if (CONST_INT_P (op)) + { + int val = exact_log2 (INTVAL (op)); + if (val != -1) + fprintf (file, "%d", val); + else + output_operand_lossage ("invalid use of '%%%c'", letter); + } + else + output_operand_lossage ("invalid use of '%%%c'", letter); + break; + + case 'V': + if (GET_CODE (op) == CONST_VECTOR) + { + machine_mode mode = GET_MODE_INNER (GET_MODE (op)); + unsigned HOST_WIDE_INT val = UINTVAL (CONST_VECTOR_ELT (op, 0)); + int vlog2 = exact_log2 (val & GET_MODE_MASK (mode)); + if (vlog2 != -1) + fprintf (file, "%d", vlog2); + else + output_operand_lossage ("invalid use of '%%%c'", letter); + } + else + output_operand_lossage ("invalid use of '%%%c'", letter); + break; + case 'h': if (code == HIGH) op = XEXP (op, 0); @@ -8582,6 +9089,35 @@ mips_print_operand (FILE *file, rtx op, int letter) output_operand_lossage ("invalid use of '%%%c'", letter); break; + case 'w': + if (code == REG && MSA_REG_P (REGNO (op))) + fprintf (file, "$w%s", ®_names[REGNO (op)][2]); + else + output_operand_lossage ("invalid use of '%%%c'", letter); + break; + + case 'v': + switch (GET_MODE (op)) + { + case V16QImode: + fprintf (file, "b"); + break; + case V8HImode: + fprintf (file, "h"); + break; + case V4SImode: + case V4SFmode: + fprintf (file, "w"); + break; + case V2DImode: + case V2DFmode: + fprintf (file, "d"); + break; + default: + output_operand_lossage ("invalid use of '%%%c'", letter); + } + break; + default: switch (code) { @@ -9316,6 +9852,10 @@ mips_file_start (void) attr = 1; fprintf (asm_out_file, "\t.gnu_attribute 4, %d\n", attr); + + /* 128-bit MSA. 
*/ + if (ISA_HAS_MSA) + fprintf (asm_out_file, "\t.gnu_attribute 8, 1\n"); } #endif #endif @@ -12159,9 +12699,13 @@ mips_hard_regno_mode_ok_p (unsigned int regno, machine_mode mode) size = GET_MODE_SIZE (mode); mclass = GET_MODE_CLASS (mode); - if (GP_REG_P (regno) && mode != CCFmode) + if (GP_REG_P (regno) && mode != CCFmode && !MSA_SUPPORTED_MODE_P (mode)) return ((regno - GP_REG_FIRST) & 1) == 0 || size <= UNITS_PER_WORD; + /* For MSA, allow TImode and 128-bit vector modes in all FPR. */ + if (FP_REG_P (regno) && MSA_SUPPORTED_MODE_P (mode)) + return true; + if (FP_REG_P (regno) && (((regno - FP_REG_FIRST) % MAX_FPRS_PER_FMT) == 0 || (MIN_FPRS_PER_FMT == 1 && size <= UNITS_PER_FPREG))) @@ -12277,8 +12821,13 @@ mips_hard_regno_nregs (int regno, machine_mode mode) return (GET_MODE_SIZE (mode) + 3) / 4; if (FP_REG_P (regno)) - return (GET_MODE_SIZE (mode) + UNITS_PER_FPREG - 1) / UNITS_PER_FPREG; - + { + if (MSA_SUPPORTED_MODE_P (mode)) + return 1; + + return (GET_MODE_SIZE (mode) + UNITS_PER_FPREG - 1) / UNITS_PER_FPREG; + } + /* All other registers are word-sized. */ return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD; } @@ -12298,12 +12847,19 @@ mips_class_max_nregs (enum reg_class rclass, machine_mode mode) { if (HARD_REGNO_MODE_OK (ST_REG_FIRST, mode)) size = MIN (size, 4); + AND_COMPL_HARD_REG_SET (left, reg_class_contents[(int) ST_REGS]); } if (hard_reg_set_intersect_p (left, reg_class_contents[(int) FP_REGS])) { if (HARD_REGNO_MODE_OK (FP_REG_FIRST, mode)) - size = MIN (size, UNITS_PER_FPREG); + { + if (MSA_SUPPORTED_MODE_P (mode)) + size = MIN (size, UNITS_PER_MSA_REG); + else + size = MIN (size, UNITS_PER_FPREG); + } + AND_COMPL_HARD_REG_SET (left, reg_class_contents[(int) FP_REGS]); } if (!hard_reg_set_empty_p (left)) @@ -12324,6 +12880,10 @@ mips_cannot_change_mode_class (machine_mode from, && INTEGRAL_MODE_P (from) && INTEGRAL_MODE_P (to)) return false; + /* Allow conversions between different MSA vector modes. 
*/ + if (MSA_SUPPORTED_MODE_P (from) && MSA_SUPPORTED_MODE_P (to)) + return false; + /* Otherwise, there are several problems with changing the modes of values in floating-point registers: @@ -12359,7 +12919,8 @@ mips_small_register_classes_for_mode_p (machine_mode mode return TARGET_MIPS16; } -/* Return true if moves in mode MODE can use the FPU's mov.fmt instruction. */ +/* Return true if moves in mode MODE can use the FPU's mov.fmt instruction, + or use the MSA's move.v instruction. */ static bool mips_mode_ok_for_mov_fmt_p (machine_mode mode) @@ -12377,7 +12938,7 @@ mips_mode_ok_for_mov_fmt_p (machine_mode mode) return TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT; default: - return false; + return MSA_SUPPORTED_MODE_P (mode); } } @@ -12624,6 +13185,10 @@ mips_secondary_reload_class (enum reg_class rclass, pairs of lwc1s and swc1s if ldc1 and sdc1 are not supported. */ return NO_REGS; + if (MEM_P (x) && MSA_SUPPORTED_MODE_P (mode)) + /* In this case we can use MSA LD.* and ST.*. */ + return NO_REGS; + if (GP_REG_P (regno) || x == CONST0_RTX (mode)) /* In this case we can use mtc1, mfc1, dmtc1 or dmfc1. */ return NO_REGS; @@ -12693,7 +13258,7 @@ mips_vector_mode_supported_p (machine_mode mode) return TARGET_LOONGSON_VECTORS; default: - return false; + return MSA_SUPPORTED_MODE_P (mode); } } @@ -12712,14 +13277,46 @@ mips_scalar_mode_supported_p (machine_mode mode) /* Implement TARGET_VECTORIZE_PREFERRED_SIMD_MODE. 
*/ static machine_mode -mips_preferred_simd_mode (machine_mode mode ATTRIBUTE_UNUSED) +mips_preferred_simd_mode (machine_mode mode) { if (TARGET_PAIRED_SINGLE_FLOAT && mode == SFmode) return V2SFmode; + + if (!ISA_HAS_MSA) + return word_mode; + + switch (mode) + { + case QImode: + return V16QImode; + case HImode: + return V8HImode; + case SImode: + return V4SImode; + case DImode: + return V2DImode; + + case SFmode: + return V4SFmode; + + case DFmode: + return V2DFmode; + + default: + break; + } return word_mode; } +/* Implement TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES. */ + +static unsigned int +mips_autovectorize_vector_sizes (void) +{ + return ISA_HAS_MSA ? 16 : 0; +} + /* Implement TARGET_INIT_LIBFUNCS. */ static void @@ -13727,6 +14324,25 @@ mips_output_division (const char *division, rtx *operands) } return s; } + +/* Return the assembly code for MSA DIV_{S,U}.DF or MOD_{S,U}.DF instructions, + which has the operands given by OPERANDS. Add in a divide-by-zero check + if needed. */ + +const char * +mips_msa_output_division (const char *division, rtx *operands) +{ + const char *s; + + s = division; + if (TARGET_CHECK_ZERO_DIV) + { + output_asm_insn ("%(bnz.%v0\t%w2,1f", operands); + output_asm_insn (s, operands); + s = "break\t7%)\n1:"; + } + return s; +} /* Return true if destination of IN_INSN is used as add source in OUT_INSN. Both IN_INSN and OUT_INSN are of type fmadd. Example: @@ -14480,6 +15096,7 @@ AVAIL_NON_MIPS16 (dsp_64, TARGET_64BIT && TARGET_DSP) AVAIL_NON_MIPS16 (dspr2_32, !TARGET_64BIT && TARGET_DSPR2) AVAIL_NON_MIPS16 (loongson, TARGET_LOONGSON_VECTORS) AVAIL_NON_MIPS16 (cache, TARGET_CACHE_BUILTIN) +AVAIL_NON_MIPS16 (msa, TARGET_MSA) /* Construct a mips_builtin_description from the given arguments. 
@@ -14596,6 +15213,38 @@ AVAIL_NON_MIPS16 (cache, TARGET_CACHE_BUILTIN) #define LOONGSON_BUILTIN_SUFFIX(INSN, SUFFIX, FUNCTION_TYPE) \ LOONGSON_BUILTIN_ALIAS (INSN, INSN ## _ ## SUFFIX, FUNCTION_TYPE) +/* Define an MSA MIPS_BUILTIN_DIRECT function __builtin_msa_<INSN> + for instruction CODE_FOR_msa_<INSN>. FUNCTION_TYPE is a builtin_description + field. */ +#define MSA_BUILTIN(INSN, FUNCTION_TYPE) \ + { CODE_FOR_msa_ ## INSN, MIPS_FP_COND_f, \ + "__builtin_msa_" #INSN, MIPS_BUILTIN_DIRECT, \ + FUNCTION_TYPE, mips_builtin_avail_msa } + +/* Define a remapped MSA MIPS_BUILTIN_DIRECT function __builtin_msa_<INSN> + for instruction CODE_FOR_msa_<INSN2>. FUNCTION_TYPE is + a builtin_description field. */ +#define MSA_BUILTIN_REMAP(INSN, INSN2, FUNCTION_TYPE) \ + { CODE_FOR_msa_ ## INSN2, MIPS_FP_COND_f, \ + "__builtin_msa_" #INSN, MIPS_BUILTIN_DIRECT, \ + FUNCTION_TYPE, mips_builtin_avail_msa } + +/* Define an MSA MIPS_BUILTIN_MSA_TEST_BRANCH function __builtin_msa_<INSN> + for instruction CODE_FOR_msa_<INSN>. FUNCTION_TYPE is a builtin_description + field. */ +#define MSA_BUILTIN_TEST_BRANCH(INSN, FUNCTION_TYPE) \ + { CODE_FOR_msa_ ## INSN, MIPS_FP_COND_f, \ + "__builtin_msa_" #INSN, MIPS_BUILTIN_MSA_TEST_BRANCH, \ + FUNCTION_TYPE, mips_builtin_avail_msa } + +/* Define an MSA MIPS_BUILTIN_DIRECT_NO_TARGET function __builtin_msa_<INSN> + for instruction CODE_FOR_msa_<INSN>. FUNCTION_TYPE is a builtin_description + field. 
*/ +#define MSA_NO_TARGET_BUILTIN(INSN, FUNCTION_TYPE) \ + { CODE_FOR_msa_ ## INSN, MIPS_FP_COND_f, \ + "__builtin_msa_" #INSN, MIPS_BUILTIN_DIRECT_NO_TARGET, \ + FUNCTION_TYPE, mips_builtin_avail_msa } + #define CODE_FOR_mips_sqrt_ps CODE_FOR_sqrtv2sf2 #define CODE_FOR_mips_addq_ph CODE_FOR_addv2hi3 #define CODE_FOR_mips_addu_qb CODE_FOR_addv4qi3 @@ -14636,6 +15285,203 @@ AVAIL_NON_MIPS16 (cache, TARGET_CACHE_BUILTIN) #define CODE_FOR_loongson_psubush CODE_FOR_ussubv4hi3 #define CODE_FOR_loongson_psubusb CODE_FOR_ussubv8qi3 +#define CODE_FOR_msa_adds_s_b CODE_FOR_ssaddv16qi3 +#define CODE_FOR_msa_adds_s_h CODE_FOR_ssaddv8hi3 +#define CODE_FOR_msa_adds_s_w CODE_FOR_ssaddv4si3 +#define CODE_FOR_msa_adds_s_d CODE_FOR_ssaddv2di3 +#define CODE_FOR_msa_adds_u_b CODE_FOR_usaddv16qi3 +#define CODE_FOR_msa_adds_u_h CODE_FOR_usaddv8hi3 +#define CODE_FOR_msa_adds_u_w CODE_FOR_usaddv4si3 +#define CODE_FOR_msa_adds_u_d CODE_FOR_usaddv2di3 +#define CODE_FOR_msa_addv_b CODE_FOR_addv16qi3 +#define CODE_FOR_msa_addv_h CODE_FOR_addv8hi3 +#define CODE_FOR_msa_addv_w CODE_FOR_addv4si3 +#define CODE_FOR_msa_addv_d CODE_FOR_addv2di3 +#define CODE_FOR_msa_addvi_b CODE_FOR_addv16qi3 +#define CODE_FOR_msa_addvi_h CODE_FOR_addv8hi3 +#define CODE_FOR_msa_addvi_w CODE_FOR_addv4si3 +#define CODE_FOR_msa_addvi_d CODE_FOR_addv2di3 +#define CODE_FOR_msa_and_v CODE_FOR_andv16qi3 +#define CODE_FOR_msa_andi_b CODE_FOR_andv16qi3 +#define CODE_FOR_msa_bmnz_v CODE_FOR_msa_bmnz_b +#define CODE_FOR_msa_bmnzi_b CODE_FOR_msa_bmnz_b +#define CODE_FOR_msa_bmz_v CODE_FOR_msa_bmz_b +#define CODE_FOR_msa_bmzi_b CODE_FOR_msa_bmz_b +#define CODE_FOR_msa_bnz_v CODE_FOR_msa_bnz_v_b +#define CODE_FOR_msa_bz_v CODE_FOR_msa_bz_v_b +#define CODE_FOR_msa_bsel_v CODE_FOR_msa_bsel_b +#define CODE_FOR_msa_bseli_b CODE_FOR_msa_bsel_b +#define CODE_FOR_msa_ceqi_b CODE_FOR_msa_ceq_b +#define CODE_FOR_msa_ceqi_h CODE_FOR_msa_ceq_h +#define CODE_FOR_msa_ceqi_w CODE_FOR_msa_ceq_w +#define CODE_FOR_msa_ceqi_d CODE_FOR_msa_ceq_d 
+#define CODE_FOR_msa_clti_s_b CODE_FOR_msa_clt_s_b +#define CODE_FOR_msa_clti_s_h CODE_FOR_msa_clt_s_h +#define CODE_FOR_msa_clti_s_w CODE_FOR_msa_clt_s_w +#define CODE_FOR_msa_clti_s_d CODE_FOR_msa_clt_s_d +#define CODE_FOR_msa_clti_u_b CODE_FOR_msa_clt_u_b +#define CODE_FOR_msa_clti_u_h CODE_FOR_msa_clt_u_h +#define CODE_FOR_msa_clti_u_w CODE_FOR_msa_clt_u_w +#define CODE_FOR_msa_clti_u_d CODE_FOR_msa_clt_u_d +#define CODE_FOR_msa_clei_s_b CODE_FOR_msa_cle_s_b +#define CODE_FOR_msa_clei_s_h CODE_FOR_msa_cle_s_h +#define CODE_FOR_msa_clei_s_w CODE_FOR_msa_cle_s_w +#define CODE_FOR_msa_clei_s_d CODE_FOR_msa_cle_s_d +#define CODE_FOR_msa_clei_u_b CODE_FOR_msa_cle_u_b +#define CODE_FOR_msa_clei_u_h CODE_FOR_msa_cle_u_h +#define CODE_FOR_msa_clei_u_w CODE_FOR_msa_cle_u_w +#define CODE_FOR_msa_clei_u_d CODE_FOR_msa_cle_u_d +#define CODE_FOR_msa_div_s_b CODE_FOR_divv16qi3 +#define CODE_FOR_msa_div_s_h CODE_FOR_divv8hi3 +#define CODE_FOR_msa_div_s_w CODE_FOR_divv4si3 +#define CODE_FOR_msa_div_s_d CODE_FOR_divv2di3 +#define CODE_FOR_msa_div_u_b CODE_FOR_udivv16qi3 +#define CODE_FOR_msa_div_u_h CODE_FOR_udivv8hi3 +#define CODE_FOR_msa_div_u_w CODE_FOR_udivv4si3 +#define CODE_FOR_msa_div_u_d CODE_FOR_udivv2di3 +#define CODE_FOR_msa_fadd_w CODE_FOR_addv4sf3 +#define CODE_FOR_msa_fadd_d CODE_FOR_addv2df3 +#define CODE_FOR_msa_fexdo_w CODE_FOR_vec_pack_trunc_v2df +#define CODE_FOR_msa_ftrunc_s_w CODE_FOR_fix_truncv4sfv4si2 +#define CODE_FOR_msa_ftrunc_s_d CODE_FOR_fix_truncv2dfv2di2 +#define CODE_FOR_msa_ftrunc_u_w CODE_FOR_fixuns_truncv4sfv4si2 +#define CODE_FOR_msa_ftrunc_u_d CODE_FOR_fixuns_truncv2dfv2di2 +#define CODE_FOR_msa_ffint_s_w CODE_FOR_floatv4siv4sf2 +#define CODE_FOR_msa_ffint_s_d CODE_FOR_floatv2div2df2 +#define CODE_FOR_msa_ffint_u_w CODE_FOR_floatunsv4siv4sf2 +#define CODE_FOR_msa_ffint_u_d CODE_FOR_floatunsv2div2df2 +#define CODE_FOR_msa_fsub_w CODE_FOR_subv4sf3 +#define CODE_FOR_msa_fsub_d CODE_FOR_subv2df3 +#define CODE_FOR_msa_fmadd_w CODE_FOR_fmav4sf4 
+#define CODE_FOR_msa_fmadd_d CODE_FOR_fmav2df4 +#define CODE_FOR_msa_fmsub_w CODE_FOR_fnmav4sf4 +#define CODE_FOR_msa_fmsub_d CODE_FOR_fnmav2df4 +#define CODE_FOR_msa_fmul_w CODE_FOR_mulv4sf3 +#define CODE_FOR_msa_fmul_d CODE_FOR_mulv2df3 +#define CODE_FOR_msa_fdiv_w CODE_FOR_divv4sf3 +#define CODE_FOR_msa_fdiv_d CODE_FOR_divv2df3 +#define CODE_FOR_msa_fmax_w CODE_FOR_smaxv4sf3 +#define CODE_FOR_msa_fmax_d CODE_FOR_smaxv2df3 +#define CODE_FOR_msa_fmin_w CODE_FOR_sminv4sf3 +#define CODE_FOR_msa_fmin_d CODE_FOR_sminv2df3 +#define CODE_FOR_msa_fsqrt_w CODE_FOR_sqrtv4sf2 +#define CODE_FOR_msa_fsqrt_d CODE_FOR_sqrtv2df2 +#define CODE_FOR_msa_max_s_b CODE_FOR_smaxv16qi3 +#define CODE_FOR_msa_max_s_h CODE_FOR_smaxv8hi3 +#define CODE_FOR_msa_max_s_w CODE_FOR_smaxv4si3 +#define CODE_FOR_msa_max_s_d CODE_FOR_smaxv2di3 +#define CODE_FOR_msa_maxi_s_b CODE_FOR_smaxv16qi3 +#define CODE_FOR_msa_maxi_s_h CODE_FOR_smaxv8hi3 +#define CODE_FOR_msa_maxi_s_w CODE_FOR_smaxv4si3 +#define CODE_FOR_msa_maxi_s_d CODE_FOR_smaxv2di3 +#define CODE_FOR_msa_max_u_b CODE_FOR_umaxv16qi3 +#define CODE_FOR_msa_max_u_h CODE_FOR_umaxv8hi3 +#define CODE_FOR_msa_max_u_w CODE_FOR_umaxv4si3 +#define CODE_FOR_msa_max_u_d CODE_FOR_umaxv2di3 +#define CODE_FOR_msa_maxi_u_b CODE_FOR_umaxv16qi3 +#define CODE_FOR_msa_maxi_u_h CODE_FOR_umaxv8hi3 +#define CODE_FOR_msa_maxi_u_w CODE_FOR_umaxv4si3 +#define CODE_FOR_msa_maxi_u_d CODE_FOR_umaxv2di3 +#define CODE_FOR_msa_min_s_b CODE_FOR_sminv16qi3 +#define CODE_FOR_msa_min_s_h CODE_FOR_sminv8hi3 +#define CODE_FOR_msa_min_s_w CODE_FOR_sminv4si3 +#define CODE_FOR_msa_min_s_d CODE_FOR_sminv2di3 +#define CODE_FOR_msa_mini_s_b CODE_FOR_sminv16qi3 +#define CODE_FOR_msa_mini_s_h CODE_FOR_sminv8hi3 +#define CODE_FOR_msa_mini_s_w CODE_FOR_sminv4si3 +#define CODE_FOR_msa_mini_s_d CODE_FOR_sminv2di3 +#define CODE_FOR_msa_min_u_b CODE_FOR_uminv16qi3 +#define CODE_FOR_msa_min_u_h CODE_FOR_uminv8hi3 +#define CODE_FOR_msa_min_u_w CODE_FOR_uminv4si3 +#define CODE_FOR_msa_min_u_d 
CODE_FOR_uminv2di3 +#define CODE_FOR_msa_mini_u_b CODE_FOR_uminv16qi3 +#define CODE_FOR_msa_mini_u_h CODE_FOR_uminv8hi3 +#define CODE_FOR_msa_mini_u_w CODE_FOR_uminv4si3 +#define CODE_FOR_msa_mini_u_d CODE_FOR_uminv2di3 +#define CODE_FOR_msa_mod_s_b CODE_FOR_modv16qi3 +#define CODE_FOR_msa_mod_s_h CODE_FOR_modv8hi3 +#define CODE_FOR_msa_mod_s_w CODE_FOR_modv4si3 +#define CODE_FOR_msa_mod_s_d CODE_FOR_modv2di3 +#define CODE_FOR_msa_mod_u_b CODE_FOR_umodv16qi3 +#define CODE_FOR_msa_mod_u_h CODE_FOR_umodv8hi3 +#define CODE_FOR_msa_mod_u_w CODE_FOR_umodv4si3 +#define CODE_FOR_msa_mod_u_d CODE_FOR_umodv2di3 +#define CODE_FOR_msa_mod_s_b CODE_FOR_modv16qi3 +#define CODE_FOR_msa_mod_s_h CODE_FOR_modv8hi3 +#define CODE_FOR_msa_mod_s_w CODE_FOR_modv4si3 +#define CODE_FOR_msa_mod_s_d CODE_FOR_modv2di3 +#define CODE_FOR_msa_mod_u_b CODE_FOR_umodv16qi3 +#define CODE_FOR_msa_mod_u_h CODE_FOR_umodv8hi3 +#define CODE_FOR_msa_mod_u_w CODE_FOR_umodv4si3 +#define CODE_FOR_msa_mod_u_d CODE_FOR_umodv2di3 +#define CODE_FOR_msa_mulv_b CODE_FOR_mulv16qi3 +#define CODE_FOR_msa_mulv_h CODE_FOR_mulv8hi3 +#define CODE_FOR_msa_mulv_w CODE_FOR_mulv4si3 +#define CODE_FOR_msa_mulv_d CODE_FOR_mulv2di3 +#define CODE_FOR_msa_nlzc_b CODE_FOR_clzv16qi2 +#define CODE_FOR_msa_nlzc_h CODE_FOR_clzv8hi2 +#define CODE_FOR_msa_nlzc_w CODE_FOR_clzv4si2 +#define CODE_FOR_msa_nlzc_d CODE_FOR_clzv2di2 +#define CODE_FOR_msa_nor_v CODE_FOR_msa_nor_b +#define CODE_FOR_msa_or_v CODE_FOR_iorv16qi3 +#define CODE_FOR_msa_ori_b CODE_FOR_iorv16qi3 +#define CODE_FOR_msa_nori_b CODE_FOR_msa_nor_b +#define CODE_FOR_msa_pcnt_b CODE_FOR_popcountv16qi2 +#define CODE_FOR_msa_pcnt_h CODE_FOR_popcountv8hi2 +#define CODE_FOR_msa_pcnt_w CODE_FOR_popcountv4si2 +#define CODE_FOR_msa_pcnt_d CODE_FOR_popcountv2di2 +#define CODE_FOR_msa_xor_v CODE_FOR_xorv16qi3 +#define CODE_FOR_msa_xori_b CODE_FOR_xorv16qi3 +#define CODE_FOR_msa_sll_b CODE_FOR_vashlv16qi3 +#define CODE_FOR_msa_sll_h CODE_FOR_vashlv8hi3 +#define CODE_FOR_msa_sll_w 
CODE_FOR_vashlv4si3 +#define CODE_FOR_msa_sll_d CODE_FOR_vashlv2di3 +#define CODE_FOR_msa_slli_b CODE_FOR_vashlv16qi3 +#define CODE_FOR_msa_slli_h CODE_FOR_vashlv8hi3 +#define CODE_FOR_msa_slli_w CODE_FOR_vashlv4si3 +#define CODE_FOR_msa_slli_d CODE_FOR_vashlv2di3 +#define CODE_FOR_msa_sra_b CODE_FOR_vashrv16qi3 +#define CODE_FOR_msa_sra_h CODE_FOR_vashrv8hi3 +#define CODE_FOR_msa_sra_w CODE_FOR_vashrv4si3 +#define CODE_FOR_msa_sra_d CODE_FOR_vashrv2di3 +#define CODE_FOR_msa_srai_b CODE_FOR_vashrv16qi3 +#define CODE_FOR_msa_srai_h CODE_FOR_vashrv8hi3 +#define CODE_FOR_msa_srai_w CODE_FOR_vashrv4si3 +#define CODE_FOR_msa_srai_d CODE_FOR_vashrv2di3 +#define CODE_FOR_msa_srl_b CODE_FOR_vlshrv16qi3 +#define CODE_FOR_msa_srl_h CODE_FOR_vlshrv8hi3 +#define CODE_FOR_msa_srl_w CODE_FOR_vlshrv4si3 +#define CODE_FOR_msa_srl_d CODE_FOR_vlshrv2di3 +#define CODE_FOR_msa_srli_b CODE_FOR_vlshrv16qi3 +#define CODE_FOR_msa_srli_h CODE_FOR_vlshrv8hi3 +#define CODE_FOR_msa_srli_w CODE_FOR_vlshrv4si3 +#define CODE_FOR_msa_srli_d CODE_FOR_vlshrv2di3 +#define CODE_FOR_msa_subv_b CODE_FOR_subv16qi3 +#define CODE_FOR_msa_subv_h CODE_FOR_subv8hi3 +#define CODE_FOR_msa_subv_w CODE_FOR_subv4si3 +#define CODE_FOR_msa_subv_d CODE_FOR_subv2di3 +#define CODE_FOR_msa_subvi_b CODE_FOR_subv16qi3 +#define CODE_FOR_msa_subvi_h CODE_FOR_subv8hi3 +#define CODE_FOR_msa_subvi_w CODE_FOR_subv4si3 +#define CODE_FOR_msa_subvi_d CODE_FOR_subv2di3 + +#define CODE_FOR_msa_move_v CODE_FOR_movv16qi + +#define CODE_FOR_msa_vshf_b CODE_FOR_vec_permv16qi +#define CODE_FOR_msa_vshf_h CODE_FOR_vec_permv8hi +#define CODE_FOR_msa_vshf_w CODE_FOR_vec_permv4si +#define CODE_FOR_msa_vshf_d CODE_FOR_vec_permv2di + +#define CODE_FOR_msa_ilvod_d CODE_FOR_msa_ilvl_d +#define CODE_FOR_msa_ilvev_d CODE_FOR_msa_ilvr_d +#define CODE_FOR_msa_pckod_d CODE_FOR_msa_ilvl_d +#define CODE_FOR_msa_pckev_d CODE_FOR_msa_ilvr_d + +#define CODE_FOR_msa_ldi_b CODE_FOR_msa_ldiv16qi +#define CODE_FOR_msa_ldi_h CODE_FOR_msa_ldiv8hi +#define 
CODE_FOR_msa_ldi_w CODE_FOR_msa_ldiv4si +#define CODE_FOR_msa_ldi_d CODE_FOR_msa_ldiv2di + static const struct mips_builtin_description mips_builtins[] = { #define MIPS_GET_FCSR 0 DIRECT_BUILTIN (get_fcsr, MIPS_USI_FTYPE_VOID, hard_float), @@ -14924,12 +15770,547 @@ static const struct mips_builtin_description mips_builtins[] = { LOONGSON_BUILTIN_SUFFIX (punpcklwd, s, MIPS_V2SI_FTYPE_V2SI_V2SI), /* Sundry other built-in functions. */ - DIRECT_NO_TARGET_BUILTIN (cache, MIPS_VOID_FTYPE_SI_CVPOINTER, cache) + DIRECT_NO_TARGET_BUILTIN (cache, MIPS_VOID_FTYPE_SI_CVPOINTER, cache), + + /* Built-in functions for MSA. */ + MSA_BUILTIN (sll_b, MIPS_V16QI_FTYPE_V16QI_V16QI), + MSA_BUILTIN (sll_h, MIPS_V8HI_FTYPE_V8HI_V8HI), + MSA_BUILTIN (sll_w, MIPS_V4SI_FTYPE_V4SI_V4SI), + MSA_BUILTIN (sll_d, MIPS_V2DI_FTYPE_V2DI_V2DI), + MSA_BUILTIN (slli_b, MIPS_V16QI_FTYPE_V16QI_UQI), + MSA_BUILTIN (slli_h, MIPS_V8HI_FTYPE_V8HI_UQI), + MSA_BUILTIN (slli_w, MIPS_V4SI_FTYPE_V4SI_UQI), + MSA_BUILTIN (slli_d, MIPS_V2DI_FTYPE_V2DI_UQI), + MSA_BUILTIN (sra_b, MIPS_V16QI_FTYPE_V16QI_V16QI), + MSA_BUILTIN (sra_h, MIPS_V8HI_FTYPE_V8HI_V8HI), + MSA_BUILTIN (sra_w, MIPS_V4SI_FTYPE_V4SI_V4SI), + MSA_BUILTIN (sra_d, MIPS_V2DI_FTYPE_V2DI_V2DI), + MSA_BUILTIN (srai_b, MIPS_V16QI_FTYPE_V16QI_UQI), + MSA_BUILTIN (srai_h, MIPS_V8HI_FTYPE_V8HI_UQI), + MSA_BUILTIN (srai_w, MIPS_V4SI_FTYPE_V4SI_UQI), + MSA_BUILTIN (srai_d, MIPS_V2DI_FTYPE_V2DI_UQI), + MSA_BUILTIN (srar_b, MIPS_V16QI_FTYPE_V16QI_V16QI), + MSA_BUILTIN (srar_h, MIPS_V8HI_FTYPE_V8HI_V8HI), + MSA_BUILTIN (srar_w, MIPS_V4SI_FTYPE_V4SI_V4SI), + MSA_BUILTIN (srar_d, MIPS_V2DI_FTYPE_V2DI_V2DI), + MSA_BUILTIN (srari_b, MIPS_V16QI_FTYPE_V16QI_UQI), + MSA_BUILTIN (srari_h, MIPS_V8HI_FTYPE_V8HI_UQI), + MSA_BUILTIN (srari_w, MIPS_V4SI_FTYPE_V4SI_UQI), + MSA_BUILTIN (srari_d, MIPS_V2DI_FTYPE_V2DI_UQI), + MSA_BUILTIN (srl_b, MIPS_V16QI_FTYPE_V16QI_V16QI), + MSA_BUILTIN (srl_h, MIPS_V8HI_FTYPE_V8HI_V8HI), + MSA_BUILTIN (srl_w, MIPS_V4SI_FTYPE_V4SI_V4SI), + 
MSA_BUILTIN (srl_d, MIPS_V2DI_FTYPE_V2DI_V2DI), + MSA_BUILTIN (srli_b, MIPS_V16QI_FTYPE_V16QI_UQI), + MSA_BUILTIN (srli_h, MIPS_V8HI_FTYPE_V8HI_UQI), + MSA_BUILTIN (srli_w, MIPS_V4SI_FTYPE_V4SI_UQI), + MSA_BUILTIN (srli_d, MIPS_V2DI_FTYPE_V2DI_UQI), + MSA_BUILTIN (srlr_b, MIPS_V16QI_FTYPE_V16QI_V16QI), + MSA_BUILTIN (srlr_h, MIPS_V8HI_FTYPE_V8HI_V8HI), + MSA_BUILTIN (srlr_w, MIPS_V4SI_FTYPE_V4SI_V4SI), + MSA_BUILTIN (srlr_d, MIPS_V2DI_FTYPE_V2DI_V2DI), + MSA_BUILTIN (srlri_b, MIPS_V16QI_FTYPE_V16QI_UQI), + MSA_BUILTIN (srlri_h, MIPS_V8HI_FTYPE_V8HI_UQI), + MSA_BUILTIN (srlri_w, MIPS_V4SI_FTYPE_V4SI_UQI), + MSA_BUILTIN (srlri_d, MIPS_V2DI_FTYPE_V2DI_UQI), + MSA_BUILTIN (bclr_b, MIPS_UV16QI_FTYPE_UV16QI_UV16QI), + MSA_BUILTIN (bclr_h, MIPS_UV8HI_FTYPE_UV8HI_UV8HI), + MSA_BUILTIN (bclr_w, MIPS_UV4SI_FTYPE_UV4SI_UV4SI), + MSA_BUILTIN (bclr_d, MIPS_UV2DI_FTYPE_UV2DI_UV2DI), + MSA_BUILTIN (bclri_b, MIPS_UV16QI_FTYPE_UV16QI_UQI), + MSA_BUILTIN (bclri_h, MIPS_UV8HI_FTYPE_UV8HI_UQI), + MSA_BUILTIN (bclri_w, MIPS_UV4SI_FTYPE_UV4SI_UQI), + MSA_BUILTIN (bclri_d, MIPS_UV2DI_FTYPE_UV2DI_UQI), + MSA_BUILTIN (bset_b, MIPS_UV16QI_FTYPE_UV16QI_UV16QI), + MSA_BUILTIN (bset_h, MIPS_UV8HI_FTYPE_UV8HI_UV8HI), + MSA_BUILTIN (bset_w, MIPS_UV4SI_FTYPE_UV4SI_UV4SI), + MSA_BUILTIN (bset_d, MIPS_UV2DI_FTYPE_UV2DI_UV2DI), + MSA_BUILTIN (bseti_b, MIPS_UV16QI_FTYPE_UV16QI_UQI), + MSA_BUILTIN (bseti_h, MIPS_UV8HI_FTYPE_UV8HI_UQI), + MSA_BUILTIN (bseti_w, MIPS_UV4SI_FTYPE_UV4SI_UQI), + MSA_BUILTIN (bseti_d, MIPS_UV2DI_FTYPE_UV2DI_UQI), + MSA_BUILTIN (bneg_b, MIPS_UV16QI_FTYPE_UV16QI_UV16QI), + MSA_BUILTIN (bneg_h, MIPS_UV8HI_FTYPE_UV8HI_UV8HI), + MSA_BUILTIN (bneg_w, MIPS_UV4SI_FTYPE_UV4SI_UV4SI), + MSA_BUILTIN (bneg_d, MIPS_UV2DI_FTYPE_UV2DI_UV2DI), + MSA_BUILTIN (bnegi_b, MIPS_UV16QI_FTYPE_UV16QI_UQI), + MSA_BUILTIN (bnegi_h, MIPS_UV8HI_FTYPE_UV8HI_UQI), + MSA_BUILTIN (bnegi_w, MIPS_UV4SI_FTYPE_UV4SI_UQI), + MSA_BUILTIN (bnegi_d, MIPS_UV2DI_FTYPE_UV2DI_UQI), + MSA_BUILTIN (binsl_b, 
MIPS_UV16QI_FTYPE_UV16QI_UV16QI_UV16QI), + MSA_BUILTIN (binsl_h, MIPS_UV8HI_FTYPE_UV8HI_UV8HI_UV8HI), + MSA_BUILTIN (binsl_w, MIPS_UV4SI_FTYPE_UV4SI_UV4SI_UV4SI), + MSA_BUILTIN (binsl_d, MIPS_UV2DI_FTYPE_UV2DI_UV2DI_UV2DI), + MSA_BUILTIN (binsli_b, MIPS_UV16QI_FTYPE_UV16QI_UV16QI_UQI), + MSA_BUILTIN (binsli_h, MIPS_UV8HI_FTYPE_UV8HI_UV8HI_UQI), + MSA_BUILTIN (binsli_w, MIPS_UV4SI_FTYPE_UV4SI_UV4SI_UQI), + MSA_BUILTIN (binsli_d, MIPS_UV2DI_FTYPE_UV2DI_UV2DI_UQI), + MSA_BUILTIN (binsr_b, MIPS_UV16QI_FTYPE_UV16QI_UV16QI_UV16QI), + MSA_BUILTIN (binsr_h, MIPS_UV8HI_FTYPE_UV8HI_UV8HI_UV8HI), + MSA_BUILTIN (binsr_w, MIPS_UV4SI_FTYPE_UV4SI_UV4SI_UV4SI), + MSA_BUILTIN (binsr_d, MIPS_UV2DI_FTYPE_UV2DI_UV2DI_UV2DI), + MSA_BUILTIN (binsri_b, MIPS_UV16QI_FTYPE_UV16QI_UV16QI_UQI), + MSA_BUILTIN (binsri_h, MIPS_UV8HI_FTYPE_UV8HI_UV8HI_UQI), + MSA_BUILTIN (binsri_w, MIPS_UV4SI_FTYPE_UV4SI_UV4SI_UQI), + MSA_BUILTIN (binsri_d, MIPS_UV2DI_FTYPE_UV2DI_UV2DI_UQI), + MSA_BUILTIN (addv_b, MIPS_V16QI_FTYPE_V16QI_V16QI), + MSA_BUILTIN (addv_h, MIPS_V8HI_FTYPE_V8HI_V8HI), + MSA_BUILTIN (addv_w, MIPS_V4SI_FTYPE_V4SI_V4SI), + MSA_BUILTIN (addv_d, MIPS_V2DI_FTYPE_V2DI_V2DI), + MSA_BUILTIN (addvi_b, MIPS_V16QI_FTYPE_V16QI_UQI), + MSA_BUILTIN (addvi_h, MIPS_V8HI_FTYPE_V8HI_UQI), + MSA_BUILTIN (addvi_w, MIPS_V4SI_FTYPE_V4SI_UQI), + MSA_BUILTIN (addvi_d, MIPS_V2DI_FTYPE_V2DI_UQI), + MSA_BUILTIN (subv_b, MIPS_V16QI_FTYPE_V16QI_V16QI), + MSA_BUILTIN (subv_h, MIPS_V8HI_FTYPE_V8HI_V8HI), + MSA_BUILTIN (subv_w, MIPS_V4SI_FTYPE_V4SI_V4SI), + MSA_BUILTIN (subv_d, MIPS_V2DI_FTYPE_V2DI_V2DI), + MSA_BUILTIN (subvi_b, MIPS_V16QI_FTYPE_V16QI_UQI), + MSA_BUILTIN (subvi_h, MIPS_V8HI_FTYPE_V8HI_UQI), + MSA_BUILTIN (subvi_w, MIPS_V4SI_FTYPE_V4SI_UQI), + MSA_BUILTIN (subvi_d, MIPS_V2DI_FTYPE_V2DI_UQI), + MSA_BUILTIN (max_s_b, MIPS_V16QI_FTYPE_V16QI_V16QI), + MSA_BUILTIN (max_s_h, MIPS_V8HI_FTYPE_V8HI_V8HI), + MSA_BUILTIN (max_s_w, MIPS_V4SI_FTYPE_V4SI_V4SI), + MSA_BUILTIN (max_s_d, MIPS_V2DI_FTYPE_V2DI_V2DI), + 
MSA_BUILTIN (maxi_s_b, MIPS_V16QI_FTYPE_V16QI_QI), + MSA_BUILTIN (maxi_s_h, MIPS_V8HI_FTYPE_V8HI_QI), + MSA_BUILTIN (maxi_s_w, MIPS_V4SI_FTYPE_V4SI_QI), + MSA_BUILTIN (maxi_s_d, MIPS_V2DI_FTYPE_V2DI_QI), + MSA_BUILTIN (max_u_b, MIPS_UV16QI_FTYPE_UV16QI_UV16QI), + MSA_BUILTIN (max_u_h, MIPS_UV8HI_FTYPE_UV8HI_UV8HI), + MSA_BUILTIN (max_u_w, MIPS_UV4SI_FTYPE_UV4SI_UV4SI), + MSA_BUILTIN (max_u_d, MIPS_UV2DI_FTYPE_UV2DI_UV2DI), + MSA_BUILTIN (maxi_u_b, MIPS_UV16QI_FTYPE_UV16QI_UQI), + MSA_BUILTIN (maxi_u_h, MIPS_UV8HI_FTYPE_UV8HI_UQI), + MSA_BUILTIN (maxi_u_w, MIPS_UV4SI_FTYPE_UV4SI_UQI), + MSA_BUILTIN (maxi_u_d, MIPS_UV2DI_FTYPE_UV2DI_UQI), + MSA_BUILTIN (min_s_b, MIPS_V16QI_FTYPE_V16QI_V16QI), + MSA_BUILTIN (min_s_h, MIPS_V8HI_FTYPE_V8HI_V8HI), + MSA_BUILTIN (min_s_w, MIPS_V4SI_FTYPE_V4SI_V4SI), + MSA_BUILTIN (min_s_d, MIPS_V2DI_FTYPE_V2DI_V2DI), + MSA_BUILTIN (mini_s_b, MIPS_V16QI_FTYPE_V16QI_QI), + MSA_BUILTIN (mini_s_h, MIPS_V8HI_FTYPE_V8HI_QI), + MSA_BUILTIN (mini_s_w, MIPS_V4SI_FTYPE_V4SI_QI), + MSA_BUILTIN (mini_s_d, MIPS_V2DI_FTYPE_V2DI_QI), + MSA_BUILTIN (min_u_b, MIPS_UV16QI_FTYPE_UV16QI_UV16QI), + MSA_BUILTIN (min_u_h, MIPS_UV8HI_FTYPE_UV8HI_UV8HI), + MSA_BUILTIN (min_u_w, MIPS_UV4SI_FTYPE_UV4SI_UV4SI), + MSA_BUILTIN (min_u_d, MIPS_UV2DI_FTYPE_UV2DI_UV2DI), + MSA_BUILTIN (mini_u_b, MIPS_UV16QI_FTYPE_UV16QI_UQI), + MSA_BUILTIN (mini_u_h, MIPS_UV8HI_FTYPE_UV8HI_UQI), + MSA_BUILTIN (mini_u_w, MIPS_UV4SI_FTYPE_UV4SI_UQI), + MSA_BUILTIN (mini_u_d, MIPS_UV2DI_FTYPE_UV2DI_UQI), + MSA_BUILTIN (max_a_b, MIPS_V16QI_FTYPE_V16QI_V16QI), + MSA_BUILTIN (max_a_h, MIPS_V8HI_FTYPE_V8HI_V8HI), + MSA_BUILTIN (max_a_w, MIPS_V4SI_FTYPE_V4SI_V4SI), + MSA_BUILTIN (max_a_d, MIPS_V2DI_FTYPE_V2DI_V2DI), + MSA_BUILTIN (min_a_b, MIPS_V16QI_FTYPE_V16QI_V16QI), + MSA_BUILTIN (min_a_h, MIPS_V8HI_FTYPE_V8HI_V8HI), + MSA_BUILTIN (min_a_w, MIPS_V4SI_FTYPE_V4SI_V4SI), + MSA_BUILTIN (min_a_d, MIPS_V2DI_FTYPE_V2DI_V2DI), + MSA_BUILTIN (ceq_b, MIPS_V16QI_FTYPE_V16QI_V16QI), + MSA_BUILTIN (ceq_h, 
MIPS_V8HI_FTYPE_V8HI_V8HI), + MSA_BUILTIN (ceq_w, MIPS_V4SI_FTYPE_V4SI_V4SI), + MSA_BUILTIN (ceq_d, MIPS_V2DI_FTYPE_V2DI_V2DI), + MSA_BUILTIN (ceqi_b, MIPS_V16QI_FTYPE_V16QI_QI), + MSA_BUILTIN (ceqi_h, MIPS_V8HI_FTYPE_V8HI_QI), + MSA_BUILTIN (ceqi_w, MIPS_V4SI_FTYPE_V4SI_QI), + MSA_BUILTIN (ceqi_d, MIPS_V2DI_FTYPE_V2DI_QI), + MSA_BUILTIN (clt_s_b, MIPS_V16QI_FTYPE_V16QI_V16QI), + MSA_BUILTIN (clt_s_h, MIPS_V8HI_FTYPE_V8HI_V8HI), + MSA_BUILTIN (clt_s_w, MIPS_V4SI_FTYPE_V4SI_V4SI), + MSA_BUILTIN (clt_s_d, MIPS_V2DI_FTYPE_V2DI_V2DI), + MSA_BUILTIN (clti_s_b, MIPS_V16QI_FTYPE_V16QI_QI), + MSA_BUILTIN (clti_s_h, MIPS_V8HI_FTYPE_V8HI_QI), + MSA_BUILTIN (clti_s_w, MIPS_V4SI_FTYPE_V4SI_QI), + MSA_BUILTIN (clti_s_d, MIPS_V2DI_FTYPE_V2DI_QI), + MSA_BUILTIN (clt_u_b, MIPS_V16QI_FTYPE_UV16QI_UV16QI), + MSA_BUILTIN (clt_u_h, MIPS_V8HI_FTYPE_UV8HI_UV8HI), + MSA_BUILTIN (clt_u_w, MIPS_V4SI_FTYPE_UV4SI_UV4SI), + MSA_BUILTIN (clt_u_d, MIPS_V2DI_FTYPE_UV2DI_UV2DI), + MSA_BUILTIN (clti_u_b, MIPS_V16QI_FTYPE_UV16QI_UQI), + MSA_BUILTIN (clti_u_h, MIPS_V8HI_FTYPE_UV8HI_UQI), + MSA_BUILTIN (clti_u_w, MIPS_V4SI_FTYPE_UV4SI_UQI), + MSA_BUILTIN (clti_u_d, MIPS_V2DI_FTYPE_UV2DI_UQI), + MSA_BUILTIN (cle_s_b, MIPS_V16QI_FTYPE_V16QI_V16QI), + MSA_BUILTIN (cle_s_h, MIPS_V8HI_FTYPE_V8HI_V8HI), + MSA_BUILTIN (cle_s_w, MIPS_V4SI_FTYPE_V4SI_V4SI), + MSA_BUILTIN (cle_s_d, MIPS_V2DI_FTYPE_V2DI_V2DI), + MSA_BUILTIN (clei_s_b, MIPS_V16QI_FTYPE_V16QI_QI), + MSA_BUILTIN (clei_s_h, MIPS_V8HI_FTYPE_V8HI_QI), + MSA_BUILTIN (clei_s_w, MIPS_V4SI_FTYPE_V4SI_QI), + MSA_BUILTIN (clei_s_d, MIPS_V2DI_FTYPE_V2DI_QI), + MSA_BUILTIN (cle_u_b, MIPS_V16QI_FTYPE_UV16QI_UV16QI), + MSA_BUILTIN (cle_u_h, MIPS_V8HI_FTYPE_UV8HI_UV8HI), + MSA_BUILTIN (cle_u_w, MIPS_V4SI_FTYPE_UV4SI_UV4SI), + MSA_BUILTIN (cle_u_d, MIPS_V2DI_FTYPE_UV2DI_UV2DI), + MSA_BUILTIN (clei_u_b, MIPS_V16QI_FTYPE_UV16QI_UQI), + MSA_BUILTIN (clei_u_h, MIPS_V8HI_FTYPE_UV8HI_UQI), + MSA_BUILTIN (clei_u_w, MIPS_V4SI_FTYPE_UV4SI_UQI), + MSA_BUILTIN (clei_u_d, 
MIPS_V2DI_FTYPE_UV2DI_UQI), + MSA_BUILTIN (ld_b, MIPS_V16QI_FTYPE_CVPOINTER_SI), + MSA_BUILTIN (ld_h, MIPS_V8HI_FTYPE_CVPOINTER_SI), + MSA_BUILTIN (ld_w, MIPS_V4SI_FTYPE_CVPOINTER_SI), + MSA_BUILTIN (ld_d, MIPS_V2DI_FTYPE_CVPOINTER_SI), + MSA_NO_TARGET_BUILTIN (st_b, MIPS_VOID_FTYPE_V16QI_CVPOINTER_SI), + MSA_NO_TARGET_BUILTIN (st_h, MIPS_VOID_FTYPE_V8HI_CVPOINTER_SI), + MSA_NO_TARGET_BUILTIN (st_w, MIPS_VOID_FTYPE_V4SI_CVPOINTER_SI), + MSA_NO_TARGET_BUILTIN (st_d, MIPS_VOID_FTYPE_V2DI_CVPOINTER_SI), + MSA_BUILTIN (sat_s_b, MIPS_V16QI_FTYPE_V16QI_UQI), + MSA_BUILTIN (sat_s_h, MIPS_V8HI_FTYPE_V8HI_UQI), + MSA_BUILTIN (sat_s_w, MIPS_V4SI_FTYPE_V4SI_UQI), + MSA_BUILTIN (sat_s_d, MIPS_V2DI_FTYPE_V2DI_UQI), + MSA_BUILTIN (sat_u_b, MIPS_UV16QI_FTYPE_UV16QI_UQI), + MSA_BUILTIN (sat_u_h, MIPS_UV8HI_FTYPE_UV8HI_UQI), + MSA_BUILTIN (sat_u_w, MIPS_UV4SI_FTYPE_UV4SI_UQI), + MSA_BUILTIN (sat_u_d, MIPS_UV2DI_FTYPE_UV2DI_UQI), + MSA_BUILTIN (add_a_b, MIPS_V16QI_FTYPE_V16QI_V16QI), + MSA_BUILTIN (add_a_h, MIPS_V8HI_FTYPE_V8HI_V8HI), + MSA_BUILTIN (add_a_w, MIPS_V4SI_FTYPE_V4SI_V4SI), + MSA_BUILTIN (add_a_d, MIPS_V2DI_FTYPE_V2DI_V2DI), + MSA_BUILTIN (adds_a_b, MIPS_V16QI_FTYPE_V16QI_V16QI), + MSA_BUILTIN (adds_a_h, MIPS_V8HI_FTYPE_V8HI_V8HI), + MSA_BUILTIN (adds_a_w, MIPS_V4SI_FTYPE_V4SI_V4SI), + MSA_BUILTIN (adds_a_d, MIPS_V2DI_FTYPE_V2DI_V2DI), + MSA_BUILTIN (adds_s_b, MIPS_V16QI_FTYPE_V16QI_V16QI), + MSA_BUILTIN (adds_s_h, MIPS_V8HI_FTYPE_V8HI_V8HI), + MSA_BUILTIN (adds_s_w, MIPS_V4SI_FTYPE_V4SI_V4SI), + MSA_BUILTIN (adds_s_d, MIPS_V2DI_FTYPE_V2DI_V2DI), + MSA_BUILTIN (adds_u_b, MIPS_UV16QI_FTYPE_UV16QI_UV16QI), + MSA_BUILTIN (adds_u_h, MIPS_UV8HI_FTYPE_UV8HI_UV8HI), + MSA_BUILTIN (adds_u_w, MIPS_UV4SI_FTYPE_UV4SI_UV4SI), + MSA_BUILTIN (adds_u_d, MIPS_UV2DI_FTYPE_UV2DI_UV2DI), + MSA_BUILTIN (ave_s_b, MIPS_V16QI_FTYPE_V16QI_V16QI), + MSA_BUILTIN (ave_s_h, MIPS_V8HI_FTYPE_V8HI_V8HI), + MSA_BUILTIN (ave_s_w, MIPS_V4SI_FTYPE_V4SI_V4SI), + MSA_BUILTIN (ave_s_d, 
MIPS_V2DI_FTYPE_V2DI_V2DI), + MSA_BUILTIN (ave_u_b, MIPS_UV16QI_FTYPE_UV16QI_UV16QI), + MSA_BUILTIN (ave_u_h, MIPS_UV8HI_FTYPE_UV8HI_UV8HI), + MSA_BUILTIN (ave_u_w, MIPS_UV4SI_FTYPE_UV4SI_UV4SI), + MSA_BUILTIN (ave_u_d, MIPS_UV2DI_FTYPE_UV2DI_UV2DI), + MSA_BUILTIN (aver_s_b, MIPS_V16QI_FTYPE_V16QI_V16QI), + MSA_BUILTIN (aver_s_h, MIPS_V8HI_FTYPE_V8HI_V8HI), + MSA_BUILTIN (aver_s_w, MIPS_V4SI_FTYPE_V4SI_V4SI), + MSA_BUILTIN (aver_s_d, MIPS_V2DI_FTYPE_V2DI_V2DI), + MSA_BUILTIN (aver_u_b, MIPS_UV16QI_FTYPE_UV16QI_UV16QI), + MSA_BUILTIN (aver_u_h, MIPS_UV8HI_FTYPE_UV8HI_UV8HI), + MSA_BUILTIN (aver_u_w, MIPS_UV4SI_FTYPE_UV4SI_UV4SI), + MSA_BUILTIN (aver_u_d, MIPS_UV2DI_FTYPE_UV2DI_UV2DI), + MSA_BUILTIN (subs_s_b, MIPS_V16QI_FTYPE_V16QI_V16QI), + MSA_BUILTIN (subs_s_h, MIPS_V8HI_FTYPE_V8HI_V8HI), + MSA_BUILTIN (subs_s_w, MIPS_V4SI_FTYPE_V4SI_V4SI), + MSA_BUILTIN (subs_s_d, MIPS_V2DI_FTYPE_V2DI_V2DI), + MSA_BUILTIN (subs_u_b, MIPS_UV16QI_FTYPE_UV16QI_UV16QI), + MSA_BUILTIN (subs_u_h, MIPS_UV8HI_FTYPE_UV8HI_UV8HI), + MSA_BUILTIN (subs_u_w, MIPS_UV4SI_FTYPE_UV4SI_UV4SI), + MSA_BUILTIN (subs_u_d, MIPS_UV2DI_FTYPE_UV2DI_UV2DI), + MSA_BUILTIN (subsuu_s_b, MIPS_V16QI_FTYPE_UV16QI_UV16QI), + MSA_BUILTIN (subsuu_s_h, MIPS_V8HI_FTYPE_UV8HI_UV8HI), + MSA_BUILTIN (subsuu_s_w, MIPS_V4SI_FTYPE_UV4SI_UV4SI), + MSA_BUILTIN (subsuu_s_d, MIPS_V2DI_FTYPE_UV2DI_UV2DI), + MSA_BUILTIN (subsus_u_b, MIPS_UV16QI_FTYPE_UV16QI_V16QI), + MSA_BUILTIN (subsus_u_h, MIPS_UV8HI_FTYPE_UV8HI_V8HI), + MSA_BUILTIN (subsus_u_w, MIPS_UV4SI_FTYPE_UV4SI_V4SI), + MSA_BUILTIN (subsus_u_d, MIPS_UV2DI_FTYPE_UV2DI_V2DI), + MSA_BUILTIN (asub_s_b, MIPS_V16QI_FTYPE_V16QI_V16QI), + MSA_BUILTIN (asub_s_h, MIPS_V8HI_FTYPE_V8HI_V8HI), + MSA_BUILTIN (asub_s_w, MIPS_V4SI_FTYPE_V4SI_V4SI), + MSA_BUILTIN (asub_s_d, MIPS_V2DI_FTYPE_V2DI_V2DI), + MSA_BUILTIN (asub_u_b, MIPS_UV16QI_FTYPE_UV16QI_UV16QI), + MSA_BUILTIN (asub_u_h, MIPS_UV8HI_FTYPE_UV8HI_UV8HI), + MSA_BUILTIN (asub_u_w, MIPS_UV4SI_FTYPE_UV4SI_UV4SI), + MSA_BUILTIN 
(asub_u_d, MIPS_UV2DI_FTYPE_UV2DI_UV2DI), + MSA_BUILTIN (mulv_b, MIPS_V16QI_FTYPE_V16QI_V16QI), + MSA_BUILTIN (mulv_h, MIPS_V8HI_FTYPE_V8HI_V8HI), + MSA_BUILTIN (mulv_w, MIPS_V4SI_FTYPE_V4SI_V4SI), + MSA_BUILTIN (mulv_d, MIPS_V2DI_FTYPE_V2DI_V2DI), + MSA_BUILTIN (maddv_b, MIPS_V16QI_FTYPE_V16QI_V16QI_V16QI), + MSA_BUILTIN (maddv_h, MIPS_V8HI_FTYPE_V8HI_V8HI_V8HI), + MSA_BUILTIN (maddv_w, MIPS_V4SI_FTYPE_V4SI_V4SI_V4SI), + MSA_BUILTIN (maddv_d, MIPS_V2DI_FTYPE_V2DI_V2DI_V2DI), + MSA_BUILTIN (msubv_b, MIPS_V16QI_FTYPE_V16QI_V16QI_V16QI), + MSA_BUILTIN (msubv_h, MIPS_V8HI_FTYPE_V8HI_V8HI_V8HI), + MSA_BUILTIN (msubv_w, MIPS_V4SI_FTYPE_V4SI_V4SI_V4SI), + MSA_BUILTIN (msubv_d, MIPS_V2DI_FTYPE_V2DI_V2DI_V2DI), + MSA_BUILTIN (div_s_b, MIPS_V16QI_FTYPE_V16QI_V16QI), + MSA_BUILTIN (div_s_h, MIPS_V8HI_FTYPE_V8HI_V8HI), + MSA_BUILTIN (div_s_w, MIPS_V4SI_FTYPE_V4SI_V4SI), + MSA_BUILTIN (div_s_d, MIPS_V2DI_FTYPE_V2DI_V2DI), + MSA_BUILTIN (div_u_b, MIPS_UV16QI_FTYPE_UV16QI_UV16QI), + MSA_BUILTIN (div_u_h, MIPS_UV8HI_FTYPE_UV8HI_UV8HI), + MSA_BUILTIN (div_u_w, MIPS_UV4SI_FTYPE_UV4SI_UV4SI), + MSA_BUILTIN (div_u_d, MIPS_UV2DI_FTYPE_UV2DI_UV2DI), + MSA_BUILTIN (hadd_s_h, MIPS_V8HI_FTYPE_V16QI_V16QI), + MSA_BUILTIN (hadd_s_w, MIPS_V4SI_FTYPE_V8HI_V8HI), + MSA_BUILTIN (hadd_s_d, MIPS_V2DI_FTYPE_V4SI_V4SI), + MSA_BUILTIN (hadd_u_h, MIPS_UV8HI_FTYPE_UV16QI_UV16QI), + MSA_BUILTIN (hadd_u_w, MIPS_UV4SI_FTYPE_UV8HI_UV8HI), + MSA_BUILTIN (hadd_u_d, MIPS_UV2DI_FTYPE_UV4SI_UV4SI), + MSA_BUILTIN (hsub_s_h, MIPS_V8HI_FTYPE_V16QI_V16QI), + MSA_BUILTIN (hsub_s_w, MIPS_V4SI_FTYPE_V8HI_V8HI), + MSA_BUILTIN (hsub_s_d, MIPS_V2DI_FTYPE_V4SI_V4SI), + MSA_BUILTIN (hsub_u_h, MIPS_V8HI_FTYPE_UV16QI_UV16QI), + MSA_BUILTIN (hsub_u_w, MIPS_V4SI_FTYPE_UV8HI_UV8HI), + MSA_BUILTIN (hsub_u_d, MIPS_V2DI_FTYPE_UV4SI_UV4SI), + MSA_BUILTIN (mod_s_b, MIPS_V16QI_FTYPE_V16QI_V16QI), + MSA_BUILTIN (mod_s_h, MIPS_V8HI_FTYPE_V8HI_V8HI), + MSA_BUILTIN (mod_s_w, MIPS_V4SI_FTYPE_V4SI_V4SI), + MSA_BUILTIN (mod_s_d, 
MIPS_V2DI_FTYPE_V2DI_V2DI), + MSA_BUILTIN (mod_u_b, MIPS_UV16QI_FTYPE_UV16QI_UV16QI), + MSA_BUILTIN (mod_u_h, MIPS_UV8HI_FTYPE_UV8HI_UV8HI), + MSA_BUILTIN (mod_u_w, MIPS_UV4SI_FTYPE_UV4SI_UV4SI), + MSA_BUILTIN (mod_u_d, MIPS_UV2DI_FTYPE_UV2DI_UV2DI), + MSA_BUILTIN (dotp_s_h, MIPS_V8HI_FTYPE_V16QI_V16QI), + MSA_BUILTIN (dotp_s_w, MIPS_V4SI_FTYPE_V8HI_V8HI), + MSA_BUILTIN (dotp_s_d, MIPS_V2DI_FTYPE_V4SI_V4SI), + MSA_BUILTIN (dotp_u_h, MIPS_UV8HI_FTYPE_UV16QI_UV16QI), + MSA_BUILTIN (dotp_u_w, MIPS_UV4SI_FTYPE_UV8HI_UV8HI), + MSA_BUILTIN (dotp_u_d, MIPS_UV2DI_FTYPE_UV4SI_UV4SI), + MSA_BUILTIN (dpadd_s_h, MIPS_V8HI_FTYPE_V8HI_V16QI_V16QI), + MSA_BUILTIN (dpadd_s_w, MIPS_V4SI_FTYPE_V4SI_V8HI_V8HI), + MSA_BUILTIN (dpadd_s_d, MIPS_V2DI_FTYPE_V2DI_V4SI_V4SI), + MSA_BUILTIN (dpadd_u_h, MIPS_UV8HI_FTYPE_UV8HI_UV16QI_UV16QI), + MSA_BUILTIN (dpadd_u_w, MIPS_UV4SI_FTYPE_UV4SI_UV8HI_UV8HI), + MSA_BUILTIN (dpadd_u_d, MIPS_UV2DI_FTYPE_UV2DI_UV4SI_UV4SI), + MSA_BUILTIN (dpsub_s_h, MIPS_V8HI_FTYPE_V8HI_V16QI_V16QI), + MSA_BUILTIN (dpsub_s_w, MIPS_V4SI_FTYPE_V4SI_V8HI_V8HI), + MSA_BUILTIN (dpsub_s_d, MIPS_V2DI_FTYPE_V2DI_V4SI_V4SI), + MSA_BUILTIN (dpsub_u_h, MIPS_V8HI_FTYPE_V8HI_UV16QI_UV16QI), + MSA_BUILTIN (dpsub_u_w, MIPS_V4SI_FTYPE_V4SI_UV8HI_UV8HI), + MSA_BUILTIN (dpsub_u_d, MIPS_V2DI_FTYPE_V2DI_UV4SI_UV4SI), + MSA_BUILTIN (sld_b, MIPS_V16QI_FTYPE_V16QI_V16QI_SI), + MSA_BUILTIN (sld_h, MIPS_V8HI_FTYPE_V8HI_V8HI_SI), + MSA_BUILTIN (sld_w, MIPS_V4SI_FTYPE_V4SI_V4SI_SI), + MSA_BUILTIN (sld_d, MIPS_V2DI_FTYPE_V2DI_V2DI_SI), + MSA_BUILTIN (sldi_b, MIPS_V16QI_FTYPE_V16QI_V16QI_UQI), + MSA_BUILTIN (sldi_h, MIPS_V8HI_FTYPE_V8HI_V8HI_UQI), + MSA_BUILTIN (sldi_w, MIPS_V4SI_FTYPE_V4SI_V4SI_UQI), + MSA_BUILTIN (sldi_d, MIPS_V2DI_FTYPE_V2DI_V2DI_UQI), + MSA_BUILTIN (splat_b, MIPS_V16QI_FTYPE_V16QI_SI), + MSA_BUILTIN (splat_h, MIPS_V8HI_FTYPE_V8HI_SI), + MSA_BUILTIN (splat_w, MIPS_V4SI_FTYPE_V4SI_SI), + MSA_BUILTIN (splat_d, MIPS_V2DI_FTYPE_V2DI_SI), + MSA_BUILTIN (splati_b, 
MIPS_V16QI_FTYPE_V16QI_UQI), + MSA_BUILTIN (splati_h, MIPS_V8HI_FTYPE_V8HI_UQI), + MSA_BUILTIN (splati_w, MIPS_V4SI_FTYPE_V4SI_UQI), + MSA_BUILTIN (splati_d, MIPS_V2DI_FTYPE_V2DI_UQI), + MSA_BUILTIN (pckev_b, MIPS_V16QI_FTYPE_V16QI_V16QI), + MSA_BUILTIN (pckev_h, MIPS_V8HI_FTYPE_V8HI_V8HI), + MSA_BUILTIN (pckev_w, MIPS_V4SI_FTYPE_V4SI_V4SI), + MSA_BUILTIN (pckev_d, MIPS_V2DI_FTYPE_V2DI_V2DI), + MSA_BUILTIN (pckod_b, MIPS_V16QI_FTYPE_V16QI_V16QI), + MSA_BUILTIN (pckod_h, MIPS_V8HI_FTYPE_V8HI_V8HI), + MSA_BUILTIN (pckod_w, MIPS_V4SI_FTYPE_V4SI_V4SI), + MSA_BUILTIN (pckod_d, MIPS_V2DI_FTYPE_V2DI_V2DI), + MSA_BUILTIN (ilvl_b, MIPS_V16QI_FTYPE_V16QI_V16QI), + MSA_BUILTIN (ilvl_h, MIPS_V8HI_FTYPE_V8HI_V8HI), + MSA_BUILTIN (ilvl_w, MIPS_V4SI_FTYPE_V4SI_V4SI), + MSA_BUILTIN (ilvl_d, MIPS_V2DI_FTYPE_V2DI_V2DI), + MSA_BUILTIN (ilvr_b, MIPS_V16QI_FTYPE_V16QI_V16QI), + MSA_BUILTIN (ilvr_h, MIPS_V8HI_FTYPE_V8HI_V8HI), + MSA_BUILTIN (ilvr_w, MIPS_V4SI_FTYPE_V4SI_V4SI), + MSA_BUILTIN (ilvr_d, MIPS_V2DI_FTYPE_V2DI_V2DI), + MSA_BUILTIN (ilvev_b, MIPS_V16QI_FTYPE_V16QI_V16QI), + MSA_BUILTIN (ilvev_h, MIPS_V8HI_FTYPE_V8HI_V8HI), + MSA_BUILTIN (ilvev_w, MIPS_V4SI_FTYPE_V4SI_V4SI), + MSA_BUILTIN (ilvev_d, MIPS_V2DI_FTYPE_V2DI_V2DI), + MSA_BUILTIN (ilvod_b, MIPS_V16QI_FTYPE_V16QI_V16QI), + MSA_BUILTIN (ilvod_h, MIPS_V8HI_FTYPE_V8HI_V8HI), + MSA_BUILTIN (ilvod_w, MIPS_V4SI_FTYPE_V4SI_V4SI), + MSA_BUILTIN (ilvod_d, MIPS_V2DI_FTYPE_V2DI_V2DI), + MSA_BUILTIN (vshf_b, MIPS_V16QI_FTYPE_V16QI_V16QI_V16QI), + MSA_BUILTIN (vshf_h, MIPS_V8HI_FTYPE_V8HI_V8HI_V8HI), + MSA_BUILTIN (vshf_w, MIPS_V4SI_FTYPE_V4SI_V4SI_V4SI), + MSA_BUILTIN (vshf_d, MIPS_V2DI_FTYPE_V2DI_V2DI_V2DI), + MSA_BUILTIN (and_v, MIPS_UV16QI_FTYPE_UV16QI_UV16QI), + MSA_BUILTIN (andi_b, MIPS_UV16QI_FTYPE_UV16QI_UQI), + MSA_BUILTIN (or_v, MIPS_UV16QI_FTYPE_UV16QI_UV16QI), + MSA_BUILTIN (ori_b, MIPS_UV16QI_FTYPE_UV16QI_UQI), + MSA_BUILTIN (nor_v, MIPS_UV16QI_FTYPE_UV16QI_UV16QI), + MSA_BUILTIN (nori_b, MIPS_UV16QI_FTYPE_UV16QI_UQI), 
+ MSA_BUILTIN (xor_v, MIPS_UV16QI_FTYPE_UV16QI_UV16QI), + MSA_BUILTIN (xori_b, MIPS_UV16QI_FTYPE_UV16QI_UQI), + MSA_BUILTIN (bmnz_v, MIPS_UV16QI_FTYPE_UV16QI_UV16QI_UV16QI), + MSA_BUILTIN (bmnzi_b, MIPS_UV16QI_FTYPE_UV16QI_UV16QI_UQI), + MSA_BUILTIN (bmz_v, MIPS_UV16QI_FTYPE_UV16QI_UV16QI_UV16QI), + MSA_BUILTIN (bmzi_b, MIPS_UV16QI_FTYPE_UV16QI_UV16QI_UQI), + MSA_BUILTIN (bsel_v, MIPS_UV16QI_FTYPE_UV16QI_UV16QI_UV16QI), + MSA_BUILTIN (bseli_b, MIPS_UV16QI_FTYPE_UV16QI_UV16QI_UQI), + MSA_BUILTIN (shf_b, MIPS_V16QI_FTYPE_V16QI_UQI), + MSA_BUILTIN (shf_h, MIPS_V8HI_FTYPE_V8HI_UQI), + MSA_BUILTIN (shf_w, MIPS_V4SI_FTYPE_V4SI_UQI), + MSA_BUILTIN_TEST_BRANCH (bnz_v, MIPS_SI_FTYPE_UV16QI), + MSA_BUILTIN_TEST_BRANCH (bz_v, MIPS_SI_FTYPE_UV16QI), + MSA_BUILTIN (fill_b, MIPS_V16QI_FTYPE_SI), + MSA_BUILTIN (fill_h, MIPS_V8HI_FTYPE_SI), + MSA_BUILTIN (fill_w, MIPS_V4SI_FTYPE_SI), + MSA_BUILTIN (fill_d, MIPS_V2DI_FTYPE_DI), + MSA_BUILTIN (pcnt_b, MIPS_V16QI_FTYPE_V16QI), + MSA_BUILTIN (pcnt_h, MIPS_V8HI_FTYPE_V8HI), + MSA_BUILTIN (pcnt_w, MIPS_V4SI_FTYPE_V4SI), + MSA_BUILTIN (pcnt_d, MIPS_V2DI_FTYPE_V2DI), + MSA_BUILTIN (nloc_b, MIPS_V16QI_FTYPE_V16QI), + MSA_BUILTIN (nloc_h, MIPS_V8HI_FTYPE_V8HI), + MSA_BUILTIN (nloc_w, MIPS_V4SI_FTYPE_V4SI), + MSA_BUILTIN (nloc_d, MIPS_V2DI_FTYPE_V2DI), + MSA_BUILTIN (nlzc_b, MIPS_V16QI_FTYPE_V16QI), + MSA_BUILTIN (nlzc_h, MIPS_V8HI_FTYPE_V8HI), + MSA_BUILTIN (nlzc_w, MIPS_V4SI_FTYPE_V4SI), + MSA_BUILTIN (nlzc_d, MIPS_V2DI_FTYPE_V2DI), + MSA_BUILTIN (copy_s_b, MIPS_SI_FTYPE_V16QI_UQI), + MSA_BUILTIN (copy_s_h, MIPS_SI_FTYPE_V8HI_UQI), + MSA_BUILTIN (copy_s_w, MIPS_SI_FTYPE_V4SI_UQI), + MSA_BUILTIN (copy_s_d, MIPS_DI_FTYPE_V2DI_UQI), + MSA_BUILTIN (copy_u_b, MIPS_USI_FTYPE_V16QI_UQI), + MSA_BUILTIN (copy_u_h, MIPS_USI_FTYPE_V8HI_UQI), + MSA_BUILTIN_REMAP (copy_u_w, copy_s_w, MIPS_USI_FTYPE_V4SI_UQI), + MSA_BUILTIN_REMAP (copy_u_d, copy_s_d, MIPS_UDI_FTYPE_V2DI_UQI), + MSA_BUILTIN (insert_b, MIPS_V16QI_FTYPE_V16QI_UQI_SI), + MSA_BUILTIN 
(insert_h, MIPS_V8HI_FTYPE_V8HI_UQI_SI), + MSA_BUILTIN (insert_w, MIPS_V4SI_FTYPE_V4SI_UQI_SI), + MSA_BUILTIN (insert_d, MIPS_V2DI_FTYPE_V2DI_UQI_DI), + MSA_BUILTIN (insve_b, MIPS_V16QI_FTYPE_V16QI_UQI_V16QI), + MSA_BUILTIN (insve_h, MIPS_V8HI_FTYPE_V8HI_UQI_V8HI), + MSA_BUILTIN (insve_w, MIPS_V4SI_FTYPE_V4SI_UQI_V4SI), + MSA_BUILTIN (insve_d, MIPS_V2DI_FTYPE_V2DI_UQI_V2DI), + MSA_BUILTIN_TEST_BRANCH (bnz_b, MIPS_SI_FTYPE_UV16QI), + MSA_BUILTIN_TEST_BRANCH (bnz_h, MIPS_SI_FTYPE_UV8HI), + MSA_BUILTIN_TEST_BRANCH (bnz_w, MIPS_SI_FTYPE_UV4SI), + MSA_BUILTIN_TEST_BRANCH (bnz_d, MIPS_SI_FTYPE_UV2DI), + MSA_BUILTIN_TEST_BRANCH (bz_b, MIPS_SI_FTYPE_UV16QI), + MSA_BUILTIN_TEST_BRANCH (bz_h, MIPS_SI_FTYPE_UV8HI), + MSA_BUILTIN_TEST_BRANCH (bz_w, MIPS_SI_FTYPE_UV4SI), + MSA_BUILTIN_TEST_BRANCH (bz_d, MIPS_SI_FTYPE_UV2DI), + MSA_BUILTIN (ldi_b, MIPS_V16QI_FTYPE_HI), + MSA_BUILTIN (ldi_h, MIPS_V8HI_FTYPE_HI), + MSA_BUILTIN (ldi_w, MIPS_V4SI_FTYPE_HI), + MSA_BUILTIN (ldi_d, MIPS_V2DI_FTYPE_HI), + MSA_BUILTIN (fcaf_w, MIPS_V4SI_FTYPE_V4SF_V4SF), + MSA_BUILTIN (fcaf_d, MIPS_V2DI_FTYPE_V2DF_V2DF), + MSA_BUILTIN (fcor_w, MIPS_V4SI_FTYPE_V4SF_V4SF), + MSA_BUILTIN (fcor_d, MIPS_V2DI_FTYPE_V2DF_V2DF), + MSA_BUILTIN (fcun_w, MIPS_V4SI_FTYPE_V4SF_V4SF), + MSA_BUILTIN (fcun_d, MIPS_V2DI_FTYPE_V2DF_V2DF), + MSA_BUILTIN (fcune_w, MIPS_V4SI_FTYPE_V4SF_V4SF), + MSA_BUILTIN (fcune_d, MIPS_V2DI_FTYPE_V2DF_V2DF), + MSA_BUILTIN (fcueq_w, MIPS_V4SI_FTYPE_V4SF_V4SF), + MSA_BUILTIN (fcueq_d, MIPS_V2DI_FTYPE_V2DF_V2DF), + MSA_BUILTIN (fceq_w, MIPS_V4SI_FTYPE_V4SF_V4SF), + MSA_BUILTIN (fceq_d, MIPS_V2DI_FTYPE_V2DF_V2DF), + MSA_BUILTIN (fcne_w, MIPS_V4SI_FTYPE_V4SF_V4SF), + MSA_BUILTIN (fcne_d, MIPS_V2DI_FTYPE_V2DF_V2DF), + MSA_BUILTIN (fclt_w, MIPS_V4SI_FTYPE_V4SF_V4SF), + MSA_BUILTIN (fclt_d, MIPS_V2DI_FTYPE_V2DF_V2DF), + MSA_BUILTIN (fcult_w, MIPS_V4SI_FTYPE_V4SF_V4SF), + MSA_BUILTIN (fcult_d, MIPS_V2DI_FTYPE_V2DF_V2DF), + MSA_BUILTIN (fcle_w, MIPS_V4SI_FTYPE_V4SF_V4SF), + MSA_BUILTIN (fcle_d, 
MIPS_V2DI_FTYPE_V2DF_V2DF), + MSA_BUILTIN (fcule_w, MIPS_V4SI_FTYPE_V4SF_V4SF), + MSA_BUILTIN (fcule_d, MIPS_V2DI_FTYPE_V2DF_V2DF), + MSA_BUILTIN (fsaf_w, MIPS_V4SI_FTYPE_V4SF_V4SF), + MSA_BUILTIN (fsaf_d, MIPS_V2DI_FTYPE_V2DF_V2DF), + MSA_BUILTIN (fsor_w, MIPS_V4SI_FTYPE_V4SF_V4SF), + MSA_BUILTIN (fsor_d, MIPS_V2DI_FTYPE_V2DF_V2DF), + MSA_BUILTIN (fsun_w, MIPS_V4SI_FTYPE_V4SF_V4SF), + MSA_BUILTIN (fsun_d, MIPS_V2DI_FTYPE_V2DF_V2DF), + MSA_BUILTIN (fsune_w, MIPS_V4SI_FTYPE_V4SF_V4SF), + MSA_BUILTIN (fsune_d, MIPS_V2DI_FTYPE_V2DF_V2DF), + MSA_BUILTIN (fsueq_w, MIPS_V4SI_FTYPE_V4SF_V4SF), + MSA_BUILTIN (fsueq_d, MIPS_V2DI_FTYPE_V2DF_V2DF), + MSA_BUILTIN (fseq_w, MIPS_V4SI_FTYPE_V4SF_V4SF), + MSA_BUILTIN (fseq_d, MIPS_V2DI_FTYPE_V2DF_V2DF), + MSA_BUILTIN (fsne_w, MIPS_V4SI_FTYPE_V4SF_V4SF), + MSA_BUILTIN (fsne_d, MIPS_V2DI_FTYPE_V2DF_V2DF), + MSA_BUILTIN (fslt_w, MIPS_V4SI_FTYPE_V4SF_V4SF), + MSA_BUILTIN (fslt_d, MIPS_V2DI_FTYPE_V2DF_V2DF), + MSA_BUILTIN (fsult_w, MIPS_V4SI_FTYPE_V4SF_V4SF), + MSA_BUILTIN (fsult_d, MIPS_V2DI_FTYPE_V2DF_V2DF), + MSA_BUILTIN (fsle_w, MIPS_V4SI_FTYPE_V4SF_V4SF), + MSA_BUILTIN (fsle_d, MIPS_V2DI_FTYPE_V2DF_V2DF), + MSA_BUILTIN (fsule_w, MIPS_V4SI_FTYPE_V4SF_V4SF), + MSA_BUILTIN (fsule_d, MIPS_V2DI_FTYPE_V2DF_V2DF), + MSA_BUILTIN (fadd_w, MIPS_V4SF_FTYPE_V4SF_V4SF), + MSA_BUILTIN (fadd_d, MIPS_V2DF_FTYPE_V2DF_V2DF), + MSA_BUILTIN (fsub_w, MIPS_V4SF_FTYPE_V4SF_V4SF), + MSA_BUILTIN (fsub_d, MIPS_V2DF_FTYPE_V2DF_V2DF), + MSA_BUILTIN (fmul_w, MIPS_V4SF_FTYPE_V4SF_V4SF), + MSA_BUILTIN (fmul_d, MIPS_V2DF_FTYPE_V2DF_V2DF), + MSA_BUILTIN (fdiv_w, MIPS_V4SF_FTYPE_V4SF_V4SF), + MSA_BUILTIN (fdiv_d, MIPS_V2DF_FTYPE_V2DF_V2DF), + MSA_BUILTIN (fmadd_w, MIPS_V4SF_FTYPE_V4SF_V4SF_V4SF), + MSA_BUILTIN (fmadd_d, MIPS_V2DF_FTYPE_V2DF_V2DF_V2DF), + MSA_BUILTIN (fmsub_w, MIPS_V4SF_FTYPE_V4SF_V4SF_V4SF), + MSA_BUILTIN (fmsub_d, MIPS_V2DF_FTYPE_V2DF_V2DF_V2DF), + MSA_BUILTIN (fexp2_w, MIPS_V4SF_FTYPE_V4SF_V4SI), + MSA_BUILTIN (fexp2_d, 
MIPS_V2DF_FTYPE_V2DF_V2DI), + MSA_BUILTIN (fexdo_h, MIPS_V8HI_FTYPE_V4SF_V4SF), + MSA_BUILTIN (fexdo_w, MIPS_V4SF_FTYPE_V2DF_V2DF), + MSA_BUILTIN (ftq_h, MIPS_V8HI_FTYPE_V4SF_V4SF), + MSA_BUILTIN (ftq_w, MIPS_V4SI_FTYPE_V2DF_V2DF), + MSA_BUILTIN (fmin_w, MIPS_V4SF_FTYPE_V4SF_V4SF), + MSA_BUILTIN (fmin_d, MIPS_V2DF_FTYPE_V2DF_V2DF), + MSA_BUILTIN (fmin_a_w, MIPS_V4SF_FTYPE_V4SF_V4SF), + MSA_BUILTIN (fmin_a_d, MIPS_V2DF_FTYPE_V2DF_V2DF), + MSA_BUILTIN (fmax_w, MIPS_V4SF_FTYPE_V4SF_V4SF), + MSA_BUILTIN (fmax_d, MIPS_V2DF_FTYPE_V2DF_V2DF), + MSA_BUILTIN (fmax_a_w, MIPS_V4SF_FTYPE_V4SF_V4SF), + MSA_BUILTIN (fmax_a_d, MIPS_V2DF_FTYPE_V2DF_V2DF), + MSA_BUILTIN (mul_q_h, MIPS_V8HI_FTYPE_V8HI_V8HI), + MSA_BUILTIN (mul_q_w, MIPS_V4SI_FTYPE_V4SI_V4SI), + MSA_BUILTIN (mulr_q_h, MIPS_V8HI_FTYPE_V8HI_V8HI), + MSA_BUILTIN (mulr_q_w, MIPS_V4SI_FTYPE_V4SI_V4SI), + MSA_BUILTIN (madd_q_h, MIPS_V8HI_FTYPE_V8HI_V8HI_V8HI), + MSA_BUILTIN (madd_q_w, MIPS_V4SI_FTYPE_V4SI_V4SI_V4SI), + MSA_BUILTIN (maddr_q_h, MIPS_V8HI_FTYPE_V8HI_V8HI_V8HI), + MSA_BUILTIN (maddr_q_w, MIPS_V4SI_FTYPE_V4SI_V4SI_V4SI), + MSA_BUILTIN (msub_q_h, MIPS_V8HI_FTYPE_V8HI_V8HI_V8HI), + MSA_BUILTIN (msub_q_w, MIPS_V4SI_FTYPE_V4SI_V4SI_V4SI), + MSA_BUILTIN (msubr_q_h, MIPS_V8HI_FTYPE_V8HI_V8HI_V8HI), + MSA_BUILTIN (msubr_q_w, MIPS_V4SI_FTYPE_V4SI_V4SI_V4SI), + MSA_BUILTIN (fclass_w, MIPS_V4SI_FTYPE_V4SF), + MSA_BUILTIN (fclass_d, MIPS_V2DI_FTYPE_V2DF), + MSA_BUILTIN (fsqrt_w, MIPS_V4SF_FTYPE_V4SF), + MSA_BUILTIN (fsqrt_d, MIPS_V2DF_FTYPE_V2DF), + MSA_BUILTIN (frcp_w, MIPS_V4SF_FTYPE_V4SF), + MSA_BUILTIN (frcp_d, MIPS_V2DF_FTYPE_V2DF), + MSA_BUILTIN (frint_w, MIPS_V4SF_FTYPE_V4SF), + MSA_BUILTIN (frint_d, MIPS_V2DF_FTYPE_V2DF), + MSA_BUILTIN (frsqrt_w, MIPS_V4SF_FTYPE_V4SF), + MSA_BUILTIN (frsqrt_d, MIPS_V2DF_FTYPE_V2DF), + MSA_BUILTIN (flog2_w, MIPS_V4SF_FTYPE_V4SF), + MSA_BUILTIN (flog2_d, MIPS_V2DF_FTYPE_V2DF), + MSA_BUILTIN (fexupl_w, MIPS_V4SF_FTYPE_V8HI), + MSA_BUILTIN (fexupl_d, MIPS_V2DF_FTYPE_V4SF), + 
MSA_BUILTIN (fexupr_w, MIPS_V4SF_FTYPE_V8HI), + MSA_BUILTIN (fexupr_d, MIPS_V2DF_FTYPE_V4SF), + MSA_BUILTIN (ffql_w, MIPS_V4SF_FTYPE_V8HI), + MSA_BUILTIN (ffql_d, MIPS_V2DF_FTYPE_V4SI), + MSA_BUILTIN (ffqr_w, MIPS_V4SF_FTYPE_V8HI), + MSA_BUILTIN (ffqr_d, MIPS_V2DF_FTYPE_V4SI), + MSA_BUILTIN (ftint_s_w, MIPS_V4SI_FTYPE_V4SF), + MSA_BUILTIN (ftint_s_d, MIPS_V2DI_FTYPE_V2DF), + MSA_BUILTIN (ftint_u_w, MIPS_UV4SI_FTYPE_V4SF), + MSA_BUILTIN (ftint_u_d, MIPS_UV2DI_FTYPE_V2DF), + MSA_BUILTIN (ftrunc_s_w, MIPS_V4SI_FTYPE_V4SF), + MSA_BUILTIN (ftrunc_s_d, MIPS_V2DI_FTYPE_V2DF), + MSA_BUILTIN (ftrunc_u_w, MIPS_UV4SI_FTYPE_V4SF), + MSA_BUILTIN (ftrunc_u_d, MIPS_UV2DI_FTYPE_V2DF), + MSA_BUILTIN (ffint_s_w, MIPS_V4SF_FTYPE_V4SI), + MSA_BUILTIN (ffint_s_d, MIPS_V2DF_FTYPE_V2DI), + MSA_BUILTIN (ffint_u_w, MIPS_V4SF_FTYPE_UV4SI), + MSA_BUILTIN (ffint_u_d, MIPS_V2DF_FTYPE_UV2DI), + MSA_NO_TARGET_BUILTIN (ctcmsa, MIPS_VOID_FTYPE_UQI_SI), + MSA_BUILTIN (cfcmsa, MIPS_SI_FTYPE_UQI), + MSA_BUILTIN (move_v, MIPS_V16QI_FTYPE_V16QI), }; /* Index I is the function declaration for mips_builtins[I], or null if the function isn't defined on this target. */ static GTY(()) tree mips_builtin_decls[ARRAY_SIZE (mips_builtins)]; +/* Get the index I of the function declaration for mips_builtin_decls[I] + using the instruction code or return null if not defined for the target. */ +static GTY(()) int mips_get_builtin_decl_index[NUM_INSN_CODES]; /* MODE is a vector mode whose elements have type TYPE. Return the type of the vector itself. */ @@ -14971,7 +16352,9 @@ mips_build_cvpointer_type (void) #define MIPS_ATYPE_CVPOINTER mips_build_cvpointer_type () /* Standard mode-based argument types. 
*/ +#define MIPS_ATYPE_QI intQI_type_node #define MIPS_ATYPE_UQI unsigned_intQI_type_node +#define MIPS_ATYPE_HI intHI_type_node #define MIPS_ATYPE_SI intSI_type_node #define MIPS_ATYPE_USI unsigned_intSI_type_node #define MIPS_ATYPE_DI intDI_type_node @@ -14986,6 +16369,24 @@ mips_build_cvpointer_type (void) #define MIPS_ATYPE_V4QI mips_builtin_vector_type (intQI_type_node, V4QImode) #define MIPS_ATYPE_V4HI mips_builtin_vector_type (intHI_type_node, V4HImode) #define MIPS_ATYPE_V8QI mips_builtin_vector_type (intQI_type_node, V8QImode) + +#define MIPS_ATYPE_V2DI \ + mips_builtin_vector_type (long_long_integer_type_node, V2DImode) +#define MIPS_ATYPE_V4SI mips_builtin_vector_type (intSI_type_node, V4SImode) +#define MIPS_ATYPE_V8HI mips_builtin_vector_type (intHI_type_node, V8HImode) +#define MIPS_ATYPE_V16QI mips_builtin_vector_type (intQI_type_node, V16QImode) +#define MIPS_ATYPE_V2DF mips_builtin_vector_type (double_type_node, V2DFmode) +#define MIPS_ATYPE_V4SF mips_builtin_vector_type (float_type_node, V4SFmode) + +#define MIPS_ATYPE_UV2DI \ + mips_builtin_vector_type (long_long_unsigned_type_node, V2DImode) +#define MIPS_ATYPE_UV4SI \ + mips_builtin_vector_type (unsigned_intSI_type_node, V4SImode) +#define MIPS_ATYPE_UV8HI \ + mips_builtin_vector_type (unsigned_intHI_type_node, V8HImode) +#define MIPS_ATYPE_UV16QI \ + mips_builtin_vector_type (unsigned_intQI_type_node, V16QImode) + #define MIPS_ATYPE_UV2SI \ mips_builtin_vector_type (unsigned_intSI_type_node, V2SImode) #define MIPS_ATYPE_UV4HI \ @@ -15047,10 +16448,13 @@ mips_init_builtins (void) { d = &mips_builtins[i]; if (d->avail ()) - mips_builtin_decls[i] - = add_builtin_function (d->name, - mips_build_function_type (d->function_type), - i, BUILT_IN_MD, NULL, NULL); + { + mips_builtin_decls[i] + = add_builtin_function (d->name, + mips_build_function_type (d->function_type), + i, BUILT_IN_MD, NULL, NULL); + mips_get_builtin_decl_index[d->icode] = i; + } } } @@ -15064,6 +16468,48 @@ mips_builtin_decl 
(unsigned int code, bool initialize_p ATTRIBUTE_UNUSED) return mips_builtin_decls[code]; } +/* Implement TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION. */ + +static tree +mips_builtin_vectorized_function (unsigned int fn, tree type_out, tree type_in) +{ + machine_mode in_mode, out_mode; + int in_n, out_n; + + if (TREE_CODE (type_out) != VECTOR_TYPE + || TREE_CODE (type_in) != VECTOR_TYPE + || !ISA_HAS_MSA) + return NULL_TREE; + + out_mode = TYPE_MODE (TREE_TYPE (type_out)); + out_n = TYPE_VECTOR_SUBPARTS (type_out); + in_mode = TYPE_MODE (TREE_TYPE (type_in)); + in_n = TYPE_VECTOR_SUBPARTS (type_in); + + /* INSN is the name of the associated instruction pattern, without + the leading CODE_FOR_. */ +#define MIPS_GET_BUILTIN(INSN) \ + mips_builtin_decls[mips_get_builtin_decl_index[CODE_FOR_##INSN]] + + switch (fn) + { + case BUILT_IN_SQRT: + if (out_mode == DFmode && out_n == 2 + && in_mode == DFmode && in_n == 2) + return MIPS_GET_BUILTIN (msa_fsqrt_d); + break; + case BUILT_IN_SQRTF: + if (out_mode == SFmode && out_n == 4 + && in_mode == SFmode && in_n == 4) + return MIPS_GET_BUILTIN (msa_fsqrt_w); + break; + default: + break; + } + + return NULL_TREE; +} + /* Take argument ARGNO from EXP's argument list and convert it into an expand operand. Store the operand in *OP. 
*/ @@ -15090,6 +16536,211 @@ static rtx mips_expand_builtin_insn (enum insn_code icode, unsigned int nops, struct expand_operand *ops, bool has_target_p) { + machine_mode imode; + + switch (icode) + { + case CODE_FOR_msa_addvi_b: + case CODE_FOR_msa_addvi_h: + case CODE_FOR_msa_addvi_w: + case CODE_FOR_msa_addvi_d: + case CODE_FOR_msa_clti_u_b: + case CODE_FOR_msa_clti_u_h: + case CODE_FOR_msa_clti_u_w: + case CODE_FOR_msa_clti_u_d: + case CODE_FOR_msa_clei_u_b: + case CODE_FOR_msa_clei_u_h: + case CODE_FOR_msa_clei_u_w: + case CODE_FOR_msa_clei_u_d: + case CODE_FOR_msa_maxi_u_b: + case CODE_FOR_msa_maxi_u_h: + case CODE_FOR_msa_maxi_u_w: + case CODE_FOR_msa_maxi_u_d: + case CODE_FOR_msa_mini_u_b: + case CODE_FOR_msa_mini_u_h: + case CODE_FOR_msa_mini_u_w: + case CODE_FOR_msa_mini_u_d: + case CODE_FOR_msa_subvi_b: + case CODE_FOR_msa_subvi_h: + case CODE_FOR_msa_subvi_w: + case CODE_FOR_msa_subvi_d: + gcc_assert (has_target_p && nops == 3); + /* We only generate a vector of constants iff the second argument + is an immediate. We also validate the range of the immediate. 
*/ + if (!CONST_INT_P (ops[2].value) + || !IN_RANGE (INTVAL (ops[2].value), 0, 31)) + break; + ops[2].mode = ops[0].mode; + ops[2].value = mips_gen_const_int_vector (ops[2].mode, + INTVAL (ops[2].value)); + break; + + case CODE_FOR_msa_ceqi_b: + case CODE_FOR_msa_ceqi_h: + case CODE_FOR_msa_ceqi_w: + case CODE_FOR_msa_ceqi_d: + case CODE_FOR_msa_clti_s_b: + case CODE_FOR_msa_clti_s_h: + case CODE_FOR_msa_clti_s_w: + case CODE_FOR_msa_clti_s_d: + case CODE_FOR_msa_clei_s_b: + case CODE_FOR_msa_clei_s_h: + case CODE_FOR_msa_clei_s_w: + case CODE_FOR_msa_clei_s_d: + case CODE_FOR_msa_maxi_s_b: + case CODE_FOR_msa_maxi_s_h: + case CODE_FOR_msa_maxi_s_w: + case CODE_FOR_msa_maxi_s_d: + case CODE_FOR_msa_mini_s_b: + case CODE_FOR_msa_mini_s_h: + case CODE_FOR_msa_mini_s_w: + case CODE_FOR_msa_mini_s_d: + gcc_assert (has_target_p && nops == 3); + /* We only generate a vector of constants iff the second argument + is an immediate. We also validate the range of the immediate. */ + if (!CONST_INT_P (ops[2].value) + || !IN_RANGE (INTVAL (ops[2].value), -16, 15)) + break; + ops[2].mode = ops[0].mode; + ops[2].value = mips_gen_const_int_vector (ops[2].mode, + INTVAL (ops[2].value)); + break; + + case CODE_FOR_msa_andi_b: + case CODE_FOR_msa_ori_b: + case CODE_FOR_msa_nori_b: + case CODE_FOR_msa_xori_b: + gcc_assert (has_target_p && nops == 3); + if (!CONST_INT_P (ops[2].value)) + break; + ops[2].mode = ops[0].mode; + ops[2].value = mips_gen_const_int_vector (ops[2].mode, + INTVAL (ops[2].value)); + break; + + case CODE_FOR_msa_bmzi_b: + case CODE_FOR_msa_bmnzi_b: + case CODE_FOR_msa_bseli_b: + gcc_assert (has_target_p && nops == 4); + if (!CONST_INT_P (ops[3].value)) + break; + ops[3].mode = ops[0].mode; + ops[3].value = mips_gen_const_int_vector (ops[3].mode, + INTVAL (ops[3].value)); + break; + + case CODE_FOR_msa_fill_b: + case CODE_FOR_msa_fill_h: + case CODE_FOR_msa_fill_w: + case CODE_FOR_msa_fill_d: + /* Map the built-ins to vector fill operations. 
We need fix up the mode + for the element being inserted. */ + gcc_assert (has_target_p && nops == 2); + imode = GET_MODE_INNER (ops[0].mode); + ops[1].value = lowpart_subreg (imode, ops[1].value, ops[1].mode); + ops[1].mode = imode; + break; + + case CODE_FOR_msa_ilvl_b: + case CODE_FOR_msa_ilvl_h: + case CODE_FOR_msa_ilvl_w: + case CODE_FOR_msa_ilvl_d: + case CODE_FOR_msa_ilvr_b: + case CODE_FOR_msa_ilvr_h: + case CODE_FOR_msa_ilvr_w: + case CODE_FOR_msa_ilvr_d: + case CODE_FOR_msa_ilvev_b: + case CODE_FOR_msa_ilvev_h: + case CODE_FOR_msa_ilvev_w: + case CODE_FOR_msa_ilvod_b: + case CODE_FOR_msa_ilvod_h: + case CODE_FOR_msa_ilvod_w: + case CODE_FOR_msa_pckev_b: + case CODE_FOR_msa_pckev_h: + case CODE_FOR_msa_pckev_w: + case CODE_FOR_msa_pckod_b: + case CODE_FOR_msa_pckod_h: + case CODE_FOR_msa_pckod_w: + /* Swap the operands 1 and 2 for interleave operations. Built-ins follow + convention of ISA, which have op1 as higher component and op2 as lower + component. However, the VEC_PERM op in tree and vec_concat in RTL + expects first operand to be lower component, because of which this + swap is needed for builtins. 
*/ + gcc_assert (has_target_p && nops == 3); + std::swap (ops[1], ops[2]); + break; + + case CODE_FOR_msa_slli_b: + case CODE_FOR_msa_slli_h: + case CODE_FOR_msa_slli_w: + case CODE_FOR_msa_slli_d: + case CODE_FOR_msa_srai_b: + case CODE_FOR_msa_srai_h: + case CODE_FOR_msa_srai_w: + case CODE_FOR_msa_srai_d: + case CODE_FOR_msa_srli_b: + case CODE_FOR_msa_srli_h: + case CODE_FOR_msa_srli_w: + case CODE_FOR_msa_srli_d: + gcc_assert (has_target_p && nops == 3); + if (!CONST_INT_P (ops[2].value) + || !IN_RANGE (INTVAL (ops[2].value), 0, + GET_MODE_UNIT_PRECISION (ops[0].mode) - 1)) + break; + ops[2].mode = ops[0].mode; + ops[2].value = mips_gen_const_int_vector (ops[2].mode, + INTVAL (ops[2].value)); + break; + + case CODE_FOR_msa_insert_b: + case CODE_FOR_msa_insert_h: + case CODE_FOR_msa_insert_w: + case CODE_FOR_msa_insert_d: + /* Map the built-ins to insert operations. We need to swap operands, + fix up the mode for the element being inserted, and generate + a bit mask for vec_merge. */ + gcc_assert (has_target_p && nops == 4); + std::swap (ops[1], ops[2]); + std::swap (ops[1], ops[3]); + imode = GET_MODE_INNER (ops[0].mode); + ops[1].value = lowpart_subreg (imode, ops[1].value, ops[1].mode); + ops[1].mode = imode; + ops[3].value = GEN_INT (1 << INTVAL (ops[3].value)); + break; + + case CODE_FOR_msa_insve_b: + case CODE_FOR_msa_insve_h: + case CODE_FOR_msa_insve_w: + case CODE_FOR_msa_insve_d: + /* Map the built-ins to element insert operations. We need to swap + operands and generate a bit mask. 
*/ + gcc_assert (has_target_p && nops == 4); + std::swap (ops[1], ops[2]); + std::swap (ops[1], ops[3]); + ops[3].value = GEN_INT (1 << INTVAL (ops[3].value)); + break; + + case CODE_FOR_msa_shf_b: + case CODE_FOR_msa_shf_h: + case CODE_FOR_msa_shf_w: + case CODE_FOR_msa_shf_w_f: + gcc_assert (has_target_p && nops == 3); + ops[2].value = mips_gen_const_int_vector_shuffle (ops[0].mode, + INTVAL (ops[2].value)); + break; + + case CODE_FOR_msa_vshf_b: + case CODE_FOR_msa_vshf_h: + case CODE_FOR_msa_vshf_w: + case CODE_FOR_msa_vshf_d: + gcc_assert (has_target_p && nops == 4); + std::swap (ops[1], ops[3]); + break; + + default: + break; + } + if (!maybe_expand_insn (icode, nops, ops)) { error ("invalid argument to built-in function"); @@ -15182,6 +16833,50 @@ mips_expand_builtin_movtf (enum mips_builtin_type type, 4, ops, true); } +/* Expand an MSA built-in for a compare and branch instruction specified by + ICODE, set a general-purpose register to 1 if the branch was taken, + 0 otherwise. */ + +static rtx +mips_expand_builtin_msa_test_branch (enum insn_code icode, tree exp) +{ + struct expand_operand ops[3]; + rtx_insn *cbranch; + rtx_code_label *true_label, *done_label; + rtx cmp_result; + + true_label = gen_label_rtx (); + done_label = gen_label_rtx (); + + create_input_operand (&ops[0], true_label, TYPE_MODE (TREE_TYPE (exp))); + mips_prepare_builtin_arg (&ops[1], exp, 0); + create_fixed_operand (&ops[2], const0_rtx); + + /* Make sure that the operand 1 is a REG. */ + if (GET_CODE (ops[1].value) != REG) + ops[1].value = force_reg (ops[1].mode, ops[1].value); + + if ((cbranch = maybe_gen_insn (icode, 3, ops)) == NULL_RTX) + error ("failed to expand built-in function"); + + cmp_result = gen_reg_rtx (SImode); + + /* First assume that CMP_RESULT is false. */ + mips_emit_move (cmp_result, const0_rtx); + + /* Branch to TRUE_LABEL if CBRANCH is taken and DONE_LABEL otherwise. 
*/ + emit_jump_insn (cbranch); + emit_jump_insn (gen_jump (done_label)); + emit_barrier (); + + /* Set CMP_RESULT to true if the branch was taken. */ + emit_label (true_label); + mips_emit_move (cmp_result, const1_rtx); + + emit_label (done_label); + return cmp_result; +} + /* Move VALUE_IF_TRUE into TARGET if CONDITION is true; move VALUE_IF_FALSE into TARGET otherwise. Return TARGET. */ @@ -15318,6 +17013,9 @@ mips_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, return mips_expand_builtin_compare (d->builtin_type, d->icode, d->cond, target, exp); + case MIPS_BUILTIN_MSA_TEST_BRANCH: + return mips_expand_builtin_msa_test_branch (d->icode, exp); + case MIPS_BUILTIN_BPOSGE32: return mips_expand_builtin_bposge (d->builtin_type, target); } @@ -17592,6 +19290,9 @@ mips_set_compression_mode (unsigned int compression_mode) if (TARGET_HARD_FLOAT_ABI && !TARGET_OLDABI) sorry ("hard-float MIPS16 code for ABIs other than o32 and o64"); + + if (TARGET_MSA) + sorry ("MSA MIPS16 code"); } else { @@ -17768,6 +19469,11 @@ mips_option_override (void) if (TARGET_MICROMIPS && TARGET_MIPS16) error ("unsupported combination: %s", "-mips16 -mmicromips"); + /* Prohibit Paired-Single and MSA combination. This is software restriction + rather than architectural. */ + if (ISA_HAS_MSA && TARGET_PAIRED_SINGLE_FLOAT) + error ("unsupported combination: %s", "-mmsa -mpaired-single"); + /* Save the base compression state and process flags as though we were generating uncompressed code. 
*/ mips_base_compression_flags = TARGET_COMPRESSION; @@ -17871,6 +19577,8 @@ mips_option_override (void) target_flags |= MASK_FLOAT64; else if (TARGET_64BIT && TARGET_DOUBLE_FLOAT) target_flags |= MASK_FLOAT64; + else if (mips_abi == ABI_32 && ISA_HAS_MSA && !TARGET_FLOATXX) + target_flags |= MASK_FLOAT64; else target_flags &= ~MASK_FLOAT64; } @@ -18129,6 +19837,11 @@ mips_option_override (void) TARGET_MIPS3D = 0; } + /* Make sure that when ISA_HAS_MSA is true, TARGET_FLOAT64 and + TARGET_HARD_FLOAT_ABI and both true. */ + if (ISA_HAS_MSA && !(TARGET_FLOAT64 && TARGET_HARD_FLOAT_ABI)) + error ("%<-mmsa%> must be used with %<-mfp64%> and %<-mhard-float%>"); + /* Make sure that -mpaired-single is only used on ISAs that support it. We must disable it otherwise since it relies on other ISA properties like ISA_HAS_8CC having their normal values. */ @@ -19164,7 +20877,7 @@ mips_prepare_pch_save (void) /* Generate or test for an insn that supports a constant permutation. */ -#define MAX_VECT_LEN 8 +#define MAX_VECT_LEN 16 struct expand_vec_perm_d { @@ -19368,6 +21081,41 @@ mips_expand_vpc_loongson_bcast (struct expand_vec_perm_d *d) return true; } +/* Construct (set target (vec_select op0 (parallel selector))) and + return true if that's a valid instruction in the active ISA. 
*/ + +static bool +mips_expand_msa_shuffle (struct expand_vec_perm_d *d) +{ + rtx x, elts[MAX_VECT_LEN]; + rtvec v; + rtx_insn *insn; + unsigned i; + + if (!ISA_HAS_MSA) + return false; + + for (i = 0; i < d->nelt; i++) + elts[i] = GEN_INT (d->perm[i]); + + v = gen_rtvec_v (d->nelt, elts); + x = gen_rtx_PARALLEL (VOIDmode, v); + + if (!mips_const_vector_shuffle_set_p (x, d->vmode)) + return false; + + x = gen_rtx_VEC_SELECT (d->vmode, d->op0, x); + x = gen_rtx_SET (d->target, x); + + insn = emit_insn (x); + if (recog_memoized (insn) < 0) + { + remove_insn (insn); + return false; + } + return true; +} + static bool mips_expand_vec_perm_const_1 (struct expand_vec_perm_d *d) { @@ -19402,6 +21150,8 @@ mips_expand_vec_perm_const_1 (struct expand_vec_perm_d *d) return true; if (mips_expand_vpc_loongson_bcast (d)) return true; + if (mips_expand_msa_shuffle (d)) + return true; return false; } @@ -19480,6 +21230,17 @@ mips_expand_vec_perm_const (rtx operands[4]) return ok; } +/* Implement TARGET_SCHED_REASSOCIATION_WIDTH. */ + +static int +mips_sched_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED, + machine_mode mode) +{ + if (MSA_SUPPORTED_MODE_P (mode)) + return 2; + return 1; +} + /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. 
*/ static bool @@ -19530,9 +21291,62 @@ mips_expand_vec_unpack (rtx operands[2], bool unsigned_p, bool high_p) { machine_mode imode = GET_MODE (operands[1]); rtx (*unpack) (rtx, rtx, rtx); - rtx (*cmpgt) (rtx, rtx, rtx); + rtx (*cmpFunc) (rtx, rtx, rtx); rtx tmp, dest, zero; + if (ISA_HAS_MSA) + { + switch (imode) + { + case V4SImode: + if (BYTES_BIG_ENDIAN != high_p) + unpack = gen_msa_ilvl_w; + else + unpack = gen_msa_ilvr_w; + + cmpFunc = gen_msa_clt_s_w; + break; + + case V8HImode: + if (BYTES_BIG_ENDIAN != high_p) + unpack = gen_msa_ilvl_h; + else + unpack = gen_msa_ilvr_h; + + cmpFunc = gen_msa_clt_s_h; + break; + + case V16QImode: + if (BYTES_BIG_ENDIAN != high_p) + unpack = gen_msa_ilvl_b; + else + unpack = gen_msa_ilvr_b; + + cmpFunc = gen_msa_clt_s_b; + break; + + default: + gcc_unreachable (); + break; + } + + if (!unsigned_p) + { + /* Extract sign extention for each element comparing each element + with immediate zero. */ + tmp = gen_reg_rtx (imode); + emit_insn (cmpFunc (tmp, operands[1], CONST0_RTX (imode))); + } + else + tmp = force_reg (imode, CONST0_RTX (imode)); + + dest = gen_reg_rtx (imode); + + emit_insn (unpack (dest, operands[1], tmp)); + emit_move_insn (operands[0], gen_lowpart (GET_MODE (operands[0]), dest)); + return; + } + switch (imode) { case V8QImode: @@ -19540,14 +21354,14 @@ mips_expand_vec_unpack (rtx operands[2], bool unsigned_p, bool high_p) unpack = gen_loongson_punpckhbh; else unpack = gen_loongson_punpcklbh; - cmpgt = gen_loongson_pcmpgtb; + cmpFunc = gen_loongson_pcmpgtb; break; case V4HImode: if (high_p) unpack = gen_loongson_punpckhhw; else unpack = gen_loongson_punpcklhw; - cmpgt = gen_loongson_pcmpgth; + cmpFunc = gen_loongson_pcmpgth; break; default: gcc_unreachable (); @@ -19559,7 +21373,7 @@ mips_expand_vec_unpack (rtx operands[2], bool unsigned_p, bool high_p) else { tmp = gen_reg_rtx (imode); - emit_insn (cmpgt (tmp, zero, operands[1])); + emit_insn (cmpFunc (tmp, zero, operands[1])); } dest = gen_reg_rtx (imode); @@ 
-19568,6 +21382,28 @@ mips_expand_vec_unpack (rtx operands[2], bool unsigned_p, bool high_p) emit_move_insn (operands[0], gen_lowpart (GET_MODE (operands[0]), dest)); } +/* Construct and return PARALLEL RTX with CONST_INTs for HIGH (high_p == TRUE) + or LOW (high_p == FALSE) half of a vector for mode MODE. */ + +rtx +mips_msa_vec_parallel_const_half (machine_mode mode, bool high_p) +{ + int nunits = GET_MODE_NUNITS (mode); + rtvec v = rtvec_alloc (nunits / 2); + int base; + int i; + + if (BYTES_BIG_ENDIAN) + base = high_p ? 0 : nunits / 2; + else + base = high_p ? nunits / 2 : 0; + + for (i = 0; i < nunits / 2; i++) + RTVEC_ELT (v, i) = GEN_INT (base + i); + + return gen_rtx_PARALLEL (VOIDmode, v); +} + /* A subroutine of mips_expand_vec_init, match constant vector elements. */ static inline bool @@ -19615,6 +21451,42 @@ mips_expand_vi_broadcast (machine_mode vmode, rtx target, rtx elt) gcc_assert (ok); } +/* Return a const_int vector of VAL with mode MODE. */ + +rtx +mips_gen_const_int_vector (machine_mode mode, int val) +{ + int nunits = GET_MODE_NUNITS (mode); + rtvec v = rtvec_alloc (nunits); + int i; + + for (i = 0; i < nunits; i++) + RTVEC_ELT (v, i) = gen_int_mode (val, GET_MODE_INNER (mode)); + + return gen_rtx_CONST_VECTOR (mode, v); +} + +/* Return a vector of repeated 4-element sets generated from + immediate VAL in mode MODE. */ + +static rtx +mips_gen_const_int_vector_shuffle (machine_mode mode, int val) +{ + int nunits = GET_MODE_NUNITS (mode); + int nsets = nunits / 4; + rtx elts[MAX_VECT_LEN]; + int set = 0; + int i, j; + + /* Generate a const_int vector replicating the same 4-element set + from an immediate. */ + for (j = 0; j < nsets; j++, set = 4 * j) + for (i = 0; i < 4; i++) + elts[set + i] = GEN_INT (set + ((val >> (2 * i)) & 0x3)); + + return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nunits, elts)); +} + /* A subroutine of mips_expand_vec_init, replacing all of the non-constant elements of VALS with zeros, copy the constant vector to TARGET. 
*/ @@ -19627,8 +21499,9 @@ mips_expand_vi_constant (machine_mode vmode, unsigned nelt, for (i = 0; i < nelt; ++i) { - if (!mips_constant_elt_p (RTVEC_ELT (vec, i))) - RTVEC_ELT (vec, i) = const0_rtx; + rtx elem = RTVEC_ELT (vec, i); + if (!mips_constant_elt_p (elem)) + RTVEC_ELT (vec, i) = CONST0_RTX (GET_MODE (elem)); } emit_move_insn (target, gen_rtx_CONST_VECTOR (vmode, vec)); @@ -19689,6 +21562,106 @@ mips_expand_vector_init (rtx target, rtx vals) all_same = false; } + if (ISA_HAS_MSA) + { + if (all_same) + { + rtx same = XVECEXP (vals, 0, 0); + rtx temp, temp2; + + if (CONST_INT_P (same) && nvar == 0 + && mips_signed_immediate_p (INTVAL (same), 10, 0)) + { + switch (vmode) + { + case V16QImode: + case V8HImode: + case V4SImode: + case V2DImode: + emit_move_insn (target, same); + return; + + default: + break; + } + } + temp = gen_reg_rtx (imode); + if (imode == GET_MODE (same)) + temp2 = same; + else if (GET_MODE_SIZE (imode) >= UNITS_PER_WORD) + temp2 = simplify_gen_subreg (imode, same, GET_MODE (same), 0); + else + temp2 = lowpart_subreg (imode, same, GET_MODE (same)); + emit_move_insn (temp, temp2); + + switch (vmode) + { + case V16QImode: + case V8HImode: + case V4SImode: + case V2DImode: + mips_emit_move (target, gen_rtx_VEC_DUPLICATE (vmode, temp)); + break; + + case V4SFmode: + emit_insn (gen_msa_splati_w_f_scalar (target, temp)); + break; + + case V2DFmode: + emit_insn (gen_msa_splati_d_f_scalar (target, temp)); + break; + + default: + gcc_unreachable (); + } + } + else + { + rtvec vec = shallow_copy_rtvec (XVEC (vals, 0)); + + for (i = 0; i < nelt; ++i) + RTVEC_ELT (vec, i) = CONST0_RTX (imode); + + emit_move_insn (target, gen_rtx_CONST_VECTOR (vmode, vec)); + + for (i = 0; i < nelt; ++i) + { + rtx temp = gen_reg_rtx (imode); + emit_move_insn (temp, XVECEXP (vals, 0, i)); + switch (vmode) + { + case V16QImode: + emit_insn (gen_vec_setv16qi (target, temp, GEN_INT (i))); + break; + + case V8HImode: + emit_insn (gen_vec_setv8hi (target, temp, GEN_INT 
(i))); + break; + + case V4SImode: + emit_insn (gen_vec_setv4si (target, temp, GEN_INT (i))); + break; + + case V2DImode: + emit_insn (gen_vec_setv2di (target, temp, GEN_INT (i))); + break; + + case V4SFmode: + emit_insn (gen_vec_setv4sf (target, temp, GEN_INT (i))); + break; + + case V2DFmode: + emit_insn (gen_vec_setv2df (target, temp, GEN_INT (i))); + break; + + default: + gcc_unreachable (); + } + } + } + return; + } + /* Load constants from the pool, or whatever's handy. */ if (nvar == 0) { @@ -19839,6 +21812,169 @@ mips_hard_regno_caller_save_mode (unsigned int regno, return mode; } +/* Generate RTL for comparing CMP_OP0 and CMP_OP1 using condition COND and + store the result -1 or 0 in DEST. */ + +static void +mips_expand_msa_cmp (rtx dest, enum rtx_code cond, rtx op0, rtx op1) +{ + machine_mode cmp_mode = GET_MODE (op0); + int unspec = -1; + bool negate = false; + + switch (cmp_mode) + { + case V16QImode: + case V8HImode: + case V4SImode: + case V2DImode: + switch (cond) + { + case NE: + cond = reverse_condition (cond); + negate = true; + break; + case EQ: + case LT: + case LE: + case LTU: + case LEU: + break; + case GE: + case GT: + case GEU: + case GTU: + std::swap (op0, op1); + cond = swap_condition (cond); + break; + default: + gcc_unreachable (); + } + mips_emit_binary (cond, dest, op0, op1); + if (negate) + emit_move_insn (dest, gen_rtx_NOT (GET_MODE (dest), dest)); + break; + + case V4SFmode: + case V2DFmode: + switch (cond) + { + case UNORDERED: + case ORDERED: + case EQ: + case NE: + case UNEQ: + case UNLE: + case UNLT: + break; + case LTGT: cond = NE; break; + case UNGE: cond = UNLE; std::swap (op0, op1); break; + case UNGT: cond = UNLT; std::swap (op0, op1); break; + case LE: unspec = UNSPEC_MSA_FSLE; break; + case LT: unspec = UNSPEC_MSA_FSLT; break; + case GE: unspec = UNSPEC_MSA_FSLE; std::swap (op0, op1); break; + case GT: unspec = UNSPEC_MSA_FSLT; std::swap (op0, op1); break; + default: + gcc_unreachable (); + } + if (unspec < 0) + 
mips_emit_binary (cond, dest, op0, op1); + else + { + rtx x = gen_rtx_UNSPEC (GET_MODE (dest), + gen_rtvec (2, op0, op1), unspec); + emit_insn (gen_rtx_SET (dest, x)); + } + break; + + default: + gcc_unreachable (); + break; + } +} + +/* Expand VEC_COND_EXPR, where: + MODE is mode of the result + VIMODE equivalent integer mode + OPERANDS operands of VEC_COND_EXPR. */ + +void +mips_expand_vec_cond_expr (machine_mode mode, machine_mode vimode, + rtx *operands) +{ + rtx cond = operands[3]; + rtx cmp_op0 = operands[4]; + rtx cmp_op1 = operands[5]; + rtx cmp_res = gen_reg_rtx (vimode); + + mips_expand_msa_cmp (cmp_res, GET_CODE (cond), cmp_op0, cmp_op1); + + /* We handle the following cases: + 1) r = a CMP b ? -1 : 0 + 2) r = a CMP b ? -1 : v + 3) r = a CMP b ? v : 0 + 4) r = a CMP b ? v1 : v2 */ + + /* Case (1) above. We only move the results. */ + if (operands[1] == CONSTM1_RTX (vimode) + && operands[2] == CONST0_RTX (vimode)) + emit_move_insn (operands[0], cmp_res); + else + { + rtx src1 = gen_reg_rtx (vimode); + rtx src2 = gen_reg_rtx (vimode); + rtx mask = gen_reg_rtx (vimode); + rtx bsel; + + /* Move the vector result to use it as a mask. */ + emit_move_insn (mask, cmp_res); + + if (register_operand (operands[1], mode)) + { + rtx xop1 = operands[1]; + if (mode != vimode) + { + xop1 = gen_reg_rtx (vimode); + emit_move_insn (xop1, gen_rtx_SUBREG (vimode, operands[1], 0)); + } + emit_move_insn (src1, xop1); + } + else + { + gcc_assert (operands[1] == CONSTM1_RTX (vimode)); + /* Case (2) if the below doesn't move the mask to src2. */ + emit_move_insn (src1, mask); + } + + if (register_operand (operands[2], mode)) + { + rtx xop2 = operands[2]; + if (mode != vimode) + { + xop2 = gen_reg_rtx (vimode); + emit_move_insn (xop2, gen_rtx_SUBREG (vimode, operands[2], 0)); + } + emit_move_insn (src2, xop2); + } + else + { + gcc_assert (operands[2] == CONST0_RTX (mode)); + /* Case (3) if the above didn't move the mask to src1. 
*/ + emit_move_insn (src2, mask); + } + + /* We deal with case (4) if the mask wasn't moved to either src1 or src2. + In any case, we eventually do vector mask-based copy. */ + bsel = gen_rtx_IOR (vimode, + gen_rtx_AND (vimode, + gen_rtx_NOT (vimode, mask), src2), + gen_rtx_AND (vimode, mask, src1)); + /* The result is placed back to a register with the mask. */ + emit_insn (gen_rtx_SET (mask, bsel)); + emit_move_insn (operands[0], gen_rtx_SUBREG (mode, mask, 0)); + } +} + /* Implement TARGET_CASE_VALUES_THRESHOLD. */ unsigned int @@ -20120,6 +22256,9 @@ mips_promote_function_mode (const_tree type ATTRIBUTE_UNUSED, #undef TARGET_MODE_REP_EXTENDED #define TARGET_MODE_REP_EXTENDED mips_mode_rep_extended +#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION +#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \ + mips_builtin_vectorized_function #undef TARGET_VECTOR_MODE_SUPPORTED_P #define TARGET_VECTOR_MODE_SUPPORTED_P mips_vector_mode_supported_p @@ -20128,6 +22267,9 @@ mips_promote_function_mode (const_tree type ATTRIBUTE_UNUSED, #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE mips_preferred_simd_mode +#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES +#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \ + mips_autovectorize_vector_sizes #undef TARGET_INIT_BUILTINS #define TARGET_INIT_BUILTINS mips_init_builtins @@ -20205,6 +22347,9 @@ mips_promote_function_mode (const_tree type ATTRIBUTE_UNUSED, #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK #define TARGET_VECTORIZE_VEC_PERM_CONST_OK mips_vectorize_vec_perm_const_ok +#undef TARGET_SCHED_REASSOCIATION_WIDTH +#define TARGET_SCHED_REASSOCIATION_WIDTH mips_sched_reassociation_width + #undef TARGET_CASE_VALUES_THRESHOLD #define TARGET_CASE_VALUES_THRESHOLD mips_case_values_threshold diff --git a/gcc/config/mips/mips.h b/gcc/config/mips/mips.h index 803ab98e760..1efa61a6ede 100644 --- a/gcc/config/mips/mips.h +++ b/gcc/config/mips/mips.h @@ -472,6 +472,12 @@ struct mips_cpu_info { 
builtin_define ("__mips_dsp_rev=1"); \ } \ \ + if (ISA_HAS_MSA) \ + { \ + builtin_define ("__mips_msa"); \ + builtin_define ("__mips_msa_width=128"); \ + } \ + \ MIPS_CPP_SET_PROCESSOR ("_MIPS_ARCH", mips_arch_info); \ MIPS_CPP_SET_PROCESSOR ("_MIPS_TUNE", mips_tune_info); \ \ @@ -824,7 +830,8 @@ struct mips_cpu_info { --with-fpu is ignored if -msoft-float, -msingle-float or -mdouble-float are specified. --with-nan is ignored if -mnan is specified. - --with-fp-32 is ignored if -msoft-float, -msingle-float or -mfp are specified. + --with-fp-32 is ignored if -msoft-float, -msingle-float, -mmsa or -mfp are + specified. --with-odd-spreg-32 is ignored if -msoft-float, -msingle-float, -modd-spreg or -mno-odd-spreg are specified. --with-divide is ignored if -mdivide-traps or -mdivide-breaks are @@ -841,7 +848,7 @@ struct mips_cpu_info { {"fpu", "%{!msoft-float:%{!msingle-float:%{!mdouble-float:-m%(VALUE)-float}}}" }, \ {"nan", "%{!mnan=*:-mnan=%(VALUE)}" }, \ {"fp_32", "%{" OPT_ARCH32 \ - ":%{!msoft-float:%{!msingle-float:%{!mfp*:-mfp%(VALUE)}}}}" }, \ + ":%{!msoft-float:%{!msingle-float:%{!mfp*:%{!mmsa:-mfp%(VALUE)}}}}}" }, \ {"odd_spreg_32", "%{" OPT_ARCH32 ":%{!msoft-float:%{!msingle-float:" \ "%{!modd-spreg:%{!mno-odd-spreg:-m%(VALUE)}}}}}" }, \ {"divide", "%{!mdivide-traps:%{!mdivide-breaks:-mdivide-%(VALUE)}}" }, \ @@ -1175,6 +1182,9 @@ struct mips_cpu_info { /* Revision 2 of the DSP ASE is available. */ #define ISA_HAS_DSPR2 (TARGET_DSPR2 && !TARGET_MIPS16) +/* The MSA ASE is available. */ +#define ISA_HAS_MSA (TARGET_MSA && !TARGET_MIPS16) + /* True if the result of a load is not available to the next instruction. A nop will then be needed between instructions like "lw $4,..." and "addiu $4,$4,1". 
*/ @@ -1316,6 +1326,7 @@ struct mips_cpu_info { %{meva} %{mno-eva} \ %{mvirt} %{mno-virt} \ %{mxpa} %{mno-xpa} \ +%{mmsa} %{mno-msa} \ %{msmartmips} %{mno-smartmips} \ %{mmt} %{mno-mt} \ %{mfix-rm7000} %{mno-fix-rm7000} \ @@ -1487,6 +1498,11 @@ FP_ASM_SPEC "\ #define MIN_UNITS_PER_WORD 4 #endif +/* Width of a MSA vector register in bytes. */ +#define UNITS_PER_MSA_REG 16 +/* Width of a MSA vector register in bits. */ +#define BITS_PER_MSA_REG (UNITS_PER_MSA_REG * BITS_PER_UNIT) + /* For MIPS, width of a floating point register. */ #define UNITS_PER_FPREG (TARGET_FLOAT64 ? 8 : 4) @@ -1559,8 +1575,11 @@ FP_ASM_SPEC "\ /* 8 is observed right on a DECstation and on riscos 4.02. */ #define STRUCTURE_SIZE_BOUNDARY 8 -/* There is no point aligning anything to a rounder boundary than this. */ -#define BIGGEST_ALIGNMENT LONG_DOUBLE_TYPE_SIZE +/* There is no point aligning anything to a rounder boundary than + LONG_DOUBLE_TYPE_SIZE, unless under MSA the bigggest alignment is + BITS_PER_MSA_REG. */ +#define BIGGEST_ALIGNMENT \ + (ISA_HAS_MSA ? BITS_PER_MSA_REG : LONG_DOUBLE_TYPE_SIZE) /* All accesses must be aligned. */ #define STRICT_ALIGNMENT 1 @@ -1667,7 +1686,7 @@ FP_ASM_SPEC "\ /* The [d]clz instructions have the natural values at 0. */ #define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \ - ((VALUE) = GET_MODE_BITSIZE (MODE), 2) + ((VALUE) = GET_MODE_UNIT_BITSIZE (MODE), 2) /* Standard register usage. */ @@ -1798,6 +1817,10 @@ FP_ASM_SPEC "\ #define MD_REG_NUM (MD_REG_LAST - MD_REG_FIRST + 1) #define MD_DBX_FIRST (FP_DBX_FIRST + FP_REG_NUM) +#define MSA_REG_FIRST FP_REG_FIRST +#define MSA_REG_LAST FP_REG_LAST +#define MSA_REG_NUM FP_REG_NUM + /* The DWARF 2 CFA column which tracks the return address from a signal handler context. This means that to maintain backwards compatibility, no hard register can be assigned this column if it @@ -1886,8 +1909,11 @@ FP_ASM_SPEC "\ /* Test if REGNO is hi, lo, or one of the 6 new DSP accumulators. 
*/ #define ACC_REG_P(REGNO) \ (MD_REG_P (REGNO) || DSP_ACC_REG_P (REGNO)) +#define MSA_REG_P(REGNO) \ + ((unsigned int) ((int) (REGNO) - MSA_REG_FIRST) < MSA_REG_NUM) #define FP_REG_RTX_P(X) (REG_P (X) && FP_REG_P (REGNO (X))) +#define MSA_REG_RTX_P(X) (REG_P (X) && MSA_REG_P (REGNO (X))) /* True if X is (const (unspec [(const_int 0)] UNSPEC_GP)). This is used to initialize the mips16 gp pseudo register. */ @@ -1916,10 +1942,12 @@ FP_ASM_SPEC "\ mips_hard_regno_caller_save_mode (REGNO, NREGS, MODE) /* Odd-numbered single-precision registers are not considered callee-saved - for o32 FPXX as they will be clobbered when run on an FR=1 FPU. */ + for o32 FPXX as they will be clobbered when run on an FR=1 FPU. + MSA vector registers with MODE > 64 bits are part clobbered too. */ #define HARD_REGNO_CALL_PART_CLOBBERED(REGNO, MODE) \ - (TARGET_FLOATXX && hard_regno_nregs[REGNO][MODE] == 1 \ - && FP_REG_P (REGNO) && ((REGNO) & 1)) + ((TARGET_FLOATXX && hard_regno_nregs[REGNO][MODE] == 1 \ + && FP_REG_P (REGNO) && ((REGNO) & 1)) \ + || (ISA_HAS_MSA && FP_REG_P (REGNO) && GET_MODE_SIZE (MODE) > 8)) #define MODES_TIEABLE_P mips_modes_tieable_p @@ -2381,6 +2409,13 @@ enum reg_class #define FP_ARG_FIRST (FP_REG_FIRST + 12) #define FP_ARG_LAST (FP_ARG_FIRST + MAX_ARGS_IN_REGISTERS - 1) +/* True if MODE is vector and supported in a MSA vector register. */ +#define MSA_SUPPORTED_MODE_P(MODE) \ + (ISA_HAS_MSA \ + && GET_MODE_SIZE (MODE) == UNITS_PER_MSA_REG \ + && (GET_MODE_CLASS (MODE) == MODE_VECTOR_INT \ + || GET_MODE_CLASS (MODE) == MODE_VECTOR_FLOAT)) + /* Temporary register that is used when restoring $gp after a call. $4 and $5 are used for returning complex double values in soft-float code, so $6 is the first suitable candidate for TARGET_MIPS16. For !TARGET_MIPS16 we can use @@ -2606,6 +2641,7 @@ typedef struct mips_args { we generally don't want to use them for copying arbitrary data. A single N-word move is usually the same cost as N single-word moves. 
*/ #define MOVE_MAX UNITS_PER_WORD +/* We don't modify it for MSA as it is only used by the classic reload. */ #define MAX_MOVE_MAX 8 /* Define this macro as a C expression which is nonzero if @@ -2767,7 +2803,39 @@ typedef struct mips_args { { "gp", 28 + GP_REG_FIRST }, \ { "sp", 29 + GP_REG_FIRST }, \ { "fp", 30 + GP_REG_FIRST }, \ - { "ra", 31 + GP_REG_FIRST } \ + { "ra", 31 + GP_REG_FIRST }, \ + { "$w0", 0 + FP_REG_FIRST }, \ + { "$w1", 1 + FP_REG_FIRST }, \ + { "$w2", 2 + FP_REG_FIRST }, \ + { "$w3", 3 + FP_REG_FIRST }, \ + { "$w4", 4 + FP_REG_FIRST }, \ + { "$w5", 5 + FP_REG_FIRST }, \ + { "$w6", 6 + FP_REG_FIRST }, \ + { "$w7", 7 + FP_REG_FIRST }, \ + { "$w8", 8 + FP_REG_FIRST }, \ + { "$w9", 9 + FP_REG_FIRST }, \ + { "$w10", 10 + FP_REG_FIRST }, \ + { "$w11", 11 + FP_REG_FIRST }, \ + { "$w12", 12 + FP_REG_FIRST }, \ + { "$w13", 13 + FP_REG_FIRST }, \ + { "$w14", 14 + FP_REG_FIRST }, \ + { "$w15", 15 + FP_REG_FIRST }, \ + { "$w16", 16 + FP_REG_FIRST }, \ + { "$w17", 17 + FP_REG_FIRST }, \ + { "$w18", 18 + FP_REG_FIRST }, \ + { "$w19", 19 + FP_REG_FIRST }, \ + { "$w20", 20 + FP_REG_FIRST }, \ + { "$w21", 21 + FP_REG_FIRST }, \ + { "$w22", 22 + FP_REG_FIRST }, \ + { "$w23", 23 + FP_REG_FIRST }, \ + { "$w24", 24 + FP_REG_FIRST }, \ + { "$w25", 25 + FP_REG_FIRST }, \ + { "$w26", 26 + FP_REG_FIRST }, \ + { "$w27", 27 + FP_REG_FIRST }, \ + { "$w28", 28 + FP_REG_FIRST }, \ + { "$w29", 29 + FP_REG_FIRST }, \ + { "$w30", 30 + FP_REG_FIRST }, \ + { "$w31", 31 + FP_REG_FIRST } \ } #define DBR_OUTPUT_SEQEND(STREAM) \ diff --git a/gcc/config/mips/mips.md b/gcc/config/mips/mips.md index 188308aae83..d8d564fabd9 100644 --- a/gcc/config/mips/mips.md +++ b/gcc/config/mips/mips.md @@ -225,11 +225,12 @@ shift_shift" (const_string "unknown")) -(define_attr "alu_type" "unknown,add,sub,not,nor,and,or,xor" +(define_attr "alu_type" "unknown,add,sub,not,nor,and,or,xor,simd_add" (const_string "unknown")) ;; Main data type used by the insn -(define_attr "mode" 
"unknown,none,QI,HI,SI,DI,TI,SF,DF,TF,FPSW" +(define_attr "mode" "unknown,none,QI,HI,SI,DI,TI,SF,DF,TF,FPSW, + V2DI,V4SI,V8HI,V16QI,V2DF,V4SF" (const_string "unknown")) ;; True if the main data type is twice the size of a word. @@ -243,6 +244,13 @@ (const_string "yes")] (const_string "no"))) +;; True if the main data type is four times of the size of a word. +(define_attr "qword_mode" "no,yes" + (cond [(and (eq_attr "mode" "TI,TF") + (not (match_test "TARGET_64BIT"))) + (const_string "yes")] + (const_string "no"))) + ;; Attributes describing a sync loop. These loops have the form: ;; ;; if (RELEASE_BARRIER == YES) sync @@ -365,7 +373,12 @@ shift,slt,signext,clz,pop,trap,imul,imul3,imul3nc,imadd,idiv,idiv3,move, fmove,fadd,fmul,fmadd,fdiv,frdiv,frdiv1,frdiv2,fabs,fneg,fcmp,fcvt,fsqrt, frsqrt,frsqrt1,frsqrt2,dspmac,dspmacsat,accext,accmod,dspalu,dspalusat, - multi,atomic,syncloop,nop,ghost,multimem" + multi,atomic,syncloop,nop,ghost,multimem, + simd_div,simd_fclass,simd_flog2,simd_fadd,simd_fcvt,simd_fmul,simd_fmadd, + simd_fdiv,simd_bitins,simd_bitmov,simd_insert,simd_sld,simd_mul,simd_fcmp, + simd_fexp2,simd_int_arith,simd_bit,simd_shift,simd_splat,simd_fill, + simd_permute,simd_shf,simd_sat,simd_pcnt,simd_copy,simd_branch,simd_cmsa, + simd_fminmax,simd_logic,simd_move,simd_load,simd_store" (cond [(eq_attr "jal" "!unset") (const_string "call") (eq_attr "got" "load") (const_string "load") @@ -400,6 +413,11 @@ (eq_attr "move_type" "constN,shift_shift") (const_string "multi") + ;; These types of move are split for quadword modes only. + (and (eq_attr "move_type" "move,const") + (eq_attr "qword_mode" "yes")) + (const_string "multi") + ;; These types of move are split for doubleword modes only. (and (eq_attr "move_type" "move,const") (eq_attr "dword_mode" "yes")) @@ -486,6 +504,12 @@ (eq_attr "dword_mode" "yes")) (const_int 2) + ;; Check for quadword moves that are decomposed into four + ;; instructions. 
+ (and (eq_attr "move_type" "mtc,mfc,move") + (eq_attr "qword_mode" "yes")) + (const_int 4) + ;; Constants, loads and stores are handled by external routines. (and (eq_attr "move_type" "const,constN") (eq_attr "dword_mode" "yes")) @@ -527,7 +551,7 @@ (const_int 2) (eq_attr "type" "idiv,idiv3") - (symbol_ref "mips_idiv_insns ()") + (symbol_ref "mips_idiv_insns (GET_MODE (PATTERN (insn)))") (not (eq_attr "sync_mem" "none")) (symbol_ref "mips_sync_loop_insns (insn, operands)")] @@ -884,8 +908,10 @@ (define_mode_attr fmt [(SF "s") (DF "d") (V2SF "ps")]) ;; This attribute gives the upper-case mode name for one unit of a -;; floating-point mode. -(define_mode_attr UNITMODE [(SF "SF") (DF "DF") (V2SF "SF")]) +;; floating-point mode or vector mode. +(define_mode_attr UNITMODE [(SF "SF") (DF "DF") (V2SF "SF") (V4SF "SF") + (V16QI "QI") (V8HI "HI") (V4SI "SI") (V2DI "DI") + (V2DF "DF")]) ;; This attribute gives the integer mode that has the same size as a ;; fixed-point mode. @@ -941,6 +967,10 @@ ;; from the same template. (define_code_iterator any_mod [mod umod]) +;; This code iterator allows addition and subtraction to be generated +;; from the same template. +(define_code_iterator addsub [plus minus]) + ;; This code iterator allows all native floating-point comparisons to be ;; generated from the same template. (define_code_iterator fcond [unordered uneq unlt unle eq lt le @@ -7634,6 +7664,9 @@ ; ST-Microelectronics Loongson-2E/2F-specific patterns. (include "loongson.md") +; The MIPS MSA Instructions. +(include "mips-msa.md") + (define_c_enum "unspec" [ UNSPEC_ADDRESS_FIRST ]) diff --git a/gcc/config/mips/mips.opt b/gcc/config/mips/mips.opt index ebd67e4bdb9..08dd83e14ce 100644 --- a/gcc/config/mips/mips.opt +++ b/gcc/config/mips/mips.opt @@ -299,6 +299,10 @@ mmicromips Target Report Mask(MICROMIPS) Use microMIPS instructions. +mmsa +Target Report Var(TARGET_MSA) +Use MIPS MSA Extension instructions. + mmt Target Report Var(TARGET_MT) Allow the use of MT instructions. 
diff --git a/gcc/config/mips/msa.h b/gcc/config/mips/msa.h new file mode 100644 index 00000000000..341eb7f81d1 --- /dev/null +++ b/gcc/config/mips/msa.h @@ -0,0 +1,582 @@ +/* MIPS MSA intrinsics include file. + + Copyright (C) 2015 Free Software Foundation, Inc. + Contributed by Imagination Technologies Ltd. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . 
*/ + +#ifndef _MSA_H +#define _MSA_H 1 + +#if defined(__mips_msa) +typedef signed char v16i8 __attribute__ ((vector_size(16), aligned(16))); +typedef signed char v16i8_b __attribute__ ((vector_size(16), aligned(1))); +typedef unsigned char v16u8 __attribute__ ((vector_size(16), aligned(16))); +typedef unsigned char v16u8_b __attribute__ ((vector_size(16), aligned(1))); +typedef short v8i16 __attribute__ ((vector_size(16), aligned(16))); +typedef short v8i16_h __attribute__ ((vector_size(16), aligned(2))); +typedef unsigned short v8u16 __attribute__ ((vector_size(16), aligned(16))); +typedef unsigned short v8u16_h __attribute__ ((vector_size(16), aligned(2))); +typedef int v4i32 __attribute__ ((vector_size(16), aligned(16))); +typedef int v4i32_w __attribute__ ((vector_size(16), aligned(4))); +typedef unsigned int v4u32 __attribute__ ((vector_size(16), aligned(16))); +typedef unsigned int v4u32_w __attribute__ ((vector_size(16), aligned(4))); +typedef long long v2i64 __attribute__ ((vector_size(16), aligned(16))); +typedef long long v2i64_d __attribute__ ((vector_size(16), aligned(8))); +typedef unsigned long long v2u64 __attribute__ ((vector_size(16), aligned(16))); +typedef unsigned long long v2u64_d __attribute__ ((vector_size(16), aligned(8))); +typedef float v4f32 __attribute__ ((vector_size(16), aligned(16))); +typedef float v4f32_w __attribute__ ((vector_size(16), aligned(4))); +typedef double v2f64 __attribute__ ((vector_size(16), aligned(16))); +typedef double v2f64_d __attribute__ ((vector_size(16), aligned(8))); + +#define __msa_sll_b __builtin_msa_sll_b +#define __msa_sll_h __builtin_msa_sll_h +#define __msa_sll_w __builtin_msa_sll_w +#define __msa_sll_d __builtin_msa_sll_d +#define __msa_slli_b __builtin_msa_slli_b +#define __msa_slli_h __builtin_msa_slli_h +#define __msa_slli_w __builtin_msa_slli_w +#define __msa_slli_d __builtin_msa_slli_d +#define __msa_sra_b __builtin_msa_sra_b +#define __msa_sra_h __builtin_msa_sra_h +#define __msa_sra_w 
__builtin_msa_sra_w +#define __msa_sra_d __builtin_msa_sra_d +#define __msa_srai_b __builtin_msa_srai_b +#define __msa_srai_h __builtin_msa_srai_h +#define __msa_srai_w __builtin_msa_srai_w +#define __msa_srai_d __builtin_msa_srai_d +#define __msa_srar_b __builtin_msa_srar_b +#define __msa_srar_h __builtin_msa_srar_h +#define __msa_srar_w __builtin_msa_srar_w +#define __msa_srar_d __builtin_msa_srar_d +#define __msa_srari_b __builtin_msa_srari_b +#define __msa_srari_h __builtin_msa_srari_h +#define __msa_srari_w __builtin_msa_srari_w +#define __msa_srari_d __builtin_msa_srari_d +#define __msa_srl_b __builtin_msa_srl_b +#define __msa_srl_h __builtin_msa_srl_h +#define __msa_srl_w __builtin_msa_srl_w +#define __msa_srl_d __builtin_msa_srl_d +#define __msa_srli_b __builtin_msa_srli_b +#define __msa_srli_h __builtin_msa_srli_h +#define __msa_srli_w __builtin_msa_srli_w +#define __msa_srli_d __builtin_msa_srli_d +#define __msa_srlr_b __builtin_msa_srlr_b +#define __msa_srlr_h __builtin_msa_srlr_h +#define __msa_srlr_w __builtin_msa_srlr_w +#define __msa_srlr_d __builtin_msa_srlr_d +#define __msa_srlri_b __builtin_msa_srlri_b +#define __msa_srlri_h __builtin_msa_srlri_h +#define __msa_srlri_w __builtin_msa_srlri_w +#define __msa_srlri_d __builtin_msa_srlri_d +#define __msa_bclr_b __builtin_msa_bclr_b +#define __msa_bclr_h __builtin_msa_bclr_h +#define __msa_bclr_w __builtin_msa_bclr_w +#define __msa_bclr_d __builtin_msa_bclr_d +#define __msa_bclri_b __builtin_msa_bclri_b +#define __msa_bclri_h __builtin_msa_bclri_h +#define __msa_bclri_w __builtin_msa_bclri_w +#define __msa_bclri_d __builtin_msa_bclri_d +#define __msa_bset_b __builtin_msa_bset_b +#define __msa_bset_h __builtin_msa_bset_h +#define __msa_bset_w __builtin_msa_bset_w +#define __msa_bset_d __builtin_msa_bset_d +#define __msa_bseti_b __builtin_msa_bseti_b +#define __msa_bseti_h __builtin_msa_bseti_h +#define __msa_bseti_w __builtin_msa_bseti_w +#define __msa_bseti_d __builtin_msa_bseti_d +#define __msa_bneg_b 
__builtin_msa_bneg_b +#define __msa_bneg_h __builtin_msa_bneg_h +#define __msa_bneg_w __builtin_msa_bneg_w +#define __msa_bneg_d __builtin_msa_bneg_d +#define __msa_bnegi_b __builtin_msa_bnegi_b +#define __msa_bnegi_h __builtin_msa_bnegi_h +#define __msa_bnegi_w __builtin_msa_bnegi_w +#define __msa_bnegi_d __builtin_msa_bnegi_d +#define __msa_binsl_b __builtin_msa_binsl_b +#define __msa_binsl_h __builtin_msa_binsl_h +#define __msa_binsl_w __builtin_msa_binsl_w +#define __msa_binsl_d __builtin_msa_binsl_d +#define __msa_binsli_b __builtin_msa_binsli_b +#define __msa_binsli_h __builtin_msa_binsli_h +#define __msa_binsli_w __builtin_msa_binsli_w +#define __msa_binsli_d __builtin_msa_binsli_d +#define __msa_binsr_b __builtin_msa_binsr_b +#define __msa_binsr_h __builtin_msa_binsr_h +#define __msa_binsr_w __builtin_msa_binsr_w +#define __msa_binsr_d __builtin_msa_binsr_d +#define __msa_binsri_b __builtin_msa_binsri_b +#define __msa_binsri_h __builtin_msa_binsri_h +#define __msa_binsri_w __builtin_msa_binsri_w +#define __msa_binsri_d __builtin_msa_binsri_d +#define __msa_addv_b __builtin_msa_addv_b +#define __msa_addv_h __builtin_msa_addv_h +#define __msa_addv_w __builtin_msa_addv_w +#define __msa_addv_d __builtin_msa_addv_d +#define __msa_addvi_b __builtin_msa_addvi_b +#define __msa_addvi_h __builtin_msa_addvi_h +#define __msa_addvi_w __builtin_msa_addvi_w +#define __msa_addvi_d __builtin_msa_addvi_d +#define __msa_subv_b __builtin_msa_subv_b +#define __msa_subv_h __builtin_msa_subv_h +#define __msa_subv_w __builtin_msa_subv_w +#define __msa_subv_d __builtin_msa_subv_d +#define __msa_subvi_b __builtin_msa_subvi_b +#define __msa_subvi_h __builtin_msa_subvi_h +#define __msa_subvi_w __builtin_msa_subvi_w +#define __msa_subvi_d __builtin_msa_subvi_d +#define __msa_max_s_b __builtin_msa_max_s_b +#define __msa_max_s_h __builtin_msa_max_s_h +#define __msa_max_s_w __builtin_msa_max_s_w +#define __msa_max_s_d __builtin_msa_max_s_d +#define __msa_maxi_s_b __builtin_msa_maxi_s_b 
+#define __msa_maxi_s_h __builtin_msa_maxi_s_h +#define __msa_maxi_s_w __builtin_msa_maxi_s_w +#define __msa_maxi_s_d __builtin_msa_maxi_s_d +#define __msa_max_u_b __builtin_msa_max_u_b +#define __msa_max_u_h __builtin_msa_max_u_h +#define __msa_max_u_w __builtin_msa_max_u_w +#define __msa_max_u_d __builtin_msa_max_u_d +#define __msa_maxi_u_b __builtin_msa_maxi_u_b +#define __msa_maxi_u_h __builtin_msa_maxi_u_h +#define __msa_maxi_u_w __builtin_msa_maxi_u_w +#define __msa_maxi_u_d __builtin_msa_maxi_u_d +#define __msa_min_s_b __builtin_msa_min_s_b +#define __msa_min_s_h __builtin_msa_min_s_h +#define __msa_min_s_w __builtin_msa_min_s_w +#define __msa_min_s_d __builtin_msa_min_s_d +#define __msa_mini_s_b __builtin_msa_mini_s_b +#define __msa_mini_s_h __builtin_msa_mini_s_h +#define __msa_mini_s_w __builtin_msa_mini_s_w +#define __msa_mini_s_d __builtin_msa_mini_s_d +#define __msa_min_u_b __builtin_msa_min_u_b +#define __msa_min_u_h __builtin_msa_min_u_h +#define __msa_min_u_w __builtin_msa_min_u_w +#define __msa_min_u_d __builtin_msa_min_u_d +#define __msa_mini_u_b __builtin_msa_mini_u_b +#define __msa_mini_u_h __builtin_msa_mini_u_h +#define __msa_mini_u_w __builtin_msa_mini_u_w +#define __msa_mini_u_d __builtin_msa_mini_u_d +#define __msa_max_a_b __builtin_msa_max_a_b +#define __msa_max_a_h __builtin_msa_max_a_h +#define __msa_max_a_w __builtin_msa_max_a_w +#define __msa_max_a_d __builtin_msa_max_a_d +#define __msa_min_a_b __builtin_msa_min_a_b +#define __msa_min_a_h __builtin_msa_min_a_h +#define __msa_min_a_w __builtin_msa_min_a_w +#define __msa_min_a_d __builtin_msa_min_a_d +#define __msa_ceq_b __builtin_msa_ceq_b +#define __msa_ceq_h __builtin_msa_ceq_h +#define __msa_ceq_w __builtin_msa_ceq_w +#define __msa_ceq_d __builtin_msa_ceq_d +#define __msa_ceqi_b __builtin_msa_ceqi_b +#define __msa_ceqi_h __builtin_msa_ceqi_h +#define __msa_ceqi_w __builtin_msa_ceqi_w +#define __msa_ceqi_d __builtin_msa_ceqi_d +#define __msa_clt_s_b __builtin_msa_clt_s_b +#define 
__msa_clt_s_h __builtin_msa_clt_s_h +#define __msa_clt_s_w __builtin_msa_clt_s_w +#define __msa_clt_s_d __builtin_msa_clt_s_d +#define __msa_clti_s_b __builtin_msa_clti_s_b +#define __msa_clti_s_h __builtin_msa_clti_s_h +#define __msa_clti_s_w __builtin_msa_clti_s_w +#define __msa_clti_s_d __builtin_msa_clti_s_d +#define __msa_clt_u_b __builtin_msa_clt_u_b +#define __msa_clt_u_h __builtin_msa_clt_u_h +#define __msa_clt_u_w __builtin_msa_clt_u_w +#define __msa_clt_u_d __builtin_msa_clt_u_d +#define __msa_clti_u_b __builtin_msa_clti_u_b +#define __msa_clti_u_h __builtin_msa_clti_u_h +#define __msa_clti_u_w __builtin_msa_clti_u_w +#define __msa_clti_u_d __builtin_msa_clti_u_d +#define __msa_cle_s_b __builtin_msa_cle_s_b +#define __msa_cle_s_h __builtin_msa_cle_s_h +#define __msa_cle_s_w __builtin_msa_cle_s_w +#define __msa_cle_s_d __builtin_msa_cle_s_d +#define __msa_clei_s_b __builtin_msa_clei_s_b +#define __msa_clei_s_h __builtin_msa_clei_s_h +#define __msa_clei_s_w __builtin_msa_clei_s_w +#define __msa_clei_s_d __builtin_msa_clei_s_d +#define __msa_cle_u_b __builtin_msa_cle_u_b +#define __msa_cle_u_h __builtin_msa_cle_u_h +#define __msa_cle_u_w __builtin_msa_cle_u_w +#define __msa_cle_u_d __builtin_msa_cle_u_d +#define __msa_clei_u_b __builtin_msa_clei_u_b +#define __msa_clei_u_h __builtin_msa_clei_u_h +#define __msa_clei_u_w __builtin_msa_clei_u_w +#define __msa_clei_u_d __builtin_msa_clei_u_d +#define __msa_ld_b __builtin_msa_ld_b +#define __msa_ld_h __builtin_msa_ld_h +#define __msa_ld_w __builtin_msa_ld_w +#define __msa_ld_d __builtin_msa_ld_d +#define __msa_st_b __builtin_msa_st_b +#define __msa_st_h __builtin_msa_st_h +#define __msa_st_w __builtin_msa_st_w +#define __msa_st_d __builtin_msa_st_d +#define __msa_sat_s_b __builtin_msa_sat_s_b +#define __msa_sat_s_h __builtin_msa_sat_s_h +#define __msa_sat_s_w __builtin_msa_sat_s_w +#define __msa_sat_s_d __builtin_msa_sat_s_d +#define __msa_sat_u_b __builtin_msa_sat_u_b +#define __msa_sat_u_h __builtin_msa_sat_u_h 
+#define __msa_sat_u_w __builtin_msa_sat_u_w +#define __msa_sat_u_d __builtin_msa_sat_u_d +#define __msa_add_a_b __builtin_msa_add_a_b +#define __msa_add_a_h __builtin_msa_add_a_h +#define __msa_add_a_w __builtin_msa_add_a_w +#define __msa_add_a_d __builtin_msa_add_a_d +#define __msa_adds_a_b __builtin_msa_adds_a_b +#define __msa_adds_a_h __builtin_msa_adds_a_h +#define __msa_adds_a_w __builtin_msa_adds_a_w +#define __msa_adds_a_d __builtin_msa_adds_a_d +#define __msa_adds_s_b __builtin_msa_adds_s_b +#define __msa_adds_s_h __builtin_msa_adds_s_h +#define __msa_adds_s_w __builtin_msa_adds_s_w +#define __msa_adds_s_d __builtin_msa_adds_s_d +#define __msa_adds_u_b __builtin_msa_adds_u_b +#define __msa_adds_u_h __builtin_msa_adds_u_h +#define __msa_adds_u_w __builtin_msa_adds_u_w +#define __msa_adds_u_d __builtin_msa_adds_u_d +#define __msa_ave_s_b __builtin_msa_ave_s_b +#define __msa_ave_s_h __builtin_msa_ave_s_h +#define __msa_ave_s_w __builtin_msa_ave_s_w +#define __msa_ave_s_d __builtin_msa_ave_s_d +#define __msa_ave_u_b __builtin_msa_ave_u_b +#define __msa_ave_u_h __builtin_msa_ave_u_h +#define __msa_ave_u_w __builtin_msa_ave_u_w +#define __msa_ave_u_d __builtin_msa_ave_u_d +#define __msa_aver_s_b __builtin_msa_aver_s_b +#define __msa_aver_s_h __builtin_msa_aver_s_h +#define __msa_aver_s_w __builtin_msa_aver_s_w +#define __msa_aver_s_d __builtin_msa_aver_s_d +#define __msa_aver_u_b __builtin_msa_aver_u_b +#define __msa_aver_u_h __builtin_msa_aver_u_h +#define __msa_aver_u_w __builtin_msa_aver_u_w +#define __msa_aver_u_d __builtin_msa_aver_u_d +#define __msa_subs_s_b __builtin_msa_subs_s_b +#define __msa_subs_s_h __builtin_msa_subs_s_h +#define __msa_subs_s_w __builtin_msa_subs_s_w +#define __msa_subs_s_d __builtin_msa_subs_s_d +#define __msa_subs_u_b __builtin_msa_subs_u_b +#define __msa_subs_u_h __builtin_msa_subs_u_h +#define __msa_subs_u_w __builtin_msa_subs_u_w +#define __msa_subs_u_d __builtin_msa_subs_u_d +#define __msa_subsuu_s_b __builtin_msa_subsuu_s_b 
+#define __msa_subsuu_s_h __builtin_msa_subsuu_s_h +#define __msa_subsuu_s_w __builtin_msa_subsuu_s_w +#define __msa_subsuu_s_d __builtin_msa_subsuu_s_d +#define __msa_subsus_u_b __builtin_msa_subsus_u_b +#define __msa_subsus_u_h __builtin_msa_subsus_u_h +#define __msa_subsus_u_w __builtin_msa_subsus_u_w +#define __msa_subsus_u_d __builtin_msa_subsus_u_d +#define __msa_asub_s_b __builtin_msa_asub_s_b +#define __msa_asub_s_h __builtin_msa_asub_s_h +#define __msa_asub_s_w __builtin_msa_asub_s_w +#define __msa_asub_s_d __builtin_msa_asub_s_d +#define __msa_asub_u_b __builtin_msa_asub_u_b +#define __msa_asub_u_h __builtin_msa_asub_u_h +#define __msa_asub_u_w __builtin_msa_asub_u_w +#define __msa_asub_u_d __builtin_msa_asub_u_d +#define __msa_mulv_b __builtin_msa_mulv_b +#define __msa_mulv_h __builtin_msa_mulv_h +#define __msa_mulv_w __builtin_msa_mulv_w +#define __msa_mulv_d __builtin_msa_mulv_d +#define __msa_maddv_b __builtin_msa_maddv_b +#define __msa_maddv_h __builtin_msa_maddv_h +#define __msa_maddv_w __builtin_msa_maddv_w +#define __msa_maddv_d __builtin_msa_maddv_d +#define __msa_msubv_b __builtin_msa_msubv_b +#define __msa_msubv_h __builtin_msa_msubv_h +#define __msa_msubv_w __builtin_msa_msubv_w +#define __msa_msubv_d __builtin_msa_msubv_d +#define __msa_div_s_b __builtin_msa_div_s_b +#define __msa_div_s_h __builtin_msa_div_s_h +#define __msa_div_s_w __builtin_msa_div_s_w +#define __msa_div_s_d __builtin_msa_div_s_d +#define __msa_div_u_b __builtin_msa_div_u_b +#define __msa_div_u_h __builtin_msa_div_u_h +#define __msa_div_u_w __builtin_msa_div_u_w +#define __msa_div_u_d __builtin_msa_div_u_d +#define __msa_hadd_s_h __builtin_msa_hadd_s_h +#define __msa_hadd_s_w __builtin_msa_hadd_s_w +#define __msa_hadd_s_d __builtin_msa_hadd_s_d +#define __msa_hadd_u_h __builtin_msa_hadd_u_h +#define __msa_hadd_u_w __builtin_msa_hadd_u_w +#define __msa_hadd_u_d __builtin_msa_hadd_u_d +#define __msa_hsub_s_h __builtin_msa_hsub_s_h +#define __msa_hsub_s_w 
__builtin_msa_hsub_s_w +#define __msa_hsub_s_d __builtin_msa_hsub_s_d +#define __msa_hsub_u_h __builtin_msa_hsub_u_h +#define __msa_hsub_u_w __builtin_msa_hsub_u_w +#define __msa_hsub_u_d __builtin_msa_hsub_u_d +#define __msa_mod_s_b __builtin_msa_mod_s_b +#define __msa_mod_s_h __builtin_msa_mod_s_h +#define __msa_mod_s_w __builtin_msa_mod_s_w +#define __msa_mod_s_d __builtin_msa_mod_s_d +#define __msa_mod_u_b __builtin_msa_mod_u_b +#define __msa_mod_u_h __builtin_msa_mod_u_h +#define __msa_mod_u_w __builtin_msa_mod_u_w +#define __msa_mod_u_d __builtin_msa_mod_u_d +#define __msa_dotp_s_h __builtin_msa_dotp_s_h +#define __msa_dotp_s_w __builtin_msa_dotp_s_w +#define __msa_dotp_s_d __builtin_msa_dotp_s_d +#define __msa_dotp_u_h __builtin_msa_dotp_u_h +#define __msa_dotp_u_w __builtin_msa_dotp_u_w +#define __msa_dotp_u_d __builtin_msa_dotp_u_d +#define __msa_dpadd_s_h __builtin_msa_dpadd_s_h +#define __msa_dpadd_s_w __builtin_msa_dpadd_s_w +#define __msa_dpadd_s_d __builtin_msa_dpadd_s_d +#define __msa_dpadd_u_h __builtin_msa_dpadd_u_h +#define __msa_dpadd_u_w __builtin_msa_dpadd_u_w +#define __msa_dpadd_u_d __builtin_msa_dpadd_u_d +#define __msa_dpsub_s_h __builtin_msa_dpsub_s_h +#define __msa_dpsub_s_w __builtin_msa_dpsub_s_w +#define __msa_dpsub_s_d __builtin_msa_dpsub_s_d +#define __msa_dpsub_u_h __builtin_msa_dpsub_u_h +#define __msa_dpsub_u_w __builtin_msa_dpsub_u_w +#define __msa_dpsub_u_d __builtin_msa_dpsub_u_d +#define __msa_sld_b __builtin_msa_sld_b +#define __msa_sld_h __builtin_msa_sld_h +#define __msa_sld_w __builtin_msa_sld_w +#define __msa_sld_d __builtin_msa_sld_d +#define __msa_sldi_b __builtin_msa_sldi_b +#define __msa_sldi_h __builtin_msa_sldi_h +#define __msa_sldi_w __builtin_msa_sldi_w +#define __msa_sldi_d __builtin_msa_sldi_d +#define __msa_splat_b __builtin_msa_splat_b +#define __msa_splat_h __builtin_msa_splat_h +#define __msa_splat_w __builtin_msa_splat_w +#define __msa_splat_d __builtin_msa_splat_d +#define __msa_splati_b 
__builtin_msa_splati_b +#define __msa_splati_h __builtin_msa_splati_h +#define __msa_splati_w __builtin_msa_splati_w +#define __msa_splati_d __builtin_msa_splati_d +#define __msa_pckev_b __builtin_msa_pckev_b +#define __msa_pckev_h __builtin_msa_pckev_h +#define __msa_pckev_w __builtin_msa_pckev_w +#define __msa_pckev_d __builtin_msa_pckev_d +#define __msa_pckod_b __builtin_msa_pckod_b +#define __msa_pckod_h __builtin_msa_pckod_h +#define __msa_pckod_w __builtin_msa_pckod_w +#define __msa_pckod_d __builtin_msa_pckod_d +#define __msa_ilvl_b __builtin_msa_ilvl_b +#define __msa_ilvl_h __builtin_msa_ilvl_h +#define __msa_ilvl_w __builtin_msa_ilvl_w +#define __msa_ilvl_d __builtin_msa_ilvl_d +#define __msa_ilvr_b __builtin_msa_ilvr_b +#define __msa_ilvr_h __builtin_msa_ilvr_h +#define __msa_ilvr_w __builtin_msa_ilvr_w +#define __msa_ilvr_d __builtin_msa_ilvr_d +#define __msa_ilvev_b __builtin_msa_ilvev_b +#define __msa_ilvev_h __builtin_msa_ilvev_h +#define __msa_ilvev_w __builtin_msa_ilvev_w +#define __msa_ilvev_d __builtin_msa_ilvev_d +#define __msa_ilvod_b __builtin_msa_ilvod_b +#define __msa_ilvod_h __builtin_msa_ilvod_h +#define __msa_ilvod_w __builtin_msa_ilvod_w +#define __msa_ilvod_d __builtin_msa_ilvod_d +#define __msa_vshf_b __builtin_msa_vshf_b +#define __msa_vshf_h __builtin_msa_vshf_h +#define __msa_vshf_w __builtin_msa_vshf_w +#define __msa_vshf_d __builtin_msa_vshf_d +#define __msa_and_v __builtin_msa_and_v +#define __msa_andi_b __builtin_msa_andi_b +#define __msa_or_v __builtin_msa_or_v +#define __msa_ori_b __builtin_msa_ori_b +#define __msa_nor_v __builtin_msa_nor_v +#define __msa_nori_b __builtin_msa_nori_b +#define __msa_xor_v __builtin_msa_xor_v +#define __msa_xori_b __builtin_msa_xori_b +#define __msa_bmnz_v __builtin_msa_bmnz_v +#define __msa_bmnzi_b __builtin_msa_bmnzi_b +#define __msa_bmz_v __builtin_msa_bmz_v +#define __msa_bmzi_b __builtin_msa_bmzi_b +#define __msa_bsel_v __builtin_msa_bsel_v +#define __msa_bseli_b __builtin_msa_bseli_b 
+#define __msa_shf_b __builtin_msa_shf_b +#define __msa_shf_h __builtin_msa_shf_h +#define __msa_shf_w __builtin_msa_shf_w +#define __msa_test_bnz_v __builtin_msa_bnz_v +#define __msa_test_bz_v __builtin_msa_bz_v +#define __msa_fill_b __builtin_msa_fill_b +#define __msa_fill_h __builtin_msa_fill_h +#define __msa_fill_w __builtin_msa_fill_w +#define __msa_fill_d __builtin_msa_fill_d +#define __msa_pcnt_b __builtin_msa_pcnt_b +#define __msa_pcnt_h __builtin_msa_pcnt_h +#define __msa_pcnt_w __builtin_msa_pcnt_w +#define __msa_pcnt_d __builtin_msa_pcnt_d +#define __msa_nloc_b __builtin_msa_nloc_b +#define __msa_nloc_h __builtin_msa_nloc_h +#define __msa_nloc_w __builtin_msa_nloc_w +#define __msa_nloc_d __builtin_msa_nloc_d +#define __msa_nlzc_b __builtin_msa_nlzc_b +#define __msa_nlzc_h __builtin_msa_nlzc_h +#define __msa_nlzc_w __builtin_msa_nlzc_w +#define __msa_nlzc_d __builtin_msa_nlzc_d +#define __msa_copy_s_b __builtin_msa_copy_s_b +#define __msa_copy_s_h __builtin_msa_copy_s_h +#define __msa_copy_s_w __builtin_msa_copy_s_w +#define __msa_copy_s_d __builtin_msa_copy_s_d +#define __msa_copy_u_b __builtin_msa_copy_u_b +#define __msa_copy_u_h __builtin_msa_copy_u_h +#define __msa_copy_u_w __builtin_msa_copy_u_w +#define __msa_copy_u_d __builtin_msa_copy_u_d +#define __msa_insert_b __builtin_msa_insert_b +#define __msa_insert_h __builtin_msa_insert_h +#define __msa_insert_w __builtin_msa_insert_w +#define __msa_insert_d __builtin_msa_insert_d +#define __msa_insve_b __builtin_msa_insve_b +#define __msa_insve_h __builtin_msa_insve_h +#define __msa_insve_w __builtin_msa_insve_w +#define __msa_insve_d __builtin_msa_insve_d +#define __msa_test_bnz_b __builtin_msa_bnz_b +#define __msa_test_bnz_h __builtin_msa_bnz_h +#define __msa_test_bnz_w __builtin_msa_bnz_w +#define __msa_test_bnz_d __builtin_msa_bnz_d +#define __msa_test_bz_b __builtin_msa_bz_b +#define __msa_test_bz_h __builtin_msa_bz_h +#define __msa_test_bz_w __builtin_msa_bz_w +#define __msa_test_bz_d 
__builtin_msa_bz_d +#define __msa_ldi_b __builtin_msa_ldi_b +#define __msa_ldi_h __builtin_msa_ldi_h +#define __msa_ldi_w __builtin_msa_ldi_w +#define __msa_ldi_d __builtin_msa_ldi_d +#define __msa_fcaf_w __builtin_msa_fcaf_w +#define __msa_fcaf_d __builtin_msa_fcaf_d +#define __msa_fcor_w __builtin_msa_fcor_w +#define __msa_fcor_d __builtin_msa_fcor_d +#define __msa_fcun_w __builtin_msa_fcun_w +#define __msa_fcun_d __builtin_msa_fcun_d +#define __msa_fcune_w __builtin_msa_fcune_w +#define __msa_fcune_d __builtin_msa_fcune_d +#define __msa_fcueq_w __builtin_msa_fcueq_w +#define __msa_fcueq_d __builtin_msa_fcueq_d +#define __msa_fceq_w __builtin_msa_fceq_w +#define __msa_fceq_d __builtin_msa_fceq_d +#define __msa_fcne_w __builtin_msa_fcne_w +#define __msa_fcne_d __builtin_msa_fcne_d +#define __msa_fclt_w __builtin_msa_fclt_w +#define __msa_fclt_d __builtin_msa_fclt_d +#define __msa_fcult_w __builtin_msa_fcult_w +#define __msa_fcult_d __builtin_msa_fcult_d +#define __msa_fcle_w __builtin_msa_fcle_w +#define __msa_fcle_d __builtin_msa_fcle_d +#define __msa_fcule_w __builtin_msa_fcule_w +#define __msa_fcule_d __builtin_msa_fcule_d +#define __msa_fsaf_w __builtin_msa_fsaf_w +#define __msa_fsaf_d __builtin_msa_fsaf_d +#define __msa_fsor_w __builtin_msa_fsor_w +#define __msa_fsor_d __builtin_msa_fsor_d +#define __msa_fsun_w __builtin_msa_fsun_w +#define __msa_fsun_d __builtin_msa_fsun_d +#define __msa_fsune_w __builtin_msa_fsune_w +#define __msa_fsune_d __builtin_msa_fsune_d +#define __msa_fsueq_w __builtin_msa_fsueq_w +#define __msa_fsueq_d __builtin_msa_fsueq_d +#define __msa_fseq_w __builtin_msa_fseq_w +#define __msa_fseq_d __builtin_msa_fseq_d +#define __msa_fsne_w __builtin_msa_fsne_w +#define __msa_fsne_d __builtin_msa_fsne_d +#define __msa_fslt_w __builtin_msa_fslt_w +#define __msa_fslt_d __builtin_msa_fslt_d +#define __msa_fsult_w __builtin_msa_fsult_w +#define __msa_fsult_d __builtin_msa_fsult_d +#define __msa_fsle_w __builtin_msa_fsle_w +#define __msa_fsle_d 
__builtin_msa_fsle_d +#define __msa_fsule_w __builtin_msa_fsule_w +#define __msa_fsule_d __builtin_msa_fsule_d +#define __msa_fadd_w __builtin_msa_fadd_w +#define __msa_fadd_d __builtin_msa_fadd_d +#define __msa_fsub_w __builtin_msa_fsub_w +#define __msa_fsub_d __builtin_msa_fsub_d +#define __msa_fmul_w __builtin_msa_fmul_w +#define __msa_fmul_d __builtin_msa_fmul_d +#define __msa_fdiv_w __builtin_msa_fdiv_w +#define __msa_fdiv_d __builtin_msa_fdiv_d +#define __msa_fmadd_w __builtin_msa_fmadd_w +#define __msa_fmadd_d __builtin_msa_fmadd_d +#define __msa_fmsub_w __builtin_msa_fmsub_w +#define __msa_fmsub_d __builtin_msa_fmsub_d +#define __msa_fexp2_w __builtin_msa_fexp2_w +#define __msa_fexp2_d __builtin_msa_fexp2_d +#define __msa_fexdo_h __builtin_msa_fexdo_h +#define __msa_fexdo_w __builtin_msa_fexdo_w +#define __msa_ftq_h __builtin_msa_ftq_h +#define __msa_ftq_w __builtin_msa_ftq_w +#define __msa_fmin_w __builtin_msa_fmin_w +#define __msa_fmin_d __builtin_msa_fmin_d +#define __msa_fmin_a_w __builtin_msa_fmin_a_w +#define __msa_fmin_a_d __builtin_msa_fmin_a_d +#define __msa_fmax_w __builtin_msa_fmax_w +#define __msa_fmax_d __builtin_msa_fmax_d +#define __msa_fmax_a_w __builtin_msa_fmax_a_w +#define __msa_fmax_a_d __builtin_msa_fmax_a_d +#define __msa_mul_q_h __builtin_msa_mul_q_h +#define __msa_mul_q_w __builtin_msa_mul_q_w +#define __msa_mulr_q_h __builtin_msa_mulr_q_h +#define __msa_mulr_q_w __builtin_msa_mulr_q_w +#define __msa_madd_q_h __builtin_msa_madd_q_h +#define __msa_madd_q_w __builtin_msa_madd_q_w +#define __msa_maddr_q_h __builtin_msa_maddr_q_h +#define __msa_maddr_q_w __builtin_msa_maddr_q_w +#define __msa_msub_q_h __builtin_msa_msub_q_h +#define __msa_msub_q_w __builtin_msa_msub_q_w +#define __msa_msubr_q_h __builtin_msa_msubr_q_h +#define __msa_msubr_q_w __builtin_msa_msubr_q_w +#define __msa_fclass_w __builtin_msa_fclass_w +#define __msa_fclass_d __builtin_msa_fclass_d +#define __msa_fsqrt_w __builtin_msa_fsqrt_w +#define __msa_fsqrt_d 
__builtin_msa_fsqrt_d +#define __msa_frcp_w __builtin_msa_frcp_w +#define __msa_frcp_d __builtin_msa_frcp_d +#define __msa_frint_w __builtin_msa_frint_w +#define __msa_frint_d __builtin_msa_frint_d +#define __msa_frsqrt_w __builtin_msa_frsqrt_w +#define __msa_frsqrt_d __builtin_msa_frsqrt_d +#define __msa_flog2_w __builtin_msa_flog2_w +#define __msa_flog2_d __builtin_msa_flog2_d +#define __msa_fexupl_w __builtin_msa_fexupl_w +#define __msa_fexupl_d __builtin_msa_fexupl_d +#define __msa_fexupr_w __builtin_msa_fexupr_w +#define __msa_fexupr_d __builtin_msa_fexupr_d +#define __msa_ffql_w __builtin_msa_ffql_w +#define __msa_ffql_d __builtin_msa_ffql_d +#define __msa_ffqr_w __builtin_msa_ffqr_w +#define __msa_ffqr_d __builtin_msa_ffqr_d +#define __msa_ftint_s_w __builtin_msa_ftint_s_w +#define __msa_ftint_s_d __builtin_msa_ftint_s_d +#define __msa_ftint_u_w __builtin_msa_ftint_u_w +#define __msa_ftint_u_d __builtin_msa_ftint_u_d +#define __msa_ftrunc_s_w __builtin_msa_ftrunc_s_w +#define __msa_ftrunc_s_d __builtin_msa_ftrunc_s_d +#define __msa_ftrunc_u_w __builtin_msa_ftrunc_u_w +#define __msa_ftrunc_u_d __builtin_msa_ftrunc_u_d +#define __msa_ffint_s_w __builtin_msa_ffint_s_w +#define __msa_ffint_s_d __builtin_msa_ffint_s_d +#define __msa_ffint_u_w __builtin_msa_ffint_u_w +#define __msa_ffint_u_d __builtin_msa_ffint_u_d +#define __msa_cfcmsa __builtin_msa_cfcmsa +#define __msa_move_v __builtin_msa_move_v +#endif /* defined(__mips_msa) */ +#endif /* _MSA_H */ diff --git a/gcc/config/mips/mti-elf.h b/gcc/config/mips/mti-elf.h index e804f6ab645..c4ae24bac36 100644 --- a/gcc/config/mips/mti-elf.h +++ b/gcc/config/mips/mti-elf.h @@ -39,8 +39,8 @@ along with GCC; see the file COPYING3. If not see \ /* If no FP ABI option is specified, infer one from the \ ABI/ISA level. 
*/ \ - "%{!msoft-float: %{!msingle-float: %{!mfp*: %{mabi=32: %{" \ - MIPS_FPXX_OPTION_SPEC ": -mfpxx}}}}}", \ + "%{!msoft-float: %{!msingle-float: %{!mfp*: %{!mmsa: %{mabi=32: %{" \ + MIPS_FPXX_OPTION_SPEC ": -mfpxx}}}}}}", \ \ /* Make sure that an endian option is always present. This makes \ things like LINK_SPEC easier to write. */ \ diff --git a/gcc/config/mips/mti-linux.h b/gcc/config/mips/mti-linux.h index d84ad1842b2..76b0f34059c 100644 --- a/gcc/config/mips/mti-linux.h +++ b/gcc/config/mips/mti-linux.h @@ -61,9 +61,9 @@ along with GCC; see the file COPYING3. If not see "%{!mabi=*: %{" MIPS_32BIT_OPTION_SPEC ": -mabi=32;: -mabi=n32}}", \ \ /* If no FP ABI option is specified, infer one from the \ - ABI/ISA level. */ \ - "%{!msoft-float: %{!msingle-float: %{!mfp*: %{mabi=32: %{" \ - MIPS_FPXX_OPTION_SPEC ": -mfpxx}}}}}", \ + ABI/ISA level unless there is a conflicting option. */ \ + "%{!msoft-float: %{!msingle-float: %{!mfp*: %{!mmsa: %{mabi=32: %{" \ + MIPS_FPXX_OPTION_SPEC ": -mfpxx}}}}}}", \ \ /* Base SPECs. 
*/ \ BASE_DRIVER_SELF_SPECS \ diff --git a/gcc/config/mips/predicates.md b/gcc/config/mips/predicates.md index cbeace9d640..e6b6d2f60da 100644 --- a/gcc/config/mips/predicates.md +++ b/gcc/config/mips/predicates.md @@ -35,12 +35,36 @@ (define_predicate "const_immlsa_operand" (and (match_code "const_int") - (match_test "IN_RANGE (INTVAL (op), 1, 4)"))) + (match_test "IN_RANGE (INTVAL (op), 1, 4)"))) + +(define_predicate "const_msa_branch_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), -1024, 1023)"))) + +(define_predicate "const_uimm3_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), 0, 7)"))) + +(define_predicate "const_uimm4_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), 0, 15)"))) + +(define_predicate "const_uimm5_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), 0, 31)"))) (define_predicate "const_uimm6_operand" (and (match_code "const_int") (match_test "UIMM6_OPERAND (INTVAL (op))"))) +(define_predicate "const_uimm8_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), 0, 255)"))) + +(define_predicate "const_imm5_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), -16, 15)"))) + (define_predicate "const_imm10_operand" (and (match_code "const_int") (match_test "IMM10_OPERAND (INTVAL (op))"))) @@ -49,6 +73,22 @@ (ior (match_operand 0 "const_imm10_operand") (match_operand 0 "register_operand"))) +(define_predicate "aq10b_operand" + (and (match_code "const_int") + (match_test "mips_signed_immediate_p (INTVAL (op), 10, 0)"))) + +(define_predicate "aq10h_operand" + (and (match_code "const_int") + (match_test "mips_signed_immediate_p (INTVAL (op), 10, 1)"))) + +(define_predicate "aq10w_operand" + (and (match_code "const_int") + (match_test "mips_signed_immediate_p (INTVAL (op), 10, 2)"))) + +(define_predicate "aq10d_operand" + (and (match_code "const_int") + (match_test "mips_signed_immediate_p 
(INTVAL (op), 10, 3)"))) + (define_predicate "sle_operand" (and (match_code "const_int") (match_test "SMALL_OPERAND (INTVAL (op) + 1)"))) @@ -61,6 +101,14 @@ (and (match_code "const_int,const_double,const_vector") (match_test "op == CONST0_RTX (GET_MODE (op))"))) +(define_predicate "const_m1_operand" + (and (match_code "const_int,const_double,const_vector") + (match_test "op == CONSTM1_RTX (GET_MODE (op))"))) + +(define_predicate "reg_or_m1_operand" + (ior (match_operand 0 "const_m1_operand") + (match_operand 0 "register_operand"))) + (define_predicate "reg_or_0_operand" (ior (and (match_operand 0 "const_0_operand") (not (match_test "TARGET_MIPS16"))) @@ -74,6 +122,23 @@ (ior (match_operand 0 "const_1_operand") (match_operand 0 "register_operand"))) +;; These are used in vec_merge, hence accept bitmask as const_int. +(define_predicate "const_exp_2_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (exact_log2 (INTVAL (op)), 0, 1)"))) + +(define_predicate "const_exp_4_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (exact_log2 (INTVAL (op)), 0, 3)"))) + +(define_predicate "const_exp_8_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (exact_log2 (INTVAL (op)), 0, 7)"))) + +(define_predicate "const_exp_16_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (exact_log2 (INTVAL (op)), 0, 15)"))) + ;; This is used for indexing into vectors, and hence only accepts const_int. 
(define_predicate "const_0_or_1_operand" (and (match_code "const_int") @@ -507,3 +572,65 @@ (define_predicate "non_volatile_mem_operand" (and (match_operand 0 "memory_operand") (not (match_test "MEM_VOLATILE_P (op)")))) + +(define_predicate "const_vector_same_val_operand" + (match_code "const_vector") +{ + return mips_const_vector_same_val_p (op, mode); +}) + +(define_predicate "const_vector_same_simm5_operand" + (match_code "const_vector") +{ + return mips_const_vector_same_int_p (op, mode, -16, 15); +}) + +(define_predicate "const_vector_same_uimm5_operand" + (match_code "const_vector") +{ + return mips_const_vector_same_int_p (op, mode, 0, 31); +}) + +(define_predicate "const_vector_same_ximm5_operand" + (match_code "const_vector") +{ + return mips_const_vector_same_int_p (op, mode, -31, 31); +}) + +(define_predicate "const_vector_same_uimm6_operand" + (match_code "const_vector") +{ + return mips_const_vector_same_int_p (op, mode, 0, 63); +}) + +(define_predicate "const_vector_same_uimm8_operand" + (match_code "const_vector") +{ + return mips_const_vector_same_int_p (op, mode, 0, 255); +}) + +(define_predicate "par_const_vector_shf_set_operand" + (match_code "parallel") +{ + return mips_const_vector_shuffle_set_p (op, mode); +}) + +(define_predicate "reg_or_vector_same_val_operand" + (ior (match_operand 0 "register_operand") + (match_operand 0 "const_vector_same_val_operand"))) + +(define_predicate "reg_or_vector_same_simm5_operand" + (ior (match_operand 0 "register_operand") + (match_operand 0 "const_vector_same_simm5_operand"))) + +(define_predicate "reg_or_vector_same_uimm5_operand" + (ior (match_operand 0 "register_operand") + (match_operand 0 "const_vector_same_uimm5_operand"))) + +(define_predicate "reg_or_vector_same_ximm5_operand" + (ior (match_operand 0 "register_operand") + (match_operand 0 "const_vector_same_ximm5_operand"))) + +(define_predicate "reg_or_vector_same_uimm6_operand" + (ior (match_operand 0 "register_operand") + (match_operand 0 
"const_vector_same_uimm6_operand"))) diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index 802845dc66f..e4d6c1c88be 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -11451,6 +11451,7 @@ instructions, but allow the compiler to schedule those calls. * MIPS DSP Built-in Functions:: * MIPS Paired-Single Support:: * MIPS Loongson Built-in Functions:: +* MIPS SIMD Architecture (MSA) Support:: * Other MIPS Built-in Functions:: * MSP430 Built-in Functions:: * NDS32 Built-in Functions:: @@ -13561,6 +13562,794 @@ else @end smallexample @end table +@node MIPS SIMD Architecture (MSA) Support +@subsection MIPS SIMD Architecture (MSA) Support + +@menu +* MIPS SIMD Architecture Built-in Functions:: +@end menu + +GCC provides intrinsics to access the SIMD instructions provided by the +MSA MIPS SIMD Architecture. The interface is made available by including +@code{msa.h} and using @option{-mmsa -mhard-float -mfp64 -mnan=2008}. +For each @code{__builtin_msa_*}, there is a shortened name of the intrinsic, +@code{__msa_*}. + +MSA implements 128-bit wide vector registers, operating on 8-, 16-, 32- and +64-bit integer, 16- and 32-bit fixed-point, or 32- and 64-bit floating point +data elements. The following vector typedefs are included in @code{msa.h}: +@itemize +@item @code{v16i8}, a vector of sixteen signed 8-bit integers; +@item @code{v16u8}, a vector of sixteen unsigned 8-bit integers; +@item @code{v8i16}, a vector of eight signed 16-bit integers; +@item @code{v8u16}, a vector of eight unsigned 16-bit integers; +@item @code{v4i32}, a vector of four signed 32-bit integers; +@item @code{v4u32}, a vector of four unsigned 32-bit integers; +@item @code{v2i64}, a vector of two signed 64-bit integers; +@item @code{v2u64}, a vector of two unsigned 64-bit integers; +@item @code{v4f32}, a vector of four 32-bit floats; +@item @code{v2f64}, a vector of two 64-bit doubles. 
+@end itemize + +Instructions and corresponding built-ins may have additional restrictions and/or +input/output values manipulated: +@itemize +@item @code{imm0_1}, an integer literal in range 0 to 1; +@item @code{imm0_3}, an integer literal in range 0 to 3; +@item @code{imm0_7}, an integer literal in range 0 to 7; +@item @code{imm0_15}, an integer literal in range 0 to 15; +@item @code{imm0_31}, an integer literal in range 0 to 31; +@item @code{imm0_63}, an integer literal in range 0 to 63; +@item @code{imm0_255}, an integer literal in range 0 to 255; +@item @code{imm_n16_15}, an integer literal in range -16 to 15; +@item @code{imm_n512_511}, an integer literal in range -512 to 511; +@item @code{imm_n1024_1022}, an integer literal in range -512 to 511 left +shifted by 1 bit, i.e., -1024, -1022, @dots{}, 1020, 1022; +@item @code{imm_n2048_2044}, an integer literal in range -512 to 511 left +shifted by 2 bits, i.e., -2048, -2044, @dots{}, 2040, 2044; +@item @code{imm_n4096_4088}, an integer literal in range -512 to 511 left +shifted by 3 bits, i.e., -4096, -4088, @dots{}, 4080, 4088; +@item @code{imm1_4}, an integer literal in range 1 to 4; +@item @code{i32, i64, u32, u64, f32, f64}, defined as follows: +@end itemize + +@smallexample +@{ +typedef int i32; +#if __LONG_MAX__ == __LONG_LONG_MAX__ +typedef long i64; +#else +typedef long long i64; +#endif + +typedef unsigned int u32; +#if __LONG_MAX__ == __LONG_LONG_MAX__ +typedef unsigned long u64; +#else +typedef unsigned long long u64; +#endif + +typedef double f64; +typedef float f32; +@} +@end smallexample + +@node MIPS SIMD Architecture Built-in Functions +@subsubsection MIPS SIMD Architecture Built-in Functions + +The intrinsics provided are listed below; each is named after the +machine instruction. 
+ +@smallexample +v16i8 __builtin_msa_add_a_b (v16i8, v16i8); +v8i16 __builtin_msa_add_a_h (v8i16, v8i16); +v4i32 __builtin_msa_add_a_w (v4i32, v4i32); +v2i64 __builtin_msa_add_a_d (v2i64, v2i64); + +v16i8 __builtin_msa_adds_a_b (v16i8, v16i8); +v8i16 __builtin_msa_adds_a_h (v8i16, v8i16); +v4i32 __builtin_msa_adds_a_w (v4i32, v4i32); +v2i64 __builtin_msa_adds_a_d (v2i64, v2i64); + +v16i8 __builtin_msa_adds_s_b (v16i8, v16i8); +v8i16 __builtin_msa_adds_s_h (v8i16, v8i16); +v4i32 __builtin_msa_adds_s_w (v4i32, v4i32); +v2i64 __builtin_msa_adds_s_d (v2i64, v2i64); + +v16u8 __builtin_msa_adds_u_b (v16u8, v16u8); +v8u16 __builtin_msa_adds_u_h (v8u16, v8u16); +v4u32 __builtin_msa_adds_u_w (v4u32, v4u32); +v2u64 __builtin_msa_adds_u_d (v2u64, v2u64); + +v16i8 __builtin_msa_addv_b (v16i8, v16i8); +v8i16 __builtin_msa_addv_h (v8i16, v8i16); +v4i32 __builtin_msa_addv_w (v4i32, v4i32); +v2i64 __builtin_msa_addv_d (v2i64, v2i64); + +v16i8 __builtin_msa_addvi_b (v16i8, imm0_31); +v8i16 __builtin_msa_addvi_h (v8i16, imm0_31); +v4i32 __builtin_msa_addvi_w (v4i32, imm0_31); +v2i64 __builtin_msa_addvi_d (v2i64, imm0_31); + +v16u8 __builtin_msa_and_v (v16u8, v16u8); + +v16u8 __builtin_msa_andi_b (v16u8, imm0_255); + +v16i8 __builtin_msa_asub_s_b (v16i8, v16i8); +v8i16 __builtin_msa_asub_s_h (v8i16, v8i16); +v4i32 __builtin_msa_asub_s_w (v4i32, v4i32); +v2i64 __builtin_msa_asub_s_d (v2i64, v2i64); + +v16u8 __builtin_msa_asub_u_b (v16u8, v16u8); +v8u16 __builtin_msa_asub_u_h (v8u16, v8u16); +v4u32 __builtin_msa_asub_u_w (v4u32, v4u32); +v2u64 __builtin_msa_asub_u_d (v2u64, v2u64); + +v16i8 __builtin_msa_ave_s_b (v16i8, v16i8); +v8i16 __builtin_msa_ave_s_h (v8i16, v8i16); +v4i32 __builtin_msa_ave_s_w (v4i32, v4i32); +v2i64 __builtin_msa_ave_s_d (v2i64, v2i64); + +v16u8 __builtin_msa_ave_u_b (v16u8, v16u8); +v8u16 __builtin_msa_ave_u_h (v8u16, v8u16); +v4u32 __builtin_msa_ave_u_w (v4u32, v4u32); +v2u64 __builtin_msa_ave_u_d (v2u64, v2u64); + +v16i8 __builtin_msa_aver_s_b (v16i8, 
v16i8); +v8i16 __builtin_msa_aver_s_h (v8i16, v8i16); +v4i32 __builtin_msa_aver_s_w (v4i32, v4i32); +v2i64 __builtin_msa_aver_s_d (v2i64, v2i64); + +v16u8 __builtin_msa_aver_u_b (v16u8, v16u8); +v8u16 __builtin_msa_aver_u_h (v8u16, v8u16); +v4u32 __builtin_msa_aver_u_w (v4u32, v4u32); +v2u64 __builtin_msa_aver_u_d (v2u64, v2u64); + +v16u8 __builtin_msa_bclr_b (v16u8, v16u8); +v8u16 __builtin_msa_bclr_h (v8u16, v8u16); +v4u32 __builtin_msa_bclr_w (v4u32, v4u32); +v2u64 __builtin_msa_bclr_d (v2u64, v2u64); + +v16u8 __builtin_msa_bclri_b (v16u8, imm0_7); +v8u16 __builtin_msa_bclri_h (v8u16, imm0_15); +v4u32 __builtin_msa_bclri_w (v4u32, imm0_31); +v2u64 __builtin_msa_bclri_d (v2u64, imm0_63); + +v16u8 __builtin_msa_binsl_b (v16u8, v16u8, v16u8); +v8u16 __builtin_msa_binsl_h (v8u16, v8u16, v8u16); +v4u32 __builtin_msa_binsl_w (v4u32, v4u32, v4u32); +v2u64 __builtin_msa_binsl_d (v2u64, v2u64, v2u64); + +v16u8 __builtin_msa_binsli_b (v16u8, v16u8, imm0_7); +v8u16 __builtin_msa_binsli_h (v8u16, v8u16, imm0_15); +v4u32 __builtin_msa_binsli_w (v4u32, v4u32, imm0_31); +v2u64 __builtin_msa_binsli_d (v2u64, v2u64, imm0_63); + +v16u8 __builtin_msa_binsr_b (v16u8, v16u8, v16u8); +v8u16 __builtin_msa_binsr_h (v8u16, v8u16, v8u16); +v4u32 __builtin_msa_binsr_w (v4u32, v4u32, v4u32); +v2u64 __builtin_msa_binsr_d (v2u64, v2u64, v2u64); + +v16u8 __builtin_msa_binsri_b (v16u8, v16u8, imm0_7); +v8u16 __builtin_msa_binsri_h (v8u16, v8u16, imm0_15); +v4u32 __builtin_msa_binsri_w (v4u32, v4u32, imm0_31); +v2u64 __builtin_msa_binsri_d (v2u64, v2u64, imm0_63); + +v16u8 __builtin_msa_bmnz_v (v16u8, v16u8, v16u8); + +v16u8 __builtin_msa_bmnzi_b (v16u8, v16u8, imm0_255); + +v16u8 __builtin_msa_bmz_v (v16u8, v16u8, v16u8); + +v16u8 __builtin_msa_bmzi_b (v16u8, v16u8, imm0_255); + +v16u8 __builtin_msa_bneg_b (v16u8, v16u8); +v8u16 __builtin_msa_bneg_h (v8u16, v8u16); +v4u32 __builtin_msa_bneg_w (v4u32, v4u32); +v2u64 __builtin_msa_bneg_d (v2u64, v2u64); + +v16u8 __builtin_msa_bnegi_b (v16u8, 
imm0_7); +v8u16 __builtin_msa_bnegi_h (v8u16, imm0_15); +v4u32 __builtin_msa_bnegi_w (v4u32, imm0_31); +v2u64 __builtin_msa_bnegi_d (v2u64, imm0_63); + +i32 __builtin_msa_bnz_b (v16u8); +i32 __builtin_msa_bnz_h (v8u16); +i32 __builtin_msa_bnz_w (v4u32); +i32 __builtin_msa_bnz_d (v2u64); + +i32 __builtin_msa_bnz_v (v16u8); + +v16u8 __builtin_msa_bsel_v (v16u8, v16u8, v16u8); + +v16u8 __builtin_msa_bseli_b (v16u8, v16u8, imm0_255); + +v16u8 __builtin_msa_bset_b (v16u8, v16u8); +v8u16 __builtin_msa_bset_h (v8u16, v8u16); +v4u32 __builtin_msa_bset_w (v4u32, v4u32); +v2u64 __builtin_msa_bset_d (v2u64, v2u64); + +v16u8 __builtin_msa_bseti_b (v16u8, imm0_7); +v8u16 __builtin_msa_bseti_h (v8u16, imm0_15); +v4u32 __builtin_msa_bseti_w (v4u32, imm0_31); +v2u64 __builtin_msa_bseti_d (v2u64, imm0_63); + +i32 __builtin_msa_bz_b (v16u8); +i32 __builtin_msa_bz_h (v8u16); +i32 __builtin_msa_bz_w (v4u32); +i32 __builtin_msa_bz_d (v2u64); + +i32 __builtin_msa_bz_v (v16u8); + +v16i8 __builtin_msa_ceq_b (v16i8, v16i8); +v8i16 __builtin_msa_ceq_h (v8i16, v8i16); +v4i32 __builtin_msa_ceq_w (v4i32, v4i32); +v2i64 __builtin_msa_ceq_d (v2i64, v2i64); + +v16i8 __builtin_msa_ceqi_b (v16i8, imm_n16_15); +v8i16 __builtin_msa_ceqi_h (v8i16, imm_n16_15); +v4i32 __builtin_msa_ceqi_w (v4i32, imm_n16_15); +v2i64 __builtin_msa_ceqi_d (v2i64, imm_n16_15); + +i32 __builtin_msa_cfcmsa (imm0_31); + +v16i8 __builtin_msa_cle_s_b (v16i8, v16i8); +v8i16 __builtin_msa_cle_s_h (v8i16, v8i16); +v4i32 __builtin_msa_cle_s_w (v4i32, v4i32); +v2i64 __builtin_msa_cle_s_d (v2i64, v2i64); + +v16i8 __builtin_msa_cle_u_b (v16u8, v16u8); +v8i16 __builtin_msa_cle_u_h (v8u16, v8u16); +v4i32 __builtin_msa_cle_u_w (v4u32, v4u32); +v2i64 __builtin_msa_cle_u_d (v2u64, v2u64); + +v16i8 __builtin_msa_clei_s_b (v16i8, imm_n16_15); +v8i16 __builtin_msa_clei_s_h (v8i16, imm_n16_15); +v4i32 __builtin_msa_clei_s_w (v4i32, imm_n16_15); +v2i64 __builtin_msa_clei_s_d (v2i64, imm_n16_15); + +v16i8 __builtin_msa_clei_u_b (v16u8, 
imm0_31); +v8i16 __builtin_msa_clei_u_h (v8u16, imm0_31); +v4i32 __builtin_msa_clei_u_w (v4u32, imm0_31); +v2i64 __builtin_msa_clei_u_d (v2u64, imm0_31); + +v16i8 __builtin_msa_clt_s_b (v16i8, v16i8); +v8i16 __builtin_msa_clt_s_h (v8i16, v8i16); +v4i32 __builtin_msa_clt_s_w (v4i32, v4i32); +v2i64 __builtin_msa_clt_s_d (v2i64, v2i64); + +v16i8 __builtin_msa_clt_u_b (v16u8, v16u8); +v8i16 __builtin_msa_clt_u_h (v8u16, v8u16); +v4i32 __builtin_msa_clt_u_w (v4u32, v4u32); +v2i64 __builtin_msa_clt_u_d (v2u64, v2u64); + +v16i8 __builtin_msa_clti_s_b (v16i8, imm_n16_15); +v8i16 __builtin_msa_clti_s_h (v8i16, imm_n16_15); +v4i32 __builtin_msa_clti_s_w (v4i32, imm_n16_15); +v2i64 __builtin_msa_clti_s_d (v2i64, imm_n16_15); + +v16i8 __builtin_msa_clti_u_b (v16u8, imm0_31); +v8i16 __builtin_msa_clti_u_h (v8u16, imm0_31); +v4i32 __builtin_msa_clti_u_w (v4u32, imm0_31); +v2i64 __builtin_msa_clti_u_d (v2u64, imm0_31); + +i32 __builtin_msa_copy_s_b (v16i8, imm0_15); +i32 __builtin_msa_copy_s_h (v8i16, imm0_7); +i32 __builtin_msa_copy_s_w (v4i32, imm0_3); +i64 __builtin_msa_copy_s_d (v2i64, imm0_1); + +u32 __builtin_msa_copy_u_b (v16i8, imm0_15); +u32 __builtin_msa_copy_u_h (v8i16, imm0_7); +u32 __builtin_msa_copy_u_w (v4i32, imm0_3); +u64 __builtin_msa_copy_u_d (v2i64, imm0_1); + +void __builtin_msa_ctcmsa (imm0_31, i32); + +v16i8 __builtin_msa_div_s_b (v16i8, v16i8); +v8i16 __builtin_msa_div_s_h (v8i16, v8i16); +v4i32 __builtin_msa_div_s_w (v4i32, v4i32); +v2i64 __builtin_msa_div_s_d (v2i64, v2i64); + +v16u8 __builtin_msa_div_u_b (v16u8, v16u8); +v8u16 __builtin_msa_div_u_h (v8u16, v8u16); +v4u32 __builtin_msa_div_u_w (v4u32, v4u32); +v2u64 __builtin_msa_div_u_d (v2u64, v2u64); + +v8i16 __builtin_msa_dotp_s_h (v16i8, v16i8); +v4i32 __builtin_msa_dotp_s_w (v8i16, v8i16); +v2i64 __builtin_msa_dotp_s_d (v4i32, v4i32); + +v8u16 __builtin_msa_dotp_u_h (v16u8, v16u8); +v4u32 __builtin_msa_dotp_u_w (v8u16, v8u16); +v2u64 __builtin_msa_dotp_u_d (v4u32, v4u32); + +v8i16 
__builtin_msa_dpadd_s_h (v8i16, v16i8, v16i8); +v4i32 __builtin_msa_dpadd_s_w (v4i32, v8i16, v8i16); +v2i64 __builtin_msa_dpadd_s_d (v2i64, v4i32, v4i32); + +v8u16 __builtin_msa_dpadd_u_h (v8u16, v16u8, v16u8); +v4u32 __builtin_msa_dpadd_u_w (v4u32, v8u16, v8u16); +v2u64 __builtin_msa_dpadd_u_d (v2u64, v4u32, v4u32); + +v8i16 __builtin_msa_dpsub_s_h (v8i16, v16i8, v16i8); +v4i32 __builtin_msa_dpsub_s_w (v4i32, v8i16, v8i16); +v2i64 __builtin_msa_dpsub_s_d (v2i64, v4i32, v4i32); + +v8i16 __builtin_msa_dpsub_u_h (v8i16, v16u8, v16u8); +v4i32 __builtin_msa_dpsub_u_w (v4i32, v8u16, v8u16); +v2i64 __builtin_msa_dpsub_u_d (v2i64, v4u32, v4u32); + +v4f32 __builtin_msa_fadd_w (v4f32, v4f32); +v2f64 __builtin_msa_fadd_d (v2f64, v2f64); + +v4i32 __builtin_msa_fcaf_w (v4f32, v4f32); +v2i64 __builtin_msa_fcaf_d (v2f64, v2f64); + +v4i32 __builtin_msa_fceq_w (v4f32, v4f32); +v2i64 __builtin_msa_fceq_d (v2f64, v2f64); + +v4i32 __builtin_msa_fclass_w (v4f32); +v2i64 __builtin_msa_fclass_d (v2f64); + +v4i32 __builtin_msa_fcle_w (v4f32, v4f32); +v2i64 __builtin_msa_fcle_d (v2f64, v2f64); + +v4i32 __builtin_msa_fclt_w (v4f32, v4f32); +v2i64 __builtin_msa_fclt_d (v2f64, v2f64); + +v4i32 __builtin_msa_fcne_w (v4f32, v4f32); +v2i64 __builtin_msa_fcne_d (v2f64, v2f64); + +v4i32 __builtin_msa_fcor_w (v4f32, v4f32); +v2i64 __builtin_msa_fcor_d (v2f64, v2f64); + +v4i32 __builtin_msa_fcueq_w (v4f32, v4f32); +v2i64 __builtin_msa_fcueq_d (v2f64, v2f64); + +v4i32 __builtin_msa_fcule_w (v4f32, v4f32); +v2i64 __builtin_msa_fcule_d (v2f64, v2f64); + +v4i32 __builtin_msa_fcult_w (v4f32, v4f32); +v2i64 __builtin_msa_fcult_d (v2f64, v2f64); + +v4i32 __builtin_msa_fcun_w (v4f32, v4f32); +v2i64 __builtin_msa_fcun_d (v2f64, v2f64); + +v4i32 __builtin_msa_fcune_w (v4f32, v4f32); +v2i64 __builtin_msa_fcune_d (v2f64, v2f64); + +v4f32 __builtin_msa_fdiv_w (v4f32, v4f32); +v2f64 __builtin_msa_fdiv_d (v2f64, v2f64); + +v8i16 __builtin_msa_fexdo_h (v4f32, v4f32); +v4f32 __builtin_msa_fexdo_w (v2f64, v2f64); + 
+v4f32 __builtin_msa_fexp2_w (v4f32, v4i32); +v2f64 __builtin_msa_fexp2_d (v2f64, v2i64); + +v4f32 __builtin_msa_fexupl_w (v8i16); +v2f64 __builtin_msa_fexupl_d (v4f32); + +v4f32 __builtin_msa_fexupr_w (v8i16); +v2f64 __builtin_msa_fexupr_d (v4f32); + +v4f32 __builtin_msa_ffint_s_w (v4i32); +v2f64 __builtin_msa_ffint_s_d (v2i64); + +v4f32 __builtin_msa_ffint_u_w (v4u32); +v2f64 __builtin_msa_ffint_u_d (v2u64); + +v4f32 __builtin_msa_ffql_w (v8i16); +v2f64 __builtin_msa_ffql_d (v4i32); + +v4f32 __builtin_msa_ffqr_w (v8i16); +v2f64 __builtin_msa_ffqr_d (v4i32); + +v16i8 __builtin_msa_fill_b (i32); +v8i16 __builtin_msa_fill_h (i32); +v4i32 __builtin_msa_fill_w (i32); +v2i64 __builtin_msa_fill_d (i64); + +v4f32 __builtin_msa_flog2_w (v4f32); +v2f64 __builtin_msa_flog2_d (v2f64); + +v4f32 __builtin_msa_fmadd_w (v4f32, v4f32, v4f32); +v2f64 __builtin_msa_fmadd_d (v2f64, v2f64, v2f64); + +v4f32 __builtin_msa_fmax_w (v4f32, v4f32); +v2f64 __builtin_msa_fmax_d (v2f64, v2f64); + +v4f32 __builtin_msa_fmax_a_w (v4f32, v4f32); +v2f64 __builtin_msa_fmax_a_d (v2f64, v2f64); + +v4f32 __builtin_msa_fmin_w (v4f32, v4f32); +v2f64 __builtin_msa_fmin_d (v2f64, v2f64); + +v4f32 __builtin_msa_fmin_a_w (v4f32, v4f32); +v2f64 __builtin_msa_fmin_a_d (v2f64, v2f64); + +v4f32 __builtin_msa_fmsub_w (v4f32, v4f32, v4f32); +v2f64 __builtin_msa_fmsub_d (v2f64, v2f64, v2f64); + +v4f32 __builtin_msa_fmul_w (v4f32, v4f32); +v2f64 __builtin_msa_fmul_d (v2f64, v2f64); + +v4f32 __builtin_msa_frint_w (v4f32); +v2f64 __builtin_msa_frint_d (v2f64); + +v4f32 __builtin_msa_frcp_w (v4f32); +v2f64 __builtin_msa_frcp_d (v2f64); + +v4f32 __builtin_msa_frsqrt_w (v4f32); +v2f64 __builtin_msa_frsqrt_d (v2f64); + +v4i32 __builtin_msa_fsaf_w (v4f32, v4f32); +v2i64 __builtin_msa_fsaf_d (v2f64, v2f64); + +v4i32 __builtin_msa_fseq_w (v4f32, v4f32); +v2i64 __builtin_msa_fseq_d (v2f64, v2f64); + +v4i32 __builtin_msa_fsle_w (v4f32, v4f32); +v2i64 __builtin_msa_fsle_d (v2f64, v2f64); + +v4i32 __builtin_msa_fslt_w (v4f32, 
v4f32); +v2i64 __builtin_msa_fslt_d (v2f64, v2f64); + +v4i32 __builtin_msa_fsne_w (v4f32, v4f32); +v2i64 __builtin_msa_fsne_d (v2f64, v2f64); + +v4i32 __builtin_msa_fsor_w (v4f32, v4f32); +v2i64 __builtin_msa_fsor_d (v2f64, v2f64); + +v4f32 __builtin_msa_fsqrt_w (v4f32); +v2f64 __builtin_msa_fsqrt_d (v2f64); + +v4f32 __builtin_msa_fsub_w (v4f32, v4f32); +v2f64 __builtin_msa_fsub_d (v2f64, v2f64); + +v4i32 __builtin_msa_fsueq_w (v4f32, v4f32); +v2i64 __builtin_msa_fsueq_d (v2f64, v2f64); + +v4i32 __builtin_msa_fsule_w (v4f32, v4f32); +v2i64 __builtin_msa_fsule_d (v2f64, v2f64); + +v4i32 __builtin_msa_fsult_w (v4f32, v4f32); +v2i64 __builtin_msa_fsult_d (v2f64, v2f64); + +v4i32 __builtin_msa_fsun_w (v4f32, v4f32); +v2i64 __builtin_msa_fsun_d (v2f64, v2f64); + +v4i32 __builtin_msa_fsune_w (v4f32, v4f32); +v2i64 __builtin_msa_fsune_d (v2f64, v2f64); + +v4i32 __builtin_msa_ftint_s_w (v4f32); +v2i64 __builtin_msa_ftint_s_d (v2f64); + +v4u32 __builtin_msa_ftint_u_w (v4f32); +v2u64 __builtin_msa_ftint_u_d (v2f64); + +v8i16 __builtin_msa_ftq_h (v4f32, v4f32); +v4i32 __builtin_msa_ftq_w (v2f64, v2f64); + +v4i32 __builtin_msa_ftrunc_s_w (v4f32); +v2i64 __builtin_msa_ftrunc_s_d (v2f64); + +v4u32 __builtin_msa_ftrunc_u_w (v4f32); +v2u64 __builtin_msa_ftrunc_u_d (v2f64); + +v8i16 __builtin_msa_hadd_s_h (v16i8, v16i8); +v4i32 __builtin_msa_hadd_s_w (v8i16, v8i16); +v2i64 __builtin_msa_hadd_s_d (v4i32, v4i32); + +v8u16 __builtin_msa_hadd_u_h (v16u8, v16u8); +v4u32 __builtin_msa_hadd_u_w (v8u16, v8u16); +v2u64 __builtin_msa_hadd_u_d (v4u32, v4u32); + +v8i16 __builtin_msa_hsub_s_h (v16i8, v16i8); +v4i32 __builtin_msa_hsub_s_w (v8i16, v8i16); +v2i64 __builtin_msa_hsub_s_d (v4i32, v4i32); + +v8i16 __builtin_msa_hsub_u_h (v16u8, v16u8); +v4i32 __builtin_msa_hsub_u_w (v8u16, v8u16); +v2i64 __builtin_msa_hsub_u_d (v4u32, v4u32); + +v16i8 __builtin_msa_ilvev_b (v16i8, v16i8); +v8i16 __builtin_msa_ilvev_h (v8i16, v8i16); +v4i32 __builtin_msa_ilvev_w (v4i32, v4i32); +v2i64 
__builtin_msa_ilvev_d (v2i64, v2i64); + +v16i8 __builtin_msa_ilvl_b (v16i8, v16i8); +v8i16 __builtin_msa_ilvl_h (v8i16, v8i16); +v4i32 __builtin_msa_ilvl_w (v4i32, v4i32); +v2i64 __builtin_msa_ilvl_d (v2i64, v2i64); + +v16i8 __builtin_msa_ilvod_b (v16i8, v16i8); +v8i16 __builtin_msa_ilvod_h (v8i16, v8i16); +v4i32 __builtin_msa_ilvod_w (v4i32, v4i32); +v2i64 __builtin_msa_ilvod_d (v2i64, v2i64); + +v16i8 __builtin_msa_ilvr_b (v16i8, v16i8); +v8i16 __builtin_msa_ilvr_h (v8i16, v8i16); +v4i32 __builtin_msa_ilvr_w (v4i32, v4i32); +v2i64 __builtin_msa_ilvr_d (v2i64, v2i64); + +v16i8 __builtin_msa_insert_b (v16i8, imm0_15, i32); +v8i16 __builtin_msa_insert_h (v8i16, imm0_7, i32); +v4i32 __builtin_msa_insert_w (v4i32, imm0_3, i32); +v2i64 __builtin_msa_insert_d (v2i64, imm0_1, i64); + +v16i8 __builtin_msa_insve_b (v16i8, imm0_15, v16i8); +v8i16 __builtin_msa_insve_h (v8i16, imm0_7, v8i16); +v4i32 __builtin_msa_insve_w (v4i32, imm0_3, v4i32); +v2i64 __builtin_msa_insve_d (v2i64, imm0_1, v2i64); + +v16i8 __builtin_msa_ld_b (void *, imm_n512_511); +v8i16 __builtin_msa_ld_h (void *, imm_n1024_1022); +v4i32 __builtin_msa_ld_w (void *, imm_n2048_2044); +v2i64 __builtin_msa_ld_d (void *, imm_n4096_4088); + +v16i8 __builtin_msa_ldi_b (imm_n512_511); +v8i16 __builtin_msa_ldi_h (imm_n512_511); +v4i32 __builtin_msa_ldi_w (imm_n512_511); +v2i64 __builtin_msa_ldi_d (imm_n512_511); + +v8i16 __builtin_msa_madd_q_h (v8i16, v8i16, v8i16); +v4i32 __builtin_msa_madd_q_w (v4i32, v4i32, v4i32); + +v8i16 __builtin_msa_maddr_q_h (v8i16, v8i16, v8i16); +v4i32 __builtin_msa_maddr_q_w (v4i32, v4i32, v4i32); + +v16i8 __builtin_msa_maddv_b (v16i8, v16i8, v16i8); +v8i16 __builtin_msa_maddv_h (v8i16, v8i16, v8i16); +v4i32 __builtin_msa_maddv_w (v4i32, v4i32, v4i32); +v2i64 __builtin_msa_maddv_d (v2i64, v2i64, v2i64); + +v16i8 __builtin_msa_max_a_b (v16i8, v16i8); +v8i16 __builtin_msa_max_a_h (v8i16, v8i16); +v4i32 __builtin_msa_max_a_w (v4i32, v4i32); +v2i64 __builtin_msa_max_a_d (v2i64, v2i64); + 
+v16i8 __builtin_msa_max_s_b (v16i8, v16i8); +v8i16 __builtin_msa_max_s_h (v8i16, v8i16); +v4i32 __builtin_msa_max_s_w (v4i32, v4i32); +v2i64 __builtin_msa_max_s_d (v2i64, v2i64); + +v16u8 __builtin_msa_max_u_b (v16u8, v16u8); +v8u16 __builtin_msa_max_u_h (v8u16, v8u16); +v4u32 __builtin_msa_max_u_w (v4u32, v4u32); +v2u64 __builtin_msa_max_u_d (v2u64, v2u64); + +v16i8 __builtin_msa_maxi_s_b (v16i8, imm_n16_15); +v8i16 __builtin_msa_maxi_s_h (v8i16, imm_n16_15); +v4i32 __builtin_msa_maxi_s_w (v4i32, imm_n16_15); +v2i64 __builtin_msa_maxi_s_d (v2i64, imm_n16_15); + +v16u8 __builtin_msa_maxi_u_b (v16u8, imm0_31); +v8u16 __builtin_msa_maxi_u_h (v8u16, imm0_31); +v4u32 __builtin_msa_maxi_u_w (v4u32, imm0_31); +v2u64 __builtin_msa_maxi_u_d (v2u64, imm0_31); + +v16i8 __builtin_msa_min_a_b (v16i8, v16i8); +v8i16 __builtin_msa_min_a_h (v8i16, v8i16); +v4i32 __builtin_msa_min_a_w (v4i32, v4i32); +v2i64 __builtin_msa_min_a_d (v2i64, v2i64); + +v16i8 __builtin_msa_min_s_b (v16i8, v16i8); +v8i16 __builtin_msa_min_s_h (v8i16, v8i16); +v4i32 __builtin_msa_min_s_w (v4i32, v4i32); +v2i64 __builtin_msa_min_s_d (v2i64, v2i64); + +v16u8 __builtin_msa_min_u_b (v16u8, v16u8); +v8u16 __builtin_msa_min_u_h (v8u16, v8u16); +v4u32 __builtin_msa_min_u_w (v4u32, v4u32); +v2u64 __builtin_msa_min_u_d (v2u64, v2u64); + +v16i8 __builtin_msa_mini_s_b (v16i8, imm_n16_15); +v8i16 __builtin_msa_mini_s_h (v8i16, imm_n16_15); +v4i32 __builtin_msa_mini_s_w (v4i32, imm_n16_15); +v2i64 __builtin_msa_mini_s_d (v2i64, imm_n16_15); + +v16u8 __builtin_msa_mini_u_b (v16u8, imm0_31); +v8u16 __builtin_msa_mini_u_h (v8u16, imm0_31); +v4u32 __builtin_msa_mini_u_w (v4u32, imm0_31); +v2u64 __builtin_msa_mini_u_d (v2u64, imm0_31); + +v16i8 __builtin_msa_mod_s_b (v16i8, v16i8); +v8i16 __builtin_msa_mod_s_h (v8i16, v8i16); +v4i32 __builtin_msa_mod_s_w (v4i32, v4i32); +v2i64 __builtin_msa_mod_s_d (v2i64, v2i64); + +v16u8 __builtin_msa_mod_u_b (v16u8, v16u8); +v8u16 __builtin_msa_mod_u_h (v8u16, v8u16); +v4u32 
__builtin_msa_mod_u_w (v4u32, v4u32); +v2u64 __builtin_msa_mod_u_d (v2u64, v2u64); + +v16i8 __builtin_msa_move_v (v16i8); + +v8i16 __builtin_msa_msub_q_h (v8i16, v8i16, v8i16); +v4i32 __builtin_msa_msub_q_w (v4i32, v4i32, v4i32); + +v8i16 __builtin_msa_msubr_q_h (v8i16, v8i16, v8i16); +v4i32 __builtin_msa_msubr_q_w (v4i32, v4i32, v4i32); + +v16i8 __builtin_msa_msubv_b (v16i8, v16i8, v16i8); +v8i16 __builtin_msa_msubv_h (v8i16, v8i16, v8i16); +v4i32 __builtin_msa_msubv_w (v4i32, v4i32, v4i32); +v2i64 __builtin_msa_msubv_d (v2i64, v2i64, v2i64); + +v8i16 __builtin_msa_mul_q_h (v8i16, v8i16); +v4i32 __builtin_msa_mul_q_w (v4i32, v4i32); + +v8i16 __builtin_msa_mulr_q_h (v8i16, v8i16); +v4i32 __builtin_msa_mulr_q_w (v4i32, v4i32); + +v16i8 __builtin_msa_mulv_b (v16i8, v16i8); +v8i16 __builtin_msa_mulv_h (v8i16, v8i16); +v4i32 __builtin_msa_mulv_w (v4i32, v4i32); +v2i64 __builtin_msa_mulv_d (v2i64, v2i64); + +v16i8 __builtin_msa_nloc_b (v16i8); +v8i16 __builtin_msa_nloc_h (v8i16); +v4i32 __builtin_msa_nloc_w (v4i32); +v2i64 __builtin_msa_nloc_d (v2i64); + +v16i8 __builtin_msa_nlzc_b (v16i8); +v8i16 __builtin_msa_nlzc_h (v8i16); +v4i32 __builtin_msa_nlzc_w (v4i32); +v2i64 __builtin_msa_nlzc_d (v2i64); + +v16u8 __builtin_msa_nor_v (v16u8, v16u8); + +v16u8 __builtin_msa_nori_b (v16u8, imm0_255); + +v16u8 __builtin_msa_or_v (v16u8, v16u8); + +v16u8 __builtin_msa_ori_b (v16u8, imm0_255); + +v16i8 __builtin_msa_pckev_b (v16i8, v16i8); +v8i16 __builtin_msa_pckev_h (v8i16, v8i16); +v4i32 __builtin_msa_pckev_w (v4i32, v4i32); +v2i64 __builtin_msa_pckev_d (v2i64, v2i64); + +v16i8 __builtin_msa_pckod_b (v16i8, v16i8); +v8i16 __builtin_msa_pckod_h (v8i16, v8i16); +v4i32 __builtin_msa_pckod_w (v4i32, v4i32); +v2i64 __builtin_msa_pckod_d (v2i64, v2i64); + +v16i8 __builtin_msa_pcnt_b (v16i8); +v8i16 __builtin_msa_pcnt_h (v8i16); +v4i32 __builtin_msa_pcnt_w (v4i32); +v2i64 __builtin_msa_pcnt_d (v2i64); + +v16i8 __builtin_msa_sat_s_b (v16i8, imm0_7); +v8i16 __builtin_msa_sat_s_h (v8i16, 
imm0_15); +v4i32 __builtin_msa_sat_s_w (v4i32, imm0_31); +v2i64 __builtin_msa_sat_s_d (v2i64, imm0_63); + +v16u8 __builtin_msa_sat_u_b (v16u8, imm0_7); +v8u16 __builtin_msa_sat_u_h (v8u16, imm0_15); +v4u32 __builtin_msa_sat_u_w (v4u32, imm0_31); +v2u64 __builtin_msa_sat_u_d (v2u64, imm0_63); + +v16i8 __builtin_msa_shf_b (v16i8, imm0_255); +v8i16 __builtin_msa_shf_h (v8i16, imm0_255); +v4i32 __builtin_msa_shf_w (v4i32, imm0_255); + +v16i8 __builtin_msa_sld_b (v16i8, v16i8, i32); +v8i16 __builtin_msa_sld_h (v8i16, v8i16, i32); +v4i32 __builtin_msa_sld_w (v4i32, v4i32, i32); +v2i64 __builtin_msa_sld_d (v2i64, v2i64, i32); + +v16i8 __builtin_msa_sldi_b (v16i8, v16i8, imm0_15); +v8i16 __builtin_msa_sldi_h (v8i16, v8i16, imm0_7); +v4i32 __builtin_msa_sldi_w (v4i32, v4i32, imm0_3); +v2i64 __builtin_msa_sldi_d (v2i64, v2i64, imm0_1); + +v16i8 __builtin_msa_sll_b (v16i8, v16i8); +v8i16 __builtin_msa_sll_h (v8i16, v8i16); +v4i32 __builtin_msa_sll_w (v4i32, v4i32); +v2i64 __builtin_msa_sll_d (v2i64, v2i64); + +v16i8 __builtin_msa_slli_b (v16i8, imm0_7); +v8i16 __builtin_msa_slli_h (v8i16, imm0_15); +v4i32 __builtin_msa_slli_w (v4i32, imm0_31); +v2i64 __builtin_msa_slli_d (v2i64, imm0_63); + +v16i8 __builtin_msa_splat_b (v16i8, i32); +v8i16 __builtin_msa_splat_h (v8i16, i32); +v4i32 __builtin_msa_splat_w (v4i32, i32); +v2i64 __builtin_msa_splat_d (v2i64, i32); + +v16i8 __builtin_msa_splati_b (v16i8, imm0_15); +v8i16 __builtin_msa_splati_h (v8i16, imm0_7); +v4i32 __builtin_msa_splati_w (v4i32, imm0_3); +v2i64 __builtin_msa_splati_d (v2i64, imm0_1); + +v16i8 __builtin_msa_sra_b (v16i8, v16i8); +v8i16 __builtin_msa_sra_h (v8i16, v8i16); +v4i32 __builtin_msa_sra_w (v4i32, v4i32); +v2i64 __builtin_msa_sra_d (v2i64, v2i64); + +v16i8 __builtin_msa_srai_b (v16i8, imm0_7); +v8i16 __builtin_msa_srai_h (v8i16, imm0_15); +v4i32 __builtin_msa_srai_w (v4i32, imm0_31); +v2i64 __builtin_msa_srai_d (v2i64, imm0_63); + +v16i8 __builtin_msa_srar_b (v16i8, v16i8); +v8i16 __builtin_msa_srar_h 
(v8i16, v8i16); +v4i32 __builtin_msa_srar_w (v4i32, v4i32); +v2i64 __builtin_msa_srar_d (v2i64, v2i64); + +v16i8 __builtin_msa_srari_b (v16i8, imm0_7); +v8i16 __builtin_msa_srari_h (v8i16, imm0_15); +v4i32 __builtin_msa_srari_w (v4i32, imm0_31); +v2i64 __builtin_msa_srari_d (v2i64, imm0_63); + +v16i8 __builtin_msa_srl_b (v16i8, v16i8); +v8i16 __builtin_msa_srl_h (v8i16, v8i16); +v4i32 __builtin_msa_srl_w (v4i32, v4i32); +v2i64 __builtin_msa_srl_d (v2i64, v2i64); + +v16i8 __builtin_msa_srli_b (v16i8, imm0_7); +v8i16 __builtin_msa_srli_h (v8i16, imm0_15); +v4i32 __builtin_msa_srli_w (v4i32, imm0_31); +v2i64 __builtin_msa_srli_d (v2i64, imm0_63); + +v16i8 __builtin_msa_srlr_b (v16i8, v16i8); +v8i16 __builtin_msa_srlr_h (v8i16, v8i16); +v4i32 __builtin_msa_srlr_w (v4i32, v4i32); +v2i64 __builtin_msa_srlr_d (v2i64, v2i64); + +v16i8 __builtin_msa_srlri_b (v16i8, imm0_7); +v8i16 __builtin_msa_srlri_h (v8i16, imm0_15); +v4i32 __builtin_msa_srlri_w (v4i32, imm0_31); +v2i64 __builtin_msa_srlri_d (v2i64, imm0_63); + +void __builtin_msa_st_b (v16i8, void *, imm_n512_511); +void __builtin_msa_st_h (v8i16, void *, imm_n1024_1022); +void __builtin_msa_st_w (v4i32, void *, imm_n2048_2044); +void __builtin_msa_st_d (v2i64, void *, imm_n4096_4088); + +v16i8 __builtin_msa_subs_s_b (v16i8, v16i8); +v8i16 __builtin_msa_subs_s_h (v8i16, v8i16); +v4i32 __builtin_msa_subs_s_w (v4i32, v4i32); +v2i64 __builtin_msa_subs_s_d (v2i64, v2i64); + +v16u8 __builtin_msa_subs_u_b (v16u8, v16u8); +v8u16 __builtin_msa_subs_u_h (v8u16, v8u16); +v4u32 __builtin_msa_subs_u_w (v4u32, v4u32); +v2u64 __builtin_msa_subs_u_d (v2u64, v2u64); + +v16u8 __builtin_msa_subsus_u_b (v16u8, v16i8); +v8u16 __builtin_msa_subsus_u_h (v8u16, v8i16); +v4u32 __builtin_msa_subsus_u_w (v4u32, v4i32); +v2u64 __builtin_msa_subsus_u_d (v2u64, v2i64); + +v16i8 __builtin_msa_subsuu_s_b (v16u8, v16u8); +v8i16 __builtin_msa_subsuu_s_h (v8u16, v8u16); +v4i32 __builtin_msa_subsuu_s_w (v4u32, v4u32); +v2i64 __builtin_msa_subsuu_s_d 
(v2u64, v2u64); + +v16i8 __builtin_msa_subv_b (v16i8, v16i8); +v8i16 __builtin_msa_subv_h (v8i16, v8i16); +v4i32 __builtin_msa_subv_w (v4i32, v4i32); +v2i64 __builtin_msa_subv_d (v2i64, v2i64); + +v16i8 __builtin_msa_subvi_b (v16i8, imm0_31); +v8i16 __builtin_msa_subvi_h (v8i16, imm0_31); +v4i32 __builtin_msa_subvi_w (v4i32, imm0_31); +v2i64 __builtin_msa_subvi_d (v2i64, imm0_31); + +v16i8 __builtin_msa_vshf_b (v16i8, v16i8, v16i8); +v8i16 __builtin_msa_vshf_h (v8i16, v8i16, v8i16); +v4i32 __builtin_msa_vshf_w (v4i32, v4i32, v4i32); +v2i64 __builtin_msa_vshf_d (v2i64, v2i64, v2i64); + +v16u8 __builtin_msa_xor_v (v16u8, v16u8); + +v16u8 __builtin_msa_xori_b (v16u8, imm0_255); +@end smallexample + @node Other MIPS Built-in Functions @subsection Other MIPS Built-in Functions diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 908dbe856ee..a54a0af77b8 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -849,6 +849,7 @@ Objective-C and Objective-C++ Dialects}. -mvirt -mno-virt @gol -mxpa -mno-xpa @gol -mmicromips -mno-micromips @gol +-mmsa -mno-msa @gol -mfpu=@var{fpu-type} @gol -msmartmips -mno-smartmips @gol -mpaired-single -mno-paired-single -mdmx -mno-mdmx @gol diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi index 9a349a3f03a..3142cd53ae5 100644 --- a/gcc/doc/sourcebuild.texi +++ b/gcc/doc/sourcebuild.texi @@ -1641,6 +1641,9 @@ MIPS target can generate MIPS16 code. MIPS target is a Loongson-2E or -2F target using an ABI that supports the Loongson vector modes. +@item mips_msa +MIPS target supports @code{-mmsa}, MIPS SIMD Architecture (MSA). + @item mips_newabi_large_long_double MIPS target supports @code{long double} larger than @code{double} when using the new ABI. -- 2.30.2