From e22862689667e739d308140d72e8cc85ed116a2f Mon Sep 17 00:00:00 2001 From: Monk Chiang Date: Fri, 6 Apr 2018 05:51:33 +0000 Subject: [PATCH] [NDS32] Add hard float support. gcc/ * config.gcc (nds32*-*-*): Add v2j v3f v3s checking. (nds32*-*-*): Add float and fpu_config into supported_defaults. * common/config/nds32/nds32-common.c (TARGET_DEFAULT_TARGET_FLAGS): Include TARGET_DEFAULT_FPU_ISA and TARGET_DEFAULT_FPU_FMA. * config/nds32/constants.md (unspec_element): Add UNSPEC_COPYSIGN, UNSPEC_FCPYNSD, UNSPEC_FCPYNSS, UNSPEC_FCPYSD and UNSPEC_FCPYSS. * config/nds32/constraints.md: New constraints and checking for hard float configuration. * config/nds32/iterators.md: New mode iterator and attribute for hard float configuration. * config/nds32/nds32-doubleword.md: Use hard float alternatives and patterns. * config/nds32/nds32-fpu.md: New file. * config/nds32/nds32-md-auxiliary.c: New functions and checkings to deal with hard float code generation. * config/nds32/nds32-opts.h (nds32_arch_type): Add ARCH_V3F and ARCH_V3S. (abi_type, float_reg_number): New enum type. * config/nds32/nds32-predicates.c: New predicates for hard float. * config/nds32/nds32-protos.h: Declare functions for hard float. * config/nds32/nds32.c: Implementation for hard float configuration. * config/nds32/nds32.h: Definitions for hard float configuration. * config/nds32/nds32.md: Include hard float machine description and modify patterns for hard float configuration. * config/nds32/nds32.opt: New options for hard float configuration. * config/nds32/predicates.md: New predicates for hard float configuration. 
Co-Authored-By: Chung-Ju Wu From-SVN: r259161 --- gcc/ChangeLog | 31 + gcc/common/config/nds32/nds32-common.c | 2 + gcc/config.gcc | 42 +- gcc/config/nds32/constants.md | 5 + gcc/config/nds32/constraints.md | 40 +- gcc/config/nds32/iterators.md | 6 +- gcc/config/nds32/nds32-doubleword.md | 211 ++--- gcc/config/nds32/nds32-fpu.md | 503 ++++++++++++ gcc/config/nds32/nds32-md-auxiliary.c | 684 +++++++++++++++- gcc/config/nds32/nds32-opts.h | 24 +- gcc/config/nds32/nds32-predicates.c | 67 ++ gcc/config/nds32/nds32-protos.h | 30 + gcc/config/nds32/nds32.c | 1019 +++++++++++++++++++++--- gcc/config/nds32/nds32.h | 268 ++++++- gcc/config/nds32/nds32.md | 114 ++- gcc/config/nds32/nds32.opt | 75 ++ gcc/config/nds32/predicates.md | 21 + 17 files changed, 2826 insertions(+), 316 deletions(-) create mode 100644 gcc/config/nds32/nds32-fpu.md diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 5c49936fb19..979450f612c 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,34 @@ +2018-04-06 Monk Chiang + Chung-Ju Wu + + * config.gcc (nds32*-*-*): Add v2j v3f v3s checking. + (nds32*-*-*): Add float and fpu_config into supported_defaults. + * common/config/nds32/nds32-common.c (TARGET_DEFAULT_TARGET_FLAGS): + Include TARGET_DEFAULT_FPU_ISA and TARGET_DEFAULT_FPU_FMA. + * config/nds32/constants.md (unspec_element): Add UNSPEC_COPYSIGN, + UNSPEC_FCPYNSD, UNSPEC_FCPYNSS, UNSPEC_FCPYSD and UNSPEC_FCPYSS. + * config/nds32/constraints.md: New constraints and checking for hard + float configuration. + * config/nds32/iterators.md: New mode iterator and attribute for hard + float configuration. + * config/nds32/nds32-doubleword.md: Use hard float alternatives and + patterns. + * config/nds32/nds32-fpu.md: New file. + * config/nds32/nds32-md-auxiliary.c: New functions and checkings to + deal with hard float code generation. + * config/nds32/nds32-opts.h (nds32_arch_type): Add ARCH_V3F and + ARCH_V3S. + (abi_type, float_reg_number): New enum type. 
+ * config/nds32/nds32-predicates.c: New predicates for hard float. + * config/nds32/nds32-protos.h: Declare functions for hard float. + * config/nds32/nds32.c: Implementation for hard float configuration. + * config/nds32/nds32.h: Definitions for hard float configuration. + * config/nds32/nds32.md: Include hard float machine description and + modify patterns for hard float configuration. + * config/nds32/nds32.opt: New options for hard float configuration. + * config/nds32/predicates.md: New predicates for hard float + configuration. + 2018-04-06 Kuan-Lin Chen * common/config/nds32/nds32-common.c diff --git a/gcc/common/config/nds32/nds32-common.c b/gcc/common/config/nds32/nds32-common.c index 8d3c9b2ea96..dbcc390728e 100644 --- a/gcc/common/config/nds32/nds32-common.c +++ b/gcc/common/config/nds32/nds32-common.c @@ -107,6 +107,8 @@ static const struct default_options nds32_option_optimization_table[] = #undef TARGET_DEFAULT_TARGET_FLAGS #define TARGET_DEFAULT_TARGET_FLAGS \ (TARGET_CPU_DEFAULT \ + | TARGET_DEFAULT_FPU_ISA \ + | TARGET_DEFAULT_FPU_FMA \ | MASK_16_BIT \ | MASK_EXT_PERF \ | MASK_EXT_PERF2 \ diff --git a/gcc/config.gcc b/gcc/config.gcc index b8a9877b432..75d0ae815ff 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -4278,15 +4278,26 @@ case "${target}" in ;; nds32*-*-*) - supported_defaults="arch cpu nds32_lib" + supported_defaults="arch cpu nds32_lib float fpu_config" # process --with-arch case "${with_arch}" in - "" | v2 | v3 | v3m) + "" | v3 ) + tm_defines="${tm_defines} TARGET_ARCH_DEFAULT=0" + ;; + v2 | v2j | v3m) # OK + tm_defines="${tm_defines} TARGET_ARCH_DEFAULT=0" + ;; + v3f) + tm_defines="${tm_defines} TARGET_ARCH_DEFAULT=1" + ;; + v3s) + tm_defines="${tm_defines} TARGET_ARCH_DEFAULT=2" + ;; *) - echo "Cannot accept --with-arch=$with_arch, available values are: v2 v3 v3m" 1>&2 + echo "Cannot accept --with-arch=$with_arch, available values are: v2 v2j v3 v3m v3f v3s" 1>&2 exit 1 ;; esac @@ -4321,8 +4332,31 @@ case "${target}" in exit 1 ;; 
esac - ;; + # process --with-float + case "${with_float}" in + "" | soft | hard) + # OK + ;; + *) + echo "Cannot accept --with-float=$with_float, available values are: soft hard" 1>&2 + exit 1 + ;; + esac + + # process --with-config-fpu + case "${with_config_fpu}" in + "" | 0 | 1 | 2 | 3) + # OK + ;; + *) + echo "Cannot accept --with-config-fpu=$with_config_fpu, available values from 0 to 7" 1>&2 + exit 1 + ;; + esac + + + ;; nios2*-*-*) supported_defaults="arch" case "$with_arch" in diff --git a/gcc/config/nds32/constants.md b/gcc/config/nds32/constants.md index 77fb71c3420..7c706eb7da0 100644 --- a/gcc/config/nds32/constants.md +++ b/gcc/config/nds32/constants.md @@ -32,6 +32,11 @@ ;; The unpec operation index. (define_c_enum "unspec_element" [ + UNSPEC_COPYSIGN + UNSPEC_FCPYNSD + UNSPEC_FCPYNSS + UNSPEC_FCPYSD + UNSPEC_FCPYSS UNSPEC_FFB UNSPEC_FFMISM UNSPEC_FLMISM diff --git a/gcc/config/nds32/constraints.md b/gcc/config/nds32/constraints.md index 7cf18eb5533..7af7769fcbf 100644 --- a/gcc/config/nds32/constraints.md +++ b/gcc/config/nds32/constraints.md @@ -53,6 +53,10 @@ (define_register_constraint "x" "FRAME_POINTER_REG" "Frame pointer register $fp") +(define_register_constraint "f" + "(TARGET_FPU_SINGLE || TARGET_FPU_DOUBLE) ? 
FP_REGS : NO_REGS" + "The Floating point registers $fs0 ~ $fs31") + (define_constraint "Iv00" "Constant value 0" (and (match_code "const_int") @@ -108,6 +112,11 @@ (and (match_code "const_int") (match_test "ival < (1 << 4) && ival >= -(1 << 4)"))) +(define_constraint "Cs05" + "Signed immediate 5-bit value" + (and (match_code "const_double") + (match_test "nds32_const_double_range_ok_p (op, SFmode, -(1 << 4), (1 << 4))"))) + (define_constraint "Iu05" "Unsigned immediate 5-bit value" (and (match_code "const_int") @@ -246,12 +255,21 @@ (and (match_code "const_int") (match_test "ival < (1 << 19) && ival >= -(1 << 19)"))) +(define_constraint "Cs20" + "Signed immediate 20-bit value" + (and (match_code "const_double") + (match_test "nds32_const_double_range_ok_p (op, SFmode, -(1 << 19), (1 << 19))"))) (define_constraint "Ihig" "The immediate value that can be simply set high 20-bit" (and (match_code "const_int") (match_test "(ival != 0) && ((ival & 0xfff) == 0)"))) +(define_constraint "Chig" + "The immediate value that can be simply set high 20-bit" + (and (match_code "high") + (match_test "GET_CODE (XEXP (op, 0)) == CONST_DOUBLE"))) + (define_constraint "Izeb" "The immediate value 0xff" (and (match_code "const_int") @@ -296,25 +314,39 @@ "Memory constraint for 45 format" (and (match_code "mem") (match_test "(nds32_mem_format (op) == ADDRESS_REG) - && (GET_MODE (op) == SImode)"))) + && ((GET_MODE (op) == SImode) + || (GET_MODE (op) == SFmode))"))) (define_memory_constraint "Ufe" "Memory constraint for fe format" (and (match_code "mem") (match_test "nds32_mem_format (op) == ADDRESS_R8_IMM7U - && (GET_MODE (op) == SImode)"))) + && (GET_MODE (op) == SImode + || GET_MODE (op) == SFmode)"))) (define_memory_constraint "U37" "Memory constraint for 37 format" (and (match_code "mem") (match_test "(nds32_mem_format (op) == ADDRESS_SP_IMM7U || nds32_mem_format (op) == ADDRESS_FP_IMM7U) - && (GET_MODE (op) == SImode)"))) - + && (GET_MODE (op) == SImode + || GET_MODE (op) == 
SFmode)"))) (define_memory_constraint "Umw" "Memory constraint for lwm/smw" (and (match_code "mem") (match_test "nds32_valid_smw_lwm_base_p (op)"))) +(define_memory_constraint "Da" + "Memory constraint for non-offset loads/stores" + (and (match_code "mem") + (match_test "REG_P (XEXP (op, 0)) + || (GET_CODE (XEXP (op, 0)) == POST_INC)"))) + +(define_memory_constraint "Q" + "Memory constraint for no symbol_ref and const" + (and (match_code "mem") + (match_test "(TARGET_FPU_SINGLE || TARGET_FPU_DOUBLE) + && nds32_float_mem_operand_p (op)"))) + ;; ------------------------------------------------------------------------ diff --git a/gcc/config/nds32/iterators.md b/gcc/config/nds32/iterators.md index e0798ff812f..c2062de2e97 100644 --- a/gcc/config/nds32/iterators.md +++ b/gcc/config/nds32/iterators.md @@ -45,11 +45,15 @@ (define_mode_iterator VSQIHIDI [V4QI V2HI QI HI DI]) (define_mode_iterator VQIHIDI [V4QI V2HI DI]) +;; A list of the floating-point modes; SF needs TARGET_FPU_SINGLE and DF needs TARGET_FPU_DOUBLE. +(define_mode_iterator ANYF [(SF "TARGET_FPU_SINGLE") + (DF "TARGET_FPU_DOUBLE")]) + ;;---------------------------------------------------------------------------- ;; Mode attributes. 
;;---------------------------------------------------------------------------- -(define_mode_attr size [(QI "b") (HI "h") (SI "w")]) +(define_mode_attr size [(QI "b") (HI "h") (SI "w") (SF "s") (DF "d")]) (define_mode_attr byte [(QI "1") (HI "2") (SI "4") (V4QI "4") (V2HI "4")]) diff --git a/gcc/config/nds32/nds32-doubleword.md b/gcc/config/nds32/nds32-doubleword.md index 0e4b0dc4fb6..7df715a771f 100644 --- a/gcc/config/nds32/nds32-doubleword.md +++ b/gcc/config/nds32/nds32-doubleword.md @@ -46,145 +46,77 @@ (define_insn "move_" - [(set (match_operand:DIDF 0 "nonimmediate_operand" "=r, r, r, m") - (match_operand:DIDF 1 "general_operand" " r, i, m, r"))] + [(set (match_operand:DIDF 0 "nonimmediate_operand" "=r, r, r, r, Da, m, f, Q, f, *r, *f") + (match_operand:DIDF 1 "general_operand" " r, i, Da, m, r, r, Q, f, f, *f, *r"))] "register_operand(operands[0], mode) || register_operand(operands[1], mode)" { - rtx addr; - rtx otherops[5]; - switch (which_alternative) { case 0: return "movd44\t%0, %1"; - case 1: /* reg <- const_int, we ask gcc to split instruction. */ return "#"; - case 2: - /* Refer to nds32_legitimate_address_p() in nds32.c, - we only allow "reg", "symbol_ref", "const", and "reg + const_int" - as address rtx for DImode/DFmode memory access. 
*/ - addr = XEXP (operands[1], 0); - - otherops[0] = gen_rtx_REG (SImode, REGNO (operands[0])); - otherops[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1); - otherops[2] = addr; - - if (REG_P (addr)) - { - /* (reg) <- (mem (reg)) */ - output_asm_insn ("lmw.bi\t%0, [%2], %1, 0", otherops); - } - else if (GET_CODE (addr) == PLUS) - { - /* (reg) <- (mem (plus (reg) (const_int))) */ - rtx op0 = XEXP (addr, 0); - rtx op1 = XEXP (addr, 1); - - if (REG_P (op0)) - { - otherops[2] = op0; - otherops[3] = op1; - otherops[4] = gen_int_mode (INTVAL (op1) + 4, SImode); - } - else - { - otherops[2] = op1; - otherops[3] = op0; - otherops[4] = gen_int_mode (INTVAL (op0) + 4, SImode); - } - - /* To avoid base overwrite when REGNO(%0) == REGNO(%2). */ - if (REGNO (otherops[0]) != REGNO (otherops[2])) - { - output_asm_insn ("lwi\t%0, [%2 + (%3)]", otherops); - output_asm_insn ("lwi\t%1, [%2 + (%4)]", otherops); - } - else - { - output_asm_insn ("lwi\t%1, [%2 + (%4)]", otherops); - output_asm_insn ("lwi\t%0,[ %2 + (%3)]", otherops); - } - } - else - { - /* (reg) <- (mem (symbol_ref ...)) - (reg) <- (mem (const ...)) */ - output_asm_insn ("lwi.gp\t%0, [ + %2]", otherops); - output_asm_insn ("lwi.gp\t%1, [ + %2 + 4]", otherops); - } - - /* We have already used output_asm_insn() by ourself, - so return an empty string. */ - return ""; - + /* The memory format is (mem (reg)), + we can generate 'lmw.bi' instruction. */ + return nds32_output_double (operands, true); case 3: - /* Refer to nds32_legitimate_address_p() in nds32.c, - we only allow "reg", "symbol_ref", "const", and "reg + const_int" - as address rtx for DImode/DFmode memory access. 
*/ - addr = XEXP (operands[0], 0); - - otherops[0] = gen_rtx_REG (SImode, REGNO (operands[1])); - otherops[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1); - otherops[2] = addr; - - if (REG_P (addr)) - { - /* (mem (reg)) <- (reg) */ - output_asm_insn ("smw.bi\t%0, [%2], %1, 0", otherops); - } - else if (GET_CODE (addr) == PLUS) - { - /* (mem (plus (reg) (const_int))) <- (reg) */ - rtx op0 = XEXP (addr, 0); - rtx op1 = XEXP (addr, 1); - - if (REG_P (op0)) - { - otherops[2] = op0; - otherops[3] = op1; - otherops[4] = gen_int_mode (INTVAL (op1) + 4, SImode); - } - else - { - otherops[2] = op1; - otherops[3] = op0; - otherops[4] = gen_int_mode (INTVAL (op0) + 4, SImode); - } - - /* To avoid base overwrite when REGNO(%0) == REGNO(%2). */ - if (REGNO (otherops[0]) != REGNO (otherops[2])) - { - output_asm_insn ("swi\t%0, [%2 + (%3)]", otherops); - output_asm_insn ("swi\t%1, [%2 + (%4)]", otherops); - } - else - { - output_asm_insn ("swi\t%1, [%2 + (%4)]", otherops); - output_asm_insn ("swi\t%0, [%2 + (%3)]", otherops); - } - } - else - { - /* (mem (symbol_ref ...)) <- (reg) - (mem (const ...)) <- (reg) */ - output_asm_insn ("swi.gp\t%0, [ + %2]", otherops); - output_asm_insn ("swi.gp\t%1, [ + %2 + 4]", otherops); - } - - /* We have already used output_asm_insn() by ourself, - so return an empty string. */ - return ""; - + /* We haven't 64-bit load instruction, + we split this pattern to two SImode pattern. */ + return "#"; + case 4: + /* The memory format is (mem (reg)), + we can generate 'smw.bi' instruction. */ + return nds32_output_double (operands, false); + case 5: + /* We haven't 64-bit store instruction, + we split this pattern to two SImode pattern. 
*/ + return "#"; + case 6: + return nds32_output_float_load (operands); + case 7: + return nds32_output_float_store (operands); + case 8: + return "fcpysd\t%0, %1, %1"; + case 9: + return "fmfdr\t%0, %1"; + case 10: + return "fmtdr\t%1, %0"; default: gcc_unreachable (); } } - [(set_attr "type" "alu,alu,alu,alu") - (set_attr "length" " 4, 16, 8, 8")]) + [(set_attr "type" "alu,alu,load,load,store,store,fload,fstore,fcpy,fmfdr,fmtdr") + (set_attr_alternative "length" + [ + ;; Alternative 0 + (if_then_else (match_test "!TARGET_16_BIT") + (const_int 4) + (const_int 2)) + ;; Alternative 1 + (const_int 16) + ;; Alternative 2 + (const_int 4) + ;; Alternative 3 + (const_int 8) + ;; Alternative 4 + (const_int 4) + ;; Alternative 5 + (const_int 8) + ;; Alternative 6 + (const_int 4) + ;; Alternative 7 + (const_int 4) + ;; Alternative 8 + (const_int 4) + ;; Alternative 9 + (const_int 4) + ;; Alternative 10 + (const_int 4) + ]) + (set_attr "feature" " v1, v1, v1, v1, v1, v1, fpu, fpu, fpu, fpu, fpu")]) (define_split [(set (match_operand:DIDF 0 "register_operand" "") @@ -208,7 +140,12 @@ /* Actually we would like to create move behavior by ourself. So that movsi expander could have chance to split large constant. 
*/ emit_move_insn (operands[2], operands[3]); - emit_move_insn (operands[4], operands[5]); + + unsigned HOST_WIDE_INT mask = GET_MODE_MASK (SImode); + if ((UINTVAL (operands[3]) & mask) == (UINTVAL (operands[5]) & mask)) + emit_move_insn (operands[4], operands[2]); + else + emit_move_insn (operands[4], operands[5]); DONE; }) @@ -218,7 +155,9 @@ [(set (match_operand:DIDF 0 "register_operand" "") (match_operand:DIDF 1 "register_operand" ""))] "reload_completed - && (TARGET_ISA_V2 || !TARGET_16_BIT)" + && (TARGET_ISA_V2 || !TARGET_16_BIT) + && NDS32_IS_GPR_REGNUM (REGNO (operands[0])) + && NDS32_IS_GPR_REGNUM (REGNO (operands[1]))" [(set (match_dup 0) (match_dup 1)) (set (match_dup 2) (match_dup 3))] { @@ -240,6 +179,28 @@ } }) +(define_split + [(set (match_operand:DIDF 0 "nds32_general_register_operand" "") + (match_operand:DIDF 1 "memory_operand" ""))] + "reload_completed + && nds32_split_double_word_load_store_p (operands, true)" + [(set (match_dup 2) (match_dup 3)) + (set (match_dup 4) (match_dup 5))] +{ + nds32_spilt_doubleword (operands, true); +}) + +(define_split + [(set (match_operand:DIDF 0 "memory_operand" "") + (match_operand:DIDF 1 "nds32_general_register_operand" ""))] + "reload_completed + && nds32_split_double_word_load_store_p (operands, false)" + [(set (match_dup 2) (match_dup 3)) + (set (match_dup 4) (match_dup 5))] +{ + nds32_spilt_doubleword (operands, false); +}) + ;; ------------------------------------------------------------- ;; Boolean DImode instructions. ;; ------------------------------------------------------------- diff --git a/gcc/config/nds32/nds32-fpu.md b/gcc/config/nds32/nds32-fpu.md new file mode 100644 index 00000000000..719b0428ced --- /dev/null +++ b/gcc/config/nds32/nds32-fpu.md @@ -0,0 +1,503 @@ +;; Machine description of Andes NDS32 cpu for GNU compiler +;; Copyright (C) 2012-2015 Free Software Foundation, Inc. +;; Contributed by Andes Technology Corporation. +;; +;; This file is part of GCC. 
+;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. +;; +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. + +;;SFmode moves + +(define_expand "movsf" + [(set (match_operand:SF 0 "general_operand" "") + (match_operand:SF 1 "general_operand" ""))] + "" +{ + /* Need to force register if mem <- !reg. */ + if (MEM_P (operands[0]) && !REG_P (operands[1])) + operands[1] = force_reg (SFmode, operands[1]); + if (CONST_DOUBLE_P (operands[1]) + && !satisfies_constraint_Cs20 (operands[1])) + { + const REAL_VALUE_TYPE *r; + unsigned long l; + + r = CONST_DOUBLE_REAL_VALUE (operands[1]); + REAL_VALUE_TO_TARGET_SINGLE (*r, l); + + emit_move_insn (operands[0], gen_rtx_HIGH (SFmode, operands[1])); + + if ((l & 0xFFF) != 0) + emit_insn (gen_movsf_lo (operands[0], operands[0], operands[1])); + DONE; + } +}) + +(define_insn "movsf_lo" + [(set (match_operand:SF 0 "register_operand" "=r") + (lo_sum:SF (match_operand:SF 1 "register_operand" "r") + (match_operand:SF 2 "immediate_operand" "i")))] + "" + "ori\t%0, %1, lo12(%2)" + [(set_attr "type" "alu") + (set_attr "length" "4")] +) + +(define_insn "*movsf" + [(set (match_operand:SF 0 "nonimmediate_operand" "=r, r, U45, U33, U37, U45, m, l, l, l, d, r, f, *f, *r, f, Q, r, r, r") + (match_operand:SF 1 "general_operand" " r, r, l, l, l, d, r, U45, U33, U37, U45, m, f, *r, *f, Q, f,Cs05,Cs20, Chig"))] + "(register_operand(operands[0], SFmode) + || register_operand(operands[1], SFmode))" +{ + switch (which_alternative) + { + case 0: + return 
"mov55\t%0, %1"; + case 1: + return "ori\t%0, %1, 0"; + case 2: + case 3: + case 4: + case 5: + return nds32_output_16bit_store (operands, 4); + case 6: + return nds32_output_32bit_store (operands, 4); + case 7: + case 8: + case 9: + case 10: + return nds32_output_16bit_load (operands, 4); + case 11: + return nds32_output_32bit_load (operands, 4); + case 12: + if (TARGET_FPU_SINGLE) + return "fcpyss\t%0, %1, %1"; + else + return "#"; + case 13: + return "fmtsr\t%1, %0"; + case 14: + return "fmfsr\t%0, %1"; + case 15: + return nds32_output_float_load (operands); + case 16: + return nds32_output_float_store (operands); + case 17: + return "movi55\t%0, %1"; + case 18: + return "movi\t%0, %1"; + case 19: + return "sethi\t%0, %1"; + default: + gcc_unreachable (); + } +} + [(set_attr "type" "alu,alu,store,store,store,store,store,load,load,load,load,load,fcpy,fmtsr,fmfsr,fload,fstore,alu,alu,alu") + (set_attr "length" " 2, 4, 2, 2, 2, 2, 4, 2, 2, 2, 2, 4, 4, 4, 4, 4, 4, 2, 4, 4") + (set_attr "feature" " v1, v1, v1, v1, v1, v1, v1, v1, v1, v1, v1, v1, fpu, fpu, fpu, fpu, fpu, v1, v1, v1")]) + +;; Conditional Move Instructions + +(define_expand "movcc" + [(set (match_operand:ANYF 0 "register_operand" "") + (if_then_else:ANYF (match_operand 1 "nds32_float_comparison_operator" "") + (match_operand:ANYF 2 "register_operand" "") + (match_operand:ANYF 3 "register_operand" "")))] + "" +{ + if (nds32_cond_move_p (operands[1])) + { + /* Operands[1] condition code is UNORDERED or ORDERED, and + sub-operands[1] MODE isn't SFmode or DFmode, return FAIL + for gcc, because we don't use slt compare instruction + to generate UNORDERED and ORDERED condition. 
*/ + FAIL; + } + else + nds32_expand_float_movcc (operands); +}) + +(define_insn "fcmov_eq" + [(set (match_operand:ANYF 0 "register_operand" "=f, f") + (if_then_else:ANYF (eq (match_operand:SI 1 "register_operand" "f, f") + (const_int 0)) + (match_operand:ANYF 2 "register_operand" "f, 0") + (match_operand:ANYF 3 "register_operand" "0, f")))] + "" + "@ + fcmovz\t%0,%2,%1 + fcmovn\t%0,%3,%1" + [(set_attr "type" "fcmov") + (set_attr "length" "4")] +) + +(define_insn "fcmov_ne" + [(set (match_operand:ANYF 0 "register_operand" "=f, f") + (if_then_else:ANYF (ne (match_operand:SI 1 "register_operand" "f, f") + (const_int 0)) + (match_operand:ANYF 2 "register_operand" "f, 0") + (match_operand:ANYF 3 "register_operand" "0, f")))] + "" + "@ + fcmovn\t%0,%2,%1 + fcmovz\t%0,%3,%1" + [(set_attr "type" "fcmov") + (set_attr "length" "4")] +) + +;; Arithmetic instructions. + +(define_insn "add3" + [(set (match_operand:ANYF 0 "register_operand" "=f") + (plus:ANYF (match_operand:ANYF 1 "register_operand" "f") + (match_operand:ANYF 2 "register_operand" "f")))] + "" + "fadd\t %0, %1, %2" + [(set_attr "type" "falu") + (set_attr "length" "4")] +) + +(define_insn "sub3" + [(set (match_operand:ANYF 0 "register_operand" "=f") + (minus:ANYF (match_operand:ANYF 1 "register_operand" "f") + (match_operand:ANYF 2 "register_operand" "f")))] + "" + "fsub\t %0, %1, %2" + [(set_attr "type" "falu") + (set_attr "length" "4")] +) + +;; Multiplication insns. 
+ +(define_insn "mul3" + [(set (match_operand:ANYF 0 "register_operand" "=f") + (mult:ANYF (match_operand:ANYF 1 "register_operand" "f") + (match_operand:ANYF 2 "register_operand" "f")))] + "" + "fmul\t %0, %1, %2" + [(set_attr "type" "fmul") + (set_attr "length" "4")] +) + +(define_insn "fma4" + [(set (match_operand:ANYF 0 "register_operand" "=f") + (fma:ANYF (match_operand:ANYF 1 "register_operand" "f") + (match_operand:ANYF 2 "register_operand" "f") + (match_operand:ANYF 3 "register_operand" "0")))] + "TARGET_EXT_FPU_FMA" + "fmadd\t%0, %1, %2" + [(set_attr "type" "fmac") + (set_attr "length" "4")] +) + +(define_insn "fnma4" + [(set (match_operand:ANYF 0 "register_operand" "=f") + (fma:ANYF (neg:ANYF (match_operand:ANYF 1 "register_operand" "f")) + (match_operand:ANYF 2 "register_operand" "f") + (match_operand:ANYF 3 "register_operand" "0")))] + "TARGET_EXT_FPU_FMA" + "fmsub\t%0, %1, %2" + [(set_attr "type" "fmac") + (set_attr "length" "4")] +) + +(define_insn "fms4" + [(set (match_operand:ANYF 0 "register_operand" "=f") + (fma:ANYF (match_operand:ANYF 1 "register_operand" "f") + (match_operand:ANYF 2 "register_operand" "f") + (neg:ANYF (match_operand:ANYF 3 "register_operand" "0"))))] + "TARGET_EXT_FPU_FMA" + "fnmsub\t%0, %1, %2" + [(set_attr "type" "fmac") + (set_attr "length" "4")] +) + +(define_insn "fnms4" + [(set (match_operand:ANYF 0 "register_operand" "=f") + (fma:ANYF (neg:ANYF (match_operand:ANYF 1 "register_operand" "f")) + (match_operand:ANYF 2 "register_operand" "f") + (neg:ANYF (match_operand:ANYF 3 "register_operand" "0"))))] + "TARGET_EXT_FPU_FMA" + "fnmadd\t%0, %1, %2" + [(set_attr "type" "fmac") + (set_attr "length" "4")] +) + +;; Div Instructions. 
+ +(define_insn "div3" + [(set (match_operand:ANYF 0 "register_operand" "=f") + (div:ANYF (match_operand:ANYF 1 "register_operand" "f") + (match_operand:ANYF 2 "register_operand" "f")))] + "" + "fdiv\t %0, %1, %2" + [(set_attr "type" "fdiv") + (set_attr "length" "4")] +) + +(define_insn "sqrt2" + [(set (match_operand:ANYF 0 "register_operand" "=f") + (sqrt:ANYF (match_operand:ANYF 1 "register_operand" "f")))] + "" + "fsqrt\t %0, %1" + [(set_attr "type" "fsqrt") + (set_attr "length" "4")] +) + +;; Conditional Branch patterns + +(define_expand "cstore4" + [(set (match_operand:SI 0 "register_operand" "") + (match_operator:SI 1 "nds32_float_comparison_operator" + [(match_operand:ANYF 2 "register_operand" "") + (match_operand:ANYF 3 "register_operand" "")]))] + "" +{ + nds32_expand_float_cstore (operands); + DONE; +}) + +(define_expand "cbranch4" + [(set (pc) + (if_then_else (match_operator 0 "nds32_float_comparison_operator" + [(match_operand:ANYF 1 "register_operand" "") + (match_operand:ANYF 2 "register_operand" "")]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "" +{ + nds32_expand_float_cbranch (operands); + DONE; +}) + +;; Copysign Instructions. 
+ +(define_insn "copysignsf3" + [(set (match_operand:SF 0 "register_operand" "=f") + (unspec:SF [(match_operand:SF 1 "register_operand" "f") + (match_operand:SF 2 "register_operand" "f")] + UNSPEC_COPYSIGN))] + "TARGET_FPU_SINGLE" + "fcpyss\t%0,%1,%2" + [(set_attr "type" "fcpy") + (set_attr "length" "4")] +) + +(define_insn "copysigndf3" + [(set (match_operand:DF 0 "register_operand" "=f") + (unspec:DF [(match_operand:DF 1 "register_operand" "f") + (match_operand:DF 2 "register_operand" "f")] + UNSPEC_COPYSIGN))] + "TARGET_FPU_SINGLE || TARGET_FPU_DOUBLE" + "fcpysd\t%0,%1,%2" + [(set_attr "type" "fcpy") + (set_attr "length" "4")] +) + +(define_insn "*ncopysign3" + [(set (match_operand:ANYF 0 "register_operand" "=f") + (neg:ANYF (unspec:ANYF [(match_operand:ANYF 1 "register_operand" "f") + (match_operand:ANYF 2 "register_operand" "f")] + UNSPEC_COPYSIGN)))] + "" + "fcpyns\t%0,%1,%2" + [(set_attr "type" "fcpy") + (set_attr "length" "4")] +) + +;; Absolute Instructions + +(define_insn "abssf2" + [(set (match_operand:SF 0 "register_operand" "=f, r") + (abs:SF (match_operand:SF 1 "register_operand" "f, r")))] + "TARGET_FPU_SINGLE || TARGET_EXT_PERF" + "@ + fabss\t%0, %1 + bclr\t%0, %1, 31" + [(set_attr "type" "fabs,alu") + (set_attr "length" "4") + (set_attr "feature" "fpu,pe1")] +) + +(define_insn "absdf2" + [(set (match_operand:DF 0 "register_operand" "=f") + (abs:DF (match_operand:DF 1 "register_operand" "f")))] + "TARGET_FPU_DOUBLE" + "fabsd\t%0, %1" + [(set_attr "type" "fabs") + (set_attr "length" "4")] +) + +;; Negation Instructions + +(define_insn "*negsf2" + [(set (match_operand:SF 0 "register_operand" "=f, r") + (neg:SF (match_operand:SF 1 "register_operand" "f, r")))] + "TARGET_FPU_SINGLE || TARGET_EXT_PERF" + "@ + fcpynss\t%0, %1, %1 + btgl\t%0, %1, 31" + [(set_attr "type" "fcpy,alu") + (set_attr "length" "4") + (set_attr "feature" "fpu,pe1")] +) + +(define_insn "*negdf2" + [(set (match_operand:DF 0 "register_operand" "=f") + (neg:DF (match_operand:DF 1 
"register_operand" "f")))] + "TARGET_FPU_DOUBLE" + "fcpynsd\t%0, %1, %1" + [(set_attr "type" "fcpy") + (set_attr "length" "4")] +) + +;; Data Format Conversion Instructions + +(define_insn "floatunssi2" + [(set (match_operand:ANYF 0 "register_operand" "=f") + (unsigned_float:ANYF (match_operand:SI 1 "register_operand" "f")))] + "" + "fui2\t %0, %1" + [(set_attr "type" "falu") + (set_attr "length" "4")] +) + +(define_insn "floatsi2" + [(set (match_operand:ANYF 0 "register_operand" "=f") + (float:ANYF (match_operand:SI 1 "register_operand" "f")))] + "" + "fsi2\t %0, %1" + [(set_attr "type" "falu") + (set_attr "length" "4")] +) + +(define_insn "fixuns_truncsi2" + [(set (match_operand:SI 0 "register_operand" "=f") + (unsigned_fix:SI (fix:ANYF (match_operand:ANYF 1 "register_operand" "f"))))] + "" + "f2ui.z\t %0, %1" + [(set_attr "type" "falu") + (set_attr "length" "4")] +) + +(define_insn "fix_truncsi2" + [(set (match_operand:SI 0 "register_operand" "=f") + (fix:SI (fix:ANYF (match_operand:ANYF 1 "register_operand" "f"))))] + "" + "f2si.z\t %0, %1" + [(set_attr "type" "falu") + (set_attr "length" "4")] +) + +(define_insn "extendsfdf2" + [(set (match_operand:DF 0 "register_operand" "=f") + (float_extend:DF (match_operand:SF 1 "register_operand" "f")))] + "TARGET_FPU_SINGLE && TARGET_FPU_DOUBLE" + "fs2d\t%0, %1" + [(set_attr "type" "falu") + (set_attr "length" "4")] +) + +(define_insn "truncdfsf2" + [(set (match_operand:SF 0 "register_operand" "=f") + (float_truncate:SF (match_operand:DF 1 "register_operand" "f")))] + "TARGET_FPU_SINGLE && TARGET_FPU_DOUBLE" + "fd2s\t%0, %1" + [(set_attr "type" "falu") + (set_attr "length" "4")] +) + +;; Compare Instructions + +(define_insn "cmp_eq" + [(set (match_operand:SI 0 "register_operand" "=f") + (eq:SI (match_operand:ANYF 1 "register_operand" "f") + (match_operand:ANYF 2 "register_operand" "f")))] + "" + { + if (NDS32_EXT_FPU_DOT_E) + return "fcmpeq.e %0, %1, %2"; + else + return "fcmpeq\t%0, %1, %2"; + } + [(set_attr "type" 
"fcmp") + (set_attr "length" "4")] +) + +(define_insn "cmp_lt" + [(set (match_operand:SI 0 "register_operand" "=f") + (lt:SI (match_operand:ANYF 1 "register_operand" "f") + (match_operand:ANYF 2 "register_operand" "f")))] + "" +{ + if (NDS32_EXT_FPU_DOT_E) + return "fcmplt.e %0, %1, %2"; + else + return "fcmplt\t%0, %1, %2"; +} + [(set_attr "type" "fcmp") + (set_attr "length" "4")] +) + +(define_insn "cmp_le" + [(set (match_operand:SI 0 "register_operand" "=f") + (le:SI (match_operand:ANYF 1 "register_operand" "f") + (match_operand:ANYF 2 "register_operand" "f")))] + "" +{ + if (NDS32_EXT_FPU_DOT_E) + return "fcmple.e %0, %1, %2"; + else + return "fcmple\t%0, %1, %2"; +} + [(set_attr "type" "fcmp") + (set_attr "length" "4")] +) + +(define_insn "cmp_un" + [(set (match_operand:SI 0 "register_operand" "=f") + (unordered:SI (match_operand:ANYF 1 "register_operand" "f") + (match_operand:ANYF 2 "register_operand" "f")))] + "" +{ + if (NDS32_EXT_FPU_DOT_E) + return "fcmpun.e %0, %1, %2"; + else + return "fcmpun\t%0, %1, %2"; +} + [(set_attr "type" "fcmp") + (set_attr "length" "4")] +) + +(define_split + [(set (match_operand:SF 0 "register_operand" "") + (match_operand:SF 1 "register_operand" ""))] + "!TARGET_FPU_SINGLE + && NDS32_IS_FPR_REGNUM (REGNO (operands[0])) + && NDS32_IS_FPR_REGNUM (REGNO (operands[1]))" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (match_dup 2))] +{ + operands[2] = gen_rtx_REG (SFmode, TA_REGNUM); +}) + +(define_split + [(set (match_operand:SF 0 "register_operand" "") + (match_operand:SF 1 "const_double_operand" ""))] + "!satisfies_constraint_Cs20 (operands[1]) + && !satisfies_constraint_Chig (operands[1])" + [(set (match_dup 0) (high:SF (match_dup 1))) + (set (match_dup 0) (lo_sum:SF (match_dup 0) (match_dup 1)))]) +;; ---------------------------------------------------------------------------- diff --git a/gcc/config/nds32/nds32-md-auxiliary.c b/gcc/config/nds32/nds32-md-auxiliary.c index b3673ae72e6..dca1c9f959b 100644 --- 
a/gcc/config/nds32/nds32-md-auxiliary.c +++ b/gcc/config/nds32/nds32-md-auxiliary.c @@ -742,6 +742,146 @@ nds32_expand_cstore (rtx *operands) } } +void +nds32_expand_float_cbranch (rtx *operands) +{ + enum rtx_code code = GET_CODE (operands[0]); + enum rtx_code new_code = code; + rtx cmp_op0 = operands[1]; + rtx cmp_op1 = operands[2]; + rtx tmp_reg; + rtx tmp; + + int reverse = 0; + + /* Main Goal: Use compare instruction + branch instruction. + + For example: + GT, GE: swap condition and swap operands and generate + compare instruction(LT, LE) + branch not equal instruction. + + UNORDERED, LT, LE, EQ: no need to change and generate + compare instruction(UNORDERED, LT, LE, EQ) + branch not equal instruction. + + ORDERED, NE: reverse condition and generate + compare instruction(EQ) + branch equal instruction. */ + + switch (code) + { + case GT: + case GE: + tmp = cmp_op0; + cmp_op0 = cmp_op1; + cmp_op1 = tmp; + new_code = swap_condition (new_code); + break; + case UNORDERED: + case LT: + case LE: + case EQ: + break; + case ORDERED: + case NE: + new_code = reverse_condition (new_code); + reverse = 1; + break; + case UNGT: + case UNGE: + new_code = reverse_condition_maybe_unordered (new_code); + reverse = 1; + break; + case UNLT: + case UNLE: + new_code = reverse_condition_maybe_unordered (new_code); + tmp = cmp_op0; + cmp_op0 = cmp_op1; + cmp_op1 = tmp; + new_code = swap_condition (new_code); + reverse = 1; + break; + default: + return; + } + + tmp_reg = gen_reg_rtx (SImode); + emit_insn (gen_rtx_SET (tmp_reg, + gen_rtx_fmt_ee (new_code, SImode, + cmp_op0, cmp_op1))); + + PUT_CODE (operands[0], reverse ? 
EQ : NE); + emit_insn (gen_cbranchsi4 (operands[0], tmp_reg, + const0_rtx, operands[3])); +} + +void +nds32_expand_float_cstore (rtx *operands) +{ + enum rtx_code code = GET_CODE (operands[1]); + enum rtx_code new_code = code; + machine_mode mode = GET_MODE (operands[2]); + + rtx cmp_op0 = operands[2]; + rtx cmp_op1 = operands[3]; + rtx tmp; + + /* Main Goal: Use compare instruction to store value. + + For example: + GT, GE: swap condition and swap operands. + reg_R = (reg_A > reg_B) --> fcmplt reg_R, reg_B, reg_A + reg_R = (reg_A >= reg_B) --> fcmple reg_R, reg_B, reg_A + + LT, LE, EQ: no need to change, it is already LT, LE, EQ. + reg_R = (reg_A < reg_B) --> fcmplt reg_R, reg_A, reg_B + reg_R = (reg_A <= reg_B) --> fcmple reg_R, reg_A, reg_B + reg_R = (reg_A == reg_B) --> fcmpeq reg_R, reg_A, reg_B + + ORDERED: reverse condition and using xor instruction to achieve 'ORDERED'. + reg_R = ordered (reg_A, reg_B) --> fcmpun reg_R, reg_A, reg_B + xor reg_R, reg_R, const1_rtx + + NE: reverse condition and using xor instruction to achieve 'NE'. 
+ reg_R = (reg_A != reg_B) --> fcmpeq reg_R, reg_A, reg_B + xor reg_R, reg_R, const1_rtx */ + switch (code) + { + case GT: + case GE: + tmp = cmp_op0; + cmp_op0 = cmp_op1; + cmp_op1 =tmp; + new_code = swap_condition (new_code); + break; + case UNORDERED: + case LT: + case LE: + case EQ: + break; + case ORDERED: + if (mode == SFmode) + emit_insn (gen_cmpsf_un (operands[0], cmp_op0, cmp_op1)); + else + emit_insn (gen_cmpdf_un (operands[0], cmp_op0, cmp_op1)); + + emit_insn (gen_xorsi3 (operands[0], operands[0], const1_rtx)); + return; + case NE: + if (mode == SFmode) + emit_insn (gen_cmpsf_eq (operands[0], cmp_op0, cmp_op1)); + else + emit_insn (gen_cmpdf_eq (operands[0], cmp_op0, cmp_op1)); + + emit_insn (gen_xorsi3 (operands[0], operands[0], const1_rtx)); + return; + default: + return; + } + + emit_insn (gen_rtx_SET (operands[0], + gen_rtx_fmt_ee (new_code, SImode, + cmp_op0, cmp_op1))); +} + enum nds32_expand_result_type nds32_expand_movcc (rtx *operands) { @@ -759,6 +899,11 @@ nds32_expand_movcc (rtx *operands) we have gcc generate original template rtx. */ return EXPAND_CREATE_TEMPLATE; } + else if ((TARGET_FPU_SINGLE && cmp0_mode == SFmode) + || (TARGET_FPU_DOUBLE && cmp0_mode == DFmode)) + { + nds32_expand_float_movcc (operands); + } else { /* Since there is only 'slt'(Set when Less Than) instruction for @@ -849,6 +994,203 @@ nds32_expand_movcc (rtx *operands) return EXPAND_CREATE_TEMPLATE; } +void +nds32_expand_float_movcc (rtx *operands) +{ + if ((GET_CODE (operands[1]) == EQ || GET_CODE (operands[1]) == NE) + && GET_MODE (XEXP (operands[1], 0)) == SImode + && XEXP (operands[1], 1) == const0_rtx) + { + /* If the operands[1] rtx is already (eq X 0) or (ne X 0), + we have gcc generate original template rtx. 
*/ + return; + } + else + { + enum rtx_code code = GET_CODE (operands[1]); + enum rtx_code new_code = code; + machine_mode cmp0_mode = GET_MODE (XEXP (operands[1], 0)); + machine_mode cmp1_mode = GET_MODE (XEXP (operands[1], 1)); + rtx cmp_op0 = XEXP (operands[1], 0); + rtx cmp_op1 = XEXP (operands[1], 1); + rtx tmp; + + /* Compare instruction Operations: (cmp_op0 condition cmp_op1) ? 1 : 0, + when result is 1, and 'reverse' be set 1 for fcmovzs instruction. */ + int reverse = 0; + + /* Main Goal: Use compare instruction + conditional move instruction. + Strategy : swap condition and swap comparison operands. + + For example: + a > b ? P : Q (GT) + --> b < a ? P : Q (swap condition to 'LT' and swap comparison + operands; P and Q keep their positions) + + a >= b ? P : Q (GE) + --> b <= a ? P : Q (swap condition to 'LE' and swap comparison + operands; P and Q keep their positions) + + a < b ? P : Q (LT) + --> (NO NEED TO CHANGE, it is already 'LT') + + a <= b ? P : Q (LE) + --> (NO NEED TO CHANGE, it is already 'LE') + + a == b ? P : Q (EQ) + --> (NO NEED TO CHANGE, it is already 'EQ') */ + + switch (code) + { + case GT: + case GE: + tmp = cmp_op0; + cmp_op0 = cmp_op1; + cmp_op1 =tmp; + new_code = swap_condition (new_code); + break; + case UNORDERED: + case LT: + case LE: + case EQ: + break; + case ORDERED: + case NE: + reverse = 1; + new_code = reverse_condition (new_code); + break; + case UNGT: + case UNGE: + new_code = reverse_condition_maybe_unordered (new_code); + reverse = 1; + break; + case UNLT: + case UNLE: + new_code = reverse_condition_maybe_unordered (new_code); + tmp = cmp_op0; + cmp_op0 = cmp_op1; + cmp_op1 = tmp; + new_code = swap_condition (new_code); + reverse = 1; + break; + default: + return; + } + + /* Use a temporary register to store fcmpxxs result. */ + tmp = gen_reg_rtx (SImode); + + /* Create float compare instruction for SFmode and DFmode, + other MODE using cstoresi create compare instruction. 
*/ + if ((cmp0_mode == DFmode || cmp0_mode == SFmode) + && (cmp1_mode == DFmode || cmp1_mode == SFmode)) + { + /* This emit_insn create corresponding float compare instruction */ + emit_insn (gen_rtx_SET (tmp, + gen_rtx_fmt_ee (new_code, SImode, + cmp_op0, cmp_op1))); + } + else + { + /* This emit_insn using cstoresi create corresponding + compare instruction */ + PUT_CODE (operands[1], new_code); + emit_insn (gen_cstoresi4 (tmp, operands[1], + cmp_op0, cmp_op1)); + } + /* operands[1] crete corresponding condition move instruction + for fcmovzs and fcmovns. */ + operands[1] = gen_rtx_fmt_ee (reverse ? EQ : NE, + VOIDmode, tmp, const0_rtx); + } +} + +void +nds32_emit_push_fpr_callee_saved (int base_offset) +{ + rtx fpu_insn; + rtx reg, mem; + unsigned int regno = cfun->machine->callee_saved_first_fpr_regno; + unsigned int last_fpr = cfun->machine->callee_saved_last_fpr_regno; + + while (regno <= last_fpr) + { + /* Handling two registers, using fsdi instruction. */ + reg = gen_rtx_REG (DFmode, regno); + mem = gen_frame_mem (DFmode, plus_constant (Pmode, + stack_pointer_rtx, + base_offset)); + base_offset += 8; + regno += 2; + fpu_insn = emit_move_insn (mem, reg); + RTX_FRAME_RELATED_P (fpu_insn) = 1; + } +} + +void +nds32_emit_pop_fpr_callee_saved (int gpr_padding_size) +{ + rtx fpu_insn; + rtx reg, mem, addr; + rtx dwarf, adjust_sp_rtx; + unsigned int regno = cfun->machine->callee_saved_first_fpr_regno; + unsigned int last_fpr = cfun->machine->callee_saved_last_fpr_regno; + int padding = 0; + + while (regno <= last_fpr) + { + /* Handling two registers, using fldi.bi instruction. 
*/ + if ((regno + 1) >= last_fpr) + padding = gpr_padding_size; + + reg = gen_rtx_REG (DFmode, (regno)); + addr = gen_rtx_POST_MODIFY (Pmode, stack_pointer_rtx, + gen_rtx_PLUS (Pmode, stack_pointer_rtx, + GEN_INT (8 + padding))); + mem = gen_frame_mem (DFmode, addr); + regno += 2; + fpu_insn = emit_move_insn (reg, mem); + + adjust_sp_rtx = + gen_rtx_SET (stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, + 8 + padding)); + + dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, NULL_RTX); + /* Tell gcc we adjust SP in this insn. */ + dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, copy_rtx (adjust_sp_rtx), + dwarf); + RTX_FRAME_RELATED_P (fpu_insn) = 1; + REG_NOTES (fpu_insn) = dwarf; + } +} + +void +nds32_emit_v3pop_fpr_callee_saved (int base) +{ + int fpu_base_addr = base; + int regno; + rtx fpu_insn; + rtx reg, mem; + rtx dwarf; + + regno = cfun->machine->callee_saved_first_fpr_regno; + while (regno <= cfun->machine->callee_saved_last_fpr_regno) + { + /* Handling two registers, using fldi instruction. */ + reg = gen_rtx_REG (DFmode, regno); + mem = gen_frame_mem (DFmode, plus_constant (Pmode, + stack_pointer_rtx, + fpu_base_addr)); + fpu_base_addr += 8; + regno += 2; + fpu_insn = emit_move_insn (reg, mem); + dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, NULL_RTX); + RTX_FRAME_RELATED_P (fpu_insn) = 1; + REG_NOTES (fpu_insn) = dwarf; + } +} + /* ------------------------------------------------------------------------ */ /* Function to return memory format. */ @@ -867,7 +1209,8 @@ nds32_mem_format (rtx op) op = XEXP (op, 0); /* 45 format. */ - if (GET_CODE (op) == REG && (mode_test == SImode)) + if (GET_CODE (op) == REG + && ((mode_test == SImode) || (mode_test == SFmode))) return ADDRESS_REG; /* 333 format for QI/HImode. */ @@ -875,7 +1218,8 @@ nds32_mem_format (rtx op) return ADDRESS_LO_REG_IMM3U; /* post_inc 333 format. 
*/ - if ((GET_CODE (op) == POST_INC) && (mode_test == SImode)) + if ((GET_CODE (op) == POST_INC) + && ((mode_test == SImode) || (mode_test == SFmode))) { regno = REGNO(XEXP (op, 0)); @@ -885,7 +1229,7 @@ nds32_mem_format (rtx op) /* post_inc 333 format. */ if ((GET_CODE (op) == POST_MODIFY) - && (mode_test == SImode) + && ((mode_test == SImode) || (mode_test == SFmode)) && (REG_P (XEXP (XEXP (op, 1), 0))) && (CONST_INT_P (XEXP (XEXP (op, 1), 1)))) { @@ -1409,12 +1753,25 @@ nds32_output_stack_push (rtx par_rtx) otherwise, generate 'push25 Re,0'. */ sp_adjust = cfun->machine->local_size + cfun->machine->out_args_size - + cfun->machine->callee_saved_area_gpr_padding_bytes; + + cfun->machine->callee_saved_area_gpr_padding_bytes + + cfun->machine->callee_saved_fpr_regs_size; if (satisfies_constraint_Iu08 (GEN_INT (sp_adjust)) && NDS32_DOUBLE_WORD_ALIGN_P (sp_adjust)) operands[1] = GEN_INT (sp_adjust); else - operands[1] = GEN_INT (0); + { + /* Allocate callee saved fpr space. */ + if (cfun->machine->callee_saved_first_fpr_regno != SP_REGNUM) + { + sp_adjust = cfun->machine->callee_saved_area_gpr_padding_bytes + + cfun->machine->callee_saved_fpr_regs_size; + operands[1] = GEN_INT (sp_adjust); + } + else + { + operands[1] = GEN_INT (0); + } + } /* Create assembly code pattern. */ snprintf (pattern, sizeof (pattern), "push25\t%%0, %%1"); @@ -1507,13 +1864,28 @@ nds32_output_stack_pop (rtx par_rtx ATTRIBUTE_UNUSED) and then use 'pop25 Re,0'. 
*/ sp_adjust = cfun->machine->local_size + cfun->machine->out_args_size - + cfun->machine->callee_saved_area_gpr_padding_bytes; + + cfun->machine->callee_saved_area_gpr_padding_bytes + + cfun->machine->callee_saved_fpr_regs_size; if (satisfies_constraint_Iu08 (GEN_INT (sp_adjust)) && NDS32_DOUBLE_WORD_ALIGN_P (sp_adjust) && !cfun->calls_alloca) operands[1] = GEN_INT (sp_adjust); else - operands[1] = GEN_INT (0); + { + if (cfun->machine->callee_saved_first_fpr_regno != SP_REGNUM) + { + /* If has fpr need to restore, the $sp on callee saved fpr + position, so we need to consider gpr pading bytes and + callee saved fpr size. */ + sp_adjust = cfun->machine->callee_saved_area_gpr_padding_bytes + + cfun->machine->callee_saved_fpr_regs_size; + operands[1] = GEN_INT (sp_adjust); + } + else + { + operands[1] = GEN_INT (0); + } + } /* Create assembly code pattern. */ snprintf (pattern, sizeof (pattern), "pop25\t%%0, %%1"); @@ -1638,6 +2010,162 @@ nds32_output_casesi_pc_relative (rtx *operands) return "jr\t$ta"; } +/* output a float load instruction */ +const char * +nds32_output_float_load (rtx *operands) +{ + char buff[100]; + const char *pattern; + rtx addr, addr_op0, addr_op1; + int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8; + addr = XEXP (operands[1], 0); + switch (GET_CODE (addr)) + { + case REG: + pattern = "fl%ci\t%%0, %%1"; + break; + + case PLUS: + addr_op0 = XEXP (addr, 0); + addr_op1 = XEXP (addr, 1); + + if (REG_P (addr_op0) && REG_P (addr_op1)) + pattern = "fl%c\t%%0, %%1"; + else if (REG_P (addr_op0) && CONST_INT_P (addr_op1)) + pattern = "fl%ci\t%%0, %%1"; + else if (GET_CODE (addr_op0) == MULT && REG_P (addr_op1) + && REG_P (XEXP (addr_op0, 0)) + && CONST_INT_P (XEXP (addr_op0, 1))) + pattern = "fl%c\t%%0, %%1"; + else + gcc_unreachable (); + break; + + case POST_MODIFY: + addr_op0 = XEXP (addr, 0); + addr_op1 = XEXP (addr, 1); + + if (REG_P (addr_op0) && GET_CODE (addr_op1) == PLUS + && REG_P (XEXP (addr_op1, 1))) + pattern = "fl%c.bi\t%%0, %%1"; + 
else if (REG_P (addr_op0) && GET_CODE (addr_op1) == PLUS + && CONST_INT_P (XEXP (addr_op1, 1))) + pattern = "fl%ci.bi\t%%0, %%1"; + else + gcc_unreachable (); + break; + + case POST_INC: + if (REG_P (XEXP (addr, 0))) + { + if (dp) + pattern = "fl%ci.bi\t%%0, %%1, 8"; + else + pattern = "fl%ci.bi\t%%0, %%1, 4"; + } + else + gcc_unreachable (); + break; + + case POST_DEC: + if (REG_P (XEXP (addr, 0))) + { + if (dp) + pattern = "fl%ci.bi\t%%0, %%1, -8"; + else + pattern = "fl%ci.bi\t%%0, %%1, -4"; + } + else + gcc_unreachable (); + break; + + default: + gcc_unreachable (); + } + + sprintf (buff, pattern, dp ? 'd' : 's'); + output_asm_insn (buff, operands); + return ""; +} + +/* output a float store instruction */ +const char * +nds32_output_float_store (rtx *operands) +{ + char buff[100]; + const char *pattern; + rtx addr, addr_op0, addr_op1; + int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8; + addr = XEXP (operands[0], 0); + switch (GET_CODE (addr)) + { + case REG: + pattern = "fs%ci\t%%1, %%0"; + break; + + case PLUS: + addr_op0 = XEXP (addr, 0); + addr_op1 = XEXP (addr, 1); + + if (REG_P (addr_op0) && REG_P (addr_op1)) + pattern = "fs%c\t%%1, %%0"; + else if (REG_P (addr_op0) && CONST_INT_P (addr_op1)) + pattern = "fs%ci\t%%1, %%0"; + else if (GET_CODE (addr_op0) == MULT && REG_P (addr_op1) + && REG_P (XEXP (addr_op0, 0)) + && CONST_INT_P (XEXP (addr_op0, 1))) + pattern = "fs%c\t%%1, %%0"; + else + gcc_unreachable (); + break; + + case POST_MODIFY: + addr_op0 = XEXP (addr, 0); + addr_op1 = XEXP (addr, 1); + + if (REG_P (addr_op0) && GET_CODE (addr_op1) == PLUS + && REG_P (XEXP (addr_op1, 1))) + pattern = "fs%c.bi\t%%1, %%0"; + else if (REG_P (addr_op0) && GET_CODE (addr_op1) == PLUS + && CONST_INT_P (XEXP (addr_op1, 1))) + pattern = "fs%ci.bi\t%%1, %%0"; + else + gcc_unreachable (); + break; + + case POST_INC: + if (REG_P (XEXP (addr, 0))) + { + if (dp) + pattern = "fs%ci.bi\t%%1, %%0, 8"; + else + pattern = "fs%ci.bi\t%%1, %%0, 4"; + } + else + 
gcc_unreachable (); + break; + + case POST_DEC: + if (REG_P (XEXP (addr, 0))) + { + if (dp) + pattern = "fs%ci.bi\t%%1, %%0, -8"; + else + pattern = "fs%ci.bi\t%%1, %%0, -4"; + } + else + gcc_unreachable (); + break; + + default: + gcc_unreachable (); + } + + sprintf (buff, pattern, dp ? 'd' : 's'); + output_asm_insn (buff, operands); + return ""; +} + /* Function to generate normal jump table. */ const char * nds32_output_casesi (rtx *operands) @@ -1936,6 +2464,39 @@ nds32_expand_unaligned_store (rtx *operands, enum machine_mode mode) } } +/* Using multiple load/store instruction to output doubleword instruction. */ +const char * +nds32_output_double (rtx *operands, bool load_p) +{ + char pattern[100]; + int reg = load_p ? 0 : 1; + int mem = load_p ? 1 : 0; + rtx otherops[3]; + rtx addr = XEXP (operands[mem], 0); + + otherops[0] = gen_rtx_REG (SImode, REGNO (operands[reg])); + otherops[1] = gen_rtx_REG (SImode, REGNO (operands[reg]) + 1); + + if (GET_CODE (addr) == POST_INC) + { + /* (mem (post_inc (reg))) */ + otherops[2] = XEXP (addr, 0); + snprintf (pattern, sizeof (pattern), + "%cmw.bim\t%%0, [%%2], %%1, 0", load_p ? 'l' : 's'); + } + else + { + /* (mem (reg)) */ + otherops[2] = addr; + snprintf (pattern, sizeof (pattern), + "%cmw.bi\t%%0, [%%2], %%1, 0", load_p ? 'l' : 's'); + + } + + output_asm_insn (pattern, otherops); + return ""; +} + const char * nds32_output_cbranchsi4_equality_zero (rtx_insn *insn, rtx *operands) { @@ -2120,6 +2681,115 @@ nds32_output_cbranchsi4_greater_less_zero (rtx_insn *insn, rtx *operands) return ""; } +/* Spilt a doubleword instrucion to two single word instructions. */ +void +nds32_spilt_doubleword (rtx *operands, bool load_p) +{ + int reg = load_p ? 0 : 1; + int mem = load_p ? 1 : 0; + rtx reg_rtx = load_p ? operands[0] : operands[1]; + rtx mem_rtx = load_p ? operands[1] : operands[0]; + rtx low_part[2], high_part[2]; + rtx sub_mem = XEXP (mem_rtx, 0); + + /* Generate low_part and high_part register pattern. + i.e. 
register pattern like: + (reg:DI) -> (subreg:SI (reg:DI)) + (subreg:SI (reg:DI)) */ + low_part[reg] = simplify_gen_subreg (SImode, reg_rtx, GET_MODE (reg_rtx), 0); + high_part[reg] = simplify_gen_subreg (SImode, reg_rtx, GET_MODE (reg_rtx), 4); + + /* Generate low_part and high_part memory pattern. + Memory format is (post_dec) will generate: + low_part: lwi.bi reg, [mem], 4 + high_part: lwi.bi reg, [mem], -12 */ + if (GET_CODE (sub_mem) == POST_DEC) + { + /* memory format is (post_dec (reg)), + so that extract (reg) from the (post_dec (reg)) pattern. */ + sub_mem = XEXP (sub_mem, 0); + + /* generate low_part and high_part memory format: + low_part: (post_modify ((reg) (plus (reg) (const 4))) + high_part: (post_modify ((reg) (plus (reg) (const -12))) */ + low_part[mem] = gen_frame_mem (SImode, + gen_rtx_POST_MODIFY (Pmode, sub_mem, + gen_rtx_PLUS (Pmode, + sub_mem, + GEN_INT (4)))); + high_part[mem] = gen_frame_mem (SImode, + gen_rtx_POST_MODIFY (Pmode, sub_mem, + gen_rtx_PLUS (Pmode, + sub_mem, + GEN_INT (-12)))); + } + else if (GET_CODE (sub_mem) == POST_MODIFY) + { + /* Memory format is (post_modify (reg) (plus (reg) (const))), + so that extract (reg) from the post_modify pattern. */ + rtx post_mem = XEXP (sub_mem, 0); + + /* Extract (const) from the (post_modify (reg) (plus (reg) (const))) + pattern. */ + + rtx plus_op = XEXP (sub_mem, 1); + rtx post_val = XEXP (plus_op, 1); + + /* Generate low_part and high_part memory format: + low_part: (post_modify ((reg) (plus (reg) (const))) + high_part: ((plus (reg) (const 4))) */ + low_part[mem] = gen_frame_mem (SImode, + gen_rtx_POST_MODIFY (Pmode, post_mem, + gen_rtx_PLUS (Pmode, + post_mem, + post_val))); + high_part[mem] = gen_frame_mem (SImode, plus_constant (Pmode, + post_mem, + 4)); + } + else + { + /* memory format: (symbol_ref), (const), (reg + const_int). 
*/ + low_part[mem] = adjust_address (mem_rtx, SImode, 0); + high_part[mem] = adjust_address (mem_rtx, SImode, 4); + } + + /* After reload has completed, the low part register may be used by the + high part memory address, i.e. we cannot split a sequence + like: + load $r0, [$r0] + split to + lw $r0, [$r0] + lwi $r1, [$r0 + 4] + swap position + lwi $r1, [$r0 + 4] + lw $r0, [$r0] + For store instruction we don't have a problem. + + When memory format is [post_modify], we need to emit high part instruction, + before low part instruction. + example: + load $r0, [$r2], post_val + split to + load $r1, [$r2 + 4] + load $r0, [$r2], post_val. */ + if ((load_p && reg_overlap_mentioned_p (low_part[0], high_part[1])) + || GET_CODE (sub_mem) == POST_MODIFY) + { + operands[2] = high_part[0]; + operands[3] = high_part[1]; + operands[4] = low_part[0]; + operands[5] = low_part[1]; + } + else + { + operands[2] = low_part[0]; + operands[3] = low_part[1]; + operands[4] = high_part[0]; + operands[5] = high_part[1]; + } +} + /* Return true X is need use long call. */ bool nds32_long_call_p (rtx symbol) diff --git a/gcc/config/nds32/nds32-opts.h b/gcc/config/nds32/nds32-opts.h index 77429abeead..642ec2867c5 100644 --- a/gcc/config/nds32/nds32-opts.h +++ b/gcc/config/nds32/nds32-opts.h @@ -29,7 +29,9 @@ enum nds32_arch_type { ARCH_V2, ARCH_V3, - ARCH_V3M + ARCH_V3M, + ARCH_V3F, + ARCH_V3S }; /* The code model defines the address generation strategy. */ @@ -46,4 +48,24 @@ enum nds32_cpu_type CPU_N9 }; +/* Which ABI to use. */ +enum abi_type +{ + NDS32_ABI_V2, + NDS32_ABI_V2_FP_PLUS +}; + +/* The various FPU number of registers. 
*/ +enum float_reg_number +{ + NDS32_CONFIG_FPU_0, + NDS32_CONFIG_FPU_1, + NDS32_CONFIG_FPU_2, + NDS32_CONFIG_FPU_3, + NDS32_CONFIG_FPU_4, + NDS32_CONFIG_FPU_5, + NDS32_CONFIG_FPU_6, + NDS32_CONFIG_FPU_7 +}; + #endif diff --git a/gcc/config/nds32/nds32-predicates.c b/gcc/config/nds32/nds32-predicates.c index c313efcb831..5e01430c8e3 100644 --- a/gcc/config/nds32/nds32-predicates.c +++ b/gcc/config/nds32/nds32-predicates.c @@ -448,4 +448,71 @@ nds32_symbol_load_store_p (rtx_insn *insn) return false; } + +/* Vaild memory operand for floating-point loads and stores */ +bool +nds32_float_mem_operand_p (rtx op) +{ + machine_mode mode = GET_MODE (op); + rtx addr = XEXP (op, 0); + + /* Not support [symbol] [const] memory */ + if (GET_CODE (addr) == SYMBOL_REF + || GET_CODE (addr) == CONST + || GET_CODE (addr) == LO_SUM) + return false; + + if (GET_CODE (addr) == PLUS) + { + if (GET_CODE (XEXP (addr, 0)) == SYMBOL_REF) + return false; + + /* Restrict const range: (imm12s << 2) */ + if (GET_CODE (XEXP (addr, 1)) == CONST_INT) + { + if ((mode == SImode || mode == SFmode) + && NDS32_SINGLE_WORD_ALIGN_P (INTVAL (XEXP (addr, 1))) + && !satisfies_constraint_Is14 ( XEXP(addr, 1))) + return false; + + if ((mode == DImode || mode == DFmode) + && NDS32_DOUBLE_WORD_ALIGN_P (INTVAL (XEXP (addr, 1))) + && !satisfies_constraint_Is14 (XEXP (addr, 1))) + return false; + } + } + + return true; +} + +int +nds32_cond_move_p (rtx cmp_rtx) +{ + machine_mode cmp0_mode = GET_MODE (XEXP (cmp_rtx, 0)); + machine_mode cmp1_mode = GET_MODE (XEXP (cmp_rtx, 1)); + enum rtx_code cond = GET_CODE (cmp_rtx); + + if ((cmp0_mode == DFmode || cmp0_mode == SFmode) + && (cmp1_mode == DFmode || cmp1_mode == SFmode) + && (cond == ORDERED || cond == UNORDERED)) + return true; + return false; +} + +bool +nds32_const_double_range_ok_p (rtx op, machine_mode mode, + HOST_WIDE_INT lower, HOST_WIDE_INT upper) +{ + if (GET_CODE (op) != CONST_DOUBLE + || GET_MODE (op) != mode) + return false; + + const REAL_VALUE_TYPE 
*rv; + long val; + + rv = CONST_DOUBLE_REAL_VALUE (op); + REAL_VALUE_TO_TARGET_SINGLE (*rv, val); + + return val >= lower && val < upper; +} /* ------------------------------------------------------------------------ */ diff --git a/gcc/config/nds32/nds32-protos.h b/gcc/config/nds32/nds32-protos.h index 42f8dd9d762..aebec3b0b34 100644 --- a/gcc/config/nds32/nds32-protos.h +++ b/gcc/config/nds32/nds32-protos.h @@ -58,6 +58,13 @@ extern void nds32_expand_prologue (void); extern void nds32_expand_epilogue (bool); extern void nds32_expand_prologue_v3push (void); extern void nds32_expand_epilogue_v3pop (bool); +extern void nds32_emit_push_fpr_callee_saved (int); +extern void nds32_emit_pop_fpr_callee_saved (int); +extern void nds32_emit_v3pop_fpr_callee_saved (int); + +/* Controlling Debugging Information Format. */ + +extern unsigned int nds32_dbx_register_number (unsigned int); /* ------------------------------------------------------------------------ */ @@ -101,6 +108,9 @@ extern int nds32_can_use_btgl_p (int); extern int nds32_can_use_bitci_p (int); +extern bool nds32_const_double_range_ok_p (rtx, machine_mode, + HOST_WIDE_INT, HOST_WIDE_INT); + /* Auxiliary function for 'Computing the Length of an Insn'. */ extern int nds32_adjust_insn_length (rtx_insn *, int); @@ -120,19 +130,30 @@ extern const char *nds32_output_casesi (rtx *); extern enum nds32_expand_result_type nds32_expand_cbranch (rtx *); extern enum nds32_expand_result_type nds32_expand_cstore (rtx *); +extern void nds32_expand_float_cbranch (rtx *); +extern void nds32_expand_float_cstore (rtx *); /* Auxiliary functions for conditional move generation. */ extern enum nds32_expand_result_type nds32_expand_movcc (rtx *); +extern void nds32_expand_float_movcc (rtx *); /* Auxiliary functions to identify long-call symbol. */ extern bool nds32_long_call_p (rtx); +/* Auxiliary functions to identify conditional move comparison operand. 
*/ + +extern int nds32_cond_move_p (rtx); + /* Auxiliary functions to identify 16 bit addresing mode. */ extern enum nds32_16bit_address_type nds32_mem_format (rtx); +/* Auxiliary functions to identify floating-point addresing mode. */ + +extern bool nds32_float_mem_operand_p (rtx); + /* Auxiliary functions to output assembly code. */ extern const char *nds32_output_16bit_store (rtx *, int); @@ -140,8 +161,11 @@ extern const char *nds32_output_16bit_load (rtx *, int); extern const char *nds32_output_32bit_store (rtx *, int); extern const char *nds32_output_32bit_load (rtx *, int); extern const char *nds32_output_32bit_load_s (rtx *, int); +extern const char *nds32_output_float_load(rtx *); +extern const char *nds32_output_float_store(rtx *); extern const char *nds32_output_smw_single_word (rtx *); extern const char *nds32_output_lmw_single_word (rtx *); +extern const char *nds32_output_double (rtx *, bool); extern const char *nds32_output_cbranchsi4_equality_zero (rtx_insn *, rtx *); extern const char *nds32_output_cbranchsi4_equality_reg (rtx_insn *, rtx *); extern const char *nds32_output_cbranchsi4_equality_reg_or_const_int (rtx_insn *, @@ -154,6 +178,10 @@ extern const char *nds32_output_cbranchsi4_greater_less_zero (rtx_insn *, rtx *) extern const char *nds32_output_stack_push (rtx); extern const char *nds32_output_stack_pop (rtx); +/* Auxiliary functions to split double word RTX pattern. */ + +extern void nds32_spilt_doubleword (rtx *, bool); + /* Auxiliary functions to split large constant RTX pattern. */ extern void nds32_expand_constant (machine_mode, @@ -190,6 +218,8 @@ extern int nds32_address_cost_impl (rtx, machine_mode, addr_space_t, bool); /* Auxiliary functions for pre-define marco. */ extern void nds32_cpu_cpp_builtins(struct cpp_reader *); +extern bool nds32_split_double_word_load_store_p (rtx *,bool); + /* Functions for create nds32 specific optimization pass. 
*/ extern rtl_opt_pass *make_pass_nds32_relax_opt (gcc::context *); diff --git a/gcc/config/nds32/nds32.c b/gcc/config/nds32/nds32.c index eedf6f56df1..1070b474d8f 100644 --- a/gcc/config/nds32/nds32.c +++ b/gcc/config/nds32/nds32.c @@ -218,6 +218,10 @@ nds32_compute_stack_frame (void) cfun->machine->callee_saved_gpr_regs_size = 0; cfun->machine->callee_saved_first_gpr_regno = SP_REGNUM; cfun->machine->callee_saved_last_gpr_regno = SP_REGNUM; + cfun->machine->callee_saved_fpr_regs_size = 0; + cfun->machine->callee_saved_first_fpr_regno = SP_REGNUM; + cfun->machine->callee_saved_last_fpr_regno = SP_REGNUM; + /* Currently, there is no need to check $r28~$r31 because we will save them in another way. */ for (r = 0; r < 28; r++) @@ -235,6 +239,35 @@ nds32_compute_stack_frame (void) } } + /* Recording fpu callee-saved register. */ + if (TARGET_HARD_FLOAT) + { + for (r = NDS32_FIRST_FPR_REGNUM; r < NDS32_LAST_FPR_REGNUM; r++) + { + if (NDS32_REQUIRED_CALLEE_SAVED_P (r)) + { + /* Mark the first required callee-saved register. */ + if (cfun->machine->callee_saved_first_fpr_regno == SP_REGNUM) + { + /* Make first callee-saved number is even, + bacause we use doubleword access, and this way + promise 8-byte alignemt. */ + if (!NDS32_FPR_REGNO_OK_FOR_DOUBLE (r)) + cfun->machine->callee_saved_first_fpr_regno = r - 1; + else + cfun->machine->callee_saved_first_fpr_regno = r; + } + cfun->machine->callee_saved_last_fpr_regno = r; + } + } + + /* Make last callee-saved register number is odd, + we hope callee-saved register is even. */ + int last_fpr = cfun->machine->callee_saved_last_fpr_regno; + if (NDS32_FPR_REGNO_OK_FOR_DOUBLE (last_fpr)) + cfun->machine->callee_saved_last_fpr_regno++; + } + /* Check if this function can omit prologue/epilogue code fragment. 
If there is 'naked' attribute in this function, we can set 'naked_p' flag to indicate that @@ -252,6 +285,8 @@ nds32_compute_stack_frame (void) if (lookup_attribute ("naked", DECL_ATTRIBUTES (current_function_decl)) || (cfun->machine->callee_saved_first_gpr_regno == SP_REGNUM && cfun->machine->callee_saved_last_gpr_regno == SP_REGNUM + && cfun->machine->callee_saved_first_fpr_regno == SP_REGNUM + && cfun->machine->callee_saved_last_fpr_regno == SP_REGNUM && !df_regs_ever_live_p (FP_REGNUM) && !df_regs_ever_live_p (LP_REGNUM) && cfun->machine->local_size == 0)) @@ -340,7 +375,8 @@ nds32_compute_stack_frame (void) int sp_adjust = cfun->machine->local_size + cfun->machine->out_args_size - + cfun->machine->callee_saved_area_gpr_padding_bytes; + + cfun->machine->callee_saved_area_gpr_padding_bytes + + cfun->machine->callee_saved_fpr_regs_size; if (!v3pushpop_p && sp_adjust == 0 @@ -385,6 +421,18 @@ nds32_compute_stack_frame (void) + 1); } + if (TARGET_HARD_FLOAT) + { + /* Compute size of callee svaed floating-point registers. */ + if (cfun->machine->callee_saved_last_fpr_regno != SP_REGNUM) + { + cfun->machine->callee_saved_fpr_regs_size + = 4 * (cfun->machine->callee_saved_last_fpr_regno + - cfun->machine->callee_saved_first_fpr_regno + + 1); + } + } + /* Important: We need to make sure that (fp_size + gp_size + lp_size + callee_saved_gpr_regs_size) is 8-byte alignment. @@ -1130,45 +1178,61 @@ nds32_legitimate_index_p (machine_mode outer_mode, case CONST_INT: /* The alignment of the integer value is determined by 'outer_mode'. */ - if (GET_MODE_SIZE (outer_mode) == 1) + switch (GET_MODE_SIZE (outer_mode)) { + case 1: /* Further check if the value is legal for the 'outer_mode'. */ - if (!satisfies_constraint_Is15 (index)) - return false; + if (satisfies_constraint_Is15 (index)) + return true; + break; - /* Pass all test, the value is valid, return true. 
*/ - return true; - } - if (GET_MODE_SIZE (outer_mode) == 2 - && NDS32_HALF_WORD_ALIGN_P (INTVAL (index))) - { + case 2: /* Further check if the value is legal for the 'outer_mode'. */ - if (!satisfies_constraint_Is16 (index)) - return false; + if (satisfies_constraint_Is16 (index)) + { + /* Make sure address is half word alignment. */ + if (NDS32_HALF_WORD_ALIGN_P (INTVAL (index))) + return true; + } + break; - /* Pass all test, the value is valid, return true. */ - return true; - } - if (GET_MODE_SIZE (outer_mode) == 4 - && NDS32_SINGLE_WORD_ALIGN_P (INTVAL (index))) - { + case 4: /* Further check if the value is legal for the 'outer_mode'. */ - if (!satisfies_constraint_Is17 (index)) - return false; + if (satisfies_constraint_Is17 (index)) + { + if ((TARGET_FPU_SINGLE || TARGET_FPU_DOUBLE)) + { + if (!satisfies_constraint_Is14 (index)) + return false; + } + + /* Make sure address is word alignment. */ + if (NDS32_SINGLE_WORD_ALIGN_P (INTVAL (index))) + return true; + } + break; - /* Pass all test, the value is valid, return true. */ - return true; - } - if (GET_MODE_SIZE (outer_mode) == 8 - && NDS32_SINGLE_WORD_ALIGN_P (INTVAL (index))) - { - /* Further check if the value is legal for the 'outer_mode'. */ - if (!satisfies_constraint_Is17 (gen_int_mode (INTVAL (index) + 4, - SImode))) - return false; + case 8: + if (satisfies_constraint_Is17 (gen_int_mode (INTVAL (index) + 4, + SImode))) + { + if ((TARGET_FPU_SINGLE || TARGET_FPU_DOUBLE)) + { + if (!satisfies_constraint_Is14 (index)) + return false; + } + + /* Make sure address is word alignment. + Currently we do not have 64-bit load/store yet, + so we will use two 32-bit load/store instructions to do + memory access and they are single word alignment. */ + if (NDS32_SINGLE_WORD_ALIGN_P (INTVAL (index))) + return true; + } + break; - /* Pass all test, the value is valid, return true. 
*/ - return true; + default: + return false; } return false; @@ -1262,6 +1326,39 @@ nds32_register_passes (void) /* ------------------------------------------------------------------------ */ /* PART 3: Implement target hook stuff definitions. */ + + +/* Register Usage. */ + +static void +nds32_conditional_register_usage (void) +{ + int regno; + + if (TARGET_HARD_FLOAT) + { + for (regno = NDS32_FIRST_FPR_REGNUM; + regno <= NDS32_LAST_FPR_REGNUM; regno++) + { + fixed_regs[regno] = 0; + if (regno < NDS32_FIRST_FPR_REGNUM + NDS32_MAX_FPR_REGS_FOR_ARGS) + call_used_regs[regno] = 1; + else if (regno >= NDS32_FIRST_FPR_REGNUM + 22 + && regno < NDS32_FIRST_FPR_REGNUM + 48) + call_used_regs[regno] = 1; + else + call_used_regs[regno] = 0; + } + } + else if (TARGET_FPU_SINGLE || TARGET_FPU_DOUBLE) + { + for (regno = NDS32_FIRST_FPR_REGNUM; + regno <= NDS32_LAST_FPR_REGNUM; + regno++) + fixed_regs[regno] = 0; + } +} + /* Register Classes. */ @@ -1298,6 +1395,22 @@ nds32_register_priority (int hard_regno) } } +static bool +nds32_can_change_mode_class (machine_mode from, + machine_mode to, + reg_class_t rclass) +{ + /* Don't spill double-precision register to two singal-precision + registers */ + if ((TARGET_FPU_SINGLE || TARGET_FPU_DOUBLE) + && GET_MODE_SIZE (from) != GET_MODE_SIZE (to)) + { + return !reg_classes_intersect_p (rclass, FP_REGS); + } + + return true; +} + /* Stack Layout and Calling Conventions. */ @@ -1422,8 +1535,28 @@ nds32_function_arg (cumulative_args_t ca, machine_mode mode, are different. */ if (TARGET_HARD_FLOAT) { - /* Currently we have not implemented hard float yet. */ - gcc_unreachable (); + /* For TARGET_HARD_FLOAT calling convention, we use GPR and FPR + to pass argument. We have to further check TYPE and MODE so + that we can determine which kind of register we shall use. */ + + /* Note that we need to pass argument entirely in registers under + hard float abi. 
*/ + if (GET_MODE_CLASS (mode) == MODE_FLOAT + && NDS32_ARG_ENTIRE_IN_FPR_REG_P (cum->fpr_offset, mode, type)) + { + /* Pick up the next available FPR register number. */ + regno + = NDS32_AVAILABLE_REGNUM_FOR_FPR_ARG (cum->fpr_offset, mode, type); + return gen_rtx_REG (mode, regno); + } + else if (GET_MODE_CLASS (mode) != MODE_FLOAT + && NDS32_ARG_ENTIRE_IN_GPR_REG_P (cum->gpr_offset, mode, type)) + { + /* Pick up the next available GPR register number. */ + regno + = NDS32_AVAILABLE_REGNUM_FOR_GPR_ARG (cum->gpr_offset, mode, type); + return gen_rtx_REG (mode, regno); + } } else { @@ -1506,23 +1639,20 @@ static void nds32_function_arg_advance (cumulative_args_t ca, machine_mode mode, const_tree type, bool named) { - machine_mode sub_mode; CUMULATIVE_ARGS *cum = get_cumulative_args (ca); if (named) { /* We need to further check TYPE and MODE so that we can determine - which kind of register we shall advance. */ - if (type && TREE_CODE (type) == COMPLEX_TYPE) - sub_mode = TYPE_MODE (TREE_TYPE (type)); - else - sub_mode = mode; + which kind of register we shall advance. */ /* Under hard float abi, we may advance FPR registers. */ - if (TARGET_HARD_FLOAT && GET_MODE_CLASS (sub_mode) == MODE_FLOAT) + if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT) { - /* Currently we have not implemented hard float yet. 
*/ - gcc_unreachable (); + cum->fpr_offset + = NDS32_AVAILABLE_REGNUM_FOR_FPR_ARG (cum->fpr_offset, mode, type) + - NDS32_FPR_ARG_FIRST_REGNUM + + NDS32_NEED_N_REGS_FOR_ARG (mode, type); } else { @@ -1569,22 +1699,62 @@ nds32_function_value (const_tree ret_type, mode = TYPE_MODE (ret_type); unsignedp = TYPE_UNSIGNED (ret_type); - mode = promote_mode (ret_type, mode, &unsignedp); + if (INTEGRAL_TYPE_P (ret_type)) + mode = promote_mode (ret_type, mode, &unsignedp); - return gen_rtx_REG (mode, NDS32_GPR_RET_FIRST_REGNUM); + if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode)) + return gen_rtx_REG (mode, NDS32_FPR_RET_FIRST_REGNUM); + else + return gen_rtx_REG (mode, NDS32_GPR_RET_FIRST_REGNUM); } static rtx nds32_libcall_value (machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED) { + if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode)) + return gen_rtx_REG (mode, NDS32_FPR_RET_FIRST_REGNUM); + return gen_rtx_REG (mode, NDS32_GPR_RET_FIRST_REGNUM); } static bool nds32_function_value_regno_p (const unsigned int regno) { - return (regno == NDS32_GPR_RET_FIRST_REGNUM); + if (regno == NDS32_GPR_RET_FIRST_REGNUM + || (TARGET_HARD_FLOAT + && regno == NDS32_FPR_RET_FIRST_REGNUM)) + return true; + + return false; +} + +/* -- How Large Values Are Returned. */ + +static bool +nds32_return_in_memory (const_tree type, + const_tree fntype ATTRIBUTE_UNUSED) +{ + /* Note that int_size_in_bytes can return -1 if the size can vary + or is larger than an integer. */ + HOST_WIDE_INT size = int_size_in_bytes (type); + + /* For COMPLEX_TYPE, if the total size cannot be hold within two registers, + the return value is supposed to be in memory. We need to be aware of + that the size may be -1. */ + if (TREE_CODE (type) == COMPLEX_TYPE) + if (size < 0 || size > 2 * UNITS_PER_WORD) + return true; + + /* If it is BLKmode and the total size cannot be hold within two registers, + the return value is supposed to be in memory. We need to be aware of + that the size may be -1. 
*/ + if (TYPE_MODE (type) == BLKmode) + if (size < 0 || size > 2 * UNITS_PER_WORD) + return true; + + /* For other cases, having result in memory is unnecessary. */ + return false; } /* -- Function Entry and Exit. */ @@ -1614,7 +1784,7 @@ nds32_asm_function_prologue (FILE *file) /* Use df_regs_ever_live_p() to detect if the register is ever used in the current function. */ fprintf (file, "\t! registers ever_live: "); - for (r = 0; r < 32; r++) + for (r = 0; r < 65; r++) { if (df_regs_ever_live_p (r)) fprintf (file, "%s, ", reg_names[r]); @@ -2013,6 +2183,43 @@ nds32_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value) static bool nds32_legitimate_address_p (machine_mode mode, rtx x, bool strict) { + if (TARGET_FPU_SINGLE || TARGET_FPU_DOUBLE) + { + /* When using floating-point instructions, + we don't allow 'addr' to be [symbol_ref], [CONST] pattern. */ + if ((mode == DFmode || mode == SFmode) + && (GET_CODE (x) == SYMBOL_REF + || GET_CODE(x) == CONST)) + return false; + + /* Allow [post_modify] addressing mode, when using FPU instructions. */ + if (GET_CODE (x) == POST_MODIFY + && mode == DFmode) + { + if (GET_CODE (XEXP (x, 0)) == REG + && GET_CODE (XEXP (x, 1)) == PLUS) + { + rtx plus_op = XEXP (x, 1); + rtx op0 = XEXP (plus_op, 0); + rtx op1 = XEXP (plus_op, 1); + + if (nds32_address_register_rtx_p (op0, strict) + && CONST_INT_P (op1)) + { + if (satisfies_constraint_Is14 (op1)) + { + /* Make sure address is word alignment. + Currently we do not have 64-bit load/store yet, + so we will use two 32-bit load/store instructions to do + memory access and they are single word alignment. */ + if (NDS32_SINGLE_WORD_ALIGN_P (INTVAL (op1))) + return true; + } + } + } + } + } + /* For (mem:DI addr) or (mem:DF addr) case, we only allow 'addr' to be [reg], [symbol_ref], [const], or [reg + const_int] pattern. */ @@ -2031,6 +2238,13 @@ nds32_legitimate_address_p (machine_mode mode, rtx x, bool strict) return true; } + /* Allow [post_inc] and [post_dec] addressing mode. 
*/ + if (GET_CODE (x) == POST_INC || GET_CODE (x) == POST_DEC) + { + if (nds32_address_register_rtx_p (XEXP (x, 0), strict)) + return true; + } + /* Now check [reg], [symbol_ref], and [const]. */ if (GET_CODE (x) != REG && GET_CODE (x) != SYMBOL_REF @@ -2216,10 +2430,13 @@ nds32_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED, reg_class_t from, reg_class_t to) { - if (from == HIGH_REGS || to == HIGH_REGS) - return 6; - - return 2; + if ((from == FP_REGS && to != FP_REGS) + || (from != FP_REGS && to == FP_REGS)) + return 9; + else if (from == HIGH_REGS || to == HIGH_REGS) + return optimize_size ? 6 : 2; + else + return 2; } static int @@ -2305,7 +2522,10 @@ nds32_asm_file_start (void) /* Tell assembler which ABI we are using. */ fprintf (asm_out_file, "\t! ABI version\n"); - fprintf (asm_out_file, "\t.abi_2\n"); + if (TARGET_HARD_FLOAT) + fprintf (asm_out_file, "\t.abi_2fp_plus\n"); + else + fprintf (asm_out_file, "\t.abi_2\n"); /* Tell assembler that this asm code is generated by compiler. */ fprintf (asm_out_file, "\t! This asm file is generated by compiler\n"); @@ -2334,6 +2554,15 @@ nds32_asm_file_start (void) fprintf (asm_out_file, "\t! Endian setting\t: %s\n", ((TARGET_BIG_ENDIAN) ? "big-endian" : "little-endian")); + fprintf (asm_out_file, "\t! Use SP floating-point instruction\t: %s\n", + ((TARGET_FPU_SINGLE) ? "Yes" + : "No")); + fprintf (asm_out_file, "\t! Use DP floating-point instruction\t: %s\n", + ((TARGET_FPU_DOUBLE) ? "Yes" + : "No")); + fprintf (asm_out_file, "\t! ABI version\t\t: %s\n", + ((TARGET_HARD_FLOAT) ? "ABI2FP+" + : "ABI2")); fprintf (asm_out_file, "\t! 
------------------------------------\n"); @@ -2404,6 +2633,10 @@ nds32_print_operand (FILE *stream, rtx x, int code) { HOST_WIDE_INT one_position; HOST_WIDE_INT zero_position; + bool pick_lsb_p = false; + bool pick_msb_p = false; + int regno; + int op_value; switch (code) @@ -2440,6 +2673,20 @@ nds32_print_operand (FILE *stream, rtx x, int code) /* No need to handle following process, so return immediately. */ return; + case 'L': + /* X is supposed to be REG rtx. */ + gcc_assert (REG_P (x)); + /* Claim that we are going to pick LSB part of X. */ + pick_lsb_p = true; + break; + + case 'H': + /* X is supposed to be REG rtx. */ + gcc_assert (REG_P (x)); + /* Claim that we are going to pick MSB part of X. */ + pick_msb_p = true; + break; + case 'V': /* 'x' is supposed to be CONST_INT, get the value. */ gcc_assert (CONST_INT_P (x)); @@ -2489,6 +2736,38 @@ nds32_print_operand (FILE *stream, rtx x, int code) break; case REG: + /* Print a Double-precision register name. */ + if ((GET_MODE (x) == DImode || GET_MODE (x) == DFmode) + && NDS32_IS_FPR_REGNUM (REGNO (x))) + { + regno = REGNO (x); + if (!NDS32_FPR_REGNO_OK_FOR_DOUBLE (regno)) + { + output_operand_lossage ("invalid operand for code '%c'", code); + break; + } + fprintf (stream, "$fd%d", (regno - NDS32_FIRST_FPR_REGNUM) >> 1); + break; + } + + /* Print LSB or MSB part of register pair if the + constraint modifier 'L' or 'H' is specified. */ + if ((GET_MODE (x) == DImode || GET_MODE (x) == DFmode) + && NDS32_IS_GPR_REGNUM (REGNO (x))) + { + if ((pick_lsb_p && WORDS_BIG_ENDIAN) + || (pick_msb_p && !WORDS_BIG_ENDIAN)) + { + /* If we would like to print out LSB register under big-endian, + or print out MSB register under little-endian, we need to + increase register number. */ + regno = REGNO (x); + regno++; + fputs (reg_names[regno], stream); + break; + } + } + /* Forbid using static chain register ($r16) on reduced-set registers configuration. 
*/ if (TARGET_REDUCED_REGS @@ -2503,6 +2782,33 @@ nds32_print_operand (FILE *stream, rtx x, int code) output_address (GET_MODE (x), XEXP (x, 0)); break; + case HIGH: + if (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE) + { + const REAL_VALUE_TYPE *rv; + long val; + gcc_assert (GET_MODE (x) == SFmode); + + rv = CONST_DOUBLE_REAL_VALUE (XEXP (x, 0)); + REAL_VALUE_TO_TARGET_SINGLE (*rv, val); + + fprintf (stream, "hi20(0x%lx)", val); + } + else + gcc_unreachable (); + break; + + case CONST_DOUBLE: + const REAL_VALUE_TYPE *rv; + long val; + gcc_assert (GET_MODE (x) == SFmode); + + rv = CONST_DOUBLE_REAL_VALUE (x); + REAL_VALUE_TO_TARGET_SINGLE (*rv, val); + + fprintf (stream, "0x%lx", val); + break; + case CODE_LABEL: case CONST_INT: case CONST: @@ -2687,6 +2993,84 @@ nds32_print_operand_address (FILE *stream, machine_mode /*mode*/, rtx x) } } +/* -- Assembler Commands for Exception Regions. */ + +static rtx +nds32_dwarf_register_span (rtx reg) +{ + rtx dwarf_high, dwarf_low; + rtx dwarf_single; + machine_mode mode; + int regno; + + mode = GET_MODE (reg); + regno = REGNO (reg); + + /* We need to adjust dwarf register information for floating-point registers + rather than using default register number mapping. */ + if (regno >= NDS32_FIRST_FPR_REGNUM + && regno <= NDS32_LAST_FPR_REGNUM) + { + if (mode == DFmode || mode == SCmode) + { + /* By default, GCC maps increasing register numbers to increasing + memory locations, but paired FPRs in NDS32 target are always + big-endian, i.e.: + + fd0 : fs0 fs1 + (MSB) (LSB) + + We must return parallel rtx to represent such layout. 
*/ + dwarf_high = gen_rtx_REG (word_mode, regno); + dwarf_low = gen_rtx_REG (word_mode, regno + 1); + return gen_rtx_PARALLEL (VOIDmode, + gen_rtvec (2, dwarf_low, dwarf_high)); + } + else if (mode == DCmode) + { + rtx dwarf_high_re = gen_rtx_REG (word_mode, regno); + rtx dwarf_low_re = gen_rtx_REG (word_mode, regno + 1); + rtx dwarf_high_im = gen_rtx_REG (word_mode, regno); + rtx dwarf_low_im = gen_rtx_REG (word_mode, regno + 1); + return gen_rtx_PARALLEL (VOIDmode, + gen_rtvec (4, dwarf_low_re, dwarf_high_re, + dwarf_high_im, dwarf_low_im)); + } + else if (mode == SFmode || mode == SImode) + { + /* Create new dwarf information with adjusted register number. */ + dwarf_single = gen_rtx_REG (word_mode, regno); + return gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, dwarf_single)); + } + else + { + /* We should not be here. */ + gcc_unreachable (); + } + } + + return NULL_RTX; +} + +/* Map internal gcc register numbers to DWARF2 register numbers. */ + +unsigned int +nds32_dbx_register_number (unsigned int regno) +{ + /* The nds32 port in GDB maintains a mapping between dwarf register + number and displayed register name. For backward compatibility to + previous toolchain, currently our gdb still has four registers + (d0.l, d0.h, d1.l, and d1.h) between GPR and FPR while compiler + does not count those four registers in its register number table. + So we have to add 4 on its register number and then create new + dwarf information. Hopefully we can discard such workaround + in the future. */ + if (NDS32_IS_FPR_REGNUM (regno)) + return regno + 4; + + return regno; +} + /* Defining target-specific uses of __attribute__. 
*/ @@ -2894,6 +3278,16 @@ nds32_option_override (void) target_flags &= ~MASK_V3PUSH; } + if (TARGET_HARD_FLOAT && !(TARGET_FPU_SINGLE || TARGET_FPU_DOUBLE)) + { + if (nds32_arch_option == ARCH_V3S || nds32_arch_option == ARCH_V3F) + error ("Disable FPU ISA, " + "the ABI option must be enable '-mfloat-abi=soft'"); + else + error ("'-mabi=2fp+' option only support when FPU available, " + "must be enable '-mext-fpu-sp' or '-mext-fpu-dp'"); + } + /* Currently, we don't support PIC code generation yet. */ if (flag_pic) sorry ("position-independent code not supported"); @@ -2954,6 +3348,11 @@ nds32_cpu_cpp_builtins(struct cpp_reader *pfile) builtin_define ("__nds32__"); builtin_define ("__NDS32__"); + if (TARGET_HARD_FLOAT) + builtin_define ("__NDS32_ABI_2FP_PLUS__"); + else + builtin_define ("__NDS32_ABI_2__"); + if (TARGET_ISA_V2) builtin_define ("__NDS32_ISA_V2__"); if (TARGET_ISA_V3) @@ -2961,6 +3360,40 @@ nds32_cpu_cpp_builtins(struct cpp_reader *pfile) if (TARGET_ISA_V3M) builtin_define ("__NDS32_ISA_V3M__"); + if (TARGET_FPU_SINGLE) + builtin_define ("__NDS32_EXT_FPU_SP__"); + if (TARGET_FPU_DOUBLE) + builtin_define ("__NDS32_EXT_FPU_DP__"); + + if (TARGET_EXT_FPU_FMA) + builtin_define ("__NDS32_EXT_FPU_FMA__"); + if (NDS32_EXT_FPU_DOT_E) + builtin_define ("__NDS32_EXT_FPU_DOT_E__"); + if (TARGET_FPU_SINGLE || TARGET_FPU_DOUBLE) + { + switch (nds32_fp_regnum) + { + case 0: + case 4: + builtin_define ("__NDS32_EXT_FPU_CONFIG_0__"); + break; + case 1: + case 5: + builtin_define ("__NDS32_EXT_FPU_CONFIG_1__"); + break; + case 2: + case 6: + builtin_define ("__NDS32_EXT_FPU_CONFIG_2__"); + break; + case 3: + case 7: + builtin_define ("__NDS32_EXT_FPU_CONFIG_3__"); + break; + default: + abort (); + } + } + if (TARGET_BIG_ENDIAN) builtin_define ("__NDS32_EB__"); else @@ -2988,6 +3421,12 @@ nds32_cpu_cpp_builtins(struct cpp_reader *pfile) builtin_assert ("cpu=nds32"); builtin_assert ("machine=nds32"); + + if (TARGET_HARD_FLOAT) + builtin_define ("__NDS32_ABI_2FP_PLUS"); 
+ else + builtin_define ("__NDS32_ABI_2"); + #undef builtin_define #undef builtin_assert } @@ -3026,16 +3465,38 @@ nds32_adjust_reg_alloc_order (void) /* -- How Values Fit in Registers. */ +static unsigned +nds32_hard_regno_nregs (unsigned regno ATTRIBUTE_UNUSED, + machine_mode mode) +{ + return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD); +} + /* Implement TARGET_HARD_REGNO_MODE_OK. */ static bool nds32_hard_regno_mode_ok (unsigned int regno, machine_mode mode) { - /* Restrict double-word quantities to even register pairs. */ - if (targetm.hard_regno_nregs (regno, mode) == 1 - || !((regno) & 1)) + if (regno >= FIRST_PSEUDO_REGISTER) /* Pseudo register numbers start at FIRST_PSEUDO_REGISTER. */ return true; + if ((TARGET_FPU_SINGLE || TARGET_FPU_DOUBLE) && NDS32_IS_FPR_REGNUM (regno)) + { + if (NDS32_IS_EXT_FPR_REGNUM (regno)) + return (NDS32_FPR_REGNO_OK_FOR_DOUBLE (regno) && (mode == DFmode)); + else if (mode == SFmode || mode == SImode) + return NDS32_FPR_REGNO_OK_FOR_SINGLE (regno); + else if (mode == DFmode) + return NDS32_FPR_REGNO_OK_FOR_DOUBLE (regno); + + return false; + } + + /* Restrict double-word quantities to even register pairs.
*/ + if (regno <= NDS32_LAST_GPR_REGNUM) + return (targetm.hard_regno_nregs (regno, mode) == 1 + || !((regno) & 1)); + return false; } @@ -3048,10 +3509,22 @@ nds32_hard_regno_mode_ok (unsigned int regno, machine_mode mode) static bool nds32_modes_tieable_p (machine_mode mode1, machine_mode mode2) { - return (GET_MODE_CLASS (mode1) == MODE_INT - && GET_MODE_CLASS (mode2) == MODE_INT - && GET_MODE_SIZE (mode1) <= UNITS_PER_WORD - && GET_MODE_SIZE (mode2) <= UNITS_PER_WORD); + if ((GET_MODE_CLASS (mode1) == MODE_INT + && GET_MODE_CLASS (mode2) == MODE_INT) + && GET_MODE_SIZE (mode1) <= UNITS_PER_WORD + && GET_MODE_SIZE (mode2) <= UNITS_PER_WORD) + return true; + + if (GET_MODE_SIZE (mode1) == GET_MODE_SIZE (mode2)) + { + if ((TARGET_FPU_SINGLE && !TARGET_FPU_DOUBLE) + && (mode1 == DFmode || mode2 == DFmode)) + return false; + else + return true; + } + + return false; } #undef TARGET_MODES_TIEABLE_P @@ -3077,7 +3550,14 @@ nds32_regno_reg_class (int regno) else if (regno >= 20 && regno <= 31) return HIGH_REGS; else if (regno == 32 || regno == 33) - return FRAME_REGS; + { + /* $SFP and $AP is FRAME_REGS in fact, However prevent IRA don't + know how to allocate register for $SFP and $AP, just tell IRA they + are GENERAL_REGS, and ARM do this hack too. 
*/ + return GENERAL_REGS; + } + else if (regno >= 34 && regno <= 97) + return FP_REGS; else return NO_REGS; } @@ -3123,6 +3603,7 @@ nds32_initial_elimination_offset (unsigned int from_reg, unsigned int to_reg) + cfun->machine->lp_size + cfun->machine->callee_saved_gpr_regs_size + cfun->machine->callee_saved_area_gpr_padding_bytes + + cfun->machine->callee_saved_fpr_regs_size + cfun->machine->local_size + cfun->machine->out_args_size); } @@ -3143,7 +3624,8 @@ nds32_initial_elimination_offset (unsigned int from_reg, unsigned int to_reg) + cfun->machine->gp_size + cfun->machine->lp_size + cfun->machine->callee_saved_gpr_regs_size - + cfun->machine->callee_saved_area_gpr_padding_bytes); + + cfun->machine->callee_saved_area_gpr_padding_bytes + + cfun->machine->callee_saved_fpr_regs_size); } else { @@ -3162,10 +3644,11 @@ nds32_init_cumulative_args (CUMULATIVE_ARGS *cum, tree fndecl ATTRIBUTE_UNUSED, int n_named_args ATTRIBUTE_UNUSED) { - /* Initial available registers - (in offset, corresponding to NDS32_GPR_ARG_FIRST_REGNUM) + /* Initial available registers. The values are offset against + NDS32_GPR_ARG_FIRST_REGNUM and NDS32_FPR_ARG_FIRST_REGNUM for passing arguments. */ cum->gpr_offset = 0; + cum->fpr_offset = 0; } /* -- Function Entry and Exit. */ @@ -3248,17 +3731,79 @@ nds32_expand_prologue (void) fp_adjust); } - /* Adjust $sp = $sp - local_size - out_args_size - - callee_saved_area_gpr_padding_bytes. */ - sp_adjust = cfun->machine->local_size - + cfun->machine->out_args_size - + cfun->machine->callee_saved_area_gpr_padding_bytes; - /* sp_adjust value may be out of range of the addi instruction, - create alternative add behavior with TA_REGNUM if necessary, - using NEGATIVE value to tell that we are decreasing address. */ - nds32_emit_adjust_frame (stack_pointer_rtx, - stack_pointer_rtx, - -1 * sp_adjust); + /* Save fpu registers. 
*/ + if (cfun->machine->callee_saved_first_fpr_regno != SP_REGNUM) + { + /* When $sp moved to bottom of stack, we need to check whether + the range of offset in the FPU instruction. */ + int fpr_offset = cfun->machine->local_size + + cfun->machine->out_args_size + + cfun->machine->callee_saved_fpr_regs_size; + + /* Check FPU instruction offset imm14s. */ + if (!satisfies_constraint_Is14 (GEN_INT (fpr_offset))) + { + int fpr_space = cfun->machine->callee_saved_area_gpr_padding_bytes + + cfun->machine->callee_saved_fpr_regs_size; + + /* Save fpu registers, need to allocate stack space + for fpu callee registers. And now $sp position + on callee saved fpr registers. */ + nds32_emit_adjust_frame (stack_pointer_rtx, + stack_pointer_rtx, + -1 * fpr_space); + + /* Emit fpu store instruction, using [$sp + offset] store + fpu registers. */ + nds32_emit_push_fpr_callee_saved (0); + + /* Adjust $sp = $sp - local_size - out_args_size. */ + sp_adjust = cfun->machine->local_size + + cfun->machine->out_args_size; + + /* Allocate stack space for local size and out args size. */ + nds32_emit_adjust_frame (stack_pointer_rtx, + stack_pointer_rtx, + -1 * sp_adjust); + } + else + { + /* Offset range in Is14, so $sp moved to bottom of stack. */ + + /* Adjust $sp = $sp - local_size - out_args_size + - callee_saved_area_gpr_padding_bytes + - callee_saved_fpr_regs_size. */ + sp_adjust = cfun->machine->local_size + + cfun->machine->out_args_size + + cfun->machine->callee_saved_area_gpr_padding_bytes + + cfun->machine->callee_saved_fpr_regs_size; + + nds32_emit_adjust_frame (stack_pointer_rtx, + stack_pointer_rtx, + -1 * sp_adjust); + + /* Emit fpu store instruction, using [$sp + offset] store + fpu registers. */ + int fpr_position = cfun->machine->out_args_size + + cfun->machine->local_size; + nds32_emit_push_fpr_callee_saved (fpr_position); + } + } + else + { + /* Adjust $sp = $sp - local_size - out_args_size + - callee_saved_area_gpr_padding_bytes. 
*/ + sp_adjust = cfun->machine->local_size + + cfun->machine->out_args_size + + cfun->machine->callee_saved_area_gpr_padding_bytes; + + /* sp_adjust value may be out of range of the addi instruction, + create alternative add behavior with TA_REGNUM if necessary, + using NEGATIVE value to tell that we are decreasing address. */ + nds32_emit_adjust_frame (stack_pointer_rtx, + stack_pointer_rtx, + -1 * sp_adjust); + } /* Prevent the instruction scheduler from moving instructions across the boundary. */ @@ -3310,39 +3855,93 @@ nds32_expand_epilogue (bool sibcall_p) if (frame_pointer_needed) { - /* adjust $sp = $fp - ($fp size) - ($gp size) - ($lp size) - - (4 * callee-saved-registers) - Note: No need to adjust - cfun->machine->callee_saved_area_gpr_padding_bytes, - because we want to adjust stack pointer - to the position for pop instruction. */ - sp_adjust = cfun->machine->fp_size - + cfun->machine->gp_size - + cfun->machine->lp_size - + cfun->machine->callee_saved_gpr_regs_size; + /* Restore fpu registers. */ + if (cfun->machine->callee_saved_first_fpr_regno != SP_REGNUM) + { + int gpr_padding = cfun->machine->callee_saved_area_gpr_padding_bytes; + + /* adjust $sp = $fp - ($fp size) - ($gp size) - ($lp size) + - (4 * callee-saved-registers) + - (4 * exception-handling-data-registers) + - (4 * callee-saved-gpr-registers padding byte) + - (4 * callee-saved-fpr-registers) + Note: we want to adjust stack pointer + to the position for callee-saved fpr register, + And restore fpu register use .bi instruction to adjust $sp + from callee-saved fpr register to pop instruction. 
*/ + sp_adjust = cfun->machine->fp_size + + cfun->machine->gp_size + + cfun->machine->lp_size + + cfun->machine->callee_saved_gpr_regs_size + + cfun->machine->callee_saved_area_gpr_padding_bytes + + cfun->machine->callee_saved_fpr_regs_size; - nds32_emit_adjust_frame (stack_pointer_rtx, - hard_frame_pointer_rtx, - -1 * sp_adjust); + nds32_emit_adjust_frame (stack_pointer_rtx, + hard_frame_pointer_rtx, + -1 * sp_adjust); + + /* Emit fpu load instruction, using .bi instruction + load fpu registers. */ + nds32_emit_pop_fpr_callee_saved (gpr_padding); + } + else + { + /* adjust $sp = $fp - ($fp size) - ($gp size) - ($lp size) + - (4 * callee-saved-registers) + - (4 * exception-handling-data-registers) + Note: No need to adjust + cfun->machine->callee_saved_area_gpr_padding_bytes, + because we want to adjust stack pointer + to the position for pop instruction. */ + sp_adjust = cfun->machine->fp_size + + cfun->machine->gp_size + + cfun->machine->lp_size + + cfun->machine->callee_saved_gpr_regs_size; + + nds32_emit_adjust_frame (stack_pointer_rtx, + hard_frame_pointer_rtx, + -1 * sp_adjust); + } } else { - /* If frame pointer is NOT needed, - we cannot calculate the sp adjustment from frame pointer. - Instead, we calculate the adjustment by local_size, - out_args_size, and callee_saved_area_gpr_padding_bytes. - Notice that such sp adjustment value may be out of range, - so we have to deal with it as well. */ + /* Restore fpu registers. */ + if (cfun->machine->callee_saved_first_fpr_regno != SP_REGNUM) + { + int gpr_padding = cfun->machine->callee_saved_area_gpr_padding_bytes; - /* Adjust $sp = $sp + local_size + out_args_size - + callee_saved_area_gpr_padding_bytes. */ - sp_adjust = cfun->machine->local_size - + cfun->machine->out_args_size - + cfun->machine->callee_saved_area_gpr_padding_bytes; + /* Adjust $sp = $sp + local_size + out_args_size. 
*/ + sp_adjust = cfun->machine->local_size + + cfun->machine->out_args_size; - nds32_emit_adjust_frame (stack_pointer_rtx, - stack_pointer_rtx, - sp_adjust); + nds32_emit_adjust_frame (stack_pointer_rtx, + stack_pointer_rtx, + sp_adjust); + + /* Emit fpu load instruction, using .bi instruction + load fpu registers, and adjust $sp from callee-saved fpr register + to callee-saved gpr register. */ + nds32_emit_pop_fpr_callee_saved (gpr_padding); + } + else + { + /* If frame pointer is NOT needed, + we cannot calculate the sp adjustment from frame pointer. + Instead, we calculate the adjustment by local_size, + out_args_size, and callee_saved_area_gpr_padding_bytes. + Notice that such sp adjustment value may be out of range, + so we have to deal with it as well. */ + + /* Adjust $sp = $sp + local_size + out_args_size + + callee_saved_area_gpr_padding_bytes. */ + sp_adjust = cfun->machine->local_size + + cfun->machine->out_args_size + + cfun->machine->callee_saved_area_gpr_padding_bytes; + + nds32_emit_adjust_frame (stack_pointer_rtx, + stack_pointer_rtx, + sp_adjust); + } } /* Get callee_first_regno and callee_last_regno. */ @@ -3389,6 +3988,7 @@ nds32_expand_prologue_v3push (void) { int fp_adjust; int sp_adjust; + int fpr_space = 0; unsigned Rb, Re; /* Compute and setup stack frame size. @@ -3411,7 +4011,8 @@ nds32_expand_prologue_v3push (void) where imm8u has to be 8-byte alignment. */ sp_adjust = cfun->machine->local_size + cfun->machine->out_args_size - + cfun->machine->callee_saved_area_gpr_padding_bytes; + + cfun->machine->callee_saved_area_gpr_padding_bytes + + cfun->machine->callee_saved_fpr_regs_size; if (satisfies_constraint_Iu08 (GEN_INT (sp_adjust)) && NDS32_DOUBLE_WORD_ALIGN_P (sp_adjust)) @@ -3421,6 +4022,18 @@ nds32_expand_prologue_v3push (void) /* nds32_emit_stack_v3push(last_regno, sp_adjust), the pattern 'stack_v3push' is implemented in nds32.md. */ nds32_emit_stack_v3push (Rb, Re, sp_adjust); + + /* Save fpu registers. 
*/ + if (cfun->machine->callee_saved_first_fpr_regno != SP_REGNUM) + { + /* Calculate fpr position. */ + int fpr_position = cfun->machine->local_size + + cfun->machine->out_args_size; + /* Emit fpu store instruction, using [$sp + offset] store + fpu registers. */ + nds32_emit_push_fpr_callee_saved (fpr_position); + } + /* Check frame_pointer_needed to see if we shall emit fp adjustment instruction. */ if (frame_pointer_needed) @@ -3448,12 +4061,26 @@ nds32_expand_prologue_v3push (void) } else { - /* We have to use 'push25 Re,0' and - expand one more instruction to adjust $sp later. */ + if (cfun->machine->callee_saved_first_fpr_regno != SP_REGNUM) + { + /* Calculate fpr space. */ + fpr_space = cfun->machine->callee_saved_area_gpr_padding_bytes + + cfun->machine->callee_saved_fpr_regs_size; + + /* We have to use 'push25 Re, fpr_space', to pre-allocate + callee saved fpr registers space. */ + nds32_emit_stack_v3push (Rb, Re, fpr_space); + nds32_emit_push_fpr_callee_saved (0); + } + else + { + /* We have to use 'push25 Re,0' and + expand one more instruction to adjust $sp later. */ - /* nds32_emit_stack_v3push(last_regno, sp_adjust), - the pattern 'stack_v3push' is implemented in nds32.md. */ - nds32_emit_stack_v3push (Rb, Re, 0); + /* nds32_emit_stack_v3push(last_regno, sp_adjust), + the pattern 'stack_v3push' is implemented in nds32.md. */ + nds32_emit_stack_v3push (Rb, Re, 0); + } /* Check frame_pointer_needed to see if we shall emit fp adjustment instruction. */ @@ -3472,11 +4099,27 @@ nds32_expand_prologue_v3push (void) + cfun->machine->lp_size + cfun->machine->callee_saved_gpr_regs_size; + if (cfun->machine->callee_saved_first_fpr_regno != SP_REGNUM) + { + /* We use 'push25 Re, fpr_space', the $sp is + on callee saved fpr position, so need to consider + fpr space. 
*/ + fp_adjust = fp_adjust + fpr_space; + } + nds32_emit_adjust_frame (hard_frame_pointer_rtx, stack_pointer_rtx, fp_adjust); } + if (cfun->machine->callee_saved_first_fpr_regno != SP_REGNUM) + { + /* We use 'push25 Re, fpr_space', + the $sp is on callee saved fpr position, + no need to consider fpr space. */ + sp_adjust = sp_adjust - fpr_space; + } + /* Because we use 'push25 Re,0', we need to expand one more instruction to adjust $sp. using NEGATIVE value to tell that we are decreasing address. */ @@ -3524,7 +4167,8 @@ nds32_expand_epilogue_v3pop (bool sibcall_p) where imm8u has to be 8-byte alignment. */ sp_adjust = cfun->machine->local_size + cfun->machine->out_args_size - + cfun->machine->callee_saved_area_gpr_padding_bytes; + + cfun->machine->callee_saved_area_gpr_padding_bytes + + cfun->machine->callee_saved_fpr_regs_size; /* We have to consider alloca issue as well. If the function does call alloca(), the stack pointer is not fixed. @@ -3537,6 +4181,16 @@ nds32_expand_epilogue_v3pop (bool sibcall_p) && NDS32_DOUBLE_WORD_ALIGN_P (sp_adjust) && !cfun->calls_alloca) { + /* Restore fpu registers. */ + if (cfun->machine->callee_saved_first_fpr_regno != SP_REGNUM) + { + int fpr_position = cfun->machine->local_size + + cfun->machine->out_args_size; + /* Emit fpu load instruction, using [$sp + offset] restore + fpu registers. */ + nds32_emit_v3pop_fpr_callee_saved (fpr_position); + } + /* We can use 'pop25 Re,imm8u'. */ /* nds32_emit_stack_v3pop(last_regno, sp_adjust), @@ -3563,9 +4217,29 @@ nds32_expand_epilogue_v3pop (bool sibcall_p) + cfun->machine->lp_size + cfun->machine->callee_saved_gpr_regs_size; - nds32_emit_adjust_frame (stack_pointer_rtx, - hard_frame_pointer_rtx, - -1 * sp_adjust); + /* Restore fpu registers. */ + if (cfun->machine->callee_saved_first_fpr_regno != SP_REGNUM) + { + /* Set $sp to callee saved fpr position, we need to restore + fpr registers. 
*/ + sp_adjust = sp_adjust + + cfun->machine->callee_saved_area_gpr_padding_bytes + + cfun->machine->callee_saved_fpr_regs_size; + + nds32_emit_adjust_frame (stack_pointer_rtx, + hard_frame_pointer_rtx, + -1 * sp_adjust); + + /* Emit fpu load instruction, using [$sp + offset] restore + fpu registers. */ + nds32_emit_v3pop_fpr_callee_saved (0); + } + else + { + nds32_emit_adjust_frame (stack_pointer_rtx, + hard_frame_pointer_rtx, + -1 * sp_adjust); + } } else { @@ -3577,24 +4251,57 @@ nds32_expand_epilogue_v3pop (bool sibcall_p) so we have to deal with it as well. */ /* Adjust $sp = $sp + local_size + out_args_size - + callee_saved_area_gpr_padding_bytes. */ + + callee_saved_area_gpr_padding_bytes + + callee_saved_fpr_regs_size. */ sp_adjust = cfun->machine->local_size + cfun->machine->out_args_size - + cfun->machine->callee_saved_area_gpr_padding_bytes; - /* sp_adjust value may be out of range of the addi instruction, - create alternative add behavior with TA_REGNUM if necessary, - using POSITIVE value to tell that we are increasing - address. */ - nds32_emit_adjust_frame (stack_pointer_rtx, - stack_pointer_rtx, - sp_adjust); + + cfun->machine->callee_saved_area_gpr_padding_bytes + + cfun->machine->callee_saved_fpr_regs_size; + + /* Restore fpu registers. */ + if (cfun->machine->callee_saved_first_fpr_regno != SP_REGNUM) + { + /* Set $sp to callee saved fpr position, we need to restore + fpr registers. */ + sp_adjust = sp_adjust + - cfun->machine->callee_saved_area_gpr_padding_bytes + - cfun->machine->callee_saved_fpr_regs_size; + + nds32_emit_adjust_frame (stack_pointer_rtx, + stack_pointer_rtx, + sp_adjust); + + /* Emit fpu load instruction, using [$sp + offset] restore + fpu registers. */ + nds32_emit_v3pop_fpr_callee_saved (0); + } + else + { + /* sp_adjust value may be out of range of the addi instruction, + create alternative add behavior with TA_REGNUM if necessary, + using POSITIVE value to tell that we are increasing + address. 
*/ + nds32_emit_adjust_frame (stack_pointer_rtx, + stack_pointer_rtx, + sp_adjust); + } } - /* nds32_emit_stack_v3pop(last_regno, sp_adjust), - the pattern 'stack_v3pop' is implementad in nds32.md. */ - nds32_emit_stack_v3pop (Rb, Re, 0); + if (cfun->machine->callee_saved_first_fpr_regno != SP_REGNUM) + { + /* We have fpr need to restore, so $sp is set on callee saved fpr + position. And we use 'pop25 Re, fpr_space' to adjust $sp. */ + int fpr_space = cfun->machine->callee_saved_area_gpr_padding_bytes + + cfun->machine->callee_saved_fpr_regs_size; + nds32_emit_stack_v3pop (Rb, Re, fpr_space); + } + else + { + /* nds32_emit_stack_v3pop(last_regno, sp_adjust), + the pattern 'stack_v3pop' is implemented in nds32.md. */ + nds32_emit_stack_v3pop (Rb, Re, 0); + } } - /* Generate return instruction. */ emit_jump_insn (gen_pop25return ()); } @@ -3605,11 +4312,26 @@ nds32_expand_epilogue_v3pop (bool sibcall_p) int nds32_can_use_return_insn (void) { + int sp_adjust; + /* Prior to reloading, we can't tell how many registers must be saved. Thus we can not determine whether this function has null epilogue. */ if (!reload_completed) return 0; + sp_adjust = cfun->machine->local_size + + cfun->machine->out_args_size + + cfun->machine->callee_saved_area_gpr_padding_bytes + + cfun->machine->callee_saved_fpr_regs_size; + if (!cfun->machine->fp_as_gp_p + && satisfies_constraint_Iu08 (GEN_INT (sp_adjust)) + && NDS32_DOUBLE_WORD_ALIGN_P (sp_adjust) + && !cfun->calls_alloca + && NDS32_V3PUSH_AVAILABLE_P + && !(TARGET_HARD_FLOAT + && (cfun->machine->callee_saved_first_fpr_regno != SP_REGNUM))) + return 1; + /* If no stack was created, two conditions must be satisfied: 1. This is a naked function. So there is no callee-saved, local size, or outgoing size. @@ -3673,6 +4395,36 @@ nds32_adjust_insn_length (rtx_insn *insn, int length) return length; } +bool +nds32_split_double_word_load_store_p(rtx *operands, bool load_p) +{ + rtx mem = load_p ? 
operands[1] : operands[0]; + /* Do split at split2 if -O0 or schedule 2 is not enabled. */ + if (optimize == 0 || !flag_schedule_insns_after_reload) + return !satisfies_constraint_Da (mem) || MEM_VOLATILE_P (mem); + + /* Split double word load store after copy propagation. */ + if (current_pass == NULL) + return false; + + const char *pass_name = current_pass->name; + if (pass_name && ((strcmp (pass_name, "split4") == 0) + || (strcmp (pass_name, "split5") == 0))) + return !satisfies_constraint_Da (mem) || MEM_VOLATILE_P (mem); + + return false; +} + +static bool +nds32_use_blocks_for_constant_p (machine_mode mode, + const_rtx x ATTRIBUTE_UNUSED) +{ + if ((TARGET_FPU_SINGLE || TARGET_FPU_DOUBLE) + && (mode == DFmode || mode == SFmode)) + return true; + else + return false; +} /* Return align 2 (log base 2) if the next instruction of LABEL is 4 byte. */ int @@ -3720,10 +4472,16 @@ nds32_target_alignment (rtx_insn *label) /* -- Basic Characteristics of Registers. */ +#undef TARGET_CONDITIONAL_REGISTER_USAGE +#define TARGET_CONDITIONAL_REGISTER_USAGE nds32_conditional_register_usage + /* -- Order of Allocation of Registers. */ /* -- How Values Fit in Registers. */ +#undef TARGET_HARD_REGNO_NREGS +#define TARGET_HARD_REGNO_NREGS nds32_hard_regno_nregs + /* -- Handling Leaf Functions. */ /* -- Registers That Form a Stack. */ @@ -3737,6 +4495,9 @@ nds32_target_alignment (rtx_insn *label) #undef TARGET_REGISTER_PRIORITY #define TARGET_REGISTER_PRIORITY nds32_register_priority +#undef TARGET_CAN_CHANGE_MODE_CLASS +#define TARGET_CAN_CHANGE_MODE_CLASS nds32_can_change_mode_class + /* Obsolete Macros for Defining Constraints. */ @@ -3788,6 +4549,9 @@ nds32_target_alignment (rtx_insn *label) /* -- How Large Values Are Returned. */ +#undef TARGET_RETURN_IN_MEMORY +#define TARGET_RETURN_IN_MEMORY nds32_return_in_memory + /* -- Caller-Saves Register Allocation. */ /* -- Function Entry and Exit. 
*/ @@ -3931,6 +4695,9 @@ nds32_target_alignment (rtx_insn *label) /* -- Assembler Commands for Exception Regions. */ +#undef TARGET_DWARF_REGISTER_SPAN +#define TARGET_DWARF_REGISTER_SPAN nds32_dwarf_register_span + /* -- Assembler Commands for Alignment. */ @@ -4002,6 +4769,10 @@ nds32_target_alignment (rtx_insn *label) #undef TARGET_EXPAND_BUILTIN #define TARGET_EXPAND_BUILTIN nds32_expand_builtin + +#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P +#define TARGET_USE_BLOCKS_FOR_CONSTANT_P nds32_use_blocks_for_constant_p + /* ------------------------------------------------------------------------ */ diff --git a/gcc/config/nds32/nds32.h b/gcc/config/nds32/nds32.h index 02b99a17b95..c1d389c7d61 100644 --- a/gcc/config/nds32/nds32.h +++ b/gcc/config/nds32/nds32.h @@ -130,12 +130,15 @@ enum nds32_16bit_address_type /* Define maximum numbers of registers for passing arguments. */ #define NDS32_MAX_GPR_REGS_FOR_ARGS 6 +#define NDS32_MAX_FPR_REGS_FOR_ARGS 6 /* Define the register number for first argument. */ #define NDS32_GPR_ARG_FIRST_REGNUM 0 +#define NDS32_FPR_ARG_FIRST_REGNUM 34 /* Define the register number for return value. */ #define NDS32_GPR_RET_FIRST_REGNUM 0 +#define NDS32_FPR_RET_FIRST_REGNUM 34 /* Define the first integer register number. */ #define NDS32_FIRST_GPR_REGNUM 0 @@ -146,6 +149,44 @@ enum nds32_16bit_address_type #define NDS32_LAST_CALLEE_SAVE_GPR_REGNUM \ (TARGET_REDUCED_REGS ? 10 : 14) +/* Define the floating-point number of registers. */ +#define NDS32_FLOAT_REGISTER_NUMBER \ + (((nds32_fp_regnum == NDS32_CONFIG_FPU_0) \ + || (nds32_fp_regnum == NDS32_CONFIG_FPU_4)) ? 8 \ + : ((nds32_fp_regnum == NDS32_CONFIG_FPU_1) \ + || (nds32_fp_regnum == NDS32_CONFIG_FPU_5)) ? 16 \ + : ((nds32_fp_regnum == NDS32_CONFIG_FPU_2) \ + || (nds32_fp_regnum == NDS32_CONFIG_FPU_6)) ? 32 \ + : ((nds32_fp_regnum == NDS32_CONFIG_FPU_3) \ + || (nds32_fp_regnum == NDS32_CONFIG_FPU_7)) ? 
64 \ + : 32) + +#define NDS32_EXT_FPU_DOT_E (nds32_fp_regnum >= 4) + +/* Define the first floating-point register number. */ +#define NDS32_FIRST_FPR_REGNUM 34 +/* Define the last floating-point register number. */ +#define NDS32_LAST_FPR_REGNUM \ + (NDS32_FIRST_FPR_REGNUM + NDS32_FLOAT_REGISTER_NUMBER - 1) + + +#define NDS32_IS_EXT_FPR_REGNUM(regno) \ + (((regno) >= NDS32_FIRST_FPR_REGNUM + 32) \ + && ((regno) < NDS32_FIRST_FPR_REGNUM + 64)) + +#define NDS32_IS_FPR_REGNUM(regno) \ + (((regno) >= NDS32_FIRST_FPR_REGNUM) \ + && ((regno) <= NDS32_LAST_FPR_REGNUM)) + +#define NDS32_FPR_REGNO_OK_FOR_SINGLE(regno) \ + ((regno) <= NDS32_LAST_FPR_REGNUM) + +#define NDS32_FPR_REGNO_OK_FOR_DOUBLE(regno) \ + ((((regno) - NDS32_FIRST_FPR_REGNUM) & 1) == 0) + +#define NDS32_IS_GPR_REGNUM(regno) \ + (((regno) <= NDS32_LAST_GPR_REGNUM)) + /* Define double word alignment bits. */ #define NDS32_DOUBLE_WORD_ALIGNMENT 64 @@ -189,7 +230,14 @@ enum nds32_16bit_address_type : ((reg_offset) + NDS32_GPR_ARG_FIRST_REGNUM)) \ : ((reg_offset) + NDS32_GPR_ARG_FIRST_REGNUM)) -/* This macro is to check if there are still available registers +#define NDS32_AVAILABLE_REGNUM_FOR_FPR_ARG(reg_offset, mode, type) \ + ((NDS32_NEED_N_REGS_FOR_ARG (mode, type) > 1) \ + ? ((NDS32_MODE_TYPE_ALIGN (mode, type) > PARM_BOUNDARY) \ + ? (((reg_offset) + NDS32_FPR_ARG_FIRST_REGNUM + 1) & ~1) \ + : ((reg_offset) + NDS32_FPR_ARG_FIRST_REGNUM)) \ + : ((reg_offset) + NDS32_FPR_ARG_FIRST_REGNUM)) + +/* These two macros are to check if there are still available registers for passing argument, which must be entirely in registers. 
*/ #define NDS32_ARG_ENTIRE_IN_GPR_REG_P(reg_offset, mode, type) \ ((NDS32_AVAILABLE_REGNUM_FOR_GPR_ARG (reg_offset, mode, type) \ @@ -197,13 +245,23 @@ enum nds32_16bit_address_type <= (NDS32_GPR_ARG_FIRST_REGNUM \ + NDS32_MAX_GPR_REGS_FOR_ARGS)) -/* This macro is to check if there are still available registers +#define NDS32_ARG_ENTIRE_IN_FPR_REG_P(reg_offset, mode, type) \ + ((NDS32_AVAILABLE_REGNUM_FOR_FPR_ARG (reg_offset, mode, type) \ + + NDS32_NEED_N_REGS_FOR_ARG (mode, type)) \ + <= (NDS32_FPR_ARG_FIRST_REGNUM \ + + NDS32_MAX_FPR_REGS_FOR_ARGS)) + +/* These two macros are to check if there are still available registers for passing argument, either entirely in registers or partially in registers. */ #define NDS32_ARG_PARTIAL_IN_GPR_REG_P(reg_offset, mode, type) \ (NDS32_AVAILABLE_REGNUM_FOR_GPR_ARG (reg_offset, mode, type) \ < NDS32_GPR_ARG_FIRST_REGNUM + NDS32_MAX_GPR_REGS_FOR_ARGS) +#define NDS32_ARG_PARTIAL_IN_FPR_REG_P(reg_offset, mode, type) \ + (NDS32_AVAILABLE_REGNUM_FOR_FPR_ARG (reg_offset, mode, type) \ + < NDS32_FPR_ARG_FIRST_REGNUM + NDS32_MAX_FPR_REGS_FOR_ARGS) + /* This macro is to check if the register is required to be saved on stack. If call_used_regs[regno] == 0, regno is the callee-saved register. If df_regs_ever_live_p(regno) == true, it is used in the current function. @@ -251,6 +309,10 @@ struct GTY(()) machine_function callee-saved registers. */ int callee_saved_gpr_regs_size; + /* Number of bytes on the stack for saving floating-point + callee-saved registers. */ + int callee_saved_fpr_regs_size; + /* The padding bytes in callee-saved area may be required. */ int callee_saved_area_gpr_padding_bytes; @@ -259,6 +321,11 @@ struct GTY(()) machine_function /* The last required general purpose callee-saved register. */ int callee_saved_last_gpr_regno; + /* The first required floating-point callee-saved register. */ + int callee_saved_first_fpr_regno; + /* The last required floating-point callee-saved register. 
*/ + int callee_saved_last_fpr_regno; + /* The padding bytes in varargs area may be required. */ int va_args_area_padding_bytes; @@ -279,6 +346,7 @@ struct GTY(()) machine_function typedef struct { unsigned int gpr_offset; + unsigned int fpr_offset; } nds32_cumulative_args; /* ------------------------------------------------------------------------ */ @@ -390,7 +458,11 @@ enum nds32_builtins /* ------------------------------------------------------------------------ */ #define TARGET_ISA_V2 (nds32_arch_option == ARCH_V2) -#define TARGET_ISA_V3 (nds32_arch_option == ARCH_V3) + +#define TARGET_ISA_V3 \ + (nds32_arch_option == ARCH_V3 \ + || nds32_arch_option == ARCH_V3F \ + || nds32_arch_option == ARCH_V3S) #define TARGET_ISA_V3M (nds32_arch_option == ARCH_V3M) #define TARGET_CMODEL_SMALL \ @@ -406,21 +478,60 @@ enum nds32_builtins (nds32_cmodel_option == CMODEL_SMALL\ || nds32_cmodel_option == CMODEL_MEDIUM) -#define TARGET_SOFT_FLOAT 1 -#define TARGET_HARD_FLOAT 0 +/* Run-time Target Specification. */ +#define TARGET_SOFT_FLOAT (nds32_abi == NDS32_ABI_V2) +/* Use hardware floating point calling convention. */ +#define TARGET_HARD_FLOAT (nds32_abi == NDS32_ABI_V2_FP_PLUS) + +/* Record arch version in TARGET_ARCH_DEFAULT. 
0 means soft ABI, + 1 means hard ABI and using full floating-point instruction, + 2 means hard ABI and only using single-precision floating-point + instruction */ +#if TARGET_ARCH_DEFAULT == 1 +# define TARGET_DEFAULT_ABI NDS32_ABI_V2_FP_PLUS +# define TARGET_DEFAULT_FPU_ISA (MASK_FPU_DOUBLE | MASK_FPU_SINGLE) +# define TARGET_DEFAULT_FPU_FMA 0 +#else +# if TARGET_ARCH_DEFAULT == 2 +# define TARGET_DEFAULT_ABI NDS32_ABI_V2_FP_PLUS +# define TARGET_DEFAULT_FPU_ISA MASK_FPU_SINGLE +# define TARGET_DEFAULT_FPU_FMA 0 +# else +# define TARGET_DEFAULT_ABI NDS32_ABI_V2 +# define TARGET_DEFAULT_FPU_ISA 0 +# define TARGET_DEFAULT_FPU_FMA 0 +# endif +#endif + +#define TARGET_CONFIG_FPU_DEFAULT NDS32_CONFIG_FPU_2 /* ------------------------------------------------------------------------ */ /* Controlling the Compilation Driver. */ #define OPTION_DEFAULT_SPECS \ - {"arch", "%{!march=*:-march=%(VALUE)}" } + {"arch", " %{!march=*:-march=%(VALUE)}" \ + " %{march=v3f:%{!mfloat-abi=*:-mfloat-abi=hard}" \ + " %{!mno-ext-fpu-sp:%{!mext-fpu-sp:-mext-fpu-sp}}" \ + " %{!mno-ext-fpu-dp:%{!mext-fpu-dp:-mext-fpu-dp}}}" \ + " %{march=v3s:%{!mfloat-abi=*:-mfloat-abi=hard}" \ + " %{!mno-ext-fpu-sp:%{!mext-fpu-sp:-mext-fpu-sp}}}" }, \ + {"float", "%{!mfloat-abi=*:-mfloat-abi=%(VALUE)}" } #define CC1_SPEC \ "" #define ASM_SPEC \ - " %{mbig-endian:-EB} %{mlittle-endian:-EL}" + " %{mbig-endian:-EB} %{mlittle-endian:-EL}" \ + " %{march=*:-march=%*}" \ + " %{mabi=*:-mabi=v%*}" \ + " %{mconfig-fpu=*:-mfpu-freg=%*}" \ + " %{mext-fpu-fma:-mmac}" \ + " %{mno-ext-fpu-fma:-mno-mac}" \ + " %{mext-fpu-sp:-mfpu-sp-ext}" \ + " %{mno-ext-fpu-sp:-mno-fpu-sp-ext}" \ + " %{mext-fpu-dp:-mfpu-dp-ext}" \ + " %{mno-ext-fpu-dp:-mno-fpu-dp-ext}" /* If user issues -mrelax, we need to pass '--relax' to linker. 
*/ #define LINK_SPEC \ @@ -550,8 +661,8 @@ enum nds32_builtins $r30 : $lp $r31 : $sp - caller-save registers: $r0 ~ $r5, $r16 ~ $r23 - callee-save registers: $r6 ~ $r10, $r11 ~ $r14 + caller-save registers: $r0 ~ $r5, $r16 ~ $r23, $fs0 ~ $fs5, $fs22 ~ $fs47 + callee-save registers: $r6 ~ $r10, $r11 ~ $r14, $fs6 ~ $fs21, $fs48 ~ $fs63 reserved for assembler : $r15 reserved for other use : $r24, $r25, $r26, $r27 */ @@ -564,23 +675,23 @@ enum nds32_builtins 0, 0, 0, 0, 0, 0, 0, 0, \ /* r24 r25 r26 r27 r28 r29 r30 r31 */ \ 1, 1, 1, 1, 0, 1, 0, 1, \ - /* AP FP Reserved.................... */ \ + /* AP FP fs0 fs1 fs2 fs3 fs4 fs5 */ \ 1, 1, 1, 1, 1, 1, 1, 1, \ - /* Reserved............................... */ \ + /* fs6 fs7 fs8 fs9 fs10 fs11 fs12 fs13 */ \ 1, 1, 1, 1, 1, 1, 1, 1, \ - /* Reserved............................... */ \ + /* fs14 fs15 fs16 fs17 fs18 fs19 fs20 fs21 */ \ 1, 1, 1, 1, 1, 1, 1, 1, \ - /* Reserved............................... */ \ + /* fs22 fs23 fs24 fs25 fs26 fs27 fs28 fs29 */ \ 1, 1, 1, 1, 1, 1, 1, 1, \ - /* Reserved............................... */ \ + /* fs30 fs31 fd16 fd17 fd18 */ \ 1, 1, 1, 1, 1, 1, 1, 1, \ - /* Reserved............................... */ \ + /* fd19 fd20 fd21 fd22 */ \ 1, 1, 1, 1, 1, 1, 1, 1, \ - /* Reserved............................... */ \ + /* fd23 fd24 fd25 fd26 */ \ 1, 1, 1, 1, 1, 1, 1, 1, \ - /* Reserved............................... */ \ + /* fd27 fd28 fd29 fd30 */ \ 1, 1, 1, 1, 1, 1, 1, 1, \ - /* Reserved............................... */ \ + /* fd31 Reserved..................... */ \ 1, 1, 1, 1, 1 \ } @@ -599,23 +710,23 @@ enum nds32_builtins 1, 1, 1, 1, 1, 1, 1, 1, \ /* r24 r25 r26 r27 r28 r29 r30 r31 */ \ 1, 1, 1, 1, 0, 1, 0, 1, \ - /* AP FP Reserved.................... */ \ + /* AP FP fs0 fs1 fs2 fs3 fs4 fs5 */ \ 1, 1, 1, 1, 1, 1, 1, 1, \ - /* Reserved............................... */ \ + /* fs6 fs7 fs8 fs9 fs10 fs11 fs12 fs13 */ \ 1, 1, 1, 1, 1, 1, 1, 1, \ - /* Reserved............................... 
*/ \ + /* fs14 fs15 fs16 fs17 fs18 fs19 fs20 fs21 */ \ 1, 1, 1, 1, 1, 1, 1, 1, \ - /* Reserved............................... */ \ + /* fs22 fs23 fs24 fs25 fs26 fs27 fs28 fs29 */ \ 1, 1, 1, 1, 1, 1, 1, 1, \ - /* Reserved............................... */ \ + /* fs30 fs31 fd16 fd17 fd18 */ \ 1, 1, 1, 1, 1, 1, 1, 1, \ - /* Reserved............................... */ \ + /* fd19 fd20 fd21 fd22 */ \ 1, 1, 1, 1, 1, 1, 1, 1, \ - /* Reserved............................... */ \ + /* fd23 fd24 fd25 fd26 */ \ 1, 1, 1, 1, 1, 1, 1, 1, \ - /* Reserved............................... */ \ + /* fd27 fd28 fd29 fd30 */ \ 1, 1, 1, 1, 1, 1, 1, 1, \ - /* Reserved............................... */ \ + /* fd31 Reserved..................... */ \ 1, 1, 1, 1, 1 \ } @@ -670,6 +781,7 @@ enum reg_class HIGH_REGS, GENERAL_REGS, FRAME_REGS, + FP_REGS, ALL_REGS, LIM_REG_CLASSES }; @@ -689,6 +801,7 @@ enum reg_class "HIGH_REGS", \ "GENERAL_REGS", \ "FRAME_REGS", \ + "FP_REGS", \ "ALL_REGS" \ } @@ -715,6 +828,8 @@ enum reg_class {0xffffffff, 0x00000000, 0x00000000, 0x00000000}, \ /* FRAME_REGS : 32, 33 */ \ {0x00000000, 0x00000003, 0x00000000, 0x00000000}, \ + /* FP_REGS : 34-97 */ \ + {0x00000000, 0xfffffffc, 0xffffffff, 0x00000003}, \ /* ALL_REGS : 0-100 */ \ {0xffffffff, 0xffffffff, 0xffffffff, 0x0000001f} \ } @@ -724,13 +839,18 @@ enum reg_class #define BASE_REG_CLASS GENERAL_REGS #define INDEX_REG_CLASS GENERAL_REGS +#define TEST_REGNO(R, TEST, VALUE) \ + ((R TEST VALUE) || ((unsigned) reg_renumber[R] TEST VALUE)) + /* Return nonzero if it is suitable for use as a base register in operand addresses. So far, we return nonzero only if "num" is a hard reg of the suitable class or a pseudo register which is allocated to a suitable hard reg. 
*/ #define REGNO_OK_FOR_BASE_P(num) \ - ((num) < 32 || (unsigned) reg_renumber[num] < 32) + (TEST_REGNO (num, <, 32) \ + || TEST_REGNO (num, ==, FRAME_POINTER_REGNUM) \ + || TEST_REGNO (num, ==, ARG_POINTER_REGNUM)) /* Return nonzero if it is suitable for use as a index register in operand addresses. @@ -740,7 +860,9 @@ enum reg_class The difference between an index register and a base register is that the index register may be scaled. */ #define REGNO_OK_FOR_INDEX_P(num) \ - ((num) < 32 || (unsigned) reg_renumber[num] < 32) + (TEST_REGNO (num, <, 32) \ + || TEST_REGNO (num, ==, FRAME_POINTER_REGNUM) \ + || TEST_REGNO (num, ==, ARG_POINTER_REGNUM)) /* Obsolete Macros for Defining Constraints. */ @@ -768,6 +890,8 @@ enum reg_class #define INCOMING_RETURN_ADDR_RTX gen_rtx_REG (Pmode, LP_REGNUM) #define DWARF_FRAME_RETURN_COLUMN DWARF_FRAME_REGNUM (LP_REGNUM) +#define DBX_REGISTER_NUMBER(REGNO) nds32_dbx_register_number (REGNO) + #define STACK_POINTER_REGNUM SP_REGNUM #define FRAME_POINTER_REGNUM 33 @@ -796,12 +920,11 @@ enum reg_class #define INIT_CUMULATIVE_ARGS(cum, fntype, libname, fndecl, n_named_args) \ nds32_init_cumulative_args (&cum, fntype, libname, fndecl, n_named_args) -/* The REGNO is an unsigned integer but NDS32_GPR_ARG_FIRST_REGNUM may be 0. - We better cast REGNO into signed integer so that we can avoid - 'comparison of unsigned expression >= 0 is always true' warning. 
*/ -#define FUNCTION_ARG_REGNO_P(regno) \ - (((int) regno - NDS32_GPR_ARG_FIRST_REGNUM >= 0) \ - && ((int) regno - NDS32_GPR_ARG_FIRST_REGNUM < NDS32_MAX_GPR_REGS_FOR_ARGS)) +#define FUNCTION_ARG_REGNO_P(regno) \ + (IN_RANGE ((regno), NDS32_FIRST_GPR_REGNUM, NDS32_MAX_GPR_REGS_FOR_ARGS - 1) \ + || ((TARGET_FPU_SINGLE || TARGET_FPU_DOUBLE) \ + && IN_RANGE ((regno), NDS32_FPR_ARG_FIRST_REGNUM, \ + NDS32_FIRST_FPR_REGNUM + NDS32_MAX_FPR_REGS_FOR_ARGS - 1))) #define DEFAULT_PCC_STRUCT_RETURN 0 @@ -944,15 +1067,72 @@ enum reg_class "$r8", "$r9", "$r10", "$r11", "$r12", "$r13", "$r14", "$ta", \ "$r16", "$r17", "$r18", "$r19", "$r20", "$r21", "$r22", "$r23", \ "$r24", "$r25", "$r26", "$r27", "$fp", "$gp", "$lp", "$sp", \ - "$AP", "$SFP", "NA", "NA", "NA", "NA", "NA", "NA", \ - "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", \ - "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", \ - "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", \ - "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", \ - "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", \ - "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", \ - "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", \ - "NA", "NA", "NA", "NA", "NA" \ + "$AP", "$SFP", "$fs0", "$fs1", "$fs2", "$fs3", "$fs4", "$fs5", \ + "$fs6", "$fs7", "$fs8", "$fs9", "$fs10","$fs11","$fs12","$fs13",\ + "$fs14","$fs15","$fs16","$fs17","$fs18","$fs19","$fs20","$fs21",\ + "$fs22","$fs23","$fs24","$fs25","$fs26","$fs27","$fs28","$fs29",\ + "$fs30","$fs31","$fs32","$fs33","$fs34","$fs35","$fs36","$fs37",\ + "$fs38","$fs39","$fs40","$fs41","$fs42","$fs43","$fs44","$fs45",\ + "$fs46","$fs47","$fs48","$fs49","$fs50","$fs51","$fs52","$fs53",\ + "$fs54","$fs55","$fs56","$fs57","$fs58","$fs59","$fs60","$fs61",\ + "$fs62","$fs63", "LB", "LE", "LC" \ +} + +#define ADDITIONAL_REGISTER_NAMES \ +{ \ + {"$r15", 15}, \ + {"$r28", 28}, {"$r29", 29}, {"$r30", 30}, {"$r31", 31}, \ + {"$a0", 0}, {"$a1", 1}, {"$a2", 2}, \ + {"$a3", 3}, {"$a4", 4}, {"$a5", 5}, \ + {"$s0", 6}, {"$s1", 7}, {"$s2", 8}, 
{"$s3", 9}, \ + {"$s4", 10}, {"$s5", 11}, {"$s6", 12}, {"$s7", 13}, \ + {"$s8", 14}, \ + {"$t0", 16}, {"$t1", 17}, {"$t2", 18}, {"$t3", 19}, \ + {"$t4", 20}, {"$t5", 21}, {"$t6", 22}, {"$t7", 23}, \ + {"$t8", 24}, {"$t9", 25}, \ + {"$p0", 26}, {"$p1", 27}, \ + {"$h0", 0}, {"$h1", 1}, {"$h2", 2}, {"$h3", 3}, \ + {"$h4", 4}, {"$h5", 5}, {"$h6", 6}, {"$h7", 7}, \ + {"$h8", 8}, {"$h9", 9}, {"$h10", 10}, {"$h11", 11}, \ + {"$h12", 16}, {"$h13", 17}, {"$h14", 18}, {"$h15", 19}, \ + {"$o0", 0}, {"$o1", 1}, {"$o2", 2}, {"$o3", 3}, \ + {"$o4", 4}, {"$o5", 5}, {"$o6", 6}, {"$o7", 7}, \ +} + +#define OVERLAPPING_REGISTER_NAMES \ +{ \ + {"$fd0", NDS32_FIRST_FPR_REGNUM + 0, 2}, \ + {"$fd1", NDS32_FIRST_FPR_REGNUM + 2, 2}, \ + {"$fd2", NDS32_FIRST_FPR_REGNUM + 4, 2}, \ + {"$fd3", NDS32_FIRST_FPR_REGNUM + 6, 2}, \ + {"$fd4", NDS32_FIRST_FPR_REGNUM + 8, 2}, \ + {"$fd5", NDS32_FIRST_FPR_REGNUM + 10, 2}, \ + {"$fd6", NDS32_FIRST_FPR_REGNUM + 12, 2}, \ + {"$fd7", NDS32_FIRST_FPR_REGNUM + 14, 2}, \ + {"$fd8", NDS32_FIRST_FPR_REGNUM + 16, 2}, \ + {"$fd9", NDS32_FIRST_FPR_REGNUM + 18, 2}, \ + {"$fd10", NDS32_FIRST_FPR_REGNUM + 20, 2}, \ + {"$fd11", NDS32_FIRST_FPR_REGNUM + 22, 2}, \ + {"$fd12", NDS32_FIRST_FPR_REGNUM + 24, 2}, \ + {"$fd13", NDS32_FIRST_FPR_REGNUM + 26, 2}, \ + {"$fd14", NDS32_FIRST_FPR_REGNUM + 28, 2}, \ + {"$fd15", NDS32_FIRST_FPR_REGNUM + 30, 2}, \ + {"$fd16", NDS32_FIRST_FPR_REGNUM + 32, 2}, \ + {"$fd17", NDS32_FIRST_FPR_REGNUM + 34, 2}, \ + {"$fd18", NDS32_FIRST_FPR_REGNUM + 36, 2}, \ + {"$fd19", NDS32_FIRST_FPR_REGNUM + 38, 2}, \ + {"$fd20", NDS32_FIRST_FPR_REGNUM + 40, 2}, \ + {"$fd21", NDS32_FIRST_FPR_REGNUM + 42, 2}, \ + {"$fd22", NDS32_FIRST_FPR_REGNUM + 44, 2}, \ + {"$fd23", NDS32_FIRST_FPR_REGNUM + 46, 2}, \ + {"$fd24", NDS32_FIRST_FPR_REGNUM + 48, 2}, \ + {"$fd25", NDS32_FIRST_FPR_REGNUM + 50, 2}, \ + {"$fd26", NDS32_FIRST_FPR_REGNUM + 52, 2}, \ + {"$fd27", NDS32_FIRST_FPR_REGNUM + 54, 2}, \ + {"$fd28", NDS32_FIRST_FPR_REGNUM + 56, 2}, \ + {"$fd29", 
NDS32_FIRST_FPR_REGNUM + 58, 2}, \ + {"$fd30", NDS32_FIRST_FPR_REGNUM + 60, 2}, \ + {"$fd31", NDS32_FIRST_FPR_REGNUM + 62, 2}, \ } /* Output normal jump table entry. */ diff --git a/gcc/config/nds32/nds32.md b/gcc/config/nds32/nds32.md index dd50f7a62e0..25ae3144a1f 100644 --- a/gcc/config/nds32/nds32.md +++ b/gcc/config/nds32/nds32.md @@ -46,13 +46,17 @@ ;; Include DImode/DFmode operations. (include "nds32-doubleword.md") +;; Include floating-point patterns. +(include "nds32-fpu.md") + ;; Include peephole patterns. (include "nds32-peephole2.md") ;; Insn type, it is used to default other attribute values. (define_attr "type" - "unknown,load,store,load_multiple,store_multiple,alu,alu_shift,mul,mac,div,branch,call,misc" + "unknown,load,store,load_multiple,store_multiple,alu,alu_shift,mul,mac,div,branch,call,misc,\ + falu,fmuls,fmuld,fmacs,fmacd,fdivs,fdivd,fsqrts,fsqrtd,fcmp,fabs,fcpy,fcmov,fmfsr,fmfdr,fmtsr,fmtdr,fload,fstore" (const_string "unknown")) ;; Insn sub-type @@ -77,7 +81,7 @@ ;; pe2 : Performance Extension Version 2 Instructions ;; se : String Extension instructions (define_attr "feature" - "v1,v2,v3m,v3,pe1,pe2,se" + "v1,v2,v3m,v3,pe1,pe2,se,fpu" (const_string "v1")) ;; Enabled, which is used to enable/disable insn alternatives. 
@@ -107,6 +111,9 @@ (const_string "yes") (const_string "no")) (eq_attr "feature" "se") (if_then_else (match_test "TARGET_EXT_STRING") + (const_string "yes") + (const_string "no")) + (eq_attr "feature" "fpu") (if_then_else (match_test "TARGET_FPU_SINGLE || TARGET_FPU_DOUBLE") (const_string "yes") (const_string "no"))] (const_string "yes")))) @@ -193,8 +200,8 @@ }) (define_insn "*mov" - [(set (match_operand:QIHISI 0 "nonimmediate_operand" "=r, r, U45, U33, U37, U45, m, l, l, l, d, d, r, d, r, r, r") - (match_operand:QIHISI 1 "nds32_move_operand" " r, r, l, l, l, d, r, U45, U33, U37, U45, Ufe, m, Ip05, Is05, Is20, Ihig"))] + [(set (match_operand:QIHISI 0 "nonimmediate_operand" "=r, r,U45,U33,U37,U45, m, l, l, l, d, d, r, d, r, r, r, *f, *f, r, *f, Q") + (match_operand:QIHISI 1 "nds32_move_operand" " r, r, l, l, l, d, r,U45,U33,U37,U45,Ufe, m,Ip05, Is05, Is20, Ihig, *f, r, *f, Q, *f"))] "register_operand(operands[0], mode) || register_operand(operands[1], mode)" { @@ -227,12 +234,26 @@ return "movi\t%0, %1"; case 16: return "sethi\t%0, hi20(%1)"; + case 17: + if (TARGET_FPU_SINGLE) + return "fcpyss\t%0, %1, %1"; + else + return "#"; + case 18: + return "fmtsr\t%1, %0"; + case 19: + return "fmfsr\t%0, %1"; + case 20: + return nds32_output_float_load (operands); + case 21: + return nds32_output_float_store (operands); default: gcc_unreachable (); } } - [(set_attr "type" "alu,alu,store,store,store,store,store,load,load,load,load,load,load,alu,alu,alu,alu") - (set_attr "length" " 2, 4, 2, 2, 2, 2, 4, 2, 2, 2, 2, 2, 4, 2, 2, 4, 4")]) + [(set_attr "type" "alu,alu,store,store,store,store,store,load,load,load,load,load,load,alu,alu,alu,alu,fcpy,fmtsr,fmfsr,fload,fstore") + (set_attr "length" " 2, 4, 2, 2, 2, 2, 4, 2, 2, 2, 2, 2, 4, 2, 2, 4, 4, 4, 4, 4, 4, 4") + (set_attr "feature" " v1, v1, v1, v1, v1, v1, v1, v1, v1, v1, v1, v3m, v1, v1, v1, v1, v1, fpu, fpu, fpu, fpu, fpu")]) ;; We use nds32_symbolic_operand to limit that only CONST/SYMBOL_REF/LABEL_REF @@ -804,6 +825,87 @@ 
(set_attr "length" " 2, 4") (set_attr "feature" "v3m, v1")]) +(define_expand "negsf2" + [(set (match_operand:SF 0 "register_operand" "") + (neg:SF (match_operand:SF 1 "register_operand" "")))] + "" +{ + if (!TARGET_FPU_SINGLE && !TARGET_EXT_PERF) + { + rtx new_dst = simplify_gen_subreg (SImode, operands[0], SFmode, 0); + rtx new_src = simplify_gen_subreg (SImode, operands[1], SFmode, 0); + + emit_insn (gen_xorsi3 (new_dst, + new_src, + gen_int_mode (0x80000000, SImode))); + + DONE; + } +}) + +(define_expand "negdf2" + [(set (match_operand:DF 0 "register_operand" "") + (neg:DF (match_operand:DF 1 "register_operand" "")))] + "" +{ +}) + +(define_insn_and_split "soft_negdf2" + [(set (match_operand:DF 0 "register_operand" "") + (neg:DF (match_operand:DF 1 "register_operand" "")))] + "!TARGET_FPU_DOUBLE" + "#" + "!TARGET_FPU_DOUBLE" + [(const_int 1)] +{ + rtx src = operands[1]; + rtx dst = operands[0]; + rtx ori_dst = operands[0]; + + bool need_extra_move_for_dst_p = false; + /* FPU register can't change mode to SI directly, so we need to create a + tmp register to handle it, and FPU register can't do `xor` or btgl. 
*/ + if (HARD_REGISTER_P (src) + && TEST_HARD_REG_BIT (reg_class_contents[FP_REGS], REGNO (src))) + { + rtx tmp = gen_reg_rtx (DFmode); + emit_move_insn (tmp, src); + src = tmp; + } + + if (HARD_REGISTER_P (dst) + && TEST_HARD_REG_BIT (reg_class_contents[FP_REGS], REGNO (dst))) + { + need_extra_move_for_dst_p = true; + rtx tmp = gen_reg_rtx (DFmode); + dst = tmp; + } + + rtx dst_high_part = simplify_gen_subreg ( + SImode, dst, + DFmode, subreg_highpart_offset (SImode, DFmode)); + rtx dst_low_part = simplify_gen_subreg ( + SImode, dst, + DFmode, subreg_lowpart_offset (SImode, DFmode)); + rtx src_high_part = simplify_gen_subreg ( + SImode, src, + DFmode, subreg_highpart_offset (SImode, DFmode)); + rtx src_low_part = simplify_gen_subreg ( + SImode, src, + DFmode, subreg_lowpart_offset (SImode, DFmode)); + + emit_insn (gen_xorsi3 (dst_high_part, + src_high_part, + gen_int_mode (0x80000000, SImode))); + emit_move_insn (dst_low_part, src_low_part); + + if (need_extra_move_for_dst_p) + emit_move_insn (ori_dst, dst); + + DONE; +}) + + ;; ---------------------------------------------------------------------------- ;; 'ONE_COMPLIMENT' operation ;; ---------------------------------------------------------------------------- diff --git a/gcc/config/nds32/nds32.opt b/gcc/config/nds32/nds32.opt index d6d2f20dbac..bb2bbce18eb 100644 --- a/gcc/config/nds32/nds32.opt +++ b/gcc/config/nds32/nds32.opt @@ -32,6 +32,31 @@ EL Target RejectNegative Alias(mlittle-endian) Generate code in little-endian mode. + +; --------------------------------------------------------------- + +mabi= +Target RejectNegative Joined Enum(abi_type) Var(nds32_abi) Init(TARGET_DEFAULT_ABI) +Specify which ABI type to generate code for: 2, 2fp+. 
+ +Enum +Name(abi_type) Type(enum abi_type) +Known ABIs (for use with the -mabi= option): + +EnumValue +Enum(abi_type) String(2) Value(NDS32_ABI_V2) + +EnumValue +Enum(abi_type) String(2fp+) Value(NDS32_ABI_V2_FP_PLUS) + +mfloat-abi=soft +Target RejectNegative Alias(mabi=, 2) +Specify use soft floating point ABI which mean alias to -mabi=2. + +mfloat-abi=hard +Target RejectNegative Alias(mabi=, 2fp+) +Specify use hard floating point ABI which mean alias to -mabi=2fp+. + ; --------------------------------------------------------------- mreduced-regs @@ -110,6 +135,12 @@ Enum(nds32_arch_type) String(v3) Value(ARCH_V3) EnumValue Enum(nds32_arch_type) String(v3m) Value(ARCH_V3M) +EnumValue +Enum(nds32_arch_type) String(v3f) Value(ARCH_V3F) + +EnumValue +Enum(nds32_arch_type) String(v3s) Value(ARCH_V3S) + mcmodel= Target RejectNegative Joined Enum(nds32_cmodel_type) Var(nds32_cmodel_option) Init(CMODEL_LARGE) Specify the address generation strategy for code model. @@ -138,6 +169,38 @@ Known cpu types (for use with the -mcpu= option): EnumValue Enum(nds32_cpu_type) String(n9) Value(CPU_N9) +mconfig-fpu= +Target RejectNegative Joined Enum(float_reg_number) Var(nds32_fp_regnum) Init(TARGET_CONFIG_FPU_DEFAULT) +Specify a fpu configuration value from 0 to 7; 0-3 is as FPU spec says, and 4-7 is corresponding to 0-3. 
+ +Enum +Name(float_reg_number) Type(enum float_reg_number) +Known floating-point number of registers (for use with the -mconfig-fpu= option): + +EnumValue +Enum(float_reg_number) String(0) Value(NDS32_CONFIG_FPU_0) + +EnumValue +Enum(float_reg_number) String(1) Value(NDS32_CONFIG_FPU_1) + +EnumValue +Enum(float_reg_number) String(2) Value(NDS32_CONFIG_FPU_2) + +EnumValue +Enum(float_reg_number) String(3) Value(NDS32_CONFIG_FPU_3) + +EnumValue +Enum(float_reg_number) String(4) Value(NDS32_CONFIG_FPU_4) + +EnumValue +Enum(float_reg_number) String(5) Value(NDS32_CONFIG_FPU_5) + +EnumValue +Enum(float_reg_number) String(6) Value(NDS32_CONFIG_FPU_6) + +EnumValue +Enum(float_reg_number) String(7) Value(NDS32_CONFIG_FPU_7) + mctor-dtor Target Report Enable constructor/destructor feature. @@ -145,3 +208,15 @@ Enable constructor/destructor feature. mrelax Target Report Guide linker to relax instructions. + +mext-fpu-fma +Target Report Mask(EXT_FPU_FMA) +Generate floating-point multiply-accumulation instructions. + +mext-fpu-sp +Target Report Mask(FPU_SINGLE) +Generate single-precision floating-point instructions. + +mext-fpu-dp +Target Report Mask(FPU_DOUBLE) +Generate double-precision floating-point instructions. 
diff --git a/gcc/config/nds32/predicates.md b/gcc/config/nds32/predicates.md index 066ec3471ea..9eb84685514 100644 --- a/gcc/config/nds32/predicates.md +++ b/gcc/config/nds32/predicates.md @@ -24,12 +24,21 @@ (define_predicate "nds32_greater_less_comparison_operator" (match_code "gt,ge,lt,le")) +(define_predicate "nds32_float_comparison_operator" + (match_code "eq,ne,le,lt,ge,gt,ordered,unordered,ungt,unge,unlt,unle")) + (define_predicate "nds32_movecc_comparison_operator" (match_code "eq,ne,le,leu,ge,geu")) (define_special_predicate "nds32_logical_binary_operator" (match_code "and,ior,xor")) +(define_special_predicate "nds32_conditional_call_comparison_operator" + (match_code "lt,ge")) + +(define_special_predicate "nds32_have_33_inst_operator" + (match_code "mult,and,ior,xor")) + (define_predicate "nds32_symbolic_operand" (match_code "const,symbol_ref,label_ref")) @@ -122,6 +131,18 @@ (and (match_code "mem") (match_test "nds32_valid_smw_lwm_base_p (op)"))) +(define_predicate "float_even_register_operand" + (and (match_code "reg") + (and (match_test "REGNO (op) >= NDS32_FIRST_FPR_REGNUM") + (match_test "REGNO (op) <= NDS32_LAST_FPR_REGNUM") + (match_test "(REGNO (op) & 1) == 0")))) + +(define_predicate "float_odd_register_operand" + (and (match_code "reg") + (and (match_test "REGNO (op) >= NDS32_FIRST_FPR_REGNUM") + (match_test "REGNO (op) <= NDS32_LAST_FPR_REGNUM") + (match_test "(REGNO (op) & 1) != 0")))) + (define_special_predicate "nds32_load_multiple_operation" (match_code "parallel") { -- 2.30.2