From 63ab910dd75cce3d9e595879465d6ea5fdf13602 Mon Sep 17 00:00:00 2001 From: Kito Cheng Date: Sun, 8 Apr 2018 09:21:30 +0000 Subject: [PATCH] [NDS32] Implement n7 pipeline. gcc/ * config.gcc (nds32*-*-*): Check that n7 is valid to --with-cpu. * config/nds32/nds32-n7.md: New file. * config/nds32/nds32-opts.h (nds32_cpu_type): Add CPU_N7. * config/nds32/nds32-pipelines-auxiliary.c: Implementation for n7 pipeline. * config/nds32/nds32-protos.h: More declarations for n7 pipeline. * config/nds32/nds32.md (pipeline_model): Add n7. * config/nds32/nds32.opt (mcpu): Support n7 pipeline cpus. * config/nds32/pipelines.md: Include n7 settings. Co-Authored-By: Chung-Ju Wu From-SVN: r259221 --- gcc/ChangeLog | 13 + gcc/config.gcc | 4 +- gcc/config/nds32/nds32-n7.md | 298 +++++++++++++++++++ gcc/config/nds32/nds32-opts.h | 1 + gcc/config/nds32/nds32-pipelines-auxiliary.c | 125 ++++++++ gcc/config/nds32/nds32-protos.h | 3 + gcc/config/nds32/nds32.md | 5 +- gcc/config/nds32/nds32.opt | 6 + gcc/config/nds32/pipelines.md | 6 + 9 files changed, 457 insertions(+), 4 deletions(-) create mode 100644 gcc/config/nds32/nds32-n7.md diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 970224ec842..0238883b143 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,16 @@ +2018-04-08 Kito Cheng + Chung-Ju Wu + + * config.gcc (nds32*-*-*): Check that n7 is valid to --with-cpu. + * config/nds32/nds32-n7.md: New file. + * config/nds32/nds32-opts.h (nds32_cpu_type): Add CPU_N7. + * config/nds32/nds32-pipelines-auxiliary.c: Implementation for n7 + pipeline. + * config/nds32/nds32-protos.h: More declarations for n7 pipeline. + * config/nds32/nds32.md (pipeline_model): Add n7. + * config/nds32/nds32.opt (mcpu): Support n7 pipeline cpus. + * config/nds32/pipelines.md: Include n7 settings. 
+ 2018-04-08 Kito Cheng Chung-Ju Wu diff --git a/gcc/config.gcc b/gcc/config.gcc index 6fed7bc7740..099c2f6fa83 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -4315,11 +4315,11 @@ case "${target}" in "") with_cpu=n9 ;; - n6 | n8 | e8 | s8 | n9) + n6 | n7 | n8 | e8 | s8 | n9) # OK ;; *) - echo "Cannot accept --with-cpu=$with_cpu, available values are: n6 n8 e8 s8 n9" 1>&2 + echo "Cannot accept --with-cpu=$with_cpu, available values are: n6 n7 n8 e8 s8 n9" 1>&2 exit 1 ;; esac diff --git a/gcc/config/nds32/nds32-n7.md b/gcc/config/nds32/nds32-n7.md new file mode 100644 index 00000000000..3e412676d7d --- /dev/null +++ b/gcc/config/nds32/nds32-n7.md @@ -0,0 +1,298 @@ +;; Pipeline descriptions of Andes NDS32 cpu for GNU compiler +;; Copyright (C) 2012-2018 Free Software Foundation, Inc. +;; Contributed by Andes Technology Corporation. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. +;; +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. + + +;; ------------------------------------------------------------------------ +;; Define N7 pipeline settings. 
+;; ------------------------------------------------------------------------ + +(define_automaton "nds32_n7_machine") + +;; ------------------------------------------------------------------------ +;; Pipeline Stages +;; ------------------------------------------------------------------------ +;; IF - Instruction Fetch +;; Instruction Alignment +;; Instruction Pre-decode +;; II - Instruction Issue +;; Instruction Decode +;; Register File Access +;; Instruction Execution +;; Interrupt Handling +;; EXD - Pseudo Stage +;; Load Data Completion + +(define_cpu_unit "n7_ii" "nds32_n7_machine") + +(define_insn_reservation "nds_n7_unknown" 1 + (and (eq_attr "type" "unknown") + (eq_attr "pipeline_model" "n7")) + "n7_ii") + +(define_insn_reservation "nds_n7_misc" 1 + (and (eq_attr "type" "misc") + (eq_attr "pipeline_model" "n7")) + "n7_ii") + +(define_insn_reservation "nds_n7_alu" 1 + (and (eq_attr "type" "alu") + (eq_attr "pipeline_model" "n7")) + "n7_ii") + +(define_insn_reservation "nds_n7_load" 1 + (and (match_test "nds32::load_single_p (insn)") + (eq_attr "pipeline_model" "n7")) + "n7_ii") + +(define_insn_reservation "nds_n7_store" 1 + (and (match_test "nds32::store_single_p (insn)") + (eq_attr "pipeline_model" "n7")) + "n7_ii") + +(define_insn_reservation "nds_n7_load_multiple_1" 1 + (and (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "1")) + (eq_attr "pipeline_model" "n7")) + "n7_ii") + +(define_insn_reservation "nds_n7_load_multiple_2" 1 + (and (ior (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "2")) + (match_test "nds32::load_double_p (insn)")) + (eq_attr "pipeline_model" "n7")) + "n7_ii*2") + +(define_insn_reservation "nds_n7_load_multiple_3" 1 + (and (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "3")) + (eq_attr "pipeline_model" "n7")) + "n7_ii*3") + +(define_insn_reservation "nds_n7_load_multiple_4" 1 + (and (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "4")) + (eq_attr "pipeline_model" "n7")) + "n7_ii*4") + 
+(define_insn_reservation "nds_n7_load_multiple_5" 1 + (and (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "5")) + (eq_attr "pipeline_model" "n7")) + "n7_ii*5") + +(define_insn_reservation "nds_n7_load_multiple_6" 1 + (and (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "6")) + (eq_attr "pipeline_model" "n7")) + "n7_ii*6") + +(define_insn_reservation "nds_n7_load_multiple_7" 1 + (and (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "7")) + (eq_attr "pipeline_model" "n7")) + "n7_ii*7") + +(define_insn_reservation "nds_n7_load_multiple_8" 1 + (and (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "8")) + (eq_attr "pipeline_model" "n7")) + "n7_ii*8") + +(define_insn_reservation "nds_n7_load_multiple_12" 1 + (and (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "12")) + (eq_attr "pipeline_model" "n7")) + "n7_ii*12") + +(define_insn_reservation "nds_n7_store_multiple_1" 1 + (and (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "1")) + (eq_attr "pipeline_model" "n7")) + "n7_ii") + +(define_insn_reservation "nds_n7_store_multiple_2" 1 + (and (ior (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "2")) + (match_test "nds32::store_double_p (insn)")) + (eq_attr "pipeline_model" "n7")) + "n7_ii*2") + +(define_insn_reservation "nds_n7_store_multiple_3" 1 + (and (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "3")) + (eq_attr "pipeline_model" "n7")) + "n7_ii*3") + +(define_insn_reservation "nds_n7_store_multiple_4" 1 + (and (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "4")) + (eq_attr "pipeline_model" "n7")) + "n7_ii*4") + +(define_insn_reservation "nds_n7_store_multiple_5" 1 + (and (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "5")) + (eq_attr "pipeline_model" "n7")) + "n7_ii*5") + +(define_insn_reservation "nds_n7_store_multiple_6" 1 + (and (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "6")) + (eq_attr "pipeline_model" "n7")) + "n7_ii*6") + +(define_insn_reservation 
"nds_n7_store_multiple_7" 1 + (and (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "7")) + (eq_attr "pipeline_model" "n7")) + "n7_ii*7") + +(define_insn_reservation "nds_n7_store_multiple_8" 1 + (and (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "8")) + (eq_attr "pipeline_model" "n7")) + "n7_ii*8") + +(define_insn_reservation "nds_n7_store_multiple_12" 1 + (and (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "12")) + (eq_attr "pipeline_model" "n7")) + "n7_ii*12") + +(define_insn_reservation "nds_n7_mul_fast" 1 + (and (match_test "nds32_mul_config != MUL_TYPE_SLOW") + (and (eq_attr "type" "mul") + (eq_attr "pipeline_model" "n7"))) + "n7_ii") + +(define_insn_reservation "nds_n7_mul_slow" 1 + (and (match_test "nds32_mul_config == MUL_TYPE_SLOW") + (and (eq_attr "type" "mul") + (eq_attr "pipeline_model" "n7"))) + "n7_ii*17") + +(define_insn_reservation "nds_n7_mac_fast" 1 + (and (match_test "nds32_mul_config != MUL_TYPE_SLOW") + (and (eq_attr "type" "mac") + (eq_attr "pipeline_model" "n7"))) + "n7_ii*2") + +(define_insn_reservation "nds_n7_mac_slow" 1 + (and (match_test "nds32_mul_config == MUL_TYPE_SLOW") + (and (eq_attr "type" "mac") + (eq_attr "pipeline_model" "n7"))) + "n7_ii*18") + +(define_insn_reservation "nds_n7_div" 1 + (and (eq_attr "type" "div") + (eq_attr "pipeline_model" "n7")) + "n7_ii*37") + +(define_insn_reservation "nds_n7_branch" 1 + (and (eq_attr "type" "branch") + (eq_attr "pipeline_model" "n7")) + "n7_ii") + +;; ------------------------------------------------------------------------ +;; Comment Notations and Bypass Rules +;; ------------------------------------------------------------------------ +;; Producers (LHS) +;; LD_!bi +;; Load data from the memory (without updating the base register) and +;; produce the loaded data. The result is ready at EXD. +;; LMW(N, M) +;; There are N micro-operations within an instruction that loads multiple +;; words. 
The result produced by the M-th micro-operation is sent to +;; consumers. The result is ready at EXD. If the base register should be +;; updated, an extra micro-operation is inserted to the sequence, and the +;; result is ready at II. +;; +;; Consumers (RHS) +;; ALU, MUL, DIV +;; Require operands at II. +;; MOVD44_E +;; A double-word move instruction needs two micro-operations because the +;; register port is 2R1W. The first micro-operation writes an even number +;; register, and the second micro-operation writes an odd number register. +;; Each input operand is required at II for each micro-operation. The letter +;; 'E' stands for even. +;; MAC_RaRb +;; A MAC instruction is separated into two micro-operations. The first +;; micro-operation does the multiplication, which requires operands Ra +;; and Rb at II. The second micro-operation does the accumulation, which +;; requires the operand Rt at II. +;; ADDR_IN_MOP(N) +;; Because the register port is 2R1W, some load/store instructions are +;; separated into many micro-operations. N denotes the address input is +;; required by the N-th micro-operation. Such operand is required at II. +;; ST_bi +;; A post-increment store instruction requires its data at II. +;; ST_!bi_RI +;; A store instruction with an immediate offset requires its data at II. +;; If the offset field is a register (ST_!bi_RR), the instruction will be +;; separated into two micro-operations, and the second one requires the +;; input operand at II in order to store it to the memory. +;; SMW(N, M) +;; There are N micro-operations within an instruction that stores multiple +;; words. Each M-th micro-operation requires its data at II. If the base +;; register should be updated, an extra micro-operation is inserted to the +;; sequence. +;; BR_COND +;; If a branch instruction is conditional, its input data is required at II. 
+ +;; LD_!bi +;; -> ALU, MOVD44_E, MUL, MAC_RaRb, DIV, BR, ADDR_IN_MOP(1), ST_bi, ST_!bi_RI, SMW(N, 1) +(define_bypass 2 + "nds_n7_load" + "nds_n7_alu,\ + nds_n7_mul_fast, nds_n7_mul_slow,\ + nds_n7_mac_fast, nds_n7_mac_slow,\ + nds_n7_div,\ + nds_n7_branch,\ + nds_n7_load, nds_n7_store,\ + nds_n7_load_multiple_1,nds_n7_load_multiple_2, nds_n7_load_multiple_3,\ + nds_n7_load_multiple_4,nds_n7_load_multiple_5, nds_n7_load_multiple_6,\ + nds_n7_load_multiple_7,nds_n7_load_multiple_8, nds_n7_load_multiple_12,\ + nds_n7_store_multiple_1,nds_n7_store_multiple_2, nds_n7_store_multiple_3,\ + nds_n7_store_multiple_4,nds_n7_store_multiple_5, nds_n7_store_multiple_6,\ + nds_n7_store_multiple_7,nds_n7_store_multiple_8, nds_n7_store_multiple_12" + "nds32_n7_load_to_ii_p" +) + +;; LMW(N, N) +;; -> ALU, MOVD44_E, MUL, MAC_RaRb, DIV, BR, ADDR_IN_MOP(1), ST_bi, ST_!bi_RI, SMW(N, 1) +(define_bypass 2 + "nds_n7_load_multiple_1,nds_n7_load_multiple_2, nds_n7_load_multiple_3,\ + nds_n7_load_multiple_4,nds_n7_load_multiple_5, nds_n7_load_multiple_6,\ + nds_n7_load_multiple_7,nds_n7_load_multiple_8, nds_n7_load_multiple_12" + "nds_n7_alu,\ + nds_n7_mul_fast, nds_n7_mul_slow,\ + nds_n7_mac_fast, nds_n7_mac_slow,\ + nds_n7_div,\ + nds_n7_branch,\ + nds_n7_load, nds_n7_store,\ + nds_n7_load_multiple_1,nds_n7_load_multiple_2, nds_n7_load_multiple_3,\ + nds_n7_load_multiple_4,nds_n7_load_multiple_5, nds_n7_load_multiple_6,\ + nds_n7_load_multiple_7,nds_n7_load_multiple_8, nds_n7_load_multiple_12,\ + nds_n7_store_multiple_1,nds_n7_store_multiple_2, nds_n7_store_multiple_3,\ + nds_n7_store_multiple_4,nds_n7_store_multiple_5, nds_n7_store_multiple_6,\ + nds_n7_store_multiple_7,nds_n7_store_multiple_8, nds_n7_store_multiple_12" + "nds32_n7_last_load_to_ii_p" +) diff --git a/gcc/config/nds32/nds32-opts.h b/gcc/config/nds32/nds32-opts.h index 8a19899e3da..5d7e1652749 100644 --- a/gcc/config/nds32/nds32-opts.h +++ b/gcc/config/nds32/nds32-opts.h @@ -38,6 +38,7 @@ enum nds32_arch_type enum 
nds32_cpu_type { CPU_N6, + CPU_N7, CPU_N8, CPU_E8, CPU_N9, diff --git a/gcc/config/nds32/nds32-pipelines-auxiliary.c b/gcc/config/nds32/nds32-pipelines-auxiliary.c index def793fa52e..a983238cdbb 100644 --- a/gcc/config/nds32/nds32-pipelines-auxiliary.c +++ b/gcc/config/nds32/nds32-pipelines-auxiliary.c @@ -344,6 +344,98 @@ using namespace nds32; using namespace nds32::scheduling; namespace { // anonymous namespace + +/* Check the dependency between the producer defining DEF_REG and CONSUMER + requiring input operand at II. */ +bool +n7_consumed_by_ii_dep_p (rtx_insn *consumer, rtx def_reg) +{ + rtx use_rtx; + + switch (get_attr_type (consumer)) + { + /* MOVD44_E */ + case TYPE_ALU: + if (movd44_even_dep_p (consumer, def_reg)) + return true; + + use_rtx = SET_SRC (PATTERN (consumer)); + break; + + case TYPE_MUL: + use_rtx = SET_SRC (PATTERN (consumer)); + break; + + case TYPE_MAC: + use_rtx = extract_mac_non_acc_rtx (consumer); + break; + + /* Some special instructions, divmodsi4 and udivmodsi4, produce two + results, the quotient and the remainder. It requires two micro- + operations in order to write two registers. We have to check the + dependency from the producer to the first micro-operation. 
*/ + case TYPE_DIV: + if (INSN_CODE (consumer) == CODE_FOR_divmodsi4 + || INSN_CODE (consumer) == CODE_FOR_udivmodsi4) + use_rtx = SET_SRC (parallel_element (consumer, 0)); + else + use_rtx = SET_SRC (PATTERN (consumer)); + break; + + case TYPE_LOAD: + /* ADDR_IN_bi_Ra, ADDR_IN_!bi */ + if (post_update_insn_p (consumer)) + use_rtx = extract_base_reg (consumer); + else + use_rtx = extract_mem_rtx (consumer); + break; + + case TYPE_STORE: + /* ADDR_IN_bi_Ra, ADDR_IN_!bi */ + if (post_update_insn_p (consumer)) + use_rtx = extract_base_reg (consumer); + else + use_rtx = extract_mem_rtx (consumer); + + if (reg_overlap_p (def_reg, use_rtx)) + return true; + + /* ST_bi, ST_!bi_RI */ + if (!post_update_insn_p (consumer) + && !immed_offset_p (extract_mem_rtx (consumer))) + return false; + + use_rtx = SET_SRC (PATTERN (consumer)); + break; + + case TYPE_LOAD_MULTIPLE: + use_rtx = extract_base_reg (consumer); + break; + + case TYPE_STORE_MULTIPLE: + /* ADDR_IN */ + use_rtx = extract_base_reg (consumer); + if (reg_overlap_p (def_reg, use_rtx)) + return true; + + /* SMW (N, 1) */ + use_rtx = extract_nth_access_rtx (consumer, 0); + break; + + case TYPE_BRANCH: + use_rtx = PATTERN (consumer); + break; + + default: + gcc_unreachable (); + } + + if (reg_overlap_p (def_reg, use_rtx)) + return true; + + return false; +} + /* Check the dependency between the producer defining DEF_REG and CONSUMER requiring input operand at AG (II). */ bool @@ -657,6 +749,39 @@ n9_3r2w_consumed_by_ex_dep_p (rtx_insn *consumer, rtx def_reg) /* ------------------------------------------------------------------------ */ +/* Guard functions for N7 core. 
*/ + +bool +nds32_n7_load_to_ii_p (rtx_insn *producer, rtx_insn *consumer) +{ + if (post_update_insn_p (producer)) + return false; + + rtx def_reg = SET_DEST (PATTERN (producer)); + + return n7_consumed_by_ii_dep_p (consumer, def_reg); +} + +bool +nds32_n7_last_load_to_ii_p (rtx_insn *producer, rtx_insn *consumer) +{ + /* If PRODUCER is a post-update LMW insn, the last micro-operation updates + the base register and the result is ready in II stage, so we don't need + to handle that case in this guard function and the corresponding bypass + rule. */ + if (post_update_insn_p (producer)) + return false; + + rtx last_def_reg = extract_nth_access_reg (producer, -1); + + if (last_def_reg == NULL_RTX) + return false; + + gcc_assert (REG_P (last_def_reg) || GET_CODE (last_def_reg) == SUBREG); + + return n7_consumed_by_ii_dep_p (consumer, last_def_reg); +} + /* Guard functions for N8 core. */ bool diff --git a/gcc/config/nds32/nds32-protos.h b/gcc/config/nds32/nds32-protos.h index 5aefa0700e8..b7522f1ed7d 100644 --- a/gcc/config/nds32/nds32-protos.h +++ b/gcc/config/nds32/nds32-protos.h @@ -99,6 +99,9 @@ extern bool nds32_valid_multiple_load_store_p (rtx, bool, bool); /* Auxiliary functions for guard function checking in pipelines.md. */ +extern bool nds32_n7_load_to_ii_p (rtx_insn *, rtx_insn *); +extern bool nds32_n7_last_load_to_ii_p (rtx_insn *, rtx_insn *); + extern bool nds32_n8_load_to_ii_p (rtx_insn *, rtx_insn *); extern bool nds32_n8_load_bi_to_ii_p (rtx_insn *, rtx_insn *); extern bool nds32_n8_load_to_ex_p (rtx_insn *, rtx_insn *); diff --git a/gcc/config/nds32/nds32.md b/gcc/config/nds32/nds32.md index 1bc36e4c139..3b8107e8fbf 100644 --- a/gcc/config/nds32/nds32.md +++ b/gcc/config/nds32/nds32.md @@ -56,9 +56,10 @@ ;; ------------------------------------------------------------------------ ;; CPU pipeline model. 
-(define_attr "pipeline_model" "n8,e8,n9,simple" +(define_attr "pipeline_model" "n7,n8,e8,n9,simple" (const - (cond [(match_test "nds32_cpu_option == CPU_E8") (const_string "e8") + (cond [(match_test "nds32_cpu_option == CPU_N7") (const_string "n7") + (match_test "nds32_cpu_option == CPU_E8") (const_string "e8") (match_test "nds32_cpu_option == CPU_N6 || nds32_cpu_option == CPU_N8") (const_string "n8") (match_test "nds32_cpu_option == CPU_N9") (const_string "n9") (match_test "nds32_cpu_option == CPU_SIMPLE") (const_string "simple")] diff --git a/gcc/config/nds32/nds32.opt b/gcc/config/nds32/nds32.opt index f5f31eb21b9..4968b74ab45 100644 --- a/gcc/config/nds32/nds32.opt +++ b/gcc/config/nds32/nds32.opt @@ -180,6 +180,12 @@ Enum(nds32_cpu_type) String(n6) Value(CPU_N6) EnumValue Enum(nds32_cpu_type) String(n650) Value(CPU_N6) +EnumValue +Enum(nds32_cpu_type) String(n7) Value(CPU_N7) + +EnumValue +Enum(nds32_cpu_type) String(n705) Value(CPU_N7) + EnumValue Enum(nds32_cpu_type) String(n8) Value(CPU_N8) diff --git a/gcc/config/nds32/pipelines.md b/gcc/config/nds32/pipelines.md index 03701fce2ee..34288076f42 100644 --- a/gcc/config/nds32/pipelines.md +++ b/gcc/config/nds32/pipelines.md @@ -18,6 +18,12 @@ ;; along with GCC; see the file COPYING3. If not see ;; . +;; ------------------------------------------------------------------------ +;; Include N7 pipeline settings. +;; ------------------------------------------------------------------------ +(include "nds32-n7.md") + + ;; ------------------------------------------------------------------------ ;; Include N8 pipeline settings. ;; ------------------------------------------------------------------------ -- 2.30.2