From: Claudiu Zissulescu Date: Wed, 31 Oct 2018 11:27:46 +0000 (+0100) Subject: [ARC] Handle store cacheline hazard. X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=635aeaa20ffdf6e794f180cc8e053e8dc46db760;p=gcc.git [ARC] Handle store cacheline hazard. Handle store cacheline hazard for A700 cpus by inserting two NOP_S between ST ST LD or their logical equivalent (like ST ST NOP_S NOP_S J_L.D LD) gcc/ xxxx-xx-xx Claudiu Zissulescu * config/arc/arc-arch.h (ARC_TUNE_ARC7XX): New tune value. * config/arc/arc.c (arc_active_insn): New function. (check_store_cacheline_hazard): Likewise. (workaround_arc_anomaly): Use check_store_cacheline_hazard. (arc_override_options): Disable delay slot scheduler for older A7. (arc_store_addr_hazard_p): New implementation, old one renamed to ... (arc_store_addr_hazard_internal_p): Renamed. (arc_reorg): Don't combine into brcc instructions which are part of hardware hazard solution. * config/arc/arc.md (attr tune): Consider new arc7xx tune value. (tune_arc700): Likewise. * config/arc/arc.opt (arc7xx): New tune value. * config/arc/arc700.md: Improve A7 scheduler. From-SVN: r265676 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 051d4620998..289bb5a9222 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,21 @@ +2018-10-31 Claudiu Zissulescu + + * config/arc/arc-arch.h (ARC_TUNE_ARC7XX): New tune value. + * config/arc/arc.c (arc_active_insn): New function. + (check_store_cacheline_hazard): Likewise. + (workaround_arc_anomaly): Use check_store_cacheline_hazard. + (arc_override_options): Disable delay slot scheduler for older + A7. + (arc_store_addr_hazard_p): New implementation, old one renamed to + ... + (arc_store_addr_hazard_internal_p): Renamed. + (arc_reorg): Don't combine into brcc instructions which are part + of hardware hazard solution. + * config/arc/arc.md (attr tune): Consider new arc7xx tune value. + (tune_arc700): Likewise. + * config/arc/arc.opt (arc7xx): New tune value. + * config/arc/arc700.md: Improve A7 scheduler. + 2018-10-31 Claudiu Zissulescu * config/arc/arc.c (arc_override_options): Remove diff --git a/gcc/config/arc/arc-arch.h b/gcc/config/arc/arc-arch.h index 859af0684b8..ad540607e55 100644 --- a/gcc/config/arc/arc-arch.h +++ b/gcc/config/arc/arc-arch.h @@ -71,6 +71,7 @@ enum arc_tune_attr { ARC_TUNE_NONE, ARC_TUNE_ARC600, + ARC_TUNE_ARC7XX, ARC_TUNE_ARC700_4_2_STD, ARC_TUNE_ARC700_4_2_XMAC, ARC_TUNE_CORE_3, diff --git a/gcc/config/arc/arc.c b/gcc/config/arc/arc.c index 00ec77d120b..277b546d3d3 100644 --- a/gcc/config/arc/arc.c +++ b/gcc/config/arc/arc.c @@ -1303,6 +1303,10 @@ arc_override_options (void) if (!global_options_set.x_g_switch_value && !TARGET_NO_SDATA_SET) g_switch_value = TARGET_LL64 ? 8 : 4; + /* A7 has an issue with delay slots. */ + if (TARGET_ARC700 && (arc_tune != ARC_TUNE_ARC7XX)) + flag_delayed_branch = 0; + /* These need to be done at start up. It's convenient to do them here. */ arc_init (); } @@ -7137,11 +7141,90 @@ arc_invalid_within_doloop (const rtx_insn *insn) return NULL; } +/* Return the next active insn, skiping the inline assembly code. */ + +static rtx_insn * +arc_active_insn (rtx_insn *insn) +{ + rtx_insn *nxt = next_active_insn (insn); + + if (nxt && GET_CODE (PATTERN (nxt)) == ASM_INPUT) + nxt = next_active_insn (nxt); + return nxt; +} + +/* Search for a sequence made out of two stores and a given number of + loads, insert a nop if required. */ + +static void +check_store_cacheline_hazard (void) +{ + rtx_insn *insn, *succ0, *insn1; + bool found = false; + + for (insn = get_insns (); insn; insn = arc_active_insn (insn)) + { + succ0 = arc_active_insn (insn); + + if (!succ0) + return; + + if (!single_set (insn) || !single_set (succ0)) + continue; + + if ((get_attr_type (insn) != TYPE_STORE) + || (get_attr_type (succ0) != TYPE_STORE)) + continue; + + /* Found at least two consecutive stores. Goto the end of the + store sequence. */ + for (insn1 = succ0; insn1; insn1 = arc_active_insn (insn1)) + if (!single_set (insn1) || get_attr_type (insn1) != TYPE_STORE) + break; + + /* Now, check the next two instructions for the following cases: + 1. next instruction is a LD => insert 2 nops between store + sequence and load. + 2. next-next instruction is a LD => inset 1 nop after the store + sequence. */ + if (insn1 && single_set (insn1) + && (get_attr_type (insn1) == TYPE_LOAD)) + { + found = true; + emit_insn_before (gen_nopv (), insn1); + emit_insn_before (gen_nopv (), insn1); + } + else + { + if (insn1 && (get_attr_type (insn1) == TYPE_COMPARE)) + { + /* REG_SAVE_NOTE is used by Haifa scheduler, we are in + reorg, so it is safe to reuse it for avoiding the + current compare insn to be part of a BRcc + optimization. */ + add_reg_note (insn1, REG_SAVE_NOTE, GEN_INT (3)); + } + insn1 = arc_active_insn (insn1); + if (insn1 && single_set (insn1) + && (get_attr_type (insn1) == TYPE_LOAD)) + { + found = true; + emit_insn_before (gen_nopv (), insn1); + } + } + + insn = insn1; + if (found) + found = false; + } +} + /* Return true if a load instruction (CONSUMER) uses the same address as a store instruction (PRODUCER). This function is used to avoid st/ld address hazard in ARC700 cores. */ -bool -arc_store_addr_hazard_p (rtx_insn* producer, rtx_insn* consumer) + +static bool +arc_store_addr_hazard_internal_p (rtx_insn* producer, rtx_insn* consumer) { rtx in_set, out_set; rtx out_addr, in_addr; @@ -7189,6 +7272,16 @@ arc_store_addr_hazard_p (rtx_insn* producer, rtx_insn* consumer) return false; } +/* Return TRUE is we have an store address hazard. */ + +bool +arc_store_addr_hazard_p (rtx_insn* producer, rtx_insn* consumer) +{ + if (TARGET_ARC700 && (arc_tune != ARC_TUNE_ARC7XX)) + return true; + return arc_store_addr_hazard_internal_p (producer, consumer); +} + /* The same functionality as arc_hazard. It is called in machine reorg before any other optimization. Hence, the NOP size is taken into account when doing branch shortening. */ @@ -7197,6 +7290,7 @@ static void workaround_arc_anomaly (void) { rtx_insn *insn, *succ0; + rtx_insn *succ1; /* For any architecture: call arc_hazard here. */ for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) @@ -7208,27 +7302,30 @@ workaround_arc_anomaly (void) } } - if (TARGET_ARC700) - { - rtx_insn *succ1; + if (!TARGET_ARC700) + return; - for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) - { - succ0 = next_real_insn (insn); - if (arc_store_addr_hazard_p (insn, succ0)) - { - emit_insn_after (gen_nopv (), insn); - emit_insn_after (gen_nopv (), insn); - continue; - } + /* Old A7 are suffering of a cache hazard, and we need to insert two + nops between any sequence of stores and a load. */ + if (arc_tune != ARC_TUNE_ARC7XX) + check_store_cacheline_hazard (); - /* Avoid adding nops if the instruction between the ST and LD is - a call or jump. */ - succ1 = next_real_insn (succ0); - if (succ0 && !JUMP_P (succ0) && !CALL_P (succ0) - && arc_store_addr_hazard_p (insn, succ1)) - emit_insn_after (gen_nopv (), insn); + for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) + { + succ0 = next_real_insn (insn); + if (arc_store_addr_hazard_internal_p (insn, succ0)) + { + emit_insn_after (gen_nopv (), insn); + emit_insn_after (gen_nopv (), insn); + continue; } + + /* Avoid adding nops if the instruction between the ST and LD is + a call or jump. */ + succ1 = next_real_insn (succ0); + if (succ0 && !JUMP_P (succ0) && !CALL_P (succ0) + && arc_store_addr_hazard_internal_p (insn, succ1)) + emit_insn_after (gen_nopv (), insn); } } @@ -7866,11 +7963,15 @@ arc_reorg (void) if (!link_insn) continue; else - /* Check if this is a data dependency. */ { + /* Check if this is a data dependency. */ rtx op, cc_clob_rtx, op0, op1, brcc_insn, note; rtx cmp0, cmp1; + /* Make sure we can use it for brcc insns. */ + if (find_reg_note (link_insn, REG_SAVE_NOTE, GEN_INT (3))) + continue; + /* Ok this is the set cc. copy args here. */ op = XEXP (pc_target, 0); diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md index 8afc40ff968..2c9de8d4fd6 100644 --- a/gcc/config/arc/arc.md +++ b/gcc/config/arc/arc.md @@ -600,11 +600,13 @@ ;; somehow modify them to become inelegible for delay slots if a decision ;; is made that makes conditional execution required. -(define_attr "tune" "none,arc600,arc700_4_2_std,arc700_4_2_xmac, core_3, \ -archs4x, archs4xd, archs4xd_slow" +(define_attr "tune" "none,arc600,arc7xx,arc700_4_2_std,arc700_4_2_xmac, \ +core_3, archs4x, archs4xd, archs4xd_slow" (const (cond [(symbol_ref "arc_tune == TUNE_ARC600") (const_string "arc600") + (symbol_ref "arc_tune == ARC_TUNE_ARC7XX") + (const_string "arc7xx") (symbol_ref "arc_tune == TUNE_ARC700_4_2_STD") (const_string "arc700_4_2_std") (symbol_ref "arc_tune == TUNE_ARC700_4_2_XMAC") @@ -619,7 +621,7 @@ archs4x, archs4xd, archs4xd_slow" (const_string "none")))) (define_attr "tune_arc700" "false,true" - (if_then_else (eq_attr "tune" "arc700_4_2_std, arc700_4_2_xmac") + (if_then_else (eq_attr "tune" "arc7xx, arc700_4_2_std, arc700_4_2_xmac") (const_string "true") (const_string "false"))) diff --git a/gcc/config/arc/arc.opt b/gcc/config/arc/arc.opt index ad72d0414c9..d3c283e7c07 100644 --- a/gcc/config/arc/arc.opt +++ b/gcc/config/arc/arc.opt @@ -262,6 +262,9 @@ Enum(arc_tune_attr) String(arc600) Value(ARC_TUNE_ARC600) EnumValue Enum(arc_tune_attr) String(arc601) Value(ARC_TUNE_ARC600) +EnumValue +Enum(arc_tune_attr) String(arc7xx) Value(ARC_TUNE_ARC7XX) + EnumValue Enum(arc_tune_attr) String(arc700) Value(ARC_TUNE_ARC700_4_2_STD) diff --git a/gcc/config/arc/arc700.md b/gcc/config/arc/arc700.md index a0f9f74a9f2..cbb868d8dcd 100644 --- a/gcc/config/arc/arc700.md +++ b/gcc/config/arc/arc700.md @@ -145,28 +145,14 @@ ; no functional unit runs when blockage is reserved (exclusion_set "blockage" "core, multiplier") -(define_insn_reservation "data_load_DI" 4 - (and (eq_attr "tune_arc700" "true") - (eq_attr "type" "load") - (match_operand:DI 0 "" "")) - "issue+dmp, issue+dmp, dmp_write_port, dmp_write_port") - (define_insn_reservation "data_load" 3 (and (eq_attr "tune_arc700" "true") - (eq_attr "type" "load") - (not (match_operand:DI 0 "" ""))) + (eq_attr "type" "load")) "issue+dmp, nothing, dmp_write_port") -(define_insn_reservation "data_store_DI" 2 - (and (eq_attr "tune_arc700" "true") - (eq_attr "type" "store") - (match_operand:DI 0 "" "")) - "issue+dmp_write_port, issue+dmp_write_port") - (define_insn_reservation "data_store" 1 (and (eq_attr "tune_arc700" "true") - (eq_attr "type" "store") - (not (match_operand:DI 0 "" ""))) + (eq_attr "type" "store")) "issue+dmp_write_port") (define_bypass 3 "data_store" "data_load" "arc_store_addr_hazard_p")