extern void arm_decompose_di_binop (rtx, rtx, rtx *, rtx *, rtx *, rtx *);
extern bool arm_q_bit_access (void);
extern bool arm_ge_bits_access (void);
+extern bool arm_target_insn_ok_for_lob (rtx);
#ifdef RTX_CODE
enum reg_class
#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT arm_constant_alignment
+#undef TARGET_INVALID_WITHIN_DOLOOP
+#define TARGET_INVALID_WITHIN_DOLOOP arm_invalid_within_doloop
+
#undef TARGET_MD_ASM_ADJUST
#define TARGET_MD_ASM_ADJUST arm_md_asm_adjust
\f
return true;
}
+/* Implement TARGET_INVALID_WITHIN_DOLOOP.  Return NULL if INSN may appear
+   inside a low-overhead loop; otherwise return a string saying why the
+   doloop transformation cannot be applied.  */
+
+static const char *
+arm_invalid_within_doloop (const rtx_insn *insn)
+{
+  const char *reason = NULL;
+
+  if (!TARGET_HAVE_LOB)
+    /* Without the LOB extension defer to the generic hook.  */
+    reason = default_invalid_within_doloop (insn);
+  else if (CALL_P (insn))
+    reason = "Function call in the loop.";
+  else if (reg_mentioned_p (gen_rtx_REG (SImode, LR_REGNUM), insn))
+    /* The DLS and LE instructions use LR as the loop counter, so any
+       insn that mentions LR rules the loop out.  */
+    reason = "LR is used inside loop.";
+
+  return reason;
+}
+
+/* Return true if the basic block holding INSN is a simple latch: it has
+   exactly one predecessor and one successor, both are the same block
+   (the loop body), and it contains no active insn.  Only simple loops
+   whose body is a single basic block are supported for low overhead
+   loops, which makes sure that the LE target is above LE itself in the
+   generated code.  */
+bool
+arm_target_insn_ok_for_lob (rtx insn)
+{
+  basic_block latch = BLOCK_FOR_INSN (insn);
+
+  if (!single_succ_p (latch) || !single_pred_p (latch))
+    return false;
+
+  /* The latch must jump back to the very block it was entered from.  */
+  if (single_succ_edge (latch)->dest != single_pred_edge (latch)->src)
+    return false;
+
+  return contains_no_active_insn_p (latch);
+}
+
#if CHECKING_P
namespace selftest {
\f
/* Target machine storage Layout. */
+/* Nonzero if this chip provides Armv8.1-M Mainline
+ LOB (low overhead branch features) extension instructions. */
+#define TARGET_HAVE_LOB (arm_arch8_1m_main)
/* Define this macro if it is advisable to hold scalars in registers
in a wider mode than that declared by the program. In such cases,
using a certain 'count' register and (2) the loop count can be
adjusted by modifying this register prior to the loop.
??? The possible introduction of a new block to initialize the
- new IV can potentially affect branch optimizations. */
- if (optimize > 0 && flag_modulo_sched)
+ new IV can potentially affect branch optimizations.
+
+ Also used to implement the low over head loops feature, which is part of
+ the Armv8.1-M Mainline Low Overhead Branch (LOB) extension. */
+ if (optimize > 0 && (flag_modulo_sched || TARGET_HAVE_LOB))
{
rtx s0;
rtx bcomp;
FAIL;
s0 = operands [0];
+
+ /* Low over head loop instructions require the first operand to be LR. */
+ if (TARGET_HAVE_LOB && arm_target_insn_ok_for_lob (operands [1]))
+ s0 = gen_rtx_REG (SImode, LR_REGNUM);
+
if (TARGET_THUMB2)
insn = emit_insn (gen_thumb2_addsi3_compare0 (s0, s0, GEN_INT (-1)));
else
gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
loc_ref, pc_rtx)));
DONE;
- }else
- FAIL;
+ }
+ else
+ FAIL;
}")
(define_insn "*clear_apsr"
"TARGET_HAVE_MVE"
"lsrl%?\\t%Q0, %R0, %1"
[(set_attr "predicable" "yes")])
+
+;; Originally expanded by 'doloop_end'.
+;; Matches, as one parallel, a branch to label operand 0 taken while LR is
+;; not equal to 1 together with the decrement of LR by 1, and outputs a
+;; single LE instruction.  Enabled for 32-bit targets with TARGET_HAVE_LOB.
+(define_insn "*doloop_end_internal"
+ [(parallel [(set (pc)
+ (if_then_else
+ (ne (reg:SI LR_REGNUM) (const_int 1))
+ (label_ref (match_operand 0 "" ""))
+ (pc)))
+ (set (reg:SI LR_REGNUM)
+ (plus:SI (reg:SI LR_REGNUM) (const_int -1)))])]
+ "TARGET_32BIT && TARGET_HAVE_LOB"
+ "le\t%|lr, %l0")
+
+;; Expander for 'doloop_begin'.  When operand 0 is the LR register it emits
+;; the DLS instruction through 'dls_insn'; for any other register the
+;; expansion FAILs.  Operand 1 is not used by this expander.
+(define_expand "doloop_begin"
+ [(match_operand 0 "" "")
+ (match_operand 1 "" "")]
+ "TARGET_32BIT && TARGET_HAVE_LOB"
+ {
+ if (REGNO (operands[0]) == LR_REGNUM)
+ {
+ emit_insn (gen_dls_insn (operands[0]));
+ DONE;
+ }
+ else
+ FAIL;
+ })
+
+;; Set LR from the SImode register operand 0, wrapped in the UNSPEC_DLS
+;; unspec, and output it as a DLS instruction.  Enabled for 32-bit targets
+;; with TARGET_HAVE_LOB.
+(define_insn "dls_insn"
+ [(set (reg:SI LR_REGNUM)
+ (unspec:SI [(match_operand:SI 0 "s_register_operand" "r")] UNSPEC_DLS))]
+ "TARGET_32BIT && TARGET_HAVE_LOB"
+ "dls\t%|lr, %0")
UNSPEC_CDEA ; Custom Datapath Extension instruction.
UNSPEC_VCDE ; Custom Datapath Extension instruction.
UNSPEC_VCDEA ; Custom Datapath Extension instruction.
+ UNSPEC_DLS ; Used for DLS (Do Loop Start), Armv8.1-M Mainline instruction
])
@anchor{arm_hard_ok}
ARM target supports the @code{-mfloat-abi=hard} option.
+@item arm_v8_1_lob_ok
+@anchor{arm_v8_1_lob_ok}
+ARM target supports executing the Armv8.1-M Mainline Low Overhead Loop
+instructions @code{DLS} and @code{LE}.
+Some multilibs may be incompatible with these options.
+
+@item arm_thumb2_ok_no_arm_v8_1_lob
+ARM target generates Thumb-2 code for @code{-mthumb} but does not
+support executing the Armv8.1-M Mainline Low Overhead Loop
+instructions @code{DLS} and @code{LE}.
+
@end table
@subsubsection AArch64-specific attributes
}
/* { dg-final { scan-tree-dump-times "PHI <" 1 "ivopts"} } */
-/* { dg-final { object-size text <= 20 { target arm_thumb2 } } } */
+/* { dg-final { object-size text <= 20 { target { arm_thumb2_ok_no_arm_v8_1_lob } } } } */
/* { dg-final { object-size text <= 32 { target { arm_nothumb && { ! arm_iwmmxt_ok } } } } } */
/* { dg-final { object-size text <= 36 { target { arm_nothumb && arm_iwmmxt_ok } } } } */
--- /dev/null
+#include <string.h>
+
+/* Common code for lob tests. */
+
+#define NO_LOB asm volatile ("@ clobber lr" : : : "lr" )
+
+#define N 10000
+
+/* Fill the N-element arrays A, B and C with the byte value produced by
+   memset's -1 argument, so every element reads back as -1.  */
+static void
+reset_data (int *a, int *b, int *c)
+{
+  const size_t nbytes = N * sizeof (int);
+
+  memset (a, -1, nbytes);
+  memset (b, -1, nbytes);
+  memset (c, -1, nbytes);
+}
--- /dev/null
+/* Check that GCC generates Armv8.1-M low over head loop instructions
+ for some simple loops. */
+/* { dg-do run } */
+/* { dg-require-effective-target arm_v8_1_lob_ok } */
+/* { dg-skip-if "avoid conflicting multilib options" { *-*-* } { "-marm" "-mcpu=*" } } */
+/* { dg-options "-march=armv8.1-m.main -O3 --save-temps" } */
+#include <stdlib.h>
+#include "lob.h"
+
+int a[N];
+int b[N];
+int c[N];
+
+/* Return the sum of A and B.  Nothing in the visible part of this test
+   calls it.  */
+int
+foo (int a, int b)
+{
+ return a + b;
+}
+
+/* 'for' loop variant: for every i in [0, N) store i in A[i], i * 2 in
+   B[i] and their sum in C[i].  */
+void __attribute__((noinline))
+loop1 (int *a, int *b, int *c)
+{
+ for (int i = 0; i < N; i++)
+ {
+ a[i] = i;
+ b[i] = i * 2;
+ c[i] = a[i] + b[i];
+ }
+}
+
+/* 'while' loop variant: for every i in [0, N) store i - 2 in A[i], i * 5
+   in B[i] and their sum in C[i].  */
+void __attribute__((noinline))
+loop2 (int *a, int *b, int *c)
+{
+ int i = 0;
+ while (i < N)
+ {
+ a[i] = i - 2;
+ b[i] = i * 5;
+ c[i] = a[i] + b[i];
+ i++;
+ }
+}
+
+/* 'do ... while' loop variant: for every i in [0, N) store i - 4 in A[i],
+   i * 3 in B[i] and their sum in C[i].  */
+void __attribute__((noinline))
+loop3 (int *a, int *b, int *c)
+{
+ int i = 0;
+ do
+ {
+ a[i] = i - 4;
+ b[i] = i * 3;
+ c[i] = a[i] + b[i];
+ i++;
+ } while (i < N);
+}
+
+/* Abort if, for any i in [0, N), C[i] is not A[i] + B[i].  */
+void
+check (int *a, int *b, int *c)
+{
+ for (int i = 0; i < N; i++)
+ {
+ /* NO_LOB (from lob.h) is an asm statement that clobbers lr; presumably
+    it keeps this loop out of the dls/le counts scanned below.  */
+ NO_LOB;
+ if (c[i] != a[i] + b[i])
+ abort ();
+ }
+}
+
+/* Run loop1, loop2 and loop3 in turn on the global arrays, resetting the
+   arrays before each run and validating the result after it.  Returns 0
+   when no check aborted.  */
+int
+main (void)
+{
+ reset_data (a, b, c);
+ loop1 (a, b ,c);
+ check (a, b ,c);
+ reset_data (a, b, c);
+ loop2 (a, b ,c);
+ check (a, b ,c);
+ reset_data (a, b, c);
+ loop3 (a, b ,c);
+ check (a, b ,c);
+
+ return 0;
+}
+
+/* { dg-final { scan-assembler-times {dls\s\S*,\s\S*} 3 } } */
+/* { dg-final { scan-assembler-times {le\slr,\s\S*} 3 } } */
--- /dev/null
+/* Check that GCC does not generate Armv8.1-M low over head loop instructions
+ if a non-inlineable function call takes place inside the loop. */
+/* { dg-do compile } */
+/* { dg-skip-if "avoid conflicting multilib options" { *-*-* } { "-marm" "-mcpu=*" } } */
+/* { dg-options "-march=armv8.1-m.main -O3 --save-temps" } */
+#include <stdlib.h>
+#include "lob.h"
+
+int a[N];
+int b[N];
+int c[N];
+
+/* Return the sum of A and B.  Declared noinline so that the call made from
+   the loop in main stays a real function call.  */
+int __attribute__ ((noinline))
+foo (int a, int b)
+{
+ return a + b;
+}
+
+/* Fill a[i] with i and b[i] with i * 2, and compute c[i] through a call to
+   the non-inlineable foo on every iteration.  */
+int
+main (void)
+{
+ for (int i = 0; i < N; i++)
+ {
+ a[i] = i;
+ b[i] = i * 2;
+ c[i] = foo (a[i], b[i]);
+ }
+
+ return 0;
+}
+/* { dg-final { scan-assembler-not {dls\s\S*,\s\S*} } } */
+/* { dg-final { scan-assembler-not {le\slr,\s\S*} } } */
--- /dev/null
+/* Check that GCC does not generate Armv8.1-M low over head loop instructions
+ if VFP emulation library calls happen inside the loop. */
+/* { dg-do compile } */
+/* { dg-skip-if "avoid conflicting multilib options" { *-*-* } { "-marm" "-mcpu=*" } } */
+/* { dg-options "-march=armv8.1-m.main -O3 --save-temps -mfloat-abi=soft" } */
+/* { dg-require-effective-target arm_softfloat } */
+#include <stdlib.h>
+#include "lob.h"
+
+double a[N];
+double b[N];
+double c[N];
+
+/* Same loop shape as the integer tests, but on arrays of double: store i
+   in a[i], i * 2 in b[i] and their sum in c[i].  The test is built with
+   -mfloat-abi=soft (see dg-options above).  */
+int
+main (void)
+{
+ for (int i = 0; i < N; i++)
+ {
+ a[i] = i;
+ b[i] = i * 2;
+ c[i] = a[i] + b[i];
+ }
+
+ return 0;
+}
+/* { dg-final { scan-assembler-not {dls\s\S*,\s\S*} } } */
+/* { dg-final { scan-assembler-not {le\slr,\s\S*} } } */
--- /dev/null
+/* Check that GCC does not generate Armv8.1-M low over head loop instructions
+ if LR is modified within the loop. */
+/* { dg-do compile } */
+/* { dg-skip-if "avoid conflicting multilib options" { *-*-* } { "-marm" "-mcpu=*" } } */
+/* { dg-options "-march=armv8.1-m.main -O3 --save-temps -mfloat-abi=soft" } */
+/* { dg-require-effective-target arm_softfloat } */
+#include <stdlib.h>
+#include "lob.h"
+
+int a[N];
+int b[N];
+int c[N];
+
+/* Return the sum of A and B after executing NO_LOB, the lr-clobbering asm
+   statement from lob.h.  always_inline puts that clobber into the body of
+   every caller.  */
+static __attribute__ ((always_inline)) inline int
+foo (int a, int b)
+{
+ NO_LOB;
+ return a + b;
+}
+
+/* Fill a[i] with i and b[i] with i * 2, and compute c[i] through the
+   always-inlined foo, which clobbers lr on every iteration.  */
+int
+main (void)
+{
+ for (int i = 0; i < N; i++)
+ {
+ a[i] = i;
+ b[i] = i * 2;
+ c[i] = foo(a[i], b[i]);
+ }
+
+ return 0;
+}
+/* { dg-final { scan-assembler-not {dls\s\S*,\s\S*} } } */
+/* { dg-final { scan-assembler-not {le\slr,\s\S*} } } */
--- /dev/null
+/* Check that GCC does not generate Armv8.1-M low over head loop
+ instructions. Innermost loop has no fixed number of iterations
+ therefore is not optimizable. Outer loops are not optimized. */
+/* { dg-do compile } */
+/* { dg-skip-if "avoid conflicting multilib options" { *-*-* } { "-marm" "-mcpu=*" } } */
+/* { dg-options "-march=armv8.1-m.main -O3 --save-temps" } */
+#include <stdlib.h>
+#include "lob.h"
+
+int a[N];
+int b[N];
+int c[N];
+
+/* Outer loop over i in [0, N) with a nested 'while' loop whose trip count
+   depends on the data: k starts at b[i] and is halved until it reaches 0.
+   After the inner loop c[i] is set to a[i] - b[i].  */
+int
+main (void)
+{
+ for (int i = 0; i < N; i++)
+ {
+ a[i] = i;
+ b[i] = i * 2;
+
+ /* For i == 0, k is 0 and the inner loop is skipped, so c[i - 1] is
+    never reached with i == 0.  */
+ int k = b[i];
+ while (k != 0)
+ {
+ if (k % 2 == 0)
+ c[i - 1] = k % 2;
+ k /= 2;
+ }
+ c[i] = a[i] - b[i];
+ }
+
+ return 0;
+}
+/* { dg-final { scan-assembler-not {dls\s\S*,\s\S*} } } */
+/* { dg-final { scan-assembler-not {le\slr,\s\S*} } } */
--- /dev/null
+/* Check that GCC generates Armv8.1-M low over head loop instructions
+ with some less trivial loops and the result is correct. */
+/* { dg-do run } */
+/* { dg-require-effective-target arm_v8_1_lob_ok } */
+/* { dg-skip-if "avoid conflicting multilib options" { *-*-* } { "-marm" "-mcpu=*" } } */
+/* { dg-options "-march=armv8.1-m.main -O3 --save-temps" } */
+#include <stdlib.h>
+#include "lob.h"
+
+/* Function body shared by loop1 and ref1.  For every i in [0, N) it sets
+   a[i] and b[i], rewrites all of c[0..N-1] in a nested loop, then stores
+   a[i] - b[i] in c[i].  MAYBE_LOB is expanded where TEST_CODE1 is used, so
+   its effect depends on how MAYBE_LOB is defined at that point.  */
+#define TEST_CODE1 \
+ { \
+ for (int i = 0; i < N; i++) \
+ { \
+ a[i] = i; \
+ b[i] = i * 2; \
+ \
+ for (int k = 0; k < N; k++) \
+ { \
+ MAYBE_LOB; \
+ c[k] = k / 2; \
+ } \
+ c[i] = a[i] - b[i]; \
+ } \
+ }
+
+/* Function body shared by loop2 and ref2.  Walks i over [0, N / 2),
+   leaving the loop early at the first even c[i]; otherwise it increments
+   a[i] and b[i].  MAYBE_LOB is expanded where TEST_CODE2 is used.  */
+#define TEST_CODE2 \
+ { \
+ for (int i = 0; i < N / 2; i++) \
+ { \
+ MAYBE_LOB; \
+ if (c[i] % 2 == 0) \
+ break; \
+ a[i]++; \
+ b[i]++; \
+ } \
+ }
+
+int a1[N];
+int b1[N];
+int c1[N];
+
+int a2[N];
+int b2[N];
+int c2[N];
+
+#define MAYBE_LOB
+/* TEST_CODE1 compiled while MAYBE_LOB expands to nothing.  */
+void __attribute__((noinline))
+loop1 (int *a, int *b, int *c)
+ TEST_CODE1;
+
+/* TEST_CODE2 compiled while MAYBE_LOB expands to nothing.  */
+void __attribute__((noinline))
+loop2 (int *a, int *b, int *c)
+ TEST_CODE2;
+
+#undef MAYBE_LOB
+#define MAYBE_LOB NO_LOB
+
+/* Reference version of loop1: TEST_CODE1 compiled while MAYBE_LOB expands
+   to NO_LOB, the lr-clobbering asm statement from lob.h.  */
+void
+ref1 (int *a, int *b, int *c)
+ TEST_CODE1;
+
+/* Reference version of loop2: TEST_CODE2 compiled while MAYBE_LOB expands
+   to NO_LOB, the lr-clobbering asm statement from lob.h.  */
+void
+ref2 (int *a, int *b, int *c)
+ TEST_CODE2;
+
+/* Abort unless the arrays written by loop1/loop2 (a1, b1, c1) match,
+   element by element, the arrays written by the reference functions
+   ref1/ref2 (a2, b2, c2).  */
+void
+check (void)
+{
+  for (int i = 0; i < N; i++)
+    {
+      /* Clobber lr inside this loop as well (NO_LOB comes from lob.h).  */
+      NO_LOB;
+      /* A mismatch in any one of the three arrays is a failure; joining
+         these comparisons with && would only catch the case where all
+         three arrays differ at the same index.  */
+      if (a1[i] != a2[i]
+          || b1[i] != b2[i]
+          || c1[i] != c2[i])
+        abort ();
+    }
+}
+
+/* For each of the two test bodies: reset both sets of arrays, run the
+   plain version on set 1 and the reference version on set 2, then compare
+   the two sets with check.  Returns 0 when no check aborted.  */
+int
+main (void)
+{
+ reset_data (a1, b1, c1);
+ reset_data (a2, b2, c2);
+ loop1 (a1, b1, c1);
+ ref1 (a2, b2, c2);
+ check ();
+
+ reset_data (a1, b1, c1);
+ reset_data (a2, b2, c2);
+ loop2 (a1, b1, c1);
+ ref2 (a2, b2, c2);
+ check ();
+
+ return 0;
+}
+/* { dg-final { scan-assembler-times {dls\s\S*,\s\S*} 1 } } */
+/* { dg-final { scan-assembler-times {le\slr,\s\S*} 1 } } */
/* { dg-do compile } */
-/* { dg-require-effective-target arm_thumb2_ok } */
+/* { dg-require-effective-target arm_thumb2_ok_no_arm_v8_1_lob } */
/* { dg-options "-O" } */
unsigned short foo (unsigned short x, unsigned short c)
}]
}
+# Return 1 if the target supports executing the Armv8.1-M Mainline Low
+# Overhead Loop, 0 otherwise. The test is valid for ARM.
+
+proc check_effective_target_arm_v8_1_lob_ok { } {
+ # Anything that is not an arm target is rejected without running a probe.
+ if { ![istarget arm*-*-*] } {
+ return 0;
+ } else {
+ # Run-time probe built with -march=armv8.1-m.main: a hand-written
+ # DLS/LE loop started with a count of 10 increments i, and the
+ # program returns 0 only when i ends up equal to 10.
+ return [check_runtime arm_v8_1_lob_hw_available {
+ int
+ main (void)
+ { int i = 0;
+ asm ("movw r3, #10\n\t" /* movs? */
+ "dls lr, r3" : : : "r3", "lr");
+ loop:
+ i++;
+ asm goto ("le lr, %l0" : : : "lr" : loop);
+ return i != 10;
+ }
+ } "-march=armv8.1-m.main" ]
+ }
+}
+
+# Return 1 if this is an ARM target where -mthumb causes Thumb-2 to be
+# used and the target does not support executing the Armv8.1-M
+# Mainline Low Overhead Loop, 0 otherwise. The test is valid for ARM.
+
+proc check_effective_target_arm_thumb2_ok_no_arm_v8_1_lob { } {
+    # 1 only when Thumb-2 is usable and the LOB run-time probe fails; the
+    # LOB probe is skipped when the Thumb-2 check already says no.
+    return [expr { [check_effective_target_arm_thumb2_ok]
+                   && ![check_effective_target_arm_v8_1_lob_ok] }]
+}
+
# Returns 1 if the target is using glibc, 0 otherwise.
proc check_effective_target_glibc { } {