This patch merges loads and stores from D-registers that are of different modes.

Code like this:

typedef int __attribute__((vector_size(8))) vec;

struct pair
{
  vec v;
  double d;
};

void
assign (struct pair *p, vec v)
{
  p->v = v;
  p->d = 1.0;
}

now generates a single store pair (stp) instruction, whereas previously it
generated two `str` instructions.
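For illustration only, the generated code for assign changes roughly as follows
at -O2 (register numbers and scheduling are indicative and depend on register
allocation; the arguments are assumed to arrive in x0 and d0 per the AAPCS64):

Before:
	fmov	d1, 1.0e+0
	str	d0, [x0]
	str	d1, [x0, 8]

After:
	fmov	d1, 1.0e+0
	stp	d0, d1, [x0]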
This patch also merges stores of a double-precision zero with stores of
long integer values:

struct pair
{
  long long l;
  double d;
};

void
foo (struct pair *p)
{
  p->l = 10;
  p->d = 0.0;
}

This now generates a single store pair instruction rather than two `str`
instructions.
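Again purely as an illustration (assuming p arrives in x0; the exact registers
may differ), the change is roughly:

Before:
	mov	x1, 10
	str	x1, [x0]
	str	xzr, [x0, 8]

After:
	mov	x1, 10
	stp	x1, xzr, [x0]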
The patch basically generalises the mode iterators on the patterns in aarch64.md
and the peepholes in aarch64-ldpstp.md to take all combinations of pairs of modes,
so while it is a fairly large patch, the changes are mostly mechanical.  Because
the two memory operations in a pattern must be able to iterate over modes
independently, each iterator gets a duplicate (DREG2, DX2 and SX2 alongside
DREG, DX and SX).
2018-05-22 Jackson Woodruff <jackson.woodruff@arm.com>
Kyrylo Tkachov <kyrylo.tkachov@arm.com>
* config/aarch64/aarch64.md: New patterns to generate stp
and ldp.
(store_pair_sw, store_pair_dw): New patterns to generate stp for
single words and double words.
(load_pair_sw, load_pair_dw): Likewise.
(store_pair_sf, store_pair_df, store_pair_si, store_pair_di):
Delete.
(load_pair_sf, load_pair_df, load_pair_si, load_pair_di):
Delete.
* config/aarch64/aarch64-ldpstp.md: Modify peephole
for different mode ldpstp and add peephole for merged zero stores.
Likewise for loads.
* config/aarch64/aarch64.c (aarch64_operands_ok_for_ldpstp):
Add size check.
(aarch64_gen_store_pair): Rename calls to match new patterns.
(aarch64_gen_load_pair): Rename calls to match new patterns.
* config/aarch64/aarch64-simd.md (load_pair<mode>): Rename to...
(load_pair<DREG:mode><DREG2:mode>): ... This.
(store_pair<mode>): Rename to...
(vec_store_pair<DREG:mode><DREG2:mode>): ... This.
* config/aarch64/iterators.md (DREG, DREG2, DX2, SX, SX2, DSX):
New mode iterators.
(V_INT_EQUIV): Handle SImode.
* config/aarch64/predicates.md (aarch64_reg_zero_or_fp_zero):
New predicate.
* gcc.target/aarch64/ldp_stp_6.c: New.
* gcc.target/aarch64/ldp_stp_7.c: New.
* gcc.target/aarch64/ldp_stp_8.c: New.
Co-Authored-By: Kyrylo Tkachov <kyrylo.tkachov@arm.com>
From-SVN: r260538
+2018-05-22 Jackson Woodruff <jackson.woodruff@arm.com>
+ Kyrylo Tkachov <kyrylo.tkachov@arm.com>
+
+ * config/aarch64/aarch64.md: New patterns to generate stp
+ and ldp.
+ (store_pair_sw, store_pair_dw): New patterns to generate stp for
+ single words and double words.
+ (load_pair_sw, load_pair_dw): Likewise.
+ (store_pair_sf, store_pair_df, store_pair_si, store_pair_di):
+ Delete.
+ (load_pair_sf, load_pair_df, load_pair_si, load_pair_di):
+ Delete.
+ * config/aarch64/aarch64-ldpstp.md: Modify peephole
+ for different mode ldpstp and add peephole for merged zero stores.
+ Likewise for loads.
+ * config/aarch64/aarch64.c (aarch64_operands_ok_for_ldpstp):
+ Add size check.
+ (aarch64_gen_store_pair): Rename calls to match new patterns.
+ (aarch64_gen_load_pair): Rename calls to match new patterns.
+ * config/aarch64/aarch64-simd.md (load_pair<mode>): Rename to...
+ (load_pair<DREG:mode><DREG2:mode>): ... This.
+ (store_pair<mode>): Rename to...
+ (vec_store_pair<DREG:mode><DREG2:mode>): ... This.
+ * config/aarch64/iterators.md (DREG, DREG2, DX2, SX, SX2, DSX):
+ New mode iterators.
+ (V_INT_EQUIV): Handle SImode.
+ * config/aarch64/predicates.md (aarch64_reg_zero_or_fp_zero):
+ New predicate.
+
2018-05-22 Martin Sebor <msebor@redhat.com>
PR tree-optimization/85826
})
(define_peephole2
- [(set (match_operand:VD 0 "register_operand" "")
- (match_operand:VD 1 "aarch64_mem_pair_operand" ""))
- (set (match_operand:VD 2 "register_operand" "")
- (match_operand:VD 3 "memory_operand" ""))]
- "aarch64_operands_ok_for_ldpstp (operands, true, <MODE>mode)"
+ [(set (match_operand:DREG 0 "register_operand" "")
+ (match_operand:DREG 1 "aarch64_mem_pair_operand" ""))
+ (set (match_operand:DREG2 2 "register_operand" "")
+ (match_operand:DREG2 3 "memory_operand" ""))]
+ "aarch64_operands_ok_for_ldpstp (operands, true, <DREG:MODE>mode)"
[(parallel [(set (match_dup 0) (match_dup 1))
(set (match_dup 2) (match_dup 3))])]
{
})
(define_peephole2
- [(set (match_operand:VD 0 "aarch64_mem_pair_operand" "")
- (match_operand:VD 1 "register_operand" ""))
- (set (match_operand:VD 2 "memory_operand" "")
- (match_operand:VD 3 "register_operand" ""))]
- "TARGET_SIMD && aarch64_operands_ok_for_ldpstp (operands, false, <MODE>mode)"
+ [(set (match_operand:DREG 0 "aarch64_mem_pair_operand" "")
+ (match_operand:DREG 1 "register_operand" ""))
+ (set (match_operand:DREG2 2 "memory_operand" "")
+ (match_operand:DREG2 3 "register_operand" ""))]
+ "TARGET_SIMD
+ && aarch64_operands_ok_for_ldpstp (operands, false, <DREG:MODE>mode)"
[(parallel [(set (match_dup 0) (match_dup 1))
(set (match_dup 2) (match_dup 3))])]
{
}
})
-
;; Handle sign/zero extended consecutive load/store.
(define_peephole2
}
})
+;; Handle storing of a floating point zero with integer data.
+;; This handles cases like:
+;; struct pair { int a; float b; }
+;;
+;; p->a = 1;
+;; p->b = 0.0;
+;;
+;; We can match modes that won't work for a stp instruction
+;; as aarch64_operands_ok_for_ldpstp checks that the modes are
+;; compatible.
+(define_peephole2
+ [(set (match_operand:DSX 0 "aarch64_mem_pair_operand" "")
+ (match_operand:DSX 1 "aarch64_reg_zero_or_fp_zero" ""))
+ (set (match_operand:<FCVT_TARGET> 2 "memory_operand" "")
+ (match_operand:<FCVT_TARGET> 3 "aarch64_reg_zero_or_fp_zero" ""))]
+ "aarch64_operands_ok_for_ldpstp (operands, false, <V_INT_EQUIV>mode)"
+ [(parallel [(set (match_dup 0) (match_dup 1))
+ (set (match_dup 2) (match_dup 3))])]
+{
+ rtx base, offset_1, offset_2;
+
+ extract_base_offset_in_addr (operands[0], &base, &offset_1);
+ extract_base_offset_in_addr (operands[2], &base, &offset_2);
+ if (INTVAL (offset_1) > INTVAL (offset_2))
+ {
+ std::swap (operands[0], operands[2]);
+ std::swap (operands[1], operands[3]);
+ }
+})
+
;; Handle consecutive load/store whose offset is out of the range
;; supported by ldp/ldpsw/stp. We firstly adjust offset in a scratch
;; register, then merge them into ldp/ldpsw/stp by using the adjusted
[(set_attr "type" "neon_store1_1reg<q>")]
)
-(define_insn "load_pair<mode>"
- [(set (match_operand:VD 0 "register_operand" "=w")
- (match_operand:VD 1 "aarch64_mem_pair_operand" "Ump"))
- (set (match_operand:VD 2 "register_operand" "=w")
- (match_operand:VD 3 "memory_operand" "m"))]
+(define_insn "load_pair<DREG:mode><DREG2:mode>"
+ [(set (match_operand:DREG 0 "register_operand" "=w")
+ (match_operand:DREG 1 "aarch64_mem_pair_operand" "Ump"))
+ (set (match_operand:DREG2 2 "register_operand" "=w")
+ (match_operand:DREG2 3 "memory_operand" "m"))]
"TARGET_SIMD
&& rtx_equal_p (XEXP (operands[3], 0),
plus_constant (Pmode,
XEXP (operands[1], 0),
- GET_MODE_SIZE (<MODE>mode)))"
+ GET_MODE_SIZE (<DREG:MODE>mode)))"
"ldp\\t%d0, %d2, %1"
[(set_attr "type" "neon_ldp")]
)
-(define_insn "store_pair<mode>"
- [(set (match_operand:VD 0 "aarch64_mem_pair_operand" "=Ump")
- (match_operand:VD 1 "register_operand" "w"))
- (set (match_operand:VD 2 "memory_operand" "=m")
- (match_operand:VD 3 "register_operand" "w"))]
+(define_insn "vec_store_pair<DREG:mode><DREG2:mode>"
+ [(set (match_operand:DREG 0 "aarch64_mem_pair_operand" "=Ump")
+ (match_operand:DREG 1 "register_operand" "w"))
+ (set (match_operand:DREG2 2 "memory_operand" "=m")
+ (match_operand:DREG2 3 "register_operand" "w"))]
"TARGET_SIMD
&& rtx_equal_p (XEXP (operands[2], 0),
plus_constant (Pmode,
XEXP (operands[0], 0),
- GET_MODE_SIZE (<MODE>mode)))"
+ GET_MODE_SIZE (<DREG:MODE>mode)))"
"stp\\t%d1, %d3, %0"
[(set_attr "type" "neon_stp")]
)
switch (mode)
{
case E_DImode:
- return gen_store_pairdi (mem1, reg1, mem2, reg2);
+ return gen_store_pair_dw_didi (mem1, reg1, mem2, reg2);
case E_DFmode:
- return gen_store_pairdf (mem1, reg1, mem2, reg2);
+ return gen_store_pair_dw_dfdf (mem1, reg1, mem2, reg2);
default:
gcc_unreachable ();
switch (mode)
{
case E_DImode:
- return gen_load_pairdi (reg1, mem1, reg2, mem2);
+ return gen_load_pair_dw_didi (reg1, mem1, reg2, mem2);
case E_DFmode:
- return gen_load_pairdf (reg1, mem1, reg2, mem2);
+ return gen_load_pair_dw_dfdf (reg1, mem1, reg2, mem2);
default:
gcc_unreachable ();
if (!rtx_equal_p (base_1, base_2))
return false;
+ /* The operands must be of the same size. */
+ gcc_assert (known_eq (GET_MODE_SIZE (GET_MODE (mem_1)),
+ GET_MODE_SIZE (GET_MODE (mem_2))));
+
offval_1 = INTVAL (offset_1);
offval_2 = INTVAL (offset_2);
/* We should only be trying this for fixed-sized modes. There is no
;; Operands 1 and 3 are tied together by the final condition; so we allow
;; fairly lax checking on the second memory operation.
-(define_insn "load_pairsi"
- [(set (match_operand:SI 0 "register_operand" "=r,w")
- (match_operand:SI 1 "aarch64_mem_pair_operand" "Ump,Ump"))
- (set (match_operand:SI 2 "register_operand" "=r,w")
- (match_operand:SI 3 "memory_operand" "m,m"))]
- "rtx_equal_p (XEXP (operands[3], 0),
- plus_constant (Pmode,
- XEXP (operands[1], 0),
- GET_MODE_SIZE (SImode)))"
+(define_insn "load_pair_sw_<SX:mode><SX2:mode>"
+ [(set (match_operand:SX 0 "register_operand" "=r,w")
+ (match_operand:SX 1 "aarch64_mem_pair_operand" "Ump,Ump"))
+ (set (match_operand:SX2 2 "register_operand" "=r,w")
+ (match_operand:SX2 3 "memory_operand" "m,m"))]
+ "rtx_equal_p (XEXP (operands[3], 0),
+ plus_constant (Pmode,
+ XEXP (operands[1], 0),
+ GET_MODE_SIZE (<SX:MODE>mode)))"
"@
ldp\\t%w0, %w2, %1
ldp\\t%s0, %s2, %1"
(set_attr "fp" "*,yes")]
)
-(define_insn "load_pairdi"
- [(set (match_operand:DI 0 "register_operand" "=r,w")
- (match_operand:DI 1 "aarch64_mem_pair_operand" "Ump,Ump"))
- (set (match_operand:DI 2 "register_operand" "=r,w")
- (match_operand:DI 3 "memory_operand" "m,m"))]
- "rtx_equal_p (XEXP (operands[3], 0),
- plus_constant (Pmode,
- XEXP (operands[1], 0),
- GET_MODE_SIZE (DImode)))"
+;; Storing different modes that can still be merged
+(define_insn "load_pair_dw_<DX:mode><DX2:mode>"
+ [(set (match_operand:DX 0 "register_operand" "=r,w")
+ (match_operand:DX 1 "aarch64_mem_pair_operand" "Ump,Ump"))
+ (set (match_operand:DX2 2 "register_operand" "=r,w")
+ (match_operand:DX2 3 "memory_operand" "m,m"))]
+ "rtx_equal_p (XEXP (operands[3], 0),
+ plus_constant (Pmode,
+ XEXP (operands[1], 0),
+ GET_MODE_SIZE (<DX:MODE>mode)))"
"@
ldp\\t%x0, %x2, %1
ldp\\t%d0, %d2, %1"
(set_attr "fp" "*,yes")]
)
-
;; Operands 0 and 2 are tied together by the final condition; so we allow
;; fairly lax checking on the second memory operation.
-(define_insn "store_pairsi"
- [(set (match_operand:SI 0 "aarch64_mem_pair_operand" "=Ump,Ump")
- (match_operand:SI 1 "aarch64_reg_or_zero" "rZ,w"))
- (set (match_operand:SI 2 "memory_operand" "=m,m")
- (match_operand:SI 3 "aarch64_reg_or_zero" "rZ,w"))]
- "rtx_equal_p (XEXP (operands[2], 0),
- plus_constant (Pmode,
- XEXP (operands[0], 0),
- GET_MODE_SIZE (SImode)))"
+(define_insn "store_pair_sw_<SX:mode><SX2:mode>"
+ [(set (match_operand:SX 0 "aarch64_mem_pair_operand" "=Ump,Ump")
+ (match_operand:SX 1 "aarch64_reg_zero_or_fp_zero" "rYZ,w"))
+ (set (match_operand:SX2 2 "memory_operand" "=m,m")
+ (match_operand:SX2 3 "aarch64_reg_zero_or_fp_zero" "rYZ,w"))]
+ "rtx_equal_p (XEXP (operands[2], 0),
+ plus_constant (Pmode,
+ XEXP (operands[0], 0),
+ GET_MODE_SIZE (<SX:MODE>mode)))"
"@
stp\\t%w1, %w3, %0
stp\\t%s1, %s3, %0"
(set_attr "fp" "*,yes")]
)
-(define_insn "store_pairdi"
- [(set (match_operand:DI 0 "aarch64_mem_pair_operand" "=Ump,Ump")
- (match_operand:DI 1 "aarch64_reg_or_zero" "rZ,w"))
- (set (match_operand:DI 2 "memory_operand" "=m,m")
- (match_operand:DI 3 "aarch64_reg_or_zero" "rZ,w"))]
- "rtx_equal_p (XEXP (operands[2], 0),
- plus_constant (Pmode,
- XEXP (operands[0], 0),
- GET_MODE_SIZE (DImode)))"
+;; Storing different modes that can still be merged
+(define_insn "store_pair_dw_<DX:mode><DX2:mode>"
+ [(set (match_operand:DX 0 "aarch64_mem_pair_operand" "=Ump,Ump")
+ (match_operand:DX 1 "aarch64_reg_zero_or_fp_zero" "rYZ,w"))
+ (set (match_operand:DX2 2 "memory_operand" "=m,m")
+ (match_operand:DX2 3 "aarch64_reg_zero_or_fp_zero" "rYZ,w"))]
+ "rtx_equal_p (XEXP (operands[2], 0),
+ plus_constant (Pmode,
+ XEXP (operands[0], 0),
+ GET_MODE_SIZE (<DX:MODE>mode)))"
"@
stp\\t%x1, %x3, %0
stp\\t%d1, %d3, %0"
(set_attr "fp" "*,yes")]
)
-;; Operands 1 and 3 are tied together by the final condition; so we allow
-;; fairly lax checking on the second memory operation.
-(define_insn "load_pairsf"
- [(set (match_operand:SF 0 "register_operand" "=w,r")
- (match_operand:SF 1 "aarch64_mem_pair_operand" "Ump,Ump"))
- (set (match_operand:SF 2 "register_operand" "=w,r")
- (match_operand:SF 3 "memory_operand" "m,m"))]
- "rtx_equal_p (XEXP (operands[3], 0),
- plus_constant (Pmode,
- XEXP (operands[1], 0),
- GET_MODE_SIZE (SFmode)))"
- "@
- ldp\\t%s0, %s2, %1
- ldp\\t%w0, %w2, %1"
- [(set_attr "type" "neon_load1_2reg,load_8")
- (set_attr "fp" "yes,*")]
-)
-
-(define_insn "load_pairdf"
- [(set (match_operand:DF 0 "register_operand" "=w,r")
- (match_operand:DF 1 "aarch64_mem_pair_operand" "Ump,Ump"))
- (set (match_operand:DF 2 "register_operand" "=w,r")
- (match_operand:DF 3 "memory_operand" "m,m"))]
- "rtx_equal_p (XEXP (operands[3], 0),
- plus_constant (Pmode,
- XEXP (operands[1], 0),
- GET_MODE_SIZE (DFmode)))"
- "@
- ldp\\t%d0, %d2, %1
- ldp\\t%x0, %x2, %1"
- [(set_attr "type" "neon_load1_2reg,load_16")
- (set_attr "fp" "yes,*")]
-)
-
-;; Operands 0 and 2 are tied together by the final condition; so we allow
-;; fairly lax checking on the second memory operation.
-(define_insn "store_pairsf"
- [(set (match_operand:SF 0 "aarch64_mem_pair_operand" "=Ump,Ump")
- (match_operand:SF 1 "aarch64_reg_or_fp_zero" "w,rY"))
- (set (match_operand:SF 2 "memory_operand" "=m,m")
- (match_operand:SF 3 "aarch64_reg_or_fp_zero" "w,rY"))]
- "rtx_equal_p (XEXP (operands[2], 0),
- plus_constant (Pmode,
- XEXP (operands[0], 0),
- GET_MODE_SIZE (SFmode)))"
- "@
- stp\\t%s1, %s3, %0
- stp\\t%w1, %w3, %0"
- [(set_attr "type" "neon_store1_2reg,store_8")
- (set_attr "fp" "yes,*")]
-)
-
-(define_insn "store_pairdf"
- [(set (match_operand:DF 0 "aarch64_mem_pair_operand" "=Ump,Ump")
- (match_operand:DF 1 "aarch64_reg_or_fp_zero" "w,rY"))
- (set (match_operand:DF 2 "memory_operand" "=m,m")
- (match_operand:DF 3 "aarch64_reg_or_fp_zero" "w,rY"))]
- "rtx_equal_p (XEXP (operands[2], 0),
- plus_constant (Pmode,
- XEXP (operands[0], 0),
- GET_MODE_SIZE (DFmode)))"
- "@
- stp\\t%d1, %d3, %0
- stp\\t%x1, %x3, %0"
- [(set_attr "type" "neon_store1_2reg,store_16")
- (set_attr "fp" "yes,*")]
-)
-
;; Load pair with post-index writeback. This is primarily used in function
;; epilogues.
(define_insn "loadwb_pair<GPI:mode>_<P:mode>"
;; Double vector modes.
(define_mode_iterator VD [V8QI V4HI V4HF V2SI V2SF])
+;; All modes stored in registers d0-d31.
+(define_mode_iterator DREG [V8QI V4HI V4HF V2SI V2SF DF])
+
+;; Copy of the above.
+(define_mode_iterator DREG2 [V8QI V4HI V4HF V2SI V2SF DF])
+
;; Advanced SIMD, 64-bit container, all integer modes.
(define_mode_iterator VD_BHSI [V8QI V4HI V2SI])
;; Double scalar modes
(define_mode_iterator DX [DI DF])
+;; Duplicate of the above
+(define_mode_iterator DX2 [DI DF])
+
+;; Single scalar modes
+(define_mode_iterator SX [SI SF])
+
+;; Duplicate of the above
+(define_mode_iterator SX2 [SI SF])
+
+;; Single and double integer and float modes
+(define_mode_iterator DSX [DF DI SF SI])
+
+
;; Modes available for Advanced SIMD <f>mul lane operations.
(define_mode_iterator VMUL [V4HI V8HI V2SI V4SI
(V4HF "TARGET_SIMD_F16INST")
(V4HF "V4HI") (V8HF "V8HI")
(V2SF "V2SI") (V4SF "V4SI")
(DF "DI") (V2DF "V2DI")
- (SF "SI") (HF "HI")
+ (SF "SI") (SI "SI")
+ (HF "HI")
(VNx16QI "VNx16QI")
(VNx8HI "VNx8HI") (VNx8HF "VNx8HI")
(VNx4SI "VNx4SI") (VNx4SF "VNx4SI")
(and (match_code "const_double")
(match_test "aarch64_float_const_zero_rtx_p (op)"))))
+(define_predicate "aarch64_reg_zero_or_fp_zero"
+ (ior (match_operand 0 "aarch64_reg_or_fp_zero")
+ (match_operand 0 "aarch64_reg_or_zero")))
+
(define_predicate "aarch64_reg_zero_or_m1_or_1"
(and (match_code "reg,subreg,const_int")
(ior (match_operand 0 "register_operand")
+2018-05-22 Jackson Woodruff <jackson.woodruff@arm.com>
+
+ * gcc.target/aarch64/ldp_stp_6.c: New.
+ * gcc.target/aarch64/ldp_stp_7.c: New.
+ * gcc.target/aarch64/ldp_stp_8.c: New.
+
2018-05-22 Martin Sebor <msebor@redhat.com>
PR tree-optimization/85826
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/ldp_stp_6.c
+/* { dg-options "-O2" } */
+
+typedef float __attribute__ ((vector_size (8))) vec;
+
+struct pair
+{
+ vec e1;
+ double e2;
+};
+
+vec tmp;
+
+void
+stp (struct pair *p)
+{
+ p->e1 = tmp;
+ p->e2 = 1.0;
+
+ /* { dg-final { scan-assembler "stp\td\[0-9\]+, d\[0-9\]+, \\\[x\[0-9\]+\\\]" } } */
+}
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/ldp_stp_7.c
+/* { dg-options "-O2" } */
+
+struct pair
+{
+ double a;
+ long long int b;
+};
+
+void
+stp (struct pair *p)
+{
+ p->a = 0.0;
+ p->b = 1;
+}
+
+/* { dg-final { scan-assembler "stp\txzr, x\[0-9\]+, \\\[x\[0-9\]+\\\]" } } */
+
+void
+stp2 (struct pair *p)
+{
+ p->a = 0.0;
+ p->b = 0;
+}
+
+struct reverse_pair
+{
+ long long int a;
+ double b;
+};
+
+void
+stp_reverse (struct reverse_pair *p)
+{
+ p->a = 1;
+ p->b = 0.0;
+}
+
+/* { dg-final { scan-assembler "stp\tx\[0-9\]+, xzr, \\\[x\[0-9\]+\\\]" } } */
+
+void
+stp_reverse2 (struct reverse_pair *p)
+{
+ p->a = 0;
+ p->b = 0.0;
+}
+
+/* { dg-final { scan-assembler-times "stp\txzr, xzr, \\\[x\[0-9\]+\\\]" 2 } } */
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/ldp_stp_8.c
+/* { dg-options "-O2" } */
+
+typedef float __attribute__ ((vector_size (8))) fvec;
+typedef int __attribute__ ((vector_size (8))) ivec;
+
+struct pair
+{
+ double a;
+ fvec b;
+};
+
+void ldp (double *a, fvec *b, struct pair *p)
+{
+ *a = p->a + 1;
+ *b = p->b;
+}
+
+struct vec_pair
+{
+ fvec a;
+ ivec b;
+};
+
+void ldp2 (fvec *a, ivec *b, struct vec_pair *p)
+{
+ *a = p->a;
+ *b = p->b;
+}
+
+/* { dg-final { scan-assembler-times "ldp\td\[0-9\], d\[0-9\]+, \\\[x\[0-9\]+\\\]" 2 } } */