+2020-03-02 Andrew Stubbs <ams@codesourcery.com>
+
+ * config/gcn/gcn-valu.md (dpp_move<mode>): New.
+ (reduc_insn): Use 'U' and 'B' operand codes.
+ (reduc_<reduc_op>_scal_<mode>): Allow all types.
+ (reduc_<reduc_op>_scal_v64di): Delete.
+ (*<reduc_op>_dpp_shr_<mode>): Allow all 1reg types.
+ (*plus_carry_dpp_shr_v64si): Change to ...
+ (*plus_carry_dpp_shr_<mode>): ... this and allow all 1reg int types.
+ (mov_from_lane63_v64di): Change to ...
+ (mov_from_lane63_<mode>): ... this, and allow all 64-bit modes.
+ * config/gcn/gcn.c (gcn_expand_dpp_shr_insn): Increase buffer size.
+ Support UNSPEC_MOV_DPP_SHR output formats.
+ (gcn_expand_reduc_scalar): Add "use_moves" and "use_extends"
+ reductions.
+ (print_operand): Add 'I' and 'U' codes.
+ * config/gcn/gcn.md (unspec): Add UNSPEC_MOV_DPP_SHR.
+
2020-03-02 Martin Liska <mliska@suse.cz>

* lto-wrapper.c: Fix typo in comment about
[(set_attr "type" "vmult")
(set_attr "length" "24")])
+(define_insn "@dpp_move<mode>"
+ [(set (match_operand:VEC_REG_MODE 0 "register_operand" "=v")
+ (unspec:VEC_REG_MODE
+ [(match_operand:VEC_REG_MODE 1 "register_operand" " v")
+ (match_operand:SI 2 "const_int_operand" " n")]
+ UNSPEC_MOV_DPP_SHR))]
+ ""
+ {
+ return gcn_expand_dpp_shr_insn (<MODE>mode, "v_mov_b32",
+ UNSPEC_MOV_DPP_SHR, INTVAL (operands[2]));
+ }
+ [(set_attr "type" "vop_dpp")
+ (set_attr "length" "16")])
+
;; }}}
;; {{{ ALU special case: add/sub
(UNSPEC_SMAX_DPP_SHR "v_max%i0")
(UNSPEC_UMIN_DPP_SHR "v_min%u0")
(UNSPEC_UMAX_DPP_SHR "v_max%u0")
- (UNSPEC_PLUS_DPP_SHR "v_add%u0")
- (UNSPEC_AND_DPP_SHR "v_and%b0")
- (UNSPEC_IOR_DPP_SHR "v_or%b0")
- (UNSPEC_XOR_DPP_SHR "v_xor%b0")])
+ (UNSPEC_PLUS_DPP_SHR "v_add%U0")
+ (UNSPEC_AND_DPP_SHR "v_and%B0")
+ (UNSPEC_IOR_DPP_SHR "v_or%B0")
+ (UNSPEC_XOR_DPP_SHR "v_xor%B0")])
(define_expand "reduc_<reduc_op>_scal_<mode>"
[(set (match_operand:<SCALAR_MODE> 0 "register_operand")
(unspec:<SCALAR_MODE>
- [(match_operand:VEC_1REG_MODE 1 "register_operand")]
+ [(match_operand:VEC_ALLREG_MODE 1 "register_operand")]
REDUC_UNSPEC))]
""
{
DONE;
})
-(define_expand "reduc_<reduc_op>_scal_v64di"
- [(set (match_operand:DI 0 "register_operand")
- (unspec:DI
- [(match_operand:V64DI 1 "register_operand")]
- REDUC_2REG_UNSPEC))]
- ""
- {
- rtx tmp = gcn_expand_reduc_scalar (V64DImode, operands[1],
- <reduc_unspec>);
-
- /* The result of the reduction is in lane 63 of tmp. */
- emit_insn (gen_mov_from_lane63_v64di (operands[0], tmp));
-
- DONE;
- })
(define_insn "*<reduc_op>_dpp_shr_<mode>"
- [(set (match_operand:VEC_1REG_MODE 0 "register_operand" "=v")
- (unspec:VEC_1REG_MODE
- [(match_operand:VEC_1REG_MODE 1 "register_operand" "v")
- (match_operand:VEC_1REG_MODE 2 "register_operand" "v")
- (match_operand:SI 3 "const_int_operand" "n")]
+ [(set (match_operand:VEC_ALL1REG_MODE 0 "register_operand" "=v")
+ (unspec:VEC_ALL1REG_MODE
+ [(match_operand:VEC_ALL1REG_MODE 1 "register_operand" "v")
+ (match_operand:VEC_ALL1REG_MODE 2 "register_operand" "v")
+ (match_operand:SI 3 "const_int_operand" "n")]
REDUC_UNSPEC))]
; GCN3 requires a carry out; GCN5 does not.
"!(TARGET_GCN3 && SCALAR_INT_MODE_P (<SCALAR_MODE>mode)
&& <reduc_unspec> == UNSPEC_PLUS_DPP_SHR)"
{
; Special cases for addition.
-(define_insn "*plus_carry_dpp_shr_v64si"
- [(set (match_operand:V64SI 0 "register_operand" "=v")
- (unspec:V64SI
- [(match_operand:V64SI 1 "register_operand" "v")
- (match_operand:V64SI 2 "register_operand" "v")
- (match_operand:SI 3 "const_int_operand" "n")]
+(define_insn "*plus_carry_dpp_shr_<mode>"
+ [(set (match_operand:VEC_ALL1REG_INT_MODE 0 "register_operand" "=v")
+ (unspec:VEC_ALL1REG_INT_MODE
+ [(match_operand:VEC_ALL1REG_INT_MODE 1 "register_operand" "v")
+ (match_operand:VEC_ALL1REG_INT_MODE 2 "register_operand" "v")
+ (match_operand:SI 3 "const_int_operand" "n")]
UNSPEC_PLUS_CARRY_DPP_SHR))
(clobber (reg:DI VCC_REG))]
""
{
- const char *insn = TARGET_GCN3 ? "v_add%u0" : "v_add_co%u0";
- return gcn_expand_dpp_shr_insn (V64SImode, insn,
+ return gcn_expand_dpp_shr_insn (V64SImode, "v_add%^_u32",
UNSPEC_PLUS_CARRY_DPP_SHR,
INTVAL (operands[3]));
}
(clobber (reg:DI VCC_REG))]
""
{
- const char *insn = TARGET_GCN3 ? "v_addc%u0" : "v_addc_co%u0";
- return gcn_expand_dpp_shr_insn (V64SImode, insn,
+ return gcn_expand_dpp_shr_insn (V64SImode, "v_addc%^_u32",
UNSPEC_PLUS_CARRY_IN_DPP_SHR,
INTVAL (operands[3]));
}
(set_attr "exec" "none,*")
(set_attr "length" "8")])
-(define_insn "mov_from_lane63_v64di"
- [(set (match_operand:DI 0 "register_operand" "=Sg,v")
- (unspec:DI
- [(match_operand:V64DI 1 "register_operand" "v,v")]
+(define_insn "mov_from_lane63_<mode>"
+ [(set (match_operand:<SCALAR_MODE> 0 "register_operand" "=Sg,v")
+ (unspec:<SCALAR_MODE>
+ [(match_operand:VEC_2REG_MODE 1 "register_operand" "v,v")]
UNSPEC_MOV_FROM_LANE63))]
""
"@
gcn_expand_dpp_shr_insn (machine_mode mode, const char *insn,
int unspec, int shift)
{
- static char buf[64];
+ static char buf[128];
const char *dpp;
const char *vcc_in = "";
const char *vcc_out = "";
gcc_unreachable ();
}
- sprintf (buf, "%s\t%%0%s, %%1, %%2%s %s", insn, vcc_out, vcc_in, dpp);
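+ /* A plain DPP move takes no carry operands and has only one source;
+ 64-bit moves are split into 32-bit moves of the low and high parts. */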
+ if (unspec == UNSPEC_MOV_DPP_SHR && vgpr_2reg_mode_p (mode))
+ sprintf (buf, "%s\t%%L0, %%L1 %s\n\t%s\t%%H0, %%H1 %s",
+ insn, dpp, insn, dpp);
+ else if (unspec == UNSPEC_MOV_DPP_SHR)
+ sprintf (buf, "%s\t%%0, %%1 %s", insn, dpp);
+ else
+ sprintf (buf, "%s\t%%0%s, %%1, %%2%s %s", insn, vcc_out, vcc_in, dpp);
return buf;
}
rtx
gcn_expand_reduc_scalar (machine_mode mode, rtx src, int unspec)
{
- rtx tmp = gen_reg_rtx (mode);
+ machine_mode orig_mode = mode;
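+ /* Reductions on modes with no DPP-capable ALU instruction (V64DI
+ min/max, V64DF plus) use a DPP move and a separate vector operation;
+ sub-word min/max reductions are widened to V64SImode first. */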
+ bool use_moves = (((unspec == UNSPEC_SMIN_DPP_SHR
+ || unspec == UNSPEC_SMAX_DPP_SHR
+ || unspec == UNSPEC_UMIN_DPP_SHR
+ || unspec == UNSPEC_UMAX_DPP_SHR)
+ && mode == V64DImode)
+ || (unspec == UNSPEC_PLUS_DPP_SHR
+ && mode == V64DFmode));
+ rtx_code code = (unspec == UNSPEC_SMIN_DPP_SHR ? SMIN
+ : unspec == UNSPEC_SMAX_DPP_SHR ? SMAX
+ : unspec == UNSPEC_UMIN_DPP_SHR ? UMIN
+ : unspec == UNSPEC_UMAX_DPP_SHR ? UMAX
+ : unspec == UNSPEC_PLUS_DPP_SHR ? PLUS
+ : UNKNOWN);
+ bool use_extends = ((unspec == UNSPEC_SMIN_DPP_SHR
+ || unspec == UNSPEC_SMAX_DPP_SHR
+ || unspec == UNSPEC_UMIN_DPP_SHR
+ || unspec == UNSPEC_UMAX_DPP_SHR)
+ && (mode == V64QImode
+ || mode == V64HImode));
+ bool unsignedp = (unspec == UNSPEC_UMIN_DPP_SHR
+ || unspec == UNSPEC_UMAX_DPP_SHR);
bool use_plus_carry = unspec == UNSPEC_PLUS_DPP_SHR
&& GET_MODE_CLASS (mode) == MODE_VECTOR_INT
&& (TARGET_GCN3 || mode == V64DImode);
if (use_plus_carry)
unspec = UNSPEC_PLUS_CARRY_DPP_SHR;
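+ /* Widen sub-word inputs to V64SImode; the result is truncated back to
+ the original mode at the end. */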
+ if (use_extends)
+ {
+ rtx tmp = gen_reg_rtx (V64SImode);
+ convert_move (tmp, src, unsignedp);
+ src = tmp;
+ mode = V64SImode;
+ }
+
/* Perform reduction by first performing the reduction operation on every
pair of lanes, then on every pair of results from the previous
iteration (thereby effectively reducing every 4 lanes) and so on until
all lanes are reduced. */
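+ /* For example, with 64 lanes the loop uses shifts 1, 2, 4, 8, 16 and
+ 32: after the first step lane N holds the reduction of lanes N-1..N,
+ after the second lanes N-3..N, and so on, until lane 63 holds the
+ reduction of all 64 lanes. */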
+ rtx in, out = src;
for (int i = 0, shift = 1; i < 6; i++, shift <<= 1)
{
rtx shift_val = gen_rtx_CONST_INT (VOIDmode, shift);
- rtx insn = gen_rtx_SET (tmp,
- gen_rtx_UNSPEC (mode,
- gen_rtvec (3,
- src, src, shift_val),
- unspec));
-
- /* Add clobber for instructions that set the carry flags. */
- if (use_plus_carry)
+ in = out;
+ out = gen_reg_rtx (mode);
+
+ if (use_moves)
{
- rtx clobber = gen_rtx_CLOBBER (VOIDmode,
- gen_rtx_REG (DImode, VCC_REG));
- insn = gen_rtx_PARALLEL (VOIDmode,
- gen_rtvec (2, insn, clobber));
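+ /* Shift the vector SHIFT lanes with a DPP move, then combine the
+ shifted copy with the unshifted input using the reduction
+ operation. */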
+ rtx tmp = gen_reg_rtx (mode);
+ emit_insn (gen_dpp_move (mode, tmp, in, shift_val));
+ emit_insn (gen_rtx_SET (out, gen_rtx_fmt_ee (code, mode, tmp, in)));
}
+ else
+ {
+ rtx insn = gen_rtx_SET (out,
+ gen_rtx_UNSPEC (mode,
+ gen_rtvec (3, in, in,
+ shift_val),
+ unspec));
+
+ /* Add clobber for instructions that set the carry flags. */
+ if (use_plus_carry)
+ {
+ rtx clobber = gen_rtx_CLOBBER (VOIDmode,
+ gen_rtx_REG (DImode, VCC_REG));
+ insn = gen_rtx_PARALLEL (VOIDmode,
+ gen_rtvec (2, insn, clobber));
+ }
- emit_insn (insn);
+ emit_insn (insn);
+ }
+ }
- /* The source operands for every iteration after the first
- should be TMP. */
- src = tmp;
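+ /* The arithmetic was done in V64SImode for sub-word modes; convert
+ the result back to the original mode. */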
+ if (use_extends)
+ {
+ rtx tmp = gen_reg_rtx (orig_mode);
+ convert_move (tmp, out, unsignedp);
+ out = tmp;
}
- return tmp;
+ return out;
}
/* Implement TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST. */
b - print operand size as untyped operand (b8/b16/b32/b64)
B - print operand size as SI/DI untyped operand (b32/b32/b32/b64)
i - print operand size as untyped operand (i16/b32/i64)
+ I - print operand size as SI/DI untyped operand (i32/i32/i32/i64)
u - print operand size as untyped operand (u16/u32/u64)
+ U - print operand size as SI/DI untyped operand (u32/u32/u32/u64)
o - print operand size as memory access size for loads
(ubyte/ushort/dword/dwordx2/wordx3/dwordx4)
s - print operand size as memory access size for stores
fputs (")", file);
return;
case 'i':
+ case 'I':
case 'u':
+ case 'U':
{
- bool signed_p = code == 'i';
+ bool signed_p = code == 'i' || code == 'I';
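+ /* 'I' and 'U' print a suffix of at least 32 bits, for instructions
+ that do not operate on sub-word data. */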
+ bool min32_p = code == 'I' || code == 'U';
const char *s = "";
machine_mode mode = GET_MODE (x);
if (VECTOR_MODE_P (mode))
output_operand_lossage ("invalid operand %%xn code");
return;
}
+ else if (min32_p)
+ switch (GET_MODE_SIZE (mode))
+ {
+ case 1:
+ case 2:
+ case 4:
+ s = signed_p ? "_i32" : "_u32";
+ break;
+ case 8:
+ s = signed_p ? "_i64" : "_u64";
+ break;
+ default:
+ output_operand_lossage ("invalid operand %%xn code");
+ return;
+ }
else
switch (GET_MODE_SIZE (mode))
{
UNSPEC_PLUS_DPP_SHR
UNSPEC_PLUS_CARRY_DPP_SHR UNSPEC_PLUS_CARRY_IN_DPP_SHR
UNSPEC_AND_DPP_SHR UNSPEC_IOR_DPP_SHR UNSPEC_XOR_DPP_SHR
+ UNSPEC_MOV_DPP_SHR
UNSPEC_MOV_FROM_LANE63
UNSPEC_GATHER
UNSPEC_SCATTER])