/*
 * Copyright © 2020 Valve Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
28 bool can_use_SDWA(chip_class chip
, const aco_ptr
<Instruction
>& instr
)
33 if (chip
< GFX8
|| instr
->isDPP())
39 if (instr
->isVOP3()) {
40 VOP3A_instruction
*vop3
= static_cast<VOP3A_instruction
*>(instr
.get());
41 if (instr
->format
== Format::VOP3
)
43 if (vop3
->clamp
&& instr
->format
== asVOP3(Format::VOPC
) && chip
!= GFX8
)
45 if (vop3
->omod
&& chip
< GFX9
)
48 //TODO: return true if we know we will use vcc
49 if (instr
->definitions
.size() >= 2)
52 for (unsigned i
= 1; i
< instr
->operands
.size(); i
++) {
53 if (instr
->operands
[i
].isLiteral())
55 if (chip
< GFX9
&& !instr
->operands
[i
].isOfType(RegType::vgpr
))
60 if (!instr
->operands
.empty()) {
61 if (instr
->operands
[0].isLiteral())
63 if (chip
< GFX9
&& !instr
->operands
[0].isOfType(RegType::vgpr
))
67 bool is_mac
= instr
->opcode
== aco_opcode::v_mac_f32
||
68 instr
->opcode
== aco_opcode::v_mac_f16
||
69 instr
->opcode
== aco_opcode::v_fmac_f32
||
70 instr
->opcode
== aco_opcode::v_fmac_f16
;
72 if (chip
!= GFX8
&& is_mac
)
75 //TODO: return true if we know we will use vcc
76 if ((unsigned)instr
->format
& (unsigned)Format::VOPC
)
78 if (instr
->operands
.size() >= 3 && !is_mac
)
81 return instr
->opcode
!= aco_opcode::v_madmk_f32
&&
82 instr
->opcode
!= aco_opcode::v_madak_f32
&&
83 instr
->opcode
!= aco_opcode::v_madmk_f16
&&
84 instr
->opcode
!= aco_opcode::v_madak_f16
&&
85 instr
->opcode
!= aco_opcode::v_readfirstlane_b32
&&
86 instr
->opcode
!= aco_opcode::v_clrexcp
&&
87 instr
->opcode
!= aco_opcode::v_swap_b32
;
90 /* updates "instr" and returns the old instruction (or NULL if no update was needed) */
91 aco_ptr
<Instruction
> convert_to_SDWA(chip_class chip
, aco_ptr
<Instruction
>& instr
)
96 aco_ptr
<Instruction
> tmp
= std::move(instr
);
97 Format format
= (Format
)(((uint16_t)tmp
->format
& ~(uint16_t)Format::VOP3
) | (uint16_t)Format::SDWA
);
98 instr
.reset(create_instruction
<SDWA_instruction
>(tmp
->opcode
, format
, tmp
->operands
.size(), tmp
->definitions
.size()));
99 std::copy(tmp
->operands
.cbegin(), tmp
->operands
.cend(), instr
->operands
.begin());
100 std::copy(tmp
->definitions
.cbegin(), tmp
->definitions
.cend(), instr
->definitions
.begin());
102 SDWA_instruction
*sdwa
= static_cast<SDWA_instruction
*>(instr
.get());
105 VOP3A_instruction
*vop3
= static_cast<VOP3A_instruction
*>(tmp
.get());
106 memcpy(sdwa
->neg
, vop3
->neg
, sizeof(sdwa
->neg
));
107 memcpy(sdwa
->abs
, vop3
->abs
, sizeof(sdwa
->abs
));
108 sdwa
->omod
= vop3
->omod
;
109 sdwa
->clamp
= vop3
->clamp
;
112 for (unsigned i
= 0; i
< instr
->operands
.size(); i
++) {
113 switch (instr
->operands
[i
].bytes()) {
115 sdwa
->sel
[i
] = sdwa_ubyte
;
118 sdwa
->sel
[i
] = sdwa_uword
;
121 sdwa
->sel
[i
] = sdwa_udword
;
125 switch (instr
->definitions
[0].bytes()) {
127 sdwa
->dst_sel
= sdwa_ubyte
;
128 sdwa
->dst_preserve
= true;
131 sdwa
->dst_sel
= sdwa_uword
;
132 sdwa
->dst_preserve
= true;
135 sdwa
->dst_sel
= sdwa_udword
;
139 if (instr
->definitions
[0].getTemp().type() == RegType::sgpr
&& chip
== GFX8
)
140 instr
->definitions
[0].setFixed(vcc
);
141 if (instr
->definitions
.size() >= 2)
142 instr
->definitions
[1].setFixed(vcc
);
143 if (instr
->operands
.size() >= 3)
144 instr
->operands
[2].setFixed(vcc
);
149 bool can_use_opsel(chip_class chip
, aco_opcode op
, int idx
, bool high
)
151 /* opsel is only GFX9+ */
152 if ((high
|| idx
== -1) && chip
< GFX9
)
156 case aco_opcode::v_div_fixup_f16
:
157 case aco_opcode::v_fma_f16
:
158 case aco_opcode::v_mad_f16
:
159 case aco_opcode::v_mad_u16
:
160 case aco_opcode::v_mad_i16
:
161 case aco_opcode::v_med3_f16
:
162 case aco_opcode::v_med3_i16
:
163 case aco_opcode::v_med3_u16
:
164 case aco_opcode::v_min3_f16
:
165 case aco_opcode::v_min3_i16
:
166 case aco_opcode::v_min3_u16
:
167 case aco_opcode::v_max3_f16
:
168 case aco_opcode::v_max3_i16
:
169 case aco_opcode::v_max3_u16
:
170 case aco_opcode::v_max_u16_e64
:
171 case aco_opcode::v_max_i16_e64
:
172 case aco_opcode::v_min_u16_e64
:
173 case aco_opcode::v_min_i16_e64
:
174 case aco_opcode::v_add_i16
:
175 case aco_opcode::v_sub_i16
:
176 case aco_opcode::v_add_u16_e64
:
177 case aco_opcode::v_sub_u16_e64
:
178 case aco_opcode::v_cvt_pknorm_i16_f16
:
179 case aco_opcode::v_cvt_pknorm_u16_f16
:
180 case aco_opcode::v_lshlrev_b16_e64
:
181 case aco_opcode::v_lshrrev_b16_e64
:
182 case aco_opcode::v_ashrrev_i16_e64
:
183 case aco_opcode::v_mul_lo_u16_e64
:
185 case aco_opcode::v_pack_b32_f16
:
187 case aco_opcode::v_mad_u32_u16
:
188 case aco_opcode::v_mad_i32_i16
:
189 return idx
>= 0 && idx
< 2;