8071ace1f97449502f5c6f4327cb59adc7d0ced5
2 * Copyright © 2018 Valve Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
26 #include <unordered_set>
31 * Implements the algorithm for dominator-tree value numbering
32 * from "Value Numbering" by Briggs, Cooper, and Simpson.
39 std::size_t operator()(Instruction
* instr
) const
41 uint64_t hash
= (uint64_t) instr
->opcode
+ (uint64_t) instr
->format
;
42 for (unsigned i
= 0; i
< instr
->operands
.size(); i
++) {
43 Operand op
= instr
->operands
[i
];
44 uint64_t val
= op
.isTemp() ? op
.tempId() : op
.isFixed() ? op
.physReg() : op
.constantValue();
45 hash
|= val
<< (i
+1) * 8;
47 if (instr
->isVOP3()) {
48 VOP3A_instruction
* vop3
= static_cast<VOP3A_instruction
*>(instr
);
49 for (unsigned i
= 0; i
< 3; i
++) {
50 hash
^= vop3
->abs
[i
] << (i
*3 + 0);
51 hash
^= vop3
->opsel
[i
] << (i
*3 + 1);
52 hash
^= vop3
->neg
[i
] << (i
*3 + 2);
54 hash
^= (vop3
->clamp
<< 28) * 13;
55 hash
+= vop3
->omod
<< 19;
57 switch (instr
->format
) {
60 case Format::VINTRP
: {
61 Interp_instruction
* interp
= static_cast<Interp_instruction
*>(instr
);
62 hash
^= interp
->attribute
<< 13;
63 hash
^= interp
->component
<< 27;
77 bool operator()(Instruction
* a
, Instruction
* b
) const
79 if (a
->format
!= b
->format
)
81 if (a
->opcode
!= b
->opcode
)
83 if (a
->operands
.size() != b
->operands
.size() || a
->definitions
.size() != b
->definitions
.size())
84 return false; /* possible with pseudo-instructions */
85 for (unsigned i
= 0; i
< a
->operands
.size(); i
++) {
86 if (a
->operands
[i
].isConstant()) {
87 if (!b
->operands
[i
].isConstant())
89 if (a
->operands
[i
].constantValue() != b
->operands
[i
].constantValue())
92 else if (a
->operands
[i
].isTemp()) {
93 if (!b
->operands
[i
].isTemp())
95 if (a
->operands
[i
].tempId() != b
->operands
[i
].tempId())
98 else if (a
->operands
[i
].isUndefined() ^ b
->operands
[i
].isUndefined())
100 if (a
->operands
[i
].isFixed()) {
101 if (a
->operands
[i
].physReg() == exec
)
103 if (!b
->operands
[i
].isFixed())
105 if (!(a
->operands
[i
].physReg() == b
->operands
[i
].physReg()))
109 for (unsigned i
= 0; i
< a
->definitions
.size(); i
++) {
110 if (a
->definitions
[i
].isTemp()) {
111 if (!b
->definitions
[i
].isTemp())
113 if (a
->definitions
[i
].regClass() != b
->definitions
[i
].regClass())
116 if (a
->definitions
[i
].isFixed()) {
117 if (!b
->definitions
[i
].isFixed())
119 if (!(a
->definitions
[i
].physReg() == b
->definitions
[i
].physReg()))
123 if (a
->format
== Format::PSEUDO_BRANCH
)
126 VOP3A_instruction
* a3
= static_cast<VOP3A_instruction
*>(a
);
127 VOP3A_instruction
* b3
= static_cast<VOP3A_instruction
*>(b
);
128 for (unsigned i
= 0; i
< 3; i
++) {
129 if (a3
->abs
[i
] != b3
->abs
[i
] ||
130 a3
->opsel
[i
] != b3
->opsel
[i
] ||
131 a3
->neg
[i
] != b3
->neg
[i
])
134 return a3
->clamp
== b3
->clamp
&&
135 a3
->omod
== b3
->omod
;
138 DPP_instruction
* aDPP
= static_cast<DPP_instruction
*>(a
);
139 DPP_instruction
* bDPP
= static_cast<DPP_instruction
*>(b
);
140 return aDPP
->dpp_ctrl
== bDPP
->dpp_ctrl
&&
141 aDPP
->bank_mask
== bDPP
->bank_mask
&&
142 aDPP
->row_mask
== bDPP
->row_mask
&&
143 aDPP
->bound_ctrl
== bDPP
->bound_ctrl
&&
144 aDPP
->abs
[0] == bDPP
->abs
[0] &&
145 aDPP
->abs
[1] == bDPP
->abs
[1] &&
146 aDPP
->neg
[0] == bDPP
->neg
[0] &&
147 aDPP
->neg
[1] == bDPP
->neg
[1];
151 /* Since the results depend on the exec mask, these shouldn't
152 * be value numbered (this is especially useful for subgroupBallot()). */
156 SOPK_instruction
* aK
= static_cast<SOPK_instruction
*>(a
);
157 SOPK_instruction
* bK
= static_cast<SOPK_instruction
*>(b
);
158 return aK
->imm
== bK
->imm
;
161 SMEM_instruction
* aS
= static_cast<SMEM_instruction
*>(a
);
162 SMEM_instruction
* bS
= static_cast<SMEM_instruction
*>(b
);
163 return aS
->can_reorder
&& bS
->can_reorder
&&
164 aS
->glc
== bS
->glc
&& aS
->nv
== bS
->nv
;
166 case Format::VINTRP
: {
167 Interp_instruction
* aI
= static_cast<Interp_instruction
*>(a
);
168 Interp_instruction
* bI
= static_cast<Interp_instruction
*>(b
);
169 if (aI
->attribute
!= bI
->attribute
)
171 if (aI
->component
!= bI
->component
)
175 case Format::PSEUDO_REDUCTION
:
177 case Format::MTBUF
: {
178 /* this is fine since they are only used for vertex input fetches */
179 MTBUF_instruction
* aM
= static_cast<MTBUF_instruction
*>(a
);
180 MTBUF_instruction
* bM
= static_cast<MTBUF_instruction
*>(b
);
181 return aM
->dfmt
== bM
->dfmt
&&
182 aM
->nfmt
== bM
->nfmt
&&
183 aM
->offset
== bM
->offset
&&
184 aM
->offen
== bM
->offen
&&
185 aM
->idxen
== bM
->idxen
&&
186 aM
->glc
== bM
->glc
&&
187 aM
->slc
== bM
->slc
&&
188 aM
->tfe
== bM
->tfe
&&
189 aM
->disable_wqm
== bM
->disable_wqm
;
191 /* we want to optimize these in NIR and don't hassle with load-store dependencies */
195 case Format::SCRATCH
:
199 MIMG_instruction
* aM
= static_cast<MIMG_instruction
*>(a
);
200 MIMG_instruction
* bM
= static_cast<MIMG_instruction
*>(b
);
201 return aM
->can_reorder
&& bM
->can_reorder
&&
202 aM
->dmask
== bM
->dmask
&&
203 aM
->unrm
== bM
->unrm
&&
204 aM
->glc
== bM
->glc
&&
205 aM
->slc
== bM
->slc
&&
206 aM
->tfe
== bM
->tfe
&&
208 aM
->lwe
== bM
->lwe
&&
209 aM
->r128
== bM
->r128
&&
210 aM
->a16
== bM
->a16
&&
211 aM
->d16
== bM
->d16
&&
212 aM
->disable_wqm
== bM
->disable_wqm
;
221 typedef std::unordered_set
<Instruction
*, InstrHash
, InstrPred
> expr_set
;
223 void process_block(Block
& block
,
224 expr_set
& expr_values
,
225 std::map
<uint32_t, Temp
>& renames
)
228 std::vector
<aco_ptr
<Instruction
>>::iterator it
= block
.instructions
.begin();
229 std::vector
<aco_ptr
<Instruction
>> new_instructions
;
230 new_instructions
.reserve(block
.instructions
.size());
233 while (it
!= block
.instructions
.end()) {
234 aco_ptr
<Instruction
>& instr
= *it
;
235 /* first, rename operands */
236 for (Operand
& op
: instr
->operands
) {
239 auto it
= renames
.find(op
.tempId());
240 if (it
!= renames
.end())
241 op
.setTemp(it
->second
);
244 if (instr
->definitions
.empty() || !run
) {
245 if (instr
->opcode
== aco_opcode::p_logical_start
)
247 else if (instr
->opcode
== aco_opcode::p_logical_end
)
249 else if (instr
->opcode
== aco_opcode::p_phi
|| instr
->opcode
== aco_opcode::p_linear_phi
) {
250 std::pair
<expr_set::iterator
, bool> res
= phi_values
.emplace(instr
.get());
252 Instruction
* orig_phi
= *(res
.first
);
253 renames
.emplace(instr
->definitions
[0].tempId(), orig_phi
->definitions
[0].getTemp()).second
;
258 new_instructions
.emplace_back(std::move(instr
));
263 /* simple copy-propagation through renaming */
264 if ((instr
->opcode
== aco_opcode::s_mov_b32
|| instr
->opcode
== aco_opcode::s_mov_b64
|| instr
->opcode
== aco_opcode::v_mov_b32
) &&
265 !instr
->definitions
[0].isFixed() && instr
->operands
[0].isTemp() && instr
->operands
[0].regClass() == instr
->definitions
[0].regClass() &&
266 !instr
->isDPP() && !((int)instr
->format
& (int)Format::SDWA
)) {
267 renames
[instr
->definitions
[0].tempId()] = instr
->operands
[0].getTemp();
270 std::pair
<expr_set::iterator
, bool> res
= expr_values
.emplace(instr
.get());
272 /* if there was already an expression with the same value number */
274 Instruction
* orig_instr
= *(res
.first
);
275 assert(instr
->definitions
.size() == orig_instr
->definitions
.size());
276 for (unsigned i
= 0; i
< instr
->definitions
.size(); i
++) {
277 assert(instr
->definitions
[i
].regClass() == orig_instr
->definitions
[i
].regClass());
278 renames
.emplace(instr
->definitions
[i
].tempId(), orig_instr
->definitions
[i
].getTemp()).second
;
281 new_instructions
.emplace_back(std::move(instr
));
286 block
.instructions
.swap(new_instructions
);
289 void rename_phi_operands(Block
& block
, std::map
<uint32_t, Temp
>& renames
)
291 for (aco_ptr
<Instruction
>& phi
: block
.instructions
) {
292 if (phi
->opcode
!= aco_opcode::p_phi
&& phi
->opcode
!= aco_opcode::p_linear_phi
)
295 for (Operand
& op
: phi
->operands
) {
298 auto it
= renames
.find(op
.tempId());
299 if (it
!= renames
.end())
300 op
.setTemp(it
->second
);
304 } /* end namespace */
307 void value_numbering(Program
* program
)
309 std::vector
<expr_set
> expr_values(program
->blocks
.size());
310 std::map
<uint32_t, Temp
> renames
;
312 for (Block
& block
: program
->blocks
) {
313 if (block
.logical_idom
!= -1) {
314 /* initialize expr_values from idom */
315 expr_values
[block
.index
] = expr_values
[block
.logical_idom
];
316 process_block(block
, expr_values
[block
.index
], renames
);
319 process_block(block
, empty
, renames
);
323 for (Block
& block
: program
->blocks
)
324 rename_phi_operands(block
, renames
);