2 * Copyright © 2018 Valve Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
26 #include <unordered_map>
30 * Implements the algorithm for dominator-tree value numbering
31 * from "Value Numbering" by Briggs, Cooper, and Simpson.
/* Hash functor over Instruction* for the value-numbering expression set.
 * Mixes opcode, format, each operand's identifying value, and (for VOP3 /
 * VINTRP) the modifier fields, so that instructions which InstrPred may
 * consider equal land in the same hash bucket.
 * NOTE(review): this chunk is missing several original source lines
 * (braces, `break;`s, other switch cases, the final `return hash;`) —
 * comments below describe only the code visible here. */
38 std::size_t operator()(Instruction
* instr
) const
/* Seed with opcode + format so different operations separate early. */
40 uint64_t hash
= (uint64_t) instr
->opcode
+ (uint64_t) instr
->format
;
/* Fold each operand into the hash. */
41 for (unsigned i
= 0; i
< instr
->operands
.size(); i
++) {
42 Operand op
= instr
->operands
[i
];
/* Pick whichever value identifies the operand: temp id for SSA temps,
 * physical register for fixed operands, else the literal constant. */
43 uint64_t val
= op
.isTemp() ? op
.tempId() : op
.isFixed() ? op
.physReg() : op
.constantValue();
/* OR the value in at a per-operand byte offset ((i+1)*8). */
44 hash
|= val
<< (i
+1) * 8;
/* VOP3 modifiers (abs/opsel/neg per source, plus clamp/omod) change the
 * result, so they must participate in the hash. */
46 if (instr
->isVOP3()) {
47 VOP3A_instruction
* vop3
= static_cast<VOP3A_instruction
*>(instr
);
/* 3 bits per source slot: abs, opsel, neg. */
48 for (unsigned i
= 0; i
< 3; i
++) {
49 hash
^= vop3
->abs
[i
] << (i
*3 + 0);
50 hash
^= vop3
->opsel
[i
] << (i
*3 + 1);
51 hash
^= vop3
->neg
[i
] << (i
*3 + 2);
/* clamp/omod mixed in at high bit positions to avoid colliding with
 * the per-operand bytes above. */
53 hash
^= (vop3
->clamp
<< 28) * 13;
54 hash
+= vop3
->omod
<< 19;
/* Format-specific extra fields.
 * NOTE(review): other case labels of this switch are not visible in
 * this chunk. */
56 switch (instr
->format
) {
/* VINTRP results depend on attribute/component, so hash them too. */
59 case Format::VINTRP
: {
60 Interp_instruction
* interp
= static_cast<Interp_instruction
*>(instr
);
61 hash
^= interp
->attribute
<< 13;
62 hash
^= interp
->component
<< 27;
/* Equality predicate over Instruction* for the value-numbering set: two
 * instructions are "equal" iff replacing the second with the first's
 * results is semantically safe. Checks format/opcode, operand and
 * definition lists, then format-specific fields.
 * NOTE(review): this chunk is missing many original source lines — most
 * `return false;`/`return true;` bodies, `break;`s, several case labels
 * and guard `if`s between the numbered statements. Comments describe only
 * what is visible. */
76 bool operator()(Instruction
* a
, Instruction
* b
) const
/* Cheap rejections first: different format or opcode can never match. */
78 if (a
->format
!= b
->format
)
80 if (a
->opcode
!= b
->opcode
)
/* Operand/definition counts can differ for the same opcode with
 * pseudo-instructions, so compare sizes explicitly. */
82 if (a
->operands
.size() != b
->operands
.size() || a
->definitions
.size() != b
->definitions
.size())
83 return false; /* possible with pseudo-instructions */
84 /* We can't value number v_readlane_b32 across control flow or discards
85 * because of the possibility of live-range splits.
86 * We can't value number permutes for the same reason as
87 * v_readlane_b32 and because discards affect the result */
88 if (a
->opcode
== aco_opcode::v_readfirstlane_b32
|| a
->opcode
== aco_opcode::v_readlane_b32
||
89 a
->opcode
== aco_opcode::ds_bpermute_b32
|| a
->opcode
== aco_opcode::ds_permute_b32
||
90 a
->opcode
== aco_opcode::ds_swizzle_b32
|| a
->format
== Format::PSEUDO_REDUCTION
||
91 a
->opcode
== aco_opcode::p_phi
|| a
->opcode
== aco_opcode::p_linear_phi
) {
/* pass_flags is set by process_block from ctx.exec_id; a mismatch means
 * the two instructions run under potentially different exec masks. */
92 if (a
->pass_flags
!= b
->pass_flags
)
/* Compare operands pairwise: constants by value, temps by id,
 * undefined-ness must agree, fixed registers must agree. */
95 for (unsigned i
= 0; i
< a
->operands
.size(); i
++) {
96 if (a
->operands
[i
].isConstant()) {
97 if (!b
->operands
[i
].isConstant())
99 if (a
->operands
[i
].constantValue() != b
->operands
[i
].constantValue())
102 else if (a
->operands
[i
].isTemp()) {
103 if (!b
->operands
[i
].isTemp())
105 if (a
->operands
[i
].tempId() != b
->operands
[i
].tempId())
/* XOR: exactly one side undefined means they differ. */
108 else if (a
->operands
[i
].isUndefined() ^ b
->operands
[i
].isUndefined())
/* Fixed-register operands: exec is handled specially (body not visible
 * in this chunk), otherwise both must be fixed to the same PhysReg. */
110 if (a
->operands
[i
].isFixed()) {
111 if (a
->operands
[i
].physReg() == exec
)
113 if (!b
->operands
[i
].isFixed())
115 if (!(a
->operands
[i
].physReg() == b
->operands
[i
].physReg()))
/* Compare definitions pairwise: temps must match in register class,
 * and fixed definitions must target the same physical register. */
119 for (unsigned i
= 0; i
< a
->definitions
.size(); i
++) {
120 if (a
->definitions
[i
].isTemp()) {
121 if (!b
->definitions
[i
].isTemp())
123 if (a
->definitions
[i
].regClass() != b
->definitions
[i
].regClass())
126 if (a
->definitions
[i
].isFixed()) {
127 if (!b
->definitions
[i
].isFixed())
129 if (!(a
->definitions
[i
].physReg() == b
->definitions
[i
].physReg()))
/* Branches are never value-numbered (result body not visible here). */
133 if (a
->format
== Format::PSEUDO_BRANCH
)
/* VOP3 modifiers must match exactly.
 * NOTE(review): the guarding `if (a->isVOP3())` line is not visible in
 * this chunk. */
136 VOP3A_instruction
* a3
= static_cast<VOP3A_instruction
*>(a
);
137 VOP3A_instruction
* b3
= static_cast<VOP3A_instruction
*>(b
);
138 for (unsigned i
= 0; i
< 3; i
++) {
139 if (a3
->abs
[i
] != b3
->abs
[i
] ||
140 a3
->opsel
[i
] != b3
->opsel
[i
] ||
141 a3
->neg
[i
] != b3
->neg
[i
])
144 return a3
->clamp
== b3
->clamp
&&
145 a3
->omod
== b3
->omod
;
/* DPP: control word, masks, bound_ctrl and per-source abs/neg must all
 * match (guarding `if (a->isDPP())` not visible in this chunk). */
148 DPP_instruction
* aDPP
= static_cast<DPP_instruction
*>(a
);
149 DPP_instruction
* bDPP
= static_cast<DPP_instruction
*>(b
);
150 return aDPP
->dpp_ctrl
== bDPP
->dpp_ctrl
&&
151 aDPP
->bank_mask
== bDPP
->bank_mask
&&
152 aDPP
->row_mask
== bDPP
->row_mask
&&
153 aDPP
->bound_ctrl
== bDPP
->bound_ctrl
&&
154 aDPP
->abs
[0] == bDPP
->abs
[0] &&
155 aDPP
->abs
[1] == bDPP
->abs
[1] &&
156 aDPP
->neg
[0] == bDPP
->neg
[0] &&
157 aDPP
->neg
[1] == bDPP
->neg
[1];
/* Format-specific comparisons follow. NOTE(review): the enclosing
 * `switch (a->format)` and several case labels are not visible here. */
161 /* Since the results depend on the exec mask, these shouldn't
162 * be value numbered (this is especially useful for subgroupBallot()). */
/* SOPK: only the immediate distinguishes otherwise-equal instructions. */
166 SOPK_instruction
* aK
= static_cast<SOPK_instruction
*>(a
);
167 SOPK_instruction
* bK
= static_cast<SOPK_instruction
*>(b
);
168 return aK
->imm
== bK
->imm
;
/* SMEM: both loads must be reorderable, and cache/policy bits agree. */
171 SMEM_instruction
* aS
= static_cast<SMEM_instruction
*>(a
);
172 SMEM_instruction
* bS
= static_cast<SMEM_instruction
*>(b
);
173 return aS
->can_reorder
&& bS
->can_reorder
&&
174 aS
->glc
== bS
->glc
&& aS
->nv
== bS
->nv
;
/* VINTRP: attribute and component select the interpolated input. */
176 case Format::VINTRP
: {
177 Interp_instruction
* aI
= static_cast<Interp_instruction
*>(a
);
178 Interp_instruction
* bI
= static_cast<Interp_instruction
*>(b
);
179 if (aI
->attribute
!= bI
->attribute
)
181 if (aI
->component
!= bI
->component
)
/* Reductions: operation and cluster size fully determine the result
 * (given equal operands/pass_flags checked above). */
185 case Format::PSEUDO_REDUCTION
: {
186 Pseudo_reduction_instruction
*aR
= static_cast<Pseudo_reduction_instruction
*>(a
);
187 Pseudo_reduction_instruction
*bR
= static_cast<Pseudo_reduction_instruction
*>(b
);
188 return aR
->reduce_op
== bR
->reduce_op
&& aR
->cluster_size
== bR
->cluster_size
;
190 case Format::MTBUF
: {
191 /* this is fine since they are only used for vertex input fetches */
192 MTBUF_instruction
* aM
= static_cast<MTBUF_instruction
*>(a
);
193 MTBUF_instruction
* bM
= static_cast<MTBUF_instruction
*>(b
);
/* Every addressing/format/cache field must match byte-for-byte. */
194 return aM
->can_reorder
== bM
->can_reorder
&&
195 aM
->barrier
== bM
->barrier
&&
196 aM
->dfmt
== bM
->dfmt
&&
197 aM
->nfmt
== bM
->nfmt
&&
198 aM
->offset
== bM
->offset
&&
199 aM
->offen
== bM
->offen
&&
200 aM
->idxen
== bM
->idxen
&&
201 aM
->glc
== bM
->glc
&&
202 aM
->slc
== bM
->slc
&&
203 aM
->tfe
== bM
->tfe
&&
204 aM
->disable_wqm
== bM
->disable_wqm
;
206 /* we want to optimize these in NIR and don't hassle with load-store dependencies */
/* SCRATCH (and, per the comment above, other memory formats — case
 * labels around here are not visible in this chunk) is not numbered. */
210 case Format::SCRATCH
:
213 /* we already handle potential issue with permute/swizzle above */
214 DS_instruction
* aD
= static_cast<DS_instruction
*>(a
);
215 DS_instruction
* bD
= static_cast<DS_instruction
*>(b
);
/* Only the permute/swizzle DS opcodes are eligible; anything else is
 * rejected (the rejecting body is not visible in this chunk). */
216 if (a
->opcode
!= aco_opcode::ds_bpermute_b32
&&
217 a
->opcode
!= aco_opcode::ds_permute_b32
&&
218 a
->opcode
!= aco_opcode::ds_swizzle_b32
)
220 return aD
->gds
== bD
->gds
&& aD
->offset0
== bD
->offset0
&& aD
->offset1
== bD
->offset1
;
/* MIMG: both must be reorderable and all sampling/addressing flags must
 * agree. */
223 MIMG_instruction
* aM
= static_cast<MIMG_instruction
*>(a
);
224 MIMG_instruction
* bM
= static_cast<MIMG_instruction
*>(b
);
225 return aM
->can_reorder
&& bM
->can_reorder
&&
226 aM
->barrier
== bM
->barrier
&&
227 aM
->dmask
== bM
->dmask
&&
228 aM
->unrm
== bM
->unrm
&&
229 aM
->glc
== bM
->glc
&&
230 aM
->slc
== bM
->slc
&&
231 aM
->tfe
== bM
->tfe
&&
233 aM
->lwe
== bM
->lwe
&&
234 aM
->r128
== bM
->r128
&&
235 aM
->a16
== bM
->a16
&&
236 aM
->d16
== bM
->d16
&&
237 aM
->disable_wqm
== bM
->disable_wqm
;
/* Expression table for value numbering: maps an instruction (compared
 * structurally via InstrHash/InstrPred) to the index of the block where
 * that value was defined. */
245 using expr_set
= std::unordered_map
<Instruction
*, uint32_t, InstrHash
, InstrPred
>;
/* NOTE(review): the enclosing struct declaration (vn_ctx) and its
 * `Program* program` member line are not visible in this chunk — the
 * constructor below initializes such a member. */
/* all expressions seen so far, with the block index they dominate from */
249 expr_set expr_values
;
/* maps a replaced definition's temp-id to the Temp that now supplies it */
250 std::map
<uint32_t, Temp
> renames
;
/* id stored into pass_flags of exec-sensitive instructions; presumably
 * incremented when the exec mask may change (e.g. discards) — the
 * increment site is not visible in this chunk. TODO confirm. */
251 uint32_t exec_id
= 0;
/* binds the pass to the program being optimized */
253 vn_ctx(Program
* program
) : program(program
) {}
256 bool dominates(vn_ctx
& ctx
, uint32_t parent
, uint32_t child
)
258 while (parent
< child
)
259 child
= ctx
.program
->blocks
[child
].logical_idom
;
261 return parent
== child
;
/* Value-numbers one basic block: renames operands using ctx.renames,
 * copy-propagates simple moves, and for each value-producing instruction
 * either reuses a dominating equivalent (recording renames for its
 * definitions) or registers it as the canonical expression. Rebuilds the
 * block's instruction list into new_instructions.
 * NOTE(review): this chunk is missing several original lines (braces,
 * `continue;`s, the exec_id handling after the discard check, and the
 * `if (!res.second)` structure around the reuse path) — comments describe
 * only what is visible. */
264 void process_block(vn_ctx
& ctx
, Block
& block
)
/* Surviving instructions are collected here and swapped in at the end. */
266 std::vector
<aco_ptr
<Instruction
>> new_instructions
;
267 new_instructions
.reserve(block
.instructions
.size());
269 for (aco_ptr
<Instruction
>& instr
: block
.instructions
) {
270 /* first, rename operands */
271 for (Operand
& op
: instr
->operands
) {
/* NOTE(review): the `isTemp()` guard before this lookup is not visible
 * in this chunk; find() is keyed on the operand's temp id. */
274 auto it
= ctx
.renames
.find(op
.tempId());
275 if (it
!= ctx
.renames
.end())
276 op
.setTemp(it
->second
);
/* Instructions without definitions produce no value to number — keep
 * them verbatim. */
279 if (instr
->definitions
.empty()) {
280 new_instructions
.emplace_back(std::move(instr
));
284 /* simple copy-propagation through renaming */
/* s_mov/v_mov of a temp into an unfixed def of the same register class
 * (and without DPP/SDWA source modifiers) is a pure copy: record a
 * rename from the def's temp to the source temp. */
285 if ((instr
->opcode
== aco_opcode::s_mov_b32
|| instr
->opcode
== aco_opcode::s_mov_b64
|| instr
->opcode
== aco_opcode::v_mov_b32
) &&
286 !instr
->definitions
[0].isFixed() && instr
->operands
[0].isTemp() && instr
->operands
[0].regClass() == instr
->definitions
[0].regClass() &&
287 !instr
->isDPP() && !((int)instr
->format
& (int)Format::SDWA
)) {
288 ctx
.renames
[instr
->definitions
[0].tempId()] = instr
->operands
[0].getTemp();
/* Discards/demotes change which lanes are live; the consequent body is
 * not visible in this chunk (presumably bumps ctx.exec_id — confirm). */
291 if (instr
->opcode
== aco_opcode::p_discard_if
||
292 instr
->opcode
== aco_opcode::p_demote_to_helper
)
/* Tag the instruction with the current exec generation so InstrPred can
 * refuse matches across exec-mask changes. */
295 instr
->pass_flags
= ctx
.exec_id
;
/* Try to insert this instruction as a new expression; res.second tells
 * whether an equal expression already existed. */
296 std::pair
<expr_set::iterator
, bool> res
= ctx
.expr_values
.emplace(instr
.get(), block
.index
);
298 /* if there was already an expression with the same value number */
300 Instruction
* orig_instr
= res
.first
->first
;
301 assert(instr
->definitions
.size() == orig_instr
->definitions
.size());
302 /* check if the original instruction dominates the current one */
303 if (dominates(ctx
, res
.first
->second
, block
.index
)) {
/* Reuse the dominating instruction's results: rename each of our
 * definitions to the corresponding original Temp. */
304 for (unsigned i
= 0; i
< instr
->definitions
.size(); i
++) {
305 assert(instr
->definitions
[i
].regClass() == orig_instr
->definitions
[i
].regClass());
306 ctx
.renames
[instr
->definitions
[i
].tempId()] = orig_instr
->definitions
[i
].getTemp();
/* The stored expression does not dominate us: replace the table entry
 * with this instruction/block and keep the instruction. */
309 ctx
.expr_values
.erase(res
.first
);
310 ctx
.expr_values
.emplace(instr
.get(), block
.index
);
311 new_instructions
.emplace_back(std::move(instr
));
/* No pre-existing equal expression: keep the instruction. */
314 new_instructions
.emplace_back(std::move(instr
));
/* Swap the filtered list back into the block. */
318 block
.instructions
= std::move(new_instructions
);
/* Applies the accumulated temp renames to the operands of every phi
 * (p_phi / p_linear_phi) in `block`. Used because phi operands reference
 * values from predecessor blocks processed earlier (or later, for loop
 * headers).
 * NOTE(review): this chunk is missing a few original lines (the
 * `continue;` body of the opcode check and the guard before the rename
 * lookup) — comments describe only what is visible. */
321 void rename_phi_operands(Block
& block
, std::map
<uint32_t, Temp
>& renames
)
323 for (aco_ptr
<Instruction
>& phi
: block
.instructions
) {
/* Only phis are touched; other instructions are skipped. */
324 if (phi
->opcode
!= aco_opcode::p_phi
&& phi
->opcode
!= aco_opcode::p_linear_phi
)
327 for (Operand
& op
: phi
->operands
) {
/* Replace the operand's temp if a rename was recorded for it. */
330 auto it
= renames
.find(op
.tempId());
331 if (it
!= renames
.end())
332 op
.setTemp(it
->second
);
336 } /* end namespace */
/* Entry point of the dominator-tree value-numbering pass: walks all
 * blocks, value-numbering those reachable in the logical dominator tree
 * and renaming phi operands, then does a second pass over loop-header
 * phis whose operands may reference back-edge values renamed later.
 * NOTE(review): this chunk is missing a few original lines, including the
 * construction of `ctx` (a vn_ctx, per its use below) and the non-taken
 * branch of the logical_idom check. */
339 void value_numbering(Program
* program
)
343 for (Block
& block
: program
->blocks
) {
/* Blocks with a logical immediate dominator participate fully in value
 * numbering; others only get their phis renamed (line 347). */
344 if (block
.logical_idom
!= -1)
345 process_block(ctx
, block
);
347 rename_phi_operands(block
, ctx
.renames
);
352 /* rename loop header phi operands */
/* Loop-header phis read back-edge operands defined after the header was
 * first visited, so re-run renaming once all renames are known. */
353 for (Block
& block
: program
->blocks
) {
354 if (block
.kind
& block_kind_loop_header
)
355 rename_phi_operands(block
, ctx
.renames
);