2 * Copyright © 2018 Valve Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
33 /* map: block-id -> pair (dest, src) to store phi information */
34 typedef std::map
<uint32_t, std::vector
<std::pair
<Definition
, Operand
>>> phi_info
;
36 struct ssa_elimination_ctx
{
37 phi_info logical_phi_info
;
38 phi_info linear_phi_info
;
39 std::vector
<bool> empty_blocks
;
42 ssa_elimination_ctx(Program
* program
) : empty_blocks(program
->blocks
.size(), true), program(program
) {}
45 void collect_phi_info(ssa_elimination_ctx
& ctx
)
47 for (Block
& block
: ctx
.program
->blocks
) {
48 for (aco_ptr
<Instruction
>& phi
: block
.instructions
) {
49 if (phi
->opcode
!= aco_opcode::p_phi
&& phi
->opcode
!= aco_opcode::p_linear_phi
)
52 for (unsigned i
= 0; i
< phi
->operands
.size(); i
++) {
53 if (phi
->operands
[i
].isUndefined())
55 if (phi
->operands
[i
].isTemp() && phi
->operands
[i
].physReg() == phi
->definitions
[0].physReg())
58 std::vector
<unsigned>& preds
= phi
->opcode
== aco_opcode::p_phi
? block
.logical_preds
: block
.linear_preds
;
59 phi_info
& info
= phi
->opcode
== aco_opcode::p_phi
? ctx
.logical_phi_info
: ctx
.linear_phi_info
;
60 const auto result
= info
.emplace(preds
[i
], std::vector
<std::pair
<Definition
, Operand
>>());
61 assert(phi
->definitions
[0].size() == phi
->operands
[i
].size());
62 result
.first
->second
.emplace_back(phi
->definitions
[0], phi
->operands
[i
]);
63 ctx
.empty_blocks
[preds
[i
]] = false;
69 void insert_parallelcopies(ssa_elimination_ctx
& ctx
)
71 /* insert the parallelcopies from logical phis before p_logical_end */
72 for (auto&& entry
: ctx
.logical_phi_info
) {
73 Block
& block
= ctx
.program
->blocks
[entry
.first
];
74 unsigned idx
= block
.instructions
.size() - 1;
75 while (block
.instructions
[idx
]->opcode
!= aco_opcode::p_logical_end
) {
80 std::vector
<aco_ptr
<Instruction
>>::iterator it
= std::next(block
.instructions
.begin(), idx
);
81 aco_ptr
<Pseudo_instruction
> pc
{create_instruction
<Pseudo_instruction
>(aco_opcode::p_parallelcopy
, Format::PSEUDO
, entry
.second
.size(), entry
.second
.size())};
83 for (std::pair
<Definition
, Operand
>& pair
: entry
.second
)
85 pc
->definitions
[i
] = pair
.first
;
86 pc
->operands
[i
] = pair
.second
;
89 /* this shouldn't be needed since we're only copying vgprs */
90 pc
->tmp_in_scc
= false;
91 block
.instructions
.insert(it
, std::move(pc
));
94 /* insert parallelcopies for the linear phis at the end of blocks just before the branch */
95 for (auto&& entry
: ctx
.linear_phi_info
) {
96 Block
& block
= ctx
.program
->blocks
[entry
.first
];
97 std::vector
<aco_ptr
<Instruction
>>::iterator it
= block
.instructions
.end();
99 assert((*it
)->format
== Format::PSEUDO_BRANCH
);
100 aco_ptr
<Pseudo_instruction
> pc
{create_instruction
<Pseudo_instruction
>(aco_opcode::p_parallelcopy
, Format::PSEUDO
, entry
.second
.size(), entry
.second
.size())};
102 for (std::pair
<Definition
, Operand
>& pair
: entry
.second
)
104 pc
->definitions
[i
] = pair
.first
;
105 pc
->operands
[i
] = pair
.second
;
108 pc
->tmp_in_scc
= block
.scc_live_out
;
109 pc
->scratch_sgpr
= block
.scratch_sgpr
;
110 block
.instructions
.insert(it
, std::move(pc
));
114 bool is_empty_block(Block
* block
, bool ignore_exec_writes
)
116 /* check if this block is empty and the exec mask is not needed */
117 for (aco_ptr
<Instruction
>& instr
: block
->instructions
) {
118 switch (instr
->opcode
) {
119 case aco_opcode::p_linear_phi
:
120 case aco_opcode::p_phi
:
121 case aco_opcode::p_logical_start
:
122 case aco_opcode::p_logical_end
:
123 case aco_opcode::p_branch
:
125 case aco_opcode::p_parallelcopy
:
126 for (unsigned i
= 0; i
< instr
->definitions
.size(); i
++) {
127 if (ignore_exec_writes
&& instr
->definitions
[i
].physReg() == exec
)
129 if (instr
->definitions
[i
].physReg() != instr
->operands
[i
].physReg())
133 case aco_opcode::s_andn2_b64
:
134 case aco_opcode::s_andn2_b32
:
135 if (ignore_exec_writes
&& instr
->definitions
[0].physReg() == exec
)
144 void try_remove_merge_block(ssa_elimination_ctx
& ctx
, Block
* block
)
146 /* check if the successor is another merge block which restores exec */
147 // TODO: divergent loops also restore exec
148 if (block
->linear_succs
.size() != 1 ||
149 !(ctx
.program
->blocks
[block
->linear_succs
[0]].kind
& block_kind_merge
))
152 /* check if this block is empty */
153 if (!is_empty_block(block
, true))
156 /* keep the branch instruction and remove the rest */
157 aco_ptr
<Instruction
> branch
= std::move(block
->instructions
.back());
158 block
->instructions
.clear();
159 block
->instructions
.emplace_back(std::move(branch
));
162 void try_remove_invert_block(ssa_elimination_ctx
& ctx
, Block
* block
)
164 assert(block
->linear_succs
.size() == 2);
165 /* only remove this block if the successor got removed as well */
166 if (block
->linear_succs
[0] != block
->linear_succs
[1])
169 /* check if block is otherwise empty */
170 if (!is_empty_block(block
, true))
173 unsigned succ_idx
= block
->linear_succs
[0];
174 assert(block
->linear_preds
.size() == 2);
175 for (unsigned i
= 0; i
< 2; i
++) {
176 Block
*pred
= &ctx
.program
->blocks
[block
->linear_preds
[i
]];
177 pred
->linear_succs
[0] = succ_idx
;
178 ctx
.program
->blocks
[succ_idx
].linear_preds
[i
] = pred
->index
;
180 Pseudo_branch_instruction
*branch
= static_cast<Pseudo_branch_instruction
*>(pred
->instructions
.back().get());
181 assert(branch
->format
== Format::PSEUDO_BRANCH
);
182 branch
->target
[0] = succ_idx
;
183 branch
->target
[1] = succ_idx
;
186 block
->instructions
.clear();
187 block
->linear_preds
.clear();
188 block
->linear_succs
.clear();
191 void try_remove_simple_block(ssa_elimination_ctx
& ctx
, Block
* block
)
193 if (!is_empty_block(block
, false))
196 Block
& pred
= ctx
.program
->blocks
[block
->linear_preds
[0]];
197 Block
& succ
= ctx
.program
->blocks
[block
->linear_succs
[0]];
198 Pseudo_branch_instruction
* branch
= static_cast<Pseudo_branch_instruction
*>(pred
.instructions
.back().get());
199 if (branch
->opcode
== aco_opcode::p_branch
) {
200 branch
->target
[0] = succ
.index
;
201 branch
->target
[1] = succ
.index
;
202 } else if (branch
->target
[0] == block
->index
) {
203 branch
->target
[0] = succ
.index
;
204 } else if (branch
->target
[0] == succ
.index
) {
205 assert(branch
->target
[1] == block
->index
);
206 branch
->target
[1] = succ
.index
;
207 branch
->opcode
= aco_opcode::p_branch
;
208 } else if (branch
->target
[1] == block
->index
) {
209 /* check if there is a fall-through path from block to succ */
210 bool falls_through
= block
->index
< succ
.index
;
211 for (unsigned j
= block
->index
+ 1; falls_through
&& j
< succ
.index
; j
++) {
212 assert(ctx
.program
->blocks
[j
].index
== j
);
213 if (!ctx
.program
->blocks
[j
].instructions
.empty())
214 falls_through
= false;
217 branch
->target
[1] = succ
.index
;
219 /* check if there is a fall-through path for the alternative target */
220 if (block
->index
>= branch
->target
[0])
222 for (unsigned j
= block
->index
+ 1; j
< branch
->target
[0]; j
++) {
223 if (!ctx
.program
->blocks
[j
].instructions
.empty())
227 /* This is a (uniform) break or continue block. The branch condition has to be inverted. */
228 if (branch
->opcode
== aco_opcode::p_cbranch_z
)
229 branch
->opcode
= aco_opcode::p_cbranch_nz
;
230 else if (branch
->opcode
== aco_opcode::p_cbranch_nz
)
231 branch
->opcode
= aco_opcode::p_cbranch_z
;
234 /* also invert the linear successors */
235 pred
.linear_succs
[0] = pred
.linear_succs
[1];
236 pred
.linear_succs
[1] = succ
.index
;
237 branch
->target
[1] = branch
->target
[0];
238 branch
->target
[0] = succ
.index
;
244 if (branch
->target
[0] == branch
->target
[1])
245 branch
->opcode
= aco_opcode::p_branch
;
247 for (unsigned i
= 0; i
< pred
.linear_succs
.size(); i
++)
248 if (pred
.linear_succs
[i
] == block
->index
)
249 pred
.linear_succs
[i
] = succ
.index
;
251 for (unsigned i
= 0; i
< succ
.linear_preds
.size(); i
++)
252 if (succ
.linear_preds
[i
] == block
->index
)
253 succ
.linear_preds
[i
] = pred
.index
;
255 block
->instructions
.clear();
256 block
->linear_preds
.clear();
257 block
->linear_succs
.clear();
260 void jump_threading(ssa_elimination_ctx
& ctx
)
262 for (int i
= ctx
.program
->blocks
.size() - 1; i
>= 0; i
--) {
263 Block
* block
= &ctx
.program
->blocks
[i
];
265 if (!ctx
.empty_blocks
[i
])
268 if (block
->kind
& block_kind_invert
) {
269 try_remove_invert_block(ctx
, block
);
273 if (block
->linear_succs
.size() > 1)
276 if (block
->kind
& block_kind_merge
||
277 block
->kind
& block_kind_loop_exit
)
278 try_remove_merge_block(ctx
, block
);
280 if (block
->linear_preds
.size() == 1)
281 try_remove_simple_block(ctx
, block
);
285 } /* end namespace */
288 void ssa_elimination(Program
* program
)
290 ssa_elimination_ctx
ctx(program
);
292 /* Collect information about every phi-instruction */
293 collect_phi_info(ctx
);
295 /* eliminate empty blocks */
298 /* insert parallelcopies from SSA elimination */
299 insert_parallelcopies(ctx
);