2 * Copyright © 2018 Valve Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
33 /* map: block-id -> pair (dest, src) to store phi information */
34 typedef std::map
<uint32_t, std::vector
<std::pair
<Definition
, Operand
>>> phi_info
;
36 struct ssa_elimination_ctx
{
37 phi_info logical_phi_info
;
38 phi_info linear_phi_info
;
39 std::vector
<bool> empty_blocks
;
42 ssa_elimination_ctx(Program
* program
) : empty_blocks(program
->blocks
.size(), true), program(program
) {}
45 void collect_phi_info(ssa_elimination_ctx
& ctx
)
47 for (Block
& block
: ctx
.program
->blocks
) {
48 for (aco_ptr
<Instruction
>& phi
: block
.instructions
) {
49 if (phi
->opcode
!= aco_opcode::p_phi
&& phi
->opcode
!= aco_opcode::p_linear_phi
)
52 for (unsigned i
= 0; i
< phi
->operands
.size(); i
++) {
53 if (phi
->operands
[i
].isUndefined())
55 if (phi
->operands
[i
].isTemp() && phi
->operands
[i
].physReg() == phi
->definitions
[0].physReg())
58 std::vector
<unsigned>& preds
= phi
->opcode
== aco_opcode::p_phi
? block
.logical_preds
: block
.linear_preds
;
59 phi_info
& info
= phi
->opcode
== aco_opcode::p_phi
? ctx
.logical_phi_info
: ctx
.linear_phi_info
;
60 const auto result
= info
.emplace(preds
[i
], std::vector
<std::pair
<Definition
, Operand
>>());
61 result
.first
->second
.emplace_back(phi
->definitions
[0], phi
->operands
[i
]);
62 ctx
.empty_blocks
[preds
[i
]] = false;
68 void insert_parallelcopies(ssa_elimination_ctx
& ctx
)
70 /* insert the parallelcopies from logical phis before p_logical_end */
71 for (auto&& entry
: ctx
.logical_phi_info
) {
72 Block
& block
= ctx
.program
->blocks
[entry
.first
];
73 unsigned idx
= block
.instructions
.size() - 1;
74 while (block
.instructions
[idx
]->opcode
!= aco_opcode::p_logical_end
) {
79 std::vector
<aco_ptr
<Instruction
>>::iterator it
= std::next(block
.instructions
.begin(), idx
);
80 aco_ptr
<Pseudo_instruction
> pc
{create_instruction
<Pseudo_instruction
>(aco_opcode::p_parallelcopy
, Format::PSEUDO
, entry
.second
.size(), entry
.second
.size())};
82 for (std::pair
<Definition
, Operand
>& pair
: entry
.second
)
84 pc
->definitions
[i
] = pair
.first
;
85 pc
->operands
[i
] = pair
.second
;
88 /* this shouldn't be needed since we're only copying vgprs */
89 pc
->tmp_in_scc
= false;
90 block
.instructions
.insert(it
, std::move(pc
));
93 /* insert parallelcopies for the linear phis at the end of blocks just before the branch */
94 for (auto&& entry
: ctx
.linear_phi_info
) {
95 Block
& block
= ctx
.program
->blocks
[entry
.first
];
96 std::vector
<aco_ptr
<Instruction
>>::iterator it
= block
.instructions
.end();
98 assert((*it
)->format
== Format::PSEUDO_BRANCH
);
99 aco_ptr
<Pseudo_instruction
> pc
{create_instruction
<Pseudo_instruction
>(aco_opcode::p_parallelcopy
, Format::PSEUDO
, entry
.second
.size(), entry
.second
.size())};
101 for (std::pair
<Definition
, Operand
>& pair
: entry
.second
)
103 pc
->definitions
[i
] = pair
.first
;
104 pc
->operands
[i
] = pair
.second
;
107 pc
->tmp_in_scc
= block
.scc_live_out
;
108 pc
->scratch_sgpr
= block
.scratch_sgpr
;
109 block
.instructions
.insert(it
, std::move(pc
));
114 void try_remove_merge_block(ssa_elimination_ctx
& ctx
, Block
* block
)
116 /* check if the successor is another merge block which restores exec */
117 // TODO: divergent loops also restore exec
118 if (block
->linear_succs
.size() != 1 ||
119 !(ctx
.program
->blocks
[block
->linear_succs
[0]].kind
& block_kind_merge
))
122 /* check if this block is empty and the exec mask is not needed */
123 for (aco_ptr
<Instruction
>& instr
: block
->instructions
) {
124 if (instr
->opcode
== aco_opcode::p_parallelcopy
) {
125 if (instr
->definitions
[0].physReg() == exec
)
131 if (instr
->opcode
!= aco_opcode::p_linear_phi
&&
132 instr
->opcode
!= aco_opcode::p_phi
&&
133 instr
->opcode
!= aco_opcode::p_logical_start
&&
134 instr
->opcode
!= aco_opcode::p_logical_end
&&
135 instr
->opcode
!= aco_opcode::p_branch
)
139 /* keep the branch instruction and remove the rest */
140 aco_ptr
<Instruction
> branch
= std::move(block
->instructions
.back());
141 block
->instructions
.clear();
142 block
->instructions
.emplace_back(std::move(branch
));
145 void try_remove_invert_block(ssa_elimination_ctx
& ctx
, Block
* block
)
147 assert(block
->linear_succs
.size() == 2);
148 if (block
->linear_succs
[0] != block
->linear_succs
[1])
151 /* check if we can remove this block */
152 for (aco_ptr
<Instruction
>& instr
: block
->instructions
) {
153 if (instr
->opcode
!= aco_opcode::p_linear_phi
&&
154 instr
->opcode
!= aco_opcode::p_phi
&&
155 instr
->opcode
!= aco_opcode::s_andn2_b64
&&
156 instr
->opcode
!= aco_opcode::p_branch
)
160 unsigned succ_idx
= block
->linear_succs
[0];
161 assert(block
->linear_preds
.size() == 2);
162 for (unsigned i
= 0; i
< 2; i
++) {
163 Block
*pred
= &ctx
.program
->blocks
[block
->linear_preds
[i
]];
164 pred
->linear_succs
[0] = succ_idx
;
165 ctx
.program
->blocks
[succ_idx
].linear_preds
[i
] = pred
->index
;
167 Pseudo_branch_instruction
*branch
= static_cast<Pseudo_branch_instruction
*>(pred
->instructions
.back().get());
168 assert(branch
->format
== Format::PSEUDO_BRANCH
);
169 branch
->target
[0] = succ_idx
;
170 branch
->target
[1] = succ_idx
;
173 block
->instructions
.clear();
174 block
->linear_preds
.clear();
175 block
->linear_succs
.clear();
178 void try_remove_simple_block(ssa_elimination_ctx
& ctx
, Block
* block
)
180 for (aco_ptr
<Instruction
>& instr
: block
->instructions
) {
181 if (instr
->opcode
!= aco_opcode::p_logical_start
&&
182 instr
->opcode
!= aco_opcode::p_logical_end
&&
183 instr
->opcode
!= aco_opcode::p_branch
)
187 Block
& pred
= ctx
.program
->blocks
[block
->linear_preds
[0]];
188 Block
& succ
= ctx
.program
->blocks
[block
->linear_succs
[0]];
189 Pseudo_branch_instruction
* branch
= static_cast<Pseudo_branch_instruction
*>(pred
.instructions
.back().get());
190 if (branch
->opcode
== aco_opcode::p_branch
) {
191 branch
->target
[0] = succ
.index
;
192 branch
->target
[1] = succ
.index
;
193 } else if (branch
->target
[0] == block
->index
) {
194 branch
->target
[0] = succ
.index
;
195 } else if (branch
->target
[0] == succ
.index
) {
196 assert(branch
->target
[1] == block
->index
);
197 branch
->target
[1] = succ
.index
;
198 branch
->opcode
= aco_opcode::p_branch
;
199 } else if (branch
->target
[1] == block
->index
) {
200 /* check if there is a fall-through path from block to succ */
201 bool falls_through
= true;
202 for (unsigned j
= block
->index
+ 1; falls_through
&& j
< succ
.index
; j
++) {
203 assert(ctx
.program
->blocks
[j
].index
== j
);
204 if (!ctx
.program
->blocks
[j
].instructions
.empty())
205 falls_through
= false;
208 branch
->target
[1] = succ
.index
;
210 /* check if there is a fall-through path for the alternative target */
211 for (unsigned j
= block
->index
+ 1; j
< branch
->target
[0]; j
++) {
212 if (!ctx
.program
->blocks
[j
].instructions
.empty())
216 /* This is a (uniform) break or continue block. The branch condition has to be inverted. */
217 if (branch
->opcode
== aco_opcode::p_cbranch_z
)
218 branch
->opcode
= aco_opcode::p_cbranch_nz
;
219 else if (branch
->opcode
== aco_opcode::p_cbranch_nz
)
220 branch
->opcode
= aco_opcode::p_cbranch_z
;
223 /* also invert the linear successors */
224 pred
.linear_succs
[0] = pred
.linear_succs
[1];
225 pred
.linear_succs
[1] = succ
.index
;
226 branch
->target
[1] = branch
->target
[0];
227 branch
->target
[0] = succ
.index
;
233 if (branch
->target
[0] == branch
->target
[1])
234 branch
->opcode
= aco_opcode::p_branch
;
236 for (unsigned i
= 0; i
< pred
.linear_succs
.size(); i
++)
237 if (pred
.linear_succs
[i
] == block
->index
)
238 pred
.linear_succs
[i
] = succ
.index
;
240 for (unsigned i
= 0; i
< succ
.linear_preds
.size(); i
++)
241 if (succ
.linear_preds
[i
] == block
->index
)
242 succ
.linear_preds
[i
] = pred
.index
;
244 block
->instructions
.clear();
245 block
->linear_preds
.clear();
246 block
->linear_succs
.clear();
249 void jump_threading(ssa_elimination_ctx
& ctx
)
251 for (int i
= ctx
.program
->blocks
.size() - 1; i
>= 0; i
--) {
252 Block
* block
= &ctx
.program
->blocks
[i
];
254 if (!ctx
.empty_blocks
[i
])
257 if (block
->kind
& block_kind_invert
) {
258 try_remove_invert_block(ctx
, block
);
262 if (block
->linear_succs
.size() > 1)
265 if (block
->kind
& block_kind_merge
||
266 block
->kind
& block_kind_loop_exit
)
267 try_remove_merge_block(ctx
, block
);
269 if (block
->linear_preds
.size() == 1)
270 try_remove_simple_block(ctx
, block
);
274 } /* end namespace */
277 void ssa_elimination(Program
* program
)
279 ssa_elimination_ctx
ctx(program
);
281 /* Collect information about every phi-instruction */
282 collect_phi_info(ctx
);
284 /* eliminate empty blocks */
287 /* insert parallelcopies from SSA elimination */
288 insert_parallelcopies(ctx
);