amd: Move all amd/common code that depends on LLVM to amd/llvm.
[mesa.git] / src / amd / compiler / aco_ssa_elimination.cpp
1 /*
2 * Copyright © 2018 Valve Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 */
24
25
26 #include "aco_ir.h"
27
28 #include <map>
29
30 namespace aco {
31 namespace {
32
33 /* map: block-id -> pair (dest, src) to store phi information */
34 typedef std::map<uint32_t, std::vector<std::pair<Definition, Operand>>> phi_info;
35
36 struct ssa_elimination_ctx {
37 phi_info logical_phi_info;
38 phi_info linear_phi_info;
39 std::vector<bool> empty_blocks;
40 Program* program;
41
42 ssa_elimination_ctx(Program* program) : empty_blocks(program->blocks.size(), true), program(program) {}
43 };
44
45 void collect_phi_info(ssa_elimination_ctx& ctx)
46 {
47 for (Block& block : ctx.program->blocks) {
48 for (aco_ptr<Instruction>& phi : block.instructions) {
49 if (phi->opcode != aco_opcode::p_phi && phi->opcode != aco_opcode::p_linear_phi)
50 break;
51
52 for (unsigned i = 0; i < phi->operands.size(); i++) {
53 if (phi->operands[i].isUndefined())
54 continue;
55 if (phi->operands[i].isTemp() && phi->operands[i].physReg() == phi->definitions[0].physReg())
56 continue;
57
58 std::vector<unsigned>& preds = phi->opcode == aco_opcode::p_phi ? block.logical_preds : block.linear_preds;
59 phi_info& info = phi->opcode == aco_opcode::p_phi ? ctx.logical_phi_info : ctx.linear_phi_info;
60 const auto result = info.emplace(preds[i], std::vector<std::pair<Definition, Operand>>());
61 result.first->second.emplace_back(phi->definitions[0], phi->operands[i]);
62 ctx.empty_blocks[preds[i]] = false;
63 }
64 }
65 }
66 }
67
68 void insert_parallelcopies(ssa_elimination_ctx& ctx)
69 {
70 /* insert the parallelcopies from logical phis before p_logical_end */
71 for (auto&& entry : ctx.logical_phi_info) {
72 Block& block = ctx.program->blocks[entry.first];
73 unsigned idx = block.instructions.size() - 1;
74 while (block.instructions[idx]->opcode != aco_opcode::p_logical_end) {
75 assert(idx > 0);
76 idx--;
77 }
78
79 std::vector<aco_ptr<Instruction>>::iterator it = std::next(block.instructions.begin(), idx);
80 aco_ptr<Pseudo_instruction> pc{create_instruction<Pseudo_instruction>(aco_opcode::p_parallelcopy, Format::PSEUDO, entry.second.size(), entry.second.size())};
81 unsigned i = 0;
82 for (std::pair<Definition, Operand>& pair : entry.second)
83 {
84 pc->definitions[i] = pair.first;
85 pc->operands[i] = pair.second;
86 i++;
87 }
88 /* this shouldn't be needed since we're only copying vgprs */
89 pc->tmp_in_scc = false;
90 block.instructions.insert(it, std::move(pc));
91 }
92
93 /* insert parallelcopies for the linear phis at the end of blocks just before the branch */
94 for (auto&& entry : ctx.linear_phi_info) {
95 Block& block = ctx.program->blocks[entry.first];
96 std::vector<aco_ptr<Instruction>>::iterator it = block.instructions.end();
97 --it;
98 assert((*it)->format == Format::PSEUDO_BRANCH);
99 aco_ptr<Pseudo_instruction> pc{create_instruction<Pseudo_instruction>(aco_opcode::p_parallelcopy, Format::PSEUDO, entry.second.size(), entry.second.size())};
100 unsigned i = 0;
101 for (std::pair<Definition, Operand>& pair : entry.second)
102 {
103 pc->definitions[i] = pair.first;
104 pc->operands[i] = pair.second;
105 i++;
106 }
107 pc->tmp_in_scc = block.scc_live_out;
108 pc->scratch_sgpr = block.scratch_sgpr;
109 block.instructions.insert(it, std::move(pc));
110 }
111 }
112
113
114 void try_remove_merge_block(ssa_elimination_ctx& ctx, Block* block)
115 {
116 /* check if the successor is another merge block which restores exec */
117 // TODO: divergent loops also restore exec
118 if (block->linear_succs.size() != 1 ||
119 !(ctx.program->blocks[block->linear_succs[0]].kind & block_kind_merge))
120 return;
121
122 /* check if this block is empty and the exec mask is not needed */
123 for (aco_ptr<Instruction>& instr : block->instructions) {
124 if (instr->opcode == aco_opcode::p_parallelcopy) {
125 if (instr->definitions[0].physReg() == exec)
126 continue;
127 else
128 return;
129 }
130
131 if (instr->opcode != aco_opcode::p_linear_phi &&
132 instr->opcode != aco_opcode::p_phi &&
133 instr->opcode != aco_opcode::p_logical_start &&
134 instr->opcode != aco_opcode::p_logical_end &&
135 instr->opcode != aco_opcode::p_branch)
136 return;
137 }
138
139 /* keep the branch instruction and remove the rest */
140 aco_ptr<Instruction> branch = std::move(block->instructions.back());
141 block->instructions.clear();
142 block->instructions.emplace_back(std::move(branch));
143 }
144
145 void try_remove_invert_block(ssa_elimination_ctx& ctx, Block* block)
146 {
147 assert(block->linear_succs.size() == 2);
148 if (block->linear_succs[0] != block->linear_succs[1])
149 return;
150
151 /* check if we can remove this block */
152 for (aco_ptr<Instruction>& instr : block->instructions) {
153 if (instr->opcode != aco_opcode::p_linear_phi &&
154 instr->opcode != aco_opcode::p_phi &&
155 instr->opcode != aco_opcode::s_andn2_b64 &&
156 instr->opcode != aco_opcode::p_branch)
157 return;
158 }
159
160 unsigned succ_idx = block->linear_succs[0];
161 assert(block->linear_preds.size() == 2);
162 for (unsigned i = 0; i < 2; i++) {
163 Block *pred = &ctx.program->blocks[block->linear_preds[i]];
164 pred->linear_succs[0] = succ_idx;
165 ctx.program->blocks[succ_idx].linear_preds[i] = pred->index;
166
167 Pseudo_branch_instruction *branch = static_cast<Pseudo_branch_instruction*>(pred->instructions.back().get());
168 assert(branch->format == Format::PSEUDO_BRANCH);
169 branch->target[0] = succ_idx;
170 branch->target[1] = succ_idx;
171 }
172
173 block->instructions.clear();
174 block->linear_preds.clear();
175 block->linear_succs.clear();
176 }
177
178 void try_remove_simple_block(ssa_elimination_ctx& ctx, Block* block)
179 {
180 for (aco_ptr<Instruction>& instr : block->instructions) {
181 if (instr->opcode != aco_opcode::p_logical_start &&
182 instr->opcode != aco_opcode::p_logical_end &&
183 instr->opcode != aco_opcode::p_branch)
184 return;
185 }
186
187 Block& pred = ctx.program->blocks[block->linear_preds[0]];
188 Block& succ = ctx.program->blocks[block->linear_succs[0]];
189 Pseudo_branch_instruction* branch = static_cast<Pseudo_branch_instruction*>(pred.instructions.back().get());
190 if (branch->opcode == aco_opcode::p_branch) {
191 branch->target[0] = succ.index;
192 branch->target[1] = succ.index;
193 } else if (branch->target[0] == block->index) {
194 branch->target[0] = succ.index;
195 } else if (branch->target[0] == succ.index) {
196 assert(branch->target[1] == block->index);
197 branch->target[1] = succ.index;
198 branch->opcode = aco_opcode::p_branch;
199 } else if (branch->target[1] == block->index) {
200 /* check if there is a fall-through path from block to succ */
201 bool falls_through = true;
202 for (unsigned j = block->index + 1; falls_through && j < succ.index; j++) {
203 assert(ctx.program->blocks[j].index == j);
204 if (!ctx.program->blocks[j].instructions.empty())
205 falls_through = false;
206 }
207 if (falls_through) {
208 branch->target[1] = succ.index;
209 } else {
210 /* check if there is a fall-through path for the alternative target */
211 for (unsigned j = block->index + 1; j < branch->target[0]; j++) {
212 if (!ctx.program->blocks[j].instructions.empty())
213 return;
214 }
215
216 /* This is a (uniform) break or continue block. The branch condition has to be inverted. */
217 if (branch->opcode == aco_opcode::p_cbranch_z)
218 branch->opcode = aco_opcode::p_cbranch_nz;
219 else if (branch->opcode == aco_opcode::p_cbranch_nz)
220 branch->opcode = aco_opcode::p_cbranch_z;
221 else
222 assert(false);
223 /* also invert the linear successors */
224 pred.linear_succs[0] = pred.linear_succs[1];
225 pred.linear_succs[1] = succ.index;
226 branch->target[1] = branch->target[0];
227 branch->target[0] = succ.index;
228 }
229 } else {
230 assert(false);
231 }
232
233 if (branch->target[0] == branch->target[1])
234 branch->opcode = aco_opcode::p_branch;
235
236 for (unsigned i = 0; i < pred.linear_succs.size(); i++)
237 if (pred.linear_succs[i] == block->index)
238 pred.linear_succs[i] = succ.index;
239
240 for (unsigned i = 0; i < succ.linear_preds.size(); i++)
241 if (succ.linear_preds[i] == block->index)
242 succ.linear_preds[i] = pred.index;
243
244 block->instructions.clear();
245 block->linear_preds.clear();
246 block->linear_succs.clear();
247 }
248
249 void jump_threading(ssa_elimination_ctx& ctx)
250 {
251 for (int i = ctx.program->blocks.size() - 1; i >= 0; i--) {
252 Block* block = &ctx.program->blocks[i];
253
254 if (!ctx.empty_blocks[i])
255 continue;
256
257 if (block->kind & block_kind_invert) {
258 try_remove_invert_block(ctx, block);
259 continue;
260 }
261
262 if (block->linear_succs.size() > 1)
263 continue;
264
265 if (block->kind & block_kind_merge ||
266 block->kind & block_kind_loop_exit)
267 try_remove_merge_block(ctx, block);
268
269 if (block->linear_preds.size() == 1)
270 try_remove_simple_block(ctx, block);
271 }
272 }
273
274 } /* end namespace */
275
276
277 void ssa_elimination(Program* program)
278 {
279 ssa_elimination_ctx ctx(program);
280
281 /* Collect information about every phi-instruction */
282 collect_phi_info(ctx);
283
284 /* eliminate empty blocks */
285 jump_threading(ctx);
286
287 /* insert parallelcopies from SSA elimination */
288 insert_parallelcopies(ctx);
289
290 }
291 }