aco: Initial commit of independent AMD compiler
[mesa.git] / src / amd / compiler / aco_assembler.cpp
1 #include <map>
2
3 #include "aco_ir.h"
4 #include "common/sid.h"
5
6 namespace aco {
7
8 struct asm_context {
9 Program *program;
10 enum chip_class chip_class;
11 std::map<int, SOPP_instruction*> branches;
12 std::vector<unsigned> constaddrs;
13 const int16_t* opcode;
14 // TODO: keep track of branch instructions referring blocks
15 // and, when emitting the block, correct the offset in instr
16 asm_context(Program* program) : program(program), chip_class(program->chip_class) {
17 if (chip_class <= GFX9)
18 opcode = &instr_info.opcode_gfx9[0];
19 }
20 };
21
22 void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction* instr)
23 {
24 uint32_t instr_offset = out.size() * 4u;
25
26 /* lower remaining pseudo-instructions */
27 if (instr->opcode == aco_opcode::p_constaddr) {
28 unsigned dest = instr->definitions[0].physReg();
29 unsigned offset = instr->operands[0].constantValue();
30
31 /* s_getpc_b64 dest[0:1] */
32 uint32_t encoding = (0b101111101 << 23);
33 uint32_t opcode = ctx.opcode[(int)aco_opcode::s_getpc_b64];
34 if (opcode >= 55 && ctx.chip_class <= GFX9) {
35 assert(ctx.chip_class == GFX9 && opcode < 60);
36 opcode = opcode - 4;
37 }
38 encoding |= dest << 16;
39 encoding |= opcode << 8;
40 out.push_back(encoding);
41
42 /* s_add_u32 dest[0], dest[0], ... */
43 encoding = (0b10 << 30);
44 encoding |= ctx.opcode[(int)aco_opcode::s_add_u32] << 23;
45 encoding |= dest << 16;
46 encoding |= dest;
47 encoding |= 255 << 8;
48 out.push_back(encoding);
49 ctx.constaddrs.push_back(out.size());
50 out.push_back(-(instr_offset + 4) + offset);
51
52 /* s_addc_u32 dest[1], dest[1], 0 */
53 encoding = (0b10 << 30);
54 encoding |= ctx.opcode[(int)aco_opcode::s_addc_u32] << 23;
55 encoding |= (dest + 1) << 16;
56 encoding |= dest + 1;
57 encoding |= 128 << 8;
58 out.push_back(encoding);
59 return;
60 }
61
62 uint32_t opcode = ctx.opcode[(int)instr->opcode];
63 if (opcode == (uint32_t)-1) {
64 fprintf(stderr, "Unsupported opcode: ");
65 aco_print_instr(instr, stderr);
66 abort();
67 }
68
69 switch (instr->format) {
70 case Format::SOP2: {
71 uint32_t encoding = (0b10 << 30);
72 encoding |= opcode << 23;
73 encoding |= !instr->definitions.empty() ? instr->definitions[0].physReg() << 16 : 0;
74 encoding |= instr->operands.size() >= 2 ? instr->operands[1].physReg() << 8 : 0;
75 encoding |= !instr->operands.empty() ? instr->operands[0].physReg() : 0;
76 out.push_back(encoding);
77 break;
78 }
79 case Format::SOPK: {
80 uint32_t encoding = (0b1011 << 28);
81 encoding |= opcode << 23;
82 encoding |=
83 !instr->definitions.empty() && !(instr->definitions[0].physReg() == scc) ?
84 instr->definitions[0].physReg() << 16 :
85 !instr->operands.empty() && !(instr->operands[0].physReg() == scc) ?
86 instr->operands[0].physReg() << 16 : 0;
87 encoding |= static_cast<SOPK_instruction*>(instr)->imm;
88 out.push_back(encoding);
89 break;
90 }
91 case Format::SOP1: {
92 uint32_t encoding = (0b101111101 << 23);
93 if (opcode >= 55 && ctx.chip_class <= GFX9) {
94 assert(ctx.chip_class == GFX9 && opcode < 60);
95 opcode = opcode - 4;
96 }
97 encoding |= !instr->definitions.empty() ? instr->definitions[0].physReg() << 16 : 0;
98 encoding |= opcode << 8;
99 encoding |= !instr->operands.empty() ? instr->operands[0].physReg() : 0;
100 out.push_back(encoding);
101 break;
102 }
103 case Format::SOPC: {
104 uint32_t encoding = (0b101111110 << 23);
105 encoding |= opcode << 16;
106 encoding |= instr->operands.size() == 2 ? instr->operands[1].physReg() << 8 : 0;
107 encoding |= !instr->operands.empty() ? instr->operands[0].physReg() : 0;
108 out.push_back(encoding);
109 break;
110 }
111 case Format::SOPP: {
112 SOPP_instruction* sopp = static_cast<SOPP_instruction*>(instr);
113 uint32_t encoding = (0b101111111 << 23);
114 encoding |= opcode << 16;
115 encoding |= (uint16_t) sopp->imm;
116 if (sopp->block != -1)
117 ctx.branches.insert({out.size(), sopp});
118 out.push_back(encoding);
119 break;
120 }
121 case Format::SMEM: {
122 SMEM_instruction* smem = static_cast<SMEM_instruction*>(instr);
123 uint32_t encoding = (0b110000 << 26);
124 encoding |= opcode << 18;
125 if (instr->operands.size() >= 2)
126 encoding |= instr->operands[1].isConstant() ? 1 << 17 : 0;
127 bool soe = instr->operands.size() >= (!instr->definitions.empty() ? 3 : 4);
128 assert(!soe || ctx.chip_class >= GFX9);
129 encoding |= soe ? 1 << 14 : 0;
130 encoding |= smem->glc ? 1 << 16 : 0;
131 if (!instr->definitions.empty() || instr->operands.size() >= 3)
132 encoding |= (!instr->definitions.empty() ? instr->definitions[0].physReg() : instr->operands[2].physReg().reg) << 6;
133 if (instr->operands.size() >= 1)
134 encoding |= instr->operands[0].physReg() >> 1;
135 out.push_back(encoding);
136 encoding = 0;
137 if (instr->operands.size() >= 2)
138 encoding |= instr->operands[1].isConstant() ? instr->operands[1].constantValue() : instr->operands[1].physReg().reg;
139 encoding |= soe ? instr->operands.back().physReg() << 25 : 0;
140 out.push_back(encoding);
141 return;
142 }
143 case Format::VOP2: {
144 uint32_t encoding = 0;
145 encoding |= opcode << 25;
146 encoding |= (0xFF & instr->definitions[0].physReg().reg) << 17;
147 encoding |= (0xFF & instr->operands[1].physReg().reg) << 9;
148 encoding |= instr->operands[0].physReg().reg;
149 out.push_back(encoding);
150 break;
151 }
152 case Format::VOP1: {
153 uint32_t encoding = (0b0111111 << 25);
154 encoding |= (0xFF & instr->definitions[0].physReg().reg) << 17;
155 encoding |= opcode << 9;
156 encoding |= instr->operands[0].physReg().reg;
157 out.push_back(encoding);
158 break;
159 }
160 case Format::VOPC: {
161 uint32_t encoding = (0b0111110 << 25);
162 encoding |= opcode << 17;
163 encoding |= (0xFF & instr->operands[1].physReg().reg) << 9;
164 encoding |= instr->operands[0].physReg().reg;
165 out.push_back(encoding);
166 break;
167 }
168 case Format::VINTRP: {
169 Interp_instruction* interp = static_cast<Interp_instruction*>(instr);
170 uint32_t encoding = (0b110101 << 26);
171 encoding |= (0xFF & instr->definitions[0].physReg().reg) << 18;
172 encoding |= opcode << 16;
173 encoding |= interp->attribute << 10;
174 encoding |= interp->component << 8;
175 if (instr->opcode == aco_opcode::v_interp_mov_f32)
176 encoding |= (0x3 & instr->operands[0].constantValue());
177 else
178 encoding |= (0xFF & instr->operands[0].physReg().reg);
179 out.push_back(encoding);
180 break;
181 }
182 case Format::DS: {
183 DS_instruction* ds = static_cast<DS_instruction*>(instr);
184 uint32_t encoding = (0b110110 << 26);
185 encoding |= opcode << 17;
186 encoding |= (ds->gds ? 1 : 0) << 16;
187 encoding |= ((0xFF & ds->offset1) << 8);
188 encoding |= (0xFFFF & ds->offset0);
189 out.push_back(encoding);
190 encoding = 0;
191 unsigned reg = !instr->definitions.empty() ? instr->definitions[0].physReg() : 0;
192 encoding |= (0xFF & reg) << 24;
193 reg = instr->operands.size() >= 3 && !(instr->operands[2].physReg() == m0) ? instr->operands[2].physReg() : 0;
194 encoding |= (0xFF & reg) << 16;
195 reg = instr->operands.size() >= 2 && !(instr->operands[1].physReg() == m0) ? instr->operands[1].physReg() : 0;
196 encoding |= (0xFF & reg) << 8;
197 encoding |= (0xFF & instr->operands[0].physReg().reg);
198 out.push_back(encoding);
199 break;
200 }
201 case Format::MUBUF: {
202 MUBUF_instruction* mubuf = static_cast<MUBUF_instruction*>(instr);
203 uint32_t encoding = (0b111000 << 26);
204 encoding |= opcode << 18;
205 encoding |= (mubuf->slc ? 1 : 0) << 17;
206 encoding |= (mubuf->lds ? 1 : 0) << 16;
207 encoding |= (mubuf->glc ? 1 : 0) << 14;
208 encoding |= (mubuf->idxen ? 1 : 0) << 13;
209 encoding |= (mubuf->offen ? 1 : 0) << 12;
210 encoding |= 0x0FFF & mubuf->offset;
211 out.push_back(encoding);
212 encoding = 0;
213 encoding |= instr->operands[2].physReg() << 24;
214 encoding |= (mubuf->tfe ? 1 : 0) << 23;
215 encoding |= (instr->operands[1].physReg() >> 2) << 16;
216 unsigned reg = instr->operands.size() > 3 ? instr->operands[3].physReg() : instr->definitions[0].physReg().reg;
217 encoding |= (0xFF & reg) << 8;
218 encoding |= (0xFF & instr->operands[0].physReg().reg);
219 out.push_back(encoding);
220 break;
221 }
222 case Format::MTBUF: {
223 MTBUF_instruction* mtbuf = static_cast<MTBUF_instruction*>(instr);
224 uint32_t encoding = (0b111010 << 26);
225 encoding |= opcode << 15;
226 encoding |= (mtbuf->glc ? 1 : 0) << 14;
227 encoding |= (mtbuf->idxen ? 1 : 0) << 13;
228 encoding |= (mtbuf->offen ? 1 : 0) << 12;
229 encoding |= 0x0FFF & mtbuf->offset;
230 encoding |= (0xF & mtbuf->dfmt) << 19;
231 encoding |= (0x7 & mtbuf->nfmt) << 23;
232 out.push_back(encoding);
233 encoding = 0;
234 encoding |= instr->operands[2].physReg().reg << 24;
235 encoding |= (mtbuf->tfe ? 1 : 0) << 23;
236 encoding |= (mtbuf->slc ? 1 : 0) << 22;
237 encoding |= (instr->operands[1].physReg().reg >> 2) << 16;
238 unsigned reg = instr->operands.size() > 3 ? instr->operands[3].physReg().reg : instr->definitions[0].physReg().reg;
239 encoding |= (0xFF & reg) << 8;
240 encoding |= (0xFF & instr->operands[0].physReg().reg);
241 out.push_back(encoding);
242 break;
243 }
244 case Format::MIMG: {
245 MIMG_instruction* mimg = static_cast<MIMG_instruction*>(instr);
246 uint32_t encoding = (0b111100 << 26);
247 encoding |= mimg->slc ? 1 << 25 : 0;
248 encoding |= opcode << 18;
249 encoding |= mimg->lwe ? 1 << 17 : 0;
250 encoding |= mimg->tfe ? 1 << 16 : 0;
251 encoding |= mimg->r128 ? 1 << 15 : 0;
252 encoding |= mimg->da ? 1 << 14 : 0;
253 encoding |= mimg->glc ? 1 << 13 : 0;
254 encoding |= mimg->unrm ? 1 << 12 : 0;
255 encoding |= (0xF & mimg->dmask) << 8;
256 out.push_back(encoding);
257 encoding = (0xFF & instr->operands[0].physReg().reg); /* VADDR */
258 if (!instr->definitions.empty()) {
259 encoding |= (0xFF & instr->definitions[0].physReg().reg) << 8; /* VDATA */
260 } else if (instr->operands.size() == 4) {
261 encoding |= (0xFF & instr->operands[3].physReg().reg) << 8; /* VDATA */
262 }
263 encoding |= (0x1F & (instr->operands[1].physReg() >> 2)) << 16; /* T# (resource) */
264 if (instr->operands.size() > 2)
265 encoding |= (0x1F & (instr->operands[2].physReg() >> 2)) << 21; /* sampler */
266 // TODO VEGA: D16
267 out.push_back(encoding);
268 break;
269 }
270 case Format::FLAT:
271 case Format::SCRATCH:
272 case Format::GLOBAL: {
273 FLAT_instruction *flat = static_cast<FLAT_instruction*>(instr);
274 uint32_t encoding = (0b110111 << 26);
275 encoding |= opcode << 18;
276 encoding |= flat->offset & 0x1fff;
277 if (instr->format == Format::SCRATCH)
278 encoding |= 1 << 14;
279 else if (instr->format == Format::GLOBAL)
280 encoding |= 2 << 14;
281 encoding |= flat->lds ? 1 << 13 : 0;
282 encoding |= flat->glc ? 1 << 13 : 0;
283 encoding |= flat->slc ? 1 << 13 : 0;
284 out.push_back(encoding);
285 encoding = (0xFF & instr->operands[0].physReg().reg);
286 if (!instr->definitions.empty())
287 encoding |= (0xFF & instr->definitions[0].physReg().reg) << 24;
288 else
289 encoding |= (0xFF & instr->operands[2].physReg().reg) << 8;
290 if (!instr->operands[1].isUndefined()) {
291 assert(instr->operands[1].physReg() != 0x7f);
292 assert(instr->format != Format::FLAT);
293 encoding |= instr->operands[1].physReg() << 16;
294 } else if (instr->format != Format::FLAT) {
295 encoding |= 0x7F << 16;
296 }
297 encoding |= flat->nv ? 1 << 23 : 0;
298 out.push_back(encoding);
299 break;
300 }
301 case Format::EXP: {
302 Export_instruction* exp = static_cast<Export_instruction*>(instr);
303 uint32_t encoding = (0b110001 << 26);
304 encoding |= exp->valid_mask ? 0b1 << 12 : 0;
305 encoding |= exp->done ? 0b1 << 11 : 0;
306 encoding |= exp->compressed ? 0b1 << 10 : 0;
307 encoding |= exp->dest << 4;
308 encoding |= exp->enabled_mask;
309 out.push_back(encoding);
310 encoding = 0xFF & exp->operands[0].physReg().reg;
311 encoding |= (0xFF & exp->operands[1].physReg().reg) << 8;
312 encoding |= (0xFF & exp->operands[2].physReg().reg) << 16;
313 encoding |= (0xFF & exp->operands[3].physReg().reg) << 24;
314 out.push_back(encoding);
315 break;
316 }
317 case Format::PSEUDO:
318 case Format::PSEUDO_BARRIER:
319 unreachable("Pseudo instructions should be lowered before assembly.");
320 default:
321 if ((uint16_t) instr->format & (uint16_t) Format::VOP3A) {
322 VOP3A_instruction* vop3 = static_cast<VOP3A_instruction*>(instr);
323
324 if ((uint16_t) instr->format & (uint16_t) Format::VOP2)
325 opcode = opcode + 0x100;
326 else if ((uint16_t) instr->format & (uint16_t) Format::VOP1)
327 opcode = opcode + 0x140;
328 else if ((uint16_t) instr->format & (uint16_t) Format::VOPC)
329 opcode = opcode + 0x0;
330 else if ((uint16_t) instr->format & (uint16_t) Format::VINTRP)
331 opcode = opcode + 0x270;
332
333 // TODO: op_sel
334 uint32_t encoding = (0b110100 << 26);
335 encoding |= opcode << 16;
336 encoding |= (vop3->clamp ? 1 : 0) << 15;
337 for (unsigned i = 0; i < 3; i++)
338 encoding |= vop3->abs[i] << (8+i);
339 if (instr->definitions.size() == 2)
340 encoding |= instr->definitions[1].physReg() << 8;
341 encoding |= (0xFF & instr->definitions[0].physReg().reg);
342 out.push_back(encoding);
343 encoding = 0;
344 if (instr->opcode == aco_opcode::v_interp_mov_f32) {
345 encoding = 0x3 & instr->operands[0].constantValue();
346 } else {
347 for (unsigned i = 0; i < instr->operands.size(); i++)
348 encoding |= instr->operands[i].physReg() << (i * 9);
349 }
350 encoding |= vop3->omod << 27;
351 for (unsigned i = 0; i < 3; i++)
352 encoding |= vop3->neg[i] << (29+i);
353 out.push_back(encoding);
354 return;
355
356 } else if (instr->isDPP()){
357 /* first emit the instruction without the DPP operand */
358 Operand dpp_op = instr->operands[0];
359 instr->operands[0] = Operand(PhysReg{250}, v1);
360 instr->format = (Format) ((uint32_t) instr->format & ~(1 << 14));
361 emit_instruction(ctx, out, instr);
362 DPP_instruction* dpp = static_cast<DPP_instruction*>(instr);
363 uint32_t encoding = (0xF & dpp->row_mask) << 28;
364 encoding |= (0xF & dpp->bank_mask) << 24;
365 encoding |= dpp->abs[1] << 23;
366 encoding |= dpp->neg[1] << 22;
367 encoding |= dpp->abs[0] << 21;
368 encoding |= dpp->neg[0] << 20;
369 encoding |= dpp->bound_ctrl << 19;
370 encoding |= dpp->dpp_ctrl << 8;
371 encoding |= (0xFF) & dpp_op.physReg().reg;
372 out.push_back(encoding);
373 return;
374 } else {
375 unreachable("unimplemented instruction format");
376 }
377 }
378
379 /* append literal dword */
380 for (const Operand& op : instr->operands) {
381 if (op.isLiteral()) {
382 out.push_back(op.constantValue());
383 break;
384 }
385 }
386 }
387
388 void emit_block(asm_context& ctx, std::vector<uint32_t>& out, Block& block)
389 {
390 for (aco_ptr<Instruction>& instr : block.instructions) {
391 #if 0
392 int start_idx = out.size();
393 std::cerr << "Encoding:\t" << std::endl;
394 aco_print_instr(&*instr, stderr);
395 std::cerr << std::endl;
396 #endif
397 emit_instruction(ctx, out, instr.get());
398 #if 0
399 for (int i = start_idx; i < out.size(); i++)
400 std::cerr << "encoding: " << "0x" << std::setfill('0') << std::setw(8) << std::hex << out[i] << std::endl;
401 #endif
402 }
403 }
404
405 void fix_exports(asm_context& ctx, std::vector<uint32_t>& out, Program* program)
406 {
407 for (int idx = program->blocks.size() - 1; idx >= 0; idx--) {
408 Block& block = program->blocks[idx];
409 std::vector<aco_ptr<Instruction>>::reverse_iterator it = block.instructions.rbegin();
410 bool endBlock = false;
411 bool exported = false;
412 while ( it != block.instructions.rend())
413 {
414 if ((*it)->format == Format::EXP && endBlock) {
415 Export_instruction* exp = static_cast<Export_instruction*>((*it).get());
416 if (program->stage & hw_vs) {
417 if (exp->dest >= V_008DFC_SQ_EXP_POS && exp->dest <= (V_008DFC_SQ_EXP_POS + 3)) {
418 exp->done = true;
419 exported = true;
420 break;
421 }
422 } else {
423 exp->done = true;
424 exp->valid_mask = true;
425 exported = true;
426 break;
427 }
428 } else if ((*it)->definitions.size() && (*it)->definitions[0].physReg() == exec)
429 break;
430 else if ((*it)->opcode == aco_opcode::s_endpgm) {
431 if (endBlock)
432 break;
433 endBlock = true;
434 }
435 ++it;
436 }
437 if (!endBlock || exported)
438 continue;
439 /* we didn't find an Export instruction and have to insert a null export */
440 aco_ptr<Export_instruction> exp{create_instruction<Export_instruction>(aco_opcode::exp, Format::EXP, 4, 0)};
441 for (unsigned i = 0; i < 4; i++)
442 exp->operands[i] = Operand(v1);
443 exp->enabled_mask = 0;
444 exp->compressed = false;
445 exp->done = true;
446 exp->valid_mask = program->stage & hw_fs;
447 if (program->stage & hw_fs)
448 exp->dest = 9; /* NULL */
449 else
450 exp->dest = V_008DFC_SQ_EXP_POS;
451 /* insert the null export 1 instruction before endpgm */
452 block.instructions.insert(block.instructions.end() - 1, std::move(exp));
453 }
454 }
455
456 void fix_branches(asm_context& ctx, std::vector<uint32_t>& out)
457 {
458 for (std::pair<int, SOPP_instruction*> branch : ctx.branches)
459 {
460 int offset = (int)ctx.program->blocks[branch.second->block].offset - branch.first - 1;
461 out[branch.first] |= (uint16_t) offset;
462 }
463 }
464
465 void fix_constaddrs(asm_context& ctx, std::vector<uint32_t>& out)
466 {
467 for (unsigned addr : ctx.constaddrs)
468 out[addr] += out.size() * 4u;
469 }
470
471 unsigned emit_program(Program* program,
472 std::vector<uint32_t>& code)
473 {
474 asm_context ctx(program);
475
476 if (program->stage & (hw_vs | hw_fs))
477 fix_exports(ctx, code, program);
478
479 for (Block& block : program->blocks) {
480 block.offset = code.size();
481 emit_block(ctx, code, block);
482 }
483
484 fix_branches(ctx, code);
485 fix_constaddrs(ctx, code);
486
487 unsigned constant_data_offset = code.size() * sizeof(uint32_t);
488 while (program->constant_data.size() % 4u)
489 program->constant_data.push_back(0);
490 /* Copy constant data */
491 code.insert(code.end(), (uint32_t*)program->constant_data.data(),
492 (uint32_t*)(program->constant_data.data() + program->constant_data.size()));
493
494 return constant_data_offset;
495 }
496
497 }