4 #include "common/sid.h"
/* Hardware generation being targeted; the constructor copies it from the program. */
10 enum chip_class chip_class
;
/* Branches that still need their offset: key is the index in the output vector
 * of the encoded SOPP dword, value is the instruction it was encoded from.
 * Filled by emit_instruction(), consumed by fix_branches(). */
11 std::map
<int, SOPP_instruction
*> branches
;
/* Output indices of the literal dwords emitted for p_constaddr; each one is
 * adjusted by fix_constaddrs() once the code size is known. */
12 std::vector
<unsigned> constaddrs
;
/* Table indexed by aco_opcode that yields the hardware opcode number; the
 * constructor points it at the table matching chip_class. */
13 const int16_t* opcode
;
14 // TODO: keep track of branch instructions referring blocks
15 // and, when emitting the block, correct the offset in instr
16 asm_context(Program
* program
) : program(program
), chip_class(program
->chip_class
) {
17 if (chip_class
<= GFX9
)
18 opcode
= &instr_info
.opcode_gfx9
[0];
19 else if (chip_class
== GFX10
)
20 opcode
= &instr_info
.opcode_gfx10
[0];
/* Encodes a single instruction and appends the resulting dword(s) to `out`.
 * ctx supplies the chip class and the opcode table; the bit layout is picked
 * from instr->format. Operands flagged isLiteral() are appended as extra
 * dwords at the very end.
 * NOTE(review): the embedded source line numbers in this excerpt skip values
 * (e.g. 38-39, 48-49, 68-70, 72), so some statements, braces, case labels and
 * else branches are not visible; the comments below cover only what is shown. */
24 void emit_instruction(asm_context
& ctx
, std::vector
<uint32_t>& out
, Instruction
* instr
)
/* out holds 32-bit words, so the byte offset of this instruction is size * 4 */
26 uint32_t instr_offset
= out
.size() * 4u;
28 /* lower remaining pseudo-instructions */
/* p_constaddr expands to s_getpc_b64 + s_add_u32 (with a literal) + s_addc_u32 */
29 if (instr
->opcode
== aco_opcode::p_constaddr
) {
30 unsigned dest
= instr
->definitions
[0].physReg();
31 unsigned offset
= instr
->operands
[0].constantValue();
33 /* s_getpc_b64 dest[0:1] */
34 uint32_t encoding
= (0b101111101 << 23);
35 uint32_t opcode
= ctx
.opcode
[(int)aco_opcode::s_getpc_b64
];
/* on chip_class <= GFX9 an opcode number >= 55 is asserted to occur only on
 * GFX9 and to be below 60; NOTE(review): whatever else this branch does
 * (embedded lines 38-39) is not visible here */
36 if (opcode
>= 55 && ctx
.chip_class
<= GFX9
) {
37 assert(ctx
.chip_class
== GFX9
&& opcode
< 60);
40 encoding
|= dest
<< 16;
41 encoding
|= opcode
<< 8;
42 out
.push_back(encoding
);
44 /* s_add_u32 dest[0], dest[0], ... */
45 encoding
= (0b10 << 30);
46 encoding
|= ctx
.opcode
[(int)aco_opcode::s_add_u32
] << 23;
47 encoding
|= dest
<< 16;
50 out
.push_back(encoding
);
/* remember the index of the literal dword pushed just below so that
 * fix_constaddrs() can add the final code size to it */
51 ctx
.constaddrs
.push_back(out
.size());
/* literal: the constant offset minus (instr_offset + 4), i.e. minus the byte
 * offset just past the first dword emitted above */
52 out
.push_back(-(instr_offset
+ 4) + offset
);
54 /* s_addc_u32 dest[1], dest[1], 0 */
55 encoding
= (0b10 << 30);
56 encoding
|= ctx
.opcode
[(int)aco_opcode::s_addc_u32
] << 23;
57 encoding
|= (dest
+ 1) << 16;
60 out
.push_back(encoding
);
/* hardware opcode number for this chip; a table entry equal to (uint32_t)-1
 * is reported on stderr as unsupported */
64 uint32_t opcode
= ctx
.opcode
[(int)instr
->opcode
];
65 if (opcode
== (uint32_t)-1) {
66 fprintf(stderr
, "Unsupported opcode: ");
67 aco_print_instr(instr
, stderr
);
/* one encoding routine per instruction format follows */
71 switch (instr
->format
) {
/* 0b10 in bits 31:30, opcode at bit 23, definition 0 at bit 16, operand 1 at
 * bit 8, operand 0 in the low bits; absent registers encode as 0
 * (NOTE(review): presumably the SOP2 case - its label is not visible here) */
73 uint32_t encoding
= (0b10 << 30);
74 encoding
|= opcode
<< 23;
75 encoding
|= !instr
->definitions
.empty() ? instr
->definitions
[0].physReg() << 16 : 0;
76 encoding
|= instr
->operands
.size() >= 2 ? instr
->operands
[1].physReg() << 8 : 0;
77 encoding
|= !instr
->operands
.empty() ? instr
->operands
[0].physReg() : 0;
78 out
.push_back(encoding
);
/* 0b1011 in bits 31:28, opcode at bit 23 and the immediate of the
 * SOPK_instruction in the low bits. The register placed at bit 16 is
 * definition 0 if present and not scc, otherwise operand 0 if present and
 * not scc, otherwise 0. */
82 uint32_t encoding
= (0b1011 << 28);
83 encoding
|= opcode
<< 23;
85 !instr
->definitions
.empty() && !(instr
->definitions
[0].physReg() == scc
) ?
86 instr
->definitions
[0].physReg() << 16 :
87 !instr
->operands
.empty() && !(instr
->operands
[0].physReg() == scc
) ?
88 instr
->operands
[0].physReg() << 16 : 0;
89 encoding
|= static_cast<SOPK_instruction
*>(instr
)->imm
;
90 out
.push_back(encoding
);
/* same prefix and opcode position as the s_getpc_b64 lowering above
 * (NOTE(review): presumably the SOP1 case - its label is not visible here) */
94 uint32_t encoding
= (0b101111101 << 23);
95 if (opcode
>= 55 && ctx
.chip_class
<= GFX9
) {
96 assert(ctx
.chip_class
== GFX9
&& opcode
< 60);
99 encoding
|= !instr
->definitions
.empty() ? instr
->definitions
[0].physReg() << 16 : 0;
100 encoding
|= opcode
<< 8;
101 encoding
|= !instr
->operands
.empty() ? instr
->operands
[0].physReg() : 0;
102 out
.push_back(encoding
);
/* prefix 0b101111110, opcode at bit 16, operand 1 at bit 8 (only when there
 * are exactly two operands) and operand 0 in the low bits
 * (NOTE(review): presumably the SOPC case) */
106 uint32_t encoding
= (0b101111110 << 23);
107 encoding
|= opcode
<< 16;
108 encoding
|= instr
->operands
.size() == 2 ? instr
->operands
[1].physReg() << 8 : 0;
109 encoding
|= !instr
->operands
.empty() ? instr
->operands
[0].physReg() : 0;
110 out
.push_back(encoding
);
/* SOPP: prefix 0b101111111, opcode at bit 16, immediate truncated to 16 bits
 * in the low half */
114 SOPP_instruction
* sopp
= static_cast<SOPP_instruction
*>(instr
);
115 uint32_t encoding
= (0b101111111 << 23);
116 encoding
|= opcode
<< 16;
117 encoding
|= (uint16_t) sopp
->imm
;
/* an instruction that names a target block is remembered by the output index
 * of its dword so that fix_branches() can OR in the offset later */
118 if (sopp
->block
!= -1)
119 ctx
.branches
.insert({out
.size(), sopp
});
120 out
.push_back(encoding
);
/* SMEM: soe is set when an extra trailing operand is present (3 or more
 * operands when there is a definition, 4 or more otherwise); is_load means
 * the instruction has a definition */
124 SMEM_instruction
* smem
= static_cast<SMEM_instruction
*>(instr
);
125 bool soe
= instr
->operands
.size() >= (!instr
->definitions
.empty() ? 3 : 4);
126 bool is_load
= !instr
->definitions
.empty();
128 uint32_t encoding
= 0;
/* the prefix and the nv/dlc flag differ between GFX9-and-older and newer chips
 * (NOTE(review): the else between the two assignments is not visible here) */
130 if (ctx
.chip_class
<= GFX9
) {
131 encoding
= (0b110000 << 26);
132 assert(!smem
->dlc
); /* Device-level coherent is not supported on GFX9 and lower */
133 encoding
|= smem
->nv
? 1 << 15 : 0;
135 encoding
= (0b111101 << 26);
136 assert(!smem
->nv
); /* Non-volatile is not supported on GFX10 */
137 encoding
|= smem
->dlc
? 1 << 14 : 0;
140 encoding
|= opcode
<< 18;
141 encoding
|= smem
->glc
? 1 << 16 : 0;
143 if (ctx
.chip_class
<= GFX9
) {
144 if (instr
->operands
.size() >= 2)
145 encoding
|= instr
->operands
[1].isConstant() ? 1 << 17 : 0; /* IMM - immediate enable */
147 if (ctx
.chip_class
== GFX9
) {
148 encoding
|= soe
? 1 << 14 : 0;
/* SDATA comes from the definition for loads and from operand 2 otherwise */
151 if (is_load
|| instr
->operands
.size() >= 3) { /* SDATA */
152 encoding
|= (is_load
? instr
->definitions
[0].physReg().reg
: instr
->operands
[2].physReg().reg
) << 6;
/* SBASE stores the register number shifted right by one */
154 if (instr
->operands
.size() >= 1) { /* SBASE */
155 encoding
|= instr
->operands
[0].physReg().reg
>> 1;
158 out
.push_back(encoding
);
/* second SMEM dword: offset and soffset
 * (NOTE(review): the declaration of `offset` and the statement that starts
 * the second dword are not visible in this excerpt) */
162 uint32_t soffset
= ctx
.chip_class
>= GFX10
163 ? sgpr_null
/* On GFX10 this is disabled by specifying SGPR_NULL */
164 : 0; /* On GFX9, it is disabled by the SOE bit (and it's not present on GFX8 and below) */
165 if (instr
->operands
.size() >= 2) {
166 const Operand
&op_off1
= instr
->operands
[1];
167 if (ctx
.chip_class
<= GFX9
) {
168 offset
= op_off1
.isConstant() ? op_off1
.constantValue() : op_off1
.physReg();
170 /* GFX10 only supports constants in OFFSET, so put the operand in SOFFSET if it's an SGPR */
171 if (op_off1
.isConstant()) {
172 offset
= op_off1
.constantValue();
174 soffset
= op_off1
.physReg();
175 assert(!soe
); /* There is no place to put the other SGPR offset, if any */
180 const Operand
&op_off2
= instr
->operands
.back();
181 assert(ctx
.chip_class
>= GFX9
); /* GFX8 and below don't support specifying a constant and an SGPR at the same time */
182 assert(!op_off2
.isConstant());
183 soffset
= op_off2
.physReg();
187 encoding
|= soffset
<< 25;
189 out
.push_back(encoding
);
/* opcode at bit 25, definition 0 at bit 17, operand 1 at bit 9, operand 0 in
 * the low bits (NOTE(review): presumably the VOP2 case) */
193 uint32_t encoding
= 0;
194 encoding
|= opcode
<< 25;
195 encoding
|= (0xFF & instr
->definitions
[0].physReg().reg
) << 17;
196 encoding
|= (0xFF & instr
->operands
[1].physReg().reg
) << 9;
197 encoding
|= instr
->operands
[0].physReg().reg
;
198 out
.push_back(encoding
);
/* prefix 0b0111111 at bit 25, definition 0 at bit 17, opcode at bit 9
 * (NOTE(review): presumably the VOP1 case) */
202 uint32_t encoding
= (0b0111111 << 25);
203 encoding
|= (0xFF & instr
->definitions
[0].physReg().reg
) << 17;
204 encoding
|= opcode
<< 9;
205 encoding
|= instr
->operands
[0].physReg().reg
;
206 out
.push_back(encoding
);
/* prefix 0b0111110 at bit 25, opcode at bit 17, no destination field
 * (NOTE(review): presumably the VOPC case) */
210 uint32_t encoding
= (0b0111110 << 25);
211 encoding
|= opcode
<< 17;
212 encoding
|= (0xFF & instr
->operands
[1].physReg().reg
) << 9;
213 encoding
|= instr
->operands
[0].physReg().reg
;
214 out
.push_back(encoding
);
217 case Format::VINTRP
: {
218 Interp_instruction
* interp
= static_cast<Interp_instruction
*>(instr
);
219 uint32_t encoding
= 0;
/* GFX8 and GFX9 get prefix 0b110101; the second assignment below is
 * presumably the else branch for the other generations (the else itself is
 * not visible here) */
221 if (ctx
.chip_class
== GFX8
|| ctx
.chip_class
== GFX9
) {
222 encoding
= (0b110101 << 26); /* Vega ISA doc says 110010 but it's wrong */
224 encoding
= (0b110010 << 26);
228 encoding
|= (0xFF & instr
->definitions
[0].physReg().reg
) << 18;
229 encoding
|= opcode
<< 16;
230 encoding
|= interp
->attribute
<< 10;
231 encoding
|= interp
->component
<< 8;
/* v_interp_mov_f32 stores a 2-bit constant taken from operand 0; the register
 * form that follows is presumably the else branch (not visible here) */
232 if (instr
->opcode
== aco_opcode::v_interp_mov_f32
)
233 encoding
|= (0x3 & instr
->operands
[0].constantValue());
235 encoding
|= (0xFF & instr
->operands
[0].physReg().reg
);
236 out
.push_back(encoding
);
/* DS: the first dword carries the opcode, the gds flag, an 8-bit offset1 at
 * bit 8 and offset0 masked to 16 bits */
240 DS_instruction
* ds
= static_cast<DS_instruction
*>(instr
);
241 uint32_t encoding
= (0b110110 << 26);
242 encoding
|= opcode
<< 17;
243 encoding
|= (ds
->gds
? 1 : 0) << 16;
244 encoding
|= ((0xFF & ds
->offset1
) << 8);
245 encoding
|= (0xFFFF & ds
->offset0
);
246 out
.push_back(encoding
);
/* register fields: definition 0 at bit 24, operand 2 at bit 16 and operand 1
 * at bit 8 (each replaced by 0 when missing or equal to m0), operand 0 in
 * the low byte */
248 unsigned reg
= !instr
->definitions
.empty() ? instr
->definitions
[0].physReg() : 0;
249 encoding
|= (0xFF & reg
) << 24;
250 reg
= instr
->operands
.size() >= 3 && !(instr
->operands
[2].physReg() == m0
) ? instr
->operands
[2].physReg() : 0;
251 encoding
|= (0xFF & reg
) << 16;
252 reg
= instr
->operands
.size() >= 2 && !(instr
->operands
[1].physReg() == m0
) ? instr
->operands
[1].physReg() : 0;
253 encoding
|= (0xFF & reg
) << 8;
254 encoding
|= (0xFF & instr
->operands
[0].physReg().reg
);
255 out
.push_back(encoding
);
258 case Format::MUBUF
: {
/* first dword: opcode, the slc/lds/glc/idxen/offen flags and a 12-bit offset */
259 MUBUF_instruction
* mubuf
= static_cast<MUBUF_instruction
*>(instr
);
260 uint32_t encoding
= (0b111000 << 26);
261 encoding
|= opcode
<< 18;
262 encoding
|= (mubuf
->slc
? 1 : 0) << 17;
263 encoding
|= (mubuf
->lds
? 1 : 0) << 16;
264 encoding
|= (mubuf
->glc
? 1 : 0) << 14;
265 encoding
|= (mubuf
->idxen
? 1 : 0) << 13;
266 encoding
|= (mubuf
->offen
? 1 : 0) << 12;
267 encoding
|= 0x0FFF & mubuf
->offset
;
268 out
.push_back(encoding
);
/* register fields: operand 2 at bit 24, tfe at bit 23, operand 1 >> 2 at
 * bit 16, the data register (operand 3 if present, otherwise definition 0)
 * at bit 8 and operand 0 in the low byte */
270 encoding
|= instr
->operands
[2].physReg() << 24;
271 encoding
|= (mubuf
->tfe
? 1 : 0) << 23;
272 encoding
|= (instr
->operands
[1].physReg() >> 2) << 16;
273 unsigned reg
= instr
->operands
.size() > 3 ? instr
->operands
[3].physReg() : instr
->definitions
[0].physReg().reg
;
274 encoding
|= (0xFF & reg
) << 8;
275 encoding
|= (0xFF & instr
->operands
[0].physReg().reg
);
276 out
.push_back(encoding
);
279 case Format::MTBUF
: {
/* first dword: opcode at bit 15, glc/idxen/offen flags, a 12-bit offset, a
 * 4-bit dfmt at bit 19 and a 3-bit nfmt at bit 23 */
280 MTBUF_instruction
* mtbuf
= static_cast<MTBUF_instruction
*>(instr
);
281 uint32_t encoding
= (0b111010 << 26);
282 encoding
|= opcode
<< 15;
283 encoding
|= (mtbuf
->glc
? 1 : 0) << 14;
284 encoding
|= (mtbuf
->idxen
? 1 : 0) << 13;
285 encoding
|= (mtbuf
->offen
? 1 : 0) << 12;
286 encoding
|= 0x0FFF & mtbuf
->offset
;
287 encoding
|= (0xF & mtbuf
->dfmt
) << 19;
288 encoding
|= (0x7 & mtbuf
->nfmt
) << 23;
289 out
.push_back(encoding
);
/* register fields use the same positions as MUBUF above, with slc at bit 22 */
291 encoding
|= instr
->operands
[2].physReg().reg
<< 24;
292 encoding
|= (mtbuf
->tfe
? 1 : 0) << 23;
293 encoding
|= (mtbuf
->slc
? 1 : 0) << 22;
294 encoding
|= (instr
->operands
[1].physReg().reg
>> 2) << 16;
295 unsigned reg
= instr
->operands
.size() > 3 ? instr
->operands
[3].physReg().reg
: instr
->definitions
[0].physReg().reg
;
296 encoding
|= (0xFF & reg
) << 8;
297 encoding
|= (0xFF & instr
->operands
[0].physReg().reg
);
298 out
.push_back(encoding
);
/* MIMG: flags, opcode and a 4-bit dmask in the first dword; address, data,
 * resource and sampler registers in the second */
302 MIMG_instruction
* mimg
= static_cast<MIMG_instruction
*>(instr
);
303 uint32_t encoding
= (0b111100 << 26);
304 encoding
|= mimg
->slc
? 1 << 25 : 0;
305 encoding
|= opcode
<< 18;
306 encoding
|= mimg
->lwe
? 1 << 17 : 0;
307 encoding
|= mimg
->tfe
? 1 << 16 : 0;
308 encoding
|= mimg
->r128
? 1 << 15 : 0;
309 encoding
|= mimg
->da
? 1 << 14 : 0;
310 encoding
|= mimg
->glc
? 1 << 13 : 0;
311 encoding
|= mimg
->unrm
? 1 << 12 : 0;
312 encoding
|= (0xF & mimg
->dmask
) << 8;
313 out
.push_back(encoding
);
314 encoding
= (0xFF & instr
->operands
[0].physReg().reg
); /* VADDR */
/* VDATA is definition 0 when there is one, otherwise operand 3 when exactly
 * four operands are present */
315 if (!instr
->definitions
.empty()) {
316 encoding
|= (0xFF & instr
->definitions
[0].physReg().reg
) << 8; /* VDATA */
317 } else if (instr
->operands
.size() == 4) {
318 encoding
|= (0xFF & instr
->operands
[3].physReg().reg
) << 8; /* VDATA */
320 encoding
|= (0x1F & (instr
->operands
[1].physReg() >> 2)) << 16; /* T# (resource) */
321 if (instr
->operands
.size() > 2)
322 encoding
|= (0x1F & (instr
->operands
[2].physReg() >> 2)) << 21; /* sampler */
324 out
.push_back(encoding
);
328 case Format::SCRATCH
:
329 case Format::GLOBAL
: {
/* SCRATCH and GLOBAL share this path with FLAT_instruction; the statements
 * guarded by the two format checks below are not visible in this excerpt */
330 FLAT_instruction
*flat
= static_cast<FLAT_instruction
*>(instr
);
331 uint32_t encoding
= (0b110111 << 26);
332 encoding
|= opcode
<< 18;
333 encoding
|= flat
->offset
& 0x1fff;
334 if (instr
->format
== Format::SCRATCH
)
336 else if (instr
->format
== Format::GLOBAL
)
/* NOTE(review): lds, glc and slc are all ORed into bit 13 below; three
 * distinct flags sharing one bit looks unintended - confirm the glc and slc
 * bit positions against the FLAT encoding in the ISA documentation */
338 encoding
|= flat
->lds
? 1 << 13 : 0;
339 encoding
|= flat
->glc
? 1 << 13 : 0;
340 encoding
|= flat
->slc
? 1 << 13 : 0;
341 out
.push_back(encoding
);
/* second dword: operand 0 in the low byte, definition 0 at bit 24 when
 * present, operand 2 at bit 8, operand 1 (or 0x7F when it is undefined and
 * the format is not FLAT) at bit 16, nv at bit 23 */
342 encoding
= (0xFF & instr
->operands
[0].physReg().reg
);
343 if (!instr
->definitions
.empty())
344 encoding
|= (0xFF & instr
->definitions
[0].physReg().reg
) << 24;
346 encoding
|= (0xFF & instr
->operands
[2].physReg().reg
) << 8;
347 if (!instr
->operands
[1].isUndefined()) {
348 assert(instr
->operands
[1].physReg() != 0x7f);
349 assert(instr
->format
!= Format::FLAT
);
350 encoding
|= instr
->operands
[1].physReg() << 16;
351 } else if (instr
->format
!= Format::FLAT
) {
352 encoding
|= 0x7F << 16;
354 encoding
|= flat
->nv
? 1 << 23 : 0;
355 out
.push_back(encoding
);
/* export: valid_mask/done/compressed flags, destination at bit 4 and the
 * enable mask in the first dword; the four source registers, one byte each,
 * in the second */
359 Export_instruction
* exp
= static_cast<Export_instruction
*>(instr
);
360 uint32_t encoding
= (0b110001 << 26);
361 encoding
|= exp
->valid_mask
? 0b1 << 12 : 0;
362 encoding
|= exp
->done
? 0b1 << 11 : 0;
363 encoding
|= exp
->compressed
? 0b1 << 10 : 0;
364 encoding
|= exp
->dest
<< 4;
365 encoding
|= exp
->enabled_mask
;
366 out
.push_back(encoding
);
367 encoding
= 0xFF & exp
->operands
[0].physReg().reg
;
368 encoding
|= (0xFF & exp
->operands
[1].physReg().reg
) << 8;
369 encoding
|= (0xFF & exp
->operands
[2].physReg().reg
) << 16;
370 encoding
|= (0xFF & exp
->operands
[3].physReg().reg
) << 24;
371 out
.push_back(encoding
);
375 case Format::PSEUDO_BARRIER
:
376 unreachable("Pseudo instructions should be lowered before assembly.");
/* formats with the VOP3A bit set: the opcode is rebased according to which
 * base-format bit (VOP2, VOP1, VOPC or VINTRP) is also set */
378 if ((uint16_t) instr
->format
& (uint16_t) Format::VOP3A
) {
379 VOP3A_instruction
* vop3
= static_cast<VOP3A_instruction
*>(instr
);
381 if ((uint16_t) instr
->format
& (uint16_t) Format::VOP2
)
382 opcode
= opcode
+ 0x100;
383 else if ((uint16_t) instr
->format
& (uint16_t) Format::VOP1
)
384 opcode
= opcode
+ 0x140;
385 else if ((uint16_t) instr
->format
& (uint16_t) Format::VOPC
)
386 opcode
= opcode
+ 0x0;
387 else if ((uint16_t) instr
->format
& (uint16_t) Format::VINTRP
)
388 opcode
= opcode
+ 0x270;
/* first dword: opcode at bit 16, clamp at bit 15, abs bits at 8..10, a second
 * definition (if any) at bit 8 and definition 0 in the low byte */
391 uint32_t encoding
= (0b110100 << 26);
392 encoding
|= opcode
<< 16;
393 encoding
|= (vop3
->clamp
? 1 : 0) << 15;
394 for (unsigned i
= 0; i
< 3; i
++)
395 encoding
|= vop3
->abs
[i
] << (8+i
);
396 if (instr
->definitions
.size() == 2)
397 encoding
|= instr
->definitions
[1].physReg() << 8;
398 encoding
|= (0xFF & instr
->definitions
[0].physReg().reg
);
399 out
.push_back(encoding
);
/* second dword: sources are packed 9 bits apart (v_interp_mov_f32 stores a
 * 2-bit constant instead), followed by omod at bit 27 and neg bits at 29..31 */
401 if (instr
->opcode
== aco_opcode::v_interp_mov_f32
) {
402 encoding
= 0x3 & instr
->operands
[0].constantValue();
404 for (unsigned i
= 0; i
< instr
->operands
.size(); i
++)
405 encoding
|= instr
->operands
[i
].physReg() << (i
* 9);
407 encoding
|= vop3
->omod
<< 27;
408 for (unsigned i
= 0; i
< 3; i
++)
409 encoding
|= vop3
->neg
[i
] << (29+i
);
410 out
.push_back(encoding
);
413 } else if (instr
->isDPP()){
414 /* first emit the instruction without the DPP operand */
/* operand 0 is replaced by register 250 and bit 14 of the format is cleared
 * on `instr` itself before recursing, then the DPP control dword is appended
 * (NOTE(review): no restore of instr is visible in this excerpt) */
415 Operand dpp_op
= instr
->operands
[0];
416 instr
->operands
[0] = Operand(PhysReg
{250}, v1
);
417 instr
->format
= (Format
) ((uint32_t) instr
->format
& ~(1 << 14));
418 emit_instruction(ctx
, out
, instr
);
419 DPP_instruction
* dpp
= static_cast<DPP_instruction
*>(instr
);
420 uint32_t encoding
= (0xF & dpp
->row_mask
) << 28;
421 encoding
|= (0xF & dpp
->bank_mask
) << 24;
422 encoding
|= dpp
->abs
[1] << 23;
423 encoding
|= dpp
->neg
[1] << 22;
424 encoding
|= dpp
->abs
[0] << 21;
425 encoding
|= dpp
->neg
[0] << 20;
426 encoding
|= dpp
->bound_ctrl
<< 19;
427 encoding
|= dpp
->dpp_ctrl
<< 8;
428 encoding
|= (0xFF) & dpp_op
.physReg().reg
;
429 out
.push_back(encoding
);
432 unreachable("unimplemented instruction format");
436 /* append literal dword */
437 for (const Operand
& op
: instr
->operands
) {
438 if (op
.isLiteral()) {
439 out
.push_back(op
.constantValue());
/* Encodes every instruction of `block` in order by calling emit_instruction(),
 * appending the result to `out`.
 * NOTE(review): embedded lines 448, 453, 455 and 458 are not visible in this
 * excerpt, so it cannot be told from here whether the stderr dump around the
 * emit_instruction() call is unconditional or guarded (e.g. by the
 * preprocessor) - confirm against the full file. */
445 void emit_block(asm_context
& ctx
, std::vector
<uint32_t>& out
, Block
& block
)
447 for (aco_ptr
<Instruction
>& instr
: block
.instructions
) {
/* remember where this instruction's dwords start, then print the instruction */
449 int start_idx
= out
.size();
450 std::cerr
<< "Encoding:\t" << std::endl
;
451 aco_print_instr(&*instr
, stderr
);
452 std::cerr
<< std::endl
;
454 emit_instruction(ctx
, out
, instr
.get());
/* print every dword the instruction produced as zero-padded 8-digit hex */
456 for (int i
= start_idx
; i
< out
.size(); i
++)
457 std::cerr
<< "encoding: " << "0x" << std::setfill('0') << std::setw(8) << std::hex
<< out
[i
] << std::endl
;
/* Scans the program's blocks from last to first, and each block's
 * instructions from last to first, inspecting export instructions around
 * s_endpgm; where no export was found a null export is inserted in front of
 * the block's last instruction.
 * NOTE(review): many embedded lines are missing from this excerpt (475-480,
 * 482-484, 486, 488-493, 495, 502, 506), including the statements that set
 * endBlock/exported and the loop exits; the comments below describe only the
 * visible statements. */
462 void fix_exports(asm_context
& ctx
, std::vector
<uint32_t>& out
, Program
* program
)
/* visit the blocks in reverse order */
464 for (int idx
= program
->blocks
.size() - 1; idx
>= 0; idx
--) {
465 Block
& block
= program
->blocks
[idx
];
466 std::vector
<aco_ptr
<Instruction
>>::reverse_iterator it
= block
.instructions
.rbegin();
467 bool endBlock
= false;
468 bool exported
= false;
469 while ( it
!= block
.instructions
.rend())
/* an export instruction is only examined once endBlock has been set */
471 if ((*it
)->format
== Format::EXP
&& endBlock
) {
472 Export_instruction
* exp
= static_cast<Export_instruction
*>((*it
).get());
/* for the hardware vertex stage the destination is tested against the four
 * targets starting at V_008DFC_SQ_EXP_POS */
473 if (program
->stage
& hw_vs
) {
474 if (exp
->dest
>= V_008DFC_SQ_EXP_POS
&& exp
->dest
<= (V_008DFC_SQ_EXP_POS
+ 3)) {
/* NOTE(review): the lines around this assignment are not visible, so which
 * branch it belongs to cannot be determined from this excerpt */
481 exp
->valid_mask
= true;
485 } else if ((*it
)->definitions
.size() && (*it
)->definitions
[0].physReg() == exec
)
487 else if ((*it
)->opcode
== aco_opcode::s_endpgm
) {
494 if (!endBlock
|| exported
)
496 /* we didn't find an Export instruction and have to insert a null export */
/* the null export has four v1 operands, an empty enable mask and is not compressed */
497 aco_ptr
<Export_instruction
> exp
{create_instruction
<Export_instruction
>(aco_opcode::exp
, Format::EXP
, 4, 0)};
498 for (unsigned i
= 0; i
< 4; i
++)
499 exp
->operands
[i
] = Operand(v1
);
500 exp
->enabled_mask
= 0;
501 exp
->compressed
= false;
/* valid_mask is set only when the hardware fragment stage bit is present */
503 exp
->valid_mask
= program
->stage
& hw_fs
;
/* destination 9 for the fragment stage; the V_008DFC_SQ_EXP_POS assignment
 * below is presumably the else branch (the else itself is not visible) */
504 if (program
->stage
& hw_fs
)
505 exp
->dest
= 9; /* NULL */
507 exp
->dest
= V_008DFC_SQ_EXP_POS
;
508 /* insert the null export 1 instruction before endpgm */
509 block
.instructions
.insert(block
.instructions
.end() - 1, std::move(exp
));
513 void fix_branches(asm_context
& ctx
, std::vector
<uint32_t>& out
)
515 for (std::pair
<int, SOPP_instruction
*> branch
: ctx
.branches
)
517 int offset
= (int)ctx
.program
->blocks
[branch
.second
->block
].offset
- branch
.first
- 1;
518 out
[branch
.first
] |= (uint16_t) offset
;
522 void fix_constaddrs(asm_context
& ctx
, std::vector
<uint32_t>& out
)
524 for (unsigned addr
: ctx
.constaddrs
)
525 out
[addr
] += out
.size() * 4u;
528 unsigned emit_program(Program
* program
,
529 std::vector
<uint32_t>& code
)
531 asm_context
ctx(program
);
533 if (program
->stage
& (hw_vs
| hw_fs
))
534 fix_exports(ctx
, code
, program
);
536 for (Block
& block
: program
->blocks
) {
537 block
.offset
= code
.size();
538 emit_block(ctx
, code
, block
);
541 fix_branches(ctx
, code
);
542 fix_constaddrs(ctx
, code
);
544 unsigned constant_data_offset
= code
.size() * sizeof(uint32_t);
545 while (program
->constant_data
.size() % 4u)
546 program
->constant_data
.push_back(0);
547 /* Copy constant data */
548 code
.insert(code
.end(), (uint32_t*)program
->constant_data
.data(),
549 (uint32_t*)(program
->constant_data
.data() + program
->constant_data
.size()));
551 return constant_data_offset
;