4 #include "common/sid.h"
/* Target hardware generation; the constructor copies it from program->chip_class. */
10 enum chip_class chip_class
;
/* SOPP instructions whose block is not -1, keyed by the dword index in the
 * output at which each one was emitted. Filled by emit_instruction and
 * consumed by fix_branches. */
11 std::map
<int, SOPP_instruction
*> branches
;
/* Output dword indices of the literals emitted for p_constaddr; each of
 * these dwords is adjusted by fix_constaddrs. */
12 std::vector
<unsigned> constaddrs
;
/* Hardware opcode table, indexed by (int)aco_opcode. The constructor points
 * it at instr_info.opcode_gfx9 or instr_info.opcode_gfx10 depending on
 * chip_class. */
13 const int16_t* opcode
;
14 // TODO: keep track of branch instructions referring blocks
15 // and, when emitting the block, correct the offset in instr
16 asm_context(Program
* program
) : program(program
), chip_class(program
->chip_class
) {
17 if (chip_class
<= GFX9
)
18 opcode
= &instr_info
.opcode_gfx9
[0];
19 else if (chip_class
== GFX10
)
20 opcode
= &instr_info
.opcode_gfx10
[0];
/*
 * Encodes one instruction into machine-code dwords appended to `out`.
 *
 * ctx   - assembler state: supplies the opcode table and chip_class, and
 *         collects the output positions of p_constaddr literals
 *         (ctx.constaddrs) and of SOPP instructions whose block is not -1
 *         (ctx.branches).
 * out   - dword vector that receives the encoding.
 * instr - instruction to encode. The DPP path overwrites instr->operands[0]
 *         and instr->format before calling this function again.
 *
 * The hardware opcode is read from ctx.opcode[(int)instr->opcode]; a value of
 * (uint32_t)-1 is reported on stderr as "Unsupported opcode". After the
 * format-specific dwords, every operand for which isLiteral() is true has its
 * constantValue() appended as an extra dword.
 *
 * NOTE(review): many lines of this function (case labels, braces, else lines
 * and some statements) are not visible in this view. Format names in the
 * comments below that are not backed by a visible case label or cast are
 * marked as presumed.
 */
24 void emit_instruction(asm_context
& ctx
, std::vector
<uint32_t>& out
, Instruction
* instr
)
/* Byte offset of this instruction in the output: four bytes per dword already emitted. */
26 uint32_t instr_offset
= out
.size() * 4u;
28 /* lower remaining pseudo-instructions */
/* p_constaddr expands to s_getpc_b64, s_add_u32 (with a literal dword) and s_addc_u32. */
29 if (instr
->opcode
== aco_opcode::p_constaddr
) {
30 unsigned dest
= instr
->definitions
[0].physReg();
31 unsigned offset
= instr
->operands
[0].constantValue();
33 /* s_getpc_b64 dest[0:1] */
34 uint32_t encoding
= (0b101111101 << 23);
35 uint32_t opcode
= ctx
.opcode
[(int)aco_opcode::s_getpc_b64
];
/* Opcode values >= 55 on GFX9 and older are asserted to occur only on GFX9 and
 * to be below 60; any statement following the assert is not visible here. */
36 if (opcode
>= 55 && ctx
.chip_class
<= GFX9
) {
37 assert(ctx
.chip_class
== GFX9
&& opcode
< 60);
40 encoding
|= dest
<< 16;
41 encoding
|= opcode
<< 8;
42 out
.push_back(encoding
);
44 /* s_add_u32 dest[0], dest[0], ... */
45 encoding
= (0b10 << 30);
46 encoding
|= ctx
.opcode
[(int)aco_opcode::s_add_u32
] << 23;
47 encoding
|= dest
<< 16;
50 out
.push_back(encoding
);
/* Remember where the literal lands so fix_constaddrs can adjust it later. */
51 ctx
.constaddrs
.push_back(out
.size());
/* Literal: operand 0's constant minus (instr_offset + 4). */
52 out
.push_back(-(instr_offset
+ 4) + offset
);
54 /* s_addc_u32 dest[1], dest[1], 0 */
55 encoding
= (0b10 << 30);
56 encoding
|= ctx
.opcode
[(int)aco_opcode::s_addc_u32
] << 23;
57 encoding
|= (dest
+ 1) << 16;
60 out
.push_back(encoding
);
/* Hardware opcode from the context's table; (uint32_t)-1 is reported as unsupported. */
64 uint32_t opcode
= ctx
.opcode
[(int)instr
->opcode
];
65 if (opcode
== (uint32_t)-1) {
66 fprintf(stderr
, "Unsupported opcode: ");
67 aco_print_instr(instr
, stderr
);
71 switch (instr
->format
) {
/* Presumably SOP2 (case label not visible): 0b10 at bit 30, opcode at bit 23,
 * definition 0 at bit 16, operand 1 at bit 8, operand 0 at bit 0. A missing
 * definition or operand contributes 0. */
73 uint32_t encoding
= (0b10 << 30);
74 encoding
|= opcode
<< 23;
75 encoding
|= !instr
->definitions
.empty() ? instr
->definitions
[0].physReg() << 16 : 0;
76 encoding
|= instr
->operands
.size() >= 2 ? instr
->operands
[1].physReg() << 8 : 0;
77 encoding
|= !instr
->operands
.empty() ? instr
->operands
[0].physReg() : 0;
78 out
.push_back(encoding
);
/* Presumably SOPK (case label not visible; the cast below is to
 * SOPK_instruction): 0b1011 at bit 28, opcode at bit 23, imm in the low bits.
 * The register at bit 16 is definition 0, or failing that operand 0, in both
 * cases skipping a register equal to scc. */
82 uint32_t encoding
= (0b1011 << 28);
83 encoding
|= opcode
<< 23;
85 !instr
->definitions
.empty() && !(instr
->definitions
[0].physReg() == scc
) ?
86 instr
->definitions
[0].physReg() << 16 :
87 !instr
->operands
.empty() && !(instr
->operands
[0].physReg() == scc
) ?
88 instr
->operands
[0].physReg() << 16 : 0;
89 encoding
|= static_cast<SOPK_instruction
*>(instr
)->imm
;
90 out
.push_back(encoding
);
/* Presumably SOP1 (case label not visible): 0b101111101 at bit 23,
 * definition 0 at bit 16, opcode at bit 8, operand 0 at bit 0. */
94 uint32_t encoding
= (0b101111101 << 23);
95 if (opcode
>= 55 && ctx
.chip_class
<= GFX9
) {
96 assert(ctx
.chip_class
== GFX9
&& opcode
< 60);
99 encoding
|= !instr
->definitions
.empty() ? instr
->definitions
[0].physReg() << 16 : 0;
100 encoding
|= opcode
<< 8;
101 encoding
|= !instr
->operands
.empty() ? instr
->operands
[0].physReg() : 0;
102 out
.push_back(encoding
);
/* Presumably SOPC (case label not visible): 0b101111110 at bit 23, opcode at
 * bit 16, operand 1 at bit 8 (only when there are exactly two operands),
 * operand 0 at bit 0. */
106 uint32_t encoding
= (0b101111110 << 23);
107 encoding
|= opcode
<< 16;
108 encoding
|= instr
->operands
.size() == 2 ? instr
->operands
[1].physReg() << 8 : 0;
109 encoding
|= !instr
->operands
.empty() ? instr
->operands
[0].physReg() : 0;
110 out
.push_back(encoding
);
/* SOPP: 0b101111111 at bit 23, opcode at bit 16, imm truncated to 16 bits.
 * An instruction whose block is not -1 is recorded in ctx.branches under the
 * output index of its dword so fix_branches can patch it. */
114 SOPP_instruction
* sopp
= static_cast<SOPP_instruction
*>(instr
);
115 uint32_t encoding
= (0b101111111 << 23);
116 encoding
|= opcode
<< 16;
117 encoding
|= (uint16_t) sopp
->imm
;
118 if (sopp
->block
!= -1)
119 ctx
.branches
.insert({out
.size(), sopp
});
120 out
.push_back(encoding
);
/* SMEM: two dwords. is_load is true when the instruction has a definition;
 * soe is true when there are at least 3 operands (with a definition) or at
 * least 4 operands (without one). */
124 SMEM_instruction
* smem
= static_cast<SMEM_instruction
*>(instr
);
125 bool soe
= instr
->operands
.size() >= (!instr
->definitions
.empty() ? 3 : 4);
126 bool is_load
= !instr
->definitions
.empty();
128 uint32_t encoding
= 0;
/* GFX9 and older: prefix 0b110000 at bit 26, nv at bit 15. The second prefix
 * (0b111101, dlc at bit 14) is presumably the else branch; the else line is
 * not visible here. */
130 if (ctx
.chip_class
<= GFX9
) {
131 encoding
= (0b110000 << 26);
132 assert(!smem
->dlc
); /* Device-level coherent is not supported on GFX9 and lower */
133 encoding
|= smem
->nv
? 1 << 15 : 0;
135 encoding
= (0b111101 << 26);
136 assert(!smem
->nv
); /* Non-volatile is not supported on GFX10 */
137 encoding
|= smem
->dlc
? 1 << 14 : 0;
140 encoding
|= opcode
<< 18;
141 encoding
|= smem
->glc
? 1 << 16 : 0;
/* Bit 17 is set on GFX9 and older when operand 1 is a constant; bit 14 carries soe on GFX9. */
143 if (ctx
.chip_class
<= GFX9
) {
144 if (instr
->operands
.size() >= 2)
145 encoding
|= instr
->operands
[1].isConstant() ? 1 << 17 : 0; /* IMM - immediate enable */
147 if (ctx
.chip_class
== GFX9
) {
148 encoding
|= soe
? 1 << 14 : 0;
/* SDATA at bit 6: definition 0 for loads, otherwise operand 2. */
151 if (is_load
|| instr
->operands
.size() >= 3) { /* SDATA */
152 encoding
|= (is_load
? instr
->definitions
[0].physReg().reg
: instr
->operands
[2].physReg().reg
) << 6;
/* SBASE in the low bits: operand 0's register number shifted right by one. */
154 if (instr
->operands
.size() >= 1) { /* SBASE */
155 encoding
|= instr
->operands
[0].physReg().reg
>> 1;
158 out
.push_back(encoding
);
/* Second SMEM dword: work out the offset and soffset fields. The declaration
 * of `offset` and the start of this dword are not visible in this view. */
162 uint32_t soffset
= ctx
.chip_class
>= GFX10
163 ? sgpr_null
/* On GFX10 this is disabled by specifying SGPR_NULL */
164 : 0; /* On GFX9, it is disabled by the SOE bit (and it's not present on GFX8 and below) */
165 if (instr
->operands
.size() >= 2) {
166 const Operand
&op_off1
= instr
->operands
[1];
167 if (ctx
.chip_class
<= GFX9
) {
168 offset
= op_off1
.isConstant() ? op_off1
.constantValue() : op_off1
.physReg();
170 /* GFX10 only supports constants in OFFSET, so put the operand in SOFFSET if it's an SGPR */
171 if (op_off1
.isConstant()) {
172 offset
= op_off1
.constantValue();
174 soffset
= op_off1
.physReg();
175 assert(!soe
); /* There is no place to put the other SGPR offset, if any */
/* The last operand supplies soffset here; the condition guarding this part
 * is not visible (presumably soe). */
180 const Operand
&op_off2
= instr
->operands
.back();
181 assert(ctx
.chip_class
>= GFX9
); /* GFX8 and below don't support specifying a constant and an SGPR at the same time */
182 assert(!op_off2
.isConstant());
183 soffset
= op_off2
.physReg();
187 encoding
|= soffset
<< 25;
189 out
.push_back(encoding
);
/* Presumably VOP2 (case label not visible): opcode at bit 25, definition 0 at
 * bit 17 and operand 1 at bit 9 (both masked to 8 bits), operand 0 at bit 0. */
193 uint32_t encoding
= 0;
194 encoding
|= opcode
<< 25;
195 encoding
|= (0xFF & instr
->definitions
[0].physReg().reg
) << 17;
196 encoding
|= (0xFF & instr
->operands
[1].physReg().reg
) << 9;
197 encoding
|= instr
->operands
[0].physReg().reg
;
198 out
.push_back(encoding
);
/* Presumably VOP1 (case label not visible): 0b0111111 at bit 25, definition 0
 * at bit 17 (8 bits), opcode at bit 9, operand 0 at bit 0. */
202 uint32_t encoding
= (0b0111111 << 25);
203 encoding
|= (0xFF & instr
->definitions
[0].physReg().reg
) << 17;
204 encoding
|= opcode
<< 9;
205 encoding
|= instr
->operands
[0].physReg().reg
;
206 out
.push_back(encoding
);
/* Presumably VOPC (case label not visible): 0b0111110 at bit 25, opcode at
 * bit 17, operand 1 at bit 9 (8 bits), operand 0 at bit 0. */
210 uint32_t encoding
= (0b0111110 << 25);
211 encoding
|= opcode
<< 17;
212 encoding
|= (0xFF & instr
->operands
[1].physReg().reg
) << 9;
213 encoding
|= instr
->operands
[0].physReg().reg
;
214 out
.push_back(encoding
);
/* VINTRP: 0b110101 at bit 26, definition 0 at bit 18 (8 bits), opcode at
 * bit 16, attribute at bit 10, component at bit 8. The low bits hold a 2-bit
 * constant for v_interp_mov_f32; the 8-bit register of operand 0 is
 * presumably the else case (the else line is not visible). */
217 case Format::VINTRP
: {
218 Interp_instruction
* interp
= static_cast<Interp_instruction
*>(instr
);
219 uint32_t encoding
= (0b110101 << 26);
220 encoding
|= (0xFF & instr
->definitions
[0].physReg().reg
) << 18;
221 encoding
|= opcode
<< 16;
222 encoding
|= interp
->attribute
<< 10;
223 encoding
|= interp
->component
<< 8;
224 if (instr
->opcode
== aco_opcode::v_interp_mov_f32
)
225 encoding
|= (0x3 & instr
->operands
[0].constantValue());
227 encoding
|= (0xFF & instr
->operands
[0].physReg().reg
);
228 out
.push_back(encoding
);
/* DS: two dwords. First dword: 0b110110 at bit 26, opcode at bit 17, gds at
 * bit 16, offset1 at bit 8 (8 bits), offset0 at bit 0 (16 bits). */
232 DS_instruction
* ds
= static_cast<DS_instruction
*>(instr
);
233 uint32_t encoding
= (0b110110 << 26);
234 encoding
|= opcode
<< 17;
235 encoding
|= (ds
->gds
? 1 : 0) << 16;
236 encoding
|= ((0xFF & ds
->offset1
) << 8);
237 encoding
|= (0xFFFF & ds
->offset0
);
238 out
.push_back(encoding
);
/* Second DS dword: definition 0 at bit 24, operand 2 at bit 16, operand 1 at
 * bit 8, operand 0 at bit 0, each masked to 8 bits. Operands 1 and 2 encode
 * as 0 when absent or equal to m0. Any reset of `encoding` before these
 * lines is not visible in this view. */
240 unsigned reg
= !instr
->definitions
.empty() ? instr
->definitions
[0].physReg() : 0;
241 encoding
|= (0xFF & reg
) << 24;
242 reg
= instr
->operands
.size() >= 3 && !(instr
->operands
[2].physReg() == m0
) ? instr
->operands
[2].physReg() : 0;
243 encoding
|= (0xFF & reg
) << 16;
244 reg
= instr
->operands
.size() >= 2 && !(instr
->operands
[1].physReg() == m0
) ? instr
->operands
[1].physReg() : 0;
245 encoding
|= (0xFF & reg
) << 8;
246 encoding
|= (0xFF & instr
->operands
[0].physReg().reg
);
247 out
.push_back(encoding
);
/* MUBUF: two dwords. First dword: 0b111000 at bit 26, opcode at bit 18, slc
 * at bit 17, lds at bit 16, glc at bit 14, idxen at bit 13, offen at bit 12,
 * 12-bit offset at bit 0. */
250 case Format::MUBUF
: {
251 MUBUF_instruction
* mubuf
= static_cast<MUBUF_instruction
*>(instr
);
252 uint32_t encoding
= (0b111000 << 26);
253 encoding
|= opcode
<< 18;
254 encoding
|= (mubuf
->slc
? 1 : 0) << 17;
255 encoding
|= (mubuf
->lds
? 1 : 0) << 16;
256 encoding
|= (mubuf
->glc
? 1 : 0) << 14;
257 encoding
|= (mubuf
->idxen
? 1 : 0) << 13;
258 encoding
|= (mubuf
->offen
? 1 : 0) << 12;
259 encoding
|= 0x0FFF & mubuf
->offset
;
260 out
.push_back(encoding
);
/* Second MUBUF dword: operand 2 at bit 24, tfe at bit 23, operand 1's
 * register divided by 4 at bit 16, data register (operand 3 if present,
 * otherwise definition 0) at bit 8, operand 0 at bit 0. Any reset of
 * `encoding` before these lines is not visible in this view. */
262 encoding
|= instr
->operands
[2].physReg() << 24;
263 encoding
|= (mubuf
->tfe
? 1 : 0) << 23;
264 encoding
|= (instr
->operands
[1].physReg() >> 2) << 16;
265 unsigned reg
= instr
->operands
.size() > 3 ? instr
->operands
[3].physReg() : instr
->definitions
[0].physReg().reg
;
266 encoding
|= (0xFF & reg
) << 8;
267 encoding
|= (0xFF & instr
->operands
[0].physReg().reg
);
268 out
.push_back(encoding
);
/* MTBUF: two dwords. First dword: 0b111010 at bit 26, nfmt (3 bits) at
 * bit 23, dfmt (4 bits) at bit 19, opcode at bit 15, glc at bit 14, idxen at
 * bit 13, offen at bit 12, 12-bit offset at bit 0. */
271 case Format::MTBUF
: {
272 MTBUF_instruction
* mtbuf
= static_cast<MTBUF_instruction
*>(instr
);
273 uint32_t encoding
= (0b111010 << 26);
274 encoding
|= opcode
<< 15;
275 encoding
|= (mtbuf
->glc
? 1 : 0) << 14;
276 encoding
|= (mtbuf
->idxen
? 1 : 0) << 13;
277 encoding
|= (mtbuf
->offen
? 1 : 0) << 12;
278 encoding
|= 0x0FFF & mtbuf
->offset
;
279 encoding
|= (0xF & mtbuf
->dfmt
) << 19;
280 encoding
|= (0x7 & mtbuf
->nfmt
) << 23;
281 out
.push_back(encoding
);
/* Second MTBUF dword: same register layout as the second MUBUF dword, plus
 * slc at bit 22. Any reset of `encoding` before these lines is not visible
 * in this view. */
283 encoding
|= instr
->operands
[2].physReg().reg
<< 24;
284 encoding
|= (mtbuf
->tfe
? 1 : 0) << 23;
285 encoding
|= (mtbuf
->slc
? 1 : 0) << 22;
286 encoding
|= (instr
->operands
[1].physReg().reg
>> 2) << 16;
287 unsigned reg
= instr
->operands
.size() > 3 ? instr
->operands
[3].physReg().reg
: instr
->definitions
[0].physReg().reg
;
288 encoding
|= (0xFF & reg
) << 8;
289 encoding
|= (0xFF & instr
->operands
[0].physReg().reg
);
290 out
.push_back(encoding
);
/* MIMG: two dwords. First dword: 0b111100 at bit 26, slc at bit 25, opcode at
 * bit 18, lwe 17, tfe 16, r128 15, da 14, glc 13, unrm 12, 4-bit dmask at
 * bit 8. */
294 MIMG_instruction
* mimg
= static_cast<MIMG_instruction
*>(instr
);
295 uint32_t encoding
= (0b111100 << 26);
296 encoding
|= mimg
->slc
? 1 << 25 : 0;
297 encoding
|= opcode
<< 18;
298 encoding
|= mimg
->lwe
? 1 << 17 : 0;
299 encoding
|= mimg
->tfe
? 1 << 16 : 0;
300 encoding
|= mimg
->r128
? 1 << 15 : 0;
301 encoding
|= mimg
->da
? 1 << 14 : 0;
302 encoding
|= mimg
->glc
? 1 << 13 : 0;
303 encoding
|= mimg
->unrm
? 1 << 12 : 0;
304 encoding
|= (0xF & mimg
->dmask
) << 8;
305 out
.push_back(encoding
);
/* Second MIMG dword: operand 0 at bit 0, data register at bit 8 (definition 0
 * if present, else operand 3 when there are exactly four operands),
 * operand 1 / 4 at bit 16, operand 2 / 4 at bit 21 when present. */
306 encoding
= (0xFF & instr
->operands
[0].physReg().reg
); /* VADDR */
307 if (!instr
->definitions
.empty()) {
308 encoding
|= (0xFF & instr
->definitions
[0].physReg().reg
) << 8; /* VDATA */
309 } else if (instr
->operands
.size() == 4) {
310 encoding
|= (0xFF & instr
->operands
[3].physReg().reg
) << 8; /* VDATA */
312 encoding
|= (0x1F & (instr
->operands
[1].physReg() >> 2)) << 16; /* T# (resource) */
313 if (instr
->operands
.size() > 2)
314 encoding
|= (0x1F & (instr
->operands
[2].physReg() >> 2)) << 21; /* sampler */
316 out
.push_back(encoding
);
/* SCRATCH / GLOBAL (and presumably FLAT, whose case label is not visible but
 * which is tested for below): two dwords. First dword: 0b110111 at bit 26,
 * opcode at bit 18, 13-bit offset at bit 0. The statements selected by the
 * SCRATCH and GLOBAL tests are not visible in this view.
 * NOTE(review): lds, glc and slc are all OR'd into bit 13 here, which looks
 * suspicious for three independent flags - confirm the bit positions against
 * the ISA documentation. */
320 case Format::SCRATCH
:
321 case Format::GLOBAL
: {
322 FLAT_instruction
*flat
= static_cast<FLAT_instruction
*>(instr
);
323 uint32_t encoding
= (0b110111 << 26);
324 encoding
|= opcode
<< 18;
325 encoding
|= flat
->offset
& 0x1fff;
326 if (instr
->format
== Format::SCRATCH
)
328 else if (instr
->format
== Format::GLOBAL
)
330 encoding
|= flat
->lds
? 1 << 13 : 0;
331 encoding
|= flat
->glc
? 1 << 13 : 0;
332 encoding
|= flat
->slc
? 1 << 13 : 0;
333 out
.push_back(encoding
);
/* Second dword: operand 0 at bit 0, operand 2 at bit 8, definition 0 at
 * bit 24 when present, nv at bit 23. Bits 16+ take operand 1 when it is
 * defined (asserted not to be 0x7f and not to occur for Format::FLAT), or
 * 0x7F for non-FLAT formats when it is undefined. */
334 encoding
= (0xFF & instr
->operands
[0].physReg().reg
);
335 if (!instr
->definitions
.empty())
336 encoding
|= (0xFF & instr
->definitions
[0].physReg().reg
) << 24;
338 encoding
|= (0xFF & instr
->operands
[2].physReg().reg
) << 8;
339 if (!instr
->operands
[1].isUndefined()) {
340 assert(instr
->operands
[1].physReg() != 0x7f);
341 assert(instr
->format
!= Format::FLAT
);
342 encoding
|= instr
->operands
[1].physReg() << 16;
343 } else if (instr
->format
!= Format::FLAT
) {
344 encoding
|= 0x7F << 16;
346 encoding
|= flat
->nv
? 1 << 23 : 0;
347 out
.push_back(encoding
);
/* Export: two dwords. First dword: 0b110001 at bit 26, valid_mask at bit 12,
 * done at bit 11, compressed at bit 10, dest at bit 4, enabled_mask at bit 0.
 * Second dword: the four operand registers, 8 bits each, operand 0 lowest. */
351 Export_instruction
* exp
= static_cast<Export_instruction
*>(instr
);
352 uint32_t encoding
= (0b110001 << 26);
353 encoding
|= exp
->valid_mask
? 0b1 << 12 : 0;
354 encoding
|= exp
->done
? 0b1 << 11 : 0;
355 encoding
|= exp
->compressed
? 0b1 << 10 : 0;
356 encoding
|= exp
->dest
<< 4;
357 encoding
|= exp
->enabled_mask
;
358 out
.push_back(encoding
);
359 encoding
= 0xFF & exp
->operands
[0].physReg().reg
;
360 encoding
|= (0xFF & exp
->operands
[1].physReg().reg
) << 8;
361 encoding
|= (0xFF & exp
->operands
[2].physReg().reg
) << 16;
362 encoding
|= (0xFF & exp
->operands
[3].physReg().reg
) << 24;
363 out
.push_back(encoding
);
367 case Format::PSEUDO_BARRIER
:
368 unreachable("Pseudo instructions should be lowered before assembly.");
/* Formats with the VOP3A bit set: two dwords. The opcode is first offset by
 * the base format: +0x100 for VOP2, +0x140 for VOP1, +0 for VOPC, +0x270 for
 * VINTRP. */
370 if ((uint16_t) instr
->format
& (uint16_t) Format::VOP3A
) {
371 VOP3A_instruction
* vop3
= static_cast<VOP3A_instruction
*>(instr
);
373 if ((uint16_t) instr
->format
& (uint16_t) Format::VOP2
)
374 opcode
= opcode
+ 0x100;
375 else if ((uint16_t) instr
->format
& (uint16_t) Format::VOP1
)
376 opcode
= opcode
+ 0x140;
377 else if ((uint16_t) instr
->format
& (uint16_t) Format::VOPC
)
378 opcode
= opcode
+ 0x0;
379 else if ((uint16_t) instr
->format
& (uint16_t) Format::VINTRP
)
380 opcode
= opcode
+ 0x270;
/* First dword: 0b110100 at bit 26, opcode at bit 16, clamp at bit 15, abs[i]
 * at bit 8+i, definition 0 at bit 0; a second definition, when present, is
 * placed at bit 8. */
383 uint32_t encoding
= (0b110100 << 26);
384 encoding
|= opcode
<< 16;
385 encoding
|= (vop3
->clamp
? 1 : 0) << 15;
386 for (unsigned i
= 0; i
< 3; i
++)
387 encoding
|= vop3
->abs
[i
] << (8+i
);
388 if (instr
->definitions
.size() == 2)
389 encoding
|= instr
->definitions
[1].physReg() << 8;
390 encoding
|= (0xFF & instr
->definitions
[0].physReg().reg
);
391 out
.push_back(encoding
);
/* Second dword: operand i at bit 9*i (or a 2-bit constant for
 * v_interp_mov_f32), omod at bit 27, neg[i] at bit 29+i. How the
 * v_interp_mov_f32 case and the operand loop are separated is not visible
 * in this view. */
393 if (instr
->opcode
== aco_opcode::v_interp_mov_f32
) {
394 encoding
= 0x3 & instr
->operands
[0].constantValue();
396 for (unsigned i
= 0; i
< instr
->operands
.size(); i
++)
397 encoding
|= instr
->operands
[i
].physReg() << (i
* 9);
399 encoding
|= vop3
->omod
<< 27;
400 for (unsigned i
= 0; i
< 3; i
++)
401 encoding
|= vop3
->neg
[i
] << (29+i
);
402 out
.push_back(encoding
);
/* DPP: operand 0 is replaced by PhysReg{250}, bit 14 of the format is
 * cleared, and the instruction is encoded by a recursive call. The DPP
 * control dword, which carries the original operand 0 register, is appended
 * afterwards. No statement restoring instr->operands[0] or instr->format is
 * visible here. */
405 } else if (instr
->isDPP()){
406 /* first emit the instruction without the DPP operand */
407 Operand dpp_op
= instr
->operands
[0];
408 instr
->operands
[0] = Operand(PhysReg
{250}, v1
);
409 instr
->format
= (Format
) ((uint32_t) instr
->format
& ~(1 << 14));
410 emit_instruction(ctx
, out
, instr
);
411 DPP_instruction
* dpp
= static_cast<DPP_instruction
*>(instr
);
412 uint32_t encoding
= (0xF & dpp
->row_mask
) << 28;
413 encoding
|= (0xF & dpp
->bank_mask
) << 24;
414 encoding
|= dpp
->abs
[1] << 23;
415 encoding
|= dpp
->neg
[1] << 22;
416 encoding
|= dpp
->abs
[0] << 21;
417 encoding
|= dpp
->neg
[0] << 20;
418 encoding
|= dpp
->bound_ctrl
<< 19;
419 encoding
|= dpp
->dpp_ctrl
<< 8;
420 encoding
|= (0xFF) & dpp_op
.physReg().reg
;
421 out
.push_back(encoding
);
424 unreachable("unimplemented instruction format");
428 /* append literal dword */
429 for (const Operand
& op
: instr
->operands
) {
430 if (op
.isLiteral()) {
431 out
.push_back(op
.constantValue());
/*
 * Encodes every instruction of `block` into `out` by calling
 * emit_instruction on each one in order.
 *
 * The visible statements also dump each instruction (aco_print_instr) and
 * the dwords it produced to stderr.
 * NOTE(review): the embedded line numbers skip 440, 445, 447 and 450, so
 * some lines are missing from this view. The dump may sit inside a
 * conditional-compilation guard that is not visible here - confirm before
 * relying on or removing the output.
 */
437 void emit_block(asm_context
& ctx
, std::vector
<uint32_t>& out
, Block
& block
)
439 for (aco_ptr
<Instruction
>& instr
: block
.instructions
) {
/* Index of the first dword this instruction will produce; the dump loop below starts from it. */
441 int start_idx
= out
.size();
442 std::cerr
<< "Encoding:\t" << std::endl
;
443 aco_print_instr(&*instr
, stderr
);
444 std::cerr
<< std::endl
;
446 emit_instruction(ctx
, out
, instr
.get());
/* Prints each dword just emitted as 8 zero-padded hex digits.
 * NOTE(review): `int i` is compared against the unsigned out.size(), and no
 * reset of std::hex / std::setfill on std::cerr is visible here. */
448 for (int i
= start_idx
; i
< out
.size(); i
++)
449 std::cerr
<< "encoding: " << "0x" << std::setfill('0') << std::setw(8) << std::hex
<< out
[i
] << std::endl
;
/*
 * Export fix-up pass; emit_program runs it for programs whose stage has
 * hw_vs or hw_fs set.
 *
 * Blocks are visited from last to first, and each block's instructions are
 * scanned backwards from the final one. endBlock and exported presumably
 * record whether an s_endpgm has been seen and whether a suitable export was
 * found; the statements that set them are not visible in this view. For a
 * block where endBlock is set and exported is not, a null export is created
 * and inserted just before the block's last instruction.
 *
 * ctx and out are not used by any visible statement.
 */
454 void fix_exports(asm_context
& ctx
, std::vector
<uint32_t>& out
, Program
* program
)
456 for (int idx
= program
->blocks
.size() - 1; idx
>= 0; idx
--) {
457 Block
& block
= program
->blocks
[idx
];
458 std::vector
<aco_ptr
<Instruction
>>::reverse_iterator it
= block
.instructions
.rbegin();
459 bool endBlock
= false;
460 bool exported
= false;
461 while ( it
!= block
.instructions
.rend())
/* An EXP instruction is only considered once endBlock is set. */
463 if ((*it
)->format
== Format::EXP
&& endBlock
) {
464 Export_instruction
* exp
= static_cast<Export_instruction
*>((*it
).get());
/* For hw_vs stages only exports whose dest lies in
 * [V_008DFC_SQ_EXP_POS, V_008DFC_SQ_EXP_POS + 3] are tested for. */
465 if (program
->stage
& hw_vs
) {
466 if (exp
->dest
>= V_008DFC_SQ_EXP_POS
&& exp
->dest
<= (V_008DFC_SQ_EXP_POS
+ 3)) {
/* NOTE(review): original lines 467-472 and 474-476 are not visible here, so
 * the branch this assignment belongs to cannot be determined from this view. */
473 exp
->valid_mask
= true;
/* An instruction whose first definition is exec is tested for next; what
 * happens in that case is not visible here. */
477 } else if ((*it
)->definitions
.size() && (*it
)->definitions
[0].physReg() == exec
)
479 else if ((*it
)->opcode
== aco_opcode::s_endpgm
) {
/* Nothing is inserted unless endBlock is set and no export was found; the
 * statement guarded by this test is not visible (presumably it skips to the
 * next block). */
486 if (!endBlock
|| exported
)
488 /* we didn't find an Export instruction and have to insert a null export */
489 aco_ptr
<Export_instruction
> exp
{create_instruction
<Export_instruction
>(aco_opcode::exp
, Format::EXP
, 4, 0)};
/* All four operands are Operand(v1); no channel is enabled and the export is not compressed. */
490 for (unsigned i
= 0; i
< 4; i
++)
491 exp
->operands
[i
] = Operand(v1
);
492 exp
->enabled_mask
= 0;
493 exp
->compressed
= false;
/* valid_mask is set only for hw_fs stages. */
495 exp
->valid_mask
= program
->stage
& hw_fs
;
/* hw_fs stages use dest 9; the V_008DFC_SQ_EXP_POS assignment is presumably
 * the else branch (the else line is not visible). */
496 if (program
->stage
& hw_fs
)
497 exp
->dest
= 9; /* NULL */
499 exp
->dest
= V_008DFC_SQ_EXP_POS
;
500 /* insert the null export 1 instruction before endpgm */
501 block
.instructions
.insert(block
.instructions
.end() - 1, std::move(exp
));
505 void fix_branches(asm_context
& ctx
, std::vector
<uint32_t>& out
)
507 for (std::pair
<int, SOPP_instruction
*> branch
: ctx
.branches
)
509 int offset
= (int)ctx
.program
->blocks
[branch
.second
->block
].offset
- branch
.first
- 1;
510 out
[branch
.first
] |= (uint16_t) offset
;
514 void fix_constaddrs(asm_context
& ctx
, std::vector
<uint32_t>& out
)
516 for (unsigned addr
: ctx
.constaddrs
)
517 out
[addr
] += out
.size() * 4u;
520 unsigned emit_program(Program
* program
,
521 std::vector
<uint32_t>& code
)
523 asm_context
ctx(program
);
525 if (program
->stage
& (hw_vs
| hw_fs
))
526 fix_exports(ctx
, code
, program
);
528 for (Block
& block
: program
->blocks
) {
529 block
.offset
= code
.size();
530 emit_block(ctx
, code
, block
);
533 fix_branches(ctx
, code
);
534 fix_constaddrs(ctx
, code
);
536 unsigned constant_data_offset
= code
.size() * sizeof(uint32_t);
537 while (program
->constant_data
.size() % 4u)
538 program
->constant_data
.push_back(0);
539 /* Copy constant data */
540 code
.insert(code
.end(), (uint32_t*)program
->constant_data
.data(),
541 (uint32_t*)(program
->constant_data
.data() + program
->constant_data
.size()));
543 return constant_data_offset
;