4 #include "common/sid.h"
/* Assembler state shared by the emit_* and fix_* routines in this file.
 * NOTE(review): this listing skips some numbered source lines (5-9 and 21-23), so the
 * enclosing declaration header, the `program` member initialised by the constructor and
 * the closing braces are not visible here. */
/* Hardware generation; the constructor copies it from program->chip_class. */
10 enum chip_class chip_class
;
/* Recorded branches, keyed by the index in the output vector where the branch word was
 * written (see the insert in emit_instruction); read back by fix_branches(). */
11 std::map
<int, SOPP_instruction
*> branches
;
/* Output-vector indices of the p_constaddr literals that fix_constaddrs() adjusts. */
12 std::vector
<unsigned> constaddrs
;
/* Opcode translation table for the selected generation, indexed by (int)aco_opcode. */
13 const int16_t* opcode
;
14 // TODO: keep track of branch instructions referring blocks
15 // and, when emitting the block, correct the offset in instr
/* Stores the program and its chip class, then picks the opcode table:
 * instr_info.opcode_gfx9 for chip_class <= GFX9, instr_info.opcode_gfx10 for GFX10.
 * No assignment to `opcode` is visible for any other chip class. */
16 asm_context(Program
* program
) : program(program
), chip_class(program
->chip_class
) {
17 if (chip_class
<= GFX9
)
18 opcode
= &instr_info
.opcode_gfx9
[0];
19 else if (chip_class
== GFX10
)
20 opcode
= &instr_info
.opcode_gfx10
[0];
/* Encode one instruction as 32-bit words appended to `out`.
 *   ctx   - supplies the opcode table and chip class; collects branch and constaddr fixups
 *   out   - output word stream
 *   instr - instruction to encode (the DPP path below temporarily rewrites it)
 * NOTE(review): this listing skips many numbered source lines (braces, several case labels
 * and a few statements). Comments below that name a format for a section whose case label
 * is not visible are inferred from the casts / bit patterns and should be confirmed. */
24 void emit_instruction(asm_context
& ctx
, std::vector
<uint32_t>& out
, Instruction
* instr
)
/* position of this instruction in the output, in bytes (4 per word) */
26 uint32_t instr_offset
= out
.size() * 4u;
28 /* lower remaining pseudo-instructions */
29 if (instr
->opcode
== aco_opcode::p_constaddr
) {
30 unsigned dest
= instr
->definitions
[0].physReg();
31 unsigned offset
= instr
->operands
[0].constantValue();
33 /* s_getpc_b64 dest[0:1] */
34 uint32_t encoding
= (0b101111101 << 23);
35 uint32_t opcode
= ctx
.opcode
[(int)aco_opcode::s_getpc_b64
];
/* NOTE(review): opcodes 55..59 are special-cased on GFX9; the adjustment itself
 * (source lines 38-39) is not visible in this listing. */
36 if (opcode
>= 55 && ctx
.chip_class
<= GFX9
) {
37 assert(ctx
.chip_class
== GFX9
&& opcode
< 60);
40 encoding
|= dest
<< 16;
41 encoding
|= opcode
<< 8;
42 out
.push_back(encoding
);
44 /* s_add_u32 dest[0], dest[0], ... */
45 encoding
= (0b10 << 30);
46 encoding
|= ctx
.opcode
[(int)aco_opcode::s_add_u32
] << 23;
47 encoding
|= dest
<< 16;
/* NOTE(review): source lines 48-49 (presumably the source operand fields) are not visible. */
50 out
.push_back(encoding
);
/* remember the index of the word pushed next so fix_constaddrs() can patch it */
51 ctx
.constaddrs
.push_back(out
.size());
/* extra word: the constant's offset minus (instr_offset + 4); fix_constaddrs() later adds
 * the final code size in bytes */
52 out
.push_back(-(instr_offset
+ 4) + offset
);
54 /* s_addc_u32 dest[1], dest[1], 0 */
55 encoding
= (0b10 << 30);
56 encoding
|= ctx
.opcode
[(int)aco_opcode::s_addc_u32
] << 23;
57 encoding
|= (dest
+ 1) << 16;
60 out
.push_back(encoding
);
/* translate the generic opcode through the table chosen in asm_context */
64 uint32_t opcode
= ctx
.opcode
[(int)instr
->opcode
];
/* a table entry equal to (uint32_t)-1 is reported as unsupported, with the instruction printed */
65 if (opcode
== (uint32_t)-1) {
66 fprintf(stderr
, "Unsupported opcode: ");
67 aco_print_instr(instr
, stderr
);
71 switch (instr
->format
) {
/* NOTE(review): case label not visible (presumably SOP2). Same base bits as the s_add_u32
 * word above: opcode << 23, definition << 16, operand 1 << 8, operand 0 in the low bits. */
73 uint32_t encoding
= (0b10 << 30);
74 encoding
|= opcode
<< 23;
75 encoding
|= !instr
->definitions
.empty() ? instr
->definitions
[0].physReg() << 16 : 0;
76 encoding
|= instr
->operands
.size() >= 2 ? instr
->operands
[1].physReg() << 8 : 0;
77 encoding
|= !instr
->operands
.empty() ? instr
->operands
[0].physReg() : 0;
78 out
.push_back(encoding
);
/* Form with a 16-bit immediate (the instruction is cast to SOPK_instruction below). */
82 uint32_t encoding
= (0b1011 << 28);
83 encoding
|= opcode
<< 23;
/* Register field at bit 16: the first definition unless it is scc, otherwise the first
 * operand unless it is scc, otherwise 0.
 * NOTE(review): the start of this statement (source line 84) is not visible. */
85 !instr
->definitions
.empty() && !(instr
->definitions
[0].physReg() == scc
) ?
86 instr
->definitions
[0].physReg() << 16 :
87 !instr
->operands
.empty() && !(instr
->operands
[0].physReg() == scc
) ?
88 instr
->operands
[0].physReg() << 16 : 0;
89 encoding
|= static_cast<SOPK_instruction
*>(instr
)->imm
;
90 out
.push_back(encoding
);
/* NOTE(review): case label not visible (presumably SOP1). Same layout as the s_getpc_b64
 * word above: definition << 16, opcode << 8, operand 0 in the low bits. */
94 uint32_t encoding
= (0b101111101 << 23);
/* NOTE(review): as above, the opcode adjustment guarded by this check (source lines 97-98)
 * is not visible. */
95 if (opcode
>= 55 && ctx
.chip_class
<= GFX9
) {
96 assert(ctx
.chip_class
== GFX9
&& opcode
< 60);
99 encoding
|= !instr
->definitions
.empty() ? instr
->definitions
[0].physReg() << 16 : 0;
100 encoding
|= opcode
<< 8;
101 encoding
|= !instr
->operands
.empty() ? instr
->operands
[0].physReg() : 0;
102 out
.push_back(encoding
);
/* NOTE(review): case label not visible (presumably SOPC). Up to two source operands and
 * no destination field. */
106 uint32_t encoding
= (0b101111110 << 23);
107 encoding
|= opcode
<< 16;
108 encoding
|= instr
->operands
.size() == 2 ? instr
->operands
[1].physReg() << 8 : 0;
109 encoding
|= !instr
->operands
.empty() ? instr
->operands
[0].physReg() : 0;
110 out
.push_back(encoding
);
/* SOPP_instruction: opcode << 16 plus a 16-bit immediate. */
114 SOPP_instruction
* sopp
= static_cast<SOPP_instruction
*>(instr
);
115 uint32_t encoding
= (0b101111111 << 23);
116 encoding
|= opcode
<< 16;
117 encoding
|= (uint16_t) sopp
->imm
;
/* a branch with a target block is recorded under the index its word is about to occupy,
 * so fix_branches() can OR in the offset later */
118 if (sopp
->block
!= -1)
119 ctx
.branches
.insert({out
.size(), sopp
});
120 out
.push_back(encoding
);
/* SMEM_instruction: two words. */
124 SMEM_instruction
* smem
= static_cast<SMEM_instruction
*>(instr
);
125 uint32_t encoding
= (0b110000 << 26);
126 encoding
|= opcode
<< 18;
/* bit 17 is set when operand 1 is a constant */
127 if (instr
->operands
.size() >= 2)
128 encoding
|= instr
->operands
[1].isConstant() ? 1 << 17 : 0;
/* soe: true with 3+ operands when there is a definition, 4+ operands otherwise;
 * asserted to occur only on GFX9 and newer */
129 bool soe
= instr
->operands
.size() >= (!instr
->definitions
.empty() ? 3 : 4);
130 assert(!soe
|| ctx
.chip_class
>= GFX9
);
131 encoding
|= soe
? 1 << 14 : 0;
132 encoding
|= smem
->glc
? 1 << 16 : 0;
/* register field at bit 6: the definition if present, otherwise operand 2 */
133 if (!instr
->definitions
.empty() || instr
->operands
.size() >= 3)
134 encoding
|= (!instr
->definitions
.empty() ? instr
->definitions
[0].physReg() : instr
->operands
[2].physReg().reg
) << 6;
/* operand 0's register number shifted right by one goes in the low bits */
135 if (instr
->operands
.size() >= 1)
136 encoding
|= instr
->operands
[0].physReg() >> 1;
137 out
.push_back(encoding
);
/* Second word: operand 1 as a constant value or register number, plus the last operand
 * at bit 25 when soe is set.
 * NOTE(review): source line 138 (presumably resetting `encoding`) is not visible. */
139 if (instr
->operands
.size() >= 2)
140 encoding
|= instr
->operands
[1].isConstant() ? instr
->operands
[1].constantValue() : instr
->operands
[1].physReg().reg
;
141 encoding
|= soe
? instr
->operands
.back().physReg() << 25 : 0;
142 out
.push_back(encoding
);
/* NOTE(review): case label not visible (presumably VOP2). opcode << 25, low 8 bits of the
 * definition << 17, low 8 bits of operand 1 << 9, operand 0 in the low bits. */
146 uint32_t encoding
= 0;
147 encoding
|= opcode
<< 25;
148 encoding
|= (0xFF & instr
->definitions
[0].physReg().reg
) << 17;
149 encoding
|= (0xFF & instr
->operands
[1].physReg().reg
) << 9;
150 encoding
|= instr
->operands
[0].physReg().reg
;
151 out
.push_back(encoding
);
/* NOTE(review): case label not visible (presumably VOP1). One source, opcode << 9. */
155 uint32_t encoding
= (0b0111111 << 25);
156 encoding
|= (0xFF & instr
->definitions
[0].physReg().reg
) << 17;
157 encoding
|= opcode
<< 9;
158 encoding
|= instr
->operands
[0].physReg().reg
;
159 out
.push_back(encoding
);
/* NOTE(review): case label not visible (presumably VOPC). Two sources, no destination field. */
163 uint32_t encoding
= (0b0111110 << 25);
164 encoding
|= opcode
<< 17;
165 encoding
|= (0xFF & instr
->operands
[1].physReg().reg
) << 9;
166 encoding
|= instr
->operands
[0].physReg().reg
;
167 out
.push_back(encoding
);
/* VINTRP: one word with destination, opcode, attribute and component fields. */
170 case Format::VINTRP
: {
171 Interp_instruction
* interp
= static_cast<Interp_instruction
*>(instr
);
172 uint32_t encoding
= (0b110101 << 26);
173 encoding
|= (0xFF & instr
->definitions
[0].physReg().reg
) << 18;
174 encoding
|= opcode
<< 16;
175 encoding
|= interp
->attribute
<< 10;
176 encoding
|= interp
->component
<< 8;
/* v_interp_mov_f32 encodes a 2-bit constant in the source field instead of a register.
 * NOTE(review): the `else` that presumably precedes source line 180 is not visible. */
177 if (instr
->opcode
== aco_opcode::v_interp_mov_f32
)
178 encoding
|= (0x3 & instr
->operands
[0].constantValue());
180 encoding
|= (0xFF & instr
->operands
[0].physReg().reg
);
181 out
.push_back(encoding
);
/* DS_instruction: two words; the first carries opcode, gds flag and both offsets. */
185 DS_instruction
* ds
= static_cast<DS_instruction
*>(instr
);
186 uint32_t encoding
= (0b110110 << 26);
187 encoding
|= opcode
<< 17;
188 encoding
|= (ds
->gds
? 1 : 0) << 16;
189 encoding
|= ((0xFF & ds
->offset1
) << 8);
190 encoding
|= (0xFFFF & ds
->offset0
);
191 out
.push_back(encoding
);
/* Second word: definition << 24, operand 2 << 16, operand 1 << 8, operand 0 in the low
 * byte. Operands 1 and 2 are encoded as 0 when absent or when they are m0.
 * NOTE(review): source line 192 (presumably resetting `encoding`) is not visible. */
193 unsigned reg
= !instr
->definitions
.empty() ? instr
->definitions
[0].physReg() : 0;
194 encoding
|= (0xFF & reg
) << 24;
195 reg
= instr
->operands
.size() >= 3 && !(instr
->operands
[2].physReg() == m0
) ? instr
->operands
[2].physReg() : 0;
196 encoding
|= (0xFF & reg
) << 16;
197 reg
= instr
->operands
.size() >= 2 && !(instr
->operands
[1].physReg() == m0
) ? instr
->operands
[1].physReg() : 0;
198 encoding
|= (0xFF & reg
) << 8;
199 encoding
|= (0xFF & instr
->operands
[0].physReg().reg
);
200 out
.push_back(encoding
);
/* MUBUF: two words; the first carries opcode, flag bits and a 12-bit offset. */
203 case Format::MUBUF
: {
204 MUBUF_instruction
* mubuf
= static_cast<MUBUF_instruction
*>(instr
);
205 uint32_t encoding
= (0b111000 << 26);
206 encoding
|= opcode
<< 18;
207 encoding
|= (mubuf
->slc
? 1 : 0) << 17;
208 encoding
|= (mubuf
->lds
? 1 : 0) << 16;
209 encoding
|= (mubuf
->glc
? 1 : 0) << 14;
210 encoding
|= (mubuf
->idxen
? 1 : 0) << 13;
211 encoding
|= (mubuf
->offen
? 1 : 0) << 12;
212 encoding
|= 0x0FFF & mubuf
->offset
;
213 out
.push_back(encoding
);
/* Second word: operand 2 << 24, tfe << 23, operand 1 (register number >> 2) << 16, the
 * data register << 8 and operand 0 in the low byte. The data register is operand 3 when
 * there are more than three operands, otherwise the definition.
 * NOTE(review): source line 214 (presumably resetting `encoding`) is not visible. */
215 encoding
|= instr
->operands
[2].physReg() << 24;
216 encoding
|= (mubuf
->tfe
? 1 : 0) << 23;
217 encoding
|= (instr
->operands
[1].physReg() >> 2) << 16;
218 unsigned reg
= instr
->operands
.size() > 3 ? instr
->operands
[3].physReg() : instr
->definitions
[0].physReg().reg
;
219 encoding
|= (0xFF & reg
) << 8;
220 encoding
|= (0xFF & instr
->operands
[0].physReg().reg
);
221 out
.push_back(encoding
);
/* MTBUF: like MUBUF, with additional dfmt (4 bits) and nfmt (3 bits) fields. */
224 case Format::MTBUF
: {
225 MTBUF_instruction
* mtbuf
= static_cast<MTBUF_instruction
*>(instr
);
226 uint32_t encoding
= (0b111010 << 26);
227 encoding
|= opcode
<< 15;
228 encoding
|= (mtbuf
->glc
? 1 : 0) << 14;
229 encoding
|= (mtbuf
->idxen
? 1 : 0) << 13;
230 encoding
|= (mtbuf
->offen
? 1 : 0) << 12;
231 encoding
|= 0x0FFF & mtbuf
->offset
;
232 encoding
|= (0xF & mtbuf
->dfmt
) << 19;
233 encoding
|= (0x7 & mtbuf
->nfmt
) << 23;
234 out
.push_back(encoding
);
/* Second word: same register fields as MUBUF, plus slc at bit 22.
 * NOTE(review): source line 235 (presumably resetting `encoding`) is not visible. */
236 encoding
|= instr
->operands
[2].physReg().reg
<< 24;
237 encoding
|= (mtbuf
->tfe
? 1 : 0) << 23;
238 encoding
|= (mtbuf
->slc
? 1 : 0) << 22;
239 encoding
|= (instr
->operands
[1].physReg().reg
>> 2) << 16;
240 unsigned reg
= instr
->operands
.size() > 3 ? instr
->operands
[3].physReg().reg
: instr
->definitions
[0].physReg().reg
;
241 encoding
|= (0xFF & reg
) << 8;
242 encoding
|= (0xFF & instr
->operands
[0].physReg().reg
);
243 out
.push_back(encoding
);
/* MIMG_instruction: two words; the first carries opcode, flag bits and a 4-bit dmask. */
247 MIMG_instruction
* mimg
= static_cast<MIMG_instruction
*>(instr
);
248 uint32_t encoding
= (0b111100 << 26);
249 encoding
|= mimg
->slc
? 1 << 25 : 0;
250 encoding
|= opcode
<< 18;
251 encoding
|= mimg
->lwe
? 1 << 17 : 0;
252 encoding
|= mimg
->tfe
? 1 << 16 : 0;
253 encoding
|= mimg
->r128
? 1 << 15 : 0;
254 encoding
|= mimg
->da
? 1 << 14 : 0;
255 encoding
|= mimg
->glc
? 1 << 13 : 0;
256 encoding
|= mimg
->unrm
? 1 << 12 : 0;
257 encoding
|= (0xF & mimg
->dmask
) << 8;
258 out
.push_back(encoding
);
/* Second word: register fields. The data register is the definition if there is one,
 * otherwise operand 3 when exactly four operands are present. */
259 encoding
= (0xFF & instr
->operands
[0].physReg().reg
); /* VADDR */
260 if (!instr
->definitions
.empty()) {
261 encoding
|= (0xFF & instr
->definitions
[0].physReg().reg
) << 8; /* VDATA */
262 } else if (instr
->operands
.size() == 4) {
263 encoding
|= (0xFF & instr
->operands
[3].physReg().reg
) << 8; /* VDATA */
265 encoding
|= (0x1F & (instr
->operands
[1].physReg() >> 2)) << 16; /* T# (resource) */
266 if (instr
->operands
.size() > 2)
267 encoding
|= (0x1F & (instr
->operands
[2].physReg() >> 2)) << 21; /* sampler */
269 out
.push_back(encoding
);
/* FLAT_instruction formats share one two-word encoding.
 * NOTE(review): a preceding case label (source line 272, presumably Format::FLAT) is not
 * visible; the code below does compare against Format::FLAT. */
273 case Format::SCRATCH
:
274 case Format::GLOBAL
: {
275 FLAT_instruction
*flat
= static_cast<FLAT_instruction
*>(instr
);
276 uint32_t encoding
= (0b110111 << 26);
277 encoding
|= opcode
<< 18;
278 encoding
|= flat
->offset
& 0x1fff;
/* NOTE(review): the statements selected by these two conditions (source lines 280 and
 * 282) are not visible in this listing. */
279 if (instr
->format
== Format::SCRATCH
)
281 else if (instr
->format
== Format::GLOBAL
)
/* NOTE(review): lds, glc and slc are all ORed into bit 13 here. The published GFX9 FLAT
 * layout appears to place GLC at bit 16 and SLC at bit 17 — verify against the ISA
 * manual; this looks like a copy/paste defect. */
283 encoding
|= flat
->lds
? 1 << 13 : 0;
284 encoding
|= flat
->glc
? 1 << 13 : 0;
285 encoding
|= flat
->slc
? 1 << 13 : 0;
286 out
.push_back(encoding
);
/* Second word: operand 0 in the low byte, definition (if any) << 24, operand 2 << 8.
 * NOTE(review): source line 290 (possibly a guard for the operand 2 access) is not visible. */
287 encoding
= (0xFF & instr
->operands
[0].physReg().reg
);
288 if (!instr
->definitions
.empty())
289 encoding
|= (0xFF & instr
->definitions
[0].physReg().reg
) << 24;
291 encoding
|= (0xFF & instr
->operands
[2].physReg().reg
) << 8;
/* Operand 1 goes in the field at bit 16 when defined (asserted not to be 0x7f and not to
 * occur with Format::FLAT). When it is undefined, non-FLAT formats get 0x7F there. */
292 if (!instr
->operands
[1].isUndefined()) {
293 assert(instr
->operands
[1].physReg() != 0x7f);
294 assert(instr
->format
!= Format::FLAT
);
295 encoding
|= instr
->operands
[1].physReg() << 16;
296 } else if (instr
->format
!= Format::FLAT
) {
297 encoding
|= 0x7F << 16;
299 encoding
|= flat
->nv
? 1 << 23 : 0;
300 out
.push_back(encoding
);
/* Export_instruction: first word holds flags, destination and enable mask; second word
 * holds the low byte of each of the four operand registers. */
304 Export_instruction
* exp
= static_cast<Export_instruction
*>(instr
);
305 uint32_t encoding
= (0b110001 << 26);
306 encoding
|= exp
->valid_mask
? 0b1 << 12 : 0;
307 encoding
|= exp
->done
? 0b1 << 11 : 0;
308 encoding
|= exp
->compressed
? 0b1 << 10 : 0;
309 encoding
|= exp
->dest
<< 4;
310 encoding
|= exp
->enabled_mask
;
311 out
.push_back(encoding
);
312 encoding
= 0xFF & exp
->operands
[0].physReg().reg
;
313 encoding
|= (0xFF & exp
->operands
[1].physReg().reg
) << 8;
314 encoding
|= (0xFF & exp
->operands
[2].physReg().reg
) << 16;
315 encoding
|= (0xFF & exp
->operands
[3].physReg().reg
) << 24;
316 out
.push_back(encoding
);
/* pseudo formats must not reach the assembler */
320 case Format::PSEUDO_BARRIER
:
321 unreachable("Pseudo instructions should be lowered before assembly.");
/* Formats with the VOP3A bit set: two words.
 * NOTE(review): the `default:` label that presumably precedes this is not visible. */
323 if ((uint16_t) instr
->format
& (uint16_t) Format::VOP3A
) {
324 VOP3A_instruction
* vop3
= static_cast<VOP3A_instruction
*>(instr
);
/* rebase the opcode according to the underlying format: VOP2 +0x100, VOP1 +0x140,
 * VOPC +0x0, VINTRP +0x270 */
326 if ((uint16_t) instr
->format
& (uint16_t) Format::VOP2
)
327 opcode
= opcode
+ 0x100;
328 else if ((uint16_t) instr
->format
& (uint16_t) Format::VOP1
)
329 opcode
= opcode
+ 0x140;
330 else if ((uint16_t) instr
->format
& (uint16_t) Format::VOPC
)
331 opcode
= opcode
+ 0x0;
332 else if ((uint16_t) instr
->format
& (uint16_t) Format::VINTRP
)
333 opcode
= opcode
+ 0x270;
/* first word: opcode, clamp, abs bits 8..10, optional second definition, destination */
336 uint32_t encoding
= (0b110100 << 26);
337 encoding
|= opcode
<< 16;
338 encoding
|= (vop3
->clamp
? 1 : 0) << 15;
339 for (unsigned i
= 0; i
< 3; i
++)
340 encoding
|= vop3
->abs
[i
] << (8+i
);
341 if (instr
->definitions
.size() == 2)
342 encoding
|= instr
->definitions
[1].physReg() << 8;
343 encoding
|= (0xFF & instr
->definitions
[0].physReg().reg
);
344 out
.push_back(encoding
);
/* Second word: sources in 9-bit fields (operand i at bit i * 9), omod at bit 27 and neg
 * bits 29..31. v_interp_mov_f32 starts from a 2-bit constant instead.
 * NOTE(review): source lines 345, 348 and 351 (presumably a reset of `encoding`, an
 * `else` and a closing brace) are not visible. */
346 if (instr
->opcode
== aco_opcode::v_interp_mov_f32
) {
347 encoding
= 0x3 & instr
->operands
[0].constantValue();
349 for (unsigned i
= 0; i
< instr
->operands
.size(); i
++)
350 encoding
|= instr
->operands
[i
].physReg() << (i
* 9);
352 encoding
|= vop3
->omod
<< 27;
353 for (unsigned i
= 0; i
< 3; i
++)
354 encoding
|= vop3
->neg
[i
] << (29+i
);
355 out
.push_back(encoding
);
/* DPP: operand 0 is replaced with PhysReg 250, bit 14 of the format is cleared and the
 * function recurses to emit the base instruction; the DPP control word follows. The
 * changes to *instr are not undone in the visible lines. */
358 } else if (instr
->isDPP()){
359 /* first emit the instruction without the DPP operand */
360 Operand dpp_op
= instr
->operands
[0];
361 instr
->operands
[0] = Operand(PhysReg
{250}, v1
);
362 instr
->format
= (Format
) ((uint32_t) instr
->format
& ~(1 << 14));
363 emit_instruction(ctx
, out
, instr
);
364 DPP_instruction
* dpp
= static_cast<DPP_instruction
*>(instr
);
365 uint32_t encoding
= (0xF & dpp
->row_mask
) << 28;
366 encoding
|= (0xF & dpp
->bank_mask
) << 24;
367 encoding
|= dpp
->abs
[1] << 23;
368 encoding
|= dpp
->neg
[1] << 22;
369 encoding
|= dpp
->abs
[0] << 21;
370 encoding
|= dpp
->neg
[0] << 20;
371 encoding
|= dpp
->bound_ctrl
<< 19;
372 encoding
|= dpp
->dpp_ctrl
<< 8;
/* the original operand 0 register goes in the low byte of the DPP word */
373 encoding
|= (0xFF) & dpp_op
.physReg().reg
;
374 out
.push_back(encoding
);
377 unreachable("unimplemented instruction format");
/* every literal operand contributes one extra word after the instruction */
381 /* append literal dword */
382 for (const Operand
& op
: instr
->operands
) {
383 if (op
.isLiteral()) {
384 out
.push_back(op
.constantValue());
/* Emit every instruction of `block`, in order, into `out` via emit_instruction().
 * As shown here, each instruction is also printed to stderr, followed by the words it
 * produced.
 * NOTE(review): numbered source lines 393, 398, 400 and 403 are missing from this listing
 * directly around the stderr dumps; they may be conditional-compilation guards, in which
 * case the dumps are not part of normal builds — confirm against the real file. */
390 void emit_block(asm_context
& ctx
, std::vector
<uint32_t>& out
, Block
& block
)
392 for (aco_ptr
<Instruction
>& instr
: block
.instructions
) {
/* index of the first word this instruction will produce */
394 int start_idx
= out
.size();
395 std::cerr
<< "Encoding:\t" << std::endl
;
396 aco_print_instr(&*instr
, stderr
);
397 std::cerr
<< std::endl
;
399 emit_instruction(ctx
, out
, instr
.get());
/* Print the newly appended words as zero-padded 8-digit hex. std::hex and the fill
 * character stay set on std::cerr afterwards. `i` is int while out.size() is unsigned. */
401 for (int i
= start_idx
; i
< out
.size(); i
++)
402 std::cerr
<< "encoding: " << "0x" << std::setfill('0') << std::setw(8) << std::hex
<< out
[i
] << std::endl
;
/* Walk the program's blocks from last to first and scan each block's instructions
 * backwards for an export ahead of s_endpgm; when none is found in a block that ends the
 * program, insert a null export before the block's final instruction.
 * `ctx` and `out` are not used in the lines visible here.
 * NOTE(review): many numbered source lines are missing from this listing (415, 420-425,
 * 427-429, 431, 433-438, 440, 447, 451, 455-457): branch bodies, the iterator increment and
 * any break/continue statements cannot be seen, so the exact control flow must be
 * confirmed against the real file. */
407 void fix_exports(asm_context
& ctx
, std::vector
<uint32_t>& out
, Program
* program
)
409 for (int idx
= program
->blocks
.size() - 1; idx
>= 0; idx
--) {
410 Block
& block
= program
->blocks
[idx
];
411 std::vector
<aco_ptr
<Instruction
>>::reverse_iterator it
= block
.instructions
.rbegin();
/* endBlock: presumably set once s_endpgm has been seen in this block;
 * exported: presumably set once a suitable export has been found (assignments not visible) */
412 bool endBlock
= false;
413 bool exported
= false;
414 while ( it
!= block
.instructions
.rend())
/* exports are only considered when endBlock is already set */
416 if ((*it
)->format
== Format::EXP
&& endBlock
) {
417 Export_instruction
* exp
= static_cast<Export_instruction
*>((*it
).get());
/* for hw_vs stages only destinations V_008DFC_SQ_EXP_POS .. V_008DFC_SQ_EXP_POS + 3 match */
418 if (program
->stage
& hw_vs
) {
419 if (exp
->dest
>= V_008DFC_SQ_EXP_POS
&& exp
->dest
<= (V_008DFC_SQ_EXP_POS
+ 3)) {
/* NOTE(review): presumably inside the non-hw_vs branch; the enclosing lines are not visible */
426 exp
->valid_mask
= true;
/* an instruction whose first definition is exec is tested separately (body not visible) */
430 } else if ((*it
)->definitions
.size() && (*it
)->definitions
[0].physReg() == exec
)
432 else if ((*it
)->opcode
== aco_opcode::s_endpgm
) {
/* nothing to insert unless the block ends the program and no export was found
 * (the statement guarded by this condition is not visible) */
439 if (!endBlock
|| exported
)
441 /* we didn't find an Export instruction and have to insert a null export */
442 aco_ptr
<Export_instruction
> exp
{create_instruction
<Export_instruction
>(aco_opcode::exp
, Format::EXP
, 4, 0)};
/* all four operands are Operand(v1) with no register assigned, and no channel is enabled */
443 for (unsigned i
= 0; i
< 4; i
++)
444 exp
->operands
[i
] = Operand(v1
);
445 exp
->enabled_mask
= 0;
446 exp
->compressed
= false;
/* valid_mask is set only for hw_fs stages */
448 exp
->valid_mask
= program
->stage
& hw_fs
;
/* Destination 9 for hw_fs stages, V_008DFC_SQ_EXP_POS otherwise.
 * NOTE(review): the `else` on source line 451 is not visible. */
449 if (program
->stage
& hw_fs
)
450 exp
->dest
= 9; /* NULL */
452 exp
->dest
= V_008DFC_SQ_EXP_POS
;
453 /* insert the null export 1 instruction before endpgm */
454 block
.instructions
.insert(block
.instructions
.end() - 1, std::move(exp
));
458 void fix_branches(asm_context
& ctx
, std::vector
<uint32_t>& out
)
460 for (std::pair
<int, SOPP_instruction
*> branch
: ctx
.branches
)
462 int offset
= (int)ctx
.program
->blocks
[branch
.second
->block
].offset
- branch
.first
- 1;
463 out
[branch
.first
] |= (uint16_t) offset
;
467 void fix_constaddrs(asm_context
& ctx
, std::vector
<uint32_t>& out
)
469 for (unsigned addr
: ctx
.constaddrs
)
470 out
[addr
] += out
.size() * 4u;
473 unsigned emit_program(Program
* program
,
474 std::vector
<uint32_t>& code
)
476 asm_context
ctx(program
);
478 if (program
->stage
& (hw_vs
| hw_fs
))
479 fix_exports(ctx
, code
, program
);
481 for (Block
& block
: program
->blocks
) {
482 block
.offset
= code
.size();
483 emit_block(ctx
, code
, block
);
486 fix_branches(ctx
, code
);
487 fix_constaddrs(ctx
, code
);
489 unsigned constant_data_offset
= code
.size() * sizeof(uint32_t);
490 while (program
->constant_data
.size() % 4u)
491 program
->constant_data
.push_back(0);
492 /* Copy constant data */
493 code
.insert(code
.end(), (uint32_t*)program
->constant_data
.data(),
494 (uint32_t*)(program
->constant_data
.data() + program
->constant_data
.size()));
496 return constant_data_offset
;