4 #include "common/sid.h"
/* hardware generation, copied from program->chip_class by the constructor */
10 enum chip_class chip_class
;
/* branches recorded by emit_instruction(): index of the branch dword in the output -> branch instruction; read by fix_branches() */
11 std::map
<int, SOPP_instruction
*> branches
;
/* output indices of the literal dwords emitted for p_constaddr; patched by fix_constaddrs() */
12 std::vector
<unsigned> constaddrs
;
/* table indexed by (int)aco_opcode; the constructor points it at instr_info.opcode_gfx9 when chip_class <= GFX9 */
13 const int16_t* opcode
;
14 // TODO: keep track of branch instructions referring to blocks
15 // and, when emitting the block, correct the offset in the instruction
16 asm_context(Program
* program
) : program(program
), chip_class(program
->chip_class
) {
17 if (chip_class
<= GFX9
)
18 opcode
= &instr_info
.opcode_gfx9
[0];
/*
 * Encodes a single instruction as one or more 32-bit words appended to `out`.
 *
 * ctx   - assembler state: supplies the opcode table and chip_class, and collects
 *         the branch and p_constaddr fix-up locations recorded below.
 * out   - output dword stream; its size on entry gives the instruction's byte offset.
 * instr - instruction to encode; registers are read through physReg().
 *
 * Operands for which isLiteral() is true are appended as an extra dword at the end.
 *
 * NOTE(review): this view of the function is incomplete (the embedded original line
 * numbers have gaps), so case labels and some statements are not visible here.
 */
22 void emit_instruction(asm_context
& ctx
, std::vector
<uint32_t>& out
, Instruction
* instr
)
/* byte offset of this instruction in the output (4 bytes per dword) */
24 uint32_t instr_offset
= out
.size() * 4u;
26 /* lower remaining pseudo-instructions */
27 if (instr
->opcode
== aco_opcode::p_constaddr
) {
28 unsigned dest
= instr
->definitions
[0].physReg();
29 unsigned offset
= instr
->operands
[0].constantValue();
31 /* s_getpc_b64 dest[0:1] */
32 uint32_t encoding
= (0b101111101 << 23);
33 uint32_t opcode
= ctx
.opcode
[(int)aco_opcode::s_getpc_b64
];
34 if (opcode
>= 55 && ctx
.chip_class
<= GFX9
) {
35 assert(ctx
.chip_class
== GFX9
&& opcode
< 60);
38 encoding
|= dest
<< 16;
39 encoding
|= opcode
<< 8;
40 out
.push_back(encoding
);
42 /* s_add_u32 dest[0], dest[0], ... */
43 encoding
= (0b10 << 30);
44 encoding
|= ctx
.opcode
[(int)aco_opcode::s_add_u32
] << 23;
45 encoding
|= dest
<< 16;
48 out
.push_back(encoding
);
/* remember the index of the dword pushed next so fix_constaddrs() can patch it */
49 ctx
.constaddrs
.push_back(out
.size());
50 out
.push_back(-(instr_offset
+ 4) + offset
);
52 /* s_addc_u32 dest[1], dest[1], 0 */
53 encoding
= (0b10 << 30);
54 encoding
|= ctx
.opcode
[(int)aco_opcode::s_addc_u32
] << 23;
55 encoding
|= (dest
+ 1) << 16;
58 out
.push_back(encoding
);
/* look up the table entry for this instruction; an entry of -1 is reported as unsupported */
62 uint32_t opcode
= ctx
.opcode
[(int)instr
->opcode
];
63 if (opcode
== (uint32_t)-1) {
64 fprintf(stderr
, "Unsupported opcode: ");
65 aco_print_instr(instr
, stderr
);
69 switch (instr
->format
) {
71 uint32_t encoding
= (0b10 << 30);
72 encoding
|= opcode
<< 23;
73 encoding
|= !instr
->definitions
.empty() ? instr
->definitions
[0].physReg() << 16 : 0;
74 encoding
|= instr
->operands
.size() >= 2 ? instr
->operands
[1].physReg() << 8 : 0;
75 encoding
|= !instr
->operands
.empty() ? instr
->operands
[0].physReg() : 0;
76 out
.push_back(encoding
);
80 uint32_t encoding
= (0b1011 << 28);
81 encoding
|= opcode
<< 23;
83 !instr
->definitions
.empty() && !(instr
->definitions
[0].physReg() == scc
) ?
84 instr
->definitions
[0].physReg() << 16 :
85 !instr
->operands
.empty() && !(instr
->operands
[0].physReg() == scc
) ?
86 instr
->operands
[0].physReg() << 16 : 0;
87 encoding
|= static_cast<SOPK_instruction
*>(instr
)->imm
;
88 out
.push_back(encoding
);
92 uint32_t encoding
= (0b101111101 << 23);
93 if (opcode
>= 55 && ctx
.chip_class
<= GFX9
) {
94 assert(ctx
.chip_class
== GFX9
&& opcode
< 60);
97 encoding
|= !instr
->definitions
.empty() ? instr
->definitions
[0].physReg() << 16 : 0;
98 encoding
|= opcode
<< 8;
99 encoding
|= !instr
->operands
.empty() ? instr
->operands
[0].physReg() : 0;
100 out
.push_back(encoding
);
104 uint32_t encoding
= (0b101111110 << 23);
105 encoding
|= opcode
<< 16;
106 encoding
|= instr
->operands
.size() == 2 ? instr
->operands
[1].physReg() << 8 : 0;
107 encoding
|= !instr
->operands
.empty() ? instr
->operands
[0].physReg() : 0;
108 out
.push_back(encoding
);
112 SOPP_instruction
* sopp
= static_cast<SOPP_instruction
*>(instr
);
113 uint32_t encoding
= (0b101111111 << 23);
114 encoding
|= opcode
<< 16;
115 encoding
|= (uint16_t) sopp
->imm
;
/* a block index other than -1 marks a branch: record its dword index so fix_branches() can patch the offset */
116 if (sopp
->block
!= -1)
117 ctx
.branches
.insert({out
.size(), sopp
});
118 out
.push_back(encoding
);
/* SMEM_instruction: two dwords are pushed for this path */
122 SMEM_instruction
* smem
= static_cast<SMEM_instruction
*>(instr
);
123 uint32_t encoding
= (0b110000 << 26);
124 encoding
|= opcode
<< 18;
125 if (instr
->operands
.size() >= 2)
126 encoding
|= instr
->operands
[1].isConstant() ? 1 << 17 : 0;
127 bool soe
= instr
->operands
.size() >= (!instr
->definitions
.empty() ? 3 : 4);
128 assert(!soe
|| ctx
.chip_class
>= GFX9
);
129 encoding
|= soe
? 1 << 14 : 0;
130 encoding
|= smem
->glc
? 1 << 16 : 0;
131 if (!instr
->definitions
.empty() || instr
->operands
.size() >= 3)
132 encoding
|= (!instr
->definitions
.empty() ? instr
->definitions
[0].physReg() : instr
->operands
[2].physReg().reg
) << 6;
133 if (instr
->operands
.size() >= 1)
134 encoding
|= instr
->operands
[0].physReg() >> 1;
135 out
.push_back(encoding
);
137 if (instr
->operands
.size() >= 2)
138 encoding
|= instr
->operands
[1].isConstant() ? instr
->operands
[1].constantValue() : instr
->operands
[1].physReg().reg
;
139 encoding
|= soe
? instr
->operands
.back().physReg() << 25 : 0;
140 out
.push_back(encoding
);
144 uint32_t encoding
= 0;
145 encoding
|= opcode
<< 25;
146 encoding
|= (0xFF & instr
->definitions
[0].physReg().reg
) << 17;
147 encoding
|= (0xFF & instr
->operands
[1].physReg().reg
) << 9;
148 encoding
|= instr
->operands
[0].physReg().reg
;
149 out
.push_back(encoding
);
153 uint32_t encoding
= (0b0111111 << 25);
154 encoding
|= (0xFF & instr
->definitions
[0].physReg().reg
) << 17;
155 encoding
|= opcode
<< 9;
156 encoding
|= instr
->operands
[0].physReg().reg
;
157 out
.push_back(encoding
);
161 uint32_t encoding
= (0b0111110 << 25);
162 encoding
|= opcode
<< 17;
163 encoding
|= (0xFF & instr
->operands
[1].physReg().reg
) << 9;
164 encoding
|= instr
->operands
[0].physReg().reg
;
165 out
.push_back(encoding
);
168 case Format::VINTRP
: {
169 Interp_instruction
* interp
= static_cast<Interp_instruction
*>(instr
);
170 uint32_t encoding
= (0b110101 << 26);
171 encoding
|= (0xFF & instr
->definitions
[0].physReg().reg
) << 18;
172 encoding
|= opcode
<< 16;
173 encoding
|= interp
->attribute
<< 10;
174 encoding
|= interp
->component
<< 8;
175 if (instr
->opcode
== aco_opcode::v_interp_mov_f32
)
176 encoding
|= (0x3 & instr
->operands
[0].constantValue());
178 encoding
|= (0xFF & instr
->operands
[0].physReg().reg
);
179 out
.push_back(encoding
);
/* DS_instruction: two dwords are pushed for this path */
183 DS_instruction
* ds
= static_cast<DS_instruction
*>(instr
);
184 uint32_t encoding
= (0b110110 << 26);
185 encoding
|= opcode
<< 17;
186 encoding
|= (ds
->gds
? 1 : 0) << 16;
187 encoding
|= ((0xFF & ds
->offset1
) << 8);
188 encoding
|= (0xFFFF & ds
->offset0
);
189 out
.push_back(encoding
);
191 unsigned reg
= !instr
->definitions
.empty() ? instr
->definitions
[0].physReg() : 0;
192 encoding
|= (0xFF & reg
) << 24;
193 reg
= instr
->operands
.size() >= 3 && !(instr
->operands
[2].physReg() == m0
) ? instr
->operands
[2].physReg() : 0;
194 encoding
|= (0xFF & reg
) << 16;
195 reg
= instr
->operands
.size() >= 2 && !(instr
->operands
[1].physReg() == m0
) ? instr
->operands
[1].physReg() : 0;
196 encoding
|= (0xFF & reg
) << 8;
197 encoding
|= (0xFF & instr
->operands
[0].physReg().reg
);
198 out
.push_back(encoding
);
201 case Format::MUBUF
: {
202 MUBUF_instruction
* mubuf
= static_cast<MUBUF_instruction
*>(instr
);
203 uint32_t encoding
= (0b111000 << 26);
204 encoding
|= opcode
<< 18;
205 encoding
|= (mubuf
->slc
? 1 : 0) << 17;
206 encoding
|= (mubuf
->lds
? 1 : 0) << 16;
207 encoding
|= (mubuf
->glc
? 1 : 0) << 14;
208 encoding
|= (mubuf
->idxen
? 1 : 0) << 13;
209 encoding
|= (mubuf
->offen
? 1 : 0) << 12;
210 encoding
|= 0x0FFF & mubuf
->offset
;
211 out
.push_back(encoding
);
213 encoding
|= instr
->operands
[2].physReg() << 24;
214 encoding
|= (mubuf
->tfe
? 1 : 0) << 23;
215 encoding
|= (instr
->operands
[1].physReg() >> 2) << 16;
216 unsigned reg
= instr
->operands
.size() > 3 ? instr
->operands
[3].physReg() : instr
->definitions
[0].physReg().reg
;
217 encoding
|= (0xFF & reg
) << 8;
218 encoding
|= (0xFF & instr
->operands
[0].physReg().reg
);
219 out
.push_back(encoding
);
222 case Format::MTBUF
: {
223 MTBUF_instruction
* mtbuf
= static_cast<MTBUF_instruction
*>(instr
);
224 uint32_t encoding
= (0b111010 << 26);
225 encoding
|= opcode
<< 15;
226 encoding
|= (mtbuf
->glc
? 1 : 0) << 14;
227 encoding
|= (mtbuf
->idxen
? 1 : 0) << 13;
228 encoding
|= (mtbuf
->offen
? 1 : 0) << 12;
229 encoding
|= 0x0FFF & mtbuf
->offset
;
230 encoding
|= (0xF & mtbuf
->dfmt
) << 19;
231 encoding
|= (0x7 & mtbuf
->nfmt
) << 23;
232 out
.push_back(encoding
);
234 encoding
|= instr
->operands
[2].physReg().reg
<< 24;
235 encoding
|= (mtbuf
->tfe
? 1 : 0) << 23;
236 encoding
|= (mtbuf
->slc
? 1 : 0) << 22;
237 encoding
|= (instr
->operands
[1].physReg().reg
>> 2) << 16;
238 unsigned reg
= instr
->operands
.size() > 3 ? instr
->operands
[3].physReg().reg
: instr
->definitions
[0].physReg().reg
;
239 encoding
|= (0xFF & reg
) << 8;
240 encoding
|= (0xFF & instr
->operands
[0].physReg().reg
);
241 out
.push_back(encoding
);
/* MIMG_instruction: two dwords are pushed for this path */
245 MIMG_instruction
* mimg
= static_cast<MIMG_instruction
*>(instr
);
246 uint32_t encoding
= (0b111100 << 26);
247 encoding
|= mimg
->slc
? 1 << 25 : 0;
248 encoding
|= opcode
<< 18;
249 encoding
|= mimg
->lwe
? 1 << 17 : 0;
250 encoding
|= mimg
->tfe
? 1 << 16 : 0;
251 encoding
|= mimg
->r128
? 1 << 15 : 0;
252 encoding
|= mimg
->da
? 1 << 14 : 0;
253 encoding
|= mimg
->glc
? 1 << 13 : 0;
254 encoding
|= mimg
->unrm
? 1 << 12 : 0;
255 encoding
|= (0xF & mimg
->dmask
) << 8;
256 out
.push_back(encoding
);
257 encoding
= (0xFF & instr
->operands
[0].physReg().reg
); /* VADDR */
258 if (!instr
->definitions
.empty()) {
259 encoding
|= (0xFF & instr
->definitions
[0].physReg().reg
) << 8; /* VDATA */
260 } else if (instr
->operands
.size() == 4) {
261 encoding
|= (0xFF & instr
->operands
[3].physReg().reg
) << 8; /* VDATA */
263 encoding
|= (0x1F & (instr
->operands
[1].physReg() >> 2)) << 16; /* T# (resource) */
264 if (instr
->operands
.size() > 2)
265 encoding
|= (0x1F & (instr
->operands
[2].physReg() >> 2)) << 21; /* sampler */
267 out
.push_back(encoding
);
271 case Format::SCRATCH
:
272 case Format::GLOBAL
: {
273 FLAT_instruction
*flat
= static_cast<FLAT_instruction
*>(instr
);
274 uint32_t encoding
= (0b110111 << 26);
275 encoding
|= opcode
<< 18;
276 encoding
|= flat
->offset
& 0x1fff;
277 if (instr
->format
== Format::SCRATCH
)
279 else if (instr
->format
== Format::GLOBAL
)
/* NOTE(review): lds, glc and slc all set bit 13 below; verify against the ISA document whether glc/slc belong at separate bit positions */
281 encoding
|= flat
->lds
? 1 << 13 : 0;
282 encoding
|= flat
->glc
? 1 << 13 : 0;
283 encoding
|= flat
->slc
? 1 << 13 : 0;
284 out
.push_back(encoding
);
285 encoding
= (0xFF & instr
->operands
[0].physReg().reg
);
286 if (!instr
->definitions
.empty())
287 encoding
|= (0xFF & instr
->definitions
[0].physReg().reg
) << 24;
289 encoding
|= (0xFF & instr
->operands
[2].physReg().reg
) << 8;
290 if (!instr
->operands
[1].isUndefined()) {
291 assert(instr
->operands
[1].physReg() != 0x7f);
292 assert(instr
->format
!= Format::FLAT
);
293 encoding
|= instr
->operands
[1].physReg() << 16;
294 } else if (instr
->format
!= Format::FLAT
) {
295 encoding
|= 0x7F << 16;
297 encoding
|= flat
->nv
? 1 << 23 : 0;
298 out
.push_back(encoding
);
/* Export_instruction: a control dword followed by a dword holding the four operand registers */
302 Export_instruction
* exp
= static_cast<Export_instruction
*>(instr
);
303 uint32_t encoding
= (0b110001 << 26);
304 encoding
|= exp
->valid_mask
? 0b1 << 12 : 0;
305 encoding
|= exp
->done
? 0b1 << 11 : 0;
306 encoding
|= exp
->compressed
? 0b1 << 10 : 0;
307 encoding
|= exp
->dest
<< 4;
308 encoding
|= exp
->enabled_mask
;
309 out
.push_back(encoding
);
310 encoding
= 0xFF & exp
->operands
[0].physReg().reg
;
311 encoding
|= (0xFF & exp
->operands
[1].physReg().reg
) << 8;
312 encoding
|= (0xFF & exp
->operands
[2].physReg().reg
) << 16;
313 encoding
|= (0xFF & exp
->operands
[3].physReg().reg
) << 24;
314 out
.push_back(encoding
);
318 case Format::PSEUDO_BARRIER
:
319 unreachable("Pseudo instructions should be lowered before assembly.");
/* formats carrying the VOP3A bit: the opcode is rebased according to which other format bit is also set */
321 if ((uint16_t) instr
->format
& (uint16_t) Format::VOP3A
) {
322 VOP3A_instruction
* vop3
= static_cast<VOP3A_instruction
*>(instr
);
324 if ((uint16_t) instr
->format
& (uint16_t) Format::VOP2
)
325 opcode
= opcode
+ 0x100;
326 else if ((uint16_t) instr
->format
& (uint16_t) Format::VOP1
)
327 opcode
= opcode
+ 0x140;
328 else if ((uint16_t) instr
->format
& (uint16_t) Format::VOPC
)
329 opcode
= opcode
+ 0x0;
330 else if ((uint16_t) instr
->format
& (uint16_t) Format::VINTRP
)
331 opcode
= opcode
+ 0x270;
334 uint32_t encoding
= (0b110100 << 26);
335 encoding
|= opcode
<< 16;
336 encoding
|= (vop3
->clamp
? 1 : 0) << 15;
337 for (unsigned i
= 0; i
< 3; i
++)
338 encoding
|= vop3
->abs
[i
] << (8+i
);
339 if (instr
->definitions
.size() == 2)
340 encoding
|= instr
->definitions
[1].physReg() << 8;
341 encoding
|= (0xFF & instr
->definitions
[0].physReg().reg
);
342 out
.push_back(encoding
);
344 if (instr
->opcode
== aco_opcode::v_interp_mov_f32
) {
345 encoding
= 0x3 & instr
->operands
[0].constantValue();
347 for (unsigned i
= 0; i
< instr
->operands
.size(); i
++)
348 encoding
|= instr
->operands
[i
].physReg() << (i
* 9);
350 encoding
|= vop3
->omod
<< 27;
351 for (unsigned i
= 0; i
< 3; i
++)
352 encoding
|= vop3
->neg
[i
] << (29+i
);
353 out
.push_back(encoding
);
356 } else if (instr
->isDPP()){
357 /* first emit the instruction without the DPP operand */
358 Operand dpp_op
= instr
->operands
[0];
359 instr
->operands
[0] = Operand(PhysReg
{250}, v1
);
/* clears bit 14 of the format before recursing (presumably the DPP flag - confirm); operand 0 and the format are not restored afterwards in the visible code */
360 instr
->format
= (Format
) ((uint32_t) instr
->format
& ~(1 << 14));
361 emit_instruction(ctx
, out
, instr
);
362 DPP_instruction
* dpp
= static_cast<DPP_instruction
*>(instr
);
363 uint32_t encoding
= (0xF & dpp
->row_mask
) << 28;
364 encoding
|= (0xF & dpp
->bank_mask
) << 24;
365 encoding
|= dpp
->abs
[1] << 23;
366 encoding
|= dpp
->neg
[1] << 22;
367 encoding
|= dpp
->abs
[0] << 21;
368 encoding
|= dpp
->neg
[0] << 20;
369 encoding
|= dpp
->bound_ctrl
<< 19;
370 encoding
|= dpp
->dpp_ctrl
<< 8;
371 encoding
|= (0xFF) & dpp_op
.physReg().reg
;
372 out
.push_back(encoding
);
375 unreachable("unimplemented instruction format");
379 /* append literal dword */
380 for (const Operand
& op
: instr
->operands
) {
381 if (op
.isLiteral()) {
382 out
.push_back(op
.constantValue());
/*
 * Encodes every instruction of `block`, in order, into `out` via emit_instruction().
 *
 * ctx   - assembler state forwarded to emit_instruction().
 * out   - output dword stream the encodings are appended to.
 * block - block whose instructions are assembled.
 *
 * The statements around the emit_instruction() call dump the instruction and the
 * dwords it produced to stderr.
 * NOTE(review): the embedded original line numbers have gaps right around those dump
 * statements, so they may be wrapped in preprocessor guards not visible here - confirm
 * before relying on (or changing) the output.
 */
388 void emit_block(asm_context
& ctx
, std::vector
<uint32_t>& out
, Block
& block
)
390 for (aco_ptr
<Instruction
>& instr
: block
.instructions
) {
/* index of the first dword this instruction will produce, used by the dump loop below */
392 int start_idx
= out
.size();
393 std::cerr
<< "Encoding:\t" << std::endl
;
394 aco_print_instr(&*instr
, stderr
);
395 std::cerr
<< std::endl
;
397 emit_instruction(ctx
, out
, instr
.get());
/* print each dword just emitted as zero-padded 8-digit hex */
399 for (int i
= start_idx
; i
< out
.size(); i
++)
400 std::cerr
<< "encoding: " << "0x" << std::setfill('0') << std::setw(8) << std::hex
<< out
[i
] << std::endl
;
/*
 * Post-processes export instructions of the program's blocks before assembly.
 *
 * Blocks are visited from last to first, and each block's instructions from last to
 * first. An instruction with Format::EXP that is seen while `endBlock` is set is
 * treated as the export to fix up. Below the scan, a new export with four Operand(v1)
 * operands and enabled_mask 0 is built and inserted before the block's last
 * instruction.
 *
 * ctx, out - not referenced in the visible lines.
 * program  - program whose blocks are scanned and possibly modified; `stage` selects
 *            the hw_vs / hw_fs specific handling.
 *
 * NOTE(review): this view is missing lines (the embedded original line numbers have
 * gaps): the statements that set `endBlock` / `exported`, advance `it`, and the
 * statement controlled by `if (!endBlock || exported)` are not visible here.
 */
405 void fix_exports(asm_context
& ctx
, std::vector
<uint32_t>& out
, Program
* program
)
407 for (int idx
= program
->blocks
.size() - 1; idx
>= 0; idx
--) {
408 Block
& block
= program
->blocks
[idx
];
409 std::vector
<aco_ptr
<Instruction
>>::reverse_iterator it
= block
.instructions
.rbegin();
410 bool endBlock
= false;
411 bool exported
= false;
412 while ( it
!= block
.instructions
.rend())
414 if ((*it
)->format
== Format::EXP
&& endBlock
) {
415 Export_instruction
* exp
= static_cast<Export_instruction
*>((*it
).get());
416 if (program
->stage
& hw_vs
) {
/* for hw_vs only exports whose dest lies in V_008DFC_SQ_EXP_POS .. V_008DFC_SQ_EXP_POS + 3 are considered */
417 if (exp
->dest
>= V_008DFC_SQ_EXP_POS
&& exp
->dest
<= (V_008DFC_SQ_EXP_POS
+ 3)) {
424 exp
->valid_mask
= true;
428 } else if ((*it
)->definitions
.size() && (*it
)->definitions
[0].physReg() == exec
)
430 else if ((*it
)->opcode
== aco_opcode::s_endpgm
) {
437 if (!endBlock
|| exported
)
439 /* we didn't find an Export instruction and have to insert a null export */
440 aco_ptr
<Export_instruction
> exp
{create_instruction
<Export_instruction
>(aco_opcode::exp
, Format::EXP
, 4, 0)};
441 for (unsigned i
= 0; i
< 4; i
++)
442 exp
->operands
[i
] = Operand(v1
);
443 exp
->enabled_mask
= 0;
444 exp
->compressed
= false;
446 exp
->valid_mask
= program
->stage
& hw_fs
;
447 if (program
->stage
& hw_fs
)
448 exp
->dest
= 9; /* NULL */
450 exp
->dest
= V_008DFC_SQ_EXP_POS
;
451 /* insert the null export 1 instruction before endpgm */
452 block
.instructions
.insert(block
.instructions
.end() - 1, std::move(exp
));
456 void fix_branches(asm_context
& ctx
, std::vector
<uint32_t>& out
)
458 for (std::pair
<int, SOPP_instruction
*> branch
: ctx
.branches
)
460 int offset
= (int)ctx
.program
->blocks
[branch
.second
->block
].offset
- branch
.first
- 1;
461 out
[branch
.first
] |= (uint16_t) offset
;
465 void fix_constaddrs(asm_context
& ctx
, std::vector
<uint32_t>& out
)
467 for (unsigned addr
: ctx
.constaddrs
)
468 out
[addr
] += out
.size() * 4u;
471 unsigned emit_program(Program
* program
,
472 std::vector
<uint32_t>& code
)
474 asm_context
ctx(program
);
476 if (program
->stage
& (hw_vs
| hw_fs
))
477 fix_exports(ctx
, code
, program
);
479 for (Block
& block
: program
->blocks
) {
480 block
.offset
= code
.size();
481 emit_block(ctx
, code
, block
);
484 fix_branches(ctx
, code
);
485 fix_constaddrs(ctx
, code
);
487 unsigned constant_data_offset
= code
.size() * sizeof(uint32_t);
488 while (program
->constant_data
.size() % 4u)
489 program
->constant_data
.push_back(0);
490 /* Copy constant data */
491 code
.insert(code
.end(), (uint32_t*)program
->constant_data
.data(),
492 (uint32_t*)(program
->constant_data
.data() + program
->constant_data
.size()));
494 return constant_data_offset
;