fcad107f34c103be549b21da8bc63b1de7ad6078
4 #include "common/sid.h"
5 #include "ac_shader_util.h"
/* Assembler state shared by the functions in this file: target chip generation,
 * pending branch and constant-address fixups, and the hardware opcode table.
 * NOTE(review): the enclosing type header and the `program` member used by the
 * constructor's init list are not visible in this view. */
11 enum chip_class chip_class
;
/* Branch fixups: key is the index into the output dword vector at which a SOPP
 * instruction was emitted, value is that instruction (filled by emit_instruction,
 * consumed by fix_branches). */
12 std::map
<int, SOPP_instruction
*> branches
;
/* Indices into the output dword vector that fix_constaddrs adjusts afterwards. */
13 std::vector
<unsigned> constaddrs
;
/* Table indexed by (int)aco_opcode that yields the hardware opcode number. */
14 const int16_t* opcode
;
15 // TODO: keep track of branch instructions referring blocks
16 // and, when emitting the block, correct the offset in instr
/* Stores the program and its chip_class, then selects the opcode table from
 * instr_info: opcode_gfx9 when chip_class <= GFX9, opcode_gfx10 when it is GFX10.
 * The visible code assigns `opcode` for no other chip_class value. */
17 asm_context(Program
* program
) : program(program
), chip_class(program
->chip_class
) {
18 if (chip_class
<= GFX9
)
19 opcode
= &instr_info
.opcode_gfx9
[0];
20 else if (chip_class
== GFX10
)
21 opcode
= &instr_info
.opcode_gfx10
[0];
/* Encodes a single instruction and appends the resulting 32-bit words to `out`.
 *
 * ctx   - assembler state (chip_class, opcode table, fixup lists).
 * out   - output dword vector; new words are pushed at the end.
 * instr - instruction to encode; the DPP path below modifies it in place.
 *
 * p_constaddr is lowered to a fixed instruction sequence. Every other instruction
 * has its hardware opcode looked up in ctx.opcode and is encoded according to
 * instr->format. Literal operands are appended as extra dwords at the end.
 *
 * NOTE(review): many original lines (braces, some case labels, else branches and
 * a few assignments) are not visible in this view, so the exact control flow
 * between the sections below cannot be fully confirmed from here. */
25 void emit_instruction(asm_context
& ctx
, std::vector
<uint32_t>& out
, Instruction
* instr
)
/* Byte offset of this instruction within the output (dword count * 4). */
27 uint32_t instr_offset
= out
.size() * 4u;
29 /* lower remaining pseudo-instructions */
/* p_constaddr becomes s_getpc_b64, s_add_u32 with one literal dword, and
 * s_addc_u32, all operating on the destination register pair. */
30 if (instr
->opcode
== aco_opcode::p_constaddr
) {
31 unsigned dest
= instr
->definitions
[0].physReg();
32 unsigned offset
= instr
->operands
[0].constantValue();
34 /* s_getpc_b64 dest[0:1] */
35 uint32_t encoding
= (0b101111101 << 23);
36 uint32_t opcode
= ctx
.opcode
[(int)aco_opcode::s_getpc_b64
];
/* Opcodes 55..59 are only expected on GFX9 here; the body of this branch is not
 * visible in this view. */
37 if (opcode
>= 55 && ctx
.chip_class
<= GFX9
) {
38 assert(ctx
.chip_class
== GFX9
&& opcode
< 60);
41 encoding
|= dest
<< 16;
42 encoding
|= opcode
<< 8;
43 out
.push_back(encoding
);
45 /* s_add_u32 dest[0], dest[0], ... */
46 encoding
= (0b10 << 30);
47 encoding
|= ctx
.opcode
[(int)aco_opcode::s_add_u32
] << 23;
48 encoding
|= dest
<< 16;
51 out
.push_back(encoding
);
/* Record the index of the literal dword pushed next; fix_constaddrs later adds
 * the final code size in bytes to it. The literal itself starts out as
 * offset - (instr_offset + 4). */
52 ctx
.constaddrs
.push_back(out
.size());
53 out
.push_back(-(instr_offset
+ 4) + offset
);
55 /* s_addc_u32 dest[1], dest[1], 0 */
56 encoding
= (0b10 << 30);
57 encoding
|= ctx
.opcode
[(int)aco_opcode::s_addc_u32
] << 23;
58 encoding
|= (dest
+ 1) << 16;
61 out
.push_back(encoding
);
/* Hardware opcode for the selected chip; (uint32_t)-1 means the table has no
 * encoding for this opcode, which is reported on stderr together with the
 * printed instruction. */
65 uint32_t opcode
= ctx
.opcode
[(int)instr
->opcode
];
66 if (opcode
== (uint32_t)-1) {
67 fprintf(stderr
, "Unsupported opcode: ");
68 aco_print_instr(instr
, stderr
);
/* Per-format encoding. Several case labels are not visible in this view. */
72 switch (instr
->format
) {
/* Scalar form with prefix 0b10 in bits 31:30: opcode at bit 23, first definition
 * at bit 16, operand 1 at bit 8, operand 0 in the low bits (presumably SOP2 -
 * case label not visible, confirm). */
74 uint32_t encoding
= (0b10 << 30);
75 encoding
|= opcode
<< 23;
76 encoding
|= !instr
->definitions
.empty() ? instr
->definitions
[0].physReg() << 16 : 0;
77 encoding
|= instr
->operands
.size() >= 2 ? instr
->operands
[1].physReg() << 8 : 0;
78 encoding
|= !instr
->operands
.empty() ? instr
->operands
[0].physReg() : 0;
79 out
.push_back(encoding
);
/* SOPK (instr is cast to SOPK_instruction below): prefix 0b1011 in bits 31:28,
 * opcode at bit 23, imm in the low bits. The register expression on lines 86-89
 * picks the first definition unless it is scc, otherwise the first operand
 * unless it is scc, otherwise 0; the statement head on line 85 is not visible. */
83 uint32_t encoding
= (0b1011 << 28);
84 encoding
|= opcode
<< 23;
86 !instr
->definitions
.empty() && !(instr
->definitions
[0].physReg() == scc
) ?
87 instr
->definitions
[0].physReg() << 16 :
88 !instr
->operands
.empty() && !(instr
->operands
[0].physReg() == scc
) ?
89 instr
->operands
[0].physReg() << 16 : 0;
90 encoding
|= static_cast<SOPK_instruction
*>(instr
)->imm
;
91 out
.push_back(encoding
);
/* Scalar form with the same 0b101111101 prefix as the s_getpc_b64 lowering above:
 * first definition at bit 16, opcode at bit 8, operand 0 in the low bits
 * (presumably SOP1 - confirm). */
95 uint32_t encoding
= (0b101111101 << 23);
96 if (opcode
>= 55 && ctx
.chip_class
<= GFX9
) {
97 assert(ctx
.chip_class
== GFX9
&& opcode
< 60);
100 encoding
|= !instr
->definitions
.empty() ? instr
->definitions
[0].physReg() << 16 : 0;
101 encoding
|= opcode
<< 8;
102 encoding
|= !instr
->operands
.empty() ? instr
->operands
[0].physReg() : 0;
103 out
.push_back(encoding
);
/* Scalar form with prefix 0b101111110: opcode at bit 16 and up to two source
 * operands, no destination field (presumably SOPC - confirm). */
107 uint32_t encoding
= (0b101111110 << 23);
108 encoding
|= opcode
<< 16;
109 encoding
|= instr
->operands
.size() == 2 ? instr
->operands
[1].physReg() << 8 : 0;
110 encoding
|= !instr
->operands
.empty() ? instr
->operands
[0].physReg() : 0;
111 out
.push_back(encoding
);
/* SOPP: prefix 0b101111111, opcode at bit 16, imm truncated to 16 bits. When
 * sopp->block != -1 the instruction is recorded in ctx.branches under its
 * dword index so fix_branches can OR in the branch offset later. */
115 SOPP_instruction
* sopp
= static_cast<SOPP_instruction
*>(instr
);
116 uint32_t encoding
= (0b101111111 << 23);
117 encoding
|= opcode
<< 16;
118 encoding
|= (uint16_t) sopp
->imm
;
119 if (sopp
->block
!= -1)
120 ctx
.branches
.insert({out
.size(), sopp
});
121 out
.push_back(encoding
);
/* SMEM: two dwords. `soe` is derived from the operand count (>= 3 for
 * instructions with a definition, >= 4 otherwise); `is_load` means the
 * instruction has a definition. */
125 SMEM_instruction
* smem
= static_cast<SMEM_instruction
*>(instr
);
126 bool soe
= instr
->operands
.size() >= (!instr
->definitions
.empty() ? 3 : 4);
127 bool is_load
= !instr
->definitions
.empty();
129 uint32_t encoding
= 0;
/* First dword: chip-dependent prefix plus the nv (bit 15) or dlc (bit 14) flag. */
131 if (ctx
.chip_class
<= GFX9
) {
132 encoding
= (0b110000 << 26);
133 assert(!smem
->dlc
); /* Device-level coherent is not supported on GFX9 and lower */
134 encoding
|= smem
->nv
? 1 << 15 : 0;
136 encoding
= (0b111101 << 26);
137 assert(!smem
->nv
); /* Non-volatile is not supported on GFX10 */
138 encoding
|= smem
->dlc
? 1 << 14 : 0;
141 encoding
|= opcode
<< 18;
142 encoding
|= smem
->glc
? 1 << 16 : 0;
144 if (ctx
.chip_class
<= GFX9
) {
145 if (instr
->operands
.size() >= 2)
146 encoding
|= instr
->operands
[1].isConstant() ? 1 << 17 : 0; /* IMM - immediate enable */
148 if (ctx
.chip_class
== GFX9
) {
149 encoding
|= soe
? 1 << 14 : 0;
152 if (is_load
|| instr
->operands
.size() >= 3) { /* SDATA */
153 encoding
|= (is_load
? instr
->definitions
[0].physReg().reg
: instr
->operands
[2].physReg().reg
) << 6;
155 if (instr
->operands
.size() >= 1) { /* SBASE */
156 encoding
|= instr
->operands
[0].physReg().reg
>> 1;
159 out
.push_back(encoding
);
/* Second dword: soffset ends up at bit 25. The declaration of `offset` and the
 * statement that merges it into `encoding` are not visible in this view. */
163 uint32_t soffset
= ctx
.chip_class
>= GFX10
164 ? sgpr_null
/* On GFX10 this is disabled by specifying SGPR_NULL */
165 : 0; /* On GFX9, it is disabled by the SOE bit (and it's not present on GFX8 and below) */
166 if (instr
->operands
.size() >= 2) {
167 const Operand
&op_off1
= instr
->operands
[1];
168 if (ctx
.chip_class
<= GFX9
) {
169 offset
= op_off1
.isConstant() ? op_off1
.constantValue() : op_off1
.physReg();
171 /* GFX10 only supports constants in OFFSET, so put the operand in SOFFSET if it's an SGPR */
172 if (op_off1
.isConstant()) {
173 offset
= op_off1
.constantValue();
175 soffset
= op_off1
.physReg();
176 assert(!soe
); /* There is no place to put the other SGPR offset, if any */
181 const Operand
&op_off2
= instr
->operands
.back();
182 assert(ctx
.chip_class
>= GFX9
); /* GFX8 and below don't support specifying a constant and an SGPR at the same time */
183 assert(!op_off2
.isConstant());
184 soffset
= op_off2
.physReg();
188 encoding
|= soffset
<< 25;
190 out
.push_back(encoding
);
/* Vector form with opcode at bit 25: definition (8 bits) at bit 17, operand 1
 * (8 bits) at bit 9, operand 0 in the low bits (presumably VOP2 - confirm). */
194 uint32_t encoding
= 0;
195 encoding
|= opcode
<< 25;
196 encoding
|= (0xFF & instr
->definitions
[0].physReg().reg
) << 17;
197 encoding
|= (0xFF & instr
->operands
[1].physReg().reg
) << 9;
198 encoding
|= instr
->operands
[0].physReg().reg
;
199 out
.push_back(encoding
);
/* Vector form with prefix 0b0111111: definition at bit 17, opcode at bit 9,
 * operand 0 in the low bits (presumably VOP1 - confirm). */
203 uint32_t encoding
= (0b0111111 << 25);
204 encoding
|= (0xFF & instr
->definitions
[0].physReg().reg
) << 17;
205 encoding
|= opcode
<< 9;
206 encoding
|= instr
->operands
[0].physReg().reg
;
207 out
.push_back(encoding
);
/* Vector form with prefix 0b0111110: opcode at bit 17, operand 1 at bit 9,
 * operand 0 in the low bits, no destination field (presumably VOPC - confirm). */
211 uint32_t encoding
= (0b0111110 << 25);
212 encoding
|= opcode
<< 17;
213 encoding
|= (0xFF & instr
->operands
[1].physReg().reg
) << 9;
214 encoding
|= instr
->operands
[0].physReg().reg
;
215 out
.push_back(encoding
);
/* VINTRP: one dword. The prefix depends on the chip; definition at bit 18,
 * opcode at bit 16, attribute at bit 10, component at bit 8. The low bits hold
 * a 2-bit constant for v_interp_mov_f32, otherwise the register of operand 0. */
218 case Format::VINTRP
: {
219 Interp_instruction
* interp
= static_cast<Interp_instruction
*>(instr
);
220 uint32_t encoding
= 0;
222 if (ctx
.chip_class
== GFX8
|| ctx
.chip_class
== GFX9
) {
223 encoding
= (0b110101 << 26); /* Vega ISA doc says 110010 but it's wrong */
225 encoding
= (0b110010 << 26);
229 encoding
|= (0xFF & instr
->definitions
[0].physReg().reg
) << 18;
230 encoding
|= opcode
<< 16;
231 encoding
|= interp
->attribute
<< 10;
232 encoding
|= interp
->component
<< 8;
233 if (instr
->opcode
== aco_opcode::v_interp_mov_f32
)
234 encoding
|= (0x3 & instr
->operands
[0].constantValue());
236 encoding
|= (0xFF & instr
->operands
[0].physReg().reg
);
237 out
.push_back(encoding
);
/* DS: two dwords. First dword: opcode and gds at bits 17/16 on GFX8/GFX9 (bits
 * 18/17 in the other visible variant), offset1 at bit 8, offset0 in the low 16
 * bits. Second dword: definition at bit 24, operand 2 at bit 16, operand 1 at
 * bit 8 (each replaced by 0 when the register is m0), operand 0 in the low bits.
 * NOTE(review): a reset of `encoding` between the two dwords is not visible here. */
241 DS_instruction
* ds
= static_cast<DS_instruction
*>(instr
);
242 uint32_t encoding
= (0b110110 << 26);
243 if (ctx
.chip_class
== GFX8
|| ctx
.chip_class
== GFX9
) {
244 encoding
|= opcode
<< 17;
245 encoding
|= (ds
->gds
? 1 : 0) << 16;
247 encoding
|= opcode
<< 18;
248 encoding
|= (ds
->gds
? 1 : 0) << 17;
250 encoding
|= ((0xFF & ds
->offset1
) << 8);
251 encoding
|= (0xFFFF & ds
->offset0
);
252 out
.push_back(encoding
);
254 unsigned reg
= !instr
->definitions
.empty() ? instr
->definitions
[0].physReg() : 0;
255 encoding
|= (0xFF & reg
) << 24;
256 reg
= instr
->operands
.size() >= 3 && !(instr
->operands
[2].physReg() == m0
) ? instr
->operands
[2].physReg() : 0;
257 encoding
|= (0xFF & reg
) << 16;
258 reg
= instr
->operands
.size() >= 2 && !(instr
->operands
[1].physReg() == m0
) ? instr
->operands
[1].physReg() : 0;
259 encoding
|= (0xFF & reg
) << 8;
260 encoding
|= (0xFF & instr
->operands
[0].physReg().reg
);
261 out
.push_back(encoding
);
/* MUBUF: two dwords. First dword: opcode at bit 18, lds/glc/idxen/offen flags,
 * slc at bit 17 on <= GFX9 or dlc at bit 15 on >= GFX10, 12-bit offset in the
 * low bits. Second dword: slc at bit 22 on >= GFX10, operand 2 at bit 24, tfe at
 * bit 23, operand 1 >> 2 at bit 16, data register (operand 3 if present,
 * otherwise definition 0) at bit 8, operand 0 in the low bits. */
264 case Format::MUBUF
: {
265 MUBUF_instruction
* mubuf
= static_cast<MUBUF_instruction
*>(instr
);
266 uint32_t encoding
= (0b111000 << 26);
267 encoding
|= opcode
<< 18;
268 encoding
|= (mubuf
->lds
? 1 : 0) << 16;
269 encoding
|= (mubuf
->glc
? 1 : 0) << 14;
270 encoding
|= (mubuf
->idxen
? 1 : 0) << 13;
271 encoding
|= (mubuf
->offen
? 1 : 0) << 12;
272 if (ctx
.chip_class
<= GFX9
) {
273 assert(!mubuf
->dlc
); /* Device-level coherent is not supported on GFX9 and lower */
274 encoding
|= (mubuf
->slc
? 1 : 0) << 17;
275 } else if (ctx
.chip_class
>= GFX10
) {
276 encoding
|= (mubuf
->dlc
? 1 : 0) << 15;
278 encoding
|= 0x0FFF & mubuf
->offset
;
279 out
.push_back(encoding
);
281 if (ctx
.chip_class
>= GFX10
) {
282 encoding
|= (mubuf
->slc
? 1 : 0) << 22;
284 encoding
|= instr
->operands
[2].physReg() << 24;
285 encoding
|= (mubuf
->tfe
? 1 : 0) << 23;
286 encoding
|= (instr
->operands
[1].physReg() >> 2) << 16;
287 unsigned reg
= instr
->operands
.size() > 3 ? instr
->operands
[3].physReg() : instr
->definitions
[0].physReg().reg
;
288 encoding
|= (0xFF & reg
) << 8;
289 encoding
|= (0xFF & instr
->operands
[0].physReg().reg
);
290 out
.push_back(encoding
);
/* MTBUF: two dwords. The format field (bit 19) comes from ac_get_tbuffer_format
 * applied to dfmt/nfmt. On <= GFX9 the opcode sits at bit 15; otherwise its
 * three low bits go to bit 16 of the first dword and the fourth bit is meant
 * for bit 21 of the second dword. */
293 case Format::MTBUF
: {
294 MTBUF_instruction
* mtbuf
= static_cast<MTBUF_instruction
*>(instr
);
296 uint32_t img_format
= ac_get_tbuffer_format(ctx
.chip_class
, mtbuf
->dfmt
, mtbuf
->nfmt
);
297 uint32_t encoding
= (0b111010 << 26);
298 assert(!mtbuf
->dlc
|| ctx
.chip_class
>= GFX10
);
299 encoding
|= (mtbuf
->dlc
? 1 : 0) << 15; /* DLC bit replaces one bit of the OPCODE on GFX10 */
300 encoding
|= (mtbuf
->glc
? 1 : 0) << 14;
301 encoding
|= (mtbuf
->idxen
? 1 : 0) << 13;
302 encoding
|= (mtbuf
->offen
? 1 : 0) << 12;
303 encoding
|= 0x0FFF & mtbuf
->offset
;
304 encoding
|= (img_format
<< 19); /* Handles both the GFX10 FORMAT and the old NFMT+DFMT */
306 if (ctx
.chip_class
<= GFX9
) {
307 encoding
|= opcode
<< 15;
309 encoding
|= (opcode
& 0x08) << 16; /* 3 LSBs of 4-bit OPCODE */
312 out
.push_back(encoding
);
315 encoding
|= instr
->operands
[2].physReg().reg
<< 24;
316 encoding
|= (mtbuf
->tfe
? 1 : 0) << 23;
317 encoding
|= (mtbuf
->slc
? 1 : 0) << 22;
318 encoding
|= (instr
->operands
[1].physReg().reg
>> 2) << 16;
319 unsigned reg
= instr
->operands
.size() > 3 ? instr
->operands
[3].physReg().reg
: instr
->definitions
[0].physReg().reg
;
320 encoding
|= (0xFF & reg
) << 8;
321 encoding
|= (0xFF & instr
->operands
[0].physReg().reg
);
323 if (ctx
.chip_class
>= GFX10
) {
/* NOTE(review): (opcode & 0x08) >> 4 is always 0, so bit 21 is never set by this
 * statement; a shift of 3 would bring bit 3 down to bit 0 - confirm the
 * intended shift amount. */
324 encoding
|= (((opcode
& 0x08) >> 4) << 21); /* MSB of 4-bit OPCODE */
327 out
.push_back(encoding
);
/* MIMG: two dwords. First dword: slc, opcode (bit 18), lwe, tfe, glc, unrm,
 * chip-dependent a16/da or r128/dim/dlc bits, and the 4-bit dmask at bit 8.
 * Second dword: operand 0 in the low 8 bits (VADDR), data register at bit 8,
 * operand 1 >> 2 at bit 16 (resource), operand 2 >> 2 at bit 21 when present
 * (sampler), d16 at bit 15 and, on >= GFX10, a16 at bit 14. */
331 MIMG_instruction
* mimg
= static_cast<MIMG_instruction
*>(instr
);
332 uint32_t encoding
= (0b111100 << 26);
333 encoding
|= mimg
->slc
? 1 << 25 : 0;
334 encoding
|= opcode
<< 18;
335 encoding
|= mimg
->lwe
? 1 << 17 : 0;
336 encoding
|= mimg
->tfe
? 1 << 16 : 0;
337 encoding
|= mimg
->glc
? 1 << 13 : 0;
338 encoding
|= mimg
->unrm
? 1 << 12 : 0;
339 if (ctx
.chip_class
<= GFX9
) {
340 assert(!mimg
->dlc
); /* Device-level coherent is not supported on GFX9 and lower */
342 encoding
|= mimg
->a16
? 1 << 15 : 0;
343 encoding
|= mimg
->da
? 1 << 14 : 0;
345 encoding
|= mimg
->r128
? 1 << 15 : 0; /* GFX10: A16 moved to 2nd word, R128 replaces it in 1st word */
346 encoding
|= mimg
->dim
<< 3; /* GFX10: dimensionality instead of declare array */
347 encoding
|= mimg
->dlc
? 1 << 7 : 0;
349 encoding
|= (0xF & mimg
->dmask
) << 8;
350 out
.push_back(encoding
);
351 encoding
= (0xFF & instr
->operands
[0].physReg().reg
); /* VADDR */
352 if (!instr
->definitions
.empty()) {
353 encoding
|= (0xFF & instr
->definitions
[0].physReg().reg
) << 8; /* VDATA */
354 } else if (instr
->operands
.size() == 4) {
355 encoding
|= (0xFF & instr
->operands
[3].physReg().reg
) << 8; /* VDATA */
357 encoding
|= (0x1F & (instr
->operands
[1].physReg() >> 2)) << 16; /* T# (resource) */
358 if (instr
->operands
.size() > 2)
359 encoding
|= (0x1F & (instr
->operands
[2].physReg() >> 2)) << 21; /* sampler */
361 assert(!mimg
->d16
|| ctx
.chip_class
>= GFX9
);
362 encoding
|= mimg
->d16
? 1 << 15 : 0;
363 if (ctx
.chip_class
>= GFX10
) {
364 encoding
|= mimg
->a16
? 1 << 14 : 0; /* GFX10: A16 still exists, but is in a different place */
367 out
.push_back(encoding
);
/* SCRATCH / GLOBAL (and, judging by the Format::FLAT checks below, FLAT): two
 * dwords. First dword: opcode at bit 18, offset limited to 13 bits on <= GFX9
 * or 12 bits otherwise, plus lds/glc/slc and, on >= GFX10, dlc. Second dword:
 * operand 0 in the low 8 bits, definition 0 at bit 24, operand 2 at bit 8,
 * operand 1 at bit 16 when defined; for non-FLAT formats with an undefined
 * operand 1 that field is 0x7F on <= GFX9 or sgpr_null otherwise. nv is bit 23.
 * The statements selected by the SCRATCH/GLOBAL tests on lines 383/385 are not
 * visible in this view. */
371 case Format::SCRATCH
:
372 case Format::GLOBAL
: {
373 FLAT_instruction
*flat
= static_cast<FLAT_instruction
*>(instr
);
374 uint32_t encoding
= (0b110111 << 26);
375 encoding
|= opcode
<< 18;
376 if (ctx
.chip_class
<= GFX9
) {
377 assert(flat
->offset
<= 0x1fff);
378 encoding
|= flat
->offset
& 0x1fff;
380 assert(flat
->offset
<= 0x0fff);
381 encoding
|= flat
->offset
& 0x0fff;
383 if (instr
->format
== Format::SCRATCH
)
385 else if (instr
->format
== Format::GLOBAL
)
387 encoding
|= flat
->lds
? 1 << 13 : 0;
388 encoding
|= flat
->glc
? 1 << 16 : 0;
389 encoding
|= flat
->slc
? 1 << 17 : 0;
390 if (ctx
.chip_class
>= GFX10
) {
392 encoding
|= flat
->dlc
? 1 << 12 : 0;
396 out
.push_back(encoding
);
397 encoding
= (0xFF & instr
->operands
[0].physReg());
398 if (!instr
->definitions
.empty())
399 encoding
|= (0xFF & instr
->definitions
[0].physReg()) << 24;
401 encoding
|= (0xFF & instr
->operands
[2].physReg()) << 8;
402 if (!instr
->operands
[1].isUndefined()) {
403 assert(ctx
.chip_class
>= GFX10
|| instr
->operands
[1].physReg() != 0x7F);
404 assert(instr
->format
!= Format::FLAT
);
405 encoding
|= instr
->operands
[1].physReg() << 16;
406 } else if (instr
->format
!= Format::FLAT
) {
407 if (ctx
.chip_class
<= GFX9
)
408 encoding
|= 0x7F << 16;
410 encoding
|= sgpr_null
<< 16;
412 encoding
|= flat
->nv
? 1 << 23 : 0;
413 out
.push_back(encoding
);
/* Export: two dwords. First dword: chip-dependent prefix, valid_mask (bit 12),
 * done (bit 11), compressed (bit 10), dest at bit 4, enabled_mask in the low
 * bits. Second dword: the registers of operands 0..3, 8 bits each. The
 * declaration of `encoding` for this section is not visible in this view. */
417 Export_instruction
* exp
= static_cast<Export_instruction
*>(instr
);
419 if (ctx
.chip_class
<= GFX9
) {
420 encoding
= (0b110001 << 26);
421 } else if (ctx
.chip_class
>= GFX10
) {
422 encoding
= (0b111110 << 26);
425 encoding
|= exp
->valid_mask
? 0b1 << 12 : 0;
426 encoding
|= exp
->done
? 0b1 << 11 : 0;
427 encoding
|= exp
->compressed
? 0b1 << 10 : 0;
428 encoding
|= exp
->dest
<< 4;
429 encoding
|= exp
->enabled_mask
;
430 out
.push_back(encoding
);
431 encoding
= 0xFF & exp
->operands
[0].physReg().reg
;
432 encoding
|= (0xFF & exp
->operands
[1].physReg().reg
) << 8;
433 encoding
|= (0xFF & exp
->operands
[2].physReg().reg
) << 16;
434 encoding
|= (0xFF & exp
->operands
[3].physReg().reg
) << 24;
435 out
.push_back(encoding
);
/* Pseudo formats must not reach the assembler. */
439 case Format::PSEUDO_BARRIER
:
440 unreachable("Pseudo instructions should be lowered before assembly.");
/* VOP3A is tested as a bit flag on the format. The opcode is rebased according
 * to the base format flag that is also set: VOP2 +0x100, VOP1 +0x140 (<= GFX9)
 * or +0x180, VOPC +0, VINTRP +0x270. First dword: chip-dependent prefix, opcode
 * at bit 16, clamp at bit 15, abs[i] at bits 8..10, a second definition at
 * bit 8, definition 0 in the low 8 bits. Second dword: operand i at bit 9*i
 * (or a 2-bit constant for v_interp_mov_f32), omod at bit 27, neg[i] at bits
 * 29..31. */
442 if ((uint16_t) instr
->format
& (uint16_t) Format::VOP3A
) {
443 VOP3A_instruction
* vop3
= static_cast<VOP3A_instruction
*>(instr
);
445 if ((uint16_t) instr
->format
& (uint16_t) Format::VOP2
) {
446 opcode
= opcode
+ 0x100;
447 } else if ((uint16_t) instr
->format
& (uint16_t) Format::VOP1
) {
448 if (ctx
.chip_class
<= GFX9
) {
449 opcode
= opcode
+ 0x140;
451 /* RDNA ISA doc says this is 0x140, but that doesn't work */
452 opcode
= opcode
+ 0x180;
454 } else if ((uint16_t) instr
->format
& (uint16_t) Format::VOPC
) {
455 opcode
= opcode
+ 0x0;
456 } else if ((uint16_t) instr
->format
& (uint16_t) Format::VINTRP
) {
457 opcode
= opcode
+ 0x270;
462 if (ctx
.chip_class
<= GFX9
) {
463 encoding
= (0b110100 << 26);
464 } else if (ctx
.chip_class
== GFX10
) {
465 encoding
= (0b110101 << 26);
468 encoding
|= opcode
<< 16;
469 encoding
|= (vop3
->clamp
? 1 : 0) << 15;
470 for (unsigned i
= 0; i
< 3; i
++)
471 encoding
|= vop3
->abs
[i
] << (8+i
);
472 if (instr
->definitions
.size() == 2)
473 encoding
|= instr
->definitions
[1].physReg() << 8;
474 encoding
|= (0xFF & instr
->definitions
[0].physReg().reg
);
475 out
.push_back(encoding
);
477 if (instr
->opcode
== aco_opcode::v_interp_mov_f32
) {
478 encoding
= 0x3 & instr
->operands
[0].constantValue();
480 for (unsigned i
= 0; i
< instr
->operands
.size(); i
++)
481 encoding
|= instr
->operands
[i
].physReg() << (i
* 9);
483 encoding
|= vop3
->omod
<< 27;
484 for (unsigned i
= 0; i
< 3; i
++)
485 encoding
|= vop3
->neg
[i
] << (29+i
);
486 out
.push_back(encoding
);
/* DPP: operand 0 is saved and replaced by PhysReg 250, bit 14 is cleared from
 * the format, and emit_instruction is called again to produce the base
 * encoding. A trailing control dword is then appended: row_mask, bank_mask,
 * abs/neg for sources 1 and 0, bound_ctrl, dpp_ctrl at bit 8 and the saved
 * operand's register in the low 8 bits.
 * NOTE(review): no restore of instr->operands[0] or instr->format is visible here. */
489 } else if (instr
->isDPP()){
490 /* first emit the instruction without the DPP operand */
491 Operand dpp_op
= instr
->operands
[0];
492 instr
->operands
[0] = Operand(PhysReg
{250}, v1
);
493 instr
->format
= (Format
) ((uint32_t) instr
->format
& ~(1 << 14));
494 emit_instruction(ctx
, out
, instr
);
495 DPP_instruction
* dpp
= static_cast<DPP_instruction
*>(instr
);
496 uint32_t encoding
= (0xF & dpp
->row_mask
) << 28;
497 encoding
|= (0xF & dpp
->bank_mask
) << 24;
498 encoding
|= dpp
->abs
[1] << 23;
499 encoding
|= dpp
->neg
[1] << 22;
500 encoding
|= dpp
->abs
[0] << 21;
501 encoding
|= dpp
->neg
[0] << 20;
502 encoding
|= dpp
->bound_ctrl
<< 19;
503 encoding
|= dpp
->dpp_ctrl
<< 8;
504 encoding
|= (0xFF) & dpp_op
.physReg().reg
;
505 out
.push_back(encoding
);
/* Any format not handled above is a hard error. */
508 unreachable("unimplemented instruction format");
512 /* append literal dword */
/* Every operand that reports isLiteral() contributes its constant value as an
 * additional dword after the instruction words. */
513 for (const Operand
& op
: instr
->operands
) {
514 if (op
.isLiteral()) {
515 out
.push_back(op
.constantValue());
/* Encodes every instruction of `block`, in order, into `out` via emit_instruction.
 *
 * The statements around the emit_instruction call print the instruction and the
 * dwords it produced (from start_idx up to the new end of `out`) to stderr.
 * NOTE(review): original lines 524, 529, 531 and 534 are absent from this view,
 * so it cannot be determined from here whether this debug output is compiled in
 * (e.g. it may be wrapped in preprocessor guards) - confirm against the full file. */
521 void emit_block(asm_context
& ctx
, std::vector
<uint32_t>& out
, Block
& block
)
523 for (aco_ptr
<Instruction
>& instr
: block
.instructions
) {
/* Index of the first dword this instruction will occupy. */
525 int start_idx
= out
.size();
526 std::cerr
<< "Encoding:\t" << std::endl
;
527 aco_print_instr(&*instr
, stderr
);
528 std::cerr
<< std::endl
;
530 emit_instruction(ctx
, out
, instr
.get());
/* Dump each newly appended dword as zero-padded 8-digit hex. */
532 for (int i
= start_idx
; i
< out
.size(); i
++)
533 std::cerr
<< "encoding: " << "0x" << std::setfill('0') << std::setw(8) << std::hex
<< out
[i
] << std::endl
;
/* Post-processes export instructions before assembly.
 *
 * Blocks are visited from last to first and, inside each block, instructions
 * from last to first. `endBlock` and `exported` track what has been seen in the
 * current block; if the scan ends with endBlock set and exported clear, a null
 * export is created and inserted in front of the block's last instruction.
 *
 * ctx and out are not used by the visible code.
 * NOTE(review): several original lines inside the scan loop (the bodies of the
 * export branches, of the exec-definition branch and of the s_endpgm branch,
 * plus the iterator advance) are not visible in this view; only the conditions
 * and the `valid_mask = true` assignment are. */
538 void fix_exports(asm_context
& ctx
, std::vector
<uint32_t>& out
, Program
* program
)
540 for (int idx
= program
->blocks
.size() - 1; idx
>= 0; idx
--) {
541 Block
& block
= program
->blocks
[idx
];
542 std::vector
<aco_ptr
<Instruction
>>::reverse_iterator it
= block
.instructions
.rbegin();
543 bool endBlock
= false;
544 bool exported
= false;
545 while ( it
!= block
.instructions
.rend())
/* An export only counts once endBlock has been set. */
547 if ((*it
)->format
== Format::EXP
&& endBlock
) {
548 Export_instruction
* exp
= static_cast<Export_instruction
*>((*it
).get());
/* For hw_vs stages only exports whose dest lies in
 * [V_008DFC_SQ_EXP_POS, V_008DFC_SQ_EXP_POS + 3] are considered. */
549 if (program
->stage
& hw_vs
) {
550 if (exp
->dest
>= V_008DFC_SQ_EXP_POS
&& exp
->dest
<= (V_008DFC_SQ_EXP_POS
+ 3)) {
557 exp
->valid_mask
= true;
/* An instruction whose first definition is the exec register. */
561 } else if ((*it
)->definitions
.size() && (*it
)->definitions
[0].physReg() == exec
)
563 else if ((*it
)->opcode
== aco_opcode::s_endpgm
) {
/* Nothing to insert when the block has no end marker or already exported. */
570 if (!endBlock
|| exported
)
572 /* we didn't find an Export instruction and have to insert a null export */
573 aco_ptr
<Export_instruction
> exp
{create_instruction
<Export_instruction
>(aco_opcode::exp
, Format::EXP
, 4, 0)};
574 for (unsigned i
= 0; i
< 4; i
++)
575 exp
->operands
[i
] = Operand(v1
);
576 exp
->enabled_mask
= 0;
577 exp
->compressed
= false;
/* valid_mask is set only for hw_fs stages; the destination is 9 for hw_fs and
 * V_008DFC_SQ_EXP_POS otherwise. */
579 exp
->valid_mask
= program
->stage
& hw_fs
;
580 if (program
->stage
& hw_fs
)
581 exp
->dest
= 9; /* NULL */
583 exp
->dest
= V_008DFC_SQ_EXP_POS
;
584 /* insert the null export 1 instruction before endpgm */
585 block
.instructions
.insert(block
.instructions
.end() - 1, std::move(exp
));
589 void fix_branches(asm_context
& ctx
, std::vector
<uint32_t>& out
)
591 for (std::pair
<int, SOPP_instruction
*> branch
: ctx
.branches
)
593 int offset
= (int)ctx
.program
->blocks
[branch
.second
->block
].offset
- branch
.first
- 1;
594 out
[branch
.first
] |= (uint16_t) offset
;
598 void fix_constaddrs(asm_context
& ctx
, std::vector
<uint32_t>& out
)
600 for (unsigned addr
: ctx
.constaddrs
)
601 out
[addr
] += out
.size() * 4u;
604 unsigned emit_program(Program
* program
,
605 std::vector
<uint32_t>& code
)
607 asm_context
ctx(program
);
609 if (program
->stage
& (hw_vs
| hw_fs
))
610 fix_exports(ctx
, code
, program
);
612 for (Block
& block
: program
->blocks
) {
613 block
.offset
= code
.size();
614 emit_block(ctx
, code
, block
);
617 fix_branches(ctx
, code
);
618 fix_constaddrs(ctx
, code
);
620 unsigned constant_data_offset
= code
.size() * sizeof(uint32_t);
621 while (program
->constant_data
.size() % 4u)
622 program
->constant_data
.push_back(0);
623 /* Copy constant data */
624 code
.insert(code
.end(), (uint32_t*)program
->constant_data
.data(),
625 (uint32_t*)(program
->constant_data
.data() + program
->constant_data
.size()));
627 return constant_data_offset
;