4 #include "common/sid.h"
5 #include "ac_shader_util.h"
/* Target GPU generation; selects which opcode translation table is used. */
enum chip_class chip_class;
/* Maps code-word index of an emitted branch -> its SOPP instruction, so
 * fix_branches() can patch the 16-bit relative offset once all block
 * offsets are known. */
std::map<int, SOPP_instruction*> branches;
/* Code-word indices of p_constaddr literal dwords, patched later by
 * fix_constaddrs() once the total code size is known. */
std::vector<unsigned> constaddrs;
/* Per-opcode hardware encodings for the selected generation
 * (points at instr_info.opcode_gfx9 or .opcode_gfx10; -1 = unsupported). */
const int16_t* opcode;
// TODO: keep track of branch instructions referring blocks
// and, when emitting the block, correct the offset in instr
17 asm_context(Program
* program
) : program(program
), chip_class(program
->chip_class
) {
18 if (chip_class
<= GFX9
)
19 opcode
= &instr_info
.opcode_gfx9
[0];
20 else if (chip_class
== GFX10
)
21 opcode
= &instr_info
.opcode_gfx10
[0];
/* Encode one instruction into raw machine-code dwords and append them to
 * `out` (one uint32_t per dword).
 *
 * ctx:   assembler state (opcode table, chip_class, branch/constaddr fixups)
 * out:   output code buffer
 * instr: instruction to encode
 *
 * NOTE(review): this extraction has lost lines throughout (missing
 * `case Format::...:` labels, braces, `break`/`return` statements and some
 * `else` keywords).  Surviving tokens are preserved untouched below;
 * comments mark where the original structure evidently continued —
 * confirm every flagged spot against the full file before building. */
void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction* instr)
   /* Byte offset of this instruction from program start; needed for the
    * PC-relative p_constaddr lowering below. */
   uint32_t instr_offset = out.size() * 4u;

   /* lower remaining pseudo-instructions */
   if (instr->opcode == aco_opcode::p_constaddr) {
      unsigned dest = instr->definitions[0].physReg();
      unsigned offset = instr->operands[0].constantValue();

      /* s_getpc_b64 dest[0:1] */
      uint32_t encoding = (0b101111101 << 23);
      uint32_t opcode = ctx.opcode[(int)aco_opcode::s_getpc_b64];
      if (opcode >= 55 && ctx.chip_class <= GFX9) {
         assert(ctx.chip_class == GFX9 && opcode < 60);
         /* NOTE(review): GFX9 opcode remap + closing brace missing here */
      encoding |= dest << 16;
      encoding |= opcode << 8;
      out.push_back(encoding);

      /* s_add_u32 dest[0], dest[0], ... */
      encoding = (0b10 << 30);
      encoding |= ctx.opcode[(int)aco_opcode::s_add_u32] << 23;
      encoding |= dest << 16;
      /* NOTE(review): operand-field lines appear to be missing here */
      out.push_back(encoding);
      /* Record where the literal lands so fix_constaddrs() can patch it. */
      ctx.constaddrs.push_back(out.size());
      /* Bias the literal by -(PC after s_getpc_b64); fix_constaddrs() later
       * adds the total code size so PC + literal points into constant data. */
      out.push_back(-(instr_offset + 4) + offset);

      /* s_addc_u32 dest[1], dest[1], 0 */
      encoding = (0b10 << 30);
      encoding |= ctx.opcode[(int)aco_opcode::s_addc_u32] << 23;
      encoding |= (dest + 1) << 16;
      /* NOTE(review): operand lines and the early return appear to be missing */
      out.push_back(encoding);

   /* Translate the IR opcode to this generation's hardware opcode;
    * (uint32_t)-1 marks opcodes the generation does not support. */
   uint32_t opcode = ctx.opcode[(int)instr->opcode];
   if (opcode == (uint32_t)-1) {
      fprintf(stderr, "Unsupported opcode: ");
      aco_print_instr(instr, stderr);
      /* NOTE(review): abort/closing brace missing here */

   switch (instr->format) {
   /* NOTE(review): `case Format::SOP2:` label missing here */
      uint32_t encoding = (0b10 << 30);
      encoding |= opcode << 23;
      encoding |= !instr->definitions.empty() ? instr->definitions[0].physReg() << 16 : 0; /* SDST */
      encoding |= instr->operands.size() >= 2 ? instr->operands[1].physReg() << 8 : 0;     /* SSRC1 */
      encoding |= !instr->operands.empty() ? instr->operands[0].physReg() : 0;             /* SSRC0 */
      out.push_back(encoding);
   /* NOTE(review): `case Format::SOPK:` label missing here */
      uint32_t encoding = (0b1011 << 28);
      encoding |= opcode << 23;
      /* SDST: the non-SCC definition if any, else the non-SCC operand.
       * NOTE(review): the leading `encoding |=` of this statement is missing —
       * as written the ternary chain is a no-op expression statement. */
      !instr->definitions.empty() && !(instr->definitions[0].physReg() == scc) ?
         instr->definitions[0].physReg() << 16 :
         !instr->operands.empty() && !(instr->operands[0].physReg() == scc) ?
         instr->operands[0].physReg() << 16 : 0;
      encoding |= static_cast<SOPK_instruction*>(instr)->imm; /* 16-bit SIMM */
      out.push_back(encoding);
   /* NOTE(review): `case Format::SOP1:` label missing here */
      uint32_t encoding = (0b101111101 << 23);
      if (opcode >= 55 && ctx.chip_class <= GFX9) {
         assert(ctx.chip_class == GFX9 && opcode < 60);
         /* NOTE(review): GFX9 opcode remap + closing brace missing here */
      encoding |= !instr->definitions.empty() ? instr->definitions[0].physReg() << 16 : 0;
      encoding |= opcode << 8;
      encoding |= !instr->operands.empty() ? instr->operands[0].physReg() : 0;
      out.push_back(encoding);
   /* NOTE(review): `case Format::SOPC:` label missing here */
      uint32_t encoding = (0b101111110 << 23);
      encoding |= opcode << 16;
      encoding |= instr->operands.size() == 2 ? instr->operands[1].physReg() << 8 : 0;
      encoding |= !instr->operands.empty() ? instr->operands[0].physReg() : 0;
      out.push_back(encoding);
   /* NOTE(review): `case Format::SOPP:` label missing here */
      SOPP_instruction* sopp = static_cast<SOPP_instruction*>(instr);
      uint32_t encoding = (0b101111111 << 23);
      encoding |= opcode << 16;
      encoding |= (uint16_t) sopp->imm;
      /* Block-targeted branches get their SIMM patched by fix_branches(). */
      if (sopp->block != -1)
         ctx.branches.insert({out.size(), sopp});
      out.push_back(encoding);
   /* NOTE(review): `case Format::SMEM:` label missing here */
      SMEM_instruction* smem = static_cast<SMEM_instruction*>(instr);
      /* soe: an extra SGPR-offset operand trails the regular operands. */
      bool soe = instr->operands.size() >= (!instr->definitions.empty() ? 3 : 4);
      bool is_load = !instr->definitions.empty();
      uint32_t encoding = 0;

      if (ctx.chip_class <= GFX9) {
         encoding = (0b110000 << 26);
         assert(!smem->dlc); /* Device-level coherent is not supported on GFX9 and lower */
         encoding |= smem->nv ? 1 << 15 : 0;
      /* NOTE(review): `} else {` (GFX10 path) missing here */
         encoding = (0b111101 << 26);
         assert(!smem->nv); /* Non-volatile is not supported on GFX10 */
         encoding |= smem->dlc ? 1 << 14 : 0;

      encoding |= opcode << 18;
      encoding |= smem->glc ? 1 << 16 : 0;

      if (ctx.chip_class <= GFX9) {
         if (instr->operands.size() >= 2)
            encoding |= instr->operands[1].isConstant() ? 1 << 17 : 0; /* IMM - immediate enable */
      if (ctx.chip_class == GFX9) {
         encoding |= soe ? 1 << 14 : 0;

      if (is_load || instr->operands.size() >= 3) { /* SDATA */
         encoding |= (is_load ? instr->definitions[0].physReg().reg : instr->operands[2].physReg().reg) << 6;
      if (instr->operands.size() >= 1) { /* SBASE */
         encoding |= instr->operands[0].physReg().reg >> 1;

      out.push_back(encoding);
      /* Second dword: OFFSET / SOFFSET fields. */
      uint32_t soffset = ctx.chip_class >= GFX10
                         ? sgpr_null /* On GFX10 this is disabled by specifying SGPR_NULL */
                         : 0; /* On GFX9, it is disabled by the SOE bit (and it's not present on GFX8 and below) */
      if (instr->operands.size() >= 2) {
         const Operand &op_off1 = instr->operands[1];
         if (ctx.chip_class <= GFX9) {
            /* NOTE(review): `offset` has no visible declaration in this scope —
             * its declaration (and the `} else {`) appears to have been dropped. */
            offset = op_off1.isConstant() ? op_off1.constantValue() : op_off1.physReg();
            /* GFX10 only supports constants in OFFSET, so put the operand in SOFFSET if it's an SGPR */
            if (op_off1.isConstant()) {
               offset = op_off1.constantValue();
            /* NOTE(review): `} else {` missing here */
               soffset = op_off1.physReg();
               assert(!soe); /* There is no place to put the other SGPR offset, if any */

      /* NOTE(review): the `if (soe)` guard for the trailing SGPR offset
       * appears to be missing here */
         const Operand &op_off2 = instr->operands.back();
         assert(ctx.chip_class >= GFX9); /* GFX8 and below don't support specifying a constant and an SGPR at the same time */
         assert(!op_off2.isConstant());
         soffset = op_off2.physReg();

      encoding |= soffset << 25;
      /* NOTE(review): ORing `offset` into the low bits appears to be missing */
      out.push_back(encoding);
   /* NOTE(review): `case Format::VOP2:` label missing here */
      uint32_t encoding = 0;
      encoding |= opcode << 25;
      encoding |= (0xFF & instr->definitions[0].physReg().reg) << 17; /* VDST */
      encoding |= (0xFF & instr->operands[1].physReg().reg) << 9;     /* VSRC1 */
      encoding |= instr->operands[0].physReg().reg;                   /* SRC0 */
      out.push_back(encoding);
   /* NOTE(review): `case Format::VOP1:` label missing here */
      uint32_t encoding = (0b0111111 << 25);
      encoding |= (0xFF & instr->definitions[0].physReg().reg) << 17; /* VDST */
      encoding |= opcode << 9;
      encoding |= instr->operands[0].physReg().reg;                   /* SRC0 */
      out.push_back(encoding);
   /* NOTE(review): `case Format::VOPC:` label missing here */
      uint32_t encoding = (0b0111110 << 25);
      encoding |= opcode << 17;
      encoding |= (0xFF & instr->operands[1].physReg().reg) << 9;     /* VSRC1 */
      encoding |= instr->operands[0].physReg().reg;                   /* SRC0 */
      out.push_back(encoding);
   case Format::VINTRP: {
      Interp_instruction* interp = static_cast<Interp_instruction*>(instr);
      uint32_t encoding = 0;

      if (ctx.chip_class == GFX8 || ctx.chip_class == GFX9) {
         encoding = (0b110101 << 26); /* Vega ISA doc says 110010 but it's wrong */
      /* NOTE(review): `} else {` missing here */
         encoding = (0b110010 << 26);

      encoding |= (0xFF & instr->definitions[0].physReg().reg) << 18; /* VDST */
      encoding |= opcode << 16;
      encoding |= interp->attribute << 10;
      encoding |= interp->component << 8;
      if (instr->opcode == aco_opcode::v_interp_mov_f32)
         encoding |= (0x3 & instr->operands[0].constantValue());
      /* NOTE(review): `else` missing here */
         encoding |= (0xFF & instr->operands[0].physReg().reg);
      out.push_back(encoding);
   /* NOTE(review): `case Format::DS:` label missing here */
      DS_instruction* ds = static_cast<DS_instruction*>(instr);
      uint32_t encoding = (0b110110 << 26);
      /* Opcode and GDS bit sit one position higher outside GFX8/GFX9. */
      if (ctx.chip_class == GFX8 || ctx.chip_class == GFX9) {
         encoding |= opcode << 17;
         encoding |= (ds->gds ? 1 : 0) << 16;
      /* NOTE(review): `} else {` missing here */
         encoding |= opcode << 18;
         encoding |= (ds->gds ? 1 : 0) << 17;

      encoding |= ((0xFF & ds->offset1) << 8);
      encoding |= (0xFFFF & ds->offset0);
      out.push_back(encoding);
      /* Second dword: VDST / DATA1 / DATA0 / ADDR.
       * NOTE(review): an `encoding = 0;` reset appears to be missing here. */
      unsigned reg = !instr->definitions.empty() ? instr->definitions[0].physReg() : 0;
      encoding |= (0xFF & reg) << 24; /* VDST */
      reg = instr->operands.size() >= 3 && !(instr->operands[2].physReg() == m0) ? instr->operands[2].physReg() : 0;
      encoding |= (0xFF & reg) << 16; /* DATA1 (m0 encodes as 0) */
      reg = instr->operands.size() >= 2 && !(instr->operands[1].physReg() == m0) ? instr->operands[1].physReg() : 0;
      encoding |= (0xFF & reg) << 8;  /* DATA0 (m0 encodes as 0) */
      encoding |= (0xFF & instr->operands[0].physReg().reg); /* ADDR */
      out.push_back(encoding);
   case Format::MUBUF: {
      MUBUF_instruction* mubuf = static_cast<MUBUF_instruction*>(instr);
      uint32_t encoding = (0b111000 << 26);
      encoding |= opcode << 18;
      encoding |= (mubuf->lds ? 1 : 0) << 16;
      encoding |= (mubuf->glc ? 1 : 0) << 14;
      encoding |= (mubuf->idxen ? 1 : 0) << 13;
      encoding |= (mubuf->offen ? 1 : 0) << 12;
      if (ctx.chip_class <= GFX9) {
         assert(!mubuf->dlc); /* Device-level coherent is not supported on GFX9 and lower */
         encoding |= (mubuf->slc ? 1 : 0) << 17;
      } else if (ctx.chip_class >= GFX10) {
         encoding |= (mubuf->dlc ? 1 : 0) << 15;
      /* NOTE(review): closing brace missing here */
      encoding |= 0x0FFF & mubuf->offset;
      out.push_back(encoding);
      /* Second dword.
       * NOTE(review): an `encoding = 0;` reset appears to be missing here. */
      if (ctx.chip_class >= GFX10) {
         encoding |= (mubuf->slc ? 1 : 0) << 22; /* GFX10: SLC lives in dword 2 */
      /* NOTE(review): closing brace missing here */
      encoding |= instr->operands[2].physReg() << 24;        /* SOFFSET */
      encoding |= (mubuf->tfe ? 1 : 0) << 23;
      encoding |= (instr->operands[1].physReg() >> 2) << 16; /* SRSRC, in units of 4 SGPRs */
      unsigned reg = instr->operands.size() > 3 ? instr->operands[3].physReg() : instr->definitions[0].physReg().reg;
      encoding |= (0xFF & reg) << 8;                         /* VDATA */
      encoding |= (0xFF & instr->operands[0].physReg().reg); /* VADDR */
      out.push_back(encoding);
   case Format::MTBUF: {
      MTBUF_instruction* mtbuf = static_cast<MTBUF_instruction*>(instr);

      uint32_t img_format = ac_get_tbuffer_format(ctx.chip_class, mtbuf->dfmt, mtbuf->nfmt);
      uint32_t encoding = (0b111010 << 26);
      assert(!mtbuf->dlc || ctx.chip_class >= GFX10);
      encoding |= (mtbuf->dlc ? 1 : 0) << 15; /* DLC bit replaces one bit of the OPCODE on GFX10 */
      encoding |= (mtbuf->glc ? 1 : 0) << 14;
      encoding |= (mtbuf->idxen ? 1 : 0) << 13;
      encoding |= (mtbuf->offen ? 1 : 0) << 12;
      encoding |= 0x0FFF & mtbuf->offset;
      encoding |= (img_format << 19); /* Handles both the GFX10 FORMAT and the old NFMT+DFMT */

      if (ctx.chip_class <= GFX9) {
         encoding |= opcode << 15;
      /* NOTE(review): `} else {` missing here */
         encoding |= (opcode & 0x07) << 16; /* 3 LSBs of 4-bit OPCODE */

      out.push_back(encoding);
      /* Second dword.
       * NOTE(review): an `encoding = 0;` reset appears to be missing here. */
      encoding |= instr->operands[2].physReg().reg << 24;        /* SOFFSET */
      encoding |= (mtbuf->tfe ? 1 : 0) << 23;
      encoding |= (mtbuf->slc ? 1 : 0) << 22;
      encoding |= (instr->operands[1].physReg().reg >> 2) << 16; /* SRSRC */
      unsigned reg = instr->operands.size() > 3 ? instr->operands[3].physReg().reg : instr->definitions[0].physReg().reg;
      encoding |= (0xFF & reg) << 8;                             /* VDATA */
      encoding |= (0xFF & instr->operands[0].physReg().reg);     /* VADDR */

      if (ctx.chip_class >= GFX10) {
         /* NOTE(review): (opcode & 0x08) >> 4 is always 0 — extracting the
          * MSB of a 4-bit opcode should shift right by 3; verify upstream. */
         encoding |= (((opcode & 0x08) >> 4) << 21); /* MSB of 4-bit OPCODE */

      out.push_back(encoding);
   /* NOTE(review): `case Format::MIMG:` label missing here */
      MIMG_instruction* mimg = static_cast<MIMG_instruction*>(instr);
      uint32_t encoding = (0b111100 << 26);
      encoding |= mimg->slc ? 1 << 25 : 0;
      encoding |= opcode << 18;
      encoding |= mimg->lwe ? 1 << 17 : 0;
      encoding |= mimg->tfe ? 1 << 16 : 0;
      encoding |= mimg->glc ? 1 << 13 : 0;
      encoding |= mimg->unrm ? 1 << 12 : 0;
      if (ctx.chip_class <= GFX9) {
         assert(!mimg->dlc); /* Device-level coherent is not supported on GFX9 and lower */
         encoding |= mimg->a16 ? 1 << 15 : 0;
         encoding |= mimg->da ? 1 << 14 : 0;
      /* NOTE(review): `} else {` missing here */
         encoding |= mimg->r128 ? 1 << 15 : 0; /* GFX10: A16 moved to 2nd word, R128 replaces it in 1st word */
         encoding |= mimg->dim << 3; /* GFX10: dimensionality instead of declare array */
         encoding |= mimg->dlc ? 1 << 7 : 0;
      encoding |= (0xF & mimg->dmask) << 8;
      out.push_back(encoding);
      encoding = (0xFF & instr->operands[0].physReg().reg); /* VADDR */
      if (!instr->definitions.empty()) {
         encoding |= (0xFF & instr->definitions[0].physReg().reg) << 8; /* VDATA */
      } else if (instr->operands.size() == 4) {
         encoding |= (0xFF & instr->operands[3].physReg().reg) << 8; /* VDATA */
      encoding |= (0x1F & (instr->operands[1].physReg() >> 2)) << 16; /* T# (resource) */
      if (instr->operands.size() > 2)
         encoding |= (0x1F & (instr->operands[2].physReg() >> 2)) << 21; /* sampler */

      assert(!mimg->d16 || ctx.chip_class >= GFX9);
      encoding |= mimg->d16 ? 1 << 15 : 0;
      if (ctx.chip_class >= GFX10) {
         encoding |= mimg->a16 ? 1 << 14 : 0; /* GFX10: A16 still exists, but is in a different place */

      out.push_back(encoding);
   /* NOTE(review): a `case Format::FLAT:` label appears to be missing here */
   case Format::SCRATCH:
   case Format::GLOBAL: {
      FLAT_instruction *flat = static_cast<FLAT_instruction*>(instr);
      uint32_t encoding = (0b110111 << 26);
      encoding |= opcode << 18;
      encoding |= flat->offset & 0x1fff;
      if (instr->format == Format::SCRATCH)
      /* NOTE(review): SEG-field statement for SCRATCH missing here */
      else if (instr->format == Format::GLOBAL)
      /* NOTE(review): SEG-field statement for GLOBAL missing here */
      encoding |= flat->lds ? 1 << 13 : 0;
      /* NOTE(review): glc and slc below reuse bit 13 (same as lds) — these
       * shifts look corrupted (GLC/SLC are distinct bits); verify upstream. */
      encoding |= flat->glc ? 1 << 13 : 0;
      encoding |= flat->slc ? 1 << 13 : 0;
      out.push_back(encoding);
      encoding = (0xFF & instr->operands[0].physReg().reg); /* ADDR */
      if (!instr->definitions.empty())
         encoding |= (0xFF & instr->definitions[0].physReg().reg) << 24; /* VDST */
      /* NOTE(review): an `else` for the store-data path appears to be missing */
         encoding |= (0xFF & instr->operands[2].physReg().reg) << 8;
      if (!instr->operands[1].isUndefined()) {
         assert(instr->operands[1].physReg() != 0x7f);
         assert(instr->format != Format::FLAT);
         encoding |= instr->operands[1].physReg() << 16; /* SADDR */
      } else if (instr->format != Format::FLAT) {
         encoding |= 0x7F << 16; /* SADDR disabled */
      /* NOTE(review): closing brace missing here */
      encoding |= flat->nv ? 1 << 23 : 0;
      out.push_back(encoding);
   /* NOTE(review): `case Format::EXP:` label missing here */
      Export_instruction* exp = static_cast<Export_instruction*>(instr);
      uint32_t encoding = (0b110001 << 26);
      encoding |= exp->valid_mask ? 0b1 << 12 : 0;
      encoding |= exp->done ? 0b1 << 11 : 0;
      encoding |= exp->compressed ? 0b1 << 10 : 0;
      encoding |= exp->dest << 4;
      encoding |= exp->enabled_mask;
      out.push_back(encoding);
      /* Second dword: the four VSRC registers. */
      encoding = 0xFF & exp->operands[0].physReg().reg;
      encoding |= (0xFF & exp->operands[1].physReg().reg) << 8;
      encoding |= (0xFF & exp->operands[2].physReg().reg) << 16;
      encoding |= (0xFF & exp->operands[3].physReg().reg) << 24;
      out.push_back(encoding);
   /* NOTE(review): a `case Format::PSEUDO:` label may be missing here */
   case Format::PSEUDO_BARRIER:
      unreachable("Pseudo instructions should be lowered before assembly.");
   /* NOTE(review): `default:` label missing here */
      if ((uint16_t) instr->format & (uint16_t) Format::VOP3A) {
         VOP3A_instruction* vop3 = static_cast<VOP3A_instruction*>(instr);

         /* VOP3 shares one opcode space with VOP2/VOP1/VOPC/VINTRP via
          * fixed per-class offsets. */
         if ((uint16_t) instr->format & (uint16_t) Format::VOP2)
            opcode = opcode + 0x100;
         else if ((uint16_t) instr->format & (uint16_t) Format::VOP1)
            opcode = opcode + 0x140;
         else if ((uint16_t) instr->format & (uint16_t) Format::VOPC)
            opcode = opcode + 0x0;
         else if ((uint16_t) instr->format & (uint16_t) Format::VINTRP)
            opcode = opcode + 0x270;

         uint32_t encoding = (0b110100 << 26);
         encoding |= opcode << 16;
         encoding |= (vop3->clamp ? 1 : 0) << 15;
         for (unsigned i = 0; i < 3; i++)
            encoding |= vop3->abs[i] << (8+i);
         if (instr->definitions.size() == 2)
            encoding |= instr->definitions[1].physReg() << 8; /* SDST */
         encoding |= (0xFF & instr->definitions[0].physReg().reg); /* VDST */
         out.push_back(encoding);
         /* Second dword: sources, omod, neg.
          * NOTE(review): an `encoding = 0;` reset appears to be missing. */
         if (instr->opcode == aco_opcode::v_interp_mov_f32) {
            encoding = 0x3 & instr->operands[0].constantValue();
         /* NOTE(review): `} else {` missing here */
            for (unsigned i = 0; i < instr->operands.size(); i++)
               encoding |= instr->operands[i].physReg() << (i * 9);
         encoding |= vop3->omod << 27;
         for (unsigned i = 0; i < 3; i++)
            encoding |= vop3->neg[i] << (29+i);
         out.push_back(encoding);

      } else if (instr->isDPP()){
         /* first emit the instruction without the DPP operand */
         Operand dpp_op = instr->operands[0];
         instr->operands[0] = Operand(PhysReg{250}, v1); /* 250: DPP placeholder in SRC0 */
         instr->format = (Format) ((uint32_t) instr->format & ~(1 << 14)); /* clear bit 14 (the DPP format bit) */
         emit_instruction(ctx, out, instr);
         /* Then append the DPP control dword. */
         DPP_instruction* dpp = static_cast<DPP_instruction*>(instr);
         uint32_t encoding = (0xF & dpp->row_mask) << 28;
         encoding |= (0xF & dpp->bank_mask) << 24;
         encoding |= dpp->abs[1] << 23;
         encoding |= dpp->neg[1] << 22;
         encoding |= dpp->abs[0] << 21;
         encoding |= dpp->neg[0] << 20;
         encoding |= dpp->bound_ctrl << 19;
         encoding |= dpp->dpp_ctrl << 8;
         encoding |= (0xFF) & dpp_op.physReg().reg;
         out.push_back(encoding);
      /* NOTE(review): `} else {` missing here */
         unreachable("unimplemented instruction format");

   /* append literal dword */
   for (const Operand& op : instr->operands) {
      if (op.isLiteral()) {
         out.push_back(op.constantValue());
488 void emit_block(asm_context
& ctx
, std::vector
<uint32_t>& out
, Block
& block
)
490 for (aco_ptr
<Instruction
>& instr
: block
.instructions
) {
492 int start_idx
= out
.size();
493 std::cerr
<< "Encoding:\t" << std::endl
;
494 aco_print_instr(&*instr
, stderr
);
495 std::cerr
<< std::endl
;
497 emit_instruction(ctx
, out
, instr
.get());
499 for (int i
= start_idx
; i
< out
.size(); i
++)
500 std::cerr
<< "encoding: " << "0x" << std::setfill('0') << std::setw(8) << std::hex
<< out
[i
] << std::endl
;
/* Walk the program's blocks backwards and patch the final export before
 * s_endpgm (done/valid-mask handling), inserting a null export when none
 * is found.
 * NOTE(review): this extraction has lost interior lines (braces, flag
 * assignments like `exported = true`, the iterator advance, an `else`).
 * Tokens are preserved untouched; comments mark the apparent gaps —
 * confirm against the full file. */
void fix_exports(asm_context& ctx, std::vector<uint32_t>& out, Program* program)
   for (int idx = program->blocks.size() - 1; idx >= 0; idx--) {
      Block& block = program->blocks[idx];
      std::vector<aco_ptr<Instruction>>::reverse_iterator it = block.instructions.rbegin();
      bool endBlock = false;
      bool exported = false;
      while ( it != block.instructions.rend())
      /* NOTE(review): loop-body opening brace missing here */
         if ((*it)->format == Format::EXP && endBlock) {
            Export_instruction* exp = static_cast<Export_instruction*>((*it).get());
            if (program->stage & hw_vs) {
               if (exp->dest >= V_008DFC_SQ_EXP_POS && exp->dest <= (V_008DFC_SQ_EXP_POS + 3)) {
                  /* NOTE(review): lines marking this position export as final
                   * (e.g. setting done) appear to be missing here */
                  exp->valid_mask = true;
                  /* NOTE(review): closing braces, the fragment-shader path, and
                   * `exported = true;` appear to be missing here */
            } else if ((*it)->definitions.size() && (*it)->definitions[0].physReg() == exec)
               /* NOTE(review): the statement guarded by this exec-write check
                * is missing here */
            else if ((*it)->opcode == aco_opcode::s_endpgm) {
               /* NOTE(review): body missing (presumably sets endBlock), along
                * with the iterator advance and loop-closing braces */

      if (!endBlock || exported)
         /* NOTE(review): the guarded statement (presumably `continue;`) is
          * missing, so the next statement currently binds to this `if` */
      /* we didn't find an Export instruction and have to insert a null export */
      aco_ptr<Export_instruction> exp{create_instruction<Export_instruction>(aco_opcode::exp, Format::EXP, 4, 0)};
      for (unsigned i = 0; i < 4; i++)
         exp->operands[i] = Operand(v1);
      exp->enabled_mask = 0;
      exp->compressed = false;
      exp->valid_mask = program->stage & hw_fs;
      if (program->stage & hw_fs)
         exp->dest = 9; /* NULL */
      /* NOTE(review): an `else` appears to be missing — as written this
       * unconditionally overwrites dest */
         exp->dest = V_008DFC_SQ_EXP_POS;
      /* insert the null export 1 instruction before endpgm */
      block.instructions.insert(block.instructions.end() - 1, std::move(exp));
556 void fix_branches(asm_context
& ctx
, std::vector
<uint32_t>& out
)
558 for (std::pair
<int, SOPP_instruction
*> branch
: ctx
.branches
)
560 int offset
= (int)ctx
.program
->blocks
[branch
.second
->block
].offset
- branch
.first
- 1;
561 out
[branch
.first
] |= (uint16_t) offset
;
565 void fix_constaddrs(asm_context
& ctx
, std::vector
<uint32_t>& out
)
567 for (unsigned addr
: ctx
.constaddrs
)
568 out
[addr
] += out
.size() * 4u;
571 unsigned emit_program(Program
* program
,
572 std::vector
<uint32_t>& code
)
574 asm_context
ctx(program
);
576 if (program
->stage
& (hw_vs
| hw_fs
))
577 fix_exports(ctx
, code
, program
);
579 for (Block
& block
: program
->blocks
) {
580 block
.offset
= code
.size();
581 emit_block(ctx
, code
, block
);
584 fix_branches(ctx
, code
);
585 fix_constaddrs(ctx
, code
);
587 unsigned constant_data_offset
= code
.size() * sizeof(uint32_t);
588 while (program
->constant_data
.size() % 4u)
589 program
->constant_data
.push_back(0);
590 /* Copy constant data */
591 code
.insert(code
.end(), (uint32_t*)program
->constant_data
.data(),
592 (uint32_t*)(program
->constant_data
.data() + program
->constant_data
.size()));
594 return constant_data_offset
;