2 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
31 #include "sb_shader.h"
// Constructs a builder for shader `s`: stores `s` in sh, takes the context
// from s.get_ctx(), initializes bb with ctx.hw_class_bit() and sets error to 0.
// NOTE(review): bb is initialized from the member ctx, which presumably relies
// on ctx being declared before bb in the class — confirm against the header.
38 bc_builder::bc_builder(shader
&s
)
39 : sh(s
), ctx(s
.get_ctx()), bb(ctx
.hw_class_bit()), error(0) {}
// Emits the bytecode for the whole shader rooted at sh.root.
// NOTE(review): this listing has embedded line numbers and dropped lines, so
// only the statements that are visible here are described.
// Visible steps:
//  - a first walk over the children of the root, each cast to cf_node and
//    asserted to be a CF instruction, an ALU clause or a fetch clause;
//  - bb.set_size(cf_cnt << 1) (cf_cnt is maintained on lines not shown);
//  - a second walk that records bc.addr / bc.count for clauses from the
//    current bb.ndw(), and points jump instructions at their target's bc.id;
//  - cf_pos = bb.get_pos(), and a dump of the result to cerr through bc_dump.
41 int bc_builder::build() {
43 container_node
*root
= sh
.root
;
46 // FIXME reserve total size to avoid reallocs
// First walk: check node kinds and read each node's opcode flags.
48 for (node_iterator it
= root
->begin(), end
= root
->end();
51 cf_node
*cf
= static_cast<cf_node
*>(*it
);
52 assert(cf
->is_cf_inst() || cf
->is_alu_clause() || cf
->is_fetch_clause());
54 cf_op_flags flags
= (cf_op_flags
)cf
->bc
.op_ptr
->flags
;
59 if (cf
->bc
.is_alu_extended())
// The buffer is sized to twice cf_cnt.
64 bb
.set_size(cf_cnt
<< 1);
// Second walk: compute clause addresses/counts and resolve jump targets.
69 for (node_iterator I
= root
->begin(), end
= root
->end();
72 cf_node
*cf
= static_cast<cf_node
*>(*I
);
73 cf_op_flags flags
= (cf_op_flags
)cf
->bc
.op_ptr
->flags
;
// First branch (its condition is not visible here): addr is half the current
// dword count, count is the number of such units emitted since, minus one.
77 cf
->bc
.addr
= bb
.ndw() >> 1;
79 cf
->bc
.count
= (bb
.ndw() >> 1) - cf
->bc
.addr
- 1;
80 } else if (flags
& CF_FETCH
) {
// Fetch clause: same addr computation; the emitted span is halved once more
// before subtracting one to obtain count.
83 cf
->bc
.addr
= bb
.ndw() >> 1;
84 build_fetch_clause(cf
);
85 cf
->bc
.count
= (((bb
.ndw() >> 1) - cf
->bc
.addr
) >> 1) - 1;
86 } else if (cf
->jump_target
) {
// Jump: addr takes the bc.id of the jump target.
87 cf
->bc
.addr
= cf
->jump_target
->bc
.id
;
88 if (cf
->jump_after_target
)
94 cf_pos
= bb
.get_pos();
// Dump the shader and the built buffer to cerr (any guard around this call
// is not visible in this listing).
98 bc_dump(sh
, cerr
, &bb
).run();
// Processes the ALU clause `n`: iterates over its children, treats each one as
// an alu_group_node and asserts that it is valid.
// NOTE(review): the rest of the loop body and the return statement are not
// visible in this listing.
104 int bc_builder::build_alu_clause(cf_node
* n
) {
105 for (node_iterator I
= n
->begin(), E
= n
->end();
108 alu_group_node
*g
= static_cast<alu_group_node
*>(*I
);
109 assert(g
->is_valid());
// Processes the ALU group `n`: iterates over its children as alu_node objects
// (each asserted valid), then appends the `u` member of every entry of
// n->literals to bb, in index order.
// NOTE(review): the per-instruction emission, any padding after the literals
// and the return statement are not visible in this listing.
116 int bc_builder::build_alu_group(alu_group_node
* n
) {
118 for (node_iterator I
= n
->begin(), E
= n
->end();
121 alu_node
*a
= static_cast<alu_node
*>(*I
);
122 assert(a
->is_valid());
// Literal values follow the instructions of the group.
126 for(int i
= 0, ls
= n
->literals
.size(); i
< ls
; ++i
) {
127 bb
<< n
->literals
.at(i
).u
;
// Processes the fetch clause `n`: iterates over its children as fetch_node
// objects and branches on whether the opcode flags contain FF_VTX.
// NOTE(review): both branch bodies and the return statement are not visible in
// this listing; presumably they pick the vertex or texture fetch encoder —
// confirm against the full file.
136 int bc_builder::build_fetch_clause(cf_node
* n
) {
137 for (node_iterator I
= n
->begin(), E
= n
->end();
139 fetch_node
*f
= static_cast<fetch_node
*>(*I
);
141 if (f
->bc
.op_ptr
->flags
& FF_VTX
)
// Encodes the control-flow instruction held by `n`.
// Opcodes flagged CF_ALU are delegated to build_cf_alu(), and opcodes flagged
// CF_EXP or CF_MEM to build_cf_exp(). Everything else is written to bb here
// as CF word 0 followed by CF word 1, using the EGCM or the R6R7 layouts.
// NOTE(review): several lines (word-1 constructors, the condition choosing
// between the EGCM and R6R7 layouts, some fields, the return statement) are
// not visible in this listing.
150 int bc_builder::build_cf(cf_node
* n
) {
151 const bc_cf
&bc
= n
->bc
;
152 const cf_op_info
*cfop
= bc
.op_ptr
;
154 if (cfop
->flags
& CF_ALU
)
155 return build_cf_alu(n
);
156 if (cfop
->flags
& (CF_EXP
| CF_MEM
))
157 return build_cf_exp(n
);
// EGCM layout, word 0.
160 bb
<< CF_WORD0_EGCM()
162 .JUMPTABLE_SEL(bc
.jumptable_sel
);
// EGCM layout, word 1: the evergreen branch sets END_OF_PROGRAM and
// WHOLE_QUAD_MODE; the fields visible for the other branch omit both.
164 if (ctx
.is_evergreen())
168 .CF_CONST(bc
.cf_const
)
169 .CF_INST(ctx
.cf_opcode(bc
.op
))
172 .END_OF_PROGRAM(bc
.end_of_program
)
173 .POP_COUNT(bc
.pop_count
)
174 .VALID_PIXEL_MODE(bc
.valid_pixel_mode
)
175 .WHOLE_QUAD_MODE(bc
.whole_quad_mode
);
181 .CF_CONST(bc
.cf_const
)
182 .CF_INST(ctx
.cf_opcode(bc
.op
))
185 .POP_COUNT(bc
.pop_count
)
186 .VALID_PIXEL_MODE(bc
.valid_pixel_mode
);
// R6R7 layout, word 0.
188 bb
<< CF_WORD0_R6R7()
191 assert(bc
.count
< ctx
.max_fetch
);
// R6R7 layout, word 1: COUNT_3 receives bc.count shifted right by three
// (the field for the low bits is not visible here).
193 bb
<< CF_WORD1_R6R7()
195 .CALL_COUNT(bc
.call_count
)
196 .CF_CONST(bc
.cf_const
)
197 .CF_INST(ctx
.cf_opcode(bc
.op
))
200 .COUNT_3(bc
.count
>> 3)
201 .END_OF_PROGRAM(bc
.end_of_program
)
202 .POP_COUNT(bc
.pop_count
)
203 .VALID_PIXEL_MODE(bc
.valid_pixel_mode
)
204 .WHOLE_QUAD_MODE(bc
.whole_quad_mode
);
// Encodes the CF instruction that introduces an ALU clause.
// Asserts bc.count < 128. When the clause is "ALU extended" (only allowed when
// ctx.is_egcm() holds) it first writes the two extension words carrying the
// kcache index modes and kcache sets 2 and 3, tagged with the CF_OP_ALU_EXT
// opcode. It then writes CF_ALU word 0 (kcache banks 0/1, mode 0) and one of
// two word-1 layouts: CF_ALU_WORD1_R6 (has USES_WATERFALL) or
// CF_ALU_WORD1_R7EGCM (has ALT_CONST).
// NOTE(review): the condition selecting the word-1 layout, some fields and the
// return statement are not visible in this listing.
210 int bc_builder::build_cf_alu(cf_node
* n
) {
211 const bc_cf
&bc
= n
->bc
;
213 assert(bc
.count
< 128);
215 if (n
->bc
.is_alu_extended()) {
216 assert(ctx
.is_egcm());
// Extension word 0: banks for kcache sets 2/3, index modes for all four sets.
218 bb
<< CF_ALU_WORD0_EXT_EGCM()
219 .KCACHE_BANK2(bc
.kc
[2].bank
)
220 .KCACHE_BANK3(bc
.kc
[3].bank
)
221 .KCACHE_BANK_INDEX_MODE0(bc
.kc
[0].index_mode
)
222 .KCACHE_BANK_INDEX_MODE1(bc
.kc
[1].index_mode
)
223 .KCACHE_BANK_INDEX_MODE2(bc
.kc
[2].index_mode
)
224 .KCACHE_BANK_INDEX_MODE3(bc
.kc
[3].index_mode
)
225 .KCACHE_MODE2(bc
.kc
[2].mode
);
// Extension word 1: fixed CF_OP_ALU_EXT opcode, addresses for sets 2/3.
227 bb
<< CF_ALU_WORD1_EXT_EGCM()
229 .CF_INST(ctx
.cf_opcode(CF_OP_ALU_EXT
))
230 .KCACHE_ADDR2(bc
.kc
[2].addr
)
231 .KCACHE_ADDR3(bc
.kc
[3].addr
)
232 .KCACHE_MODE3(bc
.kc
[3].mode
);
// Regular word 0, shared by all hardware classes.
235 bb
<< CF_ALU_WORD0_ALL()
237 .KCACHE_BANK0(bc
.kc
[0].bank
)
238 .KCACHE_BANK1(bc
.kc
[1].bank
)
239 .KCACHE_MODE0(bc
.kc
[0].mode
);
// Same bound as the assertion at the top of the function.
241 assert(bc
.count
< 128);
// Word 1, R6 layout.
244 bb
<< CF_ALU_WORD1_R6()
246 .CF_INST(ctx
.cf_opcode(bc
.op
))
248 .KCACHE_ADDR0(bc
.kc
[0].addr
)
249 .KCACHE_ADDR1(bc
.kc
[1].addr
)
250 .KCACHE_MODE1(bc
.kc
[1].mode
)
251 .USES_WATERFALL(bc
.uses_waterfall
)
252 .WHOLE_QUAD_MODE(bc
.whole_quad_mode
);
// Word 1, R7/EG/CM layout.
254 bb
<< CF_ALU_WORD1_R7EGCM()
255 .ALT_CONST(bc
.alt_const
)
257 .CF_INST(ctx
.cf_opcode(bc
.op
))
259 .KCACHE_ADDR0(bc
.kc
[0].addr
)
260 .KCACHE_ADDR1(bc
.kc
[1].addr
)
261 .KCACHE_MODE1(bc
.kc
[1].mode
)
262 .WHOLE_QUAD_MODE(bc
.whole_quad_mode
);
// Encodes an export / memory CF instruction.
// Word 0 uses the RAT layout when the opcode has the CF_RAT flag (asserted to
// be EGCM-only) and the CF_ALLOC_EXPORT_WORD0_ALL layout is emitted as well
// (presumably as the alternative branch — the `else` is not visible here).
// For CF_EXP opcodes word 1 is one of the SWIZ layouts (R6R7, EG or CM); for
// CF_MEM opcodes word 1 is produced by build_cf_mem().
// NOTE(review): several fields, the first hardware-class condition and the
// return statement are not visible in this listing.
267 int bc_builder::build_cf_exp(cf_node
* n
) {
268 const bc_cf
&bc
= n
->bc
;
269 const cf_op_info
*cfop
= bc
.op_ptr
;
271 if (cfop
->flags
& CF_RAT
) {
272 assert(ctx
.is_egcm());
// Word 0, RAT layout.
274 bb
<< CF_ALLOC_EXPORT_WORD0_RAT_EGCM()
275 .ELEM_SIZE(bc
.elem_size
)
276 .INDEX_GPR(bc
.index_gpr
)
278 .RAT_INDEX_MODE(bc
.rat_index_mode
)
279 .RAT_INST(bc
.rat_inst
)
// Word 0, common layout.
285 bb
<< CF_ALLOC_EXPORT_WORD0_ALL()
286 .ARRAY_BASE(bc
.array_base
)
287 .ELEM_SIZE(bc
.elem_size
)
288 .INDEX_GPR(bc
.index_gpr
)
294 if (cfop
->flags
& CF_EXP
) {
// Word 1, SWIZ layout for R6/R7.
297 bb
<< CF_ALLOC_EXPORT_WORD1_SWIZ_R6R7()
299 .BURST_COUNT(bc
.burst_count
)
300 .CF_INST(ctx
.cf_opcode(bc
.op
))
301 .END_OF_PROGRAM(bc
.end_of_program
)
306 .VALID_PIXEL_MODE(bc
.valid_pixel_mode
)
307 .WHOLE_QUAD_MODE(bc
.whole_quad_mode
);
309 else if (ctx
.is_evergreen())
// Word 1, SWIZ layout for Evergreen.
310 bb
<< CF_ALLOC_EXPORT_WORD1_SWIZ_EG()
312 .BURST_COUNT(bc
.burst_count
)
313 .CF_INST(ctx
.cf_opcode(bc
.op
))
314 .END_OF_PROGRAM(bc
.end_of_program
)
320 .VALID_PIXEL_MODE(bc
.valid_pixel_mode
);
// Word 1, SWIZ layout for Cayman (no END_OF_PROGRAM among the visible fields).
323 bb
<< CF_ALLOC_EXPORT_WORD1_SWIZ_CM()
325 .BURST_COUNT(bc
.burst_count
)
326 .CF_INST(ctx
.cf_opcode(bc
.op
))
332 .VALID_PIXEL_MODE(bc
.valid_pixel_mode
);
334 } else if (cfop
->flags
& CF_MEM
) {
335 return build_cf_mem(n
);
// Writes word 1 of a memory export CF instruction in the BUF layout, choosing
// between the R6R7, EG and CM variants.
// In the visible fields, the R6R7 variant is the only one with
// WHOLE_QUAD_MODE and the CM variant has no END_OF_PROGRAM.
// NOTE(review): the first hardware-class condition, some fields and the return
// statement are not visible in this listing.
341 int bc_builder::build_cf_mem(cf_node
* n
) {
342 const bc_cf
&bc
= n
->bc
;
// R6/R7 variant.
345 bb
<< CF_ALLOC_EXPORT_WORD1_BUF_R6R7()
346 .ARRAY_SIZE(bc
.array_size
)
348 .BURST_COUNT(bc
.burst_count
)
349 .CF_INST(ctx
.cf_opcode(bc
.op
))
350 .COMP_MASK(bc
.comp_mask
)
351 .END_OF_PROGRAM(bc
.end_of_program
)
352 .VALID_PIXEL_MODE(bc
.valid_pixel_mode
)
353 .WHOLE_QUAD_MODE(bc
.whole_quad_mode
);
355 else if (ctx
.is_evergreen())
// Evergreen variant.
356 bb
<< CF_ALLOC_EXPORT_WORD1_BUF_EG()
357 .ARRAY_SIZE(bc
.array_size
)
359 .BURST_COUNT(bc
.burst_count
)
360 .CF_INST(ctx
.cf_opcode(bc
.op
))
361 .COMP_MASK(bc
.comp_mask
)
362 .END_OF_PROGRAM(bc
.end_of_program
)
364 .VALID_PIXEL_MODE(bc
.valid_pixel_mode
);
// Cayman variant.
367 bb
<< CF_ALLOC_EXPORT_WORD1_BUF_CM()
368 .ARRAY_SIZE(bc
.array_size
)
370 .BURST_COUNT(bc
.burst_count
)
371 .CF_INST(ctx
.cf_opcode(bc
.op
))
372 .COMP_MASK(bc
.comp_mask
)
374 .VALID_PIXEL_MODE(bc
.valid_pixel_mode
);
// Encodes one ALU instruction as two words appended to bb.
// Word 0 (ALU_WORD0_ALL) carries the index mode, predicate select and the
// sel/chan/neg/rel fields of sources 0 and 1.
// Word 1 depends on the operand count: opcodes with fewer than three sources
// use one of the OP2 layouts (R6; Cayman MOVA for AF_MOVA opcodes; Cayman
// EXEC_MASK for AF_PRED / AF_KILL opcodes; otherwise R7EGCM), and the
// remaining opcodes use ALU_WORD1_OP3_ALL, which encodes source 2.
// NOTE(review): some fields, the condition selecting the R6 layout and the
// return statement are not visible in this listing.
379 int bc_builder::build_alu(alu_node
* n
) {
380 const bc_alu
&bc
= n
->bc
;
381 const alu_op_info
*aop
= bc
.op_ptr
;
383 bb
<< ALU_WORD0_ALL()
384 .INDEX_MODE(bc
.index_mode
)
386 .PRED_SEL(bc
.pred_sel
)
387 .SRC0_SEL(bc
.src
[0].sel
)
388 .SRC0_CHAN(bc
.src
[0].chan
)
389 .SRC0_NEG(bc
.src
[0].neg
)
390 .SRC0_REL(bc
.src
[0].rel
)
391 .SRC1_SEL(bc
.src
[1].sel
)
392 .SRC1_CHAN(bc
.src
[1].chan
)
393 .SRC1_NEG(bc
.src
[1].neg
)
394 .SRC1_REL(bc
.src
[1].rel
);
396 if (aop
->src_count
<3) {
// OP2, R6 layout (has FOG_MERGE).
398 bb
<< ALU_WORD1_OP2_R6()
399 .ALU_INST(ctx
.alu_opcode(bc
.op
))
400 .BANK_SWIZZLE(bc
.bank_swizzle
)
403 .DST_CHAN(bc
.dst_chan
)
405 .FOG_MERGE(bc
.fog_merge
)
407 .SRC0_ABS(bc
.src
[0].abs
)
408 .SRC1_ABS(bc
.src
[1].abs
)
409 .UPDATE_EXEC_MASK(bc
.update_exec_mask
)
410 .UPDATE_PRED(bc
.update_pred
)
411 .WRITE_MASK(bc
.write_mask
);
414 if (ctx
.is_cayman() && (aop
->flags
& AF_MOVA
)) {
// OP2, Cayman MOVA layout: MOVA_DST is filled from bc.dst_gpr.
416 bb
<< ALU_WORD1_OP2_MOVA_CM()
417 .ALU_INST(ctx
.alu_opcode(bc
.op
))
418 .BANK_SWIZZLE(bc
.bank_swizzle
)
420 .MOVA_DST(bc
.dst_gpr
)
421 .DST_CHAN(bc
.dst_chan
)
424 .UPDATE_EXEC_MASK(bc
.update_exec_mask
)
425 .UPDATE_PRED(bc
.update_pred
)
426 .WRITE_MASK(bc
.write_mask
)
427 .SRC0_ABS(bc
.src
[0].abs
)
428 .SRC1_ABS(bc
.src
[1].abs
);
430 } else if (ctx
.is_cayman() && (aop
->flags
& (AF_PRED
|AF_KILL
))) {
// OP2, Cayman EXEC_MASK layout: EXECUTE_MASK_OP is filled from bc.omod.
431 bb
<< ALU_WORD1_OP2_EXEC_MASK_CM()
432 .ALU_INST(ctx
.alu_opcode(bc
.op
))
433 .BANK_SWIZZLE(bc
.bank_swizzle
)
435 .DST_CHAN(bc
.dst_chan
)
437 .EXECUTE_MASK_OP(bc
.omod
)
438 .UPDATE_EXEC_MASK(bc
.update_exec_mask
)
439 .UPDATE_PRED(bc
.update_pred
)
440 .WRITE_MASK(bc
.write_mask
)
441 .SRC0_ABS(bc
.src
[0].abs
)
442 .SRC1_ABS(bc
.src
[1].abs
);
// OP2, generic R7/EG/CM layout.
445 bb
<< ALU_WORD1_OP2_R7EGCM()
446 .ALU_INST(ctx
.alu_opcode(bc
.op
))
447 .BANK_SWIZZLE(bc
.bank_swizzle
)
450 .DST_CHAN(bc
.dst_chan
)
453 .UPDATE_EXEC_MASK(bc
.update_exec_mask
)
454 .UPDATE_PRED(bc
.update_pred
)
455 .WRITE_MASK(bc
.write_mask
)
456 .SRC0_ABS(bc
.src
[0].abs
)
457 .SRC1_ABS(bc
.src
[1].abs
);
// OP3 layout: word 1 carries the third source operand.
461 bb
<< ALU_WORD1_OP3_ALL()
462 .ALU_INST(ctx
.alu_opcode(bc
.op
))
463 .BANK_SWIZZLE(bc
.bank_swizzle
)
466 .DST_CHAN(bc
.dst_chan
)
468 .SRC2_SEL(bc
.src
[2].sel
)
469 .SRC2_CHAN(bc
.src
[2].chan
)
470 .SRC2_NEG(bc
.src
[2].neg
)
471 .SRC2_REL(bc
.src
[2].rel
);
// Encodes a texture fetch instruction; the opcode must not carry FF_VTX.
// Word 0 comes in three variants: two whose constructors are not visible in
// this listing (the second is selected by ctx.is_r700() and adds ALT_CONST)
// and TEX_WORD0_EGCM, which adds INST_MOD and the resource/sampler index
// modes. Word 1 (TEX_WORD1_ALL) holds the four coordinate types, the four
// destination selects and the LOD bias; word 2 (TEX_WORD2_ALL) holds the three
// offsets, the sampler id and the four source selects.
// NOTE(review): some fields, the first hardware-class condition and the return
// statement are not visible in this listing.
475 int bc_builder::build_fetch_tex(fetch_node
* n
) {
476 const bc_fetch
&bc
= n
->bc
;
477 const fetch_op_info
*fop
= bc
.op_ptr
;
479 assert(!(fop
->flags
& FF_VTX
));
483 .BC_FRAC_MODE(bc
.bc_frac_mode
)
484 .FETCH_WHOLE_QUAD(bc
.fetch_whole_quad
)
485 .RESOURCE_ID(bc
.resource_id
)
488 .TEX_INST(ctx
.fetch_opcode(bc
.op
));
490 else if (ctx
.is_r700())
492 .ALT_CONST(bc
.alt_const
)
493 .BC_FRAC_MODE(bc
.bc_frac_mode
)
494 .FETCH_WHOLE_QUAD(bc
.fetch_whole_quad
)
495 .RESOURCE_ID(bc
.resource_id
)
498 .TEX_INST(ctx
.fetch_opcode(bc
.op
));
// Word 0, EG/CM layout.
501 bb
<< TEX_WORD0_EGCM()
502 .ALT_CONST(bc
.alt_const
)
503 .FETCH_WHOLE_QUAD(bc
.fetch_whole_quad
)
504 .INST_MOD(bc
.inst_mod
)
505 .RESOURCE_ID(bc
.resource_id
)
506 .RESOURCE_INDEX_MODE(bc
.resource_index_mode
)
507 .SAMPLER_INDEX_MODE(bc
.sampler_index_mode
)
510 .TEX_INST(ctx
.fetch_opcode(bc
.op
));
// Word 1, shared by all hardware classes.
512 bb
<< TEX_WORD1_ALL()
513 .COORD_TYPE_X(bc
.coord_type
[0])
514 .COORD_TYPE_Y(bc
.coord_type
[1])
515 .COORD_TYPE_Z(bc
.coord_type
[2])
516 .COORD_TYPE_W(bc
.coord_type
[3])
519 .DST_SEL_X(bc
.dst_sel
[0])
520 .DST_SEL_Y(bc
.dst_sel
[1])
521 .DST_SEL_Z(bc
.dst_sel
[2])
522 .DST_SEL_W(bc
.dst_sel
[3])
523 .LOD_BIAS(bc
.lod_bias
);
// Word 2, shared by all hardware classes.
525 bb
<< TEX_WORD2_ALL()
526 .OFFSET_X(bc
.offset
[0])
527 .OFFSET_Y(bc
.offset
[1])
528 .OFFSET_Z(bc
.offset
[2])
529 .SAMPLER_ID(bc
.sampler_id
)
530 .SRC_SEL_X(bc
.src_sel
[0])
531 .SRC_SEL_Y(bc
.src_sel
[1])
532 .SRC_SEL_Z(bc
.src_sel
[2])
533 .SRC_SEL_W(bc
.src_sel
[3]);
// Encodes a vertex fetch instruction; the opcode must carry FF_VTX.
// Word 0 uses VTX_WORD0_R6R7EG unless ctx.is_cayman(); the other variant
// (constructor not visible here) adds COALESCED_READ, SRC_SEL_Y and
// STRUCTURED_READ. BUFFER_ID is filled from bc.resource_id in both.
// Word 1 uses VTX_WORD1_SEM_ALL (with SEMANTIC_ID) for FETCH_OP_SEMFETCH and
// VTX_WORD1_GPR_ALL otherwise.
// Word 2 is selected by a switch on ctx.hw_class; only bc.offset[0] is
// encoded, and an unrecognized class trips the final assertion.
// NOTE(review): some case labels, the word-2 constructors, a few fields and
// the end of the function are not visible in this listing.
539 int bc_builder::build_fetch_vtx(fetch_node
* n
) {
540 const bc_fetch
&bc
= n
->bc
;
541 const fetch_op_info
*fop
= bc
.op_ptr
;
543 assert(fop
->flags
& FF_VTX
);
545 if (!ctx
.is_cayman())
// Word 0, R6/R7/EG layout.
546 bb
<< VTX_WORD0_R6R7EG()
547 .BUFFER_ID(bc
.resource_id
)
548 .FETCH_TYPE(bc
.fetch_type
)
549 .FETCH_WHOLE_QUAD(bc
.fetch_whole_quad
)
550 .MEGA_FETCH_COUNT(bc
.mega_fetch_count
)
553 .SRC_SEL_X(bc
.src_sel
[0])
554 .VC_INST(ctx
.fetch_opcode(bc
.op
));
558 .BUFFER_ID(bc
.resource_id
)
559 .COALESCED_READ(bc
.coalesced_read
)
560 .FETCH_TYPE(bc
.fetch_type
)
561 .FETCH_WHOLE_QUAD(bc
.fetch_whole_quad
)
565 .SRC_SEL_X(bc
.src_sel
[0])
566 .SRC_SEL_Y(bc
.src_sel
[1])
567 .STRUCTURED_READ(bc
.structured_read
)
568 .VC_INST(ctx
.fetch_opcode(bc
.op
));
570 if (bc
.op
== FETCH_OP_SEMFETCH
)
// Word 1, semantic-fetch layout.
571 bb
<< VTX_WORD1_SEM_ALL()
572 .DATA_FORMAT(bc
.data_format
)
573 .DST_SEL_X(bc
.dst_sel
[0])
574 .DST_SEL_Y(bc
.dst_sel
[1])
575 .DST_SEL_Z(bc
.dst_sel
[2])
576 .DST_SEL_W(bc
.dst_sel
[3])
577 .FORMAT_COMP_ALL(bc
.format_comp_all
)
578 .NUM_FORMAT_ALL(bc
.num_format_all
)
579 .SEMANTIC_ID(bc
.semantic_id
)
580 .SRF_MODE_ALL(bc
.srf_mode_all
)
581 .USE_CONST_FIELDS(bc
.use_const_fields
);
// Word 1, GPR-destination layout.
583 bb
<< VTX_WORD1_GPR_ALL()
584 .DATA_FORMAT(bc
.data_format
)
587 .DST_SEL_X(bc
.dst_sel
[0])
588 .DST_SEL_Y(bc
.dst_sel
[1])
589 .DST_SEL_Z(bc
.dst_sel
[2])
590 .DST_SEL_W(bc
.dst_sel
[3])
591 .FORMAT_COMP_ALL(bc
.format_comp_all
)
592 .NUM_FORMAT_ALL(bc
.num_format_all
)
593 .SRF_MODE_ALL(bc
.srf_mode_all
)
594 .USE_CONST_FIELDS(bc
.use_const_fields
);
// Word 2: one layout per hardware class.
596 switch (ctx
.hw_class
) {
599 .CONST_BUF_NO_STRIDE(bc
.const_buf_no_stride
)
600 .ENDIAN_SWAP(bc
.endian_swap
)
601 .MEGA_FETCH(bc
.mega_fetch
)
602 .OFFSET(bc
.offset
[0]);
606 .ALT_CONST(bc
.alt_const
)
607 .CONST_BUF_NO_STRIDE(bc
.const_buf_no_stride
)
608 .ENDIAN_SWAP(bc
.endian_swap
)
609 .MEGA_FETCH(bc
.mega_fetch
)
610 .OFFSET(bc
.offset
[0]);
612 case HW_CLASS_EVERGREEN
:
614 .ALT_CONST(bc
.alt_const
)
615 .BUFFER_INDEX_MODE(bc
.resource_index_mode
)
616 .CONST_BUF_NO_STRIDE(bc
.const_buf_no_stride
)
617 .ENDIAN_SWAP(bc
.endian_swap
)
618 .MEGA_FETCH(bc
.mega_fetch
)
619 .OFFSET(bc
.offset
[0]);
621 case HW_CLASS_CAYMAN
:
623 .ALT_CONST(bc
.alt_const
)
624 .BUFFER_INDEX_MODE(bc
.resource_index_mode
)
625 .CONST_BUF_NO_STRIDE(bc
.const_buf_no_stride
)
626 .ENDIAN_SWAP(bc
.endian_swap
)
627 .OFFSET(bc
.offset
[0]);
630 assert(!"unknown hw class");