2 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
28 #include "sb_shader.h"
30 #include "eg_sq.h" // V_SQ_CF_INDEX_0/1
// Lookup tables used by the dump routines in this file.
//
// chans: one character per channel/selector value. It is indexed below by
// n.bc.sel[k], n.bc.dst_sel[k], n.bc.src_sel[k], alu.dst_chan, src->chan and
// by a plain component counter k.
34 static const char* chans
= "xyzw01?_";
// vec_bs: bank-swizzle names, indexed by n.bc.bank_swizzle in
// bc_dump::dump(alu_node&). Six entries are visible here.
36 static const char* vec_bs
[] = {
37 "VEC_012", "VEC_021", "VEC_120", "VEC_102", "VEC_201", "VEC_210"
// scl_bs: bank-swizzle names printed when n.bc.slot == SLOT_TRANS, indexed by
// the same n.bc.bank_swizzle field. Four entries are visible here.
40 static const char* scl_bs
[] = {
41 "SCL_210", "SCL_122", "SCL_212", "SCL_221"
// Visitor callback for control-flow nodes.
//
// Only fragments of this definition are visible in this view: the signature
// and two conditions. What is done inside each branch, how `enter` is used and
// the return value are not visible here.
45 bool bc_dump::visit(cf_node
& n
, bool enter
) {
// Branch taken for CF instructions flagged CF_ALU whose bytecode reports
// is_alu_extended().
50 if ((n
.bc
.op_ptr
->flags
& CF_ALU
) && n
.bc
.is_alu_extended()) {
// Branch taken for CF instructions flagged CF_CLAUSE.
59 if (n
.bc
.op_ptr
->flags
& CF_CLAUSE
) {
// Visitor callback for ALU nodes.
//
// Only fragments of this definition are visible in this view; the conditions
// guarding the statements below and the return value are not shown.
67 bool bc_dump::visit(alu_node
& n
, bool enter
) {
// Pre-increments group_index and prints it to sblog via print_w with a
// width argument of 5.
73 sblog
.print_w(++group_index
, 5);
// Records the instruction's `last` bit in new_group.
81 new_group
= n
.bc
.last
;
// Downcast of the node's ALU group (the variable receiving it is declared on
// a line not visible here; it is referred to as `g` in the loop below).
85 static_cast<alu_group_node
*>(n
.get_alu_group_node());
// Walks every entry of the group's literals container.
87 for (unsigned k
= 0; k
< g
->literals
.size(); ++k
) {
// Visitor callback for fetch nodes. Only the signature is visible in this
// view; the body and return value are not shown.
101 bool bc_dump::visit(fetch_node
& n
, bool enter
) {
// Appends (pos - l) space characters to `s`, where l is the current length
// of s.str().
//
// s   - output string stream that is extended.
// pos - target column.
//
// NOTE(review): no guard for pos <= l is visible in this view. A negative
// (pos - l) passed to the std::string(count, char) constructor would be
// converted to a very large unsigned count — confirm that a check such as
// `if (l < pos)` exists on the line not shown here.
111 static void fill_to(sb_ostringstream
&s
, int pos
) {
112 int l
= s
.str().length();
114 s
<< std::string(pos
-l
, ' ');
// Formats one control-flow instruction as text and writes it, followed by a
// newline, to sblog.
//
// n - the CF node whose bytecode fields (n.bc.*) are printed.
//
// Only part of this definition is visible in this view: closing braces,
// `else` keywords and some statements are missing, so the exact nesting of
// the fragments below cannot be confirmed from here.
117 void bc_dump::dump(cf_node
& n
) {
// Opcode name first.
119 s
<< n
.bc
.op_ptr
->name
;
// Instructions flagged CF_EXP.
121 if (n
.bc
.op_ptr
->flags
& CF_EXP
) {
// NOTE(review): this table has three entries and n.bc.type is used as an
// unchecked index below — confirm that type is always in 0..2 here.
122 static const char *exp_type
[] = {"PIXEL", "POS ", "PARAM"};
125 s
<< " " << exp_type
[n
.bc
.type
] << " ";
// With a non-zero burst_count, print array_base and rw_gpr as ranges
// "base-(base+burst_count)".
127 if (n
.bc
.burst_count
) {
129 s2
<< n
.bc
.array_base
<< "-" << n
.bc
.array_base
+ n
.bc
.burst_count
;
130 s
.print_wl(s2
.str(), 5);
131 s
<< " R" << n
.bc
.rw_gpr
<< "-" <<
132 n
.bc
.rw_gpr
+ n
.bc
.burst_count
<< ".";
// Single array_base / register form (presumably the `else` arm of the
// burst_count test; the `else` itself is not visible here).
134 s
.print_wl(n
.bc
.array_base
, 5);
135 s
<< " R" << n
.bc
.rw_gpr
<< ".";
// One character per component, looked up in chans by n.bc.sel[k].
138 for (int k
= 0; k
< 4; ++k
)
139 s
<< chans
[n
.bc
.sel
[k
]];
// Instructions flagged CF_MEM.
141 } else if (n
.bc
.op_ptr
->flags
& CF_MEM
) {
// The initializer list continues on a line not visible in this view.
142 static const char *exp_type
[] = {"WRITE", "WRITE_IND", "WRITE_ACK",
145 s
<< " " << exp_type
[n
.bc
.type
] << " ";
146 s
.print_wl(n
.bc
.array_base
, 5);
147 s
<< " R" << n
.bc
.rw_gpr
<< ".";
// Component k is shown as its channel letter when bit k of comp_mask is
// set, otherwise as '_'.
148 for (int k
= 0; k
< 4; ++k
)
149 s
<< ((n
.bc
.comp_mask
& (1 << k
)) ? chans
[k
] : '_');
// When bit 0 of type is set, the index register is printed: with ".xyz"
// for CF_RAT instructions and with ".x" for CF_MEM instructions.
151 if ((n
.bc
.op_ptr
->flags
& CF_RAT
) && (n
.bc
.type
& 1)) {
152 s
<< ", @R" << n
.bc
.index_gpr
<< ".xyz";
154 if ((n
.bc
.op_ptr
->flags
& CF_MEM
) && (n
.bc
.type
& 1)) {
155 s
<< ", @R" << n
.bc
.index_gpr
<< ".x";
158 s
<< " ES:" << n
.bc
.elem_size
;
// Instructions flagged CF_CLAUSE print count + 1.
165 if (n
.bc
.op_ptr
->flags
& CF_CLAUSE
) {
166 s
<< " " << n
.bc
.count
+1;
// The address is printed shifted left by one bit.
169 s
<< " @" << (n
.bc
.addr
<< 1);
// CF_ALU instructions: print each of the four kcache entries as
// "KC<k>[CB<bank>:<addr<<4>-<((addr+mode)<<4)-1><index mode suffix>]".
171 if (n
.bc
.op_ptr
->flags
& CF_ALU
) {
172 static const char *index_mode
[] = {"", " CF_INDEX_0", " CF_INDEX_1"};
174 for (int k
= 0; k
< 4; ++k
) {
175 bc_kcache
&kc
= n
.bc
.kc
[k
];
177 s
<< " KC" << k
<< "[CB" << kc
.bank
<< ":" <<
178 (kc
.addr
<< 4) << "-" <<
179 (((kc
.addr
+ kc
.mode
) << 4) - 1) << index_mode
[kc
.index_mode
] << "]";
// Condition and pop count (any guards around these are not visible here).
185 s
<< " CND:" << n
.bc
.cond
;
188 s
<< " POP:" << n
.bc
.pop_count
;
// For CF_EMIT instructions a non-zero count is printed as a stream number.
190 if (n
.bc
.count
&& (n
.bc
.op_ptr
->flags
& CF_EMIT
))
191 s
<< " STREAM" << n
.bc
.count
;
// Flag tests; the text emitted for each flag is on lines not visible here.
197 if (n
.bc
.valid_pixel_mode
)
200 if (n
.bc
.whole_quad_mode
)
203 if (n
.bc
.end_of_program
)
// Emit the assembled line.
206 sblog
<< s
.str() << "\n";
// Helper that prints a register selector to `s`.
//
// s          - destination stream.
// sel        - selector value.
// rel        - relative-addressing flag.
// index_mode - index mode, compared against 0, 4, 5 and 6 below.
// The parameter list continues on a line not visible in this view; callers in
// this file pass a fifth argument, and the body refers to `need_brackets`.
//
// Only the conditions of this function are visible here; the text written in
// each branch is not shown.
210 static void print_sel(sb_ostream
&s
, int sel
, int rel
, int index_mode
,
212 if (rel
&& index_mode
>= 5 && sel
< 128)
214 if (rel
|| need_brackets
) {
219 if (index_mode
== 0 || index_mode
== 6)
221 else if (index_mode
== 4)
224 if (rel
|| need_brackets
) {
// Prints the destination operand of an ALU instruction to `s`.
//
// s   - destination stream.
// alu - ALU bytecode; dst_gpr, dst_rel, dst_chan, write_mask, index_mode,
//       op and op_ptr->src_count are read.
//
// Only part of this definition is visible in this view.
229 static void print_dst(sb_ostream
&s
, bc_alu
&alu
)
231 unsigned sel
= alu
.dst_gpr
;
233 if (sel
>= 128 - 4) { // clause temporary gpr
// The selector is printed when write_mask is set, or for three-source
// opcodes whose op value is below LDS_OP2_LDS_ADD.
238 if (alu
.write_mask
|| (alu
.op_ptr
->src_count
== 3 && alu
.op
< LDS_OP2_LDS_ADD
)) {
240 print_sel(s
, sel
, alu
.dst_rel
, alu
.index_mode
, 0);
// Destination channel letter.
245 s
<< chans
[alu
.dst_chan
];
// Prints source operand number `idx` of an ALU instruction to `s`.
//
// s   - destination stream.
// alu - ALU bytecode; alu.src[idx] and alu.index_mode are read.
// idx - index into alu.src.
//
// Only part of this definition is visible in this view: the bodies of most
// selector-range branches and switch cases, and the statements that update
// need_sel / need_chan / need_brackets, are not shown.
248 static void print_src(sb_ostream
&s
, bc_alu
&alu
, unsigned idx
)
250 bc_alu_src
*src
= &alu
.src
[idx
];
251 unsigned sel
= src
->sel
, need_sel
= 1, need_chan
= 1, need_brackets
= 0;
// Classification of the selector by numeric range.
260 } else if (sel
< 128) {
263 } else if (sel
< 160) {
267 } else if (sel
< 192) {
271 } else if (sel
>= 448) {
274 } else if (sel
>= 288) {
278 } else if (sel
>= 256) {
// Special source selectors handled case by case.
286 case ALU_SRC_LDS_OQ_A
:
290 case ALU_SRC_LDS_OQ_B
:
294 case ALU_SRC_LDS_OQ_A_POP
:
298 case ALU_SRC_LDS_OQ_B_POP
:
// LDS direct sources print src->value.u through print_zw_hex with a width
// argument of 8, inside "LDS_A[...]" / "LDS_B[...]".
302 case ALU_SRC_LDS_DIRECT_A
:
303 s
<< "LDS_A["; s
.print_zw_hex(src
->value
.u
, 8); s
<< "]";
305 case ALU_SRC_LDS_DIRECT_B
:
306 s
<< "LDS_B["; s
.print_zw_hex(src
->value
.u
, 8); s
<< "]";
// Literals print both the hex form of value.u and value.f.
315 case ALU_SRC_LITERAL
:
317 s
.print_zw_hex(src
->value
.u
, 8);
318 s
<< " " << src
->value
.f
<< "]";
324 case ALU_SRC_M_1_INT
:
336 case ALU_SRC_MASK_LO
:
339 case ALU_SRC_MASK_HI
:
342 case ALU_SRC_HW_WAVE_ID
:
345 case ALU_SRC_SIMD_ID
:
// Placeholder text that includes the raw selector value (presumably the
// switch's default arm; the label is not visible here).
352 s
<< "??IMM_" << sel
;
// Selector printed through print_sel, passing need_brackets.
358 print_sel(s
, sel
, src
->rel
, alu
.index_mode
, need_brackets
);
// Source channel letter.
361 s
<< "." << chans
[src
->chan
];
// Formats one ALU instruction as text and writes it, followed by a newline,
// to sblog.
//
// n - the ALU node whose bytecode fields (n.bc.*) are printed.
//
// Only part of this definition is visible in this view.
367 void bc_dump::dump(alu_node
& n
) {
// Output-modifier suffixes indexed by n.bc.omod, and slot letters indexed by
// n.bc.slot.
369 static const char *omod_str
[] = {"","*2","*4","/2"};
370 static const char *slots
= "xyzwt";
// Flag columns: "M" when update_exec_mask is set, "P" when update_pred is
// set, a space otherwise.
372 s
<< (n
.bc
.update_exec_mask
? "M" : " ");
373 s
<< (n
.bc
.update_pred
? "P" : " ");
// pred_sel: 2 prints "0", other values >= 2 print "1", values below 2 print
// a space.
375 s
<< (n
.bc
.pred_sel
>=2 ? (n
.bc
.pred_sel
== 2 ? "0" : "1") : " ");
378 s
<< slots
[n
.bc
.slot
] << ": ";
// Opcode name, output modifier, and "_sat" when clamp is set.
380 s
<< n
.bc
.op_ptr
->name
<< omod_str
[n
.bc
.omod
] << (n
.bc
.clamp
? "_sat" : "");
// One print_src call per source operand of the opcode.
385 for (int k
= 0; k
< n
.bc
.op_ptr
->src_count
; ++k
) {
// NOTE(review): both arms of this conditional read the same in this view;
// confirm whether the two literals differ (e.g. in whitespace) in the
// actual file.
386 s
<< (k
? ", " : ", ");
387 print_src(s
, n
.bc
, k
);
// A non-zero bank swizzle is printed by name: from scl_bs when the slot is
// SLOT_TRANS, and from vec_bs (presumably in the `else` arm, which is not
// visible here).
// NOTE(review): bank_swizzle is used as an unchecked index into both tables
// — confirm its range fits the smaller table when slot == SLOT_TRANS.
390 if (n
.bc
.bank_swizzle
) {
392 if (n
.bc
.slot
== SLOT_TRANS
)
393 s
<< " " << scl_bs
[n
.bc
.bank_swizzle
];
395 s
<< " " << vec_bs
[n
.bc
.bank_swizzle
];
// When ctx.is_cayman() is true, ALU_OP1_MOVA_INT additionally prints its
// destination name. std::min clamps dst_gpr to 4 so any larger value selects
// the final " Unknown MOVA_INT dest" entry.
398 if (ctx
.is_cayman()) {
399 if (n
.bc
.op
== ALU_OP1_MOVA_INT
) {
400 static const char *mova_str
[] = { " AR_X", " PC", " CF_IDX0", " CF_IDX1",
401 " Unknown MOVA_INT dest" };
402 s
<< mova_str
[std::min(n
.bc
.dst_gpr
, 4u)]; // CM_V_SQ_MOVA_DST_AR_*
// A non-zero lds_idx_offset is appended.
406 if (n
.bc
.lds_idx_offset
) {
407 s
<< " IDX_OFFSET:" << n
.bc
.lds_idx_offset
;
// Emit the assembled line.
410 sblog
<< s
.str() << "\n";
// Writes the dump header to sblog: a banner line with the shader id and the
// full target name, then a line with dword count, GPR count and stack size.
//
// Returns int; the returned value is not visible in this view, and neither
// are the bodies of the two padding loops.
413 int bc_dump::init() {
415 s
<< "===== SHADER #" << sh
.id
;
// Right-hand part of the banner.
422 std::string target
= std::string(" ") +
423 sh
.get_full_target_name() + " =====";
// Loops while the left part plus `target` is shorter than 80 characters.
425 while (s
.str().length() + target
.length() < 80)
430 sblog
<< "\n" << s
.str() << "\n";
// Second header line: sizes.
435 s
<< "===== " << ndw
<< " dw ===== " << sh
.ngpr
436 << " gprs ===== " << sh
.nstack
<< " stack ";
// Loops while the line is shorter than 80 characters.
439 while (s
.str().length() < 80)
442 sblog
<< s
.str() << "\n";
// Writes the dump footer: a line starting with "===== SHADER_END ", followed
// by two newlines, to sblog.
//
// Returns int; the returned value and the body of the padding loop are not
// visible in this view.
447 int bc_dump::done() {
449 s
<< "===== SHADER_END ";
// Loops while the line is shorter than 80 characters.
451 while (s
.str().length() < 80)
454 sblog
<< s
.str() << "\n\n";
// Constructs a dumper for shader `s`.
//
// s  - shader, forwarded to the vpass base/member initializer.
// bc - bytecode container; bc->data() is stored in bc_data.
//
// All other members named in the initializer list are value-initialized.
// Only part of the constructor body is visible in this view; any condition
// guarding the bc->data() assignment is not shown.
459 bc_dump::bc_dump(shader
& s
, bytecode
* bc
) :
460 vpass(s
), bc_data(), ndw(), id(),
461 new_group(), group_index() {
464 bc_data
= bc
->data();
// Formats one fetch instruction as text and writes it, followed by a newline,
// to sblog.
//
// n - the fetch node whose bytecode fields (n.bc.*) are printed.
//
// Only part of this definition is visible in this view: several conditions,
// braces and output statements are missing, so the guards around some of the
// fragments below cannot be confirmed from here.
469 void bc_dump::dump(fetch_node
& n
) {
471 static const char * fetch_type
[] = {"VERTEX", "INSTANCE", ""};
// gds: non-zero when the opcode carries the FF_GDS flag.
472 unsigned gds
= n
.bc
.op_ptr
->flags
& FF_GDS
;
// gds_has_ret: GDS opcode whose op value lies in the inclusive range
// FETCH_OP_GDS_ADD_RET .. FETCH_OP_GDS_USHORT_READ_RET.
473 bool gds_has_ret
= gds
&& n
.bc
.op
>= FETCH_OP_GDS_ADD_RET
&&
474 n
.bc
.op
<= FETCH_OP_GDS_USHORT_READ_RET
;
// Logically equivalent to (!gds || gds_has_ret); the inner `gds &&` is
// redundant.
475 bool show_dst
= !gds
|| (gds
&& gds_has_ret
);
// Opcode name.
477 s
<< n
.bc
.op_ptr
->name
;
// Destination register and its four selector characters.
482 print_sel(s
, n
.bc
.dst_gpr
, n
.bc
.dst_rel
, INDEX_LOOP
, 0);
484 for (int k
= 0; k
< 4; ++k
)
485 s
<< chans
[n
.bc
.dst_sel
[k
]];
// Source register.
490 print_sel(s
, n
.bc
.src_gpr
, n
.bc
.src_rel
, INDEX_LOOP
, 0);
493 unsigned vtx
= n
.bc
.op_ptr
->flags
& FF_VTX
;
// Number of source selector characters printed: 3 for GDS opcodes; for
// FF_VTX opcodes 2 when ctx.is_cayman() and 1 otherwise; 4 for the rest.
494 unsigned num_src_comp
= gds
? 3 : vtx
? ctx
.is_cayman() ? 2 : 1 : 4;
496 for (unsigned k
= 0; k
< num_src_comp
; ++k
)
497 s
<< chans
[n
.bc
.src_sel
[k
]];
// FF_VTX opcodes with a non-zero offset[0] print it with a "b" suffix.
499 if (vtx
&& n
.bc
.offset
[0]) {
500 s
<< " + " << n
.bc
.offset
[0] << "b ";
504 s
<< ", RID:" << n
.bc
.resource_id
;
// UAV-related fields (the enclosing condition is not visible here).
507 s
<< " UAV:" << n
.bc
.uav_id
;
508 if (n
.bc
.uav_index_mode
)
509 s
<< " UAV:SQ_CF_INDEX_" << (n
.bc
.uav_index_mode
- V_SQ_CF_INDEX_0
);
510 if (n
.bc
.bcast_first_req
)
512 if (n
.bc
.alloc_consume
)
// Fetch-type name and related fields.
515 s
<< " " << fetch_type
[n
.bc
.fetch_type
];
// mega_fetch_count is printed only when ctx.is_cayman() is false.
516 if (!ctx
.is_cayman() && n
.bc
.mega_fetch_count
)
517 s
<< " MFC:" << n
.bc
.mega_fetch_count
;
518 if (n
.bc
.fetch_whole_quad
)
// Resource/sampler index modes are printed only when ctx.is_egcm() is true
// and the mode is non-zero, as an offset from V_SQ_CF_INDEX_0.
520 if (ctx
.is_egcm() && n
.bc
.resource_index_mode
)
521 s
<< " RIM:SQ_CF_INDEX_" << (n
.bc
.resource_index_mode
- V_SQ_CF_INDEX_0
);
522 if (ctx
.is_egcm() && n
.bc
.sampler_index_mode
)
523 s
<< " SID:SQ_CF_INDEX_" << (n
.bc
.sampler_index_mode
- V_SQ_CF_INDEX_0
);
// Format description fields.
525 s
<< " UCF:" << n
.bc
.use_const_fields
526 << " FMT(DTA:" << n
.bc
.data_format
527 << " NUM:" << n
.bc
.num_format_all
528 << " COMP:" << n
.bc
.format_comp_all
529 << " MODE:" << n
.bc
.srf_mode_all
<< ")";
// Sampler-related fields (the enclosing condition is not visible here).
531 s
<< ", SID:" << n
.bc
.sampler_id
;
533 s
<< " LB:" << n
.bc
.lod_bias
;
// Per-component coordinate type: "N" when coord_type[k] is non-zero, "U"
// otherwise.
535 for (unsigned k
= 0; k
< 4; ++k
)
536 s
<< (n
.bc
.coord_type
[k
] ? "N" : "U");
// Offsets for the first three components (a condition on the line between
// the loop header and the output statement is not visible here).
537 for (unsigned k
= 0; k
< 3; ++k
)
539 s
<< " O" << chans
[k
] << ":" << n
.bc
.offset
[k
];
540 if (ctx
.is_egcm() && n
.bc
.resource_index_mode
)
541 s
<< " RIM:SQ_CF_INDEX_" << (n
.bc
.resource_index_mode
- V_SQ_CF_INDEX_0
);
542 if (ctx
.is_egcm() && n
.bc
.sampler_index_mode
)
543 s
<< " SID:SQ_CF_INDEX_" << (n
.bc
.sampler_index_mode
- V_SQ_CF_INDEX_0
);
// Opcodes flagged FF_MEM print element size and array parameters;
// burst_count is printed only when non-zero.
546 if (n
.bc
.op_ptr
->flags
& FF_MEM
) {
547 s
<< ", ELEM_SIZE:" << n
.bc
.elem_size
;
552 if (n
.bc
.burst_count
)
553 s
<< ", BURST_COUNT:" << n
.bc
.burst_count
;
554 s
<< ", ARRAY_BASE:" << n
.bc
.array_base
;
555 s
<< ", ARRAY_SIZE:" << n
.bc
.array_size
;
// Emit the assembled line.
558 sblog
<< s
.str() << "\n";
// Prints raw bytecode dwords from bc_data to sblog.
//
// dw_id - index of the first dword in bc_data.
// count - number of dwords; the assert requires dw_id + count <= ndw.
//
// Only part of this definition is visible in this view; the loop (if any)
// around the hex output and any early-return conditions are not shown.
561 void bc_dump::dump_dw(unsigned dw_id
, unsigned count
) {
565 assert(dw_id
+ count
<= ndw
);
// Starting dword index, via print_zw with a width argument of 4.
567 sblog
.print_zw(dw_id
, 4);
// Dword value in hex via print_zw_hex with a width argument of 8; dw_id is
// post-incremented after the read.
570 sblog
.print_zw_hex(bc_data
[dw_id
++], 8);
575 } // namespace r600_sb