// mesa.git: src/gallium/drivers/r600/sb/sb_shader.cpp
/*
 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *      Vadim Girlin
 */

#include "sb_bc.h"
#include "sb_shader.h"
#include "sb_pass.h"

namespace r600_sb {

shader::shader(sb_context &sctx, shader_target t, unsigned id)
    : ctx(sctx), next_temp_value_index(temp_regid_offset),
      prep_regs_count(), pred_sels(),
      regions(), inputs(), undef(), val_pool(sizeof(value)),
      pool(), all_nodes(), src_stats(), opt_stats(), errors(),
      optimized(), id(id),
      coal(*this), bbs(),
      target(t), vt(ex), ex(*this), root(),
      compute_interferences(),
      has_alu_predication(), uses_gradients(), safe_math(), ngpr(), nstack() {}

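// Try to assign an ALU slot to the instruction. Vector slots are chosen by
// the destination channel; if the op can't use a vector slot (or that slot
// is already taken) and it supports the scalar unit, fall back to the trans
// slot (not available on Cayman, which has no trans unit). Returns false if
// no suitable slot is free.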
bool shader::assign_slot(alu_node* n, alu_node *slots[5]) {

    unsigned slot_flags = ctx.alu_slots(n->bc.op);
    unsigned slot = n->bc.dst_chan;

    if (!ctx.is_cayman() && (!(slot_flags & AF_V) || slots[slot]) &&
            (slot_flags & AF_S))
        slot = SLOT_TRANS;

    if (slots[slot])
        return false;

    n->bc.slot = slot;
    slots[slot] = n;
    return true;
}

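// For each channel set in comp_mask, get the value for GPR 'gpr' and pin it
// to its original register and channel, appending it to vec. Used for values
// that must live in fixed hardware registers (e.g. shader inputs).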
void shader::add_pinned_gpr_values(vvec& vec, unsigned gpr, unsigned comp_mask,
                                   bool src) {
    unsigned chan = 0;
    while (comp_mask) {
        if (comp_mask & 1) {
            value *v = get_gpr_value(src, gpr, chan, false);
            v->flags |= (VLF_PIN_REG | VLF_PIN_CHAN);
            if (!v->is_rel()) {
                v->gpr = v->pin_gpr = v->select;
                v->fix();
            }
            if (v->array && !v->array->gpr) {
                // if a pinned value can be accessed with indirect addressing,
                // pin the entire array to its original location
                v->array->gpr = v->array->base_gpr;
            }
            vec.push_back(v);
        }
        comp_mask >>= 1;
        ++chan;
    }
}

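// Create a CF node for a clause of the given subtype (ALU, TEX or VTX).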
cf_node* shader::create_clause(node_subtype nst) {
    cf_node *n = create_cf();

    n->subtype = nst;

    switch (nst) {
    case NST_ALU_CLAUSE: n->bc.set_op(CF_OP_ALU); break;
    case NST_TEX_CLAUSE: n->bc.set_op(CF_OP_TEX); break;
    case NST_VTX_CLAUSE: n->bc.set_op(CF_OP_VTX); break;
    default: assert(!"invalid clause type"); break;
    }

    n->bc.barrier = 1;
    return n;
}

void shader::create_bbs() {
    create_bbs(root, bbs);
}

void shader::expand_bbs() {
    expand_bbs(bbs);
}

alu_node* shader::create_mov(value* dst, value* src) {
    alu_node *n = create_alu();
    n->bc.set_op(ALU_OP1_MOV);
    n->dst.push_back(dst);
    n->src.push_back(src);
    dst->def = n;

    return n;
}

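// Create a copy (a MOV that doesn't change the value), e.g. for live-range
// splitting. The node is marked so it won't be hoisted, and if both values
// are allocatable GPR values an affinity edge with the given cost is
// recorded so the coalescer will try to put them in the same register.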
alu_node* shader::create_copy_mov(value* dst, value* src, unsigned affcost) {
    alu_node *n = create_mov(dst, src);

    dst->assign_source(src);
    n->flags |= NF_COPY_MOV | NF_DONT_HOIST;

    if (affcost && dst->is_sgpr() && src->is_sgpr())
        coal.add_edge(src, dst, affcost);

    return n;
}

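// Look up or create the value for a given kind/register/version. Values are
// interned in reg_values, keyed by the packed (kind, version, sel_chan)
// triple, so each triple maps to a single value object.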
value* shader::get_value(value_kind kind, sel_chan id,
                         unsigned version) {
    if (version == 0 && kind == VLK_REG && id.sel() < prep_regs_count)
        return val_pool[id - 1];

    unsigned key = (kind << 28) | (version << 16) | id;
    value_map::iterator i = reg_values.find(key);
    if (i != reg_values.end()) {
        return i->second;
    }
    value *v = create_value(kind, id, version);
    reg_values.insert(std::make_pair(key, v));
    return v;
}

value* shader::get_special_value(unsigned sv_id, unsigned version) {
    sel_chan id(sv_id, 0);
    return get_value(VLK_SPECIAL_REG, id, version);
}

void shader::fill_array_values(gpr_array *a, vvec &vv) {
    unsigned sz = a->array_size;
    vv.resize(sz);
    for (unsigned i = 0; i < sz; ++i) {
        vv[i] = get_gpr_value(true, a->base_gpr.sel() + i, a->base_gpr.chan(),
                              false);
    }
}

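// Get the value for a GPR access. For relative (indirectly addressed)
// access a fresh VLK_REL_REG value is created: its address is the AR index
// special value, and its may-use (and, for destinations, may-def) set
// covers every element of the containing array.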
value* shader::get_gpr_value(bool src, unsigned reg, unsigned chan, bool rel,
                             unsigned version) {
    sel_chan id(reg, chan);
    value *v;
    gpr_array *a = get_gpr_array(reg, chan);
    if (rel) {
        assert(a);
        v = create_value(VLK_REL_REG, id, 0);
        v->rel = get_special_value(SV_AR_INDEX);
        fill_array_values(a, v->muse);
        if (!src)
            fill_array_values(a, v->mdef);
    } else {
        if (version == 0 && reg < prep_regs_count)
            return val_pool[id - 1];

        v = get_value(VLK_REG, id, version);
    }

    v->array = a;
    v->pin_gpr = v->select;

    return v;
}

value* shader::create_temp_value() {
    sel_chan id(++next_temp_value_index, 0);
    return get_value(VLK_TEMP, id, 0);
}

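// Constant cache reads are interned with the bank packed into the upper
// bits of the select: (bank << 12) | index.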
value* shader::get_kcache_value(unsigned bank, unsigned index, unsigned chan) {
    return get_ro_value(kcache_values, VLK_KCACHE,
                        sel_chan((bank << 12) | index, chan));
}

void shader::add_input(unsigned gpr, bool preloaded, unsigned comp_mask) {
    if (inputs.size() <= gpr)
        inputs.resize(gpr + 1);

    shader_input &i = inputs[gpr];
    i.preloaded = preloaded;
    i.comp_mask = comp_mask;

    if (preloaded) {
        add_pinned_gpr_values(root->dst, gpr, comp_mask, true);
    }
}

void shader::init() {
    assert(!root);
    root = create_container();
}

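// Set up the operands of the CALL_FS node for a vertex shader: inputs
// written by the fetch shader become destinations of the call, preloaded
// inputs become sources.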
void shader::init_call_fs(cf_node* cf) {
    unsigned gpr = 0;

    assert(target == TARGET_VS);

    for (inputs_vec::const_iterator I = inputs.begin(),
            E = inputs.end(); I != E; ++I, ++gpr) {
        if (!I->preloaded)
            add_pinned_gpr_values(cf->dst, gpr, I->comp_mask, false);
        else
            add_pinned_gpr_values(cf->src, gpr, I->comp_mask, true);
    }
}

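// Mark every value in the set as undefined by pointing its GVN source at
// the canonical undef value's source.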
void shader::set_undef(val_set& s) {
    value *undefined = get_undef_value();
    if (!undefined->gvn_source)
        vt.add_value(undefined);

    val_set &vs = s;

    for (val_set::iterator I = vs.begin(*this), E = vs.end(*this); I != E; ++I) {
        value *v = *I;

        assert(!v->is_readonly() && !v->is_rel());

        v->gvn_source = undefined->gvn_source;
    }
}

value* shader::create_value(value_kind k, sel_chan regid, unsigned ver) {
    value *v = val_pool.create(k, regid, ver);
    return v;
}

value* shader::get_undef_value() {
    if (!undef)
        undef = create_value(VLK_UNDEF, 0, 0);
    return undef;
}

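// Nodes are placement-new'd into the shader's memory pool and tracked in
// all_nodes so that ~shader can run their destructors explicitly (the pool
// itself never calls them).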
node* shader::create_node(node_type nt, node_subtype nst, node_flags flags) {
    node *n = new (pool.allocate(sizeof(node))) node(nt, nst, flags);
    all_nodes.push_back(n);
    return n;
}

alu_node* shader::create_alu() {
    alu_node* n = new (pool.allocate(sizeof(alu_node))) alu_node();
    all_nodes.push_back(n);
    return n;
}

alu_group_node* shader::create_alu_group() {
    alu_group_node* n =
            new (pool.allocate(sizeof(alu_group_node))) alu_group_node();
    all_nodes.push_back(n);
    return n;
}

alu_packed_node* shader::create_alu_packed() {
    alu_packed_node* n =
            new (pool.allocate(sizeof(alu_packed_node))) alu_packed_node();
    all_nodes.push_back(n);
    return n;
}

cf_node* shader::create_cf() {
    cf_node* n = new (pool.allocate(sizeof(cf_node))) cf_node();
    n->bc.barrier = 1;
    all_nodes.push_back(n);
    return n;
}

fetch_node* shader::create_fetch() {
    fetch_node* n = new (pool.allocate(sizeof(fetch_node))) fetch_node();
    all_nodes.push_back(n);
    return n;
}

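// Regions with depart/repeat nodes model structured control flow: a depart
// transfers control past the end of its target region, a repeat transfers
// control back to its beginning. Each gets a sequential index within the
// target region (repeat indices start at 1).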
region_node* shader::create_region() {
    region_node *n = new (pool.allocate(sizeof(region_node)))
            region_node(regions.size());
    regions.push_back(n);
    all_nodes.push_back(n);
    return n;
}

depart_node* shader::create_depart(region_node* target) {
    depart_node* n = new (pool.allocate(sizeof(depart_node)))
            depart_node(target, target->departs.size());
    target->departs.push_back(n);
    all_nodes.push_back(n);
    return n;
}

repeat_node* shader::create_repeat(region_node* target) {
    repeat_node* n = new (pool.allocate(sizeof(repeat_node)))
            repeat_node(target, target->repeats.size() + 1);
    target->repeats.push_back(n);
    all_nodes.push_back(n);
    return n;
}

container_node* shader::create_container(node_type nt, node_subtype nst,
                                         node_flags flags) {
    container_node *n = new (pool.allocate(sizeof(container_node)))
            container_node(nt, nst, flags);
    all_nodes.push_back(n);
    return n;
}

if_node* shader::create_if() {
    if_node* n = new (pool.allocate(sizeof(if_node))) if_node();
    all_nodes.push_back(n);
    return n;
}

bb_node* shader::create_bb(unsigned id, unsigned loop_level) {
    bb_node* n = new (pool.allocate(sizeof(bb_node))) bb_node(id, loop_level);
    all_nodes.push_back(n);
    return n;
}

value* shader::get_special_ro_value(unsigned sel) {
    return get_ro_value(special_ro_values, VLK_PARAM, sel);
}

value* shader::get_const_value(const literal &v) {
    value *val = get_ro_value(const_values, VLK_CONST, v);
    val->literal_value = v;
    return val;
}

shader::~shader() {
    for (node_vec::iterator I = all_nodes.begin(), E = all_nodes.end();
            I != E; ++I)
        (*I)->~node();

    for (gpr_array_vec::iterator I = gpr_arrays.begin(), E = gpr_arrays.end();
            I != E; ++I) {
        delete *I;
    }
}

void shader::dump_ir() {
    if (ctx.dump_pass)
        dump(*this).run();
}

value* shader::get_value_version(value* v, unsigned ver) {
    assert(!v->is_readonly() && !v->is_rel());
    value *vv = get_value(v->kind, v->select, ver);
    assert(vv);

    if (v->array) {
        vv->array = v->array;
    }

    return vv;
}

gpr_array* shader::get_gpr_array(unsigned reg, unsigned chan) {

    for (regarray_vec::iterator I = gpr_arrays.begin(),
            E = gpr_arrays.end(); I != E; ++I) {
        gpr_array* a = *I;
        unsigned achan = a->base_gpr.chan();
        unsigned areg = a->base_gpr.sel();
        if (achan == chan && (reg >= areg && reg < areg + a->array_size))
            return a;
    }
    return NULL;
}

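// Register one gpr_array per channel set in comp_mask, each spanning
// gpr_count registers starting at gpr_start. These arrays are the targets
// of indirect (relative) addressing.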
void shader::add_gpr_array(unsigned gpr_start, unsigned gpr_count,
                           unsigned comp_mask) {
    unsigned chan = 0;
    while (comp_mask) {
        if (comp_mask & 1) {
            gpr_array *a = new gpr_array(
                    sel_chan(gpr_start, chan), gpr_count);

            SB_DUMP_PASS( sblog << "add_gpr_array: @" << a->base_gpr
                    << " [" << a->array_size << "]\n";
            );

            gpr_arrays.push_back(a);
        }
        comp_mask >>= 1;
        ++chan;
    }
}

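// Constant 0/1 values used as ALU predicate selects, cached per shader.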
value* shader::get_pred_sel(int sel) {
    assert(sel == 0 || sel == 1);
    if (!pred_sels[sel])
        pred_sels[sel] = get_const_value(sel);

    return pred_sels[sel];
}

cf_node* shader::create_cf(unsigned op) {
    cf_node *c = create_cf();
    c->bc.set_op(op);
    c->bc.barrier = 1;
    return c;
}

std::string shader::get_full_target_name() {
    std::string s = get_shader_target_name();
    s += "/";
    s += ctx.get_hw_chip_name();
    s += "/";
    s += ctx.get_hw_class_name();
    return s;
}

const char* shader::get_shader_target_name() {
    switch (target) {
    case TARGET_VS: return "VS";
    case TARGET_PS: return "PS";
    case TARGET_GS: return "GS";
    case TARGET_COMPUTE: return "COMPUTE";
    case TARGET_FETCH: return "FETCH";
    default:
        return "INVALID_TARGET";
    }
}

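// If dr's parent is a repeat or depart container, expand that construct in
// its target region; then cut away everything following dr in its parent
// (unreachable, as dr transfers control unconditionally).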
void shader::simplify_dep_rep(node* dr) {
    container_node *p = dr->parent;
    if (p->is_repeat()) {
        repeat_node *r = static_cast<repeat_node*>(p);
        r->target->expand_repeat(r);
    } else if (p->is_depart()) {
        depart_node *d = static_cast<depart_node*>(p);
        d->target->expand_depart(d);
    }
    if (dr->next)
        dr->parent->cut(dr->next, NULL);
}

// FIXME this is used in some places as the max non-temp gpr,
// (MAX_GPR - 2 * ctx.alu_temp_gprs) should be used for that instead.
unsigned shader::first_temp_gpr() {
    return MAX_GPR - ctx.alu_temp_gprs;
}

unsigned shader::num_nontemp_gpr() {
    return MAX_GPR - 2 * ctx.alu_temp_gprs;
}

void shader::set_uses_kill() {
    if (root->src.empty())
        root->src.resize(1);

    if (!root->src[0])
        root->src[0] = get_special_value(SV_VALID_MASK);
}

alu_node* shader::clone(alu_node* n) {
    alu_node *c = create_alu();

    // FIXME: this may be wrong with indirect operands
    c->src = n->src;
    c->dst = n->dst;

    c->bc = n->bc;
    c->pred = n->pred;

    return c;
}

void shader::collect_stats(bool opt) {
    if (!sb_context::dump_stat)
        return;

    shader_stats &s = opt ? opt_stats : src_stats;

    s.shaders = 1;
    s.ngpr = ngpr;
    s.nstack = nstack;
    s.collect(root);

    if (opt)
        ctx.opt_stats.accumulate(s);
    else
        ctx.src_stats.accumulate(s);
}

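// Intern a read-only value (constant, kcache, param) in the given map so
// each key maps to a single shared value object.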
value* shader::get_ro_value(value_map& vm, value_kind vk, unsigned key) {
    value_map::iterator I = vm.find(key);
    if (I != vm.end())
        return I->second;
    value *v = create_value(vk, key, 0);
    v->flags = VLF_READONLY;
    vm.insert(std::make_pair(key, v));
    return v;
}

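// Recursively partition the IR into basic blocks: each maximal run of plain
// instruction nodes (NT_OP) is wrapped in a bb_node, while container nodes
// are recursed into, bumping loop_level for regions that form loops. A
// trailing block is added if the container ends inside a bb or with an
// if-node.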
void shader::create_bbs(container_node* n, bbs_vec &bbs, int loop_level) {

    bool inside_bb = false;
    bool last_inside_bb = true;
    node_iterator bb_start(n->begin()), I(bb_start), E(n->end());

    for (; I != E; ++I) {
        node *k = *I;
        inside_bb = k->type == NT_OP;

        if (inside_bb && !last_inside_bb)
            bb_start = I;
        else if (!inside_bb) {
            if (last_inside_bb
                    && I->type != NT_REPEAT
                    && I->type != NT_DEPART
                    && I->type != NT_IF) {
                bb_node *bb = create_bb(bbs.size(), loop_level);
                bbs.push_back(bb);
                n->insert_node_before(*bb_start, bb);
                if (bb_start != I)
                    bb->move(bb_start, I);
            }

            if (k->is_container()) {

                bool loop = false;
                if (k->type == NT_REGION) {
                    loop = static_cast<region_node*>(k)->is_loop();
                }

                create_bbs(static_cast<container_node*>(k), bbs,
                           loop_level + loop);
            }
        }

        last_inside_bb = inside_bb;
    }

    if (last_inside_bb) {
        bb_node *bb = create_bb(bbs.size(), loop_level);
        bbs.push_back(bb);
        if (n->empty())
            n->push_back(bb);
        else {
            n->insert_node_before(*bb_start, bb);
            if (bb_start != n->end())
                bb->move(bb_start, n->end());
        }
    } else {
        if (n->last && n->last->type == NT_IF) {
            bb_node *bb = create_bb(bbs.size(), loop_level);
            bbs.push_back(bb);
            n->push_back(bb);
        }
    }
}

void shader::expand_bbs(bbs_vec &bbs) {

    for (bbs_vec::iterator I = bbs.begin(), E = bbs.end(); I != E; ++I) {
        bb_node *b = *I;
        b->expand();
    }
}

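// Map an instruction to its scheduling queue. Fetch instructions go to the
// separate VTX queue only on r600-class chips, where vertex fetch uses a
// dedicated clause type; elsewhere they share the TEX queue.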
sched_queue_id shader::get_queue_id(node* n) {
    switch (n->subtype) {
    case NST_ALU_INST:
    case NST_ALU_PACKED_INST:
    case NST_COPY:
    case NST_PSI:
        return SQ_ALU;
    case NST_FETCH_INST: {
        fetch_node *f = static_cast<fetch_node*>(n);
        if (ctx.is_r600() && (f->bc.op_ptr->flags & FF_VTX))
            return SQ_VTX;
        return SQ_TEX;
    }
    case NST_CF_INST:
        return SQ_CF;
    default:
        assert(0);
        return SQ_NUM;
    }
}

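// Walk the IR and count instructions, ALU groups and clauses for the
// shader statistics.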
void shader_stats::collect(node *n) {
    if (n->is_alu_inst())
        ++alu;
    else if (n->is_fetch_inst())
        ++fetch;
    else if (n->is_container()) {
        container_node *c = static_cast<container_node*>(n);

        if (n->is_alu_group())
            ++alu_groups;
        else if (n->is_alu_clause())
            ++alu_clauses;
        else if (n->is_fetch_clause())
            ++fetch_clauses;
        else if (n->is_cf_inst())
            ++cf;

        if (!c->empty()) {
            for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) {
                collect(*I);
            }
        }
    }
}

void shader_stats::accumulate(shader_stats& s) {
    ++shaders;
    ndw += s.ndw;
    ngpr += s.ngpr;
    nstack += s.nstack;

    alu += s.alu;
    alu_groups += s.alu_groups;
    alu_clauses += s.alu_clauses;
    fetch += s.fetch;
    fetch_clauses += s.fetch_clauses;
    cf += s.cf;
}

void shader_stats::dump() {
    sblog << "dw:" << ndw << ", gpr:" << ngpr << ", stk:" << nstack
            << ", alu groups:" << alu_groups << ", alu clauses: " << alu_clauses
            << ", alu:" << alu << ", fetch:" << fetch
            << ", fetch clauses:" << fetch_clauses
            << ", cf:" << cf;

    if (shaders > 1)
        sblog << ", shaders:" << shaders;

    sblog << "\n";
}

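// Print the relative change from d1 to d2 as a percentage; "N/A" when the
// baseline is zero but the new value isn't.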
static void print_diff(unsigned d1, unsigned d2) {
    if (d1)
        sblog << ((int)d2 - (int)d1) * 100 / (int)d1 << "%";
    else if (d2)
        sblog << "N/A";
    else
        sblog << "0%";
}

void shader_stats::dump_diff(shader_stats& s) {
    sblog << "dw:"; print_diff(ndw, s.ndw);
    sblog << ", gpr:"; print_diff(ngpr, s.ngpr);
    sblog << ", stk:"; print_diff(nstack, s.nstack);
    sblog << ", alu groups:"; print_diff(alu_groups, s.alu_groups);
    sblog << ", alu clauses: "; print_diff(alu_clauses, s.alu_clauses);
    sblog << ", alu:"; print_diff(alu, s.alu);
    sblog << ", fetch:"; print_diff(fetch, s.fetch);
    sblog << ", fetch clauses:"; print_diff(fetch_clauses, s.fetch_clauses);
    sblog << ", cf:"; print_diff(cf, s.cf);
    sblog << "\n";
}

} // namespace r600_sb