r600g/sb: move chip & class name functions to sb_context
src/gallium/drivers/r600/sb/sb_shader.cpp
/*
 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *      Vadim Girlin
 */

#include "sb_bc.h"
#include "sb_shader.h"
#include "sb_pass.h"

namespace r600_sb {

shader::shader(sb_context &sctx, shader_target t, unsigned id)
: ctx(sctx), next_temp_value_index(temp_regid_offset),
  prep_regs_count(), pred_sels(),
  regions(), inputs(), undef(), val_pool(sizeof(value)),
  pool(), all_nodes(), src_stats(), opt_stats(), errors(),
  optimized(), id(id),
  coal(*this), bbs(),
  target(t), vt(ex), ex(*this), root(),
  compute_interferences(),
  has_alu_predication(), uses_gradients(), safe_math(), ngpr(), nstack() {}

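// Try to assign an ALU slot to the instruction within the current group.
// Vector slots (x/y/z/w) are keyed by the destination channel; on chips
// with a trans unit (everything except Cayman), scalar-capable
// instructions fall back to the trans slot when the vector slot is
// unsupported or already taken. Returns false if no slot is available.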
bool shader::assign_slot(alu_node* n, alu_node *slots[5]) {
	unsigned slot_flags = ctx.alu_slots(n->bc.op);
	unsigned slot = n->bc.dst_chan;

	if (!ctx.is_cayman() && (!(slot_flags & AF_V) || slots[slot]) &&
			(slot_flags & AF_S))
		slot = SLOT_TRANS;

	if (slots[slot])
		return false;

	n->bc.slot = slot;
	slots[slot] = n;
	return true;
}

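// For each channel set in comp_mask, fetch the GPR value and pin it to its
// original register and channel so later passes can't relocate it. Used
// for values with a fixed hardware location, e.g. shader inputs.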
void shader::add_pinned_gpr_values(vvec& vec, unsigned gpr, unsigned comp_mask,
		bool src) {
	unsigned chan = 0;
	while (comp_mask) {
		if (comp_mask & 1) {
			value *v = get_gpr_value(src, gpr, chan, false);
			v->flags |= (VLF_PIN_REG | VLF_PIN_CHAN);
			if (!v->is_rel()) {
				v->gpr = v->pin_gpr = v->select;
				v->fix();
			}
			if (v->array && !v->array->gpr) {
				// if pinned value can be accessed with indirect addressing
				// pin the entire array to its original location
				v->array->gpr = v->array->base_gpr;
			}
			vec.push_back(v);
		}
		comp_mask >>= 1;
		++chan;
	}
}

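// Create a CF node that serves as an ALU/TEX/VTX clause container;
// instructions of the corresponding kind are later placed inside it.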
cf_node* shader::create_clause(node_subtype nst) {
	cf_node *n = create_cf();

	n->subtype = nst;

	switch (nst) {
	case NST_ALU_CLAUSE: n->bc.set_op(CF_OP_ALU); break;
	case NST_TEX_CLAUSE: n->bc.set_op(CF_OP_TEX); break;
	case NST_VTX_CLAUSE: n->bc.set_op(CF_OP_VTX); break;
	default: assert(!"invalid clause type"); break;
	}

	n->bc.barrier = 1;
	return n;
}

void shader::create_bbs() {
	create_bbs(root, bbs);
}

void shader::expand_bbs() {
	expand_bbs(bbs);
}

alu_node* shader::create_mov(value* dst, value* src) {
	alu_node *n = create_alu();
	n->bc.set_op(ALU_OP1_MOV);
	n->dst.push_back(dst);
	n->src.push_back(src);
	dst->def = n;

	return n;
}

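// Create a MOV known to be a plain copy: NF_DONT_HOIST keeps it in place,
// and when both operands qualify (is_sgpr()) an affinity edge with the
// given cost is registered with the coalescer, which will then try to
// allocate src and dst to the same register so the copy can be eliminated.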
alu_node* shader::create_copy_mov(value* dst, value* src, unsigned affcost) {
	alu_node *n = create_mov(dst, src);

	dst->assign_source(src);
	n->flags |= NF_COPY_MOV | NF_DONT_HOIST;

	if (affcost && dst->is_sgpr() && src->is_sgpr())
		coal.add_edge(src, dst, affcost);

	return n;
}

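// Look up (or create) the value for a (kind, register, version) triple.
// Version 0 of the preloaded registers lives in a preallocated slab of
// val_pool and is returned directly; everything else is memoized in
// reg_values under a key packing kind, version and sel_chan into one word.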
value* shader::get_value(value_kind kind, sel_chan id,
		unsigned version) {
	if (version == 0 && kind == VLK_REG && id.sel() < prep_regs_count)
		return val_pool[id - 1];

	unsigned key = (kind << 28) | (version << 16) | id;
	value_map::iterator i = reg_values.find(key);
	if (i != reg_values.end()) {
		return i->second;
	}
	value *v = create_value(kind, id, version);
	reg_values.insert(std::make_pair(key, v));
	return v;
}

value* shader::get_special_value(unsigned sv_id, unsigned version) {
	sel_chan id(sv_id, 0);
	return get_value(VLK_SPECIAL_REG, id, version);
}

void shader::fill_array_values(gpr_array *a, vvec &vv) {
	unsigned sz = a->array_size;
	vv.resize(sz);
	for (unsigned i = 0; i < a->array_size; ++i) {
		vv[i] = get_gpr_value(true, a->base_gpr.sel() + i, a->base_gpr.chan(),
				false);
	}
}

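// Get the value for a GPR operand. Relative (indirectly addressed)
// operands get a fresh VLK_REL_REG value tied to the address register:
// muse holds every array element possibly read through the index, and for
// destinations mdef additionally holds every element possibly written.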
value* shader::get_gpr_value(bool src, unsigned reg, unsigned chan, bool rel,
		unsigned version) {
	sel_chan id(reg, chan);
	value *v;
	gpr_array *a = get_gpr_array(reg, chan);
	if (rel) {
		assert(a);
		v = create_value(VLK_REL_REG, id, 0);
		v->rel = get_special_value(SV_AR_INDEX);
		fill_array_values(a, v->muse);
		if (!src)
			fill_array_values(a, v->mdef);
	} else {
		if (version == 0 && reg < prep_regs_count)
			return (val_pool[id - 1]);

		v = get_value(VLK_REG, id, version);
	}

	v->array = a;
	v->pin_gpr = v->select;

	return v;
}

value* shader::create_temp_value() {
	sel_chan id(++next_temp_value_index, 0);
	return get_value(VLK_TEMP, id, 0);
}

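// Constant-cache (kcache) values are read-only and memoized; the bank is
// packed into the select above the 12-bit line index.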
value* shader::get_kcache_value(unsigned bank, unsigned index, unsigned chan) {
	return get_ro_value(kcache_values, VLK_KCACHE,
			sel_chan((bank << 12) | index, chan));
}

void shader::add_input(unsigned gpr, bool preloaded, unsigned comp_mask) {
	if (inputs.size() <= gpr)
		inputs.resize(gpr + 1);

	shader_input &i = inputs[gpr];
	i.preloaded = preloaded;
	i.comp_mask = comp_mask;

	if (preloaded) {
		add_pinned_gpr_values(root->dst, gpr, comp_mask, true);
	}
}

void shader::init() {
	assert(!root);
	root = create_container();
}

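// Set up the CALL_FS interface for a vertex shader: inputs that are not
// preloaded are defined by the called fetch shader (destinations of the
// call), while preloaded ones are added as sources, presumably to keep
// their registers live across the call.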
void shader::init_call_fs(cf_node* cf) {
	unsigned gpr = 0;

	assert(target == TARGET_VS);

	for (inputs_vec::const_iterator I = inputs.begin(),
			E = inputs.end(); I != E; ++I, ++gpr) {
		if (!I->preloaded)
			add_pinned_gpr_values(cf->dst, gpr, I->comp_mask, false);
		else
			add_pinned_gpr_values(cf->src, gpr, I->comp_mask, true);
	}
}

void shader::set_undef(val_set& s) {
	value *undefined = get_undef_value();
	if (!undefined->gvn_source)
		vt.add_value(undefined);

	val_set &vs = s;

	for (val_set::iterator I = vs.begin(*this), E = vs.end(*this); I != E; ++I) {
		value *v = *I;

		assert(!v->is_readonly() && !v->is_rel());

		v->gvn_source = undefined->gvn_source;
	}
}

value* shader::create_value(value_kind k, sel_chan regid, unsigned ver) {
	value *v = val_pool.create(k, regid, ver);
	return v;
}

value* shader::get_undef_value() {
	if (!undef)
		undef = create_value(VLK_UNDEF, 0, 0);
	return undef;
}

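// Node factories: all IR nodes are placement-new'd into the memory pool
// and tracked in all_nodes so that ~shader can invoke their destructors
// explicitly (the pool releases memory but never runs destructors).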
node* shader::create_node(node_type nt, node_subtype nst, node_flags flags) {
	node *n = new (pool.allocate(sizeof(node))) node(nt, nst, flags);
	all_nodes.push_back(n);
	return n;
}

alu_node* shader::create_alu() {
	alu_node* n = new (pool.allocate(sizeof(alu_node))) alu_node();
	memset(&n->bc, 0, sizeof(bc_alu));
	all_nodes.push_back(n);
	return n;
}

alu_group_node* shader::create_alu_group() {
	alu_group_node* n =
			new (pool.allocate(sizeof(alu_group_node))) alu_group_node();
	all_nodes.push_back(n);
	return n;
}

alu_packed_node* shader::create_alu_packed() {
	alu_packed_node* n =
			new (pool.allocate(sizeof(alu_packed_node))) alu_packed_node();
	all_nodes.push_back(n);
	return n;
}

cf_node* shader::create_cf() {
	cf_node* n = new (pool.allocate(sizeof(cf_node))) cf_node();
	memset(&n->bc, 0, sizeof(bc_cf));
	n->bc.barrier = 1;
	all_nodes.push_back(n);
	return n;
}

fetch_node* shader::create_fetch() {
	fetch_node* n = new (pool.allocate(sizeof(fetch_node))) fetch_node();
	memset(&n->bc, 0, sizeof(bc_fetch));
	all_nodes.push_back(n);
	return n;
}

region_node* shader::create_region() {
	region_node *n = new (pool.allocate(sizeof(region_node)))
			region_node(regions.size());
	regions.push_back(n);
	all_nodes.push_back(n);
	return n;
}

depart_node* shader::create_depart(region_node* target) {
	depart_node* n = new (pool.allocate(sizeof(depart_node)))
			depart_node(target, target->departs.size());
	target->departs.push_back(n);
	all_nodes.push_back(n);
	return n;
}

repeat_node* shader::create_repeat(region_node* target) {
	repeat_node* n = new (pool.allocate(sizeof(repeat_node)))
			repeat_node(target, target->repeats.size() + 1);
	target->repeats.push_back(n);
	all_nodes.push_back(n);
	return n;
}

container_node* shader::create_container(node_type nt, node_subtype nst,
		node_flags flags) {
	container_node *n = new (pool.allocate(sizeof(container_node)))
			container_node(nt, nst, flags);
	all_nodes.push_back(n);
	return n;
}

if_node* shader::create_if() {
	if_node* n = new (pool.allocate(sizeof(if_node))) if_node();
	all_nodes.push_back(n);
	return n;
}

bb_node* shader::create_bb(unsigned id, unsigned loop_level) {
	bb_node* n = new (pool.allocate(sizeof(bb_node))) bb_node(id, loop_level);
	all_nodes.push_back(n);
	return n;
}

value* shader::get_special_ro_value(unsigned sel) {
	return get_ro_value(special_ro_values, VLK_PARAM, sel);
}

value* shader::get_const_value(const literal &v) {
	value *val = get_ro_value(const_values, VLK_CONST, v);
	val->literal_value = v;
	return val;
}

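// Pool-allocated nodes are destroyed explicitly since the pool only frees
// their memory; the gpr_array objects are the only real heap allocations.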
shader::~shader() {
	for (node_vec::iterator I = all_nodes.begin(), E = all_nodes.end();
			I != E; ++I)
		(*I)->~node();

	for (gpr_array_vec::iterator I = gpr_arrays.begin(), E = gpr_arrays.end();
			I != E; ++I) {
		delete *I;
	}
}

void shader::dump_ir() {
	if (ctx.dump_pass)
		dump(*this).run();
}

value* shader::get_value_version(value* v, unsigned ver) {
	assert(!v->is_readonly() && !v->is_rel());
	value *vv = get_value(v->kind, v->select, ver);
	assert(vv);

	if (v->array) {
		vv->array = v->array;
	}

	return vv;
}

gpr_array* shader::get_gpr_array(unsigned reg, unsigned chan) {
	for (regarray_vec::iterator I = gpr_arrays.begin(),
			E = gpr_arrays.end(); I != E; ++I) {
		gpr_array* a = *I;
		unsigned achan = a->base_gpr.chan();
		unsigned areg = a->base_gpr.sel();
		if (achan == chan && (reg >= areg && reg < areg + a->array_size))
			return a;
	}
	return NULL;
}

void shader::add_gpr_array(unsigned gpr_start, unsigned gpr_count,
		unsigned comp_mask) {
	unsigned chan = 0;
	while (comp_mask) {
		if (comp_mask & 1) {
			gpr_array *a = new gpr_array(
					sel_chan(gpr_start, chan), gpr_count);

			SB_DUMP_PASS( sblog << "add_gpr_array: @" << a->base_gpr
					<< " [" << a->array_size << "]\n";
			);

			gpr_arrays.push_back(a);
		}
		comp_mask >>= 1;
		++chan;
	}
}

value* shader::get_pred_sel(int sel) {
	assert(sel == 0 || sel == 1);
	if (!pred_sels[sel])
		pred_sels[sel] = get_const_value(sel);

	return pred_sels[sel];
}

cf_node* shader::create_cf(unsigned op) {
	cf_node *c = create_cf();
	c->bc.set_op(op);
	c->bc.barrier = 1;
	return c;
}

std::string shader::get_full_target_name() {
	std::string s = get_shader_target_name();
	s += "/";
	s += ctx.get_hw_chip_name();
	s += "/";
	s += ctx.get_hw_class_name();
	return s;
}

const char* shader::get_shader_target_name() {
	switch (target) {
	case TARGET_VS: return "VS";
	case TARGET_PS: return "PS";
	case TARGET_GS: return "GS";
	case TARGET_COMPUTE: return "COMPUTE";
	case TARGET_FETCH: return "FETCH";
	default:
		return "INVALID_TARGET";
	}
}

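// Expand a depart/repeat back into its target region and cut away
// everything that follows it in the parent container, since that code can
// no longer be reached.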
void shader::simplify_dep_rep(node* dr) {
	container_node *p = dr->parent;
	if (p->is_repeat()) {
		repeat_node *r = static_cast<repeat_node*>(p);
		r->target->expand_repeat(r);
	} else if (p->is_depart()) {
		depart_node *d = static_cast<depart_node*>(p);
		d->target->expand_depart(d);
	}
	if (dr->next)
		dr->parent->cut(dr->next, NULL);
}

// FIXME this is used in some places as the max non-temp gpr,
// (MAX_GPR - 2 * ctx.alu_temp_gprs) should be used for that instead.
unsigned shader::first_temp_gpr() {
	return MAX_GPR - ctx.alu_temp_gprs;
}

unsigned shader::num_nontemp_gpr() {
	return MAX_GPR - 2 * ctx.alu_temp_gprs;
}

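// Shaders that use KILL depend on the valid mask; model this as an extra
// source of the root node so the dependency is visible to the optimization
// passes.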
void shader::set_uses_kill() {
	if (root->src.empty())
		root->src.resize(1);

	if (!root->src[0])
		root->src[0] = get_special_value(SV_VALID_MASK);
}

alu_node* shader::clone(alu_node* n) {
	alu_node *c = create_alu();

	// FIXME: this may be wrong with indirect operands
	c->src = n->src;
	c->dst = n->dst;

	c->bc = n->bc;
	c->pred = n->pred;

	return c;
}

void shader::collect_stats(bool opt) {
	if (!sb_context::dump_stat)
		return;

	shader_stats &s = opt ? opt_stats : src_stats;

	s.shaders = 1;
	s.ngpr = ngpr;
	s.nstack = nstack;
	s.collect(root);

	if (opt)
		ctx.opt_stats.accumulate(s);
	else
		ctx.src_stats.accumulate(s);
}

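// Memoize a read-only value (constant, kcache line, param) in the given map.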
value* shader::get_ro_value(value_map& vm, value_kind vk, unsigned key) {
	value_map::iterator I = vm.find(key);
	if (I != vm.end())
		return I->second;
	value *v = create_value(vk, key, 0);
	v->flags = VLF_READONLY;
	vm.insert(std::make_pair(key, v));
	return v;
}

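// Group maximal runs of plain instructions (NT_OP) into bb_node containers,
// recursing into nested containers and bumping loop_level for loop regions.
// The walk stops at a depart since nothing after it in the container is
// reachable, and an empty block is appended after a trailing 'if' so the
// container does not end on a branch.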
void shader::create_bbs(container_node* n, bbs_vec &bbs, int loop_level) {
	bool inside_bb = false;
	bool last_inside_bb = true;
	node_iterator bb_start(n->begin()), I(bb_start), E(n->end());

	for (; I != E; ++I) {
		node *k = *I;
		inside_bb = k->type == NT_OP;

		if (inside_bb && !last_inside_bb)
			bb_start = I;
		else if (!inside_bb) {
			if (last_inside_bb
					&& I->type != NT_REPEAT
					&& I->type != NT_DEPART
					&& I->type != NT_IF) {
				bb_node *bb = create_bb(bbs.size(), loop_level);
				bbs.push_back(bb);
				n->insert_node_before(*bb_start, bb);
				if (bb_start != I)
					bb->move(bb_start, I);
			}

			if (k->is_container()) {
				bool loop = false;
				if (k->type == NT_REGION) {
					loop = static_cast<region_node*>(k)->is_loop();
				}

				create_bbs(static_cast<container_node*>(k), bbs,
						loop_level + loop);
			}
		}

		if (k->type == NT_DEPART)
			return;

		last_inside_bb = inside_bb;
	}

	if (last_inside_bb) {
		bb_node *bb = create_bb(bbs.size(), loop_level);
		bbs.push_back(bb);
		if (n->empty())
			n->push_back(bb);
		else {
			n->insert_node_before(*bb_start, bb);
			if (bb_start != n->end())
				bb->move(bb_start, n->end());
		}
	} else {
		if (n->last && n->last->type == NT_IF) {
			bb_node *bb = create_bb(bbs.size(), loop_level);
			bbs.push_back(bb);
			n->push_back(bb);
		}
	}
}

void shader::expand_bbs(bbs_vec &bbs) {
	for (bbs_vec::iterator I = bbs.begin(), E = bbs.end(); I != E; ++I) {
		bb_node *b = *I;
		b->expand();
	}
}

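// Map an instruction to its scheduling queue. Vertex fetches get their own
// queue only on r600-class chips; everywhere else all fetches go through
// the TEX queue.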
sched_queue_id shader::get_queue_id(node* n) {
	switch (n->subtype) {
	case NST_ALU_INST:
	case NST_ALU_PACKED_INST:
	case NST_COPY:
	case NST_PSI:
		return SQ_ALU;
	case NST_FETCH_INST: {
		fetch_node *f = static_cast<fetch_node*>(n);
		if (ctx.is_r600() && (f->bc.op_ptr->flags & FF_VTX))
			return SQ_VTX;
		return SQ_TEX;
	}
	case NST_CF_INST:
		return SQ_CF;
	default:
		assert(0);
		return SQ_NUM;
	}
}

void shader_stats::collect(node *n) {
	if (n->is_alu_inst())
		++alu;
	else if (n->is_fetch_inst())
		++fetch;
	else if (n->is_container()) {
		container_node *c = static_cast<container_node*>(n);

		if (n->is_alu_group())
			++alu_groups;
		else if (n->is_alu_clause())
			++alu_clauses;
		else if (n->is_fetch_clause())
			++fetch_clauses;
		else if (n->is_cf_inst())
			++cf;

		if (!c->empty()) {
			for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) {
				collect(*I);
			}
		}
	}
}

void shader_stats::accumulate(shader_stats& s) {
	++shaders;
	ndw += s.ndw;
	ngpr += s.ngpr;
	nstack += s.nstack;

	alu += s.alu;
	alu_groups += s.alu_groups;
	alu_clauses += s.alu_clauses;
	fetch += s.fetch;
	fetch_clauses += s.fetch_clauses;
	cf += s.cf;
}

void shader_stats::dump() {
	sblog << "dw:" << ndw << ", gpr:" << ngpr << ", stk:" << nstack
			<< ", alu groups:" << alu_groups << ", alu clauses: " << alu_clauses
			<< ", alu:" << alu << ", fetch:" << fetch
			<< ", fetch clauses:" << fetch_clauses
			<< ", cf:" << cf;

	if (shaders > 1)
		sblog << ", shaders:" << shaders;

	sblog << "\n";
}

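// Print the relative change between two counters as a percentage; "N/A"
// when the original count was zero.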
static void print_diff(unsigned d1, unsigned d2) {
	if (d1)
		sblog << ((int)d2 - (int)d1) * 100 / (int)d1 << "%";
	else if (d2)
		sblog << "N/A";
	else
		sblog << "0%";
}

void shader_stats::dump_diff(shader_stats& s) {
	sblog << "dw:"; print_diff(ndw, s.ndw);
	sblog << ", gpr:"; print_diff(ngpr, s.ngpr);
	sblog << ", stk:"; print_diff(nstack, s.nstack);
	sblog << ", alu groups:"; print_diff(alu_groups, s.alu_groups);
	sblog << ", alu clauses: "; print_diff(alu_clauses, s.alu_clauses);
	sblog << ", alu:"; print_diff(alu, s.alu);
	sblog << ", fetch:"; print_diff(fetch, s.fetch);
	sblog << ", fetch clauses:"; print_diff(fetch_clauses, s.fetch_clauses);
	sblog << ", cf:"; print_diff(cf, s.cf);
	sblog << "\n";
}

} // namespace r600_sb