/*
 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *      Vadim Girlin
 */

#include "sb_bc.h"
#include "sb_shader.h"
#include "sb_pass.h"

namespace r600_sb {

shader::shader(sb_context &sctx, shader_target t, unsigned id)
	: ctx(sctx), next_temp_value_index(temp_regid_offset),
	  prep_regs_count(), pred_sels(),
	  regions(), inputs(), undef(), val_pool(sizeof(value)),
	  pool(), all_nodes(), src_stats(), opt_stats(), errors(),
	  optimized(), id(id),
	  coal(*this), bbs(),
	  target(t), vt(ex), ex(*this), root(),
	  compute_interferences(),
	  has_alu_predication(),
	  uses_gradients(), safe_math(), ngpr(), nstack(), dce_flags() {}
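
// Try to assign an ALU instruction to a VLIW slot. Ops that can use a
// vector slot go to the slot matching their destination channel; ops that
// are scalar-only, or whose vector slot is already taken, fall back to the
// trans slot on chips that have one (i.e. not Cayman). Returns false if
// the chosen slot is already occupied.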
bool shader::assign_slot(alu_node* n, alu_node *slots[5]) {

	unsigned slot_flags = ctx.alu_slots(n->bc.op);
	unsigned slot = n->bc.dst_chan;

	if (!ctx.is_cayman() && (!(slot_flags & AF_V) || slots[slot]) &&
			(slot_flags & AF_S))
		slot = SLOT_TRANS;

	if (slots[slot])
		return false;

	n->bc.slot = slot;
	slots[slot] = n;
	return true;
}
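
// For each channel set in comp_mask, create the value for the given GPR
// channel, pin it to its register and channel, and append it to vec.
// If the value belongs to an indirectly addressable array, the whole
// array is pinned to its original location as well.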
void shader::add_pinned_gpr_values(vvec& vec, unsigned gpr, unsigned comp_mask,
		bool src) {
	unsigned chan = 0;
	while (comp_mask) {
		if (comp_mask & 1) {
			value *v = get_gpr_value(src, gpr, chan, false);
			v->flags |= (VLF_PIN_REG | VLF_PIN_CHAN);
			if (!v->is_rel()) {
				v->gpr = v->pin_gpr = v->select;
				v->fix();
			}
			if (v->array && !v->array->gpr) {
				// if pinned value can be accessed with indirect addressing
				// pin the entire array to its original location
				v->array->gpr = v->array->base_gpr;
			}
			vec.push_back(v);
		}
		comp_mask >>= 1;
		++chan;
	}
}
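
// Create a CF node that opens a clause of the given subtype
// (ALU, TEX or VTX).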
cf_node* shader::create_clause(node_subtype nst) {
	cf_node *n = create_cf();

	n->subtype = nst;

	switch (nst) {
		case NST_ALU_CLAUSE: n->bc.set_op(CF_OP_ALU); break;
		case NST_TEX_CLAUSE: n->bc.set_op(CF_OP_TEX); break;
		case NST_VTX_CLAUSE: n->bc.set_op(CF_OP_VTX); break;
		default: assert(!"invalid clause type"); break;
	}

	n->bc.barrier = 1;
	return n;
}

void shader::create_bbs() {
	create_bbs(root, bbs);
}

void shader::expand_bbs() {
	expand_bbs(bbs);
}

alu_node* shader::create_mov(value* dst, value* src) {
	alu_node *n = create_alu();
	n->bc.set_op(ALU_OP1_MOV);
	n->dst.push_back(dst);
	n->src.push_back(src);
	dst->def = n;

	return n;
}
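
// Create a MOV that is known to be a pure copy: mark it so it isn't
// hoisted, and record an affinity edge with the given cost so the
// coalescer can try to map both values to the same register.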
alu_node* shader::create_copy_mov(value* dst, value* src, unsigned affcost) {
	alu_node *n = create_mov(dst, src);

	dst->assign_source(src);
	n->flags |= NF_COPY_MOV | NF_DONT_HOIST;

	if (affcost && dst->is_sgpr() && src->is_sgpr())
		coal.add_edge(src, dst, affcost);

	return n;
}
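
// Look up or create the value identified by (kind, id, version).
// Version 0 of a pre-created register comes straight from the value
// pool; everything else is memoized in the reg_values map.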
value* shader::get_value(value_kind kind, sel_chan id,
		unsigned version) {
	if (version == 0 && kind == VLK_REG && id.sel() < prep_regs_count)
		return val_pool[id - 1];

	unsigned key = (kind << 28) | (version << 16) | id;
	value_map::iterator i = reg_values.find(key);
	if (i != reg_values.end()) {
		return i->second;
	}
	value *v = create_value(kind, id, version);
	reg_values.insert(std::make_pair(key, v));
	return v;
}

value* shader::get_special_value(unsigned sv_id, unsigned version) {
	sel_chan id(sv_id, 0);
	return get_value(VLK_SPECIAL_REG, id, version);
}

void shader::fill_array_values(gpr_array *a, vvec &vv) {
	unsigned sz = a->array_size;
	vv.resize(sz);
	for (unsigned i = 0; i < sz; ++i) {
		vv[i] = get_gpr_value(true, a->base_gpr.sel() + i, a->base_gpr.chan(),
				false);
	}
}
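
// Get the value for a GPR access. A relative (indirect) access produces
// a VLK_REL_REG value addressed by the AR index register, with its
// may-use set (and may-def set, for destinations) covering every element
// of the enclosing array; direct accesses use the regular value lookup.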
value* shader::get_gpr_value(bool src, unsigned reg, unsigned chan, bool rel,
		unsigned version) {
	sel_chan id(reg, chan);
	value *v;
	gpr_array *a = get_gpr_array(reg, chan);
	if (rel) {
		assert(a);
		v = create_value(VLK_REL_REG, id, 0);
		v->rel = get_special_value(SV_AR_INDEX);
		fill_array_values(a, v->muse);
		if (!src)
			fill_array_values(a, v->mdef);
	} else {
		if (version == 0 && reg < prep_regs_count)
			return (val_pool[id - 1]);

		v = get_value(VLK_REG, id, version);
	}

	v->array = a;
	v->pin_gpr = v->select;

	return v;
}

value* shader::create_temp_value() {
	sel_chan id(++next_temp_value_index, 0);
	return get_value(VLK_TEMP, id, 0);
}

value* shader::get_kcache_value(unsigned bank, unsigned index, unsigned chan) {
	return get_ro_value(kcache_values, VLK_KCACHE,
			sel_chan((bank << 12) | index, chan));
}

void shader::add_input(unsigned gpr, bool preloaded, unsigned comp_mask) {
	if (inputs.size() <= gpr)
		inputs.resize(gpr + 1);

	shader_input &i = inputs[gpr];
	i.preloaded = preloaded;
	i.comp_mask = comp_mask;

	if (preloaded) {
		add_pinned_gpr_values(root->dst, gpr, comp_mask, true);
	}
}

void shader::init() {
	assert(!root);
	root = create_container();
}

void shader::init_call_fs(cf_node* cf) {
	unsigned gpr = 0;

	assert(target == TARGET_VS || target == TARGET_ES);

	for (inputs_vec::const_iterator I = inputs.begin(),
			E = inputs.end(); I != E; ++I, ++gpr) {
		if (!I->preloaded)
			add_pinned_gpr_values(cf->dst, gpr, I->comp_mask, false);
		else
			add_pinned_gpr_values(cf->src, gpr, I->comp_mask, true);
	}
}
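
// Mark every value in the set as undefined by pointing its gvn_source
// at the shared undef value.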
void shader::set_undef(val_set& s) {
	value *undefined = get_undef_value();
	if (!undefined->gvn_source)
		vt.add_value(undefined);

	val_set &vs = s;

	for (val_set::iterator I = vs.begin(*this), E = vs.end(*this); I != E; ++I) {
		value *v = *I;

		assert(!v->is_readonly() && !v->is_rel());

		v->gvn_source = undefined->gvn_source;
	}
}

value* shader::create_value(value_kind k, sel_chan regid, unsigned ver) {
	value *v = val_pool.create(k, regid, ver);
	return v;
}

value* shader::get_undef_value() {
	if (!undef)
		undef = create_value(VLK_UNDEF, 0, 0);
	return undef;
}
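
// Node factories: nodes are placement-new'd from the shader's memory pool
// and recorded in all_nodes so that their destructors can be invoked
// explicitly when the shader is destroyed.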
node* shader::create_node(node_type nt, node_subtype nst, node_flags flags) {
	node *n = new (pool.allocate(sizeof(node))) node(nt, nst, flags);
	all_nodes.push_back(n);
	return n;
}

alu_node* shader::create_alu() {
	alu_node* n = new (pool.allocate(sizeof(alu_node))) alu_node();
	all_nodes.push_back(n);
	return n;
}

alu_group_node* shader::create_alu_group() {
	alu_group_node* n =
			new (pool.allocate(sizeof(alu_group_node))) alu_group_node();
	all_nodes.push_back(n);
	return n;
}

alu_packed_node* shader::create_alu_packed() {
	alu_packed_node* n =
			new (pool.allocate(sizeof(alu_packed_node))) alu_packed_node();
	all_nodes.push_back(n);
	return n;
}

cf_node* shader::create_cf() {
	cf_node* n = new (pool.allocate(sizeof(cf_node))) cf_node();
	n->bc.barrier = 1;
	all_nodes.push_back(n);
	return n;
}

fetch_node* shader::create_fetch() {
	fetch_node* n = new (pool.allocate(sizeof(fetch_node))) fetch_node();
	all_nodes.push_back(n);
	return n;
}

region_node* shader::create_region() {
	region_node *n = new (pool.allocate(sizeof(region_node)))
			region_node(regions.size());
	regions.push_back(n);
	all_nodes.push_back(n);
	return n;
}

depart_node* shader::create_depart(region_node* target) {
	depart_node* n = new (pool.allocate(sizeof(depart_node)))
			depart_node(target, target->departs.size());
	target->departs.push_back(n);
	all_nodes.push_back(n);
	return n;
}

repeat_node* shader::create_repeat(region_node* target) {
	repeat_node* n = new (pool.allocate(sizeof(repeat_node)))
			repeat_node(target, target->repeats.size() + 1);
	target->repeats.push_back(n);
	all_nodes.push_back(n);
	return n;
}

container_node* shader::create_container(node_type nt, node_subtype nst,
		node_flags flags) {
	container_node *n = new (pool.allocate(sizeof(container_node)))
			container_node(nt, nst, flags);
	all_nodes.push_back(n);
	return n;
}

if_node* shader::create_if() {
	if_node* n = new (pool.allocate(sizeof(if_node))) if_node();
	all_nodes.push_back(n);
	return n;
}

bb_node* shader::create_bb(unsigned id, unsigned loop_level) {
	bb_node* n = new (pool.allocate(sizeof(bb_node))) bb_node(id, loop_level);
	all_nodes.push_back(n);
	return n;
}

value* shader::get_special_ro_value(unsigned sel) {
	return get_ro_value(special_ro_values, VLK_PARAM, sel);
}

value* shader::get_const_value(const literal &v) {
	value *val = get_ro_value(const_values, VLK_CONST, v);
	val->literal_value = v;
	return val;
}

shader::~shader() {
	for (node_vec::iterator I = all_nodes.begin(), E = all_nodes.end();
			I != E; ++I)
		(*I)->~node();

	for (gpr_array_vec::iterator I = gpr_arrays.begin(), E = gpr_arrays.end();
			I != E; ++I) {
		delete *I;
	}
}

void shader::dump_ir() {
	if (ctx.dump_pass)
		dump(*this).run();
}

value* shader::get_value_version(value* v, unsigned ver) {
	assert(!v->is_readonly() && !v->is_rel());
	value *vv = get_value(v->kind, v->select, ver);
	assert(vv);

	if (v->array) {
		vv->array = v->array;
	}

	return vv;
}

gpr_array* shader::get_gpr_array(unsigned reg, unsigned chan) {

	for (regarray_vec::iterator I = gpr_arrays.begin(),
			E = gpr_arrays.end(); I != E; ++I) {
		gpr_array* a = *I;
		unsigned achan = a->base_gpr.chan();
		unsigned areg = a->base_gpr.sel();
		if (achan == chan && (reg >= areg && reg < areg + a->array_size))
			return a;
	}
	return NULL;
}

void shader::add_gpr_array(unsigned gpr_start, unsigned gpr_count,
		unsigned comp_mask) {
	unsigned chan = 0;
	while (comp_mask) {
		if (comp_mask & 1) {
			gpr_array *a = new gpr_array(
					sel_chan(gpr_start, chan), gpr_count);

			SB_DUMP_PASS( sblog << "add_gpr_array: @" << a->base_gpr
					<< " [" << a->array_size << "]\n";
			);

			gpr_arrays.push_back(a);
		}
		comp_mask >>= 1;
		++chan;
	}
}

value* shader::get_pred_sel(int sel) {
	assert(sel == 0 || sel == 1);
	if (!pred_sels[sel])
		pred_sels[sel] = get_const_value(sel);

	return pred_sels[sel];
}

cf_node* shader::create_cf(unsigned op) {
	cf_node *c = create_cf();
	c->bc.set_op(op);
	c->bc.barrier = 1;
	return c;
}

std::string shader::get_full_target_name() {
	std::string s = get_shader_target_name();
	s += "/";
	s += ctx.get_hw_chip_name();
	s += "/";
	s += ctx.get_hw_class_name();
	return s;
}

const char* shader::get_shader_target_name() {
	switch (target) {
		case TARGET_VS: return "VS";
		case TARGET_ES: return "ES";
		case TARGET_PS: return "PS";
		case TARGET_GS: return "GS";
		case TARGET_COMPUTE: return "COMPUTE";
		case TARGET_FETCH: return "FETCH";
		default:
			return "INVALID_TARGET";
	}
}
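
// Simplify control flow around a depart/repeat node: if its parent is
// itself a repeat or depart, expand the parent into its target region;
// any nodes following dr are unreachable and get cut away.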
void shader::simplify_dep_rep(node* dr) {
	container_node *p = dr->parent;
	if (p->is_repeat()) {
		repeat_node *r = static_cast<repeat_node*>(p);
		r->target->expand_repeat(r);
	} else if (p->is_depart()) {
		depart_node *d = static_cast<depart_node*>(p);
		d->target->expand_depart(d);
	}
	if (dr->next)
		dr->parent->cut(dr->next, NULL);
}

// FIXME this is used in some places as the max non-temp gpr,
// (MAX_GPR - 2 * ctx.alu_temp_gprs) should be used for that instead.
unsigned shader::first_temp_gpr() {
	return MAX_GPR - ctx.alu_temp_gprs;
}

unsigned shader::num_nontemp_gpr() {
	return MAX_GPR - 2 * ctx.alu_temp_gprs;
}

void shader::set_uses_kill() {
	if (root->src.empty())
		root->src.resize(1);

	if (!root->src[0])
		root->src[0] = get_special_value(SV_VALID_MASK);
}
alu_node* shader::clone(alu_node* n) {
	alu_node *c = create_alu();

	// FIXME: this may be wrong with indirect operands
	c->src = n->src;
	c->dst = n->dst;

	c->bc = n->bc;
	c->pred = n->pred;

	return c;
}

void shader::collect_stats(bool opt) {
	if (!sb_context::dump_stat)
		return;

	shader_stats &s = opt ? opt_stats : src_stats;

	s.shaders = 1;
	s.ngpr = ngpr;
	s.nstack = nstack;
	s.collect(root);

	if (opt)
		ctx.opt_stats.accumulate(s);
	else
		ctx.src_stats.accumulate(s);
}
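
// Memoized lookup of a read-only value (constants, kcache entries,
// special read-only params): return the existing entry from vm or create
// a new one flagged VLF_READONLY.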
value* shader::get_ro_value(value_map& vm, value_kind vk, unsigned key) {
	value_map::iterator I = vm.find(key);
	if (I != vm.end())
		return I->second;
	value *v = create_value(vk, key, 0);
	v->flags = VLF_READONLY;
	vm.insert(std::make_pair(key, v));
	return v;
}
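
// Recursively partition the IR into basic blocks: every maximal run of
// consecutive NT_OP nodes in a container becomes a bb_node, containers
// are descended into, and loop regions bump the loop nesting level passed
// down to the blocks they contain.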
void shader::create_bbs(container_node* n, bbs_vec &bbs, int loop_level) {

	bool inside_bb = false;
	bool last_inside_bb = true;
	node_iterator bb_start(n->begin()), I(bb_start), E(n->end());

	for (; I != E; ++I) {
		node *k = *I;
		inside_bb = k->type == NT_OP;

		if (inside_bb && !last_inside_bb)
			bb_start = I;
		else if (!inside_bb) {
			if (last_inside_bb) {
				bb_node *bb = create_bb(bbs.size(), loop_level);
				bbs.push_back(bb);
				n->insert_node_before(*bb_start, bb);
				if (bb_start != I)
					bb->move(bb_start, I);
			}

			if (k->is_container()) {

				bool loop = false;
				if (k->type == NT_REGION) {
					loop = static_cast<region_node*>(k)->is_loop();
				}

				create_bbs(static_cast<container_node*>(k), bbs,
						loop_level + loop);
			}
		}

		last_inside_bb = inside_bb;
	}

	if (last_inside_bb) {
		bb_node *bb = create_bb(bbs.size(), loop_level);
		bbs.push_back(bb);
		if (n->empty())
			n->push_back(bb);
		else {
			n->insert_node_before(*bb_start, bb);
			if (bb_start != n->end())
				bb->move(bb_start, n->end());
		}
	} else {
		if (n->last && n->last->type == NT_IF) {
			bb_node *bb = create_bb(bbs.size(), loop_level);
			bbs.push_back(bb);
			n->push_back(bb);
		}
	}
}
void shader::expand_bbs(bbs_vec &bbs) {

	for (bbs_vec::iterator I = bbs.begin(), E = bbs.end(); I != E; ++I) {
		bb_node *b = *I;
		b->expand();
	}
}
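
// Map a node to its scheduling queue: ALU-like nodes share the ALU queue;
// on r600, vertex fetches are queued separately as VTX, while all other
// fetches (and all fetches on later chips) use the TEX queue.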
sched_queue_id shader::get_queue_id(node* n) {
	switch (n->subtype) {
		case NST_ALU_INST:
		case NST_ALU_PACKED_INST:
		case NST_COPY:
		case NST_PSI:
			return SQ_ALU;
		case NST_FETCH_INST: {
			fetch_node *f = static_cast<fetch_node*>(n);
			if (ctx.is_r600() && (f->bc.op_ptr->flags & FF_VTX))
				return SQ_VTX;
			return SQ_TEX;
		}
		case NST_CF_INST:
			return SQ_CF;
		default:
			assert(0);
			return SQ_NUM;
	}
}

void shader_stats::collect(node *n) {
	if (n->is_alu_inst())
		++alu;
	else if (n->is_fetch_inst())
		++fetch;
	else if (n->is_container()) {
		container_node *c = static_cast<container_node*>(n);

		if (n->is_alu_group())
			++alu_groups;
		else if (n->is_alu_clause())
			++alu_clauses;
		else if (n->is_fetch_clause())
			++fetch_clauses;
		else if (n->is_cf_inst())
			++cf;

		if (!c->empty()) {
			for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) {
				collect(*I);
			}
		}
	}
}

void shader_stats::accumulate(shader_stats& s) {
	++shaders;
	ndw += s.ndw;
	ngpr += s.ngpr;
	nstack += s.nstack;

	alu += s.alu;
	alu_groups += s.alu_groups;
	alu_clauses += s.alu_clauses;
	fetch += s.fetch;
	fetch_clauses += s.fetch_clauses;
	cf += s.cf;
}

void shader_stats::dump() {
	sblog << "dw:" << ndw << ", gpr:" << ngpr << ", stk:" << nstack
			<< ", alu groups:" << alu_groups << ", alu clauses: " << alu_clauses
			<< ", alu:" << alu << ", fetch:" << fetch
			<< ", fetch clauses:" << fetch_clauses
			<< ", cf:" << cf;

	if (shaders > 1)
		sblog << ", shaders:" << shaders;

	sblog << "\n";
}
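
// Print the relative change from d1 to d2 as a percentage; prints "N/A"
// when the baseline d1 is zero but d2 is not.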
static void print_diff(unsigned d1, unsigned d2) {
	if (d1)
		sblog << ((int)d2 - (int)d1) * 100 / (int)d1 << "%";
	else if (d2)
		sblog << "N/A";
	else
		sblog << "0%";
}

void shader_stats::dump_diff(shader_stats& s) {
	sblog << "dw:"; print_diff(ndw, s.ndw);
	sblog << ", gpr:"; print_diff(ngpr, s.ngpr);
	sblog << ", stk:"; print_diff(nstack, s.nstack);
	sblog << ", alu groups:"; print_diff(alu_groups, s.alu_groups);
	sblog << ", alu clauses: "; print_diff(alu_clauses, s.alu_clauses);
	sblog << ", alu:"; print_diff(alu, s.alu);
	sblog << ", fetch:"; print_diff(fetch, s.fetch);
	sblog << ", fetch clauses:"; print_diff(fetch_clauses, s.fetch_clauses);
	sblog << ", cf:"; print_diff(cf, s.cf);
	sblog << "\n";
}

} // namespace r600_sb