r600/sfn: Add support for shared atomics
[mesa.git] / src / gallium / drivers / r600 / sb / sb_shader.cpp
1 /*
2 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * Authors:
24 * Vadim Girlin
25 */
26
27 #include "sb_bc.h"
28 #include "sb_shader.h"
29 #include "sb_pass.h"
30
31 namespace r600_sb {
32
// Construct an empty shader for the given context/target. Everything except
// the few members initialized with real arguments is value-initialized here;
// actual contents are built later via init() and the bytecode parser.
shader::shader(sb_context &sctx, shader_target t, unsigned id)
: ctx(sctx), next_temp_value_index(temp_regid_offset),
  prep_regs_count(), pred_sels(),
  regions(), inputs(), undef(), val_pool(sizeof(value)),
  pool(), all_nodes(), src_stats(), opt_stats(), errors(),
  optimized(), id(id),
  coal(*this), bbs(),
  // NOTE: vt is initialized with a reference to ex before ex itself is
  // constructed; only the reference is stored, so this is well-defined.
  target(t), vt(ex), ex(*this), root(),
  compute_interferences(),
  has_alu_predication(),
  uses_gradients(), safe_math(), ngpr(), nstack(), dce_flags() {}
44
45 bool shader::assign_slot(alu_node* n, alu_node *slots[5]) {
46
47 unsigned slot_flags = ctx.alu_slots(n->bc.op);
48 unsigned slot = n->bc.dst_chan;
49
50 if (!ctx.is_cayman() && (!(slot_flags & AF_V) || slots[slot]) &&
51 (slot_flags & AF_S))
52 slot = SLOT_TRANS;
53
54 if (slots[slot])
55 return false;
56
57 n->bc.slot = slot;
58 slots[slot] = n;
59 return true;
60 }
61
62 void shader::add_pinned_gpr_values(vvec& vec, unsigned gpr, unsigned comp_mask,
63 bool src) {
64 unsigned chan = 0;
65 while (comp_mask) {
66 if (comp_mask & 1) {
67 value *v = get_gpr_value(src, gpr, chan, false);
68 v->flags |= (VLF_PIN_REG | VLF_PIN_CHAN);
69 if (!v->is_rel()) {
70 v->gpr = v->pin_gpr = v->select;
71 v->fix();
72 }
73 if (v->array && !v->array->gpr) {
74 // if pinned value can be accessed with indirect addressing
75 // pin the entire array to its original location
76 v->array->gpr = v->array->base_gpr;
77 }
78 vec.push_back(v);
79 }
80 comp_mask >>= 1;
81 ++chan;
82 }
83 }
84
85 cf_node* shader::create_clause(node_subtype nst) {
86 cf_node *n = create_cf();
87
88 n->subtype = nst;
89
90 switch (nst) {
91 case NST_ALU_CLAUSE: n->bc.set_op(CF_OP_ALU); break;
92 case NST_TEX_CLAUSE: n->bc.set_op(CF_OP_TEX); break;
93 case NST_VTX_CLAUSE: n->bc.set_op(CF_OP_VTX); break;
94 case NST_GDS_CLAUSE: n->bc.set_op(CF_OP_GDS); break;
95 default: assert(!"invalid clause type"); break;
96 }
97
98 n->bc.barrier = 1;
99 return n;
100 }
101
// Build the basic-block list for the whole shader starting at the root node.
void shader::create_bbs() {
	create_bbs(root, bbs);
}
105
// Expand all previously created basic blocks back into their parents.
void shader::expand_bbs() {
	expand_bbs(bbs);
}
109
// Create a plain MOV node (dst <- src) and record it as the definition
// point of dst.
alu_node* shader::create_mov(value* dst, value* src) {
	alu_node *n = create_alu();
	n->bc.set_op(ALU_OP1_MOV);
	n->dst.push_back(dst);
	n->src.push_back(src);
	dst->def = n;

	return n;
}
119
// Create a copy-MOV: like create_mov, but dst is additionally marked as a
// copy of src, and when both values are sgprs an affinity edge with cost
// 'affcost' is added so the coalescer prefers to allocate them together.
alu_node* shader::create_copy_mov(value* dst, value* src, unsigned affcost) {
	alu_node *n = create_mov(dst, src);

	dst->assign_source(src);
	// copy-movs must keep their position, hence NF_DONT_HOIST
	n->flags |= NF_COPY_MOV | NF_DONT_HOIST;

	if (affcost && dst->is_sgpr() && src->is_sgpr())
		coal.add_edge(src, dst, affcost);

	return n;
}
131
// Look up (or lazily create) the interned value for (kind, id, version).
// Version 0 of the low "prepared" registers lives at a fixed position in
// val_pool; everything else goes through the reg_values map.
value* shader::get_value(value_kind kind, sel_chan id,
                         unsigned version) {
	if (version == 0 && kind == VLK_REG && id.sel() < prep_regs_count)
		return val_pool[id - 1];


	// pack kind/version/regid into one map key
	// (assumes version fits in 12 bits -- TODO confirm)
	unsigned key = (kind << 28) | (version << 16) | id;
	value_map::iterator i = reg_values.find(key);
	if (i != reg_values.end()) {
		return i->second;
	}
	value *v = create_value(kind, id, version);
	reg_values.insert(std::make_pair(key, v));
	return v;
}
147
148 value* shader::get_special_value(unsigned sv_id, unsigned version) {
149 sel_chan id(sv_id, 0);
150 return get_value(VLK_SPECIAL_REG, id, version);
151 }
152
153 void shader::fill_array_values(gpr_array *a, vvec &vv) {
154 unsigned sz = a->array_size;
155 vv.resize(sz);
156 for (unsigned i = 0; i < a->array_size; ++i) {
157 vv[i] = get_gpr_value(true, a->base_gpr.sel() + i, a->base_gpr.chan(),
158 false);
159 }
160 }
161
// Return the value for gpr (reg, chan). For relative (indirectly
// addressed) access a fresh VLK_REL_REG value is created with the AR index
// as its relative address, and the potentially accessed array elements are
// recorded in muse (and mdef for stores). Otherwise the interned value is
// returned; prepared registers come straight from val_pool.
value* shader::get_gpr_value(bool src, unsigned reg, unsigned chan, bool rel,
                             unsigned version) {
	sel_chan id(reg, chan);
	value *v;
	gpr_array *a = get_gpr_array(reg, chan);
	if (rel) {
		assert(a);
		v = create_value(VLK_REL_REG, id, 0);
		v->rel = get_special_value(SV_AR_INDEX);
		// any element of the array may be read through this value
		fill_array_values(a, v->muse);
		if (!src)
			// a relative store may clobber any element as well
			fill_array_values(a, v->mdef);
	} else {
		if (version == 0 && reg < prep_regs_count)
			return (val_pool[id - 1]);

		v = get_value(VLK_REG, id, version);
	}

	v->array = a;
	v->pin_gpr = v->select;

	return v;
}
186
187 value* shader::create_temp_value() {
188 sel_chan id(++next_temp_value_index, 0);
189 return get_value(VLK_TEMP, id, 0);
190 }
191
// Return the interned read-only value for a kcache constant addressed by
// (bank, index, chan) with the given index mode.
value* shader::get_kcache_value(unsigned bank, unsigned index, unsigned chan, alu_kcache_index_mode index_mode) {
	return get_ro_value(kcache_values, VLK_KCACHE,
	                    sel_chan(bank, index, chan, index_mode));
}
196
// Register shader input 'gpr' with the given component mask. Preloaded
// inputs are live at shader entry, so their pinned values are added to the
// root node's destinations.
void shader::add_input(unsigned gpr, bool preloaded, unsigned comp_mask) {
	if (inputs.size() <= gpr)
		inputs.resize(gpr+1);

	shader_input &i = inputs[gpr];
	i.preloaded = preloaded;
	i.comp_mask = comp_mask;

	if (preloaded) {
		add_pinned_gpr_values(root->dst, gpr, comp_mask, true);
	}

}
210
// One-time initialization: create the root container node.
void shader::init() {
	assert(!root);
	root = create_container();
}
215
216 void shader::init_call_fs(cf_node* cf) {
217 unsigned gpr = 0;
218
219 assert(target == TARGET_LS || target == TARGET_VS || target == TARGET_ES);
220
221 for(inputs_vec::const_iterator I = inputs.begin(),
222 E = inputs.end(); I != E; ++I, ++gpr) {
223 if (!I->preloaded)
224 add_pinned_gpr_values(cf->dst, gpr, I->comp_mask, false);
225 else
226 add_pinned_gpr_values(cf->src, gpr, I->comp_mask, true);
227 }
228 }
229
// Mark every value in 's' as undefined for value numbering: all of them
// share the gvn_source of the single undef value.
void shader::set_undef(val_set& s) {
	value *undefined = get_undef_value();
	if (!undefined->gvn_source)
		vt.add_value(undefined);

	val_set &vs = s;

	for (val_set::iterator I = vs.begin(*this), E = vs.end(*this); I != E; ++I) {
		value *v = *I;

		// read-only and relative values can never be undefined
		assert(!v->is_readonly() && !v->is_rel());

		v->gvn_source = undefined->gvn_source;
	}
}
245
246 value* shader::create_value(value_kind k, sel_chan regid, unsigned ver) {
247 value *v = val_pool.create(k, regid, ver);
248 return v;
249 }
250
251 value* shader::get_undef_value() {
252 if (!undef)
253 undef = create_value(VLK_UNDEF, 0, 0);
254 return undef;
255 }
256
257 node* shader::create_node(node_type nt, node_subtype nst, node_flags flags) {
258 node *n = new (pool.allocate(sizeof(node))) node(nt, nst, flags);
259 all_nodes.push_back(n);
260 return n;
261 }
262
263 alu_node* shader::create_alu() {
264 alu_node* n = new (pool.allocate(sizeof(alu_node))) alu_node();
265 all_nodes.push_back(n);
266 return n;
267 }
268
269 alu_group_node* shader::create_alu_group() {
270 alu_group_node* n =
271 new (pool.allocate(sizeof(alu_group_node))) alu_group_node();
272 all_nodes.push_back(n);
273 return n;
274 }
275
276 alu_packed_node* shader::create_alu_packed() {
277 alu_packed_node* n =
278 new (pool.allocate(sizeof(alu_packed_node))) alu_packed_node();
279 all_nodes.push_back(n);
280 return n;
281 }
282
283 cf_node* shader::create_cf() {
284 cf_node* n = new (pool.allocate(sizeof(cf_node))) cf_node();
285 n->bc.barrier = 1;
286 all_nodes.push_back(n);
287 return n;
288 }
289
290 fetch_node* shader::create_fetch() {
291 fetch_node* n = new (pool.allocate(sizeof(fetch_node))) fetch_node();
292 all_nodes.push_back(n);
293 return n;
294 }
295
296 region_node* shader::create_region() {
297 region_node *n = new (pool.allocate(sizeof(region_node)))
298 region_node(regions.size());
299 regions.push_back(n);
300 all_nodes.push_back(n);
301 return n;
302 }
303
304 depart_node* shader::create_depart(region_node* target) {
305 depart_node* n = new (pool.allocate(sizeof(depart_node)))
306 depart_node(target, target->departs.size());
307 target->departs.push_back(n);
308 all_nodes.push_back(n);
309 return n;
310 }
311
312 repeat_node* shader::create_repeat(region_node* target) {
313 repeat_node* n = new (pool.allocate(sizeof(repeat_node)))
314 repeat_node(target, target->repeats.size() + 1);
315 target->repeats.push_back(n);
316 all_nodes.push_back(n);
317 return n;
318 }
319
320 container_node* shader::create_container(node_type nt, node_subtype nst,
321 node_flags flags) {
322 container_node *n = new (pool.allocate(sizeof(container_node)))
323 container_node(nt, nst, flags);
324 all_nodes.push_back(n);
325 return n;
326 }
327
328 if_node* shader::create_if() {
329 if_node* n = new (pool.allocate(sizeof(if_node))) if_node();
330 all_nodes.push_back(n);
331 return n;
332 }
333
334 bb_node* shader::create_bb(unsigned id, unsigned loop_level) {
335 bb_node* n = new (pool.allocate(sizeof(bb_node))) bb_node(id, loop_level);
336 all_nodes.push_back(n);
337 return n;
338 }
339
// Return the interned read-only value for a special parameter register.
value* shader::get_special_ro_value(unsigned sel) {
	return get_ro_value(special_ro_values, VLK_PARAM, sel);
}
343
// Return the interned value for an inline literal constant.
value* shader::get_const_value(const literal &v) {
	value *val = get_ro_value(const_values, VLK_CONST, v);
	val->literal_value = v;
	return val;
}
349
// Nodes are pool-allocated via placement new, so only their destructors
// are invoked here -- the memory itself belongs to the pool. gpr arrays
// are individually heap-allocated and must be deleted.
shader::~shader() {
	for (node_vec::iterator I = all_nodes.begin(), E = all_nodes.end();
	     I != E; ++I)
		(*I)->~node();

	for (gpr_array_vec::iterator I = gpr_arrays.begin(), E = gpr_arrays.end();
	     I != E; ++I) {
		delete *I;
	}
}
360
// Dump the IR to the debug log when the dump_pass option is enabled.
void shader::dump_ir() {
	if (ctx.dump_pass)
		dump(*this).run();
}
365
// Return the value representing version 'ver' of v (used for renaming);
// read-only and relative values are never versioned. The array link is
// propagated to the new version.
value* shader::get_value_version(value* v, unsigned ver) {
	assert(!v->is_readonly() && !v->is_rel());
	value *vv = get_value(v->kind, v->select, ver);
	assert(vv);

	if (v->array) {
		vv->array = v->array;
	}

	return vv;
}
377
378 gpr_array* shader::get_gpr_array(unsigned reg, unsigned chan) {
379
380 for (regarray_vec::iterator I = gpr_arrays.begin(),
381 E = gpr_arrays.end(); I != E; ++I) {
382 gpr_array* a = *I;
383 unsigned achan = a->base_gpr.chan();
384 unsigned areg = a->base_gpr.sel();
385 if (achan == chan && (reg >= areg && reg < areg+a->array_size))
386 return a;
387 }
388 return NULL;
389 }
390
// Create gpr_array descriptors for an indirectly addressable register
// range: one array per set bit in comp_mask, each covering gpr_count
// registers starting at gpr_start in that component.
void shader::add_gpr_array(unsigned gpr_start, unsigned gpr_count,
                           unsigned comp_mask) {
	unsigned chan = 0;
	while (comp_mask) {
		if (comp_mask & 1) {
			gpr_array *a = new gpr_array(
					sel_chan(gpr_start, chan), gpr_count);

			SB_DUMP_PASS( sblog << "add_gpr_array: @" << a->base_gpr
			              << " [" << a->array_size << "]\n";
			);

			gpr_arrays.push_back(a);
		}
		comp_mask >>= 1;
		++chan;
	}
}
409
// Return the cached constant value (0 or 1) used as a predicate select;
// created lazily on first use.
value* shader::get_pred_sel(int sel) {
	assert(sel == 0 || sel == 1);
	if (!pred_sels[sel])
		pred_sels[sel] = get_const_value(sel);

	return pred_sels[sel];
}
417
418 cf_node* shader::create_cf(unsigned op) {
419 cf_node *c = create_cf();
420 c->bc.set_op(op);
421 c->bc.barrier = 1;
422 return c;
423 }
424
425 std::string shader::get_full_target_name() {
426 std::string s = get_shader_target_name();
427 s += "/";
428 s += ctx.get_hw_chip_name();
429 s += "/";
430 s += ctx.get_hw_class_name();
431 return s;
432 }
433
434 const char* shader::get_shader_target_name() {
435 switch (target) {
436 case TARGET_VS: return "VS";
437 case TARGET_ES: return "ES";
438 case TARGET_PS: return "PS";
439 case TARGET_GS: return "GS";
440 case TARGET_HS: return "HS";
441 case TARGET_LS: return "LS";
442 case TARGET_COMPUTE: return "COMPUTE";
443 case TARGET_FETCH: return "FETCH";
444 default:
445 return "INVALID_TARGET";
446 }
447 }
448
// Simplify around a depart/repeat node 'dr': expand the enclosing
// depart/repeat container into its target region, then cut away everything
// following dr in its parent (unreachable after the unconditional jump).
void shader::simplify_dep_rep(node* dr) {
	container_node *p = dr->parent;
	if (p->is_repeat()) {
		repeat_node *r = static_cast<repeat_node*>(p);
		r->target->expand_repeat(r);
	} else if (p->is_depart()) {
		depart_node *d = static_cast<depart_node*>(p);
		d->target->expand_depart(d);
	}
	if (dr->next)
		dr->parent->cut(dr->next, NULL);
}
461
462
// FIXME this is used in some places as the max non-temp gpr,
// (MAX_GPR - 2 * ctx.alu_temp_gprs) should be used for that instead.
unsigned shader::first_temp_gpr() {
	// index of the first gpr reserved for ALU clause temporaries
	return MAX_GPR - ctx.alu_temp_gprs;
}
468
// Number of gprs available for regular allocation; 2 * alu_temp_gprs
// registers at the top are reserved.
unsigned shader::num_nontemp_gpr() {
	return MAX_GPR - 2 * ctx.alu_temp_gprs;
}
472
// Record that the shader uses KILL: make the valid mask special value the
// first source of the root node so it is treated as live.
void shader::set_uses_kill() {
	if (root->src.empty())
		root->src.resize(1);

	if (!root->src[0])
		root->src[0] = get_special_value(SV_VALID_MASK);
}
480
// Create a copy of an ALU node, duplicating its source/destination value
// vectors, bytecode fields and predicate.
alu_node* shader::clone(alu_node* n) {
	alu_node *c = create_alu();

	// FIXME: this may be wrong with indirect operands
	c->src = n->src;
	c->dst = n->dst;

	c->bc = n->bc;
	c->pred = n->pred;

	return c;
}
493
// Collect shader statistics (pre- or post-optimization depending on 'opt')
// and fold them into the per-context totals. No-op unless stat dumping is
// enabled.
void shader::collect_stats(bool opt) {
	if (!sb_context::dump_stat)
		return;

	shader_stats &s = opt ? opt_stats : src_stats;

	s.shaders = 1;
	s.ngpr = ngpr;
	s.nstack = nstack;
	s.collect(root);

	if (opt)
		ctx.opt_stats.accumulate(s);
	else
		ctx.src_stats.accumulate(s);
}
510
511 value* shader::get_ro_value(value_map& vm, value_kind vk, unsigned key) {
512 value_map::iterator I = vm.find(key);
513 if (I != vm.end())
514 return I->second;
515 value *v = create_value(vk, key, 0);
516 v->flags = VLF_READONLY;
517 vm.insert(std::make_pair(key, v));
518 return v;
519 }
520
// Recursively partition the children of 'n' into basic blocks: maximal
// runs of plain op nodes (NT_OP) are wrapped into bb_node containers,
// while control-flow containers are recursed into (bumping loop_level when
// entering a loop region).
void shader::create_bbs(container_node* n, bbs_vec &bbs, int loop_level) {

	bool inside_bb = false;
	bool last_inside_bb = true;
	node_iterator bb_start(n->begin()), I(bb_start), E(n->end());

	for (; I != E; ++I) {
		node *k = *I;
		inside_bb = k->type == NT_OP;

		if (inside_bb && !last_inside_bb)
			bb_start = I;	// a new run of op nodes starts here
		else if (!inside_bb) {
			if (last_inside_bb
					&& I->type != NT_REPEAT
					&& I->type != NT_DEPART
					&& I->type != NT_IF) {
				// close the current run: wrap [bb_start, I) into a new bb
				bb_node *bb = create_bb(bbs.size(), loop_level);
				bbs.push_back(bb);
				n->insert_node_before(*bb_start, bb);
				if (bb_start != I)
					bb->move(bb_start, I);
			}

			if (k->is_container()) {

				bool loop = false;
				if (k->type == NT_REGION) {
					loop = static_cast<region_node*>(k)->is_loop();
				}

				create_bbs(static_cast<container_node*>(k), bbs,
				           loop_level + loop);
			}
		}

		// nothing after a depart is processed in this container
		if (k->type == NT_DEPART)
			return;

		last_inside_bb = inside_bb;
	}

	// wrap the trailing run of op nodes (or create an empty bb)
	if (last_inside_bb) {
		bb_node *bb = create_bb(bbs.size(), loop_level);
		bbs.push_back(bb);
		if (n->empty())
			n->push_back(bb);
		else {
			n->insert_node_before(*bb_start, bb);
			if (bb_start != n->end())
				bb->move(bb_start, n->end());
		}
	} else {
		// container ends with an 'if': append an empty bb after it
		if (n->last && n->last->type == NT_IF) {
			bb_node *bb = create_bb(bbs.size(), loop_level);
			bbs.push_back(bb);
			n->push_back(bb);
		}
	}
}
581
582 void shader::expand_bbs(bbs_vec &bbs) {
583
584 for (bbs_vec::iterator I = bbs.begin(), E = bbs.end(); I != E; ++I) {
585 bb_node *b = *I;
586 b->expand();
587 }
588 }
589
// Map a node to the scheduler queue it belongs to.
sched_queue_id shader::get_queue_id(node* n) {
	switch (n->subtype) {
	case NST_ALU_INST:
	case NST_ALU_PACKED_INST:
	case NST_COPY:
	case NST_PSI:
		return SQ_ALU;
	case NST_FETCH_INST: {
		fetch_node *f = static_cast<fetch_node*>(n);
		// r600 has a separate VTX queue; on other chips VTX fetches
		// are handled through the TEX queue
		if (ctx.is_r600() && (f->bc.op_ptr->flags & FF_VTX))
			return SQ_VTX;
		if (f->bc.op_ptr->flags & FF_GDS)
			return SQ_GDS;
		return SQ_TEX;
	}
	case NST_CF_INST:
		return SQ_CF;
	default:
		assert(0);
		return SQ_NUM;
	}
}
612
// Recursively count instructions, groups and clauses in the subtree
// rooted at n.
void shader_stats::collect(node *n) {
	if (n->is_alu_inst())
		++alu;
	else if (n->is_fetch_inst())
		++fetch;
	else if (n->is_container()) {
		container_node *c = static_cast<container_node*>(n);

		if (n->is_alu_group())
			++alu_groups;
		else if (n->is_alu_clause())
			++alu_clauses;
		else if (n->is_fetch_clause())
			++fetch_clauses;
		else if (n->is_cf_inst())
			++cf;

		if (!c->empty()) {
			for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) {
				collect(*I);
			}
		}
	}
}
637
638 void shader_stats::accumulate(shader_stats& s) {
639 ++shaders;
640 ndw += s.ndw;
641 ngpr += s.ngpr;
642 nstack += s.nstack;
643
644 alu += s.alu;
645 alu_groups += s.alu_groups;
646 alu_clauses += s.alu_clauses;
647 fetch += s.fetch;
648 fetch_clauses += s.fetch_clauses;
649 cf += s.cf;
650 }
651
// Print the collected statistics on a single log line; the shader count
// is only shown for accumulated (multi-shader) stats.
void shader_stats::dump() {
	sblog << "dw:" << ndw << ", gpr:" << ngpr << ", stk:" << nstack
			<< ", alu groups:" << alu_groups << ", alu clauses: " << alu_clauses
			<< ", alu:" << alu << ", fetch:" << fetch
			<< ", fetch clauses:" << fetch_clauses
			<< ", cf:" << cf;

	if (shaders > 1)
		sblog << ", shaders:" << shaders;

	sblog << "\n";
}
664
665 static void print_diff(unsigned d1, unsigned d2) {
666 if (d1)
667 sblog << ((int)d2 - (int)d1) * 100 / (int)d1 << "%";
668 else if (d2)
669 sblog << "N/A";
670 else
671 sblog << "0%";
672 }
673
// Print, per metric, the percentage difference between these stats
// (baseline) and 's'.
void shader_stats::dump_diff(shader_stats& s) {
	sblog << "dw:"; print_diff(ndw, s.ndw);
	sblog << ", gpr:" ; print_diff(ngpr, s.ngpr);
	sblog << ", stk:" ; print_diff(nstack, s.nstack);
	sblog << ", alu groups:" ; print_diff(alu_groups, s.alu_groups);
	sblog << ", alu clauses: " ; print_diff(alu_clauses, s.alu_clauses);
	sblog << ", alu:" ; print_diff(alu, s.alu);
	sblog << ", fetch:" ; print_diff(fetch, s.fetch);
	sblog << ", fetch clauses:" ; print_diff(fetch_clauses, s.fetch_clauses);
	sblog << ", cf:" ; print_diff(cf, s.cf);
	sblog << "\n";
}
686
687 } // namespace r600_sb