r600g/sb: dump sampler/resource index modes for textures.
[mesa.git] src/gallium/drivers/r600/sb/sb_bc_parser.cpp
/*
 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *      Vadim Girlin
 */

#define BCP_DEBUG 0

#if BCP_DEBUG
#define BCP_DUMP(q) do { q } while (0)
#else
#define BCP_DUMP(q)
#endif

#include "r600_pipe.h"
#include "r600_shader.h"

#include <stack>

#include "sb_bc.h"
#include "sb_shader.h"
#include "sb_pass.h"

namespace r600_sb {

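// Stage 1 entry point: decode the raw r600 bytecode into sb IR nodes.
// CF instructions are two dwords each, so 'i' in the decode loop is a dword
// offset and CF ids are (i >> 1). The shader target is derived from the TGSI
// processor type; without pshader info only compute vs. standalone fetch is
// known.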
int bc_parser::decode() {

	dw = bc->bytecode;
	bc_ndw = bc->ndw;
	max_cf = 0;

	dec = new bc_decoder(ctx, dw, bc_ndw);

	shader_target t = TARGET_UNKNOWN;

	if (pshader) {
		switch (bc->type) {
		case TGSI_PROCESSOR_FRAGMENT: t = TARGET_PS; break;
		case TGSI_PROCESSOR_VERTEX:
			t = pshader->vs_as_es ? TARGET_ES : TARGET_VS;
			break;
		case TGSI_PROCESSOR_GEOMETRY: t = TARGET_GS; break;
		case TGSI_PROCESSOR_COMPUTE: t = TARGET_COMPUTE; break;
		default: assert(!"unknown shader target"); return -1; break;
		}
	} else {
		if (bc->type == TGSI_PROCESSOR_COMPUTE)
			t = TARGET_COMPUTE;
		else
			t = TARGET_FETCH;
	}

	sh = new shader(ctx, t, bc->debug_id);
	sh->safe_math = sb_context::safe_math || (t == TARGET_COMPUTE);

	int r = decode_shader();

	delete dec;

	sh->ngpr = bc->ngpr;
	sh->nstack = bc->nstack;

	return r;
}

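// Decode CF instructions sequentially. An end-of-program marker alone does
// not stop the loop: decoding continues while known branch targets (max_cf)
// lie beyond the current position.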
int bc_parser::decode_shader() {
	int r = 0;
	unsigned i = 0;
	bool eop = false;

	sh->init();

	do {
		eop = false;
		if ((r = decode_cf(i, eop)))
			return r;

	} while (!eop || (i >> 1) < max_cf);

	return 0;
}

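// Stage 2 entry point: turn the decoded instruction stream into proper IR,
// first registering declarations, then building values and control flow.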
int bc_parser::prepare() {
	int r = 0;
	if ((r = parse_decls()))
		return r;
	if ((r = prepare_ir()))
		return r;
	return 0;
}

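// Register inputs and GPR arrays (needed for relative addressing) with the
// IR. Without pshader data, the whole GPR file is conservatively treated as
// one array if relative addressing was seen during decode, and R0/R1 are
// marked preloaded.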
int bc_parser::parse_decls() {

	if (!pshader) {
		if (gpr_reladdr)
			sh->add_gpr_array(0, bc->ngpr, 0x0F);

		// compute shaders have some values preloaded in R0, R1
		sh->add_input(0 /* GPR */, true /* preloaded */, 0x0F /* mask */);
		sh->add_input(1 /* GPR */, true /* preloaded */, 0x0F /* mask */);
		return 0;
	}

	if (pshader->indirect_files & ~(1 << TGSI_FILE_CONSTANT)) {

		assert(pshader->num_arrays);

		if (pshader->num_arrays) {
			for (unsigned i = 0; i < pshader->num_arrays; ++i) {
				r600_shader_array &a = pshader->arrays[i];
				sh->add_gpr_array(a.gpr_start, a.gpr_count, a.comp_mask);
			}
		} else {
			sh->add_gpr_array(0, pshader->bc.ngpr, 0x0F);
		}
	}

	// GS inputs can add indirect addressing
	if (sh->target == TARGET_GS) {
		if (pshader->num_arrays) {
			for (unsigned i = 0; i < pshader->num_arrays; ++i) {
				r600_shader_array &a = pshader->arrays[i];
				sh->add_gpr_array(a.gpr_start, a.gpr_count, a.comp_mask);
			}
		}
	}

	if (sh->target == TARGET_VS || sh->target == TARGET_ES)
		sh->add_input(0, 1, 0x0F);
	else if (sh->target == TARGET_GS) {
		sh->add_input(0, 1, 0x0F);
		sh->add_input(1, 1, 0x0F);
	}

	bool ps_interp = ctx.hw_class >= HW_CLASS_EVERGREEN
			&& sh->target == TARGET_PS;

	bool ij_interpolators[6];
	memset(ij_interpolators, 0, sizeof(ij_interpolators));

	for (unsigned i = 0; i < pshader->ninput; ++i) {
		r600_shader_io & in = pshader->input[i];
		bool preloaded = sh->target == TARGET_PS && !(ps_interp && in.spi_sid);
		sh->add_input(in.gpr, preloaded, /*in.write_mask*/ 0x0F);
		if (ps_interp && in.spi_sid) {
			int k = eg_get_interpolator_index(in.interpolate, in.interpolate_location);
			if (k >= 0)
				ij_interpolators[k] |= true;
		}
	}

	if (ps_interp) {
		/* add the egcm ij interpolators to live inputs */
		unsigned num_ij = 0;
		for (unsigned i = 0; i < Elements(ij_interpolators); i++) {
			num_ij += ij_interpolators[i];
		}

		unsigned mask = (1 << (2 * num_ij)) - 1;
		unsigned gpr = 0;

		while (mask) {
			sh->add_input(gpr, true, mask & 0x0F);
			++gpr;
			mask >>= 4;
		}
	}

	return 0;
}

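// Decode one CF instruction at dword offset 'i' and create its cf_node.
// ALU and fetch clauses decode their subinstructions immediately; branch
// targets update max_cf so decode_shader() knows how far to read.
// Relative rw_gpr addressing on exports/mem ops is recorded but not
// expected here, hence the asserts.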
int bc_parser::decode_cf(unsigned &i, bool &eop) {

	int r;

	cf_node *cf = sh->create_cf();
	sh->root->push_back(cf);

	unsigned id = i >> 1;

	cf->bc.id = id;

	if (cf_map.size() < id + 1)
		cf_map.resize(id + 1);

	cf_map[id] = cf;

	if ((r = dec->decode_cf(i, cf->bc)))
		return r;

	cf_op_flags flags = (cf_op_flags)cf->bc.op_ptr->flags;

	if (flags & CF_ALU) {
		if ((r = decode_alu_clause(cf)))
			return r;
	} else if (flags & CF_FETCH) {
		if ((r = decode_fetch_clause(cf)))
			return r;
	} else if (flags & CF_EXP) {
		if (cf->bc.rw_rel)
			gpr_reladdr = true;
		assert(!cf->bc.rw_rel);
	} else if (flags & CF_MEM) {
		if (cf->bc.rw_rel)
			gpr_reladdr = true;
		assert(!cf->bc.rw_rel);
	} else if (flags & CF_BRANCH) {
		if (cf->bc.addr > max_cf)
			max_cf = cf->bc.addr;
	}

	eop = cf->bc.end_of_program || cf->bc.op == CF_OP_CF_END ||
			cf->bc.op == CF_OP_RET;
	return 0;
}

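// An ALU clause occupies cf->bc.count + 1 64-bit slots; decode it as a
// sequence of ALU groups until the whole clause is consumed.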
int bc_parser::decode_alu_clause(cf_node* cf) {
	unsigned i = cf->bc.addr << 1, cnt = cf->bc.count + 1, gcnt;

	cf->subtype = NST_ALU_CLAUSE;

	cgroup = 0;
	memset(slots[0], 0, 5*sizeof(slots[0][0]));

	unsigned ng = 0;

	do {
		decode_alu_group(cf, i, gcnt);
		assert(gcnt <= cnt);
		cnt -= gcnt;
		ng++;
	} while (cnt);

	return 0;
}

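// Decode one ALU group: up to five 64-bit ALU slots terminated by the
// 'last' bit, followed by the group's literal constants, which always
// occupy an even number of dwords in the bytecode.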
int bc_parser::decode_alu_group(cf_node* cf, unsigned &i, unsigned &gcnt) {
	int r;
	alu_node *n;
	alu_group_node *g = sh->create_alu_group();

	cgroup = !cgroup;
	memset(slots[cgroup], 0, 5*sizeof(slots[0][0]));
	gcnt = 0;

	unsigned literal_mask = 0;

	do {
		n = sh->create_alu();
		g->push_back(n);

		if ((r = dec->decode_alu(i, n->bc)))
			return r;

		if (!sh->assign_slot(n, slots[cgroup])) {
			assert(!"alu slot assignment failed");
			return -1;
		}

		gcnt++;

	} while (gcnt <= 5 && !n->bc.last);

	assert(n->bc.last);

	for (node_iterator I = g->begin(), E = g->end(); I != E; ++I) {
		n = static_cast<alu_node*>(*I);

		if (n->bc.dst_rel)
			gpr_reladdr = true;

		for (int k = 0; k < n->bc.op_ptr->src_count; ++k) {
			bc_alu_src &src = n->bc.src[k];
			if (src.rel)
				gpr_reladdr = true;
			if (src.sel == ALU_SRC_LITERAL) {
				literal_mask |= (1 << src.chan);
				src.value.u = dw[i + src.chan];
			}
		}
	}

	unsigned literal_ndw = 0;
	while (literal_mask) {
		g->literals.push_back(dw[i + literal_ndw]);
		literal_ndw += 1;
		literal_mask >>= 1;
	}

	literal_ndw = (literal_ndw + 1) & ~1u;

	i += literal_ndw;
	gcnt += literal_ndw >> 1;

	cf->push_back(g);
	return 0;
}

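// Second pass over a decoded ALU clause: build IR values and dependencies
// for each group.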
int bc_parser::prepare_alu_clause(cf_node* cf) {

	// loop over alu groups
	for (node_iterator I = cf->begin(), E = cf->end(); I != E; ++I) {
		assert(I->subtype == NST_ALU_GROUP);
		alu_group_node *g = static_cast<alu_group_node*>(*I);
		prepare_alu_group(cf, g);
	}

	return 0;
}

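// Build IR for one ALU group: reassign slots (so PV/PS sources can be
// traced to their producers in the previous group), create dst/src values
// including special values for predicates, AR and kcache constants, and
// finally pack multislot instructions into an alu_packed_node.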
int bc_parser::prepare_alu_group(cf_node* cf, alu_group_node *g) {

	alu_node *n;

	cgroup = !cgroup;
	memset(slots[cgroup], 0, 5*sizeof(slots[0][0]));

	for (node_iterator I = g->begin(), E = g->end();
			I != E; ++I) {
		n = static_cast<alu_node*>(*I);

		if (!sh->assign_slot(n, slots[cgroup])) {
			assert(!"alu slot assignment failed");
			return -1;
		}

		unsigned src_count = n->bc.op_ptr->src_count;

		if (ctx.alu_slots(n->bc.op) & AF_4SLOT)
			n->flags |= NF_ALU_4SLOT;

		n->src.resize(src_count);

		unsigned flags = n->bc.op_ptr->flags;

		if (flags & AF_PRED) {
			n->dst.resize(3);
			if (n->bc.update_pred)
				n->dst[1] = sh->get_special_value(SV_ALU_PRED);
			if (n->bc.update_exec_mask)
				n->dst[2] = sh->get_special_value(SV_EXEC_MASK);

			n->flags |= NF_DONT_HOIST;

		} else if (flags & AF_KILL) {

			n->dst.resize(2);
			n->dst[1] = sh->get_special_value(SV_VALID_MASK);
			sh->set_uses_kill();

			n->flags |= NF_DONT_HOIST | NF_DONT_MOVE |
					NF_DONT_KILL | NF_SCHEDULE_EARLY;

		} else {
			n->dst.resize(1);
		}

		if (flags & AF_MOVA) {

			n->dst[0] = sh->get_special_value(SV_AR_INDEX);

			n->flags |= NF_DONT_HOIST;

		} else if (n->bc.op_ptr->src_count == 3 || n->bc.write_mask) {
			assert(!n->bc.dst_rel || n->bc.index_mode == INDEX_AR_X);

			value *v = sh->get_gpr_value(false, n->bc.dst_gpr, n->bc.dst_chan,
					n->bc.dst_rel);

			n->dst[0] = v;
		}

		if (n->bc.pred_sel) {
			sh->has_alu_predication = true;
			n->pred = sh->get_special_value(SV_ALU_PRED);
		}

		for (unsigned s = 0; s < src_count; ++s) {
			bc_alu_src &src = n->bc.src[s];

			if (src.sel == ALU_SRC_LITERAL) {
				n->src[s] = sh->get_const_value(src.value);
			} else if (src.sel == ALU_SRC_PS || src.sel == ALU_SRC_PV) {
				unsigned pgroup = !cgroup, prev_slot = src.sel == ALU_SRC_PS ?
						SLOT_TRANS : src.chan;

				// XXX shouldn't happen but llvm backend uses PS on cayman
				if (prev_slot == SLOT_TRANS && ctx.is_cayman())
					prev_slot = SLOT_X;

				alu_node *prev_alu = slots[pgroup][prev_slot];

				assert(prev_alu);

				if (!prev_alu->dst[0]) {
					value * t = sh->create_temp_value();
					prev_alu->dst[0] = t;
				}

				value *d = prev_alu->dst[0];

				if (d->is_rel()) {
					d = sh->get_gpr_value(true, prev_alu->bc.dst_gpr,
							prev_alu->bc.dst_chan,
							prev_alu->bc.dst_rel);
				}

				n->src[s] = d;
			} else if (ctx.is_kcache_sel(src.sel)) {
				unsigned sel = src.sel, kc_addr;
				unsigned kc_set = ((sel >> 7) & 2) + ((sel >> 5) & 1);

				bc_kcache &kc = cf->bc.kc[kc_set];
				kc_addr = (kc.addr << 4) + (sel & 0x1F);
				n->src[s] = sh->get_kcache_value(kc.bank, kc_addr, src.chan);
			} else if (src.sel < MAX_GPR) {
				value *v = sh->get_gpr_value(true, src.sel, src.chan, src.rel);

				n->src[s] = v;

			} else if (src.sel >= ALU_SRC_PARAM_OFFSET) {
				// using slot for value channel because in fact the slot
				// determines the channel that is loaded by INTERP_LOAD_P0
				// (and maybe some others).
				// otherwise GVN will consider INTERP_LOAD_P0s with the same
				// param index as equal instructions and leave only one of them
				n->src[s] = sh->get_special_ro_value(sel_chan(src.sel,
						n->bc.slot));
			} else {
				switch (src.sel) {
				case ALU_SRC_0:
					n->src[s] = sh->get_const_value(0);
					break;
				case ALU_SRC_0_5:
					n->src[s] = sh->get_const_value(0.5f);
					break;
				case ALU_SRC_1:
					n->src[s] = sh->get_const_value(1.0f);
					break;
				case ALU_SRC_1_INT:
					n->src[s] = sh->get_const_value(1);
					break;
				case ALU_SRC_M_1_INT:
					n->src[s] = sh->get_const_value(-1);
					break;
				default:
					n->src[s] = sh->get_special_ro_value(src.sel);
					break;
				}
			}
		}
	}

	// pack multislot instructions into alu_packed_node

	alu_packed_node *p = NULL;
	for (node_iterator N, I = g->begin(), E = g->end(); I != E; I = N) {
		N = I + 1;
		alu_node *a = static_cast<alu_node*>(*I);
		unsigned sflags = a->bc.slot_flags;

		if (sflags == AF_4V || (ctx.is_cayman() && sflags == AF_S)) {
			if (!p)
				p = sh->create_alu_packed();

			a->remove();
			p->push_back(a);
		}
	}

	if (p) {
		g->push_front(p);

		if (p->count() == 3 && ctx.is_cayman()) {
			// cayman's scalar instruction that can use 3 or 4 slots

			// FIXME for simplicity we'll always add 4th slot,
			// but probably we might want to always remove 4th slot and make
			// sure that regalloc won't choose 'w' component for dst

			alu_node *f = static_cast<alu_node*>(p->first);
			alu_node *a = sh->create_alu();
			a->src = f->src;
			a->dst.resize(f->dst.size());
			a->bc = f->bc;
			a->bc.slot = SLOT_W;
			p->push_back(a);
		}
	}

	return 0;
}

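// Decode the raw fetch/tex instructions of a clause; value setup is done
// later in prepare_fetch_clause().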
int bc_parser::decode_fetch_clause(cf_node* cf) {
	int r;
	unsigned i = cf->bc.addr << 1, cnt = cf->bc.count + 1;

	cf->subtype = NST_TEX_CLAUSE;

	while (cnt--) {
		fetch_node *n = sh->create_fetch();
		cf->push_back(n);
		if ((r = dec->decode_fetch(i, n->bc)))
			return r;
		if (n->bc.src_rel || n->bc.dst_rel)
			gpr_reladdr = true;
	}
	return 0;
}

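// Build IR values for fetch instructions. SET_GRADIENTS_* and
// SET_TEXTURE_OFFSETS don't write GPRs; their sources are captured here and
// folded into the source list of the consuming instruction.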
int bc_parser::prepare_fetch_clause(cf_node *cf) {

	vvec grad_v, grad_h, texture_offsets;

	for (node_iterator I = cf->begin(), E = cf->end(); I != E; ++I) {

		fetch_node *n = static_cast<fetch_node*>(*I);
		assert(n->is_valid());

		unsigned flags = n->bc.op_ptr->flags;

		unsigned vtx = flags & FF_VTX;
		unsigned num_src = vtx ? ctx.vtx_src_num : 4;

		n->dst.resize(4);

		if (flags & (FF_SETGRAD | FF_USEGRAD | FF_GETGRAD)) {
			sh->uses_gradients = true;
		}

		if (flags & (FF_SETGRAD | FF_SET_TEXTURE_OFFSETS)) {

			vvec *grad = NULL;

			switch (n->bc.op) {
			case FETCH_OP_SET_GRADIENTS_V:
				grad = &grad_v;
				break;
			case FETCH_OP_SET_GRADIENTS_H:
				grad = &grad_h;
				break;
			case FETCH_OP_SET_TEXTURE_OFFSETS:
				grad = &texture_offsets;
				break;
			default:
				assert(!"unexpected SET_GRAD instruction");
				return -1;
			}

			if (grad->empty())
				grad->resize(4);

			for (unsigned s = 0; s < 4; ++s) {
				unsigned sw = n->bc.src_sel[s];
				if (sw <= SEL_W)
					(*grad)[s] = sh->get_gpr_value(true, n->bc.src_gpr,
							sw, false);
				else if (sw == SEL_0)
					(*grad)[s] = sh->get_const_value(0.0f);
				else if (sw == SEL_1)
					(*grad)[s] = sh->get_const_value(1.0f);
			}
		} else {
			// Fold source values for instructions with hidden target values
			// into the instructions using them. The set instructions are
			// later re-emitted by bc_finalizer.
			if (flags & FF_USEGRAD) {
				n->src.resize(12);
				std::copy(grad_v.begin(), grad_v.end(), n->src.begin() + 4);
				std::copy(grad_h.begin(), grad_h.end(), n->src.begin() + 8);
			} else if (flags & FF_USE_TEXTURE_OFFSETS) {
				n->src.resize(8);
				std::copy(texture_offsets.begin(), texture_offsets.end(),
						n->src.begin() + 4);
			} else {
				n->src.resize(4);
			}

			for (int s = 0; s < 4; ++s) {
				if (n->bc.dst_sel[s] != SEL_MASK)
					n->dst[s] = sh->get_gpr_value(false, n->bc.dst_gpr, s, false);
				// NOTE: it doesn't matter here which components of the result we
				// are using, but original n->bc.dst_sel should be taken into
				// account when building the bytecode
			}
			for (unsigned s = 0; s < num_src; ++s) {
				if (n->bc.src_sel[s] <= SEL_W)
					n->src[s] = sh->get_gpr_value(true, n->bc.src_gpr,
							n->bc.src_sel[s], false);
			}

		}
	}

	return 0;
}

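// Walk all decoded CF nodes: prepare clause contents, unroll burst exports
// and memory writes into individual CF nodes, and rebuild structured
// control flow (regions with depart/repeat nodes) from the JUMP/ELSE and
// LOOP instructions.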
int bc_parser::prepare_ir() {

	for (id_cf_map::iterator I = cf_map.begin(), E = cf_map.end(); I != E; ++I) {
		cf_node *c = *I;

		if (!c)
			continue;

		unsigned flags = c->bc.op_ptr->flags;

		if (flags & CF_ALU) {
			prepare_alu_clause(c);
		} else if (flags & CF_FETCH) {
			prepare_fetch_clause(c);
		} else if (c->bc.op == CF_OP_CALL_FS) {
			sh->init_call_fs(c);
			c->flags |= NF_SCHEDULE_EARLY | NF_DONT_MOVE;
		} else if (flags & CF_LOOP_START) {
			prepare_loop(c);
		} else if (c->bc.op == CF_OP_JUMP) {
			prepare_if(c);
		} else if (c->bc.op == CF_OP_LOOP_END) {
			loop_stack.pop();
		} else if (c->bc.op == CF_OP_LOOP_CONTINUE) {
			assert(!loop_stack.empty());
			repeat_node *rep = sh->create_repeat(loop_stack.top());
			if (c->parent->first != c)
				rep->move(c->parent->first, c);
			c->replace_with(rep);
			sh->simplify_dep_rep(rep);
		} else if (c->bc.op == CF_OP_LOOP_BREAK) {
			assert(!loop_stack.empty());
			depart_node *dep = sh->create_depart(loop_stack.top());
			if (c->parent->first != c)
				dep->move(c->parent->first, c);
			c->replace_with(dep);
			sh->simplify_dep_rep(dep);
		} else if (flags & CF_EXP) {

			// unroll burst exports

			assert(c->bc.op == CF_OP_EXPORT || c->bc.op == CF_OP_EXPORT_DONE);

			c->bc.set_op(CF_OP_EXPORT);

			unsigned burst_count = c->bc.burst_count;
			unsigned eop = c->bc.end_of_program;

			c->bc.end_of_program = 0;
			c->bc.burst_count = 0;

			do {
				c->src.resize(4);

				for (int s = 0; s < 4; ++s) {
					switch (c->bc.sel[s]) {
					case SEL_0:
						c->src[s] = sh->get_const_value(0.0f);
						break;
					case SEL_1:
						c->src[s] = sh->get_const_value(1.0f);
						break;
					case SEL_MASK:
						break;
					default:
						if (c->bc.sel[s] <= SEL_W)
							c->src[s] = sh->get_gpr_value(true, c->bc.rw_gpr,
									c->bc.sel[s], false);
						else
							assert(!"invalid src_sel for export");
					}
				}

				if (!burst_count--)
					break;

				cf_node *cf_next = sh->create_cf();
				cf_next->bc = c->bc;
				++cf_next->bc.rw_gpr;
				++cf_next->bc.array_base;

				c->insert_after(cf_next);
				c = cf_next;

			} while (1);

			c->bc.end_of_program = eop;
		} else if (flags & CF_MEM) {

			unsigned burst_count = c->bc.burst_count;
			unsigned eop = c->bc.end_of_program;

			c->bc.end_of_program = 0;
			c->bc.burst_count = 0;

			do {

				c->src.resize(4);

				for (int s = 0; s < 4; ++s) {
					if (c->bc.comp_mask & (1 << s))
						c->src[s] =
								sh->get_gpr_value(true, c->bc.rw_gpr, s, false);
				}

				if (((flags & CF_RAT) || (!(flags & CF_STRM))) &&
						(c->bc.type & 1)) { // indexed write
					c->src.resize(8);
					for (int s = 0; s < 3; ++s) {
						c->src[4 + s] =
								sh->get_gpr_value(true, c->bc.index_gpr, s, false);
					}

					// FIXME probably we can relax it a bit
					c->flags |= NF_DONT_HOIST | NF_DONT_MOVE;
				}

				if (flags & CF_EMIT) {
					// Instruction implicitly depends on prior [EMIT_][CUT]_VERTEX
					c->src.push_back(sh->get_special_value(SV_GEOMETRY_EMIT));
					c->dst.push_back(sh->get_special_value(SV_GEOMETRY_EMIT));
					if (sh->target == TARGET_ES) {
						// For ES shaders this is an export
						c->flags |= NF_DONT_KILL;
					}
				}

				if (!burst_count--)
					break;

				cf_node *cf_next = sh->create_cf();
				cf_next->bc = c->bc;
				++cf_next->bc.rw_gpr;

				// FIXME is it correct?
				cf_next->bc.array_base += cf_next->bc.elem_size + 1;

				c->insert_after(cf_next);
				c = cf_next;
			} while (1);

			c->bc.end_of_program = eop;

		} else if (flags & CF_EMIT) {
			c->flags |= NF_DONT_KILL | NF_DONT_HOIST | NF_DONT_MOVE;

			c->src.push_back(sh->get_special_value(SV_GEOMETRY_EMIT));
			c->dst.push_back(sh->get_special_value(SV_GEOMETRY_EMIT));
		}
	}

	assert(loop_stack.empty());
	return 0;
}

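// LOOP_START ... LOOP_END becomes a region containing a repeat node.
// LOOP_START's address points just past the matching LOOP_END, so the end
// node is cf_map[addr - 1]. The region is pushed on loop_stack for the
// LOOP_BREAK/LOOP_CONTINUE handling in prepare_ir().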
int bc_parser::prepare_loop(cf_node* c) {
	assert(c->bc.addr - 1 < cf_map.size());

	cf_node *end = cf_map[c->bc.addr - 1];
	assert(end->bc.op == CF_OP_LOOP_END);
	assert(c->parent == end->parent);

	region_node *reg = sh->create_region();
	repeat_node *rep = sh->create_repeat(reg);

	reg->push_back(rep);
	c->insert_before(reg);
	rep->move(c, end->next);

	reg->src_loop = true;

	loop_stack.push(reg);
	return 0;
}

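// JUMP [+ ELSE] becomes a region with two nested depart nodes and an if
// node conditioned on the exec mask. JUMP's address points at the ELSE
// (if present) or at the code following the conditional block.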
int bc_parser::prepare_if(cf_node* c) {
	assert(c->bc.addr - 1 < cf_map.size());
	cf_node *c_else = NULL, *end = cf_map[c->bc.addr];

	if (!end)
		return 0; // not quite sure how this happens, malformed input?

	BCP_DUMP(
		sblog << "parsing JUMP @" << c->bc.id;
		sblog << "\n";
	);

	if (end->bc.op == CF_OP_ELSE) {
		BCP_DUMP(
			sblog << "  found ELSE : ";
			dump::dump_op(end);
			sblog << "\n";
		);

		c_else = end;
		end = cf_map[c_else->bc.addr];
	} else {
		BCP_DUMP(
			sblog << "  no else\n";
		);

		c_else = end;
	}

	if (c_else->parent != c->parent)
		c_else = NULL;

	if (end && end->parent != c->parent)
		end = NULL;

	region_node *reg = sh->create_region();

	depart_node *dep2 = sh->create_depart(reg);
	depart_node *dep = sh->create_depart(reg);
	if_node *n_if = sh->create_if();

	c->insert_before(reg);

	if (c_else != end)
		dep->move(c_else, end);
	dep2->move(c, end);

	reg->push_back(dep);
	dep->push_front(n_if);
	n_if->push_back(dep2);

	n_if->cond = sh->get_special_value(SV_EXEC_MASK);

	return 0;
}


} // namespace r600_sb