r600g/sb: fix handling of preloaded inputs for compute shaders
[mesa.git] / src/gallium/drivers/r600/sb/sb_bc_parser.cpp
/*
 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *      Vadim Girlin
 */

#define BCP_DEBUG 0

#if BCP_DEBUG
#define BCP_DUMP(q) do { q } while (0)
#else
#define BCP_DUMP(q)
#endif

extern "C" {
#include "r600_pipe.h"
#include "r600_shader.h"
}

#include <stack>

#include "sb_bc.h"
#include "sb_shader.h"
#include "sb_pass.h"

namespace r600_sb {

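// First pass: decode the raw bytecode into a flat list of CF nodes with
// their ALU/fetch clauses. The structured IR (loops, ifs) is built later
// by prepare().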
int bc_parser::decode() {

	dw = bc->bytecode;
	bc_ndw = bc->ndw;
	max_cf = 0;

	dec = new bc_decoder(ctx, dw, bc_ndw);

	shader_target t = TARGET_UNKNOWN;

	if (pshader) {
		switch (bc->type) {
		case TGSI_PROCESSOR_FRAGMENT: t = TARGET_PS; break;
		case TGSI_PROCESSOR_VERTEX: t = TARGET_VS; break;
		case TGSI_PROCESSOR_COMPUTE: t = TARGET_COMPUTE; break;
		default: assert(!"unknown shader target"); return -1; break;
		}
	} else {
		if (bc->type == TGSI_PROCESSOR_COMPUTE)
			t = TARGET_COMPUTE;
		else
			t = TARGET_FETCH;
	}

	sh = new shader(ctx, t, bc->debug_id);
	int r = decode_shader();

	delete dec;

	sh->ngpr = bc->ngpr;
	sh->nstack = bc->nstack;

	return r;
}

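// CF instructions are decoded until end_of_program, but a branch target
// (tracked in max_cf) may point past the EOP marker, so decoding continues
// until the highest referenced CF address has been reached as well.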
int bc_parser::decode_shader() {
	int r = 0;
	unsigned i = 0;
	bool eop = false;

	sh->init();

	do {
		eop = false;
		if ((r = decode_cf(i, eop)))
			return r;

	} while (!eop || (i >> 1) <= max_cf);

	return 0;
}

int bc_parser::prepare() {
	int r = 0;
	if ((r = parse_decls()))
		return r;
	if ((r = prepare_ir()))
		return r;
	return 0;
}

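// Register shader inputs and indirectly addressed GPR arrays with the IR,
// so that later passes treat preloaded registers as defined values.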
int bc_parser::parse_decls() {

	if (!pshader) {
		if (gpr_reladdr)
			sh->add_gpr_array(0, bc->ngpr, 0x0F);

		// compute shaders have some values preloaded in R0, R1
		sh->add_input(0 /* GPR */, true /* preloaded */, 0x0F /* mask */);
		sh->add_input(1 /* GPR */, true /* preloaded */, 0x0F /* mask */);
		return 0;
	}

	if (pshader->indirect_files & ~(1 << TGSI_FILE_CONSTANT)) {

		assert(pshader->num_arrays);

		if (pshader->num_arrays) {
			for (unsigned i = 0; i < pshader->num_arrays; ++i) {
				r600_shader_array &a = pshader->arrays[i];
				sh->add_gpr_array(a.gpr_start, a.gpr_count, a.comp_mask);
			}
		} else {
			sh->add_gpr_array(0, pshader->bc.ngpr, 0x0F);
		}
	}

	if (sh->target == TARGET_VS)
		sh->add_input(0, 1, 0x0F);

	bool ps_interp = ctx.hw_class >= HW_CLASS_EVERGREEN
			&& sh->target == TARGET_PS;

	unsigned linear = 0, persp = 0, centroid = 1;

	for (unsigned i = 0; i < pshader->ninput; ++i) {
		r600_shader_io & in = pshader->input[i];
		bool preloaded = sh->target == TARGET_PS && !(ps_interp && in.spi_sid);
		sh->add_input(in.gpr, preloaded, /*in.write_mask*/ 0x0F);
		if (ps_interp && in.spi_sid) {
			if (in.interpolate == TGSI_INTERPOLATE_LINEAR ||
					in.interpolate == TGSI_INTERPOLATE_COLOR)
				linear = 1;
			else if (in.interpolate == TGSI_INTERPOLATE_PERSPECTIVE)
				persp = 1;
			if (in.centroid)
				centroid = 2;
		}
	}

	if (ps_interp) {
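		// each enabled interpolation mode (linear/perspective) preloads an
		// i,j pair; centroid use doubles that to two pairs per mode. the
		// mask below has one bit per preloaded component and is consumed
		// in 4-bit (one GPR) chunks.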
		unsigned mask = (1 << (2 * (linear + persp) * centroid)) - 1;
		unsigned gpr = 0;

		while (mask) {
			sh->add_input(gpr, true, mask & 0x0F);
			++gpr;
			mask >>= 4;
		}
	}

	return 0;
}

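// a CF instruction occupies two dwords, so its id is the dword offset >> 1.
// cf_map is indexed by id and is used later to resolve branch targets.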
int bc_parser::decode_cf(unsigned &i, bool &eop) {

	int r;

	cf_node *cf = sh->create_cf();
	sh->root->push_back(cf);

	unsigned id = i >> 1;

	cf->bc.id = id;

	if (cf_map.size() < id + 1)
		cf_map.resize(id + 1);

	cf_map[id] = cf;

	if ((r = dec->decode_cf(i, cf->bc)))
		return r;

	cf_op_flags flags = (cf_op_flags)cf->bc.op_ptr->flags;

	if (flags & CF_ALU) {
		if ((r = decode_alu_clause(cf)))
			return r;
	} else if (flags & CF_FETCH) {
		if ((r = decode_fetch_clause(cf)))
			return r;
	} else if (flags & CF_EXP) {
		if (cf->bc.rw_rel)
			gpr_reladdr = true;
		assert(!cf->bc.rw_rel);
	} else if (flags & (CF_STRM | CF_RAT)) {
		if (cf->bc.rw_rel)
			gpr_reladdr = true;
		assert(!cf->bc.rw_rel);
	} else if (flags & CF_BRANCH) {
		if (cf->bc.addr > max_cf)
			max_cf = cf->bc.addr;
	}

	eop = cf->bc.end_of_program || cf->bc.op == CF_OP_CF_END ||
			cf->bc.op == CF_OP_RET;
	return 0;
}

int bc_parser::decode_alu_clause(cf_node* cf) {
	unsigned i = cf->bc.addr << 1, cnt = cf->bc.count + 1, gcnt;
	int r;

	cf->subtype = NST_ALU_CLAUSE;

	cgroup = 0;
	memset(slots[0], 0, 5*sizeof(slots[0][0]));

	do {
		if ((r = decode_alu_group(cf, i, gcnt)))
			return r;
		assert(gcnt <= cnt);
		cnt -= gcnt;
	} while (cnt);

	return 0;
}

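// an ALU group is a run of instructions terminated by one with the 'last'
// bit set, using at most 5 slots (x, y, z, w, trans). literal constants
// used by the group follow it in the dword stream.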
int bc_parser::decode_alu_group(cf_node* cf, unsigned &i, unsigned &gcnt) {
	int r;
	alu_node *n;
	alu_group_node *g = sh->create_alu_group();

	cgroup = !cgroup;
	memset(slots[cgroup], 0, 5*sizeof(slots[0][0]));
	gcnt = 0;

	unsigned literal_mask = 0;

	do {
		n = sh->create_alu();
		g->push_back(n);

		if ((r = dec->decode_alu(i, n->bc)))
			return r;

		if (!sh->assign_slot(n, slots[cgroup])) {
			assert(!"alu slot assignment failed");
			return -1;
		}

		gcnt++;

	} while (gcnt <= 5 && !n->bc.last);

	assert(n->bc.last);

	for (node_iterator I = g->begin(), E = g->end(); I != E; ++I) {
		n = static_cast<alu_node*>(*I);

		if (n->bc.dst_rel)
			gpr_reladdr = true;

		for (int k = 0; k < n->bc.op_ptr->src_count; ++k) {
			bc_alu_src &src = n->bc.src[k];
			if (src.rel)
				gpr_reladdr = true;
			if (src.sel == ALU_SRC_LITERAL) {
				literal_mask |= (1 << src.chan);
				src.value.u = dw[i + src.chan];
			}
		}
	}

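	// literals occupy the dwords following the group and are padded to an
	// even count (they are fetched in 64-bit pairs); each pair also counts
	// as one slot towards the clause instruction count (gcnt).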
	unsigned literal_ndw = 0;
	while (literal_mask) {
		g->literals.push_back(dw[i + literal_ndw]);
		literal_ndw += 1;
		literal_mask >>= 1;
	}

	literal_ndw = (literal_ndw + 1) & ~1u;

	i += literal_ndw;
	gcnt += literal_ndw >> 1;

	cf->push_back(g);
	return 0;
}

int bc_parser::prepare_alu_clause(cf_node* cf) {
	int r;

	// loop over alu groups
	for (node_iterator I = cf->begin(), E = cf->end(); I != E; ++I) {
		assert(I->subtype == NST_ALU_GROUP);
		alu_group_node *g = static_cast<alu_group_node*>(*I);
		if ((r = prepare_alu_group(cf, g)))
			return r;
	}

	return 0;
}

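// second pass over a decoded group: resolve destinations and sources to IR
// values, wire PV/PS references to the previous group's slots, and pack
// multislot (vector/cayman scalar) instructions.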
int bc_parser::prepare_alu_group(cf_node* cf, alu_group_node *g) {

	alu_node *n;

	cgroup = !cgroup;
	memset(slots[cgroup], 0, 5*sizeof(slots[0][0]));

	for (node_iterator I = g->begin(), E = g->end();
			I != E; ++I) {
		n = static_cast<alu_node*>(*I);

		if (!sh->assign_slot(n, slots[cgroup])) {
			assert(!"alu slot assignment failed");
			return -1;
		}

		unsigned src_count = n->bc.op_ptr->src_count;

		if (ctx.alu_slots(n->bc.op) & AF_4SLOT)
			n->flags |= NF_ALU_4SLOT;

		n->src.resize(src_count);

		unsigned flags = n->bc.op_ptr->flags;

		if (flags & AF_PRED) {
			n->dst.resize(3);
			if (n->bc.update_pred)
				n->dst[1] = sh->get_special_value(SV_ALU_PRED);
			if (n->bc.update_exec_mask)
				n->dst[2] = sh->get_special_value(SV_EXEC_MASK);

			n->flags |= NF_DONT_HOIST;

		} else if (flags & AF_KILL) {

			n->dst.resize(2);
			n->dst[1] = sh->get_special_value(SV_VALID_MASK);
			sh->set_uses_kill();

			n->flags |= NF_DONT_HOIST | NF_DONT_MOVE |
					NF_DONT_KILL | NF_SCHEDULE_EARLY;

		} else {
			n->dst.resize(1);
		}

		if (flags & AF_MOVA) {

			n->dst[0] = sh->get_special_value(SV_AR_INDEX);

			n->flags |= NF_DONT_HOIST;

		} else if (n->bc.op_ptr->src_count == 3 || n->bc.write_mask) {
			assert(!n->bc.dst_rel || n->bc.index_mode == INDEX_AR_X);

			value *v = sh->get_gpr_value(false, n->bc.dst_gpr, n->bc.dst_chan,
					n->bc.dst_rel);

			n->dst[0] = v;
		}

		if (n->bc.pred_sel) {
			sh->has_alu_predication = true;
			n->pred = sh->get_special_value(SV_ALU_PRED);
		}

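		// resolve source operands: literals, PV/PS results from the
		// previous group, kcache (constant buffer) reads, GPRs, and
		// inline constants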
		for (unsigned s = 0; s < src_count; ++s) {
			bc_alu_src &src = n->bc.src[s];

			if (src.sel == ALU_SRC_LITERAL) {
				n->src[s] = sh->get_const_value(src.value);
			} else if (src.sel == ALU_SRC_PS || src.sel == ALU_SRC_PV) {
				unsigned pgroup = !cgroup, prev_slot = src.sel == ALU_SRC_PS ?
						SLOT_TRANS : src.chan;
				alu_node *prev_alu = slots[pgroup][prev_slot];

				assert(prev_alu);

				if (!prev_alu->dst[0]) {
					value * t = sh->create_temp_value();
					prev_alu->dst[0] = t;
				}

				value *d = prev_alu->dst[0];

				if (d->is_rel()) {
					d = sh->get_gpr_value(true, prev_alu->bc.dst_gpr,
							prev_alu->bc.dst_chan,
							prev_alu->bc.dst_rel);
				}

				n->src[s] = d;
			} else if (ctx.is_kcache_sel(src.sel)) {
				unsigned sel = src.sel, kc_addr;
				unsigned kc_set = ((sel >> 7) & 2) + ((sel >> 5) & 1);

				bc_kcache &kc = cf->bc.kc[kc_set];
				kc_addr = (kc.addr << 4) + (sel & 0x1F);
				n->src[s] = sh->get_kcache_value(kc.bank, kc_addr, src.chan);
			} else if (src.sel < MAX_GPR) {
				value *v = sh->get_gpr_value(true, src.sel, src.chan, src.rel);

				n->src[s] = v;

			} else if (src.sel >= ALU_SRC_PARAM_OFFSET) {
				// using slot for value channel because in fact the slot
				// determines the channel that is loaded by INTERP_LOAD_P0
				// (and maybe some others).
				// otherwise GVN will consider INTERP_LOAD_P0s with the same
				// param index as equal instructions and leave only one of them
				n->src[s] = sh->get_special_ro_value(sel_chan(src.sel,
						n->bc.slot));
			} else {
				switch (src.sel) {
				case ALU_SRC_0:
					n->src[s] = sh->get_const_value(0);
					break;
				case ALU_SRC_0_5:
					n->src[s] = sh->get_const_value(0.5f);
					break;
				case ALU_SRC_1:
					n->src[s] = sh->get_const_value(1.0f);
					break;
				case ALU_SRC_1_INT:
					n->src[s] = sh->get_const_value(1);
					break;
				case ALU_SRC_M_1_INT:
					n->src[s] = sh->get_const_value(-1);
					break;
				default:
					n->src[s] = sh->get_special_ro_value(src.sel);
					break;
				}
			}
		}
	}

	// pack multislot instructions into alu_packed_node

	alu_packed_node *p = NULL;
	for (node_iterator N, I = g->begin(), E = g->end(); I != E; I = N) {
		N = I + 1;
		alu_node *a = static_cast<alu_node*>(*I);
		unsigned sflags = a->bc.slot_flags;

		if (sflags == AF_4V || (ctx.is_cayman() && sflags == AF_S)) {
			if (!p)
				p = sh->create_alu_packed();

			a->remove();
			p->push_back(a);
		}
	}

	if (p) {
		g->push_front(p);

		if (p->count() == 3 && ctx.is_cayman()) {
			// cayman's scalar instruction that can use 3 or 4 slots

			// FIXME for simplicity we'll always add 4th slot,
			// but probably we might want to always remove 4th slot and make
			// sure that regalloc won't choose 'w' component for dst

			alu_node *f = static_cast<alu_node*>(p->first);
			alu_node *a = sh->create_alu();
			a->src = f->src;
			a->dst.resize(f->dst.size());
			a->bc = f->bc;
			a->bc.slot = SLOT_W;
			p->push_back(a);
		}
	}

	return 0;
}

int bc_parser::decode_fetch_clause(cf_node* cf) {
	int r;
	unsigned i = cf->bc.addr << 1, cnt = cf->bc.count + 1;

	cf->subtype = NST_TEX_CLAUSE;

	while (cnt--) {
		fetch_node *n = sh->create_fetch();
		cf->push_back(n);
		if ((r = dec->decode_fetch(i, n->bc)))
			return r;
		if (n->bc.src_rel || n->bc.dst_rel)
			gpr_reladdr = true;
	}
	return 0;
}

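// SET_GRADIENTS_H/V instructions don't produce a result themselves; their
// operands are captured in grad_h/grad_v and attached as extra sources
// (slots 4..11) to the subsequent gradient-using fetch.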
int bc_parser::prepare_fetch_clause(cf_node *cf) {

	vvec grad_v, grad_h;

	for (node_iterator I = cf->begin(), E = cf->end(); I != E; ++I) {

		fetch_node *n = static_cast<fetch_node*>(*I);
		assert(n->is_valid());

		unsigned flags = n->bc.op_ptr->flags;

		unsigned vtx = flags & FF_VTX;
		unsigned num_src = vtx ? ctx.vtx_src_num : 4;

		n->dst.resize(4);

		if (flags & (FF_SETGRAD | FF_USEGRAD | FF_GETGRAD)) {
			sh->uses_gradients = true;
		}

		if (flags & FF_SETGRAD) {

			vvec *grad = NULL;

			switch (n->bc.op) {
			case FETCH_OP_SET_GRADIENTS_V:
				grad = &grad_v;
				break;
			case FETCH_OP_SET_GRADIENTS_H:
				grad = &grad_h;
				break;
			default:
				assert(!"unexpected SET_GRAD instruction");
				return -1;
			}

			if (grad->empty())
				grad->resize(4);

			for (unsigned s = 0; s < 4; ++s) {
				unsigned sw = n->bc.src_sel[s];
				if (sw <= SEL_W)
					(*grad)[s] = sh->get_gpr_value(true, n->bc.src_gpr,
							sw, false);
				else if (sw == SEL_0)
					(*grad)[s] = sh->get_const_value(0.0f);
				else if (sw == SEL_1)
					(*grad)[s] = sh->get_const_value(1.0f);
			}
		} else {

			if (flags & FF_USEGRAD) {
				n->src.resize(12);
				std::copy(grad_v.begin(), grad_v.end(), n->src.begin() + 4);
				std::copy(grad_h.begin(), grad_h.end(), n->src.begin() + 8);
			} else {
				n->src.resize(4);
			}

			for (int s = 0; s < 4; ++s) {
				if (n->bc.dst_sel[s] != SEL_MASK)
					n->dst[s] = sh->get_gpr_value(false, n->bc.dst_gpr, s, false);
				// NOTE: it doesn't matter here which components of the result we
				// are using, but original n->bc.dst_sel should be taken into
				// account when building the bytecode
			}
			for (unsigned s = 0; s < num_src; ++s) {
				if (n->bc.src_sel[s] <= SEL_W)
					n->src[s] = sh->get_gpr_value(true, n->bc.src_gpr,
							n->bc.src_sel[s], false);
			}

		}
	}

	return 0;
}

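// Build the structured IR from the flat CF list: loops become region/repeat
// nodes, jumps become region/depart/if nodes, and burst exports and
// stream/RAT writes are unrolled into single-element CF nodes.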
int bc_parser::prepare_ir() {

	int r;

	for (id_cf_map::iterator I = cf_map.begin(), E = cf_map.end(); I != E; ++I) {
		cf_node *c = *I;

		if (!c)
			continue;

		unsigned flags = c->bc.op_ptr->flags;

		if (flags & CF_ALU) {
			if ((r = prepare_alu_clause(c)))
				return r;
		} else if (flags & CF_FETCH) {
			if ((r = prepare_fetch_clause(c)))
				return r;
		} else if (c->bc.op == CF_OP_CALL_FS) {
			sh->init_call_fs(c);
			c->flags |= NF_SCHEDULE_EARLY | NF_DONT_MOVE;
		} else if (flags & CF_LOOP_START) {
			prepare_loop(c);
		} else if (c->bc.op == CF_OP_JUMP) {
			prepare_if(c);
		} else if (c->bc.op == CF_OP_LOOP_END) {
			loop_stack.pop();
		} else if (c->bc.op == CF_OP_LOOP_CONTINUE) {
			assert(!loop_stack.empty());
			repeat_node *rep = sh->create_repeat(loop_stack.top());
			if (c->parent->first != c)
				rep->move(c->parent->first, c);
			c->replace_with(rep);
			sh->simplify_dep_rep(rep);
		} else if (c->bc.op == CF_OP_LOOP_BREAK) {
			assert(!loop_stack.empty());
			depart_node *dep = sh->create_depart(loop_stack.top());
			if (c->parent->first != c)
				dep->move(c->parent->first, c);
			c->replace_with(dep);
			sh->simplify_dep_rep(dep);
		} else if (flags & CF_EXP) {

			// unroll burst exports

			assert(c->bc.op == CF_OP_EXPORT || c->bc.op == CF_OP_EXPORT_DONE);

			c->bc.set_op(CF_OP_EXPORT);

			unsigned burst_count = c->bc.burst_count;
			unsigned eop = c->bc.end_of_program;

			c->bc.end_of_program = 0;
			c->bc.burst_count = 0;

			do {
				c->src.resize(4);

				for (int s = 0; s < 4; ++s) {
					switch (c->bc.sel[s]) {
					case SEL_0:
						c->src[s] = sh->get_const_value(0.0f);
						break;
					case SEL_1:
						c->src[s] = sh->get_const_value(1.0f);
						break;
					case SEL_MASK:
						break;
					default:
						if (c->bc.sel[s] <= SEL_W)
							c->src[s] = sh->get_gpr_value(true, c->bc.rw_gpr,
									c->bc.sel[s], false);
						else
							assert(!"invalid src_sel for export");
					}
				}

				if (!burst_count--)
					break;

				cf_node *cf_next = sh->create_cf();
				cf_next->bc = c->bc;
				++cf_next->bc.rw_gpr;
				++cf_next->bc.array_base;

				c->insert_after(cf_next);
				c = cf_next;

			} while (1);

			c->bc.end_of_program = eop;
		} else if (flags & (CF_STRM | CF_RAT)) {

			unsigned burst_count = c->bc.burst_count;
			unsigned eop = c->bc.end_of_program;

			c->bc.end_of_program = 0;
			c->bc.burst_count = 0;

			do {

				c->src.resize(4);

				for (int s = 0; s < 4; ++s) {
					if (c->bc.comp_mask & (1 << s))
						c->src[s] =
								sh->get_gpr_value(true, c->bc.rw_gpr, s, false);
				}

				if ((flags & CF_RAT) && (c->bc.type & 1)) { // indexed write
					c->src.resize(8);
					for (int s = 0; s < 3; ++s) {
						c->src[4 + s] =
								sh->get_gpr_value(true, c->bc.index_gpr, s, false);
					}

					// FIXME probably we can relax it a bit
					c->flags |= NF_DONT_HOIST | NF_DONT_MOVE;
				}

				if (!burst_count--)
					break;

				cf_node *cf_next = sh->create_cf();
				cf_next->bc = c->bc;
				++cf_next->bc.rw_gpr;

				// FIXME is it correct?
				cf_next->bc.array_base += cf_next->bc.elem_size + 1;

				c->insert_after(cf_next);
				c = cf_next;
			} while (1);

			c->bc.end_of_program = eop;

		}
	}

	assert(loop_stack.empty());
	return 0;
}

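// LOOP_START's addr points one past the matching LOOP_END, so cf_map[addr-1]
// is the end of the loop body; the body is moved into a repeat node inside a
// region, and the region is pushed for BREAK/CONTINUE handling above.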
int bc_parser::prepare_loop(cf_node* c) {

	cf_node *end = cf_map[c->bc.addr - 1];
	assert(end->bc.op == CF_OP_LOOP_END);
	assert(c->parent == end->parent);

	region_node *reg = sh->create_region();
	repeat_node *rep = sh->create_repeat(reg);

	reg->push_back(rep);
	c->insert_before(reg);
	rep->move(c, end->next);

	loop_stack.push(reg);
	return 0;
}

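// JUMP's addr points at the matching ELSE (if any) or at the end of the
// conditional. The linear span is rebuilt as nested region/depart/if nodes,
// with the current exec mask as the if condition.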
int bc_parser::prepare_if(cf_node* c) {
	cf_node *c_else = NULL, *end = cf_map[c->bc.addr];

	BCP_DUMP(
		sblog << "parsing JUMP @" << c->bc.id;
		sblog << "\n";
	);

	if (end->bc.op == CF_OP_ELSE) {
		BCP_DUMP(
			sblog << " found ELSE : ";
			dump::dump_op(end);
			sblog << "\n";
		);

		c_else = end;
		end = cf_map[c_else->bc.addr];
	} else {
		BCP_DUMP(
			sblog << " no else\n";
		);

		c_else = end;
	}

	if (c_else->parent != c->parent)
		c_else = NULL;

	if (end->parent != c->parent)
		end = NULL;

	region_node *reg = sh->create_region();

	depart_node *dep2 = sh->create_depart(reg);
	depart_node *dep = sh->create_depart(reg);
	if_node *n_if = sh->create_if();

	c->insert_before(reg);

	if (c_else != end)
		dep->move(c_else, end);
	dep2->move(c, end);

	reg->push_back(dep);
	dep->push_front(n_if);
	n_if->push_back(dep2);

	n_if->cond = sh->get_special_value(SV_EXEC_MASK);

	return 0;
}


} // namespace r600_sb