r600g,sb: Don't use standard macro name
[mesa.git] src/gallium/drivers/r600/sb/sb_bc_parser.cpp
/*
 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *      Vadim Girlin
 */

#define BCP_DEBUG 0

#if BCP_DEBUG
#define BCP_DUMP(q) do { q } while (0)
#else
#define BCP_DUMP(q)
#endif

#include "r600_pipe.h"
#include "r600_shader.h"
#include "eg_sq.h" // CM_V_SQ_MOVA_DST_CF_IDX0/1

#include <stack>

#include "sb_bc.h"
#include "sb_shader.h"
#include "sb_pass.h"
#include "util/macros.h"

namespace r600_sb {

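// Parsing happens in two phases: decode() translates the raw bytecode into
// cf/alu/fetch nodes that mirror the hardware encoding, and prepare()
// (parse_decls + prepare_ir) then attaches value operands and rebuilds
// structured control flow for the optimizer.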
int bc_parser::decode() {

	dw = bc->bytecode;
	bc_ndw = bc->ndw;
	max_cf = 0;

	dec = new bc_decoder(ctx, dw, bc_ndw);

	shader_target t = TARGET_UNKNOWN;

	if (pshader) {
		switch (bc->type) {
		case PIPE_SHADER_FRAGMENT: t = TARGET_PS; break;
		case PIPE_SHADER_VERTEX:
			t = pshader->vs_as_ls ? TARGET_LS : (pshader->vs_as_es ? TARGET_ES : TARGET_VS);
			break;
		case PIPE_SHADER_GEOMETRY: t = TARGET_GS; break;
		case PIPE_SHADER_COMPUTE: t = TARGET_COMPUTE; break;
		case PIPE_SHADER_TESS_CTRL: t = TARGET_HS; break;
		case PIPE_SHADER_TESS_EVAL: t = pshader->tes_as_es ? TARGET_ES : TARGET_VS; break;
		default: assert(!"unknown shader target"); return -1; break;
		}
	} else {
		if (bc->type == PIPE_SHADER_COMPUTE)
			t = TARGET_COMPUTE;
		else
			t = TARGET_FETCH;
	}

	sh = new shader(ctx, t, bc->debug_id);
	sh->safe_math = sb_context::safe_math || (t == TARGET_COMPUTE);

	int r = decode_shader();

	delete dec;

	sh->ngpr = bc->ngpr;
	sh->nstack = bc->nstack;

	return r;
}

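// Each CF instruction occupies two dwords, so `i' counts dwords and
// `i >> 1' is the CF slot index. Decoding continues past an end-of-program
// marker until every CF address referenced by a branch (max_cf) is covered.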
int bc_parser::decode_shader() {
	int r = 0;
	unsigned i = 0;
	bool eop = false;

	sh->init();

	do {
		eop = false;
		if ((r = decode_cf(i, eop)))
			return r;

	} while (!eop || (i >> 1) < max_cf);

	return 0;
}

int bc_parser::prepare() {
	int r = 0;
	if ((r = parse_decls()))
		return r;
	if ((r = prepare_ir()))
		return r;
	return 0;
}

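// Declarations are not present in the bytecode itself; reconstruct inputs
// and indirectly addressed GPR arrays from the r600_shader info so the
// optimizer knows which registers are live on entry.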
int bc_parser::parse_decls() {

	if (!pshader) {
		if (gpr_reladdr)
			sh->add_gpr_array(0, bc->ngpr, 0x0F);

		// compute shaders have some values preloaded in R0, R1
		sh->add_input(0 /* GPR */, true /* preloaded */, 0x0F /* mask */);
		sh->add_input(1 /* GPR */, true /* preloaded */, 0x0F /* mask */);
		return 0;
	}

	if (pshader->indirect_files & ~((1 << TGSI_FILE_CONSTANT) | (1 << TGSI_FILE_SAMPLER))) {

		assert(pshader->num_arrays);

		if (pshader->num_arrays) {
			for (unsigned i = 0; i < pshader->num_arrays; ++i) {
				r600_shader_array &a = pshader->arrays[i];
				sh->add_gpr_array(a.gpr_start, a.gpr_count, a.comp_mask);
			}
		} else {
			sh->add_gpr_array(0, pshader->bc.ngpr, 0x0F);
		}
	}

	// GS inputs can add indirect addressing
	if (sh->target == TARGET_GS) {
		if (pshader->num_arrays) {
			for (unsigned i = 0; i < pshader->num_arrays; ++i) {
				r600_shader_array &a = pshader->arrays[i];
				sh->add_gpr_array(a.gpr_start, a.gpr_count, a.comp_mask);
			}
		}
	}

	if (sh->target == TARGET_VS || sh->target == TARGET_ES || sh->target == TARGET_HS)
		sh->add_input(0, 1, 0x0F);
	else if (sh->target == TARGET_GS) {
		sh->add_input(0, 1, 0x0F);
		sh->add_input(1, 1, 0x0F);
	}

	bool ps_interp = ctx.hw_class >= HW_CLASS_EVERGREEN
			&& sh->target == TARGET_PS;

	bool ij_interpolators[6];
	memset(ij_interpolators, 0, sizeof(ij_interpolators));

	for (unsigned i = 0; i < pshader->ninput; ++i) {
		r600_shader_io & in = pshader->input[i];
		bool preloaded = sh->target == TARGET_PS && !(ps_interp && in.spi_sid);
		sh->add_input(in.gpr, preloaded, /*in.write_mask*/ 0x0F);
		if (ps_interp && in.spi_sid) {
			int k = eg_get_interpolator_index(in.interpolate, in.interpolate_location);
			if (k >= 0)
				ij_interpolators[k] |= true;
		}
	}

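	// Each enabled ij interpolator occupies two consecutive GPR channels,
	// packed from GPR0.x upward, so num_ij pairs cover (2 * num_ij + 3) / 4
	// GPRs; mark those channels as preloaded inputs.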
	if (ps_interp) {
		/* add the egcm ij interpolators to live inputs */
		unsigned num_ij = 0;
		for (unsigned i = 0; i < Elements(ij_interpolators); i++) {
			num_ij += ij_interpolators[i];
		}

		unsigned mask = (1 << (2 * num_ij)) - 1;
		unsigned gpr = 0;

		while (mask) {
			sh->add_input(gpr, true, mask & 0x0F);
			++gpr;
			mask >>= 4;
		}
	}

	return 0;
}

int bc_parser::decode_cf(unsigned &i, bool &eop) {

	int r;

	cf_node *cf = sh->create_cf();
	sh->root->push_back(cf);

	unsigned id = i >> 1;

	cf->bc.id = id;

	if (cf_map.size() < id + 1)
		cf_map.resize(id + 1);

	cf_map[id] = cf;

	if ((r = dec->decode_cf(i, cf->bc)))
		return r;

	cf_op_flags flags = (cf_op_flags)cf->bc.op_ptr->flags;

	if (flags & CF_ALU) {
		if ((r = decode_alu_clause(cf)))
			return r;
	} else if (flags & CF_FETCH) {
		if ((r = decode_fetch_clause(cf)))
			return r;
	} else if (flags & CF_EXP) {
		if (cf->bc.rw_rel)
			gpr_reladdr = true;
		assert(!cf->bc.rw_rel);
	} else if (flags & CF_MEM) {
		if (cf->bc.rw_rel)
			gpr_reladdr = true;
		assert(!cf->bc.rw_rel);
	} else if (flags & CF_BRANCH) {
		if (cf->bc.addr > max_cf)
			max_cf = cf->bc.addr;
	}

	eop = cf->bc.end_of_program || cf->bc.op == CF_OP_CF_END ||
			cf->bc.op == CF_OP_RET;
	return 0;
}

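// An ALU clause consists of cf->bc.count + 1 64-bit instruction slots
// starting at dword cf->bc.addr << 1; each group ends at an instruction
// with the `last' bit set and may be followed by literal constants.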
int bc_parser::decode_alu_clause(cf_node* cf) {
	int r;
	unsigned i = cf->bc.addr << 1, cnt = cf->bc.count + 1, gcnt;

	cf->subtype = NST_ALU_CLAUSE;

	cgroup = 0;
	memset(slots[0], 0, 5*sizeof(slots[0][0]));

	unsigned ng = 0;

	do {
		if ((r = decode_alu_group(cf, i, gcnt)))
			return r;
		assert(gcnt <= cnt);
		cnt -= gcnt;
		ng++;
	} while (cnt);

	return 0;
}

int bc_parser::decode_alu_group(cf_node* cf, unsigned &i, unsigned &gcnt) {
	int r;
	alu_node *n;
	alu_group_node *g = sh->create_alu_group();

	cgroup = !cgroup;
	memset(slots[cgroup], 0, 5*sizeof(slots[0][0]));
	gcnt = 0;

	unsigned literal_mask = 0;

	do {
		n = sh->create_alu();
		g->push_back(n);

		if ((r = dec->decode_alu(i, n->bc)))
			return r;

		if (!sh->assign_slot(n, slots[cgroup])) {
			assert(!"alu slot assignment failed");
			return -1;
		}

		gcnt++;

	} while (gcnt <= 5 && !n->bc.last);

	assert(n->bc.last);

	for (node_iterator I = g->begin(), E = g->end(); I != E; ++I) {
		n = static_cast<alu_node*>(*I);

		if (n->bc.dst_rel)
			gpr_reladdr = true;

		for (int k = 0; k < n->bc.op_ptr->src_count; ++k) {
			bc_alu_src &src = n->bc.src[k];
			if (src.rel)
				gpr_reladdr = true;
			if (src.sel == ALU_SRC_LITERAL) {
				literal_mask |= (1 << src.chan);
				src.value.u = dw[i + src.chan];
			}
		}
	}

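	// Literals follow the group in the instruction stream and are padded to
	// a whole number of 64-bit slots; advance `i' and the consumed group
	// size accordingly.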
	unsigned literal_ndw = 0;
	while (literal_mask) {
		g->literals.push_back(dw[i + literal_ndw]);
		literal_ndw += 1;
		literal_mask >>= 1;
	}

	literal_ndw = (literal_ndw + 1) & ~1u;

	i += literal_ndw;
	gcnt += literal_ndw >> 1;

	cf->push_back(g);
	return 0;
}

int bc_parser::prepare_alu_clause(cf_node* cf) {
	int r;

	// loop over alu groups
	for (node_iterator I = cf->begin(), E = cf->end(); I != E; ++I) {
		assert(I->subtype == NST_ALU_GROUP);
		alu_group_node *g = static_cast<alu_group_node*>(*I);
		if ((r = prepare_alu_group(cf, g)))
			return r;
	}

	return 0;
}

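// MOVA and SET_CF_IDX0/1 have no regular destination operands; remember
// the last MOVA and the values routed into CF_IDX0/1 so that instructions
// using the index registers can reference them as explicit dependencies.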
void bc_parser::save_set_cf_index(value *val, unsigned idx)
{
	assert(idx <= 1);
	assert(val);
	cf_index_value[idx] = val;
}

value *bc_parser::get_cf_index_value(unsigned idx)
{
	assert(idx <= 1);
	assert(cf_index_value[idx]);
	return cf_index_value[idx];
}

void bc_parser::save_mova(alu_node *mova)
{
	assert(mova);
	this->mova = mova;
}

alu_node *bc_parser::get_mova()
{
	assert(mova);
	return mova;
}

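// prepare_alu_group() attaches value operands to each instruction of a
// decoded group: destinations (including predicate and kill side effects)
// and sources (literals, PV/PS results of the previous group, kcache and
// special values), then packs multislot instructions.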
int bc_parser::prepare_alu_group(cf_node* cf, alu_group_node *g) {

	alu_node *n;

	cgroup = !cgroup;
	memset(slots[cgroup], 0, 5*sizeof(slots[0][0]));

	for (node_iterator I = g->begin(), E = g->end();
			I != E; ++I) {
		n = static_cast<alu_node*>(*I);
		bool ubo_indexing[2] = {};

		if (!sh->assign_slot(n, slots[cgroup])) {
			assert(!"alu slot assignment failed");
			return -1;
		}

		unsigned src_count = n->bc.op_ptr->src_count;

		if (ctx.alu_slots(n->bc.op) & AF_4SLOT)
			n->flags |= NF_ALU_4SLOT;

		n->src.resize(src_count);

		unsigned flags = n->bc.op_ptr->flags;

		if (flags & AF_PRED) {
			n->dst.resize(3);
			if (n->bc.update_pred)
				n->dst[1] = sh->get_special_value(SV_ALU_PRED);
			if (n->bc.update_exec_mask)
				n->dst[2] = sh->get_special_value(SV_EXEC_MASK);

			n->flags |= NF_DONT_HOIST;

		} else if (flags & AF_KILL) {

			n->dst.resize(2);
			n->dst[1] = sh->get_special_value(SV_VALID_MASK);
			sh->set_uses_kill();

			n->flags |= NF_DONT_HOIST | NF_DONT_MOVE |
					NF_DONT_KILL | NF_SCHEDULE_EARLY;

		} else {
			n->dst.resize(1);
		}

		if (n->bc.op == ALU_OP0_SET_CF_IDX0 || n->bc.op == ALU_OP0_SET_CF_IDX1) {
			// Move CF_IDX value into tex instruction operands, scheduler
			// will later re-emit setting of CF_IDX; DCE will kill this op
			save_set_cf_index(get_mova()->src[0], n->bc.op == ALU_OP0_SET_CF_IDX1);
		} else if (flags & AF_MOVA) {

			n->dst[0] = sh->get_special_value(SV_AR_INDEX);
			save_mova(n);

			n->flags |= NF_DONT_HOIST;

		} else if (n->bc.op_ptr->src_count == 3 || n->bc.write_mask) {
			assert(!n->bc.dst_rel || n->bc.index_mode == INDEX_AR_X);

			value *v = sh->get_gpr_value(false, n->bc.dst_gpr, n->bc.dst_chan,
					n->bc.dst_rel);

			n->dst[0] = v;
		}

		if (n->bc.pred_sel) {
			sh->has_alu_predication = true;
			n->pred = sh->get_special_value(SV_ALU_PRED);
		}

		for (unsigned s = 0; s < src_count; ++s) {
			bc_alu_src &src = n->bc.src[s];

			if (src.sel == ALU_SRC_LITERAL) {
				n->src[s] = sh->get_const_value(src.value);
			} else if (src.sel == ALU_SRC_PS || src.sel == ALU_SRC_PV) {
				unsigned pgroup = !cgroup, prev_slot = src.sel == ALU_SRC_PS ?
						SLOT_TRANS : src.chan;

				// XXX shouldn't happen but llvm backend uses PS on cayman
				if (prev_slot == SLOT_TRANS && ctx.is_cayman())
					prev_slot = SLOT_X;

				alu_node *prev_alu = slots[pgroup][prev_slot];

				assert(prev_alu);

				if (!prev_alu->dst[0]) {
					value *t = sh->create_temp_value();
					prev_alu->dst[0] = t;
				}

				value *d = prev_alu->dst[0];

				if (d->is_rel()) {
					d = sh->get_gpr_value(true, prev_alu->bc.dst_gpr,
							prev_alu->bc.dst_chan,
							prev_alu->bc.dst_rel);
				}

				n->src[s] = d;
			} else if (ctx.is_kcache_sel(src.sel)) {
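				// Constant-cache operand: src.sel encodes the kcache set
				// and the offset within it; combine the offset with the
				// set's base address from the CF word.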
				unsigned sel = src.sel, kc_addr;
				unsigned kc_set = ((sel >> 7) & 2) + ((sel >> 5) & 1);

				bc_kcache &kc = cf->bc.kc[kc_set];
				kc_addr = (kc.addr << 4) + (sel & 0x1F);
				n->src[s] = sh->get_kcache_value(kc.bank, kc_addr, src.chan, (alu_kcache_index_mode)kc.index_mode);

				if (kc.index_mode != KC_INDEX_NONE) {
					assert(kc.index_mode != KC_LOCK_LOOP);
					ubo_indexing[kc.index_mode - KC_INDEX_0] = true;
				}
			} else if (src.sel < MAX_GPR) {
				value *v = sh->get_gpr_value(true, src.sel, src.chan, src.rel);

				n->src[s] = v;

			} else if (src.sel >= ALU_SRC_PARAM_OFFSET) {
				// using slot for value channel because in fact the slot
				// determines the channel that is loaded by INTERP_LOAD_P0
				// (and maybe some others).
				// otherwise GVN will consider INTERP_LOAD_P0s with the same
				// param index as equal instructions and leave only one of them
				n->src[s] = sh->get_special_ro_value(sel_chan(src.sel,
						n->bc.slot));
			} else {
				switch (src.sel) {
				case ALU_SRC_0:
					n->src[s] = sh->get_const_value(0);
					break;
				case ALU_SRC_0_5:
					n->src[s] = sh->get_const_value(0.5f);
					break;
				case ALU_SRC_1:
					n->src[s] = sh->get_const_value(1.0f);
					break;
				case ALU_SRC_1_INT:
					n->src[s] = sh->get_const_value(1);
					break;
				case ALU_SRC_M_1_INT:
					n->src[s] = sh->get_const_value(-1);
					break;
				default:
					n->src[s] = sh->get_special_ro_value(src.sel);
					break;
				}
			}
		}

		// add UBO index values if any as dependencies
		if (ubo_indexing[0]) {
			n->src.push_back(get_cf_index_value(0));
		}
		if (ubo_indexing[1]) {
			n->src.push_back(get_cf_index_value(1));
		}

		if ((n->bc.dst_gpr == CM_V_SQ_MOVA_DST_CF_IDX0 || n->bc.dst_gpr == CM_V_SQ_MOVA_DST_CF_IDX1) &&
				ctx.is_cayman())
			// Move CF_IDX value into tex instruction operands, scheduler
			// will later re-emit setting of CF_IDX
			save_set_cf_index(n->src[0], n->bc.dst_gpr == CM_V_SQ_MOVA_DST_CF_IDX1);
	}

	// pack multislot instructions into alu_packed_node

	alu_packed_node *p = NULL;
	for (node_iterator N, I = g->begin(), E = g->end(); I != E; I = N) {
		N = I + 1;
		alu_node *a = static_cast<alu_node*>(*I);
		unsigned sflags = a->bc.slot_flags;

		if (sflags == AF_4V || (ctx.is_cayman() && sflags == AF_S)) {
			if (!p)
				p = sh->create_alu_packed();

			a->remove();
			p->push_back(a);
		}
	}

	if (p) {
		g->push_front(p);

		if (p->count() == 3 && ctx.is_cayman()) {
			// cayman's scalar instruction that can use 3 or 4 slots

			// FIXME for simplicity we'll always add 4th slot,
			// but probably we might want to always remove 4th slot and make
			// sure that regalloc won't choose 'w' component for dst

			alu_node *f = static_cast<alu_node*>(p->first);
			alu_node *a = sh->create_alu();
			a->src = f->src;
			a->dst.resize(f->dst.size());
			a->bc = f->bc;
			a->bc.slot = SLOT_W;
			p->push_back(a);
		}
	}

	return 0;
}

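// Fetch (TEX/VTX) clauses are laid out like ALU clauses: cf->bc.count + 1
// instructions starting at dword cf->bc.addr << 1.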
int bc_parser::decode_fetch_clause(cf_node* cf) {
	int r;
	unsigned i = cf->bc.addr << 1, cnt = cf->bc.count + 1;

	cf->subtype = NST_TEX_CLAUSE;

	while (cnt--) {
		fetch_node *n = sh->create_fetch();
		cf->push_back(n);
		if ((r = dec->decode_fetch(i, n->bc)))
			return r;
		if (n->bc.src_rel || n->bc.dst_rel)
			gpr_reladdr = true;
	}
	return 0;
}

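// prepare_fetch_clause() attaches operands to fetch instructions; VTX
// fetches read ctx.vtx_src_num source components, TEX always reads four.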
int bc_parser::prepare_fetch_clause(cf_node *cf) {

	vvec grad_v, grad_h, texture_offsets;

	for (node_iterator I = cf->begin(), E = cf->end(); I != E; ++I) {

		fetch_node *n = static_cast<fetch_node*>(*I);
		assert(n->is_valid());

		unsigned flags = n->bc.op_ptr->flags;

		unsigned vtx = flags & FF_VTX;
		unsigned num_src = vtx ? ctx.vtx_src_num : 4;

		n->dst.resize(4);

		if (flags & (FF_SETGRAD | FF_USEGRAD | FF_GETGRAD)) {
			sh->uses_gradients = true;
		}

		if (flags & (FF_SETGRAD | FF_SET_TEXTURE_OFFSETS)) {

			vvec *grad = NULL;

			switch (n->bc.op) {
			case FETCH_OP_SET_GRADIENTS_V:
				grad = &grad_v;
				break;
			case FETCH_OP_SET_GRADIENTS_H:
				grad = &grad_h;
				break;
			case FETCH_OP_SET_TEXTURE_OFFSETS:
				grad = &texture_offsets;
				break;
			default:
				assert(!"unexpected SET_GRAD instruction");
				return -1;
			}

			if (grad->empty())
				grad->resize(4);

			for (unsigned s = 0; s < 4; ++s) {
				unsigned sw = n->bc.src_sel[s];
				if (sw <= SEL_W)
					(*grad)[s] = sh->get_gpr_value(true, n->bc.src_gpr,
							sw, false);
				else if (sw == SEL_0)
					(*grad)[s] = sh->get_const_value(0.0f);
				else if (sw == SEL_1)
					(*grad)[s] = sh->get_const_value(1.0f);
			}
		} else {
			// Fold source values for instructions with hidden target values
			// into the instructions using them. The set instructions are
			// later re-emitted by bc_finalizer
			if (flags & FF_USEGRAD) {
				n->src.resize(12);
				std::copy(grad_v.begin(), grad_v.end(), n->src.begin() + 4);
				std::copy(grad_h.begin(), grad_h.end(), n->src.begin() + 8);
			} else if (flags & FF_USE_TEXTURE_OFFSETS) {
				n->src.resize(8);
				std::copy(texture_offsets.begin(), texture_offsets.end(), n->src.begin() + 4);
			} else {
				n->src.resize(4);
			}

			for (int s = 0; s < 4; ++s) {
				if (n->bc.dst_sel[s] != SEL_MASK)
					n->dst[s] = sh->get_gpr_value(false, n->bc.dst_gpr, s, false);
				// NOTE: it doesn't matter here which components of the result we
				// are using, but original n->bc.dst_sel should be taken into
				// account when building the bytecode
			}
			for (unsigned s = 0; s < num_src; ++s) {
				if (n->bc.src_sel[s] <= SEL_W)
					n->src[s] = sh->get_gpr_value(true, n->bc.src_gpr,
							n->bc.src_sel[s], false);
			}

			// Scheduler will emit the appropriate instructions to set CF_IDX0/1
			if (n->bc.sampler_index_mode != V_SQ_CF_INDEX_NONE) {
				n->src.push_back(get_cf_index_value(n->bc.sampler_index_mode == V_SQ_CF_INDEX_1));
			}
			if (n->bc.resource_index_mode != V_SQ_CF_INDEX_NONE) {
				n->src.push_back(get_cf_index_value(n->bc.resource_index_mode == V_SQ_CF_INDEX_1));
			}
		}
	}

	return 0;
}

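// prepare_ir() rebuilds structured control flow from the flat CF list:
// loops become region/repeat nodes, JUMP/ELSE chains become
// region/depart/if nodes, and burst exports and memory writes are unrolled
// into individual CF nodes.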
int bc_parser::prepare_ir() {

	for (id_cf_map::iterator I = cf_map.begin(), E = cf_map.end(); I != E; ++I) {
		cf_node *c = *I;

		if (!c)
			continue;

		unsigned flags = c->bc.op_ptr->flags;

		if (flags & CF_ALU) {
			prepare_alu_clause(c);
		} else if (flags & CF_FETCH) {
			prepare_fetch_clause(c);
		} else if (c->bc.op == CF_OP_CALL_FS) {
			sh->init_call_fs(c);
			c->flags |= NF_SCHEDULE_EARLY | NF_DONT_MOVE;
		} else if (flags & CF_LOOP_START) {
			prepare_loop(c);
		} else if (c->bc.op == CF_OP_JUMP) {
			prepare_if(c);
		} else if (c->bc.op == CF_OP_LOOP_END) {
			loop_stack.pop();
		} else if (c->bc.op == CF_OP_LOOP_CONTINUE) {
			assert(!loop_stack.empty());
			repeat_node *rep = sh->create_repeat(loop_stack.top());
			if (c->parent->first != c)
				rep->move(c->parent->first, c);
			c->replace_with(rep);
			sh->simplify_dep_rep(rep);
		} else if (c->bc.op == CF_OP_LOOP_BREAK) {
			assert(!loop_stack.empty());
			depart_node *dep = sh->create_depart(loop_stack.top());
			if (c->parent->first != c)
				dep->move(c->parent->first, c);
			c->replace_with(dep);
			sh->simplify_dep_rep(dep);
		} else if (flags & CF_EXP) {

			// unroll burst exports

			assert(c->bc.op == CF_OP_EXPORT || c->bc.op == CF_OP_EXPORT_DONE);

			c->bc.set_op(CF_OP_EXPORT);

			unsigned burst_count = c->bc.burst_count;
			unsigned eop = c->bc.end_of_program;

			c->bc.end_of_program = 0;
			c->bc.burst_count = 0;

			do {
				c->src.resize(4);

				for (int s = 0; s < 4; ++s) {
					switch (c->bc.sel[s]) {
					case SEL_0:
						c->src[s] = sh->get_const_value(0.0f);
						break;
					case SEL_1:
						c->src[s] = sh->get_const_value(1.0f);
						break;
					case SEL_MASK:
						break;
					default:
						if (c->bc.sel[s] <= SEL_W)
							c->src[s] = sh->get_gpr_value(true, c->bc.rw_gpr,
									c->bc.sel[s], false);
						else
							assert(!"invalid src_sel for export");
					}
				}

				if (!burst_count--)
					break;

				cf_node *cf_next = sh->create_cf();
				cf_next->bc = c->bc;
				++cf_next->bc.rw_gpr;
				++cf_next->bc.array_base;

				c->insert_after(cf_next);
				c = cf_next;

			} while (1);

			c->bc.end_of_program = eop;
		} else if (flags & CF_MEM) {

			unsigned burst_count = c->bc.burst_count;
			unsigned eop = c->bc.end_of_program;

			c->bc.end_of_program = 0;
			c->bc.burst_count = 0;

			do {

				c->src.resize(4);

				for (int s = 0; s < 4; ++s) {
					if (c->bc.comp_mask & (1 << s))
						c->src[s] =
								sh->get_gpr_value(true, c->bc.rw_gpr, s, false);
				}

				if (((flags & CF_RAT) || (!(flags & CF_STRM))) && (c->bc.type & 1)) { // indexed write
					c->src.resize(8);
					for (int s = 0; s < 3; ++s) {
						c->src[4 + s] =
								sh->get_gpr_value(true, c->bc.index_gpr, s, false);
					}

					// FIXME probably we can relax it a bit
					c->flags |= NF_DONT_HOIST | NF_DONT_MOVE;
				}

				if (flags & CF_EMIT) {
					// Instruction implicitly depends on prior [EMIT_][CUT]_VERTEX
					c->src.push_back(sh->get_special_value(SV_GEOMETRY_EMIT));
					c->dst.push_back(sh->get_special_value(SV_GEOMETRY_EMIT));
					if (sh->target == TARGET_ES) {
						// For ES shaders this is an export
						c->flags |= NF_DONT_KILL;
					}
				}

				if (!burst_count--)
					break;

				cf_node *cf_next = sh->create_cf();
				cf_next->bc = c->bc;
				++cf_next->bc.rw_gpr;

				// FIXME is it correct?
				cf_next->bc.array_base += cf_next->bc.elem_size + 1;

				c->insert_after(cf_next);
				c = cf_next;
			} while (1);

			c->bc.end_of_program = eop;

		} else if (flags & CF_EMIT) {
			/* quick peephole */
			cf_node *prev = static_cast<cf_node *>(c->prev);
			if (c->bc.op == CF_OP_CUT_VERTEX &&
					prev && prev->is_valid() &&
					prev->bc.op == CF_OP_EMIT_VERTEX &&
					c->bc.count == prev->bc.count) {
				prev->bc.set_op(CF_OP_EMIT_CUT_VERTEX);
				prev->bc.end_of_program = c->bc.end_of_program;
				c->remove();
			} else {
				c->flags |= NF_DONT_KILL | NF_DONT_HOIST | NF_DONT_MOVE;

				c->src.push_back(sh->get_special_value(SV_GEOMETRY_EMIT));
				c->dst.push_back(sh->get_special_value(SV_GEOMETRY_EMIT));
			}
		}
	}

	assert(loop_stack.empty());
	return 0;
}

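// For LOOP_START, cf->bc.addr points one past the matching LOOP_END, so
// cf_map[addr - 1] is the end of the loop body.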
int bc_parser::prepare_loop(cf_node* c) {
	assert(c->bc.addr - 1 < cf_map.size());

	cf_node *end = cf_map[c->bc.addr - 1];
	assert(end->bc.op == CF_OP_LOOP_END);
	assert(c->parent == end->parent);

	region_node *reg = sh->create_region();
	repeat_node *rep = sh->create_repeat(reg);

	reg->push_back(rep);
	c->insert_before(reg);
	rep->move(c, end->next);

	reg->src_loop = true;

	loop_stack.push(reg);
	return 0;
}

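// For JUMP, cf->bc.addr points at the matching ELSE or at the end of the
// conditional; rebuild it as a region where dep2 wraps the 'then' block
// inside the if node and dep wraps the if node plus the 'else' block.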
int bc_parser::prepare_if(cf_node* c) {
	assert(c->bc.addr < cf_map.size());
	cf_node *c_else = NULL, *end = cf_map[c->bc.addr];

	if (!end)
		return 0; // not quite sure how this happens, malformed input?

	BCP_DUMP(
		sblog << "parsing JUMP @" << c->bc.id;
		sblog << "\n";
	);

	if (end->bc.op == CF_OP_ELSE) {
		BCP_DUMP(
			sblog << "  found ELSE : ";
			dump::dump_op(end);
			sblog << "\n";
		);

		c_else = end;
		end = cf_map[c_else->bc.addr];
	} else {
		BCP_DUMP(
			sblog << "  no else\n";
		);

		c_else = end;
	}

	if (c_else->parent != c->parent)
		c_else = NULL;

	if (end && end->parent != c->parent)
		end = NULL;

	region_node *reg = sh->create_region();

	depart_node *dep2 = sh->create_depart(reg);
	depart_node *dep = sh->create_depart(reg);
	if_node *n_if = sh->create_if();

	c->insert_before(reg);

	if (c_else != end)
		dep->move(c_else, end);
	dep2->move(c, end);

	reg->push_back(dep);
	dep->push_front(n_if);
	n_if->push_back(dep2);

	n_if->cond = sh->get_special_value(SV_EXEC_MASK);

	return 0;
}


} // namespace r600_sb