nvc0: recognize r63 as zero in constant folding
[mesa.git] / src / gallium / drivers / nvc0 / nvc0_pc.c
1 /*
2 * Copyright 2010 Christoph Bumiller
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 * SOFTWARE.
21 */
22
23 #define NOUVEAU_DEBUG 1
24
25 #include "nvc0_pc.h"
26 #include "nvc0_program.h"
27
28 boolean
29 nvc0_insn_can_load(struct nv_instruction *nvi, int s,
30 struct nv_instruction *ld)
31 {
32 int i;
33
34 if (ld->opcode == NV_OP_MOV && ld->src[0]->value->reg.file == NV_FILE_IMM) {
35 if (s > 1 || !(nvc0_op_info_table[nvi->opcode].immediate & (1 << s)))
36 return FALSE;
37 if (!(nvc0_op_info_table[nvi->opcode].immediate & 4))
38 if (ld->src[0]->value->reg.imm.u32 & 0xfff)
39 return FALSE;
40 } else
41 if (!(nvc0_op_info_table[nvi->opcode].memory & (1 << s)))
42 return FALSE;
43
44 if (ld->indirect >= 0)
45 return FALSE;
46
47 for (i = 0; i < 3 && nvi->src[i]; ++i)
48 if (nvi->src[i]->value->reg.file == NV_FILE_IMM)
49 return FALSE;
50
51 return TRUE;
52 }
53
54 /* Return whether this instruction can be executed conditionally. */
55 boolean
56 nvc0_insn_is_predicateable(struct nv_instruction *nvi)
57 {
58 int s;
59
60 if (!nv_op_predicateable(nvi->opcode))
61 return FALSE;
62 if (nvi->predicate >= 0)
63 return FALSE;
64 for (s = 0; s < 4 && nvi->src[s]; ++s)
65 if (nvi->src[s]->value->reg.file == NV_FILE_IMM)
66 return FALSE;
67 return TRUE;
68 }
69
70 int
71 nvc0_insn_refcount(struct nv_instruction *nvi)
72 {
73 int rc = 0;
74 int i;
75 for (i = 0; i < 5 && nvi->def[i]; ++i) {
76 if (!nvi->def[i])
77 return rc;
78 rc += nvi->def[i]->refc;
79 }
80 return rc;
81 }
82
83 int
84 nvc0_pc_replace_value(struct nv_pc *pc,
85 struct nv_value *old_val,
86 struct nv_value *new_val)
87 {
88 int i, n, s;
89
90 if (old_val == new_val)
91 return old_val->refc;
92
93 for (i = 0, n = 0; i < pc->num_refs; ++i) {
94 if (pc->refs[i]->value == old_val) {
95 ++n;
96 for (s = 0; s < 6 && pc->refs[i]->insn->src[s]; ++s)
97 if (pc->refs[i]->insn->src[s] == pc->refs[i])
98 break;
99 assert(s < 6);
100 nv_reference(pc, pc->refs[i]->insn, s, new_val);
101 }
102 }
103 return n;
104 }
105
106 static INLINE boolean
107 is_gpr63(struct nv_value *val)
108 {
109 return (val->reg.file == NV_FILE_GPR && val->reg.id == 63);
110 }
111
112 struct nv_value *
113 nvc0_pc_find_constant(struct nv_ref *ref)
114 {
115 struct nv_value *src;
116
117 if (!ref)
118 return NULL;
119
120 src = ref->value;
121 while (src->insn && src->insn->opcode == NV_OP_MOV) {
122 assert(!src->insn->src[0]->mod);
123 src = src->insn->src[0]->value;
124 }
125 if ((src->reg.file == NV_FILE_IMM) || is_gpr63(src) ||
126 (src->insn &&
127 src->insn->opcode == NV_OP_LD &&
128 src->insn->src[0]->value->reg.file >= NV_FILE_MEM_C(0) &&
129 src->insn->src[0]->value->reg.file <= NV_FILE_MEM_C(15)))
130 return src;
131 return NULL;
132 }
133
134 struct nv_value *
135 nvc0_pc_find_immediate(struct nv_ref *ref)
136 {
137 struct nv_value *src = nvc0_pc_find_constant(ref);
138
139 return (src && (src->reg.file == NV_FILE_IMM || is_gpr63(src))) ? src : NULL;
140 }
141
142 static void
143 nv_pc_free_refs(struct nv_pc *pc)
144 {
145 int i;
146 for (i = 0; i < pc->num_refs; i += 64)
147 FREE(pc->refs[i]);
148 FREE(pc->refs);
149 }
150
151 static const char *
152 edge_name(ubyte type)
153 {
154 switch (type) {
155 case CFG_EDGE_FORWARD: return "forward";
156 case CFG_EDGE_BACK: return "back";
157 case CFG_EDGE_LOOP_ENTER: return "loop";
158 case CFG_EDGE_LOOP_LEAVE: return "break";
159 case CFG_EDGE_FAKE: return "fake";
160 default:
161 return "?";
162 }
163 }
164
165 void
166 nvc0_pc_pass_in_order(struct nv_basic_block *root, nv_pc_pass_func f,
167 void *priv)
168 {
169 struct nv_basic_block *bb[64], *bbb[16], *b;
170 int j, p, pp;
171
172 bb[0] = root;
173 p = 1;
174 pp = 0;
175
176 while (p > 0) {
177 b = bb[--p];
178 b->priv = 0;
179
180 for (j = 1; j >= 0; --j) {
181 if (!b->out[j])
182 continue;
183
184 switch (b->out_kind[j]) {
185 case CFG_EDGE_BACK:
186 continue;
187 case CFG_EDGE_FORWARD:
188 case CFG_EDGE_FAKE:
189 if (++b->out[j]->priv == b->out[j]->num_in)
190 bb[p++] = b->out[j];
191 break;
192 case CFG_EDGE_LOOP_ENTER:
193 bb[p++] = b->out[j];
194 break;
195 case CFG_EDGE_LOOP_LEAVE:
196 bbb[pp++] = b->out[j];
197 break;
198 default:
199 assert(0);
200 break;
201 }
202 }
203
204 f(priv, b);
205
206 if (!p) {
207 p = pp;
208 for (; pp > 0; --pp)
209 bb[pp - 1] = bbb[pp - 1];
210 }
211 }
212 }
213
214 static void
215 nv_do_print_function(void *priv, struct nv_basic_block *b)
216 {
217 struct nv_instruction *i;
218
219 debug_printf("=== BB %i ", b->id);
220 if (b->out[0])
221 debug_printf("[%s -> %i] ", edge_name(b->out_kind[0]), b->out[0]->id);
222 if (b->out[1])
223 debug_printf("[%s -> %i] ", edge_name(b->out_kind[1]), b->out[1]->id);
224 debug_printf("===\n");
225
226 i = b->phi;
227 if (!i)
228 i = b->entry;
229 for (; i; i = i->next)
230 nvc0_print_instruction(i);
231 }
232
233 void
234 nvc0_print_function(struct nv_basic_block *root)
235 {
236 if (root->subroutine)
237 debug_printf("SUBROUTINE %i\n", root->subroutine);
238 else
239 debug_printf("MAIN\n");
240
241 nvc0_pc_pass_in_order(root, nv_do_print_function, root);
242 }
243
244 void
245 nvc0_print_program(struct nv_pc *pc)
246 {
247 int i;
248 for (i = 0; i < pc->num_subroutines + 1; ++i)
249 if (pc->root[i])
250 nvc0_print_function(pc->root[i]);
251 }
252
253 #if NOUVEAU_DEBUG > 1
254 static void
255 nv_do_print_cfgraph(struct nv_pc *pc, FILE *f, struct nv_basic_block *b)
256 {
257 int i;
258
259 b->pass_seq = pc->pass_seq;
260
261 fprintf(f, "\t%i [shape=box]\n", b->id);
262
263 for (i = 0; i < 2; ++i) {
264 if (!b->out[i])
265 continue;
266 switch (b->out_kind[i]) {
267 case CFG_EDGE_FORWARD:
268 fprintf(f, "\t%i -> %i;\n", b->id, b->out[i]->id);
269 break;
270 case CFG_EDGE_LOOP_ENTER:
271 fprintf(f, "\t%i -> %i [color=green];\n", b->id, b->out[i]->id);
272 break;
273 case CFG_EDGE_LOOP_LEAVE:
274 fprintf(f, "\t%i -> %i [color=red];\n", b->id, b->out[i]->id);
275 break;
276 case CFG_EDGE_BACK:
277 fprintf(f, "\t%i -> %i;\n", b->id, b->out[i]->id);
278 continue;
279 case CFG_EDGE_FAKE:
280 fprintf(f, "\t%i -> %i [style=dotted];\n", b->id, b->out[i]->id);
281 break;
282 default:
283 assert(0);
284 break;
285 }
286 if (b->out[i]->pass_seq < pc->pass_seq)
287 nv_do_print_cfgraph(pc, f, b->out[i]);
288 }
289 }
290
291 /* Print the control flow graph of subroutine @subr (0 == MAIN) to a file. */
292 static void
293 nv_print_cfgraph(struct nv_pc *pc, const char *filepath, int subr)
294 {
295 FILE *f;
296
297 f = fopen(filepath, "a");
298 if (!f)
299 return;
300
301 fprintf(f, "digraph G {\n");
302
303 ++pc->pass_seq;
304
305 nv_do_print_cfgraph(pc, f, pc->root[subr]);
306
307 fprintf(f, "}\n");
308
309 fclose(f);
310 }
311 #endif
312
313 static INLINE void
314 nvc0_pc_print_binary(struct nv_pc *pc)
315 {
316 unsigned i;
317
318 NOUVEAU_DBG("nvc0_pc_print_binary(%u ops)\n", pc->emit_size / 8);
319
320 for (i = 0; i < pc->emit_size / 4; i += 2) {
321 debug_printf("0x%08x ", pc->emit[i + 0]);
322 debug_printf("0x%08x ", pc->emit[i + 1]);
323 if ((i % 16) == 15)
324 debug_printf("\n");
325 }
326 debug_printf("\n");
327 }
328
329 static int
330 nvc0_emit_program(struct nv_pc *pc)
331 {
332 uint32_t *code = pc->emit;
333 int n;
334
335 NOUVEAU_DBG("emitting program: size = %u\n", pc->emit_size);
336
337 pc->emit_pos = 0;
338 for (n = 0; n < pc->num_blocks; ++n) {
339 struct nv_instruction *i;
340 struct nv_basic_block *b = pc->bb_list[n];
341
342 for (i = b->entry; i; i = i->next) {
343 nvc0_emit_instruction(pc, i);
344 pc->emit += 2;
345 pc->emit_pos += 8;
346 }
347 }
348 assert(pc->emit == &code[pc->emit_size / 4]);
349
350 pc->emit[0] = 0x00001de7;
351 pc->emit[1] = 0x80000000;
352 pc->emit_size += 8;
353
354 pc->emit = code;
355
356 #ifdef NOUVEAU_DEBUG
357 nvc0_pc_print_binary(pc);
358 #else
359 debug_printf("not printing binary\n");
360 #endif
361 return 0;
362 }
363
364 int
365 nvc0_generate_code(struct nvc0_translation_info *ti)
366 {
367 struct nv_pc *pc;
368 int ret;
369 int i;
370
371 pc = CALLOC_STRUCT(nv_pc);
372 if (!pc)
373 return 1;
374
375 pc->is_fragprog = ti->prog->type == PIPE_SHADER_FRAGMENT;
376
377 pc->root = CALLOC(ti->num_subrs + 1, sizeof(pc->root[0]));
378 if (!pc->root) {
379 FREE(pc);
380 return 1;
381 }
382 pc->num_subroutines = ti->num_subrs;
383
384 ret = nvc0_tgsi_to_nc(pc, ti);
385 if (ret)
386 goto out;
387 #if NOUVEAU_DEBUG > 1
388 nvc0_print_program(pc);
389 #endif
390
391 pc->opt_reload_elim = ti->require_stores ? FALSE : TRUE;
392
393 /* optimization */
394 ret = nvc0_pc_exec_pass0(pc);
395 if (ret)
396 goto out;
397 #ifdef NOUVEAU_DEBUG
398 nvc0_print_program(pc);
399 #endif
400
401 /* register allocation */
402 ret = nvc0_pc_exec_pass1(pc);
403 if (ret)
404 goto out;
405 #if NOUVEAU_DEBUG > 1
406 nvc0_print_program(pc);
407 nv_print_cfgraph(pc, "nvc0_shader_cfgraph.dot", 0);
408 #endif
409
410 /* prepare for emission */
411 ret = nvc0_pc_exec_pass2(pc);
412 if (ret)
413 goto out;
414 assert(!(pc->emit_size % 8));
415
416 pc->emit = CALLOC(pc->emit_size / 4 + 2, 4);
417 if (!pc->emit) {
418 ret = 3;
419 goto out;
420 }
421 ret = nvc0_emit_program(pc);
422 if (ret)
423 goto out;
424
425 ti->prog->code = pc->emit;
426 ti->prog->code_base = 0;
427 ti->prog->code_size = pc->emit_size;
428 ti->prog->parm_size = 0;
429
430 ti->prog->max_gpr = MAX2(4, pc->max_reg[NV_FILE_GPR] + 1);
431
432 ti->prog->relocs = pc->reloc_entries;
433 ti->prog->num_relocs = pc->num_relocs;
434
435 NOUVEAU_DBG("SHADER TRANSLATION - %s\n", ret ? "failure" : "success");
436
437 out:
438 nv_pc_free_refs(pc);
439
440 for (i = 0; i < pc->num_blocks; ++i)
441 FREE(pc->bb_list[i]);
442 if (pc->root)
443 FREE(pc->root);
444 if (ret) {
445 /* on success, these will be referenced by struct nvc0_program */
446 if (pc->emit)
447 FREE(pc->emit);
448 if (pc->immd_buf)
449 FREE(pc->immd_buf);
450 if (pc->reloc_entries)
451 FREE(pc->reloc_entries);
452 }
453 FREE(pc);
454 return ret;
455 }
456
457 static void
458 nvbb_insert_phi(struct nv_basic_block *b, struct nv_instruction *i)
459 {
460 if (!b->phi) {
461 i->prev = NULL;
462 b->phi = i;
463 i->next = b->entry;
464 if (b->entry) {
465 assert(!b->entry->prev && b->exit);
466 b->entry->prev = i;
467 } else {
468 b->entry = i;
469 b->exit = i;
470 }
471 } else {
472 assert(b->entry);
473 if (b->entry->opcode == NV_OP_PHI) { /* insert after entry */
474 assert(b->entry == b->exit);
475 b->entry->next = i;
476 i->prev = b->entry;
477 b->entry = i;
478 b->exit = i;
479 } else { /* insert before entry */
480 assert(b->entry->prev && b->exit);
481 i->next = b->entry;
482 i->prev = b->entry->prev;
483 b->entry->prev = i;
484 i->prev->next = i;
485 }
486 }
487 }
488
489 void
490 nvc0_insn_append(struct nv_basic_block *b, struct nv_instruction *i)
491 {
492 if (i->opcode == NV_OP_PHI) {
493 nvbb_insert_phi(b, i);
494 } else {
495 i->prev = b->exit;
496 if (b->exit)
497 b->exit->next = i;
498 b->exit = i;
499 if (!b->entry)
500 b->entry = i;
501 else
502 if (i->prev && i->prev->opcode == NV_OP_PHI)
503 b->entry = i;
504 }
505
506 i->bb = b;
507 b->num_instructions++;
508 }
509
510 void
511 nvc0_insn_insert_after(struct nv_instruction *at, struct nv_instruction *ni)
512 {
513 if (!at->next) {
514 nvc0_insn_append(at->bb, ni);
515 return;
516 }
517 ni->next = at->next;
518 ni->prev = at;
519 ni->next->prev = ni;
520 ni->prev->next = ni;
521 }
522
/* Insert instruction @ni directly in front of @at.
 *
 * Done in two steps: @ni is first placed behind @at, then the two
 * neighbours are swapped. Because of the swap, neither instruction may be
 * a PHI (asserted by nvc0_insns_permute()).
 */
void
nvc0_insn_insert_before(struct nv_instruction *at, struct nv_instruction *ni)
{
   /* step 1: ... at, ni ... */
   nvc0_insn_insert_after(at, ni);

   /* step 2: ... ni, at ... */
   nvc0_insns_permute(at, ni);
}
529
530 void
531 nvc0_insn_delete(struct nv_instruction *nvi)
532 {
533 struct nv_basic_block *b = nvi->bb;
534 int s;
535
536 /* debug_printf("REM: "); nv_print_instruction(nvi); */
537
538 for (s = 0; s < 6 && nvi->src[s]; ++s)
539 nv_reference(NULL, nvi, s, NULL);
540
541 if (nvi->next)
542 nvi->next->prev = nvi->prev;
543 else {
544 assert(nvi == b->exit);
545 b->exit = nvi->prev;
546 }
547
548 if (nvi->prev)
549 nvi->prev->next = nvi->next;
550
551 if (nvi == b->entry) {
552 /* PHIs don't get hooked to b->entry */
553 b->entry = nvi->next;
554 assert(!nvi->prev || nvi->prev->opcode == NV_OP_PHI);
555 }
556
557 if (nvi == b->phi) {
558 if (nvi->opcode != NV_OP_PHI)
559 NOUVEAU_DBG("NOTE: b->phi points to non-PHI instruction\n");
560
561 assert(!nvi->prev);
562 if (!nvi->next || nvi->next->opcode != NV_OP_PHI)
563 b->phi = NULL;
564 else
565 b->phi = nvi->next;
566 }
567 }
568
569 void
570 nvc0_insns_permute(struct nv_instruction *i1, struct nv_instruction *i2)
571 {
572 struct nv_basic_block *b = i1->bb;
573
574 assert(i1->opcode != NV_OP_PHI &&
575 i2->opcode != NV_OP_PHI);
576 assert(i1->next == i2);
577
578 if (b->exit == i2)
579 b->exit = i1;
580
581 if (b->entry == i1)
582 b->entry = i2;
583
584 i2->prev = i1->prev;
585 i1->next = i2->next;
586 i2->next = i1;
587 i1->prev = i2;
588
589 if (i2->prev)
590 i2->prev->next = i2;
591 if (i1->next)
592 i1->next->prev = i1;
593 }
594
595 void
596 nvc0_bblock_attach(struct nv_basic_block *parent,
597 struct nv_basic_block *b, ubyte edge_kind)
598 {
599 assert(b->num_in < 8);
600
601 if (parent->out[0]) {
602 assert(!parent->out[1]);
603 parent->out[1] = b;
604 parent->out_kind[1] = edge_kind;
605 } else {
606 parent->out[0] = b;
607 parent->out_kind[0] = edge_kind;
608 }
609
610 b->in[b->num_in] = parent;
611 b->in_kind[b->num_in++] = edge_kind;
612 }
613
614 /* NOTE: all BRKs are treated as conditional, so there are 2 outgoing BBs */
615
616 boolean
617 nvc0_bblock_dominated_by(struct nv_basic_block *b, struct nv_basic_block *d)
618 {
619 int j;
620
621 if (b == d)
622 return TRUE;
623
624 for (j = 0; j < b->num_in; ++j)
625 if ((b->in_kind[j] != CFG_EDGE_BACK) &&
626 !nvc0_bblock_dominated_by(b->in[j], d))
627 return FALSE;
628
629 return j ? TRUE : FALSE;
630 }
631
632 /* check if @bf (future) can be reached from @bp (past), stop at @bt */
633 boolean
634 nvc0_bblock_reachable_by(struct nv_basic_block *bf, struct nv_basic_block *bp,
635 struct nv_basic_block *bt)
636 {
637 struct nv_basic_block *q[NV_PC_MAX_BASIC_BLOCKS], *b;
638 int i, p, n;
639
640 p = 0;
641 n = 1;
642 q[0] = bp;
643
644 while (p < n) {
645 b = q[p++];
646
647 if (b == bf)
648 break;
649 if (b == bt)
650 continue;
651 assert(n <= (1024 - 2));
652
653 for (i = 0; i < 2; ++i) {
654 if (b->out[i] && !IS_WALL_EDGE(b->out_kind[i]) && !b->out[i]->priv) {
655 q[n] = b->out[i];
656 q[n++]->priv = 1;
657 }
658 }
659 }
660 for (--n; n >= 0; --n)
661 q[n]->priv = 0;
662
663 return (b == bf);
664 }
665
666 static struct nv_basic_block *
667 nvbb_find_dom_frontier(struct nv_basic_block *b, struct nv_basic_block *df)
668 {
669 struct nv_basic_block *out;
670 int i;
671
672 if (!nvc0_bblock_dominated_by(df, b)) {
673 for (i = 0; i < df->num_in; ++i) {
674 if (df->in_kind[i] == CFG_EDGE_BACK)
675 continue;
676 if (nvc0_bblock_dominated_by(df->in[i], b))
677 return df;
678 }
679 }
680 for (i = 0; i < 2 && df->out[i]; ++i) {
681 if (df->out_kind[i] == CFG_EDGE_BACK)
682 continue;
683 if ((out = nvbb_find_dom_frontier(b, df->out[i])))
684 return out;
685 }
686 return NULL;
687 }
688
689 struct nv_basic_block *
690 nvc0_bblock_dom_frontier(struct nv_basic_block *b)
691 {
692 struct nv_basic_block *df;
693 int i;
694
695 for (i = 0; i < 2 && b->out[i]; ++i)
696 if ((df = nvbb_find_dom_frontier(b, b->out[i])))
697 return df;
698 return NULL;
699 }