nvc0: sync textures with render targets ourselves
[mesa.git] / src / gallium / drivers / nvc0 / nvc0_pc.c
1 /*
2 * Copyright 2010 Christoph Bumiller
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 * SOFTWARE.
21 */
22
23 #define NOUVEAU_DEBUG 1
24
25 #include "nvc0_pc.h"
26 #include "nvc0_program.h"
27
28 boolean
29 nvc0_insn_can_load(struct nv_instruction *nvi, int s,
30 struct nv_instruction *ld)
31 {
32 int i;
33
34 if (ld->opcode == NV_OP_MOV && ld->src[0]->value->reg.file == NV_FILE_IMM) {
35 if (s > 1 || !(nvc0_op_info_table[nvi->opcode].immediate & (1 << s)))
36 return FALSE;
37 if (!(nvc0_op_info_table[nvi->opcode].immediate & 4))
38 if (ld->src[0]->value->reg.imm.u32 & 0xfff)
39 return FALSE;
40 } else
41 if (!(nvc0_op_info_table[nvi->opcode].memory & (1 << s)))
42 return FALSE;
43
44 if (ld->indirect >= 0)
45 return FALSE;
46
47 for (i = 0; i < 3 && nvi->src[i]; ++i)
48 if (nvi->src[i]->value->reg.file == NV_FILE_IMM)
49 return FALSE;
50
51 return TRUE;
52 }
53
54 /* Return whether this instruction can be executed conditionally. */
55 boolean
56 nvc0_insn_is_predicateable(struct nv_instruction *nvi)
57 {
58 if (nvi->predicate >= 0) /* already predicated */
59 return FALSE;
60 if (!nvc0_op_info_table[nvi->opcode].predicate &&
61 !nvc0_op_info_table[nvi->opcode].pseudo)
62 return FALSE;
63 return TRUE;
64 }
65
66 int
67 nvc0_insn_refcount(struct nv_instruction *nvi)
68 {
69 int rc = 0;
70 int i;
71 for (i = 0; i < 5 && nvi->def[i]; ++i) {
72 if (!nvi->def[i])
73 return rc;
74 rc += nvi->def[i]->refc;
75 }
76 return rc;
77 }
78
79 int
80 nvc0_pc_replace_value(struct nv_pc *pc,
81 struct nv_value *old_val,
82 struct nv_value *new_val)
83 {
84 int i, n, s;
85
86 if (old_val == new_val)
87 return old_val->refc;
88
89 for (i = 0, n = 0; i < pc->num_refs; ++i) {
90 if (pc->refs[i]->value == old_val) {
91 ++n;
92 for (s = 0; s < 6 && pc->refs[i]->insn->src[s]; ++s)
93 if (pc->refs[i]->insn->src[s] == pc->refs[i])
94 break;
95 assert(s < 6);
96 nv_reference(pc, pc->refs[i]->insn, s, new_val);
97 }
98 }
99 return n;
100 }
101
102 static INLINE boolean
103 is_gpr63(struct nv_value *val)
104 {
105 return (val->reg.file == NV_FILE_GPR && val->reg.id == 63);
106 }
107
108 struct nv_value *
109 nvc0_pc_find_constant(struct nv_ref *ref)
110 {
111 struct nv_value *src;
112
113 if (!ref)
114 return NULL;
115
116 src = ref->value;
117 while (src->insn && src->insn->opcode == NV_OP_MOV) {
118 assert(!src->insn->src[0]->mod);
119 src = src->insn->src[0]->value;
120 }
121 if ((src->reg.file == NV_FILE_IMM) || is_gpr63(src) ||
122 (src->insn &&
123 src->insn->opcode == NV_OP_LD &&
124 src->insn->src[0]->value->reg.file >= NV_FILE_MEM_C(0) &&
125 src->insn->src[0]->value->reg.file <= NV_FILE_MEM_C(15)))
126 return src;
127 return NULL;
128 }
129
130 struct nv_value *
131 nvc0_pc_find_immediate(struct nv_ref *ref)
132 {
133 struct nv_value *src = nvc0_pc_find_constant(ref);
134
135 return (src && (src->reg.file == NV_FILE_IMM || is_gpr63(src))) ? src : NULL;
136 }
137
138 static void
139 nv_pc_free_refs(struct nv_pc *pc)
140 {
141 int i;
142 for (i = 0; i < pc->num_refs; i += 64)
143 FREE(pc->refs[i]);
144 FREE(pc->refs);
145 }
146
147 static const char *
148 edge_name(ubyte type)
149 {
150 switch (type) {
151 case CFG_EDGE_FORWARD: return "forward";
152 case CFG_EDGE_BACK: return "back";
153 case CFG_EDGE_LOOP_ENTER: return "loop";
154 case CFG_EDGE_LOOP_LEAVE: return "break";
155 case CFG_EDGE_FAKE: return "fake";
156 default:
157 return "?";
158 }
159 }
160
161 void
162 nvc0_pc_pass_in_order(struct nv_basic_block *root, nv_pc_pass_func f,
163 void *priv)
164 {
165 struct nv_basic_block *bb[64], *bbb[16], *b;
166 int j, p, pp;
167
168 bb[0] = root;
169 p = 1;
170 pp = 0;
171
172 while (p > 0) {
173 b = bb[--p];
174 b->priv = 0;
175
176 for (j = 1; j >= 0; --j) {
177 if (!b->out[j])
178 continue;
179
180 switch (b->out_kind[j]) {
181 case CFG_EDGE_BACK:
182 continue;
183 case CFG_EDGE_FORWARD:
184 case CFG_EDGE_FAKE:
185 if (++b->out[j]->priv == b->out[j]->num_in)
186 bb[p++] = b->out[j];
187 break;
188 case CFG_EDGE_LOOP_ENTER:
189 bb[p++] = b->out[j];
190 break;
191 case CFG_EDGE_LOOP_LEAVE:
192 if (!b->out[j]->priv) {
193 bbb[pp++] = b->out[j];
194 b->out[j]->priv = 1;
195 }
196 break;
197 default:
198 assert(0);
199 break;
200 }
201 }
202
203 f(priv, b);
204
205 if (!p) {
206 p = pp;
207 for (; pp > 0; --pp)
208 bb[pp - 1] = bbb[pp - 1];
209 }
210 }
211 }
212
213 static void
214 nv_do_print_function(void *priv, struct nv_basic_block *b)
215 {
216 struct nv_instruction *i;
217
218 debug_printf("=== BB %i ", b->id);
219 if (b->out[0])
220 debug_printf("[%s -> %i] ", edge_name(b->out_kind[0]), b->out[0]->id);
221 if (b->out[1])
222 debug_printf("[%s -> %i] ", edge_name(b->out_kind[1]), b->out[1]->id);
223 debug_printf("===\n");
224
225 i = b->phi;
226 if (!i)
227 i = b->entry;
228 for (; i; i = i->next)
229 nvc0_print_instruction(i);
230 }
231
232 void
233 nvc0_print_function(struct nv_basic_block *root)
234 {
235 if (root->subroutine)
236 debug_printf("SUBROUTINE %i\n", root->subroutine);
237 else
238 debug_printf("MAIN\n");
239
240 nvc0_pc_pass_in_order(root, nv_do_print_function, root);
241 }
242
243 void
244 nvc0_print_program(struct nv_pc *pc)
245 {
246 int i;
247 for (i = 0; i < pc->num_subroutines + 1; ++i)
248 if (pc->root[i])
249 nvc0_print_function(pc->root[i]);
250 }
251
252 #if NOUVEAU_DEBUG > 1
253 static void
254 nv_do_print_cfgraph(struct nv_pc *pc, FILE *f, struct nv_basic_block *b)
255 {
256 int i;
257
258 b->pass_seq = pc->pass_seq;
259
260 fprintf(f, "\t%i [shape=box]\n", b->id);
261
262 for (i = 0; i < 2; ++i) {
263 if (!b->out[i])
264 continue;
265 switch (b->out_kind[i]) {
266 case CFG_EDGE_FORWARD:
267 fprintf(f, "\t%i -> %i;\n", b->id, b->out[i]->id);
268 break;
269 case CFG_EDGE_LOOP_ENTER:
270 fprintf(f, "\t%i -> %i [color=green];\n", b->id, b->out[i]->id);
271 break;
272 case CFG_EDGE_LOOP_LEAVE:
273 fprintf(f, "\t%i -> %i [color=red];\n", b->id, b->out[i]->id);
274 break;
275 case CFG_EDGE_BACK:
276 fprintf(f, "\t%i -> %i;\n", b->id, b->out[i]->id);
277 continue;
278 case CFG_EDGE_FAKE:
279 fprintf(f, "\t%i -> %i [style=dotted];\n", b->id, b->out[i]->id);
280 break;
281 default:
282 assert(0);
283 break;
284 }
285 if (b->out[i]->pass_seq < pc->pass_seq)
286 nv_do_print_cfgraph(pc, f, b->out[i]);
287 }
288 }
289
290 /* Print the control flow graph of subroutine @subr (0 == MAIN) to a file. */
291 static void
292 nv_print_cfgraph(struct nv_pc *pc, const char *filepath, int subr)
293 {
294 FILE *f;
295
296 f = fopen(filepath, "a");
297 if (!f)
298 return;
299
300 fprintf(f, "digraph G {\n");
301
302 ++pc->pass_seq;
303
304 nv_do_print_cfgraph(pc, f, pc->root[subr]);
305
306 fprintf(f, "}\n");
307
308 fclose(f);
309 }
310 #endif
311
312 static INLINE void
313 nvc0_pc_print_binary(struct nv_pc *pc)
314 {
315 unsigned i;
316
317 NOUVEAU_DBG("nvc0_pc_print_binary(%u ops)\n", pc->emit_size / 8);
318
319 for (i = 0; i < pc->emit_size / 4; i += 2) {
320 debug_printf("0x%08x ", pc->emit[i + 0]);
321 debug_printf("0x%08x ", pc->emit[i + 1]);
322 if ((i % 16) == 15)
323 debug_printf("\n");
324 }
325 debug_printf("\n");
326 }
327
328 static int
329 nvc0_emit_program(struct nv_pc *pc)
330 {
331 uint32_t *code = pc->emit;
332 int n;
333
334 NOUVEAU_DBG("emitting program: size = %u\n", pc->emit_size);
335
336 pc->emit_pos = 0;
337 for (n = 0; n < pc->num_blocks; ++n) {
338 struct nv_instruction *i;
339 struct nv_basic_block *b = pc->bb_list[n];
340
341 for (i = b->entry; i; i = i->next) {
342 nvc0_emit_instruction(pc, i);
343 pc->emit += 2;
344 pc->emit_pos += 8;
345 }
346 }
347 assert(pc->emit == &code[pc->emit_size / 4]);
348
349 pc->emit[0] = 0x00001de7;
350 pc->emit[1] = 0x80000000;
351 pc->emit_size += 8;
352
353 pc->emit = code;
354
355 #ifdef NOUVEAU_DEBUG
356 nvc0_pc_print_binary(pc);
357 #else
358 debug_printf("not printing binary\n");
359 #endif
360 return 0;
361 }
362
363 int
364 nvc0_generate_code(struct nvc0_translation_info *ti)
365 {
366 struct nv_pc *pc;
367 int ret;
368 int i;
369
370 pc = CALLOC_STRUCT(nv_pc);
371 if (!pc)
372 return 1;
373
374 pc->is_fragprog = ti->prog->type == PIPE_SHADER_FRAGMENT;
375
376 pc->root = CALLOC(ti->num_subrs + 1, sizeof(pc->root[0]));
377 if (!pc->root) {
378 FREE(pc);
379 return 1;
380 }
381 pc->num_subroutines = ti->num_subrs;
382
383 ret = nvc0_tgsi_to_nc(pc, ti);
384 if (ret)
385 goto out;
386 #if NOUVEAU_DEBUG > 1
387 nvc0_print_program(pc);
388 #endif
389
390 pc->opt_reload_elim = ti->require_stores ? FALSE : TRUE;
391
392 /* optimization */
393 ret = nvc0_pc_exec_pass0(pc);
394 if (ret)
395 goto out;
396 #ifdef NOUVEAU_DEBUG
397 nvc0_print_program(pc);
398 #endif
399
400 /* register allocation */
401 ret = nvc0_pc_exec_pass1(pc);
402 if (ret)
403 goto out;
404 #if NOUVEAU_DEBUG > 1
405 nvc0_print_program(pc);
406 nv_print_cfgraph(pc, "nvc0_shader_cfgraph.dot", 0);
407 #endif
408
409 /* prepare for emission */
410 ret = nvc0_pc_exec_pass2(pc);
411 if (ret)
412 goto out;
413 assert(!(pc->emit_size % 8));
414
415 pc->emit = CALLOC(pc->emit_size / 4 + 2, 4);
416 if (!pc->emit) {
417 ret = 3;
418 goto out;
419 }
420 ret = nvc0_emit_program(pc);
421 if (ret)
422 goto out;
423
424 ti->prog->code = pc->emit;
425 ti->prog->code_base = 0;
426 ti->prog->code_size = pc->emit_size;
427 ti->prog->parm_size = 0;
428
429 ti->prog->max_gpr = MAX2(4, pc->max_reg[NV_FILE_GPR] + 1);
430
431 ti->prog->relocs = pc->reloc_entries;
432 ti->prog->num_relocs = pc->num_relocs;
433
434 NOUVEAU_DBG("SHADER TRANSLATION - %s\n", ret ? "failure" : "success");
435
436 out:
437 nv_pc_free_refs(pc);
438
439 for (i = 0; i < pc->num_blocks; ++i)
440 FREE(pc->bb_list[i]);
441 if (pc->root)
442 FREE(pc->root);
443 if (ret) {
444 /* on success, these will be referenced by struct nvc0_program */
445 if (pc->emit)
446 FREE(pc->emit);
447 if (pc->immd_buf)
448 FREE(pc->immd_buf);
449 if (pc->reloc_entries)
450 FREE(pc->reloc_entries);
451 }
452 FREE(pc);
453 return ret;
454 }
455
456 static void
457 nvbb_insert_phi(struct nv_basic_block *b, struct nv_instruction *i)
458 {
459 if (!b->phi) {
460 i->prev = NULL;
461 b->phi = i;
462 i->next = b->entry;
463 if (b->entry) {
464 assert(!b->entry->prev && b->exit);
465 b->entry->prev = i;
466 } else {
467 b->entry = i;
468 b->exit = i;
469 }
470 } else {
471 assert(b->entry);
472 if (b->entry->opcode == NV_OP_PHI) { /* insert after entry */
473 assert(b->entry == b->exit);
474 b->entry->next = i;
475 i->prev = b->entry;
476 b->entry = i;
477 b->exit = i;
478 } else { /* insert before entry */
479 assert(b->entry->prev && b->exit);
480 i->next = b->entry;
481 i->prev = b->entry->prev;
482 b->entry->prev = i;
483 i->prev->next = i;
484 }
485 }
486 }
487
488 void
489 nvc0_insn_append(struct nv_basic_block *b, struct nv_instruction *i)
490 {
491 if (i->opcode == NV_OP_PHI) {
492 nvbb_insert_phi(b, i);
493 } else {
494 i->prev = b->exit;
495 if (b->exit)
496 b->exit->next = i;
497 b->exit = i;
498 if (!b->entry)
499 b->entry = i;
500 else
501 if (i->prev && i->prev->opcode == NV_OP_PHI)
502 b->entry = i;
503 }
504
505 i->bb = b;
506 b->num_instructions++;
507
508 if (i->prev && i->prev->terminator)
509 nvc0_insns_permute(i->prev, i);
510 }
511
512 void
513 nvc0_insn_insert_after(struct nv_instruction *at, struct nv_instruction *ni)
514 {
515 if (!at->next) {
516 nvc0_insn_append(at->bb, ni);
517 return;
518 }
519 ni->next = at->next;
520 ni->prev = at;
521 ni->next->prev = ni;
522 ni->prev->next = ni;
523 ni->bb = at->bb;
524 ni->bb->num_instructions++;
525 }
526
/* Link @ni into @at's basic block directly in front of @at.
 *
 * Implemented in two steps: @ni is first inserted behind @at, then the two
 * neighbours are swapped.
 */
void
nvc0_insn_insert_before(struct nv_instruction *at, struct nv_instruction *ni)
{
   /* step 1: ... at, ni ... */
   nvc0_insn_insert_after(at, ni);

   /* step 2: ... ni, at ... */
   nvc0_insns_permute(at, ni);
}
533
534 void
535 nvc0_insn_delete(struct nv_instruction *nvi)
536 {
537 struct nv_basic_block *b = nvi->bb;
538 int s;
539
540 /* debug_printf("REM: "); nv_print_instruction(nvi); */
541
542 for (s = 0; s < 6 && nvi->src[s]; ++s)
543 nv_reference(NULL, nvi, s, NULL);
544
545 if (nvi->next)
546 nvi->next->prev = nvi->prev;
547 else {
548 assert(nvi == b->exit);
549 b->exit = nvi->prev;
550 }
551
552 if (nvi->prev)
553 nvi->prev->next = nvi->next;
554
555 if (nvi == b->entry) {
556 /* PHIs don't get hooked to b->entry */
557 b->entry = nvi->next;
558 assert(!nvi->prev || nvi->prev->opcode == NV_OP_PHI);
559 }
560
561 if (nvi == b->phi) {
562 if (nvi->opcode != NV_OP_PHI)
563 NOUVEAU_DBG("NOTE: b->phi points to non-PHI instruction\n");
564
565 assert(!nvi->prev);
566 if (!nvi->next || nvi->next->opcode != NV_OP_PHI)
567 b->phi = NULL;
568 else
569 b->phi = nvi->next;
570 }
571 }
572
573 void
574 nvc0_insns_permute(struct nv_instruction *i1, struct nv_instruction *i2)
575 {
576 struct nv_basic_block *b = i1->bb;
577
578 assert(i1->opcode != NV_OP_PHI &&
579 i2->opcode != NV_OP_PHI);
580 assert(i1->next == i2);
581
582 if (b->exit == i2)
583 b->exit = i1;
584
585 if (b->entry == i1)
586 b->entry = i2;
587
588 i2->prev = i1->prev;
589 i1->next = i2->next;
590 i2->next = i1;
591 i1->prev = i2;
592
593 if (i2->prev)
594 i2->prev->next = i2;
595 if (i1->next)
596 i1->next->prev = i1;
597 }
598
599 void
600 nvc0_bblock_attach(struct nv_basic_block *parent,
601 struct nv_basic_block *b, ubyte edge_kind)
602 {
603 assert(b->num_in < 8);
604
605 if (parent->out[0]) {
606 assert(!parent->out[1]);
607 parent->out[1] = b;
608 parent->out_kind[1] = edge_kind;
609 } else {
610 parent->out[0] = b;
611 parent->out_kind[0] = edge_kind;
612 }
613
614 b->in[b->num_in] = parent;
615 b->in_kind[b->num_in++] = edge_kind;
616 }
617
618 /* NOTE: all BRKs are treated as conditional, so there are 2 outgoing BBs */
619
620 boolean
621 nvc0_bblock_dominated_by(struct nv_basic_block *b, struct nv_basic_block *d)
622 {
623 int j;
624
625 if (b == d)
626 return TRUE;
627
628 for (j = 0; j < b->num_in; ++j)
629 if ((b->in_kind[j] != CFG_EDGE_BACK) &&
630 !nvc0_bblock_dominated_by(b->in[j], d))
631 return FALSE;
632
633 return j ? TRUE : FALSE;
634 }
635
636 /* check if @bf (future) can be reached from @bp (past), stop at @bt */
637 boolean
638 nvc0_bblock_reachable_by(struct nv_basic_block *bf, struct nv_basic_block *bp,
639 struct nv_basic_block *bt)
640 {
641 struct nv_basic_block *q[NV_PC_MAX_BASIC_BLOCKS], *b;
642 int i, p, n;
643
644 p = 0;
645 n = 1;
646 q[0] = bp;
647
648 while (p < n) {
649 b = q[p++];
650
651 if (b == bf)
652 break;
653 if (b == bt)
654 continue;
655 assert(n <= (1024 - 2));
656
657 for (i = 0; i < 2; ++i) {
658 if (b->out[i] && !IS_WALL_EDGE(b->out_kind[i]) && !b->out[i]->priv) {
659 q[n] = b->out[i];
660 q[n++]->priv = 1;
661 }
662 }
663 }
664 for (--n; n >= 0; --n)
665 q[n]->priv = 0;
666
667 return (b == bf);
668 }
669
670 static struct nv_basic_block *
671 nvbb_find_dom_frontier(struct nv_basic_block *b, struct nv_basic_block *df)
672 {
673 struct nv_basic_block *out;
674 int i;
675
676 if (!nvc0_bblock_dominated_by(df, b)) {
677 for (i = 0; i < df->num_in; ++i) {
678 if (df->in_kind[i] == CFG_EDGE_BACK)
679 continue;
680 if (nvc0_bblock_dominated_by(df->in[i], b))
681 return df;
682 }
683 }
684 for (i = 0; i < 2 && df->out[i]; ++i) {
685 if (df->out_kind[i] == CFG_EDGE_BACK)
686 continue;
687 if ((out = nvbb_find_dom_frontier(b, df->out[i])))
688 return out;
689 }
690 return NULL;
691 }
692
693 struct nv_basic_block *
694 nvc0_bblock_dom_frontier(struct nv_basic_block *b)
695 {
696 struct nv_basic_block *df;
697 int i;
698
699 for (i = 0; i < 2 && b->out[i]; ++i)
700 if ((df = nvbb_find_dom_frontier(b, b->out[i])))
701 return df;
702 return NULL;
703 }