Merge remote branch 'origin/master' into pipe-video
[mesa.git] / src / gallium / drivers / nvc0 / nvc0_pc.c
1 /*
2 * Copyright 2010 Christoph Bumiller
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 * SOFTWARE.
21 */
22
23 #define NOUVEAU_DEBUG 1
24
25 #include "nvc0_pc.h"
26 #include "nvc0_program.h"
27
28 boolean
29 nvc0_insn_can_load(struct nv_instruction *nvi, int s,
30 struct nv_instruction *ld)
31 {
32 int i;
33
34 if (ld->opcode == NV_OP_MOV && ld->src[0]->value->reg.file == NV_FILE_IMM) {
35 if (s > 1 || !(nvc0_op_info_table[nvi->opcode].immediate & (1 << s)))
36 return FALSE;
37 if (!(nvc0_op_info_table[nvi->opcode].immediate & 4))
38 if (ld->src[0]->value->reg.imm.u32 & 0xfff)
39 return FALSE;
40 } else
41 if (!(nvc0_op_info_table[nvi->opcode].memory & (1 << s)))
42 return FALSE;
43
44 if (ld->indirect >= 0)
45 return FALSE;
46
47 /* a few ops can use g[] sources directly, but we don't support g[] yet */
48 if (ld->src[0]->value->reg.file == NV_FILE_MEM_L ||
49 ld->src[0]->value->reg.file == NV_FILE_MEM_G)
50 return FALSE;
51
52 for (i = 0; i < 3 && nvi->src[i]; ++i)
53 if (nvi->src[i]->value->reg.file == NV_FILE_IMM)
54 return FALSE;
55
56 return TRUE;
57 }
58
59 /* Return whether this instruction can be executed conditionally. */
60 boolean
61 nvc0_insn_is_predicateable(struct nv_instruction *nvi)
62 {
63 if (nvi->predicate >= 0) /* already predicated */
64 return FALSE;
65 if (!nvc0_op_info_table[nvi->opcode].predicate &&
66 !nvc0_op_info_table[nvi->opcode].pseudo)
67 return FALSE;
68 return TRUE;
69 }
70
71 int
72 nvc0_insn_refcount(struct nv_instruction *nvi)
73 {
74 int rc = 0;
75 int i;
76 for (i = 0; i < 5 && nvi->def[i]; ++i) {
77 if (!nvi->def[i])
78 return rc;
79 rc += nvi->def[i]->refc;
80 }
81 return rc;
82 }
83
84 int
85 nvc0_pc_replace_value(struct nv_pc *pc,
86 struct nv_value *old_val,
87 struct nv_value *new_val)
88 {
89 int i, n, s;
90
91 if (old_val == new_val)
92 return old_val->refc;
93
94 for (i = 0, n = 0; i < pc->num_refs; ++i) {
95 if (pc->refs[i]->value == old_val) {
96 ++n;
97 for (s = 0; s < 6 && pc->refs[i]->insn->src[s]; ++s)
98 if (pc->refs[i]->insn->src[s] == pc->refs[i])
99 break;
100 assert(s < 6);
101 nv_reference(pc, pc->refs[i]->insn, s, new_val);
102 }
103 }
104 return n;
105 }
106
107 static INLINE boolean
108 is_gpr63(struct nv_value *val)
109 {
110 return (val->reg.file == NV_FILE_GPR && val->reg.id == 63);
111 }
112
113 struct nv_value *
114 nvc0_pc_find_constant(struct nv_ref *ref)
115 {
116 struct nv_value *src;
117
118 if (!ref)
119 return NULL;
120
121 src = ref->value;
122 while (src->insn && src->insn->opcode == NV_OP_MOV) {
123 assert(!src->insn->src[0]->mod);
124 src = src->insn->src[0]->value;
125 }
126 if ((src->reg.file == NV_FILE_IMM) || is_gpr63(src) ||
127 (src->insn &&
128 src->insn->opcode == NV_OP_LD &&
129 src->insn->src[0]->value->reg.file >= NV_FILE_MEM_C(0) &&
130 src->insn->src[0]->value->reg.file <= NV_FILE_MEM_C(15)))
131 return src;
132 return NULL;
133 }
134
135 struct nv_value *
136 nvc0_pc_find_immediate(struct nv_ref *ref)
137 {
138 struct nv_value *src = nvc0_pc_find_constant(ref);
139
140 return (src && (src->reg.file == NV_FILE_IMM || is_gpr63(src))) ? src : NULL;
141 }
142
143 static void
144 nv_pc_free_refs(struct nv_pc *pc)
145 {
146 int i;
147 for (i = 0; i < pc->num_refs; i += 64)
148 FREE(pc->refs[i]);
149 FREE(pc->refs);
150 }
151
152 static const char *
153 edge_name(ubyte type)
154 {
155 switch (type) {
156 case CFG_EDGE_FORWARD: return "forward";
157 case CFG_EDGE_BACK: return "back";
158 case CFG_EDGE_LOOP_ENTER: return "loop";
159 case CFG_EDGE_LOOP_LEAVE: return "break";
160 case CFG_EDGE_FAKE: return "fake";
161 default:
162 return "?";
163 }
164 }
165
166 void
167 nvc0_pc_pass_in_order(struct nv_basic_block *root, nv_pc_pass_func f,
168 void *priv)
169 {
170 struct nv_basic_block *bb[64], *bbb[16], *b;
171 int j, p, pp;
172
173 bb[0] = root;
174 p = 1;
175 pp = 0;
176
177 while (p > 0) {
178 b = bb[--p];
179 b->priv = 0;
180
181 for (j = 1; j >= 0; --j) {
182 if (!b->out[j])
183 continue;
184
185 switch (b->out_kind[j]) {
186 case CFG_EDGE_BACK:
187 continue;
188 case CFG_EDGE_FORWARD:
189 case CFG_EDGE_FAKE:
190 if (++b->out[j]->priv == b->out[j]->num_in)
191 bb[p++] = b->out[j];
192 break;
193 case CFG_EDGE_LOOP_ENTER:
194 bb[p++] = b->out[j];
195 break;
196 case CFG_EDGE_LOOP_LEAVE:
197 if (!b->out[j]->priv) {
198 bbb[pp++] = b->out[j];
199 b->out[j]->priv = 1;
200 }
201 break;
202 default:
203 assert(0);
204 break;
205 }
206 }
207
208 f(priv, b);
209
210 if (!p) {
211 p = pp;
212 for (; pp > 0; --pp)
213 bb[pp - 1] = bbb[pp - 1];
214 }
215 }
216 }
217
218 static void
219 nv_do_print_function(void *priv, struct nv_basic_block *b)
220 {
221 struct nv_instruction *i;
222
223 debug_printf("=== BB %i ", b->id);
224 if (b->out[0])
225 debug_printf("[%s -> %i] ", edge_name(b->out_kind[0]), b->out[0]->id);
226 if (b->out[1])
227 debug_printf("[%s -> %i] ", edge_name(b->out_kind[1]), b->out[1]->id);
228 debug_printf("===\n");
229
230 i = b->phi;
231 if (!i)
232 i = b->entry;
233 for (; i; i = i->next)
234 nvc0_print_instruction(i);
235 }
236
237 void
238 nvc0_print_function(struct nv_basic_block *root)
239 {
240 if (root->subroutine)
241 debug_printf("SUBROUTINE %i\n", root->subroutine);
242 else
243 debug_printf("MAIN\n");
244
245 nvc0_pc_pass_in_order(root, nv_do_print_function, root);
246 }
247
248 void
249 nvc0_print_program(struct nv_pc *pc)
250 {
251 int i;
252 for (i = 0; i < pc->num_subroutines + 1; ++i)
253 if (pc->root[i])
254 nvc0_print_function(pc->root[i]);
255 }
256
257 #if NOUVEAU_DEBUG > 1
258 static void
259 nv_do_print_cfgraph(struct nv_pc *pc, FILE *f, struct nv_basic_block *b)
260 {
261 int i;
262
263 b->pass_seq = pc->pass_seq;
264
265 fprintf(f, "\t%i [shape=box]\n", b->id);
266
267 for (i = 0; i < 2; ++i) {
268 if (!b->out[i])
269 continue;
270 switch (b->out_kind[i]) {
271 case CFG_EDGE_FORWARD:
272 fprintf(f, "\t%i -> %i;\n", b->id, b->out[i]->id);
273 break;
274 case CFG_EDGE_LOOP_ENTER:
275 fprintf(f, "\t%i -> %i [color=green];\n", b->id, b->out[i]->id);
276 break;
277 case CFG_EDGE_LOOP_LEAVE:
278 fprintf(f, "\t%i -> %i [color=red];\n", b->id, b->out[i]->id);
279 break;
280 case CFG_EDGE_BACK:
281 fprintf(f, "\t%i -> %i;\n", b->id, b->out[i]->id);
282 continue;
283 case CFG_EDGE_FAKE:
284 fprintf(f, "\t%i -> %i [style=dotted];\n", b->id, b->out[i]->id);
285 break;
286 default:
287 assert(0);
288 break;
289 }
290 if (b->out[i]->pass_seq < pc->pass_seq)
291 nv_do_print_cfgraph(pc, f, b->out[i]);
292 }
293 }
294
295 /* Print the control flow graph of subroutine @subr (0 == MAIN) to a file. */
296 static void
297 nv_print_cfgraph(struct nv_pc *pc, const char *filepath, int subr)
298 {
299 FILE *f;
300
301 f = fopen(filepath, "a");
302 if (!f)
303 return;
304
305 fprintf(f, "digraph G {\n");
306
307 ++pc->pass_seq;
308
309 nv_do_print_cfgraph(pc, f, pc->root[subr]);
310
311 fprintf(f, "}\n");
312
313 fclose(f);
314 }
315 #endif
316
317 static INLINE void
318 nvc0_pc_print_binary(struct nv_pc *pc)
319 {
320 unsigned i;
321
322 NOUVEAU_DBG("nvc0_pc_print_binary(%u ops)\n", pc->emit_size / 8);
323
324 for (i = 0; i < pc->emit_size / 4; i += 2) {
325 debug_printf("0x%08x ", pc->emit[i + 0]);
326 debug_printf("0x%08x ", pc->emit[i + 1]);
327 if ((i % 16) == 15)
328 debug_printf("\n");
329 }
330 debug_printf("\n");
331 }
332
333 static int
334 nvc0_emit_program(struct nv_pc *pc)
335 {
336 uint32_t *code = pc->emit;
337 int n;
338
339 NOUVEAU_DBG("emitting program: size = %u\n", pc->emit_size);
340
341 pc->emit_pos = 0;
342 for (n = 0; n < pc->num_blocks; ++n) {
343 struct nv_instruction *i;
344 struct nv_basic_block *b = pc->bb_list[n];
345
346 for (i = b->entry; i; i = i->next) {
347 nvc0_emit_instruction(pc, i);
348 pc->emit += 2;
349 pc->emit_pos += 8;
350 }
351 }
352 assert(pc->emit == &code[pc->emit_size / 4]);
353
354 pc->emit[0] = 0x00001de7;
355 pc->emit[1] = 0x80000000;
356 pc->emit_size += 8;
357
358 pc->emit = code;
359
360 #ifdef NOUVEAU_DEBUG
361 nvc0_pc_print_binary(pc);
362 #else
363 debug_printf("not printing binary\n");
364 #endif
365 return 0;
366 }
367
368 int
369 nvc0_generate_code(struct nvc0_translation_info *ti)
370 {
371 struct nv_pc *pc;
372 int ret;
373 int i;
374
375 pc = CALLOC_STRUCT(nv_pc);
376 if (!pc)
377 return 1;
378
379 pc->is_fragprog = ti->prog->type == PIPE_SHADER_FRAGMENT;
380
381 pc->root = CALLOC(ti->num_subrs + 1, sizeof(pc->root[0]));
382 if (!pc->root) {
383 FREE(pc);
384 return 1;
385 }
386 pc->num_subroutines = ti->num_subrs;
387
388 ret = nvc0_tgsi_to_nc(pc, ti);
389 if (ret)
390 goto out;
391 #if NOUVEAU_DEBUG > 1
392 nvc0_print_program(pc);
393 #endif
394
395 pc->opt_reload_elim = ti->require_stores ? FALSE : TRUE;
396
397 /* optimization */
398 ret = nvc0_pc_exec_pass0(pc);
399 if (ret)
400 goto out;
401 #ifdef NOUVEAU_DEBUG
402 nvc0_print_program(pc);
403 #endif
404
405 /* register allocation */
406 ret = nvc0_pc_exec_pass1(pc);
407 if (ret)
408 goto out;
409 #if NOUVEAU_DEBUG > 1
410 nvc0_print_program(pc);
411 nv_print_cfgraph(pc, "nvc0_shader_cfgraph.dot", 0);
412 #endif
413
414 /* prepare for emission */
415 ret = nvc0_pc_exec_pass2(pc);
416 if (ret)
417 goto out;
418 assert(!(pc->emit_size % 8));
419
420 pc->emit = CALLOC(pc->emit_size / 4 + 2, 4);
421 if (!pc->emit) {
422 ret = 3;
423 goto out;
424 }
425 ret = nvc0_emit_program(pc);
426 if (ret)
427 goto out;
428
429 ti->prog->code = pc->emit;
430 ti->prog->code_base = 0;
431 ti->prog->code_size = pc->emit_size;
432 ti->prog->parm_size = 0;
433
434 ti->prog->max_gpr = MAX2(4, pc->max_reg[NV_FILE_GPR] + 1);
435
436 ti->prog->relocs = pc->reloc_entries;
437 ti->prog->num_relocs = pc->num_relocs;
438
439 NOUVEAU_DBG("SHADER TRANSLATION - %s\n", ret ? "failure" : "success");
440
441 out:
442 nv_pc_free_refs(pc);
443
444 for (i = 0; i < pc->num_blocks; ++i)
445 FREE(pc->bb_list[i]);
446 if (pc->root)
447 FREE(pc->root);
448 if (ret) {
449 /* on success, these will be referenced by struct nvc0_program */
450 if (pc->emit)
451 FREE(pc->emit);
452 if (pc->immd_buf)
453 FREE(pc->immd_buf);
454 if (pc->reloc_entries)
455 FREE(pc->reloc_entries);
456 }
457 FREE(pc);
458 return ret;
459 }
460
461 static void
462 nvbb_insert_phi(struct nv_basic_block *b, struct nv_instruction *i)
463 {
464 if (!b->phi) {
465 i->prev = NULL;
466 b->phi = i;
467 i->next = b->entry;
468 if (b->entry) {
469 assert(!b->entry->prev && b->exit);
470 b->entry->prev = i;
471 } else {
472 b->entry = i;
473 b->exit = i;
474 }
475 } else {
476 assert(b->entry);
477 if (b->entry->opcode == NV_OP_PHI) { /* insert after entry */
478 assert(b->entry == b->exit);
479 b->entry->next = i;
480 i->prev = b->entry;
481 b->entry = i;
482 b->exit = i;
483 } else { /* insert before entry */
484 assert(b->entry->prev && b->exit);
485 i->next = b->entry;
486 i->prev = b->entry->prev;
487 b->entry->prev = i;
488 i->prev->next = i;
489 }
490 }
491 }
492
493 void
494 nvc0_insn_append(struct nv_basic_block *b, struct nv_instruction *i)
495 {
496 if (i->opcode == NV_OP_PHI) {
497 nvbb_insert_phi(b, i);
498 } else {
499 i->prev = b->exit;
500 if (b->exit)
501 b->exit->next = i;
502 b->exit = i;
503 if (!b->entry)
504 b->entry = i;
505 else
506 if (i->prev && i->prev->opcode == NV_OP_PHI)
507 b->entry = i;
508 }
509
510 i->bb = b;
511 b->num_instructions++;
512
513 if (i->prev && i->prev->terminator)
514 nvc0_insns_permute(i->prev, i);
515 }
516
517 void
518 nvc0_insn_insert_after(struct nv_instruction *at, struct nv_instruction *ni)
519 {
520 if (!at->next) {
521 nvc0_insn_append(at->bb, ni);
522 return;
523 }
524 ni->next = at->next;
525 ni->prev = at;
526 ni->next->prev = ni;
527 ni->prev->next = ni;
528 ni->bb = at->bb;
529 ni->bb->num_instructions++;
530 }
531
532 void
533 nvc0_insn_insert_before(struct nv_instruction *at, struct nv_instruction *ni)
534 {
535 nvc0_insn_insert_after(at, ni);
536 nvc0_insns_permute(at, ni);
537 }
538
539 void
540 nvc0_insn_delete(struct nv_instruction *nvi)
541 {
542 struct nv_basic_block *b = nvi->bb;
543 int s;
544
545 /* debug_printf("REM: "); nv_print_instruction(nvi); */
546
547 for (s = 0; s < 6 && nvi->src[s]; ++s)
548 nv_reference(NULL, nvi, s, NULL);
549
550 if (nvi->next)
551 nvi->next->prev = nvi->prev;
552 else {
553 assert(nvi == b->exit);
554 b->exit = nvi->prev;
555 }
556
557 if (nvi->prev)
558 nvi->prev->next = nvi->next;
559
560 if (nvi == b->entry) {
561 /* PHIs don't get hooked to b->entry */
562 b->entry = nvi->next;
563 assert(!nvi->prev || nvi->prev->opcode == NV_OP_PHI);
564 }
565
566 if (nvi == b->phi) {
567 if (nvi->opcode != NV_OP_PHI)
568 NOUVEAU_DBG("NOTE: b->phi points to non-PHI instruction\n");
569
570 assert(!nvi->prev);
571 if (!nvi->next || nvi->next->opcode != NV_OP_PHI)
572 b->phi = NULL;
573 else
574 b->phi = nvi->next;
575 }
576 }
577
578 void
579 nvc0_insns_permute(struct nv_instruction *i1, struct nv_instruction *i2)
580 {
581 struct nv_basic_block *b = i1->bb;
582
583 assert(i1->opcode != NV_OP_PHI &&
584 i2->opcode != NV_OP_PHI);
585 assert(i1->next == i2);
586
587 if (b->exit == i2)
588 b->exit = i1;
589
590 if (b->entry == i1)
591 b->entry = i2;
592
593 i2->prev = i1->prev;
594 i1->next = i2->next;
595 i2->next = i1;
596 i1->prev = i2;
597
598 if (i2->prev)
599 i2->prev->next = i2;
600 if (i1->next)
601 i1->next->prev = i1;
602 }
603
604 void
605 nvc0_bblock_attach(struct nv_basic_block *parent,
606 struct nv_basic_block *b, ubyte edge_kind)
607 {
608 assert(b->num_in < 8);
609
610 if (parent->out[0]) {
611 assert(!parent->out[1]);
612 parent->out[1] = b;
613 parent->out_kind[1] = edge_kind;
614 } else {
615 parent->out[0] = b;
616 parent->out_kind[0] = edge_kind;
617 }
618
619 b->in[b->num_in] = parent;
620 b->in_kind[b->num_in++] = edge_kind;
621 }
622
623 /* NOTE: all BRKs are treated as conditional, so there are 2 outgoing BBs */
624
625 boolean
626 nvc0_bblock_dominated_by(struct nv_basic_block *b, struct nv_basic_block *d)
627 {
628 int j;
629
630 if (b == d)
631 return TRUE;
632
633 for (j = 0; j < b->num_in; ++j)
634 if ((b->in_kind[j] != CFG_EDGE_BACK) &&
635 !nvc0_bblock_dominated_by(b->in[j], d))
636 return FALSE;
637
638 return j ? TRUE : FALSE;
639 }
640
641 /* check if @bf (future) can be reached from @bp (past), stop at @bt */
642 boolean
643 nvc0_bblock_reachable_by(struct nv_basic_block *bf, struct nv_basic_block *bp,
644 struct nv_basic_block *bt)
645 {
646 struct nv_basic_block *q[NV_PC_MAX_BASIC_BLOCKS], *b;
647 int i, p, n;
648
649 p = 0;
650 n = 1;
651 q[0] = bp;
652
653 while (p < n) {
654 b = q[p++];
655
656 if (b == bf)
657 break;
658 if (b == bt)
659 continue;
660 assert(n <= (1024 - 2));
661
662 for (i = 0; i < 2; ++i) {
663 if (b->out[i] && !IS_WALL_EDGE(b->out_kind[i]) && !b->out[i]->priv) {
664 q[n] = b->out[i];
665 q[n++]->priv = 1;
666 }
667 }
668 }
669 for (--n; n >= 0; --n)
670 q[n]->priv = 0;
671
672 return (b == bf);
673 }
674
675 static struct nv_basic_block *
676 nvbb_find_dom_frontier(struct nv_basic_block *b, struct nv_basic_block *df)
677 {
678 struct nv_basic_block *out;
679 int i;
680
681 if (!nvc0_bblock_dominated_by(df, b)) {
682 for (i = 0; i < df->num_in; ++i) {
683 if (df->in_kind[i] == CFG_EDGE_BACK)
684 continue;
685 if (nvc0_bblock_dominated_by(df->in[i], b))
686 return df;
687 }
688 }
689 for (i = 0; i < 2 && df->out[i]; ++i) {
690 if (df->out_kind[i] == CFG_EDGE_BACK)
691 continue;
692 if ((out = nvbb_find_dom_frontier(b, df->out[i])))
693 return out;
694 }
695 return NULL;
696 }
697
698 struct nv_basic_block *
699 nvc0_bblock_dom_frontier(struct nv_basic_block *b)
700 {
701 struct nv_basic_block *df;
702 int i;
703
704 for (i = 0; i < 2 && b->out[i]; ++i)
705 if ((df = nvbb_find_dom_frontier(b, b->out[i])))
706 return df;
707 return NULL;
708 }