r600g: add support for s3tc formats.
[mesa.git] / src / gallium / drivers / nvc0 / nvc0_pc.c
1 /*
2 * Copyright 2010 Christoph Bumiller
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 * SOFTWARE.
21 */
22
23 #define NOUVEAU_DEBUG 1
24
25 #include "nvc0_pc.h"
26 #include "nvc0_program.h"
27
28 boolean
29 nvc0_insn_can_load(struct nv_instruction *nvi, int s,
30 struct nv_instruction *ld)
31 {
32 int i;
33
34 if (ld->opcode == NV_OP_MOV && ld->src[0]->value->reg.file == NV_FILE_IMM) {
35 if (s > 1 || !(nvc0_op_info_table[nvi->opcode].immediate & (1 << s)))
36 return FALSE;
37 if (!(nvc0_op_info_table[nvi->opcode].immediate & 4))
38 if (ld->src[0]->value->reg.imm.u32 & 0xfff)
39 return FALSE;
40 } else
41 if (!(nvc0_op_info_table[nvi->opcode].memory & (1 << s)))
42 return FALSE;
43
44 if (ld->indirect >= 0)
45 return FALSE;
46
47 for (i = 0; i < 3 && nvi->src[i]; ++i)
48 if (nvi->src[i]->value->reg.file == NV_FILE_IMM)
49 return FALSE;
50
51 return TRUE;
52 }
53
54 /* Return whether this instruction can be executed conditionally. */
55 boolean
56 nvc0_insn_is_predicateable(struct nv_instruction *nvi)
57 {
58 if (nvi->predicate >= 0) /* already predicated */
59 return FALSE;
60 if (!nvc0_op_info_table[nvi->opcode].predicate &&
61 !nvc0_op_info_table[nvi->opcode].pseudo)
62 return FALSE;
63 return TRUE;
64 }
65
66 int
67 nvc0_insn_refcount(struct nv_instruction *nvi)
68 {
69 int rc = 0;
70 int i;
71 for (i = 0; i < 5 && nvi->def[i]; ++i) {
72 if (!nvi->def[i])
73 return rc;
74 rc += nvi->def[i]->refc;
75 }
76 return rc;
77 }
78
79 int
80 nvc0_pc_replace_value(struct nv_pc *pc,
81 struct nv_value *old_val,
82 struct nv_value *new_val)
83 {
84 int i, n, s;
85
86 if (old_val == new_val)
87 return old_val->refc;
88
89 for (i = 0, n = 0; i < pc->num_refs; ++i) {
90 if (pc->refs[i]->value == old_val) {
91 ++n;
92 for (s = 0; s < 6 && pc->refs[i]->insn->src[s]; ++s)
93 if (pc->refs[i]->insn->src[s] == pc->refs[i])
94 break;
95 assert(s < 6);
96 nv_reference(pc, pc->refs[i]->insn, s, new_val);
97 }
98 }
99 return n;
100 }
101
102 static INLINE boolean
103 is_gpr63(struct nv_value *val)
104 {
105 return (val->reg.file == NV_FILE_GPR && val->reg.id == 63);
106 }
107
108 struct nv_value *
109 nvc0_pc_find_constant(struct nv_ref *ref)
110 {
111 struct nv_value *src;
112
113 if (!ref)
114 return NULL;
115
116 src = ref->value;
117 while (src->insn && src->insn->opcode == NV_OP_MOV) {
118 assert(!src->insn->src[0]->mod);
119 src = src->insn->src[0]->value;
120 }
121 if ((src->reg.file == NV_FILE_IMM) || is_gpr63(src) ||
122 (src->insn &&
123 src->insn->opcode == NV_OP_LD &&
124 src->insn->src[0]->value->reg.file >= NV_FILE_MEM_C(0) &&
125 src->insn->src[0]->value->reg.file <= NV_FILE_MEM_C(15)))
126 return src;
127 return NULL;
128 }
129
130 struct nv_value *
131 nvc0_pc_find_immediate(struct nv_ref *ref)
132 {
133 struct nv_value *src = nvc0_pc_find_constant(ref);
134
135 return (src && (src->reg.file == NV_FILE_IMM || is_gpr63(src))) ? src : NULL;
136 }
137
138 static void
139 nv_pc_free_refs(struct nv_pc *pc)
140 {
141 int i;
142 for (i = 0; i < pc->num_refs; i += 64)
143 FREE(pc->refs[i]);
144 FREE(pc->refs);
145 }
146
147 static const char *
148 edge_name(ubyte type)
149 {
150 switch (type) {
151 case CFG_EDGE_FORWARD: return "forward";
152 case CFG_EDGE_BACK: return "back";
153 case CFG_EDGE_LOOP_ENTER: return "loop";
154 case CFG_EDGE_LOOP_LEAVE: return "break";
155 case CFG_EDGE_FAKE: return "fake";
156 default:
157 return "?";
158 }
159 }
160
161 void
162 nvc0_pc_pass_in_order(struct nv_basic_block *root, nv_pc_pass_func f,
163 void *priv)
164 {
165 struct nv_basic_block *bb[64], *bbb[16], *b;
166 int j, p, pp;
167
168 bb[0] = root;
169 p = 1;
170 pp = 0;
171
172 while (p > 0) {
173 b = bb[--p];
174 b->priv = 0;
175
176 for (j = 1; j >= 0; --j) {
177 if (!b->out[j])
178 continue;
179
180 switch (b->out_kind[j]) {
181 case CFG_EDGE_BACK:
182 continue;
183 case CFG_EDGE_FORWARD:
184 case CFG_EDGE_FAKE:
185 if (++b->out[j]->priv == b->out[j]->num_in)
186 bb[p++] = b->out[j];
187 break;
188 case CFG_EDGE_LOOP_ENTER:
189 bb[p++] = b->out[j];
190 break;
191 case CFG_EDGE_LOOP_LEAVE:
192 bbb[pp++] = b->out[j];
193 break;
194 default:
195 assert(0);
196 break;
197 }
198 }
199
200 f(priv, b);
201
202 if (!p) {
203 p = pp;
204 for (; pp > 0; --pp)
205 bb[pp - 1] = bbb[pp - 1];
206 }
207 }
208 }
209
210 static void
211 nv_do_print_function(void *priv, struct nv_basic_block *b)
212 {
213 struct nv_instruction *i;
214
215 debug_printf("=== BB %i ", b->id);
216 if (b->out[0])
217 debug_printf("[%s -> %i] ", edge_name(b->out_kind[0]), b->out[0]->id);
218 if (b->out[1])
219 debug_printf("[%s -> %i] ", edge_name(b->out_kind[1]), b->out[1]->id);
220 debug_printf("===\n");
221
222 i = b->phi;
223 if (!i)
224 i = b->entry;
225 for (; i; i = i->next)
226 nvc0_print_instruction(i);
227 }
228
229 void
230 nvc0_print_function(struct nv_basic_block *root)
231 {
232 if (root->subroutine)
233 debug_printf("SUBROUTINE %i\n", root->subroutine);
234 else
235 debug_printf("MAIN\n");
236
237 nvc0_pc_pass_in_order(root, nv_do_print_function, root);
238 }
239
240 void
241 nvc0_print_program(struct nv_pc *pc)
242 {
243 int i;
244 for (i = 0; i < pc->num_subroutines + 1; ++i)
245 if (pc->root[i])
246 nvc0_print_function(pc->root[i]);
247 }
248
249 #if NOUVEAU_DEBUG > 1
250 static void
251 nv_do_print_cfgraph(struct nv_pc *pc, FILE *f, struct nv_basic_block *b)
252 {
253 int i;
254
255 b->pass_seq = pc->pass_seq;
256
257 fprintf(f, "\t%i [shape=box]\n", b->id);
258
259 for (i = 0; i < 2; ++i) {
260 if (!b->out[i])
261 continue;
262 switch (b->out_kind[i]) {
263 case CFG_EDGE_FORWARD:
264 fprintf(f, "\t%i -> %i;\n", b->id, b->out[i]->id);
265 break;
266 case CFG_EDGE_LOOP_ENTER:
267 fprintf(f, "\t%i -> %i [color=green];\n", b->id, b->out[i]->id);
268 break;
269 case CFG_EDGE_LOOP_LEAVE:
270 fprintf(f, "\t%i -> %i [color=red];\n", b->id, b->out[i]->id);
271 break;
272 case CFG_EDGE_BACK:
273 fprintf(f, "\t%i -> %i;\n", b->id, b->out[i]->id);
274 continue;
275 case CFG_EDGE_FAKE:
276 fprintf(f, "\t%i -> %i [style=dotted];\n", b->id, b->out[i]->id);
277 break;
278 default:
279 assert(0);
280 break;
281 }
282 if (b->out[i]->pass_seq < pc->pass_seq)
283 nv_do_print_cfgraph(pc, f, b->out[i]);
284 }
285 }
286
287 /* Print the control flow graph of subroutine @subr (0 == MAIN) to a file. */
288 static void
289 nv_print_cfgraph(struct nv_pc *pc, const char *filepath, int subr)
290 {
291 FILE *f;
292
293 f = fopen(filepath, "a");
294 if (!f)
295 return;
296
297 fprintf(f, "digraph G {\n");
298
299 ++pc->pass_seq;
300
301 nv_do_print_cfgraph(pc, f, pc->root[subr]);
302
303 fprintf(f, "}\n");
304
305 fclose(f);
306 }
307 #endif
308
309 static INLINE void
310 nvc0_pc_print_binary(struct nv_pc *pc)
311 {
312 unsigned i;
313
314 NOUVEAU_DBG("nvc0_pc_print_binary(%u ops)\n", pc->emit_size / 8);
315
316 for (i = 0; i < pc->emit_size / 4; i += 2) {
317 debug_printf("0x%08x ", pc->emit[i + 0]);
318 debug_printf("0x%08x ", pc->emit[i + 1]);
319 if ((i % 16) == 15)
320 debug_printf("\n");
321 }
322 debug_printf("\n");
323 }
324
325 static int
326 nvc0_emit_program(struct nv_pc *pc)
327 {
328 uint32_t *code = pc->emit;
329 int n;
330
331 NOUVEAU_DBG("emitting program: size = %u\n", pc->emit_size);
332
333 pc->emit_pos = 0;
334 for (n = 0; n < pc->num_blocks; ++n) {
335 struct nv_instruction *i;
336 struct nv_basic_block *b = pc->bb_list[n];
337
338 for (i = b->entry; i; i = i->next) {
339 nvc0_emit_instruction(pc, i);
340 pc->emit += 2;
341 pc->emit_pos += 8;
342 }
343 }
344 assert(pc->emit == &code[pc->emit_size / 4]);
345
346 pc->emit[0] = 0x00001de7;
347 pc->emit[1] = 0x80000000;
348 pc->emit_size += 8;
349
350 pc->emit = code;
351
352 #ifdef NOUVEAU_DEBUG
353 nvc0_pc_print_binary(pc);
354 #else
355 debug_printf("not printing binary\n");
356 #endif
357 return 0;
358 }
359
360 int
361 nvc0_generate_code(struct nvc0_translation_info *ti)
362 {
363 struct nv_pc *pc;
364 int ret;
365 int i;
366
367 pc = CALLOC_STRUCT(nv_pc);
368 if (!pc)
369 return 1;
370
371 pc->is_fragprog = ti->prog->type == PIPE_SHADER_FRAGMENT;
372
373 pc->root = CALLOC(ti->num_subrs + 1, sizeof(pc->root[0]));
374 if (!pc->root) {
375 FREE(pc);
376 return 1;
377 }
378 pc->num_subroutines = ti->num_subrs;
379
380 ret = nvc0_tgsi_to_nc(pc, ti);
381 if (ret)
382 goto out;
383 #if NOUVEAU_DEBUG > 1
384 nvc0_print_program(pc);
385 #endif
386
387 pc->opt_reload_elim = ti->require_stores ? FALSE : TRUE;
388
389 /* optimization */
390 ret = nvc0_pc_exec_pass0(pc);
391 if (ret)
392 goto out;
393 #ifdef NOUVEAU_DEBUG
394 nvc0_print_program(pc);
395 #endif
396
397 /* register allocation */
398 ret = nvc0_pc_exec_pass1(pc);
399 if (ret)
400 goto out;
401 #if NOUVEAU_DEBUG > 1
402 nvc0_print_program(pc);
403 nv_print_cfgraph(pc, "nvc0_shader_cfgraph.dot", 0);
404 #endif
405
406 /* prepare for emission */
407 ret = nvc0_pc_exec_pass2(pc);
408 if (ret)
409 goto out;
410 assert(!(pc->emit_size % 8));
411
412 pc->emit = CALLOC(pc->emit_size / 4 + 2, 4);
413 if (!pc->emit) {
414 ret = 3;
415 goto out;
416 }
417 ret = nvc0_emit_program(pc);
418 if (ret)
419 goto out;
420
421 ti->prog->code = pc->emit;
422 ti->prog->code_base = 0;
423 ti->prog->code_size = pc->emit_size;
424 ti->prog->parm_size = 0;
425
426 ti->prog->max_gpr = MAX2(4, pc->max_reg[NV_FILE_GPR] + 1);
427
428 ti->prog->relocs = pc->reloc_entries;
429 ti->prog->num_relocs = pc->num_relocs;
430
431 NOUVEAU_DBG("SHADER TRANSLATION - %s\n", ret ? "failure" : "success");
432
433 out:
434 nv_pc_free_refs(pc);
435
436 for (i = 0; i < pc->num_blocks; ++i)
437 FREE(pc->bb_list[i]);
438 if (pc->root)
439 FREE(pc->root);
440 if (ret) {
441 /* on success, these will be referenced by struct nvc0_program */
442 if (pc->emit)
443 FREE(pc->emit);
444 if (pc->immd_buf)
445 FREE(pc->immd_buf);
446 if (pc->reloc_entries)
447 FREE(pc->reloc_entries);
448 }
449 FREE(pc);
450 return ret;
451 }
452
453 static void
454 nvbb_insert_phi(struct nv_basic_block *b, struct nv_instruction *i)
455 {
456 if (!b->phi) {
457 i->prev = NULL;
458 b->phi = i;
459 i->next = b->entry;
460 if (b->entry) {
461 assert(!b->entry->prev && b->exit);
462 b->entry->prev = i;
463 } else {
464 b->entry = i;
465 b->exit = i;
466 }
467 } else {
468 assert(b->entry);
469 if (b->entry->opcode == NV_OP_PHI) { /* insert after entry */
470 assert(b->entry == b->exit);
471 b->entry->next = i;
472 i->prev = b->entry;
473 b->entry = i;
474 b->exit = i;
475 } else { /* insert before entry */
476 assert(b->entry->prev && b->exit);
477 i->next = b->entry;
478 i->prev = b->entry->prev;
479 b->entry->prev = i;
480 i->prev->next = i;
481 }
482 }
483 }
484
485 void
486 nvc0_insn_append(struct nv_basic_block *b, struct nv_instruction *i)
487 {
488 if (i->opcode == NV_OP_PHI) {
489 nvbb_insert_phi(b, i);
490 } else {
491 i->prev = b->exit;
492 if (b->exit)
493 b->exit->next = i;
494 b->exit = i;
495 if (!b->entry)
496 b->entry = i;
497 else
498 if (i->prev && i->prev->opcode == NV_OP_PHI)
499 b->entry = i;
500 }
501
502 i->bb = b;
503 b->num_instructions++;
504
505 if (i->prev && i->prev->terminator)
506 nvc0_insns_permute(i->prev, i);
507 }
508
509 void
510 nvc0_insn_insert_after(struct nv_instruction *at, struct nv_instruction *ni)
511 {
512 if (!at->next) {
513 nvc0_insn_append(at->bb, ni);
514 return;
515 }
516 ni->next = at->next;
517 ni->prev = at;
518 ni->next->prev = ni;
519 ni->prev->next = ni;
520 ni->bb = at->bb;
521 ni->bb->num_instructions++;
522 }
523
524 void
525 nvc0_insn_insert_before(struct nv_instruction *at, struct nv_instruction *ni)
526 {
527 nvc0_insn_insert_after(at, ni);
528 nvc0_insns_permute(at, ni);
529 }
530
531 void
532 nvc0_insn_delete(struct nv_instruction *nvi)
533 {
534 struct nv_basic_block *b = nvi->bb;
535 int s;
536
537 /* debug_printf("REM: "); nv_print_instruction(nvi); */
538
539 for (s = 0; s < 6 && nvi->src[s]; ++s)
540 nv_reference(NULL, nvi, s, NULL);
541
542 if (nvi->next)
543 nvi->next->prev = nvi->prev;
544 else {
545 assert(nvi == b->exit);
546 b->exit = nvi->prev;
547 }
548
549 if (nvi->prev)
550 nvi->prev->next = nvi->next;
551
552 if (nvi == b->entry) {
553 /* PHIs don't get hooked to b->entry */
554 b->entry = nvi->next;
555 assert(!nvi->prev || nvi->prev->opcode == NV_OP_PHI);
556 }
557
558 if (nvi == b->phi) {
559 if (nvi->opcode != NV_OP_PHI)
560 NOUVEAU_DBG("NOTE: b->phi points to non-PHI instruction\n");
561
562 assert(!nvi->prev);
563 if (!nvi->next || nvi->next->opcode != NV_OP_PHI)
564 b->phi = NULL;
565 else
566 b->phi = nvi->next;
567 }
568 }
569
570 void
571 nvc0_insns_permute(struct nv_instruction *i1, struct nv_instruction *i2)
572 {
573 struct nv_basic_block *b = i1->bb;
574
575 assert(i1->opcode != NV_OP_PHI &&
576 i2->opcode != NV_OP_PHI);
577 assert(i1->next == i2);
578
579 if (b->exit == i2)
580 b->exit = i1;
581
582 if (b->entry == i1)
583 b->entry = i2;
584
585 i2->prev = i1->prev;
586 i1->next = i2->next;
587 i2->next = i1;
588 i1->prev = i2;
589
590 if (i2->prev)
591 i2->prev->next = i2;
592 if (i1->next)
593 i1->next->prev = i1;
594 }
595
596 void
597 nvc0_bblock_attach(struct nv_basic_block *parent,
598 struct nv_basic_block *b, ubyte edge_kind)
599 {
600 assert(b->num_in < 8);
601
602 if (parent->out[0]) {
603 assert(!parent->out[1]);
604 parent->out[1] = b;
605 parent->out_kind[1] = edge_kind;
606 } else {
607 parent->out[0] = b;
608 parent->out_kind[0] = edge_kind;
609 }
610
611 b->in[b->num_in] = parent;
612 b->in_kind[b->num_in++] = edge_kind;
613 }
614
615 /* NOTE: all BRKs are treated as conditional, so there are 2 outgoing BBs */
616
617 boolean
618 nvc0_bblock_dominated_by(struct nv_basic_block *b, struct nv_basic_block *d)
619 {
620 int j;
621
622 if (b == d)
623 return TRUE;
624
625 for (j = 0; j < b->num_in; ++j)
626 if ((b->in_kind[j] != CFG_EDGE_BACK) &&
627 !nvc0_bblock_dominated_by(b->in[j], d))
628 return FALSE;
629
630 return j ? TRUE : FALSE;
631 }
632
633 /* check if @bf (future) can be reached from @bp (past), stop at @bt */
634 boolean
635 nvc0_bblock_reachable_by(struct nv_basic_block *bf, struct nv_basic_block *bp,
636 struct nv_basic_block *bt)
637 {
638 struct nv_basic_block *q[NV_PC_MAX_BASIC_BLOCKS], *b;
639 int i, p, n;
640
641 p = 0;
642 n = 1;
643 q[0] = bp;
644
645 while (p < n) {
646 b = q[p++];
647
648 if (b == bf)
649 break;
650 if (b == bt)
651 continue;
652 assert(n <= (1024 - 2));
653
654 for (i = 0; i < 2; ++i) {
655 if (b->out[i] && !IS_WALL_EDGE(b->out_kind[i]) && !b->out[i]->priv) {
656 q[n] = b->out[i];
657 q[n++]->priv = 1;
658 }
659 }
660 }
661 for (--n; n >= 0; --n)
662 q[n]->priv = 0;
663
664 return (b == bf);
665 }
666
667 static struct nv_basic_block *
668 nvbb_find_dom_frontier(struct nv_basic_block *b, struct nv_basic_block *df)
669 {
670 struct nv_basic_block *out;
671 int i;
672
673 if (!nvc0_bblock_dominated_by(df, b)) {
674 for (i = 0; i < df->num_in; ++i) {
675 if (df->in_kind[i] == CFG_EDGE_BACK)
676 continue;
677 if (nvc0_bblock_dominated_by(df->in[i], b))
678 return df;
679 }
680 }
681 for (i = 0; i < 2 && df->out[i]; ++i) {
682 if (df->out_kind[i] == CFG_EDGE_BACK)
683 continue;
684 if ((out = nvbb_find_dom_frontier(b, df->out[i])))
685 return out;
686 }
687 return NULL;
688 }
689
690 struct nv_basic_block *
691 nvc0_bblock_dom_frontier(struct nv_basic_block *b)
692 {
693 struct nv_basic_block *df;
694 int i;
695
696 for (i = 0; i < 2 && b->out[i]; ++i)
697 if ((df = nvbb_find_dom_frontier(b, b->out[i])))
698 return df;
699 return NULL;
700 }