nv50: newlines in shader bincode printing
[mesa.git] / src / gallium / drivers / nv50 / nv50_pc.c
1 /*
2 * Copyright 2010 Christoph Bumiller
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 * SOFTWARE.
21 */
22
23 /* #define NV50PC_DEBUG */
24
25 #include "nv50_pc.h"
26 #include "nv50_program.h"
27
28 #include <stdio.h>
29
30 /* returns TRUE if operands 0 and 1 can be swapped */
31 boolean
32 nv_op_commutative(uint opcode)
33 {
34 switch (opcode) {
35 case NV_OP_ADD:
36 case NV_OP_MUL:
37 case NV_OP_MAD:
38 case NV_OP_AND:
39 case NV_OP_OR:
40 case NV_OP_XOR:
41 case NV_OP_MIN:
42 case NV_OP_MAX:
43 case NV_OP_SAD:
44 return TRUE;
45 default:
46 return FALSE;
47 }
48 }
49
50 /* return operand to which the address register applies */
51 int
52 nv50_indirect_opnd(struct nv_instruction *i)
53 {
54 if (!i->src[4])
55 return -1;
56
57 switch (i->opcode) {
58 case NV_OP_MOV:
59 case NV_OP_LDA:
60 case NV_OP_STA:
61 return 0;
62 default:
63 return 1;
64 }
65 }
66
67 boolean
68 nv50_nvi_can_use_imm(struct nv_instruction *nvi, int s)
69 {
70 if (nvi->flags_src || nvi->flags_def)
71 return FALSE;
72
73 switch (nvi->opcode) {
74 case NV_OP_ADD:
75 case NV_OP_MUL:
76 case NV_OP_AND:
77 case NV_OP_OR:
78 case NV_OP_XOR:
79 case NV_OP_SHL:
80 case NV_OP_SHR:
81 return (s == 1) && (nvi->src[0]->value->reg.file == NV_FILE_GPR) &&
82 (nvi->def[0]->reg.file == NV_FILE_GPR);
83 case NV_OP_MOV:
84 assert(s == 0);
85 return (nvi->def[0]->reg.file == NV_FILE_GPR);
86 default:
87 return FALSE;
88 }
89 }
90
91 boolean
92 nv50_nvi_can_load(struct nv_instruction *nvi, int s, struct nv_value *value)
93 {
94 int i;
95
96 for (i = 0; i < 3 && nvi->src[i]; ++i)
97 if (nvi->src[i]->value->reg.file == NV_FILE_IMM)
98 return FALSE;
99
100 switch (nvi->opcode) {
101 case NV_OP_ABS:
102 case NV_OP_ADD:
103 case NV_OP_CEIL:
104 case NV_OP_FLOOR:
105 case NV_OP_TRUNC:
106 case NV_OP_CVT:
107 case NV_OP_MAD:
108 case NV_OP_MUL:
109 case NV_OP_SAT:
110 case NV_OP_SUB:
111 case NV_OP_MAX:
112 case NV_OP_MIN:
113 if (s == 0 && (value->reg.file == NV_FILE_MEM_S ||
114 value->reg.file == NV_FILE_MEM_P))
115 return TRUE;
116 if (value->reg.file < NV_FILE_MEM_C(0) ||
117 value->reg.file > NV_FILE_MEM_C(15))
118 return FALSE;
119 return (s == 1) ||
120 ((s == 2) && (nvi->src[1]->value->reg.file == NV_FILE_GPR));
121 case NV_OP_MOV:
122 assert(s == 0);
123 return /* TRUE */ FALSE; /* don't turn MOVs into loads */
124 default:
125 return FALSE;
126 }
127 }
128
129 /* Return whether this instruction can be executed conditionally. */
130 boolean
131 nv50_nvi_can_predicate(struct nv_instruction *nvi)
132 {
133 int i;
134
135 if (nvi->flags_src)
136 return FALSE;
137 for (i = 0; i < 4 && nvi->src[i]; ++i)
138 if (nvi->src[i]->value->reg.file == NV_FILE_IMM)
139 return FALSE;
140 return TRUE;
141 }
142
143 ubyte
144 nv50_supported_src_mods(uint opcode, int s)
145 {
146 switch (opcode) {
147 case NV_OP_ABS:
148 return NV_MOD_NEG | NV_MOD_ABS; /* obviously */
149 case NV_OP_ADD:
150 case NV_OP_MUL:
151 case NV_OP_MAD:
152 return NV_MOD_NEG;
153 case NV_OP_DFDX:
154 case NV_OP_DFDY:
155 assert(s == 0);
156 return NV_MOD_NEG;
157 case NV_OP_MAX:
158 case NV_OP_MIN:
159 return NV_MOD_ABS;
160 case NV_OP_CVT:
161 case NV_OP_LG2:
162 case NV_OP_NEG:
163 case NV_OP_PREEX2:
164 case NV_OP_PRESIN:
165 case NV_OP_RCP:
166 case NV_OP_RSQ:
167 return NV_MOD_ABS | NV_MOD_NEG;
168 default:
169 return 0;
170 }
171 }
172
173 int
174 nv_nvi_refcount(struct nv_instruction *nvi)
175 {
176 int i, rc;
177
178 rc = nvi->flags_def ? nvi->flags_def->refc : 0;
179
180 for (i = 0; i < 4; ++i) {
181 if (!nvi->def[i])
182 return rc;
183 rc += nvi->def[i]->refc;
184 }
185 return rc;
186 }
187
188 int
189 nvcg_replace_value(struct nv_pc *pc, struct nv_value *old_val,
190 struct nv_value *new_val)
191 {
192 int i, n;
193
194 if (old_val == new_val)
195 return old_val->refc;
196
197 for (i = 0, n = 0; i < pc->num_refs; ++i) {
198 if (pc->refs[i]->value == old_val) {
199 ++n;
200 nv_reference(pc, &pc->refs[i], new_val);
201 }
202 }
203 return n;
204 }
205
206 struct nv_value *
207 nvcg_find_constant(struct nv_ref *ref)
208 {
209 struct nv_value *src;
210
211 if (!ref)
212 return NULL;
213
214 src = ref->value;
215 while (src->insn && src->insn->opcode == NV_OP_MOV) {
216 assert(!src->insn->src[0]->mod);
217 src = src->insn->src[0]->value;
218 }
219 if ((src->reg.file == NV_FILE_IMM) ||
220 (src->insn && src->insn->opcode == NV_OP_LDA &&
221 src->insn->src[0]->value->reg.file >= NV_FILE_MEM_C(0) &&
222 src->insn->src[0]->value->reg.file <= NV_FILE_MEM_C(15)))
223 return src;
224 return NULL;
225 }
226
227 struct nv_value *
228 nvcg_find_immediate(struct nv_ref *ref)
229 {
230 struct nv_value *src = nvcg_find_constant(ref);
231
232 return (src && src->reg.file == NV_FILE_IMM) ? src : NULL;
233 }
234
235 static void
236 nv_pc_free_refs(struct nv_pc *pc)
237 {
238 int i;
239 for (i = 0; i < pc->num_refs; i += 64)
240 FREE(pc->refs[i]);
241 FREE(pc->refs);
242 }
243
244 static const char *
245 edge_name(ubyte type)
246 {
247 switch (type) {
248 case CFG_EDGE_FORWARD: return "forward";
249 case CFG_EDGE_BACK: return "back";
250 case CFG_EDGE_LOOP_ENTER: return "loop";
251 case CFG_EDGE_LOOP_LEAVE: return "break";
252 case CFG_EDGE_FAKE: return "fake";
253 default:
254 return "?";
255 }
256 }
257
258 void
259 nv_pc_pass_in_order(struct nv_basic_block *root, nv_pc_pass_func f, void *priv)
260 {
261 struct nv_basic_block *bb[64], *bbb[16], *b;
262 int j, p, pp;
263
264 bb[0] = root;
265 p = 1;
266 pp = 0;
267
268 while (p > 0) {
269 b = bb[--p];
270 b->priv = 0;
271
272 for (j = 1; j >= 0; --j) {
273 if (!b->out[j])
274 continue;
275
276 switch (b->out_kind[j]) {
277 case CFG_EDGE_BACK:
278 continue;
279 case CFG_EDGE_FORWARD:
280 case CFG_EDGE_FAKE:
281 if (++b->out[j]->priv == b->out[j]->num_in)
282 bb[p++] = b->out[j];
283 break;
284 case CFG_EDGE_LOOP_ENTER:
285 bb[p++] = b->out[j];
286 break;
287 case CFG_EDGE_LOOP_LEAVE:
288 bbb[pp++] = b->out[j];
289 break;
290 default:
291 assert(0);
292 break;
293 }
294 }
295
296 f(priv, b);
297
298 if (!p) {
299 p = pp;
300 for (; pp > 0; --pp)
301 bb[pp - 1] = bbb[pp - 1];
302 }
303 }
304 }
305
306 static void
307 nv_do_print_function(void *priv, struct nv_basic_block *b)
308 {
309 struct nv_instruction *i = b->phi;
310
311 debug_printf("=== BB %i ", b->id);
312 if (b->out[0])
313 debug_printf("[%s -> %i] ", edge_name(b->out_kind[0]), b->out[0]->id);
314 if (b->out[1])
315 debug_printf("[%s -> %i] ", edge_name(b->out_kind[1]), b->out[1]->id);
316 debug_printf("===\n");
317
318 i = b->phi;
319 if (!i)
320 i = b->entry;
321 for (; i; i = i->next)
322 nv_print_instruction(i);
323 }
324
325 void
326 nv_print_function(struct nv_basic_block *root)
327 {
328 if (root->subroutine)
329 debug_printf("SUBROUTINE %i\n", root->subroutine);
330 else
331 debug_printf("MAIN\n");
332
333 nv_pc_pass_in_order(root, nv_do_print_function, root);
334 }
335
336 void
337 nv_print_program(struct nv_pc *pc)
338 {
339 int i;
340 for (i = 0; i < pc->num_subroutines + 1; ++i)
341 if (pc->root[i])
342 nv_print_function(pc->root[i]);
343 }
344
345 #ifdef NV50PC_DEBUG
346 static void
347 nv_do_print_cfgraph(struct nv_pc *pc, FILE *f, struct nv_basic_block *b)
348 {
349 int i;
350
351 b->pass_seq = pc->pass_seq;
352
353 fprintf(f, "\t%i [shape=box]\n", b->id);
354
355 for (i = 0; i < 2; ++i) {
356 if (!b->out[i])
357 continue;
358 switch (b->out_kind[i]) {
359 case CFG_EDGE_FORWARD:
360 fprintf(f, "\t%i -> %i;\n", b->id, b->out[i]->id);
361 break;
362 case CFG_EDGE_LOOP_ENTER:
363 fprintf(f, "\t%i -> %i [color=green];\n", b->id, b->out[i]->id);
364 break;
365 case CFG_EDGE_LOOP_LEAVE:
366 fprintf(f, "\t%i -> %i [color=red];\n", b->id, b->out[i]->id);
367 break;
368 case CFG_EDGE_BACK:
369 fprintf(f, "\t%i -> %i;\n", b->id, b->out[i]->id);
370 continue;
371 case CFG_EDGE_FAKE:
372 fprintf(f, "\t%i -> %i [style=dotted];\n", b->id, b->out[i]->id);
373 break;
374 default:
375 assert(0);
376 break;
377 }
378 if (b->out[i]->pass_seq < pc->pass_seq)
379 nv_do_print_cfgraph(pc, f, b->out[i]);
380 }
381 }
382
383 /* Print the control flow graph of subroutine @subr (0 == MAIN) to a file. */
384 static void
385 nv_print_cfgraph(struct nv_pc *pc, const char *filepath, int subr)
386 {
387 FILE *f;
388
389 f = fopen(filepath, "a");
390 if (!f)
391 return;
392
393 fprintf(f, "digraph G {\n");
394
395 ++pc->pass_seq;
396
397 nv_do_print_cfgraph(pc, f, pc->root[subr]);
398
399 fprintf(f, "}\n");
400
401 fclose(f);
402 }
403 #endif
404
405 static INLINE void
406 nvcg_show_bincode(struct nv_pc *pc)
407 {
408 unsigned i;
409
410 for (i = 0; i < pc->bin_size / 4; ++i) {
411 debug_printf("0x%08x ", pc->emit[i]);
412 if ((i % 16) == 15)
413 debug_printf("\n");
414 }
415 debug_printf("\n");
416 }
417
418 static int
419 nv50_emit_program(struct nv_pc *pc)
420 {
421 uint32_t *code = pc->emit;
422 int n;
423
424 NV50_DBGMSG("emitting program: size = %u\n", pc->bin_size);
425
426 for (n = 0; n < pc->num_blocks; ++n) {
427 struct nv_instruction *i;
428 struct nv_basic_block *b = pc->bb_list[n];
429
430 for (i = b->entry; i; i = i->next) {
431 nv50_emit_instruction(pc, i);
432
433 pc->bin_pos += 1 + (pc->emit[0] & 1);
434 pc->emit += 1 + (pc->emit[0] & 1);
435 }
436 }
437 assert(pc->emit == &code[pc->bin_size / 4]);
438
439 /* XXX: we can do better than this ... */
440 if (!(pc->emit[-2] & 1) || (pc->emit[-2] & 2) || (pc->emit[-1] & 3)) {
441 pc->emit[0] = 0xf0000001;
442 pc->emit[1] = 0xe0000000;
443 pc->bin_size += 8;
444 }
445
446 pc->emit = code;
447 code[pc->bin_size / 4 - 1] |= 1;
448
449 #ifdef NV50PC_DEBUG
450 nvcg_show_bincode(pc);
451 #endif
452
453 return 0;
454 }
455
456 int
457 nv50_generate_code(struct nv50_translation_info *ti)
458 {
459 struct nv_pc *pc;
460 int ret;
461 int i;
462
463 pc = CALLOC_STRUCT(nv_pc);
464 if (!pc)
465 return 1;
466
467 pc->root = CALLOC(ti->subr_nr + 1, sizeof(pc->root[0]));
468 if (!pc->root) {
469 FREE(pc);
470 return 1;
471 }
472 pc->num_subroutines = ti->subr_nr;
473
474 ret = nv50_tgsi_to_nc(pc, ti);
475 if (ret)
476 goto out;
477 #ifdef NV50PC_DEBUG
478 nv_print_program(pc);
479 #endif
480
481 pc->opt_reload_elim = ti->store_to_memory ? FALSE : TRUE;
482
483 /* optimization */
484 ret = nv_pc_exec_pass0(pc);
485 if (ret)
486 goto out;
487 #ifdef NV50PC_DEBUG
488 nv_print_program(pc);
489 #endif
490
491 /* register allocation */
492 ret = nv_pc_exec_pass1(pc);
493 if (ret)
494 goto out;
495 #ifdef NV50PC_DEBUG
496 nv_print_program(pc);
497 nv_print_cfgraph(pc, "nv50_shader_cfgraph.dot", 0);
498 #endif
499
500 /* prepare for emission */
501 ret = nv_pc_exec_pass2(pc);
502 if (ret)
503 goto out;
504
505 pc->emit = CALLOC(pc->bin_size / 4 + 2, 4);
506 if (!pc->emit) {
507 ret = 3;
508 goto out;
509 }
510 ret = nv50_emit_program(pc);
511 if (ret)
512 goto out;
513
514 ti->p->code_size = pc->bin_size;
515 ti->p->code = pc->emit;
516
517 ti->p->immd_size = pc->immd_count * 4;
518 ti->p->immd = pc->immd_buf;
519
520 /* highest 16 bit reg to num of 32 bit regs */
521 ti->p->max_gpr = (pc->max_reg[NV_FILE_GPR] >> 1) + 1;
522
523 ti->p->fixups = pc->fixups;
524 ti->p->num_fixups = pc->num_fixups;
525
526 NV50_DBGMSG("SHADER TRANSLATION - %s\n", ret ? "failure" : "success");
527
528 out:
529 nv_pc_free_refs(pc);
530
531 for (i = 0; i < pc->num_blocks; ++i)
532 FREE(pc->bb_list[i]);
533 if (pc->root)
534 FREE(pc->root);
535 if (ret) { /* on success, these will be referenced by nv50_program */
536 if (pc->emit)
537 FREE(pc->emit);
538 if (pc->immd_buf)
539 FREE(pc->immd_buf);
540 if (pc->fixups)
541 FREE(pc->fixups);
542 }
543 FREE(pc);
544 return ret;
545 }
546
547 static void
548 nvbb_insert_phi(struct nv_basic_block *b, struct nv_instruction *i)
549 {
550 if (!b->phi) {
551 i->prev = NULL;
552 b->phi = i;
553 i->next = b->entry;
554 if (b->entry) {
555 assert(!b->entry->prev && b->exit);
556 b->entry->prev = i;
557 } else {
558 b->entry = i;
559 b->exit = i;
560 }
561 } else {
562 assert(b->entry);
563 if (b->entry->opcode == NV_OP_PHI) { /* insert after entry */
564 assert(b->entry == b->exit);
565 b->entry->next = i;
566 i->prev = b->entry;
567 b->entry = i;
568 b->exit = i;
569 } else { /* insert before entry */
570 assert(b->entry->prev && b->exit);
571 i->next = b->entry;
572 i->prev = b->entry->prev;
573 b->entry->prev = i;
574 i->prev->next = i;
575 }
576 }
577 }
578
579 void
580 nvbb_insert_tail(struct nv_basic_block *b, struct nv_instruction *i)
581 {
582 if (i->opcode == NV_OP_PHI) {
583 nvbb_insert_phi(b, i);
584 } else {
585 i->prev = b->exit;
586 if (b->exit)
587 b->exit->next = i;
588 b->exit = i;
589 if (!b->entry)
590 b->entry = i;
591 else
592 if (i->prev && i->prev->opcode == NV_OP_PHI)
593 b->entry = i;
594 }
595
596 i->bb = b;
597 b->num_instructions++;
598 }
599
600 void
601 nvi_insert_after(struct nv_instruction *at, struct nv_instruction *ni)
602 {
603 if (!at->next) {
604 nvbb_insert_tail(at->bb, ni);
605 return;
606 }
607 ni->next = at->next;
608 ni->prev = at;
609 ni->next->prev = ni;
610 ni->prev->next = ni;
611 }
612
613 void
614 nv_nvi_delete(struct nv_instruction *nvi)
615 {
616 struct nv_basic_block *b = nvi->bb;
617 int j;
618
619 /* debug_printf("REM: "); nv_print_instruction(nvi); */
620
621 for (j = 0; j < 5; ++j)
622 nv_reference(NULL, &nvi->src[j], NULL);
623 nv_reference(NULL, &nvi->flags_src, NULL);
624
625 if (nvi->next)
626 nvi->next->prev = nvi->prev;
627 else {
628 assert(nvi == b->exit);
629 b->exit = nvi->prev;
630 }
631
632 if (nvi->prev)
633 nvi->prev->next = nvi->next;
634
635 if (nvi == b->entry) {
636 /* PHIs don't get hooked to b->entry */
637 b->entry = nvi->next;
638 assert(!nvi->prev || nvi->prev->opcode == NV_OP_PHI);
639 }
640
641 if (nvi == b->phi) {
642 if (nvi->opcode != NV_OP_PHI)
643 NV50_DBGMSG("NOTE: b->phi points to non-PHI instruction\n");
644
645 assert(!nvi->prev);
646 if (!nvi->next || nvi->next->opcode != NV_OP_PHI)
647 b->phi = NULL;
648 else
649 b->phi = nvi->next;
650 }
651 }
652
653 void
654 nv_nvi_permute(struct nv_instruction *i1, struct nv_instruction *i2)
655 {
656 struct nv_basic_block *b = i1->bb;
657
658 assert(i1->opcode != NV_OP_PHI &&
659 i2->opcode != NV_OP_PHI);
660 assert(i1->next == i2);
661
662 if (b->exit == i2)
663 b->exit = i1;
664
665 if (b->entry == i1)
666 b->entry = i2;
667
668 i2->prev = i1->prev;
669 i1->next = i2->next;
670 i2->next = i1;
671 i1->prev = i2;
672
673 if (i2->prev)
674 i2->prev->next = i2;
675 if (i1->next)
676 i1->next->prev = i1;
677 }
678
679 void
680 nvbb_attach_block(struct nv_basic_block *parent,
681 struct nv_basic_block *b, ubyte edge_kind)
682 {
683 assert(b->num_in < 8);
684
685 if (parent->out[0]) {
686 assert(!parent->out[1]);
687 parent->out[1] = b;
688 parent->out_kind[1] = edge_kind;
689 } else {
690 parent->out[0] = b;
691 parent->out_kind[0] = edge_kind;
692 }
693
694 b->in[b->num_in] = parent;
695 b->in_kind[b->num_in++] = edge_kind;
696 }
697
698 /* NOTE: all BRKs are treated as conditional, so there are 2 outgoing BBs */
699
700 boolean
701 nvbb_dominated_by(struct nv_basic_block *b, struct nv_basic_block *d)
702 {
703 int j;
704
705 if (b == d)
706 return TRUE;
707
708 for (j = 0; j < b->num_in; ++j)
709 if ((b->in_kind[j] != CFG_EDGE_BACK) && !nvbb_dominated_by(b->in[j], d))
710 return FALSE;
711
712 return j ? TRUE : FALSE;
713 }
714
715 /* check if @bf (future) can be reached from @bp (past), stop at @bt */
716 boolean
717 nvbb_reachable_by(struct nv_basic_block *bf, struct nv_basic_block *bp,
718 struct nv_basic_block *bt)
719 {
720 struct nv_basic_block *q[NV_PC_MAX_BASIC_BLOCKS], *b;
721 int i, p, n;
722
723 p = 0;
724 n = 1;
725 q[0] = bp;
726
727 while (p < n) {
728 b = q[p++];
729
730 if (b == bf)
731 break;
732 if (b == bt)
733 continue;
734 assert(n <= (1024 - 2));
735
736 for (i = 0; i < 2; ++i) {
737 if (b->out[i] && !IS_WALL_EDGE(b->out_kind[i]) && !b->out[i]->priv) {
738 q[n] = b->out[i];
739 q[n++]->priv = 1;
740 }
741 }
742 }
743 for (--n; n >= 0; --n)
744 q[n]->priv = 0;
745
746 return (b == bf);
747 }
748
749 static struct nv_basic_block *
750 nvbb_find_dom_frontier(struct nv_basic_block *b, struct nv_basic_block *df)
751 {
752 struct nv_basic_block *out;
753 int i;
754
755 if (!nvbb_dominated_by(df, b)) {
756 for (i = 0; i < df->num_in; ++i) {
757 if (df->in_kind[i] == CFG_EDGE_BACK)
758 continue;
759 if (nvbb_dominated_by(df->in[i], b))
760 return df;
761 }
762 }
763 for (i = 0; i < 2 && df->out[i]; ++i) {
764 if (df->out_kind[i] == CFG_EDGE_BACK)
765 continue;
766 if ((out = nvbb_find_dom_frontier(b, df->out[i])))
767 return out;
768 }
769 return NULL;
770 }
771
772 struct nv_basic_block *
773 nvbb_dom_frontier(struct nv_basic_block *b)
774 {
775 struct nv_basic_block *df;
776 int i;
777
778 for (i = 0; i < 2 && b->out[i]; ++i)
779 if ((df = nvbb_find_dom_frontier(b, b->out[i])))
780 return df;
781 return NULL;
782 }