nv50: use actual loads/stores if TEMPs are accessed indirectly
[mesa.git] src/gallium/drivers/nv50/nv50_pc.c
/*
 * Copyright 2010 Christoph Bumiller
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

/* #define NV50PC_DEBUG */

#include "nv50_pc.h"
#include "nv50_program.h"

#include <stdio.h>

/* returns TRUE if operands 0 and 1 can be swapped */
boolean
nv_op_commutative(uint opcode)
{
   switch (opcode) {
   case NV_OP_ADD:
   case NV_OP_MUL:
   case NV_OP_MAD:
   case NV_OP_AND:
   case NV_OP_OR:
   case NV_OP_XOR:
   case NV_OP_MIN:
   case NV_OP_MAX:
   case NV_OP_SAD:
      return TRUE;
   default:
      return FALSE;
   }
}

/* return operand to which the address register applies */
int
nv50_indirect_opnd(struct nv_instruction *i)
{
   if (!i->src[4])
      return -1;

   switch (i->opcode) {
   case NV_OP_MOV:
   case NV_OP_LDA:
      return 0;
   default:
      return 1;
   }
}

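/* return TRUE if source s of nvi may be given as an immediate operand */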
boolean
nv50_nvi_can_use_imm(struct nv_instruction *nvi, int s)
{
   if (nvi->flags_src || nvi->flags_def)
      return FALSE;

   switch (nvi->opcode) {
   case NV_OP_ADD:
   case NV_OP_MUL:
   case NV_OP_AND:
   case NV_OP_OR:
   case NV_OP_XOR:
   case NV_OP_SHL:
   case NV_OP_SHR:
      return (s == 1) && (nvi->src[0]->value->reg.file == NV_FILE_GPR) &&
         (nvi->def[0]->reg.file == NV_FILE_GPR);
   case NV_OP_MOV:
      assert(s == 0);
      return (nvi->def[0]->reg.file == NV_FILE_GPR);
   default:
      return FALSE;
   }
}

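/* return TRUE if nvi can read value directly from memory at source slot s */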
boolean
nv50_nvi_can_load(struct nv_instruction *nvi, int s, struct nv_value *value)
{
   int i;

   for (i = 0; i < 3 && nvi->src[i]; ++i)
      if (nvi->src[i]->value->reg.file == NV_FILE_IMM)
         return FALSE;

   switch (nvi->opcode) {
   case NV_OP_ABS:
   case NV_OP_ADD:
   case NV_OP_CEIL:
   case NV_OP_FLOOR:
   case NV_OP_TRUNC:
   case NV_OP_CVT:
   case NV_OP_MAD:
   case NV_OP_MUL:
   case NV_OP_SAT:
   case NV_OP_SUB:
   case NV_OP_MAX:
   case NV_OP_MIN:
      if (s == 0 && (value->reg.file == NV_FILE_MEM_S ||
                     value->reg.file == NV_FILE_MEM_P))
         return TRUE;
      if (s == 1 &&
          value->reg.file >= NV_FILE_MEM_C(0) &&
          value->reg.file <= NV_FILE_MEM_C(15))
         return TRUE;
      if (s == 2 && nvi->src[1]->value->reg.file == NV_FILE_GPR)
         return TRUE;
      return FALSE;
   case NV_OP_MOV:
      assert(s == 0);
      return /* TRUE */ FALSE; /* don't turn MOVs into loads */
   default:
      return FALSE;
   }
}

/* Return whether this instruction can be executed conditionally. */
boolean
nv50_nvi_can_predicate(struct nv_instruction *nvi)
{
   int i;

   if (nvi->flags_src)
      return FALSE;
   for (i = 0; i < 4 && nvi->src[i]; ++i)
      if (nvi->src[i]->value->reg.file == NV_FILE_IMM)
         return FALSE;
   return TRUE;
}

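/* return the source modifiers (NEG/ABS) that opcode accepts on source s */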
ubyte
nv50_supported_src_mods(uint opcode, int s)
{
   switch (opcode) {
   case NV_OP_ABS:
      return NV_MOD_NEG | NV_MOD_ABS; /* obviously */
   case NV_OP_ADD:
   case NV_OP_MUL:
   case NV_OP_MAD:
      return NV_MOD_NEG;
   case NV_OP_DFDX:
   case NV_OP_DFDY:
      assert(s == 0);
      return NV_MOD_NEG;
   case NV_OP_MAX:
   case NV_OP_MIN:
      return NV_MOD_ABS;
   case NV_OP_CVT:
   case NV_OP_LG2:
   case NV_OP_NEG:
   case NV_OP_PREEX2:
   case NV_OP_PRESIN:
   case NV_OP_RCP:
   case NV_OP_RSQ:
      return NV_MOD_ABS | NV_MOD_NEG;
   default:
      return 0;
   }
}

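/* sum up the reference counts of all values (and flags) defined by nvi */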
int
nv_nvi_refcount(struct nv_instruction *nvi)
{
   int i, rc;

   rc = nvi->flags_def ? nvi->flags_def->refc : 0;

   for (i = 0; i < 4; ++i) {
      if (!nvi->def[i])
         return rc;
      rc += nvi->def[i]->refc;
   }
   return rc;
}

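/* replace all references to old_val by new_val, return the number of references changed */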
int
nvcg_replace_value(struct nv_pc *pc, struct nv_value *old_val,
                   struct nv_value *new_val)
{
   int i, n;

   if (old_val == new_val)
      return old_val->refc;

   for (i = 0, n = 0; i < pc->num_refs; ++i) {
      if (pc->refs[i]->value == old_val) {
         ++n;
         nv_reference(pc, &pc->refs[i], new_val);
      }
   }
   return n;
}

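/* look through MOV chains for an immediate or a value loaded from a constant buffer */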
struct nv_value *
nvcg_find_constant(struct nv_ref *ref)
{
   struct nv_value *src;

   if (!ref)
      return NULL;

   src = ref->value;
   while (src->insn && src->insn->opcode == NV_OP_MOV) {
      assert(!src->insn->src[0]->mod);
      src = src->insn->src[0]->value;
   }
   if ((src->reg.file == NV_FILE_IMM) ||
       (src->insn && src->insn->opcode == NV_OP_LDA &&
        src->insn->src[0]->value->reg.file >= NV_FILE_MEM_C(0) &&
        src->insn->src[0]->value->reg.file <= NV_FILE_MEM_C(15)))
      return src;
   return NULL;
}

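/* like nvcg_find_constant, but only accept immediates */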
struct nv_value *
nvcg_find_immediate(struct nv_ref *ref)
{
   struct nv_value *src = nvcg_find_constant(ref);

   return (src && src->reg.file == NV_FILE_IMM) ? src : NULL;
}

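/* refs are allocated in chunks of 64 entries, so only every 64th pointer starts an allocation */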
static void
nv_pc_free_refs(struct nv_pc *pc)
{
   int i;
   for (i = 0; i < pc->num_refs; i += 64)
      FREE(pc->refs[i]);
}

static const char *
edge_name(ubyte type)
{
   switch (type) {
   case CFG_EDGE_FORWARD: return "forward";
   case CFG_EDGE_BACK: return "back";
   case CFG_EDGE_LOOP_ENTER: return "loop";
   case CFG_EDGE_LOOP_LEAVE: return "break";
   case CFG_EDGE_FAKE: return "fake";
   default:
      return "?";
   }
}

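/* Call f on each basic block in control flow order, delaying the targets of
 * loop-exit edges until the loop body has been visited.
 */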
void
nv_pc_pass_in_order(struct nv_basic_block *root, nv_pc_pass_func f, void *priv)
{
   struct nv_basic_block *bb[64], *bbb[16], *b;
   int j, p, pp;

   bb[0] = root;
   p = 1;
   pp = 0;

   while (p > 0) {
      b = bb[--p];
      b->priv = 0;

      for (j = 1; j >= 0; --j) {
         if (!b->out[j])
            continue;

         switch (b->out_kind[j]) {
         case CFG_EDGE_BACK:
            continue;
         case CFG_EDGE_FORWARD:
         case CFG_EDGE_FAKE:
            if (++b->out[j]->priv == b->out[j]->num_in)
               bb[p++] = b->out[j];
            break;
         case CFG_EDGE_LOOP_ENTER:
            bb[p++] = b->out[j];
            break;
         case CFG_EDGE_LOOP_LEAVE:
            bbb[pp++] = b->out[j];
            break;
         default:
            assert(0);
            break;
         }
      }

      f(priv, b);

      if (!p) {
         p = pp;
         for (; pp > 0; --pp)
            bb[pp - 1] = bbb[pp - 1];
      }
   }
}

static void
nv_do_print_function(void *priv, struct nv_basic_block *b)
{
   struct nv_instruction *i = b->phi;

   debug_printf("=== BB %i ", b->id);
   if (b->out[0])
      debug_printf("[%s -> %i] ", edge_name(b->out_kind[0]), b->out[0]->id);
   if (b->out[1])
      debug_printf("[%s -> %i] ", edge_name(b->out_kind[1]), b->out[1]->id);
   debug_printf("===\n");

   i = b->phi;
   if (!i)
      i = b->entry;
   for (; i; i = i->next)
      nv_print_instruction(i);
}

void
nv_print_function(struct nv_basic_block *root)
{
   if (root->subroutine)
      debug_printf("SUBROUTINE %i\n", root->subroutine);
   else
      debug_printf("MAIN\n");

   nv_pc_pass_in_order(root, nv_do_print_function, root);
}

void
nv_print_program(struct nv_pc *pc)
{
   int i;
   for (i = 0; i < pc->num_subroutines + 1; ++i)
      if (pc->root[i])
         nv_print_function(pc->root[i]);
}

static INLINE void
nvcg_show_bincode(struct nv_pc *pc)
{
   int i;

   for (i = 0; i < pc->bin_size / 4; ++i)
      debug_printf("0x%08x ", pc->emit[i]);
   debug_printf("\n");
}

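/* Encode all instructions of all blocks into pc->emit; the binary is padded
 * if the last instruction cannot be marked as the end of the program.
 */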
static int
nv50_emit_program(struct nv_pc *pc)
{
   uint32_t *code = pc->emit;
   int n;

   NV50_DBGMSG("emitting program: size = %u\n", pc->bin_size);

   for (n = 0; n < pc->num_blocks; ++n) {
      struct nv_instruction *i;
      struct nv_basic_block *b = pc->bb_list[n];

      for (i = b->entry; i; i = i->next) {
         nv50_emit_instruction(pc, i);

         pc->bin_pos += 1 + (pc->emit[0] & 1);
         pc->emit += 1 + (pc->emit[0] & 1);
      }
   }
   assert(pc->emit == &code[pc->bin_size / 4]);

   /* XXX: we can do better than this ... */
   if (!(pc->emit[-2] & 1) || (pc->emit[-2] & 2) || (pc->emit[-1] & 3)) {
      pc->emit[0] = 0xf0000001;
      pc->emit[1] = 0xe0000000;
      pc->bin_size += 8;
   }

   pc->emit = code;
   code[pc->bin_size / 4 - 1] |= 1;

#ifdef NV50PC_DEBUG
   nvcg_show_bincode(pc);
#endif

   return 0;
}

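/* Generate machine code for the program described by ti: build the
 * intermediate representation from TGSI, optimize it, allocate registers
 * and emit the binary.
 */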
int
nv50_generate_code(struct nv50_translation_info *ti)
{
   struct nv_pc *pc;
   int ret;

   pc = CALLOC_STRUCT(nv_pc);
   if (!pc)
      return 1;

   pc->root = CALLOC(ti->subr_nr + 1, sizeof(pc->root[0]));
   if (!pc->root) {
      FREE(pc);
      return 1;
   }
   pc->num_subroutines = ti->subr_nr;

   ret = nv50_tgsi_to_nc(pc, ti);
   if (ret)
      goto out;
#ifdef NV50PC_DEBUG
   nv_print_program(pc);
#endif

   /* if TEMPs are stored to memory (indirect access), reloads of them
    * cannot be optimized away
    */
   pc->opt_reload_elim = ti->store_to_memory ? FALSE : TRUE;

   /* optimization */
   ret = nv_pc_exec_pass0(pc);
   if (ret)
      goto out;
#ifdef NV50PC_DEBUG
   nv_print_program(pc);
#endif

   /* register allocation */
   ret = nv_pc_exec_pass1(pc);
   if (ret)
      goto out;
#ifdef NV50PC_DEBUG
   nv_print_program(pc);
#endif

   /* prepare for emission */
   ret = nv_pc_exec_pass2(pc);
   if (ret)
      goto out;

   pc->emit = CALLOC(pc->bin_size / 4 + 2, 4);
   if (!pc->emit) {
      ret = 3;
      goto out;
   }
   ret = nv50_emit_program(pc);
   if (ret)
      goto out;

   ti->p->code_size = pc->bin_size;
   ti->p->code = pc->emit;

   ti->p->immd_size = pc->immd_count * 4;
   ti->p->immd = pc->immd_buf;

   /* convert the highest used 16 bit (half) register index into a count of 32 bit regs */
   ti->p->max_gpr = (pc->max_reg[NV_FILE_GPR] >> 1) + 1;

   ti->p->fixups = pc->fixups;
   ti->p->num_fixups = pc->num_fixups;

   NV50_DBGMSG("SHADER TRANSLATION - %s\n", ret ? "failure" : "success");

out:
   nv_pc_free_refs(pc);

   if (pc->bb_list)
      FREE(pc->bb_list);

   if (ret) { /* on success, these will be referenced by nv50_program */
      if (pc->emit)
         FREE(pc->emit);
      if (pc->immd_buf)
         FREE(pc->immd_buf);
      if (pc->fixups)
         FREE(pc->fixups);
   }
   FREE(pc);
   return ret;
}

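/* insert PHI instruction i at the beginning of block b */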
static void
nvbb_insert_phi(struct nv_basic_block *b, struct nv_instruction *i)
{
   if (!b->phi) {
      i->prev = NULL;
      b->phi = i;
      i->next = b->entry;
      if (b->entry) {
         assert(!b->entry->prev && b->exit);
         b->entry->prev = i;
      } else {
         b->entry = i;
         b->exit = i;
      }
   } else {
      assert(b->entry);
      if (b->entry->opcode == NV_OP_PHI) { /* insert after entry */
         assert(b->entry == b->exit);
         b->entry->next = i;
         i->prev = b->entry;
         b->entry = i;
         b->exit = i;
      } else { /* insert before entry */
         assert(b->entry->prev && b->exit);
         i->next = b->entry;
         i->prev = b->entry->prev;
         b->entry->prev = i;
         i->prev->next = i;
      }
   }
}

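/* append instruction i to block b; PHIs are inserted at the head instead */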
void
nvbb_insert_tail(struct nv_basic_block *b, struct nv_instruction *i)
{
   if (i->opcode == NV_OP_PHI) {
      nvbb_insert_phi(b, i);
   } else {
      i->prev = b->exit;
      if (b->exit)
         b->exit->next = i;
      b->exit = i;
      if (!b->entry)
         b->entry = i;
      else
      if (i->prev && i->prev->opcode == NV_OP_PHI)
         b->entry = i;
   }

   i->bb = b;
   b->num_instructions++;
}

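/* insert instruction ni immediately after instruction at */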
void
nvi_insert_after(struct nv_instruction *at, struct nv_instruction *ni)
{
   if (!at->next) {
      nvbb_insert_tail(at->bb, ni);
      return;
   }
   ni->next = at->next;
   ni->prev = at;
   ni->next->prev = ni;
   ni->prev->next = ni;
}

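/* remove nvi from its basic block and drop all of its source references */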
void
nv_nvi_delete(struct nv_instruction *nvi)
{
   struct nv_basic_block *b = nvi->bb;
   int j;

   /* debug_printf("REM: "); nv_print_instruction(nvi); */

   for (j = 0; j < 5; ++j)
      nv_reference(NULL, &nvi->src[j], NULL);
   nv_reference(NULL, &nvi->flags_src, NULL);

   if (nvi->next)
      nvi->next->prev = nvi->prev;
   else {
      assert(nvi == b->exit);
      b->exit = nvi->prev;
   }

   if (nvi->prev)
      nvi->prev->next = nvi->next;

   if (nvi == b->entry) {
      /* PHIs don't get hooked to b->entry */
      b->entry = nvi->next;
      assert(!nvi->prev || nvi->prev->opcode == NV_OP_PHI);
   }

   if (nvi == b->phi) {
      if (nvi->opcode != NV_OP_PHI)
         NV50_DBGMSG("NOTE: b->phi points to non-PHI instruction\n");

      assert(!nvi->prev);
      if (!nvi->next || nvi->next->opcode != NV_OP_PHI)
         b->phi = NULL;
      else
         b->phi = nvi->next;
   }
}

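/* swap the two adjacent (non-PHI) instructions i1 and i2 */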
void
nv_nvi_permute(struct nv_instruction *i1, struct nv_instruction *i2)
{
   struct nv_basic_block *b = i1->bb;

   assert(i1->opcode != NV_OP_PHI &&
          i2->opcode != NV_OP_PHI);
   assert(i1->next == i2);

   if (b->exit == i2)
      b->exit = i1;

   if (b->entry == i1)
      b->entry = i2;

   i2->prev = i1->prev;
   i1->next = i2->next;
   i2->next = i1;
   i1->prev = i2;

   if (i2->prev)
      i2->prev->next = i2;
   if (i1->next)
      i1->next->prev = i1;
}

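/* attach b to parent as a successor through an edge of the given kind */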
void
nvbb_attach_block(struct nv_basic_block *parent,
                  struct nv_basic_block *b, ubyte edge_kind)
{
   assert(b->num_in < 8);

   if (parent->out[0]) {
      assert(!parent->out[1]);
      parent->out[1] = b;
      parent->out_kind[1] = edge_kind;
   } else {
      parent->out[0] = b;
      parent->out_kind[0] = edge_kind;
   }

   b->in[b->num_in] = parent;
   b->in_kind[b->num_in++] = edge_kind;
}

/* NOTE: all BRKs are treated as conditional, so there are 2 outgoing BBs */

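/* return TRUE if b is dominated by d (back edges are ignored) */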
boolean
nvbb_dominated_by(struct nv_basic_block *b, struct nv_basic_block *d)
{
   int j;

   if (b == d)
      return TRUE;

   for (j = 0; j < b->num_in; ++j)
      if ((b->in_kind[j] != CFG_EDGE_BACK) && !nvbb_dominated_by(b->in[j], d))
         return FALSE;

   return j ? TRUE : FALSE;
}

/* check if bf (future) can be reached from bp (past) without passing bt */
boolean
nvbb_reachable_by(struct nv_basic_block *bf, struct nv_basic_block *bp,
                  struct nv_basic_block *bt)
{
   if (bf == bp)
      return TRUE;
   if (bp == bt)
      return FALSE;

   if (bp->out[0] && !IS_WALL_EDGE(bp->out_kind[0]) &&
       nvbb_reachable_by(bf, bp->out[0], bt))
      return TRUE;
   if (bp->out[1] && !IS_WALL_EDGE(bp->out_kind[1]) &&
       nvbb_reachable_by(bf, bp->out[1], bt))
      return TRUE;
   return FALSE;
}

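/* search, starting at df, for a block that lies on the dominance frontier of b */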
static struct nv_basic_block *
nvbb_find_dom_frontier(struct nv_basic_block *b, struct nv_basic_block *df)
{
   struct nv_basic_block *out;
   int i;

   if (!nvbb_dominated_by(df, b)) {
      for (i = 0; i < df->num_in; ++i) {
         if (df->in_kind[i] == CFG_EDGE_BACK)
            continue;
         if (nvbb_dominated_by(df->in[i], b))
            return df;
      }
   }
   for (i = 0; i < 2 && df->out[i]; ++i) {
      if (df->out_kind[i] == CFG_EDGE_BACK)
         continue;
      if ((out = nvbb_find_dom_frontier(b, df->out[i])))
         return out;
   }
   return NULL;
}

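/* return a block on the dominance frontier of b, or NULL if there is none */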
struct nv_basic_block *
nvbb_dom_frontier(struct nv_basic_block *b)
{
   struct nv_basic_block *df;
   int i;

   for (i = 0; i < 2 && b->out[i]; ++i)
      if ((df = nvbb_find_dom_frontier(b, b->out[i])))
         return df;
   return NULL;
}