0811420e4258f11ef6c3c24b1e15fc872e619365
[mesa.git] / src / gallium / drivers / nv50 / nv50_pc_optimize.c
1
2 #include "nv50_pc.h"
3
/* Mark block b as visited in the current pass (via pass_seq) and invoke
 * pass function f on each CFG successor not yet visited in this pass.
 * j is a caller-provided scratch loop variable.
 */
#define DESCEND_ARBITRARY(j, f) \
do { \
   b->pass_seq = ctx->pc->pass_seq; \
   \
   for (j = 0; j < 2; ++j) \
      if (b->out[j] && b->out[j]->pass_seq < ctx->pc->pass_seq) \
         f(ctx, b->out[j]); \
} while (0)
12
13 extern unsigned nv50_inst_min_size(struct nv_instruction *);
14
/* Minimal pass context: just carries the program being compiled. */
struct nv_pc_pass {
   struct nv_pc *pc;
};
18
19 static INLINE boolean
20 values_equal(struct nv_value *a, struct nv_value *b)
21 {
22 /* XXX: sizes */
23 return (a->reg.file == b->reg.file && a->join->reg.id == b->join->reg.id);
24 }
25
26 static INLINE boolean
27 inst_commutation_check(struct nv_instruction *a,
28 struct nv_instruction *b)
29 {
30 int si, di;
31
32 for (di = 0; di < 4; ++di) {
33 if (!a->def[di])
34 break;
35 for (si = 0; si < 5; ++si) {
36 if (!b->src[si])
37 continue;
38 if (values_equal(a->def[di], b->src[si]->value))
39 return FALSE;
40 }
41 }
42
43 if (b->flags_src && b->flags_src->value == a->flags_def)
44 return FALSE;
45
46 return TRUE;
47 }
48
49 /* Check whether we can swap the order of the instructions,
50 * where a & b may be either the earlier or the later one.
51 */
52 static boolean
53 inst_commutation_legal(struct nv_instruction *a,
54 struct nv_instruction *b)
55 {
56 return inst_commutation_check(a, b) && inst_commutation_check(b, a);
57 }
58
59 static INLINE boolean
60 inst_cullable(struct nv_instruction *nvi)
61 {
62 return (!(nvi->is_terminator ||
63 nvi->target ||
64 nvi->fixed ||
65 nv_nvi_refcount(nvi)));
66 }
67
/* Decide whether nvi has no effect and can be deleted before emission:
 * - EXPORT is treated as a nop here (NOTE(review): presumably outputs
 *   are realized elsewhere during emission -- confirm);
 * - a result that never received a register (reg.id < 0) is dead;
 * - a MOV/SELECT whose source(s) were coalesced into the same register
 *   as the destination copies a value onto itself.
 */
static INLINE boolean
nvi_isnop(struct nv_instruction *nvi)
{
   if (nvi->opcode == NV_OP_EXPORT)
      return TRUE;

   /* anything pinned, branching or touching flags must be kept */
   if (nvi->fixed ||
       nvi->is_terminator ||
       nvi->flags_src ||
       nvi->flags_def)
      return FALSE;

   /* result never got a register: nothing can observe it */
   if (nvi->def[0]->join->reg.id < 0)
      return TRUE;

   if (nvi->opcode != NV_OP_MOV && nvi->opcode != NV_OP_SELECT)
      return FALSE;

   /* a move between register files (e.g. to outputs) is real work */
   if (nvi->def[0]->reg.file != nvi->src[0]->value->reg.file)
      return FALSE;

   if (nvi->src[0]->value->join->reg.id < 0) {
      debug_printf("nvi_isnop: orphaned value detected\n");
      return TRUE;
   }

   /* SELECT is only a nop if the second input coalesced as well */
   if (nvi->opcode == NV_OP_SELECT)
      if (!values_equal(nvi->def[0], nvi->src[1]->value))
         return FALSE;

   return values_equal(nvi->def[0], nvi->src[0]->value);
}
100
/* Pre-emission pass over one basic block: delete nops, choose short
 * (4 byte) or long (8 byte) encodings -- short encodings must come in
 * pairs so an unpaired one forces a widening or a permutation -- then
 * accumulate bin_pos/bin_size and recurse through the CFG in emission
 * order.
 */
static void
nv_pc_pass_pre_emission(struct nv_pc *pc, struct nv_basic_block *b)
{
   struct nv_instruction *nvi, *next;
   int j;
   uint size, n32 = 0; /* number of pending short encodings */

   b->priv = 0;

   /* this block starts where the previously scheduled one ended */
   if (pc->num_blocks)
      b->bin_pos = pc->bb_list[pc->num_blocks - 1]->bin_pos +
                   pc->bb_list[pc->num_blocks - 1]->bin_size;

   pc->bb_list[pc->num_blocks++] = b;

   /* visit node */

   for (nvi = b->entry; nvi; nvi = next) {
      next = nvi->next;
      if (nvi_isnop(nvi))
         nv_nvi_delete(nvi);
   }

   for (nvi = b->entry; nvi; nvi = next) {
      next = nvi->next;

      size = nv50_inst_min_size(nvi);
      if (nvi->next && size < 8)
         ++n32;
      else
      if ((n32 & 1) && nvi->next &&
          nv50_inst_min_size(nvi->next) == 4 &&
          inst_commutation_legal(nvi, nvi->next)) {
         /* pull the next short instruction forward to complete the
          * pending pair instead of widening this one
          */
         ++n32;
         debug_printf("permuting: ");
         nv_print_instruction(nvi);
         nv_print_instruction(nvi->next);
         nv_nvi_permute(nvi, nvi->next);
         next = nvi;
      } else {
         nvi->is_long = 1;

         /* odd number of shorts: widen the previous instruction */
         b->bin_size += n32 & 1;
         if (n32 & 1)
            nvi->prev->is_long = 1;
         n32 = 0;
      }
      b->bin_size += 1 + nvi->is_long; /* counted in 32-bit units */
   }

   if (!b->entry) {
      debug_printf("block %p is now empty\n", b);
   } else
   if (!b->exit->is_long) {
      /* the block's final instruction must use the long encoding */
      assert(n32);
      b->exit->is_long = 1;
      b->bin_size += 1;

      /* might have del'd a hole tail of instructions */
      if (!b->exit->prev->is_long && !(n32 & 1)) {
         b->bin_size += 1;
         b->exit->prev->is_long = 1;
      }
   }
   assert(!b->exit || b->exit->is_long);

   pc->bin_size += b->bin_size *= 4; /* 32-bit units -> bytes */

   /* descend CFG */

   if (!b->out[0])
      return;
   /* only enter a merge block after all its predecessors were laid out
    * (priv counts visited incoming edges)
    */
   if (!b->out[1] && ++(b->out[0]->priv) != b->out[0]->num_in)
      return;

#if 0
   /* delete ELSE branch */
   if (b->entry &&
       b->entry->opcode == NV_OP_BRA && b->entry->target == b->out[0]) {
      nv_nvi_delete(b->entry);
      b->bin_size -= 2;
      pc->bin_size -= 8;
   }
#endif
   for (j = 0; j < 2; ++j)
      if (b->out[j] && b->out[j] != b)
         nv_pc_pass_pre_emission(pc, b->out[j]);
}
189
190 int
191 nv_pc_exec_pass2(struct nv_pc *pc)
192 {
193 debug_printf("preparing %u blocks for emission\n", pc->num_blocks);
194
195 pc->bb_list = CALLOC(pc->num_blocks, sizeof(struct nv_basic_block *));
196
197 pc->num_blocks = 0;
198 nv_pc_pass_pre_emission(pc, pc->root);
199
200 return 0;
201 }
202
203 static INLINE boolean
204 is_cmem_load(struct nv_instruction *nvi)
205 {
206 return (nvi->opcode == NV_OP_LDA &&
207 nvi->src[0]->value->reg.file >= NV_FILE_MEM_C(0) &&
208 nvi->src[0]->value->reg.file <= NV_FILE_MEM_C(15));
209 }
210
211 static INLINE boolean
212 is_smem_load(struct nv_instruction *nvi)
213 {
214 return (nvi->opcode == NV_OP_LDA &&
215 (nvi->src[0]->value->reg.file == NV_FILE_MEM_S ||
216 nvi->src[0]->value->reg.file <= NV_FILE_MEM_P));
217 }
218
219 static INLINE boolean
220 is_immd_move(struct nv_instruction *nvi)
221 {
222 return (nvi->opcode == NV_OP_MOV &&
223 nvi->src[0]->value->reg.file == NV_FILE_IMM);
224 }
225
/* For commutative instructions, swap sources so that a constant-buffer
 * load lands in src[1] and a shared/parameter load in src[0]; for SET,
 * a swap also requires remapping the condition code.
 *
 * NOTE(review): src->value->insn may be NULL for values without a
 * defining instruction, and is_cmem_load/is_smem_load dereference their
 * argument -- other passes guard this pointer; confirm it cannot be
 * NULL on this path.
 */
static INLINE void
check_swap_src_0_1(struct nv_instruction *nvi)
{
   /* condition code under operand swap: a <op> b -> b <op'> a */
   static const ubyte cc_swapped[8] = { 0, 4, 2, 6, 1, 5, 3, 7 };

   struct nv_ref *src0 = nvi->src[0], *src1 = nvi->src[1];

   if (!nv_op_commutative(nvi->opcode))
      return;
   assert(src0 && src1);

   if (is_cmem_load(src0->value->insn)) {
      if (!is_cmem_load(src1->value->insn)) {
         nvi->src[0] = src1;
         nvi->src[1] = src0;
         /* debug_printf("swapping cmem load to 1\n"); */
      }
   } else
   if (is_smem_load(src1->value->insn)) {
      if (!is_smem_load(src0->value->insn)) {
         nvi->src[0] = src1;
         nvi->src[1] = src0;
         /* debug_printf("swapping smem load to 0\n"); */
      }
   }

   /* sources were swapped: translate the SET condition */
   if (nvi->opcode == NV_OP_SET && nvi->src[0] != src0)
      nvi->set_cond = cc_swapped[nvi->set_cond];
}
255
/* Generic pass context: program, scratch counter, optional pass data. */
struct nv_pass {
   struct nv_pc *pc;
   int n;
   void *priv;
};
261
/* Fold stores to output registers into the instruction that computes
 * the stored value: the producer is redirected to write NV_FILE_OUT
 * directly (and pinned), while the store loses its fixed flag so DCE
 * can remove it.
 */
static int
nv_pass_fold_stores(struct nv_pass *ctx, struct nv_basic_block *b)
{
   struct nv_instruction *nvi, *sti;
   int j;

   for (sti = b->entry; sti; sti = sti->next) {
      if (!sti->def[0])
         continue;

      /* only plain MOV/STA into an output register qualify */
      if (sti->def[0]->reg.file != NV_FILE_OUT)
         continue;
      if (sti->opcode != NV_OP_MOV && sti->opcode != NV_OP_STA)
         continue;

      nvi = sti->src[0]->value->insn;
      if (!nvi || nvi->opcode == NV_OP_PHI)
         continue;
      assert(nvi->def[0] == sti->src[0]->value);

      /* only if this store is the sole use of the value */
      if (nvi->def[0]->refc > 1)
         continue;

      /* producer now writes the output directly */
      nvi->def[0] = sti->def[0];
      nvi->fixed = 1;
      sti->fixed = 0;
   }
   DESCEND_ARBITRARY(j, nv_pass_fold_stores);

   return 0;
}
293
/* Replace source references that are just loads (immediate MOVs, LDA
 * from constant/shared memory) with direct references to the loaded
 * location, so the load itself becomes dead and can be culled.
 */
static int
nv_pass_fold_loads(struct nv_pass *ctx, struct nv_basic_block *b)
{
   struct nv_instruction *nvi, *ld;
   int j;

   for (nvi = b->entry; nvi; nvi = nvi->next) {
      check_swap_src_0_1(nvi);

      for (j = 0; j < 3; ++j) {
         if (!nvi->src[j])
            break;
         ld = nvi->src[j]->value->insn;
         if (!ld)
            continue;

         /* some instructions can encode an immediate operand inline */
         if (is_immd_move(ld) && nv50_nvi_can_use_imm(nvi, j)) {
            nv_reference(ctx->pc, &nvi->src[j], ld->src[0]->value);
            debug_printf("folded immediate %i\n", ld->def[0]->n);
            continue;
         }

         if (ld->opcode != NV_OP_LDA)
            continue;
         if (!nv50_nvi_can_load(nvi, j, ld->src[0]->value))
            continue;

         if (j == 0 && ld->src[4]) /* can't load shared mem */
            continue;

         /* fold it ! */ /* XXX: ref->insn */
         nv_reference(ctx->pc, &nvi->src[j], ld->src[0]->value);
         /* carry over the load's address source, if any */
         if (ld->src[4])
            nv_reference(ctx->pc, &nvi->src[4], ld->src[4]->value);
      }
   }
   DESCEND_ARBITRARY(j, nv_pass_fold_loads);

   return 0;
}
334
335 static int
336 nv_pass_lower_mods(struct nv_pass *ctx, struct nv_basic_block *b)
337 {
338 int j;
339 struct nv_instruction *nvi, *mi, *next;
340 ubyte mod;
341
342 for (nvi = b->entry; nvi; nvi = next) {
343 next = nvi->next;
344 if (nvi->opcode == NV_OP_SUB) {
345 nvi->opcode = NV_OP_ADD;
346 nvi->src[1]->mod ^= NV_MOD_NEG;
347 }
348
349 /* should not put any modifiers on NEG and ABS */
350 assert(nvi->opcode != NV_MOD_NEG || !nvi->src[0]->mod);
351 assert(nvi->opcode != NV_MOD_ABS || !nvi->src[0]->mod);
352
353 for (j = 0; j < 4; ++j) {
354 if (!nvi->src[j])
355 break;
356
357 mi = nvi->src[j]->value->insn;
358 if (!mi)
359 continue;
360 if (mi->def[0]->refc > 1)
361 continue;
362
363 if (mi->opcode == NV_OP_NEG) mod = NV_MOD_NEG;
364 else
365 if (mi->opcode == NV_OP_ABS) mod = NV_MOD_ABS;
366 else
367 continue;
368
369 if (nvi->opcode == NV_OP_ABS)
370 mod &= ~(NV_MOD_NEG | NV_MOD_ABS);
371 else
372 if (nvi->opcode == NV_OP_NEG && mod == NV_MOD_NEG) {
373 nvi->opcode = NV_OP_MOV;
374 mod = 0;
375 }
376
377 if (!(nv50_supported_src_mods(nvi->opcode, j) & mod))
378 continue;
379
380 nv_reference(ctx->pc, &nvi->src[j], mi->src[0]->value);
381
382 nvi->src[j]->mod ^= mod;
383 }
384
385 if (nvi->opcode == NV_OP_SAT) {
386 mi = nvi->src[0]->value->insn;
387
388 if ((mi->opcode == NV_OP_MAD) && !mi->flags_def) {
389 mi->saturate = 1;
390 mi->def[0] = nvi->def[0];
391 nv_nvi_delete(nvi);
392 }
393 }
394 }
395 DESCEND_ARBITRARY(j, nv_pass_lower_mods);
396
397 return 0;
398 }
399
400 #define SRC_IS_MUL(s) ((s)->insn && (s)->insn->opcode == NV_OP_MUL)
401
402 static struct nv_value *
403 find_immediate(struct nv_ref *ref)
404 {
405 struct nv_value *src;
406
407 if (!ref)
408 return NULL;
409
410 src = ref->value;
411 while (src->insn && src->insn->opcode == NV_OP_MOV) {
412 assert(!src->insn->src[0]->mod);
413 src = src->insn->src[0]->value;
414 }
415 return (src->reg.file == NV_FILE_IMM) ? src : NULL;
416 }
417
418 static void
419 constant_operand(struct nv_pc *pc,
420 struct nv_instruction *nvi, struct nv_value *val, int s)
421 {
422 int t = s ? 0 : 1;
423 ubyte type;
424
425 if (!nvi->def[0])
426 return;
427 type = nvi->def[0]->reg.type;
428
429 switch (nvi->opcode) {
430 case NV_OP_MUL:
431 if ((type == NV_TYPE_F32 && val->reg.imm.f32 == 1.0f) ||
432 (NV_TYPE_ISINT(type) && val->reg.imm.u32 == 1)) {
433 nvi->opcode = NV_OP_MOV;
434 nv_reference(pc, &nvi->src[s], NULL);
435 if (!s) {
436 nvi->src[0] = nvi->src[1];
437 nvi->src[1] = NULL;
438 }
439 } else
440 if ((type == NV_TYPE_F32 && val->reg.imm.f32 == 2.0f) ||
441 (NV_TYPE_ISINT(type) && val->reg.imm.u32 == 2)) {
442 nvi->opcode = NV_OP_ADD;
443 nv_reference(pc, &nvi->src[s], NULL);
444 if (!s) {
445 nvi->src[0] = nvi->src[1];
446 nvi->src[1] = NULL;
447 }
448 } else
449 if (type == NV_TYPE_F32 && val->reg.imm.f32 == -1.0f) {
450 nvi->opcode = NV_OP_NEG;
451 nv_reference(pc, &nvi->src[s], NULL);
452 nvi->src[0] = nvi->src[t];
453 nvi->src[1] = NULL;
454 } else
455 if (type == NV_TYPE_F32 && val->reg.imm.f32 == -2.0f) {
456 nvi->opcode = NV_OP_ADD;
457 assert(!nvi->src[s]->mod);
458 nv_reference(pc, &nvi->src[s], nvi->src[t]->value);
459 nvi->src[t]->mod ^= NV_MOD_NEG;
460 nvi->src[s]->mod |= NV_MOD_NEG;
461 } else
462 if (val->reg.imm.u32 == 0) {
463 nvi->opcode = NV_OP_MOV;
464 nv_reference(pc, &nvi->src[t], NULL);
465 if (s) {
466 nvi->src[0] = nvi->src[1];
467 nvi->src[1] = NULL;
468 }
469 }
470 break;
471 case NV_OP_ADD:
472 if (val->reg.imm.u32 == 0) {
473 nvi->opcode = NV_OP_MOV;
474 nv_reference(pc, &nvi->src[s], NULL);
475 nvi->src[0] = nvi->src[t];
476 nvi->src[1] = NULL;
477 }
478 break;
479 default:
480 break;
481 }
482 }
483
/* Constant folding (via constant_operand) and MUL+ADD -> MAD fusion
 * for one block, descending into CFG successors.
 */
static int
nv_pass_lower_arith(struct nv_pass *ctx, struct nv_basic_block *b)
{
   struct nv_instruction *nvi, *next;
   int j;

   for (nvi = b->entry; nvi; nvi = next) {
      struct nv_value *src0, *src1, *src;
      int mod;

      next = nvi->next;

      /* simplify operations with an immediate operand */
      if ((src = find_immediate(nvi->src[0])) != NULL)
         constant_operand(ctx->pc, nvi, src, 0);
      else
      if ((src = find_immediate(nvi->src[1])) != NULL)
         constant_operand(ctx->pc, nvi, src, 1);

      /* try to combine MUL, ADD into MAD */
      if (nvi->opcode != NV_OP_ADD)
         continue;

      src0 = nvi->src[0]->value;
      src1 = nvi->src[1]->value;

      /* the MUL result must have no other users */
      if (SRC_IS_MUL(src0) && src0->refc == 1)
         src = src0;
      else
      if (SRC_IS_MUL(src1) && src1->refc == 1)
         src = src1;
      else
         continue;

      /* turn the ADD into the MAD; the other addend becomes src[2] */
      nvi->opcode = NV_OP_MAD;
      mod = nvi->src[(src == src0) ? 0 : 1]->mod;
      nv_reference(ctx->pc, &nvi->src[(src == src0) ? 0 : 1], NULL);
      nvi->src[2] = nvi->src[(src == src0) ? 1 : 0];

      /* a NEG on the MUL operand distributes into its first factor */
      assert(!(mod & ~NV_MOD_NEG));
      nvi->src[0] = new_ref(ctx->pc, src->insn->src[0]->value);
      nvi->src[1] = new_ref(ctx->pc, src->insn->src[1]->value);
      nvi->src[0]->mod = src->insn->src[0]->mod ^ mod;
      nvi->src[1]->mod = src->insn->src[1]->mod;
   }
   DESCEND_ARBITRARY(j, nv_pass_lower_arith);

   return 0;
}
532
533 /*
534 set $r2 g f32 $r2 $r3
535 cvt abs rn f32 $r2 s32 $r2
536 cvt f32 $c0 # f32 $r2
537 e $c0 bra 0x80
538 */
539 #if 0
540 static int
541 nv_pass_lower_cond(struct nv_pass *ctx, struct nv_basic_block *b)
542 {
543 /* XXX: easier in IR builder for now */
544 return 0;
545 }
546 #endif
547
548 /* TODO: reload elimination, redundant store elimination */
549
/* Context for the (stub) reload/redundant-store elimination pass. */
struct nv_pass_reldelim {
   struct nv_pc *pc;
};
553
/* Reload elimination (stub): the interesting opcodes are recognized
 * but no rewriting is implemented yet.
 */
static int
nv_pass_reload_elim(struct nv_pass_reldelim *ctx, struct nv_basic_block *b)
{
   int j;
   struct nv_instruction *ld, *next;

   for (ld = b->entry; ld; ld = next) {
      next = ld->next;

      if (ld->opcode == NV_OP_LINTERP || ld->opcode == NV_OP_PINTERP) {
         /* TODO: eliminate redundant interpolation */
      } else
      if (ld->opcode == NV_OP_LDA) {
         /* TODO: eliminate reloads of unmodified memory */
      } else
      if (ld->opcode == NV_OP_MOV) {
         /* TODO: copy propagation */
      }
   }
   DESCEND_ARBITRARY(j, nv_pass_reload_elim);

   return 0;
}
577
578 static int
579 nv_pass_tex_mask(struct nv_pass *ctx, struct nv_basic_block *b)
580 {
581 int i, c, j;
582
583 for (i = 0; i < ctx->pc->num_instructions; ++i) {
584 struct nv_instruction *nvi = &ctx->pc->instructions[i];
585 struct nv_value *def[4];
586
587 if (!nv_is_vector_op(nvi->opcode))
588 continue;
589 nvi->tex_mask = 0;
590
591 for (c = 0; c < 4; ++c) {
592 if (nvi->def[c]->refc)
593 nvi->tex_mask |= 1 << c;
594 def[c] = nvi->def[c];
595 }
596
597 j = 0;
598 for (c = 0; c < 4; ++c)
599 if (nvi->tex_mask & (1 << c))
600 nvi->def[j++] = def[c];
601 for (c = 0; c < 4; ++c)
602 if (!(nvi->tex_mask & (1 << c)))
603 nvi->def[j++] = def[c];
604 assert(j == 4);
605 }
606 return 0;
607 }
608
/* DCE pass context; removed counts deletions so the driver loop can
 * iterate until a fixed point.
 */
struct nv_pass_dce {
   struct nv_pc *pc;
   uint removed;
};
613
614 static int
615 nv_pass_dce(struct nv_pass_dce *ctx, struct nv_basic_block *b)
616 {
617 int j;
618 struct nv_instruction *nvi, *next;
619
620 for (nvi = b->entry; nvi; nvi = next) {
621 next = nvi->next;
622
623 if (inst_cullable(nvi)) {
624 nv_nvi_delete(nvi);
625
626 ++ctx->removed;
627 }
628 }
629 DESCEND_ARBITRARY(j, nv_pass_dce);
630
631 return 0;
632 }
633
634 static INLINE boolean
635 bb_simple_if_endif(struct nv_basic_block *bb)
636 {
637 return (bb->out[0] && bb->out[1] &&
638 bb->out[0]->out[0] == bb->out[1] &&
639 !bb->out[0]->out[1]);
640 }
641
642 static int
643 nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b)
644 {
645 int j;
646
647 if (bb_simple_if_endif(b)) {
648 ++ctx->n;
649 debug_printf("nv_pass_flatten: total IF/ENDIF constructs: %i\n", ctx->n);
650 }
651 DESCEND_ARBITRARY(j, nv_pass_flatten);
652
653 return 0;
654 }
655
656 int
657 nv_pc_exec_pass0(struct nv_pc *pc)
658 {
659 struct nv_pass_reldelim *reldelim;
660 struct nv_pass pass;
661 struct nv_pass_dce dce;
662 int ret;
663
664 reldelim = CALLOC_STRUCT(nv_pass_reldelim);
665 reldelim->pc = pc;
666
667 ret = nv_pass_reload_elim(reldelim, pc->root);
668
669 FREE(reldelim);
670 if (ret)
671 return ret;
672
673 pass.pc = pc;
674
675 pc->pass_seq++;
676 ret = nv_pass_flatten(&pass, pc->root);
677 if (ret)
678 return ret;
679
680 /* Do this first, so we don't have to pay attention
681 * to whether sources are supported memory loads.
682 */
683 pc->pass_seq++;
684 ret = nv_pass_lower_arith(&pass, pc->root);
685 if (ret)
686 return ret;
687
688 pc->pass_seq++;
689 ret = nv_pass_fold_loads(&pass, pc->root);
690 if (ret)
691 return ret;
692
693 pc->pass_seq++;
694 ret = nv_pass_fold_stores(&pass, pc->root);
695 if (ret)
696 return ret;
697
698 pc->pass_seq++;
699 ret = nv_pass_lower_mods(&pass, pc->root);
700 if (ret)
701 return ret;
702
703 dce.pc = pc;
704 do {
705 dce.removed = 0;
706 pc->pass_seq++;
707 ret = nv_pass_dce(&dce, pc->root);
708 if (ret)
709 return ret;
710 } while (dce.removed);
711
712 ret = nv_pass_tex_mask(&pass, pc->root);
713 if (ret)
714 return ret;
715
716 return ret;
717 }