nv50: don't produce MOV immediate to output reg in store opt
[mesa.git] / src / gallium / drivers / nv50 / nv50_pc_optimize.c
1
2 #include "nv50_pc.h"
3
/* Recurse into both CFG successors of block b that have not been
 * visited during the current pass yet, marking b as visited first.
 * j is a caller-provided loop variable, f the pass function to call;
 * expects "b" and "ctx" to be in scope at the expansion site.
 */
#define DESCEND_ARBITRARY(j, f)                                 \
do {                                                            \
   b->pass_seq = ctx->pc->pass_seq;                             \
                                                                \
   for (j = 0; j < 2; ++j)                                      \
      if (b->out[j] && b->out[j]->pass_seq < ctx->pc->pass_seq) \
         f(ctx, b->out[j]);                                     \
} while (0)
12
/* minimal encoded size of an instruction in bytes (defined in the emitter) */
extern unsigned nv50_inst_min_size(struct nv_instruction *);

/* context handed to the pre-emission pass */
struct nv_pc_pass {
   struct nv_pc *pc;
};
18
19 static INLINE boolean
20 values_equal(struct nv_value *a, struct nv_value *b)
21 {
22 /* XXX: sizes */
23 return (a->reg.file == b->reg.file && a->join->reg.id == b->join->reg.id);
24 }
25
26 static INLINE boolean
27 inst_commutation_check(struct nv_instruction *a,
28 struct nv_instruction *b)
29 {
30 int si, di;
31
32 for (di = 0; di < 4; ++di) {
33 if (!a->def[di])
34 break;
35 for (si = 0; si < 5; ++si) {
36 if (!b->src[si])
37 continue;
38 if (values_equal(a->def[di], b->src[si]->value))
39 return FALSE;
40 }
41 }
42
43 if (b->flags_src && b->flags_src->value == a->flags_def)
44 return FALSE;
45
46 return TRUE;
47 }
48
49 /* Check whether we can swap the order of the instructions,
50 * where a & b may be either the earlier or the later one.
51 */
52 static boolean
53 inst_commutation_legal(struct nv_instruction *a,
54 struct nv_instruction *b)
55 {
56 return inst_commutation_check(a, b) && inst_commutation_check(b, a);
57 }
58
59 static INLINE boolean
60 inst_cullable(struct nv_instruction *nvi)
61 {
62 return (!(nvi->is_terminator ||
63 nvi->target ||
64 nvi->fixed ||
65 nv_nvi_refcount(nvi)));
66 }
67
/* Determine whether nvi has no observable effect and can be deleted.
 * EXPORTs are unconditionally treated as nops here; otherwise only a
 * MOV/SELECT whose destination coincides with its source(s) qualifies.
 */
static INLINE boolean
nvi_isnop(struct nv_instruction *nvi)
{
   if (nvi->opcode == NV_OP_EXPORT)
      return TRUE;

   /* keep instructions with side effects or condition-code usage */
   if (nvi->fixed ||
       nvi->is_terminator ||
       nvi->flags_src ||
       nvi->flags_def)
      return FALSE;

   /* result never got a register assigned: nothing can read it */
   if (nvi->def[0]->join->reg.id < 0)
      return TRUE;

   if (nvi->opcode != NV_OP_MOV && nvi->opcode != NV_OP_SELECT)
      return FALSE;

   /* a cross-file MOV (e.g. imm -> reg) actually does work */
   if (nvi->def[0]->reg.file != nvi->src[0]->value->reg.file)
      return FALSE;

   if (nvi->src[0]->value->join->reg.id < 0) {
      debug_printf("nvi_isnop: orphaned value detected\n");
      return TRUE;
   }

   /* SELECT is only a nop if the second source matches too
    * (NOTE(review): sources beyond src[1] are not checked -- verify)
    */
   if (nvi->opcode == NV_OP_SELECT)
      if (!values_equal(nvi->def[0], nvi->src[1]->value))
         return FALSE;

   return values_equal(nvi->def[0], nvi->src[0]->value);
}
100
/* Pre-emission pass over one basic block: append it to pc->bb_list,
 * compute its binary position/size, delete no-op instructions and pick
 * the short (4 byte) or long (8 byte) encoding for each instruction.
 * Short encodings must come in pairs; unpairable ones are promoted to
 * the long form. Recurses into the CFG afterwards.
 */
static void
nv_pc_pass_pre_emission(struct nv_pc *pc, struct nv_basic_block *b)
{
   struct nv_instruction *nvi, *next;
   int j;
   uint size, n32 = 0; /* number of consecutive short encodings so far */

   b->priv = 0;

   /* this block starts where the previously listed one ended */
   if (pc->num_blocks)
      b->bin_pos = pc->bb_list[pc->num_blocks - 1]->bin_pos +
	 pc->bb_list[pc->num_blocks - 1]->bin_size;

   pc->bb_list[pc->num_blocks++] = b;

   /* visit node */

   /* delete no-ops first so sizing sees the final sequence */
   for (nvi = b->entry; nvi; nvi = next) {
      next = nvi->next;
      if (nvi_isnop(nvi))
         nv_nvi_delete(nvi);
   }

   for (nvi = b->entry; nvi; nvi = next) {
      next = nvi->next;

      size = nv50_inst_min_size(nvi);
      if (nvi->next && size < 8)
         ++n32;
      else
      if ((n32 & 1) && nvi->next &&
          nv50_inst_min_size(nvi->next) == 4 &&
          inst_commutation_legal(nvi, nvi->next)) {
         /* swap with the following short instruction to finish a pair */
         ++n32;
         debug_printf("permuting: ");
         nv_print_instruction(nvi);
         nv_print_instruction(nvi->next);
         nv_nvi_permute(nvi, nvi->next);
         next = nvi;
      } else {
         nvi->is_long = 1;

         /* an unpaired short predecessor must be promoted to long */
         b->bin_size += n32 & 1;
         if (n32 & 1)
            nvi->prev->is_long = 1;
         n32 = 0;
      }
      b->bin_size += 1 + nvi->is_long; /* size counted in 32 bit words */
   }

   if (!b->entry) {
      debug_printf("block %p is now empty\n", b);
   } else
   if (!b->exit->is_long) {
      /* the last instruction of a block must use the long encoding */
      assert(n32);
      b->exit->is_long = 1;
      b->bin_size += 1;

      /* might have deleted a whole tail of instructions */
      if (!b->exit->prev->is_long && !(n32 & 1)) {
         b->bin_size += 1;
         b->exit->prev->is_long = 1;
      }
   }
   assert(!b->exit || b->exit->is_long);

   pc->bin_size += b->bin_size *= 4; /* convert words to bytes */

   /* descend CFG */

   if (!b->out[0])
      return;
   /* visit a join block only after all of its predecessors */
   if (!b->out[1] && ++(b->out[0]->priv) != b->out[0]->num_in)
      return;

#if 0
   /* delete ELSE branch */
   if (b->entry &&
       b->entry->opcode == NV_OP_BRA && b->entry->target == b->out[0]) {
      nv_nvi_delete(b->entry);
      b->bin_size -= 2;
      pc->bin_size -= 8;
   }
#endif
   for (j = 0; j < 2; ++j)
      if (b->out[j] && b->out[j] != b)
         nv_pc_pass_pre_emission(pc, b->out[j]);
}
189
190 int
191 nv_pc_exec_pass2(struct nv_pc *pc)
192 {
193 debug_printf("preparing %u blocks for emission\n", pc->num_blocks);
194
195 pc->bb_list = CALLOC(pc->num_blocks, sizeof(struct nv_basic_block *));
196
197 pc->num_blocks = 0;
198 nv_pc_pass_pre_emission(pc, pc->root);
199
200 return 0;
201 }
202
203 static INLINE boolean
204 is_cmem_load(struct nv_instruction *nvi)
205 {
206 return (nvi->opcode == NV_OP_LDA &&
207 nvi->src[0]->value->reg.file >= NV_FILE_MEM_C(0) &&
208 nvi->src[0]->value->reg.file <= NV_FILE_MEM_C(15));
209 }
210
211 static INLINE boolean
212 is_smem_load(struct nv_instruction *nvi)
213 {
214 return (nvi->opcode == NV_OP_LDA &&
215 (nvi->src[0]->value->reg.file == NV_FILE_MEM_S ||
216 nvi->src[0]->value->reg.file <= NV_FILE_MEM_P));
217 }
218
219 static INLINE boolean
220 is_immd_move(struct nv_instruction *nvi)
221 {
222 return (nvi->opcode == NV_OP_MOV &&
223 nvi->src[0]->value->reg.file == NV_FILE_IMM);
224 }
225
226 static INLINE void
227 check_swap_src_0_1(struct nv_instruction *nvi)
228 {
229 static const ubyte cc_swapped[8] = { 0, 4, 2, 6, 1, 5, 3, 7 };
230
231 struct nv_ref *src0 = nvi->src[0], *src1 = nvi->src[1];
232
233 if (!nv_op_commutative(nvi->opcode))
234 return;
235 assert(src0 && src1);
236
237 if (is_cmem_load(src0->value->insn)) {
238 if (!is_cmem_load(src1->value->insn)) {
239 nvi->src[0] = src1;
240 nvi->src[1] = src0;
241 /* debug_printf("swapping cmem load to 1\n"); */
242 }
243 } else
244 if (is_smem_load(src1->value->insn)) {
245 if (!is_smem_load(src0->value->insn)) {
246 nvi->src[0] = src1;
247 nvi->src[1] = src0;
248 /* debug_printf("swapping smem load to 0\n"); */
249 }
250 }
251
252 if (nvi->opcode == NV_OP_SET && nvi->src[0] != src0)
253 nvi->set_cond = cc_swapped[nvi->set_cond];
254 }
255
/* Generic pass context: the program, a scratch counter (n) and
 * optional pass-private data.
 */
struct nv_pass {
   struct nv_pc *pc;
   int n;
   void *priv;
};
261
/* Fold stores to output registers: when the only use of an
 * instruction's result is a MOV/STA to $oX, retarget that instruction
 * to write $oX directly and disarm the store (which then becomes a
 * nop and is deleted later).
 */
static int
nv_pass_fold_stores(struct nv_pass *ctx, struct nv_basic_block *b)
{
   struct nv_instruction *nvi, *sti;
   int j;

   for (sti = b->entry; sti; sti = sti->next) {
      if (!sti->def[0] || sti->def[0]->reg.file != NV_FILE_OUT)
         continue;

      /* only handling MOV to $oX here */
      if (sti->opcode != NV_OP_MOV && sti->opcode != NV_OP_STA)
         continue;

      nvi = sti->src[0]->value->insn;
      if (!nvi || nvi->opcode == NV_OP_PHI)
         continue;
      assert(nvi->def[0] == sti->src[0]->value);

      /* result is read elsewhere too; cannot retarget it */
      if (nvi->def[0]->refc > 1)
         continue;

      /* cannot MOV immediate to $oX */
      /* NOTE(review): assumes every non-PHI value-producing nvi has a
       * non-NULL src[0] -- verify against the instruction set
       */
      if (nvi->src[0]->value->reg.file == NV_FILE_IMM)
         continue;

      /* retarget the def; the neutralized store is deleted as a nop */
      nvi->def[0] = sti->def[0];
      sti->def[0] = NULL;
      nvi->fixed = sti->fixed;
      sti->fixed = 0;
   }
   DESCEND_ARBITRARY(j, nv_pass_fold_stores);

   return 0;
}
297
/* Fold memory loads and immediate MOVs into their users: replace a
 * register source by the constant/shared-memory location or immediate
 * the defining instruction fetched, where the consumer's encoding
 * allows it. Also canonicalizes commutative source order first.
 */
static int
nv_pass_fold_loads(struct nv_pass *ctx, struct nv_basic_block *b)
{
   struct nv_instruction *nvi, *ld;
   int j;

   for (nvi = b->entry; nvi; nvi = nvi->next) {
      check_swap_src_0_1(nvi);

      for (j = 0; j < 3; ++j) {
         if (!nvi->src[j])
            break;
         ld = nvi->src[j]->value->insn;
         if (!ld) /* e.g. input value, nothing to fold */
            continue;

         if (is_immd_move(ld) && nv50_nvi_can_use_imm(nvi, j)) {
            nv_reference(ctx->pc, &nvi->src[j], ld->src[0]->value);
            debug_printf("folded immediate %i\n", ld->def[0]->n);
            continue;
         }

         if (ld->opcode != NV_OP_LDA)
            continue;
         if (!nv50_nvi_can_load(nvi, j, ld->src[0]->value))
            continue;

         if (j == 0 && ld->src[4]) /* can't load shared mem */
            continue;

         /* fold it ! */ /* XXX: ref->insn */
         /* src[4] appears to be the indirect-address source -- carry
          * it over so the folded access stays addressed correctly
          */
         nv_reference(ctx->pc, &nvi->src[j], ld->src[0]->value);
         if (ld->src[4])
            nv_reference(ctx->pc, &nvi->src[4], ld->src[4]->value);
      }
   }
   DESCEND_ARBITRARY(j, nv_pass_fold_loads);

   return 0;
}
338
339 static int
340 nv_pass_lower_mods(struct nv_pass *ctx, struct nv_basic_block *b)
341 {
342 int j;
343 struct nv_instruction *nvi, *mi, *next;
344 ubyte mod;
345
346 for (nvi = b->entry; nvi; nvi = next) {
347 next = nvi->next;
348 if (nvi->opcode == NV_OP_SUB) {
349 nvi->opcode = NV_OP_ADD;
350 nvi->src[1]->mod ^= NV_MOD_NEG;
351 }
352
353 /* should not put any modifiers on NEG and ABS */
354 assert(nvi->opcode != NV_MOD_NEG || !nvi->src[0]->mod);
355 assert(nvi->opcode != NV_MOD_ABS || !nvi->src[0]->mod);
356
357 for (j = 0; j < 4; ++j) {
358 if (!nvi->src[j])
359 break;
360
361 mi = nvi->src[j]->value->insn;
362 if (!mi)
363 continue;
364 if (mi->def[0]->refc > 1)
365 continue;
366
367 if (mi->opcode == NV_OP_NEG) mod = NV_MOD_NEG;
368 else
369 if (mi->opcode == NV_OP_ABS) mod = NV_MOD_ABS;
370 else
371 continue;
372
373 if (nvi->opcode == NV_OP_ABS)
374 mod &= ~(NV_MOD_NEG | NV_MOD_ABS);
375 else
376 if (nvi->opcode == NV_OP_NEG && mod == NV_MOD_NEG) {
377 nvi->opcode = NV_OP_MOV;
378 mod = 0;
379 }
380
381 if (!(nv50_supported_src_mods(nvi->opcode, j) & mod))
382 continue;
383
384 nv_reference(ctx->pc, &nvi->src[j], mi->src[0]->value);
385
386 nvi->src[j]->mod ^= mod;
387 }
388
389 if (nvi->opcode == NV_OP_SAT) {
390 mi = nvi->src[0]->value->insn;
391
392 if ((mi->opcode == NV_OP_MAD) && !mi->flags_def) {
393 mi->saturate = 1;
394 mi->def[0] = nvi->def[0];
395 nv_nvi_delete(nvi);
396 }
397 }
398 }
399 DESCEND_ARBITRARY(j, nv_pass_lower_mods);
400
401 return 0;
402 }
403
/* TRUE if the value's defining instruction is a MUL (MAD fusion candidate). */
#define SRC_IS_MUL(s) ((s)->insn && (s)->insn->opcode == NV_OP_MUL)
405
406 static struct nv_value *
407 find_immediate(struct nv_ref *ref)
408 {
409 struct nv_value *src;
410
411 if (!ref)
412 return NULL;
413
414 src = ref->value;
415 while (src->insn && src->insn->opcode == NV_OP_MOV) {
416 assert(!src->insn->src[0]->mod);
417 src = src->insn->src[0]->value;
418 }
419 return (src->reg.file == NV_FILE_IMM) ? src : NULL;
420 }
421
/* Simplify nvi given that its source s is the immediate val; t is the
 * index of the other source. MUL by 1, 2, -1, -2 or 0 and ADD of 0 are
 * rewritten into cheaper MOV/ADD/NEG forms.
 * NOTE(review): the u32 == 0 test also matches f32 +0.0 bit pattern,
 * so x * 0 folds to 0 even for NaN/Inf x -- presumably acceptable for
 * graphics, verify.
 */
static void
constant_operand(struct nv_pc *pc,
                 struct nv_instruction *nvi, struct nv_value *val, int s)
{
   int t = s ? 0 : 1;
   ubyte type;

   if (!nvi->def[0])
      return;
   type = nvi->def[0]->reg.type;

   switch (nvi->opcode) {
   case NV_OP_MUL:
      if ((type == NV_TYPE_F32 && val->reg.imm.f32 == 1.0f) ||
          (NV_TYPE_ISINT(type) && val->reg.imm.u32 == 1)) {
         /* x * 1 -> MOV x (shift remaining source into slot 0) */
         nvi->opcode = NV_OP_MOV;
         nv_reference(pc, &nvi->src[s], NULL);
         if (!s) {
            nvi->src[0] = nvi->src[1];
            nvi->src[1] = NULL;
         }
      } else
      if ((type == NV_TYPE_F32 && val->reg.imm.f32 == 2.0f) ||
          (NV_TYPE_ISINT(type) && val->reg.imm.u32 == 2)) {
         /* x * 2 -> ADD x, x
          * NOTE(review): second ADD source is not duplicated here;
          * presumably handled by the backend -- verify
          */
         nvi->opcode = NV_OP_ADD;
         nv_reference(pc, &nvi->src[s], NULL);
         if (!s) {
            nvi->src[0] = nvi->src[1];
            nvi->src[1] = NULL;
         }
      } else
      if (type == NV_TYPE_F32 && val->reg.imm.f32 == -1.0f) {
         /* x * -1 -> NEG x */
         nvi->opcode = NV_OP_NEG;
         nv_reference(pc, &nvi->src[s], NULL);
         nvi->src[0] = nvi->src[t];
         nvi->src[1] = NULL;
      } else
      if (type == NV_TYPE_F32 && val->reg.imm.f32 == -2.0f) {
         /* x * -2 -> ADD -x, -x */
         nvi->opcode = NV_OP_ADD;
         assert(!nvi->src[s]->mod);
         nv_reference(pc, &nvi->src[s], nvi->src[t]->value);
         nvi->src[t]->mod ^= NV_MOD_NEG;
         nvi->src[s]->mod |= NV_MOD_NEG;
      } else
      if (val->reg.imm.u32 == 0) {
         /* x * 0 -> MOV 0 (drop the other source, keep the immediate) */
         nvi->opcode = NV_OP_MOV;
         nv_reference(pc, &nvi->src[t], NULL);
         if (s) {
            nvi->src[0] = nvi->src[1];
            nvi->src[1] = NULL;
         }
      }
      break;
   case NV_OP_ADD:
      if (val->reg.imm.u32 == 0) {
         /* x + 0 -> MOV x */
         nvi->opcode = NV_OP_MOV;
         nv_reference(pc, &nvi->src[s], NULL);
         nvi->src[0] = nvi->src[t];
         nvi->src[1] = NULL;
      }
      break;
   default:
      break;
   }
}
487
/* Arithmetic lowering: simplify instructions with immediate operands
 * (constant_operand) and fuse a single-use MUL feeding an ADD into a
 * single MAD.
 */
static int
nv_pass_lower_arith(struct nv_pass *ctx, struct nv_basic_block *b)
{
   struct nv_instruction *nvi, *next;
   int j;

   for (nvi = b->entry; nvi; nvi = next) {
      struct nv_value *src0, *src1, *src;
      int mod;

      next = nvi->next;

      if ((src = find_immediate(nvi->src[0])) != NULL)
         constant_operand(ctx->pc, nvi, src, 0);
      else
      if ((src = find_immediate(nvi->src[1])) != NULL)
         constant_operand(ctx->pc, nvi, src, 1);

      /* try to combine MUL, ADD into MAD */
      if (nvi->opcode != NV_OP_ADD)
         continue;

      src0 = nvi->src[0]->value;
      src1 = nvi->src[1]->value;

      /* the MUL result must be used by this ADD only */
      if (SRC_IS_MUL(src0) && src0->refc == 1)
         src = src0;
      else
      if (SRC_IS_MUL(src1) && src1->refc == 1)
         src = src1;
      else
         continue;

      nvi->opcode = NV_OP_MAD;
      /* remember the ADD's modifier on the MUL operand; it is folded
       * into the first MAD source below
       */
      mod = nvi->src[(src == src0) ? 0 : 1]->mod;
      nv_reference(ctx->pc, &nvi->src[(src == src0) ? 0 : 1], NULL);
      /* the non-MUL operand becomes the MAD addend */
      nvi->src[2] = nvi->src[(src == src0) ? 1 : 0];

      assert(!(mod & ~NV_MOD_NEG)); /* only negation can be folded */
      nvi->src[0] = new_ref(ctx->pc, src->insn->src[0]->value);
      nvi->src[1] = new_ref(ctx->pc, src->insn->src[1]->value);
      nvi->src[0]->mod = src->insn->src[0]->mod ^ mod;
      nvi->src[1]->mod = src->insn->src[1]->mod;
   }
   DESCEND_ARBITRARY(j, nv_pass_lower_arith);

   return 0;
}
536
537 /*
538 set $r2 g f32 $r2 $r3
539 cvt abs rn f32 $r2 s32 $r2
540 cvt f32 $c0 # f32 $r2
541 e $c0 bra 0x80
542 */
543 #if 0
544 static int
545 nv_pass_lower_cond(struct nv_pass *ctx, struct nv_basic_block *b)
546 {
547 /* XXX: easier in IR builder for now */
548 return 0;
549 }
550 #endif
551
552 /* TODO: reload elimination, redundant store elimination */
553
/* context for the (as yet unimplemented) reload elimination pass */
struct nv_pass_reldelim {
   struct nv_pc *pc;
};
557
/* Reload elimination (stub): the candidate opcodes are recognized but
 * no rewriting is implemented yet; the pass currently only walks the
 * CFG. TODO: see the "reload elimination, redundant store elimination"
 * note above.
 */
static int
nv_pass_reload_elim(struct nv_pass_reldelim *ctx, struct nv_basic_block *b)
{
   int j;
   struct nv_instruction *ld, *next;

   for (ld = b->entry; ld; ld = next) {
      next = ld->next;

      if (ld->opcode == NV_OP_LINTERP || ld->opcode == NV_OP_PINTERP) {
         /* TODO: eliminate redundant interpolations */
      } else
      if (ld->opcode == NV_OP_LDA) {
         /* TODO: eliminate redundant loads */
      } else
      if (ld->opcode == NV_OP_MOV) {
         /* TODO: eliminate redundant moves */
      }
   }
   DESCEND_ARBITRARY(j, nv_pass_reload_elim);

   return 0;
}
581
582 static int
583 nv_pass_tex_mask(struct nv_pass *ctx, struct nv_basic_block *b)
584 {
585 int i, c, j;
586
587 for (i = 0; i < ctx->pc->num_instructions; ++i) {
588 struct nv_instruction *nvi = &ctx->pc->instructions[i];
589 struct nv_value *def[4];
590
591 if (!nv_is_vector_op(nvi->opcode))
592 continue;
593 nvi->tex_mask = 0;
594
595 for (c = 0; c < 4; ++c) {
596 if (nvi->def[c]->refc)
597 nvi->tex_mask |= 1 << c;
598 def[c] = nvi->def[c];
599 }
600
601 j = 0;
602 for (c = 0; c < 4; ++c)
603 if (nvi->tex_mask & (1 << c))
604 nvi->def[j++] = def[c];
605 for (c = 0; c < 4; ++c)
606 if (!(nvi->tex_mask & (1 << c)))
607 nvi->def[j++] = def[c];
608 assert(j == 4);
609 }
610 return 0;
611 }
612
/* DCE context: removal count lets the caller iterate to a fixed point. */
struct nv_pass_dce {
   struct nv_pc *pc;
   uint removed;
};
617
618 static int
619 nv_pass_dce(struct nv_pass_dce *ctx, struct nv_basic_block *b)
620 {
621 int j;
622 struct nv_instruction *nvi, *next;
623
624 for (nvi = b->entry; nvi; nvi = next) {
625 next = nvi->next;
626
627 if (inst_cullable(nvi)) {
628 nv_nvi_delete(nvi);
629
630 ++ctx->removed;
631 }
632 }
633 DESCEND_ARBITRARY(j, nv_pass_dce);
634
635 return 0;
636 }
637
638 static INLINE boolean
639 bb_simple_if_endif(struct nv_basic_block *bb)
640 {
641 return (bb->out[0] && bb->out[1] &&
642 bb->out[0]->out[0] == bb->out[1] &&
643 !bb->out[0]->out[1]);
644 }
645
646 static int
647 nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b)
648 {
649 int j;
650
651 if (bb_simple_if_endif(b)) {
652 ++ctx->n;
653 debug_printf("nv_pass_flatten: total IF/ENDIF constructs: %i\n", ctx->n);
654 }
655 DESCEND_ARBITRARY(j, nv_pass_flatten);
656
657 return 0;
658 }
659
660 int
661 nv_pc_exec_pass0(struct nv_pc *pc)
662 {
663 struct nv_pass_reldelim *reldelim;
664 struct nv_pass pass;
665 struct nv_pass_dce dce;
666 int ret;
667
668 reldelim = CALLOC_STRUCT(nv_pass_reldelim);
669 reldelim->pc = pc;
670
671 ret = nv_pass_reload_elim(reldelim, pc->root);
672
673 FREE(reldelim);
674 if (ret)
675 return ret;
676
677 pass.pc = pc;
678
679 pc->pass_seq++;
680 ret = nv_pass_flatten(&pass, pc->root);
681 if (ret)
682 return ret;
683
684 /* Do this first, so we don't have to pay attention
685 * to whether sources are supported memory loads.
686 */
687 pc->pass_seq++;
688 ret = nv_pass_lower_arith(&pass, pc->root);
689 if (ret)
690 return ret;
691
692 pc->pass_seq++;
693 ret = nv_pass_fold_loads(&pass, pc->root);
694 if (ret)
695 return ret;
696
697 pc->pass_seq++;
698 ret = nv_pass_fold_stores(&pass, pc->root);
699 if (ret)
700 return ret;
701
702 pc->pass_seq++;
703 ret = nv_pass_lower_mods(&pass, pc->root);
704 if (ret)
705 return ret;
706
707 dce.pc = pc;
708 do {
709 dce.removed = 0;
710 pc->pass_seq++;
711 ret = nv_pass_dce(&dce, pc->root);
712 if (ret)
713 return ret;
714 } while (dce.removed);
715
716 ret = nv_pass_tex_mask(&pass, pc->root);
717 if (ret)
718 return ret;
719
720 return ret;
721 }