nv50: simple reload elimination and local CSE
[mesa.git] / src / gallium / drivers / nv50 / nv50_pc_optimize.c
1 /*
2 * Copyright 2010 Christoph Bumiller
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 * SOFTWARE.
21 */
22
23 #include "nv50_pc.h"
24
/* Recurse into the successors of basic block b that have not been visited
 * by the current pass yet.  pass_seq is bumped once per pass; a successor
 * with an older pass_seq still needs processing.  j is a caller-provided
 * scratch loop variable, f the pass function to apply.
 */
#define DESCEND_ARBITRARY(j, f)                                 \
do {                                                            \
   b->pass_seq = ctx->pc->pass_seq;                             \
                                                                \
   for (j = 0; j < 2; ++j)                                      \
      if (b->out[j] && b->out[j]->pass_seq < ctx->pc->pass_seq) \
         f(ctx, b->out[j]);                                     \
} while (0)
33
34 extern unsigned nv50_inst_min_size(struct nv_instruction *);
35
36 struct nv_pc_pass {
37 struct nv_pc *pc;
38 };
39
40 static INLINE boolean
41 values_equal(struct nv_value *a, struct nv_value *b)
42 {
43 /* XXX: sizes */
44 return (a->reg.file == b->reg.file && a->join->reg.id == b->join->reg.id);
45 }
46
47 static INLINE boolean
48 inst_commutation_check(struct nv_instruction *a,
49 struct nv_instruction *b)
50 {
51 int si, di;
52
53 for (di = 0; di < 4; ++di) {
54 if (!a->def[di])
55 break;
56 for (si = 0; si < 5; ++si) {
57 if (!b->src[si])
58 continue;
59 if (values_equal(a->def[di], b->src[si]->value))
60 return FALSE;
61 }
62 }
63
64 if (b->flags_src && b->flags_src->value == a->flags_def)
65 return FALSE;
66
67 return TRUE;
68 }
69
70 /* Check whether we can swap the order of the instructions,
71 * where a & b may be either the earlier or the later one.
72 */
73 static boolean
74 inst_commutation_legal(struct nv_instruction *a,
75 struct nv_instruction *b)
76 {
77 return inst_commutation_check(a, b) && inst_commutation_check(b, a);
78 }
79
80 static INLINE boolean
81 inst_cullable(struct nv_instruction *nvi)
82 {
83 return (!(nvi->is_terminator ||
84 nvi->target ||
85 nvi->fixed ||
86 nv_nvi_refcount(nvi)));
87 }
88
89 static INLINE boolean
90 nvi_isnop(struct nv_instruction *nvi)
91 {
92 if (nvi->opcode == NV_OP_EXPORT)
93 return TRUE;
94
95 if (nvi->fixed ||
96 nvi->is_terminator ||
97 nvi->flags_src ||
98 nvi->flags_def)
99 return FALSE;
100
101 if (nvi->def[0]->join->reg.id < 0)
102 return TRUE;
103
104 if (nvi->opcode != NV_OP_MOV && nvi->opcode != NV_OP_SELECT)
105 return FALSE;
106
107 if (nvi->def[0]->reg.file != nvi->src[0]->value->reg.file)
108 return FALSE;
109
110 if (nvi->src[0]->value->join->reg.id < 0) {
111 debug_printf("nvi_isnop: orphaned value detected\n");
112 return TRUE;
113 }
114
115 if (nvi->opcode == NV_OP_SELECT)
116 if (!values_equal(nvi->def[0], nvi->src[1]->value))
117 return FALSE;
118
119 return values_equal(nvi->def[0], nvi->src[0]->value);
120 }
121
/* Prepare a basic block for emission: remove no-op instructions, decide
 * between short (4 byte) and long (8 byte) encodings -- short instructions
 * must come in pairs -- and accumulate bin_pos/bin_size.  Recurses over the
 * CFG in a predecessors-first order.
 */
static void
nv_pc_pass_pre_emission(struct nv_pc *pc, struct nv_basic_block *b)
{
   struct nv_instruction *nvi, *next;
   int j;
   uint size, n32 = 0; /* number of consecutive short instructions */

   b->priv = 0; /* reused below as a visited-predecessor counter */

   /* this block is emitted right behind the previously scheduled one */
   if (pc->num_blocks)
      b->bin_pos = pc->bb_list[pc->num_blocks - 1]->bin_pos +
         pc->bb_list[pc->num_blocks - 1]->bin_size;

   pc->bb_list[pc->num_blocks++] = b;

   /* visit node */

   /* drop no-op instructions before computing encoding sizes */
   for (nvi = b->entry; nvi; nvi = next) {
      next = nvi->next;
      if (nvi_isnop(nvi))
         nv_nvi_delete(nvi);
   }

   /* bin_size is counted in 32 bit units here and scaled to bytes below */
   for (nvi = b->entry; nvi; nvi = next) {
      next = nvi->next;

      size = nv50_inst_min_size(nvi);
      if (nvi->next && size < 8)
         ++n32;
      else
      if ((n32 & 1) && nvi->next &&
          nv50_inst_min_size(nvi->next) == 4 &&
          inst_commutation_legal(nvi, nvi->next)) {
         /* swap with the following short instruction to complete a pair */
         ++n32;
         debug_printf("permuting: ");
         nv_print_instruction(nvi);
         nv_print_instruction(nvi->next);
         nv_nvi_permute(nvi, nvi->next);
         next = nvi; /* revisit the instruction that moved back */
      } else {
         nvi->is_long = 1;

         /* an odd number of short instructions: lengthen the last one */
         b->bin_size += n32 & 1;
         if (n32 & 1)
            nvi->prev->is_long = 1;
         n32 = 0;
      }
      b->bin_size += 1 + nvi->is_long;
   }

   if (!b->entry) {
      debug_printf("block %p is now empty\n", b);
   } else
   if (!b->exit->is_long) {
      /* the block's exit instruction must use the long encoding */
      assert(n32);
      b->exit->is_long = 1;
      b->bin_size += 1;

      /* might have del'd a hole tail of instructions */
      if (!b->exit->prev->is_long && !(n32 & 1)) {
         b->bin_size += 1;
         b->exit->prev->is_long = 1;
      }
   }
   assert(!b->exit || b->exit->is_long);

   /* convert bin_size from 32 bit words to bytes */
   pc->bin_size += b->bin_size *= 4;

   /* descend CFG */

   if (!b->out[0])
      return;
   /* only descend into a merge block after all its predecessors were done */
   if (!b->out[1] && ++(b->out[0]->priv) != b->out[0]->num_in)
      return;

#if 0
   /* delete ELSE branch */
   if (b->entry &&
       b->entry->opcode == NV_OP_BRA && b->entry->target == b->out[0]) {
      nv_nvi_delete(b->entry);
      b->bin_size -= 2;
      pc->bin_size -= 8;
   }
#endif
   for (j = 0; j < 2; ++j)
      if (b->out[j] && b->out[j] != b)
         nv_pc_pass_pre_emission(pc, b->out[j]);
}
210
211 int
212 nv_pc_exec_pass2(struct nv_pc *pc)
213 {
214 debug_printf("preparing %u blocks for emission\n", pc->num_blocks);
215
216 pc->bb_list = CALLOC(pc->num_blocks, sizeof(struct nv_basic_block *));
217
218 pc->num_blocks = 0;
219 nv_pc_pass_pre_emission(pc, pc->root);
220
221 return 0;
222 }
223
224 static INLINE boolean
225 is_cmem_load(struct nv_instruction *nvi)
226 {
227 return (nvi->opcode == NV_OP_LDA &&
228 nvi->src[0]->value->reg.file >= NV_FILE_MEM_C(0) &&
229 nvi->src[0]->value->reg.file <= NV_FILE_MEM_C(15));
230 }
231
232 static INLINE boolean
233 is_smem_load(struct nv_instruction *nvi)
234 {
235 return (nvi->opcode == NV_OP_LDA &&
236 (nvi->src[0]->value->reg.file == NV_FILE_MEM_S ||
237 nvi->src[0]->value->reg.file <= NV_FILE_MEM_P));
238 }
239
240 static INLINE boolean
241 is_immd_move(struct nv_instruction *nvi)
242 {
243 return (nvi->opcode == NV_OP_MOV &&
244 nvi->src[0]->value->reg.file == NV_FILE_IMM);
245 }
246
247 static INLINE void
248 check_swap_src_0_1(struct nv_instruction *nvi)
249 {
250 static const ubyte cc_swapped[8] = { 0, 4, 2, 6, 1, 5, 3, 7 };
251
252 struct nv_ref *src0 = nvi->src[0], *src1 = nvi->src[1];
253
254 if (!nv_op_commutative(nvi->opcode))
255 return;
256 assert(src0 && src1);
257
258 if (is_cmem_load(src0->value->insn)) {
259 if (!is_cmem_load(src1->value->insn)) {
260 nvi->src[0] = src1;
261 nvi->src[1] = src0;
262 /* debug_printf("swapping cmem load to 1\n"); */
263 }
264 } else
265 if (is_smem_load(src1->value->insn)) {
266 if (!is_smem_load(src0->value->insn)) {
267 nvi->src[0] = src1;
268 nvi->src[1] = src0;
269 /* debug_printf("swapping smem load to 0\n"); */
270 }
271 }
272
273 if (nvi->opcode == NV_OP_SET && nvi->src[0] != src0)
274 nvi->set_cond = cc_swapped[nvi->set_cond];
275 }
276
/* Common context handed through the optimization passes below. */
struct nv_pass {
   struct nv_pc *pc; /* the program being processed */
   int n;            /* generic counter; meaning is pass-specific */
   void *priv;       /* pass-specific data */
};
282
/* Let the instruction computing a value write directly to the output
 * register ($oX) instead of going through an extra MOV/STA.
 */
static int
nv_pass_fold_stores(struct nv_pass *ctx, struct nv_basic_block *b)
{
   struct nv_instruction *nvi, *sti;
   int j;

   for (sti = b->entry; sti; sti = sti->next) {
      if (!sti->def[0] || sti->def[0]->reg.file != NV_FILE_OUT)
         continue;

      /* only handling MOV to $oX here */
      if (sti->opcode != NV_OP_MOV && sti->opcode != NV_OP_STA)
         continue;

      nvi = sti->src[0]->value->insn;
      if (!nvi || nvi->opcode == NV_OP_PHI)
         continue;
      assert(nvi->def[0] == sti->src[0]->value);

      /* the stored value must not have any other users */
      if (nvi->def[0]->refc > 1)
         continue;

      /* cannot MOV immediate to $oX */
      if (nvi->src[0]->value->reg.file == NV_FILE_IMM)
         continue;

      /* redirect the producer to the output and neutralize the store */
      nvi->def[0] = sti->def[0];
      sti->def[0] = NULL;
      nvi->fixed = sti->fixed;
      sti->fixed = 0;
   }
   DESCEND_ARBITRARY(j, nv_pass_fold_stores);

   return 0;
}
318
/* Replace references to separately loaded values (immediates, memory
 * loads) by direct references to the load source, where the consuming
 * instruction supports such operands.
 */
static int
nv_pass_fold_loads(struct nv_pass *ctx, struct nv_basic_block *b)
{
   struct nv_instruction *nvi, *ld;
   int j;

   for (nvi = b->entry; nvi; nvi = nvi->next) {
      check_swap_src_0_1(nvi);

      for (j = 0; j < 3; ++j) {
         if (!nvi->src[j])
            break;
         ld = nvi->src[j]->value->insn;
         if (!ld)
            continue; /* value not produced in this function */

         if (is_immd_move(ld) && nv50_nvi_can_use_imm(nvi, j)) {
            nv_reference(ctx->pc, &nvi->src[j], ld->src[0]->value);
            debug_printf("folded immediate %i\n", ld->def[0]->n);
            continue;
         }

         if (ld->opcode != NV_OP_LDA)
            continue;
         if (!nv50_nvi_can_load(nvi, j, ld->src[0]->value))
            continue;

         /* src[4] is the indirect-address source of the load */
         if (j == 0 && ld->src[4]) /* can't load shared mem */
            continue;

         /* fold it ! */ /* XXX: ref->insn */
         nv_reference(ctx->pc, &nvi->src[j], ld->src[0]->value);
         if (ld->src[4])
            nv_reference(ctx->pc, &nvi->src[4], ld->src[4]->value);
      }
   }
   DESCEND_ARBITRARY(j, nv_pass_fold_loads);

   return 0;
}
359
360 static int
361 nv_pass_lower_mods(struct nv_pass *ctx, struct nv_basic_block *b)
362 {
363 int j;
364 struct nv_instruction *nvi, *mi, *next;
365 ubyte mod;
366
367 for (nvi = b->entry; nvi; nvi = next) {
368 next = nvi->next;
369 if (nvi->opcode == NV_OP_SUB) {
370 nvi->opcode = NV_OP_ADD;
371 nvi->src[1]->mod ^= NV_MOD_NEG;
372 }
373
374 /* should not put any modifiers on NEG and ABS */
375 assert(nvi->opcode != NV_MOD_NEG || !nvi->src[0]->mod);
376 assert(nvi->opcode != NV_MOD_ABS || !nvi->src[0]->mod);
377
378 for (j = 0; j < 4; ++j) {
379 if (!nvi->src[j])
380 break;
381
382 mi = nvi->src[j]->value->insn;
383 if (!mi)
384 continue;
385 if (mi->def[0]->refc > 1)
386 continue;
387
388 if (mi->opcode == NV_OP_NEG) mod = NV_MOD_NEG;
389 else
390 if (mi->opcode == NV_OP_ABS) mod = NV_MOD_ABS;
391 else
392 continue;
393
394 if (nvi->opcode == NV_OP_ABS)
395 mod &= ~(NV_MOD_NEG | NV_MOD_ABS);
396 else
397 if (nvi->opcode == NV_OP_NEG && mod == NV_MOD_NEG) {
398 nvi->opcode = NV_OP_MOV;
399 mod = 0;
400 }
401
402 if (!(nv50_supported_src_mods(nvi->opcode, j) & mod))
403 continue;
404
405 nv_reference(ctx->pc, &nvi->src[j], mi->src[0]->value);
406
407 nvi->src[j]->mod ^= mod;
408 }
409
410 if (nvi->opcode == NV_OP_SAT) {
411 mi = nvi->src[0]->value->insn;
412
413 if ((mi->opcode == NV_OP_MAD) && !mi->flags_def) {
414 mi->saturate = 1;
415 mi->def[0] = nvi->def[0];
416 nv_nvi_delete(nvi);
417 }
418 }
419 }
420 DESCEND_ARBITRARY(j, nv_pass_lower_mods);
421
422 return 0;
423 }
424
/* value was produced by a multiply -- candidate for MAD fusion below */
#define SRC_IS_MUL(s) ((s)->insn && (s)->insn->opcode == NV_OP_MUL)
426
427 static struct nv_value *
428 find_immediate(struct nv_ref *ref)
429 {
430 struct nv_value *src;
431
432 if (!ref)
433 return NULL;
434
435 src = ref->value;
436 while (src->insn && src->insn->opcode == NV_OP_MOV) {
437 assert(!src->insn->src[0]->mod);
438 src = src->insn->src[0]->value;
439 }
440 return (src->reg.file == NV_FILE_IMM) ? src : NULL;
441 }
442
443 static void
444 constant_operand(struct nv_pc *pc,
445 struct nv_instruction *nvi, struct nv_value *val, int s)
446 {
447 int t = s ? 0 : 1;
448 ubyte type;
449
450 if (!nvi->def[0])
451 return;
452 type = nvi->def[0]->reg.type;
453
454 switch (nvi->opcode) {
455 case NV_OP_MUL:
456 if ((type == NV_TYPE_F32 && val->reg.imm.f32 == 1.0f) ||
457 (NV_TYPE_ISINT(type) && val->reg.imm.u32 == 1)) {
458 nvi->opcode = NV_OP_MOV;
459 nv_reference(pc, &nvi->src[s], NULL);
460 if (!s) {
461 nvi->src[0] = nvi->src[1];
462 nvi->src[1] = NULL;
463 }
464 } else
465 if ((type == NV_TYPE_F32 && val->reg.imm.f32 == 2.0f) ||
466 (NV_TYPE_ISINT(type) && val->reg.imm.u32 == 2)) {
467 nvi->opcode = NV_OP_ADD;
468 nv_reference(pc, &nvi->src[s], NULL);
469 if (!s) {
470 nvi->src[0] = nvi->src[1];
471 nvi->src[1] = NULL;
472 }
473 } else
474 if (type == NV_TYPE_F32 && val->reg.imm.f32 == -1.0f) {
475 nvi->opcode = NV_OP_NEG;
476 nv_reference(pc, &nvi->src[s], NULL);
477 nvi->src[0] = nvi->src[t];
478 nvi->src[1] = NULL;
479 } else
480 if (type == NV_TYPE_F32 && val->reg.imm.f32 == -2.0f) {
481 nvi->opcode = NV_OP_ADD;
482 assert(!nvi->src[s]->mod);
483 nv_reference(pc, &nvi->src[s], nvi->src[t]->value);
484 nvi->src[t]->mod ^= NV_MOD_NEG;
485 nvi->src[s]->mod |= NV_MOD_NEG;
486 } else
487 if (val->reg.imm.u32 == 0) {
488 nvi->opcode = NV_OP_MOV;
489 nv_reference(pc, &nvi->src[t], NULL);
490 if (s) {
491 nvi->src[0] = nvi->src[1];
492 nvi->src[1] = NULL;
493 }
494 }
495 break;
496 case NV_OP_ADD:
497 if (val->reg.imm.u32 == 0) {
498 nvi->opcode = NV_OP_MOV;
499 nv_reference(pc, &nvi->src[s], NULL);
500 nvi->src[0] = nvi->src[t];
501 nvi->src[1] = NULL;
502 }
503 break;
504 default:
505 break;
506 }
507 }
508
/* Arithmetic simplification: fold immediate operands via constant_operand
 * and fuse MUL + ADD into a single MAD.
 */
static int
nv_pass_lower_arith(struct nv_pass *ctx, struct nv_basic_block *b)
{
   struct nv_instruction *nvi, *next;
   int j;

   for (nvi = b->entry; nvi; nvi = next) {
      struct nv_value *src0, *src1, *src;
      int mod;

      next = nvi->next;

      /* simplify x*1, x+0, etc. -- may change nvi's opcode and sources */
      if ((src = find_immediate(nvi->src[0])) != NULL)
         constant_operand(ctx->pc, nvi, src, 0);
      else
      if ((src = find_immediate(nvi->src[1])) != NULL)
         constant_operand(ctx->pc, nvi, src, 1);

      /* try to combine MUL, ADD into MAD */
      if (nvi->opcode != NV_OP_ADD)
         continue;

      src0 = nvi->src[0]->value;
      src1 = nvi->src[1]->value;

      /* the multiply result must have no other users */
      if (SRC_IS_MUL(src0) && src0->refc == 1)
         src = src0;
      else
      if (SRC_IS_MUL(src1) && src1->refc == 1)
         src = src1;
      else
         continue;

      nvi->opcode = NV_OP_MAD;
      /* the MUL-result operand is replaced by the MUL's two factors;
       * the other ADD operand becomes source 2
       */
      mod = nvi->src[(src == src0) ? 0 : 1]->mod;
      nv_reference(ctx->pc, &nvi->src[(src == src0) ? 0 : 1], NULL);
      nvi->src[2] = nvi->src[(src == src0) ? 1 : 0];

      assert(!(mod & ~NV_MOD_NEG));
      nvi->src[0] = new_ref(ctx->pc, src->insn->src[0]->value);
      nvi->src[1] = new_ref(ctx->pc, src->insn->src[1]->value);
      /* a negate on the MUL result is absorbed into one factor */
      nvi->src[0]->mod = src->insn->src[0]->mod ^ mod;
      nvi->src[1]->mod = src->insn->src[1]->mod;
   }
   DESCEND_ARBITRARY(j, nv_pass_lower_arith);

   return 0;
}
557
558 /*
559 set $r2 g f32 $r2 $r3
560 cvt abs rn f32 $r2 s32 $r2
561 cvt f32 $c0 # f32 $r2
562 e $c0 bra 0x80
563 */
564 #if 0
565 static int
566 nv_pass_lower_cond(struct nv_pass *ctx, struct nv_basic_block *b)
567 {
568 /* XXX: easier in IR builder for now */
569 return 0;
570 }
571 #endif
572
/* TODO: redundant store elimination */

/* Record of one load result, keyed by the location it was loaded from. */
struct load_record {
   struct load_record *next; /* next record in the same per-space list */
   uint64_t data;            /* register id or immediate bits of the source */
   struct nv_value *value;   /* the value the load defined */
};

#define LOAD_RECORD_POOL_SIZE 1024

/* Context for nv_pass_reload_elim: per-space lists of previous loads. */
struct nv_pass_reld_elim {
   struct nv_pc *pc;

   struct load_record *imm;       /* immediate MOVs */
   struct load_record *mem_s;     /* NV_FILE_MEM_S loads */
   struct load_record *mem_v;     /* LINTERP/PINTERP input loads */
   struct load_record *mem_c[16]; /* constant buffer loads, one list each */
   struct load_record *mem_l;     /* NV_FILE_MEM_L loads */

   struct load_record pool[LOAD_RECORD_POOL_SIZE]; /* fixed allocation pool */
   int alloc;                     /* next unused entry in pool */
};
595
/* Basic-block local reload elimination: if the same immediate, input or
 * memory location was already loaded in this block, reuse the first load's
 * result instead of loading again.
 */
static int
nv_pass_reload_elim(struct nv_pass_reld_elim *ctx, struct nv_basic_block *b)
{
   struct load_record **rec, *it;
   struct nv_instruction *ld, *next;
   uint64_t data;
   struct nv_value *val;
   int j;

   for (ld = b->entry; ld; ld = next) {
      next = ld->next;
      if (!ld->src[0])
         continue;
      val = ld->src[0]->value;
      rec = NULL;

      /* pick the record list matching the kind of load */
      if (ld->opcode == NV_OP_LINTERP || ld->opcode == NV_OP_PINTERP) {
         data = val->reg.id;
         rec = &ctx->mem_v;
      } else
      if (ld->opcode == NV_OP_LDA) {
         data = val->reg.id;
         if (val->reg.file >= NV_FILE_MEM_C(0) &&
             val->reg.file <= NV_FILE_MEM_C(15))
            rec = &ctx->mem_c[val->reg.file - NV_FILE_MEM_C(0)];
         else
         if (val->reg.file == NV_FILE_MEM_S)
            rec = &ctx->mem_s;
         else
         if (val->reg.file == NV_FILE_MEM_L)
            rec = &ctx->mem_l;
      } else
      if ((ld->opcode == NV_OP_MOV) && (val->reg.file == NV_FILE_IMM)) {
         data = val->reg.imm.u32;
         rec = &ctx->imm;
      }

      /* not a tracked load, or its result is unused anyway */
      if (!rec || !ld->def[0]->refc)
         continue;

      for (it = *rec; it; it = it->next)
         if (it->data == data)
            break;

      if (it) {
         /* already loaded in this block: reuse the earlier result */
#if 1
         nvcg_replace_value(ctx->pc, ld->def[0], it->value);
#else
         ld->opcode = NV_OP_MOV;
         nv_reference(ctx->pc, &ld->src[0], it->value);
#endif
      } else {
         /* remember this load; silently stop recording when pool is full */
         if (ctx->alloc == LOAD_RECORD_POOL_SIZE)
            continue;
         it = &ctx->pool[ctx->alloc++];
         it->next = *rec;
         it->data = data;
         it->value = ld->def[0];
         *rec = it;
      }
   }

   /* records are only valid within a single basic block: reset them */
   ctx->imm = NULL;
   ctx->mem_s = NULL;
   ctx->mem_v = NULL;
   for (j = 0; j < 16; ++j)
      ctx->mem_c[j] = NULL;
   ctx->mem_l = NULL;
   ctx->alloc = 0;

   DESCEND_ARBITRARY(j, nv_pass_reload_elim);

   return 0;
}
670
/* For vector (TEX) instructions, build tex_mask from which result
 * components are actually referenced and compact the live definitions to
 * the front of def[].  Iterates over all instructions of the program; the
 * basic block argument is unused.
 */
static int
nv_pass_tex_mask(struct nv_pass *ctx, struct nv_basic_block *b)
{
   int i, c, j;

   for (i = 0; i < ctx->pc->num_instructions; ++i) {
      struct nv_instruction *nvi = &ctx->pc->instructions[i];
      struct nv_value *def[4];

      if (!nv_is_vector_op(nvi->opcode))
         continue;
      nvi->tex_mask = 0;

      for (c = 0; c < 4; ++c) {
         if (nvi->def[c]->refc)
            nvi->tex_mask |= 1 << c;
         def[c] = nvi->def[c];
      }

      j = 0;
      /* referenced components first ... */
      for (c = 0; c < 4; ++c)
         if (nvi->tex_mask & (1 << c))
            nvi->def[j++] = def[c];
      /* ... then the unreferenced ones */
      for (c = 0; c < 4; ++c)
         if (!(nvi->tex_mask & (1 << c)))
            nvi->def[j++] = def[c];
      assert(j == 4);
   }
   return 0;
}
701
/* Context for dead code elimination. */
struct nv_pass_dce {
   struct nv_pc *pc;
   uint removed; /* number of instructions deleted in the last invocation */
};
706
707 static int
708 nv_pass_dce(struct nv_pass_dce *ctx, struct nv_basic_block *b)
709 {
710 int j;
711 struct nv_instruction *nvi, *next;
712
713 for (nvi = b->entry; nvi; nvi = next) {
714 next = nvi->next;
715
716 if (inst_cullable(nvi)) {
717 nv_nvi_delete(nvi);
718
719 ++ctx->removed;
720 }
721 }
722 DESCEND_ARBITRARY(j, nv_pass_dce);
723
724 return 0;
725 }
726
727 static INLINE boolean
728 bb_simple_if_endif(struct nv_basic_block *bb)
729 {
730 return (bb->out[0] && bb->out[1] &&
731 bb->out[0]->out[0] == bb->out[1] &&
732 !bb->out[0]->out[1]);
733 }
734
/* Only counts flattenable IF/ENDIF constructs so far; actual conversion
 * to predicated code is not implemented here.
 */
static int
nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b)
{
   int j;

   if (bb_simple_if_endif(b)) {
      ++ctx->n;
      debug_printf("nv_pass_flatten: total IF/ENDIF constructs: %i\n", ctx->n);
   }
   DESCEND_ARBITRARY(j, nv_pass_flatten);

   return 0;
}
748
/* local common subexpression elimination, stupid O(n^2) implementation */
static int
nv_pass_cse(struct nv_pass *ctx, struct nv_basic_block *b)
{
   struct nv_instruction *ir, *ik, *next;
   struct nv_instruction *entry = b->phi ? b->phi : b->entry;
   int s;
   unsigned int reps;

   /* iterate until a fixed point is reached */
   do {
      reps = 0;
      for (ir = entry; ir; ir = next) {
         next = ir->next;
         /* scan all earlier instructions for an equivalent one */
         for (ik = entry; ik != ir; ik = ik->next) {
            if (ir->opcode != ik->opcode)
               continue;

            if (ik->opcode == NV_OP_LDA ||
                ik->opcode == NV_OP_STA ||
                ik->opcode == NV_OP_MOV ||
                nv_is_vector_op(ik->opcode))
               continue; /* ignore loads, stores & moves */

            if (ik->src[4] || ir->src[4])
               continue; /* don't mess with address registers */

            /* all sources must agree in modifier and value (or at least
             * in assigned register)
             */
            for (s = 0; s < 3; ++s) {
               struct nv_value *a, *b;

               if (!ik->src[s]) {
                  if (ir->src[s])
                     break;
                  continue;
               }
               if (ik->src[s]->mod != ir->src[s]->mod)
                  break;
               a = ik->src[s]->value;
               b = ir->src[s]->value;
               if (a == b)
                  continue;
               if (a->reg.file != b->reg.file ||
                   a->reg.id < 0 ||
                   a->reg.id != b->reg.id)
                  break;
            }
            if (s == 3) {
               /* NOTE(review): ir->def[0] is read after nv_nvi_delete(ir);
                * this assumes delete only unlinks the instruction and does
                * not free/clear it -- verify against nv_nvi_delete.
                */
               nv_nvi_delete(ir);
               ++reps;
               nvcg_replace_value(ctx->pc, ir->def[0], ik->def[0]);
               break;
            }
         }
      }
   } while(reps);

   DESCEND_ARBITRARY(s, nv_pass_cse);

   return 0;
}
808
809 int
810 nv_pc_exec_pass0(struct nv_pc *pc)
811 {
812 struct nv_pass_reld_elim *reldelim;
813 struct nv_pass pass;
814 struct nv_pass_dce dce;
815 int ret;
816
817 pass.pc = pc;
818
819 pc->pass_seq++;
820 ret = nv_pass_flatten(&pass, pc->root);
821 if (ret)
822 return ret;
823
824 /* Do this first, so we don't have to pay attention
825 * to whether sources are supported memory loads.
826 */
827 pc->pass_seq++;
828 ret = nv_pass_lower_arith(&pass, pc->root);
829 if (ret)
830 return ret;
831
832 pc->pass_seq++;
833 ret = nv_pass_fold_loads(&pass, pc->root);
834 if (ret)
835 return ret;
836
837 pc->pass_seq++;
838 ret = nv_pass_fold_stores(&pass, pc->root);
839 if (ret)
840 return ret;
841
842 reldelim = CALLOC_STRUCT(nv_pass_reld_elim);
843 reldelim->pc = pc;
844 pc->pass_seq++;
845 ret = nv_pass_reload_elim(reldelim, pc->root);
846 FREE(reldelim);
847 if (ret)
848 return ret;
849
850 pc->pass_seq++;
851 ret = nv_pass_cse(&pass, pc->root);
852 if (ret)
853 return ret;
854
855 pc->pass_seq++;
856 ret = nv_pass_lower_mods(&pass, pc->root);
857 if (ret)
858 return ret;
859
860 dce.pc = pc;
861 do {
862 dce.removed = 0;
863 pc->pass_seq++;
864 ret = nv_pass_dce(&dce, pc->root);
865 if (ret)
866 return ret;
867 } while (dce.removed);
868
869 ret = nv_pass_tex_mask(&pass, pc->root);
870 if (ret)
871 return ret;
872
873 return ret;
874 }