nv50: don't eliminate loads to dedicated values
[mesa.git] / src / gallium / drivers / nv50 / nv50_pc_optimize.c
1 /*
2 * Copyright 2010 Christoph Bumiller
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 * SOFTWARE.
21 */
22
23 #include "nv50_pc.h"
24
/* Descend into the (up to two) CFG successors of block 'b' that were not
 * yet visited in the current pass, using pass_seq as the visited marker.
 * Expects 'b' (current block) and 'ctx' (pass context with ctx->pc) to be
 * in scope at the expansion site; 'j' is a caller-provided loop variable.
 */
#define DESCEND_ARBITRARY(j, f)                                 \
do {                                                            \
   b->pass_seq = ctx->pc->pass_seq;                             \
                                                                \
   for (j = 0; j < 2; ++j)                                      \
      if (b->out[j] && b->out[j]->pass_seq < ctx->pc->pass_seq) \
         f(ctx, b->out[j]);                                     \
} while (0)

/* Defined by the emitter; returns the smallest possible encoding size of
 * the instruction.  NOTE(review): usage below compares against 4 and 8,
 * so the unit is presumably bytes (short = 4, long = 8) -- confirm.
 */
extern unsigned nv50_inst_min_size(struct nv_instruction *);

/* Minimal pass context: just the program being processed. */
struct nv_pc_pass {
   struct nv_pc *pc;
};
39
40 static INLINE boolean
41 values_equal(struct nv_value *a, struct nv_value *b)
42 {
43 /* XXX: sizes */
44 return (a->reg.file == b->reg.file && a->join->reg.id == b->join->reg.id);
45 }
46
47 static INLINE boolean
48 inst_commutation_check(struct nv_instruction *a,
49 struct nv_instruction *b)
50 {
51 int si, di;
52
53 for (di = 0; di < 4; ++di) {
54 if (!a->def[di])
55 break;
56 for (si = 0; si < 5; ++si) {
57 if (!b->src[si])
58 continue;
59 if (values_equal(a->def[di], b->src[si]->value))
60 return FALSE;
61 }
62 }
63
64 if (b->flags_src && b->flags_src->value == a->flags_def)
65 return FALSE;
66
67 return TRUE;
68 }
69
70 /* Check whether we can swap the order of the instructions,
71 * where a & b may be either the earlier or the later one.
72 */
73 static boolean
74 inst_commutation_legal(struct nv_instruction *a,
75 struct nv_instruction *b)
76 {
77 return inst_commutation_check(a, b) && inst_commutation_check(b, a);
78 }
79
80 static INLINE boolean
81 inst_cullable(struct nv_instruction *nvi)
82 {
83 return (!(nvi->is_terminator ||
84 nvi->target ||
85 nvi->fixed ||
86 nv_nvi_refcount(nvi)));
87 }
88
/* Decide whether nvi generates no code: either its result was never
 * assigned a hardware register, or it is a register-preserving
 * MOV/SELECT.  The order of the tests below matters.
 */
static INLINE boolean
nvi_isnop(struct nv_instruction *nvi)
{
   /* EXPORT is unconditionally treated as emitting nothing here */
   if (nvi->opcode == NV_OP_EXPORT)
      return TRUE;

   /* side effects, control flow and condition codes must be kept */
   if (nvi->fixed ||
       nvi->is_terminator ||
       nvi->flags_src ||
       nvi->flags_def)
      return FALSE;

   /* result never got a hw register: nothing needs to be computed */
   if (nvi->def[0]->join->reg.id < 0)
      return TRUE;

   if (nvi->opcode != NV_OP_MOV && nvi->opcode != NV_OP_SELECT)
      return FALSE;

   /* a move between different register files does real work */
   if (nvi->def[0]->reg.file != nvi->src[0]->value->reg.file)
      return FALSE;

   /* source without a register: treat as orphaned and drop the move */
   if (nvi->src[0]->value->join->reg.id < 0) {
      debug_printf("nvi_isnop: orphaned value detected\n");
      return TRUE;
   }

   /* SELECT is only a no-op if the second input coincides with the
    * output as well
    */
   if (nvi->opcode == NV_OP_SELECT)
      if (!values_equal(nvi->def[0], nvi->src[1]->value))
         return FALSE;

   return values_equal(nvi->def[0], nvi->src[0]->value);
}
121
/* Prepare block b for emission: delete no-ops, choose 4 vs. 8 byte
 * encodings (short instructions must come in pairs), compute bin_pos /
 * bin_size, and recurse into CFG successors in emission order.
 * Sizes are accumulated in 4-byte units and scaled to bytes at the end.
 */
static void
nv_pc_pass_pre_emission(struct nv_pc *pc, struct nv_basic_block *b)
{
   struct nv_basic_block *in;
   struct nv_instruction *nvi, *next;
   int j;
   uint size, n32 = 0;

   b->priv = 0;

   /* find the last already-scheduled block that emitted any code */
   for (j = pc->num_blocks - 1; j >= 0 && !pc->bb_list[j]->bin_size; --j);
   if (j >= 0) {
      in = pc->bb_list[j];

      /* check for no-op branches (BRA $PC+8) */
      if (in->exit && in->exit->opcode == NV_OP_BRA && in->exit->target == b) {
         in->bin_size -= 8;
         pc->bin_size -= 8;

         /* shift any (empty) blocks recorded after 'in' back by the
          * deleted 8-byte branch
          */
         for (++j; j < pc->num_blocks; ++j)
            pc->bb_list[j]->bin_pos -= 8;

         nv_nvi_delete(in->exit);
      }
      b->bin_pos = in->bin_pos + in->bin_size;
   }

   pc->bb_list[pc->num_blocks++] = b;

   /* visit node */

   /* remove no-ops first so the sizing loop sees the final code */
   for (nvi = b->entry; nvi; nvi = next) {
      next = nvi->next;
      if (nvi_isnop(nvi))
         nv_nvi_delete(nvi);
   }

   /* count short (4 byte) instructions in n32; an odd short run before a
    * long instruction must be fixed up, either by swapping in the next
    * short instruction (if legal) or by promoting encodings to 8 bytes
    */
   for (nvi = b->entry; nvi; nvi = next) {
      next = nvi->next;

      size = nv50_inst_min_size(nvi);
      if (nvi->next && size < 8)
         ++n32;
      else
      if ((n32 & 1) && nvi->next &&
          nv50_inst_min_size(nvi->next) == 4 &&
          inst_commutation_legal(nvi, nvi->next)) {
         ++n32;
         debug_printf("permuting: ");
         nv_print_instruction(nvi);
         nv_print_instruction(nvi->next);
         nv_nvi_permute(nvi, nvi->next);
         next = nvi; /* revisit: nvi is now after the swapped insn */
      } else {
         nvi->is_long = 1;

         /* an unpaired short predecessor must also become long */
         b->bin_size += n32 & 1;
         if (n32 & 1)
            nvi->prev->is_long = 1;
         n32 = 0;
      }
      b->bin_size += 1 + nvi->is_long;
   }

   if (!b->entry) {
      debug_printf("block %p is now empty\n", b);
   } else
   if (!b->exit->is_long) {
      /* the last instruction of a block must use the long encoding */
      assert(n32);
      b->exit->is_long = 1;
      b->bin_size += 1;

      /* might have del'd a whole tail of instructions */
      if (!b->exit->prev->is_long && !(n32 & 1)) {
         b->bin_size += 1;
         b->exit->prev->is_long = 1;
      }
   }
   assert(!b->entry || (b->exit && b->exit->is_long));

   /* convert from 4-byte units to bytes */
   pc->bin_size += b->bin_size *= 4;

   /* descend CFG */

   if (!b->out[0])
      return;
   /* only emit a merge point once all its predecessors are scheduled */
   if (!b->out[1] && ++(b->out[0]->priv) != b->out[0]->num_in)
      return;

   for (j = 0; j < 2; ++j)
      if (b->out[j] && b->out[j] != b)
         nv_pc_pass_pre_emission(pc, b->out[j]);
}
215
216 int
217 nv_pc_exec_pass2(struct nv_pc *pc)
218 {
219 debug_printf("preparing %u blocks for emission\n", pc->num_blocks);
220
221 pc->bb_list = CALLOC(pc->num_blocks, sizeof(struct nv_basic_block *));
222
223 pc->num_blocks = 0;
224 nv_pc_pass_pre_emission(pc, pc->root);
225
226 return 0;
227 }
228
229 static INLINE boolean
230 is_cmem_load(struct nv_instruction *nvi)
231 {
232 return (nvi->opcode == NV_OP_LDA &&
233 nvi->src[0]->value->reg.file >= NV_FILE_MEM_C(0) &&
234 nvi->src[0]->value->reg.file <= NV_FILE_MEM_C(15));
235 }
236
237 static INLINE boolean
238 is_smem_load(struct nv_instruction *nvi)
239 {
240 return (nvi->opcode == NV_OP_LDA &&
241 (nvi->src[0]->value->reg.file == NV_FILE_MEM_S ||
242 nvi->src[0]->value->reg.file <= NV_FILE_MEM_P));
243 }
244
245 static INLINE boolean
246 is_immd_move(struct nv_instruction *nvi)
247 {
248 return (nvi->opcode == NV_OP_MOV &&
249 nvi->src[0]->value->reg.file == NV_FILE_IMM);
250 }
251
/* For commutative ops, swap operands so that a constant-memory load ends
 * up in src1 and a shared-memory load in src0 (NOTE(review): the swap
 * directions suggest only those slots accept the respective memory
 * operand directly -- confirm against the emitter).  For SET, swapping
 * the operands requires remapping the comparison condition.
 */
static INLINE void
check_swap_src_0_1(struct nv_instruction *nvi)
{
   /* condition-code remap for swapped operands; NOTE(review): pattern
    * (1<->4, 3<->6) looks like LT<->GT / LE<->GE -- confirm encoding
    */
   static const ubyte cc_swapped[8] = { 0, 4, 2, 6, 1, 5, 3, 7 };

   struct nv_ref *src0 = nvi->src[0], *src1 = nvi->src[1];

   if (!nv_op_commutative(nvi->opcode))
      return;
   assert(src0 && src1);

   if (is_cmem_load(src0->value->insn)) {
      if (!is_cmem_load(src1->value->insn)) {
         nvi->src[0] = src1;
         nvi->src[1] = src0;
         /* debug_printf("swapping cmem load to 1\n"); */
      }
   } else
   if (is_smem_load(src1->value->insn)) {
      if (!is_smem_load(src0->value->insn)) {
         nvi->src[0] = src1;
         nvi->src[1] = src0;
         /* debug_printf("swapping smem load to 0\n"); */
      }
   }

   /* a swap happened iff src[0] no longer equals the saved pointer */
   if (nvi->opcode == NV_OP_SET && nvi->src[0] != src0)
      nvi->set_cond = cc_swapped[nvi->set_cond];
}
281
/* Generic pass context: program under optimization, a scratch counter
 * (n) and an optional pass-private pointer (priv).
 */
struct nv_pass {
   struct nv_pc *pc;
   int n;
   void *priv;
};
287
/* Fold "MOV $oX, v" into the instruction defining v, so the producer
 * writes the output register directly and the move becomes dead.
 */
static int
nv_pass_fold_stores(struct nv_pass *ctx, struct nv_basic_block *b)
{
   struct nv_instruction *nvi, *sti;
   int j;

   for (sti = b->entry; sti; sti = sti->next) {
      if (!sti->def[0] || sti->def[0]->reg.file != NV_FILE_OUT)
         continue;

      /* only handling MOV to $oX here */
      if (sti->opcode != NV_OP_MOV && sti->opcode != NV_OP_STA)
         continue;

      nvi = sti->src[0]->value->insn;
      if (!nvi || nvi->opcode == NV_OP_PHI)
         continue;
      assert(nvi->def[0] == sti->src[0]->value);

      /* value has other users; retargeting it would break them */
      if (nvi->def[0]->refc > 1)
         continue;

      /* cannot MOV immediate to $oX */
      if (nvi->src[0]->value->reg.file == NV_FILE_IMM)
         continue;

      /* retarget the producer; sti is left defless and keeps nothing.
       * NOTE(review): sti->def[0] becomes NULL -- later passes are
       * presumably expected to delete the stripped move; confirm
       */
      nvi->def[0] = sti->def[0];
      sti->def[0] = NULL;
      nvi->fixed = sti->fixed;
      sti->fixed = 0;
   }
   DESCEND_ARBITRARY(j, nv_pass_fold_stores);

   return 0;
}
323
/* Replace uses of values produced by LDA / immediate-MOV instructions
 * with direct memory or immediate source operands where the consuming
 * instruction supports them.
 */
static int
nv_pass_fold_loads(struct nv_pass *ctx, struct nv_basic_block *b)
{
   struct nv_instruction *nvi, *ld;
   int j;

   for (nvi = b->entry; nvi; nvi = nvi->next) {
      check_swap_src_0_1(nvi);

      for (j = 0; j < 3; ++j) {
         if (!nvi->src[j])
            break;
         ld = nvi->src[j]->value->insn;
         if (!ld)
            continue;

         /* immediates can be folded wherever the op accepts one */
         if (is_immd_move(ld) && nv50_nvi_can_use_imm(nvi, j)) {
            nv_reference(ctx->pc, &nvi->src[j], ld->src[0]->value);
            debug_printf("folded immediate %i\n", ld->def[0]->n);
            continue;
         }

         if (ld->opcode != NV_OP_LDA)
            continue;
         if (!nv50_nvi_can_load(nvi, j, ld->src[0]->value))
            continue;

         /* src[4] is the load's address register operand */
         if (j == 0 && ld->src[4]) /* can't load shared mem */
            continue;

         /* fold it ! */ /* XXX: ref->insn */
         nv_reference(ctx->pc, &nvi->src[j], ld->src[0]->value);
         if (ld->src[4])
            nv_reference(ctx->pc, &nvi->src[4], ld->src[4]->value);
      }
   }
   DESCEND_ARBITRARY(j, nv_pass_fold_loads);

   return 0;
}
364
365 static int
366 nv_pass_lower_mods(struct nv_pass *ctx, struct nv_basic_block *b)
367 {
368 int j;
369 struct nv_instruction *nvi, *mi, *next;
370 ubyte mod;
371
372 for (nvi = b->entry; nvi; nvi = next) {
373 next = nvi->next;
374 if (nvi->opcode == NV_OP_SUB) {
375 nvi->opcode = NV_OP_ADD;
376 nvi->src[1]->mod ^= NV_MOD_NEG;
377 }
378
379 /* should not put any modifiers on NEG and ABS */
380 assert(nvi->opcode != NV_MOD_NEG || !nvi->src[0]->mod);
381 assert(nvi->opcode != NV_MOD_ABS || !nvi->src[0]->mod);
382
383 for (j = 0; j < 4; ++j) {
384 if (!nvi->src[j])
385 break;
386
387 mi = nvi->src[j]->value->insn;
388 if (!mi)
389 continue;
390 if (mi->def[0]->refc > 1)
391 continue;
392
393 if (mi->opcode == NV_OP_NEG) mod = NV_MOD_NEG;
394 else
395 if (mi->opcode == NV_OP_ABS) mod = NV_MOD_ABS;
396 else
397 continue;
398
399 if (nvi->opcode == NV_OP_ABS)
400 mod &= ~(NV_MOD_NEG | NV_MOD_ABS);
401 else
402 if (nvi->opcode == NV_OP_NEG && mod == NV_MOD_NEG) {
403 nvi->opcode = NV_OP_MOV;
404 mod = 0;
405 }
406
407 if (!(nv50_supported_src_mods(nvi->opcode, j) & mod))
408 continue;
409
410 nv_reference(ctx->pc, &nvi->src[j], mi->src[0]->value);
411
412 nvi->src[j]->mod ^= mod;
413 }
414
415 if (nvi->opcode == NV_OP_SAT) {
416 mi = nvi->src[0]->value->insn;
417
418 if ((mi->opcode == NV_OP_MAD) && !mi->flags_def) {
419 mi->saturate = 1;
420 mi->def[0] = nvi->def[0];
421 nv_nvi_delete(nvi);
422 }
423 }
424 }
425 DESCEND_ARBITRARY(j, nv_pass_lower_mods);
426
427 return 0;
428 }
429
/* True if the value was produced by a MUL instruction. */
#define SRC_IS_MUL(s) ((s)->insn && (s)->insn->opcode == NV_OP_MUL)

/* Chase a chain of MOVs back to the original value; return it if it is
 * an immediate, NULL otherwise.
 */
static struct nv_value *
find_immediate(struct nv_ref *ref)
{
   struct nv_value *src;

   if (!ref)
      return NULL;

   src = ref->value;
   while (src->insn && src->insn->opcode == NV_OP_MOV) {
      /* a modifier on the MOV source would change the value */
      assert(!src->insn->src[0]->mod);
      src = src->insn->src[0]->value;
   }
   return (src->reg.file == NV_FILE_IMM) ? src : NULL;
}
447
/* Strength-reduce nvi given that its source s is the constant val
 * (t denotes the other, non-constant operand):
 *   MUL by 1 -> MOV; by 2 -> ADD x+x; by -1 -> NEG; by -2 -> (-x)+(-x);
 *   by 0 -> MOV 0.  ADD of 0 -> MOV.
 */
static void
constant_operand(struct nv_pc *pc,
                 struct nv_instruction *nvi, struct nv_value *val, int s)
{
   int t = s ? 0 : 1;
   ubyte type;

   if (!nvi->def[0])
      return;
   type = nvi->def[0]->reg.type;

   switch (nvi->opcode) {
   case NV_OP_MUL:
      if ((type == NV_TYPE_F32 && val->reg.imm.f32 == 1.0f) ||
          (NV_TYPE_ISINT(type) && val->reg.imm.u32 == 1)) {
         /* x * 1 -> MOV x: drop the constant, keep the variable in src0 */
         nvi->opcode = NV_OP_MOV;
         nv_reference(pc, &nvi->src[s], NULL);
         if (!s) {
            nvi->src[0] = nvi->src[1];
            nvi->src[1] = NULL;
         }
      } else
      if ((type == NV_TYPE_F32 && val->reg.imm.f32 == 2.0f) ||
          (NV_TYPE_ISINT(type) && val->reg.imm.u32 == 2)) {
         /* x * 2 -> x + x: point both sources at the variable */
         nvi->opcode = NV_OP_ADD;
         nv_reference(pc, &nvi->src[s], nvi->src[t]->value);
      } else
      if (type == NV_TYPE_F32 && val->reg.imm.f32 == -1.0f) {
         /* x * -1 -> NEG x */
         nvi->opcode = NV_OP_NEG;
         nv_reference(pc, &nvi->src[s], NULL);
         nvi->src[0] = nvi->src[t];
         nvi->src[1] = NULL;
      } else
      if (type == NV_TYPE_F32 && val->reg.imm.f32 == -2.0f) {
         /* x * -2 -> (-x) + (-x) */
         nvi->opcode = NV_OP_ADD;
         assert(!nvi->src[s]->mod);
         nv_reference(pc, &nvi->src[s], nvi->src[t]->value);
         nvi->src[t]->mod ^= NV_MOD_NEG;
         nvi->src[s]->mod |= NV_MOD_NEG;
      } else
      if (val->reg.imm.u32 == 0) {
         /* x * 0 -> MOV 0: keep the immediate source, drop the other */
         nvi->opcode = NV_OP_MOV;
         nv_reference(pc, &nvi->src[t], NULL);
         if (s) {
            nvi->src[0] = nvi->src[1];
            nvi->src[1] = NULL;
         }
      }
      break;
   case NV_OP_ADD:
      if (val->reg.imm.u32 == 0) {
         /* x + 0 -> MOV x */
         nvi->opcode = NV_OP_MOV;
         nv_reference(pc, &nvi->src[s], NULL);
         nvi->src[0] = nvi->src[t];
         nvi->src[1] = NULL;
      }
      break;
   default:
      break;
   }
}
509
/* Apply constant-operand strength reduction and merge MUL + ADD pairs
 * into a single MAD.
 */
static int
nv_pass_lower_arith(struct nv_pass *ctx, struct nv_basic_block *b)
{
   struct nv_instruction *nvi, *next;
   int j;

   for (nvi = b->entry; nvi; nvi = next) {
      struct nv_value *src0, *src1, *src;
      int mod;

      next = nvi->next;

      /* simplify ops with an immediate operand (seen through MOV chains) */
      if ((src = find_immediate(nvi->src[0])) != NULL)
         constant_operand(ctx->pc, nvi, src, 0);
      else
      if ((src = find_immediate(nvi->src[1])) != NULL)
         constant_operand(ctx->pc, nvi, src, 1);

      /* try to combine MUL, ADD into MAD */
      if (nvi->opcode != NV_OP_ADD)
         continue;

      src0 = nvi->src[0]->value;
      src1 = nvi->src[1]->value;

      /* the MUL result must be used only here, or it stays live anyway */
      if (SRC_IS_MUL(src0) && src0->refc == 1)
         src = src0;
      else
      if (SRC_IS_MUL(src1) && src1->refc == 1)
         src = src1;
      else
         continue;

      /* move the non-MUL addend into src[2], pull the MUL factors in */
      nvi->opcode = NV_OP_MAD;
      mod = nvi->src[(src == src0) ? 0 : 1]->mod;
      nv_reference(ctx->pc, &nvi->src[(src == src0) ? 0 : 1], NULL);
      nvi->src[2] = nvi->src[(src == src0) ? 1 : 0];

      /* fold the addend's NEG into the first MUL factor */
      assert(!(mod & ~NV_MOD_NEG));
      nvi->src[0] = new_ref(ctx->pc, src->insn->src[0]->value);
      nvi->src[1] = new_ref(ctx->pc, src->insn->src[1]->value);
      nvi->src[0]->mod = src->insn->src[0]->mod ^ mod;
      nvi->src[1]->mod = src->insn->src[1]->mod;
   }
   DESCEND_ARBITRARY(j, nv_pass_lower_arith);

   return 0;
}
558
559 /*
560 set $r2 g f32 $r2 $r3
561 cvt abs rn f32 $r2 s32 $r2
562 cvt f32 $c0 # f32 $r2
563 e $c0 bra 0x80
564 */
565 #if 0
566 static int
567 nv_pass_lower_cond(struct nv_pass *ctx, struct nv_basic_block *b)
568 {
569 /* XXX: easier in IR builder for now */
570 return 0;
571 }
572 #endif
573
/* TODO: redundant store elimination */

/* One remembered load: 'data' identifies the source (register id or
 * immediate bits), 'value' is the loaded result later loads can reuse.
 */
struct load_record {
   struct load_record *next;
   uint64_t data;
   struct nv_value *value;
};

#define LOAD_RECORD_POOL_SIZE 1024

/* Context for redundant-load elimination: one record list per memory
 * space, backed by a fixed-size pool so no per-record heap allocation
 * is needed.
 */
struct nv_pass_reld_elim {
   struct nv_pc *pc;

   struct load_record *imm;
   struct load_record *mem_s;
   struct load_record *mem_v;
   struct load_record *mem_c[16];
   struct load_record *mem_l;

   struct load_record pool[LOAD_RECORD_POOL_SIZE];
   int alloc;
};
596
/* Eliminate reloads of data that is already live in a register.
 * Loads whose result sits in a dedicated register (reg.id >= 0) are
 * never replaced -- they become the new record value instead.
 */
static int
nv_pass_reload_elim(struct nv_pass_reld_elim *ctx, struct nv_basic_block *b)
{
   struct load_record **rec, *it;
   struct nv_instruction *ld, *next;
   uint64_t data;
   struct nv_value *val;
   int j;

   for (ld = b->entry; ld; ld = next) {
      next = ld->next;
      if (!ld->src[0])
         continue;
      val = ld->src[0]->value;
      rec = NULL;

      /* select the record list matching the load's source location */
      if (ld->opcode == NV_OP_LINTERP || ld->opcode == NV_OP_PINTERP) {
         data = val->reg.id;
         rec = &ctx->mem_v;
      } else
      if (ld->opcode == NV_OP_LDA) {
         data = val->reg.id;
         if (val->reg.file >= NV_FILE_MEM_C(0) &&
             val->reg.file <= NV_FILE_MEM_C(15))
            rec = &ctx->mem_c[val->reg.file - NV_FILE_MEM_C(0)];
         else
         if (val->reg.file == NV_FILE_MEM_S)
            rec = &ctx->mem_s;
         else
         if (val->reg.file == NV_FILE_MEM_L)
            rec = &ctx->mem_l;
      } else
      if ((ld->opcode == NV_OP_MOV) && (val->reg.file == NV_FILE_IMM)) {
         data = val->reg.imm.u32;
         rec = &ctx->imm;
      }

      if (!rec || !ld->def[0]->refc)
         continue;

      /* look for an earlier load of the same datum */
      for (it = *rec; it; it = it->next)
         if (it->data == data)
            break;

      if (it) {
         /* don't eliminate loads to dedicated values: update the record
          * to point at the fixed register instead of replacing the load
          */
         if (ld->def[0]->reg.id >= 0)
            it->value = ld->def[0];
         else
            nvcg_replace_value(ctx->pc, ld->def[0], it->value);
      } else {
         if (ctx->alloc == LOAD_RECORD_POOL_SIZE)
            continue; /* pool exhausted: skip recording, stay correct */
         it = &ctx->pool[ctx->alloc++];
         it->next = *rec;
         it->data = data;
         it->value = ld->def[0];
         *rec = it;
      }
   }

   /* records are local to a basic block: reset before descending */
   ctx->imm = NULL;
   ctx->mem_s = NULL;
   ctx->mem_v = NULL;
   for (j = 0; j < 16; ++j)
      ctx->mem_c[j] = NULL;
   ctx->mem_l = NULL;
   ctx->alloc = 0;

   DESCEND_ARBITRARY(j, nv_pass_reload_elim);

   return 0;
}
669
/* Compute the write mask of vector (TEX-style) instructions from actual
 * use of their components and compact the used defs to the front.
 * Note: iterates over all instructions of the program; the block
 * argument only exists to match the common pass signature.
 */
static int
nv_pass_tex_mask(struct nv_pass *ctx, struct nv_basic_block *b)
{
   int i, c, j;

   for (i = 0; i < ctx->pc->num_instructions; ++i) {
      struct nv_instruction *nvi = &ctx->pc->instructions[i];
      struct nv_value *def[4];

      if (!nv_is_vector_op(nvi->opcode))
         continue;
      nvi->tex_mask = 0;

      /* a component is live iff its def is referenced somewhere */
      for (c = 0; c < 4; ++c) {
         if (nvi->def[c]->refc)
            nvi->tex_mask |= 1 << c;
         def[c] = nvi->def[c];
      }

      /* stable partition: used components first, unused ones after */
      j = 0;
      for (c = 0; c < 4; ++c)
         if (nvi->tex_mask & (1 << c))
            nvi->def[j++] = def[c];
      for (c = 0; c < 4; ++c)
         if (!(nvi->tex_mask & (1 << c)))
            nvi->def[j++] = def[c];
      assert(j == 4);
   }
   return 0;
}
700
/* DCE context: 'removed' counts deletions so the caller can iterate the
 * pass to a fixpoint.
 */
struct nv_pass_dce {
   struct nv_pc *pc;
   uint removed;
};
705
706 static int
707 nv_pass_dce(struct nv_pass_dce *ctx, struct nv_basic_block *b)
708 {
709 int j;
710 struct nv_instruction *nvi, *next;
711
712 for (nvi = b->entry; nvi; nvi = next) {
713 next = nvi->next;
714
715 if (inst_cullable(nvi)) {
716 nv_nvi_delete(nvi);
717
718 ++ctx->removed;
719 }
720 }
721 DESCEND_ARBITRARY(j, nv_pass_dce);
722
723 return 0;
724 }
725
726 static INLINE boolean
727 bb_simple_if_endif(struct nv_basic_block *bb)
728 {
729 return (bb->out[0] && bb->out[1] &&
730 bb->out[0]->out[0] == bb->out[1] &&
731 !bb->out[0]->out[1]);
732 }
733
734 static int
735 nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b)
736 {
737 int j;
738
739 if (bb_simple_if_endif(b)) {
740 ++ctx->n;
741 debug_printf("nv_pass_flatten: total IF/ENDIF constructs: %i\n", ctx->n);
742 }
743 DESCEND_ARBITRARY(j, nv_pass_flatten);
744
745 return 0;
746 }
747
/* local common subexpression elimination, stupid O(n^2) implementation */
static int
nv_pass_cse(struct nv_pass *ctx, struct nv_basic_block *b)
{
   struct nv_instruction *ir, *ik, *next;
   struct nv_instruction *entry = b->phi ? b->phi : b->entry;
   int s;
   unsigned int reps;

   /* repeat until no more replacements happen */
   do {
      reps = 0;
      for (ir = entry; ir; ir = next) {
         next = ir->next;
         /* search the preceding instructions for a duplicate of ir */
         for (ik = entry; ik != ir; ik = ik->next) {
            if (ir->opcode != ik->opcode)
               continue;

            if (ik->opcode == NV_OP_LDA ||
                ik->opcode == NV_OP_STA ||
                ik->opcode == NV_OP_MOV ||
                nv_is_vector_op(ik->opcode))
               continue; /* ignore loads, stores & moves */

            if (ik->src[4] || ir->src[4])
               continue; /* don't mess with address registers */

            for (s = 0; s < 3; ++s) {
               struct nv_value *a, *b; /* NOTE(review): shadows block 'b' */

               if (!ik->src[s]) {
                  if (ir->src[s])
                     break;
                  continue;
               }
               if (ik->src[s]->mod != ir->src[s]->mod)
                  break;
               a = ik->src[s]->value;
               b = ir->src[s]->value;
               if (a == b)
                  continue;
               /* distinct values still match if they were assigned the
                * same hardware register
                */
               if (a->reg.file != b->reg.file ||
                   a->reg.id < 0 ||
                   a->reg.id != b->reg.id)
                  break;
            }
            if (s == 3) { /* all (up to 3) sources matched */
               nv_nvi_delete(ir);
               ++reps;
               /* NOTE(review): ir->def[0] is read after nv_nvi_delete;
                * assumes delete only unlinks and does not free -- confirm
                */
               nvcg_replace_value(ctx->pc, ir->def[0], ik->def[0]);
               break;
            }
         }
      }
   } while(reps);

   DESCEND_ARBITRARY(s, nv_pass_cse);

   return 0;
}
807
808 int
809 nv_pc_exec_pass0(struct nv_pc *pc)
810 {
811 struct nv_pass_reld_elim *reldelim;
812 struct nv_pass pass;
813 struct nv_pass_dce dce;
814 int ret;
815
816 pass.n = 0;
817 pass.pc = pc;
818
819 pc->pass_seq++;
820 ret = nv_pass_flatten(&pass, pc->root);
821 if (ret)
822 return ret;
823
824 /* Do this first, so we don't have to pay attention
825 * to whether sources are supported memory loads.
826 */
827 pc->pass_seq++;
828 ret = nv_pass_lower_arith(&pass, pc->root);
829 if (ret)
830 return ret;
831
832 pc->pass_seq++;
833 ret = nv_pass_fold_loads(&pass, pc->root);
834 if (ret)
835 return ret;
836
837 pc->pass_seq++;
838 ret = nv_pass_fold_stores(&pass, pc->root);
839 if (ret)
840 return ret;
841
842 reldelim = CALLOC_STRUCT(nv_pass_reld_elim);
843 reldelim->pc = pc;
844 pc->pass_seq++;
845 ret = nv_pass_reload_elim(reldelim, pc->root);
846 FREE(reldelim);
847 if (ret)
848 return ret;
849
850 pc->pass_seq++;
851 ret = nv_pass_cse(&pass, pc->root);
852 if (ret)
853 return ret;
854
855 pc->pass_seq++;
856 ret = nv_pass_lower_mods(&pass, pc->root);
857 if (ret)
858 return ret;
859
860 dce.pc = pc;
861 do {
862 dce.removed = 0;
863 pc->pass_seq++;
864 ret = nv_pass_dce(&dce, pc->root);
865 if (ret)
866 return ret;
867 } while (dce.removed);
868
869 ret = nv_pass_tex_mask(&pass, pc->root);
870 if (ret)
871 return ret;
872
873 return ret;
874 }