nv50: more constant folding
[mesa.git] / src / gallium / drivers / nv50 / nv50_pc_optimize.c
1 /*
2 * Copyright 2010 Christoph Bumiller
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 * SOFTWARE.
21 */
22
23 #include "nv50_pc.h"
24
/* Recurse into both successors of block 'b' that have not yet been
 * visited in the current pass; pass_seq serves as the visited marker.
 * 'ctx' and 'b' are expected in scope at the expansion site.
 */
#define DESCEND_ARBITRARY(j, f)                                 \
do {                                                            \
   b->pass_seq = ctx->pc->pass_seq;                             \
                                                                \
   for (j = 0; j < 2; ++j)                                      \
      if (b->out[j] && b->out[j]->pass_seq < ctx->pc->pass_seq) \
         f(ctx, b->out[j]);                                     \
} while (0)
33
/* Provided by the emitter: minimal encoding size (in bytes) of an insn. */
extern unsigned nv50_inst_min_size(struct nv_instruction *);

/* Minimal pass context: only carries the program container. */
struct nv_pc_pass {
   struct nv_pc *pc;
};
39
40 static INLINE boolean
41 values_equal(struct nv_value *a, struct nv_value *b)
42 {
43 /* XXX: sizes */
44 return (a->reg.file == b->reg.file && a->join->reg.id == b->join->reg.id);
45 }
46
47 static INLINE boolean
48 inst_commutation_check(struct nv_instruction *a,
49 struct nv_instruction *b)
50 {
51 int si, di;
52
53 for (di = 0; di < 4; ++di) {
54 if (!a->def[di])
55 break;
56 for (si = 0; si < 5; ++si) {
57 if (!b->src[si])
58 continue;
59 if (values_equal(a->def[di], b->src[si]->value))
60 return FALSE;
61 }
62 }
63
64 if (b->flags_src && b->flags_src->value == a->flags_def)
65 return FALSE;
66
67 return TRUE;
68 }
69
70 /* Check whether we can swap the order of the instructions,
71 * where a & b may be either the earlier or the later one.
72 */
73 static boolean
74 inst_commutation_legal(struct nv_instruction *a,
75 struct nv_instruction *b)
76 {
77 return inst_commutation_check(a, b) && inst_commutation_check(b, a);
78 }
79
80 static INLINE boolean
81 inst_cullable(struct nv_instruction *nvi)
82 {
83 return (!(nvi->is_terminator ||
84 nvi->target ||
85 nvi->fixed ||
86 nv_nvi_refcount(nvi)));
87 }
88
/* TRUE if 'nvi' can be removed without changing the program: EXPORTs,
 * instructions whose result never got a register, and MOV/SELECT whose
 * source and destination coalesce to the same register.
 */
static INLINE boolean
nvi_isnop(struct nv_instruction *nvi)
{
   if (nvi->opcode == NV_OP_EXPORT)
      return TRUE;

   /* pinned instructions, terminators and anything touching flags stay */
   if (nvi->fixed ||
       nvi->is_terminator ||
       nvi->flags_src ||
       nvi->flags_def)
      return FALSE;

   /* result was never assigned a register: nothing can read it */
   if (nvi->def[0]->join->reg.id < 0)
      return TRUE;

   if (nvi->opcode != NV_OP_MOV && nvi->opcode != NV_OP_SELECT)
      return FALSE;

   if (nvi->def[0]->reg.file != nvi->src[0]->value->reg.file)
      return FALSE;

   if (nvi->src[0]->value->join->reg.id < 0) {
      debug_printf("nvi_isnop: orphaned value detected\n");
      return TRUE;
   }

   /* a SELECT is only a nop if src[1] also coincides with the def */
   if (nvi->opcode == NV_OP_SELECT)
      if (!values_equal(nvi->def[0], nvi->src[1]->value))
         return FALSE;

   return values_equal(nvi->def[0], nvi->src[0]->value);
}
121
/* Prepare block 'b' for emission: delete no-ops, place the block in the
 * binary (bin_pos), and size every instruction as short (4 bytes) or
 * long (8 bytes). Called in emission order via nv_pc_pass_in_order;
 * pc->bb_list accumulates the blocks in that order.
 */
static void
nv_pc_pass_pre_emission(void *priv, struct nv_basic_block *b)
{
   struct nv_pc *pc = (struct nv_pc *)priv;
   struct nv_basic_block *in;
   struct nv_instruction *nvi, *next;
   int j;
   uint size, n32 = 0; /* running count of short (32 bit) encodings */

   /* find the most recently emitted block with a non-zero size */
   for (j = pc->num_blocks - 1; j >= 0 && !pc->bb_list[j]->bin_size; --j);
   if (j >= 0) {
      in = pc->bb_list[j];

      /* check for no-op branches (BRA $PC+8) */
      if (in->exit && in->exit->opcode == NV_OP_BRA && in->exit->target == b) {
         in->bin_size -= 8;
         pc->bin_size -= 8;

         /* shift every block already placed after the deleted branch */
         for (++j; j < pc->num_blocks; ++j)
            pc->bb_list[j]->bin_pos -= 8;

         nv_nvi_delete(in->exit);
      }
      b->bin_pos = in->bin_pos + in->bin_size;
   }

   pc->bb_list[pc->num_blocks++] = b;

   /* visit node */

   for (nvi = b->entry; nvi; nvi = next) {
      next = nvi->next;
      if (nvi_isnop(nvi))
         nv_nvi_delete(nvi);
   }

   /* Size instructions. Short encodings are kept in even-length runs
    * (see the n32 & 1 padding below) — presumably a hardware pairing
    * constraint; TODO confirm against the G80 ISA docs.
    */
   for (nvi = b->entry; nvi; nvi = next) {
      next = nvi->next;

      size = nv50_inst_min_size(nvi);
      if (nvi->next && size < 8)
         ++n32;
      else
      if ((n32 & 1) && nvi->next &&
          nv50_inst_min_size(nvi->next) == 4 &&
          inst_commutation_legal(nvi, nvi->next)) {
         /* swap with the next short insn to complete the pair */
         ++n32;
         debug_printf("permuting: ");
         nv_print_instruction(nvi);
         nv_print_instruction(nvi->next);
         nv_nvi_permute(nvi, nvi->next);
         next = nvi;
      } else {
         nvi->is_long = 1;

         /* odd run of shorts: lengthen the previous one to pad */
         b->bin_size += n32 & 1;
         if (n32 & 1)
            nvi->prev->is_long = 1;
         n32 = 0;
      }
      b->bin_size += 1 + nvi->is_long; /* counted in 32 bit words */
   }

   if (!b->entry) {
      debug_printf("block %p is now empty\n", b);
   } else
   if (!b->exit->is_long) {
      /* the last instruction of a block must use the long encoding */
      assert(n32);
      b->exit->is_long = 1;
      b->bin_size += 1;

      /* might have del'd a hole tail of instructions */
      if (!b->exit->prev->is_long && !(n32 & 1)) {
         b->bin_size += 1;
         b->exit->prev->is_long = 1;
      }
   }
   assert(!b->entry || (b->exit && b->exit->is_long));

   /* convert from 32 bit words to bytes */
   pc->bin_size += b->bin_size *= 4;
}
203
204 int
205 nv_pc_exec_pass2(struct nv_pc *pc)
206 {
207 debug_printf("preparing %u blocks for emission\n", pc->num_blocks);
208
209 pc->bb_list = CALLOC(pc->num_blocks, sizeof(struct nv_basic_block *));
210 pc->num_blocks = 0;
211
212 nv_pc_pass_in_order(pc->root, nv_pc_pass_pre_emission, pc);
213
214 return 0;
215 }
216
217 static INLINE boolean
218 is_cmem_load(struct nv_instruction *nvi)
219 {
220 return (nvi->opcode == NV_OP_LDA &&
221 nvi->src[0]->value->reg.file >= NV_FILE_MEM_C(0) &&
222 nvi->src[0]->value->reg.file <= NV_FILE_MEM_C(15));
223 }
224
225 static INLINE boolean
226 is_smem_load(struct nv_instruction *nvi)
227 {
228 return (nvi->opcode == NV_OP_LDA &&
229 (nvi->src[0]->value->reg.file == NV_FILE_MEM_S ||
230 nvi->src[0]->value->reg.file <= NV_FILE_MEM_P));
231 }
232
233 static INLINE boolean
234 is_immd_move(struct nv_instruction *nvi)
235 {
236 return (nvi->opcode == NV_OP_MOV &&
237 nvi->src[0]->value->reg.file == NV_FILE_IMM);
238 }
239
/* For commutative opcodes, swap the sources so that a c[] load ends up
 * in src[1] and a shared-memory load in src[0]; for SET, remap the
 * condition code so the comparison result is unchanged by the swap.
 */
static INLINE void
check_swap_src_0_1(struct nv_instruction *nvi)
{
   /* condition code after exchanging the operands, indexed by cc */
   static const ubyte cc_swapped[8] = { 0, 4, 2, 6, 1, 5, 3, 7 };

   struct nv_ref *src0 = nvi->src[0], *src1 = nvi->src[1];

   if (!nv_op_commutative(nvi->opcode))
      return;
   assert(src0 && src1);

   if (src1->value->reg.file == NV_FILE_IMM) {
      /* should only be present from folding a constant MUL part of a MAD */
      assert(nvi->opcode == NV_OP_ADD);
      return;
   }

   /* NOTE(review): src0->value->insn may be NULL for function inputs;
    * is_cmem_load would dereference it — confirm insn is always set here.
    */
   if (is_cmem_load(src0->value->insn)) {
      if (!is_cmem_load(src1->value->insn)) {
         nvi->src[0] = src1;
         nvi->src[1] = src0;
         /* debug_printf("swapping cmem load to 1\n"); */
      }
   } else
   if (is_smem_load(src1->value->insn)) {
      if (!is_smem_load(src0->value->insn)) {
         nvi->src[0] = src1;
         nvi->src[1] = src0;
         /* debug_printf("swapping smem load to 0\n"); */
      }
   }

   /* sources were exchanged: adjust the SET condition accordingly */
   if (nvi->opcode == NV_OP_SET && nvi->src[0] != src0)
      nvi->set_cond = cc_swapped[nvi->set_cond];
}
275
/* Generic pass context; 'n' and 'priv' are pass-specific scratch. */
struct nv_pass {
   struct nv_pc *pc;
   int n;
   void *priv;
};
281
/* Fold a MOV/STA to an output register ($oX) into the instruction that
 * produced the stored value, by retargeting that instruction's def.
 * The store is left with def[0] == NULL so DCE can remove it.
 */
static int
nv_pass_fold_stores(struct nv_pass *ctx, struct nv_basic_block *b)
{
   struct nv_instruction *nvi, *sti;
   int j;

   for (sti = b->entry; sti; sti = sti->next) {
      if (!sti->def[0] || sti->def[0]->reg.file != NV_FILE_OUT)
         continue;

      /* only handling MOV to $oX here */
      if (sti->opcode != NV_OP_MOV && sti->opcode != NV_OP_STA)
         continue;

      nvi = sti->src[0]->value->insn;
      if (!nvi || nvi->opcode == NV_OP_PHI)
         continue;
      assert(nvi->def[0] == sti->src[0]->value);

      /* the value has other users: its def cannot be retargeted */
      if (nvi->def[0]->refc > 1)
         continue;

      /* cannot write to $oX when using immediate */
      for (j = 0; j < 4 && nvi->src[j]; ++j)
         if (nvi->src[j]->value->reg.file == NV_FILE_IMM)
            break;
      if (j < 4)
         continue;

      /* move the output def (and the pinned status) onto the producer */
      nvi->def[0] = sti->def[0];
      sti->def[0] = NULL;
      nvi->fixed = sti->fixed;
      sti->fixed = 0;
   }
   DESCEND_ARBITRARY(j, nv_pass_fold_stores);

   return 0;
}
320
/* Fold memory loads and immediate MOVs directly into the sources of
 * their users, where the target instruction's encoding allows it.
 */
static int
nv_pass_fold_loads(struct nv_pass *ctx, struct nv_basic_block *b)
{
   struct nv_instruction *nvi, *ld;
   int j;

   for (nvi = b->entry; nvi; nvi = nvi->next) {
      check_swap_src_0_1(nvi);

      for (j = 0; j < 3; ++j) {
         if (!nvi->src[j])
            break;
         ld = nvi->src[j]->value->insn;
         if (!ld)
            continue;

         /* immediate source: reference the constant value directly */
         if (is_immd_move(ld) && nv50_nvi_can_use_imm(nvi, j)) {
            nv_reference(ctx->pc, &nvi->src[j], ld->src[0]->value);
            continue;
         }

         if (ld->opcode != NV_OP_LDA)
            continue;
         if (!nv50_nvi_can_load(nvi, j, ld->src[0]->value))
            continue;

         /* src[4] holds the address register of the load, if any */
         if (j == 0 && ld->src[4]) /* can't load shared mem */
            continue;

         /* fold it ! */ /* XXX: ref->insn */
         nv_reference(ctx->pc, &nvi->src[j], ld->src[0]->value);
         if (ld->src[4])
            nv_reference(ctx->pc, &nvi->src[4], ld->src[4]->value);
      }
   }
   DESCEND_ARBITRARY(j, nv_pass_fold_loads);

   return 0;
}
360
361 static int
362 nv_pass_lower_mods(struct nv_pass *ctx, struct nv_basic_block *b)
363 {
364 int j;
365 struct nv_instruction *nvi, *mi, *next;
366 ubyte mod;
367
368 for (nvi = b->entry; nvi; nvi = next) {
369 next = nvi->next;
370 if (nvi->opcode == NV_OP_SUB) {
371 nvi->opcode = NV_OP_ADD;
372 nvi->src[1]->mod ^= NV_MOD_NEG;
373 }
374
375 /* should not put any modifiers on NEG and ABS */
376 assert(nvi->opcode != NV_MOD_NEG || !nvi->src[0]->mod);
377 assert(nvi->opcode != NV_MOD_ABS || !nvi->src[0]->mod);
378
379 for (j = 0; j < 4; ++j) {
380 if (!nvi->src[j])
381 break;
382
383 mi = nvi->src[j]->value->insn;
384 if (!mi)
385 continue;
386 if (mi->def[0]->refc > 1)
387 continue;
388
389 if (mi->opcode == NV_OP_NEG) mod = NV_MOD_NEG;
390 else
391 if (mi->opcode == NV_OP_ABS) mod = NV_MOD_ABS;
392 else
393 continue;
394
395 if (nvi->opcode == NV_OP_ABS)
396 mod &= ~(NV_MOD_NEG | NV_MOD_ABS);
397 else
398 if (nvi->opcode == NV_OP_NEG && mod == NV_MOD_NEG) {
399 nvi->opcode = NV_OP_MOV;
400 mod = 0;
401 }
402
403 if (!(nv50_supported_src_mods(nvi->opcode, j) & mod))
404 continue;
405
406 nv_reference(ctx->pc, &nvi->src[j], mi->src[0]->value);
407
408 nvi->src[j]->mod ^= mod;
409 }
410
411 if (nvi->opcode == NV_OP_SAT) {
412 mi = nvi->src[0]->value->insn;
413
414 if ((mi->opcode == NV_OP_MAD) && !mi->flags_def) {
415 mi->saturate = 1;
416 mi->def[0] = nvi->def[0];
417 nv_nvi_delete(nvi);
418 }
419 }
420 }
421 DESCEND_ARBITRARY(j, nv_pass_lower_mods);
422
423 return 0;
424 }
425
/* TRUE if value 's' was produced by a MUL instruction. */
#define SRC_IS_MUL(s) ((s)->insn && (s)->insn->opcode == NV_OP_MUL)

/* Trace 'ref' back through chains of MOVs; return the originating
 * immediate value, or NULL if it does not resolve to an immediate.
 */
static struct nv_value *
find_immediate(struct nv_ref *ref)
{
   struct nv_value *src;

   if (!ref)
      return NULL;

   src = ref->value;
   while (src->insn && src->insn->opcode == NV_OP_MOV) {
      /* MOVs with modifiers are not expected here */
      assert(!src->insn->src[0]->mod);
      src = src->insn->src[0]->value;
   }
   return (src->reg.file == NV_FILE_IMM) ? src : NULL;
}
443
444 static void
445 modifiers_apply(uint32_t *val, ubyte type, ubyte mod)
446 {
447 if (mod & NV_MOD_ABS) {
448 if (type == NV_TYPE_F32)
449 *val &= 0x7fffffff;
450 else
451 if ((*val) & (1 << 31))
452 *val = ~(*val) + 1;
453 }
454 if (mod & NV_MOD_NEG) {
455 if (type == NV_TYPE_F32)
456 *val ^= 0x80000000;
457 else
458 *val = ~(*val) + 1;
459 }
460 }
461
462 static INLINE uint
463 modifiers_opcode(ubyte mod)
464 {
465 switch (mod) {
466 case NV_MOD_NEG: return NV_OP_NEG;
467 case NV_MOD_ABS: return NV_OP_ABS;
468 case 0:
469 return NV_OP_MOV;
470 default:
471 return NV_OP_NOP;
472 }
473 }
474
475 static void
476 constant_expression(struct nv_pc *pc, struct nv_instruction *nvi,
477 struct nv_value *src0, struct nv_value *src1)
478 {
479 struct nv_value *val;
480 union {
481 float f32;
482 uint32_t u32;
483 int32_t s32;
484 } u0, u1, u;
485 ubyte type;
486
487 if (!nvi->def[0])
488 return;
489 type = nvi->def[0]->reg.type;
490
491 u.u32 = 0;
492 u0.u32 = src0->reg.imm.u32;
493 u1.u32 = src1->reg.imm.u32;
494
495 modifiers_apply(&u0.u32, type, nvi->src[0]->mod);
496 modifiers_apply(&u0.u32, type, nvi->src[1]->mod);
497
498 switch (nvi->opcode) {
499 case NV_OP_MAD:
500 if (nvi->src[2]->value->reg.file != NV_FILE_GPR)
501 return;
502 /* fall through */
503 case NV_OP_MUL:
504 switch (type) {
505 case NV_TYPE_F32: u.f32 = u0.f32 * u1.f32; break;
506 case NV_TYPE_U32: u.u32 = u0.u32 * u1.u32; break;
507 case NV_TYPE_S32: u.s32 = u0.s32 * u1.s32; break;
508 default:
509 assert(0);
510 break;
511 }
512 break;
513 case NV_OP_ADD:
514 switch (type) {
515 case NV_TYPE_F32: u.f32 = u0.f32 + u1.f32; break;
516 case NV_TYPE_U32: u.u32 = u0.u32 + u1.u32; break;
517 case NV_TYPE_S32: u.s32 = u0.s32 + u1.s32; break;
518 default:
519 assert(0);
520 break;
521 }
522 break;
523 case NV_OP_SUB:
524 switch (type) {
525 case NV_TYPE_F32: u.f32 = u0.f32 - u1.f32;
526 case NV_TYPE_U32: u.u32 = u0.u32 - u1.u32;
527 case NV_TYPE_S32: u.s32 = u0.s32 - u1.s32;
528 default:
529 assert(0);
530 break;
531 }
532 break;
533 default:
534 return;
535 }
536
537 nvi->opcode = NV_OP_MOV;
538
539 val = new_value(pc, NV_FILE_IMM, type);
540
541 val->reg.imm.u32 = u.u32;
542
543 nv_reference(pc, &nvi->src[1], NULL);
544 nv_reference(pc, &nvi->src[0], val);
545
546 if (nvi->src[2]) { /* from MAD */
547 nvi->src[1] = nvi->src[0];
548 nvi->src[0] = nvi->src[2];
549 nvi->src[2] = NULL;
550 nvi->opcode = NV_OP_ADD;
551 }
552 }
553
/* Simplify an instruction whose source slot 's' is the immediate 'val':
 * MUL by 1/2/-1/-2/0, ADD of 0, and full folding of RCP/RSQ.
 */
static void
constant_operand(struct nv_pc *pc,
                 struct nv_instruction *nvi, struct nv_value *val, int s)
{
   union {
      float f32;
      uint32_t u32;
      int32_t s32;
   } u;
   int t = s ? 0 : 1; /* the other (non-immediate) source slot */
   uint op;
   ubyte type;

   if (!nvi->def[0])
      return;
   type = nvi->def[0]->reg.type;

   u.u32 = val->reg.imm.u32;
   modifiers_apply(&u.u32, type, nvi->src[s]->mod);

   switch (nvi->opcode) {
   case NV_OP_MUL:
      /* x * 1 -> x (or NEG/ABS if the other source had a modifier) */
      if ((type == NV_TYPE_F32 && u.f32 == 1.0f) ||
          (NV_TYPE_ISINT(type) && u.u32 == 1)) {
         if ((op = modifiers_opcode(nvi->src[t]->mod)) == NV_OP_NOP)
            break;
         nvi->opcode = op;
         nv_reference(pc, &nvi->src[s], NULL);
         nvi->src[0] = nvi->src[t];
         nvi->src[1] = NULL;
      } else
      /* x * 2 -> x + x */
      if ((type == NV_TYPE_F32 && u.f32 == 2.0f) ||
          (NV_TYPE_ISINT(type) && u.u32 == 2)) {
         nvi->opcode = NV_OP_ADD;
         nv_reference(pc, &nvi->src[s], nvi->src[t]->value);
         nvi->src[s]->mod = nvi->src[t]->mod;
      } else
      /* x * -1 -> -x (or x if it was already negated) */
      if (type == NV_TYPE_F32 && u.f32 == -1.0f) {
         if (nvi->src[t]->mod & NV_MOD_NEG)
            nvi->opcode = NV_OP_MOV;
         else
            nvi->opcode = NV_OP_NEG;
         nv_reference(pc, &nvi->src[s], NULL);
         nvi->src[0] = nvi->src[t];
         nvi->src[1] = NULL;
      } else
      /* x * -2 -> (-x) + (-x); note src[t]->mod is updated in place */
      if (type == NV_TYPE_F32 && u.f32 == -2.0f) {
         nvi->opcode = NV_OP_ADD;
         nv_reference(pc, &nvi->src[s], nvi->src[t]->value);
         nvi->src[s]->mod = (nvi->src[t]->mod ^= NV_MOD_NEG);
      } else
      /* x * 0 -> 0 (keep the immediate as the remaining source) */
      if (u.u32 == 0) {
         nvi->opcode = NV_OP_MOV;
         nv_reference(pc, &nvi->src[t], NULL);
         if (s) {
            nvi->src[0] = nvi->src[1];
            nvi->src[1] = NULL;
         }
      }
      break;
   case NV_OP_ADD:
      /* x + 0 -> x (or NEG/ABS if the other source had a modifier) */
      if (u.u32 == 0) {
         if ((op = modifiers_opcode(nvi->src[t]->mod)) == NV_OP_NOP)
            break;
         nvi->opcode = op;
         nv_reference(pc, &nvi->src[s], NULL);
         nvi->src[0] = nvi->src[t];
         nvi->src[1] = NULL;
      }
      break;
   case NV_OP_RCP:
      /* constant-fold the reciprocal into a MOV of a new immediate */
      u.f32 = 1.0f / u.f32;
      (val = new_value(pc, NV_FILE_IMM, NV_TYPE_F32))->reg.imm.f32 = u.f32;
      nvi->opcode = NV_OP_MOV;
      assert(s == 0);
      nv_reference(pc, &nvi->src[0], val);
      break;
   case NV_OP_RSQ:
      /* constant-fold the reciprocal square root likewise */
      u.f32 = 1.0f / sqrtf(u.f32);
      (val = new_value(pc, NV_FILE_IMM, NV_TYPE_F32))->reg.imm.f32 = u.f32;
      nvi->opcode = NV_OP_MOV;
      assert(s == 0);
      nv_reference(pc, &nvi->src[0], val);
      break;
   default:
      break;
   }
}
642
/* Constant-fold instructions with immediate sources, then try to merge
 * a single-use MUL feeding an ADD into one MAD.
 */
static int
nv_pass_lower_arith(struct nv_pass *ctx, struct nv_basic_block *b)
{
   struct nv_instruction *nvi, *next;
   int j;

   for (nvi = b->entry; nvi; nvi = next) {
      struct nv_value *src0, *src1, *src;
      int mod;

      next = nvi->next;

      src0 = find_immediate(nvi->src[0]);
      src1 = find_immediate(nvi->src[1]);

      /* both immediate: fold completely; one immediate: simplify */
      if (src0 && src1)
         constant_expression(ctx->pc, nvi, src0, src1);
      else {
         if (src0)
            constant_operand(ctx->pc, nvi, src0, 0);
         else
         if (src1)
            constant_operand(ctx->pc, nvi, src1, 1);
      }

      /* try to combine MUL, ADD into MAD */
      if (nvi->opcode != NV_OP_ADD)
         continue;

      src0 = nvi->src[0]->value;
      src1 = nvi->src[1]->value;

      /* only a MUL whose result has no other users can be absorbed */
      if (SRC_IS_MUL(src0) && src0->refc == 1)
         src = src0;
      else
      if (SRC_IS_MUL(src1) && src1->refc == 1)
         src = src1;
      else
         continue;

      nvi->opcode = NV_OP_MAD;
      mod = nvi->src[(src == src0) ? 0 : 1]->mod;
      nv_reference(ctx->pc, &nvi->src[(src == src0) ? 0 : 1], NULL);
      /* the non-MUL addend becomes src[2] of the MAD */
      nvi->src[2] = nvi->src[(src == src0) ? 1 : 0];

      /* only NEG can be propagated onto the multiplication */
      assert(!(mod & ~NV_MOD_NEG));
      nvi->src[0] = new_ref(ctx->pc, src->insn->src[0]->value);
      nvi->src[1] = new_ref(ctx->pc, src->insn->src[1]->value);
      nvi->src[0]->mod = src->insn->src[0]->mod ^ mod;
      nvi->src[1]->mod = src->insn->src[1]->mod;
   }
   DESCEND_ARBITRARY(j, nv_pass_lower_arith);

   return 0;
}
698
699 /*
700 set $r2 g f32 $r2 $r3
701 cvt abs rn f32 $r2 s32 $r2
702 cvt f32 $c0 # f32 $r2
703 e $c0 bra 0x80
704 */
#if 0
/* Disabled placeholder: conditional lowering is currently done in the
 * IR builder instead (see the example sequence in the comment above).
 */
static int
nv_pass_lower_cond(struct nv_pass *ctx, struct nv_basic_block *b)
{
   /* XXX: easier in IR builder for now */
   return 0;
}
#endif
713
714 /* TODO: redundant store elimination */
715
/* One previously-seen load: 'data' identifies the source location
 * (register id, or the immediate's bit pattern), 'value' is the result.
 */
struct load_record {
   struct load_record *next;
   uint64_t data;
   struct nv_value *value;
};

#define LOAD_RECORD_POOL_SIZE 1024

/* State for redundant-load elimination: one record list per memory
 * space (immediates, shared, varyings, 16 const buffers, local), all
 * allocated from a fixed pool indexed by 'alloc'.
 */
struct nv_pass_reld_elim {
   struct nv_pc *pc;

   struct load_record *imm;
   struct load_record *mem_s;
   struct load_record *mem_v;
   struct load_record *mem_c[16];
   struct load_record *mem_l;

   struct load_record pool[LOAD_RECORD_POOL_SIZE];
   int alloc;
};
736
/* Within a basic block, replace a load/immediate-MOV whose source was
 * already loaded earlier with the earlier result. Records are reset at
 * the end of each block, so nothing carries across block boundaries.
 */
static int
nv_pass_reload_elim(struct nv_pass_reld_elim *ctx, struct nv_basic_block *b)
{
   struct load_record **rec, *it;
   struct nv_instruction *ld, *next;
   uint64_t data;
   struct nv_value *val;
   int j;

   for (ld = b->entry; ld; ld = next) {
      next = ld->next;
      if (!ld->src[0])
         continue;
      val = ld->src[0]->value;
      rec = NULL;

      /* select the record list matching the load's source space */
      if (ld->opcode == NV_OP_LINTERP || ld->opcode == NV_OP_PINTERP) {
         data = val->reg.id;
         rec = &ctx->mem_v;
      } else
      if (ld->opcode == NV_OP_LDA) {
         data = val->reg.id;
         if (val->reg.file >= NV_FILE_MEM_C(0) &&
             val->reg.file <= NV_FILE_MEM_C(15))
            rec = &ctx->mem_c[val->reg.file - NV_FILE_MEM_C(0)];
         else
         if (val->reg.file == NV_FILE_MEM_S)
            rec = &ctx->mem_s;
         else
         if (val->reg.file == NV_FILE_MEM_L)
            rec = &ctx->mem_l;
      } else
      if ((ld->opcode == NV_OP_MOV) && (val->reg.file == NV_FILE_IMM)) {
         data = val->reg.imm.u32;
         rec = &ctx->imm;
      }

      /* not a tracked load, or its result is unused anyway */
      if (!rec || !ld->def[0]->refc)
         continue;

      for (it = *rec; it; it = it->next)
         if (it->data == data)
            break;

      if (it) {
         /* NOTE(review): reg.id >= 0 presumably means the def is bound
          * to a fixed register, so the record is updated rather than
          * the load replaced — confirm against register allocation.
          */
         if (ld->def[0]->reg.id >= 0)
            it->value = ld->def[0];
         else
            nvcg_replace_value(ctx->pc, ld->def[0], it->value);
      } else {
         /* remember this load; silently stop once the pool is full */
         if (ctx->alloc == LOAD_RECORD_POOL_SIZE)
            continue;
         it = &ctx->pool[ctx->alloc++];
         it->next = *rec;
         it->data = data;
         it->value = ld->def[0];
         *rec = it;
      }
   }

   /* reset all records before descending into successor blocks */
   ctx->imm = NULL;
   ctx->mem_s = NULL;
   ctx->mem_v = NULL;
   for (j = 0; j < 16; ++j)
      ctx->mem_c[j] = NULL;
   ctx->mem_l = NULL;
   ctx->alloc = 0;

   DESCEND_ARBITRARY(j, nv_pass_reload_elim);

   return 0;
}
809
/* Compute tex_mask for vector ops from the ref counts of their defs and
 * compact the live defs to the front of the def array.
 * Note: iterates over all instructions in the program; 'b' is unused.
 */
static int
nv_pass_tex_mask(struct nv_pass *ctx, struct nv_basic_block *b)
{
   int i, c, j;

   for (i = 0; i < ctx->pc->num_instructions; ++i) {
      struct nv_instruction *nvi = &ctx->pc->instructions[i];
      struct nv_value *def[4];

      if (!nv_is_vector_op(nvi->opcode))
         continue;
      nvi->tex_mask = 0;

      /* one mask bit per referenced component */
      for (c = 0; c < 4; ++c) {
         if (nvi->def[c]->refc)
            nvi->tex_mask |= 1 << c;
         def[c] = nvi->def[c];
      }

      /* live defs first, dead ones after */
      j = 0;
      for (c = 0; c < 4; ++c)
         if (nvi->tex_mask & (1 << c))
            nvi->def[j++] = def[c];
      for (c = 0; c < 4; ++c)
         if (!(nvi->tex_mask & (1 << c)))
            nvi->def[j++] = def[c];
      assert(j == 4);
   }
   return 0;
}
840
/* DCE context; 'removed' counts deletions so the caller can iterate
 * the pass until a fixed point is reached.
 */
struct nv_pass_dce {
   struct nv_pc *pc;
   uint removed;
};
845
846 static int
847 nv_pass_dce(struct nv_pass_dce *ctx, struct nv_basic_block *b)
848 {
849 int j;
850 struct nv_instruction *nvi, *next;
851
852 for (nvi = b->entry; nvi; nvi = next) {
853 next = nvi->next;
854
855 if (inst_cullable(nvi)) {
856 nv_nvi_delete(nvi);
857
858 ++ctx->removed;
859 }
860 }
861 DESCEND_ARBITRARY(j, nv_pass_dce);
862
863 return 0;
864 }
865
866 static INLINE boolean
867 bb_simple_if_endif(struct nv_basic_block *bb)
868 {
869 return (bb->out[0] && bb->out[1] &&
870 bb->out[0]->out[0] == bb->out[1] &&
871 !bb->out[0]->out[1]);
872 }
873
874 static int
875 nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b)
876 {
877 int j;
878
879 if (bb_simple_if_endif(b)) {
880 ++ctx->n;
881 debug_printf("nv_pass_flatten: total IF/ENDIF constructs: %i\n", ctx->n);
882 }
883 DESCEND_ARBITRARY(j, nv_pass_flatten);
884
885 return 0;
886 }
887
888 /* local common subexpression elimination, stupid O(n^2) implementation */
889 static int
890 nv_pass_cse(struct nv_pass *ctx, struct nv_basic_block *b)
891 {
892 struct nv_instruction *ir, *ik, *next;
893 struct nv_instruction *entry = b->phi ? b->phi : b->entry;
894 int s;
895 unsigned int reps;
896
897 do {
898 reps = 0;
899 for (ir = entry; ir; ir = next) {
900 next = ir->next;
901 for (ik = entry; ik != ir; ik = ik->next) {
902 if (ir->opcode != ik->opcode)
903 continue;
904
905 if (ik->opcode == NV_OP_LDA ||
906 ik->opcode == NV_OP_STA ||
907 ik->opcode == NV_OP_MOV ||
908 nv_is_vector_op(ik->opcode))
909 continue; /* ignore loads, stores & moves */
910
911 if (ik->src[4] || ir->src[4])
912 continue; /* don't mess with address registers */
913
914 if (ik->flags_src || ir->flags_src ||
915 ik->flags_def || ir->flags_def)
916 continue; /* and also not with flags, for now */
917
918 for (s = 0; s < 3; ++s) {
919 struct nv_value *a, *b;
920
921 if (!ik->src[s]) {
922 if (ir->src[s])
923 break;
924 continue;
925 }
926 if (ik->src[s]->mod != ir->src[s]->mod)
927 break;
928 a = ik->src[s]->value;
929 b = ir->src[s]->value;
930 if (a == b)
931 continue;
932 if (a->reg.file != b->reg.file ||
933 a->reg.id < 0 ||
934 a->reg.id != b->reg.id)
935 break;
936 }
937 if (s == 3) {
938 nv_nvi_delete(ir);
939 ++reps;
940 nvcg_replace_value(ctx->pc, ir->def[0], ik->def[0]);
941 break;
942 }
943 }
944 }
945 } while(reps);
946
947 DESCEND_ARBITRARY(s, nv_pass_cse);
948
949 return 0;
950 }
951
952 int
953 nv_pc_exec_pass0(struct nv_pc *pc)
954 {
955 struct nv_pass_reld_elim *reldelim;
956 struct nv_pass pass;
957 struct nv_pass_dce dce;
958 int ret;
959
960 pass.n = 0;
961 pass.pc = pc;
962
963 pc->pass_seq++;
964 ret = nv_pass_flatten(&pass, pc->root);
965 if (ret)
966 return ret;
967
968 /* Do this first, so we don't have to pay attention
969 * to whether sources are supported memory loads.
970 */
971 pc->pass_seq++;
972 ret = nv_pass_lower_arith(&pass, pc->root);
973 if (ret)
974 return ret;
975
976 pc->pass_seq++;
977 ret = nv_pass_fold_loads(&pass, pc->root);
978 if (ret)
979 return ret;
980
981 pc->pass_seq++;
982 ret = nv_pass_fold_stores(&pass, pc->root);
983 if (ret)
984 return ret;
985
986 reldelim = CALLOC_STRUCT(nv_pass_reld_elim);
987 reldelim->pc = pc;
988 pc->pass_seq++;
989 ret = nv_pass_reload_elim(reldelim, pc->root);
990 FREE(reldelim);
991 if (ret)
992 return ret;
993
994 pc->pass_seq++;
995 ret = nv_pass_cse(&pass, pc->root);
996 if (ret)
997 return ret;
998
999 pc->pass_seq++;
1000 ret = nv_pass_lower_mods(&pass, pc->root);
1001 if (ret)
1002 return ret;
1003
1004 dce.pc = pc;
1005 do {
1006 dce.removed = 0;
1007 pc->pass_seq++;
1008 ret = nv_pass_dce(&dce, pc->root);
1009 if (ret)
1010 return ret;
1011 } while (dce.removed);
1012
1013 ret = nv_pass_tex_mask(&pass, pc->root);
1014 if (ret)
1015 return ret;
1016
1017 return ret;
1018 }