nv50: initialize edgeflag input index
[mesa.git] / src / gallium / drivers / nv50 / nv50_pc_optimize.c
1 /*
2 * Copyright 2010 Christoph Bumiller
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 * SOFTWARE.
21 */
22
23 #include "nv50_pc.h"
24
/* Visit the (up to two) CFG successors of block @b that have not yet been
 * processed in the current pass, calling pass function @f on each.
 * Visited state is tracked by comparing the block's pass_seq against the
 * program's pass_seq, which the caller bumps once per pass.
 * @j is a caller-provided scratch loop variable.
 */
#define DESCEND_ARBITRARY(j, f)                                 \
do {                                                            \
   b->pass_seq = ctx->pc->pass_seq;                             \
                                                                \
   for (j = 0; j < 2; ++j)                                      \
      if (b->out[j] && b->out[j]->pass_seq < ctx->pc->pass_seq) \
         f(ctx, b->out[j]);                                     \
} while (0)

/* minimum encoding size (4 or 8 bytes) of an instruction — defined in the
 * emitter; presumably nv50_pc_emit.c — TODO confirm */
extern unsigned nv50_inst_min_size(struct nv_instruction *);

/* context for the pre-emission pass (currently just the program) */
struct nv_pc_pass {
   struct nv_pc *pc;
};
39
40 static INLINE boolean
41 values_equal(struct nv_value *a, struct nv_value *b)
42 {
43 /* XXX: sizes */
44 return (a->reg.file == b->reg.file && a->join->reg.id == b->join->reg.id);
45 }
46
47 static INLINE boolean
48 inst_commutation_check(struct nv_instruction *a,
49 struct nv_instruction *b)
50 {
51 int si, di;
52
53 for (di = 0; di < 4; ++di) {
54 if (!a->def[di])
55 break;
56 for (si = 0; si < 5; ++si) {
57 if (!b->src[si])
58 continue;
59 if (values_equal(a->def[di], b->src[si]->value))
60 return FALSE;
61 }
62 }
63
64 if (b->flags_src && b->flags_src->value == a->flags_def)
65 return FALSE;
66
67 return TRUE;
68 }
69
70 /* Check whether we can swap the order of the instructions,
71 * where a & b may be either the earlier or the later one.
72 */
73 static boolean
74 inst_commutation_legal(struct nv_instruction *a,
75 struct nv_instruction *b)
76 {
77 return inst_commutation_check(a, b) && inst_commutation_check(b, a);
78 }
79
80 static INLINE boolean
81 inst_cullable(struct nv_instruction *nvi)
82 {
83 return (!(nvi->is_terminator || nvi->is_join ||
84 nvi->target ||
85 nvi->fixed ||
86 nv_nvi_refcount(nvi)));
87 }
88
/* Decide whether @nvi generates no code after register allocation:
 * EXPORT/UNDEF never emit anything, results with no register assigned are
 * dead, and a MOV/SELECT whose source already occupies the destination
 * register moves nothing.  Check order matters: side effects are ruled
 * out before any register-id reasoning.
 */
static INLINE boolean
nvi_isnop(struct nv_instruction *nvi)
{
   if (nvi->opcode == NV_OP_EXPORT || nvi->opcode == NV_OP_UNDEF)
      return TRUE;

   /* control flow, pinned instructions and flag traffic must be kept */
   if (nvi->fixed ||
       nvi->is_terminator ||
       nvi->flags_src ||
       nvi->flags_def ||
       nvi->is_join)
      return FALSE;

   /* no register allocated for the result -> nothing observes it */
   if (nvi->def[0]->join->reg.id < 0)
      return TRUE;

   if (nvi->opcode != NV_OP_MOV && nvi->opcode != NV_OP_SELECT)
      return FALSE;

   /* a move between register files is a real transfer */
   if (nvi->def[0]->reg.file != nvi->src[0]->value->reg.file)
      return FALSE;

   if (nvi->src[0]->value->join->reg.id < 0) {
      debug_printf("nvi_isnop: orphaned value detected\n");
      return TRUE;
   }

   /* SELECT additionally requires the second source to share the
    * destination register
    */
   if (nvi->opcode == NV_OP_SELECT)
      if (!values_equal(nvi->def[0], nvi->src[1]->value))
         return FALSE;

   return values_equal(nvi->def[0], nvi->src[0]->value);
}
122
/* Generic context handed to the simple optimization passes. */
struct nv_pass {
   struct nv_pc *pc; /* program being compiled */
   int n;            /* pass-specific scratch counter */
   void *priv;       /* pass-specific private data */
};

static int
nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b);
131
/* Per-block pre-emission fixup, called in final emission order:
 *  - delete no-op instructions and branches to the immediately following
 *    block (BRA $PC+8),
 *  - compute the block's binary position and size, promoting instructions
 *    to the long (8 byte) encoding where needed: short (4 byte) encodings
 *    are only valid in aligned pairs.
 */
static void
nv_pc_pass_pre_emission(void *priv, struct nv_basic_block *b)
{
   struct nv_pc *pc = (struct nv_pc *)priv;
   struct nv_basic_block *in;
   struct nv_instruction *nvi, *next;
   int j;
   uint size, n32 = 0;

   /* find the last already-placed block that actually contains code */
   for (j = pc->num_blocks - 1; j >= 0 && !pc->bb_list[j]->bin_size; --j);
   if (j >= 0) {
      in = pc->bb_list[j];

      /* check for no-op branches (BRA $PC+8) */
      if (in->exit && in->exit->opcode == NV_OP_BRA && in->exit->target == b) {
         in->bin_size -= 8;
         pc->bin_size -= 8;

         /* shift every block placed after @in back by the deleted branch */
         for (++j; j < pc->num_blocks; ++j)
            pc->bb_list[j]->bin_pos -= 8;

         nv_nvi_delete(in->exit);
      }
      b->bin_pos = in->bin_pos + in->bin_size;
   }

   pc->bb_list[pc->num_blocks++] = b;

   /* visit node */

   for (nvi = b->entry; nvi; nvi = next) {
      next = nvi->next;
      if (nvi_isnop(nvi))
         nv_nvi_delete(nvi);
   }

   /* Sizes are accumulated in 32 bit units here (scaled to bytes at the
    * end).  n32 counts pending short-encoding candidates; shorts must pair
    * up, so an odd straggler is promoted to long — possibly after trying
    * to form a pair by swapping two adjacent, commutable instructions.
    */
   for (nvi = b->entry; nvi; nvi = next) {
      next = nvi->next;

      size = nv50_inst_min_size(nvi);
      if (nvi->next && size < 8)
         ++n32;
      else
      if ((n32 & 1) && nvi->next &&
          nv50_inst_min_size(nvi->next) == 4 &&
          inst_commutation_legal(nvi, nvi->next)) {
         ++n32;
         debug_printf("permuting: ");
         nv_print_instruction(nvi);
         nv_print_instruction(nvi->next);
         nv_nvi_permute(nvi, nvi->next);
         next = nvi;
      } else {
         nvi->is_long = 1;

         b->bin_size += n32 & 1;
         if (n32 & 1)
            nvi->prev->is_long = 1;
         n32 = 0;
      }
      b->bin_size += 1 + nvi->is_long;
   }

   if (!b->entry) {
      debug_printf("block %p is now empty\n", b);
   } else
   if (!b->exit->is_long) {
      /* the last instruction of a block must use the long encoding */
      assert(n32);
      b->exit->is_long = 1;
      b->bin_size += 1;

      /* might have del'd a whole tail of instructions */
      if (!b->exit->prev->is_long && !(n32 & 1)) {
         b->bin_size += 1;
         b->exit->prev->is_long = 1;
      }
   }
   assert(!b->entry || (b->exit && b->exit->is_long));

   /* convert the accumulated size from 32 bit units to bytes */
   pc->bin_size += b->bin_size *= 4;
}
213
/* Pass 2: flatten small IF/ELSE/ENDIF constructs into predicated code,
 * then walk the blocks in emission order computing binary layout.
 * Returns 0 (no failure paths reported).
 */
int
nv_pc_exec_pass2(struct nv_pc *pc)
{
   struct nv_pass pass;

   pass.pc = pc;

   pc->pass_seq++;
   nv_pass_flatten(&pass, pc->root);

   debug_printf("preparing %u blocks for emission\n", pc->num_blocks);

   /* NOTE(review): CALLOC result is not checked; pre-emission would
    * dereference NULL on allocation failure — consider handling.
    */
   pc->bb_list = CALLOC(pc->num_blocks, sizeof(struct nv_basic_block *));
   pc->num_blocks = 0;

   nv_pc_pass_in_order(pc->root, nv_pc_pass_pre_emission, pc);

   return 0;
}
233
234 static INLINE boolean
235 is_cmem_load(struct nv_instruction *nvi)
236 {
237 return (nvi->opcode == NV_OP_LDA &&
238 nvi->src[0]->value->reg.file >= NV_FILE_MEM_C(0) &&
239 nvi->src[0]->value->reg.file <= NV_FILE_MEM_C(15));
240 }
241
242 static INLINE boolean
243 is_smem_load(struct nv_instruction *nvi)
244 {
245 return (nvi->opcode == NV_OP_LDA &&
246 (nvi->src[0]->value->reg.file == NV_FILE_MEM_S ||
247 nvi->src[0]->value->reg.file <= NV_FILE_MEM_P));
248 }
249
250 static INLINE boolean
251 is_immd_move(struct nv_instruction *nvi)
252 {
253 return (nvi->opcode == NV_OP_MOV &&
254 nvi->src[0]->value->reg.file == NV_FILE_IMM);
255 }
256
/* Reorder the sources of a commutative instruction to satisfy encoding
 * constraints: a constant-memory load can only be folded into src 1, a
 * shared-memory load only into src 0.  For SET, swapping the operands
 * requires translating the comparison condition.
 */
static INLINE void
check_swap_src_0_1(struct nv_instruction *nvi)
{
   /* condition code after operand swap (e.g. LT <-> GT, LE <-> GE) */
   static const ubyte cc_swapped[8] = { 0, 4, 2, 6, 1, 5, 3, 7 };

   struct nv_ref *src0 = nvi->src[0], *src1 = nvi->src[1];

   if (!nv_op_commutative(nvi->opcode))
      return;
   assert(src0 && src1);

   if (src1->value->reg.file == NV_FILE_IMM) {
      /* should only be present from folding a constant MUL part of a MAD */
      assert(nvi->opcode == NV_OP_ADD);
      return;
   }

   if (is_cmem_load(src0->value->insn)) {
      if (!is_cmem_load(src1->value->insn)) {
         nvi->src[0] = src1;
         nvi->src[1] = src0;
         /* debug_printf("swapping cmem load to 1\n"); */
      }
   } else
   if (is_smem_load(src1->value->insn)) {
      if (!is_smem_load(src0->value->insn)) {
         nvi->src[0] = src1;
         nvi->src[1] = src0;
         /* debug_printf("swapping smem load to 0\n"); */
      }
   }

   /* if the operands were swapped, adjust SET's condition code */
   if (nvi->opcode == NV_OP_SET && nvi->src[0] != src0)
      nvi->set_cond = cc_swapped[nvi->set_cond];
}
292
/* Fold computations into output registers: when a value is computed once
 * and then only moved to an $oX output, let the computing instruction
 * write $oX directly and delete the move.
 */
static int
nv_pass_fold_stores(struct nv_pass *ctx, struct nv_basic_block *b)
{
   struct nv_instruction *nvi, *sti, *next;
   int j;

   for (sti = b->entry; sti; sti = next) {
      next = sti->next;

      /* only handling MOV to $oX here */
      if (!sti->def[0] || sti->def[0]->reg.file != NV_FILE_OUT)
         continue;
      if (sti->opcode != NV_OP_MOV && sti->opcode != NV_OP_STA)
         continue;

      /* the instruction producing the stored value */
      nvi = sti->src[0]->value->insn;
      if (!nvi || nvi->opcode == NV_OP_PHI)
         continue;
      assert(nvi->def[0] == sti->src[0]->value);

      /* value also used elsewhere -> must stay in a GPR */
      if (nvi->def[0]->refc > 1)
         continue;

      /* cannot write to $oX when using immediate */
      for (j = 0; j < 4 && nvi->src[j]; ++j)
         if (nvi->src[j]->value->reg.file == NV_FILE_IMM)
            break;
      if (j < 4 && nvi->src[j])
         continue;

      /* redirect the producer to the output register and drop the store */
      nvi->def[0] = sti->def[0];
      nvi->fixed = sti->fixed;

      nv_nvi_delete(sti);
   }
   DESCEND_ARBITRARY(j, nv_pass_fold_stores);

   return 0;
}
332
/* Fold memory loads and immediate MOVs directly into the instructions
 * that use them, where the target encoding permits it.
 */
static int
nv_pass_fold_loads(struct nv_pass *ctx, struct nv_basic_block *b)
{
   struct nv_instruction *nvi, *ld;
   int j;

   for (nvi = b->entry; nvi; nvi = nvi->next) {
      check_swap_src_0_1(nvi);

      for (j = 0; j < 3; ++j) {
         if (!nvi->src[j])
            break;
         ld = nvi->src[j]->value->insn;
         if (!ld)
            continue;

         /* immediates: fold if this source slot can encode one */
         if (is_immd_move(ld) && nv50_nvi_can_use_imm(nvi, j)) {
            nv_reference(ctx->pc, &nvi->src[j], ld->src[0]->value);
            continue;
         }

         if (ld->opcode != NV_OP_LDA)
            continue;
         if (!nv50_nvi_can_load(nvi, j, ld->src[0]->value))
            continue;

         if (j == 0 && ld->src[4]) /* can't load shared mem */
            continue;

         /* fold it ! */ /* XXX: ref->insn */
         nv_reference(ctx->pc, &nvi->src[j], ld->src[0]->value);
         /* carry over the load's address register, if any */
         if (ld->src[4])
            nv_reference(ctx->pc, &nvi->src[4], ld->src[4]->value);
      }
   }
   DESCEND_ARBITRARY(j, nv_pass_fold_loads);

   return 0;
}
372
373 static int
374 nv_pass_lower_mods(struct nv_pass *ctx, struct nv_basic_block *b)
375 {
376 int j;
377 struct nv_instruction *nvi, *mi, *next;
378 ubyte mod;
379
380 for (nvi = b->entry; nvi; nvi = next) {
381 next = nvi->next;
382 if (nvi->opcode == NV_OP_SUB) {
383 nvi->opcode = NV_OP_ADD;
384 nvi->src[1]->mod ^= NV_MOD_NEG;
385 }
386
387 /* should not put any modifiers on NEG and ABS */
388 assert(nvi->opcode != NV_MOD_NEG || !nvi->src[0]->mod);
389 assert(nvi->opcode != NV_MOD_ABS || !nvi->src[0]->mod);
390
391 for (j = 0; j < 4; ++j) {
392 if (!nvi->src[j])
393 break;
394
395 mi = nvi->src[j]->value->insn;
396 if (!mi)
397 continue;
398 if (mi->def[0]->refc > 1)
399 continue;
400
401 if (mi->opcode == NV_OP_NEG) mod = NV_MOD_NEG;
402 else
403 if (mi->opcode == NV_OP_ABS) mod = NV_MOD_ABS;
404 else
405 continue;
406
407 if (nvi->opcode == NV_OP_ABS)
408 mod &= ~(NV_MOD_NEG | NV_MOD_ABS);
409 else
410 if (nvi->opcode == NV_OP_NEG && mod == NV_MOD_NEG) {
411 nvi->opcode = NV_OP_MOV;
412 mod = 0;
413 }
414
415 if (!(nv50_supported_src_mods(nvi->opcode, j) & mod))
416 continue;
417
418 nv_reference(ctx->pc, &nvi->src[j], mi->src[0]->value);
419
420 nvi->src[j]->mod ^= mod;
421 }
422
423 if (nvi->opcode == NV_OP_SAT) {
424 mi = nvi->src[0]->value->insn;
425
426 if ((mi->opcode == NV_OP_MAD) && !mi->flags_def) {
427 mi->saturate = 1;
428 mi->def[0] = nvi->def[0];
429 nv_nvi_delete(nvi);
430 }
431 }
432 }
433 DESCEND_ARBITRARY(j, nv_pass_lower_mods);
434
435 return 0;
436 }
437
/* does this value come from a MUL instruction ? */
#define SRC_IS_MUL(s) ((s)->insn && (s)->insn->opcode == NV_OP_MUL)

/* Follow a (possibly empty) chain of MOVs from @ref's value and return
 * the immediate at its origin, or NULL if the origin is not an immediate.
 */
static struct nv_value *
find_immediate(struct nv_ref *ref)
{
   struct nv_value *src;

   if (!ref)
      return NULL;

   src = ref->value;
   while (src->insn && src->insn->opcode == NV_OP_MOV) {
      assert(!src->insn->src[0]->mod);
      src = src->insn->src[0]->value;
   }
   return (src->reg.file == NV_FILE_IMM) ? src : NULL;
}
455
456 static void
457 modifiers_apply(uint32_t *val, ubyte type, ubyte mod)
458 {
459 if (mod & NV_MOD_ABS) {
460 if (type == NV_TYPE_F32)
461 *val &= 0x7fffffff;
462 else
463 if ((*val) & (1 << 31))
464 *val = ~(*val) + 1;
465 }
466 if (mod & NV_MOD_NEG) {
467 if (type == NV_TYPE_F32)
468 *val ^= 0x80000000;
469 else
470 *val = ~(*val) + 1;
471 }
472 }
473
474 static INLINE uint
475 modifiers_opcode(ubyte mod)
476 {
477 switch (mod) {
478 case NV_MOD_NEG: return NV_OP_NEG;
479 case NV_MOD_ABS: return NV_OP_ABS;
480 case 0:
481 return NV_OP_MOV;
482 default:
483 return NV_OP_NOP;
484 }
485 }
486
487 static void
488 constant_expression(struct nv_pc *pc, struct nv_instruction *nvi,
489 struct nv_value *src0, struct nv_value *src1)
490 {
491 struct nv_value *val;
492 union {
493 float f32;
494 uint32_t u32;
495 int32_t s32;
496 } u0, u1, u;
497 ubyte type;
498
499 if (!nvi->def[0])
500 return;
501 type = nvi->def[0]->reg.type;
502
503 u.u32 = 0;
504 u0.u32 = src0->reg.imm.u32;
505 u1.u32 = src1->reg.imm.u32;
506
507 modifiers_apply(&u0.u32, type, nvi->src[0]->mod);
508 modifiers_apply(&u0.u32, type, nvi->src[1]->mod);
509
510 switch (nvi->opcode) {
511 case NV_OP_MAD:
512 if (nvi->src[2]->value->reg.file != NV_FILE_GPR)
513 return;
514 /* fall through */
515 case NV_OP_MUL:
516 switch (type) {
517 case NV_TYPE_F32: u.f32 = u0.f32 * u1.f32; break;
518 case NV_TYPE_U32: u.u32 = u0.u32 * u1.u32; break;
519 case NV_TYPE_S32: u.s32 = u0.s32 * u1.s32; break;
520 default:
521 assert(0);
522 break;
523 }
524 break;
525 case NV_OP_ADD:
526 switch (type) {
527 case NV_TYPE_F32: u.f32 = u0.f32 + u1.f32; break;
528 case NV_TYPE_U32: u.u32 = u0.u32 + u1.u32; break;
529 case NV_TYPE_S32: u.s32 = u0.s32 + u1.s32; break;
530 default:
531 assert(0);
532 break;
533 }
534 break;
535 case NV_OP_SUB:
536 switch (type) {
537 case NV_TYPE_F32: u.f32 = u0.f32 - u1.f32;
538 case NV_TYPE_U32: u.u32 = u0.u32 - u1.u32;
539 case NV_TYPE_S32: u.s32 = u0.s32 - u1.s32;
540 default:
541 assert(0);
542 break;
543 }
544 break;
545 default:
546 return;
547 }
548
549 nvi->opcode = NV_OP_MOV;
550
551 val = new_value(pc, NV_FILE_IMM, type);
552
553 val->reg.imm.u32 = u.u32;
554
555 nv_reference(pc, &nvi->src[1], NULL);
556 nv_reference(pc, &nvi->src[0], val);
557
558 if (nvi->src[2]) { /* from MAD */
559 nvi->src[1] = nvi->src[0];
560 nvi->src[0] = nvi->src[2];
561 nvi->src[2] = NULL;
562 nvi->opcode = NV_OP_ADD;
563 }
564 }
565
/* Algebraic simplification with one immediate operand @val at source
 * slot @s (the other source is slot @t): strength-reduce MUL by
 * 1/2/-1/-2/0, drop ADD of 0, and pre-evaluate RCP/RSQ of immediates.
 */
static void
constant_operand(struct nv_pc *pc,
                 struct nv_instruction *nvi, struct nv_value *val, int s)
{
   union {
      float f32;
      uint32_t u32;
      int32_t s32;
   } u;
   int t = s ? 0 : 1; /* index of the non-immediate source */
   uint op;
   ubyte type;

   if (!nvi->def[0])
      return;
   type = nvi->def[0]->reg.type;

   u.u32 = val->reg.imm.u32;
   modifiers_apply(&u.u32, type, nvi->src[s]->mod);

   switch (nvi->opcode) {
   case NV_OP_MUL:
      /* x * 1 -> x (or NEG/ABS of x if the other source has a modifier) */
      if ((type == NV_TYPE_F32 && u.f32 == 1.0f) ||
          (NV_TYPE_ISINT(type) && u.u32 == 1)) {
         if ((op = modifiers_opcode(nvi->src[t]->mod)) == NV_OP_NOP)
            break;
         nvi->opcode = op;
         nv_reference(pc, &nvi->src[s], NULL);
         nvi->src[0] = nvi->src[t];
         nvi->src[1] = NULL;
      } else
      /* x * 2 -> x + x */
      if ((type == NV_TYPE_F32 && u.f32 == 2.0f) ||
          (NV_TYPE_ISINT(type) && u.u32 == 2)) {
         nvi->opcode = NV_OP_ADD;
         nv_reference(pc, &nvi->src[s], nvi->src[t]->value);
         nvi->src[s]->mod = nvi->src[t]->mod;
      } else
      /* x * -1 -> -x */
      if (type == NV_TYPE_F32 && u.f32 == -1.0f) {
         if (nvi->src[t]->mod & NV_MOD_NEG)
            nvi->opcode = NV_OP_MOV;
         else
            nvi->opcode = NV_OP_NEG;
         nv_reference(pc, &nvi->src[s], NULL);
         nvi->src[0] = nvi->src[t];
         nvi->src[1] = NULL;
      } else
      /* x * -2 -> -x + -x */
      if (type == NV_TYPE_F32 && u.f32 == -2.0f) {
         nvi->opcode = NV_OP_ADD;
         nv_reference(pc, &nvi->src[s], nvi->src[t]->value);
         nvi->src[s]->mod = (nvi->src[t]->mod ^= NV_MOD_NEG);
      } else
      /* x * 0 -> 0 (NOTE(review): assumes x is never NaN/Inf for floats) */
      if (u.u32 == 0) {
         nvi->opcode = NV_OP_MOV;
         nv_reference(pc, &nvi->src[t], NULL);
         if (s) {
            nvi->src[0] = nvi->src[1];
            nvi->src[1] = NULL;
         }
      }
      break;
   case NV_OP_ADD:
      /* x + 0 -> x (or the op realizing x's modifier) */
      if (u.u32 == 0) {
         if ((op = modifiers_opcode(nvi->src[t]->mod)) == NV_OP_NOP)
            break;
         nvi->opcode = op;
         nv_reference(pc, &nvi->src[s], NULL);
         nvi->src[0] = nvi->src[t];
         nvi->src[1] = NULL;
      }
      break;
   case NV_OP_RCP:
      /* evaluate reciprocal of the immediate at compile time */
      u.f32 = 1.0f / u.f32;
      (val = new_value(pc, NV_FILE_IMM, NV_TYPE_F32))->reg.imm.f32 = u.f32;
      nvi->opcode = NV_OP_MOV;
      assert(s == 0);
      nv_reference(pc, &nvi->src[0], val);
      break;
   case NV_OP_RSQ:
      /* evaluate reciprocal square root at compile time */
      u.f32 = 1.0f / sqrtf(u.f32);
      (val = new_value(pc, NV_FILE_IMM, NV_TYPE_F32))->reg.imm.f32 = u.f32;
      nvi->opcode = NV_OP_MOV;
      assert(s == 0);
      nv_reference(pc, &nvi->src[0], val);
      break;
   default:
      break;
   }
}
654
/* Arithmetic lowering: constant-fold instructions with immediate sources,
 * then combine a single-use MUL feeding an ADD into a MAD.
 */
static int
nv_pass_lower_arith(struct nv_pass *ctx, struct nv_basic_block *b)
{
   struct nv_instruction *nvi, *next;
   int j;

   for (nvi = b->entry; nvi; nvi = next) {
      struct nv_value *src0, *src1, *src;
      int mod;

      next = nvi->next;

      src0 = find_immediate(nvi->src[0]);
      src1 = find_immediate(nvi->src[1]);

      if (src0 && src1)
         constant_expression(ctx->pc, nvi, src0, src1);
      else {
         if (src0)
            constant_operand(ctx->pc, nvi, src0, 0);
         else
         if (src1)
            constant_operand(ctx->pc, nvi, src1, 1);
      }

      /* try to combine MUL, ADD into MAD */
      if (nvi->opcode != NV_OP_ADD)
         continue;

      src0 = nvi->src[0]->value;
      src1 = nvi->src[1]->value;

      /* pick a MUL operand whose result has no other users */
      if (SRC_IS_MUL(src0) && src0->refc == 1)
         src = src0;
      else
      if (SRC_IS_MUL(src1) && src1->refc == 1)
         src = src1;
      else
         continue;

      nvi->opcode = NV_OP_MAD;
      /* remember the ADD's modifier on the MUL operand, push the other
       * ADD operand into the MAD addend slot
       */
      mod = nvi->src[(src == src0) ? 0 : 1]->mod;
      nv_reference(ctx->pc, &nvi->src[(src == src0) ? 0 : 1], NULL);
      nvi->src[2] = nvi->src[(src == src0) ? 1 : 0];

      /* only NEG can be distributed into the multiplication */
      assert(!(mod & ~NV_MOD_NEG));
      nvi->src[0] = new_ref(ctx->pc, src->insn->src[0]->value);
      nvi->src[1] = new_ref(ctx->pc, src->insn->src[1]->value);
      nvi->src[0]->mod = src->insn->src[0]->mod ^ mod;
      nvi->src[1]->mod = src->insn->src[1]->mod;
   }
   DESCEND_ARBITRARY(j, nv_pass_lower_arith);

   return 0;
}
710
/* Example sequence this would target:
 set $r2 g f32 $r2 $r3
 cvt abs rn f32 $r2 s32 $r2
 cvt f32 $c0 # f32 $r2
 e $c0 bra 0x80
*/
#if 0
static int
nv_pass_lower_cond(struct nv_pass *ctx, struct nv_basic_block *b)
{
   /* XXX: easier in IR builder for now */
   return 0;
}
#endif

/* TODO: redundant store elimination */

/* One remembered load: the location key and the value it produced. */
struct load_record {
   struct load_record *next;
   uint64_t data;          /* key: register id, or immediate bit pattern */
   struct nv_value *value; /* the def of the recorded load */
};

#define LOAD_RECORD_POOL_SIZE 1024

/* Context for the reload-elimination pass: one record list per memory
 * space, all backed by a fixed-size pool (no dynamic allocation).
 */
struct nv_pass_reld_elim {
   struct nv_pc *pc;

   struct load_record *imm;       /* immediate MOVs */
   struct load_record *mem_s;     /* shader input space */
   struct load_record *mem_v;     /* varyings / interpolants */
   struct load_record *mem_c[16]; /* constant buffers c0[]..c15[] */
   struct load_record *mem_l;     /* local memory */

   struct load_record pool[LOAD_RECORD_POOL_SIZE];
   int alloc;                     /* next free slot in pool */
};
748
/* Eliminate redundant loads within a basic block: remember each load by
 * memory space + location, and when the same location is loaded again,
 * replace the new def with the recorded value.
 */
static int
nv_pass_reload_elim(struct nv_pass_reld_elim *ctx, struct nv_basic_block *b)
{
   struct load_record **rec, *it;
   struct nv_instruction *ld, *next;
   uint64_t data;
   struct nv_value *val;
   int j;

   for (ld = b->entry; ld; ld = next) {
      next = ld->next;
      if (!ld->src[0])
         continue;
      val = ld->src[0]->value;
      rec = NULL;

      /* classify the load and select the record list for its space */
      if (ld->opcode == NV_OP_LINTERP || ld->opcode == NV_OP_PINTERP) {
         data = val->reg.id;
         rec = &ctx->mem_v;
      } else
      if (ld->opcode == NV_OP_LDA) {
         data = val->reg.id;
         if (val->reg.file >= NV_FILE_MEM_C(0) &&
             val->reg.file <= NV_FILE_MEM_C(15))
            rec = &ctx->mem_c[val->reg.file - NV_FILE_MEM_C(0)];
         else
         if (val->reg.file == NV_FILE_MEM_S)
            rec = &ctx->mem_s;
         else
         if (val->reg.file == NV_FILE_MEM_L)
            rec = &ctx->mem_l;
      } else
      if ((ld->opcode == NV_OP_MOV) && (val->reg.file == NV_FILE_IMM)) {
         data = val->reg.imm.u32;
         rec = &ctx->imm;
      }

      if (!rec || !ld->def[0]->refc)
         continue;

      /* already loaded this location in this block ? */
      for (it = *rec; it; it = it->next)
         if (it->data == data)
            break;

      if (it) {
         /* NOTE(review): a def with reg.id >= 0 appears to mean a fixed /
          * precolored value — keep the load, update the record instead.
          */
         if (ld->def[0]->reg.id >= 0)
            it->value = ld->def[0];
         else
            nvcg_replace_value(ctx->pc, ld->def[0], it->value);
      } else {
         if (ctx->alloc == LOAD_RECORD_POOL_SIZE)
            continue; /* pool exhausted: just don't record this one */
         it = &ctx->pool[ctx->alloc++];
         it->next = *rec;
         it->data = data;
         it->value = ld->def[0];
         *rec = it;
      }
   }

   /* records are per-block: reset before descending into successors */
   ctx->imm = NULL;
   ctx->mem_s = NULL;
   ctx->mem_v = NULL;
   for (j = 0; j < 16; ++j)
      ctx->mem_c[j] = NULL;
   ctx->mem_l = NULL;
   ctx->alloc = 0;

   DESCEND_ARBITRARY(j, nv_pass_reload_elim);

   return 0;
}
821
/* For vector (texture) ops, compute tex_mask from which component defs
 * are actually referenced and compact the live defs to the front of the
 * def array.  Iterates the flat instruction array; @b is unused.
 */
static int
nv_pass_tex_mask(struct nv_pass *ctx, struct nv_basic_block *b)
{
   int i, c, j;

   for (i = 0; i < ctx->pc->num_instructions; ++i) {
      struct nv_instruction *nvi = &ctx->pc->instructions[i];
      struct nv_value *def[4];

      if (!nv_is_vector_op(nvi->opcode))
         continue;
      nvi->tex_mask = 0;

      /* bit c set <=> component c of the result is used */
      for (c = 0; c < 4; ++c) {
         if (nvi->def[c]->refc)
            nvi->tex_mask |= 1 << c;
         def[c] = nvi->def[c];
      }

      /* live components first, dead ones after */
      j = 0;
      for (c = 0; c < 4; ++c)
         if (nvi->tex_mask & (1 << c))
            nvi->def[j++] = def[c];
      for (c = 0; c < 4; ++c)
         if (!(nvi->tex_mask & (1 << c)))
            nvi->def[j++] = def[c];
      assert(j == 4);
   }
   return 0;
}
852
/* Context for dead-code elimination. */
struct nv_pass_dce {
   struct nv_pc *pc;
   uint removed; /* number of instructions deleted in this iteration */
};
857
858 static int
859 nv_pass_dce(struct nv_pass_dce *ctx, struct nv_basic_block *b)
860 {
861 int j;
862 struct nv_instruction *nvi, *next;
863
864 for (nvi = b->phi ? b->phi : b->entry; nvi; nvi = next) {
865 next = nvi->next;
866
867 if (inst_cullable(nvi)) {
868 nv_nvi_delete(nvi);
869
870 ++ctx->removed;
871 }
872 }
873 DESCEND_ARBITRARY(j, nv_pass_dce);
874
875 return 0;
876 }
877
/* Register allocation inserted ELSE blocks for all IF/ENDIF without ELSE.
 * Returns TRUE if @bb initiates an IF/ELSE/ENDIF clause, or is an IF with
 * BREAK and dummy ELSE block.
 */
static INLINE boolean
bb_is_if_else_endif(struct nv_basic_block *bb)
{
   if (!bb->out[0] || !bb->out[1])
      return FALSE;

   if (bb->out[0]->out_kind[0] == CFG_EDGE_LOOP_LEAVE) {
      /* IF with BREAK: the THEN side leaves the loop, so it reconverges
       * via its second edge
       */
      return (bb->out[0]->out[1] == bb->out[1]->out[0] &&
              !bb->out[1]->out[1]);
   } else {
      /* plain diamond: both arms fall through to the same join block */
      return (bb->out[0]->out[0] == bb->out[1]->out[0] &&
              !bb->out[0]->out[1] &&
              !bb->out[1]->out[1]);
   }
}
897
/* Predicate every instruction of block @b with (@p, @cc) and remove the
 * branch at the end — the block is being merged into straight-line code.
 */
static void
predicate_instructions(struct nv_pc *pc, struct nv_basic_block *b,
                       struct nv_value *p, ubyte cc)
{
   struct nv_instruction *nvi;

   if (!b->entry)
      return;
   /* all instructions except the last */
   for (nvi = b->entry; nvi->next; nvi = nvi->next) {
      if (!nvi_isnop(nvi)) {
         nvi->cc = cc;
         nv_reference(pc, &nvi->flags_src, p);
      }
   }

   /* the last one: delete the branch, otherwise predicate it too */
   if (nvi->opcode == NV_OP_BRA)
      nv_nvi_delete(nvi);
   else
   if (!nvi_isnop(nvi)) {
      nvi->cc = cc;
      nv_reference(pc, &nvi->flags_src, p);
   }
}
922
/* NOTE: Run this after register allocation, we can just cut out the cflow
 * instructions and hook the predicates to the conditional OPs if they are
 * not using immediates; better than inserting SELECT to join definitions.
 *
 * NOTE: Should adapt prior optimization to make this possible more often.
 */
static int
nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b)
{
   struct nv_instruction *nvi;
   struct nv_value *pred;
   int i;
   int n0 = 0, n1 = 0; /* instruction counts of the two arms */

   if (bb_is_if_else_endif(b)) {

      debug_printf("pass_flatten: IF/ELSE/ENDIF construct at BB:%i\n", b->id);

      /* check that every instruction in both arms can be predicated */
      for (n0 = 0, nvi = b->out[0]->entry; nvi; nvi = nvi->next, ++n0)
         if (!nv50_nvi_can_predicate(nvi))
            break;
      if (!nvi) {
         for (n1 = 0, nvi = b->out[1]->entry; nvi; nvi = nvi->next, ++n1)
            if (!nv50_nvi_can_predicate(nvi))
               break;
         if (nvi) {
            debug_printf("cannot predicate: "); nv_print_instruction(nvi);
         }
      } else {
         debug_printf("cannot predicate: "); nv_print_instruction(nvi);
      }

      if (!nvi && n0 < 12 && n1 < 12) { /* 12 as arbitrary limit */
         assert(b->exit && b->exit->flags_src);
         pred = b->exit->flags_src->value;

         /* THEN arm runs if condition true, ELSE arm if false */
         predicate_instructions(ctx->pc, b->out[0], pred, NV_CC_NE | NV_CC_U);
         predicate_instructions(ctx->pc, b->out[1], pred, NV_CC_EQ);

         assert(b->exit && b->exit->opcode == NV_OP_BRA);
         nv_nvi_delete(b->exit);

         /* NOTE(review): assumes nv_nvi_delete updates b->exit to the
          * preceding instruction — TODO confirm
          */
         if (b->exit && b->exit->opcode == NV_OP_JOINAT)
            nv_nvi_delete(b->exit);

         /* the join point no longer needs to reconverge */
         if ((nvi = b->out[0]->out[0]->entry)) {
            nvi->is_join = 0;
            if (nvi->opcode == NV_OP_JOIN)
               nv_nvi_delete(nvi);
         }
      }
   }
   DESCEND_ARBITRARY(i, nv_pass_flatten);

   return 0;
}
979
/* local common subexpression elimination, stupid O(n^2) implementation */
static int
nv_pass_cse(struct nv_pass *ctx, struct nv_basic_block *b)
{
   struct nv_instruction *ir, *ik, *next;
   struct nv_instruction *entry = b->phi ? b->phi : b->entry;
   int s;
   unsigned int reps;

   /* iterate to a fixed point: each replacement may enable another */
   do {
      reps = 0;
      for (ir = entry; ir; ir = next) {
         next = ir->next;
         /* compare @ir against every earlier instruction @ik */
         for (ik = entry; ik != ir; ik = ik->next) {
            if (ir->opcode != ik->opcode)
               continue;

            if (ik->opcode == NV_OP_LDA ||
                ik->opcode == NV_OP_STA ||
                ik->opcode == NV_OP_MOV ||
                nv_is_vector_op(ik->opcode))
               continue; /* ignore loads, stores & moves */

            if (ik->src[4] || ir->src[4])
               continue; /* don't mess with address registers */

            if (ik->flags_src || ir->flags_src ||
                ik->flags_def || ir->flags_def)
               continue; /* and also not with flags, for now */

            assert(ik->def[0] && ir->def[0]);

            if (ik->def[0]->reg.file == NV_FILE_OUT ||
                ir->def[0]->reg.file == NV_FILE_OUT ||
                !values_equal(ik->def[0], ir->def[0]))
               continue;

            /* all sources must match in value and modifier */
            for (s = 0; s < 3; ++s) {
               struct nv_value *a, *b;

               if (!ik->src[s]) {
                  if (ir->src[s])
                     break;
                  continue;
               }
               if (ik->src[s]->mod != ir->src[s]->mod)
                  break;
               a = ik->src[s]->value;
               b = ir->src[s]->value;
               if (a == b)
                  continue;
               if (a->reg.file != b->reg.file ||
                   a->reg.id < 0 ||
                   a->reg.id != b->reg.id)
                  break;
            }
            if (s == 3) {
               /* duplicate found: delete @ir, reuse @ik's result */
               nv_nvi_delete(ir);
               ++reps;
               nvcg_replace_value(ctx->pc, ir->def[0], ik->def[0]);
               break;
            }
         }
      }
   } while(reps);

   DESCEND_ARBITRARY(s, nv_pass_cse);

   return 0;
}
1050
1051 int
1052 nv_pc_exec_pass0(struct nv_pc *pc)
1053 {
1054 struct nv_pass_reld_elim *reldelim;
1055 struct nv_pass pass;
1056 struct nv_pass_dce dce;
1057 int ret;
1058
1059 pass.n = 0;
1060 pass.pc = pc;
1061
1062 /* Do this first, so we don't have to pay attention
1063 * to whether sources are supported memory loads.
1064 */
1065 pc->pass_seq++;
1066 ret = nv_pass_lower_arith(&pass, pc->root);
1067 if (ret)
1068 return ret;
1069
1070 pc->pass_seq++;
1071 ret = nv_pass_fold_loads(&pass, pc->root);
1072 if (ret)
1073 return ret;
1074
1075 pc->pass_seq++;
1076 ret = nv_pass_fold_stores(&pass, pc->root);
1077 if (ret)
1078 return ret;
1079
1080 reldelim = CALLOC_STRUCT(nv_pass_reld_elim);
1081 reldelim->pc = pc;
1082 pc->pass_seq++;
1083 ret = nv_pass_reload_elim(reldelim, pc->root);
1084 FREE(reldelim);
1085 if (ret)
1086 return ret;
1087
1088 pc->pass_seq++;
1089 ret = nv_pass_cse(&pass, pc->root);
1090 if (ret)
1091 return ret;
1092
1093 pc->pass_seq++;
1094 ret = nv_pass_lower_mods(&pass, pc->root);
1095 if (ret)
1096 return ret;
1097
1098 dce.pc = pc;
1099 do {
1100 dce.removed = 0;
1101 pc->pass_seq++;
1102 ret = nv_pass_dce(&dce, pc->root);
1103 if (ret)
1104 return ret;
1105 } while (dce.removed);
1106
1107 ret = nv_pass_tex_mask(&pass, pc->root);
1108 if (ret)
1109 return ret;
1110
1111 return ret;
1112 }