/*
 * Copyright 2010 Christoph Bumiller
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <math.h> /* sqrtf(), used for RSQ constant folding below */

#include "nv50_pc.h"

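/* Visit block b, then descend into each successor that has not yet been
 * seen in the current pass (pass_seq is bumped once per pass and serves
 * as the visited mark).
 */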
#define DESCEND_ARBITRARY(j, f)                                    \
do {                                                               \
   b->pass_seq = ctx->pc->pass_seq;                                \
                                                                   \
   for (j = 0; j < 2; ++j)                                         \
      if (b->out[j] && b->out[j]->pass_seq < ctx->pc->pass_seq)    \
         f(ctx, b->out[j]);                                        \
} while (0)

extern unsigned nv50_inst_min_size(struct nv_instruction *);

struct nv_pc_pass {
   struct nv_pc *pc;
};

static INLINE boolean
values_equal(struct nv_value *a, struct nv_value *b)
{
   /* XXX: sizes */
   return (a->reg.file == b->reg.file && a->join->reg.id == b->join->reg.id);
}

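/* Check whether instruction b reads any value (or condition flags) that
 * instruction a writes; if so, the two must not change relative order.
 */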
static INLINE boolean
inst_commutation_check(struct nv_instruction *a,
                       struct nv_instruction *b)
{
   int si, di;

   for (di = 0; di < 4; ++di) {
      if (!a->def[di])
         break;
      for (si = 0; si < 5; ++si) {
         if (!b->src[si])
            continue;
         if (values_equal(a->def[di], b->src[si]->value))
            return FALSE;
      }
   }

   if (b->flags_src && b->flags_src->value == a->flags_def)
      return FALSE;

   return TRUE;
}

/* Check whether we can swap the order of the instructions,
 * where a & b may be either the earlier or the later one.
 */
static boolean
inst_commutation_legal(struct nv_instruction *a,
                       struct nv_instruction *b)
{
   return inst_commutation_check(a, b) && inst_commutation_check(b, a);
}

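/* An instruction can be culled by DCE if it has no side effects: it is not
 * a store, does not end or join control flow, has no branch target, is not
 * marked fixed, and none of its results are referenced.
 */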
static INLINE boolean
inst_cullable(struct nv_instruction *nvi)
{
   if (nvi->opcode == NV_OP_STA)
      return FALSE;
   return (!(nvi->is_terminator || nvi->is_join ||
             nvi->target ||
             nvi->fixed ||
             nv_nvi_refcount(nvi)));
}

static INLINE boolean
nvi_isnop(struct nv_instruction *nvi)
{
   if (nvi->opcode == NV_OP_EXPORT || nvi->opcode == NV_OP_UNDEF)
      return TRUE;

   /* NOTE: 'fixed' now only means that it shouldn't be optimized away,
    * but we can still remove it if it is a no-op move.
    */
   if (/* nvi->fixed || */
       /* nvi->flags_src || */ /* cond. MOV to same register is still NOP */
       nvi->flags_def ||
       nvi->is_terminator ||
       nvi->is_join)
      return FALSE;

   if (nvi->def[0] && nvi->def[0]->join->reg.id < 0)
      return TRUE;

   if (nvi->opcode != NV_OP_MOV && nvi->opcode != NV_OP_SELECT)
      return FALSE;

   if (nvi->def[0]->reg.file != nvi->src[0]->value->reg.file)
      return FALSE;

   if (nvi->src[0]->value->join->reg.id < 0) {
      NV50_DBGMSG(PROG_IR, "nvi_isnop: orphaned value detected\n");
      return TRUE;
   }

   if (nvi->opcode == NV_OP_SELECT)
      if (!values_equal(nvi->def[0], nvi->src[1]->value))
         return FALSE;

   return values_equal(nvi->def[0], nvi->src[0]->value);
}

struct nv_pass {
   struct nv_pc *pc;
   int n;
   void *priv;
};

static int
nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b);

static void
nv_pc_pass_pre_emission(void *priv, struct nv_basic_block *b)
{
   struct nv_pc *pc = (struct nv_pc *)priv;
   struct nv_basic_block *in;
   struct nv_instruction *nvi, *next;
   int j;
   uint size, n32 = 0;

   /* find first non-empty block emitted before b */
   for (j = pc->num_blocks - 1; j >= 0 && !pc->bb_list[j]->bin_size; --j);
   for (; j >= 0; --j) {
      in = pc->bb_list[j];

      /* check for no-op branches (BRA $PC+8) */
      if (in->exit && in->exit->opcode == NV_OP_BRA && in->exit->target == b) {
         in->bin_size -= 8;
         pc->bin_size -= 8;

         for (++j; j < pc->num_blocks; ++j)
            pc->bb_list[j]->bin_pos -= 8;

         nv_nvi_delete(in->exit);
      }
      b->bin_pos = in->bin_pos + in->bin_size;

      if (in->bin_size) /* no more no-op branches to b */
         break;
   }

   pc->bb_list[pc->num_blocks++] = b;

   /* visit node */

   for (nvi = b->entry; nvi; nvi = next) {
      next = nvi->next;
      if (nvi_isnop(nvi))
         nv_nvi_delete(nvi);
   }

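   /* nv50 instructions use a 4-byte (short) or 8-byte (long) encoding, and
    * short encodings apparently have to come in pairs within an 8-byte
    * unit.  n32 counts pending short encodings; where a lone short would
    * remain, either swap in the next short instruction (if legal) or force
    * the long encoding.  b->bin_size is counted in 4-byte units here and
    * scaled to bytes at the end.
    */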
   for (nvi = b->entry; nvi; nvi = next) {
      next = nvi->next;

      size = nv50_inst_min_size(nvi);
      if (nvi->next && size < 8)
         ++n32;
      else
      if ((n32 & 1) && nvi->next &&
          nv50_inst_min_size(nvi->next) == 4 &&
          inst_commutation_legal(nvi, nvi->next)) {
         ++n32;
         nv_nvi_permute(nvi, nvi->next);
         next = nvi;
      } else {
         nvi->is_long = 1;

         b->bin_size += n32 & 1;
         if (n32 & 1)
            nvi->prev->is_long = 1;
         n32 = 0;
      }
      b->bin_size += 1 + nvi->is_long;
   }

   if (!b->entry) {
      NV50_DBGMSG(PROG_IR, "block %p is now empty\n", b);
   } else
   if (!b->exit->is_long) {
      assert(n32);
      b->exit->is_long = 1;
      b->bin_size += 1;

      /* might have deleted a whole tail of instructions */
      if (!b->exit->prev->is_long && !(n32 & 1)) {
         b->bin_size += 1;
         b->exit->prev->is_long = 1;
      }
   }
   assert(!b->entry || (b->exit && b->exit->is_long));

   pc->bin_size += b->bin_size *= 4;
}

static int
nv_pc_pass2(struct nv_pc *pc, struct nv_basic_block *root)
{
   struct nv_pass pass;

   pass.pc = pc;

   pc->pass_seq++;

   nv_pass_flatten(&pass, root);

   nv_pc_pass_in_order(root, nv_pc_pass_pre_emission, pc);

   return 0;
}

int
nv_pc_exec_pass2(struct nv_pc *pc)
{
   int i, ret;

   NV50_DBGMSG(PROG_IR, "preparing %u blocks for emission\n", pc->num_blocks);

   pc->num_blocks = 0; /* will reorder bb_list */

   for (i = 0; i < pc->num_subroutines + 1; ++i)
      if (pc->root[i] && (ret = nv_pc_pass2(pc, pc->root[i])))
         return ret;
   return 0;
}

static INLINE boolean
is_cmem_load(struct nv_instruction *nvi)
{
   return (nvi && nvi->opcode == NV_OP_LDA &&
           nvi->src[0]->value->reg.file >= NV_FILE_MEM_C(0) &&
           nvi->src[0]->value->reg.file <= NV_FILE_MEM_C(15));
}

static INLINE boolean
is_smem_load(struct nv_instruction *nvi)
{
   return (nvi && nvi->opcode == NV_OP_LDA &&
           (nvi->src[0]->value->reg.file == NV_FILE_MEM_S ||
            nvi->src[0]->value->reg.file == NV_FILE_MEM_P));
}

static INLINE boolean
is_immd_move(struct nv_instruction *nvi)
{
   return (nvi->opcode == NV_OP_MOV &&
           nvi->src[0]->value->reg.file == NV_FILE_IMM);
}

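/* For commutative ops, prefer a c[] memory operand in src1 and an s[]
 * memory operand in src0 (only certain operand slots can encode them
 * directly).  If the sources of a SET are swapped, the comparison has to
 * be mirrored: cc_swapped exchanges the two outer condition bits
 * (presumably LT and GT) of the 3-bit condition code.
 */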
static INLINE void
check_swap_src_0_1(struct nv_instruction *nvi)
{
   static const ubyte cc_swapped[8] = { 0, 4, 2, 6, 1, 5, 3, 7 };

   struct nv_ref *src0 = nvi->src[0], *src1 = nvi->src[1];

   if (!nv_op_commutative(nvi->opcode))
      return;
   assert(src0 && src1);

   if (src1->value->reg.file == NV_FILE_IMM)
      return;

   if (is_cmem_load(src0->value->insn)) {
      if (!is_cmem_load(src1->value->insn)) {
         nvi->src[0] = src1;
         nvi->src[1] = src0;
         /* debug_printf("swapping cmem load to 1\n"); */
      }
   } else
   if (is_smem_load(src1->value->insn)) {
      if (!is_smem_load(src0->value->insn)) {
         nvi->src[0] = src1;
         nvi->src[1] = src0;
         /* debug_printf("swapping smem load to 0\n"); */
      }
   }

   if (nvi->opcode == NV_OP_SET && nvi->src[0] != src0)
      nvi->set_cond = (nvi->set_cond & ~7) | cc_swapped[nvi->set_cond & 7];
}

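/* Write a computed value directly to an output register: if the only use
 * of an instruction's result is a plain MOV/STA to $oX, retarget its def
 * to the output register and delete the store.
 */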
static int
nv_pass_fold_stores(struct nv_pass *ctx, struct nv_basic_block *b)
{
   struct nv_instruction *nvi, *sti, *next;
   int j;

   for (sti = b->entry; sti; sti = next) {
      next = sti->next;

      /* only handling MOV to $oX here */
      if (!sti->def[0] || sti->def[0]->reg.file != NV_FILE_OUT)
         continue;
      if (sti->opcode != NV_OP_MOV && sti->opcode != NV_OP_STA)
         continue;

      nvi = sti->src[0]->value->insn;
      if (!nvi || nvi->opcode == NV_OP_PHI || nv_is_vector_op(nvi->opcode))
         continue;
      assert(nvi->def[0] == sti->src[0]->value);

      if (nvi->opcode == NV_OP_SELECT)
         continue;
      if (nvi->def[0]->refc > 1)
         continue;

      /* cannot write to $oX when using immediate */
      for (j = 0; j < 4 && nvi->src[j]; ++j)
         if (nvi->src[j]->value->reg.file == NV_FILE_IMM ||
             nvi->src[j]->value->reg.file == NV_FILE_MEM_L)
            break;
      if (j < 4 && nvi->src[j])
         continue;

      nvi->def[0] = sti->def[0];
      nvi->def[0]->insn = nvi;
      nvi->fixed = sti->fixed;

      nv_nvi_delete(sti);
   }
   DESCEND_ARBITRARY(j, nv_pass_fold_stores);

   return 0;
}

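/* Fold immediates and memory loads into their users where the target
 * opcode can encode the operand directly, deleting the load once it is no
 * longer referenced.
 */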
static int
nv_pass_fold_loads(struct nv_pass *ctx, struct nv_basic_block *b)
{
   struct nv_instruction *nvi, *ld;
   int j;

   for (nvi = b->entry; nvi; nvi = nvi->next) {
      check_swap_src_0_1(nvi);

      for (j = 0; j < 3; ++j) {
         if (!nvi->src[j])
            break;
         ld = nvi->src[j]->value->insn;
         if (!ld)
            continue;

         if (is_immd_move(ld) && nv50_nvi_can_use_imm(nvi, j)) {
            nv_reference(ctx->pc, &nvi->src[j], ld->src[0]->value);
            continue;
         }

         if (ld->opcode != NV_OP_LDA)
            continue;
         if (!nv50_nvi_can_load(nvi, j, ld->src[0]->value))
            continue;

         if (j == 0 && ld->src[4]) /* can't load shared mem */
            continue;

         /* fold it! */
         nv_reference(ctx->pc, &nvi->src[j], ld->src[0]->value);
         if (ld->src[4])
            nv_reference(ctx->pc, &nvi->src[4], ld->src[4]->value);

         if (!nv_nvi_refcount(ld))
            nv_nvi_delete(ld);
      }
   }
   DESCEND_ARBITRARY(j, nv_pass_fold_loads);

   return 0;
}

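/* Turn free-standing NEG/ABS instructions into source modifiers on their
 * users where supported, and fuse SAT into a preceding ADD/MAD.
 */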
/* NOTE: Assumes loads have not yet been folded. */
static int
nv_pass_lower_mods(struct nv_pass *ctx, struct nv_basic_block *b)
{
   int j;
   struct nv_instruction *nvi, *mi, *next;
   ubyte mod;

   for (nvi = b->entry; nvi; nvi = next) {
      next = nvi->next;
      if (nvi->opcode == NV_OP_SUB) {
         nvi->opcode = NV_OP_ADD;
         nvi->src[1]->mod ^= NV_MOD_NEG;
      }

      for (j = 0; j < 4 && nvi->src[j]; ++j) {
         mi = nvi->src[j]->value->insn;
         if (!mi)
            continue;
         if (mi->def[0]->refc > 1)
            continue;

         if (mi->opcode == NV_OP_NEG) mod = NV_MOD_NEG;
         else
         if (mi->opcode == NV_OP_ABS) mod = NV_MOD_ABS;
         else
            continue;
         assert(!(mod & mi->src[0]->mod & NV_MOD_NEG));

         mod |= mi->src[0]->mod;

         if (mi->flags_def || mi->flags_src)
            continue;

         if ((nvi->opcode == NV_OP_ABS) || (nvi->src[j]->mod & NV_MOD_ABS)) {
            /* abs neg [abs] = abs */
            mod &= ~(NV_MOD_NEG | NV_MOD_ABS);
         } else
         if ((nvi->opcode == NV_OP_NEG) && (mod & NV_MOD_NEG)) {
            /* neg as opcode and modifier on same insn cannot occur */
            /* neg neg abs = abs, neg neg = identity */
            assert(j == 0);
            if (mod & NV_MOD_ABS)
               nvi->opcode = NV_OP_ABS;
            else
            if (nvi->flags_def)
               nvi->opcode = NV_OP_CVT;
            else
               nvi->opcode = NV_OP_MOV;
            mod = 0;
         }

         if ((nv50_supported_src_mods(nvi->opcode, j) & mod) != mod)
            continue;

         nv_reference(ctx->pc, &nvi->src[j], mi->src[0]->value);

         nvi->src[j]->mod ^= mod;
      }

      if (nvi->opcode == NV_OP_SAT) {
         mi = nvi->src[0]->value->insn;

         if (!mi ||
             (mi->opcode != NV_OP_ADD && mi->opcode != NV_OP_MAD))
            continue;
         if (mi->flags_def || mi->def[0]->refc > 1)
            continue;

         mi->saturate = 1;
         mi->def[0] = nvi->def[0];
         mi->def[0]->insn = mi;
         if (nvi->flags_def) {
            mi->flags_def = nvi->flags_def;
            mi->flags_def->insn = mi;
         }
         nv_nvi_delete(nvi);
      }
   }
   DESCEND_ARBITRARY(j, nv_pass_lower_mods);

   return 0;
}

#define SRC_IS_MUL(s) ((s)->insn && (s)->insn->opcode == NV_OP_MUL)

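/* Apply ABS/NEG source modifiers to a raw 32-bit immediate, interpreting
 * it as an IEEE float (sign-bit manipulation) or as a two's complement
 * integer.
 */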
static void
modifiers_apply(uint32_t *val, ubyte type, ubyte mod)
{
   if (mod & NV_MOD_ABS) {
      if (type == NV_TYPE_F32)
         *val &= 0x7fffffff;
      else
      if ((*val) & (1u << 31))
         *val = ~(*val) + 1;
   }
   if (mod & NV_MOD_NEG) {
      if (type == NV_TYPE_F32)
         *val ^= 0x80000000;
      else
         *val = ~(*val) + 1;
   }
}

static INLINE uint
modifiers_opcode(ubyte mod)
{
   switch (mod) {
   case NV_MOD_NEG: return NV_OP_NEG;
   case NV_MOD_ABS: return NV_OP_ABS;
   case 0:
      return NV_OP_MOV;
   default:
      return NV_OP_NOP;
   }
}

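/* Fold an expression with two immediate sources: evaluate MUL/ADD/SUB (and
 * the multiply part of a MAD) at compile time and rewrite the instruction
 * into a MOV (or an ADD of the remaining MAD source) from a new immediate.
 */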
static void
constant_expression(struct nv_pc *pc, struct nv_instruction *nvi,
                    struct nv_value *src0, struct nv_value *src1)
{
   struct nv_value *val;
   union {
      float f32;
      uint32_t u32;
      int32_t s32;
   } u0, u1, u;
   ubyte type;

   if (!nvi->def[0])
      return;
   type = nvi->def[0]->reg.type;

   u.u32 = 0;
   u0.u32 = src0->reg.imm.u32;
   u1.u32 = src1->reg.imm.u32;

   modifiers_apply(&u0.u32, type, nvi->src[0]->mod);
   modifiers_apply(&u1.u32, type, nvi->src[1]->mod);

   switch (nvi->opcode) {
   case NV_OP_MAD:
      if (nvi->src[2]->value->reg.file != NV_FILE_GPR)
         return;
      /* fall through */
   case NV_OP_MUL:
      switch (type) {
      case NV_TYPE_F32: u.f32 = u0.f32 * u1.f32; break;
      case NV_TYPE_U32: u.u32 = u0.u32 * u1.u32; break;
      case NV_TYPE_S32: u.s32 = u0.s32 * u1.s32; break;
      default:
         assert(0);
         break;
      }
      break;
   case NV_OP_ADD:
      switch (type) {
      case NV_TYPE_F32: u.f32 = u0.f32 + u1.f32; break;
      case NV_TYPE_U32: u.u32 = u0.u32 + u1.u32; break;
      case NV_TYPE_S32: u.s32 = u0.s32 + u1.s32; break;
      default:
         assert(0);
         break;
      }
      break;
   case NV_OP_SUB:
      switch (type) {
      case NV_TYPE_F32: u.f32 = u0.f32 - u1.f32; break;
      case NV_TYPE_U32: u.u32 = u0.u32 - u1.u32; break;
      case NV_TYPE_S32: u.s32 = u0.s32 - u1.s32; break;
      default:
         assert(0);
         break;
      }
      break;
   default:
      return;
   }

   nvi->opcode = NV_OP_MOV;

   val = new_value(pc, NV_FILE_IMM, type);

   val->reg.imm.u32 = u.u32;

   nv_reference(pc, &nvi->src[1], NULL);
   nv_reference(pc, &nvi->src[0], val);

   if (nvi->src[2]) { /* from MAD */
      nvi->src[1] = nvi->src[0];
      nvi->src[0] = nvi->src[2];
      nvi->src[2] = NULL;
      nvi->opcode = NV_OP_ADD;

      if (val->reg.imm.u32 == 0) {
         nvi->src[1] = NULL;
         nvi->opcode = NV_OP_MOV;
      }
   }
}

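/* Algebraic simplification with a single immediate operand: rewrite
 * x * 1 -> x, x * 2 -> x + x, x * -1 -> -x, x * 0 -> 0, x + 0 -> x, and
 * evaluate RCP/RSQ of an immediate outright.
 */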
static void
constant_operand(struct nv_pc *pc,
                 struct nv_instruction *nvi, struct nv_value *val, int s)
{
   union {
      float f32;
      uint32_t u32;
      int32_t s32;
   } u;
   int t = s ? 0 : 1;
   uint op;
   ubyte type;

   if (!nvi->def[0])
      return;
   type = nvi->def[0]->reg.type;

   u.u32 = val->reg.imm.u32;
   modifiers_apply(&u.u32, type, nvi->src[s]->mod);

   switch (nvi->opcode) {
   case NV_OP_MUL:
      if ((type == NV_TYPE_F32 && u.f32 == 1.0f) ||
          (NV_TYPE_ISINT(type) && u.u32 == 1)) {
         if ((op = modifiers_opcode(nvi->src[t]->mod)) == NV_OP_NOP)
            break;
         nvi->opcode = op;
         nv_reference(pc, &nvi->src[s], NULL);
         nvi->src[0] = nvi->src[t];
         nvi->src[1] = NULL;
      } else
      if ((type == NV_TYPE_F32 && u.f32 == 2.0f) ||
          (NV_TYPE_ISINT(type) && u.u32 == 2)) {
         nvi->opcode = NV_OP_ADD;
         nv_reference(pc, &nvi->src[s], nvi->src[t]->value);
         nvi->src[s]->mod = nvi->src[t]->mod;
      } else
      if (type == NV_TYPE_F32 && u.f32 == -1.0f) {
         if (nvi->src[t]->mod & NV_MOD_NEG)
            nvi->opcode = NV_OP_MOV;
         else
            nvi->opcode = NV_OP_NEG;
         nv_reference(pc, &nvi->src[s], NULL);
         nvi->src[0] = nvi->src[t];
         nvi->src[1] = NULL;
      } else
      if (type == NV_TYPE_F32 && u.f32 == -2.0f) {
         nvi->opcode = NV_OP_ADD;
         nv_reference(pc, &nvi->src[s], nvi->src[t]->value);
         nvi->src[s]->mod = (nvi->src[t]->mod ^= NV_MOD_NEG);
      } else
      if (u.u32 == 0) {
         nvi->opcode = NV_OP_MOV;
         nv_reference(pc, &nvi->src[t], NULL);
         if (s) {
            nvi->src[0] = nvi->src[1];
            nvi->src[1] = NULL;
         }
      }
      break;
   case NV_OP_ADD:
      if (u.u32 == 0) {
         if ((op = modifiers_opcode(nvi->src[t]->mod)) == NV_OP_NOP)
            break;
         nvi->opcode = op;
         nv_reference(pc, &nvi->src[s], NULL);
         nvi->src[0] = nvi->src[t];
         nvi->src[1] = NULL;
      }
      break;
   case NV_OP_RCP:
      u.f32 = 1.0f / u.f32;
      (val = new_value(pc, NV_FILE_IMM, NV_TYPE_F32))->reg.imm.f32 = u.f32;
      nvi->opcode = NV_OP_MOV;
      assert(s == 0);
      nv_reference(pc, &nvi->src[0], val);
      break;
   case NV_OP_RSQ:
      u.f32 = 1.0f / sqrtf(u.f32);
      (val = new_value(pc, NV_FILE_IMM, NV_TYPE_F32))->reg.imm.f32 = u.f32;
      nvi->opcode = NV_OP_MOV;
      assert(s == 0);
      nv_reference(pc, &nvi->src[0], val);
      break;
   default:
      break;
   }

   if (nvi->opcode == NV_OP_MOV && nvi->flags_def) {
      struct nv_instruction *cvt = new_instruction_at(pc, nvi, NV_OP_CVT);

      nv_reference(pc, &cvt->src[0], nvi->def[0]);

      cvt->flags_def = nvi->flags_def;
      nvi->flags_def = NULL;
   }
}

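/* Fold constant expressions and operands, then try to merge a single-use
 * MUL into a dependent ADD to form MAD (both sources of the ADD must still
 * be in GPRs for this to be legal).
 */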
static int
nv_pass_lower_arith(struct nv_pass *ctx, struct nv_basic_block *b)
{
   struct nv_instruction *nvi, *next;
   int j;

   for (nvi = b->entry; nvi; nvi = next) {
      struct nv_value *src0, *src1, *src;
      int mod;

      next = nvi->next;

      src0 = nvcg_find_immediate(nvi->src[0]);
      src1 = nvcg_find_immediate(nvi->src[1]);

      if (src0 && src1)
         constant_expression(ctx->pc, nvi, src0, src1);
      else {
         if (src0)
            constant_operand(ctx->pc, nvi, src0, 0);
         else
         if (src1)
            constant_operand(ctx->pc, nvi, src1, 1);
      }

      /* try to combine MUL, ADD into MAD */
      if (nvi->opcode != NV_OP_ADD)
         continue;

      src0 = nvi->src[0]->value;
      src1 = nvi->src[1]->value;

      if (SRC_IS_MUL(src0) && src0->refc == 1)
         src = src0;
      else
      if (SRC_IS_MUL(src1) && src1->refc == 1)
         src = src1;
      else
         continue;

      /* could have an immediate from above constant_* */
      if (src0->reg.file != NV_FILE_GPR || src1->reg.file != NV_FILE_GPR)
         continue;

      nvi->opcode = NV_OP_MAD;
      mod = nvi->src[(src == src0) ? 0 : 1]->mod;
      nv_reference(ctx->pc, &nvi->src[(src == src0) ? 0 : 1], NULL);
      nvi->src[2] = nvi->src[(src == src0) ? 1 : 0];

      assert(!(mod & ~NV_MOD_NEG));
      nvi->src[0] = new_ref(ctx->pc, src->insn->src[0]->value);
      nvi->src[1] = new_ref(ctx->pc, src->insn->src[1]->value);
      nvi->src[0]->mod = src->insn->src[0]->mod ^ mod;
      nvi->src[1]->mod = src->insn->src[1]->mod;
   }
   DESCEND_ARBITRARY(j, nv_pass_lower_arith);

   return 0;
}

/* TODO: redundant store elimination */

struct load_record {
   struct load_record *next;
   uint64_t data[2];
   struct nv_value *value;
};

#define LOAD_RECORD_POOL_SIZE 1024

struct nv_pass_reld_elim {
   struct nv_pc *pc;

   struct load_record *imm;
   struct load_record *mem_s;
   struct load_record *mem_v;
   struct load_record *mem_c[16];
   struct load_record *mem_l;

   struct load_record pool[LOAD_RECORD_POOL_SIZE];
   int alloc;
};

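/* Eliminate redundant loads: keep one list of load records per source
 * space (immediates, s[] inputs, interpolated varyings, c[0..15], l[]),
 * keyed by register id or immediate value plus the address-register
 * source, and replace a reload with the def of the previous matching
 * load.  Records are only valid within one block; the lists are reset
 * before descending further.
 */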
/* TODO: properly handle loads from l[] memory in the presence of stores */
static int
nv_pass_reload_elim(struct nv_pass_reld_elim *ctx, struct nv_basic_block *b)
{
   struct load_record **rec, *it;
   struct nv_instruction *ld, *next;
   uint64_t data[2];
   struct nv_value *val;
   int j;

   for (ld = b->entry; ld; ld = next) {
      next = ld->next;
      if (!ld->src[0])
         continue;
      val = ld->src[0]->value;
      rec = NULL;

      if (ld->opcode == NV_OP_LINTERP || ld->opcode == NV_OP_PINTERP) {
         data[0] = val->reg.id;
         data[1] = 0;
         rec = &ctx->mem_v;
      } else
      if (ld->opcode == NV_OP_LDA) {
         data[0] = val->reg.id;
         data[1] = ld->src[4] ? ld->src[4]->value->n : ~0ULL;
         if (val->reg.file >= NV_FILE_MEM_C(0) &&
             val->reg.file <= NV_FILE_MEM_C(15))
            rec = &ctx->mem_c[val->reg.file - NV_FILE_MEM_C(0)];
         else
         if (val->reg.file == NV_FILE_MEM_S)
            rec = &ctx->mem_s;
         else
         if (val->reg.file == NV_FILE_MEM_L)
            rec = &ctx->mem_l;
      } else
      if ((ld->opcode == NV_OP_MOV) && (val->reg.file == NV_FILE_IMM)) {
         data[0] = val->reg.imm.u32;
         data[1] = 0;
         rec = &ctx->imm;
      }

      if (!rec || !ld->def[0]->refc)
         continue;

      for (it = *rec; it; it = it->next)
         if (it->data[0] == data[0] && it->data[1] == data[1])
            break;

      if (it) {
         if (ld->def[0]->reg.id >= 0)
            it->value = ld->def[0];
         else
         if (!ld->fixed)
            nvcg_replace_value(ctx->pc, ld->def[0], it->value);
      } else {
         if (ctx->alloc == LOAD_RECORD_POOL_SIZE)
            continue;
         it = &ctx->pool[ctx->alloc++];
         it->next = *rec;
         it->data[0] = data[0];
         it->data[1] = data[1];
         it->value = ld->def[0];
         *rec = it;
      }
   }

   ctx->imm = NULL;
   ctx->mem_s = NULL;
   ctx->mem_v = NULL;
   for (j = 0; j < 16; ++j)
      ctx->mem_c[j] = NULL;
   ctx->mem_l = NULL;
   ctx->alloc = 0;

   DESCEND_ARBITRARY(j, nv_pass_reload_elim);

   return 0;
}

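/* A vector op (texture fetch) defines up to 4 values; record which
 * components are actually referenced in tex_mask and compact the live defs
 * to the front so only the used components need registers.
 */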
static int
nv_pass_tex_mask(struct nv_pass *ctx, struct nv_basic_block *b)
{
   int i, c, j;

   for (i = 0; i < ctx->pc->num_instructions; ++i) {
      struct nv_instruction *nvi = &ctx->pc->instructions[i];
      struct nv_value *def[4];

      if (!nv_is_vector_op(nvi->opcode))
         continue;
      nvi->tex_mask = 0;

      for (c = 0; c < 4; ++c) {
         if (nvi->def[c]->refc)
            nvi->tex_mask |= 1 << c;
         def[c] = nvi->def[c];
      }

      j = 0;
      for (c = 0; c < 4; ++c)
         if (nvi->tex_mask & (1 << c))
            nvi->def[j++] = def[c];
      for (c = 0; c < 4; ++c)
         if (!(nvi->tex_mask & (1 << c)))
            nvi->def[j++] = def[c];
      assert(j == 4);
   }
   return 0;
}

struct nv_pass_dce {
   struct nv_pc *pc;
   uint removed;
};

static int
nv_pass_dce(struct nv_pass_dce *ctx, struct nv_basic_block *b)
{
   int j;
   struct nv_instruction *nvi, *next;

   for (nvi = b->phi ? b->phi : b->entry; nvi; nvi = next) {
      next = nvi->next;

      if (inst_cullable(nvi)) {
         nv_nvi_delete(nvi);

         ++ctx->removed;
      }
   }
   DESCEND_ARBITRARY(j, nv_pass_dce);

   return 0;
}

/* Register allocation inserted ELSE blocks for all IF/ENDIF without ELSE.
 * Returns TRUE if @bb initiates an IF/ELSE/ENDIF clause, or is an IF with
 * BREAK and dummy ELSE block.
 */
static INLINE boolean
bb_is_if_else_endif(struct nv_basic_block *bb)
{
   if (!bb->out[0] || !bb->out[1])
      return FALSE;

   if (bb->out[0]->out_kind[0] == CFG_EDGE_LOOP_LEAVE) {
      return (bb->out[0]->out[1] == bb->out[1]->out[0] &&
              !bb->out[1]->out[1]);
   } else {
      return (bb->out[0]->out[0] == bb->out[1]->out[0] &&
              !bb->out[0]->out[1] &&
              !bb->out[1]->out[1]);
   }
}

/* predicate instructions and remove branch at the end */
static void
predicate_instructions(struct nv_pc *pc, struct nv_basic_block *b,
                       struct nv_value *p, ubyte cc)
{
   struct nv_instruction *nvi;

   if (!b->entry)
      return;
   for (nvi = b->entry; nvi->next; nvi = nvi->next) {
      if (!nvi_isnop(nvi)) {
         nvi->cc = cc;
         nv_reference(pc, &nvi->flags_src, p);
      }
   }

   if (nvi->opcode == NV_OP_BRA)
      nv_nvi_delete(nvi);
   else
   if (!nvi_isnop(nvi)) {
      nvi->cc = cc;
      nv_reference(pc, &nvi->flags_src, p);
   }
}

/* NOTE: Run this after register allocation: we can then just cut out the
 * cflow instructions and hook the predicates to the conditional OPs if
 * they are not using immediates; better than inserting SELECT to join
 * definitions.
 *
 * NOTE: Should adapt prior optimization to make this possible more often.
 */
static int
nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b)
{
   struct nv_instruction *nvi;
   struct nv_value *pred;
   int i;
   int n0 = 0, n1 = 0;

   if (bb_is_if_else_endif(b)) {

      NV50_DBGMSG(PROG_IR,
                  "pass_flatten: IF/ELSE/ENDIF construct at BB:%i\n", b->id);

      for (n0 = 0, nvi = b->out[0]->entry; nvi; nvi = nvi->next, ++n0)
         if (!nv50_nvi_can_predicate(nvi))
            break;
      if (!nvi) {
         for (n1 = 0, nvi = b->out[1]->entry; nvi; nvi = nvi->next, ++n1)
            if (!nv50_nvi_can_predicate(nvi))
               break;
#if NV50_DEBUG & NV50_DEBUG_PROG_IR
         if (nvi) {
            debug_printf("cannot predicate: "); nv_print_instruction(nvi);
         }
      } else {
         debug_printf("cannot predicate: "); nv_print_instruction(nvi);
#endif
      }

      if (!nvi && n0 < 12 && n1 < 12) { /* 12 as arbitrary limit */
         assert(b->exit && b->exit->flags_src);
         pred = b->exit->flags_src->value;

         predicate_instructions(ctx->pc, b->out[0], pred, NV_CC_NE | NV_CC_U);
         predicate_instructions(ctx->pc, b->out[1], pred, NV_CC_EQ);

         assert(b->exit && b->exit->opcode == NV_OP_BRA);
         nv_nvi_delete(b->exit);

         if (b->exit && b->exit->opcode == NV_OP_JOINAT)
            nv_nvi_delete(b->exit);

         i = (b->out[0]->out_kind[0] == CFG_EDGE_LOOP_LEAVE) ? 1 : 0;

         if ((nvi = b->out[0]->out[i]->entry)) {
            nvi->is_join = 0;
            if (nvi->opcode == NV_OP_JOIN)
               nv_nvi_delete(nvi);
         }
      }
   }
   DESCEND_ARBITRARY(i, nv_pass_flatten);

   return 0;
}

/* local common subexpression elimination, stupid O(n^2) implementation */
static int
nv_pass_cse(struct nv_pass *ctx, struct nv_basic_block *b)
{
   struct nv_instruction *ir, *ik, *next;
   struct nv_instruction *entry = b->phi ? b->phi : b->entry;
   int s;
   unsigned int reps;

   do {
      reps = 0;
      for (ir = entry; ir; ir = next) {
         next = ir->next;
         for (ik = entry; ik != ir; ik = ik->next) {
            if (ir->opcode != ik->opcode || ir->fixed)
               continue;

            if (!ir->def[0] || !ik->def[0] ||
                ik->opcode == NV_OP_LDA ||
                ik->opcode == NV_OP_STA ||
                ik->opcode == NV_OP_MOV ||
                nv_is_vector_op(ik->opcode))
               continue; /* ignore loads, stores & moves */

            if (ik->src[4] || ir->src[4])
               continue; /* don't mess with address registers */

            if (ik->flags_src || ir->flags_src ||
                ik->flags_def || ir->flags_def)
               continue; /* and also not with flags, for now */

            if (ik->def[0]->reg.file == NV_FILE_OUT ||
                ir->def[0]->reg.file == NV_FILE_OUT ||
                !values_equal(ik->def[0], ir->def[0]))
               continue;

            for (s = 0; s < 3; ++s) {
               struct nv_value *va, *vb;

               if (!ik->src[s]) {
                  if (ir->src[s])
                     break;
                  continue;
               }
               if (ik->src[s]->mod != ir->src[s]->mod)
                  break;
               va = ik->src[s]->value;
               vb = ir->src[s]->value;
               if (va == vb)
                  continue;
               if (va->reg.file != vb->reg.file ||
                   va->reg.id < 0 ||
                   va->reg.id != vb->reg.id)
                  break;
            }
            if (s == 3) {
               nv_nvi_delete(ir);
               ++reps;
               nvcg_replace_value(ctx->pc, ir->def[0], ik->def[0]);
               break;
            }
         }
      }
   } while (reps);

   DESCEND_ARBITRARY(s, nv_pass_cse);

   return 0;
}

static int
nv_pc_pass0(struct nv_pc *pc, struct nv_basic_block *root)
{
   struct nv_pass_reld_elim *reldelim;
   struct nv_pass pass;
   struct nv_pass_dce dce;
   int ret;

   pass.n = 0;
   pass.pc = pc;

   /* Do this first, so we don't have to pay attention
    * to whether sources are supported memory loads.
    */
   pc->pass_seq++;
   ret = nv_pass_lower_arith(&pass, root);
   if (ret)
      return ret;

   pc->pass_seq++;
   ret = nv_pass_lower_mods(&pass, root);
   if (ret)
      return ret;

   pc->pass_seq++;
   ret = nv_pass_fold_loads(&pass, root);
   if (ret)
      return ret;

   pc->pass_seq++;
   ret = nv_pass_fold_stores(&pass, root);
   if (ret)
      return ret;

   if (pc->opt_reload_elim) {
      reldelim = CALLOC_STRUCT(nv_pass_reld_elim);
      reldelim->pc = pc;
      pc->pass_seq++;
      ret = nv_pass_reload_elim(reldelim, root);
      FREE(reldelim);
      if (ret)
         return ret;
   }

   pc->pass_seq++;
   ret = nv_pass_cse(&pass, root);
   if (ret)
      return ret;

   dce.pc = pc;
   do {
      dce.removed = 0;
      pc->pass_seq++;
      ret = nv_pass_dce(&dce, root);
      if (ret)
         return ret;
   } while (dce.removed);

   ret = nv_pass_tex_mask(&pass, root);
   if (ret)
      return ret;

   return ret;
}

int
nv_pc_exec_pass0(struct nv_pc *pc)
{
   int i, ret;

   for (i = 0; i < pc->num_subroutines + 1; ++i)
      if (pc->root[i] && (ret = nv_pc_pass0(pc, pc->root[i])))
         return ret;
   return 0;
}