[mesa.git] / src / gallium / drivers / nv50 / nv50_pc_optimize.c
/*
 * Copyright 2010 Christoph Bumiller
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

/* #define NV50PC_DEBUG */

#include "nv50_pc.h"

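/* Visit both CFG successors of block @b with pass function @f, using
 * pass_seq as a visited marker so each block is processed only once per
 * pass. The traversal order is arbitrary (hence the name), which is fine
 * for passes that treat each basic block independently.
 */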
#define DESCEND_ARBITRARY(j, f)                                 \
do {                                                            \
   b->pass_seq = ctx->pc->pass_seq;                             \
                                                                \
   for (j = 0; j < 2; ++j)                                      \
      if (b->out[j] && b->out[j]->pass_seq < ctx->pc->pass_seq) \
         f(ctx, b->out[j]);                                     \
} while (0)

extern unsigned nv50_inst_min_size(struct nv_instruction *);

struct nv_pc_pass {
   struct nv_pc *pc;
};

static INLINE boolean
values_equal(struct nv_value *a, struct nv_value *b)
{
   /* XXX: sizes */
   return (a->reg.file == b->reg.file && a->join->reg.id == b->join->reg.id);
}

static INLINE boolean
inst_commutation_check(struct nv_instruction *a,
                       struct nv_instruction *b)
{
   int si, di;

   for (di = 0; di < 4; ++di) {
      if (!a->def[di])
         break;
      for (si = 0; si < 5; ++si) {
         if (!b->src[si])
            continue;
         if (values_equal(a->def[di], b->src[si]->value))
            return FALSE;
      }
   }

   if (b->flags_src && b->flags_src->value == a->flags_def)
      return FALSE;

   return TRUE;
}

/* Check whether we can swap the order of the instructions,
 * where a & b may be either the earlier or the later one.
 */
static boolean
inst_commutation_legal(struct nv_instruction *a,
                       struct nv_instruction *b)
{
   return inst_commutation_check(a, b) && inst_commutation_check(b, a);
}

static INLINE boolean
inst_cullable(struct nv_instruction *nvi)
{
   if (nvi->opcode == NV_OP_STA)
      return FALSE;
   return (!(nvi->is_terminator || nvi->is_join ||
             nvi->target ||
             nvi->fixed ||
             nv_nvi_refcount(nvi)));
}

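/* An instruction is a no-op if it merely copies a value onto itself:
 * same register file and same (joined) register id, with no side effects
 * such as flag writes or control-flow semantics.
 */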
static INLINE boolean
nvi_isnop(struct nv_instruction *nvi)
{
   if (nvi->opcode == NV_OP_EXPORT || nvi->opcode == NV_OP_UNDEF)
      return TRUE;

   /* NOTE: 'fixed' now only means that it shouldn't be optimized away,
    * but we can still remove it if it is a no-op move.
    */
   if (/* nvi->fixed || */
       /* nvi->flags_src || */ /* cond. MOV to same register is still NOP */
       nvi->flags_def ||
       nvi->is_terminator ||
       nvi->is_join)
      return FALSE;

   if (nvi->def[0] && nvi->def[0]->join->reg.id < 0)
      return TRUE;

   if (nvi->opcode != NV_OP_MOV && nvi->opcode != NV_OP_SELECT)
      return FALSE;

   if (nvi->def[0]->reg.file != nvi->src[0]->value->reg.file)
      return FALSE;

   if (nvi->src[0]->value->join->reg.id < 0) {
      NV50_DBGMSG("nvi_isnop: orphaned value detected\n");
      return TRUE;
   }

   if (nvi->opcode == NV_OP_SELECT)
      if (!values_equal(nvi->def[0], nvi->src[1]->value))
         return FALSE;

   return values_equal(nvi->def[0], nvi->src[0]->value);
}

struct nv_pass {
   struct nv_pc *pc;
   int n;
   void *priv;
};

static int
nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b);

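/* Pre-emission pass: lay out the basic blocks, delete no-op moves and
 * no-op branches, and pick instruction encodings. nv50 instructions use
 * a short (4 byte) or long (8 byte) encoding, and short instructions
 * presumably must come in aligned pairs: a lone short instruction is
 * either swapped with a neighbouring one (when provably safe) or
 * promoted to the long form. b->bin_size is counted in 4-byte units
 * here and converted to bytes at the end.
 */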
static void
nv_pc_pass_pre_emission(void *priv, struct nv_basic_block *b)
{
   struct nv_pc *pc = (struct nv_pc *)priv;
   struct nv_basic_block *in;
   struct nv_instruction *nvi, *next;
   int j;
   uint size, n32 = 0;

   for (j = pc->num_blocks - 1; j >= 0 && !pc->bb_list[j]->bin_size; --j);
   if (j >= 0) {
      in = pc->bb_list[j];

      /* check for no-op branches (BRA $PC+8) */
      if (in->exit && in->exit->opcode == NV_OP_BRA && in->exit->target == b) {
         in->bin_size -= 8;
         pc->bin_size -= 8;

         for (++j; j < pc->num_blocks; ++j)
            pc->bb_list[j]->bin_pos -= 8;

         nv_nvi_delete(in->exit);
      }
      b->bin_pos = in->bin_pos + in->bin_size;
   }

   pc->bb_list[pc->num_blocks++] = b;

   /* visit node */

   for (nvi = b->entry; nvi; nvi = next) {
      next = nvi->next;
      if (nvi_isnop(nvi))
         nv_nvi_delete(nvi);
   }

   for (nvi = b->entry; nvi; nvi = next) {
      next = nvi->next;

      size = nv50_inst_min_size(nvi);
      if (nvi->next && size < 8)
         ++n32;
      else
      if ((n32 & 1) && nvi->next &&
          nv50_inst_min_size(nvi->next) == 4 &&
          inst_commutation_legal(nvi, nvi->next)) {
         ++n32;
         nv_nvi_permute(nvi, nvi->next);
         next = nvi;
      } else {
         nvi->is_long = 1;

         b->bin_size += n32 & 1;
         if (n32 & 1)
            nvi->prev->is_long = 1;
         n32 = 0;
      }
      b->bin_size += 1 + nvi->is_long;
   }

   if (!b->entry) {
      NV50_DBGMSG("block %p is now empty\n", b);
   } else
   if (!b->exit->is_long) {
      assert(n32);
      b->exit->is_long = 1;
      b->bin_size += 1;

      /* might have deleted a whole tail of instructions */
      if (!b->exit->prev->is_long && !(n32 & 1)) {
         b->bin_size += 1;
         b->exit->prev->is_long = 1;
      }
   }
   assert(!b->entry || (b->exit && b->exit->is_long));

   pc->bin_size += b->bin_size *= 4;
}

static int
nv_pc_pass2(struct nv_pc *pc, struct nv_basic_block *root)
{
   struct nv_pass pass;

   pass.pc = pc;

   pc->pass_seq++;

   nv_pass_flatten(&pass, root);

   nv_pc_pass_in_order(root, nv_pc_pass_pre_emission, pc);

   return 0;
}

int
nv_pc_exec_pass2(struct nv_pc *pc)
{
   int i, ret;

   NV50_DBGMSG("preparing %u blocks for emission\n", pc->num_blocks);

   pc->num_blocks = 0; /* will reorder bb_list */

   for (i = 0; i < pc->num_subroutines + 1; ++i)
      if (pc->root[i] && (ret = nv_pc_pass2(pc, pc->root[i])))
         return ret;
   return 0;
}

static INLINE boolean
is_cmem_load(struct nv_instruction *nvi)
{
   return (nvi->opcode == NV_OP_LDA &&
           nvi->src[0]->value->reg.file >= NV_FILE_MEM_C(0) &&
           nvi->src[0]->value->reg.file <= NV_FILE_MEM_C(15));
}

static INLINE boolean
is_smem_load(struct nv_instruction *nvi)
{
   return (nvi->opcode == NV_OP_LDA &&
           (nvi->src[0]->value->reg.file == NV_FILE_MEM_S ||
            nvi->src[0]->value->reg.file <= NV_FILE_MEM_P));
}

static INLINE boolean
is_immd_move(struct nv_instruction *nvi)
{
   return (nvi->opcode == NV_OP_MOV &&
           nvi->src[0]->value->reg.file == NV_FILE_IMM);
}

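/* For commutative opcodes, prefer the constant-buffer load in source 1
 * and the shared-memory load in source 0, which appears to be what the
 * hardware encodings favour. When the sources of a SET are swapped, the
 * comparison must be mirrored too: cc_swapped exchanges the LT and GT
 * bits of the 3-bit condition mask (e.g. LT becomes GT) and keeps EQ.
 */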
static INLINE void
check_swap_src_0_1(struct nv_instruction *nvi)
{
   static const ubyte cc_swapped[8] = { 0, 4, 2, 6, 1, 5, 3, 7 };

   struct nv_ref *src0 = nvi->src[0], *src1 = nvi->src[1];

   if (!nv_op_commutative(nvi->opcode))
      return;
   assert(src0 && src1);

   if (src1->value->reg.file == NV_FILE_IMM)
      return;

   if (is_cmem_load(src0->value->insn)) {
      if (!is_cmem_load(src1->value->insn)) {
         nvi->src[0] = src1;
         nvi->src[1] = src0;
         /* debug_printf("swapping cmem load to 1\n"); */
      }
   } else
   if (is_smem_load(src1->value->insn)) {
      if (!is_smem_load(src0->value->insn)) {
         nvi->src[0] = src1;
         nvi->src[1] = src0;
         /* debug_printf("swapping smem load to 0\n"); */
      }
   }

   if (nvi->opcode == NV_OP_SET && nvi->src[0] != src0)
      nvi->set_cond = cc_swapped[nvi->set_cond];
}

static int
nv_pass_fold_stores(struct nv_pass *ctx, struct nv_basic_block *b)
{
   struct nv_instruction *nvi, *sti, *next;
   int j;

   for (sti = b->entry; sti; sti = next) {
      next = sti->next;

      /* only handling MOV to $oX here */
      if (!sti->def[0] || sti->def[0]->reg.file != NV_FILE_OUT)
         continue;
      if (sti->opcode != NV_OP_MOV && sti->opcode != NV_OP_STA)
         continue;

      nvi = sti->src[0]->value->insn;
      if (!nvi || nvi->opcode == NV_OP_PHI || nv_is_vector_op(nvi->opcode))
         continue;
      assert(nvi->def[0] == sti->src[0]->value);

      if (nvi->opcode == NV_OP_SELECT)
         continue;
      if (nvi->def[0]->refc > 1)
         continue;

      /* cannot write to $oX when using immediate */
      for (j = 0; j < 4 && nvi->src[j]; ++j)
         if (nvi->src[j]->value->reg.file == NV_FILE_IMM ||
             nvi->src[j]->value->reg.file == NV_FILE_MEM_L)
            break;
      if (j < 4 && nvi->src[j])
         continue;

      nvi->def[0] = sti->def[0];
      nvi->def[0]->insn = nvi;
      nvi->fixed = sti->fixed;

      nv_nvi_delete(sti);
   }
   DESCEND_ARBITRARY(j, nv_pass_fold_stores);

   return 0;
}

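/* Fold loads into the instructions that consume them: an immediate MOV
 * or an LDA from constant/shared memory can be replaced by using the
 * memory location directly as a source operand, provided the consumer's
 * opcode supports that operand type in that source slot.
 */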
static int
nv_pass_fold_loads(struct nv_pass *ctx, struct nv_basic_block *b)
{
   struct nv_instruction *nvi, *ld;
   int j;

   for (nvi = b->entry; nvi; nvi = nvi->next) {
      check_swap_src_0_1(nvi);

      for (j = 0; j < 3; ++j) {
         if (!nvi->src[j])
            break;
         ld = nvi->src[j]->value->insn;
         if (!ld)
            continue;

         if (is_immd_move(ld) && nv50_nvi_can_use_imm(nvi, j)) {
            nv_reference(ctx->pc, &nvi->src[j], ld->src[0]->value);
            continue;
         }

         if (ld->opcode != NV_OP_LDA)
            continue;
         if (!nv50_nvi_can_load(nvi, j, ld->src[0]->value))
            continue;

         if (j == 0 && ld->src[4]) /* can't load shared mem */
            continue;

         /* fold it! */
         nv_reference(ctx->pc, &nvi->src[j], ld->src[0]->value);
         if (ld->src[4])
            nv_reference(ctx->pc, &nvi->src[4], ld->src[4]->value);

         if (!nv_nvi_refcount(ld))
            nv_nvi_delete(ld);
      }
   }
   DESCEND_ARBITRARY(j, nv_pass_fold_loads);

   return 0;
}

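/* Turn single-use NEG/ABS instructions into source modifiers on their
 * consumers, and fuse a SAT onto the preceding ADD/MAD. SUB is first
 * rewritten as ADD with a negated second source, so the modifier logic
 * below covers it as well.
 */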
/* NOTE: Assumes loads have not yet been folded. */
static int
nv_pass_lower_mods(struct nv_pass *ctx, struct nv_basic_block *b)
{
   int j;
   struct nv_instruction *nvi, *mi, *next;
   ubyte mod;

   for (nvi = b->entry; nvi; nvi = next) {
      next = nvi->next;
      if (nvi->opcode == NV_OP_SUB) {
         nvi->opcode = NV_OP_ADD;
         nvi->src[1]->mod ^= NV_MOD_NEG;
      }

      for (j = 0; j < 4 && nvi->src[j]; ++j) {
         mi = nvi->src[j]->value->insn;
         if (!mi)
            continue;
         if (mi->def[0]->refc > 1)
            continue;

         if (mi->opcode == NV_OP_NEG) mod = NV_MOD_NEG;
         else
         if (mi->opcode == NV_OP_ABS) mod = NV_MOD_ABS;
         else
            continue;
         assert(!(mod & mi->src[0]->mod & NV_MOD_NEG));

         mod |= mi->src[0]->mod;

         if (mi->flags_def || mi->flags_src)
            continue;

         if ((nvi->opcode == NV_OP_ABS) || (nvi->src[j]->mod & NV_MOD_ABS)) {
            /* abs neg [abs] = abs */
            mod &= ~(NV_MOD_NEG | NV_MOD_ABS);
         } else
         if ((nvi->opcode == NV_OP_NEG) && (mod & NV_MOD_NEG)) {
            /* neg as opcode and modifier on same insn cannot occur */
            /* neg neg abs = abs, neg neg = identity */
            assert(j == 0);
            if (mod & NV_MOD_ABS)
               nvi->opcode = NV_OP_ABS;
            else
            if (nvi->flags_def)
               nvi->opcode = NV_OP_CVT;
            else
               nvi->opcode = NV_OP_MOV;
            mod = 0;
         }

         if ((nv50_supported_src_mods(nvi->opcode, j) & mod) != mod)
            continue;

         nv_reference(ctx->pc, &nvi->src[j], mi->src[0]->value);

         nvi->src[j]->mod ^= mod;
      }

      if (nvi->opcode == NV_OP_SAT) {
         mi = nvi->src[0]->value->insn;

         if (mi->opcode != NV_OP_ADD && mi->opcode != NV_OP_MAD)
            continue;
         if (mi->flags_def || mi->def[0]->refc > 1)
            continue;

         mi->saturate = 1;
         mi->def[0] = nvi->def[0];
         mi->def[0]->insn = mi;
         nv_nvi_delete(nvi);
      }
   }
   DESCEND_ARBITRARY(j, nv_pass_lower_mods);

   return 0;
}

#define SRC_IS_MUL(s) ((s)->insn && (s)->insn->opcode == NV_OP_MUL)

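/* Apply ABS/NEG modifiers to a raw 32-bit immediate: floats via sign-bit
 * manipulation, integers via two's complement negation.
 */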
static void
modifiers_apply(uint32_t *val, ubyte type, ubyte mod)
{
   if (mod & NV_MOD_ABS) {
      if (type == NV_TYPE_F32)
         *val &= 0x7fffffff;
      else
      if ((*val) & (1 << 31))
         *val = ~(*val) + 1;
   }
   if (mod & NV_MOD_NEG) {
      if (type == NV_TYPE_F32)
         *val ^= 0x80000000;
      else
         *val = ~(*val) + 1;
   }
}

static INLINE uint
modifiers_opcode(ubyte mod)
{
   switch (mod) {
   case NV_MOD_NEG: return NV_OP_NEG;
   case NV_MOD_ABS: return NV_OP_ABS;
   case 0:
      return NV_OP_MOV;
   default:
      return NV_OP_NOP;
   }
}

static void
constant_expression(struct nv_pc *pc, struct nv_instruction *nvi,
                    struct nv_value *src0, struct nv_value *src1)
{
   struct nv_value *val;
   union {
      float f32;
      uint32_t u32;
      int32_t s32;
   } u0, u1, u;
   ubyte type;

   if (!nvi->def[0])
      return;
   type = nvi->def[0]->reg.type;

   u.u32 = 0;
   u0.u32 = src0->reg.imm.u32;
   u1.u32 = src1->reg.imm.u32;

   modifiers_apply(&u0.u32, type, nvi->src[0]->mod);
   modifiers_apply(&u1.u32, type, nvi->src[1]->mod);

   switch (nvi->opcode) {
   case NV_OP_MAD:
      if (nvi->src[2]->value->reg.file != NV_FILE_GPR)
         return;
      /* fall through */
   case NV_OP_MUL:
      switch (type) {
      case NV_TYPE_F32: u.f32 = u0.f32 * u1.f32; break;
      case NV_TYPE_U32: u.u32 = u0.u32 * u1.u32; break;
      case NV_TYPE_S32: u.s32 = u0.s32 * u1.s32; break;
      default:
         assert(0);
         break;
      }
      break;
   case NV_OP_ADD:
      switch (type) {
      case NV_TYPE_F32: u.f32 = u0.f32 + u1.f32; break;
      case NV_TYPE_U32: u.u32 = u0.u32 + u1.u32; break;
      case NV_TYPE_S32: u.s32 = u0.s32 + u1.s32; break;
      default:
         assert(0);
         break;
      }
      break;
   case NV_OP_SUB:
      switch (type) {
      case NV_TYPE_F32: u.f32 = u0.f32 - u1.f32; break;
      case NV_TYPE_U32: u.u32 = u0.u32 - u1.u32; break;
      case NV_TYPE_S32: u.s32 = u0.s32 - u1.s32; break;
      default:
         assert(0);
         break;
      }
      break;
   default:
      return;
   }

   nvi->opcode = NV_OP_MOV;

   val = new_value(pc, NV_FILE_IMM, type);

   val->reg.imm.u32 = u.u32;

   nv_reference(pc, &nvi->src[1], NULL);
   nv_reference(pc, &nvi->src[0], val);

   if (nvi->src[2]) { /* from MAD */
      nvi->src[1] = nvi->src[0];
      nvi->src[0] = nvi->src[2];
      nvi->src[2] = NULL;
      nvi->opcode = NV_OP_ADD;

      if (val->reg.imm.u32 == 0) {
         nvi->src[1] = NULL;
         nvi->opcode = NV_OP_MOV;
      }
   }
}

static void
constant_operand(struct nv_pc *pc,
                 struct nv_instruction *nvi, struct nv_value *val, int s)
{
   union {
      float f32;
      uint32_t u32;
      int32_t s32;
   } u;
   int t = s ? 0 : 1;
   uint op;
   ubyte type;

   if (!nvi->def[0])
      return;
   type = nvi->def[0]->reg.type;

   u.u32 = val->reg.imm.u32;
   modifiers_apply(&u.u32, type, nvi->src[s]->mod);

   switch (nvi->opcode) {
   case NV_OP_MUL:
      if ((type == NV_TYPE_F32 && u.f32 == 1.0f) ||
          (NV_TYPE_ISINT(type) && u.u32 == 1)) {
         if ((op = modifiers_opcode(nvi->src[t]->mod)) == NV_OP_NOP)
            break;
         nvi->opcode = op;
         nv_reference(pc, &nvi->src[s], NULL);
         nvi->src[0] = nvi->src[t];
         nvi->src[1] = NULL;
      } else
      if ((type == NV_TYPE_F32 && u.f32 == 2.0f) ||
          (NV_TYPE_ISINT(type) && u.u32 == 2)) {
         nvi->opcode = NV_OP_ADD;
         nv_reference(pc, &nvi->src[s], nvi->src[t]->value);
         nvi->src[s]->mod = nvi->src[t]->mod;
      } else
      if (type == NV_TYPE_F32 && u.f32 == -1.0f) {
         if (nvi->src[t]->mod & NV_MOD_NEG)
            nvi->opcode = NV_OP_MOV;
         else
            nvi->opcode = NV_OP_NEG;
         nv_reference(pc, &nvi->src[s], NULL);
         nvi->src[0] = nvi->src[t];
         nvi->src[1] = NULL;
      } else
      if (type == NV_TYPE_F32 && u.f32 == -2.0f) {
         nvi->opcode = NV_OP_ADD;
         nv_reference(pc, &nvi->src[s], nvi->src[t]->value);
         nvi->src[s]->mod = (nvi->src[t]->mod ^= NV_MOD_NEG);
      } else
      if (u.u32 == 0) {
         nvi->opcode = NV_OP_MOV;
         nv_reference(pc, &nvi->src[t], NULL);
         if (s) {
            nvi->src[0] = nvi->src[1];
            nvi->src[1] = NULL;
         }
      }
      break;
   case NV_OP_ADD:
      if (u.u32 == 0) {
         if ((op = modifiers_opcode(nvi->src[t]->mod)) == NV_OP_NOP)
            break;
         nvi->opcode = op;
         nv_reference(pc, &nvi->src[s], NULL);
         nvi->src[0] = nvi->src[t];
         nvi->src[1] = NULL;
      }
      break;
   case NV_OP_RCP:
      u.f32 = 1.0f / u.f32;
      (val = new_value(pc, NV_FILE_IMM, NV_TYPE_F32))->reg.imm.f32 = u.f32;
      nvi->opcode = NV_OP_MOV;
      assert(s == 0);
      nv_reference(pc, &nvi->src[0], val);
      break;
   case NV_OP_RSQ:
      u.f32 = 1.0f / sqrtf(u.f32);
      (val = new_value(pc, NV_FILE_IMM, NV_TYPE_F32))->reg.imm.f32 = u.f32;
      nvi->opcode = NV_OP_MOV;
      assert(s == 0);
      nv_reference(pc, &nvi->src[0], val);
      break;
   default:
      break;
   }

   if (nvi->opcode == NV_OP_MOV && nvi->flags_def) {
      struct nv_instruction *cvt = new_instruction_at(pc, nvi, NV_OP_CVT);

      nv_reference(pc, &cvt->src[0], nvi->def[0]);

      cvt->flags_def = nvi->flags_def;
      nvi->flags_def = NULL;
   }
}

static int
nv_pass_lower_arith(struct nv_pass *ctx, struct nv_basic_block *b)
{
   struct nv_instruction *nvi, *next;
   int j;

   for (nvi = b->entry; nvi; nvi = next) {
      struct nv_value *src0, *src1, *src;
      int mod;

      next = nvi->next;

      src0 = nvcg_find_immediate(nvi->src[0]);
      src1 = nvcg_find_immediate(nvi->src[1]);

      if (src0 && src1)
         constant_expression(ctx->pc, nvi, src0, src1);
      else {
         if (src0)
            constant_operand(ctx->pc, nvi, src0, 0);
         else
         if (src1)
            constant_operand(ctx->pc, nvi, src1, 1);
      }

      /* try to combine MUL, ADD into MAD */
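      /* Requirements: the MUL result has exactly one use, both ADD
       * operands live in GPRs (the constant folds above may have put an
       * immediate here), and at most a NEG modifier sits on the MUL
       * result, which is folded into the first multiplicand below.
       */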
      if (nvi->opcode != NV_OP_ADD)
         continue;

      src0 = nvi->src[0]->value;
      src1 = nvi->src[1]->value;

      if (SRC_IS_MUL(src0) && src0->refc == 1)
         src = src0;
      else
      if (SRC_IS_MUL(src1) && src1->refc == 1)
         src = src1;
      else
         continue;

      /* could have an immediate from above constant_* */
      if (src0->reg.file != NV_FILE_GPR || src1->reg.file != NV_FILE_GPR)
         continue;

      nvi->opcode = NV_OP_MAD;
      mod = nvi->src[(src == src0) ? 0 : 1]->mod;
      nv_reference(ctx->pc, &nvi->src[(src == src0) ? 0 : 1], NULL);
      nvi->src[2] = nvi->src[(src == src0) ? 1 : 0];

      assert(!(mod & ~NV_MOD_NEG));
      nvi->src[0] = new_ref(ctx->pc, src->insn->src[0]->value);
      nvi->src[1] = new_ref(ctx->pc, src->insn->src[1]->value);
      nvi->src[0]->mod = src->insn->src[0]->mod ^ mod;
      nvi->src[1]->mod = src->insn->src[1]->mod;
   }
   DESCEND_ARBITRARY(j, nv_pass_lower_arith);

   return 0;
}

/* TODO: redundant store elimination */

struct load_record {
   struct load_record *next;
   uint64_t data[2];
   struct nv_value *value;
};

#define LOAD_RECORD_POOL_SIZE 1024

struct nv_pass_reld_elim {
   struct nv_pc *pc;

   struct load_record *imm;
   struct load_record *mem_s;
   struct load_record *mem_v;
   struct load_record *mem_c[16];
   struct load_record *mem_l;

   struct load_record pool[LOAD_RECORD_POOL_SIZE];
   int alloc;
};

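/* Reload elimination: remember each load per memory space, keyed by
 * register id (or immediate value) plus the address register, and make
 * later identical loads reuse the earlier result. Records are only kept
 * within a single basic block; the lists are reset before descending.
 */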
/* TODO: properly handle loads from l[] memory in the presence of stores */
static int
nv_pass_reload_elim(struct nv_pass_reld_elim *ctx, struct nv_basic_block *b)
{
   struct load_record **rec, *it;
   struct nv_instruction *ld, *next;
   uint64_t data[2];
   struct nv_value *val;
   int j;

   for (ld = b->entry; ld; ld = next) {
      next = ld->next;
      if (!ld->src[0])
         continue;
      val = ld->src[0]->value;
      rec = NULL;

      if (ld->opcode == NV_OP_LINTERP || ld->opcode == NV_OP_PINTERP) {
         data[0] = val->reg.id;
         data[1] = 0;
         rec = &ctx->mem_v;
      } else
      if (ld->opcode == NV_OP_LDA) {
         data[0] = val->reg.id;
         data[1] = ld->src[4] ? ld->src[4]->value->n : ~0ULL;
         if (val->reg.file >= NV_FILE_MEM_C(0) &&
             val->reg.file <= NV_FILE_MEM_C(15))
            rec = &ctx->mem_c[val->reg.file - NV_FILE_MEM_C(0)];
         else
         if (val->reg.file == NV_FILE_MEM_S)
            rec = &ctx->mem_s;
         else
         if (val->reg.file == NV_FILE_MEM_L)
            rec = &ctx->mem_l;
      } else
      if ((ld->opcode == NV_OP_MOV) && (val->reg.file == NV_FILE_IMM)) {
         data[0] = val->reg.imm.u32;
         data[1] = 0;
         rec = &ctx->imm;
      }

      if (!rec || !ld->def[0]->refc)
         continue;

      for (it = *rec; it; it = it->next)
         if (it->data[0] == data[0] && it->data[1] == data[1])
            break;

      if (it) {
         if (ld->def[0]->reg.id >= 0)
            it->value = ld->def[0];
         else
         if (!ld->fixed)
            nvcg_replace_value(ctx->pc, ld->def[0], it->value);
      } else {
         if (ctx->alloc == LOAD_RECORD_POOL_SIZE)
            continue;
         it = &ctx->pool[ctx->alloc++];
         it->next = *rec;
         it->data[0] = data[0];
         it->data[1] = data[1];
         it->value = ld->def[0];
         *rec = it;
      }
   }

   ctx->imm = NULL;
   ctx->mem_s = NULL;
   ctx->mem_v = NULL;
   for (j = 0; j < 16; ++j)
      ctx->mem_c[j] = NULL;
   ctx->mem_l = NULL;
   ctx->alloc = 0;

   DESCEND_ARBITRARY(j, nv_pass_reload_elim);

   return 0;
}

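/* Compute the component mask of each TEX-like instruction from the
 * reference counts of its definitions, and compact the used components
 * to the front of def[], presumably because the hardware returns the
 * enabled components in consecutive registers.
 */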
static int
nv_pass_tex_mask(struct nv_pass *ctx, struct nv_basic_block *b)
{
   int i, c, j;

   for (i = 0; i < ctx->pc->num_instructions; ++i) {
      struct nv_instruction *nvi = &ctx->pc->instructions[i];
      struct nv_value *def[4];

      if (!nv_is_vector_op(nvi->opcode))
         continue;
      nvi->tex_mask = 0;

      for (c = 0; c < 4; ++c) {
         if (nvi->def[c]->refc)
            nvi->tex_mask |= 1 << c;
         def[c] = nvi->def[c];
      }

      j = 0;
      for (c = 0; c < 4; ++c)
         if (nvi->tex_mask & (1 << c))
            nvi->def[j++] = def[c];
      for (c = 0; c < 4; ++c)
         if (!(nvi->tex_mask & (1 << c)))
            nvi->def[j++] = def[c];
      assert(j == 4);
   }
   return 0;
}

struct nv_pass_dce {
   struct nv_pc *pc;
   uint removed;
};

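/* Dead code elimination: delete instructions whose results are never
 * referenced and that have no side effects. The caller re-runs this pass
 * until the removed counter stays at zero, since deleting one
 * instruction can render the producers of its operands dead as well.
 */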
static int
nv_pass_dce(struct nv_pass_dce *ctx, struct nv_basic_block *b)
{
   int j;
   struct nv_instruction *nvi, *next;

   for (nvi = b->phi ? b->phi : b->entry; nvi; nvi = next) {
      next = nvi->next;

      if (inst_cullable(nvi)) {
         nv_nvi_delete(nvi);

         ++ctx->removed;
      }
   }
   DESCEND_ARBITRARY(j, nv_pass_dce);

   return 0;
}

/* Register allocation inserted ELSE blocks for all IF/ENDIF without ELSE.
 * Returns TRUE if @bb initiates an IF/ELSE/ENDIF clause, or is an IF with
 * BREAK and dummy ELSE block.
 */
static INLINE boolean
bb_is_if_else_endif(struct nv_basic_block *bb)
{
   if (!bb->out[0] || !bb->out[1])
      return FALSE;

   if (bb->out[0]->out_kind[0] == CFG_EDGE_LOOP_LEAVE) {
      return (bb->out[0]->out[1] == bb->out[1]->out[0] &&
              !bb->out[1]->out[1]);
   } else {
      return (bb->out[0]->out[0] == bb->out[1]->out[0] &&
              !bb->out[0]->out[1] &&
              !bb->out[1]->out[1]);
   }
}

/* predicate instructions and remove branch at the end */
static void
predicate_instructions(struct nv_pc *pc, struct nv_basic_block *b,
                       struct nv_value *p, ubyte cc)
{
   struct nv_instruction *nvi;

   if (!b->entry)
      return;
   for (nvi = b->entry; nvi->next; nvi = nvi->next) {
      if (!nvi_isnop(nvi)) {
         nvi->cc = cc;
         nv_reference(pc, &nvi->flags_src, p);
      }
   }

   if (nvi->opcode == NV_OP_BRA)
      nv_nvi_delete(nvi);
   else
   if (!nvi_isnop(nvi)) {
      nvi->cc = cc;
      nv_reference(pc, &nvi->flags_src, p);
   }
}

/* NOTE: Run this after register allocation; we can then just cut out the
 * cflow instructions and hook the predicates to the conditional OPs if
 * they are not using immediates. This is better than inserting SELECT to
 * join definitions.
 *
 * NOTE: Should adapt the prior optimizations to make this possible more
 * often.
 */
static int
nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b)
{
   struct nv_instruction *nvi;
   struct nv_value *pred;
   int i;
   int n0 = 0, n1 = 0;

   if (bb_is_if_else_endif(b)) {

      NV50_DBGMSG("pass_flatten: IF/ELSE/ENDIF construct at BB:%i\n", b->id);

      for (n0 = 0, nvi = b->out[0]->entry; nvi; nvi = nvi->next, ++n0)
         if (!nv50_nvi_can_predicate(nvi))
            break;
      if (!nvi) {
         for (n1 = 0, nvi = b->out[1]->entry; nvi; nvi = nvi->next, ++n1)
            if (!nv50_nvi_can_predicate(nvi))
               break;
#ifdef NV50PC_DEBUG
         if (nvi) {
            debug_printf("cannot predicate: "); nv_print_instruction(nvi);
         }
      } else {
         debug_printf("cannot predicate: "); nv_print_instruction(nvi);
#endif
      }

      if (!nvi && n0 < 12 && n1 < 12) { /* 12 as arbitrary limit */
         assert(b->exit && b->exit->flags_src);
         pred = b->exit->flags_src->value;

         predicate_instructions(ctx->pc, b->out[0], pred, NV_CC_NE | NV_CC_U);
         predicate_instructions(ctx->pc, b->out[1], pred, NV_CC_EQ);

         assert(b->exit && b->exit->opcode == NV_OP_BRA);
         nv_nvi_delete(b->exit);

         if (b->exit && b->exit->opcode == NV_OP_JOINAT)
            nv_nvi_delete(b->exit);

         i = (b->out[0]->out_kind[0] == CFG_EDGE_LOOP_LEAVE) ? 1 : 0;

         if ((nvi = b->out[0]->out[i]->entry)) {
            nvi->is_join = 0;
            if (nvi->opcode == NV_OP_JOIN)
               nv_nvi_delete(nvi);
         }
      }
   }
   DESCEND_ARBITRARY(i, nv_pass_flatten);

   return 0;
}

/* local common subexpression elimination, stupid O(n^2) implementation */
static int
nv_pass_cse(struct nv_pass *ctx, struct nv_basic_block *b)
{
   struct nv_instruction *ir, *ik, *next;
   struct nv_instruction *entry = b->phi ? b->phi : b->entry;
   int s;
   unsigned int reps;

   do {
      reps = 0;
      for (ir = entry; ir; ir = next) {
         next = ir->next;
         for (ik = entry; ik != ir; ik = ik->next) {
            if (ir->opcode != ik->opcode || ir->fixed)
               continue;

            if (!ir->def[0] || !ik->def[0] ||
                ik->opcode == NV_OP_LDA ||
                ik->opcode == NV_OP_STA ||
                ik->opcode == NV_OP_MOV ||
                nv_is_vector_op(ik->opcode))
               continue; /* ignore loads, stores & moves */

            if (ik->src[4] || ir->src[4])
               continue; /* don't mess with address registers */

            if (ik->flags_src || ir->flags_src ||
                ik->flags_def || ir->flags_def)
               continue; /* and also not with flags, for now */

            if (ik->def[0]->reg.file == NV_FILE_OUT ||
                ir->def[0]->reg.file == NV_FILE_OUT ||
                !values_equal(ik->def[0], ir->def[0]))
               continue;

            for (s = 0; s < 3; ++s) {
               struct nv_value *a, *b;

               if (!ik->src[s]) {
                  if (ir->src[s])
                     break;
                  continue;
               }
               if (ik->src[s]->mod != ir->src[s]->mod)
                  break;
               a = ik->src[s]->value;
               b = ir->src[s]->value;
               if (a == b)
                  continue;
               if (a->reg.file != b->reg.file ||
                   a->reg.id < 0 ||
                   a->reg.id != b->reg.id)
                  break;
            }
            if (s == 3) {
               nv_nvi_delete(ir);
               ++reps;
               nvcg_replace_value(ctx->pc, ir->def[0], ik->def[0]);
               break;
            }
         }
      }
   } while (reps);

   DESCEND_ARBITRARY(s, nv_pass_cse);

   return 0;
}

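/* Run the optimization passes over one CFG in dependency order:
 * arithmetic lowering and constant folding first (so later passes need
 * not deal with unsupported memory sources), then modifier lowering,
 * load/store folding, optional reload elimination, CSE, DCE to a fixed
 * point, and finally TEX mask computation.
 */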
static int
nv_pc_pass0(struct nv_pc *pc, struct nv_basic_block *root)
{
   struct nv_pass_reld_elim *reldelim;
   struct nv_pass pass;
   struct nv_pass_dce dce;
   int ret;

   pass.n = 0;
   pass.pc = pc;

   /* Do this first, so we don't have to pay attention
    * to whether sources are supported memory loads.
    */
   pc->pass_seq++;
   ret = nv_pass_lower_arith(&pass, root);
   if (ret)
      return ret;

   pc->pass_seq++;
   ret = nv_pass_lower_mods(&pass, root);
   if (ret)
      return ret;

   pc->pass_seq++;
   ret = nv_pass_fold_loads(&pass, root);
   if (ret)
      return ret;

   pc->pass_seq++;
   ret = nv_pass_fold_stores(&pass, root);
   if (ret)
      return ret;

   if (pc->opt_reload_elim) {
      reldelim = CALLOC_STRUCT(nv_pass_reld_elim);
      reldelim->pc = pc;
      pc->pass_seq++;
      ret = nv_pass_reload_elim(reldelim, root);
      FREE(reldelim);
      if (ret)
         return ret;
   }

   pc->pass_seq++;
   ret = nv_pass_cse(&pass, root);
   if (ret)
      return ret;

   dce.pc = pc;
   do {
      dce.removed = 0;
      pc->pass_seq++;
      ret = nv_pass_dce(&dce, root);
      if (ret)
         return ret;
   } while (dce.removed);

   ret = nv_pass_tex_mask(&pass, root);
   if (ret)
      return ret;

   return ret;
}

int
nv_pc_exec_pass0(struct nv_pc *pc)
{
   int i, ret;

   for (i = 0; i < pc->num_subroutines + 1; ++i)
      if (pc->root[i] && (ret = nv_pc_pass0(pc, pc->root[i])))
         return ret;
   return 0;
}