nv50: re-add proper TEXBIAS sequence
[mesa.git] src/gallium/drivers/nv50/nv50_pc_optimize.c
/*
 * Copyright 2010 Christoph Bumiller
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

/* #define NV50PC_DEBUG */

#include "nv50_pc.h"

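/* Visit the two possible successors of block @b that have not yet been
 * processed in the current pass; pass_seq is bumped once per pass so that
 * each block in the CFG is descended into exactly once.
 */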
#define DESCEND_ARBITRARY(j, f)                                 \
do {                                                            \
   b->pass_seq = ctx->pc->pass_seq;                             \
                                                                \
   for (j = 0; j < 2; ++j)                                      \
      if (b->out[j] && b->out[j]->pass_seq < ctx->pc->pass_seq) \
         f(ctx, b->out[j]);                                     \
} while (0)

extern unsigned nv50_inst_min_size(struct nv_instruction *);

struct nv_pc_pass {
   struct nv_pc *pc;
};

static INLINE boolean
values_equal(struct nv_value *a, struct nv_value *b)
{
   /* XXX: sizes */
   return (a->reg.file == b->reg.file && a->join->reg.id == b->join->reg.id);
}

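/* Check that instruction @b does not source any value that @a defines,
 * i.e. that executing @b before @a would not change @b's operands.
 */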
static INLINE boolean
inst_commutation_check(struct nv_instruction *a,
                       struct nv_instruction *b)
{
   int si, di;

   for (di = 0; di < 4; ++di) {
      if (!a->def[di])
         break;
      for (si = 0; si < 5; ++si) {
         if (!b->src[si])
            continue;
         if (values_equal(a->def[di], b->src[si]->value))
            return FALSE;
      }
   }

   if (b->flags_src && b->flags_src->value == a->flags_def)
      return FALSE;

   return TRUE;
}

/* Check whether we can swap the order of the instructions,
 * where a & b may be either the earlier or the later one.
 */
static boolean
inst_commutation_legal(struct nv_instruction *a,
                       struct nv_instruction *b)
{
   return inst_commutation_check(a, b) && inst_commutation_check(b, a);
}

static INLINE boolean
inst_cullable(struct nv_instruction *nvi)
{
   return (!(nvi->is_terminator || nvi->is_join ||
             nvi->target ||
             nvi->fixed ||
             nv_nvi_refcount(nvi)));
}

static INLINE boolean
nvi_isnop(struct nv_instruction *nvi)
{
   if (nvi->opcode == NV_OP_EXPORT || nvi->opcode == NV_OP_UNDEF)
      return TRUE;

   /* NOTE: 'fixed' now only means that it shouldn't be optimized away,
    * but we can still remove it if it is a no-op move.
    */
   if (/* nvi->fixed || */
       /* nvi->flags_src || */ /* cond. MOV to same register is still NOP */
       nvi->flags_def ||
       nvi->is_terminator ||
       nvi->is_join)
      return FALSE;

   if (nvi->def[0] && nvi->def[0]->join->reg.id < 0)
      return TRUE;

   if (nvi->opcode != NV_OP_MOV && nvi->opcode != NV_OP_SELECT)
      return FALSE;

   if (nvi->def[0]->reg.file != nvi->src[0]->value->reg.file)
      return FALSE;

   if (nvi->src[0]->value->join->reg.id < 0) {
      NV50_DBGMSG("nvi_isnop: orphaned value detected\n");
      return TRUE;
   }

   if (nvi->opcode == NV_OP_SELECT)
      if (!values_equal(nvi->def[0], nvi->src[1]->value))
         return FALSE;

   return values_equal(nvi->def[0], nvi->src[0]->value);
}

struct nv_pass {
   struct nv_pc *pc;
   int n;
   void *priv;
};

static int
nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b);

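/* Prepare block @b for emission: delete no-op instructions, decide which
 * instructions can keep the short (4 byte) encoding and which need the
 * long (8 byte) one, and accumulate the block's binary position and size.
 * Short encodings must come in pairs, so an unpaired short instruction is
 * either swapped with its successor (if legal) to complete a pair, or
 * promoted to the long encoding.
 */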
static void
nv_pc_pass_pre_emission(void *priv, struct nv_basic_block *b)
{
   struct nv_pc *pc = (struct nv_pc *)priv;
   struct nv_basic_block *in;
   struct nv_instruction *nvi, *next;
   int j;
   uint size, n32 = 0;

   for (j = pc->num_blocks - 1; j >= 0 && !pc->bb_list[j]->bin_size; --j);
   if (j >= 0) {
      in = pc->bb_list[j];

      /* check for no-op branches (BRA $PC+8) */
      if (in->exit && in->exit->opcode == NV_OP_BRA && in->exit->target == b) {
         in->bin_size -= 8;
         pc->bin_size -= 8;

         for (++j; j < pc->num_blocks; ++j)
            pc->bb_list[j]->bin_pos -= 8;

         nv_nvi_delete(in->exit);
      }
      b->bin_pos = in->bin_pos + in->bin_size;
   }

   pc->bb_list[pc->num_blocks++] = b;

   /* visit node */

   for (nvi = b->entry; nvi; nvi = next) {
      next = nvi->next;
      if (nvi_isnop(nvi))
         nv_nvi_delete(nvi);
   }

   for (nvi = b->entry; nvi; nvi = next) {
      next = nvi->next;

      size = nv50_inst_min_size(nvi);
      if (nvi->next && size < 8)
         ++n32;
      else
      if ((n32 & 1) && nvi->next &&
          nv50_inst_min_size(nvi->next) == 4 &&
          inst_commutation_legal(nvi, nvi->next)) {
         ++n32;
         nv_nvi_permute(nvi, nvi->next);
         next = nvi;
      } else {
         nvi->is_long = 1;

         b->bin_size += n32 & 1;
         if (n32 & 1)
            nvi->prev->is_long = 1;
         n32 = 0;
      }
      b->bin_size += 1 + nvi->is_long;
   }

   if (!b->entry) {
      NV50_DBGMSG("block %p is now empty\n", b);
   } else
   if (!b->exit->is_long) {
      assert(n32);
      b->exit->is_long = 1;
      b->bin_size += 1;

      /* might have del'd a whole tail of instructions */
      if (!b->exit->prev->is_long && !(n32 & 1)) {
         b->bin_size += 1;
         b->exit->prev->is_long = 1;
      }
   }
   assert(!b->entry || (b->exit && b->exit->is_long));

   pc->bin_size += b->bin_size *= 4;
}

int
nv_pc_exec_pass2(struct nv_pc *pc)
{
   struct nv_pass pass;

   pass.pc = pc;

   pc->pass_seq++;
   nv_pass_flatten(&pass, pc->root);

   NV50_DBGMSG("preparing %u blocks for emission\n", pc->num_blocks);

   pc->bb_list = CALLOC(pc->num_blocks, sizeof(struct nv_basic_block *));
   pc->num_blocks = 0;

   nv_pc_pass_in_order(pc->root, nv_pc_pass_pre_emission, pc);

   return 0;
}

static INLINE boolean
is_cmem_load(struct nv_instruction *nvi)
{
   return (nvi && nvi->opcode == NV_OP_LDA &&
           nvi->src[0]->value->reg.file >= NV_FILE_MEM_C(0) &&
           nvi->src[0]->value->reg.file <= NV_FILE_MEM_C(15));
}

static INLINE boolean
is_smem_load(struct nv_instruction *nvi)
{
   return (nvi && nvi->opcode == NV_OP_LDA &&
           (nvi->src[0]->value->reg.file == NV_FILE_MEM_S ||
            nvi->src[0]->value->reg.file == NV_FILE_MEM_P));
}

static INLINE boolean
is_immd_move(struct nv_instruction *nvi)
{
   return (nvi->opcode == NV_OP_MOV &&
           nvi->src[0]->value->reg.file == NV_FILE_IMM);
}

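/* For commutative ops, move a c[] load to source 1 and an s[] load to
 * source 0, which is where the encoding allows them to be read directly.
 * Swapping the sources of SET also requires mirroring its condition code:
 * cc_swapped exchanges the LT and GT bits while leaving EQ and U intact.
 */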
static INLINE void
check_swap_src_0_1(struct nv_instruction *nvi)
{
   static const ubyte cc_swapped[8] = { 0, 4, 2, 6, 1, 5, 3, 7 };

   struct nv_ref *src0 = nvi->src[0], *src1 = nvi->src[1];

   if (!nv_op_commutative(nvi->opcode))
      return;
   assert(src0 && src1);

   if (src1->value->reg.file == NV_FILE_IMM)
      return;

   if (is_cmem_load(src0->value->insn)) {
      if (!is_cmem_load(src1->value->insn)) {
         nvi->src[0] = src1;
         nvi->src[1] = src0;
         /* debug_printf("swapping cmem load to 1\n"); */
      }
   } else
   if (is_smem_load(src1->value->insn)) {
      if (!is_smem_load(src0->value->insn)) {
         nvi->src[0] = src1;
         nvi->src[1] = src0;
         /* debug_printf("swapping smem load to 0\n"); */
      }
   }

   if (nvi->opcode == NV_OP_SET && nvi->src[0] != src0)
      nvi->set_cond = cc_swapped[nvi->set_cond];
}

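/* Fold computations into output stores: if a value's only use is a MOV to
 * an output register, make the defining instruction write the output
 * directly and delete the MOV. Instructions with immediate sources are
 * skipped, since they cannot write to $oX.
 */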
static int
nv_pass_fold_stores(struct nv_pass *ctx, struct nv_basic_block *b)
{
   struct nv_instruction *nvi, *sti, *next;
   int j;

   for (sti = b->entry; sti; sti = next) {
      next = sti->next;

      /* only handling MOV to $oX here */
      if (!sti->def[0] || sti->def[0]->reg.file != NV_FILE_OUT)
         continue;
      if (sti->opcode != NV_OP_MOV && sti->opcode != NV_OP_STA)
         continue;

      nvi = sti->src[0]->value->insn;
      if (!nvi || nvi->opcode == NV_OP_PHI || nv_is_vector_op(nvi->opcode))
         continue;
      assert(nvi->def[0] == sti->src[0]->value);

      if (nvi->def[0]->refc > 1)
         continue;

      /* cannot write to $oX when using immediate */
      for (j = 0; j < 4 && nvi->src[j]; ++j)
         if (nvi->src[j]->value->reg.file == NV_FILE_IMM)
            break;
      if (j < 4 && nvi->src[j])
         continue;

      nvi->def[0] = sti->def[0];
      nvi->fixed = sti->fixed;

      nv_nvi_delete(sti);
   }
   DESCEND_ARBITRARY(j, nv_pass_fold_stores);

   return 0;
}

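/* Fold loads into the instructions that use them: an immediate MOV or a
 * c[]/s[] LDA whose result can be sourced directly is replaced by its
 * memory operand, and the load is deleted once it has no users left.
 */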
static int
nv_pass_fold_loads(struct nv_pass *ctx, struct nv_basic_block *b)
{
   struct nv_instruction *nvi, *ld;
   int j;

   for (nvi = b->entry; nvi; nvi = nvi->next) {
      check_swap_src_0_1(nvi);

      for (j = 0; j < 3; ++j) {
         if (!nvi->src[j])
            break;
         ld = nvi->src[j]->value->insn;
         if (!ld)
            continue;

         if (is_immd_move(ld) && nv50_nvi_can_use_imm(nvi, j)) {
            nv_reference(ctx->pc, &nvi->src[j], ld->src[0]->value);
            continue;
         }

         if (ld->opcode != NV_OP_LDA)
            continue;
         if (!nv50_nvi_can_load(nvi, j, ld->src[0]->value))
            continue;

         if (j == 0 && ld->src[4]) /* can't load shared mem */
            continue;

         /* fold it ! */ /* XXX: ref->insn */
         nv_reference(ctx->pc, &nvi->src[j], ld->src[0]->value);
         if (ld->src[4])
            nv_reference(ctx->pc, &nvi->src[4], ld->src[4]->value);

         if (!nv_nvi_refcount(ld))
            nv_nvi_delete(ld);
      }
   }
   DESCEND_ARBITRARY(j, nv_pass_fold_loads);

   return 0;
}

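/* Lower SUB to ADD with a negated second source, and fold single-use
 * NEG/ABS instructions into source modifiers where the consuming opcode
 * supports them; ABS absorbs NEG/ABS of its operand, and NEG of NEG
 * becomes a plain MOV. Also merge SAT into a preceding MAD as its
 * saturate flag.
 */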
static int
nv_pass_lower_mods(struct nv_pass *ctx, struct nv_basic_block *b)
{
   int j;
   struct nv_instruction *nvi, *mi, *next;
   ubyte mod;

   for (nvi = b->entry; nvi; nvi = next) {
      next = nvi->next;
      if (nvi->opcode == NV_OP_SUB) {
         nvi->opcode = NV_OP_ADD;
         nvi->src[1]->mod ^= NV_MOD_NEG;
      }
      /* should not put any modifiers on NEG and ABS */
      assert(nvi->opcode != NV_OP_NEG || !nvi->src[0]->mod);
      assert(nvi->opcode != NV_OP_ABS || !nvi->src[0]->mod);

      for (j = 0; j < 4; ++j) {
         if (!nvi->src[j])
            break;

         mi = nvi->src[j]->value->insn;
         if (!mi)
            continue;
         if (mi->def[0]->refc > 1)
            continue;

         if (mi->opcode == NV_OP_NEG) mod = NV_MOD_NEG;
         else
         if (mi->opcode == NV_OP_ABS) mod = NV_MOD_ABS;
         else
            continue;

         if (nvi->opcode == NV_OP_ABS)
            mod &= ~(NV_MOD_NEG | NV_MOD_ABS);
         else
         if (nvi->opcode == NV_OP_NEG && mod == NV_MOD_NEG) {
            nvi->opcode = NV_OP_MOV;
            mod = 0;
         }

         if (!(nv50_supported_src_mods(nvi->opcode, j) & mod))
            continue;

         nv_reference(ctx->pc, &nvi->src[j], mi->src[0]->value);

         nvi->src[j]->mod ^= mod;
      }

      if (nvi->opcode == NV_OP_SAT) {
         mi = nvi->src[0]->value->insn;

         if (mi && mi->opcode == NV_OP_MAD && !mi->flags_def) {
            mi->saturate = 1;
            mi->def[0] = nvi->def[0];
            nv_nvi_delete(nvi);
         }
      }
   }
   DESCEND_ARBITRARY(j, nv_pass_lower_mods);

   return 0;
}

#define SRC_IS_MUL(s) ((s)->insn && (s)->insn->opcode == NV_OP_MUL)

static void
modifiers_apply(uint32_t *val, ubyte type, ubyte mod)
{
   if (mod & NV_MOD_ABS) {
      if (type == NV_TYPE_F32)
         *val &= 0x7fffffff;
      else
      if ((*val) & (1 << 31))
         *val = ~(*val) + 1;
   }
   if (mod & NV_MOD_NEG) {
      if (type == NV_TYPE_F32)
         *val ^= 0x80000000;
      else
         *val = ~(*val) + 1;
   }
}

static INLINE uint
modifiers_opcode(ubyte mod)
{
   switch (mod) {
   case NV_MOD_NEG: return NV_OP_NEG;
   case NV_MOD_ABS: return NV_OP_ABS;
   case 0:
      return NV_OP_MOV;
   default:
      return NV_OP_NOP;
   }
}

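/* Evaluate an instruction whose relevant sources are all immediates and
 * turn it into a MOV of the computed constant. A MAD (only handled if its
 * third source is a GPR) is demoted to an ADD of that source and the
 * folded product.
 */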
static void
constant_expression(struct nv_pc *pc, struct nv_instruction *nvi,
                    struct nv_value *src0, struct nv_value *src1)
{
   struct nv_value *val;
   union {
      float f32;
      uint32_t u32;
      int32_t s32;
   } u0, u1, u;
   ubyte type;

   if (!nvi->def[0])
      return;
   type = nvi->def[0]->reg.type;

   u.u32 = 0;
   u0.u32 = src0->reg.imm.u32;
   u1.u32 = src1->reg.imm.u32;

   modifiers_apply(&u0.u32, type, nvi->src[0]->mod);
   modifiers_apply(&u1.u32, type, nvi->src[1]->mod);

   switch (nvi->opcode) {
   case NV_OP_MAD:
      if (nvi->src[2]->value->reg.file != NV_FILE_GPR)
         return;
      /* fall through */
   case NV_OP_MUL:
      switch (type) {
      case NV_TYPE_F32: u.f32 = u0.f32 * u1.f32; break;
      case NV_TYPE_U32: u.u32 = u0.u32 * u1.u32; break;
      case NV_TYPE_S32: u.s32 = u0.s32 * u1.s32; break;
      default:
         assert(0);
         break;
      }
      break;
   case NV_OP_ADD:
      switch (type) {
      case NV_TYPE_F32: u.f32 = u0.f32 + u1.f32; break;
      case NV_TYPE_U32: u.u32 = u0.u32 + u1.u32; break;
      case NV_TYPE_S32: u.s32 = u0.s32 + u1.s32; break;
      default:
         assert(0);
         break;
      }
      break;
   case NV_OP_SUB:
      switch (type) {
      case NV_TYPE_F32: u.f32 = u0.f32 - u1.f32; break;
      case NV_TYPE_U32: u.u32 = u0.u32 - u1.u32; break;
      case NV_TYPE_S32: u.s32 = u0.s32 - u1.s32; break;
      default:
         assert(0);
         break;
      }
      break;
   default:
      return;
   }

   nvi->opcode = NV_OP_MOV;

   val = new_value(pc, NV_FILE_IMM, type);

   val->reg.imm.u32 = u.u32;

   nv_reference(pc, &nvi->src[1], NULL);
   nv_reference(pc, &nvi->src[0], val);

   if (nvi->src[2]) { /* from MAD */
      nvi->src[1] = nvi->src[0];
      nvi->src[0] = nvi->src[2];
      nvi->src[2] = NULL;
      nvi->opcode = NV_OP_ADD;
   }
}

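/* Algebraic simplification of an instruction with one immediate source
 * @val in slot @s, e.g.:
 *
 *   MUL dst, x, 1.0   ->  MOV dst, x
 *   MUL dst, x, 2.0   ->  ADD dst, x, x
 *   MUL dst, x, -1.0  ->  NEG dst, x
 *   MUL dst, x, 0     ->  MOV dst, 0
 *   ADD dst, x, 0     ->  MOV dst, x
 *
 * RCP and RSQ of an immediate are evaluated at compile time.
 */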
static void
constant_operand(struct nv_pc *pc,
                 struct nv_instruction *nvi, struct nv_value *val, int s)
{
   union {
      float f32;
      uint32_t u32;
      int32_t s32;
   } u;
   int t = s ? 0 : 1;
   uint op;
   ubyte type;

   if (!nvi->def[0])
      return;
   type = nvi->def[0]->reg.type;

   u.u32 = val->reg.imm.u32;
   modifiers_apply(&u.u32, type, nvi->src[s]->mod);

   switch (nvi->opcode) {
   case NV_OP_MUL:
      if ((type == NV_TYPE_F32 && u.f32 == 1.0f) ||
          (NV_TYPE_ISINT(type) && u.u32 == 1)) {
         if ((op = modifiers_opcode(nvi->src[t]->mod)) == NV_OP_NOP)
            break;
         nvi->opcode = op;
         nv_reference(pc, &nvi->src[s], NULL);
         nvi->src[0] = nvi->src[t];
         nvi->src[1] = NULL;
      } else
      if ((type == NV_TYPE_F32 && u.f32 == 2.0f) ||
          (NV_TYPE_ISINT(type) && u.u32 == 2)) {
         nvi->opcode = NV_OP_ADD;
         nv_reference(pc, &nvi->src[s], nvi->src[t]->value);
         nvi->src[s]->mod = nvi->src[t]->mod;
      } else
      if (type == NV_TYPE_F32 && u.f32 == -1.0f) {
         if (nvi->src[t]->mod & NV_MOD_NEG)
            nvi->opcode = NV_OP_MOV;
         else
            nvi->opcode = NV_OP_NEG;
         nv_reference(pc, &nvi->src[s], NULL);
         nvi->src[0] = nvi->src[t];
         nvi->src[1] = NULL;
      } else
      if (type == NV_TYPE_F32 && u.f32 == -2.0f) {
         nvi->opcode = NV_OP_ADD;
         nv_reference(pc, &nvi->src[s], nvi->src[t]->value);
         nvi->src[s]->mod = (nvi->src[t]->mod ^= NV_MOD_NEG);
      } else
      if (u.u32 == 0) {
         nvi->opcode = NV_OP_MOV;
         nv_reference(pc, &nvi->src[t], NULL);
         if (s) {
            nvi->src[0] = nvi->src[1];
            nvi->src[1] = NULL;
         }
      }
      break;
   case NV_OP_ADD:
      if (u.u32 == 0) {
         if ((op = modifiers_opcode(nvi->src[t]->mod)) == NV_OP_NOP)
            break;
         nvi->opcode = op;
         nv_reference(pc, &nvi->src[s], NULL);
         nvi->src[0] = nvi->src[t];
         nvi->src[1] = NULL;
      }
      break;
   case NV_OP_RCP:
      u.f32 = 1.0f / u.f32;
      (val = new_value(pc, NV_FILE_IMM, NV_TYPE_F32))->reg.imm.f32 = u.f32;
      nvi->opcode = NV_OP_MOV;
      assert(s == 0);
      nv_reference(pc, &nvi->src[0], val);
      break;
   case NV_OP_RSQ:
      u.f32 = 1.0f / sqrtf(u.f32);
      (val = new_value(pc, NV_FILE_IMM, NV_TYPE_F32))->reg.imm.f32 = u.f32;
      nvi->opcode = NV_OP_MOV;
      assert(s == 0);
      nv_reference(pc, &nvi->src[0], val);
      break;
   default:
      break;
   }
}

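/* Constant-fold sources where possible, then try to combine an ADD with a
 * single-use MUL source into a MAD: the MUL's factors become sources 0
 * and 1, the remaining ADD operand becomes source 2, and a NEG modifier
 * on the MUL result is pushed onto the first factor.
 */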
static int
nv_pass_lower_arith(struct nv_pass *ctx, struct nv_basic_block *b)
{
   struct nv_instruction *nvi, *next;
   int j;

   for (nvi = b->entry; nvi; nvi = next) {
      struct nv_value *src0, *src1, *src;
      int mod;

      next = nvi->next;

      src0 = nvcg_find_immediate(nvi->src[0]);
      src1 = nvcg_find_immediate(nvi->src[1]);

      if (src0 && src1)
         constant_expression(ctx->pc, nvi, src0, src1);
      else {
         if (src0)
            constant_operand(ctx->pc, nvi, src0, 0);
         else
         if (src1)
            constant_operand(ctx->pc, nvi, src1, 1);
      }

      /* try to combine MUL, ADD into MAD */
      if (nvi->opcode != NV_OP_ADD)
         continue;

      src0 = nvi->src[0]->value;
      src1 = nvi->src[1]->value;

      if (SRC_IS_MUL(src0) && src0->refc == 1)
         src = src0;
      else
      if (SRC_IS_MUL(src1) && src1->refc == 1)
         src = src1;
      else
         continue;

      nvi->opcode = NV_OP_MAD;
      mod = nvi->src[(src == src0) ? 0 : 1]->mod;
      nv_reference(ctx->pc, &nvi->src[(src == src0) ? 0 : 1], NULL);
      nvi->src[2] = nvi->src[(src == src0) ? 1 : 0];

      assert(!(mod & ~NV_MOD_NEG));
      nvi->src[0] = new_ref(ctx->pc, src->insn->src[0]->value);
      nvi->src[1] = new_ref(ctx->pc, src->insn->src[1]->value);
      nvi->src[0]->mod = src->insn->src[0]->mod ^ mod;
      nvi->src[1]->mod = src->insn->src[1]->mod;
   }
   DESCEND_ARBITRARY(j, nv_pass_lower_arith);

   return 0;
}

/* TODO: redundant store elimination */

struct load_record {
   struct load_record *next;
   uint64_t data;
   struct nv_value *value;
};

#define LOAD_RECORD_POOL_SIZE 1024

struct nv_pass_reld_elim {
   struct nv_pc *pc;

   struct load_record *imm;
   struct load_record *mem_s;
   struct load_record *mem_v;
   struct load_record *mem_c[16];
   struct load_record *mem_l;

   struct load_record pool[LOAD_RECORD_POOL_SIZE];
   int alloc;
};

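/* Eliminate redundant loads within a block: remember the most recent
 * LINTERP/PINTERP, LDA and immediate MOV for each source location (keyed
 * by register id or immediate bits) and replace later reloads of the same
 * location with the recorded value. Records live in a fixed-size pool and
 * are reset before descending to the next block.
 */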
static int
nv_pass_reload_elim(struct nv_pass_reld_elim *ctx, struct nv_basic_block *b)
{
   struct load_record **rec, *it;
   struct nv_instruction *ld, *next;
   uint64_t data;
   struct nv_value *val;
   int j;

   for (ld = b->entry; ld; ld = next) {
      next = ld->next;
      if (!ld->src[0])
         continue;
      val = ld->src[0]->value;
      rec = NULL;

      if (ld->opcode == NV_OP_LINTERP || ld->opcode == NV_OP_PINTERP) {
         data = val->reg.id;
         rec = &ctx->mem_v;
      } else
      if (ld->opcode == NV_OP_LDA) {
         data = val->reg.id;
         if (val->reg.file >= NV_FILE_MEM_C(0) &&
             val->reg.file <= NV_FILE_MEM_C(15))
            rec = &ctx->mem_c[val->reg.file - NV_FILE_MEM_C(0)];
         else
         if (val->reg.file == NV_FILE_MEM_S)
            rec = &ctx->mem_s;
         else
         if (val->reg.file == NV_FILE_MEM_L)
            rec = &ctx->mem_l;
      } else
      if ((ld->opcode == NV_OP_MOV) && (val->reg.file == NV_FILE_IMM)) {
         data = val->reg.imm.u32;
         rec = &ctx->imm;
      }

      if (!rec || !ld->def[0]->refc)
         continue;

      for (it = *rec; it; it = it->next)
         if (it->data == data)
            break;

      if (it) {
         if (ld->def[0]->reg.id >= 0)
            it->value = ld->def[0];
         else
         if (!ld->fixed)
            nvcg_replace_value(ctx->pc, ld->def[0], it->value);
      } else {
         if (ctx->alloc == LOAD_RECORD_POOL_SIZE)
            continue;
         it = &ctx->pool[ctx->alloc++];
         it->next = *rec;
         it->data = data;
         it->value = ld->def[0];
         *rec = it;
      }
   }

   ctx->imm = NULL;
   ctx->mem_s = NULL;
   ctx->mem_v = NULL;
   for (j = 0; j < 16; ++j)
      ctx->mem_c[j] = NULL;
   ctx->mem_l = NULL;
   ctx->alloc = 0;

   DESCEND_ARBITRARY(j, nv_pass_reload_elim);

   return 0;
}

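/* Compute the TEX write mask from the components that are actually used
 * and compact the defs so that the live components come first, matching
 * the consecutive registers a TEX instruction writes its enabled results
 * to.
 */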
static int
nv_pass_tex_mask(struct nv_pass *ctx, struct nv_basic_block *b)
{
   int i, c, j;

   for (i = 0; i < ctx->pc->num_instructions; ++i) {
      struct nv_instruction *nvi = &ctx->pc->instructions[i];
      struct nv_value *def[4];

      if (!nv_is_vector_op(nvi->opcode))
         continue;
      nvi->tex_mask = 0;

      for (c = 0; c < 4; ++c) {
         if (nvi->def[c]->refc)
            nvi->tex_mask |= 1 << c;
         def[c] = nvi->def[c];
      }

      j = 0;
      for (c = 0; c < 4; ++c)
         if (nvi->tex_mask & (1 << c))
            nvi->def[j++] = def[c];
      for (c = 0; c < 4; ++c)
         if (!(nvi->tex_mask & (1 << c)))
            nvi->def[j++] = def[c];
      assert(j == 4);
   }
   return 0;
}

struct nv_pass_dce {
   struct nv_pc *pc;
   uint removed;
};

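/* Remove instructions that have no side effects and whose results are
 * never read (see inst_cullable); the caller re-runs this pass until no
 * more instructions die.
 */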
static int
nv_pass_dce(struct nv_pass_dce *ctx, struct nv_basic_block *b)
{
   int j;
   struct nv_instruction *nvi, *next;

   for (nvi = b->phi ? b->phi : b->entry; nvi; nvi = next) {
      next = nvi->next;

      if (inst_cullable(nvi)) {
         nv_nvi_delete(nvi);

         ++ctx->removed;
      }
   }
   DESCEND_ARBITRARY(j, nv_pass_dce);

   return 0;
}

/* Register allocation inserted ELSE blocks for all IF/ENDIF without ELSE.
 * Returns TRUE if @bb initiates an IF/ELSE/ENDIF clause, or is an IF with
 * BREAK and dummy ELSE block.
 */
static INLINE boolean
bb_is_if_else_endif(struct nv_basic_block *bb)
{
   if (!bb->out[0] || !bb->out[1])
      return FALSE;

   if (bb->out[0]->out_kind[0] == CFG_EDGE_LOOP_LEAVE) {
      return (bb->out[0]->out[1] == bb->out[1]->out[0] &&
              !bb->out[1]->out[1]);
   } else {
      return (bb->out[0]->out[0] == bb->out[1]->out[0] &&
              !bb->out[0]->out[1] &&
              !bb->out[1]->out[1]);
   }
}

/* predicate instructions and remove branch at the end */
static void
predicate_instructions(struct nv_pc *pc, struct nv_basic_block *b,
                       struct nv_value *p, ubyte cc)
{
   struct nv_instruction *nvi;

   if (!b->entry)
      return;
   for (nvi = b->entry; nvi->next; nvi = nvi->next) {
      if (!nvi_isnop(nvi)) {
         nvi->cc = cc;
         nv_reference(pc, &nvi->flags_src, p);
      }
   }

   if (nvi->opcode == NV_OP_BRA)
      nv_nvi_delete(nvi);
   else
   if (!nvi_isnop(nvi)) {
      nvi->cc = cc;
      nv_reference(pc, &nvi->flags_src, p);
   }
}

/* NOTE: Run this after register allocation, we can just cut out the cflow
 * instructions and hook the predicates to the conditional OPs if they are
 * not using immediates; better than inserting SELECT to join definitions.
 *
 * NOTE: Should adapt prior optimization to make this possible more often.
 */
static int
nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b)
{
   struct nv_instruction *nvi;
   struct nv_value *pred;
   int i;
   int n0 = 0, n1 = 0;

   if (bb_is_if_else_endif(b)) {

      NV50_DBGMSG("pass_flatten: IF/ELSE/ENDIF construct at BB:%i\n", b->id);

      for (n0 = 0, nvi = b->out[0]->entry; nvi; nvi = nvi->next, ++n0)
         if (!nv50_nvi_can_predicate(nvi))
            break;
      if (!nvi) {
         for (n1 = 0, nvi = b->out[1]->entry; nvi; nvi = nvi->next, ++n1)
            if (!nv50_nvi_can_predicate(nvi))
               break;
#ifdef NV50PC_DEBUG
         if (nvi) {
            debug_printf("cannot predicate: "); nv_print_instruction(nvi);
         }
      } else {
         debug_printf("cannot predicate: "); nv_print_instruction(nvi);
#endif
      }

      if (!nvi && n0 < 12 && n1 < 12) { /* 12 as arbitrary limit */
         assert(b->exit && b->exit->flags_src);
         pred = b->exit->flags_src->value;

         predicate_instructions(ctx->pc, b->out[0], pred, NV_CC_NE | NV_CC_U);
         predicate_instructions(ctx->pc, b->out[1], pred, NV_CC_EQ);

         assert(b->exit && b->exit->opcode == NV_OP_BRA);
         nv_nvi_delete(b->exit);

         if (b->exit && b->exit->opcode == NV_OP_JOINAT)
            nv_nvi_delete(b->exit);

         i = (b->out[0]->out_kind[0] == CFG_EDGE_LOOP_LEAVE) ? 1 : 0;

         if ((nvi = b->out[0]->out[i]->entry)) {
            nvi->is_join = 0;
            if (nvi->opcode == NV_OP_JOIN)
               nv_nvi_delete(nvi);
         }
      }
   }
   DESCEND_ARBITRARY(i, nv_pass_flatten);

   return 0;
}

/* local common subexpression elimination, stupid O(n^2) implementation */
static int
nv_pass_cse(struct nv_pass *ctx, struct nv_basic_block *b)
{
   struct nv_instruction *ir, *ik, *next;
   struct nv_instruction *entry = b->phi ? b->phi : b->entry;
   int s;
   unsigned int reps;

   do {
      reps = 0;
      for (ir = entry; ir; ir = next) {
         next = ir->next;
         for (ik = entry; ik != ir; ik = ik->next) {
            if (ir->opcode != ik->opcode || ir->fixed)
               continue;

            if (!ir->def[0] || !ik->def[0] ||
                ik->opcode == NV_OP_LDA ||
                ik->opcode == NV_OP_STA ||
                ik->opcode == NV_OP_MOV ||
                nv_is_vector_op(ik->opcode))
               continue; /* ignore loads, stores & moves */

            if (ik->src[4] || ir->src[4])
               continue; /* don't mess with address registers */

            if (ik->flags_src || ir->flags_src ||
                ik->flags_def || ir->flags_def)
               continue; /* and also not with flags, for now */

            if (ik->def[0]->reg.file == NV_FILE_OUT ||
                ir->def[0]->reg.file == NV_FILE_OUT ||
                !values_equal(ik->def[0], ir->def[0]))
               continue;

            for (s = 0; s < 3; ++s) {
               struct nv_value *a, *b;

               if (!ik->src[s]) {
                  if (ir->src[s])
                     break;
                  continue;
               }
               if (ik->src[s]->mod != ir->src[s]->mod)
                  break;
               a = ik->src[s]->value;
               b = ir->src[s]->value;
               if (a == b)
                  continue;
               if (a->reg.file != b->reg.file ||
                   a->reg.id < 0 ||
                   a->reg.id != b->reg.id)
                  break;
            }
            if (s == 3) {
               nv_nvi_delete(ir);
               ++reps;
               nvcg_replace_value(ctx->pc, ir->def[0], ik->def[0]);
               break;
            }
         }
      }
   } while (reps);

   DESCEND_ARBITRARY(s, nv_pass_cse);

   return 0;
}

int
nv_pc_exec_pass0(struct nv_pc *pc)
{
   struct nv_pass_reld_elim *reldelim;
   struct nv_pass pass;
   struct nv_pass_dce dce;
   int ret;

   pass.n = 0;
   pass.pc = pc;

   /* Do this first, so we don't have to pay attention
    * to whether sources are supported memory loads.
    */
   pc->pass_seq++;
   ret = nv_pass_lower_arith(&pass, pc->root);
   if (ret)
      return ret;

   pc->pass_seq++;
   ret = nv_pass_fold_loads(&pass, pc->root);
   if (ret)
      return ret;

   pc->pass_seq++;
   ret = nv_pass_fold_stores(&pass, pc->root);
   if (ret)
      return ret;

   reldelim = CALLOC_STRUCT(nv_pass_reld_elim);
   reldelim->pc = pc;
   pc->pass_seq++;
   ret = nv_pass_reload_elim(reldelim, pc->root);
   FREE(reldelim);
   if (ret)
      return ret;

   pc->pass_seq++;
   ret = nv_pass_cse(&pass, pc->root);
   if (ret)
      return ret;

   pc->pass_seq++;
   ret = nv_pass_lower_mods(&pass, pc->root);
   if (ret)
      return ret;

   dce.pc = pc;
   do {
      dce.removed = 0;
      pc->pass_seq++;
      ret = nv_pass_dce(&dce, pc->root);
      if (ret)
         return ret;
   } while (dce.removed);

   ret = nv_pass_tex_mask(&pass, pc->root);
   if (ret)
      return ret;

   return ret;
}