/*
 * Copyright 2010 Christoph Bumiller
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "nvc0_pc.h"
#include "nvc0_program.h"

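/* Apply pass function f to the successor blocks (out[0], out[1]) that have
 * not been visited yet in the current pass, as tracked by pass_seq.
 */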
#define DESCEND_ARBITRARY(j, f)                                    \
do {                                                               \
   b->pass_seq = ctx->pc->pass_seq;                                \
                                                                   \
   for (j = 0; j < 2; ++j)                                         \
      if (b->out[j] && b->out[j]->pass_seq < ctx->pc->pass_seq)    \
         f(ctx, b->out[j]);                                        \
} while (0)

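/* Return TRUE if the allocated register ranges of the joined values of a and
 * b overlap; values in memory files never interfere here.
 */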
static INLINE boolean
registers_interfere(struct nv_value *a, struct nv_value *b)
{
   if (a->reg.file != b->reg.file)
      return FALSE;
   if (NV_IS_MEMORY_FILE(a->reg.file) || NV_IS_MEMORY_FILE(b->reg.file))
      return FALSE;

   assert(a->join->reg.id >= 0 && b->join->reg.id >= 0);

   if (a->join->reg.id < b->join->reg.id) {
      return (a->join->reg.id + a->reg.size >= b->join->reg.id);
   } else
   if (a->join->reg.id > b->join->reg.id) {
      return (b->join->reg.id + b->reg.size >= a->join->reg.id);
   }

   return FALSE;
}

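/* Return TRUE if a and b denote the same storage of the same size: the same
 * address for memory files, the same allocated register otherwise.
 */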
static INLINE boolean
values_equal(struct nv_value *a, struct nv_value *b)
{
   if (a->reg.file != b->reg.file || a->reg.size != b->reg.size)
      return FALSE;
   if (NV_IS_MEMORY_FILE(a->reg.file))
      return a->reg.address == b->reg.address;
   else
      return a->join->reg.id == b->join->reg.id;
}

#if 0
static INLINE boolean
inst_commutation_check(struct nv_instruction *a, struct nv_instruction *b)
{
   int si, di;

   for (di = 0; di < 4 && a->def[di]; ++di)
      for (si = 0; si < 5 && b->src[si]; ++si)
         if (registers_interfere(a->def[di], b->src[si]->value))
            return FALSE;

   return TRUE;
}

/* Check whether we can swap the order of the instructions,
 * where a & b may be either the earlier or the later one.
 */
static boolean
inst_commutation_legal(struct nv_instruction *a, struct nv_instruction *b)
{
   return inst_commutation_check(a, b) && inst_commutation_check(b, a);
}
#endif

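/* An instruction can be removed by dead code elimination if it is not a
 * store, has no flow control or fixed/join semantics, and none of its
 * definitions are referenced.
 */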
static INLINE boolean
inst_removable(struct nv_instruction *nvi)
{
   if (nvi->opcode == NV_OP_ST)
      return FALSE;
   return (!(nvi->terminator ||
             nvi->join ||
             nvi->target ||
             nvi->fixed ||
             nvc0_insn_refcount(nvi)));
}

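/* UNDEF and BIND are always treated as no-ops; a MOV or SELECT is a no-op if
 * its sources and destination ended up in the same storage, or if its result
 * was never assigned a register at all.
 */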
static INLINE boolean
inst_is_noop(struct nv_instruction *nvi)
{
   if (nvi->opcode == NV_OP_UNDEF || nvi->opcode == NV_OP_BIND)
      return TRUE;
   if (nvi->terminator || nvi->join)
      return FALSE;
   if (nvi->def[0] && nvi->def[0]->join->reg.id < 0)
      return TRUE;
   if (nvi->opcode != NV_OP_MOV && nvi->opcode != NV_OP_SELECT)
      return FALSE;
   if (nvi->def[0]->reg.file != nvi->src[0]->value->reg.file)
      return FALSE;

   if (nvi->src[0]->value->join->reg.id < 0) {
      NOUVEAU_DBG("inst_is_noop: orphaned value detected\n");
      return TRUE;
   }

   if (nvi->opcode == NV_OP_SELECT)
      if (!values_equal(nvi->def[0], nvi->src[1]->value))
         return FALSE;
   return values_equal(nvi->def[0], nvi->src[0]->value);
}

struct nv_pass {
   struct nv_pc *pc;
   int n;
   void *priv;
};

static int
nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b);

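/* Called on each basic block in emission order: deletes no-op instructions
 * (and EXPORTs in fragment programs), accumulates emit sizes and positions,
 * and removes branches to the immediately following block (BRA $PC+8).
 */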
static void
nv_pc_pass_pre_emission(void *priv, struct nv_basic_block *b)
{
   struct nv_pc *pc = (struct nv_pc *)priv;
   struct nv_basic_block *in;
   struct nv_instruction *nvi, *next;
   int j;

   for (j = pc->num_blocks - 1; j >= 0 && !pc->bb_list[j]->emit_size; --j);

   if (j >= 0) {
      in = pc->bb_list[j];

      /* check for no-op branches (BRA $PC+8) */
      if (in->exit && in->exit->opcode == NV_OP_BRA && in->exit->target == b) {
         in->emit_size -= 8;
         pc->emit_size -= 8;

         for (++j; j < pc->num_blocks; ++j)
            pc->bb_list[j]->emit_pos -= 8;

         nvc0_insn_delete(in->exit);
      }
      b->emit_pos = in->emit_pos + in->emit_size;
   }

   pc->bb_list[pc->num_blocks++] = b;

   /* visit node */

   for (nvi = b->entry; nvi; nvi = next) {
      next = nvi->next;
      if (inst_is_noop(nvi) ||
          (pc->is_fragprog && nvi->opcode == NV_OP_EXPORT)) {
         nvc0_insn_delete(nvi);
      } else
         b->emit_size += 8;
   }
   pc->emit_size += b->emit_size;

#ifdef NOUVEAU_DEBUG
   if (!b->entry)
      debug_printf("BB:%i is now empty\n", b->id);
   else
      debug_printf("BB:%i size = %u\n", b->id, b->emit_size);
#endif
}

static int
nv_pc_pass2(struct nv_pc *pc, struct nv_basic_block *root)
{
   struct nv_pass pass;

   pass.pc = pc;

   pc->pass_seq++;
   nv_pass_flatten(&pass, root);

   nvc0_pc_pass_in_order(root, nv_pc_pass_pre_emission, pc);

   return 0;
}

int
nvc0_pc_exec_pass2(struct nv_pc *pc)
{
   int i, ret;

   NOUVEAU_DBG("preparing %u blocks for emission\n", pc->num_blocks);

   pc->num_blocks = 0; /* will reorder bb_list */

   for (i = 0; i < pc->num_subroutines + 1; ++i)
      if (pc->root[i] && (ret = nv_pc_pass2(pc, pc->root[i])))
         return ret;
   return 0;
}

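/* Check if the instruction is a load from a constant buffer (c0[] to c15[]).
 */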
static INLINE boolean
is_cspace_load(struct nv_instruction *nvi)
{
   if (!nvi)
      return FALSE;
   assert(nvi->indirect != 0);
   return (nvi->opcode == NV_OP_LD &&
           nvi->src[0]->value->reg.file >= NV_FILE_MEM_C(0) &&
           nvi->src[0]->value->reg.file <= NV_FILE_MEM_C(15));
}

static INLINE boolean
is_immd32_load(struct nv_instruction *nvi)
{
   if (!nvi)
      return FALSE;
   return (nvi->opcode == NV_OP_MOV &&
           nvi->src[0]->value->reg.file == NV_FILE_IMM &&
           nvi->src[0]->value->reg.size == 4);
}

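/* For commutative ops, swap the operands so that a value loaded from constant
 * space, if any, ends up in src1; for NV_OP_SET the comparison condition is
 * remapped via cc_swapped to keep the result unchanged.
 */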
static INLINE void
check_swap_src_0_1(struct nv_instruction *nvi)
{
   static const uint8_t cc_swapped[8] = { 0, 4, 2, 6, 1, 5, 3, 7 };

   struct nv_ref *src0 = nvi->src[0];
   struct nv_ref *src1 = nvi->src[1];

   if (!nv_op_commutative(nvi->opcode))
      return;
   assert(src0 && src1 && src0->value && src1->value);

   if (is_cspace_load(src0->value->insn)) {
      if (!is_cspace_load(src1->value->insn)) {
         nvi->src[0] = src1;
         nvi->src[1] = src0;
      }
   }

   if (nvi->src[0] != src0 && nvi->opcode == NV_OP_SET)
      nvi->set_cond = cc_swapped[nvi->set_cond];
}

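/* Attach the address value of an indirect load as an extra source of nvi and
 * record its source index in nvi->indirect.
 */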
static void
nvi_set_indirect_load(struct nv_pc *pc,
                      struct nv_instruction *nvi, struct nv_value *val)
{
   for (nvi->indirect = 0; nvi->indirect < 6 && nvi->src[nvi->indirect];
        ++nvi->indirect);
   assert(nvi->indirect < 6);
   nv_reference(pc, nvi, nvi->indirect, val);
}

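/* Fold LD sources directly into instructions that can encode the memory
 * operand (nvc0_insn_can_load), carrying over the indirect address if there
 * is one; loads that end up unreferenced are deleted.
 */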
static int
nvc0_pass_fold_loads(struct nv_pass *ctx, struct nv_basic_block *b)
{
   struct nv_instruction *nvi, *ld;
   int s;

   for (nvi = b->entry; nvi; nvi = nvi->next) {
      check_swap_src_0_1(nvi);

      for (s = 0; s < 3 && nvi->src[s]; ++s) {
         ld = nvi->src[s]->value->insn;
         if (!ld || ld->opcode != NV_OP_LD)
            continue;
         if (!nvc0_insn_can_load(nvi, s, ld))
            continue;

         /* fold it ! */
         nv_reference(ctx->pc, nvi, s, ld->src[0]->value);
         if (ld->indirect >= 0)
            nvi_set_indirect_load(ctx->pc, nvi, ld->src[ld->indirect]->value);

         if (!nvc0_insn_refcount(ld))
            nvc0_insn_delete(ld);
      }
   }
   DESCEND_ARBITRARY(s, nvc0_pass_fold_loads);

   return 0;
}

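/* Map a lone source modifier to the opcode that applies it (NEG/ABS), MOV for
 * no modifier, NOP if it cannot be expressed as a single instruction.
 */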
static INLINE uint
modifiers_opcode(uint8_t mod)
{
   switch (mod) {
   case NV_MOD_NEG: return NV_OP_NEG;
   case NV_MOD_ABS: return NV_OP_ABS;
   case 0:
      return NV_OP_MOV;
   default:
      return NV_OP_NOP;
   }
}

/* NOTE: Assumes loads have not yet been folded. */
static int
nv_pass_lower_mods(struct nv_pass *ctx, struct nv_basic_block *b)
{
   struct nv_instruction *nvi, *mi, *next;
   int j;
   uint8_t mod;

   for (nvi = b->entry; nvi; nvi = next) {
      next = nvi->next;
      if (nvi->opcode == NV_OP_SUB) {
         nvi->src[1]->mod ^= NV_MOD_NEG;
         nvi->opcode = NV_OP_ADD;
      }

      for (j = 0; j < 3 && nvi->src[j]; ++j) {
         mi = nvi->src[j]->value->insn;
         if (!mi)
            continue;
         if (mi->def[0]->refc > 1 || mi->predicate >= 0)
            continue;

         if (NV_BASEOP(mi->opcode) == NV_OP_NEG) mod = NV_MOD_NEG;
         else
         if (NV_BASEOP(mi->opcode) == NV_OP_ABS) mod = NV_MOD_ABS;
         else
            continue;
         assert(!(mod & mi->src[0]->mod & NV_MOD_NEG));

         mod |= mi->src[0]->mod;

         if ((nvi->opcode == NV_OP_ABS) || (nvi->src[j]->mod & NV_MOD_ABS)) {
            /* abs neg [abs] = abs */
            mod &= ~(NV_MOD_NEG | NV_MOD_ABS);
         } else
         if ((nvi->opcode == NV_OP_NEG) && (mod & NV_MOD_NEG)) {
            /* neg as opcode and modifier on same insn cannot occur */
            /* neg neg abs = abs, neg neg = identity */
            assert(j == 0);
            if (mod & NV_MOD_ABS)
               nvi->opcode = NV_OP_ABS;
            else
               nvi->opcode = NV_OP_MOV;
            mod = 0;
         }

         if ((nv_op_supported_src_mods(nvi->opcode) & mod) != mod)
            continue;

         nv_reference(ctx->pc, nvi, j, mi->src[0]->value);

         nvi->src[j]->mod ^= mod;
      }

      if (nvi->opcode == NV_OP_SAT) {
         mi = nvi->src[0]->value->insn;

         if (mi->def[0]->refc > 1 ||
             (mi->opcode != NV_OP_ADD &&
              mi->opcode != NV_OP_MUL &&
              mi->opcode != NV_OP_MAD))
            continue;
         mi->saturate = 1;
         mi->def[0] = nvi->def[0];
         mi->def[0]->insn = mi;
         nvc0_insn_delete(nvi);
      }
   }
   DESCEND_ARBITRARY(j, nv_pass_lower_mods);

   return 0;
}

#define SRC_IS_MUL(s) ((s)->insn && (s)->insn->opcode == NV_OP_MUL)

/*
static void
modifiers_apply(uint32_t *val, ubyte type, ubyte mod)
{
   if (mod & NV_MOD_ABS) {
      if (type == NV_TYPE_F32)
         *val &= 0x7fffffff;
      else
      if ((*val) & (1 << 31))
         *val = ~(*val) + 1;
   }
   if (mod & NV_MOD_NEG) {
      if (type == NV_TYPE_F32)
         *val ^= 0x80000000;
      else
         *val = ~(*val) + 1;
   }
}
*/

#if 0
static void
constant_expression(struct nv_pc *pc, struct nv_instruction *nvi,
                    struct nv_value *src0, struct nv_value *src1)
{
   struct nv_value *val;
   union {
      float f32;
      uint32_t u32;
      int32_t s32;
   } u0, u1, u;
   ubyte type;

   if (!nvi->def[0])
      return;
   type = NV_OPTYPE(nvi->opcode);

   u.u32 = 0;
   u0.u32 = src0->reg.imm.u32;
   u1.u32 = src1->reg.imm.u32;

   modifiers_apply(&u0.u32, type, nvi->src[0]->mod);
   modifiers_apply(&u1.u32, type, nvi->src[1]->mod);

   switch (nvi->opcode) {
   case NV_OP_MAD:
      if (nvi->src[2]->value->reg.file != NV_FILE_GPR)
         return;
      /* fall through */
   case NV_OP_MUL:
      switch (type) {
      case NV_TYPE_F32: u.f32 = u0.f32 * u1.f32; break;
      case NV_TYPE_U32: u.u32 = u0.u32 * u1.u32; break;
      case NV_TYPE_S32: u.s32 = u0.s32 * u1.s32; break;
      default:
         assert(0);
         break;
      }
      break;
   case NV_OP_ADD:
      switch (type) {
      case NV_TYPE_F32: u.f32 = u0.f32 + u1.f32; break;
      case NV_TYPE_U32: u.u32 = u0.u32 + u1.u32; break;
      case NV_TYPE_S32: u.s32 = u0.s32 + u1.s32; break;
      default:
         assert(0);
         break;
      }
      break;
   case NV_OP_SUB:
      switch (type) {
      case NV_TYPE_F32: u.f32 = u0.f32 - u1.f32; break;
      case NV_TYPE_U32: u.u32 = u0.u32 - u1.u32; break;
      case NV_TYPE_S32: u.s32 = u0.s32 - u1.s32; break;
      default:
         assert(0);
         break;
      }
      break;
   default:
      return;
   }

   nvi->opcode = NV_OP_MOV;

   val = new_value(pc, NV_FILE_IMM, type);

   val->reg.imm.u32 = u.u32;

   nv_reference(pc, nvi, 1, NULL);
   nv_reference(pc, nvi, 0, val);

   if (nvi->src[2]) { /* from MAD */
      nvi->src[1] = nvi->src[0];
      nvi->src[0] = nvi->src[2];
      nvi->src[2] = NULL;
      nvi->opcode = NV_OP_ADD;

      if (val->reg.imm.u32 == 0) {
         nvi->src[1] = NULL;
         nvi->opcode = NV_OP_MOV;
      }
   }
}

static void
constant_operand(struct nv_pc *pc,
                 struct nv_instruction *nvi, struct nv_value *val, int s)
{
   union {
      float f32;
      uint32_t u32;
      int32_t s32;
   } u;
   int t = s ? 0 : 1;
   uint op;
   ubyte type;

   if (!nvi->def[0])
      return;
   type = NV_OPTYPE(nvi->opcode);

   u.u32 = val->reg.imm.u32;
   modifiers_apply(&u.u32, type, nvi->src[s]->mod);

   switch (NV_BASEOP(nvi->opcode)) {
   case NV_OP_MUL:
      if ((type == NV_TYPE_F32 && u.f32 == 1.0f) ||
          (NV_TYPE_ISINT(type) && u.u32 == 1)) {
         if ((op = modifiers_opcode(nvi->src[t]->mod)) == NV_OP_NOP)
            break;
         nvi->opcode = op;
         nv_reference(pc, nvi, s, NULL);
         nvi->src[0] = nvi->src[t];
         nvi->src[1] = NULL;
      } else
      if ((type == NV_TYPE_F32 && u.f32 == 2.0f) ||
          (NV_TYPE_ISINT(type) && u.u32 == 2)) {
         nvi->opcode = NV_OP_ADD;
         nv_reference(pc, nvi, s, nvi->src[t]->value);
         nvi->src[s]->mod = nvi->src[t]->mod;
      } else
      if (type == NV_TYPE_F32 && u.f32 == -1.0f) {
         if (nvi->src[t]->mod & NV_MOD_NEG)
            nvi->opcode = NV_OP_MOV;
         else
            nvi->opcode = NV_OP_NEG;
         nv_reference(pc, nvi, s, NULL);
         nvi->src[0] = nvi->src[t];
         nvi->src[1] = NULL;
      } else
      if (type == NV_TYPE_F32 && u.f32 == -2.0f) {
         nvi->opcode = NV_OP_ADD;
         nv_reference(pc, nvi, s, nvi->src[t]->value);
         nvi->src[s]->mod = (nvi->src[t]->mod ^= NV_MOD_NEG);
      } else
      if (u.u32 == 0) {
         nvi->opcode = NV_OP_MOV;
         nv_reference(pc, nvi, t, NULL);
         if (s) {
            nvi->src[0] = nvi->src[1];
            nvi->src[1] = NULL;
         }
      }
      break;
   case NV_OP_ADD:
      if (u.u32 == 0) {
         if ((op = modifiers_opcode(nvi->src[t]->mod)) == NV_OP_NOP)
            break;
         nvi->opcode = op;
         nv_reference(pc, nvi, s, NULL);
         nvi->src[0] = nvi->src[t];
         nvi->src[1] = NULL;
      }
      break;
   case NV_OP_RCP:
      u.f32 = 1.0f / u.f32;
      (val = new_value(pc, NV_FILE_IMM, NV_TYPE_F32))->reg.imm.f32 = u.f32;
      nvi->opcode = NV_OP_MOV;
      assert(s == 0);
      nv_reference(pc, nvi, 0, val);
      break;
   case NV_OP_RSQ:
      u.f32 = 1.0f / sqrtf(u.f32);
      (val = new_value(pc, NV_FILE_IMM, NV_TYPE_F32))->reg.imm.f32 = u.f32;
      nvi->opcode = NV_OP_MOV;
      assert(s == 0);
      nv_reference(pc, nvi, 0, val);
      break;
   default:
      break;
   }
}
#endif

static int
nv_pass_lower_arith(struct nv_pass *ctx, struct nv_basic_block *b)
{
#if 0
   struct nv_instruction *nvi, *next;
   int j;

   for (nvi = b->entry; nvi; nvi = next) {
      struct nv_value *src0, *src1, *src;
      int mod;

      next = nvi->next;

      src0 = nvcg_find_immediate(nvi->src[0]);
      src1 = nvcg_find_immediate(nvi->src[1]);

      if (src0 && src1)
         constant_expression(ctx->pc, nvi, src0, src1);
      else {
         if (src0)
            constant_operand(ctx->pc, nvi, src0, 0);
         else
         if (src1)
            constant_operand(ctx->pc, nvi, src1, 1);
      }

      /* try to combine MUL, ADD into MAD */
      if (nvi->opcode != NV_OP_ADD)
         continue;

      src0 = nvi->src[0]->value;
      src1 = nvi->src[1]->value;

      if (SRC_IS_MUL(src0) && src0->refc == 1)
         src = src0;
      else
      if (SRC_IS_MUL(src1) && src1->refc == 1)
         src = src1;
      else
         continue;

      /* could have an immediate from above constant_* */
      if (src0->reg.file != NV_FILE_GPR || src1->reg.file != NV_FILE_GPR)
         continue;

      nvi->opcode = NV_OP_MAD;
      mod = nvi->src[(src == src0) ? 0 : 1]->mod;
      nv_reference(ctx->pc, &nvi->src[(src == src0) ? 0 : 1], NULL);
      nvi->src[2] = nvi->src[(src == src0) ? 1 : 0];

      assert(!(mod & ~NV_MOD_NEG));
      nvi->src[0] = new_ref(ctx->pc, src->insn->src[0]->value);
      nvi->src[1] = new_ref(ctx->pc, src->insn->src[1]->value);
      nvi->src[0]->mod = src->insn->src[0]->mod ^ mod;
      nvi->src[1]->mod = src->insn->src[1]->mod;
   }
   DESCEND_ARBITRARY(j, nv_pass_lower_arith);
#endif
   return 0;
}

/* TODO: redundant store elimination */

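/* Record of a single memory access: the instruction, the base (value number
 * of the indirect address value, 0 if direct), offset and size. Records are
 * kept in per-file lists so nearby accesses can be merged or reused.
 */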
struct mem_record {
   struct mem_record *next;
   struct nv_instruction *insn;
   uint32_t ofst;
   uint32_t base;
   uint32_t size;
};

#define MEM_RECORD_POOL_SIZE 1024

struct pass_reld_elim {
   struct nv_pc *pc;

   struct mem_record *imm;
   struct mem_record *mem_v;
   struct mem_record *mem_a;
   struct mem_record *mem_c[16];
   struct mem_record *mem_l;

   struct mem_record pool[MEM_RECORD_POOL_SIZE];
   int alloc;
};

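/* Merge a load adjacent to an already recorded one into the recorded
 * instruction, widening its memory operand and taking over the definitions,
 * provided the combined access stays suitably aligned; the second load is
 * then deleted.
 */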
static void
combine_load(struct mem_record *rec, struct nv_instruction *ld)
{
   struct nv_instruction *fv = rec->insn;
   struct nv_value *mem = ld->src[0]->value;
   uint32_t size = rec->size + mem->reg.size;
   int j;
   int d = rec->size / 4;

   assert(rec->size < 16);
   if (rec->ofst > mem->reg.address) {
      if ((size == 8 && mem->reg.address & 3) ||
          (size > 8 && mem->reg.address & 7))
         return;
      rec->ofst = mem->reg.address;
      for (j = 0; j < d; ++j)
         fv->def[d + j] = fv->def[j];
      d = 0;
   } else
   if ((size == 8 && rec->ofst & 3) ||
       (size > 8 && rec->ofst & 7)) {
      return;
   }

   for (j = 0; j < mem->reg.size / 4; ++j) {
      fv->def[d] = ld->def[j];
      fv->def[d++]->insn = fv;
   }

   fv->src[0]->value->reg.size = rec->size = size;

   nvc0_insn_delete(ld);
}

static void
combine_export(struct mem_record *rec, struct nv_instruction *ex)
{

}

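/* Remember a memory access in the given record list so that later
 * instructions in the block can be checked against it.
 */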
static INLINE void
add_mem_record(struct pass_reld_elim *ctx, struct mem_record **rec,
               uint32_t base, uint32_t ofst, struct nv_instruction *nvi)
{
   struct mem_record *it = &ctx->pool[ctx->alloc++];

   it->next = *rec;
   *rec = it;
   it->base = base;
   it->ofst = ofst;
   it->insn = nvi;
   it->size = nvi->src[0]->value->reg.size;
}

/* vectorize and reuse loads from memory or of immediates */
static int
nv_pass_mem_opt(struct pass_reld_elim *ctx, struct nv_basic_block *b)
{
   struct mem_record **rec, *it;
   struct nv_instruction *ld, *next;
   struct nv_value *mem;
   uint32_t base, ofst;
   int s;

   for (ld = b->entry; ld; ld = next) {
      next = ld->next;

      if (is_cspace_load(ld)) {
         mem = ld->src[0]->value;
         rec = &ctx->mem_c[ld->src[0]->value->reg.file - NV_FILE_MEM_C(0)];
      } else
      if (ld->opcode == NV_OP_VFETCH) {
         mem = ld->src[0]->value;
         rec = &ctx->mem_a;
      } else
      if (ld->opcode == NV_OP_EXPORT) {
         mem = ld->src[0]->value;
         if (mem->reg.file != NV_FILE_MEM_V)
            continue;
         rec = &ctx->mem_v;
      } else {
         continue;
      }
      if (ld->def[0] && ld->def[0]->refc == 0)
         continue;
      ofst = mem->reg.address;
      base = (ld->indirect >= 0) ? ld->src[ld->indirect]->value->n : 0;

      for (it = *rec; it; it = it->next) {
         if (it->base == base &&
             ((it->ofst >> 4) == (ofst >> 4)) &&
             ((it->ofst + it->size == ofst) ||
              (it->ofst - mem->reg.size == ofst))) {
            if (ld->opcode == NV_OP_LD && it->size + mem->reg.size == 12)
               continue;
            if (it->ofst < ofst) {
               if ((it->ofst & 0xf) == 4)
                  continue;
            } else
            if ((ofst & 0xf) == 4)
               continue;
            break;
         }
      }
      if (it) {
         switch (ld->opcode) {
         case NV_OP_EXPORT: combine_export(it, ld); break;
         default:
            combine_load(it, ld);
            break;
         }
      } else
      if (ctx->alloc < MEM_RECORD_POOL_SIZE) {
         add_mem_record(ctx, rec, base, ofst, ld);
      }
   }

   DESCEND_ARBITRARY(s, nv_pass_mem_opt);
   return 0;
}

static void
eliminate_store(struct mem_record *rec, struct nv_instruction *st)
{
}

/* elimination of redundant stores */
static int
pass_store_elim(struct pass_reld_elim *ctx, struct nv_basic_block *b)
{
   struct mem_record **rec, *it;
   struct nv_instruction *st, *next;
   struct nv_value *mem;
   uint32_t base, ofst, size;
   int s;

   for (st = b->entry; st; st = next) {
      next = st->next;

      if (st->opcode == NV_OP_ST) {
         mem = st->src[0]->value;
         rec = &ctx->mem_l;
      } else
      if (st->opcode == NV_OP_EXPORT) {
         mem = st->src[0]->value;
         if (mem->reg.file != NV_FILE_MEM_V)
            continue;
         rec = &ctx->mem_v;
      } else
      if (st->opcode == NV_OP_ST) {
         /* TODO: purge */
      } else {
         continue; /* mem/rec are only valid for ST and EXPORT */
      }
      ofst = mem->reg.address;
      base = (st->indirect >= 0) ? st->src[st->indirect]->value->n : 0;
      size = mem->reg.size;

      for (it = *rec; it; it = it->next) {
         if (it->base == base &&
             (it->ofst <= ofst && (it->ofst + size) > ofst))
            break;
      }
      if (it)
         eliminate_store(it, st);
      else
         add_mem_record(ctx, rec, base, ofst, st);
   }

   DESCEND_ARBITRARY(s, nv_pass_mem_opt);
   return 0;
}

/* TODO: properly handle loads from l[] memory in the presence of stores */
static int
nv_pass_reload_elim(struct pass_reld_elim *ctx, struct nv_basic_block *b)
{
#if 0
   struct load_record **rec, *it;
   struct nv_instruction *ld, *next;
   uint64_t data[2];
   struct nv_value *val;
   int j;

   for (ld = b->entry; ld; ld = next) {
      next = ld->next;
      if (!ld->src[0])
         continue;
      val = ld->src[0]->value;
      rec = NULL;

      if (ld->opcode == NV_OP_LINTERP || ld->opcode == NV_OP_PINTERP) {
         data[0] = val->reg.id;
         data[1] = 0;
         rec = &ctx->mem_v;
      } else
      if (ld->opcode == NV_OP_LDA) {
         data[0] = val->reg.id;
         data[1] = ld->src[4] ? ld->src[4]->value->n : ~0ULL;
         if (val->reg.file >= NV_FILE_MEM_C(0) &&
             val->reg.file <= NV_FILE_MEM_C(15))
            rec = &ctx->mem_c[val->reg.file - NV_FILE_MEM_C(0)];
         else
         if (val->reg.file == NV_FILE_MEM_S)
            rec = &ctx->mem_s;
         else
         if (val->reg.file == NV_FILE_MEM_L)
            rec = &ctx->mem_l;
      } else
      if ((ld->opcode == NV_OP_MOV) && (val->reg.file == NV_FILE_IMM)) {
         data[0] = val->reg.imm.u32;
         data[1] = 0;
         rec = &ctx->imm;
      }

      if (!rec || !ld->def[0]->refc)
         continue;

      for (it = *rec; it; it = it->next)
         if (it->data[0] == data[0] && it->data[1] == data[1])
            break;

      if (it) {
         if (ld->def[0]->reg.id >= 0)
            it->value = ld->def[0];
         else
         if (!ld->fixed)
            nvc0_pc_replace_value(ctx->pc, ld->def[0], it->value);
      } else {
         if (ctx->alloc == LOAD_RECORD_POOL_SIZE)
            continue;
         it = &ctx->pool[ctx->alloc++];
         it->next = *rec;
         it->data[0] = data[0];
         it->data[1] = data[1];
         it->value = ld->def[0];
         *rec = it;
      }
   }

   ctx->imm = NULL;
   ctx->mem_s = NULL;
   ctx->mem_v = NULL;
   for (j = 0; j < 16; ++j)
      ctx->mem_c[j] = NULL;
   ctx->mem_l = NULL;
   ctx->alloc = 0;

   DESCEND_ARBITRARY(j, nv_pass_reload_elim);
#endif
   return 0;
}

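/* Determine which components of each texture fetch are actually referenced,
 * store the result in tex_mask and compact the live definitions to the front
 * of the def array.
 */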
static int
nv_pass_tex_mask(struct nv_pass *ctx, struct nv_basic_block *b)
{
   int i, c, j;

   for (i = 0; i < ctx->pc->num_instructions; ++i) {
      struct nv_instruction *nvi = &ctx->pc->instructions[i];
      struct nv_value *def[4];

      if (!nv_is_texture_op(nvi->opcode))
         continue;
      nvi->tex_mask = 0;

      for (c = 0; c < 4; ++c) {
         if (nvi->def[c]->refc)
            nvi->tex_mask |= 1 << c;
         def[c] = nvi->def[c];
      }

      j = 0;
      for (c = 0; c < 4; ++c)
         if (nvi->tex_mask & (1 << c))
            nvi->def[j++] = def[c];
      for (c = 0; c < 4; ++c)
         if (!(nvi->tex_mask & (1 << c)))
            nvi->def[j++] = def[c];
      assert(j == 4);
   }
   return 0;
}

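/* Dead code elimination: delete instructions whose results are never used
 * (see inst_removable), counting removals so the caller can iterate the pass
 * until a fixed point is reached.
 */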
struct nv_pass_dce {
   struct nv_pc *pc;
   uint removed;
};

static int
nv_pass_dce(struct nv_pass_dce *ctx, struct nv_basic_block *b)
{
   int j;
   struct nv_instruction *nvi, *next;

   for (nvi = b->phi ? b->phi : b->entry; nvi; nvi = next) {
      next = nvi->next;

      if (inst_removable(nvi)) {
         nvc0_insn_delete(nvi);
         ++ctx->removed;
      }
   }
   DESCEND_ARBITRARY(j, nv_pass_dce);

   return 0;
}

#if 0
/* Register allocation inserted ELSE blocks for all IF/ENDIF without ELSE.
 * Returns TRUE if @bb initiates an IF/ELSE/ENDIF clause, or is an IF with
 * BREAK and dummy ELSE block.
 */
static INLINE boolean
bb_is_if_else_endif(struct nv_basic_block *bb)
{
   if (!bb->out[0] || !bb->out[1])
      return FALSE;

   if (bb->out[0]->out_kind[0] == CFG_EDGE_LOOP_LEAVE) {
      return (bb->out[0]->out[1] == bb->out[1]->out[0] &&
              !bb->out[1]->out[1]);
   } else {
      return (bb->out[0]->out[0] == bb->out[1]->out[0] &&
              !bb->out[0]->out[1] &&
              !bb->out[1]->out[1]);
   }
}

/* predicate instructions and remove branch at the end */
static void
predicate_instructions(struct nv_pc *pc, struct nv_basic_block *b,
                       struct nv_value *p, ubyte cc)
{

}
#endif

/* NOTE: Run this after register allocation, we can just cut out the cflow
 * instructions and hook the predicates to the conditional OPs if they are
 * not using immediates; better than inserting SELECT to join definitions.
 *
 * NOTE: Should adapt prior optimization to make this possible more often.
 */
static int
nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b)
{
   return 0;
}

/* local common subexpression elimination, stupid O(n^2) implementation */
static int
nv_pass_cse(struct nv_pass *ctx, struct nv_basic_block *b)
{
#if 0
   struct nv_instruction *ir, *ik, *next;
   struct nv_instruction *entry = b->phi ? b->phi : b->entry;
   int s;
   unsigned int reps;

   do {
      reps = 0;
      for (ir = entry; ir; ir = next) {
         next = ir->next;
         for (ik = entry; ik != ir; ik = ik->next) {
            if (ir->opcode != ik->opcode || ir->fixed)
               continue;

            if (!ir->def[0] || !ik->def[0] ||
                ik->opcode == NV_OP_LDA ||
                ik->opcode == NV_OP_STA ||
                ik->opcode == NV_OP_MOV ||
                nv_is_vector_op(ik->opcode))
               continue; /* ignore loads, stores & moves */

            if (ik->src[4] || ir->src[4])
               continue; /* don't mess with address registers */

            if (ik->flags_src || ir->flags_src ||
                ik->flags_def || ir->flags_def)
               continue; /* and also not with flags, for now */

            if (ik->def[0]->reg.file == NV_FILE_OUT ||
                ir->def[0]->reg.file == NV_FILE_OUT ||
                !values_equal(ik->def[0], ir->def[0]))
               continue;

            for (s = 0; s < 3; ++s) {
               struct nv_value *a, *b;

               if (!ik->src[s]) {
                  if (ir->src[s])
                     break;
                  continue;
               }
               if (ik->src[s]->mod != ir->src[s]->mod)
                  break;
               a = ik->src[s]->value;
               b = ir->src[s]->value;
               if (a == b)
                  continue;
               if (a->reg.file != b->reg.file ||
                   a->reg.id < 0 ||
                   a->reg.id != b->reg.id)
                  break;
            }
            if (s == 3) {
               nvc0_insn_delete(ir);
               ++reps;
               nvcg_replace_value(ctx->pc, ir->def[0], ik->def[0]);
               break;
            }
         }
      }
   } while (reps);

   DESCEND_ARBITRARY(s, nv_pass_cse);
#endif
   return 0;
}

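/* First optimization stage over a function: arithmetic lowering, source
 * modifier folding, load folding, reload elimination, CSE, DCE (repeated
 * until nothing more is removed), memory access vectorization and texture
 * mask computation, in that order; several of these passes are currently
 * disabled via #if 0 above.
 */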
static int
nv_pc_pass0(struct nv_pc *pc, struct nv_basic_block *root)
{
   struct pass_reld_elim *reldelim;
   struct nv_pass pass;
   struct nv_pass_dce dce;
   int ret;

   pass.n = 0;
   pass.pc = pc;

   /* Do this first, so we don't have to pay attention
    * to whether sources are supported memory loads.
    */
   pc->pass_seq++;
   ret = nv_pass_lower_arith(&pass, root);
   if (ret)
      return ret;

   pc->pass_seq++;
   ret = nv_pass_lower_mods(&pass, root);
   if (ret)
      return ret;

   pc->pass_seq++;
   ret = nvc0_pass_fold_loads(&pass, root);
   if (ret)
      return ret;

   if (pc->opt_reload_elim) {
      reldelim = CALLOC_STRUCT(pass_reld_elim);
      reldelim->pc = pc;

      pc->pass_seq++;
      ret = nv_pass_reload_elim(reldelim, root);
      if (ret) {
         FREE(reldelim);
         return ret;
      }
      memset(reldelim, 0, sizeof(struct pass_reld_elim));
      reldelim->pc = pc;
   }

   pc->pass_seq++;
   ret = nv_pass_cse(&pass, root);
   if (ret)
      return ret;

   dce.pc = pc;
   do {
      dce.removed = 0;
      pc->pass_seq++;
      ret = nv_pass_dce(&dce, root);
      if (ret)
         return ret;
   } while (dce.removed);

   if (pc->opt_reload_elim) {
      pc->pass_seq++;
      ret = nv_pass_mem_opt(reldelim, root);
      if (!ret) {
         memset(reldelim, 0, sizeof(struct pass_reld_elim));
         reldelim->pc = pc;

         pc->pass_seq++;
         ret = nv_pass_mem_opt(reldelim, root);
      }
      FREE(reldelim);
      if (ret)
         return ret;
   }

   ret = nv_pass_tex_mask(&pass, root);
   if (ret)
      return ret;

   return ret;
}

int
nvc0_pc_exec_pass0(struct nv_pc *pc)
{
   int i, ret;

   for (i = 0; i < pc->num_subroutines + 1; ++i)
      if (pc->root[i] && (ret = nv_pc_pass0(pc, pc->root[i])))
         return ret;
   return 0;
}