nvc0: don't visit target blocks of a loop break multiple times
[mesa.git] src/gallium/drivers/nvc0/nvc0_pc_optimize.c
1 /*
2 * Copyright 2010 Christoph Bumiller
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 * SOFTWARE.
21 */
22
23 #include "nvc0_pc.h"
24 #include "nvc0_program.h"
25
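/* Visit the CFG successors of block @b that have not been processed yet in
 * the current pass (pass_seq serves as the "visited" marker). Expects the
 * locals @b and @ctx to be in scope; @j is a caller-provided loop variable.
 */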
26 #define DESCEND_ARBITRARY(j, f) \
27 do { \
28 b->pass_seq = ctx->pc->pass_seq; \
29 \
30 for (j = 0; j < 2; ++j) \
31 if (b->out[j] && b->out[j]->pass_seq < ctx->pc->pass_seq) \
32 f(ctx, b->out[j]); \
33 } while (0)
34
35 static INLINE boolean
36 registers_interfere(struct nv_value *a, struct nv_value *b)
37 {
38 if (a->reg.file != b->reg.file)
39 return FALSE;
40 if (NV_IS_MEMORY_FILE(a->reg.file) || NV_IS_MEMORY_FILE(b->reg.file))
41 return FALSE;
42
43 assert(a->join->reg.id >= 0 && b->join->reg.id >= 0);
44
45 if (a->join->reg.id < b->join->reg.id) {
46 return (a->join->reg.id + a->reg.size >= b->join->reg.id);
47 } else
48 if (a->join->reg.id > b->join->reg.id) {
49 return (b->join->reg.id + b->reg.size >= a->join->reg.id);
50 }
51
52 return FALSE;
53 }
54
55 static INLINE boolean
56 values_equal(struct nv_value *a, struct nv_value *b)
57 {
58 if (a->reg.file != b->reg.file || a->reg.size != b->reg.size)
59 return FALSE;
60 if (NV_IS_MEMORY_FILE(a->reg.file))
61 return a->reg.address == b->reg.address;
62 else
63 return a->join->reg.id == b->join->reg.id;
64 }
65
66 #if 0
67 static INLINE boolean
68 inst_commutation_check(struct nv_instruction *a, struct nv_instruction *b)
69 {
70 int si, di;
71
72 for (di = 0; di < 4 && a->def[di]; ++di)
73 for (si = 0; si < 5 && b->src[si]; ++si)
74 if (registers_interfere(a->def[di], b->src[si]->value))
75 return FALSE;
76
77 return TRUE;
78 }
79
80 /* Check whether we can swap the order of the instructions,
81 * where a & b may be either the earlier or the later one.
82 */
83 static boolean
84 inst_commutation_legal(struct nv_instruction *a, struct nv_instruction *b)
85 {
86 return inst_commutation_check(a, b) && inst_commutation_check(b, a);
87 }
88 #endif
89
90 static INLINE boolean
91 inst_removable(struct nv_instruction *nvi)
92 {
93 if (nvi->opcode == NV_OP_ST)
94 return FALSE;
95 return (!(nvi->terminator ||
96 nvi->join ||
97 nvi->target ||
98 nvi->fixed ||
99 nvc0_insn_refcount(nvi)));
100 }
101
102 /* Check if we do not actually have to emit this instruction. */
103 static INLINE boolean
104 inst_is_noop(struct nv_instruction *nvi)
105 {
106 if (nvi->opcode == NV_OP_UNDEF || nvi->opcode == NV_OP_BIND)
107 return TRUE;
108 if (nvi->terminator || nvi->join)
109 return FALSE;
110 if (nvi->def[0] && nvi->def[0]->join->reg.id < 0)
111 return TRUE;
112 if (nvi->opcode != NV_OP_MOV && nvi->opcode != NV_OP_SELECT)
113 return FALSE;
114 if (nvi->def[0]->reg.file != nvi->src[0]->value->reg.file)
115 return FALSE;
116
117 if (nvi->src[0]->value->join->reg.id < 0) {
118 NOUVEAU_DBG("inst_is_noop: orphaned value detected\n");
119 return TRUE;
120 }
121
122 if (nvi->opcode == NV_OP_SELECT)
123 if (!values_equal(nvi->def[0], nvi->src[1]->value))
124 return FALSE;
125 return values_equal(nvi->def[0], nvi->src[0]->value);
126 }
127
128 struct nv_pass {
129 struct nv_pc *pc;
130 int n;
131 void *priv;
132 };
133
134 static int
135 nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b);
136
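/* Emission-order callback for nvc0_pc_pass_in_order: deletes no-op
 * instructions (and EXPORTs in fragment programs), accumulates the block's
 * emit_size/emit_pos, and removes a branch at the end of the previously
 * emitted block if it merely falls through to this one (BRA $PC+8).
 */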
137 static void
138 nv_pc_pass_pre_emission(void *priv, struct nv_basic_block *b)
139 {
140 struct nv_pc *pc = (struct nv_pc *)priv;
141 struct nv_basic_block *in;
142 struct nv_instruction *nvi, *next;
143 int j;
144
145 for (j = pc->num_blocks - 1; j >= 0 && !pc->bb_list[j]->emit_size; --j);
146
147 if (j >= 0) {
148 in = pc->bb_list[j];
149
150 /* check for no-op branches (BRA $PC+8) */
151 if (in->exit && in->exit->opcode == NV_OP_BRA && in->exit->target == b) {
152 in->emit_size -= 8;
153 pc->emit_size -= 8;
154
155 for (++j; j < pc->num_blocks; ++j)
156 pc->bb_list[j]->emit_pos -= 8;
157
158 nvc0_insn_delete(in->exit);
159 }
160 b->emit_pos = in->emit_pos + in->emit_size;
161 }
162
163 pc->bb_list[pc->num_blocks++] = b;
164
165 /* visit node */
166
167 for (nvi = b->entry; nvi; nvi = next) {
168 next = nvi->next;
169 if (inst_is_noop(nvi) ||
170 (pc->is_fragprog && nvi->opcode == NV_OP_EXPORT)) {
171 nvc0_insn_delete(nvi);
172 } else
173 b->emit_size += 8;
174 }
175 pc->emit_size += b->emit_size;
176
177 #ifdef NOUVEAU_DEBUG
178 if (!b->entry)
179 debug_printf("BB:%i is now empty\n", b->id);
180 else
181 debug_printf("BB:%i size = %u\n", b->id, b->emit_size);
182 #endif
183 }
184
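/* Pass 2 on one CFG root: flatten IF/ELSE/ENDIF constructs into predicated
 * instructions, then walk the blocks in emission order to assign offsets
 * and sizes.
 */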
185 static int
186 nv_pc_pass2(struct nv_pc *pc, struct nv_basic_block *root)
187 {
188 struct nv_pass pass;
189
190 pass.pc = pc;
191
192 pc->pass_seq++;
193 nv_pass_flatten(&pass, root);
194
195 nvc0_pc_pass_in_order(root, nv_pc_pass_pre_emission, pc);
196
197 return 0;
198 }
199
200 int
201 nvc0_pc_exec_pass2(struct nv_pc *pc)
202 {
203 int i, ret;
204
205 NOUVEAU_DBG("preparing %u blocks for emission\n", pc->num_blocks);
206
207 pc->num_blocks = 0; /* will reorder bb_list */
208
209 for (i = 0; i < pc->num_subroutines + 1; ++i)
210 if (pc->root[i] && (ret = nv_pc_pass2(pc, pc->root[i])))
211 return ret;
212 return 0;
213 }
214
215 static INLINE boolean
216 is_cspace_load(struct nv_instruction *nvi)
217 {
218 if (!nvi)
219 return FALSE;
220 assert(nvi->indirect != 0);
221 return (nvi->opcode == NV_OP_LD &&
222 nvi->src[0]->value->reg.file >= NV_FILE_MEM_C(0) &&
223 nvi->src[0]->value->reg.file <= NV_FILE_MEM_C(15));
224 }
225
226 static INLINE boolean
227 is_immd32_load(struct nv_instruction *nvi)
228 {
229 if (!nvi)
230 return FALSE;
231 return (nvi->opcode == NV_OP_MOV &&
232 nvi->src[0]->value->reg.file == NV_FILE_IMM &&
233 nvi->src[0]->value->reg.size == 4);
234 }
235
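/* For commutative ops (and SETs, whose condition can be mirrored via
 * cc_swapped), swap src[0] and src[1] so that an operand produced by a
 * constant-buffer load or a 32-bit immediate MOV ends up in src[1], where
 * the subsequent load folding can encode it directly
 * (illustrative effect after folding: add $r0, $r1, c0[0x10]).
 */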
236 static INLINE void
237 check_swap_src_0_1(struct nv_instruction *nvi)
238 {
239 static const uint8_t cc_swapped[8] = { 0, 4, 2, 6, 1, 5, 3, 7 };
240
241 struct nv_ref *src0 = nvi->src[0];
242 struct nv_ref *src1 = nvi->src[1];
243
244 if (!nv_op_commutative(nvi->opcode) && NV_BASEOP(nvi->opcode) != NV_OP_SET)
245 return;
246 assert(src0 && src1 && src0->value && src1->value);
247
248 if (src1->value->reg.file != NV_FILE_GPR)
249 return;
250
251 if (is_cspace_load(src0->value->insn)) {
252 if (!is_cspace_load(src1->value->insn)) {
253 nvi->src[0] = src1;
254 nvi->src[1] = src0;
255 }
256 } else
257 if (is_immd32_load(src0->value->insn)) {
258 if (!is_cspace_load(src1->value->insn) &&
259 !is_immd32_load(src1->value->insn)) {
260 nvi->src[0] = src1;
261 nvi->src[1] = src0;
262 }
263 }
264
265 if (nvi->src[0] != src0 && NV_BASEOP(nvi->opcode) == NV_OP_SET)
266 nvi->set_cond = (nvi->set_cond & ~7) | cc_swapped[nvi->set_cond & 7];
267 }
268
269 static void
270 nvi_set_indirect_load(struct nv_pc *pc,
271 struct nv_instruction *nvi, struct nv_value *val)
272 {
273 for (nvi->indirect = 0; nvi->indirect < 6 && nvi->src[nvi->indirect];
274 ++nvi->indirect);
275 assert(nvi->indirect < 6);
276 nv_reference(pc, nvi, nvi->indirect, val);
277 }
278
279 static int
280 nvc0_pass_fold_loads(struct nv_pass *ctx, struct nv_basic_block *b)
281 {
282 struct nv_instruction *nvi, *ld;
283 int s;
284
285 for (nvi = b->entry; nvi; nvi = nvi->next) {
286 check_swap_src_0_1(nvi);
287
288 for (s = 0; s < 3 && nvi->src[s]; ++s) {
289 ld = nvi->src[s]->value->insn;
290 if (!ld || (ld->opcode != NV_OP_LD && ld->opcode != NV_OP_MOV))
291 continue;
292 if (!nvc0_insn_can_load(nvi, s, ld))
293 continue;
294
295 /* fold it ! */
296 nv_reference(ctx->pc, nvi, s, ld->src[0]->value);
297 if (ld->indirect >= 0)
298 nvi_set_indirect_load(ctx->pc, nvi, ld->src[ld->indirect]->value);
299
300 if (!nvc0_insn_refcount(ld))
301 nvc0_insn_delete(ld);
302 }
303 }
304 DESCEND_ARBITRARY(s, nvc0_pass_fold_loads);
305
306 return 0;
307 }
308
309 /* NOTE: Assumes loads have not yet been folded. */
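/* Lowers SUB to ADD with a negated second source, folds single-use NEG/ABS
 * instructions into source modifiers of their users, and merges a following
 * SAT into ADD/MUL/MAD as a saturate flag. Illustrative example (pseudo IR):
 *    neg t, a ; add d, t, b   ->   add d, -a, b
 */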
310 static int
311 nv_pass_lower_mods(struct nv_pass *ctx, struct nv_basic_block *b)
312 {
313 struct nv_instruction *nvi, *mi, *next;
314 int j;
315 uint8_t mod;
316
317 for (nvi = b->entry; nvi; nvi = next) {
318 next = nvi->next;
319 if (nvi->opcode == NV_OP_SUB) {
320 nvi->src[1]->mod ^= NV_MOD_NEG;
321 nvi->opcode = NV_OP_ADD;
322 }
323
324 for (j = 0; j < 3 && nvi->src[j]; ++j) {
325 mi = nvi->src[j]->value->insn;
326 if (!mi)
327 continue;
328 if (mi->def[0]->refc > 1 || mi->predicate >= 0)
329 continue;
330
331 if (NV_BASEOP(mi->opcode) == NV_OP_NEG) mod = NV_MOD_NEG;
332 else
333 if (NV_BASEOP(mi->opcode) == NV_OP_ABS) mod = NV_MOD_ABS;
334 else
335 continue;
336 assert(!(mod & mi->src[0]->mod & NV_MOD_NEG));
337
338 mod |= mi->src[0]->mod;
339
340 if ((nvi->opcode == NV_OP_ABS) || (nvi->src[j]->mod & NV_MOD_ABS)) {
341 /* abs neg [abs] = abs */
342 mod &= ~(NV_MOD_NEG | NV_MOD_ABS);
343 } else
344 if ((nvi->opcode == NV_OP_NEG) && (mod & NV_MOD_NEG)) {
345 /* neg as opcode and modifier on same insn cannot occur */
346 /* neg neg abs = abs, neg neg = identity */
347 assert(j == 0);
348 if (mod & NV_MOD_ABS)
349 nvi->opcode = NV_OP_ABS;
350 else
351 nvi->opcode = NV_OP_MOV;
352 mod = 0;
353 }
354
355 if ((nv_op_supported_src_mods(nvi->opcode) & mod) != mod)
356 continue;
357
358 nv_reference(ctx->pc, nvi, j, mi->src[0]->value);
359
360 nvi->src[j]->mod ^= mod;
361 }
362
363 if (nvi->opcode == NV_OP_SAT) {
364 mi = nvi->src[0]->value->insn;
365
366 if (mi->def[0]->refc > 1 ||
367 (mi->opcode != NV_OP_ADD &&
368 mi->opcode != NV_OP_MUL &&
369 mi->opcode != NV_OP_MAD))
370 continue;
371 mi->saturate = 1;
372 mi->def[0] = nvi->def[0];
373 mi->def[0]->insn = mi;
374 nvc0_insn_delete(nvi);
375 }
376 }
377 DESCEND_ARBITRARY(j, nv_pass_lower_mods);
378
379 return 0;
380 }
381
382 #define SRC_IS_MUL(s) ((s)->insn && (s)->insn->opcode == NV_OP_MUL)
383
384 static void
385 apply_modifiers(uint32_t *val, uint8_t type, uint8_t mod)
386 {
387 if (mod & NV_MOD_ABS) {
388 if (type == NV_TYPE_F32)
389 *val &= 0x7fffffff;
390 else
391 if ((*val) & (1 << 31))
392 *val = ~(*val) + 1;
393 }
394 if (mod & NV_MOD_NEG) {
395 if (type == NV_TYPE_F32)
396 *val ^= 0x80000000;
397 else
398 *val = ~(*val) + 1;
399 }
400 if (mod & NV_MOD_SAT) {
401 union {
402 float f;
403 uint32_t u;
404 int32_t i;
405 } u;
406 u.u = *val;
407 if (type == NV_TYPE_F32) {
408 u.f = CLAMP(u.f, -1.0f, 1.0f);
409 } else
410 if (type == NV_TYPE_U16) {
411 u.u = MIN2(u.u, 0xffff);
412 } else
413 if (type == NV_TYPE_S16) {
414 u.i = CLAMP(u.i, -32768, 32767);
415 }
416 *val = u.u;
417 }
418 if (mod & NV_MOD_NOT)
419 *val = ~*val;
420 }
421
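/* Both sources of @nvi are known immediates: evaluate the operation at
 * compile time and turn @nvi into a MOV of the result (a MAD is reduced to
 * an ADD of the remaining source and the folded product).
 */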
422 static void
423 constant_expression(struct nv_pc *pc, struct nv_instruction *nvi,
424 struct nv_value *src0, struct nv_value *src1)
425 {
426 struct nv_value *val;
427 union {
428 float f32;
429 uint32_t u32;
430 int32_t s32;
431 } u0, u1, u;
432 ubyte type;
433
434 if (!nvi->def[0])
435 return;
436 type = NV_OPTYPE(nvi->opcode);
437
438 u.u32 = 0;
439 u0.u32 = src0->reg.imm.u32;
440 u1.u32 = src1->reg.imm.u32;
441
442 apply_modifiers(&u0.u32, type, nvi->src[0]->mod);
443 apply_modifiers(&u1.u32, type, nvi->src[1]->mod);
444
445 switch (nvi->opcode) {
446 case NV_OP_MAD_F32:
447 if (nvi->src[2]->value->reg.file != NV_FILE_GPR)
448 return;
449 /* fall through */
450 case NV_OP_MUL_F32:
451 u.f32 = u0.f32 * u1.f32;
452 break;
453 case NV_OP_MUL_B32:
454 u.u32 = u0.u32 * u1.u32;
455 break;
456 case NV_OP_ADD_F32:
457 u.f32 = u0.f32 + u1.f32;
458 break;
459 case NV_OP_ADD_B32:
460 u.u32 = u0.u32 + u1.u32;
461 break;
462 case NV_OP_SUB_F32:
463 u.f32 = u0.f32 - u1.f32;
464 break;
465 /*
466 case NV_OP_SUB_B32:
467 u.u32 = u0.u32 - u1.u32;
468 break;
469 */
470 default:
471 return;
472 }
473
474 val = new_value(pc, NV_FILE_IMM, nv_type_sizeof(type));
475 val->reg.imm.u32 = u.u32;
476
477 nv_reference(pc, nvi, 1, NULL);
478 nv_reference(pc, nvi, 0, val);
479
480 if (nvi->opcode == NV_OP_MAD_F32) {
481 nvi->src[1] = nvi->src[0];
482 nvi->src[0] = nvi->src[2];
483 nvi->src[2] = NULL;
484 nvi->opcode = NV_OP_ADD_F32;
485
486 if (val->reg.imm.u32 == 0) {
487 nvi->src[1] = NULL;
488 nvi->opcode = NV_OP_MOV;
489 }
490 } else {
491 nvi->opcode = NV_OP_MOV;
492 }
493 }
494
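/* One source of @nvi is a known immediate: apply algebraic identities, e.g.
 * x * 1 -> MOV, x * 2 -> x + x, x * 0 -> 0, x + 0 -> MOV, x * 2^n -> SHL,
 * and evaluate RCP/RSQ of immediates.
 */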
495 static void
496 constant_operand(struct nv_pc *pc,
497 struct nv_instruction *nvi, struct nv_value *val, int s)
498 {
499 union {
500 float f32;
501 uint32_t u32;
502 int32_t s32;
503 } u;
504 int shift;
505 int t = s ? 0 : 1;
506 uint op;
507 ubyte type;
508
509 if (!nvi->def[0])
510 return;
511 type = NV_OPTYPE(nvi->opcode);
512
513 u.u32 = val->reg.imm.u32;
514 apply_modifiers(&u.u32, type, nvi->src[s]->mod);
515
516 if (u.u32 == 0 && NV_BASEOP(nvi->opcode) == NV_OP_MUL) {
517 nvi->opcode = NV_OP_MOV;
518 nv_reference(pc, nvi, t, NULL);
519 if (s) {
520 nvi->src[0] = nvi->src[1];
521 nvi->src[1] = NULL;
522 }
523 return;
524 }
525
526 switch (nvi->opcode) {
527 case NV_OP_MUL_F32:
528 if (u.f32 == 1.0f || u.f32 == -1.0f) {
529 if (u.f32 == -1.0f)
530 nvi->src[t]->mod ^= NV_MOD_NEG;
531 switch (nvi->src[t]->mod) {
532 case 0: op = nvi->saturate ? NV_OP_SAT : NV_OP_MOV; break;
533 case NV_MOD_NEG: op = NV_OP_NEG_F32; break;
534 case NV_MOD_ABS: op = NV_OP_ABS_F32; break;
535 default:
536 return;
537 }
538 nvi->opcode = op;
539 nv_reference(pc, nvi, 0, nvi->src[t]->value);
540 nv_reference(pc, nvi, 1, NULL);
541 nvi->src[0]->mod = 0;
542 } else
543 if (u.f32 == 2.0f || u.f32 == -2.0f) {
544 if (u.f32 == -2.0f)
545 nvi->src[t]->mod ^= NV_MOD_NEG;
546 nvi->opcode = NV_OP_ADD_F32;
547 nv_reference(pc, nvi, s, nvi->src[t]->value);
548 nvi->src[s]->mod = nvi->src[t]->mod;
549 }
550 break;
551 case NV_OP_ADD_F32:
552 if (u.u32 == 0) {
553 switch (nvi->src[t]->mod) {
554 case 0: op = nvi->saturate ? NV_OP_SAT : NV_OP_MOV; break;
555 case NV_MOD_NEG: op = NV_OP_NEG_F32; break;
556 case NV_MOD_ABS: op = NV_OP_ABS_F32; break;
557 case NV_MOD_NEG | NV_MOD_ABS:
558 op = NV_OP_CVT;
559 nvi->ext.cvt.s = nvi->ext.cvt.d = type;
560 break;
561 default:
562 return;
563 }
564 nvi->opcode = op;
565 nv_reference(pc, nvi, 0, nvi->src[t]->value);
566 nv_reference(pc, nvi, 1, NULL);
567 if (nvi->opcode != NV_OP_CVT)
568 nvi->src[0]->mod = 0;
569 }
570 break;
571 case NV_OP_ADD_B32:
572 if (u.u32 == 0) {
573 assert(nvi->src[t]->mod == 0);
574 nvi->opcode = nvi->saturate ? NV_OP_CVT : NV_OP_MOV;
575 nvi->ext.cvt.s = nvi->ext.cvt.d = type;
576 nv_reference(pc, nvi, 0, nvi->src[t]->value);
577 nv_reference(pc, nvi, 1, NULL);
578 }
579 break;
580 case NV_OP_MUL_B32:
581 /* multiplication by 0 already handled above */
582 assert(nvi->src[s]->mod == 0);
583 shift = ffs(u.s32) - 1;
584 if (shift == 0) {
585 nvi->opcode = NV_OP_MOV;
586 nv_reference(pc, nvi, 0, nvi->src[t]->value);
587 nv_reference(pc, nvi, 1, NULL);
588 } else
589 if (u.s32 > 0 && u.s32 == (1 << shift)) {
590 nvi->opcode = NV_OP_SHL;
591 (val = new_value(pc, NV_FILE_IMM, NV_TYPE_U32))->reg.imm.s32 = shift;
592 nv_reference(pc, nvi, 0, nvi->src[t]->value);
593 nv_reference(pc, nvi, 1, val);
594 break;
595 }
596 break;
597 case NV_OP_RCP:
598 u.f32 = 1.0f / u.f32;
599 (val = new_value(pc, NV_FILE_IMM, NV_TYPE_F32))->reg.imm.f32 = u.f32;
600 nvi->opcode = NV_OP_MOV;
601 assert(s == 0);
602 nv_reference(pc, nvi, 0, val);
603 break;
604 case NV_OP_RSQ:
605 u.f32 = 1.0f / sqrtf(u.f32);
606 (val = new_value(pc, NV_FILE_IMM, NV_TYPE_F32))->reg.imm.f32 = u.f32;
607 nvi->opcode = NV_OP_MOV;
608 assert(s == 0);
609 nv_reference(pc, nvi, 0, val);
610 break;
611 default:
612 break;
613 }
614 }
615
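/* MIN or MAX of a register with itself (and no source modifiers) is just
 * that register.
 */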
616 static void
617 handle_min_max(struct nv_pass *ctx, struct nv_instruction *nvi)
618 {
619 struct nv_value *src0 = nvi->src[0]->value;
620 struct nv_value *src1 = nvi->src[1]->value;
621
622 if (src0 != src1 || (nvi->src[0]->mod | nvi->src[1]->mod))
623 return;
624 if (src0->reg.file != NV_FILE_GPR)
625 return;
626 nvc0_pc_replace_value(ctx->pc, nvi->def[0], src0);
627 nvc0_insn_delete(nvi);
628 }
629
630 /* check if we can MUL + ADD -> MAD/FMA */
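/* Illustrative example (pseudo IR), requiring the MUL result to have no
 * other uses:
 *    mul t, a, b ; add d, t, c   ->   mad d, a, b, c
 */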
631 static void
632 handle_add_mul(struct nv_pass *ctx, struct nv_instruction *nvi)
633 {
634 struct nv_value *src0 = nvi->src[0]->value;
635 struct nv_value *src1 = nvi->src[1]->value;
636 struct nv_value *src;
637 int s;
638 uint8_t mod[4];
639
640 if (SRC_IS_MUL(src0) && src0->refc == 1) s = 0;
641 else
642 if (SRC_IS_MUL(src1) && src1->refc == 1) s = 1;
643 else
644 return;
645
646 if ((src0->insn && src0->insn->bb != nvi->bb) ||
647 (src1->insn && src1->insn->bb != nvi->bb))
648 return;
649
650 /* check for immediates from prior constant folding */
651 if (src0->reg.file != NV_FILE_GPR || src1->reg.file != NV_FILE_GPR)
652 return;
653 src = nvi->src[s]->value;
654
655 mod[0] = nvi->src[0]->mod;
656 mod[1] = nvi->src[1]->mod;
657 mod[2] = src->insn->src[0]->mod;
658 mod[3] = src->insn->src[1]->mod;
659
660 if ((mod[0] | mod[1] | mod[2] | mod[3]) & ~NV_MOD_NEG)
661 return;
662
663 nvi->opcode = NV_OP_MAD_F32;
664
665 nv_reference(ctx->pc, nvi, s, NULL);
666 nvi->src[2] = nvi->src[!s];
667 nvi->src[!s] = NULL;
668
669 nv_reference(ctx->pc, nvi, 0, src->insn->src[0]->value);
670 nvi->src[0]->mod = mod[2] ^ mod[s];
671 nv_reference(ctx->pc, nvi, 1, src->insn->src[1]->value);
672 nvi->src[1]->mod = mod[3];
673 }
674
675 static int
676 nv_pass_algebraic_opt(struct nv_pass *ctx, struct nv_basic_block *b)
677 {
678 struct nv_instruction *nvi, *next;
679 int j;
680
681 for (nvi = b->entry; nvi; nvi = next) {
682 struct nv_value *src0, *src1;
683 uint baseop = NV_BASEOP(nvi->opcode);
684
685 next = nvi->next;
686
687 src0 = nvc0_pc_find_immediate(nvi->src[0]);
688 src1 = nvc0_pc_find_immediate(nvi->src[1]);
689
690 if (src0 && src1) {
691 constant_expression(ctx->pc, nvi, src0, src1);
692 } else {
693 if (src0)
694 constant_operand(ctx->pc, nvi, src0, 0);
695 else
696 if (src1)
697 constant_operand(ctx->pc, nvi, src1, 1);
698 }
699
700 if (baseop == NV_OP_MIN || baseop == NV_OP_MAX)
701 handle_min_max(ctx, nvi);
702 else
703 if (nvi->opcode == NV_OP_ADD_F32)
704 handle_add_mul(ctx, nvi);
705 }
706 DESCEND_ARBITRARY(j, nv_pass_algebraic_opt);
707
708 return 0;
709 }
710
711 /* TODO: redundant store elimination */
712
713 struct mem_record {
714 struct mem_record *next;
715 struct nv_instruction *insn;
716 uint32_t ofst;
717 uint32_t base;
718 uint32_t size;
719 };
720
721 #define MEM_RECORD_POOL_SIZE 1024
722
723 struct pass_reld_elim {
724 struct nv_pc *pc;
725
726 struct mem_record *imm;
727 struct mem_record *mem_v;
728 struct mem_record *mem_a;
729 struct mem_record *mem_c[16];
730 struct mem_record *mem_l;
731
732 struct mem_record pool[MEM_RECORD_POOL_SIZE];
733 int alloc;
734 };
735
736 /* Extend the load operation in @rec to also cover the data loaded by @ld.
737  * The two loads must not overlap, but must reference adjacent memory locations.
738 */
739 static void
740 combine_load(struct nv_pc *pc, struct mem_record *rec,
741 struct nv_instruction *ld)
742 {
743 struct nv_instruction *fv = rec->insn;
744 struct nv_value *mem = ld->src[0]->value;
745 uint32_t size = rec->size + mem->reg.size;
746 int j;
747 int d = rec->size / 4;
748
749 assert(rec->size < 16);
750 if (rec->ofst > mem->reg.address) {
751 if ((size == 8 && mem->reg.address & 3) ||
752 (size > 8 && mem->reg.address & 7))
753 return;
754 rec->ofst = mem->reg.address;
755 for (j = 0; j < d; ++j)
756 fv->def[mem->reg.size / 4 + j] = fv->def[j];
757 d = 0;
758 } else
759 if ((size == 8 && rec->ofst & 3) ||
760 (size > 8 && rec->ofst & 7)) {
761 return;
762 }
763
764 for (j = 0; j < mem->reg.size / 4; ++j) {
765 fv->def[d] = ld->def[j];
766 fv->def[d++]->insn = fv;
767 }
768
769 if (fv->src[0]->value->refc > 1)
770 nv_reference(pc, fv, 0, new_value_like(pc, fv->src[0]->value));
771 fv->src[0]->value->reg.address = rec->ofst;
772 fv->src[0]->value->reg.size = rec->size = size;
773
774 nvc0_insn_delete(ld);
775 }
776
777 static void
778 combine_export(struct mem_record *rec, struct nv_instruction *ex)
779 {
780
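   /* TODO: not implemented yet; matching exports are currently left as they are. */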
781 }
782
783 static INLINE void
784 add_mem_record(struct pass_reld_elim *ctx, struct mem_record **rec,
785 uint32_t base, uint32_t ofst, struct nv_instruction *nvi)
786 {
787 struct mem_record *it = &ctx->pool[ctx->alloc++];
788
789 it->next = *rec;
790 *rec = it;
791 it->base = base;
792 it->ofst = ofst;
793 it->insn = nvi;
794 it->size = nvi->src[0]->value->reg.size;
795 }
796
797 /* vectorize and reuse loads from memory or of immediates */
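/* Adjacent loads from the same memory space and 16-byte segment are merged
 * into a single wider load where alignment permits, e.g. two 4-byte c[]
 * loads at offsets 0x0 and 0x4 become one 8-byte load (only VFETCH may
 * load exactly 12 bytes).
 */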
798 static int
799 nv_pass_mem_opt(struct pass_reld_elim *ctx, struct nv_basic_block *b)
800 {
801 struct mem_record **rec, *it;
802 struct nv_instruction *ld, *next;
803 struct nv_value *mem;
804 uint32_t base, ofst;
805 int s;
806
807 for (ld = b->entry; ld; ld = next) {
808 next = ld->next;
809
810 if (is_cspace_load(ld)) {
811 mem = ld->src[0]->value;
812 rec = &ctx->mem_c[ld->src[0]->value->reg.file - NV_FILE_MEM_C(0)];
813 } else
814 if (ld->opcode == NV_OP_VFETCH) {
815 mem = ld->src[0]->value;
816 rec = &ctx->mem_a;
817 } else
818 if (ld->opcode == NV_OP_EXPORT) {
819 mem = ld->src[0]->value;
820 if (mem->reg.file != NV_FILE_MEM_V)
821 continue;
822 rec = &ctx->mem_v;
823 } else {
824 continue;
825 }
826 if (ld->def[0] && ld->def[0]->refc == 0)
827 continue;
828 ofst = mem->reg.address;
829 base = (ld->indirect >= 0) ? ld->src[ld->indirect]->value->n : 0;
830
831 for (it = *rec; it; it = it->next) {
832 if (it->base == base &&
833 ((it->ofst >> 4) == (ofst >> 4)) &&
834 ((it->ofst + it->size == ofst) ||
835 (it->ofst - mem->reg.size == ofst))) {
836 /* only NV_OP_VFETCH can load exactly 12 bytes */
837 if (ld->opcode == NV_OP_LD && it->size + mem->reg.size == 12)
838 continue;
839 if (it->ofst < ofst) {
840 if ((it->ofst & 0xf) == 4)
841 continue;
842 } else
843 if ((ofst & 0xf) == 4)
844 continue;
845 break;
846 }
847 }
848 if (it) {
849 switch (ld->opcode) {
850 case NV_OP_EXPORT: combine_export(it, ld); break;
851 default:
852 combine_load(ctx->pc, it, ld);
853 break;
854 }
855 } else
856 if (ctx->alloc < MEM_RECORD_POOL_SIZE) {
857 add_mem_record(ctx, rec, base, ofst, ld);
858 }
859 }
860
861 ctx->alloc = 0;
862 ctx->mem_a = ctx->mem_v = ctx->mem_l = NULL;
863 for (s = 0; s < 16; ++s)
864 ctx->mem_c[s] = NULL;
865
866 DESCEND_ARBITRARY(s, nv_pass_mem_opt);
867 return 0;
868 }
869
870 static void
871 eliminate_store(struct mem_record *rec, struct nv_instruction *st)
872 {
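   /* TODO: not implemented yet; redundant stores are only detected, not
    * removed. */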
873 }
874
875 /* elimination of redundant stores */
876 static int
877 pass_store_elim(struct pass_reld_elim *ctx, struct nv_basic_block *b)
878 {
879 struct mem_record **rec, *it;
880 struct nv_instruction *st, *next;
881 struct nv_value *mem;
882 uint32_t base, ofst, size;
883 int s;
884
885 for (st = b->entry; st; st = next) {
886 next = st->next;
887
888 if (st->opcode == NV_OP_ST) {
889 mem = st->src[0]->value;
890 rec = &ctx->mem_l;
891 } else
892 if (st->opcode == NV_OP_EXPORT) {
893 mem = st->src[0]->value;
894 if (mem->reg.file != NV_FILE_MEM_V)
895 continue;
896 rec = &ctx->mem_v;
897 } else {
898 /* TODO: purge records invalidated by other memory writes */
899 continue;
900 }
901 ofst = mem->reg.address;
902 base = (st->indirect >= 0) ? st->src[st->indirect]->value->n : 0;
903 size = mem->reg.size;
904
905 for (it = *rec; it; it = it->next) {
906 if (it->base == base &&
907 (it->ofst <= ofst && (it->ofst + size) > ofst))
908 break;
909 }
910 if (it)
911 eliminate_store(it, st);
912 else
913 add_mem_record(ctx, rec, base, ofst, st);
914 }
915
916 DESCEND_ARBITRARY(s, pass_store_elim);
917 return 0;
918 }
919
920 /* TODO: properly handle loads from l[] memory in the presence of stores */
921 static int
922 nv_pass_reload_elim(struct pass_reld_elim *ctx, struct nv_basic_block *b)
923 {
924 #if 0
925 struct load_record **rec, *it;
926 struct nv_instruction *ld, *next;
927 uint64_t data[2];
928 struct nv_value *val;
929 int j;
930
931 for (ld = b->entry; ld; ld = next) {
932 next = ld->next;
933 if (!ld->src[0])
934 continue;
935 val = ld->src[0]->value;
936 rec = NULL;
937
938 if (ld->opcode == NV_OP_LINTERP || ld->opcode == NV_OP_PINTERP) {
939 data[0] = val->reg.id;
940 data[1] = 0;
941 rec = &ctx->mem_v;
942 } else
943 if (ld->opcode == NV_OP_LDA) {
944 data[0] = val->reg.id;
945 data[1] = ld->src[4] ? ld->src[4]->value->n : ~0ULL;
946 if (val->reg.file >= NV_FILE_MEM_C(0) &&
947 val->reg.file <= NV_FILE_MEM_C(15))
948 rec = &ctx->mem_c[val->reg.file - NV_FILE_MEM_C(0)];
949 else
950 if (val->reg.file == NV_FILE_MEM_S)
951 rec = &ctx->mem_s;
952 else
953 if (val->reg.file == NV_FILE_MEM_L)
954 rec = &ctx->mem_l;
955 } else
956 if ((ld->opcode == NV_OP_MOV) && (val->reg.file == NV_FILE_IMM)) {
957 data[0] = val->reg.imm.u32;
958 data[1] = 0;
959 rec = &ctx->imm;
960 }
961
962 if (!rec || !ld->def[0]->refc)
963 continue;
964
965 for (it = *rec; it; it = it->next)
966 if (it->data[0] == data[0] && it->data[1] == data[1])
967 break;
968
969 if (it) {
970 if (ld->def[0]->reg.id >= 0)
971 it->value = ld->def[0];
972 else
973 if (!ld->fixed)
974 nvc0_pc_replace_value(ctx->pc, ld->def[0], it->value);
975 } else {
976 if (ctx->alloc == LOAD_RECORD_POOL_SIZE)
977 continue;
978 it = &ctx->pool[ctx->alloc++];
979 it->next = *rec;
980 it->data[0] = data[0];
981 it->data[1] = data[1];
982 it->value = ld->def[0];
983 *rec = it;
984 }
985 }
986
987 ctx->imm = NULL;
988 ctx->mem_s = NULL;
989 ctx->mem_v = NULL;
990 for (j = 0; j < 16; ++j)
991 ctx->mem_c[j] = NULL;
992 ctx->mem_l = NULL;
993 ctx->alloc = 0;
994
995 DESCEND_ARBITRARY(j, nv_pass_reload_elim);
996 #endif
997 return 0;
998 }
999
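/* Derive tex_mask from which texture destination values are actually
 * referenced, and compact the defs so the enabled components come first.
 */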
1000 static int
1001 nv_pass_tex_mask(struct nv_pass *ctx, struct nv_basic_block *b)
1002 {
1003 int i, c, j;
1004
1005 for (i = 0; i < ctx->pc->num_instructions; ++i) {
1006 struct nv_instruction *nvi = &ctx->pc->instructions[i];
1007 struct nv_value *def[4];
1008
1009 if (!nv_is_texture_op(nvi->opcode))
1010 continue;
1011 nvi->tex_mask = 0;
1012
1013 for (c = 0; c < 4; ++c) {
1014 if (nvi->def[c]->refc)
1015 nvi->tex_mask |= 1 << c;
1016 def[c] = nvi->def[c];
1017 }
1018
1019 j = 0;
1020 for (c = 0; c < 4; ++c)
1021 if (nvi->tex_mask & (1 << c))
1022 nvi->def[j++] = def[c];
1023 for (c = 0; c < 4; ++c)
1024 if (!(nvi->tex_mask & (1 << c)))
1025 nvi->def[j++] = def[c];
1026 assert(j == 4);
1027 }
1028 return 0;
1029 }
1030
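/* Dead code elimination: delete instructions whose results are unused and
 * that have no side effects (see inst_removable); the caller re-runs this
 * until nothing more is removed.
 */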
1031 struct nv_pass_dce {
1032 struct nv_pc *pc;
1033 uint removed;
1034 };
1035
1036 static int
1037 nv_pass_dce(struct nv_pass_dce *ctx, struct nv_basic_block *b)
1038 {
1039 int j;
1040 struct nv_instruction *nvi, *next;
1041
1042 for (nvi = b->phi ? b->phi : b->entry; nvi; nvi = next) {
1043 next = nvi->next;
1044
1045 if (inst_removable(nvi)) {
1046 nvc0_insn_delete(nvi);
1047 ++ctx->removed;
1048 }
1049 }
1050 DESCEND_ARBITRARY(j, nv_pass_dce);
1051
1052 return 0;
1053 }
1054
1055 /* Register allocation inserted ELSE blocks for all IF/ENDIF without ELSE.
1056 * Returns TRUE if @bb initiates an IF/ELSE/ENDIF clause, or is an IF with
1057 * BREAK and a dummy ELSE block.
1058 */
1059 static INLINE boolean
1060 bb_is_if_else_endif(struct nv_basic_block *bb)
1061 {
1062 if (!bb->out[0] || !bb->out[1])
1063 return FALSE;
1064
1065 if (bb->out[0]->out_kind[0] == CFG_EDGE_LOOP_LEAVE) {
1066 return (bb->out[0]->out[1] == bb->out[1]->out[0] &&
1067 !bb->out[1]->out[1]);
1068 } else {
1069 return (bb->out[0]->out[0] == bb->out[1]->out[0] &&
1070 !bb->out[0]->out[1] &&
1071 !bb->out[1]->out[1]);
1072 }
1073 }
1074
1075 /* Predicate instructions and delete any branch at the end if it is
1076 * not a break from a loop.
1077 */
1078 static void
1079 predicate_instructions(struct nv_pc *pc, struct nv_basic_block *b,
1080 struct nv_value *pred, uint8_t cc)
1081 {
1082 struct nv_instruction *nvi, *prev;
1083 int s;
1084
1085 if (!b->entry)
1086 return;
1087 for (nvi = b->entry; nvi; nvi = nvi->next) {
1088 prev = nvi;
1089 if (inst_is_noop(nvi))
1090 continue;
1091 for (s = 0; nvi->src[s]; ++s);
1092 assert(s < 6);
1093 nvi->predicate = s;
1094 nvi->cc = cc;
1095 nv_reference(pc, nvi, nvi->predicate, pred);
1096 }
1097 if (prev->opcode == NV_OP_BRA &&
1098 b->out_kind[0] != CFG_EDGE_LOOP_LEAVE &&
1099 b->out_kind[1] != CFG_EDGE_LOOP_LEAVE)
1100 nvc0_insn_delete(prev);
1101 }
1102
1103 static INLINE boolean
1104 may_predicate_insn(struct nv_instruction *nvi, struct nv_value *pred)
1105 {
1106 if (nvi->def[0] && values_equal(nvi->def[0], pred))
1107 return FALSE;
1108 return nvc0_insn_is_predicateable(nvi);
1109 }
1110
1111 /* Transform IF/ELSE/ENDIF constructs into predicated instructions
1112 * where feasible.
1113 */
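/* Illustrative example (pseudo IR), where the branch to ELSE is taken if p:
 *    bra (p) ELSE ; mov a, b ; bra ENDIF ; ELSE: mov a, c ; ENDIF: ...
 * becomes
 *    (!p) mov a, b ; (p) mov a, c
 */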
1114 static int
1115 nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b)
1116 {
1117 struct nv_instruction *nvi;
1118 struct nv_value *pred;
1119 int k;
1120 int n0, n1; /* instruction counts of outgoing blocks */
1121
1122 if (bb_is_if_else_endif(b)) {
1123 assert(b->exit && b->exit->opcode == NV_OP_BRA);
1124
1125 assert(b->exit->predicate >= 0);
1126 pred = b->exit->src[b->exit->predicate]->value;
1127
1128 n1 = n0 = 0;
1129 for (nvi = b->out[0]->entry; nvi; nvi = nvi->next, ++n0)
1130 if (!may_predicate_insn(nvi, pred))
1131 break;
1132 if (!nvi) {
1133 /* we're after register allocation, so there always is an ELSE block */
1134 for (nvi = b->out[1]->entry; nvi; nvi = nvi->next, ++n1)
1135 if (!may_predicate_insn(nvi, pred))
1136 break;
1137 }
1138
1139 /* 12 is an arbitrary limit */
1140 if (!nvi && n0 < 12 && n1 < 12) {
1141 predicate_instructions(ctx->pc, b->out[0], pred, !b->exit->cc);
1142 predicate_instructions(ctx->pc, b->out[1], pred, b->exit->cc);
1143
1144 nvc0_insn_delete(b->exit); /* delete the branch */
1145
1146 /* and a potential joinat before it */
1147 if (b->exit && b->exit->opcode == NV_OP_JOINAT)
1148 nvc0_insn_delete(b->exit);
1149
1150 /* remove join operations at the end of the conditional */
1151 k = (b->out[0]->out_kind[0] == CFG_EDGE_LOOP_LEAVE) ? 1 : 0;
1152 if ((nvi = b->out[0]->out[k]->entry)) {
1153 nvi->join = 0;
1154 if (nvi->opcode == NV_OP_JOIN)
1155 nvc0_insn_delete(nvi);
1156 }
1157 }
1158 }
1159 DESCEND_ARBITRARY(k, nv_pass_flatten);
1160
1161 return 0;
1162 }
1163
1164 /* Tests instructions for equality, independently of their sources. */
1165 static boolean
1166 is_operation_equal(struct nv_instruction *a, struct nv_instruction *b)
1167 {
1168 if (a->opcode != b->opcode)
1169 return FALSE;
1170 if (nv_is_texture_op(a->opcode)) {
1171 if (a->ext.tex.t != b->ext.tex.t ||
1172 a->ext.tex.s != b->ext.tex.s)
1173 return FALSE;
1174 if (a->tex_dim != b->tex_dim ||
1175 a->tex_array != b->tex_array ||
1176 a->tex_cube != b->tex_cube ||
1177 a->tex_shadow != b->tex_shadow ||
1178 a->tex_live != b->tex_live)
1179 return FALSE;
1180 } else
1181 if (a->opcode == NV_OP_CVT) {
1182 if (a->ext.cvt.s != b->ext.cvt.s ||
1183 a->ext.cvt.d != b->ext.cvt.d)
1184 return FALSE;
1185 } else
1186 if (NV_BASEOP(a->opcode) == NV_OP_SET ||
1187 NV_BASEOP(a->opcode) == NV_OP_SLCT) {
1188 if (a->set_cond != b->set_cond)
1189 return FALSE;
1190 } else
1191 if (a->opcode == NV_OP_LINTERP ||
1192 a->opcode == NV_OP_PINTERP) {
1193 if (a->centroid != b->centroid ||
1194 a->flat != b->flat)
1195 return FALSE;
1196 }
1197 if (a->cc != b->cc)
1198 return FALSE;
1199 if (a->lanes != b->lanes ||
1200 a->patch != b->patch ||
1201 a->saturate != b->saturate)
1202 return FALSE;
1203 if (a->opcode == NV_OP_QUADOP) /* beware quadon ! */
1204 return FALSE;
1205 return TRUE;
1206 }
1207
1208 /* local common subexpression elimination, stupid O(n^2) implementation */
1209 static int
1210 nv_pass_cse(struct nv_pass *ctx, struct nv_basic_block *b)
1211 {
1212 struct nv_instruction *ir, *ik, *next;
1213 struct nv_instruction *entry = b->phi ? b->phi : b->entry;
1214 int s, d;
1215 unsigned int reps;
1216
1217 do {
1218 reps = 0;
1219 for (ir = entry; ir; ir = next) {
1220 next = ir->next;
1221 if (ir->fixed)
1222 continue;
1223 for (ik = entry; ik != ir; ik = ik->next) {
1224 if (!is_operation_equal(ir, ik))
1225 continue;
1226 if (!ir->def[0] || !ik->def[0])
1227 continue;
1228
1229 if (ik->indirect != ir->indirect || ik->predicate != ir->predicate)
1230 continue;
1231
1232 for (d = 0; d < 4; ++d) {
1233 if ((ir->def[d] ? 1 : 0) != (ik->def[d] ? 1 : 0))
1234 break;
1235 if (ir->def[d]) {
1236 if (!values_equal(ik->def[0], ir->def[0]))
1237 break;
1238 } else {
1239 d = 4;
1240 break;
1241 }
1242 }
1243 if (d != 4)
1244 continue;
1245
1246 for (s = 0; s < 5; ++s) {
1247 struct nv_value *a, *b;
1248
1249 if ((ir->src[s] ? 1 : 0) != (ik->src[s] ? 1 : 0))
1250 break;
1251 if (!ir->src[s]) {
1252 s = 5;
1253 break;
1254 }
1255
1256 if (ik->src[s]->mod != ir->src[s]->mod)
1257 break;
1258 a = ik->src[s]->value;
1259 b = ir->src[s]->value;
1260 if (a == b)
1261 continue;
1262 if (a->reg.file != b->reg.file ||
1263 a->reg.id < 0 || /* this excludes memory loads/stores */
1264 a->reg.id != b->reg.id)
1265 break;
1266 }
1267 if (s == 5) {
1268 nvc0_insn_delete(ir);
1269 for (d = 0; d < 4 && ir->def[d]; ++d)
1270 nvc0_pc_replace_value(ctx->pc, ir->def[d], ik->def[d]);
1271 ++reps;
1272 break;
1273 }
1274 }
1275 }
1276 } while(reps);
1277
1278 DESCEND_ARBITRARY(s, nv_pass_cse);
1279
1280 return 0;
1281 }
1282
1283 /* Make sure all sources of an NV_OP_BIND are distinct; they need to occupy
1284 * neighbouring registers. CSE might have messed this up.
1285 * Just generate a MOV for each source to avoid conflicts if they're used in
1286 * multiple NV_OP_BIND at different positions.
1287 */
1288 static int
1289 nv_pass_fix_bind(struct nv_pass *ctx, struct nv_basic_block *b)
1290 {
1291 struct nv_value *val;
1292 struct nv_instruction *bnd, *nvi, *next;
1293 int s;
1294
1295 for (bnd = b->entry; bnd; bnd = next) {
1296 next = bnd->next;
1297 if (bnd->opcode != NV_OP_BIND)
1298 continue;
1299 for (s = 0; s < 4 && bnd->src[s]; ++s) {
1300 val = bnd->src[s]->value;
1301
1302 nvi = nv_alloc_instruction(ctx->pc, NV_OP_MOV);
1303 nvi->def[0] = new_value_like(ctx->pc, val);
1304 nvi->def[0]->insn = nvi;
1305 nv_reference(ctx->pc, nvi, 0, val);
1306 nv_reference(ctx->pc, bnd, s, nvi->def[0]);
1307
1308 nvc0_insn_insert_before(bnd, nvi);
1309 }
1310 }
1311 DESCEND_ARBITRARY(s, nv_pass_fix_bind);
1312
1313 return 0;
1314 }
1315
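/* Pass 0 on one CFG root: CSE, algebraic simplification, modifier lowering,
 * load folding, optional reload elimination and load combining, iterated
 * DCE, texture mask computation and BIND source fixup.
 */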
1316 static int
1317 nv_pc_pass0(struct nv_pc *pc, struct nv_basic_block *root)
1318 {
1319 struct pass_reld_elim *reldelim;
1320 struct nv_pass pass;
1321 struct nv_pass_dce dce;
1322 int ret;
1323
1324 pass.n = 0;
1325 pass.pc = pc;
1326
1327 /* Do CSE so we can just compare values by pointer in subsequent passes. */
1328 pc->pass_seq++;
1329 ret = nv_pass_cse(&pass, root);
1330 if (ret)
1331 return ret;
1332
1333 /* Do this first, so we don't have to pay attention
1334 * to whether sources are supported memory loads.
1335 */
1336 pc->pass_seq++;
1337 ret = nv_pass_algebraic_opt(&pass, root);
1338 if (ret)
1339 return ret;
1340
1341 pc->pass_seq++;
1342 ret = nv_pass_lower_mods(&pass, root);
1343 if (ret)
1344 return ret;
1345
1346 pc->pass_seq++;
1347 ret = nvc0_pass_fold_loads(&pass, root);
1348 if (ret)
1349 return ret;
1350
1351 if (pc->opt_reload_elim) {
1352 reldelim = CALLOC_STRUCT(pass_reld_elim);
1353 reldelim->pc = pc;
1354
1355 pc->pass_seq++;
1356 ret = nv_pass_reload_elim(reldelim, root);
1357 if (ret) {
1358 FREE(reldelim);
1359 return ret;
1360 }
1361 memset(reldelim, 0, sizeof(struct pass_reld_elim));
1362 reldelim->pc = pc;
1363 }
1364
1365 /* May run DCE before load-combining since that pass will clean up
1366 * after itself.
1367 */
1368 dce.pc = pc;
1369 do {
1370 dce.removed = 0;
1371 pc->pass_seq++;
1372 ret = nv_pass_dce(&dce, root);
1373 if (ret)
1374 return ret;
1375 } while (dce.removed);
1376
1377 if (pc->opt_reload_elim) {
1378 pc->pass_seq++;
1379 ret = nv_pass_mem_opt(reldelim, root);
1380 if (!ret) {
1381 memset(reldelim, 0, sizeof(struct pass_reld_elim));
1382 reldelim->pc = pc;
1383
1384 pc->pass_seq++;
1385 ret = nv_pass_mem_opt(reldelim, root);
1386 }
1387 FREE(reldelim);
1388 if (ret)
1389 return ret;
1390 }
1391
1392 ret = nv_pass_tex_mask(&pass, root);
1393 if (ret)
1394 return ret;
1395
1396 pc->pass_seq++;
1397 ret = nv_pass_fix_bind(&pass, root);
1398
1399 return ret;
1400 }
1401
1402 int
1403 nvc0_pc_exec_pass0(struct nv_pc *pc)
1404 {
1405 int i, ret;
1406
1407 for (i = 0; i < pc->num_subroutines + 1; ++i)
1408 if (pc->root[i] && (ret = nv_pc_pass0(pc, pc->root[i])))
1409 return ret;
1410 return 0;
1411 }