nvc0: add missing break statements in constant_operand
mesa.git: src/gallium/drivers/nvc0/nvc0_pc_optimize.c
/*
 * Copyright 2010 Christoph Bumiller
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "nvc0_pc.h"
#include "nvc0_program.h"

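/* Apply pass function @f to both successors of basic block @b, visiting
 * each block at most once per pass: pass_seq serves as the visited marker.
 * @j is a scratch loop variable provided by the calling pass.
 */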
#define DESCEND_ARBITRARY(j, f) \
do { \
   b->pass_seq = ctx->pc->pass_seq; \
 \
   for (j = 0; j < 2; ++j) \
      if (b->out[j] && b->out[j]->pass_seq < ctx->pc->pass_seq) \
         f(ctx, b->out[j]); \
} while (0)

static INLINE boolean
registers_interfere(struct nv_value *a, struct nv_value *b)
{
   if (a->reg.file != b->reg.file)
      return FALSE;
   if (NV_IS_MEMORY_FILE(a->reg.file) || NV_IS_MEMORY_FILE(b->reg.file))
      return FALSE;

   assert(a->join->reg.id >= 0 && b->join->reg.id >= 0);

   if (a->join->reg.id < b->join->reg.id) {
      return (a->join->reg.id + a->reg.size >= b->join->reg.id);
   } else
   if (a->join->reg.id > b->join->reg.id) {
      return (b->join->reg.id + b->reg.size >= a->join->reg.id);
   }

   return FALSE;
}

static INLINE boolean
values_equal(struct nv_value *a, struct nv_value *b)
{
   if (a->reg.file != b->reg.file || a->reg.size != b->reg.size)
      return FALSE;
   if (NV_IS_MEMORY_FILE(a->reg.file))
      return a->reg.address == b->reg.address;
   else
      return a->join->reg.id == b->join->reg.id;
}

#if 0
static INLINE boolean
inst_commutation_check(struct nv_instruction *a, struct nv_instruction *b)
{
   int si, di;

   for (di = 0; di < 4 && a->def[di]; ++di)
      for (si = 0; si < 5 && b->src[si]; ++si)
         if (registers_interfere(a->def[di], b->src[si]->value))
            return FALSE;

   return TRUE;
}

/* Check whether we can swap the order of the instructions,
 * where a & b may be either the earlier or the later one.
 */
static boolean
inst_commutation_legal(struct nv_instruction *a, struct nv_instruction *b)
{
   return inst_commutation_check(a, b) && inst_commutation_check(b, a);
}
#endif

static INLINE boolean
inst_removable(struct nv_instruction *nvi)
{
   if (nvi->opcode == NV_OP_ST)
      return FALSE;
   return (!(nvi->terminator ||
             nvi->join ||
             nvi->target ||
             nvi->fixed ||
             nvc0_insn_refcount(nvi)));
}

/* Check if we do not actually have to emit this instruction. */
static INLINE boolean
inst_is_noop(struct nv_instruction *nvi)
{
   if (nvi->opcode == NV_OP_UNDEF || nvi->opcode == NV_OP_BIND)
      return TRUE;
   if (nvi->terminator || nvi->join)
      return FALSE;
   if (nvi->def[0] && nvi->def[0]->join->reg.id < 0)
      return TRUE;
   if (nvi->opcode != NV_OP_MOV && nvi->opcode != NV_OP_SELECT)
      return FALSE;
   if (nvi->def[0]->reg.file != nvi->src[0]->value->reg.file)
      return FALSE;

   if (nvi->src[0]->value->join->reg.id < 0) {
      NOUVEAU_DBG("inst_is_noop: orphaned value detected\n");
      return TRUE;
   }

   if (nvi->opcode == NV_OP_SELECT)
      if (!values_equal(nvi->def[0], nvi->src[1]->value))
         return FALSE;
   return values_equal(nvi->def[0], nvi->src[0]->value);
}

struct nv_pass {
   struct nv_pc *pc;
   int n;
   void *priv;
};

static int
nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b);

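/* Put @b into the emission-ordered bb_list and compute its position and
 * size: delete no-op instructions (and EXPORTs when compiling a fragment
 * program), count 8 bytes per remaining instruction, and drop a trailing
 * BRA of the previous block if it merely branches to @b ("BRA $PC+8").
 */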
static void
nv_pc_pass_pre_emission(void *priv, struct nv_basic_block *b)
{
   struct nv_pc *pc = (struct nv_pc *)priv;
   struct nv_basic_block *in;
   struct nv_instruction *nvi, *next;
   int j;

   for (j = pc->num_blocks - 1; j >= 0 && !pc->bb_list[j]->emit_size; --j);

   if (j >= 0) {
      in = pc->bb_list[j];

      /* check for no-op branches (BRA $PC+8) */
      if (in->exit && in->exit->opcode == NV_OP_BRA && in->exit->target == b) {
         in->emit_size -= 8;
         pc->emit_size -= 8;

         for (++j; j < pc->num_blocks; ++j)
            pc->bb_list[j]->emit_pos -= 8;

         nvc0_insn_delete(in->exit);
      }
      b->emit_pos = in->emit_pos + in->emit_size;
   }

   pc->bb_list[pc->num_blocks++] = b;

   /* visit node */

   for (nvi = b->entry; nvi; nvi = next) {
      next = nvi->next;
      if (inst_is_noop(nvi) ||
          (pc->is_fragprog && nvi->opcode == NV_OP_EXPORT)) {
         nvc0_insn_delete(nvi);
      } else
         b->emit_size += 8;
   }
   pc->emit_size += b->emit_size;

#ifdef NOUVEAU_DEBUG
   if (!b->entry)
      debug_printf("BB:%i is now empty\n", b->id);
   else
      debug_printf("BB:%i size = %u\n", b->id, b->emit_size);
#endif
}

static int
nv_pc_pass2(struct nv_pc *pc, struct nv_basic_block *root)
{
   struct nv_pass pass;

   pass.pc = pc;

   pc->pass_seq++;
   nv_pass_flatten(&pass, root);

   nvc0_pc_pass_in_order(root, nv_pc_pass_pre_emission, pc);

   return 0;
}

int
nvc0_pc_exec_pass2(struct nv_pc *pc)
{
   int i, ret;

   NOUVEAU_DBG("preparing %u blocks for emission\n", pc->num_blocks);

   pc->num_blocks = 0; /* will reorder bb_list */

   for (i = 0; i < pc->num_subroutines + 1; ++i)
      if (pc->root[i] && (ret = nv_pc_pass2(pc, pc->root[i])))
         return ret;
   return 0;
}

static INLINE boolean
is_cspace_load(struct nv_instruction *nvi)
{
   if (!nvi)
      return FALSE;
   assert(nvi->indirect != 0);
   return (nvi->opcode == NV_OP_LD &&
           nvi->src[0]->value->reg.file >= NV_FILE_MEM_C(0) &&
           nvi->src[0]->value->reg.file <= NV_FILE_MEM_C(15));
}

static INLINE boolean
is_immd32_load(struct nv_instruction *nvi)
{
   if (!nvi)
      return FALSE;
   return (nvi->opcode == NV_OP_MOV &&
           nvi->src[0]->value->reg.file == NV_FILE_IMM &&
           nvi->src[0]->value->reg.size == 4);
}

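/* If src0 is a c[] space or 32 bit immediate load and src1 is not, swap the
 * sources of a commutative operation (or of a SET, whose condition must then
 * be mirrored) so that the load ends up in the source slot that can take a
 * memory or immediate operand. cc_swapped presumably exchanges the LT and GT
 * bits of the 3-bit condition mask, which is how comparisons mirror under an
 * operand swap (e.g. LE = LT|EQ maps to GE = GT|EQ).
 */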
static INLINE void
check_swap_src_0_1(struct nv_instruction *nvi)
{
   static const uint8_t cc_swapped[8] = { 0, 4, 2, 6, 1, 5, 3, 7 };

   struct nv_ref *src0 = nvi->src[0];
   struct nv_ref *src1 = nvi->src[1];

   if (!nv_op_commutative(nvi->opcode) && NV_BASEOP(nvi->opcode) != NV_OP_SET)
      return;
   assert(src0 && src1 && src0->value && src1->value);

   if (is_cspace_load(src0->value->insn)) {
      if (!is_cspace_load(src1->value->insn)) {
         nvi->src[0] = src1;
         nvi->src[1] = src0;
      }
   } else
   if (is_immd32_load(src0->value->insn)) {
      if (!is_cspace_load(src1->value->insn) &&
          !is_immd32_load(src1->value->insn)) {
         nvi->src[0] = src1;
         nvi->src[1] = src0;
      }
   }

   if (nvi->src[0] != src0 && NV_BASEOP(nvi->opcode) == NV_OP_SET)
      nvi->set_cond = (nvi->set_cond & ~7) | cc_swapped[nvi->set_cond & 7];
}

static void
nvi_set_indirect_load(struct nv_pc *pc,
                      struct nv_instruction *nvi, struct nv_value *val)
{
   for (nvi->indirect = 0; nvi->indirect < 6 && nvi->src[nvi->indirect];
        ++nvi->indirect);
   assert(nvi->indirect < 6);
   nv_reference(pc, nvi, nvi->indirect, val);
}

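/* Fold loads into their users where the instruction can take a memory or
 * immediate operand directly (nvc0_insn_can_load), inheriting the load's
 * indirect address source if present, e.g.
 *    MOV r0, c0[0x10]; ADD r1, r2, r0  becomes  ADD r1, r2, c0[0x10]
 * (illustrative syntax). The load is deleted once no references remain.
 */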
static int
nvc0_pass_fold_loads(struct nv_pass *ctx, struct nv_basic_block *b)
{
   struct nv_instruction *nvi, *ld;
   int s;

   for (nvi = b->entry; nvi; nvi = nvi->next) {
      check_swap_src_0_1(nvi);

      for (s = 0; s < 3 && nvi->src[s]; ++s) {
         ld = nvi->src[s]->value->insn;
         if (!ld || (ld->opcode != NV_OP_LD && ld->opcode != NV_OP_MOV))
            continue;
         if (!nvc0_insn_can_load(nvi, s, ld))
            continue;

         /* fold it ! */
         nv_reference(ctx->pc, nvi, s, ld->src[0]->value);
         if (ld->indirect >= 0)
            nvi_set_indirect_load(ctx->pc, nvi, ld->src[ld->indirect]->value);

         if (!nvc0_insn_refcount(ld))
            nvc0_insn_delete(ld);
      }
   }
   DESCEND_ARBITRARY(s, nvc0_pass_fold_loads);

   return 0;
}

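/* Rewrite SUB as ADD with a negated source, fold single-use NEG/ABS
 * instructions into the source modifiers of their users, and merge SAT
 * into a defining ADD/MUL/MAD as its saturate flag.
 */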
/* NOTE: Assumes loads have not yet been folded. */
static int
nv_pass_lower_mods(struct nv_pass *ctx, struct nv_basic_block *b)
{
   struct nv_instruction *nvi, *mi, *next;
   int j;
   uint8_t mod;

   for (nvi = b->entry; nvi; nvi = next) {
      next = nvi->next;
      if (nvi->opcode == NV_OP_SUB) {
         nvi->src[1]->mod ^= NV_MOD_NEG;
         nvi->opcode = NV_OP_ADD;
      }

      for (j = 0; j < 3 && nvi->src[j]; ++j) {
         mi = nvi->src[j]->value->insn;
         if (!mi)
            continue;
         if (mi->def[0]->refc > 1 || mi->predicate >= 0)
            continue;

         if (NV_BASEOP(mi->opcode) == NV_OP_NEG) mod = NV_MOD_NEG;
         else
         if (NV_BASEOP(mi->opcode) == NV_OP_ABS) mod = NV_MOD_ABS;
         else
            continue;
         assert(!(mod & mi->src[0]->mod & NV_MOD_NEG));

         mod |= mi->src[0]->mod;

         if ((nvi->opcode == NV_OP_ABS) || (nvi->src[j]->mod & NV_MOD_ABS)) {
            /* abs neg [abs] = abs */
            mod &= ~(NV_MOD_NEG | NV_MOD_ABS);
         } else
         if ((nvi->opcode == NV_OP_NEG) && (mod & NV_MOD_NEG)) {
            /* neg as opcode and modifier on same insn cannot occur */
            /* neg neg abs = abs, neg neg = identity */
            assert(j == 0);
            if (mod & NV_MOD_ABS)
               nvi->opcode = NV_OP_ABS;
            else
               nvi->opcode = NV_OP_MOV;
            mod = 0;
         }

         if ((nv_op_supported_src_mods(nvi->opcode) & mod) != mod)
            continue;

         nv_reference(ctx->pc, nvi, j, mi->src[0]->value);

         nvi->src[j]->mod ^= mod;
      }

      if (nvi->opcode == NV_OP_SAT) {
         mi = nvi->src[0]->value->insn;

         if (mi->def[0]->refc > 1 ||
             (mi->opcode != NV_OP_ADD &&
              mi->opcode != NV_OP_MUL &&
              mi->opcode != NV_OP_MAD))
            continue;
         mi->saturate = 1;
         mi->def[0] = nvi->def[0];
         mi->def[0]->insn = mi;
         nvc0_insn_delete(nvi);
      }
   }
   DESCEND_ARBITRARY(j, nv_pass_lower_mods);

   return 0;
}

#define SRC_IS_MUL(s) ((s)->insn && (s)->insn->opcode == NV_OP_MUL)

static void
apply_modifiers(uint32_t *val, uint8_t type, uint8_t mod)
{
   if (mod & NV_MOD_ABS) {
      if (type == NV_TYPE_F32)
         *val &= 0x7fffffff;
      else
      if ((*val) & (1 << 31))
         *val = ~(*val) + 1;
   }
   if (mod & NV_MOD_NEG) {
      if (type == NV_TYPE_F32)
         *val ^= 0x80000000;
      else
         *val = ~(*val) + 1;
   }
   if (mod & NV_MOD_SAT) {
      union {
         float f;
         uint32_t u;
         int32_t i;
      } u;
      u.u = *val;
      if (type == NV_TYPE_F32) {
         u.f = CLAMP(u.f, -1.0f, 1.0f);
      } else
      if (type == NV_TYPE_U16) {
         u.u = MIN2(u.u, 0xffff);
      } else
      if (type == NV_TYPE_S16) {
         u.i = CLAMP(u.i, -32768, 32767);
      }
      *val = u.u;
   }
   if (mod & NV_MOD_NOT)
      *val = ~*val;
}

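/* Evaluate an instruction whose sources are both immediates and replace it
 * with a MOV of the computed result; MAD with two immediate factors is
 * reduced to an ADD (or to a MOV if the product is zero).
 */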
static void
constant_expression(struct nv_pc *pc, struct nv_instruction *nvi,
                    struct nv_value *src0, struct nv_value *src1)
{
   struct nv_value *val;
   union {
      float f32;
      uint32_t u32;
      int32_t s32;
   } u0, u1, u;
   ubyte type;

   if (!nvi->def[0])
      return;
   type = NV_OPTYPE(nvi->opcode);

   u.u32 = 0;
   u0.u32 = src0->reg.imm.u32;
   u1.u32 = src1->reg.imm.u32;

   apply_modifiers(&u0.u32, type, nvi->src[0]->mod);
   apply_modifiers(&u1.u32, type, nvi->src[1]->mod);

   switch (nvi->opcode) {
   case NV_OP_MAD_F32:
      if (nvi->src[2]->value->reg.file != NV_FILE_GPR)
         return;
      /* fall through */
   case NV_OP_MUL_F32:
      u.f32 = u0.f32 * u1.f32;
      break;
   case NV_OP_MUL_B32:
      u.u32 = u0.u32 * u1.u32;
      break;
   case NV_OP_ADD_F32:
      u.f32 = u0.f32 + u1.f32;
      break;
   case NV_OP_ADD_B32:
      u.u32 = u0.u32 + u1.u32;
      break;
   case NV_OP_SUB_F32:
      u.f32 = u0.f32 - u1.f32;
      break;
   /*
   case NV_OP_SUB_B32:
      u.u32 = u0.u32 - u1.u32;
      break;
   */
   default:
      return;
   }

   val = new_value(pc, NV_FILE_IMM, nv_type_sizeof(type));
   val->reg.imm.u32 = u.u32;

   nv_reference(pc, nvi, 1, NULL);
   nv_reference(pc, nvi, 0, val);

   if (nvi->opcode == NV_OP_MAD_F32) {
      nvi->src[1] = nvi->src[0];
      nvi->src[0] = nvi->src[2];
      nvi->src[2] = NULL;
      nvi->opcode = NV_OP_ADD_F32;

      if (val->reg.imm.u32 == 0) {
         nvi->src[1] = NULL;
         nvi->opcode = NV_OP_MOV;
      }
   } else {
      nvi->opcode = NV_OP_MOV;
   }
}

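/* Simplify an instruction with a single immediate source:
 *    x * 0 -> 0,  x * +/-1 -> MOV/NEG/ABS,  x * +/-2 -> x + x,
 *    x + 0 -> MOV/CVT,  x * 2^n -> x << n (integer),
 * and RCP/RSQ of an immediate are evaluated into a MOV of the result.
 */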
static void
constant_operand(struct nv_pc *pc,
                 struct nv_instruction *nvi, struct nv_value *val, int s)
{
   union {
      float f32;
      uint32_t u32;
      int32_t s32;
   } u;
   int shift;
   int t = s ? 0 : 1;
   uint op;
   ubyte type;

   if (!nvi->def[0])
      return;
   type = NV_OPTYPE(nvi->opcode);

   u.u32 = val->reg.imm.u32;
   apply_modifiers(&u.u32, type, nvi->src[s]->mod);

   if (u.u32 == 0 && NV_BASEOP(nvi->opcode) == NV_OP_MUL) {
      nvi->opcode = NV_OP_MOV;
      nv_reference(pc, nvi, t, NULL);
      if (s) {
         nvi->src[0] = nvi->src[1];
         nvi->src[1] = NULL;
      }
      return;
   }

   switch (nvi->opcode) {
   case NV_OP_MUL_F32:
      if (u.f32 == 1.0f || u.f32 == -1.0f) {
         if (u.f32 == -1.0f)
            nvi->src[t]->mod ^= NV_MOD_NEG;
         switch (nvi->src[t]->mod) {
         case 0: op = nvi->saturate ? NV_OP_SAT : NV_OP_MOV; break;
         case NV_MOD_NEG: op = NV_OP_NEG_F32; break;
         case NV_MOD_ABS: op = NV_OP_ABS_F32; break;
         default:
            return;
         }
         nvi->opcode = op;
         nv_reference(pc, nvi, 0, nvi->src[t]->value);
         nv_reference(pc, nvi, 1, NULL);
         nvi->src[0]->mod = 0;
      } else
      if (u.f32 == 2.0f || u.f32 == -2.0f) {
         if (u.f32 == -2.0f)
            nvi->src[t]->mod ^= NV_MOD_NEG;
         nvi->opcode = NV_OP_ADD_F32;
         nv_reference(pc, nvi, s, nvi->src[t]->value);
         nvi->src[s]->mod = nvi->src[t]->mod;
      }
      break;
   case NV_OP_ADD_F32:
      if (u.u32 == 0) {
         switch (nvi->src[t]->mod) {
         case 0: op = nvi->saturate ? NV_OP_SAT : NV_OP_MOV; break;
         case NV_MOD_NEG: op = NV_OP_NEG_F32; break;
         case NV_MOD_ABS: op = NV_OP_ABS_F32; break;
         case NV_MOD_NEG | NV_MOD_ABS:
            op = NV_OP_CVT;
            nvi->ext.cvt.s = nvi->ext.cvt.d = type;
            break;
         default:
            return;
         }
         nvi->opcode = op;
         nv_reference(pc, nvi, 0, nvi->src[t]->value);
         nv_reference(pc, nvi, 1, NULL);
         if (nvi->opcode != NV_OP_CVT)
            nvi->src[0]->mod = 0;
      }
      break;
   case NV_OP_ADD_B32:
      if (u.u32 == 0) {
         assert(nvi->src[t]->mod == 0);
         nvi->opcode = nvi->saturate ? NV_OP_CVT : NV_OP_MOV;
         nvi->ext.cvt.s = nvi->ext.cvt.d = type;
         nv_reference(pc, nvi, 0, nvi->src[t]->value);
         nv_reference(pc, nvi, 1, NULL);
      }
      break;
   case NV_OP_MUL_B32:
      /* multiplication by 0 already handled above */
      assert(nvi->src[s]->mod == 0);
      shift = ffs(u.s32) - 1;
      if (shift == 0) {
         nvi->opcode = NV_OP_MOV;
         nv_reference(pc, nvi, 0, nvi->src[t]->value);
         nv_reference(pc, nvi, 1, NULL);
      } else
      if (u.s32 > 0 && u.s32 == (1 << shift)) {
         nvi->opcode = NV_OP_SHL;
         (val = new_value(pc, NV_FILE_IMM, NV_TYPE_U32))->reg.imm.s32 = shift;
         nv_reference(pc, nvi, 0, nvi->src[t]->value);
         nv_reference(pc, nvi, 1, val);
         break;
      }
      break;
   case NV_OP_RCP:
      u.f32 = 1.0f / u.f32;
      (val = new_value(pc, NV_FILE_IMM, NV_TYPE_F32))->reg.imm.f32 = u.f32;
      nvi->opcode = NV_OP_MOV;
      assert(s == 0);
      nv_reference(pc, nvi, 0, val);
      break;
   case NV_OP_RSQ:
      u.f32 = 1.0f / sqrtf(u.f32);
      (val = new_value(pc, NV_FILE_IMM, NV_TYPE_F32))->reg.imm.f32 = u.f32;
      nvi->opcode = NV_OP_MOV;
      assert(s == 0);
      nv_reference(pc, nvi, 0, val);
      break;
   default:
      break;
   }
}

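/* MIN(a, a) = MAX(a, a) = a: forward the unmodified source and delete the
 * instruction.
 */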
static void
handle_min_max(struct nv_pass *ctx, struct nv_instruction *nvi)
{
   struct nv_value *src0 = nvi->src[0]->value;
   struct nv_value *src1 = nvi->src[1]->value;

   if (src0 != src1 || (nvi->src[0]->mod | nvi->src[1]->mod))
      return;
   if (src0->reg.file != NV_FILE_GPR)
      return;
   nvc0_pc_replace_value(ctx->pc, nvi->def[0], src0);
   nvc0_insn_delete(nvi);
}

/* check if we can MUL + ADD -> MAD/FMA */
static void
handle_add_mul(struct nv_pass *ctx, struct nv_instruction *nvi)
{
   struct nv_value *src0 = nvi->src[0]->value;
   struct nv_value *src1 = nvi->src[1]->value;
   struct nv_value *src;
   int s;
   uint8_t mod[4];

   if (SRC_IS_MUL(src0) && src0->refc == 1) s = 0;
   else
   if (SRC_IS_MUL(src1) && src1->refc == 1) s = 1;
   else
      return;

   if ((src0->insn && src0->insn->bb != nvi->bb) ||
       (src1->insn && src1->insn->bb != nvi->bb))
      return;

   /* check for immediates from prior constant folding */
   if (src0->reg.file != NV_FILE_GPR || src1->reg.file != NV_FILE_GPR)
      return;
   src = nvi->src[s]->value;

   mod[0] = nvi->src[0]->mod;
   mod[1] = nvi->src[1]->mod;
   mod[2] = src->insn->src[0]->mod;
   mod[3] = src->insn->src[1]->mod;

   if ((mod[0] | mod[1] | mod[2] | mod[3]) & ~NV_MOD_NEG)
      return;

   nvi->opcode = NV_OP_MAD_F32;

   nv_reference(ctx->pc, nvi, s, NULL);
   nvi->src[2] = nvi->src[!s];
   nvi->src[!s] = NULL;

   nv_reference(ctx->pc, nvi, 0, src->insn->src[0]->value);
   nvi->src[0]->mod = mod[2] ^ mod[s];
   nv_reference(ctx->pc, nvi, 1, src->insn->src[1]->value);
   nvi->src[1]->mod = mod[3];
}

static int
nv_pass_algebraic_opt(struct nv_pass *ctx, struct nv_basic_block *b)
{
   struct nv_instruction *nvi, *next;
   int j;

   for (nvi = b->entry; nvi; nvi = next) {
      struct nv_value *src0, *src1;
      uint baseop = NV_BASEOP(nvi->opcode);

      next = nvi->next;

      src0 = nvc0_pc_find_immediate(nvi->src[0]);
      src1 = nvc0_pc_find_immediate(nvi->src[1]);

      if (src0 && src1) {
         constant_expression(ctx->pc, nvi, src0, src1);
      } else {
         if (src0)
            constant_operand(ctx->pc, nvi, src0, 0);
         else
         if (src1)
            constant_operand(ctx->pc, nvi, src1, 1);
      }

      if (baseop == NV_OP_MIN || baseop == NV_OP_MAX)
         handle_min_max(ctx, nvi);
      else
      if (nvi->opcode == NV_OP_ADD_F32)
         handle_add_mul(ctx, nvi);
   }
   DESCEND_ARBITRARY(j, nv_pass_algebraic_opt);

   return 0;
}

/* TODO: redundant store elimination */

struct mem_record {
   struct mem_record *next;
   struct nv_instruction *insn;
   uint32_t ofst;
   uint32_t base;
   uint32_t size;
};

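/* Records of memory accesses for combining, kept in one list per memory
 * space (immediates, varyings, attributes, c[0..15], l[]): each record
 * remembers the instruction covering the range [ofst, ofst + size) at
 * indirect-address base @base.
 */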
#define MEM_RECORD_POOL_SIZE 1024

struct pass_reld_elim {
   struct nv_pc *pc;

   struct mem_record *imm;
   struct mem_record *mem_v;
   struct mem_record *mem_a;
   struct mem_record *mem_c[16];
   struct mem_record *mem_l;

   struct mem_record pool[MEM_RECORD_POOL_SIZE];
   int alloc;
};

/* Extend the load operation in @rec to also cover the data loaded by @ld.
 * The two loads must not overlap, and they must reference adjacent memory
 * locations.
 */
static void
combine_load(struct nv_pc *pc, struct mem_record *rec,
             struct nv_instruction *ld)
{
   struct nv_instruction *fv = rec->insn;
   struct nv_value *mem = ld->src[0]->value;
   uint32_t size = rec->size + mem->reg.size;
   int j;
   int d = rec->size / 4;

   assert(rec->size < 16);
   if (rec->ofst > mem->reg.address) {
      if ((size == 8 && mem->reg.address & 3) ||
          (size > 8 && mem->reg.address & 7))
         return;
      rec->ofst = mem->reg.address;
      for (j = 0; j < d; ++j)
         fv->def[mem->reg.size / 4 + j] = fv->def[j];
      d = 0;
   } else
   if ((size == 8 && rec->ofst & 3) ||
       (size > 8 && rec->ofst & 7)) {
      return;
   }

   for (j = 0; j < mem->reg.size / 4; ++j) {
      fv->def[d] = ld->def[j];
      fv->def[d++]->insn = fv;
   }

   if (fv->src[0]->value->refc > 1)
      nv_reference(pc, fv, 0, new_value_like(pc, fv->src[0]->value));
   fv->src[0]->value->reg.address = rec->ofst;
   fv->src[0]->value->reg.size = rec->size = size;

   nvc0_insn_delete(ld);
}

static void
combine_export(struct mem_record *rec, struct nv_instruction *ex)
{

}

static INLINE void
add_mem_record(struct pass_reld_elim *ctx, struct mem_record **rec,
               uint32_t base, uint32_t ofst, struct nv_instruction *nvi)
{
   struct mem_record *it = &ctx->pool[ctx->alloc++];

   it->next = *rec;
   *rec = it;
   it->base = base;
   it->ofst = ofst;
   it->insn = nvi;
   it->size = nvi->src[0]->value->reg.size;
}

/* vectorize and reuse loads from memory or of immediates */
static int
nv_pass_mem_opt(struct pass_reld_elim *ctx, struct nv_basic_block *b)
{
   struct mem_record **rec, *it;
   struct nv_instruction *ld, *next;
   struct nv_value *mem;
   uint32_t base, ofst;
   int s;

   for (ld = b->entry; ld; ld = next) {
      next = ld->next;

      if (is_cspace_load(ld)) {
         mem = ld->src[0]->value;
         rec = &ctx->mem_c[ld->src[0]->value->reg.file - NV_FILE_MEM_C(0)];
      } else
      if (ld->opcode == NV_OP_VFETCH) {
         mem = ld->src[0]->value;
         rec = &ctx->mem_a;
      } else
      if (ld->opcode == NV_OP_EXPORT) {
         mem = ld->src[0]->value;
         if (mem->reg.file != NV_FILE_MEM_V)
            continue;
         rec = &ctx->mem_v;
      } else {
         continue;
      }
      if (ld->def[0] && ld->def[0]->refc == 0)
         continue;
      ofst = mem->reg.address;
      base = (ld->indirect >= 0) ? ld->src[ld->indirect]->value->n : 0;

      for (it = *rec; it; it = it->next) {
         if (it->base == base &&
             ((it->ofst >> 4) == (ofst >> 4)) &&
             ((it->ofst + it->size == ofst) ||
              (it->ofst - mem->reg.size == ofst))) {
            /* only NV_OP_VFETCH can load exactly 12 bytes */
            if (ld->opcode == NV_OP_LD && it->size + mem->reg.size == 12)
               continue;
            if (it->ofst < ofst) {
               if ((it->ofst & 0xf) == 4)
                  continue;
            } else
            if ((ofst & 0xf) == 4)
               continue;
            break;
         }
      }
      if (it) {
         switch (ld->opcode) {
         case NV_OP_EXPORT: combine_export(it, ld); break;
         default:
            combine_load(ctx->pc, it, ld);
            break;
         }
      } else
      if (ctx->alloc < MEM_RECORD_POOL_SIZE) {
         add_mem_record(ctx, rec, base, ofst, ld);
      }
   }

   ctx->alloc = 0;
   ctx->mem_a = ctx->mem_v = ctx->mem_l = NULL;
   for (s = 0; s < 16; ++s)
      ctx->mem_c[s] = NULL;

   DESCEND_ARBITRARY(s, nv_pass_mem_opt);
   return 0;
}

static void
eliminate_store(struct mem_record *rec, struct nv_instruction *st)
{
}

/* elimination of redundant stores */
static int
pass_store_elim(struct pass_reld_elim *ctx, struct nv_basic_block *b)
{
   struct mem_record **rec, *it;
   struct nv_instruction *st, *next;
   struct nv_value *mem;
   uint32_t base, ofst, size;
   int s;

   for (st = b->entry; st; st = next) {
      next = st->next;

      if (st->opcode == NV_OP_ST) {
         mem = st->src[0]->value;
         rec = &ctx->mem_l;
      } else
      if (st->opcode == NV_OP_EXPORT) {
         mem = st->src[0]->value;
         if (mem->reg.file != NV_FILE_MEM_V)
            continue;
         rec = &ctx->mem_v;
      } else {
         /* TODO: purge records invalidated by other memory operations */
         continue;
      }
      ofst = mem->reg.address;
      base = (st->indirect >= 0) ? st->src[st->indirect]->value->n : 0;
      size = mem->reg.size;

      for (it = *rec; it; it = it->next) {
         if (it->base == base &&
             (it->ofst <= ofst && (it->ofst + size) > ofst))
            break;
      }
      if (it)
         eliminate_store(it, st);
      else
         add_mem_record(ctx, rec, base, ofst, st);
   }

   DESCEND_ARBITRARY(s, pass_store_elim);
   return 0;
}

/* TODO: properly handle loads from l[] memory in the presence of stores */
static int
nv_pass_reload_elim(struct pass_reld_elim *ctx, struct nv_basic_block *b)
{
#if 0
   struct load_record **rec, *it;
   struct nv_instruction *ld, *next;
   uint64_t data[2];
   struct nv_value *val;
   int j;

   for (ld = b->entry; ld; ld = next) {
      next = ld->next;
      if (!ld->src[0])
         continue;
      val = ld->src[0]->value;
      rec = NULL;

      if (ld->opcode == NV_OP_LINTERP || ld->opcode == NV_OP_PINTERP) {
         data[0] = val->reg.id;
         data[1] = 0;
         rec = &ctx->mem_v;
      } else
      if (ld->opcode == NV_OP_LDA) {
         data[0] = val->reg.id;
         data[1] = ld->src[4] ? ld->src[4]->value->n : ~0ULL;
         if (val->reg.file >= NV_FILE_MEM_C(0) &&
             val->reg.file <= NV_FILE_MEM_C(15))
            rec = &ctx->mem_c[val->reg.file - NV_FILE_MEM_C(0)];
         else
         if (val->reg.file == NV_FILE_MEM_S)
            rec = &ctx->mem_s;
         else
         if (val->reg.file == NV_FILE_MEM_L)
            rec = &ctx->mem_l;
      } else
      if ((ld->opcode == NV_OP_MOV) && (val->reg.file == NV_FILE_IMM)) {
         data[0] = val->reg.imm.u32;
         data[1] = 0;
         rec = &ctx->imm;
      }

      if (!rec || !ld->def[0]->refc)
         continue;

      for (it = *rec; it; it = it->next)
         if (it->data[0] == data[0] && it->data[1] == data[1])
            break;

      if (it) {
         if (ld->def[0]->reg.id >= 0)
            it->value = ld->def[0];
         else
         if (!ld->fixed)
            nvc0_pc_replace_value(ctx->pc, ld->def[0], it->value);
      } else {
         if (ctx->alloc == LOAD_RECORD_POOL_SIZE)
            continue;
         it = &ctx->pool[ctx->alloc++];
         it->next = *rec;
         it->data[0] = data[0];
         it->data[1] = data[1];
         it->value = ld->def[0];
         *rec = it;
      }
   }

   ctx->imm = NULL;
   ctx->mem_s = NULL;
   ctx->mem_v = NULL;
   for (j = 0; j < 16; ++j)
      ctx->mem_c[j] = NULL;
   ctx->mem_l = NULL;
   ctx->alloc = 0;

   DESCEND_ARBITRARY(j, nv_pass_reload_elim);
#endif
   return 0;
}

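/* Compute tex_mask, the set of texture components whose values are actually
 * used, and compact the defs so that the live components come first and only
 * those components need to be fetched.
 */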
static int
nv_pass_tex_mask(struct nv_pass *ctx, struct nv_basic_block *b)
{
   int i, c, j;

   for (i = 0; i < ctx->pc->num_instructions; ++i) {
      struct nv_instruction *nvi = &ctx->pc->instructions[i];
      struct nv_value *def[4];

      if (!nv_is_texture_op(nvi->opcode))
         continue;
      nvi->tex_mask = 0;

      for (c = 0; c < 4; ++c) {
         if (nvi->def[c]->refc)
            nvi->tex_mask |= 1 << c;
         def[c] = nvi->def[c];
      }

      j = 0;
      for (c = 0; c < 4; ++c)
         if (nvi->tex_mask & (1 << c))
            nvi->def[j++] = def[c];
      for (c = 0; c < 4; ++c)
         if (!(nvi->tex_mask & (1 << c)))
            nvi->def[j++] = def[c];
      assert(j == 4);
   }
   return 0;
}

struct nv_pass_dce {
   struct nv_pc *pc;
   uint removed;
};

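/* Dead code elimination: delete instructions whose results are never used
 * (see inst_removable). The caller reruns the pass until a fixed point is
 * reached, since each deletion may render further instructions dead.
 */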
static int
nv_pass_dce(struct nv_pass_dce *ctx, struct nv_basic_block *b)
{
   int j;
   struct nv_instruction *nvi, *next;

   for (nvi = b->phi ? b->phi : b->entry; nvi; nvi = next) {
      next = nvi->next;

      if (inst_removable(nvi)) {
         nvc0_insn_delete(nvi);
         ++ctx->removed;
      }
   }
   DESCEND_ARBITRARY(j, nv_pass_dce);

   return 0;
}

/* Register allocation inserted ELSE blocks for all IF/ENDIF without ELSE.
 * Returns TRUE if @bb initiates an IF/ELSE/ENDIF clause, or is an IF with
 * a BREAK and a dummy ELSE block.
 */
static INLINE boolean
bb_is_if_else_endif(struct nv_basic_block *bb)
{
   if (!bb->out[0] || !bb->out[1])
      return FALSE;

   if (bb->out[0]->out_kind[0] == CFG_EDGE_LOOP_LEAVE) {
      return (bb->out[0]->out[1] == bb->out[1]->out[0] &&
              !bb->out[1]->out[1]);
   } else {
      return (bb->out[0]->out[0] == bb->out[1]->out[0] &&
              !bb->out[0]->out[1] &&
              !bb->out[1]->out[1]);
   }
}

/* Predicate instructions and delete any branch at the end if it is
 * not a break from a loop.
 */
static void
predicate_instructions(struct nv_pc *pc, struct nv_basic_block *b,
                       struct nv_value *pred, uint8_t cc)
{
   struct nv_instruction *nvi, *prev;
   int s;

   if (!b->entry)
      return;
   for (nvi = b->entry; nvi; nvi = nvi->next) {
      prev = nvi;
      if (inst_is_noop(nvi))
         continue;
      for (s = 0; nvi->src[s]; ++s);
      assert(s < 6);
      nvi->predicate = s;
      nvi->cc = cc;
      nv_reference(pc, nvi, nvi->predicate, pred);
   }
   if (prev->opcode == NV_OP_BRA &&
       b->out_kind[0] != CFG_EDGE_LOOP_LEAVE &&
       b->out_kind[1] != CFG_EDGE_LOOP_LEAVE)
      nvc0_insn_delete(prev);
}

static INLINE boolean
may_predicate_insn(struct nv_instruction *nvi, struct nv_value *pred)
{
   if (nvi->def[0] && values_equal(nvi->def[0], pred))
      return FALSE;
   return nvc0_insn_is_predicateable(nvi);
}

/* Transform IF/ELSE/ENDIF constructs into predicated instructions
 * where feasible.
 */
static int
nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b)
{
   struct nv_instruction *nvi;
   struct nv_value *pred;
   int k;
   int n0, n1; /* instruction counts of outgoing blocks */

   if (bb_is_if_else_endif(b)) {
      assert(b->exit && b->exit->opcode == NV_OP_BRA);

      assert(b->exit->predicate >= 0);
      pred = b->exit->src[b->exit->predicate]->value;

      n1 = n0 = 0;
      for (nvi = b->out[0]->entry; nvi; nvi = nvi->next, ++n0)
         if (!may_predicate_insn(nvi, pred))
            break;
      if (!nvi) {
         /* we're after register allocation, so there always is an ELSE block */
         for (nvi = b->out[1]->entry; nvi; nvi = nvi->next, ++n1)
            if (!may_predicate_insn(nvi, pred))
               break;
      }

      /* 12 is an arbitrary limit */
      if (!nvi && n0 < 12 && n1 < 12) {
         predicate_instructions(ctx->pc, b->out[0], pred, !b->exit->cc);
         predicate_instructions(ctx->pc, b->out[1], pred, b->exit->cc);

         nvc0_insn_delete(b->exit); /* delete the branch */

         /* and a potential joinat before it */
         if (b->exit && b->exit->opcode == NV_OP_JOINAT)
            nvc0_insn_delete(b->exit);

         /* remove join operations at the end of the conditional */
         k = (b->out[0]->out_kind[0] == CFG_EDGE_LOOP_LEAVE) ? 1 : 0;
         if ((nvi = b->out[0]->out[k]->entry)) {
            nvi->join = 0;
            if (nvi->opcode == NV_OP_JOIN)
               nvc0_insn_delete(nvi);
         }
      }
   }
   DESCEND_ARBITRARY(k, nv_pass_flatten);

   return 0;
}

/* Test instructions for equality, independently of their sources. */
static boolean
is_operation_equal(struct nv_instruction *a, struct nv_instruction *b)
{
   if (a->opcode != b->opcode)
      return FALSE;
   if (nv_is_texture_op(a->opcode)) {
      if (a->ext.tex.t != b->ext.tex.t ||
          a->ext.tex.s != b->ext.tex.s)
         return FALSE;
      if (a->tex_dim != b->tex_dim ||
          a->tex_array != b->tex_array ||
          a->tex_cube != b->tex_cube ||
          a->tex_shadow != b->tex_shadow ||
          a->tex_live != b->tex_live)
         return FALSE;
   } else
   if (a->opcode == NV_OP_CVT) {
      if (a->ext.cvt.s != b->ext.cvt.s ||
          a->ext.cvt.d != b->ext.cvt.d)
         return FALSE;
   } else
   if (NV_BASEOP(a->opcode) == NV_OP_SET ||
       NV_BASEOP(a->opcode) == NV_OP_SLCT) {
      if (a->set_cond != b->set_cond)
         return FALSE;
   } else
   if (a->opcode == NV_OP_LINTERP ||
       a->opcode == NV_OP_PINTERP) {
      if (a->centroid != b->centroid ||
          a->flat != b->flat)
         return FALSE;
   }
   if (a->cc != b->cc)
      return FALSE;
   if (a->lanes != b->lanes ||
       a->patch != b->patch ||
       a->saturate != b->saturate)
      return FALSE;
   if (a->opcode == NV_OP_QUADOP) /* beware quadon ! */
      return FALSE;
   return TRUE;
}

/* local common subexpression elimination, stupid O(n^2) implementation */
static int
nv_pass_cse(struct nv_pass *ctx, struct nv_basic_block *b)
{
   struct nv_instruction *ir, *ik, *next;
   struct nv_instruction *entry = b->phi ? b->phi : b->entry;
   int s, d;
   unsigned int reps;

   do {
      reps = 0;
      for (ir = entry; ir; ir = next) {
         next = ir->next;
         if (ir->fixed)
            continue;
         for (ik = entry; ik != ir; ik = ik->next) {
            if (!is_operation_equal(ir, ik))
               continue;
            if (!ir->def[0] || !ik->def[0])
               continue;

            if (ik->indirect != ir->indirect || ik->predicate != ir->predicate)
               continue;

            for (d = 0; d < 4; ++d) {
               if ((ir->def[d] ? 1 : 0) != (ik->def[d] ? 1 : 0))
                  break;
               if (ir->def[d]) {
                  if (!values_equal(ik->def[d], ir->def[d]))
                     break;
               } else {
                  d = 4;
                  break;
               }
            }
            if (d != 4)
               continue;

            for (s = 0; s < 5; ++s) {
               struct nv_value *a, *b;

               if ((ir->src[s] ? 1 : 0) != (ik->src[s] ? 1 : 0))
                  break;
               if (!ir->src[s]) {
                  s = 5;
                  break;
               }

               if (ik->src[s]->mod != ir->src[s]->mod)
                  break;
               a = ik->src[s]->value;
               b = ir->src[s]->value;
               if (a == b)
                  continue;
               if (a->reg.file != b->reg.file ||
                   a->reg.id < 0 || /* this excludes memory loads/stores */
                   a->reg.id != b->reg.id)
                  break;
            }
            if (s == 5) {
               nvc0_insn_delete(ir);
               for (d = 0; d < 4 && ir->def[d]; ++d)
                  nvc0_pc_replace_value(ctx->pc, ir->def[d], ik->def[d]);
               ++reps;
               break;
            }
         }
      }
   } while (reps);

   DESCEND_ARBITRARY(s, nv_pass_cse);

   return 0;
}

/* Make sure all sources of an NV_OP_BIND are distinct; they need to occupy
 * neighbouring registers. CSE might have messed this up.
 * Just generate a MOV for each source to avoid conflicts if they're used in
 * multiple NV_OP_BINDs at different positions.
 */
static int
nv_pass_fix_bind(struct nv_pass *ctx, struct nv_basic_block *b)
{
   struct nv_value *val;
   struct nv_instruction *bnd, *nvi, *next;
   int s;

   for (bnd = b->entry; bnd; bnd = next) {
      next = bnd->next;
      if (bnd->opcode != NV_OP_BIND)
         continue;
      for (s = 0; s < 4 && bnd->src[s]; ++s) {
         val = bnd->src[s]->value;

         nvi = nv_alloc_instruction(ctx->pc, NV_OP_MOV);
         nvi->def[0] = new_value_like(ctx->pc, val);
         nvi->def[0]->insn = nvi;
         nv_reference(ctx->pc, nvi, 0, val);
         nv_reference(ctx->pc, bnd, s, nvi->def[0]);

         nvc0_insn_insert_before(bnd, nvi);
      }
   }
   DESCEND_ARBITRARY(s, nv_pass_fix_bind);

   return 0;
}

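/* The main optimization pipeline, run once per subroutine: CSE, algebraic
 * simplification, modifier lowering, load folding, reload elimination,
 * DCE to a fixed point, load/export combining, texture mask computation
 * and BIND source fixup.
 */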
static int
nv_pc_pass0(struct nv_pc *pc, struct nv_basic_block *root)
{
   struct pass_reld_elim *reldelim;
   struct nv_pass pass;
   struct nv_pass_dce dce;
   int ret;

   pass.n = 0;
   pass.pc = pc;

   /* Do CSE so we can just compare values by pointer in subsequent passes. */
   pc->pass_seq++;
   ret = nv_pass_cse(&pass, root);
   if (ret)
      return ret;

   /* Do this first, so we don't have to pay attention
    * to whether sources are supported memory loads.
    */
   pc->pass_seq++;
   ret = nv_pass_algebraic_opt(&pass, root);
   if (ret)
      return ret;

   pc->pass_seq++;
   ret = nv_pass_lower_mods(&pass, root);
   if (ret)
      return ret;

   pc->pass_seq++;
   ret = nvc0_pass_fold_loads(&pass, root);
   if (ret)
      return ret;

   if (pc->opt_reload_elim) {
      reldelim = CALLOC_STRUCT(pass_reld_elim);
      reldelim->pc = pc;

      pc->pass_seq++;
      ret = nv_pass_reload_elim(reldelim, root);
      if (ret) {
         FREE(reldelim);
         return ret;
      }
      memset(reldelim, 0, sizeof(struct pass_reld_elim));
      reldelim->pc = pc;
   }

   /* May run DCE before load-combining since that pass will clean up
    * after itself.
    */
   dce.pc = pc;
   do {
      dce.removed = 0;
      pc->pass_seq++;
      ret = nv_pass_dce(&dce, root);
      if (ret)
         return ret;
   } while (dce.removed);

   if (pc->opt_reload_elim) {
      pc->pass_seq++;
      ret = nv_pass_mem_opt(reldelim, root);
      if (!ret) {
         memset(reldelim, 0, sizeof(struct pass_reld_elim));
         reldelim->pc = pc;

         pc->pass_seq++;
         ret = nv_pass_mem_opt(reldelim, root);
      }
      FREE(reldelim);
      if (ret)
         return ret;
   }

   ret = nv_pass_tex_mask(&pass, root);
   if (ret)
      return ret;

   pc->pass_seq++;
   ret = nv_pass_fix_bind(&pass, root);

   return ret;
}

int
nvc0_pc_exec_pass0(struct nv_pc *pc)
{
   int i, ret;

   for (i = 0; i < pc->num_subroutines + 1; ++i)
      if (pc->root[i] && (ret = nv_pc_pass0(pc, pc->root[i])))
         return ret;
   return 0;
}