src/gallium/drivers/nvc0/nvc0_pc_optimize.c
1 /*
2 * Copyright 2010 Christoph Bumiller
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 * SOFTWARE.
21 */
22
23 #include "nvc0_pc.h"
24 #include "nvc0_program.h"
25
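/* Run pass function @f on both successors of block @b; pass_seq marks blocks
 * already visited so each block is processed at most once per pass.
 */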
26 #define DESCEND_ARBITRARY(j, f) \
27 do { \
28 b->pass_seq = ctx->pc->pass_seq; \
29 \
30 for (j = 0; j < 2; ++j) \
31 if (b->out[j] && b->out[j]->pass_seq < ctx->pc->pass_seq) \
32 f(ctx, b->out[j]); \
33 } while (0)
34
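/* Check whether two values occupy overlapping ranges of the same (non-memory)
 * register file.
 */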
35 static INLINE boolean
36 registers_interfere(struct nv_value *a, struct nv_value *b)
37 {
38 if (a->reg.file != b->reg.file)
39 return FALSE;
40 if (NV_IS_MEMORY_FILE(a->reg.file) || NV_IS_MEMORY_FILE(b->reg.file))
41 return FALSE;
42
43 assert(a->join->reg.id >= 0 && b->join->reg.id >= 0);
44
45 if (a->join->reg.id < b->join->reg.id) {
46 return (a->join->reg.id + a->reg.size >= b->join->reg.id);
47 } else
48 if (a->join->reg.id > b->join->reg.id) {
49 return (b->join->reg.id + b->reg.size >= a->join->reg.id);
50 }
51
52 return FALSE;
53 }
54
55 static INLINE boolean
56 values_equal(struct nv_value *a, struct nv_value *b)
57 {
58 if (a->reg.file != b->reg.file || a->reg.size != b->reg.size)
59 return FALSE;
60 if (NV_IS_MEMORY_FILE(a->reg.file))
61 return a->reg.address == b->reg.address;
62 else
63 return a->join->reg.id == b->join->reg.id;
64 }
65
66 #if 0
67 static INLINE boolean
68 inst_commutation_check(struct nv_instruction *a, struct nv_instruction *b)
69 {
70 int si, di;
71
72 for (di = 0; di < 4 && a->def[di]; ++di)
73 for (si = 0; si < 5 && b->src[si]; ++si)
74 if (registers_interfere(a->def[di], b->src[si]->value))
75 return FALSE;
76
77 return TRUE;
78 }
79
80 /* Check whether we can swap the order of the instructions,
81 * where a & b may be either the earlier or the later one.
82 */
83 static boolean
84 inst_commutation_legal(struct nv_instruction *a, struct nv_instruction *b)
85 {
86 return inst_commutation_check(a, b) && inst_commutation_check(b, a);
87 }
88 #endif
89
90 static INLINE boolean
91 inst_removable(struct nv_instruction *nvi)
92 {
93 if (nvi->opcode == NV_OP_ST)
94 return FALSE;
95 return (!(nvi->terminator ||
96 nvi->join ||
97 nvi->target ||
98 nvi->fixed ||
99 nvc0_insn_refcount(nvi)));
100 }
101
102 /* Check if we do not actually have to emit this instruction. */
103 static INLINE boolean
104 inst_is_noop(struct nv_instruction *nvi)
105 {
106 if (nvi->opcode == NV_OP_UNDEF || nvi->opcode == NV_OP_BIND)
107 return TRUE;
108 if (nvi->terminator || nvi->join)
109 return FALSE;
110 if (nvi->def[0] && nvi->def[0]->join->reg.id < 0)
111 return TRUE;
112 if (nvi->opcode != NV_OP_MOV && nvi->opcode != NV_OP_SELECT)
113 return FALSE;
114 if (nvi->def[0]->reg.file != nvi->src[0]->value->reg.file)
115 return FALSE;
116
117 if (nvi->src[0]->value->join->reg.id < 0) {
118 NOUVEAU_DBG("inst_is_noop: orphaned value detected\n");
119 return TRUE;
120 }
121
122 if (nvi->opcode == NV_OP_SELECT)
123 if (!values_equal(nvi->def[0], nvi->src[1]->value))
124 return FALSE;
125 return values_equal(nvi->def[0], nvi->src[0]->value);
126 }
127
128 struct nv_pass {
129 struct nv_pc *pc;
130 int n;
131 void *priv;
132 };
133
134 static int
135 nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b);
136
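/* Called for each block in emission order: drop no-op instructions (and
 * EXPORTs in fragment programs), accumulate per-block code size and start
 * offsets, and delete branches that merely fall through to the next block.
 */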
137 static void
138 nv_pc_pass_pre_emission(void *priv, struct nv_basic_block *b)
139 {
140 struct nv_pc *pc = (struct nv_pc *)priv;
141 struct nv_basic_block *in;
142 struct nv_instruction *nvi, *next;
143 int j;
144
145 for (j = pc->num_blocks - 1; j >= 0 && !pc->bb_list[j]->emit_size; --j);
146
147 if (j >= 0) {
148 in = pc->bb_list[j];
149
150 /* check for no-op branches (BRA $PC+8) */
151 if (in->exit && in->exit->opcode == NV_OP_BRA && in->exit->target == b) {
152 in->emit_size -= 8;
153 pc->emit_size -= 8;
154
155 for (++j; j < pc->num_blocks; ++j)
156 pc->bb_list[j]->emit_pos -= 8;
157
158 nvc0_insn_delete(in->exit);
159 }
160 b->emit_pos = in->emit_pos + in->emit_size;
161 }
162
163 pc->bb_list[pc->num_blocks++] = b;
164
165    /* visit node: delete no-op instructions (and EXPORTs in fragment programs), count emit size */
166
167 for (nvi = b->entry; nvi; nvi = next) {
168 next = nvi->next;
169 if (inst_is_noop(nvi) ||
170 (pc->is_fragprog && nvi->opcode == NV_OP_EXPORT)) {
171 nvc0_insn_delete(nvi);
172 } else
173 b->emit_size += 8;
174 }
175 pc->emit_size += b->emit_size;
176
177 #ifdef NOUVEAU_DEBUG
178 if (!b->entry)
179 debug_printf("BB:%i is now empty\n", b->id);
180 else
181 debug_printf("BB:%i size = %u\n", b->id, b->emit_size);
182 #endif
183 }
184
185 static int
186 nv_pc_pass2(struct nv_pc *pc, struct nv_basic_block *root)
187 {
188 struct nv_pass pass;
189
190 pass.pc = pc;
191
192 pc->pass_seq++;
193 nv_pass_flatten(&pass, root);
194
195 nvc0_pc_pass_in_order(root, nv_pc_pass_pre_emission, pc);
196
197 return 0;
198 }
199
200 int
201 nvc0_pc_exec_pass2(struct nv_pc *pc)
202 {
203 int i, ret;
204
205 NOUVEAU_DBG("preparing %u blocks for emission\n", pc->num_blocks);
206
207 pc->num_blocks = 0; /* will reorder bb_list */
208
209 for (i = 0; i < pc->num_subroutines + 1; ++i)
210 if (pc->root[i] && (ret = nv_pc_pass2(pc, pc->root[i])))
211 return ret;
212 return 0;
213 }
214
215 static INLINE boolean
216 is_cspace_load(struct nv_instruction *nvi)
217 {
218 if (!nvi)
219 return FALSE;
220 assert(nvi->indirect != 0);
221 return (nvi->opcode == NV_OP_LD &&
222 nvi->src[0]->value->reg.file >= NV_FILE_MEM_C(0) &&
223 nvi->src[0]->value->reg.file <= NV_FILE_MEM_C(15));
224 }
225
226 static INLINE boolean
227 is_immd32_load(struct nv_instruction *nvi)
228 {
229 if (!nvi)
230 return FALSE;
231 return (nvi->opcode == NV_OP_MOV &&
232 nvi->src[0]->value->reg.file == NV_FILE_IMM &&
233 nvi->src[0]->value->reg.size == 4);
234 }
235
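/* For commutative ops and SETs, move a constant buffer load or immediate from
 * src[0] to src[1], presumably so it can later be folded into the instruction;
 * for SETs the comparison condition is mirrored to match.
 */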
236 static INLINE void
237 check_swap_src_0_1(struct nv_instruction *nvi)
238 {
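   /* cc_swapped[cc] exchanges bits 0 and 2 of the 3-bit condition mask and
    * keeps bit 1, i.e. it mirrors LT/GT (EQ unchanged) for swapped operands.
    */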
239 static const uint8_t cc_swapped[8] = { 0, 4, 2, 6, 1, 5, 3, 7 };
240
241 struct nv_ref *src0 = nvi->src[0];
242 struct nv_ref *src1 = nvi->src[1];
243
244 if (!nv_op_commutative(nvi->opcode) && NV_BASEOP(nvi->opcode) != NV_OP_SET)
245 return;
246 assert(src0 && src1 && src0->value && src1->value);
247
248 if (is_cspace_load(src0->value->insn)) {
249 if (!is_cspace_load(src1->value->insn)) {
250 nvi->src[0] = src1;
251 nvi->src[1] = src0;
252 }
253 } else
254 if (is_immd32_load(src0->value->insn)) {
255 if (!is_cspace_load(src1->value->insn) &&
256 !is_immd32_load(src1->value->insn)) {
257 nvi->src[0] = src1;
258 nvi->src[1] = src0;
259 }
260 }
261
262 if (nvi->src[0] != src0 && NV_BASEOP(nvi->opcode) == NV_OP_SET)
263 nvi->set_cond = (nvi->set_cond & ~7) | cc_swapped[nvi->set_cond & 7];
264 }
265
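/* Attach @val as an extra source of @nvi in the first unused source slot and
 * record that slot as the indirect address operand.
 */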
266 static void
267 nvi_set_indirect_load(struct nv_pc *pc,
268 struct nv_instruction *nvi, struct nv_value *val)
269 {
270 for (nvi->indirect = 0; nvi->indirect < 6 && nvi->src[nvi->indirect];
271 ++nvi->indirect);
272 assert(nvi->indirect < 6);
273 nv_reference(pc, nvi, nvi->indirect, val);
274 }
275
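/* Fold loads and immediate MOVs into the instructions that use them where the
 * hardware allows it, carrying over any indirect address source, and delete
 * the load once it has no other users.
 */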
276 static int
277 nvc0_pass_fold_loads(struct nv_pass *ctx, struct nv_basic_block *b)
278 {
279 struct nv_instruction *nvi, *ld;
280 int s;
281
282 for (nvi = b->entry; nvi; nvi = nvi->next) {
283 check_swap_src_0_1(nvi);
284
285 for (s = 0; s < 3 && nvi->src[s]; ++s) {
286 ld = nvi->src[s]->value->insn;
287 if (!ld || (ld->opcode != NV_OP_LD && ld->opcode != NV_OP_MOV))
288 continue;
289 if (!nvc0_insn_can_load(nvi, s, ld))
290 continue;
291
292 /* fold it ! */
293 nv_reference(ctx->pc, nvi, s, ld->src[0]->value);
294 if (ld->indirect >= 0)
295 nvi_set_indirect_load(ctx->pc, nvi, ld->src[ld->indirect]->value);
296
297 if (!nvc0_insn_refcount(ld))
298 nvc0_insn_delete(ld);
299 }
300 }
301 DESCEND_ARBITRARY(s, nvc0_pass_fold_loads);
302
303 return 0;
304 }
305
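/* Turn SUB into ADD with a negated source, fold single-use NEG/ABS
 * instructions into source modifiers of their users, and merge SAT into the
 * ADD/MUL/MAD that computes its operand.
 */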
306 /* NOTE: Assumes loads have not yet been folded. */
307 static int
308 nv_pass_lower_mods(struct nv_pass *ctx, struct nv_basic_block *b)
309 {
310 struct nv_instruction *nvi, *mi, *next;
311 int j;
312 uint8_t mod;
313
314 for (nvi = b->entry; nvi; nvi = next) {
315 next = nvi->next;
316 if (nvi->opcode == NV_OP_SUB) {
317 nvi->src[1]->mod ^= NV_MOD_NEG;
318 nvi->opcode = NV_OP_ADD;
319 }
320
321 for (j = 0; j < 3 && nvi->src[j]; ++j) {
322 mi = nvi->src[j]->value->insn;
323 if (!mi)
324 continue;
325 if (mi->def[0]->refc > 1 || mi->predicate >= 0)
326 continue;
327
328 if (NV_BASEOP(mi->opcode) == NV_OP_NEG) mod = NV_MOD_NEG;
329 else
330 if (NV_BASEOP(mi->opcode) == NV_OP_ABS) mod = NV_MOD_ABS;
331 else
332 continue;
333 assert(!(mod & mi->src[0]->mod & NV_MOD_NEG));
334
335 mod |= mi->src[0]->mod;
336
337 if ((nvi->opcode == NV_OP_ABS) || (nvi->src[j]->mod & NV_MOD_ABS)) {
338 /* abs neg [abs] = abs */
339 mod &= ~(NV_MOD_NEG | NV_MOD_ABS);
340 } else
341 if ((nvi->opcode == NV_OP_NEG) && (mod & NV_MOD_NEG)) {
342 /* neg as opcode and modifier on same insn cannot occur */
343 /* neg neg abs = abs, neg neg = identity */
344 assert(j == 0);
345 if (mod & NV_MOD_ABS)
346 nvi->opcode = NV_OP_ABS;
347 else
348 nvi->opcode = NV_OP_MOV;
349 mod = 0;
350 }
351
352 if ((nv_op_supported_src_mods(nvi->opcode) & mod) != mod)
353 continue;
354
355 nv_reference(ctx->pc, nvi, j, mi->src[0]->value);
356
357 nvi->src[j]->mod ^= mod;
358 }
359
360 if (nvi->opcode == NV_OP_SAT) {
361 mi = nvi->src[0]->value->insn;
362
363 if (mi->def[0]->refc > 1 ||
364 (mi->opcode != NV_OP_ADD &&
365 mi->opcode != NV_OP_MUL &&
366 mi->opcode != NV_OP_MAD))
367 continue;
368 mi->saturate = 1;
369 mi->def[0] = nvi->def[0];
370 mi->def[0]->insn = mi;
371 nvc0_insn_delete(nvi);
372 }
373 }
374 DESCEND_ARBITRARY(j, nv_pass_lower_mods);
375
376 return 0;
377 }
378
379 #define SRC_IS_MUL(s) ((s)->insn && (s)->insn->opcode == NV_OP_MUL)
380
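/* Apply the NV_MOD_* source modifiers to the immediate *val in place,
 * interpreting the value according to @type.
 */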
381 static void
382 apply_modifiers(uint32_t *val, uint8_t type, uint8_t mod)
383 {
384 if (mod & NV_MOD_ABS) {
385 if (type == NV_TYPE_F32)
386 *val &= 0x7fffffff;
387 else
388 if ((*val) & (1 << 31))
389 *val = ~(*val) + 1;
390 }
391 if (mod & NV_MOD_NEG) {
392 if (type == NV_TYPE_F32)
393 *val ^= 0x80000000;
394 else
395 *val = ~(*val) + 1;
396 }
397 if (mod & NV_MOD_SAT) {
398 union {
399 float f;
400 uint32_t u;
401 int32_t i;
402 } u;
403 u.u = *val;
404 if (type == NV_TYPE_F32) {
405 u.f = CLAMP(u.f, -1.0f, 1.0f);
406 } else
407 if (type == NV_TYPE_U16) {
408 u.u = MIN2(u.u, 0xffff);
409 } else
410 if (type == NV_TYPE_S16) {
411 u.i = CLAMP(u.i, -32768, 32767);
412 }
413 *val = u.u;
414 }
415 if (mod & NV_MOD_NOT)
416 *val = ~*val;
417 }
418
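/* Both sources of @nvi are immediates: evaluate the operation at compile time
 * and rewrite @nvi into a MOV of the result (or, for MAD, into an ADD of the
 * result and the remaining source).
 */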
419 static void
420 constant_expression(struct nv_pc *pc, struct nv_instruction *nvi,
421 struct nv_value *src0, struct nv_value *src1)
422 {
423 struct nv_value *val;
424 union {
425 float f32;
426 uint32_t u32;
427 int32_t s32;
428 } u0, u1, u;
429 ubyte type;
430
431 if (!nvi->def[0])
432 return;
433 type = NV_OPTYPE(nvi->opcode);
434
435 u.u32 = 0;
436 u0.u32 = src0->reg.imm.u32;
437 u1.u32 = src1->reg.imm.u32;
438
439 apply_modifiers(&u0.u32, type, nvi->src[0]->mod);
440 apply_modifiers(&u1.u32, type, nvi->src[1]->mod);
441
442 switch (nvi->opcode) {
443 case NV_OP_MAD_F32:
444 if (nvi->src[2]->value->reg.file != NV_FILE_GPR)
445 return;
446 /* fall through */
447 case NV_OP_MUL_F32:
448 u.f32 = u0.f32 * u1.f32;
449 break;
450 case NV_OP_MUL_B32:
451 u.u32 = u0.u32 * u1.u32;
452 break;
453 case NV_OP_ADD_F32:
454 u.f32 = u0.f32 + u1.f32;
455 break;
456 case NV_OP_ADD_B32:
457 u.u32 = u0.u32 + u1.u32;
458 break;
459 case NV_OP_SUB_F32:
460 u.f32 = u0.f32 - u1.f32;
461 break;
462 /*
463 case NV_OP_SUB_B32:
464 u.u32 = u0.u32 - u1.u32;
465 break;
466 */
467 default:
468 return;
469 }
470
471 val = new_value(pc, NV_FILE_IMM, nv_type_sizeof(type));
472 val->reg.imm.u32 = u.u32;
473
474 nv_reference(pc, nvi, 1, NULL);
475 nv_reference(pc, nvi, 0, val);
476
477 if (nvi->opcode == NV_OP_MAD_F32) {
478 nvi->src[1] = nvi->src[0];
479 nvi->src[0] = nvi->src[2];
480 nvi->src[2] = NULL;
481 nvi->opcode = NV_OP_ADD_F32;
482
483 if (val->reg.imm.u32 == 0) {
484 nvi->src[1] = NULL;
485 nvi->opcode = NV_OP_MOV;
486 }
487 } else {
488 nvi->opcode = NV_OP_MOV;
489 }
490 }
491
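/* Source @s of @nvi is the immediate @val: simplify multiplications by
 * 0, +/-1, +/-2 and powers of two (e.g. x * 8 becomes x << 3), additions of 0,
 * and evaluate RCP/RSQ of an immediate directly.
 */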
492 static void
493 constant_operand(struct nv_pc *pc,
494 struct nv_instruction *nvi, struct nv_value *val, int s)
495 {
496 union {
497 float f32;
498 uint32_t u32;
499 int32_t s32;
500 } u;
501 int shift;
502 int t = s ? 0 : 1;
503 uint op;
504 ubyte type;
505
506 if (!nvi->def[0])
507 return;
508 type = NV_OPTYPE(nvi->opcode);
509
510 u.u32 = val->reg.imm.u32;
511 apply_modifiers(&u.u32, type, nvi->src[s]->mod);
512
513 if (u.u32 == 0 && NV_BASEOP(nvi->opcode) == NV_OP_MUL) {
514 nvi->opcode = NV_OP_MOV;
515 nv_reference(pc, nvi, t, NULL);
516 if (s) {
517 nvi->src[0] = nvi->src[1];
518 nvi->src[1] = NULL;
519 }
520 return;
521 }
522
523 switch (nvi->opcode) {
524 case NV_OP_MUL_F32:
525 if (u.f32 == 1.0f || u.f32 == -1.0f) {
526 if (u.f32 == -1.0f)
527 nvi->src[t]->mod ^= NV_MOD_NEG;
528 switch (nvi->src[t]->mod) {
529 case 0: op = nvi->saturate ? NV_OP_SAT : NV_OP_MOV; break;
530 case NV_MOD_NEG: op = NV_OP_NEG_F32; break;
531 case NV_MOD_ABS: op = NV_OP_ABS_F32; break;
532 default:
533 return;
534 }
535 nvi->opcode = op;
536 nv_reference(pc, nvi, 0, nvi->src[t]->value);
537 nv_reference(pc, nvi, 1, NULL);
538 nvi->src[0]->mod = 0;
539 } else
540 if (u.f32 == 2.0f || u.f32 == -2.0f) {
541 if (u.f32 == -2.0f)
542 nvi->src[t]->mod ^= NV_MOD_NEG;
543 nvi->opcode = NV_OP_ADD_F32;
544 nv_reference(pc, nvi, s, nvi->src[t]->value);
545 nvi->src[s]->mod = nvi->src[t]->mod;
546       }
      break;
547 case NV_OP_ADD_F32:
548 if (u.u32 == 0) {
549 switch (nvi->src[t]->mod) {
550 case 0: op = nvi->saturate ? NV_OP_SAT : NV_OP_MOV; break;
551 case NV_MOD_NEG: op = NV_OP_NEG_F32; break;
552 case NV_MOD_ABS: op = NV_OP_ABS_F32; break;
553 case NV_MOD_NEG | NV_MOD_ABS:
554 op = NV_OP_CVT;
555 nvi->ext.cvt.s = nvi->ext.cvt.d = type;
556 break;
557 default:
558 return;
559 }
560 nvi->opcode = op;
561 nv_reference(pc, nvi, 0, nvi->src[t]->value);
562 nv_reference(pc, nvi, 1, NULL);
563 if (nvi->opcode != NV_OP_CVT)
564 nvi->src[0]->mod = 0;
565       }
      break;
566 case NV_OP_ADD_B32:
567 if (u.u32 == 0) {
568 assert(nvi->src[t]->mod == 0);
569 nvi->opcode = nvi->saturate ? NV_OP_CVT : NV_OP_MOV;
570 nvi->ext.cvt.s = nvi->ext.cvt.d = type;
571 nv_reference(pc, nvi, 0, nvi->src[t]->value);
572 nv_reference(pc, nvi, 1, NULL);
573 }
574 break;
575 case NV_OP_MUL_B32:
576 /* multiplication by 0 already handled above */
577 assert(nvi->src[s]->mod == 0);
578 shift = ffs(u.s32) - 1;
579 if (shift == 0) {
580 nvi->opcode = NV_OP_MOV;
581 nv_reference(pc, nvi, 0, nvi->src[t]->value);
582 nv_reference(pc, nvi, 1, NULL);
583 } else
584 if (u.s32 > 0 && u.s32 == (1 << shift)) {
585 nvi->opcode = NV_OP_SHL;
586 (val = new_value(pc, NV_FILE_IMM, NV_TYPE_U32))->reg.imm.s32 = shift;
587 nv_reference(pc, nvi, 0, nvi->src[t]->value);
588 nv_reference(pc, nvi, 1, val);
589 break;
590 }
591 break;
592 case NV_OP_RCP:
593 u.f32 = 1.0f / u.f32;
594 (val = new_value(pc, NV_FILE_IMM, NV_TYPE_F32))->reg.imm.f32 = u.f32;
595 nvi->opcode = NV_OP_MOV;
596 assert(s == 0);
597 nv_reference(pc, nvi, 0, val);
598 break;
599 case NV_OP_RSQ:
600 u.f32 = 1.0f / sqrtf(u.f32);
601 (val = new_value(pc, NV_FILE_IMM, NV_TYPE_F32))->reg.imm.f32 = u.f32;
602 nvi->opcode = NV_OP_MOV;
603 assert(s == 0);
604 nv_reference(pc, nvi, 0, val);
605 break;
606 default:
607 break;
608 }
609 }
610
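/* MIN(a, a) / MAX(a, a) without source modifiers is just a: forward the
 * source and delete the instruction.
 */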
611 static void
612 handle_min_max(struct nv_pass *ctx, struct nv_instruction *nvi)
613 {
614 struct nv_value *src0 = nvi->src[0]->value;
615 struct nv_value *src1 = nvi->src[1]->value;
616
617 if (src0 != src1 || (nvi->src[0]->mod | nvi->src[1]->mod))
618 return;
619 if (src0->reg.file != NV_FILE_GPR)
620 return;
621 nvc0_pc_replace_value(ctx->pc, nvi->def[0], src0);
622 nvc0_insn_delete(nvi);
623 }
624
625 /* check if we can MUL + ADD -> MAD/FMA */
626 static void
627 handle_add_mul(struct nv_pass *ctx, struct nv_instruction *nvi)
628 {
629 struct nv_value *src0 = nvi->src[0]->value;
630 struct nv_value *src1 = nvi->src[1]->value;
631 struct nv_value *src;
632 int s;
633 uint8_t mod[4];
634
635 if (SRC_IS_MUL(src0) && src0->refc == 1) s = 0;
636 else
637 if (SRC_IS_MUL(src1) && src1->refc == 1) s = 1;
638 else
639 return;
640
641 if ((src0->insn && src0->insn->bb != nvi->bb) ||
642 (src1->insn && src1->insn->bb != nvi->bb))
643 return;
644
645 /* check for immediates from prior constant folding */
646 if (src0->reg.file != NV_FILE_GPR || src1->reg.file != NV_FILE_GPR)
647 return;
648 src = nvi->src[s]->value;
649
650 mod[0] = nvi->src[0]->mod;
651 mod[1] = nvi->src[1]->mod;
652 mod[2] = src->insn->src[0]->mod;
653 mod[3] = src->insn->src[1]->mod;
654
655 if ((mod[0] | mod[1] | mod[2] | mod[3]) & ~NV_MOD_NEG)
656 return;
657
658 nvi->opcode = NV_OP_MAD_F32;
659
660 nv_reference(ctx->pc, nvi, s, NULL);
661 nvi->src[2] = nvi->src[!s];
662 nvi->src[!s] = NULL;
663
664 nv_reference(ctx->pc, nvi, 0, src->insn->src[0]->value);
665 nvi->src[0]->mod = mod[2] ^ mod[s];
666 nv_reference(ctx->pc, nvi, 1, src->insn->src[1]->value);
667 nvi->src[1]->mod = mod[3];
668 }
669
670 static int
671 nv_pass_algebraic_opt(struct nv_pass *ctx, struct nv_basic_block *b)
672 {
673 struct nv_instruction *nvi, *next;
674 int j;
675
676 for (nvi = b->entry; nvi; nvi = next) {
677 struct nv_value *src0, *src1;
678 uint baseop = NV_BASEOP(nvi->opcode);
679
680 next = nvi->next;
681
682 src0 = nvc0_pc_find_immediate(nvi->src[0]);
683 src1 = nvc0_pc_find_immediate(nvi->src[1]);
684
685 if (src0 && src1) {
686 constant_expression(ctx->pc, nvi, src0, src1);
687 } else {
688 if (src0)
689 constant_operand(ctx->pc, nvi, src0, 0);
690 else
691 if (src1)
692 constant_operand(ctx->pc, nvi, src1, 1);
693 }
694
695 if (baseop == NV_OP_MIN || baseop == NV_OP_MAX)
696 handle_min_max(ctx, nvi);
697 else
698 if (nvi->opcode == NV_OP_ADD_F32)
699 handle_add_mul(ctx, nvi);
700 }
701 DESCEND_ARBITRARY(j, nv_pass_algebraic_opt);
702
703 return 0;
704 }
705
706 /* TODO: redundant store elimination */
707
708 struct mem_record {
709 struct mem_record *next;
710 struct nv_instruction *insn;
711 uint32_t ofst;
712 uint32_t base;
713 uint32_t size;
714 };
715
716 #define MEM_RECORD_POOL_SIZE 1024
717
718 struct pass_reld_elim {
719 struct nv_pc *pc;
720
721 struct mem_record *imm;
722 struct mem_record *mem_v;
723 struct mem_record *mem_a;
724 struct mem_record *mem_c[16];
725 struct mem_record *mem_l;
726
727 struct mem_record pool[MEM_RECORD_POOL_SIZE];
728 int alloc;
729 };
730
731 /* Extend the load operation in @rec to also cover the data loaded by @ld.
732  * The two loads must not overlap; they must reference adjacent memory locations.
733 */
734 static void
735 combine_load(struct mem_record *rec, struct nv_instruction *ld)
736 {
737 struct nv_instruction *fv = rec->insn;
738 struct nv_value *mem = ld->src[0]->value;
739 uint32_t size = rec->size + mem->reg.size;
740 int j;
741 int d = rec->size / 4;
742
743 assert(rec->size < 16);
744 if (rec->ofst > mem->reg.address) {
745 if ((size == 8 && mem->reg.address & 3) ||
746 (size > 8 && mem->reg.address & 7))
747 return;
748 rec->ofst = mem->reg.address;
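      /* @ld covers the bytes just below the recorded load: shift the existing
       * defs up and put the newly loaded values in front
       */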
749 for (j = 0; j < d; ++j)
750 fv->def[mem->reg.size / 4 + j] = fv->def[j];
751 d = 0;
752 } else
753 if ((size == 8 && rec->ofst & 3) ||
754 (size > 8 && rec->ofst & 7)) {
755 return;
756 }
757
758 for (j = 0; j < mem->reg.size / 4; ++j) {
759 fv->def[d] = ld->def[j];
760 fv->def[d++]->insn = fv;
761 }
762
763 fv->src[0]->value->reg.address = rec->ofst;
764 fv->src[0]->value->reg.size = rec->size = size;
765
766 nvc0_insn_delete(ld);
767 }
768
769 static void
770 combine_export(struct mem_record *rec, struct nv_instruction *ex)
771 {
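   /* TODO: presumably merge adjacent exports here, analogous to combine_load() */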
772
773 }
774
775 static INLINE void
776 add_mem_record(struct pass_reld_elim *ctx, struct mem_record **rec,
777 uint32_t base, uint32_t ofst, struct nv_instruction *nvi)
778 {
779 struct mem_record *it = &ctx->pool[ctx->alloc++];
780
781 it->next = *rec;
782 *rec = it;
783 it->base = base;
784 it->ofst = ofst;
785 it->insn = nvi;
786 it->size = nvi->src[0]->value->reg.size;
787 }
788
789 /* vectorize and reuse loads from memory or of immediates */
790 static int
791 nv_pass_mem_opt(struct pass_reld_elim *ctx, struct nv_basic_block *b)
792 {
793 struct mem_record **rec, *it;
794 struct nv_instruction *ld, *next;
795 struct nv_value *mem;
796 uint32_t base, ofst;
797 int s;
798
799 for (ld = b->entry; ld; ld = next) {
800 next = ld->next;
801
802 if (is_cspace_load(ld)) {
803 mem = ld->src[0]->value;
804 rec = &ctx->mem_c[ld->src[0]->value->reg.file - NV_FILE_MEM_C(0)];
805 } else
806 if (ld->opcode == NV_OP_VFETCH) {
807 mem = ld->src[0]->value;
808 rec = &ctx->mem_a;
809 } else
810 if (ld->opcode == NV_OP_EXPORT) {
811 mem = ld->src[0]->value;
812 if (mem->reg.file != NV_FILE_MEM_V)
813 continue;
814 rec = &ctx->mem_v;
815 } else {
816 continue;
817 }
818 if (ld->def[0] && ld->def[0]->refc == 0)
819 continue;
820 ofst = mem->reg.address;
821 base = (ld->indirect >= 0) ? ld->src[ld->indirect]->value->n : 0;
822
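      /* look for a recorded access with the same base in the same 16 byte slot
       * that is immediately adjacent (directly above or below), so the two can
       * be combined
       */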
823 for (it = *rec; it; it = it->next) {
824 if (it->base == base &&
825 ((it->ofst >> 4) == (ofst >> 4)) &&
826 ((it->ofst + it->size == ofst) ||
827 (it->ofst - mem->reg.size == ofst))) {
828 /* only NV_OP_VFETCH can load exactly 12 bytes */
829 if (ld->opcode == NV_OP_LD && it->size + mem->reg.size == 12)
830 continue;
831 if (it->ofst < ofst) {
832 if ((it->ofst & 0xf) == 4)
833 continue;
834 } else
835 if ((ofst & 0xf) == 4)
836 continue;
837 break;
838 }
839 }
840 if (it) {
841 switch (ld->opcode) {
842 case NV_OP_EXPORT: combine_export(it, ld); break;
843 default:
844 combine_load(it, ld);
845 break;
846 }
847 } else
848 if (ctx->alloc < MEM_RECORD_POOL_SIZE) {
849 add_mem_record(ctx, rec, base, ofst, ld);
850 }
851 }
852
853 ctx->alloc = 0;
854 ctx->mem_a = ctx->mem_v = ctx->mem_l = NULL;
855 for (s = 0; s < 16; ++s)
856 ctx->mem_c[s] = NULL;
857
858 DESCEND_ARBITRARY(s, nv_pass_mem_opt);
859 return 0;
860 }
861
862 static void
863 eliminate_store(struct mem_record *rec, struct nv_instruction *st)
864 {
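   /* TODO: presumably delete the store recorded in @rec once @st is known to
    * overwrite it with no intervening load
    */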
865 }
866
867 /* elimination of redundant stores */
868 static int
869 pass_store_elim(struct pass_reld_elim *ctx, struct nv_basic_block *b)
870 {
871 struct mem_record **rec, *it;
872 struct nv_instruction *st, *next;
873 struct nv_value *mem;
874 uint32_t base, ofst, size;
875 int s;
876
877 for (st = b->entry; st; st = next) {
878 next = st->next;
879
880 if (st->opcode == NV_OP_ST) {
881 mem = st->src[0]->value;
882 rec = &ctx->mem_l;
883 } else
884 if (st->opcode == NV_OP_EXPORT) {
885 mem = st->src[0]->value;
886 if (mem->reg.file != NV_FILE_MEM_V)
887 continue;
888 rec = &ctx->mem_v;
889    } else {
890       /* TODO: purge records invalidated by other kinds of memory access */
891       continue;
892    }
893 ofst = mem->reg.address;
894 base = (st->indirect >= 0) ? st->src[st->indirect]->value->n : 0;
895 size = mem->reg.size;
896
897 for (it = *rec; it; it = it->next) {
898 if (it->base == base &&
899 (it->ofst <= ofst && (it->ofst + size) > ofst))
900 break;
901 }
902 if (it)
903 eliminate_store(it, st);
904 else
905 add_mem_record(ctx, rec, base, ofst, st);
906 }
907
908    DESCEND_ARBITRARY(s, pass_store_elim);
909 return 0;
910 }
911
912 /* TODO: properly handle loads from l[] memory in the presence of stores */
913 static int
914 nv_pass_reload_elim(struct pass_reld_elim *ctx, struct nv_basic_block *b)
915 {
916 #if 0
917 struct load_record **rec, *it;
918 struct nv_instruction *ld, *next;
919 uint64_t data[2];
920 struct nv_value *val;
921 int j;
922
923 for (ld = b->entry; ld; ld = next) {
924 next = ld->next;
925 if (!ld->src[0])
926 continue;
927 val = ld->src[0]->value;
928 rec = NULL;
929
930 if (ld->opcode == NV_OP_LINTERP || ld->opcode == NV_OP_PINTERP) {
931 data[0] = val->reg.id;
932 data[1] = 0;
933 rec = &ctx->mem_v;
934 } else
935 if (ld->opcode == NV_OP_LDA) {
936 data[0] = val->reg.id;
937 data[1] = ld->src[4] ? ld->src[4]->value->n : ~0ULL;
938 if (val->reg.file >= NV_FILE_MEM_C(0) &&
939 val->reg.file <= NV_FILE_MEM_C(15))
940 rec = &ctx->mem_c[val->reg.file - NV_FILE_MEM_C(0)];
941 else
942 if (val->reg.file == NV_FILE_MEM_S)
943 rec = &ctx->mem_s;
944 else
945 if (val->reg.file == NV_FILE_MEM_L)
946 rec = &ctx->mem_l;
947 } else
948 if ((ld->opcode == NV_OP_MOV) && (val->reg.file == NV_FILE_IMM)) {
949 data[0] = val->reg.imm.u32;
950 data[1] = 0;
951 rec = &ctx->imm;
952 }
953
954 if (!rec || !ld->def[0]->refc)
955 continue;
956
957 for (it = *rec; it; it = it->next)
958 if (it->data[0] == data[0] && it->data[1] == data[1])
959 break;
960
961 if (it) {
962 if (ld->def[0]->reg.id >= 0)
963 it->value = ld->def[0];
964 else
965 if (!ld->fixed)
966 nvc0_pc_replace_value(ctx->pc, ld->def[0], it->value);
967 } else {
968 if (ctx->alloc == LOAD_RECORD_POOL_SIZE)
969 continue;
970 it = &ctx->pool[ctx->alloc++];
971 it->next = *rec;
972 it->data[0] = data[0];
973 it->data[1] = data[1];
974 it->value = ld->def[0];
975 *rec = it;
976 }
977 }
978
979 ctx->imm = NULL;
980 ctx->mem_s = NULL;
981 ctx->mem_v = NULL;
982 for (j = 0; j < 16; ++j)
983 ctx->mem_c[j] = NULL;
984 ctx->mem_l = NULL;
985 ctx->alloc = 0;
986
987 DESCEND_ARBITRARY(j, nv_pass_reload_elim);
988 #endif
989 return 0;
990 }
991
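/* Build tex_mask from the TEX definitions that are actually referenced and
 * compact the live definitions to the front of the def array.
 */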
992 static int
993 nv_pass_tex_mask(struct nv_pass *ctx, struct nv_basic_block *b)
994 {
995 int i, c, j;
996
997 for (i = 0; i < ctx->pc->num_instructions; ++i) {
998 struct nv_instruction *nvi = &ctx->pc->instructions[i];
999 struct nv_value *def[4];
1000
1001 if (!nv_is_texture_op(nvi->opcode))
1002 continue;
1003 nvi->tex_mask = 0;
1004
1005 for (c = 0; c < 4; ++c) {
1006 if (nvi->def[c]->refc)
1007 nvi->tex_mask |= 1 << c;
1008 def[c] = nvi->def[c];
1009 }
1010
1011 j = 0;
1012 for (c = 0; c < 4; ++c)
1013 if (nvi->tex_mask & (1 << c))
1014 nvi->def[j++] = def[c];
1015 for (c = 0; c < 4; ++c)
1016 if (!(nvi->tex_mask & (1 << c)))
1017 nvi->def[j++] = def[c];
1018 assert(j == 4);
1019 }
1020 return 0;
1021 }
1022
1023 struct nv_pass_dce {
1024 struct nv_pc *pc;
1025 uint removed;
1026 };
1027
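/* Dead code elimination: delete instructions without side effects whose
 * results are never referenced (see inst_removable); the caller repeats this
 * pass until nothing more is removed.
 */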
1028 static int
1029 nv_pass_dce(struct nv_pass_dce *ctx, struct nv_basic_block *b)
1030 {
1031 int j;
1032 struct nv_instruction *nvi, *next;
1033
1034 for (nvi = b->phi ? b->phi : b->entry; nvi; nvi = next) {
1035 next = nvi->next;
1036
1037 if (inst_removable(nvi)) {
1038 nvc0_insn_delete(nvi);
1039 ++ctx->removed;
1040 }
1041 }
1042 DESCEND_ARBITRARY(j, nv_pass_dce);
1043
1044 return 0;
1045 }
1046
1047 /* Register allocation inserted ELSE blocks for all IF/ENDIF without ELSE.
1048 * Returns TRUE if @bb initiates an IF/ELSE/ENDIF clause, or is an IF with
1049 * BREAK and dummy ELSE block.
1050 */
1051 static INLINE boolean
1052 bb_is_if_else_endif(struct nv_basic_block *bb)
1053 {
1054 if (!bb->out[0] || !bb->out[1])
1055 return FALSE;
1056
1057 if (bb->out[0]->out_kind[0] == CFG_EDGE_LOOP_LEAVE) {
1058 return (bb->out[0]->out[1] == bb->out[1]->out[0] &&
1059 !bb->out[1]->out[1]);
1060 } else {
1061 return (bb->out[0]->out[0] == bb->out[1]->out[0] &&
1062 !bb->out[0]->out[1] &&
1063 !bb->out[1]->out[1]);
1064 }
1065 }
1066
1067 /* Predicate instructions and delete any branch at the end if it is
1068 * not a break from a loop.
1069 */
1070 static void
1071 predicate_instructions(struct nv_pc *pc, struct nv_basic_block *b,
1072 struct nv_value *pred, uint8_t cc)
1073 {
1074 struct nv_instruction *nvi, *prev;
1075 int s;
1076
1077 if (!b->entry)
1078 return;
1079 for (nvi = b->entry; nvi; nvi = nvi->next) {
1080 prev = nvi;
1081 if (inst_is_noop(nvi))
1082 continue;
1083 for (s = 0; nvi->src[s]; ++s);
1084 assert(s < 6);
1085 nvi->predicate = s;
1086 nvi->cc = cc;
1087 nv_reference(pc, nvi, nvi->predicate, pred);
1088 }
1089 if (prev->opcode == NV_OP_BRA &&
1090 b->out_kind[0] != CFG_EDGE_LOOP_LEAVE &&
1091 b->out_kind[1] != CFG_EDGE_LOOP_LEAVE)
1092 nvc0_insn_delete(prev);
1093 }
1094
1095 static INLINE boolean
1096 may_predicate_insn(struct nv_instruction *nvi, struct nv_value *pred)
1097 {
1098 if (nvi->def[0] && values_equal(nvi->def[0], pred))
1099 return FALSE;
1100 return nvc0_insn_is_predicateable(nvi);
1101 }
1102
1103 /* Transform IF/ELSE/ENDIF constructs into predicated instructions
1104 * where feasible.
1105 */
1106 static int
1107 nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b)
1108 {
1109 struct nv_instruction *nvi;
1110 struct nv_value *pred;
1111 int k;
1112 int n0, n1; /* instruction counts of outgoing blocks */
1113
1114 if (bb_is_if_else_endif(b)) {
1115 assert(b->exit && b->exit->opcode == NV_OP_BRA);
1116
1117 assert(b->exit->predicate >= 0);
1118 pred = b->exit->src[b->exit->predicate]->value;
1119
1120 n1 = n0 = 0;
1121 for (nvi = b->out[0]->entry; nvi; nvi = nvi->next, ++n0)
1122 if (!may_predicate_insn(nvi, pred))
1123 break;
1124 if (!nvi) {
1125 /* we're after register allocation, so there always is an ELSE block */
1126 for (nvi = b->out[1]->entry; nvi; nvi = nvi->next, ++n1)
1127 if (!may_predicate_insn(nvi, pred))
1128 break;
1129 }
1130
1131 /* 12 is an arbitrary limit */
1132 if (!nvi && n0 < 12 && n1 < 12) {
1133 predicate_instructions(ctx->pc, b->out[0], pred, !b->exit->cc);
1134 predicate_instructions(ctx->pc, b->out[1], pred, b->exit->cc);
1135
1136 nvc0_insn_delete(b->exit); /* delete the branch */
1137
1138 /* and a potential joinat before it */
1139 if (b->exit && b->exit->opcode == NV_OP_JOINAT)
1140 nvc0_insn_delete(b->exit);
1141
1142 /* remove join operations at the end of the conditional */
1143 k = (b->out[0]->out_kind[0] == CFG_EDGE_LOOP_LEAVE) ? 1 : 0;
1144 if ((nvi = b->out[0]->out[k]->entry)) {
1145 nvi->join = 0;
1146 if (nvi->opcode == NV_OP_JOIN)
1147 nvc0_insn_delete(nvi);
1148 }
1149 }
1150 }
1151 DESCEND_ARBITRARY(k, nv_pass_flatten);
1152
1153 return 0;
1154 }
1155
1156 /* Tests instructions for equality, but independently of sources. */
1157 static boolean
1158 is_operation_equal(struct nv_instruction *a, struct nv_instruction *b)
1159 {
1160 if (a->opcode != b->opcode)
1161 return FALSE;
1162 if (nv_is_texture_op(a->opcode)) {
1163 if (a->ext.tex.t != b->ext.tex.t ||
1164 a->ext.tex.s != b->ext.tex.s)
1165 return FALSE;
1166 if (a->tex_dim != b->tex_dim ||
1167 a->tex_array != b->tex_array ||
1168 a->tex_cube != b->tex_cube ||
1169 a->tex_shadow != b->tex_shadow ||
1170 a->tex_live != b->tex_live)
1171 return FALSE;
1172 } else
1173 if (a->opcode == NV_OP_CVT) {
1174 if (a->ext.cvt.s != b->ext.cvt.s ||
1175 a->ext.cvt.d != b->ext.cvt.d)
1176 return FALSE;
1177 } else
1178 if (NV_BASEOP(a->opcode) == NV_OP_SET ||
1179 NV_BASEOP(a->opcode) == NV_OP_SLCT) {
1180 if (a->set_cond != b->set_cond)
1181 return FALSE;
1182 } else
1183 if (a->opcode == NV_OP_LINTERP ||
1184 a->opcode == NV_OP_PINTERP) {
1185 if (a->centroid != b->centroid ||
1186 a->flat != b->flat)
1187 return FALSE;
1188 }
1189 if (a->cc != b->cc)
1190 return FALSE;
1191 if (a->lanes != b->lanes ||
1192 a->patch != b->patch ||
1193 a->saturate != b->saturate)
1194 return FALSE;
1195 if (a->opcode == NV_OP_QUADOP) /* beware quadon ! */
1196 return FALSE;
1197 return TRUE;
1198 }
1199
1200 /* local common subexpression elimination, stupid O(n^2) implementation */
1201 static int
1202 nv_pass_cse(struct nv_pass *ctx, struct nv_basic_block *b)
1203 {
1204 struct nv_instruction *ir, *ik, *next;
1205 struct nv_instruction *entry = b->phi ? b->phi : b->entry;
1206 int s, d;
1207 unsigned int reps;
1208
1209 do {
1210 reps = 0;
1211 for (ir = entry; ir; ir = next) {
1212 next = ir->next;
1213 if (ir->fixed)
1214 continue;
1215 for (ik = entry; ik != ir; ik = ik->next) {
1216 if (!is_operation_equal(ir, ik))
1217 continue;
1218 if (!ir->def[0] || !ik->def[0])
1219 continue;
1220
1221 if (ik->indirect != ir->indirect || ik->predicate != ir->predicate)
1222 continue;
1223
1224 for (d = 0; d < 4; ++d) {
1225 if ((ir->def[d] ? 1 : 0) != (ik->def[d] ? 1 : 0))
1226 break;
1227 if (ir->def[d]) {
1228 if (!values_equal(ik->def[0], ir->def[0]))
1229 break;
1230 } else {
1231 d = 4;
1232 break;
1233 }
1234 }
1235 if (d != 4)
1236 continue;
1237
1238 for (s = 0; s < 5; ++s) {
1239 struct nv_value *a, *b;
1240
1241 if ((ir->src[s] ? 1 : 0) != (ik->src[s] ? 1 : 0))
1242 break;
1243 if (!ir->src[s]) {
1244 s = 5;
1245 break;
1246 }
1247
1248 if (ik->src[s]->mod != ir->src[s]->mod)
1249 break;
1250 a = ik->src[s]->value;
1251 b = ir->src[s]->value;
1252 if (a == b)
1253 continue;
1254 if (a->reg.file != b->reg.file ||
1255 a->reg.id < 0 || /* this excludes memory loads/stores */
1256 a->reg.id != b->reg.id)
1257 break;
1258 }
1259 if (s == 5) {
1260 nvc0_insn_delete(ir);
1261 for (d = 0; d < 4 && ir->def[d]; ++d)
1262 nvc0_pc_replace_value(ctx->pc, ir->def[d], ik->def[d]);
1263 ++reps;
1264 break;
1265 }
1266 }
1267 }
1268 } while(reps);
1269
1270 DESCEND_ARBITRARY(s, nv_pass_cse);
1271
1272 return 0;
1273 }
1274
1275 /* Make sure all sources of an NV_OP_BIND are distinct, since they need to occupy
1276 * neighbouring registers. CSE might have messed this up.
1277 * Just generate a MOV for each source to avoid conflicts if they're used in
1278 * multiple NV_OP_BIND at different positions.
1279 */
1280 static int
1281 nv_pass_fix_bind(struct nv_pass *ctx, struct nv_basic_block *b)
1282 {
1283 struct nv_value *val;
1284 struct nv_instruction *bnd, *nvi, *next;
1285 int s;
1286
1287 for (bnd = b->entry; bnd; bnd = next) {
1288 next = bnd->next;
1289 if (bnd->opcode != NV_OP_BIND)
1290 continue;
1291 for (s = 0; s < 4 && bnd->src[s]; ++s) {
1292 val = bnd->src[s]->value;
1293
1294 nvi = nv_alloc_instruction(ctx->pc, NV_OP_MOV);
1295 nvi->def[0] = new_value_like(ctx->pc, val);
1296 nvi->def[0]->insn = nvi;
1297 nv_reference(ctx->pc, nvi, 0, val);
1298 nv_reference(ctx->pc, bnd, s, nvi->def[0]);
1299
1300 nvc0_insn_insert_before(bnd, nvi);
1301 }
1302 }
1303 DESCEND_ARBITRARY(s, nv_pass_fix_bind);
1304
1305 return 0;
1306 }
1307
1308 static int
1309 nv_pc_pass0(struct nv_pc *pc, struct nv_basic_block *root)
1310 {
1311 struct pass_reld_elim *reldelim;
1312 struct nv_pass pass;
1313 struct nv_pass_dce dce;
1314 int ret;
1315
1316 pass.n = 0;
1317 pass.pc = pc;
1318
1319 /* Do CSE so we can just compare values by pointer in subsequent passes. */
1320 pc->pass_seq++;
1321 ret = nv_pass_cse(&pass, root);
1322 if (ret)
1323 return ret;
1324
1325 /* Do this first, so we don't have to pay attention
1326 * to whether sources are supported memory loads.
1327 */
1328 pc->pass_seq++;
1329 ret = nv_pass_algebraic_opt(&pass, root);
1330 if (ret)
1331 return ret;
1332
1333 pc->pass_seq++;
1334 ret = nv_pass_lower_mods(&pass, root);
1335 if (ret)
1336 return ret;
1337
1338 pc->pass_seq++;
1339 ret = nvc0_pass_fold_loads(&pass, root);
1340 if (ret)
1341 return ret;
1342
1343 if (pc->opt_reload_elim) {
1344 reldelim = CALLOC_STRUCT(pass_reld_elim);
1345 reldelim->pc = pc;
1346
1347 pc->pass_seq++;
1348 ret = nv_pass_reload_elim(reldelim, root);
1349 if (ret) {
1350 FREE(reldelim);
1351 return ret;
1352 }
1353 memset(reldelim, 0, sizeof(struct pass_reld_elim));
1354 reldelim->pc = pc;
1355 }
1356
1357 /* May run DCE before load-combining since that pass will clean up
1358 * after itself.
1359 */
1360 dce.pc = pc;
1361 do {
1362 dce.removed = 0;
1363 pc->pass_seq++;
1364 ret = nv_pass_dce(&dce, root);
1365 if (ret)
1366 return ret;
1367 } while (dce.removed);
1368
1369 if (pc->opt_reload_elim) {
1370 pc->pass_seq++;
1371 ret = nv_pass_mem_opt(reldelim, root);
1372 if (!ret) {
1373 memset(reldelim, 0, sizeof(struct pass_reld_elim));
1374 reldelim->pc = pc;
1375
1376 pc->pass_seq++;
1377 ret = nv_pass_mem_opt(reldelim, root);
1378 }
1379 FREE(reldelim);
1380 if (ret)
1381 return ret;
1382 }
1383
1384 ret = nv_pass_tex_mask(&pass, root);
1385 if (ret)
1386 return ret;
1387
1388 pc->pass_seq++;
1389 ret = nv_pass_fix_bind(&pass, root);
1390
1391 return ret;
1392 }
1393
1394 int
1395 nvc0_pc_exec_pass0(struct nv_pc *pc)
1396 {
1397 int i, ret;
1398
1399 for (i = 0; i < pc->num_subroutines + 1; ++i)
1400 if (pc->root[i] && (ret = nv_pc_pass0(pc, pc->root[i])))
1401 return ret;
1402 return 0;
1403 }