[mesa.git] src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c
/*
 * Copyright 2010 Christoph Bumiller
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <unistd.h>

#define NOUVEAU_DEBUG 1

#include "pipe/p_shader_tokens.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_util.h"
#include "tgsi/tgsi_dump.h"
#include "util/u_dynarray.h"

#include "nvc0_pc.h"
#include "nvc0_program.h"

/* Arbitrary internal limits. */
#define BLD_MAX_TEMPS 64
#define BLD_MAX_ADDRS 4
#define BLD_MAX_PREDS 4
#define BLD_MAX_IMMDS 128
#define BLD_MAX_OUTPS PIPE_MAX_SHADER_OUTPUTS

#define BLD_MAX_COND_NESTING 8
#define BLD_MAX_LOOP_NESTING 4
#define BLD_MAX_CALL_NESTING 2

/* This structure represents a TGSI register. */
struct bld_register {
   struct nv_value *current;
   /* collect all SSA values assigned to it */
   struct util_dynarray vals;
   /* 1 bit per loop level, indicates if used/defd, reset when loop ends */
   uint16_t loop_use;
   uint16_t loop_def;
};

static INLINE struct nv_value **
bld_register_access(struct bld_register *reg, unsigned i)
{
   return util_dynarray_element(&reg->vals, struct nv_value *, i);
}

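/* Record a new SSA value for this register; a previous value from the same
 * basic block is superseded instead of appended, since only the last
 * definition per block matters for phi construction.
 */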
static INLINE void
bld_register_add_val(struct bld_register *reg, struct nv_value *val)
{
   struct nv_basic_block *bb = val->insn->bb;

   if (reg->vals.size &&
       (util_dynarray_top(&reg->vals, struct nv_value *))->insn->bb == bb)
      *(util_dynarray_top_ptr(&reg->vals, struct nv_value *)) = val;
   else
      util_dynarray_append(&reg->vals, struct nv_value *, val);
}

static INLINE boolean
bld_register_del_val(struct bld_register *reg, struct nv_value *val)
{
   unsigned i;

   for (i = reg->vals.size / sizeof(struct nv_value *); i > 0; --i)
      if (*bld_register_access(reg, i - 1) == val)
         break;
   if (!i)
      return FALSE;

   if (i != reg->vals.size / sizeof(struct nv_value *))
      *bld_register_access(reg, i - 1) = util_dynarray_pop(&reg->vals,
                                                           struct nv_value *);
   else
      reg->vals.size -= sizeof(struct nv_value *);

   return TRUE;
}

struct bld_context {
   struct nvc0_translation_info *ti;

   struct nv_pc *pc;
   struct nv_basic_block *b;

   struct tgsi_parse_context parse[BLD_MAX_CALL_NESTING];
   int call_lvl;

   struct nv_basic_block *cond_bb[BLD_MAX_COND_NESTING];
   struct nv_basic_block *join_bb[BLD_MAX_COND_NESTING];
   struct nv_basic_block *else_bb[BLD_MAX_COND_NESTING];
   int cond_lvl;
   struct nv_basic_block *loop_bb[BLD_MAX_LOOP_NESTING];
   struct nv_basic_block *brkt_bb[BLD_MAX_LOOP_NESTING];
   int loop_lvl;

   ubyte out_kind; /* CFG_EDGE_FORWARD, or FAKE in case of BREAK/CONT */

   struct bld_register tvs[BLD_MAX_TEMPS][4]; /* TGSI_FILE_TEMPORARY */
   struct bld_register avs[BLD_MAX_ADDRS][4]; /* TGSI_FILE_ADDRESS */
   struct bld_register pvs[BLD_MAX_PREDS][4]; /* TGSI_FILE_PREDICATE */
   struct bld_register ovs[BLD_MAX_OUTPS][4]; /* TGSI_FILE_OUTPUT, FP only */

   uint32_t outputs_written[(PIPE_MAX_SHADER_OUTPUTS + 7) / 8];
   int hpos_index;

   struct nv_value *zero;
   struct nv_value *frag_coord[4];

   /* wipe on new BB */
   struct nv_value *saved_sysvals[4];
   struct nv_value *saved_addr[4][2];
   struct nv_value *saved_inputs[PIPE_MAX_SHADER_INPUTS][4];
   struct nv_value *saved_immd[BLD_MAX_IMMDS];
   uint num_immds;
};

static INLINE ubyte
bld_register_file(struct bld_context *bld, struct bld_register *reg)
{
   if (reg < &bld->avs[0][0]) return NV_FILE_GPR;
   else
   if (reg < &bld->pvs[0][0]) return NV_FILE_GPR;
   else
   if (reg < &bld->ovs[0][0]) return NV_FILE_PRED;
   else
      return NV_FILE_MEM_V;
}

static INLINE struct nv_value *
bld_fetch(struct bld_context *bld, struct bld_register *regs, int i, int c)
{
   regs[i * 4 + c].loop_use |= 1 << bld->loop_lvl;
   return regs[i * 4 + c].current;
}

static struct nv_value *
bld_loop_phi(struct bld_context *, struct bld_register *, struct nv_value *);

/* If a variable is defined in a loop without prior use, we don't need
 * a phi in the loop header to account for backwards flow.
 *
 * However, if this variable is then also used outside the loop, we do
 * need a phi after all. But we must not use this phi's def inside the
 * loop, so we can eliminate the phi if it is unused later.
 */
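/* Illustration with hypothetical TGSI (not from the source):
 *
 *    BGNLOOP
 *      MOV TEMP[0].x, IMM[0]   # defined before any use in the loop,
 *    ENDLOOP                   # so no header phi is needed for the back edge
 *    MOV OUT[0].x, TEMP[0].x   # a use after the loop still resolves,
 *                              # via a phi added on demand
 */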
static INLINE void
bld_store(struct bld_context *bld,
          struct bld_register *regs, int i, int c, struct nv_value *val)
{
   const uint16_t m = 1 << bld->loop_lvl;
   struct bld_register *reg = &regs[i * 4 + c];

   if (bld->loop_lvl && !(m & (reg->loop_def | reg->loop_use)))
      bld_loop_phi(bld, reg, val);

   reg->current = val;
   bld_register_add_val(reg, reg->current);

   reg->loop_def |= 1 << bld->loop_lvl;
}

#define FETCH_TEMP(i, c)    bld_fetch(bld, &bld->tvs[0][0], i, c)
#define STORE_TEMP(i, c, v) bld_store(bld, &bld->tvs[0][0], i, c, (v))
#define FETCH_ADDR(i, c)    bld_fetch(bld, &bld->avs[0][0], i, c)
#define STORE_ADDR(i, c, v) bld_store(bld, &bld->avs[0][0], i, c, (v))
#define FETCH_PRED(i, c)    bld_fetch(bld, &bld->pvs[0][0], i, c)
#define STORE_PRED(i, c, v) bld_store(bld, &bld->pvs[0][0], i, c, (v))
#define STORE_OUTP(i, c, v)                                         \
   do {                                                             \
      bld_store(bld, &bld->ovs[0][0], i, c, (v));                   \
      bld->outputs_written[(i) / 8] |= 1 << (((i) * 4 + (c)) % 32); \
   } while (0)
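
/* Each output register takes 4 bits (one per component) in outputs_written,
 * so one 32-bit word tracks 8 output registers.
 */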

static INLINE void
bld_clear_def_use(struct bld_register *regs, int n, int lvl)
{
   int i;
   const uint16_t mask = ~(1 << lvl);

   for (i = 0; i < n * 4; ++i) {
      regs[i].loop_def &= mask;
      regs[i].loop_use &= mask;
   }
}

static INLINE void
bld_warn_uninitialized(struct bld_context *bld, int kind,
                       struct bld_register *reg, struct nv_basic_block *b)
{
#ifdef NOUVEAU_DEBUG
   long i = (reg - &bld->tvs[0][0]) / 4;
   long c = (reg - &bld->tvs[0][0]) & 3;

   if (c == 3)
      c = -1;
   debug_printf("WARNING: TEMP[%li].%c %s used uninitialized in BB:%i\n",
                i, (int)('x' + c), kind ? "may be" : "is", b->id);
#endif
}

static INLINE struct nv_value *
bld_def(struct nv_instruction *i, int c, struct nv_value *value)
{
   i->def[c] = value;
   value->insn = i;
   return value;
}

static INLINE struct nv_value *
find_by_bb(struct bld_register *reg, struct nv_basic_block *b)
{
   int i;

   if (reg->current && reg->current->insn->bb == b)
      return reg->current;

   for (i = 0; i < reg->vals.size / sizeof(struct nv_value *); ++i)
      if ((*bld_register_access(reg, i))->insn->bb == b)
         return *bld_register_access(reg, i);
   return NULL;
}

/* Fetch value from register that was defined in the specified BB,
 * or search for first definitions in all of its predecessors.
 */
static void
fetch_by_bb(struct bld_register *reg,
            struct nv_value **vals, int *n,
            struct nv_basic_block *b)
{
   int i;
   struct nv_value *val;

   assert(*n < 16); /* MAX_COND_NESTING */

   val = find_by_bb(reg, b);
   if (val) {
      for (i = 0; i < *n; ++i)
         if (vals[i] == val)
            return;
      vals[(*n)++] = val;
      return;
   }
   for (i = 0; i < b->num_in; ++i)
      if (!IS_WALL_EDGE(b->in_kind[i]))
         fetch_by_bb(reg, vals, n, b->in[i]);
}

static INLINE struct nv_value *
bld_load_imm_u32(struct bld_context *bld, uint32_t u);

static INLINE struct nv_value *
bld_undef(struct bld_context *bld, ubyte file)
{
   struct nv_instruction *nvi = new_instruction(bld->pc, NV_OP_UNDEF);

   return bld_def(nvi, 0, new_value(bld->pc, file, 4));
}

static struct nv_value *
bld_phi(struct bld_context *bld, struct nv_basic_block *b,
        struct bld_register *reg)
{
   struct nv_basic_block *in;
   struct nv_value *vals[16] = { NULL };
   struct nv_value *val;
   struct nv_instruction *phi;
   int i, j, n;

   do {
      i = n = 0;
      fetch_by_bb(reg, vals, &n, b);

      if (!n) {
         bld_warn_uninitialized(bld, 0, reg, b);
         return NULL;
      }

      if (n == 1) {
         if (nvc0_bblock_dominated_by(b, vals[0]->insn->bb))
            break;

         bld_warn_uninitialized(bld, 1, reg, b);

         /* back-tracking to insert missing value of other path */
         in = b;
         while (in->in[0]) {
            if (in->num_in == 1) {
               in = in->in[0];
            } else {
               if (!nvc0_bblock_reachable_by(in->in[0], vals[0]->insn->bb, b))
                  in = in->in[0];
               else
               if (!nvc0_bblock_reachable_by(in->in[1], vals[0]->insn->bb, b))
                  in = in->in[1];
               else
                  in = in->in[0];
            }
         }
         bld->pc->current_block = in;

         /* should make this a no-op */
         bld_register_add_val(reg, bld_undef(bld, vals[0]->reg.file));
         continue;
      }

      for (i = 0; i < n; ++i) {
         /* if value dominates b, continue to the redefinitions */
         if (nvc0_bblock_dominated_by(b, vals[i]->insn->bb))
            continue;

         /* if value dominates any in-block, b should be the dom frontier */
         for (j = 0; j < b->num_in; ++j)
            if (nvc0_bblock_dominated_by(b->in[j], vals[i]->insn->bb))
               break;
         /* otherwise, find the dominance frontier and put the phi there */
         if (j == b->num_in) {
            in = nvc0_bblock_dom_frontier(vals[i]->insn->bb);
            val = bld_phi(bld, in, reg);
            bld_register_add_val(reg, val);
            break;
         }
      }
   } while (i < n);

   bld->pc->current_block = b;

   if (n == 1)
      return vals[0];

   phi = new_instruction(bld->pc, NV_OP_PHI);

   bld_def(phi, 0, new_value(bld->pc, vals[0]->reg.file, vals[0]->reg.size));
   for (i = 0; i < n; ++i)
      nv_reference(bld->pc, phi, i, vals[i]);

   return phi->def[0];
}

/* Insert a phi function in the loop header.
 * For nested loops, we need to insert phi functions in all the outer
 * loop headers if they don't have one yet.
 *
 * @def: redefinition from inside loop, or NULL if to be replaced later
 */
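/* Sketch (hypothetical values): for "a = a + 1" inside a single loop, the
 * header receives "%phi = phi %a_before_loop, %a_from_loop_body"; nested
 * loops repeat this in each enclosing header that lacks a phi for the reg.
 */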
static struct nv_value *
bld_loop_phi(struct bld_context *bld, struct bld_register *reg,
             struct nv_value *def)
{
   struct nv_instruction *phi;
   struct nv_basic_block *bb = bld->pc->current_block;
   struct nv_value *val = NULL;

   if (bld->loop_lvl > 1) {
      --bld->loop_lvl;
      if (!((reg->loop_def | reg->loop_use) & (1 << bld->loop_lvl)))
         val = bld_loop_phi(bld, reg, NULL);
      ++bld->loop_lvl;
   }

   if (!val)
      val = bld_phi(bld, bld->pc->current_block, reg); /* old definition */
   if (!val) {
      bld->pc->current_block = bld->loop_bb[bld->loop_lvl - 1]->in[0];
      val = bld_undef(bld, bld_register_file(bld, reg));
   }

   bld->pc->current_block = bld->loop_bb[bld->loop_lvl - 1];

   phi = new_instruction(bld->pc, NV_OP_PHI);

   bld_def(phi, 0, new_value_like(bld->pc, val));
   if (!def)
      def = phi->def[0];

   bld_register_add_val(reg, phi->def[0]);

   phi->target = (struct nv_basic_block *)reg; /* cheat */

   nv_reference(bld->pc, phi, 0, val);
   nv_reference(bld->pc, phi, 1, def);

   bld->pc->current_block = bb;

   return phi->def[0];
}

static INLINE struct nv_value *
bld_fetch_global(struct bld_context *bld, struct bld_register *reg)
{
   const uint16_t m = 1 << bld->loop_lvl;
   const uint16_t use = reg->loop_use;

   reg->loop_use |= m;

   /* If neither used nor def'd inside the loop, build a phi in foresight,
    * so we don't have to replace stuff later on, which requires tracking.
    */
   if (bld->loop_lvl && !((use | reg->loop_def) & m))
      return bld_loop_phi(bld, reg, NULL);

   return bld_phi(bld, bld->pc->current_block, reg);
}

static INLINE struct nv_value *
bld_imm_u32(struct bld_context *bld, uint32_t u)
{
   int i;
   unsigned n = bld->num_immds;

   for (i = 0; i < n; ++i)
      if (bld->saved_immd[i]->reg.imm.u32 == u)
         return bld->saved_immd[i];

   assert(n < BLD_MAX_IMMDS);
   bld->num_immds++;

   bld->saved_immd[n] = new_value(bld->pc, NV_FILE_IMM, 4);
   bld->saved_immd[n]->reg.imm.u32 = u;
   return bld->saved_immd[n];
}

static void
bld_replace_value(struct nv_pc *, struct nv_basic_block *, struct nv_value *,
                  struct nv_value *);

/* Replace the source of the phi in the loop header by the last assignment,
 * or eliminate the phi function if there is no assignment inside the loop.
 *
 * Redundancy situation 1 - (used) but (not redefined) value:
 *   %3 = phi %0, %3 = %3 is used
 *   %3 = phi %0, %4 = is new definition
 *
 * Redundancy situation 2 - (not used) but (redefined) value:
 *   %3 = phi %0, %2 = %2 is used, %3 could be used outside, deleted by DCE
 */
static void
bld_loop_end(struct bld_context *bld, struct nv_basic_block *bb)
{
   struct nv_basic_block *save = bld->pc->current_block;
   struct nv_instruction *phi, *next;
   struct nv_value *val;
   struct bld_register *reg;
   int i, s, n;

   for (phi = bb->phi; phi && phi->opcode == NV_OP_PHI; phi = next) {
      next = phi->next;

      reg = (struct bld_register *)phi->target;
      phi->target = NULL;

      for (s = 1, n = 0; n < bb->num_in; ++n) {
         if (bb->in_kind[n] != CFG_EDGE_BACK)
            continue;

         assert(s < 4);
         bld->pc->current_block = bb->in[n];
         val = bld_fetch_global(bld, reg);

         for (i = 0; i < 4; ++i)
            if (phi->src[i] && phi->src[i]->value == val)
               break;
         if (i == 4)
            nv_reference(bld->pc, phi, s++, val);
      }
      bld->pc->current_block = save;

      if (phi->src[0]->value == phi->def[0] ||
          phi->src[0]->value == phi->src[1]->value)
         s = 1;
      else
      if (phi->src[1]->value == phi->def[0])
         s = 0;
      else
         continue;

      if (s >= 0) {
         /* eliminate the phi */
         bld_register_del_val(reg, phi->def[0]);

         ++bld->pc->pass_seq;
         bld_replace_value(bld->pc, bb, phi->def[0], phi->src[s]->value);

         nvc0_insn_delete(phi);
      }
   }
}

static INLINE struct nv_value *
bld_imm_f32(struct bld_context *bld, float f)
{
   return bld_imm_u32(bld, fui(f));
}

static struct nv_value *
bld_insn_1(struct bld_context *bld, uint opcode, struct nv_value *src0)
{
   struct nv_instruction *insn = new_instruction(bld->pc, opcode);

   nv_reference(bld->pc, insn, 0, src0);

   return bld_def(insn, 0, new_value(bld->pc, NV_FILE_GPR, src0->reg.size));
}

static struct nv_value *
bld_insn_2(struct bld_context *bld, uint opcode,
           struct nv_value *src0, struct nv_value *src1)
{
   struct nv_instruction *insn = new_instruction(bld->pc, opcode);

   nv_reference(bld->pc, insn, 0, src0);
   nv_reference(bld->pc, insn, 1, src1);

   return bld_def(insn, 0, new_value(bld->pc, NV_FILE_GPR, src0->reg.size));
}

static struct nv_value *
bld_insn_3(struct bld_context *bld, uint opcode,
           struct nv_value *src0, struct nv_value *src1,
           struct nv_value *src2)
{
   struct nv_instruction *insn = new_instruction(bld->pc, opcode);

   nv_reference(bld->pc, insn, 0, src0);
   nv_reference(bld->pc, insn, 1, src1);
   nv_reference(bld->pc, insn, 2, src2);

   return bld_def(insn, 0, new_value(bld->pc, NV_FILE_GPR, src0->reg.size));
}

static INLINE void
bld_src_predicate(struct bld_context *bld,
                  struct nv_instruction *nvi, int s, struct nv_value *val)
{
   nvi->predicate = s;
   nv_reference(bld->pc, nvi, s, val);
}

static INLINE void
bld_src_pointer(struct bld_context *bld,
                struct nv_instruction *nvi, int s, struct nv_value *val)
{
   nvi->indirect = s;
   nv_reference(bld->pc, nvi, s, val);
}

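/* Temporaries that may be addressed indirectly live in l[] (local memory)
 * rather than in SSA values; ofst counts 32-bit slots, ptr is an optional
 * address value for indirect access.
 */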
static void
bld_lmem_store(struct bld_context *bld, struct nv_value *ptr, int ofst,
               struct nv_value *val)
{
   struct nv_instruction *insn = new_instruction(bld->pc, NV_OP_ST);
   struct nv_value *loc;

   loc = new_value(bld->pc, NV_FILE_MEM_L, nv_type_sizeof(NV_TYPE_U32));

   loc->reg.address = ofst * 4;

   nv_reference(bld->pc, insn, 0, loc);
   nv_reference(bld->pc, insn, 1, val);
   if (ptr)
      bld_src_pointer(bld, insn, 2, ptr);
}

static struct nv_value *
bld_lmem_load(struct bld_context *bld, struct nv_value *ptr, int ofst)
{
   struct nv_value *loc, *val;

   loc = new_value(bld->pc, NV_FILE_MEM_L, nv_type_sizeof(NV_TYPE_U32));

   loc->reg.address = ofst * 4;

   val = bld_insn_1(bld, NV_OP_LD, loc);
   if (ptr)
      bld_src_pointer(bld, val->insn, 1, ptr);

   return val;
}

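/* pow(x, e) = 2^(e * log2(x)); PREEX2 is the pre-processing step the
 * hardware requires before EX2.
 */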
static struct nv_value *
bld_pow(struct bld_context *bld, struct nv_value *x, struct nv_value *e)
{
   struct nv_value *val;

   val = bld_insn_1(bld, NV_OP_LG2, x);
   val = bld_insn_2(bld, NV_OP_MUL_F32, e, val);

   val = bld_insn_1(bld, NV_OP_PREEX2, val);
   val = bld_insn_1(bld, NV_OP_EX2, val);

   return val;
}

static INLINE struct nv_value *
bld_load_imm_f32(struct bld_context *bld, float f)
{
   if (f == 0.0f)
      return bld->zero;
   return bld_insn_1(bld, NV_OP_MOV, bld_imm_f32(bld, f));
}

static INLINE struct nv_value *
bld_load_imm_u32(struct bld_context *bld, uint32_t u)
{
   if (u == 0)
      return bld->zero;
   return bld_insn_1(bld, NV_OP_MOV, bld_imm_u32(bld, u));
}

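/* Emit a comparison whose result is a 1-byte predicate register value;
 * cc selects the condition (LT, EQ, ...) checked by the set instruction.
 */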
static INLINE struct nv_value *
bld_setp(struct bld_context *bld, uint op, uint8_t cc,
         struct nv_value *src0, struct nv_value *src1)
{
   struct nv_value *val = bld_insn_2(bld, op, src0, src1);

   val->reg.file = NV_FILE_PRED;
   val->reg.size = 1;
   val->insn->set_cond = cc & 0xf;
   return val;
}

static INLINE struct nv_value *
bld_cvt(struct bld_context *bld, uint8_t dt, uint8_t st, struct nv_value *src)
{
   struct nv_value *val = bld_insn_1(bld, NV_OP_CVT, src);
   val->insn->ext.cvt.d = dt;
   val->insn->ext.cvt.s = st;
   return val;
}

static void
bld_kil(struct bld_context *bld, struct nv_value *src)
{
   struct nv_instruction *nvi;

   src = bld_setp(bld, NV_OP_SET_F32, NV_CC_LT, src, bld->zero);

   nvi = new_instruction(bld->pc, NV_OP_KIL);
   nvi->fixed = 1;

   bld_src_predicate(bld, nvi, 0, src);
}

static void
bld_flow(struct bld_context *bld, uint opcode,
         struct nv_value *pred, uint8_t cc, struct nv_basic_block *target,
         boolean reconverge)
{
   struct nv_instruction *nvi;

   if (reconverge)
      new_instruction(bld->pc, NV_OP_JOINAT)->fixed = 1;

   nvi = new_instruction(bld->pc, opcode);
   nvi->target = target;
   nvi->terminator = 1;
   if (pred) {
      nvi->cc = cc;
      bld_src_predicate(bld, nvi, 0, pred);
   }
}

static ubyte
translate_setcc(unsigned opcode)
{
   switch (opcode) {
   case TGSI_OPCODE_SLT: return NV_CC_LT;
   case TGSI_OPCODE_SGE: return NV_CC_GE;
   case TGSI_OPCODE_SEQ: return NV_CC_EQ;
   case TGSI_OPCODE_SGT: return NV_CC_GT;
   case TGSI_OPCODE_SLE: return NV_CC_LE;
   case TGSI_OPCODE_SNE: return NV_CC_NE | NV_CC_U;
   case TGSI_OPCODE_STR: return NV_CC_TR;
   case TGSI_OPCODE_SFL: return NV_CC_FL;

   case TGSI_OPCODE_ISLT: return NV_CC_LT;
   case TGSI_OPCODE_ISGE: return NV_CC_GE;
   case TGSI_OPCODE_USEQ: return NV_CC_EQ;
   case TGSI_OPCODE_USGE: return NV_CC_GE;
   case TGSI_OPCODE_USLT: return NV_CC_LT;
   case TGSI_OPCODE_USNE: return NV_CC_NE;
   default:
      assert(0);
      return NV_CC_FL;
   }
}

static uint
translate_opcode(uint opcode)
{
   switch (opcode) {
   case TGSI_OPCODE_ABS: return NV_OP_ABS_F32;
   case TGSI_OPCODE_ADD: return NV_OP_ADD_F32;
   case TGSI_OPCODE_SUB: return NV_OP_SUB_F32;
   case TGSI_OPCODE_UADD: return NV_OP_ADD_B32;
   case TGSI_OPCODE_AND: return NV_OP_AND;
   case TGSI_OPCODE_EX2: return NV_OP_EX2;
   case TGSI_OPCODE_CEIL: return NV_OP_CEIL;
   case TGSI_OPCODE_FLR: return NV_OP_FLOOR;
   case TGSI_OPCODE_TRUNC: return NV_OP_TRUNC;
   case TGSI_OPCODE_COS: return NV_OP_COS;
   case TGSI_OPCODE_SIN: return NV_OP_SIN;
   case TGSI_OPCODE_DDX: return NV_OP_DFDX;
   case TGSI_OPCODE_DDY: return NV_OP_DFDY;
   case TGSI_OPCODE_F2I:
   case TGSI_OPCODE_F2U:
   case TGSI_OPCODE_I2F:
   case TGSI_OPCODE_U2F: return NV_OP_CVT;
   case TGSI_OPCODE_INEG: return NV_OP_NEG_S32;
   case TGSI_OPCODE_LG2: return NV_OP_LG2;
   case TGSI_OPCODE_ISHR: return NV_OP_SAR;
   case TGSI_OPCODE_USHR: return NV_OP_SHR;
   case TGSI_OPCODE_MAD: return NV_OP_MAD_F32;
   case TGSI_OPCODE_MAX: return NV_OP_MAX_F32;
   case TGSI_OPCODE_IMAX: return NV_OP_MAX_S32;
   case TGSI_OPCODE_UMAX: return NV_OP_MAX_U32;
   case TGSI_OPCODE_MIN: return NV_OP_MIN_F32;
   case TGSI_OPCODE_IMIN: return NV_OP_MIN_S32;
   case TGSI_OPCODE_UMIN: return NV_OP_MIN_U32;
   case TGSI_OPCODE_MUL: return NV_OP_MUL_F32;
   case TGSI_OPCODE_UMUL: return NV_OP_MUL_B32;
   case TGSI_OPCODE_OR: return NV_OP_OR;
   case TGSI_OPCODE_RCP: return NV_OP_RCP;
   case TGSI_OPCODE_RSQ: return NV_OP_RSQ;
   case TGSI_OPCODE_SAD: return NV_OP_SAD;
   case TGSI_OPCODE_SHL: return NV_OP_SHL;
   case TGSI_OPCODE_SLT:
   case TGSI_OPCODE_SGE:
   case TGSI_OPCODE_SEQ:
   case TGSI_OPCODE_SGT:
   case TGSI_OPCODE_SLE:
   case TGSI_OPCODE_SNE: return NV_OP_FSET_F32;
   case TGSI_OPCODE_ISLT:
   case TGSI_OPCODE_ISGE: return NV_OP_SET_S32;
   case TGSI_OPCODE_USEQ:
   case TGSI_OPCODE_USGE:
   case TGSI_OPCODE_USLT:
   case TGSI_OPCODE_USNE: return NV_OP_SET_U32;
   case TGSI_OPCODE_TEX: return NV_OP_TEX;
   case TGSI_OPCODE_TXP: return NV_OP_TEX;
   case TGSI_OPCODE_TXB: return NV_OP_TXB;
   case TGSI_OPCODE_TXL: return NV_OP_TXL;
   case TGSI_OPCODE_XOR: return NV_OP_XOR;
   default:
      return NV_OP_NOP;
   }
}

#if 0
static ubyte
infer_src_type(unsigned opcode)
{
   switch (opcode) {
   case TGSI_OPCODE_MOV:
   case TGSI_OPCODE_AND:
   case TGSI_OPCODE_OR:
   case TGSI_OPCODE_XOR:
   case TGSI_OPCODE_SAD:
   case TGSI_OPCODE_U2F:
   case TGSI_OPCODE_UADD:
   case TGSI_OPCODE_UDIV:
   case TGSI_OPCODE_UMOD:
   case TGSI_OPCODE_UMAD:
   case TGSI_OPCODE_UMUL:
   case TGSI_OPCODE_UMAX:
   case TGSI_OPCODE_UMIN:
   case TGSI_OPCODE_USEQ:
   case TGSI_OPCODE_USGE:
   case TGSI_OPCODE_USLT:
   case TGSI_OPCODE_USNE:
   case TGSI_OPCODE_USHR:
      return NV_TYPE_U32;
   case TGSI_OPCODE_I2F:
   case TGSI_OPCODE_IDIV:
   case TGSI_OPCODE_IMAX:
   case TGSI_OPCODE_IMIN:
   case TGSI_OPCODE_INEG:
   case TGSI_OPCODE_ISGE:
   case TGSI_OPCODE_ISHR:
   case TGSI_OPCODE_ISLT:
      return NV_TYPE_S32;
   default:
      return NV_TYPE_F32;
   }
}

static ubyte
infer_dst_type(unsigned opcode)
{
   switch (opcode) {
   case TGSI_OPCODE_MOV:
   case TGSI_OPCODE_F2U:
   case TGSI_OPCODE_AND:
   case TGSI_OPCODE_OR:
   case TGSI_OPCODE_XOR:
   case TGSI_OPCODE_SAD:
   case TGSI_OPCODE_UADD:
   case TGSI_OPCODE_UDIV:
   case TGSI_OPCODE_UMOD:
   case TGSI_OPCODE_UMAD:
   case TGSI_OPCODE_UMUL:
   case TGSI_OPCODE_UMAX:
   case TGSI_OPCODE_UMIN:
   case TGSI_OPCODE_USEQ:
   case TGSI_OPCODE_USGE:
   case TGSI_OPCODE_USLT:
   case TGSI_OPCODE_USNE:
   case TGSI_OPCODE_USHR:
      return NV_TYPE_U32;
   case TGSI_OPCODE_F2I:
   case TGSI_OPCODE_IDIV:
   case TGSI_OPCODE_IMAX:
   case TGSI_OPCODE_IMIN:
   case TGSI_OPCODE_INEG:
   case TGSI_OPCODE_ISGE:
   case TGSI_OPCODE_ISHR:
   case TGSI_OPCODE_ISLT:
      return NV_TYPE_S32;
   default:
      return NV_TYPE_F32;
   }
}
#endif

static void
emit_store(struct bld_context *bld, const struct tgsi_full_instruction *inst,
           unsigned chan, struct nv_value *res)
{
   const struct tgsi_full_dst_register *reg = &inst->Dst[0];
   struct nv_instruction *nvi;
   struct nv_value *mem;
   struct nv_value *ptr = NULL;
   int idx;

   idx = reg->Register.Index;
   assert(chan < 4);

   if (reg->Register.Indirect)
      ptr = FETCH_ADDR(reg->Indirect.Index,
                       tgsi_util_get_src_register_swizzle(&reg->Indirect, 0));

   switch (inst->Instruction.Saturate) {
   case TGSI_SAT_NONE:
      break;
   case TGSI_SAT_ZERO_ONE:
      res = bld_insn_1(bld, NV_OP_SAT, res);
      break;
   case TGSI_SAT_MINUS_PLUS_ONE:
      res = bld_insn_2(bld, NV_OP_MAX_F32, res, bld_load_imm_f32(bld, -1.0f));
      res = bld_insn_2(bld, NV_OP_MIN_F32, res, bld_load_imm_f32(bld, 1.0f));
      break;
   }

   switch (reg->Register.File) {
   case TGSI_FILE_OUTPUT:
      if (!res->insn)
         res = bld_insn_1(bld, NV_OP_MOV, res);

      if (bld->pc->is_fragprog) {
         assert(!ptr);
         STORE_OUTP(idx, chan, res);
      } else {
         nvi = new_instruction(bld->pc, NV_OP_EXPORT);
         mem = new_value(bld->pc, bld->ti->output_file, res->reg.size);
         nv_reference(bld->pc, nvi, 0, mem);
         nv_reference(bld->pc, nvi, 1, res);
         if (!ptr)
            mem->reg.address = bld->ti->output_loc[idx][chan];
         else
            mem->reg.address = 0x80 + idx * 16 + chan * 4;
         nvi->fixed = 1;
      }
      break;
   case TGSI_FILE_TEMPORARY:
      assert(idx < BLD_MAX_TEMPS);
      if (!res->insn || res->insn->bb != bld->pc->current_block)
         res = bld_insn_1(bld, NV_OP_MOV, res);

      assert(res->reg.file == NV_FILE_GPR);

      if (bld->ti->require_stores)
         bld_lmem_store(bld, ptr, idx * 4 + chan, res);
      else
         STORE_TEMP(idx, chan, res);
      break;
   case TGSI_FILE_ADDRESS:
      assert(idx < BLD_MAX_ADDRS);
      STORE_ADDR(idx, chan, res);
      break;
   }
}

static INLINE uint32_t
bld_is_output_written(struct bld_context *bld, int i, int c)
{
   if (c < 0)
      return bld->outputs_written[i / 8] & (0xf << ((i * 4) % 32));
   return bld->outputs_written[i / 8] & (1 << ((i * 4 + c) % 32));
}

static void
bld_append_vp_ucp(struct bld_context *bld)
{
   struct nv_value *res[6];
   struct nv_value *ucp, *vtx, *out;
   struct nv_instruction *insn;
   int i, c;

   assert(bld->ti->prog->vp.num_ucps <= 6);

   for (c = 0; c < 4; ++c) {
      vtx = bld_fetch_global(bld, &bld->ovs[bld->hpos_index][c]);

      for (i = 0; i < bld->ti->prog->vp.num_ucps; ++i) {
         ucp = new_value(bld->pc, NV_FILE_MEM_C(15), 4);
         ucp->reg.address = i * 16 + c * 4;

         if (c == 0)
            res[i] = bld_insn_2(bld, NV_OP_MUL_F32, vtx, ucp);
         else
            res[i] = bld_insn_3(bld, NV_OP_MAD_F32, vtx, ucp, res[i]);
      }
   }

   for (i = 0; i < bld->ti->prog->vp.num_ucps; ++i) {
      (out = new_value(bld->pc, NV_FILE_MEM_V, 4))->reg.address = 0x2c0 + i * 4;
      (insn = new_instruction(bld->pc, NV_OP_EXPORT))->fixed = 1;
      nv_reference(bld->pc, insn, 0, out);
      nv_reference(bld->pc, insn, 1, res[i]);
   }
}

static void
bld_export_fp_outputs(struct bld_context *bld)
{
   struct nv_value *vals[4];
   struct nv_instruction *nvi;
   int i, c, n;

   for (i = 0; i < PIPE_MAX_SHADER_OUTPUTS; ++i) {
      if (!bld_is_output_written(bld, i, -1))
         continue;
      for (n = 0, c = 0; c < 4; ++c) {
         if (!bld_is_output_written(bld, i, c))
            continue;
         vals[n] = bld_fetch_global(bld, &bld->ovs[i][c]);
         assert(vals[n]);
         vals[n] = bld_insn_1(bld, NV_OP_MOV, vals[n]);
         vals[n++]->reg.id = bld->ti->output_loc[i][c];
      }
      assert(n);

      (nvi = new_instruction(bld->pc, NV_OP_EXPORT))->fixed = 1;
      for (c = 0; c < n; ++c)
         nv_reference(bld->pc, nvi, c, vals[c]);
   }
}

static void
bld_new_block(struct bld_context *bld, struct nv_basic_block *b)
{
   int i, c;

   bld->pc->current_block = b;

   for (i = 0; i < 4; ++i)
      bld->saved_addr[i][0] = NULL;
   for (i = 0; i < PIPE_MAX_SHADER_INPUTS; ++i)
      for (c = 0; c < 4; ++c)
         bld->saved_inputs[i][c] = NULL;

   bld->out_kind = CFG_EDGE_FORWARD;
}

static struct nv_value *
bld_interp(struct bld_context *bld, unsigned mode, struct nv_value *val)
{
   unsigned cent = mode & NVC0_INTERP_CENTROID;

   mode &= ~NVC0_INTERP_CENTROID;

   if (val->reg.address == 0x3fc) {
      /* gl_FrontFacing: 0/~0 to -1.0/+1.0 */
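      /* SHL by 31 moves bit 0 into the sign bit, so the XOR with -1.0f
       * (0xbf800000) yields +1.0f (0x3f800000) for ~0 and -1.0f for 0.
       */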
      val = bld_insn_1(bld, NV_OP_LINTERP, val);
      val->insn->flat = 1;
      val = bld_insn_2(bld, NV_OP_SHL, val, bld_imm_u32(bld, 31));
      val = bld_insn_2(bld, NV_OP_XOR, val, bld_imm_f32(bld, -1.0f));
      return val;
   } else
   if (mode == NVC0_INTERP_PERSPECTIVE) {
      val = bld_insn_2(bld, NV_OP_PINTERP, val, bld->frag_coord[3]);
   } else {
      val = bld_insn_1(bld, NV_OP_LINTERP, val);
   }

   val->insn->flat = mode == NVC0_INTERP_FLAT ? 1 : 0;
   val->insn->centroid = cent ? 1 : 0;
   return val;
}

static struct nv_value *
emit_fetch(struct bld_context *bld, const struct tgsi_full_instruction *insn,
           const unsigned s, const unsigned chan)
{
   const struct tgsi_full_src_register *src = &insn->Src[s];
   struct nv_value *res = NULL;
   struct nv_value *ptr = NULL;
   int idx, ind_idx, dim_idx;
   unsigned swz, ind_swz, sgn;

   idx = src->Register.Index;
   swz = tgsi_util_get_full_src_register_swizzle(src, chan);

   if (src->Register.Indirect) {
      ind_idx = src->Indirect.Index;
      ind_swz = tgsi_util_get_src_register_swizzle(&src->Indirect, 0);

      ptr = FETCH_ADDR(ind_idx, ind_swz);
   }

   if (src->Register.Dimension)
      dim_idx = src->Dimension.Index;
   else
      dim_idx = 0;

   switch (src->Register.File) {
   case TGSI_FILE_CONSTANT:
      assert(dim_idx < 14);
      res = new_value(bld->pc, NV_FILE_MEM_C(dim_idx), 4);
      res->reg.address = idx * 16 + swz * 4;
      res = bld_insn_1(bld, NV_OP_LD, res);
      if (ptr)
         bld_src_pointer(bld, res->insn, 1, ptr);
      break;
   case TGSI_FILE_IMMEDIATE: /* XXX: type for MOV TEMP[0], -IMM[0] */
      assert(idx < bld->ti->immd32_nr);
      res = bld_load_imm_u32(bld, bld->ti->immd32[idx * 4 + swz]);
      break;
   case TGSI_FILE_INPUT:
      assert(!src->Register.Dimension);
      if (!ptr) {
         res = bld->saved_inputs[idx][swz];
         if (res)
            break;
      }
      res = new_value(bld->pc, bld->ti->input_file, 4);
      if (ptr)
         res->reg.address = 0x80 + idx * 16 + swz * 4;
      else
         res->reg.address = bld->ti->input_loc[idx][swz];

      if (bld->pc->is_fragprog)
         res = bld_interp(bld, bld->ti->interp_mode[idx], res);
      else
         res = bld_insn_1(bld, NV_OP_VFETCH, res);

      if (ptr)
         bld_src_pointer(bld, res->insn, res->insn->src[1] ? 2 : 1, ptr);
      else
         bld->saved_inputs[idx][swz] = res;
      break;
   case TGSI_FILE_TEMPORARY:
      if (bld->ti->require_stores)
         res = bld_lmem_load(bld, ptr, idx * 4 + swz);
      else
         res = bld_fetch_global(bld, &bld->tvs[idx][swz]);
      break;
   case TGSI_FILE_ADDRESS:
      res = bld_fetch_global(bld, &bld->avs[idx][swz]);
      break;
   case TGSI_FILE_PREDICATE:
      res = bld_fetch_global(bld, &bld->pvs[idx][swz]);
      break;
   case TGSI_FILE_SYSTEM_VALUE:
      assert(bld->ti->sysval_loc[idx] < 0xf00); /* >= would mean special reg */
      res = new_value(bld->pc,
                      bld->pc->is_fragprog ? NV_FILE_MEM_V : NV_FILE_MEM_A, 4);
      res->reg.address = bld->ti->sysval_loc[idx];

      if (res->reg.file == NV_FILE_MEM_A)
         res = bld_insn_1(bld, NV_OP_VFETCH, res);
      else
         res = bld_interp(bld, NVC0_INTERP_FLAT, res);

      /* mesa doesn't do real integers yet :-( (and in GL this should be S32) */
      res = bld_cvt(bld, NV_TYPE_F32, NV_TYPE_U32, res);
      break;
   default:
      NOUVEAU_ERR("illegal/unhandled src reg file: %d\n", src->Register.File);
      abort();
      break;
   }
   if (!res)
      return bld_undef(bld, NV_FILE_GPR);

   sgn = tgsi_util_get_full_src_register_sign_mode(src, chan);

   switch (sgn) {
   case TGSI_UTIL_SIGN_KEEP:
      break;
   case TGSI_UTIL_SIGN_CLEAR:
      res = bld_insn_1(bld, NV_OP_ABS_F32, res);
      break;
   case TGSI_UTIL_SIGN_TOGGLE:
      res = bld_insn_1(bld, NV_OP_NEG_F32, res);
      break;
   case TGSI_UTIL_SIGN_SET:
      res = bld_insn_1(bld, NV_OP_ABS_F32, res);
      res = bld_insn_1(bld, NV_OP_NEG_F32, res);
      break;
   default:
      NOUVEAU_ERR("illegal/unhandled src reg sign mode\n");
      abort();
      break;
   }

   return res;
}

static void
bld_lit(struct bld_context *bld, struct nv_value *dst0[4],
        const struct tgsi_full_instruction *insn)
{
   struct nv_value *val0 = NULL;
   unsigned mask = insn->Dst[0].Register.WriteMask;

   if (mask & ((1 << 0) | (1 << 3)))
      dst0[3] = dst0[0] = bld_load_imm_f32(bld, 1.0f);

   if (mask & (3 << 1)) {
      val0 = bld_insn_2(bld, NV_OP_MAX_F32,
                        emit_fetch(bld, insn, 0, 0), bld->zero);
      if (mask & (1 << 1))
         dst0[1] = val0;
   }

   if (mask & (1 << 2)) {
      struct nv_value *val1, *val3, *src1, *src3, *pred;
      struct nv_value *pos128 = bld_load_imm_f32(bld, 127.999999f);
      struct nv_value *neg128 = bld_load_imm_f32(bld, -127.999999f);

      src1 = emit_fetch(bld, insn, 0, 1);
      src3 = emit_fetch(bld, insn, 0, 3);

      pred = bld_setp(bld, NV_OP_SET_F32, NV_CC_LE, val0, bld->zero);

      val1 = bld_insn_2(bld, NV_OP_MAX_F32, src1, bld->zero);
      val3 = bld_insn_2(bld, NV_OP_MAX_F32, src3, neg128);
      val3 = bld_insn_2(bld, NV_OP_MIN_F32, val3, pos128);
      val3 = bld_pow(bld, val1, val3);

      dst0[2] = bld_insn_1(bld, NV_OP_MOV, bld->zero);
      bld_src_predicate(bld, dst0[2]->insn, 1, pred);

      dst0[2] = bld_insn_2(bld, NV_OP_SELECT, val3, dst0[2]);
   }
}

static INLINE void
describe_texture_target(unsigned target, int *dim,
                        int *array, int *cube, int *shadow)
{
   *array = *cube = *shadow = 0;

   switch (target) {
   case TGSI_TEXTURE_1D:
      *dim = 1;
      break;
   case TGSI_TEXTURE_SHADOW1D:
      *dim = *shadow = 1;
      break;
   case TGSI_TEXTURE_UNKNOWN:
   case TGSI_TEXTURE_2D:
   case TGSI_TEXTURE_RECT:
      *dim = 2;
      break;
   case TGSI_TEXTURE_SHADOW2D:
   case TGSI_TEXTURE_SHADOWRECT:
      *dim = 2;
      *shadow = 1;
      break;
   case TGSI_TEXTURE_3D:
      *dim = 3;
      break;
   case TGSI_TEXTURE_CUBE:
      *dim = 2;
      *cube = 1;
      break;
   case TGSI_TEXTURE_1D_ARRAY:
      *dim = *array = 1;
      break;
   case TGSI_TEXTURE_2D_ARRAY:
      *dim = 2;
      *array = 1;
      break;
   /*
   case TGSI_TEXTURE_SHADOW1D_ARRAY:
      *dim = *array = *shadow = 1;
      break;
   case TGSI_TEXTURE_SHADOW2D_ARRAY:
      *dim = 2;
      *array = *shadow = 1;
      break;
   case TGSI_TEXTURE_CUBE_ARRAY:
      *dim = 2;
      *cube = *array = 1;
      break;
   */
   default:
      assert(0);
      break;
   }
}

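/* Duplicate an instruction, giving the copy fresh result values while
 * referencing the same sources; used e.g. below to re-emit a PINTERP
 * as a LINTERP with different sources for projective texturing.
 */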
static struct nv_value *
bld_clone(struct bld_context *bld, struct nv_instruction *nvi)
{
   struct nv_instruction *dupi = new_instruction(bld->pc, nvi->opcode);
   struct nv_instruction *next, *prev;
   int c;

   next = dupi->next;
   prev = dupi->prev;

   *dupi = *nvi;

   dupi->next = next;
   dupi->prev = prev;

   for (c = 0; c < 5 && nvi->def[c]; ++c)
      bld_def(dupi, c, new_value_like(bld->pc, nvi->def[c]));

   for (c = 0; c < 6 && nvi->src[c]; ++c) {
      dupi->src[c] = NULL;
      nv_reference(bld->pc, dupi, c, nvi->src[c]->value);
   }

   return dupi->def[0];
}

/* NOTE: proj(t0) = (t0 / w) / (tc3 / w) = tc0 / tc2 handled by optimizer */
static void
load_proj_tex_coords(struct bld_context *bld,
                     struct nv_value *t[4], int dim, int shadow,
                     const struct tgsi_full_instruction *insn)
{
   int c;
   unsigned mask = (1 << dim) - 1;

   if (shadow)
      mask |= 4; /* depth comparison value */

   t[3] = emit_fetch(bld, insn, 0, 3);
   if (t[3]->insn->opcode == NV_OP_PINTERP) {
      t[3] = bld_clone(bld, t[3]->insn);
      t[3]->insn->opcode = NV_OP_LINTERP;
      nv_reference(bld->pc, t[3]->insn, 1, NULL);
   }
   t[3] = bld_insn_1(bld, NV_OP_RCP, t[3]);

   for (c = 0; c < 4; ++c) {
      if (!(mask & (1 << c)))
         continue;
      t[c] = emit_fetch(bld, insn, 0, c);

      if (t[c]->insn->opcode != NV_OP_PINTERP)
         continue;
      mask &= ~(1 << c);

      t[c] = bld_clone(bld, t[c]->insn);
      nv_reference(bld->pc, t[c]->insn, 1, t[3]);
   }
   if (mask == 0)
      return;

   t[3] = emit_fetch(bld, insn, 0, 3);
   t[3] = bld_insn_1(bld, NV_OP_RCP, t[3]);

   for (c = 0; c < 4; ++c)
      if (mask & (1 << c))
         t[c] = bld_insn_2(bld, NV_OP_MUL_F32, t[c], t[3]);
}

/* For a quad of threads / top left, top right, bottom left, bottom right
 * pixels, do a different operation, and take src0 from a specific thread.
 */
#define QOP_ADD  0
#define QOP_SUBR 1
#define QOP_SUB  2
#define QOP_MOV1 3

#define QOP(a, b, c, d) \
   ((QOP_##a << 0) | (QOP_##b << 2) | (QOP_##c << 4) | (QOP_##d << 6))

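/* Hypothetical example: QOP(SUBR, SUB, SUBR, SUB) would encode an
 * add/subtract pattern across the quad as used for horizontal derivatives;
 * each 2-bit field selects the operation for one of the four pixels.
 */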
static INLINE struct nv_value *
bld_quadop(struct bld_context *bld, ubyte qop, struct nv_value *src0, int lane,
           struct nv_value *src1, boolean wp)
{
   struct nv_value *val = bld_insn_2(bld, NV_OP_QUADOP, src0, src1);
   val->insn->lanes = lane;
   val->insn->quadop = qop;
   if (wp) {
      assert(!"quadop predicate write");
   }
   return val;
}

/* order of TGSI operands: x y z layer shadow lod/bias */
/* order of native operands: layer x y z | lod/bias shadow */
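/* E.g. a (hypothetical) 2D array shadow lookup with bias arrives from TGSI
 * as { x y layer shadow bias } and is bound for the hardware as { layer x y }
 * plus { bias shadow } in adjacent registers.
 */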
static struct nv_instruction *
emit_tex(struct bld_context *bld, uint opcode, int tic, int tsc,
         struct nv_value *dst[4], struct nv_value *arg[4],
         int dim, int array, int cube, int shadow)
{
   struct nv_value *src[4];
   struct nv_instruction *nvi, *bnd;
   int c;
   int s = 0;
   boolean lodbias = opcode == NV_OP_TXB || opcode == NV_OP_TXL;

   if (array)
      arg[dim] = bld_cvt(bld, NV_TYPE_U32, NV_TYPE_F32, arg[dim]);

   /* bind { layer x y z } and { lod/bias shadow } to adjacent regs */

   bnd = new_instruction(bld->pc, NV_OP_BIND);
   if (array) {
      src[s] = new_value(bld->pc, NV_FILE_GPR, 4);
      bld_def(bnd, s, src[s]);
      nv_reference(bld->pc, bnd, s++, arg[dim + cube]);
   }
   for (c = 0; c < dim + cube; ++c, ++s) {
      src[s] = bld_def(bnd, s, new_value(bld->pc, NV_FILE_GPR, 4));
      nv_reference(bld->pc, bnd, s, arg[c]);
   }

   if (shadow || lodbias) {
      bnd = new_instruction(bld->pc, NV_OP_BIND);

      if (lodbias) {
         src[s] = new_value(bld->pc, NV_FILE_GPR, 4);
         bld_def(bnd, 0, src[s++]);
         nv_reference(bld->pc, bnd, 0, arg[dim + cube + array + shadow]);
      }
      if (shadow) {
         src[s] = new_value(bld->pc, NV_FILE_GPR, 4);
         bld_def(bnd, lodbias, src[s++]);
         nv_reference(bld->pc, bnd, lodbias, arg[dim + cube + array]);
      }
   }

   nvi = new_instruction(bld->pc, opcode);
   for (c = 0; c < 4; ++c)
      dst[c] = bld_def(nvi, c, new_value(bld->pc, NV_FILE_GPR, 4));
   for (c = 0; c < s; ++c)
      nv_reference(bld->pc, nvi, c, src[c]);

   nvi->ext.tex.t = tic;
   nvi->ext.tex.s = tsc;
   nvi->tex_mask = 0xf;
   nvi->tex_cube = cube;
   nvi->tex_dim = dim;
   nvi->tex_shadow = shadow;
   nvi->tex_array = array;
   nvi->tex_live = 0;

   return nvi;
}

static void
bld_tex(struct bld_context *bld, struct nv_value *dst0[4],
        const struct tgsi_full_instruction *insn)
{
   struct nv_value *t[4], *s[3];
   uint opcode = translate_opcode(insn->Instruction.Opcode);
   int c, dim, array, cube, shadow;
   const int lodbias = opcode == NV_OP_TXB || opcode == NV_OP_TXL;
   const int tic = insn->Src[1].Register.Index;
   const int tsc = tic;

   describe_texture_target(insn->Texture.Texture, &dim, &array, &cube, &shadow);

   assert(dim + array + shadow + lodbias <= 5);

   if (!cube && !array && insn->Instruction.Opcode == TGSI_OPCODE_TXP)
      load_proj_tex_coords(bld, t, dim, shadow, insn);
   else {
      for (c = 0; c < dim + cube + array; ++c)
         t[c] = emit_fetch(bld, insn, 0, c);
      if (shadow)
         t[c] = emit_fetch(bld, insn, 0, MAX2(c, 2));
   }

   if (cube) {
      for (c = 0; c < 3; ++c)
         s[c] = bld_insn_1(bld, NV_OP_ABS_F32, t[c]);

      s[0] = bld_insn_2(bld, NV_OP_MAX_F32, s[0], s[1]);
      s[0] = bld_insn_2(bld, NV_OP_MAX_F32, s[0], s[2]);
      s[0] = bld_insn_1(bld, NV_OP_RCP, s[0]);

      for (c = 0; c < 3; ++c)
         t[c] = bld_insn_2(bld, NV_OP_MUL_F32, t[c], s[0]);
   }

   if (lodbias)
      t[dim + cube + array + shadow] = emit_fetch(bld, insn, 0, 3);

   emit_tex(bld, opcode, tic, tsc, dst0, t, dim, array, cube, shadow);
}

static INLINE struct nv_value *
bld_dot(struct bld_context *bld, const struct tgsi_full_instruction *insn,
        int n)
{
   struct nv_value *dotp, *src0, *src1;
   int c;

   src0 = emit_fetch(bld, insn, 0, 0);
   src1 = emit_fetch(bld, insn, 1, 0);
   dotp = bld_insn_2(bld, NV_OP_MUL_F32, src0, src1);

   for (c = 1; c < n; ++c) {
      src0 = emit_fetch(bld, insn, 0, c);
      src1 = emit_fetch(bld, insn, 1, c);
      dotp = bld_insn_3(bld, NV_OP_MAD_F32, src0, src1, dotp);
   }
   return dotp;
}

#define FOR_EACH_DST0_ENABLED_CHANNEL(chan, inst) \
   for (chan = 0; chan < 4; ++chan) \
      if ((inst)->Dst[0].Register.WriteMask & (1 << chan))

static void
bld_instruction(struct bld_context *bld,
                const struct tgsi_full_instruction *insn)
{
   struct nv_value *src0;
   struct nv_value *src1;
   struct nv_value *src2;
   struct nv_value *dst0[4] = { NULL };
   struct nv_value *temp;
   int c;
   uint opcode = translate_opcode(insn->Instruction.Opcode);
   uint8_t mask = insn->Dst[0].Register.WriteMask;

#ifdef NOUVEAU_DEBUG
   debug_printf("bld_instruction:"); tgsi_dump_instruction(insn, 1);
#endif

   switch (insn->Instruction.Opcode) {
   case TGSI_OPCODE_ADD:
   case TGSI_OPCODE_MAX:
   case TGSI_OPCODE_MIN:
   case TGSI_OPCODE_MUL:
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
         src0 = emit_fetch(bld, insn, 0, c);
         src1 = emit_fetch(bld, insn, 1, c);
         dst0[c] = bld_insn_2(bld, opcode, src0, src1);
      }
      break;
   case TGSI_OPCODE_ARL:
      src1 = bld_imm_u32(bld, 4);
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
         src0 = emit_fetch(bld, insn, 0, c);
         src0 = bld_insn_1(bld, NV_OP_FLOOR, src0);
         src0->insn->ext.cvt.d = NV_TYPE_S32;
         src0->insn->ext.cvt.s = NV_TYPE_F32;
         dst0[c] = bld_insn_2(bld, NV_OP_SHL, src0, src1);
      }
      break;
   case TGSI_OPCODE_CMP:
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
         src0 = emit_fetch(bld, insn, 0, c);
         src0 = bld_setp(bld, NV_OP_SET_F32, NV_CC_LT, src0, bld->zero);
         src1 = emit_fetch(bld, insn, 1, c);
         src2 = emit_fetch(bld, insn, 2, c);
         dst0[c] = bld_insn_3(bld, NV_OP_SELP, src1, src2, src0);
      }
      break;
   case TGSI_OPCODE_COS:
   case TGSI_OPCODE_SIN:
      src0 = emit_fetch(bld, insn, 0, 0);
      temp = bld_insn_1(bld, NV_OP_PRESIN, src0);
      if (insn->Dst[0].Register.WriteMask & 7)
         temp = bld_insn_1(bld, opcode, temp);
      for (c = 0; c < 3; ++c)
         if (insn->Dst[0].Register.WriteMask & (1 << c))
            dst0[c] = temp;
      if (!(insn->Dst[0].Register.WriteMask & (1 << 3)))
         break;
      src0 = emit_fetch(bld, insn, 0, 3);
      temp = bld_insn_1(bld, NV_OP_PRESIN, src0);
      dst0[3] = bld_insn_1(bld, opcode, temp);
      break;
   case TGSI_OPCODE_DP2:
      temp = bld_dot(bld, insn, 2);
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
         dst0[c] = temp;
      break;
   case TGSI_OPCODE_DP3:
      temp = bld_dot(bld, insn, 3);
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
         dst0[c] = temp;
      break;
   case TGSI_OPCODE_DP4:
      temp = bld_dot(bld, insn, 4);
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
         dst0[c] = temp;
      break;
   case TGSI_OPCODE_DPH:
      src0 = bld_dot(bld, insn, 3);
      src1 = emit_fetch(bld, insn, 1, 3);
      temp = bld_insn_2(bld, NV_OP_ADD_F32, src0, src1);
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
         dst0[c] = temp;
      break;
   case TGSI_OPCODE_DST:
      if (insn->Dst[0].Register.WriteMask & 1)
         dst0[0] = bld_imm_f32(bld, 1.0f);
      if (insn->Dst[0].Register.WriteMask & 2) {
         src0 = emit_fetch(bld, insn, 0, 1);
         src1 = emit_fetch(bld, insn, 1, 1);
         dst0[1] = bld_insn_2(bld, NV_OP_MUL_F32, src0, src1);
      }
      if (insn->Dst[0].Register.WriteMask & 4)
         dst0[2] = emit_fetch(bld, insn, 0, 2);
      if (insn->Dst[0].Register.WriteMask & 8)
         dst0[3] = emit_fetch(bld, insn, 1, 3);
      break;
   case TGSI_OPCODE_EXP:
      src0 = emit_fetch(bld, insn, 0, 0);
      temp = bld_insn_1(bld, NV_OP_FLOOR, src0);

      if (insn->Dst[0].Register.WriteMask & 2)
         dst0[1] = bld_insn_2(bld, NV_OP_SUB_F32, src0, temp);
      if (insn->Dst[0].Register.WriteMask & 1) {
         temp = bld_insn_1(bld, NV_OP_PREEX2, temp);
         dst0[0] = bld_insn_1(bld, NV_OP_EX2, temp);
      }
      if (insn->Dst[0].Register.WriteMask & 4) {
         temp = bld_insn_1(bld, NV_OP_PREEX2, src0);
         dst0[2] = bld_insn_1(bld, NV_OP_EX2, temp);
      }
      if (insn->Dst[0].Register.WriteMask & 8)
         dst0[3] = bld_imm_f32(bld, 1.0f);
      break;
   case TGSI_OPCODE_EX2:
      src0 = emit_fetch(bld, insn, 0, 0);
      temp = bld_insn_1(bld, NV_OP_PREEX2, src0);
      temp = bld_insn_1(bld, NV_OP_EX2, temp);
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
         dst0[c] = temp;
      break;
   case TGSI_OPCODE_FRC:
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
         src0 = emit_fetch(bld, insn, 0, c);
         dst0[c] = bld_insn_1(bld, NV_OP_FLOOR, src0);
         dst0[c] = bld_insn_2(bld, NV_OP_SUB_F32, src0, dst0[c]);
      }
      break;
   case TGSI_OPCODE_KIL:
      for (c = 0; c < 4; ++c)
         bld_kil(bld, emit_fetch(bld, insn, 0, c));
      break;
   case TGSI_OPCODE_KILP:
      (new_instruction(bld->pc, NV_OP_KIL))->fixed = 1;
      break;
   case TGSI_OPCODE_IF:
   {
      struct nv_basic_block *b = new_basic_block(bld->pc);
      struct nv_value *pred = emit_fetch(bld, insn, 0, 0);

      assert(bld->cond_lvl < BLD_MAX_COND_NESTING);

      nvc0_bblock_attach(bld->pc->current_block, b, CFG_EDGE_FORWARD);

      bld->join_bb[bld->cond_lvl] = bld->pc->current_block;
      bld->cond_bb[bld->cond_lvl] = bld->pc->current_block;

      if (pred->insn && NV_BASEOP(pred->insn->opcode) == NV_OP_SET) {
         pred = bld_clone(bld, pred->insn);
         pred->reg.size = 1;
         pred->reg.file = NV_FILE_PRED;
         if (pred->insn->opcode == NV_OP_FSET_F32)
            pred->insn->opcode = NV_OP_SET_F32;
      } else {
         pred = bld_setp(bld, NV_OP_SET_U32, NV_CC_NE | NV_CC_U,
                         pred, bld->zero);
      }
      assert(!mask);

      bld_flow(bld, NV_OP_BRA, pred, NV_CC_NOT_P, NULL, (bld->cond_lvl == 0));

      ++bld->cond_lvl;
      bld_new_block(bld, b);
   }
      break;
   case TGSI_OPCODE_ELSE:
   {
      struct nv_basic_block *b = new_basic_block(bld->pc);

      --bld->cond_lvl;
      nvc0_bblock_attach(bld->join_bb[bld->cond_lvl], b, CFG_EDGE_FORWARD);

      bld->cond_bb[bld->cond_lvl]->exit->target = b;
      bld->cond_bb[bld->cond_lvl] = bld->pc->current_block;

      new_instruction(bld->pc, NV_OP_BRA)->terminator = 1;

      ++bld->cond_lvl;
      bld_new_block(bld, b);
   }
      break;
   case TGSI_OPCODE_ENDIF:
   {
      struct nv_basic_block *b = new_basic_block(bld->pc);

      --bld->cond_lvl;
      nvc0_bblock_attach(bld->pc->current_block, b, bld->out_kind);
      nvc0_bblock_attach(bld->cond_bb[bld->cond_lvl], b, CFG_EDGE_FORWARD);

      bld->cond_bb[bld->cond_lvl]->exit->target = b;

      bld_new_block(bld, b);

      if (!bld->cond_lvl && bld->join_bb[bld->cond_lvl]) {
         bld->join_bb[bld->cond_lvl]->exit->prev->target = b;
         new_instruction(bld->pc, NV_OP_JOIN)->join = 1;
      }
   }
      break;
   case TGSI_OPCODE_BGNLOOP:
   {
      struct nv_basic_block *bl = new_basic_block(bld->pc);
      struct nv_basic_block *bb = new_basic_block(bld->pc);

      assert(bld->loop_lvl < BLD_MAX_LOOP_NESTING);

      bld->loop_bb[bld->loop_lvl] = bl;
      bld->brkt_bb[bld->loop_lvl] = bb;

      nvc0_bblock_attach(bld->pc->current_block, bl, CFG_EDGE_LOOP_ENTER);

      bld_new_block(bld, bld->loop_bb[bld->loop_lvl++]);

      if (bld->loop_lvl == bld->pc->loop_nesting_bound)
         bld->pc->loop_nesting_bound++;

      bld_clear_def_use(&bld->tvs[0][0], BLD_MAX_TEMPS, bld->loop_lvl);
      bld_clear_def_use(&bld->avs[0][0], BLD_MAX_ADDRS, bld->loop_lvl);
      bld_clear_def_use(&bld->pvs[0][0], BLD_MAX_PREDS, bld->loop_lvl);
   }
      break;
   case TGSI_OPCODE_BRK:
   {
      struct nv_basic_block *bb = bld->brkt_bb[bld->loop_lvl - 1];

      bld_flow(bld, NV_OP_BRA, NULL, NV_CC_P, bb, FALSE);

      if (bld->out_kind == CFG_EDGE_FORWARD) /* else we already had BRK/CONT */
         nvc0_bblock_attach(bld->pc->current_block, bb, CFG_EDGE_LOOP_LEAVE);

      bld->out_kind = CFG_EDGE_FAKE;
   }
      break;
   case TGSI_OPCODE_CONT:
   {
      struct nv_basic_block *bb = bld->loop_bb[bld->loop_lvl - 1];

      bld_flow(bld, NV_OP_BRA, NULL, NV_CC_P, bb, FALSE);

      nvc0_bblock_attach(bld->pc->current_block, bb, CFG_EDGE_BACK);

      if ((bb = bld->join_bb[bld->cond_lvl - 1])) {
         bld->join_bb[bld->cond_lvl - 1] = NULL;
         nvc0_insn_delete(bb->exit->prev);
      }
      bld->out_kind = CFG_EDGE_FAKE;
   }
      break;
   case TGSI_OPCODE_ENDLOOP:
   {
      struct nv_basic_block *bb = bld->loop_bb[bld->loop_lvl - 1];

      if (bld->out_kind != CFG_EDGE_FAKE) { /* else we already had BRK/CONT */
         bld_flow(bld, NV_OP_BRA, NULL, NV_CC_P, bb, FALSE);

         nvc0_bblock_attach(bld->pc->current_block, bb, CFG_EDGE_BACK);
      }

      bld_loop_end(bld, bb); /* replace loop-side operand of the phis */

      bld_new_block(bld, bld->brkt_bb[--bld->loop_lvl]);
   }
      break;
   case TGSI_OPCODE_ABS:
   case TGSI_OPCODE_CEIL:
   case TGSI_OPCODE_FLR:
   case TGSI_OPCODE_TRUNC:
   case TGSI_OPCODE_DDX:
   case TGSI_OPCODE_DDY:
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
         src0 = emit_fetch(bld, insn, 0, c);
         dst0[c] = bld_insn_1(bld, opcode, src0);
      }
      break;
   case TGSI_OPCODE_LIT:
      bld_lit(bld, dst0, insn);
      break;
   case TGSI_OPCODE_LRP:
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
         src0 = emit_fetch(bld, insn, 0, c);
         src1 = emit_fetch(bld, insn, 1, c);
         src2 = emit_fetch(bld, insn, 2, c);
         dst0[c] = bld_insn_2(bld, NV_OP_SUB_F32, src1, src2);
         dst0[c] = bld_insn_3(bld, NV_OP_MAD_F32, dst0[c], src0, src2);
      }
      break;
   case TGSI_OPCODE_MOV:
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
         dst0[c] = emit_fetch(bld, insn, 0, c);
      break;
   case TGSI_OPCODE_MAD:
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
         src0 = emit_fetch(bld, insn, 0, c);
         src1 = emit_fetch(bld, insn, 1, c);
         src2 = emit_fetch(bld, insn, 2, c);
         dst0[c] = bld_insn_3(bld, opcode, src0, src1, src2);
      }
      break;
   case TGSI_OPCODE_POW:
      src0 = emit_fetch(bld, insn, 0, 0);
      src1 = emit_fetch(bld, insn, 1, 0);
      temp = bld_pow(bld, src0, src1);
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
         dst0[c] = temp;
      break;
   case TGSI_OPCODE_LOG:
      src0 = emit_fetch(bld, insn, 0, 0);
      src0 = bld_insn_1(bld, NV_OP_ABS_F32, src0);
      temp = bld_insn_1(bld, NV_OP_LG2, src0);
      dst0[2] = temp;
      if (insn->Dst[0].Register.WriteMask & 3) {
         temp = bld_insn_1(bld, NV_OP_FLOOR, temp);
         dst0[0] = temp;
      }
      if (insn->Dst[0].Register.WriteMask & 2) {
         temp = bld_insn_1(bld, NV_OP_PREEX2, temp);
         temp = bld_insn_1(bld, NV_OP_EX2, temp);
         temp = bld_insn_1(bld, NV_OP_RCP, temp);
         dst0[1] = bld_insn_2(bld, NV_OP_MUL_F32, src0, temp);
      }
      if (insn->Dst[0].Register.WriteMask & 8)
         dst0[3] = bld_imm_f32(bld, 1.0f);
      break;
   case TGSI_OPCODE_RCP:
   case TGSI_OPCODE_LG2:
      src0 = emit_fetch(bld, insn, 0, 0);
      temp = bld_insn_1(bld, opcode, src0);
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
         dst0[c] = temp;
      break;
   case TGSI_OPCODE_RSQ:
      src0 = emit_fetch(bld, insn, 0, 0);
      temp = bld_insn_1(bld, NV_OP_ABS_F32, src0);
      temp = bld_insn_1(bld, NV_OP_RSQ, temp);
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
         dst0[c] = temp;
      break;
   case TGSI_OPCODE_SLT:
   case TGSI_OPCODE_SGE:
   case TGSI_OPCODE_SEQ:
   case TGSI_OPCODE_SGT:
   case TGSI_OPCODE_SLE:
   case TGSI_OPCODE_SNE:
   case TGSI_OPCODE_ISLT:
   case TGSI_OPCODE_ISGE:
   case TGSI_OPCODE_USEQ:
   case TGSI_OPCODE_USGE:
   case TGSI_OPCODE_USLT:
   case TGSI_OPCODE_USNE:
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
         src0 = emit_fetch(bld, insn, 0, c);
         src1 = emit_fetch(bld, insn, 1, c);
         dst0[c] = bld_insn_2(bld, opcode, src0, src1);
         dst0[c]->insn->set_cond = translate_setcc(insn->Instruction.Opcode);
      }
      break;
   case TGSI_OPCODE_SCS:
      if (insn->Dst[0].Register.WriteMask & 0x3) {
         src0 = emit_fetch(bld, insn, 0, 0);
         temp = bld_insn_1(bld, NV_OP_PRESIN, src0);
         if (insn->Dst[0].Register.WriteMask & 0x1)
            dst0[0] = bld_insn_1(bld, NV_OP_COS, temp);
         if (insn->Dst[0].Register.WriteMask & 0x2)
            dst0[1] = bld_insn_1(bld, NV_OP_SIN, temp);
      }
      if (insn->Dst[0].Register.WriteMask & 0x4)
         dst0[2] = bld_imm_f32(bld, 0.0f);
      if (insn->Dst[0].Register.WriteMask & 0x8)
         dst0[3] = bld_imm_f32(bld, 1.0f);
      break;
   case TGSI_OPCODE_SSG:
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) { /* XXX: set lt, set gt, sub */
         src0 = emit_fetch(bld, insn, 0, c);
         src1 = bld_setp(bld, NV_OP_SET_F32, NV_CC_EQ, src0, bld->zero);
         temp = bld_insn_2(bld, NV_OP_AND, src0, bld_imm_u32(bld, 0x80000000));
         temp = bld_insn_2(bld, NV_OP_OR, temp, bld_imm_f32(bld, 1.0f));
         dst0[c] = bld_insn_1(bld, NV_OP_MOV, temp);
         bld_src_predicate(bld, dst0[c]->insn, 1, src1);
      }
      break;
   case TGSI_OPCODE_SUB:
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
         src0 = emit_fetch(bld, insn, 0, c);
         src1 = emit_fetch(bld, insn, 1, c);
         dst0[c] = bld_insn_2(bld, NV_OP_SUB_F32, src0, src1);
      }
      break;
   case TGSI_OPCODE_TEX:
   case TGSI_OPCODE_TXB:
   case TGSI_OPCODE_TXL:
   case TGSI_OPCODE_TXP:
      bld_tex(bld, dst0, insn);
      break;
   case TGSI_OPCODE_XPD:
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
         if (c == 3) {
            dst0[3] = bld_imm_f32(bld, 1.0f);
            break;
         }
         src0 = emit_fetch(bld, insn, 1, (c + 1) % 3);
         src1 = emit_fetch(bld, insn, 0, (c + 2) % 3);
         dst0[c] = bld_insn_2(bld, NV_OP_MUL_F32, src0, src1);

         src0 = emit_fetch(bld, insn, 0, (c + 1) % 3);
         src1 = emit_fetch(bld, insn, 1, (c + 2) % 3);
         dst0[c] = bld_insn_3(bld, NV_OP_MAD_F32, src0, src1, dst0[c]);

         dst0[c]->insn->src[2]->mod ^= NV_MOD_NEG;
      }
      break;
   case TGSI_OPCODE_RET:
      (new_instruction(bld->pc, NV_OP_RET))->fixed = 1;
      break;
   case TGSI_OPCODE_END:
      /* VP outputs are exported in-place as scalars, optimization later */
      if (bld->pc->is_fragprog)
         bld_export_fp_outputs(bld);
      if (bld->ti->append_ucp)
         bld_append_vp_ucp(bld);
      return;
   default:
      NOUVEAU_ERR("unhandled opcode %u\n", insn->Instruction.Opcode);
      abort();
      return;
   }

   if (insn->Dst[0].Register.File == TGSI_FILE_OUTPUT &&
       !bld->pc->is_fragprog) {
      struct nv_instruction *mi = NULL;
      uint size;

      if (bld->ti->append_ucp) {
         if (bld->ti->output_loc[insn->Dst[0].Register.Index][0] == 0x70) {
            bld->hpos_index = insn->Dst[0].Register.Index;
            for (c = 0; c < 4; ++c)
               if (mask & (1 << c))
                  STORE_OUTP(insn->Dst[0].Register.Index, c, dst0[c]);
         }
      }

      for (c = 0; c < 4; ++c)
         if (mask & (1 << c))
            if ((dst0[c]->reg.file == NV_FILE_IMM) ||
                (dst0[c]->reg.file == NV_FILE_GPR && dst0[c]->reg.id == 63))
               dst0[c] = bld_insn_1(bld, NV_OP_MOV, dst0[c]);

      c = 0;
      if ((mask & 0x3) == 0x3) {
         mask &= ~0x3;
         size = 8;
         mi = bld_insn_2(bld, NV_OP_BIND, dst0[0], dst0[1])->insn;
      }
      if ((mask & 0xc) == 0xc) {
         mask &= ~0xc;
         if (mi) {
            size = 16;
            nv_reference(bld->pc, mi, 2, dst0[2]);
            nv_reference(bld->pc, mi, 3, dst0[3]);
         } else {
            c = 2;
            size = 8;
            mi = bld_insn_2(bld, NV_OP_BIND, dst0[2], dst0[3])->insn;
         }
      } else
      if (mi && (mask & 0x4)) {
         size = 12;
         mask &= ~0x4;
         nv_reference(bld->pc, mi, 2, dst0[2]);
      }

      if (mi) {
         struct nv_instruction *ex = new_instruction(bld->pc, NV_OP_EXPORT);
         int s;

         nv_reference(bld->pc, ex, 0, new_value(bld->pc, NV_FILE_MEM_V, 4));
         nv_reference(bld->pc, ex, 1, mi->def[0]);

         for (s = 1; s < size / 4; ++s) {
            bld_def(mi, s, new_value(bld->pc, NV_FILE_GPR, 4));
            nv_reference(bld->pc, ex, s + 1, mi->def[s]);
         }

         ex->fixed = 1;
         ex->src[0]->value->reg.size = size;
         ex->src[0]->value->reg.address =
            bld->ti->output_loc[insn->Dst[0].Register.Index][c];
      }
   }

   for (c = 0; c < 4; ++c)
      if (mask & (1 << c))
         emit_store(bld, insn, c, dst0[c]);
}

static INLINE void
bld_free_registers(struct bld_register *base, int n)
{
   int i, c;

   for (i = 0; i < n; ++i)
      for (c = 0; c < 4; ++c)
         util_dynarray_fini(&base[i * 4 + c].vals);
}

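/* Entry point: translate the shader's parsed TGSI instructions into the
 * nv_pc SSA IR, starting from a fresh root basic block; returns 0 on
 * success.
 */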
int
nvc0_tgsi_to_nc(struct nv_pc *pc, struct nvc0_translation_info *ti)
{
   struct bld_context *bld = CALLOC_STRUCT(bld_context);
   unsigned ip;

   pc->root[0] = pc->current_block = new_basic_block(pc);

   bld->pc = pc;
   bld->ti = ti;

   pc->loop_nesting_bound = 1;

   bld->zero = new_value(pc, NV_FILE_GPR, 4);
   bld->zero->reg.id = 63;

   if (pc->is_fragprog) {
      struct nv_value *mem = new_value(pc, NV_FILE_MEM_V, 4);
      mem->reg.address = 0x7c;

      bld->frag_coord[3] = bld_insn_1(bld, NV_OP_LINTERP, mem);
      bld->frag_coord[3] = bld_insn_1(bld, NV_OP_RCP, bld->frag_coord[3]);
   }

   for (ip = 0; ip < ti->num_insns; ++ip)
      bld_instruction(bld, &ti->insns[ip]);

   bld_free_registers(&bld->tvs[0][0], BLD_MAX_TEMPS);
   bld_free_registers(&bld->avs[0][0], BLD_MAX_ADDRS);
   bld_free_registers(&bld->pvs[0][0], BLD_MAX_PREDS);
   bld_free_registers(&bld->ovs[0][0], PIPE_MAX_SHADER_OUTPUTS);

   FREE(bld);
   return 0;
}

/* If a variable is assigned in a loop, replace all references to the value
 * from outside the loop with a phi value.
 */
static void
bld_replace_value(struct nv_pc *pc, struct nv_basic_block *b,
                  struct nv_value *old_val,
                  struct nv_value *new_val)
{
   struct nv_instruction *nvi;

   for (nvi = b->phi ? b->phi : b->entry; nvi; nvi = nvi->next) {
      int s;
      for (s = 0; s < 6 && nvi->src[s]; ++s)
         if (nvi->src[s]->value == old_val)
            nv_reference(pc, nvi, s, new_val);
   }

   b->pass_seq = pc->pass_seq;

   if (b->out[0] && b->out[0]->pass_seq < pc->pass_seq)
      bld_replace_value(pc, b->out[0], old_val, new_val);

   if (b->out[1] && b->out[1]->pass_seq < pc->pass_seq)
      bld_replace_value(pc, b->out[1], old_val, new_val);
}