/*
 * Copyright 2010 Christoph Bumiller
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

/* #define NV50_TGSI2NC_DEBUG */

/* XXX: need to clean this up so we get the typecasting right more naturally */

#include <unistd.h>

#include "nv50_context.h"
#include "nv50_pc.h"

#include "pipe/p_shader_tokens.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_util.h"

#include "util/u_simple_list.h"
#include "tgsi/tgsi_dump.h"

#define BLD_MAX_TEMPS 64
#define BLD_MAX_ADDRS 4
#define BLD_MAX_PREDS 4
#define BLD_MAX_IMMDS 128

#define BLD_MAX_COND_NESTING 4
#define BLD_MAX_LOOP_NESTING 4
#define BLD_MAX_CALL_NESTING 2

/* collects all values assigned to the same TGSI register */
struct bld_value_stack {
   struct nv_value *top;
   struct nv_value **body;
   unsigned size;
   uint16_t loop_use; /* 1 bit per loop level, indicates if used/defd */
   uint16_t loop_def;
};
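
/* Illustrative sketch of how a stack is used (editor's note, not from the
 * original comments): "top" holds the current SSA value of the register;
 * whenever a new basic block is started, bld_push_values() moves "top" into
 * "body", so that bld_phi() can later look up, per basic block, which
 * definition reached the end of that block.
 */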

static INLINE void
bld_vals_push_val(struct bld_value_stack *stk, struct nv_value *val)
{
   assert(!stk->size || (stk->body[stk->size - 1] != val));

   if (!(stk->size % 8)) {
      unsigned old_sz = (stk->size + 0) * sizeof(struct nv_value *);
      unsigned new_sz = (stk->size + 8) * sizeof(struct nv_value *);
      stk->body = (struct nv_value **)REALLOC(stk->body, old_sz, new_sz);
   }
   stk->body[stk->size++] = val;
}

static INLINE boolean
bld_vals_del_val(struct bld_value_stack *stk, struct nv_value *val)
{
   int i;

   for (i = stk->size - 1; i >= 0; --i)
      if (stk->body[i] == val)
         break;
   if (i < 0)
      return FALSE;

   if (i != stk->size - 1)
      stk->body[i] = stk->body[stk->size - 1];

   --stk->size; /* XXX: old size in REALLOC */
   return TRUE;
}

static INLINE void
bld_vals_push(struct bld_value_stack *stk)
{
   bld_vals_push_val(stk, stk->top);
   stk->top = NULL;
}

static INLINE void
bld_push_values(struct bld_value_stack *stacks, int n)
{
   int i, c;

   for (i = 0; i < n; ++i)
      for (c = 0; c < 4; ++c)
         if (stacks[i * 4 + c].top)
            bld_vals_push(&stacks[i * 4 + c]);
}

struct bld_context {
   struct nv50_translation_info *ti;

   struct nv_pc *pc;
   struct nv_basic_block *b;

   struct tgsi_parse_context parse[BLD_MAX_CALL_NESTING];
   int call_lvl;

   struct nv_basic_block *cond_bb[BLD_MAX_COND_NESTING];
   struct nv_basic_block *join_bb[BLD_MAX_COND_NESTING];
   struct nv_basic_block *else_bb[BLD_MAX_COND_NESTING];
   int cond_lvl;
   struct nv_basic_block *loop_bb[BLD_MAX_LOOP_NESTING];
   struct nv_basic_block *brkt_bb[BLD_MAX_LOOP_NESTING];
   int loop_lvl;

   ubyte out_kind; /* CFG_EDGE_FORWARD, or FAKE in case of BREAK/CONT */

   struct bld_value_stack tvs[BLD_MAX_TEMPS][4]; /* TGSI_FILE_TEMPORARY */
   struct bld_value_stack avs[BLD_MAX_ADDRS][4]; /* TGSI_FILE_ADDRESS */
   struct bld_value_stack pvs[BLD_MAX_PREDS][4]; /* TGSI_FILE_PREDICATE */
   struct bld_value_stack ovs[PIPE_MAX_SHADER_OUTPUTS][4];

   uint32_t outputs_written[(PIPE_MAX_SHADER_OUTPUTS + 31) / 32];

   struct nv_value *frgcrd[4];
   struct nv_value *sysval[4];

   /* wipe on new BB */
   struct nv_value *saved_addr[4][2];
   struct nv_value *saved_inputs[128];
   struct nv_value *saved_immd[BLD_MAX_IMMDS];
   uint num_immds;
};

static INLINE ubyte
bld_stack_file(struct bld_context *bld, struct bld_value_stack *stk)
{
   if (stk < &bld->avs[0][0])
      return NV_FILE_GPR;
   else
   if (stk < &bld->pvs[0][0])
      return NV_FILE_ADDR;
   else
   if (stk < &bld->ovs[0][0])
      return NV_FILE_FLAGS;
   else
      return NV_FILE_OUT;
}

static INLINE struct nv_value *
bld_fetch(struct bld_context *bld, struct bld_value_stack *stk, int i, int c)
{
   stk[i * 4 + c].loop_use |= 1 << bld->loop_lvl;

   return stk[i * 4 + c].top;
}

static struct nv_value *
bld_loop_phi(struct bld_context *, struct bld_value_stack *, struct nv_value *);

/* If a variable is defined in a loop without prior use, we don't need
 * a phi in the loop header to account for backwards flow.
 *
 * However, if this variable is then also used outside the loop, we do
 * need a phi after all. But we must not use this phi's def inside the
 * loop, so we can eliminate the phi if it is unused later.
 */
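/* Illustrative example (pseudo-TGSI, not from the original source):
 *   BGNLOOP
 *     MOV TEMP[0].x, ...            # defined before any use in the loop:
 *                                   #  no phi needed in the loop header
 *     MUL TEMP[1].x, TEMP[1].x, ... # uses the pre-loop value:
 *                                   #  a phi in the loop header is required
 *   ENDLOOP
 */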
static INLINE void
bld_store(struct bld_context *bld, struct bld_value_stack *stk, int i, int c,
          struct nv_value *val)
{
   const uint16_t m = 1 << bld->loop_lvl;

   stk = &stk[i * 4 + c];

   if (bld->loop_lvl && !(m & (stk->loop_def | stk->loop_use)))
      bld_loop_phi(bld, stk, val);

   stk->top = val;
   stk->loop_def |= 1 << bld->loop_lvl;
}

static INLINE void
bld_clear_def_use(struct bld_value_stack *stk, int n, int lvl)
{
   int i;
   const uint16_t mask = ~(1 << lvl);

   for (i = 0; i < n * 4; ++i) {
      stk[i].loop_def &= mask;
      stk[i].loop_use &= mask;
   }
}

#define FETCH_TEMP(i, c) bld_fetch(bld, &bld->tvs[0][0], i, c)
#define STORE_TEMP(i, c, v) bld_store(bld, &bld->tvs[0][0], i, c, (v))
#define FETCH_ADDR(i, c) bld_fetch(bld, &bld->avs[0][0], i, c)
#define STORE_ADDR(i, c, v) bld_store(bld, &bld->avs[0][0], i, c, (v))
#define FETCH_PRED(i, c) bld_fetch(bld, &bld->pvs[0][0], i, c)
#define STORE_PRED(i, c, v) bld_store(bld, &bld->pvs[0][0], i, c, (v))

#define STORE_OUTR(i, c, v)                                         \
   do {                                                             \
      bld->ovs[i][c].top = (v);                                     \
      bld->outputs_written[(i) / 8] |= 1 << (((i) * 4 + (c)) % 32); \
   } while (0)

static INLINE void
bld_warn_uninitialized(struct bld_context *bld, int kind,
                       struct bld_value_stack *stk, struct nv_basic_block *b)
{
   long i = (stk - &bld->tvs[0][0]) / 4;
   long c = (stk - &bld->tvs[0][0]) & 3;

   if (c == 3)
      c = -1;

   debug_printf("WARNING: TEMP[%li].%c %s used uninitialized in BB:%i\n",
                i, (int)('x' + c), kind ? "may be" : "is", b->id);
}

static INLINE struct nv_value *
bld_def(struct nv_instruction *i, int c, struct nv_value *value)
{
   i->def[c] = value;
   value->insn = i;
   return value;
}

static INLINE struct nv_value *
find_by_bb(struct bld_value_stack *stack, struct nv_basic_block *b)
{
   int i;

   if (stack->top && stack->top->insn->bb == b)
      return stack->top;

   for (i = stack->size - 1; i >= 0; --i)
      if (stack->body[i]->insn->bb == b)
         return stack->body[i];
   return NULL;
}

/* fetch value from stack that was defined in the specified basic block,
 * or search for first definitions in all of its predecessors
 */
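/* E.g. in a diamond CFG (if/else/endif), the recursion below walks both
 * predecessor chains and collects one reaching definition per path; these
 * then become the operands of the phi that bld_phi() builds at the join.
 */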
static void
fetch_by_bb(struct bld_value_stack *stack,
            struct nv_value **vals, int *n,
            struct nv_basic_block *b)
{
   int i;
   struct nv_value *val;

   assert(*n < 16); /* MAX_COND_NESTING */

   val = find_by_bb(stack, b);
   if (val) {
      for (i = 0; i < *n; ++i)
         if (vals[i] == val)
            return;
      vals[(*n)++] = val;
      return;
   }
   for (i = 0; i < b->num_in; ++i)
      if (!IS_WALL_EDGE(b->in_kind[i]))
         fetch_by_bb(stack, vals, n, b->in[i]);
}

static INLINE struct nv_value *
bld_load_imm_u32(struct bld_context *bld, uint32_t u);

static INLINE struct nv_value *
bld_undef(struct bld_context *bld, ubyte file)
{
   struct nv_instruction *nvi = new_instruction(bld->pc, NV_OP_UNDEF);

   return bld_def(nvi, 0, new_value(bld->pc, file, NV_TYPE_U32));
}

static struct nv_value *
bld_phi(struct bld_context *bld, struct nv_basic_block *b,
        struct bld_value_stack *stack)
{
   struct nv_basic_block *in;
   struct nv_value *vals[16], *val;
   struct nv_instruction *phi;
   int i, j, n;

   do {
      i = n = 0;
      fetch_by_bb(stack, vals, &n, b);

      if (!n) {
         bld_warn_uninitialized(bld, 0, stack, b);
         return NULL;
      }

      if (n == 1) {
         if (nvbb_dominated_by(b, vals[0]->insn->bb))
            break;

         bld_warn_uninitialized(bld, 1, stack, b);

         /* back-tracking to insert missing value of other path */
         in = b;
         while (in->in[0]) {
            if (in->num_in == 1) {
               in = in->in[0];
            } else {
               if (!nvbb_reachable_by(in->in[0], vals[0]->insn->bb, b))
                  in = in->in[0];
               else
               if (!nvbb_reachable_by(in->in[1], vals[0]->insn->bb, b))
                  in = in->in[1];
               else
                  in = in->in[0];
            }
         }
         bld->pc->current_block = in;

         /* should make this a no-op */
         bld_vals_push_val(stack, bld_undef(bld, vals[0]->reg.file));
         continue;
      }

      for (i = 0; i < n; ++i) {
         /* if value dominates b, continue to the redefinitions */
         if (nvbb_dominated_by(b, vals[i]->insn->bb))
            continue;

         /* if value dominates any in-block, b should be the dom frontier */
         for (j = 0; j < b->num_in; ++j)
            if (nvbb_dominated_by(b->in[j], vals[i]->insn->bb))
               break;
         /* otherwise, find the dominance frontier and put the phi there */
         if (j == b->num_in) {
            in = nvbb_dom_frontier(vals[i]->insn->bb);
            val = bld_phi(bld, in, stack);
            bld_vals_push_val(stack, val);
            break;
         }
      }
   } while (i < n);

   bld->pc->current_block = b;

   if (n == 1)
      return vals[0];

   phi = new_instruction(bld->pc, NV_OP_PHI);

   bld_def(phi, 0, new_value(bld->pc, vals[0]->reg.file, vals[0]->reg.type));
   for (i = 0; i < n; ++i)
      phi->src[i] = new_ref(bld->pc, vals[i]);

   return phi->def[0];
}

/* Insert a phi function in the loop header.
 * For nested loops, we need to insert phi functions in all the outer
 * loop headers if they don't have one yet.
 *
 * @def: redefinition from inside loop, or NULL if to be replaced later
 */
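/* E.g. (sketch): a value that is live across two nested loops and first
 * redefined in the inner body gets a phi in both loop headers; the
 * back-edge operand of each header phi is patched up in bld_loop_end(),
 * which is why "def" may be NULL here and replaced later.
 */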
static struct nv_value *
bld_loop_phi(struct bld_context *bld, struct bld_value_stack *stack,
             struct nv_value *def)
{
   struct nv_instruction *phi;
   struct nv_basic_block *bb = bld->pc->current_block;
   struct nv_value *val = NULL;

   if (bld->loop_lvl > 1) {
      --bld->loop_lvl;
      if (!((stack->loop_def | stack->loop_use) & (1 << bld->loop_lvl)))
         val = bld_loop_phi(bld, stack, NULL);
      ++bld->loop_lvl;
   }

   if (!val)
      val = bld_phi(bld, bld->pc->current_block, stack); /* old definition */
   if (!val) {
      bld->pc->current_block = bld->loop_bb[bld->loop_lvl - 1]->in[0];
      val = bld_undef(bld, bld_stack_file(bld, stack));
   }

   bld->pc->current_block = bld->loop_bb[bld->loop_lvl - 1];

   phi = new_instruction(bld->pc, NV_OP_PHI);

   bld_def(phi, 0, new_value_like(bld->pc, val));
   if (!def)
      def = phi->def[0];

   bld_vals_push_val(stack, phi->def[0]);

   phi->target = (struct nv_basic_block *)stack; /* cheat */

   nv_reference(bld->pc, &phi->src[0], val);
   nv_reference(bld->pc, &phi->src[1], def);

   bld->pc->current_block = bb;

   return phi->def[0];
}

static INLINE struct nv_value *
bld_fetch_global(struct bld_context *bld, struct bld_value_stack *stack)
{
   const uint16_t m = 1 << bld->loop_lvl;
   const uint16_t use = stack->loop_use;

   stack->loop_use |= m;

   /* If neither used nor def'd inside the loop, build a phi in foresight,
    * so we don't have to replace stuff later on, which requires tracking.
    */
   if (bld->loop_lvl && !((use | stack->loop_def) & m))
      return bld_loop_phi(bld, stack, NULL);

   return bld_phi(bld, bld->pc->current_block, stack);
}

static INLINE struct nv_value *
bld_imm_u32(struct bld_context *bld, uint32_t u)
{
   int i;
   unsigned n = bld->num_immds;

   for (i = 0; i < n; ++i)
      if (bld->saved_immd[i]->reg.imm.u32 == u)
         return bld->saved_immd[i];
   assert(n < BLD_MAX_IMMDS);

   bld->num_immds++;

   bld->saved_immd[n] = new_value(bld->pc, NV_FILE_IMM, NV_TYPE_U32);
   bld->saved_immd[n]->reg.imm.u32 = u;
   return bld->saved_immd[n];
}

static void
bld_replace_value(struct nv_pc *, struct nv_basic_block *, struct nv_value *,
                  struct nv_value *);

/* Replace the source of the phi in the loop header by the last assignment,
 * or eliminate the phi function if there is no assignment inside the loop.
 *
 * Redundancy situation 1 - (used) but (not redefined) value:
 *  %3 = phi %0, %3 = %3 is used
 *  %3 = phi %0, %4 = is new definition
 *
 * Redundancy situation 2 - (not used) but (redefined) value:
 *  %3 = phi %0, %2 = %2 is used, %3 could be used outside, deleted by DCE
 */
static void
bld_loop_end(struct bld_context *bld, struct nv_basic_block *bb)
{
   struct nv_basic_block *save = bld->pc->current_block;
   struct nv_instruction *phi, *next;
   struct nv_value *val;
   struct bld_value_stack *stk;
   int i, s, n;

   for (phi = bb->phi; phi && phi->opcode == NV_OP_PHI; phi = next) {
      next = phi->next;

      stk = (struct bld_value_stack *)phi->target;
      phi->target = NULL;

      for (s = 1, n = 0; n < bb->num_in; ++n) {
         if (bb->in_kind[n] != CFG_EDGE_BACK)
            continue;

         assert(s < 4);
         bld->pc->current_block = bb->in[n];
         val = bld_fetch_global(bld, stk);

         for (i = 0; i < 4; ++i)
            if (phi->src[i] && phi->src[i]->value == val)
               break;
         if (i == 4)
            nv_reference(bld->pc, &phi->src[s++], val);
      }
      bld->pc->current_block = save;

      if (phi->src[0]->value == phi->def[0] ||
          phi->src[0]->value == phi->src[1]->value)
         s = 1;
      else
      if (phi->src[1]->value == phi->def[0])
         s = 0;
      else
         continue;

      if (s >= 0) {
         /* eliminate the phi */
         bld_vals_del_val(stk, phi->def[0]);

         ++bld->pc->pass_seq;
         bld_replace_value(bld->pc, bb, phi->def[0], phi->src[s]->value);

         nv_nvi_delete(phi);
      }
   }
}

static INLINE struct nv_value *
bld_imm_f32(struct bld_context *bld, float f)
{
   return bld_imm_u32(bld, fui(f));
}

#define SET_TYPE(v, t) ((v)->reg.type = NV_TYPE_##t)

static struct nv_value *
bld_insn_1(struct bld_context *bld, uint opcode, struct nv_value *src0)
{
   struct nv_instruction *insn = new_instruction(bld->pc, opcode);
   assert(insn);

   nv_reference(bld->pc, &insn->src[0], src0); /* NOTE: new_ref would suffice */

   return bld_def(insn, 0, new_value(bld->pc, NV_FILE_GPR, src0->reg.type));
}

static struct nv_value *
bld_insn_2(struct bld_context *bld, uint opcode,
           struct nv_value *src0, struct nv_value *src1)
{
   struct nv_instruction *insn = new_instruction(bld->pc, opcode);

   nv_reference(bld->pc, &insn->src[0], src0);
   nv_reference(bld->pc, &insn->src[1], src1);

   return bld_def(insn, 0, new_value(bld->pc, NV_FILE_GPR, src0->reg.type));
}

static struct nv_value *
bld_insn_3(struct bld_context *bld, uint opcode,
           struct nv_value *src0, struct nv_value *src1,
           struct nv_value *src2)
{
   struct nv_instruction *insn = new_instruction(bld->pc, opcode);

   nv_reference(bld->pc, &insn->src[0], src0);
   nv_reference(bld->pc, &insn->src[1], src1);
   nv_reference(bld->pc, &insn->src[2], src2);

   return bld_def(insn, 0, new_value(bld->pc, NV_FILE_GPR, src0->reg.type));
}

#define BLD_INSN_1_EX(d, op, dt, s0, s0t)          \
   do {                                            \
      (d) = bld_insn_1(bld, (NV_OP_##op), (s0));   \
      (d)->reg.type = NV_TYPE_##dt;                \
      (d)->insn->src[0]->typecast = NV_TYPE_##s0t; \
   } while (0)

#define BLD_INSN_2_EX(d, op, dt, s0, s0t, s1, s1t)     \
   do {                                                \
      (d) = bld_insn_2(bld, (NV_OP_##op), (s0), (s1)); \
      (d)->reg.type = NV_TYPE_##dt;                    \
      (d)->insn->src[0]->typecast = NV_TYPE_##s0t;     \
      (d)->insn->src[1]->typecast = NV_TYPE_##s1t;     \
   } while (0)

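/* pow(x, e) = 2^(e * log2(x)); PREEX2 appears to be the nv50 pre-op that
 * conditions the multiply result for EX2 (range reduction), analogous to
 * PRESIN before SIN/COS further below.
 */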
static struct nv_value *
bld_pow(struct bld_context *bld, struct nv_value *x, struct nv_value *e)
{
   struct nv_value *val;

   BLD_INSN_1_EX(val, LG2, F32, x, F32);
   BLD_INSN_2_EX(val, MUL, F32, e, F32, val, F32);
   val = bld_insn_1(bld, NV_OP_PREEX2, val);
   val = bld_insn_1(bld, NV_OP_EX2, val);

   return val;
}

static INLINE struct nv_value *
bld_load_imm_f32(struct bld_context *bld, float f)
{
   return bld_insn_1(bld, NV_OP_MOV, bld_imm_f32(bld, f));
}

static INLINE struct nv_value *
bld_load_imm_u32(struct bld_context *bld, uint32_t u)
{
   return bld_insn_1(bld, NV_OP_MOV, bld_imm_u32(bld, u));
}

static struct nv_value *
bld_get_address(struct bld_context *bld, int id, struct nv_value *indirect)
{
   int i;
   struct nv_instruction *nvi;

   for (i = 0; i < 4; ++i) {
      if (!bld->saved_addr[i][0])
         break;
      if (bld->saved_addr[i][1] == indirect) {
         nvi = bld->saved_addr[i][0]->insn;
         if (nvi->src[0]->value->reg.imm.u32 == id)
            return bld->saved_addr[i][0];
      }
   }
   i &= 3;

   bld->saved_addr[i][0] = bld_load_imm_u32(bld, id);
   bld->saved_addr[i][0]->reg.file = NV_FILE_ADDR;
   bld->saved_addr[i][1] = indirect;
   return bld->saved_addr[i][0];
}


static struct nv_value *
bld_predicate(struct bld_context *bld, struct nv_value *src, boolean bool_only)
{
   struct nv_instruction *s0i, *nvi = src->insn;

   if (!nvi) {
      nvi = bld_insn_1(bld,
                       (src->reg.file == NV_FILE_IMM) ? NV_OP_MOV : NV_OP_LDA,
                       src)->insn;
      src = nvi->def[0];
   } else
   if (bool_only) {
      while (nvi->opcode == NV_OP_ABS || nvi->opcode == NV_OP_NEG ||
             nvi->opcode == NV_OP_CVT) {
         s0i = nvi->src[0]->value->insn;
         if (!s0i ||
             s0i->opcode == NV_OP_LDA ||
             s0i->opcode == NV_OP_MOV ||
             s0i->opcode == NV_OP_PHI)
            break;
         nvi = s0i;
         assert(!nvi->flags_src);
      }
   }

   if (nvi->opcode == NV_OP_LDA ||
       nvi->opcode == NV_OP_MOV ||
       nvi->opcode == NV_OP_PHI || nvi->bb != bld->pc->current_block) {
      nvi = new_instruction(bld->pc, NV_OP_CVT);
      nv_reference(bld->pc, &nvi->src[0], src);
   }

   if (!nvi->flags_def) {
      nvi->flags_def = new_value(bld->pc, NV_FILE_FLAGS, NV_TYPE_U16);
      nvi->flags_def->insn = nvi;
   }
   return nvi->flags_def;
}

static void
bld_kil(struct bld_context *bld, struct nv_value *src)
{
   struct nv_instruction *nvi;

   src = bld_predicate(bld, src, FALSE);
   nvi = new_instruction(bld->pc, NV_OP_KIL);
   nvi->fixed = 1;
   nvi->flags_src = new_ref(bld->pc, src);
   nvi->cc = NV_CC_LT;
}

static void
bld_flow(struct bld_context *bld, uint opcode, ubyte cc,
         struct nv_value *src, struct nv_basic_block *target,
         boolean plan_reconverge)
{
   struct nv_instruction *nvi;

   if (plan_reconverge)
      new_instruction(bld->pc, NV_OP_JOINAT)->fixed = 1;

   nvi = new_instruction(bld->pc, opcode);
   nvi->is_terminator = 1;
   nvi->cc = cc;
   nvi->target = target;
   if (src)
      nvi->flags_src = new_ref(bld->pc, src);
}

static ubyte
translate_setcc(unsigned opcode)
{
   switch (opcode) {
   case TGSI_OPCODE_SLT: return NV_CC_LT;
   case TGSI_OPCODE_SGE: return NV_CC_GE;
   case TGSI_OPCODE_SEQ: return NV_CC_EQ;
   case TGSI_OPCODE_SGT: return NV_CC_GT;
   case TGSI_OPCODE_SLE: return NV_CC_LE;
   case TGSI_OPCODE_SNE: return NV_CC_NE | NV_CC_U;
   case TGSI_OPCODE_STR: return NV_CC_TR;
   case TGSI_OPCODE_SFL: return NV_CC_FL;

   case TGSI_OPCODE_ISLT: return NV_CC_LT;
   case TGSI_OPCODE_ISGE: return NV_CC_GE;
   case TGSI_OPCODE_USEQ: return NV_CC_EQ;
   case TGSI_OPCODE_USGE: return NV_CC_GE;
   case TGSI_OPCODE_USLT: return NV_CC_LT;
   case TGSI_OPCODE_USNE: return NV_CC_NE;
   default:
      assert(0);
      return NV_CC_FL;
   }
}

static uint
translate_opcode(uint opcode)
{
   switch (opcode) {
   case TGSI_OPCODE_ABS: return NV_OP_ABS;
   case TGSI_OPCODE_ADD:
   case TGSI_OPCODE_SUB:
   case TGSI_OPCODE_UADD: return NV_OP_ADD;
   case TGSI_OPCODE_AND: return NV_OP_AND;
   case TGSI_OPCODE_EX2: return NV_OP_EX2;
   case TGSI_OPCODE_CEIL: return NV_OP_CEIL;
   case TGSI_OPCODE_FLR: return NV_OP_FLOOR;
   case TGSI_OPCODE_TRUNC: return NV_OP_TRUNC;
   case TGSI_OPCODE_COS: return NV_OP_COS;
   case TGSI_OPCODE_SIN: return NV_OP_SIN;
   case TGSI_OPCODE_DDX: return NV_OP_DFDX;
   case TGSI_OPCODE_DDY: return NV_OP_DFDY;
   case TGSI_OPCODE_F2I:
   case TGSI_OPCODE_F2U:
   case TGSI_OPCODE_I2F:
   case TGSI_OPCODE_U2F: return NV_OP_CVT;
   case TGSI_OPCODE_INEG: return NV_OP_NEG;
   case TGSI_OPCODE_LG2: return NV_OP_LG2;
   case TGSI_OPCODE_ISHR:
   case TGSI_OPCODE_USHR: return NV_OP_SHR;
   case TGSI_OPCODE_MAD:
   case TGSI_OPCODE_UMAD: return NV_OP_MAD;
   case TGSI_OPCODE_MAX:
   case TGSI_OPCODE_IMAX:
   case TGSI_OPCODE_UMAX: return NV_OP_MAX;
   case TGSI_OPCODE_MIN:
   case TGSI_OPCODE_IMIN:
   case TGSI_OPCODE_UMIN: return NV_OP_MIN;
   case TGSI_OPCODE_MUL:
   case TGSI_OPCODE_UMUL: return NV_OP_MUL;
   case TGSI_OPCODE_OR: return NV_OP_OR;
   case TGSI_OPCODE_RCP: return NV_OP_RCP;
   case TGSI_OPCODE_RSQ: return NV_OP_RSQ;
   case TGSI_OPCODE_SAD: return NV_OP_SAD;
   case TGSI_OPCODE_SHL: return NV_OP_SHL;
   case TGSI_OPCODE_SLT:
   case TGSI_OPCODE_SGE:
   case TGSI_OPCODE_SEQ:
   case TGSI_OPCODE_SGT:
   case TGSI_OPCODE_SLE:
   case TGSI_OPCODE_SNE:
   case TGSI_OPCODE_ISLT:
   case TGSI_OPCODE_ISGE:
   case TGSI_OPCODE_USEQ:
   case TGSI_OPCODE_USGE:
   case TGSI_OPCODE_USLT:
   case TGSI_OPCODE_USNE: return NV_OP_SET;
   case TGSI_OPCODE_TEX: return NV_OP_TEX;
   case TGSI_OPCODE_TXP: return NV_OP_TEX;
   case TGSI_OPCODE_TXB: return NV_OP_TXB;
   case TGSI_OPCODE_TXL: return NV_OP_TXL;
   case TGSI_OPCODE_XOR: return NV_OP_XOR;
   default:
      return NV_OP_NOP;
   }
}

static ubyte
infer_src_type(unsigned opcode)
{
   switch (opcode) {
   case TGSI_OPCODE_MOV:
   case TGSI_OPCODE_AND:
   case TGSI_OPCODE_OR:
   case TGSI_OPCODE_XOR:
   case TGSI_OPCODE_SAD:
   case TGSI_OPCODE_U2F:
   case TGSI_OPCODE_UADD:
   case TGSI_OPCODE_UDIV:
   case TGSI_OPCODE_UMOD:
   case TGSI_OPCODE_UMAD:
   case TGSI_OPCODE_UMUL:
   case TGSI_OPCODE_UMAX:
   case TGSI_OPCODE_UMIN:
   case TGSI_OPCODE_USEQ:
   case TGSI_OPCODE_USGE:
   case TGSI_OPCODE_USLT:
   case TGSI_OPCODE_USNE:
   case TGSI_OPCODE_USHR:
      return NV_TYPE_U32;
   case TGSI_OPCODE_I2F:
   case TGSI_OPCODE_IDIV:
   case TGSI_OPCODE_IMAX:
   case TGSI_OPCODE_IMIN:
   case TGSI_OPCODE_INEG:
   case TGSI_OPCODE_ISGE:
   case TGSI_OPCODE_ISHR:
   case TGSI_OPCODE_ISLT:
      return NV_TYPE_S32;
   default:
      return NV_TYPE_F32;
   }
}

static ubyte
infer_dst_type(unsigned opcode)
{
   switch (opcode) {
   case TGSI_OPCODE_MOV:
   case TGSI_OPCODE_F2U:
   case TGSI_OPCODE_AND:
   case TGSI_OPCODE_OR:
   case TGSI_OPCODE_XOR:
   case TGSI_OPCODE_SAD:
   case TGSI_OPCODE_UADD:
   case TGSI_OPCODE_UDIV:
   case TGSI_OPCODE_UMOD:
   case TGSI_OPCODE_UMAD:
   case TGSI_OPCODE_UMUL:
   case TGSI_OPCODE_UMAX:
   case TGSI_OPCODE_UMIN:
   case TGSI_OPCODE_USEQ:
   case TGSI_OPCODE_USGE:
   case TGSI_OPCODE_USLT:
   case TGSI_OPCODE_USNE:
   case TGSI_OPCODE_USHR:
      return NV_TYPE_U32;
   case TGSI_OPCODE_F2I:
   case TGSI_OPCODE_IDIV:
   case TGSI_OPCODE_IMAX:
   case TGSI_OPCODE_IMIN:
   case TGSI_OPCODE_INEG:
   case TGSI_OPCODE_ISGE:
   case TGSI_OPCODE_ISHR:
   case TGSI_OPCODE_ISLT:
      return NV_TYPE_S32;
   default:
      return NV_TYPE_F32;
   }
}

static void
emit_store(struct bld_context *bld, const struct tgsi_full_instruction *inst,
           unsigned chan, struct nv_value *value)
{
   const struct tgsi_full_dst_register *reg = &inst->Dst[0];

   assert(chan < 4);

   if (inst->Instruction.Opcode != TGSI_OPCODE_MOV)
      value->reg.type = infer_dst_type(inst->Instruction.Opcode);

   switch (inst->Instruction.Saturate) {
   case TGSI_SAT_NONE:
      break;
   case TGSI_SAT_ZERO_ONE:
      BLD_INSN_1_EX(value, SAT, F32, value, F32);
      break;
   case TGSI_SAT_MINUS_PLUS_ONE:
      value = bld_insn_2(bld, NV_OP_MAX, value, bld_load_imm_f32(bld, -1.0f));
      value = bld_insn_2(bld, NV_OP_MIN, value, bld_load_imm_f32(bld, 1.0f));
      value->reg.type = NV_TYPE_F32;
      break;
   }

   switch (reg->Register.File) {
   case TGSI_FILE_OUTPUT:
      value = bld_insn_1(bld, NV_OP_MOV, value);
      value->reg.file = bld->ti->output_file;

      if (bld->ti->p->type == PIPE_SHADER_FRAGMENT) {
         STORE_OUTR(reg->Register.Index, chan, value);
      } else {
         value->insn->fixed = 1;
         value->reg.id = bld->ti->output_map[reg->Register.Index][chan];
      }
      break;
   case TGSI_FILE_TEMPORARY:
      assert(reg->Register.Index < BLD_MAX_TEMPS);
      value->reg.file = NV_FILE_GPR;
      if (value->insn->bb != bld->pc->current_block)
         value = bld_insn_1(bld, NV_OP_MOV, value);
      STORE_TEMP(reg->Register.Index, chan, value);
      break;
   case TGSI_FILE_ADDRESS:
      assert(reg->Register.Index < BLD_MAX_ADDRS);
      value->reg.file = NV_FILE_ADDR;
      STORE_ADDR(reg->Register.Index, chan, value);
      break;
   }
}

static INLINE uint32_t
bld_is_output_written(struct bld_context *bld, int i, int c)
{
   if (c < 0)
      return bld->outputs_written[i / 8] & (0xf << ((i * 4) % 32));
   return bld->outputs_written[i / 8] & (1 << ((i * 4 + c) % 32));
}

static void
bld_export_outputs(struct bld_context *bld)
{
   struct nv_value *vals[4];
   struct nv_instruction *nvi;
   int i, c, n;

   bld_push_values(&bld->ovs[0][0], PIPE_MAX_SHADER_OUTPUTS);

   for (i = 0; i < PIPE_MAX_SHADER_OUTPUTS; ++i) {
      if (!bld_is_output_written(bld, i, -1))
         continue;
      for (n = 0, c = 0; c < 4; ++c) {
         if (!bld_is_output_written(bld, i, c))
            continue;
         vals[n] = bld_fetch_global(bld, &bld->ovs[i][c]);
         assert(vals[n]);
         vals[n] = bld_insn_1(bld, NV_OP_MOV, vals[n]);
         vals[n++]->reg.id = bld->ti->output_map[i][c];
      }
      assert(n);

      (nvi = new_instruction(bld->pc, NV_OP_EXPORT))->fixed = 1;

      for (c = 0; c < n; ++c)
         nvi->src[c] = new_ref(bld->pc, vals[c]);
   }
}

static void
bld_new_block(struct bld_context *bld, struct nv_basic_block *b)
{
   int i;

   bld_push_values(&bld->tvs[0][0], BLD_MAX_TEMPS);
   bld_push_values(&bld->avs[0][0], BLD_MAX_ADDRS);
   bld_push_values(&bld->pvs[0][0], BLD_MAX_PREDS);
   bld_push_values(&bld->ovs[0][0], PIPE_MAX_SHADER_OUTPUTS);

   bld->pc->current_block = b;

   for (i = 0; i < 4; ++i)
      bld->saved_addr[i][0] = NULL;

   for (i = 0; i < 128; ++i)
      bld->saved_inputs[i] = NULL;

   bld->out_kind = CFG_EDGE_FORWARD;
}

static struct nv_value *
bld_saved_input(struct bld_context *bld, unsigned i, unsigned c)
{
   unsigned idx = bld->ti->input_map[i][c];

   if (bld->ti->p->type != PIPE_SHADER_FRAGMENT)
      return NULL;
   if (bld->saved_inputs[idx])
      return bld->saved_inputs[idx];
   return NULL;
}

static struct nv_value *
bld_interpolate(struct bld_context *bld, unsigned mode, struct nv_value *val)
{
   if (val->reg.id == 255) {
      /* gl_FrontFacing: 0/~0 to -1.0/+1.0 */
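      /* Bit trick (sketch): SHL by 31 moves bit 0 of the 0/~0 mask into the
       * sign bit; XOR with the bit pattern of -1.0f (0xbf800000) then yields
       * 0x3f800000 (+1.0) for ~0 and 0xbf800000 (-1.0) for 0.
       */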
      val = bld_insn_1(bld, NV_OP_LINTERP, val);
      val = bld_insn_2(bld, NV_OP_SHL, val, bld_imm_u32(bld, 31));
      val->insn->src[0]->typecast = NV_TYPE_U32;
      val = bld_insn_2(bld, NV_OP_XOR, val, bld_imm_f32(bld, -1.0f));
      val->insn->src[0]->typecast = NV_TYPE_U32;
   } else
   if (mode & (NV50_INTERP_LINEAR | NV50_INTERP_FLAT))
      val = bld_insn_1(bld, NV_OP_LINTERP, val);
   else
      val = bld_insn_2(bld, NV_OP_PINTERP, val, bld->frgcrd[3]);

   val->insn->flat = (mode & NV50_INTERP_FLAT) ? 1 : 0;
   val->insn->centroid = (mode & NV50_INTERP_CENTROID) ? 1 : 0;
   return val;
}

static struct nv_value *
emit_fetch(struct bld_context *bld, const struct tgsi_full_instruction *insn,
           const unsigned s, const unsigned chan)
{
   const struct tgsi_full_src_register *src = &insn->Src[s];
   struct nv_value *res;
   unsigned idx, swz, dim_idx, ind_idx, ind_swz;
   ubyte type = infer_src_type(insn->Instruction.Opcode);

   idx = src->Register.Index;
   swz = tgsi_util_get_full_src_register_swizzle(src, chan);
   dim_idx = -1;
   ind_idx = -1;
   ind_swz = 0;

   if (src->Register.Indirect) {
      ind_idx = src->Indirect.Index;
      ind_swz = tgsi_util_get_src_register_swizzle(&src->Indirect, 0);
   }

   switch (src->Register.File) {
   case TGSI_FILE_CONSTANT:
      dim_idx = src->Dimension.Index ? src->Dimension.Index + 2 : 1;
      assert(dim_idx < 14);
      assert(dim_idx == 1); /* for now */

      res = new_value(bld->pc, NV_FILE_MEM_C(dim_idx), type);
      res->reg.type = type;
      res->reg.id = (idx * 4 + swz) & 127;
      res = bld_insn_1(bld, NV_OP_LDA, res);

      if (src->Register.Indirect)
         res->insn->src[4] = new_ref(bld->pc, FETCH_ADDR(ind_idx, ind_swz));
      if (idx >= (128 / 4))
         res->insn->src[4] =
            new_ref(bld->pc, bld_get_address(bld, (idx * 16) & ~0x1ff, NULL));
      break;
   case TGSI_FILE_IMMEDIATE:
      assert(idx < bld->ti->immd32_nr);
      res = bld_load_imm_u32(bld, bld->ti->immd32[idx * 4 + swz]);

      switch (bld->ti->immd32_ty[idx]) {
      case TGSI_IMM_FLOAT32: res->reg.type = NV_TYPE_F32; break;
      case TGSI_IMM_UINT32: res->reg.type = NV_TYPE_U32; break;
      case TGSI_IMM_INT32: res->reg.type = NV_TYPE_S32; break;
      default:
         res->reg.type = type;
         break;
      }
      break;
   case TGSI_FILE_INPUT:
      res = bld_saved_input(bld, idx, swz);
      if (res && (insn->Instruction.Opcode != TGSI_OPCODE_TXP))
         return res;

      res = new_value(bld->pc, bld->ti->input_file, type);
      res->reg.id = bld->ti->input_map[idx][swz];

      if (res->reg.file == NV_FILE_MEM_V) {
         res = bld_interpolate(bld, bld->ti->interp_mode[idx], res);
      } else {
         assert(src->Dimension.Dimension == 0);
         res = bld_insn_1(bld, NV_OP_LDA, res);
         assert(res->reg.type == type);
      }
      bld->saved_inputs[bld->ti->input_map[idx][swz]] = res;
      break;
   case TGSI_FILE_TEMPORARY:
      /* this should be load from l[], with reload elimination later on */
      res = bld_fetch_global(bld, &bld->tvs[idx][swz]);
      break;
   case TGSI_FILE_ADDRESS:
      res = bld_fetch_global(bld, &bld->avs[idx][swz]);
      break;
   case TGSI_FILE_PREDICATE:
      res = bld_fetch_global(bld, &bld->pvs[idx][swz]);
      break;
   default:
      NOUVEAU_ERR("illegal/unhandled src reg file: %d\n", src->Register.File);
      abort();
      break;
   }
   if (!res)
      return bld_undef(bld, NV_FILE_GPR);

   switch (tgsi_util_get_full_src_register_sign_mode(src, chan)) {
   case TGSI_UTIL_SIGN_KEEP:
      break;
   case TGSI_UTIL_SIGN_CLEAR:
      res = bld_insn_1(bld, NV_OP_ABS, res);
      break;
   case TGSI_UTIL_SIGN_TOGGLE:
      res = bld_insn_1(bld, NV_OP_NEG, res);
      break;
   case TGSI_UTIL_SIGN_SET:
      res = bld_insn_1(bld, NV_OP_ABS, res);
      res = bld_insn_1(bld, NV_OP_NEG, res);
      break;
   default:
      NOUVEAU_ERR("illegal/unhandled src reg sign mode\n");
      abort();
      break;
   }

   return res;
}

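/* LIT semantics for reference (TGSI spec, sketched from the code below):
 * dst.x = dst.w = 1.0, dst.y = max(src.x, 0),
 * dst.z = (src.x > 0) ? max(src.y, 0)^clamp(src.w) : 0,
 * with the exponent clamped to roughly +/-128 (127.999999 here).
 */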
static void
bld_lit(struct bld_context *bld, struct nv_value *dst0[4],
        const struct tgsi_full_instruction *insn)
{
   struct nv_value *val0, *zero;
   unsigned mask = insn->Dst[0].Register.WriteMask;

   if (mask & ((1 << 0) | (1 << 3)))
      dst0[3] = dst0[0] = bld_load_imm_f32(bld, 1.0f);

   if (mask & (3 << 1)) {
      zero = bld_load_imm_f32(bld, 0.0f);
      val0 = bld_insn_2(bld, NV_OP_MAX, emit_fetch(bld, insn, 0, 0), zero);

      if (mask & (1 << 1))
         dst0[1] = val0;
   }

   if (mask & (1 << 2)) {
      struct nv_value *val1, *val3, *src1, *src3;
      struct nv_value *pos128 = bld_load_imm_f32(bld, 127.999999f);
      struct nv_value *neg128 = bld_load_imm_f32(bld, -127.999999f);

      src1 = emit_fetch(bld, insn, 0, 1);
      src3 = emit_fetch(bld, insn, 0, 3);

      val0->insn->flags_def = new_value(bld->pc, NV_FILE_FLAGS, NV_TYPE_U16);
      val0->insn->flags_def->insn = val0->insn;

      val1 = bld_insn_2(bld, NV_OP_MAX, src1, zero);
      val3 = bld_insn_2(bld, NV_OP_MAX, src3, neg128);
      val3 = bld_insn_2(bld, NV_OP_MIN, val3, pos128);
      val3 = bld_pow(bld, val1, val3);

      dst0[2] = bld_insn_1(bld, NV_OP_MOV, zero);
      dst0[2]->insn->cc = NV_CC_LE;
      dst0[2]->insn->flags_src = new_ref(bld->pc, val0->insn->flags_def);

      dst0[2] = bld_insn_2(bld, NV_OP_SELECT, val3, dst0[2]);
   }
}

static INLINE void
get_tex_dim(const struct tgsi_full_instruction *insn, int *dim, int *arg)
{
   switch (insn->Texture.Texture) {
   case TGSI_TEXTURE_1D:
      *arg = *dim = 1;
      break;
   case TGSI_TEXTURE_SHADOW1D:
      *dim = 1;
      *arg = 2;
      break;
   case TGSI_TEXTURE_UNKNOWN:
   case TGSI_TEXTURE_2D:
   case TGSI_TEXTURE_RECT:
      *arg = *dim = 2;
      break;
   case TGSI_TEXTURE_SHADOW2D:
   case TGSI_TEXTURE_SHADOWRECT:
      *dim = 2;
      *arg = 3;
      break;
   case TGSI_TEXTURE_3D:
   case TGSI_TEXTURE_CUBE:
      *dim = *arg = 3;
      break;
   default:
      assert(0);
      break;
   }
}

static void
load_proj_tex_coords(struct bld_context *bld,
                     struct nv_value *t[4], int dim,
                     const struct tgsi_full_instruction *insn)
{
   int c, mask = 0;

   t[3] = emit_fetch(bld, insn, 0, 3);

   if (t[3]->insn->opcode == NV_OP_PINTERP) {
      t[3]->insn->opcode = NV_OP_LINTERP;
      nv_reference(bld->pc, &t[3]->insn->src[1], NULL);
   }

   t[3] = bld_insn_1(bld, NV_OP_RCP, t[3]);

   for (c = 0; c < dim; ++c) {
      t[c] = emit_fetch(bld, insn, 0, c);
      if (t[c]->insn->opcode == NV_OP_LINTERP)
         t[c]->insn->opcode = NV_OP_PINTERP;

      if (t[c]->insn->opcode == NV_OP_PINTERP)
         nv_reference(bld->pc, &t[c]->insn->src[1], t[3]);
      else
         mask |= 1 << c;
   }

   for (c = 0; mask; ++c, mask >>= 1) {
      if (!(mask & 1))
         continue;
      t[c] = bld_insn_2(bld, NV_OP_MUL, t[c], t[3]);
   }
}

/* For a quad of threads / top left, top right, bottom left, bottom right
 * pixels, do a different operation, and take src0 from a specific thread.
 */
#define QOP_ADD 0
#define QOP_SUBR 1
#define QOP_SUB 2
#define QOP_MOV1 3

#define QOP(a, b, c, d) \
   ((QOP_##a << 0) | (QOP_##b << 2) | (QOP_##c << 4) | (QOP_##d << 6))
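
/* E.g. QOP(SUBR, SUBR, SUBR, SUBR) with lane l presumably makes every lane
 * compute src1 - src0, with src0 broadcast from lane l; bld_texbias_sequence
 * below uses this to find the lanes whose bias value equals lane l's.
 */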

static INLINE struct nv_value *
bld_quadop(struct bld_context *bld, ubyte qop, struct nv_value *src0, int lane,
           struct nv_value *src1, boolean wp)
{
   struct nv_value *val = bld_insn_2(bld, NV_OP_QUADOP, src0, src1);
   val->insn->lanes = lane;
   val->insn->quadop = qop;
   if (wp) {
      val->insn->flags_def = new_value(bld->pc, NV_FILE_FLAGS, NV_TYPE_U16);
      val->insn->flags_def->insn = val->insn;
   }
   return val;
}

static INLINE struct nv_value *
bld_cmov(struct bld_context *bld,
         struct nv_value *src, ubyte cc, struct nv_value *cr)
{
   src = bld_insn_1(bld, NV_OP_MOV, src);

   src->insn->cc = cc;
   src->insn->flags_src = new_ref(bld->pc, cr);

   return src;
}

static struct nv_instruction *
emit_tex(struct bld_context *bld, uint opcode,
         struct nv_value *dst[4], struct nv_value *t_in[4],
         int argc, int tic, int tsc, int cube)
{
   struct nv_value *t[4];
   struct nv_instruction *nvi;
   int c;

   /* the inputs to a tex instruction must be separate values */
   for (c = 0; c < argc; ++c) {
      t[c] = bld_insn_1(bld, NV_OP_MOV, t_in[c]);
      t[c]->reg.type = NV_TYPE_F32;
      t[c]->insn->fixed = 1;
   }

   nvi = new_instruction(bld->pc, opcode);

   for (c = 0; c < 4; ++c)
      dst[c] = bld_def(nvi, c, new_value(bld->pc, NV_FILE_GPR, NV_TYPE_F32));

   for (c = 0; c < argc; ++c)
      nvi->src[c] = new_ref(bld->pc, t[c]);

   nvi->tex_t = tic;
   nvi->tex_s = tsc;
   nvi->tex_mask = 0xf;
   nvi->tex_cube = cube;
   nvi->tex_live = 0;
   nvi->tex_argc = argc;

   return nvi;
}

static void
bld_texlod_sequence(struct bld_context *bld,
                    struct nv_value *dst[4], struct nv_value *t[4], int arg,
                    int tic, int tsc, int cube)
{
   emit_tex(bld, NV_OP_TXL, dst, t, arg, tic, tsc, cube); /* TODO */
}


/* The lanes of a quad are grouped by the bit in the condition register
 * they have set, which is selected by differing bias values.
 * Move the input values for TEX into a new register set for each group
 * and execute TEX only for a specific group.
 * We always need to use 4 new registers for the inputs/outputs because
 * the implicitly calculated derivatives must be correct.
 */
static void
bld_texbias_sequence(struct bld_context *bld,
                     struct nv_value *dst[4], struct nv_value *t[4], int arg,
                     int tic, int tsc, int cube)
{
   struct nv_instruction *sel, *tex;
   struct nv_value *bit[4], *cr[4], *res[4][4], *val;
   int l, c;

   const ubyte cc[4] = { NV_CC_EQ, NV_CC_S, NV_CC_C, NV_CC_O };

   for (l = 0; l < 4; ++l) {
      bit[l] = bld_load_imm_u32(bld, 1 << l);

      val = bld_quadop(bld, QOP(SUBR, SUBR, SUBR, SUBR),
                       t[arg - 1], l, t[arg - 1], TRUE);

      cr[l] = bld_cmov(bld, bit[l], NV_CC_EQ, val->insn->flags_def);

      cr[l]->reg.file = NV_FILE_FLAGS;
      cr[l]->reg.type = NV_TYPE_U16;
   }

   sel = new_instruction(bld->pc, NV_OP_SELECT);

   for (l = 0; l < 4; ++l)
      sel->src[l] = new_ref(bld->pc, cr[l]);

   bld_def(sel, 0, new_value(bld->pc, NV_FILE_FLAGS, NV_TYPE_U16));

   for (l = 0; l < 4; ++l) {
      tex = emit_tex(bld, NV_OP_TXB, dst, t, arg, tic, tsc, cube);

      tex->cc = cc[l];
      tex->flags_src = new_ref(bld->pc, sel->def[0]);

      for (c = 0; c < 4; ++c)
         res[l][c] = tex->def[c];
   }

   for (l = 0; l < 4; ++l)
      for (c = 0; c < 4; ++c)
         res[l][c] = bld_cmov(bld, res[l][c], cc[l], sel->def[0]);

   for (c = 0; c < 4; ++c) {
      sel = new_instruction(bld->pc, NV_OP_SELECT);

      for (l = 0; l < 4; ++l)
         sel->src[l] = new_ref(bld->pc, res[l][c]);

      bld_def(sel, 0, (dst[c] = new_value(bld->pc, NV_FILE_GPR, NV_TYPE_F32)));
   }
}

static boolean
bld_is_constant(struct nv_value *val)
{
   if (val->reg.file == NV_FILE_IMM)
      return TRUE;
   return val->insn && nvcg_find_constant(val->insn->src[0]);
}

static void
bld_tex(struct bld_context *bld, struct nv_value *dst0[4],
        const struct tgsi_full_instruction *insn)
{
   struct nv_value *t[4], *s[3];
   uint opcode = translate_opcode(insn->Instruction.Opcode);
   int arg, dim, c;
   const int tic = insn->Src[1].Register.Index;
   const int tsc = 0;
   const int cube = (insn->Texture.Texture == TGSI_TEXTURE_CUBE) ? 1 : 0;

   get_tex_dim(insn, &dim, &arg);

   if (!cube && insn->Instruction.Opcode == TGSI_OPCODE_TXP)
      load_proj_tex_coords(bld, t, dim, insn);
   else
      for (c = 0; c < dim; ++c)
         t[c] = emit_fetch(bld, insn, 0, c);

   if (cube) {
      assert(dim >= 3);
      for (c = 0; c < 3; ++c)
         s[c] = bld_insn_1(bld, NV_OP_ABS, t[c]);

      s[0] = bld_insn_2(bld, NV_OP_MAX, s[0], s[1]);
      s[0] = bld_insn_2(bld, NV_OP_MAX, s[0], s[2]);
      s[0] = bld_insn_1(bld, NV_OP_RCP, s[0]);

      for (c = 0; c < 3; ++c)
         t[c] = bld_insn_2(bld, NV_OP_MUL, t[c], s[0]);
   }

   if (arg != dim)
      t[dim] = emit_fetch(bld, insn, 0, 2);

   if (opcode == NV_OP_TXB || opcode == NV_OP_TXL) {
      t[arg++] = emit_fetch(bld, insn, 0, 3);

      if ((bld->ti->p->type == PIPE_SHADER_FRAGMENT) &&
          !bld_is_constant(t[arg - 1])) {
         if (opcode == NV_OP_TXB)
            bld_texbias_sequence(bld, dst0, t, arg, tic, tsc, cube);
         else
            bld_texlod_sequence(bld, dst0, t, arg, tic, tsc, cube);
         return;
      }
   }

   emit_tex(bld, opcode, dst0, t, arg, tic, tsc, cube);
}

static INLINE struct nv_value *
bld_dot(struct bld_context *bld, const struct tgsi_full_instruction *insn,
        int n)
{
   struct nv_value *dotp, *src0, *src1;
   int c;

   src0 = emit_fetch(bld, insn, 0, 0);
   src1 = emit_fetch(bld, insn, 1, 0);
   dotp = bld_insn_2(bld, NV_OP_MUL, src0, src1);

   for (c = 1; c < n; ++c) {
      src0 = emit_fetch(bld, insn, 0, c);
      src1 = emit_fetch(bld, insn, 1, c);
      dotp = bld_insn_3(bld, NV_OP_MAD, src0, src1, dotp);
   }
   return dotp;
}

#define FOR_EACH_DST0_ENABLED_CHANNEL(chan, inst) \
   for (chan = 0; chan < 4; ++chan) \
      if ((inst)->Dst[0].Register.WriteMask & (1 << chan))

static void
bld_instruction(struct bld_context *bld,
                const struct tgsi_full_instruction *insn)
{
   struct nv_value *src0;
   struct nv_value *src1;
   struct nv_value *src2;
   struct nv_value *dst0[4];
   struct nv_value *temp;
   int c;
   uint opcode = translate_opcode(insn->Instruction.Opcode);

#ifdef NV50_TGSI2NC_DEBUG
   debug_printf("bld_instruction:"); tgsi_dump_instruction(insn, 1);
#endif

   switch (insn->Instruction.Opcode) {
   case TGSI_OPCODE_ADD:
   case TGSI_OPCODE_MAX:
   case TGSI_OPCODE_MIN:
   case TGSI_OPCODE_MUL:
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
         src0 = emit_fetch(bld, insn, 0, c);
         src1 = emit_fetch(bld, insn, 1, c);
         dst0[c] = bld_insn_2(bld, opcode, src0, src1);
      }
      break;
   case TGSI_OPCODE_ARL:
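      /* ARL: floor the float index, then shift left by 4; presumably the
       * address unit is bytes, 16 per vec4 (cf. the idx * 16 used for
       * indirect constant access in emit_fetch above).
       */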
      src1 = bld_imm_u32(bld, 4);
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
         src0 = emit_fetch(bld, insn, 0, c);
         (temp = bld_insn_1(bld, NV_OP_FLOOR, src0))->reg.type = NV_TYPE_S32;
         dst0[c] = bld_insn_2(bld, NV_OP_SHL, temp, src1);
      }
      break;
   case TGSI_OPCODE_CMP:
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
         src0 = emit_fetch(bld, insn, 0, c);
         src1 = emit_fetch(bld, insn, 1, c);
         src2 = emit_fetch(bld, insn, 2, c);
         src0 = bld_predicate(bld, src0, FALSE);

         src1 = bld_insn_1(bld, NV_OP_MOV, src1);
         src1->insn->flags_src = new_ref(bld->pc, src0);
         src1->insn->cc = NV_CC_LT;

         src2 = bld_insn_1(bld, NV_OP_MOV, src2);
         src2->insn->flags_src = new_ref(bld->pc, src0);
         src2->insn->cc = NV_CC_GE;

         dst0[c] = bld_insn_2(bld, NV_OP_SELECT, src1, src2);
      }
      break;
   case TGSI_OPCODE_COS:
   case TGSI_OPCODE_SIN:
      src0 = emit_fetch(bld, insn, 0, 0);
      temp = bld_insn_1(bld, NV_OP_PRESIN, src0);
      if (insn->Dst[0].Register.WriteMask & 7)
         temp = bld_insn_1(bld, opcode, temp);
      for (c = 0; c < 3; ++c)
         if (insn->Dst[0].Register.WriteMask & (1 << c))
            dst0[c] = temp;
      if (!(insn->Dst[0].Register.WriteMask & (1 << 3)))
         break;
      src0 = emit_fetch(bld, insn, 0, 3);
      temp = bld_insn_1(bld, NV_OP_PRESIN, src0);
      dst0[3] = bld_insn_1(bld, opcode, temp);
      break;
   case TGSI_OPCODE_DP2:
      temp = bld_dot(bld, insn, 2);
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
         dst0[c] = temp;
      break;
   case TGSI_OPCODE_DP3:
      temp = bld_dot(bld, insn, 3);
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
         dst0[c] = temp;
      break;
   case TGSI_OPCODE_DP4:
      temp = bld_dot(bld, insn, 4);
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
         dst0[c] = temp;
      break;
   case TGSI_OPCODE_DPH:
      src0 = bld_dot(bld, insn, 3);
      src1 = emit_fetch(bld, insn, 1, 3);
      temp = bld_insn_2(bld, NV_OP_ADD, src0, src1);
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
         dst0[c] = temp;
      break;
   case TGSI_OPCODE_DST:
      if (insn->Dst[0].Register.WriteMask & 1)
         dst0[0] = bld_imm_f32(bld, 1.0f);
      if (insn->Dst[0].Register.WriteMask & 2) {
         src0 = emit_fetch(bld, insn, 0, 1);
         src1 = emit_fetch(bld, insn, 1, 1);
         dst0[1] = bld_insn_2(bld, NV_OP_MUL, src0, src1);
      }
      if (insn->Dst[0].Register.WriteMask & 4)
         dst0[2] = emit_fetch(bld, insn, 0, 2);
      if (insn->Dst[0].Register.WriteMask & 8)
         dst0[3] = emit_fetch(bld, insn, 1, 3);
      break;
   case TGSI_OPCODE_EX2:
      src0 = emit_fetch(bld, insn, 0, 0);
      temp = bld_insn_1(bld, NV_OP_PREEX2, src0);
      temp = bld_insn_1(bld, NV_OP_EX2, temp);
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
         dst0[c] = temp;
      break;
   case TGSI_OPCODE_FRC:
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
         src0 = emit_fetch(bld, insn, 0, c);
         dst0[c] = bld_insn_1(bld, NV_OP_FLOOR, src0);
         dst0[c] = bld_insn_2(bld, NV_OP_SUB, src0, dst0[c]);
      }
      break;
   case TGSI_OPCODE_KIL:
      for (c = 0; c < 4; ++c) {
         src0 = emit_fetch(bld, insn, 0, c);
         bld_kil(bld, src0);
      }
      break;
   case TGSI_OPCODE_KILP:
      (new_instruction(bld->pc, NV_OP_KIL))->fixed = 1;
      break;
   case TGSI_OPCODE_IF:
   {
      struct nv_basic_block *b = new_basic_block(bld->pc);

      nvbb_attach_block(bld->pc->current_block, b, CFG_EDGE_FORWARD);

      bld->join_bb[bld->cond_lvl] = bld->pc->current_block;
      bld->cond_bb[bld->cond_lvl] = bld->pc->current_block;

      src1 = bld_predicate(bld, emit_fetch(bld, insn, 0, 0), TRUE);

      bld_flow(bld, NV_OP_BRA, NV_CC_EQ, src1, NULL, (bld->cond_lvl == 0));

      ++bld->cond_lvl;
      bld_new_block(bld, b);
   }
      break;
   case TGSI_OPCODE_ELSE:
   {
      struct nv_basic_block *b = new_basic_block(bld->pc);

      --bld->cond_lvl;
      nvbb_attach_block(bld->join_bb[bld->cond_lvl], b, CFG_EDGE_FORWARD);

      bld->cond_bb[bld->cond_lvl]->exit->target = b;
      bld->cond_bb[bld->cond_lvl] = bld->pc->current_block;

      new_instruction(bld->pc, NV_OP_BRA)->is_terminator = 1;

      ++bld->cond_lvl;
      bld_new_block(bld, b);
   }
      break;
   case TGSI_OPCODE_ENDIF:
   {
      struct nv_basic_block *b = new_basic_block(bld->pc);

      --bld->cond_lvl;
      nvbb_attach_block(bld->pc->current_block, b, bld->out_kind);
      nvbb_attach_block(bld->cond_bb[bld->cond_lvl], b, CFG_EDGE_FORWARD);

      bld->cond_bb[bld->cond_lvl]->exit->target = b;

      bld_new_block(bld, b);

      if (!bld->cond_lvl && bld->join_bb[bld->cond_lvl]) {
         bld->join_bb[bld->cond_lvl]->exit->prev->target = b;
         new_instruction(bld->pc, NV_OP_JOIN)->is_join = TRUE;
      }
   }
      break;
   case TGSI_OPCODE_BGNLOOP:
   {
      struct nv_basic_block *bl = new_basic_block(bld->pc);
      struct nv_basic_block *bb = new_basic_block(bld->pc);

      bld->loop_bb[bld->loop_lvl] = bl;
      bld->brkt_bb[bld->loop_lvl] = bb;

      bld_flow(bld, NV_OP_BREAKADDR, NV_CC_TR, NULL, bb, FALSE);

      nvbb_attach_block(bld->pc->current_block, bl, CFG_EDGE_LOOP_ENTER);

      bld_new_block(bld, bld->loop_bb[bld->loop_lvl++]);

      if (bld->loop_lvl == bld->pc->loop_nesting_bound)
         bld->pc->loop_nesting_bound++;

      bld_clear_def_use(&bld->tvs[0][0], BLD_MAX_TEMPS, bld->loop_lvl);
      bld_clear_def_use(&bld->avs[0][0], BLD_MAX_ADDRS, bld->loop_lvl);
      bld_clear_def_use(&bld->pvs[0][0], BLD_MAX_PREDS, bld->loop_lvl);
   }
      break;
   case TGSI_OPCODE_BRK:
   {
      struct nv_basic_block *bb = bld->brkt_bb[bld->loop_lvl - 1];

      bld_flow(bld, NV_OP_BREAK, NV_CC_TR, NULL, bb, FALSE);

      if (bld->out_kind == CFG_EDGE_FORWARD) /* else we already had BRK/CONT */
         nvbb_attach_block(bld->pc->current_block, bb, CFG_EDGE_LOOP_LEAVE);

      bld->out_kind = CFG_EDGE_FAKE;
   }
      break;
   case TGSI_OPCODE_CONT:
   {
      struct nv_basic_block *bb = bld->loop_bb[bld->loop_lvl - 1];

      bld_flow(bld, NV_OP_BRA, NV_CC_TR, NULL, bb, FALSE);

      nvbb_attach_block(bld->pc->current_block, bb, CFG_EDGE_BACK);

      if ((bb = bld->join_bb[bld->cond_lvl - 1])) {
         bld->join_bb[bld->cond_lvl - 1] = NULL;
         nv_nvi_delete(bb->exit->prev);
      }
      bld->out_kind = CFG_EDGE_FAKE;
   }
      break;
   case TGSI_OPCODE_ENDLOOP:
   {
      struct nv_basic_block *bb = bld->loop_bb[bld->loop_lvl - 1];

      bld_flow(bld, NV_OP_BRA, NV_CC_TR, NULL, bb, FALSE);

      nvbb_attach_block(bld->pc->current_block, bb, CFG_EDGE_BACK);

      bld_loop_end(bld, bb); /* replace loop-side operand of the phis */

      bld_new_block(bld, bld->brkt_bb[--bld->loop_lvl]);
   }
      break;
   case TGSI_OPCODE_ABS:
   case TGSI_OPCODE_CEIL:
   case TGSI_OPCODE_FLR:
   case TGSI_OPCODE_TRUNC:
   case TGSI_OPCODE_DDX:
   case TGSI_OPCODE_DDY:
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
         src0 = emit_fetch(bld, insn, 0, c);
         dst0[c] = bld_insn_1(bld, opcode, src0);
      }
      break;
   case TGSI_OPCODE_LIT:
      bld_lit(bld, dst0, insn);
      break;
   case TGSI_OPCODE_LRP:
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
         src0 = emit_fetch(bld, insn, 0, c);
         src1 = emit_fetch(bld, insn, 1, c);
         src2 = emit_fetch(bld, insn, 2, c);
         dst0[c] = bld_insn_2(bld, NV_OP_SUB, src1, src2);
         dst0[c] = bld_insn_3(bld, NV_OP_MAD, dst0[c], src0, src2);
      }
      break;
   case TGSI_OPCODE_MOV:
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
         dst0[c] = emit_fetch(bld, insn, 0, c);
      break;
   case TGSI_OPCODE_MAD:
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
         src0 = emit_fetch(bld, insn, 0, c);
         src1 = emit_fetch(bld, insn, 1, c);
         src2 = emit_fetch(bld, insn, 2, c);
         dst0[c] = bld_insn_3(bld, opcode, src0, src1, src2);
      }
      break;
   case TGSI_OPCODE_POW:
      src0 = emit_fetch(bld, insn, 0, 0);
      src1 = emit_fetch(bld, insn, 1, 0);
      temp = bld_pow(bld, src0, src1);
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
         dst0[c] = temp;
      break;
   case TGSI_OPCODE_RCP:
   case TGSI_OPCODE_LG2:
      src0 = emit_fetch(bld, insn, 0, 0);
      temp = bld_insn_1(bld, opcode, src0);
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
         dst0[c] = temp;
      break;
   case TGSI_OPCODE_RSQ:
      src0 = emit_fetch(bld, insn, 0, 0);
      temp = bld_insn_1(bld, NV_OP_ABS, src0);
      temp = bld_insn_1(bld, NV_OP_RSQ, temp);
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
         dst0[c] = temp;
      break;
   case TGSI_OPCODE_SLT:
   case TGSI_OPCODE_SGE:
   case TGSI_OPCODE_SEQ:
   case TGSI_OPCODE_SGT:
   case TGSI_OPCODE_SLE:
   case TGSI_OPCODE_SNE:
   case TGSI_OPCODE_ISLT:
   case TGSI_OPCODE_ISGE:
   case TGSI_OPCODE_USEQ:
   case TGSI_OPCODE_USGE:
   case TGSI_OPCODE_USLT:
   case TGSI_OPCODE_USNE:
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
         src0 = emit_fetch(bld, insn, 0, c);
         src1 = emit_fetch(bld, insn, 1, c);
         dst0[c] = bld_insn_2(bld, NV_OP_SET, src0, src1);
         dst0[c]->insn->set_cond = translate_setcc(insn->Instruction.Opcode);
         dst0[c]->reg.type = infer_dst_type(insn->Instruction.Opcode);

         dst0[c]->insn->src[0]->typecast =
         dst0[c]->insn->src[1]->typecast =
            infer_src_type(insn->Instruction.Opcode);

         if (dst0[c]->reg.type != NV_TYPE_F32)
            break;
         dst0[c] = bld_insn_1(bld, NV_OP_ABS, dst0[c]);
         dst0[c]->insn->src[0]->typecast = NV_TYPE_S32;
         dst0[c]->reg.type = NV_TYPE_S32;
         dst0[c] = bld_insn_1(bld, NV_OP_CVT, dst0[c]);
         dst0[c]->reg.type = NV_TYPE_F32;
      }
      break;
   case TGSI_OPCODE_SCS:
      if (insn->Dst[0].Register.WriteMask & 0x3) {
         src0 = emit_fetch(bld, insn, 0, 0);
         temp = bld_insn_1(bld, NV_OP_PRESIN, src0);
         if (insn->Dst[0].Register.WriteMask & 0x1)
            dst0[0] = bld_insn_1(bld, NV_OP_COS, temp);
         if (insn->Dst[0].Register.WriteMask & 0x2)
            dst0[1] = bld_insn_1(bld, NV_OP_SIN, temp);
      }
      if (insn->Dst[0].Register.WriteMask & 0x4)
         dst0[2] = bld_imm_f32(bld, 0.0f);
      if (insn->Dst[0].Register.WriteMask & 0x8)
         dst0[3] = bld_imm_f32(bld, 1.0f);
      break;
   case TGSI_OPCODE_SSG:
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
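         /* SSG sketch: AND with 0x80000000 keeps the sign bit, OR with the
          * bits of 1.0f gives +/-1.0f; the predicated XOR of temp with
          * itself then forces the result to 0 when src0 == 0 (NV_CC_EQ).
          */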
         src0 = emit_fetch(bld, insn, 0, c);
         src1 = bld_predicate(bld, src0, FALSE);
         temp = bld_insn_2(bld, NV_OP_AND, src0, bld_imm_u32(bld, 0x80000000));
         temp = bld_insn_2(bld, NV_OP_OR, temp, bld_imm_f32(bld, 1.0f));
         dst0[c] = bld_insn_2(bld, NV_OP_XOR, temp, temp);
         dst0[c]->insn->cc = NV_CC_EQ;
         nv_reference(bld->pc, &dst0[c]->insn->flags_src, src1);
      }
      break;
   case TGSI_OPCODE_SUB:
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
         src0 = emit_fetch(bld, insn, 0, c);
         src1 = emit_fetch(bld, insn, 1, c);
         dst0[c] = bld_insn_2(bld, NV_OP_ADD, src0, src1);
         dst0[c]->insn->src[1]->mod ^= NV_MOD_NEG;
      }
      break;
   case TGSI_OPCODE_TEX:
   case TGSI_OPCODE_TXB:
   case TGSI_OPCODE_TXL:
   case TGSI_OPCODE_TXP:
      bld_tex(bld, dst0, insn);
      break;
   case TGSI_OPCODE_XPD:
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
         if (c == 3) {
            dst0[3] = bld_imm_f32(bld, 1.0f);
            break;
         }
         src0 = emit_fetch(bld, insn, 1, (c + 1) % 3);
         src1 = emit_fetch(bld, insn, 0, (c + 2) % 3);
         dst0[c] = bld_insn_2(bld, NV_OP_MUL, src0, src1);

         src0 = emit_fetch(bld, insn, 0, (c + 1) % 3);
         src1 = emit_fetch(bld, insn, 1, (c + 2) % 3);
         dst0[c] = bld_insn_3(bld, NV_OP_MAD, src0, src1, dst0[c]);

         dst0[c]->insn->src[2]->mod ^= NV_MOD_NEG;
      }
      break;
   case TGSI_OPCODE_RET:
      (new_instruction(bld->pc, NV_OP_RET))->fixed = 1;
      break;
   case TGSI_OPCODE_END:
      if (bld->ti->p->type == PIPE_SHADER_FRAGMENT)
         bld_export_outputs(bld);
      break;
   default:
      NOUVEAU_ERR("unhandled opcode %u\n", insn->Instruction.Opcode);
      abort();
      break;
   }

   FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
      emit_store(bld, insn, c, dst0[c]);
}

static INLINE void
bld_free_value_trackers(struct bld_value_stack *base, int n)
{
   int i, c;

   for (i = 0; i < n; ++i)
      for (c = 0; c < 4; ++c)
         if (base[i * 4 + c].body)
            FREE(base[i * 4 + c].body);
}

int
nv50_tgsi_to_nc(struct nv_pc *pc, struct nv50_translation_info *ti)
{
   struct bld_context *bld = CALLOC_STRUCT(bld_context);
   int c;

   pc->root = pc->current_block = new_basic_block(pc);

   bld->pc = pc;
   bld->ti = ti;

   pc->loop_nesting_bound = 1;

   c = util_bitcount(bld->ti->p->fp.interp >> 24);
   if (c && ti->p->type == PIPE_SHADER_FRAGMENT) {
      bld->frgcrd[3] = new_value(pc, NV_FILE_MEM_V, NV_TYPE_F32);
      bld->frgcrd[3]->reg.id = c - 1;
      bld->frgcrd[3] = bld_insn_1(bld, NV_OP_LINTERP, bld->frgcrd[3]);
      bld->frgcrd[3] = bld_insn_1(bld, NV_OP_RCP, bld->frgcrd[3]);
   }

   tgsi_parse_init(&bld->parse[0], ti->p->pipe.tokens);

   while (!tgsi_parse_end_of_tokens(&bld->parse[bld->call_lvl])) {
      const union tgsi_full_token *tok = &bld->parse[bld->call_lvl].FullToken;

      tgsi_parse_token(&bld->parse[bld->call_lvl]);

      switch (tok->Token.Type) {
      case TGSI_TOKEN_TYPE_INSTRUCTION:
         bld_instruction(bld, &tok->FullInstruction);
         break;
      default:
         break;
      }
   }

   bld_free_value_trackers(&bld->tvs[0][0], BLD_MAX_TEMPS);
   bld_free_value_trackers(&bld->avs[0][0], BLD_MAX_ADDRS);
   bld_free_value_trackers(&bld->pvs[0][0], BLD_MAX_PREDS);

   bld_free_value_trackers(&bld->ovs[0][0], PIPE_MAX_SHADER_OUTPUTS);

   FREE(bld);
   return 0;
}

/* If a variable is assigned in a loop, replace all references to the value
 * from outside the loop with a phi value.
 */
static void
bld_replace_value(struct nv_pc *pc, struct nv_basic_block *b,
                  struct nv_value *old_val,
                  struct nv_value *new_val)
{
   struct nv_instruction *nvi;

   for (nvi = b->phi ? b->phi : b->entry; nvi; nvi = nvi->next) {
      int s;
      for (s = 0; s < 5; ++s) {
         if (!nvi->src[s])
            continue;
         if (nvi->src[s]->value == old_val)
            nv_reference(pc, &nvi->src[s], new_val);
      }
      if (nvi->flags_src && nvi->flags_src->value == old_val)
         nv_reference(pc, &nvi->flags_src, new_val);
   }

   b->pass_seq = pc->pass_seq;

   if (b->out[0] && b->out[0]->pass_seq < pc->pass_seq)
      bld_replace_value(pc, b->out[0], old_val, new_val);

   if (b->out[1] && b->out[1]->pass_seq < pc->pass_seq)
      bld_replace_value(pc, b->out[1], old_val, new_val);
}