/*
 * Copyright 2010 Christoph Bumiller
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

/* #define NV50_TGSI2NC_DEBUG */

#include <unistd.h>

#include "nv50_context.h"
#include "nv50_pc.h"

#include "pipe/p_shader_tokens.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_util.h"

#include "tgsi/tgsi_dump.h"

#define BLD_MAX_TEMPS 64
#define BLD_MAX_ADDRS 4
#define BLD_MAX_PREDS 4
#define BLD_MAX_IMMDS 128

#define BLD_MAX_COND_NESTING 8
#define BLD_MAX_LOOP_NESTING 4
#define BLD_MAX_CALL_NESTING 2

/* collects all values assigned to the same TGSI register */
struct bld_value_stack {
   struct nv_value *top;
   struct nv_value **body;
   unsigned size;
   uint16_t loop_use; /* 1 bit per loop level, indicates if used/defd */
   uint16_t loop_def;
};

static INLINE void
bld_vals_push_val(struct bld_value_stack *stk, struct nv_value *val)
{
   assert(!stk->size || (stk->body[stk->size - 1] != val));

   if (!(stk->size % 8)) {
      unsigned old_sz = (stk->size + 0) * sizeof(struct nv_value *);
      unsigned new_sz = (stk->size + 8) * sizeof(struct nv_value *);
      stk->body = (struct nv_value **)REALLOC(stk->body, old_sz, new_sz);
   }
   stk->body[stk->size++] = val;
}

static INLINE boolean
bld_vals_del_val(struct bld_value_stack *stk, struct nv_value *val)
{
   unsigned i;

   for (i = stk->size; i > 0; --i)
      if (stk->body[i - 1] == val)
         break;
   if (!i)
      return FALSE;

   if (i != stk->size)
      stk->body[i - 1] = stk->body[stk->size - 1];

   --stk->size; /* XXX: old size in REALLOC */
   return TRUE;
}

static INLINE void
bld_vals_push(struct bld_value_stack *stk)
{
   bld_vals_push_val(stk, stk->top);
   stk->top = NULL;
}

static INLINE void
bld_push_values(struct bld_value_stack *stacks, int n)
{
   int i, c;

   for (i = 0; i < n; ++i)
      for (c = 0; c < 4; ++c)
         if (stacks[i * 4 + c].top)
            bld_vals_push(&stacks[i * 4 + c]);
}
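
/* Illustrative sketch (not part of the original file): "top" holds the live
 * definition of a TGSI register component in the current basic block, while
 * "body" archives one definition per block for the SSA phi construction
 * below. The values v0/v1 here are hypothetical; the block is not compiled.
 */
#if 0
static void
bld_value_stack_usage_example(struct bld_value_stack *stk,
                              struct nv_value *v0, struct nv_value *v1)
{
   stk->top = v0;      /* definition in the current block */
   bld_vals_push(stk); /* block ends: archive v0 in body[], clear top */
   stk->top = v1;      /* redefinition in the following block */
   /* bld_phi() later walks body[] via find_by_bb()/fetch_by_bb() */
}
#endif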

struct bld_context {
   struct nv50_translation_info *ti;

   struct nv_pc *pc;
   struct nv_basic_block *b;

   struct tgsi_parse_context parse[BLD_MAX_CALL_NESTING];
   int call_lvl;

   struct nv_basic_block *cond_bb[BLD_MAX_COND_NESTING];
   struct nv_basic_block *join_bb[BLD_MAX_COND_NESTING];
   struct nv_basic_block *else_bb[BLD_MAX_COND_NESTING];
   int cond_lvl;
   struct nv_basic_block *loop_bb[BLD_MAX_LOOP_NESTING];
   struct nv_basic_block *brkt_bb[BLD_MAX_LOOP_NESTING];
   int loop_lvl;

   ubyte out_kind; /* CFG_EDGE_FORWARD, or FAKE in case of BREAK/CONT */

   struct bld_value_stack tvs[BLD_MAX_TEMPS][4]; /* TGSI_FILE_TEMPORARY */
   struct bld_value_stack avs[BLD_MAX_ADDRS][4]; /* TGSI_FILE_ADDRESS */
   struct bld_value_stack pvs[BLD_MAX_PREDS][4]; /* TGSI_FILE_PREDICATE */
   struct bld_value_stack ovs[PIPE_MAX_SHADER_OUTPUTS][4];

   uint32_t outputs_written[(PIPE_MAX_SHADER_OUTPUTS + 7) / 8];

   struct nv_value *frgcrd[4];
   struct nv_value *sysval[4];

   /* wipe on new BB */
   struct nv_value *saved_addr[4][2];
   struct nv_value *saved_inputs[128];
   struct nv_value *saved_immd[BLD_MAX_IMMDS];
   uint num_immds;
};

static INLINE ubyte
bld_stack_file(struct bld_context *bld, struct bld_value_stack *stk)
{
   if (stk < &bld->avs[0][0])
      return NV_FILE_GPR;
   else
   if (stk < &bld->pvs[0][0])
      return NV_FILE_ADDR;
   else
   if (stk < &bld->ovs[0][0])
      return NV_FILE_FLAGS;
   else
      return NV_FILE_OUT;
}

static INLINE struct nv_value *
bld_fetch(struct bld_context *bld, struct bld_value_stack *stk, int i, int c)
{
   stk[i * 4 + c].loop_use |= 1 << bld->loop_lvl;

   return stk[i * 4 + c].top;
}

static struct nv_value *
bld_loop_phi(struct bld_context *, struct bld_value_stack *, struct nv_value *);

/* If a variable is defined in a loop without prior use, we don't need
 * a phi in the loop header to account for backwards flow.
 *
 * However, if this variable is then also used outside the loop, we do
 * need a phi after all. But we must not use this phi's def inside the
 * loop, so we can eliminate the phi if it is unused later.
 */
static INLINE void
bld_store(struct bld_context *bld, struct bld_value_stack *stk, int i, int c,
          struct nv_value *val)
{
   const uint16_t m = 1 << bld->loop_lvl;

   stk = &stk[i * 4 + c];

   if (bld->loop_lvl && !(m & (stk->loop_def | stk->loop_use)))
      bld_loop_phi(bld, stk, val);

   stk->top = val;
   stk->loop_def |= 1 << bld->loop_lvl;
}
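
/* Illustration (not part of the original file): for a TGSI sequence like
 *
 *    BGNLOOP
 *      MOV TEMP[0].x, ...   # defined before any use at this loop level
 *    ENDLOOP
 *
 * the store needs no loop header phi, because no backwards-flowing value is
 * observable before the definition. Had TEMP[0].x been read first, loop_use
 * would be set and bld_loop_phi()/bld_fetch_global() create the phi.
 */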

static INLINE void
bld_clear_def_use(struct bld_value_stack *stk, int n, int lvl)
{
   int i;
   const uint16_t mask = ~(1 << lvl);

   for (i = 0; i < n * 4; ++i) {
      stk[i].loop_def &= mask;
      stk[i].loop_use &= mask;
   }
}

#define FETCH_TEMP(i, c)    bld_fetch(bld, &bld->tvs[0][0], i, c)
#define STORE_TEMP(i, c, v) bld_store(bld, &bld->tvs[0][0], i, c, (v))
#define FETCH_ADDR(i, c)    bld_fetch(bld, &bld->avs[0][0], i, c)
#define STORE_ADDR(i, c, v) bld_store(bld, &bld->avs[0][0], i, c, (v))
#define FETCH_PRED(i, c)    bld_fetch(bld, &bld->pvs[0][0], i, c)
#define STORE_PRED(i, c, v) bld_store(bld, &bld->pvs[0][0], i, c, (v))

#define STORE_OUTR(i, c, v)                                         \
   do {                                                             \
      bld->ovs[i][c].top = (v);                                     \
      bld->outputs_written[(i) / 8] |= 1 << (((i) * 4 + (c)) % 32); \
   } while (0)

static INLINE void
bld_warn_uninitialized(struct bld_context *bld, int kind,
                       struct bld_value_stack *stk, struct nv_basic_block *b)
{
#ifdef NV50_TGSI2NC_DEBUG
   long i = (stk - &bld->tvs[0][0]) / 4;
   long c = (stk - &bld->tvs[0][0]) & 3;

   if (c == 3)
      c = -1;

   debug_printf("WARNING: TEMP[%li].%c %s used uninitialized in BB:%i\n",
                i, (int)('x' + c), kind ? "may be" : "is", b->id);
#endif
}

static INLINE struct nv_value *
bld_def(struct nv_instruction *i, int c, struct nv_value *value)
{
   i->def[c] = value;
   value->insn = i;
   return value;
}

static INLINE struct nv_value *
find_by_bb(struct bld_value_stack *stack, struct nv_basic_block *b)
{
   int i;

   if (stack->top && stack->top->insn->bb == b)
      return stack->top;

   for (i = stack->size - 1; i >= 0; --i)
      if (stack->body[i]->insn->bb == b)
         return stack->body[i];
   return NULL;
}

/* fetch value from stack that was defined in the specified basic block,
 * or search for first definitions in all of its predecessors
 */
static void
fetch_by_bb(struct bld_value_stack *stack,
            struct nv_value **vals, int *n,
            struct nv_basic_block *b)
{
   int i;
   struct nv_value *val;

   assert(*n < 16); /* MAX_COND_NESTING */

   val = find_by_bb(stack, b);
   if (val) {
      for (i = 0; i < *n; ++i)
         if (vals[i] == val)
            return;
      vals[(*n)++] = val;
      return;
   }
   for (i = 0; i < b->num_in; ++i)
      if (!IS_WALL_EDGE(b->in_kind[i]))
         fetch_by_bb(stack, vals, n, b->in[i]);
}
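
/* Illustration (not part of the original file): in a diamond CFG
 *
 *       B0
 *      /  \
 *     B1  B2
 *      \  /
 *       B3
 *
 * fetch_by_bb(stack, vals, &n, B3) finds no definition in B3 itself and
 * recurses into B1 and B2; a register written on both paths yields n == 2,
 * for which bld_phi() below emits a two-source phi at B3.
 */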

static INLINE struct nv_value *
bld_load_imm_u32(struct bld_context *bld, uint32_t u);

static INLINE struct nv_value *
bld_undef(struct bld_context *bld, ubyte file)
{
   struct nv_instruction *nvi = new_instruction(bld->pc, NV_OP_UNDEF);

   return bld_def(nvi, 0, new_value(bld->pc, file, NV_TYPE_U32));
}

static struct nv_value *
bld_phi(struct bld_context *bld, struct nv_basic_block *b,
        struct bld_value_stack *stack)
{
   struct nv_basic_block *in;
   struct nv_value *vals[16], *val;
   struct nv_instruction *phi;
   int i, j, n;

   do {
      i = n = 0;
      fetch_by_bb(stack, vals, &n, b);

      if (!n) {
         bld_warn_uninitialized(bld, 0, stack, b);
         return NULL;
      }

      if (n == 1) {
         if (nvbb_dominated_by(b, vals[0]->insn->bb))
            break;

         bld_warn_uninitialized(bld, 1, stack, b);

         /* back-tracking to insert missing value of other path */
         in = b;
         while (in->in[0]) {
            if (in->num_in == 1) {
               in = in->in[0];
            } else {
               if (!nvbb_reachable_by(in->in[0], vals[0]->insn->bb, b))
                  in = in->in[0];
               else
               if (!nvbb_reachable_by(in->in[1], vals[0]->insn->bb, b))
                  in = in->in[1];
               else
                  in = in->in[0];
            }
         }
         bld->pc->current_block = in;

         /* should make this a no-op */
         bld_vals_push_val(stack, bld_undef(bld, vals[0]->reg.file));
         continue;
      }

      for (i = 0; i < n; ++i) {
         /* if value dominates b, continue to the redefinitions */
         if (nvbb_dominated_by(b, vals[i]->insn->bb))
            continue;

         /* if value dominates any in-block, b should be the dom frontier */
         for (j = 0; j < b->num_in; ++j)
            if (nvbb_dominated_by(b->in[j], vals[i]->insn->bb))
               break;
         /* otherwise, find the dominance frontier and put the phi there */
         if (j == b->num_in) {
            in = nvbb_dom_frontier(vals[i]->insn->bb);
            val = bld_phi(bld, in, stack);
            bld_vals_push_val(stack, val);
            break;
         }
      }
   } while (i < n);

   bld->pc->current_block = b;

   if (n == 1)
      return vals[0];

   phi = new_instruction(bld->pc, NV_OP_PHI);

   bld_def(phi, 0, new_value(bld->pc, vals[0]->reg.file, vals[0]->reg.type));
   for (i = 0; i < n; ++i)
      phi->src[i] = new_ref(bld->pc, vals[i]);

   return phi->def[0];
}

/* Insert a phi function in the loop header.
 * For nested loops, we need to insert phi functions in all the outer
 * loop headers if they don't have one yet.
 *
 * @def: redefinition from inside loop, or NULL if to be replaced later
 */
static struct nv_value *
bld_loop_phi(struct bld_context *bld, struct bld_value_stack *stack,
             struct nv_value *def)
{
   struct nv_instruction *phi;
   struct nv_basic_block *bb = bld->pc->current_block;
   struct nv_value *val = NULL;

   if (bld->loop_lvl > 1) {
      --bld->loop_lvl;
      if (!((stack->loop_def | stack->loop_use) & (1 << bld->loop_lvl)))
         val = bld_loop_phi(bld, stack, NULL);
      ++bld->loop_lvl;
   }

   if (!val)
      val = bld_phi(bld, bld->pc->current_block, stack); /* old definition */
   if (!val) {
      bld->pc->current_block = bld->loop_bb[bld->loop_lvl - 1]->in[0];
      val = bld_undef(bld, bld_stack_file(bld, stack));
   }

   bld->pc->current_block = bld->loop_bb[bld->loop_lvl - 1];

   phi = new_instruction(bld->pc, NV_OP_PHI);

   bld_def(phi, 0, new_value_like(bld->pc, val));
   if (!def)
      def = phi->def[0];

   bld_vals_push_val(stack, phi->def[0]);

   phi->target = (struct nv_basic_block *)stack; /* cheat */

   nv_reference(bld->pc, &phi->src[0], val);
   nv_reference(bld->pc, &phi->src[1], def);

   bld->pc->current_block = bb;

   return phi->def[0];
}

static INLINE struct nv_value *
bld_fetch_global(struct bld_context *bld, struct bld_value_stack *stack)
{
   const uint16_t m = 1 << bld->loop_lvl;
   const uint16_t use = stack->loop_use;

   stack->loop_use |= m;

   /* If neither used nor def'd inside the loop, build a phi in foresight,
    * so we don't have to replace stuff later on, which requires tracking.
    */
   if (bld->loop_lvl && !((use | stack->loop_def) & m))
      return bld_loop_phi(bld, stack, NULL);

   return bld_phi(bld, bld->pc->current_block, stack);
}

static INLINE struct nv_value *
bld_imm_u32(struct bld_context *bld, uint32_t u)
{
   int i;
   unsigned n = bld->num_immds;

   for (i = 0; i < n; ++i)
      if (bld->saved_immd[i]->reg.imm.u32 == u)
         return bld->saved_immd[i];
   assert(n < BLD_MAX_IMMDS);

   bld->num_immds++;

   bld->saved_immd[n] = new_value(bld->pc, NV_FILE_IMM, NV_TYPE_U32);
   bld->saved_immd[n]->reg.imm.u32 = u;
   return bld->saved_immd[n];
}
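
/* Usage sketch (hypothetical, illustration only, not compiled): requests for
 * the same bit pattern are deduplicated through the saved_immd[] cache, so
 * float and integer views of identical bits share one immediate:
 */
#if 0
{
   struct nv_value *a = bld_imm_u32(bld, 0x3f800000); /* bits of 1.0f */
   struct nv_value *b = bld_imm_f32(bld, 1.0f);       /* same bits */
   assert(a == b);
}
#endif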

static void
bld_replace_value(struct nv_pc *, struct nv_basic_block *, struct nv_value *,
                  struct nv_value *);

/* Replace the source of the phi in the loop header by the last assignment,
 * or eliminate the phi function if there is no assignment inside the loop.
 *
 * Redundancy situation 1 - (used) but (not redefined) value:
 *   %3 = phi %0, %3 = %3 is used
 *   %3 = phi %0, %4 = is new definition
 *
 * Redundancy situation 2 - (not used) but (redefined) value:
 *   %3 = phi %0, %2 = %2 is used, %3 could be used outside, deleted by DCE
 */
static void
bld_loop_end(struct bld_context *bld, struct nv_basic_block *bb)
{
   struct nv_basic_block *save = bld->pc->current_block;
   struct nv_instruction *phi, *next;
   struct nv_value *val;
   struct bld_value_stack *stk;
   int i, s, n;

   for (phi = bb->phi; phi && phi->opcode == NV_OP_PHI; phi = next) {
      next = phi->next;

      stk = (struct bld_value_stack *)phi->target;
      phi->target = NULL;

      for (s = 1, n = 0; n < bb->num_in; ++n) {
         if (bb->in_kind[n] != CFG_EDGE_BACK)
            continue;

         assert(s < 4);
         bld->pc->current_block = bb->in[n];
         val = bld_fetch_global(bld, stk);

         for (i = 0; i < 4; ++i)
            if (phi->src[i] && phi->src[i]->value == val)
               break;
         if (i == 4)
            nv_reference(bld->pc, &phi->src[s++], val);
      }
      bld->pc->current_block = save;

      if (phi->src[0]->value == phi->def[0] ||
          phi->src[0]->value == phi->src[1]->value)
         s = 1;
      else
      if (phi->src[1]->value == phi->def[0])
         s = 0;
      else
         continue;

      if (s >= 0) {
         /* eliminate the phi */
         bld_vals_del_val(stk, phi->def[0]);

         ++bld->pc->pass_seq;
         bld_replace_value(bld->pc, bb, phi->def[0], phi->src[s]->value);

         nv_nvi_delete(phi);
      }
   }
}
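
/* Worked example (not part of the original file): for a header phi
 * "%3 = phi %0, %X" the back-edge fetch above decides its fate:
 *
 *   no redefinition inside the loop  ->  %3 = phi %0, %3
 *      (situation 1: uses of %3 are replaced by %0 and the phi deleted)
 *   redefinition %4 inside the loop  ->  %3 = phi %0, %4
 *      (the phi stays; if %3 is never used, DCE removes it later)
 */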

static INLINE struct nv_value *
bld_imm_f32(struct bld_context *bld, float f)
{
   return bld_imm_u32(bld, fui(f));
}

#define SET_TYPE(v, t) ((v)->reg.type = (v)->reg.as_type = (t))

static struct nv_value *
bld_insn_1(struct bld_context *bld, uint opcode, struct nv_value *src0)
{
   struct nv_instruction *insn = new_instruction(bld->pc, opcode);

   nv_reference(bld->pc, &insn->src[0], src0);

   return bld_def(insn, 0, new_value(bld->pc, NV_FILE_GPR, src0->reg.as_type));
}

static struct nv_value *
bld_insn_2(struct bld_context *bld, uint opcode,
           struct nv_value *src0, struct nv_value *src1)
{
   struct nv_instruction *insn = new_instruction(bld->pc, opcode);

   nv_reference(bld->pc, &insn->src[0], src0);
   nv_reference(bld->pc, &insn->src[1], src1);

   return bld_def(insn, 0, new_value(bld->pc, NV_FILE_GPR, src0->reg.as_type));
}

static struct nv_value *
bld_insn_3(struct bld_context *bld, uint opcode,
           struct nv_value *src0, struct nv_value *src1,
           struct nv_value *src2)
{
   struct nv_instruction *insn = new_instruction(bld->pc, opcode);

   nv_reference(bld->pc, &insn->src[0], src0);
   nv_reference(bld->pc, &insn->src[1], src1);
   nv_reference(bld->pc, &insn->src[2], src2);

   return bld_def(insn, 0, new_value(bld->pc, NV_FILE_GPR, src0->reg.as_type));
}

static struct nv_value *
bld_duplicate_insn(struct bld_context *bld, struct nv_instruction *nvi)
{
   struct nv_instruction *dupi = new_instruction(bld->pc, nvi->opcode);
   int c;

   if (nvi->def[0])
      bld_def(dupi, 0, new_value_like(bld->pc, nvi->def[0]));

   if (nvi->flags_def) {
      dupi->flags_def = new_value_like(bld->pc, nvi->flags_def);
      dupi->flags_def->insn = dupi;
   }

   for (c = 0; c < 5; ++c)
      if (nvi->src[c])
         nv_reference(bld->pc, &dupi->src[c], nvi->src[c]->value);
   if (nvi->flags_src)
      nv_reference(bld->pc, &dupi->flags_src, nvi->flags_src->value);

   dupi->cc = nvi->cc;
   dupi->saturate = nvi->saturate;
   dupi->centroid = nvi->centroid;
   dupi->flat = nvi->flat;

   return dupi->def[0];
}

static void
bld_lmem_store(struct bld_context *bld, struct nv_value *ptr, int ofst,
               struct nv_value *val)
{
   struct nv_instruction *insn = new_instruction(bld->pc, NV_OP_STA);
   struct nv_value *loc;

   loc = new_value(bld->pc, NV_FILE_MEM_L, NV_TYPE_U32);

   loc->reg.id = ofst * 4;

   nv_reference(bld->pc, &insn->src[0], loc);
   nv_reference(bld->pc, &insn->src[1], val);
   nv_reference(bld->pc, &insn->src[4], ptr);
}

static struct nv_value *
bld_lmem_load(struct bld_context *bld, struct nv_value *ptr, int ofst)
{
   struct nv_value *loc, *val;

   loc = new_value(bld->pc, NV_FILE_MEM_L, NV_TYPE_U32);

   loc->reg.id = ofst * 4;

   val = bld_insn_1(bld, NV_OP_LDA, loc);

   nv_reference(bld->pc, &val->insn->src[4], ptr);

   return val;
}

#define BLD_INSN_1_EX(d, op, dt, s0, s0t)          \
   do {                                            \
      (d) = bld_insn_1(bld, (NV_OP_##op), (s0));   \
      SET_TYPE(d, NV_TYPE_##dt);                   \
      (d)->insn->src[0]->typecast = NV_TYPE_##s0t; \
   } while (0)

#define BLD_INSN_2_EX(d, op, dt, s0, s0t, s1, s1t)     \
   do {                                                \
      (d) = bld_insn_2(bld, (NV_OP_##op), (s0), (s1)); \
      SET_TYPE(d, NV_TYPE_##dt);                       \
      (d)->insn->src[0]->typecast = NV_TYPE_##s0t;     \
      (d)->insn->src[1]->typecast = NV_TYPE_##s1t;     \
   } while (0)

static struct nv_value *
bld_pow(struct bld_context *bld, struct nv_value *x, struct nv_value *e)
{
   struct nv_value *val;

   BLD_INSN_1_EX(val, LG2, F32, x, F32);
   BLD_INSN_2_EX(val, MUL, F32, e, F32, val, F32);
   val = bld_insn_1(bld, NV_OP_PREEX2, val);
   val = bld_insn_1(bld, NV_OP_EX2, val);

   return val;
}
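
/* Usage sketch (hypothetical, not compiled): bld_pow() computes x^e as
 * 2^(e * log2 x); PREEX2 is the range-reduction step the hardware requires
 * before EX2. TGSI_OPCODE_POW in bld_instruction() below reduces to one
 * such call:
 */
#if 0
{
   struct nv_value *x = emit_fetch(bld, insn, 0, 0);
   struct nv_value *e = emit_fetch(bld, insn, 1, 0);
   struct nv_value *r = bld_pow(bld, x, e); /* LG2 -> MUL -> PREEX2 -> EX2 */
}
#endif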

static INLINE struct nv_value *
bld_load_imm_f32(struct bld_context *bld, float f)
{
   struct nv_value *imm = bld_insn_1(bld, NV_OP_MOV, bld_imm_f32(bld, f));

   SET_TYPE(imm, NV_TYPE_F32);
   return imm;
}

static INLINE struct nv_value *
bld_load_imm_u32(struct bld_context *bld, uint32_t u)
{
   return bld_insn_1(bld, NV_OP_MOV, bld_imm_u32(bld, u));
}

static struct nv_value *
bld_get_address(struct bld_context *bld, int id, struct nv_value *indirect)
{
   int i;
   struct nv_instruction *nvi;
   struct nv_value *val;

   for (i = 0; i < 4; ++i) {
      if (!bld->saved_addr[i][0])
         break;
      if (bld->saved_addr[i][1] == indirect) {
         nvi = bld->saved_addr[i][0]->insn;
         if (nvi->src[0]->value->reg.imm.u32 == id)
            return bld->saved_addr[i][0];
      }
   }
   i &= 3;

   val = bld_imm_u32(bld, id);
   if (indirect)
      val = bld_insn_2(bld, NV_OP_ADD, indirect, val);
   else
      val = bld_insn_1(bld, NV_OP_MOV, val);

   bld->saved_addr[i][0] = val;
   bld->saved_addr[i][0]->reg.file = NV_FILE_ADDR;
   bld->saved_addr[i][0]->reg.type = NV_TYPE_U16;
   bld->saved_addr[i][1] = indirect;
   return bld->saved_addr[i][0];
}
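
/* Note (added for clarity, not in the original): saved_addr[] acts as a
 * small per-basic-block cache of (constant base, indirect index) address
 * registers; "i &= 3" wraps the slot index so that when all four slots are
 * taken, a new address overwrites slot 0 instead of overflowing the array.
 */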


static struct nv_value *
bld_predicate(struct bld_context *bld, struct nv_value *src, boolean bool_only)
{
   struct nv_instruction *s0i, *nvi = src->insn;

   if (!nvi) {
      nvi = bld_insn_1(bld,
                       (src->reg.file == NV_FILE_IMM) ? NV_OP_MOV : NV_OP_LDA,
                       src)->insn;
      src = nvi->def[0];
   } else
   if (bool_only) {
      while (nvi->opcode == NV_OP_ABS || nvi->opcode == NV_OP_NEG ||
             nvi->opcode == NV_OP_CVT) {
         s0i = nvi->src[0]->value->insn;
         if (!s0i || !nv50_op_can_write_flags(s0i->opcode))
            break;
         nvi = s0i;
         assert(!nvi->flags_src);
      }
   }

   if (!nv50_op_can_write_flags(nvi->opcode) ||
       nvi->bb != bld->pc->current_block) {
      nvi = new_instruction(bld->pc, NV_OP_CVT);
      nv_reference(bld->pc, &nvi->src[0], src);
   }

   if (!nvi->flags_def) {
      nvi->flags_def = new_value(bld->pc, NV_FILE_FLAGS, NV_TYPE_U16);
      nvi->flags_def->insn = nvi;
   }
   return nvi->flags_def;
}
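
/* Usage sketch (hypothetical, not compiled; mirrors the TGSI_OPCODE_CMP case
 * below): bld_predicate() returns a flags value that can gate a conditional
 * MOV:
 */
#if 0
{
   struct nv_value *flags = bld_predicate(bld, src0, FALSE);
   struct nv_value *res = bld_insn_1(bld, NV_OP_MOV, src1);
   res->insn->flags_src = new_ref(bld->pc, flags);
   res->insn->cc = NV_CC_LT; /* result is written only where src0 < 0 */
}
#endif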

static void
bld_kil(struct bld_context *bld, struct nv_value *src)
{
   struct nv_instruction *nvi;

   src = bld_predicate(bld, src, FALSE);
   nvi = new_instruction(bld->pc, NV_OP_KIL);
   nvi->fixed = 1;
   nvi->flags_src = new_ref(bld->pc, src);
   nvi->cc = NV_CC_LT;
}

static void
bld_flow(struct bld_context *bld, uint opcode, ubyte cc,
         struct nv_value *src, struct nv_basic_block *target,
         boolean plan_reconverge)
{
   struct nv_instruction *nvi;

   if (plan_reconverge)
      new_instruction(bld->pc, NV_OP_JOINAT)->fixed = 1;

   nvi = new_instruction(bld->pc, opcode);
   nvi->is_terminator = 1;
   nvi->cc = cc;
   nvi->target = target;
   if (src)
      nvi->flags_src = new_ref(bld->pc, src);
}

static ubyte
translate_setcc(unsigned opcode)
{
   switch (opcode) {
   case TGSI_OPCODE_SLT: return NV_CC_LT;
   case TGSI_OPCODE_SGE: return NV_CC_GE;
   case TGSI_OPCODE_SEQ: return NV_CC_EQ;
   case TGSI_OPCODE_SGT: return NV_CC_GT;
   case TGSI_OPCODE_SLE: return NV_CC_LE;
   case TGSI_OPCODE_SNE: return NV_CC_NE | NV_CC_U;
   case TGSI_OPCODE_STR: return NV_CC_TR;
   case TGSI_OPCODE_SFL: return NV_CC_FL;

   case TGSI_OPCODE_ISLT: return NV_CC_LT;
   case TGSI_OPCODE_ISGE: return NV_CC_GE;
   case TGSI_OPCODE_USEQ: return NV_CC_EQ;
   case TGSI_OPCODE_USGE: return NV_CC_GE;
   case TGSI_OPCODE_USLT: return NV_CC_LT;
   case TGSI_OPCODE_USNE: return NV_CC_NE;
   default:
      assert(0);
      return NV_CC_FL;
   }
}

static uint
translate_opcode(uint opcode)
{
   switch (opcode) {
   case TGSI_OPCODE_ABS: return NV_OP_ABS;
   case TGSI_OPCODE_ADD:
   case TGSI_OPCODE_SUB:
   case TGSI_OPCODE_UADD: return NV_OP_ADD;
   case TGSI_OPCODE_AND: return NV_OP_AND;
   case TGSI_OPCODE_EX2: return NV_OP_EX2;
   case TGSI_OPCODE_CEIL: return NV_OP_CEIL;
   case TGSI_OPCODE_FLR: return NV_OP_FLOOR;
   case TGSI_OPCODE_TRUNC: return NV_OP_TRUNC;
   case TGSI_OPCODE_COS: return NV_OP_COS;
   case TGSI_OPCODE_SIN: return NV_OP_SIN;
   case TGSI_OPCODE_DDX: return NV_OP_DFDX;
   case TGSI_OPCODE_DDY: return NV_OP_DFDY;
   case TGSI_OPCODE_F2I:
   case TGSI_OPCODE_F2U:
   case TGSI_OPCODE_I2F:
   case TGSI_OPCODE_U2F: return NV_OP_CVT;
   case TGSI_OPCODE_INEG: return NV_OP_NEG;
   case TGSI_OPCODE_LG2: return NV_OP_LG2;
   case TGSI_OPCODE_ISHR:
   case TGSI_OPCODE_USHR: return NV_OP_SHR;
   case TGSI_OPCODE_MAD:
   case TGSI_OPCODE_UMAD: return NV_OP_MAD;
   case TGSI_OPCODE_MAX:
   case TGSI_OPCODE_IMAX:
   case TGSI_OPCODE_UMAX: return NV_OP_MAX;
   case TGSI_OPCODE_MIN:
   case TGSI_OPCODE_IMIN:
   case TGSI_OPCODE_UMIN: return NV_OP_MIN;
   case TGSI_OPCODE_MUL:
   case TGSI_OPCODE_UMUL: return NV_OP_MUL;
   case TGSI_OPCODE_OR: return NV_OP_OR;
   case TGSI_OPCODE_RCP: return NV_OP_RCP;
   case TGSI_OPCODE_RSQ: return NV_OP_RSQ;
   case TGSI_OPCODE_SAD: return NV_OP_SAD;
   case TGSI_OPCODE_SHL: return NV_OP_SHL;
   case TGSI_OPCODE_SLT:
   case TGSI_OPCODE_SGE:
   case TGSI_OPCODE_SEQ:
   case TGSI_OPCODE_SGT:
   case TGSI_OPCODE_SLE:
   case TGSI_OPCODE_SNE:
   case TGSI_OPCODE_ISLT:
   case TGSI_OPCODE_ISGE:
   case TGSI_OPCODE_USEQ:
   case TGSI_OPCODE_USGE:
   case TGSI_OPCODE_USLT:
   case TGSI_OPCODE_USNE: return NV_OP_SET;
   case TGSI_OPCODE_TEX: return NV_OP_TEX;
   case TGSI_OPCODE_TXP: return NV_OP_TEX;
   case TGSI_OPCODE_TXB: return NV_OP_TXB;
   case TGSI_OPCODE_TXL: return NV_OP_TXL;
   case TGSI_OPCODE_XOR: return NV_OP_XOR;
   default:
      return NV_OP_NOP;
   }
}

static ubyte
infer_src_type(unsigned opcode)
{
   switch (opcode) {
   case TGSI_OPCODE_MOV:
   case TGSI_OPCODE_AND:
   case TGSI_OPCODE_OR:
   case TGSI_OPCODE_XOR:
   case TGSI_OPCODE_SAD:
   case TGSI_OPCODE_U2F:
   case TGSI_OPCODE_UADD:
   case TGSI_OPCODE_UDIV:
   case TGSI_OPCODE_UMOD:
   case TGSI_OPCODE_UMAD:
   case TGSI_OPCODE_UMUL:
   case TGSI_OPCODE_UMAX:
   case TGSI_OPCODE_UMIN:
   case TGSI_OPCODE_USEQ:
   case TGSI_OPCODE_USGE:
   case TGSI_OPCODE_USLT:
   case TGSI_OPCODE_USNE:
   case TGSI_OPCODE_USHR:
      return NV_TYPE_U32;
   case TGSI_OPCODE_I2F:
   case TGSI_OPCODE_IDIV:
   case TGSI_OPCODE_IMAX:
   case TGSI_OPCODE_IMIN:
   case TGSI_OPCODE_INEG:
   case TGSI_OPCODE_ISGE:
   case TGSI_OPCODE_ISHR:
   case TGSI_OPCODE_ISLT:
      return NV_TYPE_S32;
   default:
      return NV_TYPE_F32;
   }
}

static ubyte
infer_dst_type(unsigned opcode)
{
   switch (opcode) {
   case TGSI_OPCODE_MOV:
   case TGSI_OPCODE_F2U:
   case TGSI_OPCODE_AND:
   case TGSI_OPCODE_OR:
   case TGSI_OPCODE_XOR:
   case TGSI_OPCODE_SAD:
   case TGSI_OPCODE_UADD:
   case TGSI_OPCODE_UDIV:
   case TGSI_OPCODE_UMOD:
   case TGSI_OPCODE_UMAD:
   case TGSI_OPCODE_UMUL:
   case TGSI_OPCODE_UMAX:
   case TGSI_OPCODE_UMIN:
   case TGSI_OPCODE_USEQ:
   case TGSI_OPCODE_USGE:
   case TGSI_OPCODE_USLT:
   case TGSI_OPCODE_USNE:
   case TGSI_OPCODE_USHR:
      return NV_TYPE_U32;
   case TGSI_OPCODE_F2I:
   case TGSI_OPCODE_IDIV:
   case TGSI_OPCODE_IMAX:
   case TGSI_OPCODE_IMIN:
   case TGSI_OPCODE_INEG:
   case TGSI_OPCODE_ISGE:
   case TGSI_OPCODE_ISHR:
   case TGSI_OPCODE_ISLT:
      return NV_TYPE_S32;
   default:
      return NV_TYPE_F32;
   }
}

static void
emit_store(struct bld_context *bld, const struct tgsi_full_instruction *inst,
           unsigned chan, struct nv_value *value)
{
   struct nv_value *ptr;
   const struct tgsi_full_dst_register *reg = &inst->Dst[0];

   if (reg->Register.Indirect) {
      ptr = FETCH_ADDR(reg->Indirect.Index,
                       tgsi_util_get_src_register_swizzle(&reg->Indirect, 0));
   } else {
      ptr = NULL;
   }

   assert(chan < 4);

   if (inst->Instruction.Opcode != TGSI_OPCODE_MOV)
      value->reg.type = infer_dst_type(inst->Instruction.Opcode);

   switch (inst->Instruction.Saturate) {
   case TGSI_SAT_NONE:
      break;
   case TGSI_SAT_ZERO_ONE:
      BLD_INSN_1_EX(value, SAT, F32, value, F32);
      break;
   case TGSI_SAT_MINUS_PLUS_ONE:
      value->reg.as_type = NV_TYPE_F32;
      value = bld_insn_2(bld, NV_OP_MAX, value, bld_load_imm_f32(bld, -1.0f));
      value = bld_insn_2(bld, NV_OP_MIN, value, bld_load_imm_f32(bld, 1.0f));
      break;
   }

   switch (reg->Register.File) {
   case TGSI_FILE_OUTPUT:
      if (!value->insn && (bld->ti->output_file == NV_FILE_OUT))
         value = bld_insn_1(bld, NV_OP_MOV, value);
      value = bld_insn_1(bld, NV_OP_MOV, value);
      value->reg.file = bld->ti->output_file;

      if (bld->ti->p->type == PIPE_SHADER_FRAGMENT) {
         STORE_OUTR(reg->Register.Index, chan, value);
      } else {
         value->insn->fixed = 1;
         value->reg.id = bld->ti->output_map[reg->Register.Index][chan];
      }
      break;
   case TGSI_FILE_TEMPORARY:
      assert(reg->Register.Index < BLD_MAX_TEMPS);
      if (!value->insn || (value->insn->bb != bld->pc->current_block))
         value = bld_insn_1(bld, NV_OP_MOV, value);
      value->reg.file = NV_FILE_GPR;

      if (bld->ti->store_to_memory)
         bld_lmem_store(bld, ptr, reg->Register.Index * 4 + chan, value);
      else
         STORE_TEMP(reg->Register.Index, chan, value);
      break;
   case TGSI_FILE_ADDRESS:
      assert(reg->Register.Index < BLD_MAX_ADDRS);
      value->reg.file = NV_FILE_ADDR;
      value->reg.type = NV_TYPE_U16;
      STORE_ADDR(reg->Register.Index, chan, value);
      break;
   }
}

static INLINE uint32_t
bld_is_output_written(struct bld_context *bld, int i, int c)
{
   if (c < 0)
      return bld->outputs_written[i / 8] & (0xf << ((i * 4) % 32));
   return bld->outputs_written[i / 8] & (1 << ((i * 4 + c) % 32));
}

static void
bld_export_outputs(struct bld_context *bld)
{
   struct nv_value *vals[4];
   struct nv_instruction *nvi;
   int i, c, n;

   bld_push_values(&bld->ovs[0][0], PIPE_MAX_SHADER_OUTPUTS);

   for (i = 0; i < PIPE_MAX_SHADER_OUTPUTS; ++i) {
      if (!bld_is_output_written(bld, i, -1))
         continue;
      for (n = 0, c = 0; c < 4; ++c) {
         if (!bld_is_output_written(bld, i, c))
            continue;
         vals[n] = bld_fetch_global(bld, &bld->ovs[i][c]);
         assert(vals[n]);
         vals[n] = bld_insn_1(bld, NV_OP_MOV, vals[n]);
         vals[n++]->reg.id = bld->ti->output_map[i][c];
      }
      assert(n);

      (nvi = new_instruction(bld->pc, NV_OP_EXPORT))->fixed = 1;

      for (c = 0; c < n; ++c)
         nvi->src[c] = new_ref(bld->pc, vals[c]);
   }
}

static void
bld_new_block(struct bld_context *bld, struct nv_basic_block *b)
{
   int i;

   bld_push_values(&bld->tvs[0][0], BLD_MAX_TEMPS);
   bld_push_values(&bld->avs[0][0], BLD_MAX_ADDRS);
   bld_push_values(&bld->pvs[0][0], BLD_MAX_PREDS);
   bld_push_values(&bld->ovs[0][0], PIPE_MAX_SHADER_OUTPUTS);

   bld->pc->current_block = b;

   for (i = 0; i < 4; ++i)
      bld->saved_addr[i][0] = NULL;

   for (i = 0; i < 128; ++i)
      bld->saved_inputs[i] = NULL;

   bld->out_kind = CFG_EDGE_FORWARD;
}

static struct nv_value *
bld_saved_input(struct bld_context *bld, unsigned i, unsigned c)
{
   unsigned idx = bld->ti->input_map[i][c];

   if (bld->ti->p->type != PIPE_SHADER_FRAGMENT)
      return NULL;
   if (bld->saved_inputs[idx])
      return bld->saved_inputs[idx];
   return NULL;
}

static struct nv_value *
bld_interpolate(struct bld_context *bld, unsigned mode, struct nv_value *val)
{
   if (val->reg.id == 255) {
      /* gl_FrontFacing: 0/~0 to -1.0/+1.0 */
      val = bld_insn_1(bld, NV_OP_LINTERP, val);
      val = bld_insn_2(bld, NV_OP_SHL, val, bld_imm_u32(bld, 31));
      val->insn->src[0]->typecast = NV_TYPE_U32;
      val = bld_insn_2(bld, NV_OP_XOR, val, bld_imm_f32(bld, -1.0f));
      val->insn->src[0]->typecast = NV_TYPE_U32;
   } else
   if (mode & (NV50_INTERP_LINEAR | NV50_INTERP_FLAT))
      val = bld_insn_1(bld, NV_OP_LINTERP, val);
   else
      val = bld_insn_2(bld, NV_OP_PINTERP, val, bld->frgcrd[3]);

   val->insn->flat = (mode & NV50_INTERP_FLAT) ? 1 : 0;
   val->insn->centroid = (mode & NV50_INTERP_CENTROID) ? 1 : 0;
   return val;
}

static struct nv_value *
emit_fetch(struct bld_context *bld, const struct tgsi_full_instruction *insn,
           const unsigned s, const unsigned chan)
{
   const struct tgsi_full_src_register *src = &insn->Src[s];
   struct nv_value *res;
   struct nv_value *ptr = NULL;
   unsigned idx, swz, dim_idx, ind_idx, ind_swz, sgn;
   ubyte type = infer_src_type(insn->Instruction.Opcode);

   idx = src->Register.Index;
   swz = tgsi_util_get_full_src_register_swizzle(src, chan);
   dim_idx = -1;
   ind_idx = -1;
   ind_swz = 0;

   if (src->Register.Indirect) {
      ind_idx = src->Indirect.Index;
      ind_swz = tgsi_util_get_src_register_swizzle(&src->Indirect, 0);

      ptr = FETCH_ADDR(ind_idx, ind_swz);
   }
   if (idx >= (128 / 4) && src->Register.File == TGSI_FILE_CONSTANT)
      ptr = bld_get_address(bld, (idx * 16) & ~0x1ff, ptr);

   switch (src->Register.File) {
   case TGSI_FILE_CONSTANT:
      dim_idx = src->Dimension.Index ? src->Dimension.Index + 2 : 1;
      assert(dim_idx < 14);
      assert(dim_idx == 1); /* for now */

      res = new_value(bld->pc, NV_FILE_MEM_C(dim_idx), type);
      SET_TYPE(res, type);
      res->reg.id = (idx * 4 + swz) & 127;
      res = bld_insn_1(bld, NV_OP_LDA, res);

      if (ptr)
         res->insn->src[4] = new_ref(bld->pc, ptr);
      break;
   case TGSI_FILE_IMMEDIATE:
      assert(idx < bld->ti->immd32_nr);
      res = bld_load_imm_u32(bld, bld->ti->immd32[idx * 4 + swz]);

      switch (bld->ti->immd32_ty[idx]) {
      case TGSI_IMM_FLOAT32: SET_TYPE(res, NV_TYPE_F32); break;
      case TGSI_IMM_UINT32: SET_TYPE(res, NV_TYPE_U32); break;
      case TGSI_IMM_INT32: SET_TYPE(res, NV_TYPE_S32); break;
      default:
         SET_TYPE(res, type);
         break;
      }
      break;
   case TGSI_FILE_INPUT:
      res = bld_saved_input(bld, idx, swz);
      if (res && (insn->Instruction.Opcode != TGSI_OPCODE_TXP))
         return res;

      res = new_value(bld->pc, bld->ti->input_file, type);
      res->reg.id = bld->ti->input_map[idx][swz];

      if (res->reg.file == NV_FILE_MEM_V) {
         res = bld_interpolate(bld, bld->ti->interp_mode[idx], res);
      } else {
         assert(src->Dimension.Dimension == 0);
         res = bld_insn_1(bld, NV_OP_LDA, res);
         assert(res->reg.type == type);
      }
      bld->saved_inputs[bld->ti->input_map[idx][swz]] = res;
      break;
   case TGSI_FILE_TEMPORARY:
      if (bld->ti->store_to_memory)
         res = bld_lmem_load(bld, ptr, idx * 4 + swz);
      else
         res = bld_fetch_global(bld, &bld->tvs[idx][swz]);
      break;
   case TGSI_FILE_ADDRESS:
      res = bld_fetch_global(bld, &bld->avs[idx][swz]);
      break;
   case TGSI_FILE_PREDICATE:
      res = bld_fetch_global(bld, &bld->pvs[idx][swz]);
      break;
   default:
      NOUVEAU_ERR("illegal/unhandled src reg file: %d\n", src->Register.File);
      abort();
      break;
   }
   if (!res)
      return bld_undef(bld, NV_FILE_GPR);

   sgn = tgsi_util_get_full_src_register_sign_mode(src, chan);

   if (insn->Instruction.Opcode != TGSI_OPCODE_MOV)
      res->reg.as_type = type;
   else
   if (sgn != TGSI_UTIL_SIGN_KEEP) /* apparently "MOV A, -B" assumes float */
      res->reg.as_type = NV_TYPE_F32;

   switch (sgn) {
   case TGSI_UTIL_SIGN_KEEP:
      break;
   case TGSI_UTIL_SIGN_CLEAR:
      res = bld_insn_1(bld, NV_OP_ABS, res);
      break;
   case TGSI_UTIL_SIGN_TOGGLE:
      res = bld_insn_1(bld, NV_OP_NEG, res);
      break;
   case TGSI_UTIL_SIGN_SET:
      res = bld_insn_1(bld, NV_OP_ABS, res);
      res = bld_insn_1(bld, NV_OP_NEG, res);
      break;
   default:
      NOUVEAU_ERR("illegal/unhandled src reg sign mode\n");
      abort();
      break;
   }

   return res;
}

static void
bld_lit(struct bld_context *bld, struct nv_value *dst0[4],
        const struct tgsi_full_instruction *insn)
{
   struct nv_value *val0, *zero;
   unsigned mask = insn->Dst[0].Register.WriteMask;

   if (mask & ((1 << 0) | (1 << 3)))
      dst0[3] = dst0[0] = bld_load_imm_f32(bld, 1.0f);

   if (mask & (3 << 1)) {
      zero = bld_load_imm_f32(bld, 0.0f);
      val0 = bld_insn_2(bld, NV_OP_MAX, emit_fetch(bld, insn, 0, 0), zero);

      if (mask & (1 << 1))
         dst0[1] = val0;
   }

   if (mask & (1 << 2)) {
      struct nv_value *val1, *val3, *src1, *src3;
      struct nv_value *pos128 = bld_load_imm_f32(bld, 127.999999f);
      struct nv_value *neg128 = bld_load_imm_f32(bld, -127.999999f);

      src1 = emit_fetch(bld, insn, 0, 1);
      src3 = emit_fetch(bld, insn, 0, 3);

      val0->insn->flags_def = new_value(bld->pc, NV_FILE_FLAGS, NV_TYPE_U16);
      val0->insn->flags_def->insn = val0->insn;

      val1 = bld_insn_2(bld, NV_OP_MAX, src1, zero);
      val3 = bld_insn_2(bld, NV_OP_MAX, src3, neg128);
      val3 = bld_insn_2(bld, NV_OP_MIN, val3, pos128);
      val3 = bld_pow(bld, val1, val3);

      dst0[2] = bld_insn_1(bld, NV_OP_MOV, zero);
      dst0[2]->insn->cc = NV_CC_LE;
      dst0[2]->insn->flags_src = new_ref(bld->pc, val0->insn->flags_def);

      dst0[2] = bld_insn_2(bld, NV_OP_SELECT, val3, dst0[2]);
   }
}

static INLINE void
get_tex_dim(const struct tgsi_full_instruction *insn, int *dim, int *arg)
{
   switch (insn->Texture.Texture) {
   case TGSI_TEXTURE_1D:
      *arg = *dim = 1;
      break;
   case TGSI_TEXTURE_SHADOW1D:
      *dim = 1;
      *arg = 2;
      break;
   case TGSI_TEXTURE_UNKNOWN:
   case TGSI_TEXTURE_2D:
   case TGSI_TEXTURE_RECT:
      *arg = *dim = 2;
      break;
   case TGSI_TEXTURE_SHADOW2D:
   case TGSI_TEXTURE_SHADOWRECT:
      *dim = 2;
      *arg = 3;
      break;
   case TGSI_TEXTURE_3D:
   case TGSI_TEXTURE_CUBE:
      *dim = *arg = 3;
      break;
   default:
      assert(0);
      break;
   }
}

static void
load_proj_tex_coords(struct bld_context *bld,
                     struct nv_value *t[4], int dim, int arg,
                     const struct tgsi_full_instruction *insn)
{
   int c, mask;

   mask = (1 << dim) - 1;
   if (arg != dim)
      mask |= 4; /* depth comparison value */

   t[3] = emit_fetch(bld, insn, 0, 3);

   if (t[3]->insn->opcode == NV_OP_PINTERP) {
      t[3] = bld_duplicate_insn(bld, t[3]->insn);
      t[3]->insn->opcode = NV_OP_LINTERP;
      nv_reference(bld->pc, &t[3]->insn->src[1], NULL);
   }

   t[3] = bld_insn_1(bld, NV_OP_RCP, t[3]);

   for (c = 0; c < 4; ++c) {
      if (!(mask & (1 << c)))
         continue;
      t[c] = emit_fetch(bld, insn, 0, c);

      if (t[c]->insn->opcode != NV_OP_LINTERP &&
          t[c]->insn->opcode != NV_OP_PINTERP)
         continue;
      t[c] = bld_duplicate_insn(bld, t[c]->insn);
      t[c]->insn->opcode = NV_OP_PINTERP;
      nv_reference(bld->pc, &t[c]->insn->src[1], t[3]);

      mask &= ~(1 << c);
   }

   for (c = 0; mask; ++c, mask >>= 1) {
      if (!(mask & 1))
         continue;
      t[c] = bld_insn_2(bld, NV_OP_MUL, t[c], t[3]);
   }
}
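
/* Note (added for clarity, not in the original): TXP divides each coordinate
 * by q, i.e. t[c] = t[c] / t[3]. Interpolated coordinates are folded into a
 * single PINTERP taking 1/q as the second source; any remaining components
 * are multiplied by RCP(t[3]) explicitly in the last loop above.
 */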

/* For a quad of threads (top-left, top-right, bottom-left, bottom-right
 * pixels), perform a different operation per lane, taking src0 from a
 * specific thread.
 */
#define QOP_ADD  0
#define QOP_SUBR 1
#define QOP_SUB  2
#define QOP_MOV1 3

#define QOP(a, b, c, d) \
   ((QOP_##a << 0) | (QOP_##b << 2) | (QOP_##c << 4) | (QOP_##d << 6))
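/* Illustration (not part of the original file): QOP packs one 2-bit opcode
 * per lane, e.g. QOP(SUBR, SUBR, SUBR, SUBR) == 0x55, which makes every lane
 * compute src1 - src0 relative to the lane selected by insn->lanes; see
 * bld_texbias_sequence() below.
 */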

static INLINE struct nv_value *
bld_quadop(struct bld_context *bld, ubyte qop, struct nv_value *src0, int lane,
           struct nv_value *src1, boolean wp)
{
   struct nv_value *val = bld_insn_2(bld, NV_OP_QUADOP, src0, src1);
   val->insn->lanes = lane;
   val->insn->quadop = qop;
   if (wp) {
      val->insn->flags_def = new_value(bld->pc, NV_FILE_FLAGS, NV_TYPE_U16);
      val->insn->flags_def->insn = val->insn;
   }
   return val;
}

static INLINE struct nv_value *
bld_cmov(struct bld_context *bld,
         struct nv_value *src, ubyte cc, struct nv_value *cr)
{
   src = bld_insn_1(bld, NV_OP_MOV, src);

   src->insn->cc = cc;
   src->insn->flags_src = new_ref(bld->pc, cr);

   return src;
}

static struct nv_instruction *
emit_tex(struct bld_context *bld, uint opcode,
         struct nv_value *dst[4], struct nv_value *t_in[4],
         int argc, int tic, int tsc, int cube)
{
   struct nv_value *t[4];
   struct nv_instruction *nvi;
   int c;

   /* the inputs to a tex instruction must be separate values */
   for (c = 0; c < argc; ++c) {
      t[c] = bld_insn_1(bld, NV_OP_MOV, t_in[c]);
      SET_TYPE(t[c], NV_TYPE_F32);
      t[c]->insn->fixed = 1;
   }

   nvi = new_instruction(bld->pc, opcode);

   for (c = 0; c < 4; ++c)
      dst[c] = bld_def(nvi, c, new_value(bld->pc, NV_FILE_GPR, NV_TYPE_F32));

   for (c = 0; c < argc; ++c)
      nvi->src[c] = new_ref(bld->pc, t[c]);

   nvi->tex_t = tic;
   nvi->tex_s = tsc;
   nvi->tex_mask = 0xf;
   nvi->tex_cube = cube;
   nvi->tex_live = 0;
   nvi->tex_argc = argc;

   return nvi;
}

static void
bld_texlod_sequence(struct bld_context *bld,
                    struct nv_value *dst[4], struct nv_value *t[4], int arg,
                    int tic, int tsc, int cube)
{
   emit_tex(bld, NV_OP_TXL, dst, t, arg, tic, tsc, cube); /* TODO */
}


/* The lanes of a quad are grouped by the bit in the condition register
 * they have set, which is selected by differing bias values.
 * Move the input values for TEX into a new register set for each group
 * and execute TEX only for a specific group.
 * We always need to use 4 new registers for the inputs/outputs because
 * the implicitly calculated derivatives must be correct.
 */
static void
bld_texbias_sequence(struct bld_context *bld,
                     struct nv_value *dst[4], struct nv_value *t[4], int arg,
                     int tic, int tsc, int cube)
{
   struct nv_instruction *sel, *tex;
   struct nv_value *bit[4], *cr[4], *res[4][4], *val;
   int l, c;

   const ubyte cc[4] = { NV_CC_EQ, NV_CC_S, NV_CC_C, NV_CC_O };

   for (l = 0; l < 4; ++l) {
      bit[l] = bld_load_imm_u32(bld, 1 << l);

      val = bld_quadop(bld, QOP(SUBR, SUBR, SUBR, SUBR),
                       t[arg - 1], l, t[arg - 1], TRUE);

      cr[l] = bld_cmov(bld, bit[l], NV_CC_EQ, val->insn->flags_def);

      cr[l]->reg.file = NV_FILE_FLAGS;
      SET_TYPE(cr[l], NV_TYPE_U16);
   }

   sel = new_instruction(bld->pc, NV_OP_SELECT);

   for (l = 0; l < 4; ++l)
      sel->src[l] = new_ref(bld->pc, cr[l]);

   bld_def(sel, 0, new_value(bld->pc, NV_FILE_FLAGS, NV_TYPE_U16));

   for (l = 0; l < 4; ++l) {
      tex = emit_tex(bld, NV_OP_TXB, dst, t, arg, tic, tsc, cube);

      tex->cc = cc[l];
      tex->flags_src = new_ref(bld->pc, sel->def[0]);

      for (c = 0; c < 4; ++c)
         res[l][c] = tex->def[c];
   }

   for (l = 0; l < 4; ++l)
      for (c = 0; c < 4; ++c)
         res[l][c] = bld_cmov(bld, res[l][c], cc[l], sel->def[0]);

   for (c = 0; c < 4; ++c) {
      sel = new_instruction(bld->pc, NV_OP_SELECT);

      for (l = 0; l < 4; ++l)
         sel->src[l] = new_ref(bld->pc, res[l][c]);

      bld_def(sel, 0, (dst[c] = new_value(bld->pc, NV_FILE_GPR, NV_TYPE_F32)));
   }
}
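
/* Walkthrough (added for clarity, not in the original): for each lane l,
 * the QUADOP above subtracts lane l's bias from every thread's bias; threads
 * with a matching bias take the predicated cmov and contribute bit l to the
 * merged flags value. That value then gates four predicated TXB copies, so
 * each group samples with its own bias, and the final SELECTs pick the live
 * result per component.
 */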

static boolean
bld_is_constant(struct nv_value *val)
{
   if (val->reg.file == NV_FILE_IMM)
      return TRUE;
   return val->insn && nvcg_find_constant(val->insn->src[0]);
}

static void
bld_tex(struct bld_context *bld, struct nv_value *dst0[4],
        const struct tgsi_full_instruction *insn)
{
   struct nv_value *t[4], *s[3];
   uint opcode = translate_opcode(insn->Instruction.Opcode);
   int arg, dim, c;
   const int tic = insn->Src[1].Register.Index;
   const int tsc = 0;
   const int cube = (insn->Texture.Texture == TGSI_TEXTURE_CUBE) ? 1 : 0;

   get_tex_dim(insn, &dim, &arg);

   if (!cube && insn->Instruction.Opcode == TGSI_OPCODE_TXP)
      load_proj_tex_coords(bld, t, dim, arg, insn);
   else {
      for (c = 0; c < dim; ++c)
         t[c] = emit_fetch(bld, insn, 0, c);
      if (arg != dim)
         t[dim] = emit_fetch(bld, insn, 0, 2);
   }

   if (cube) {
      assert(dim >= 3);
      for (c = 0; c < 3; ++c)
         s[c] = bld_insn_1(bld, NV_OP_ABS, t[c]);

      s[0] = bld_insn_2(bld, NV_OP_MAX, s[0], s[1]);
      s[0] = bld_insn_2(bld, NV_OP_MAX, s[0], s[2]);
      s[0] = bld_insn_1(bld, NV_OP_RCP, s[0]);

      for (c = 0; c < 3; ++c)
         t[c] = bld_insn_2(bld, NV_OP_MUL, t[c], s[0]);
   }

   if (opcode == NV_OP_TXB || opcode == NV_OP_TXL) {
      t[arg++] = emit_fetch(bld, insn, 0, 3);

      if ((bld->ti->p->type == PIPE_SHADER_FRAGMENT) &&
          !bld_is_constant(t[arg - 1])) {
         if (opcode == NV_OP_TXB)
            bld_texbias_sequence(bld, dst0, t, arg, tic, tsc, cube);
         else
            bld_texlod_sequence(bld, dst0, t, arg, tic, tsc, cube);
         return;
      }
   }

   emit_tex(bld, opcode, dst0, t, arg, tic, tsc, cube);
}

static INLINE struct nv_value *
bld_dot(struct bld_context *bld, const struct tgsi_full_instruction *insn,
        int n)
{
   struct nv_value *dotp, *src0, *src1;
   int c;

   src0 = emit_fetch(bld, insn, 0, 0);
   src1 = emit_fetch(bld, insn, 1, 0);
   dotp = bld_insn_2(bld, NV_OP_MUL, src0, src1);

   for (c = 1; c < n; ++c) {
      src0 = emit_fetch(bld, insn, 0, c);
      src1 = emit_fetch(bld, insn, 1, c);
      dotp = bld_insn_3(bld, NV_OP_MAD, src0, src1, dotp);
   }
   return dotp;
}
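
/* Illustration (not part of the original file): bld_dot() expands an
 * n-component dot product into one MUL and n-1 MADs, e.g. for DP3:
 *
 *    dotp = a.x * b.x
 *    dotp = a.y * b.y + dotp
 *    dotp = a.z * b.z + dotp
 */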

#define FOR_EACH_DST0_ENABLED_CHANNEL(chan, inst) \
   for (chan = 0; chan < 4; ++chan)               \
      if ((inst)->Dst[0].Register.WriteMask & (1 << chan))

static void
bld_instruction(struct bld_context *bld,
                const struct tgsi_full_instruction *insn)
{
   struct nv_value *src0;
   struct nv_value *src1;
   struct nv_value *src2;
   struct nv_value *dst0[4];
   struct nv_value *temp;
   int c;
   uint opcode = translate_opcode(insn->Instruction.Opcode);

#ifdef NV50_TGSI2NC_DEBUG
   debug_printf("bld_instruction:"); tgsi_dump_instruction(insn, 1);
#endif

   switch (insn->Instruction.Opcode) {
   case TGSI_OPCODE_ADD:
   case TGSI_OPCODE_MAX:
   case TGSI_OPCODE_MIN:
   case TGSI_OPCODE_MUL:
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
         src0 = emit_fetch(bld, insn, 0, c);
         src1 = emit_fetch(bld, insn, 1, c);
         dst0[c] = bld_insn_2(bld, opcode, src0, src1);
      }
      break;
   case TGSI_OPCODE_ARL:
      src1 = bld_imm_u32(bld, 4);
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
         src0 = emit_fetch(bld, insn, 0, c);
         temp = bld_insn_1(bld, NV_OP_FLOOR, src0);
         SET_TYPE(temp, NV_TYPE_S32);
         dst0[c] = bld_insn_2(bld, NV_OP_SHL, temp, src1);
      }
      break;
   case TGSI_OPCODE_CMP:
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
         src0 = emit_fetch(bld, insn, 0, c);
         src1 = emit_fetch(bld, insn, 1, c);
         src2 = emit_fetch(bld, insn, 2, c);
         src0 = bld_predicate(bld, src0, FALSE);

         src1 = bld_insn_1(bld, NV_OP_MOV, src1);
         src1->insn->flags_src = new_ref(bld->pc, src0);
         src1->insn->cc = NV_CC_LT;

         src2 = bld_insn_1(bld, NV_OP_MOV, src2);
         src2->insn->flags_src = new_ref(bld->pc, src0);
         src2->insn->cc = NV_CC_GE;

         dst0[c] = bld_insn_2(bld, NV_OP_SELECT, src1, src2);
      }
      break;
   case TGSI_OPCODE_COS:
   case TGSI_OPCODE_SIN:
      src0 = emit_fetch(bld, insn, 0, 0);
      temp = bld_insn_1(bld, NV_OP_PRESIN, src0);
      if (insn->Dst[0].Register.WriteMask & 7)
         temp = bld_insn_1(bld, opcode, temp);
      for (c = 0; c < 3; ++c)
         if (insn->Dst[0].Register.WriteMask & (1 << c))
            dst0[c] = temp;
      if (!(insn->Dst[0].Register.WriteMask & (1 << 3)))
         break;
      src0 = emit_fetch(bld, insn, 0, 3);
      temp = bld_insn_1(bld, NV_OP_PRESIN, src0);
      dst0[3] = bld_insn_1(bld, opcode, temp);
      break;
   case TGSI_OPCODE_DP2:
      temp = bld_dot(bld, insn, 2);
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
         dst0[c] = temp;
      break;
   case TGSI_OPCODE_DP3:
      temp = bld_dot(bld, insn, 3);
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
         dst0[c] = temp;
      break;
   case TGSI_OPCODE_DP4:
      temp = bld_dot(bld, insn, 4);
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
         dst0[c] = temp;
      break;
   case TGSI_OPCODE_DPH:
      src0 = bld_dot(bld, insn, 3);
      src1 = emit_fetch(bld, insn, 1, 3);
      temp = bld_insn_2(bld, NV_OP_ADD, src0, src1);
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
         dst0[c] = temp;
      break;
   case TGSI_OPCODE_DST:
      if (insn->Dst[0].Register.WriteMask & 1)
         dst0[0] = bld_imm_f32(bld, 1.0f);
      if (insn->Dst[0].Register.WriteMask & 2) {
         src0 = emit_fetch(bld, insn, 0, 1);
         src1 = emit_fetch(bld, insn, 1, 1);
         dst0[1] = bld_insn_2(bld, NV_OP_MUL, src0, src1);
      }
      if (insn->Dst[0].Register.WriteMask & 4)
         dst0[2] = emit_fetch(bld, insn, 0, 2);
      if (insn->Dst[0].Register.WriteMask & 8)
         dst0[3] = emit_fetch(bld, insn, 1, 3);
      break;
   case TGSI_OPCODE_EXP:
      src0 = emit_fetch(bld, insn, 0, 0);
      temp = bld_insn_1(bld, NV_OP_FLOOR, src0);

      if (insn->Dst[0].Register.WriteMask & 2)
         dst0[1] = bld_insn_2(bld, NV_OP_SUB, src0, temp);
      if (insn->Dst[0].Register.WriteMask & 1) {
         temp = bld_insn_1(bld, NV_OP_PREEX2, temp);
         dst0[0] = bld_insn_1(bld, NV_OP_EX2, temp);
      }
      if (insn->Dst[0].Register.WriteMask & 4) {
         temp = bld_insn_1(bld, NV_OP_PREEX2, src0);
         dst0[2] = bld_insn_1(bld, NV_OP_EX2, temp);
      }
      if (insn->Dst[0].Register.WriteMask & 8)
         dst0[3] = bld_imm_f32(bld, 1.0f);
      break;
   case TGSI_OPCODE_EX2:
      src0 = emit_fetch(bld, insn, 0, 0);
      temp = bld_insn_1(bld, NV_OP_PREEX2, src0);
      temp = bld_insn_1(bld, NV_OP_EX2, temp);
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
         dst0[c] = temp;
      break;
   case TGSI_OPCODE_FRC:
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
         src0 = emit_fetch(bld, insn, 0, c);
         dst0[c] = bld_insn_1(bld, NV_OP_FLOOR, src0);
         dst0[c] = bld_insn_2(bld, NV_OP_SUB, src0, dst0[c]);
      }
      break;
   case TGSI_OPCODE_KIL:
      for (c = 0; c < 4; ++c) {
         src0 = emit_fetch(bld, insn, 0, c);
         bld_kil(bld, src0);
      }
      break;
   case TGSI_OPCODE_KILP:
      (new_instruction(bld->pc, NV_OP_KIL))->fixed = 1;
      break;
   case TGSI_OPCODE_IF:
   {
      struct nv_basic_block *b = new_basic_block(bld->pc);

      assert(bld->cond_lvl < BLD_MAX_COND_NESTING);

      nvbb_attach_block(bld->pc->current_block, b, CFG_EDGE_FORWARD);

      bld->join_bb[bld->cond_lvl] = bld->pc->current_block;
      bld->cond_bb[bld->cond_lvl] = bld->pc->current_block;

      src1 = bld_predicate(bld, emit_fetch(bld, insn, 0, 0), TRUE);

      bld_flow(bld, NV_OP_BRA, NV_CC_EQ, src1, NULL, (bld->cond_lvl == 0));

      ++bld->cond_lvl;
      bld_new_block(bld, b);
   }
      break;
   case TGSI_OPCODE_ELSE:
   {
      struct nv_basic_block *b = new_basic_block(bld->pc);

      --bld->cond_lvl;
      nvbb_attach_block(bld->join_bb[bld->cond_lvl], b, CFG_EDGE_FORWARD);

      bld->cond_bb[bld->cond_lvl]->exit->target = b;
      bld->cond_bb[bld->cond_lvl] = bld->pc->current_block;

      new_instruction(bld->pc, NV_OP_BRA)->is_terminator = 1;

      ++bld->cond_lvl;
      bld_new_block(bld, b);
   }
      break;
   case TGSI_OPCODE_ENDIF:
   {
      struct nv_basic_block *b = new_basic_block(bld->pc);

      --bld->cond_lvl;
      nvbb_attach_block(bld->pc->current_block, b, bld->out_kind);
      nvbb_attach_block(bld->cond_bb[bld->cond_lvl], b, CFG_EDGE_FORWARD);

      bld->cond_bb[bld->cond_lvl]->exit->target = b;

      bld_new_block(bld, b);

      if (!bld->cond_lvl && bld->join_bb[bld->cond_lvl]) {
         bld->join_bb[bld->cond_lvl]->exit->prev->target = b;
         new_instruction(bld->pc, NV_OP_JOIN)->is_join = TRUE;
      }
   }
      break;
   case TGSI_OPCODE_BGNLOOP:
   {
      struct nv_basic_block *bl = new_basic_block(bld->pc);
      struct nv_basic_block *bb = new_basic_block(bld->pc);

      assert(bld->loop_lvl < BLD_MAX_LOOP_NESTING);

      bld->loop_bb[bld->loop_lvl] = bl;
      bld->brkt_bb[bld->loop_lvl] = bb;

      bld_flow(bld, NV_OP_BREAKADDR, NV_CC_TR, NULL, bb, FALSE);

      nvbb_attach_block(bld->pc->current_block, bl, CFG_EDGE_LOOP_ENTER);

      bld_new_block(bld, bld->loop_bb[bld->loop_lvl++]);

      if (bld->loop_lvl == bld->pc->loop_nesting_bound)
         bld->pc->loop_nesting_bound++;

      bld_clear_def_use(&bld->tvs[0][0], BLD_MAX_TEMPS, bld->loop_lvl);
      bld_clear_def_use(&bld->avs[0][0], BLD_MAX_ADDRS, bld->loop_lvl);
      bld_clear_def_use(&bld->pvs[0][0], BLD_MAX_PREDS, bld->loop_lvl);
   }
      break;
   case TGSI_OPCODE_BRK:
   {
      struct nv_basic_block *bb = bld->brkt_bb[bld->loop_lvl - 1];

      bld_flow(bld, NV_OP_BREAK, NV_CC_TR, NULL, bb, FALSE);

      if (bld->out_kind == CFG_EDGE_FORWARD) /* else we already had BRK/CONT */
         nvbb_attach_block(bld->pc->current_block, bb, CFG_EDGE_LOOP_LEAVE);

      bld->out_kind = CFG_EDGE_FAKE;
   }
      break;
   case TGSI_OPCODE_CONT:
   {
      struct nv_basic_block *bb = bld->loop_bb[bld->loop_lvl - 1];

      bld_flow(bld, NV_OP_BRA, NV_CC_TR, NULL, bb, FALSE);

      nvbb_attach_block(bld->pc->current_block, bb, CFG_EDGE_BACK);

      if ((bb = bld->join_bb[bld->cond_lvl - 1])) {
         bld->join_bb[bld->cond_lvl - 1] = NULL;
         nv_nvi_delete(bb->exit->prev);
      }
      bld->out_kind = CFG_EDGE_FAKE;
   }
      break;
   case TGSI_OPCODE_ENDLOOP:
   {
      struct nv_basic_block *bb = bld->loop_bb[bld->loop_lvl - 1];

      bld_flow(bld, NV_OP_BRA, NV_CC_TR, NULL, bb, FALSE);

      nvbb_attach_block(bld->pc->current_block, bb, CFG_EDGE_BACK);

      bld_loop_end(bld, bb); /* replace loop-side operand of the phis */

      bld_new_block(bld, bld->brkt_bb[--bld->loop_lvl]);
   }
      break;
   case TGSI_OPCODE_ABS:
   case TGSI_OPCODE_CEIL:
   case TGSI_OPCODE_FLR:
   case TGSI_OPCODE_TRUNC:
   case TGSI_OPCODE_DDX:
   case TGSI_OPCODE_DDY:
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
         src0 = emit_fetch(bld, insn, 0, c);
         dst0[c] = bld_insn_1(bld, opcode, src0);
      }
      break;
   case TGSI_OPCODE_LIT:
      bld_lit(bld, dst0, insn);
      break;
   case TGSI_OPCODE_LRP:
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
         src0 = emit_fetch(bld, insn, 0, c);
         src1 = emit_fetch(bld, insn, 1, c);
         src2 = emit_fetch(bld, insn, 2, c);
         dst0[c] = bld_insn_2(bld, NV_OP_SUB, src1, src2);
         dst0[c] = bld_insn_3(bld, NV_OP_MAD, dst0[c], src0, src2);
      }
      break;
   case TGSI_OPCODE_MOV:
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
         dst0[c] = emit_fetch(bld, insn, 0, c);
      break;
   case TGSI_OPCODE_MAD:
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
         src0 = emit_fetch(bld, insn, 0, c);
         src1 = emit_fetch(bld, insn, 1, c);
         src2 = emit_fetch(bld, insn, 2, c);
         dst0[c] = bld_insn_3(bld, opcode, src0, src1, src2);
      }
      break;
   case TGSI_OPCODE_POW:
      src0 = emit_fetch(bld, insn, 0, 0);
      src1 = emit_fetch(bld, insn, 1, 0);
      temp = bld_pow(bld, src0, src1);
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
         dst0[c] = temp;
      break;
   case TGSI_OPCODE_LOG:
      src0 = emit_fetch(bld, insn, 0, 0);
      src0 = bld_insn_1(bld, NV_OP_ABS, src0);
      temp = bld_insn_1(bld, NV_OP_LG2, src0);
      dst0[2] = temp;
      if (insn->Dst[0].Register.WriteMask & 3) {
         temp = bld_insn_1(bld, NV_OP_FLOOR, temp);
         dst0[0] = temp;
      }
      if (insn->Dst[0].Register.WriteMask & 2) {
         temp = bld_insn_1(bld, NV_OP_PREEX2, temp);
         temp = bld_insn_1(bld, NV_OP_EX2, temp);
         temp = bld_insn_1(bld, NV_OP_RCP, temp);
         dst0[1] = bld_insn_2(bld, NV_OP_MUL, src0, temp);
      }
      if (insn->Dst[0].Register.WriteMask & 8)
         dst0[3] = bld_imm_f32(bld, 1.0f);
      break;
   case TGSI_OPCODE_RCP:
   case TGSI_OPCODE_LG2:
      src0 = emit_fetch(bld, insn, 0, 0);
      temp = bld_insn_1(bld, opcode, src0);
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
         dst0[c] = temp;
      break;
   case TGSI_OPCODE_RSQ:
      src0 = emit_fetch(bld, insn, 0, 0);
      temp = bld_insn_1(bld, NV_OP_ABS, src0);
      temp = bld_insn_1(bld, NV_OP_RSQ, temp);
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
         dst0[c] = temp;
      break;
   case TGSI_OPCODE_SLT:
   case TGSI_OPCODE_SGE:
   case TGSI_OPCODE_SEQ:
   case TGSI_OPCODE_SGT:
   case TGSI_OPCODE_SLE:
   case TGSI_OPCODE_SNE:
   case TGSI_OPCODE_ISLT:
   case TGSI_OPCODE_ISGE:
   case TGSI_OPCODE_USEQ:
   case TGSI_OPCODE_USGE:
   case TGSI_OPCODE_USLT:
   case TGSI_OPCODE_USNE:
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
         src0 = emit_fetch(bld, insn, 0, c);
         src1 = emit_fetch(bld, insn, 1, c);
         dst0[c] = bld_insn_2(bld, NV_OP_SET, src0, src1);
         dst0[c]->insn->set_cond = translate_setcc(insn->Instruction.Opcode);
         SET_TYPE(dst0[c], infer_dst_type(insn->Instruction.Opcode));

         dst0[c]->insn->src[0]->typecast =
         dst0[c]->insn->src[1]->typecast =
            infer_src_type(insn->Instruction.Opcode);

         if (dst0[c]->reg.type != NV_TYPE_F32)
            break;
         dst0[c]->reg.as_type = NV_TYPE_S32;
         dst0[c] = bld_insn_1(bld, NV_OP_ABS, dst0[c]);
         dst0[c] = bld_insn_1(bld, NV_OP_CVT, dst0[c]);
         SET_TYPE(dst0[c], NV_TYPE_F32);
      }
      break;
   case TGSI_OPCODE_SCS:
      if (insn->Dst[0].Register.WriteMask & 0x3) {
         src0 = emit_fetch(bld, insn, 0, 0);
         temp = bld_insn_1(bld, NV_OP_PRESIN, src0);
         if (insn->Dst[0].Register.WriteMask & 0x1)
            dst0[0] = bld_insn_1(bld, NV_OP_COS, temp);
         if (insn->Dst[0].Register.WriteMask & 0x2)
            dst0[1] = bld_insn_1(bld, NV_OP_SIN, temp);
      }
      if (insn->Dst[0].Register.WriteMask & 0x4)
         dst0[2] = bld_imm_f32(bld, 0.0f);
      if (insn->Dst[0].Register.WriteMask & 0x8)
         dst0[3] = bld_imm_f32(bld, 1.0f);
      break;
   case TGSI_OPCODE_SSG:
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
         src0 = emit_fetch(bld, insn, 0, c);
         src1 = bld_predicate(bld, src0, FALSE);
         temp = bld_insn_2(bld, NV_OP_AND, src0, bld_imm_u32(bld, 0x80000000));
         temp = bld_insn_2(bld, NV_OP_OR, temp, bld_imm_f32(bld, 1.0f));
         dst0[c] = bld_insn_2(bld, NV_OP_XOR, temp, temp);
         dst0[c]->insn->cc = NV_CC_EQ;
         nv_reference(bld->pc, &dst0[c]->insn->flags_src, src1);
      }
      break;
   case TGSI_OPCODE_SUB:
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
         src0 = emit_fetch(bld, insn, 0, c);
         src1 = emit_fetch(bld, insn, 1, c);
         dst0[c] = bld_insn_2(bld, NV_OP_ADD, src0, src1);
         dst0[c]->insn->src[1]->mod ^= NV_MOD_NEG;
      }
      break;
   case TGSI_OPCODE_TEX:
   case TGSI_OPCODE_TXB:
   case TGSI_OPCODE_TXL:
   case TGSI_OPCODE_TXP:
      bld_tex(bld, dst0, insn);
      break;
   case TGSI_OPCODE_XPD:
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
         if (c == 3) {
            dst0[3] = bld_imm_f32(bld, 1.0f);
            break;
         }
         src0 = emit_fetch(bld, insn, 1, (c + 1) % 3);
         src1 = emit_fetch(bld, insn, 0, (c + 2) % 3);
         dst0[c] = bld_insn_2(bld, NV_OP_MUL, src0, src1);

         src0 = emit_fetch(bld, insn, 0, (c + 1) % 3);
         src1 = emit_fetch(bld, insn, 1, (c + 2) % 3);
         dst0[c] = bld_insn_3(bld, NV_OP_MAD, src0, src1, dst0[c]);

         dst0[c]->insn->src[2]->mod ^= NV_MOD_NEG;
      }
      break;
   case TGSI_OPCODE_RET:
      (new_instruction(bld->pc, NV_OP_RET))->fixed = 1;
      break;
   case TGSI_OPCODE_END:
      if (bld->ti->p->type == PIPE_SHADER_FRAGMENT)
         bld_export_outputs(bld);
      break;
   default:
      NOUVEAU_ERR("unhandled opcode %u\n", insn->Instruction.Opcode);
      abort();
      break;
   }

   FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
      emit_store(bld, insn, c, dst0[c]);
}

static INLINE void
bld_free_value_trackers(struct bld_value_stack *base, int n)
{
   int i, c;

   for (i = 0; i < n; ++i)
      for (c = 0; c < 4; ++c)
         if (base[i * 4 + c].body)
            FREE(base[i * 4 + c].body);
}

int
nv50_tgsi_to_nc(struct nv_pc *pc, struct nv50_translation_info *ti)
{
   struct bld_context *bld = CALLOC_STRUCT(bld_context);
   int c;
   unsigned ip;

   pc->root[0] = pc->current_block = new_basic_block(pc);

   bld->pc = pc;
   bld->ti = ti;

   pc->loop_nesting_bound = 1;

   c = util_bitcount(bld->ti->p->fp.interp >> 24);
   if (c && ti->p->type == PIPE_SHADER_FRAGMENT) {
      bld->frgcrd[3] = new_value(pc, NV_FILE_MEM_V, NV_TYPE_F32);
      bld->frgcrd[3]->reg.id = c - 1;
      bld->frgcrd[3] = bld_insn_1(bld, NV_OP_LINTERP, bld->frgcrd[3]);
      bld->frgcrd[3] = bld_insn_1(bld, NV_OP_RCP, bld->frgcrd[3]);
   }

   for (ip = 0; ip < ti->inst_nr; ++ip)
      bld_instruction(bld, &ti->insns[ip]);

   bld_free_value_trackers(&bld->tvs[0][0], BLD_MAX_TEMPS);
   bld_free_value_trackers(&bld->avs[0][0], BLD_MAX_ADDRS);
   bld_free_value_trackers(&bld->pvs[0][0], BLD_MAX_PREDS);

   bld_free_value_trackers(&bld->ovs[0][0], PIPE_MAX_SHADER_OUTPUTS);

   FREE(bld);
   return 0;
}

/* If a variable is assigned in a loop, replace all references to the value
 * from outside the loop with a phi value.
 */
static void
bld_replace_value(struct nv_pc *pc, struct nv_basic_block *b,
                  struct nv_value *old_val,
                  struct nv_value *new_val)
{
   struct nv_instruction *nvi;

   for (nvi = b->phi ? b->phi : b->entry; nvi; nvi = nvi->next) {
      int s;
      for (s = 0; s < 5; ++s) {
         if (!nvi->src[s])
            continue;
         if (nvi->src[s]->value == old_val)
            nv_reference(pc, &nvi->src[s], new_val);
      }
      if (nvi->flags_src && nvi->flags_src->value == old_val)
         nv_reference(pc, &nvi->flags_src, new_val);
   }

   b->pass_seq = pc->pass_seq;

   if (b->out[0] && b->out[0]->pass_seq < pc->pass_seq)
      bld_replace_value(pc, b->out[0], old_val, new_val);

   if (b->out[1] && b->out[1]->pass_seq < pc->pass_seq)
      bld_replace_value(pc, b->out[1], old_val, new_val);
}