da33adcaa4672ebf4e8b6fd2d699c7164edf190d
[mesa.git] / src / gallium / drivers / nv50 / nv50_tgsi_to_nc.c
1 /*
2 * Copyright 2010 Christoph Bumiller
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 * SOFTWARE.
21 */
22
23 /* XXX: need to clean this up so we get the typecasting right more naturally */
24
25 /* LOOP FIXME 1
26 * In bld_store_loop_var, only replace values that belong to the TGSI register
27 * written.
28 * For TGSI MOV, we only associate the source value with the value tracker of
29 * the destination, instead of generating an actual MOV.
30 *
31 * Possible solution: generate PHI functions in loop headers in advance.
32 */
33 /* LOOP FIXME 2:
34 * In fetch_by_bb, when going back through a break-block, we miss all of the
35 * definitions from inside the loop.
36 */
37
38 #include <unistd.h>
39
40 #include "nv50_context.h"
41 #include "nv50_pc.h"
42
43 #include "pipe/p_shader_tokens.h"
44 #include "tgsi/tgsi_parse.h"
45 #include "tgsi/tgsi_util.h"
46
47 #include "util/u_simple_list.h"
48 #include "tgsi/tgsi_dump.h"
49
50 #define BLD_MAX_TEMPS 64
51 #define BLD_MAX_ADDRS 4
52 #define BLD_MAX_PREDS 4
53 #define BLD_MAX_IMMDS 128
54
55 #define BLD_MAX_COND_NESTING 4
56 #define BLD_MAX_LOOP_NESTING 4
57 #define BLD_MAX_CALL_NESTING 2
58
59 /* collects all values assigned to the same TGSI register */
/* collects all values assigned to the same TGSI register */
struct bld_value_stack {
   struct nv_value *top;   /* current (most recent) value, not yet pushed */
   struct nv_value **body; /* previously pushed values, grown in chunks of 8 */
   unsigned size;          /* number of entries in body */
   uint16_t loop_use; /* 1 bit per loop level, indicates if used/defd */
   uint16_t loop_def; /* 1 bit per loop level: register defined at that level */
};
67
68 static INLINE void
69 bld_vals_push_val(struct bld_value_stack *stk, struct nv_value *val)
70 {
71 assert(!stk->size || (stk->body[stk->size - 1] != val));
72
73 if (!(stk->size % 8)) {
74 unsigned old_sz = (stk->size + 0) * sizeof(struct nv_value *);
75 unsigned new_sz = (stk->size + 8) * sizeof(struct nv_value *);
76 stk->body = (struct nv_value **)REALLOC(stk->body, old_sz, new_sz);
77 }
78 stk->body[stk->size++] = val;
79 }
80
81 static INLINE void
82 bld_vals_push(struct bld_value_stack *stk)
83 {
84 bld_vals_push_val(stk, stk->top);
85 stk->top = NULL;
86 }
87
88 static INLINE void
89 bld_push_values(struct bld_value_stack *stacks, int n)
90 {
91 int i, c;
92
93 for (i = 0; i < n; ++i)
94 for (c = 0; c < 4; ++c)
95 if (stacks[i * 4 + c].top)
96 bld_vals_push(&stacks[i * 4 + c]);
97 }
98
99 struct bld_context {
100 struct nv50_translation_info *ti;
101
102 struct nv_pc *pc;
103 struct nv_basic_block *b;
104
105 struct tgsi_parse_context parse[BLD_MAX_CALL_NESTING];
106 int call_lvl;
107
108 struct nv_basic_block *cond_bb[BLD_MAX_COND_NESTING];
109 struct nv_basic_block *join_bb[BLD_MAX_COND_NESTING];
110 struct nv_basic_block *else_bb[BLD_MAX_COND_NESTING];
111 int cond_lvl;
112 struct nv_basic_block *loop_bb[BLD_MAX_LOOP_NESTING];
113 struct nv_basic_block *brkt_bb[BLD_MAX_LOOP_NESTING];
114 int loop_lvl;
115
116 struct bld_value_stack tvs[BLD_MAX_TEMPS][4]; /* TGSI_FILE_TEMPORARY */
117 struct bld_value_stack avs[BLD_MAX_ADDRS][4]; /* TGSI_FILE_ADDRESS */
118 struct bld_value_stack pvs[BLD_MAX_PREDS][4]; /* TGSI_FILE_PREDICATE */
119 struct bld_value_stack ovs[PIPE_MAX_SHADER_OUTPUTS][4];
120
121 uint32_t outputs_written[PIPE_MAX_SHADER_OUTPUTS / 32];
122
123 struct nv_value *frgcrd[4];
124 struct nv_value *sysval[4];
125
126 /* wipe on new BB */
127 struct nv_value *saved_addr[4][2];
128 struct nv_value *saved_inputs[128];
129 struct nv_value *saved_immd[BLD_MAX_IMMDS];
130 uint num_immds;
131 };
132
133 static INLINE struct nv_value *
134 bld_fetch(struct bld_context *bld, struct bld_value_stack *stk, int i, int c)
135 {
136 stk[i * 4 + c].loop_use |= 1 << bld->loop_lvl;
137
138 return stk[i * 4 + c].top;
139 }
140
141 static void
142 bld_store_loop_var(struct bld_context *, struct bld_value_stack *);
143
144 static INLINE void
145 bld_store(struct bld_context *bld, struct bld_value_stack *stk, int i, int c,
146 struct nv_value *val)
147 {
148 bld_store_loop_var(bld, &stk[i * 4 + c]);
149
150 stk[i * 4 + c].top = val;
151 }
152
153 static INLINE void
154 bld_clear_def_use(struct bld_value_stack *stk, int n, int lvl)
155 {
156 int i;
157 const uint16_t mask = ~(1 << lvl);
158
159 for (i = 0; i < n * 4; ++i) {
160 stk[i].loop_def &= mask;
161 stk[i].loop_use &= mask;
162 }
163 }
164
/* Shorthand accessors for the per-register-file value stacks;
 * all expect a local "bld" context in scope.
 */
#define FETCH_TEMP(i, c)    bld_fetch(bld, &bld->tvs[0][0], i, c)
#define STORE_TEMP(i, c, v) bld_store(bld, &bld->tvs[0][0], i, c, (v))
#define FETCH_ADDR(i, c)    bld_fetch(bld, &bld->avs[0][0], i, c)
#define STORE_ADDR(i, c, v) bld_store(bld, &bld->avs[0][0], i, c, (v))
#define FETCH_PRED(i, c)    bld_fetch(bld, &bld->pvs[0][0], i, c)
#define STORE_PRED(i, c, v) bld_store(bld, &bld->pvs[0][0], i, c, (v))

/* Record an output value and mark output i, component c as written
 * (4 mask bits per output, 8 outputs per 32-bit word).
 */
#define STORE_OUTR(i, c, v)                                         \
   do {                                                             \
      bld->ovs[i][c].top = (v);                                     \
      bld->outputs_written[(i) / 8] |= 1 << (((i) * 4 + (c)) % 32); \
   } while (0)
177
/* Warn that a register was read without a (complete) prior definition;
 * kind != 0 means only some paths into b lack a definition.
 *
 * NOTE(review): i/c are computed relative to bld->tvs, so the "TEMP[i]"
 * label is only accurate when stk really is a temporary's stack; callers
 * also pass ADDR/PRED/OUT stacks (via bld_phi) — confirm.
 */
static INLINE void
bld_warn_uninitialized(struct bld_context *bld, int kind,
                       struct bld_value_stack *stk, struct nv_basic_block *b)
{
   long i = (stk - &bld->tvs[0][0]) / 4;
   long c = (stk - &bld->tvs[0][0]) & 3;

   debug_printf("WARNING: TEMP[%li].%c %s used uninitialized in BB:%i\n",
                i, (int)('x' + c), kind ? "may be" : "is", b->id);
}
188
189 static INLINE struct nv_value *
190 bld_def(struct nv_instruction *i, int c, struct nv_value *value)
191 {
192 i->def[c] = value;
193 value->insn = i;
194 return value;
195 }
196
197 static INLINE struct nv_value *
198 find_by_bb(struct bld_value_stack *stack, struct nv_basic_block *b)
199 {
200 int i;
201
202 if (stack->top && stack->top->insn->bb == b)
203 return stack->top;
204
205 for (i = stack->size - 1; i >= 0; --i)
206 if (stack->body[i]->insn->bb == b)
207 return stack->body[i];
208 return NULL;
209 }
210
211 /* fetch value from stack that was defined in the specified basic block,
212 * or search for first definitions in all of its predecessors
213 */
static void
fetch_by_bb(struct bld_value_stack *stack,
            struct nv_value **vals, int *n,
            struct nv_basic_block *b)
{
   int i;
   struct nv_value *val;

   assert(*n < 16); /* MAX_COND_NESTING */

   val = find_by_bb(stack, b);
   if (val) {
      /* collect each distinct value only once */
      for (i = 0; i < *n; ++i)
         if (vals[i] == val)
            return;
      vals[(*n)++] = val;
      return;
   }
   /* no definition in b itself: recurse into predecessors, skipping loop
    * back-edges (this misses in-loop definitions — see LOOP FIXME 2 at
    * the top of the file)
    */
   for (i = 0; i < b->num_in; ++i)
      if (b->in_kind[i] != CFG_EDGE_BACK)
         fetch_by_bb(stack, vals, n, b->in[i]);
}
236
237 static INLINE struct nv_value *
238 bld_load_imm_u32(struct bld_context *bld, uint32_t u);
239
/* Produce the value of the TGSI register (tracked by stack) at the start
 * of basic block b: either the single reaching definition, or a new phi
 * function merging all definitions that reach b through its predecessors.
 *
 * Recurses to place phis at dominance frontiers, and restarts the scan
 * after inserting dummy definitions for paths on which the register
 * would otherwise be undefined.
 */
static struct nv_value *
bld_phi(struct bld_context *bld, struct nv_basic_block *b,
        struct bld_value_stack *stack)
{
   struct nv_basic_block *in;
   struct nv_value *vals[16], *val;
   struct nv_instruction *phi;
   int i, j, n;

   do {
      i = n = 0;
      fetch_by_bb(stack, vals, &n, b);

      if (!n) {
         /* no definition reaches b on any path */
         bld_warn_uninitialized(bld, 0, stack, b);
         return NULL;
      }

      if (n == 1) {
         if (nvbb_dominated_by(b, vals[0]->insn->bb))
            break;

         /* single def, but some path into b bypasses it */
         bld_warn_uninitialized(bld, 1, stack, b);

         /* back-tracking to insert missing value of other path */
         in = b;
         while (in->in[0]) {
            if (in->num_in == 1) {
               in = in->in[0];
            } else {
               /* descend into the predecessor from which the existing
                * definition is NOT reachable
                */
               if (!nvbb_reachable_by(in->in[0], vals[0]->insn->bb, b)) {
                  in = in->in[0];
                  break;
               }
               if (!nvbb_reachable_by(in->in[1], vals[0]->insn->bb, b)) {
                  in = in->in[1];
                  break;
               }
               in = in->in[0];
            }
         }
         bld->pc->current_block = in;

         /* should make this a no-op */
         bld_vals_push_val(stack, bld_load_imm_u32(bld, 0));
         continue; /* rescan with the dummy definition in place */
      }

      for (i = 0; i < n; ++i) {
         /* if value dominates b, continue to the redefinitions */
         if (nvbb_dominated_by(b, vals[i]->insn->bb))
            continue;

         /* if value dominates any in-block, b should be the dom frontier */
         for (j = 0; j < b->num_in; ++j)
            if (nvbb_dominated_by(b->in[j], vals[i]->insn->bb))
               break;
         /* otherwise, find the dominance frontier and put the phi there */
         if (j == b->num_in) {
            in = nvbb_dom_frontier(vals[i]->insn->bb);
            val = bld_phi(bld, in, stack);
            bld_vals_push_val(stack, val);
            break;
         }
      }
   } while(i < n); /* i < n means a recursive phi was inserted: rescan */

   bld->pc->current_block = b;

   if (n == 1)
      return vals[0];

   phi = new_instruction(bld->pc, NV_OP_PHI);

   bld_def(phi, 0, new_value(bld->pc, vals[0]->reg.file, vals[0]->reg.type));
   for (i = 0; i < n; ++i)
      phi->src[i] = new_ref(bld->pc, vals[i]);

   return phi->def[0];
}
320
321 static INLINE struct nv_value *
322 bld_fetch_global(struct bld_context *bld, struct bld_value_stack *stack)
323 {
324 stack->loop_use |= 1 << bld->loop_lvl;
325 return bld_phi(bld, bld->pc->current_block, stack);
326 }
327
328 static INLINE struct nv_value *
329 bld_imm_u32(struct bld_context *bld, uint32_t u)
330 {
331 int i;
332 unsigned n = bld->num_immds;
333
334 for (i = 0; i < n; ++i)
335 if (bld->saved_immd[i]->reg.imm.u32 == u)
336 return bld->saved_immd[i];
337 assert(n < BLD_MAX_IMMDS);
338
339 bld->num_immds++;
340
341 bld->saved_immd[n] = new_value(bld->pc, NV_FILE_IMM, NV_TYPE_U32);
342 bld->saved_immd[n]->reg.imm.u32 = u;
343 return bld->saved_immd[n];
344 }
345
346 static void
347 bld_replace_value(struct nv_pc *, struct nv_basic_block *, struct nv_value *,
348 struct nv_value *);
349
350 /* When setting a variable inside a loop, and we have used it before in the
351 * loop, we need to insert a phi function in the loop header.
352 */
353 static void
354 bld_store_loop_var(struct bld_context *bld, struct bld_value_stack *stk)
355 {
356 struct nv_basic_block *bb;
357 struct nv_instruction *phi;
358 struct nv_value *val;
359 int ll;
360 uint16_t loop_def = stk->loop_def;
361
362 if (!(ll = bld->loop_lvl))
363 return;
364 stk->loop_def |= 1 << ll;
365
366 if ((~stk->loop_use | loop_def) & (1 << ll))
367 return;
368
369 #if 0
370 debug_printf("TEMP[%li].%c used before loop redef (def=%x/use=%x)\n",
371 (stk - &bld->tvs[0][0]) / 4,
372 (int)('x' + ((stk - &bld->tvs[0][0]) & 3)),
373 loop_def, stk->loop_use);
374 #endif
375
376 stk->loop_def |= 1 << ll;
377
378 assert(bld->loop_bb[ll - 1]->num_in == 1);
379
380 /* get last assignment from outside this loop, could be from bld_phi */
381 val = stk->body[stk->size - 1];
382
383 /* create the phi in the loop entry block */
384
385 bb = bld->pc->current_block;
386 bld->pc->current_block = bld->loop_bb[ll - 1];
387
388 phi = new_instruction(bld->pc, NV_OP_PHI);
389
390 bld_def(phi, 0, new_value(bld->pc, val->reg.file, val->reg.type));
391
392 bld->pc->pass_seq++;
393 bld_replace_value(bld->pc, bld->loop_bb[ll - 1], val, phi->def[0]);
394
395 assert(!stk->top);
396 bld_vals_push_val(stk, phi->def[0]);
397
398 phi->target = (struct nv_basic_block *)stk; /* cheat */
399
400 nv_reference(bld->pc, &phi->src[0], val);
401 nv_reference(bld->pc, &phi->src[1], phi->def[0]);
402
403 bld->pc->current_block = bb;
404 }
405
406 static void
407 bld_loop_end(struct bld_context *bld, struct nv_basic_block *bb)
408 {
409 struct nv_instruction *phi;
410 struct nv_value *val;
411
412 for (phi = bb->phi; phi && phi->opcode == NV_OP_PHI; phi = phi->next) {
413 val = bld_fetch_global(bld, (struct bld_value_stack *)phi->target);
414 nv_reference(bld->pc, &phi->src[1], val);
415 phi->target = NULL;
416 }
417 }
418
419 static INLINE struct nv_value *
420 bld_imm_f32(struct bld_context *bld, float f)
421 {
422 return bld_imm_u32(bld, fui(f));
423 }
424
425 #define SET_TYPE(v, t) ((v)->reg.type = NV_TYPE_##t)
426
427 static struct nv_value *
428 bld_insn_1(struct bld_context *bld, uint opcode, struct nv_value *src0)
429 {
430 struct nv_instruction *insn = new_instruction(bld->pc, opcode);
431 assert(insn);
432
433 nv_reference(bld->pc, &insn->src[0], src0); /* NOTE: new_ref would suffice */
434
435 return bld_def(insn, 0, new_value(bld->pc, NV_FILE_GPR, src0->reg.type));
436 }
437
438 static struct nv_value *
439 bld_insn_2(struct bld_context *bld, uint opcode,
440 struct nv_value *src0, struct nv_value *src1)
441 {
442 struct nv_instruction *insn = new_instruction(bld->pc, opcode);
443
444 nv_reference(bld->pc, &insn->src[0], src0);
445 nv_reference(bld->pc, &insn->src[1], src1);
446
447 return bld_def(insn, 0, new_value(bld->pc, NV_FILE_GPR, src0->reg.type));
448 }
449
450 static struct nv_value *
451 bld_insn_3(struct bld_context *bld, uint opcode,
452 struct nv_value *src0, struct nv_value *src1,
453 struct nv_value *src2)
454 {
455 struct nv_instruction *insn = new_instruction(bld->pc, opcode);
456
457 nv_reference(bld->pc, &insn->src[0], src0);
458 nv_reference(bld->pc, &insn->src[1], src1);
459 nv_reference(bld->pc, &insn->src[2], src2);
460
461 return bld_def(insn, 0, new_value(bld->pc, NV_FILE_GPR, src0->reg.type));
462 }
463
/* Emit an instruction with an explicit destination type and per-source
 * typecasts; dt/s0t/s1t are NV_TYPE_* suffixes (e.g. F32).
 */
#define BLD_INSN_1_EX(d, op, dt, s0, s0t)             \
   do {                                               \
      (d) = bld_insn_1(bld, (NV_OP_##op), (s0));      \
      (d)->reg.type = NV_TYPE_##dt;                   \
      (d)->insn->src[0]->typecast = NV_TYPE_##s0t;    \
   } while(0)

#define BLD_INSN_2_EX(d, op, dt, s0, s0t, s1, s1t)       \
   do {                                                  \
      (d) = bld_insn_2(bld, (NV_OP_##op), (s0), (s1));   \
      (d)->reg.type = NV_TYPE_##dt;                      \
      (d)->insn->src[0]->typecast = NV_TYPE_##s0t;       \
      (d)->insn->src[1]->typecast = NV_TYPE_##s1t;       \
   } while(0)
478
/* Compute x^e as 2^(e * log2(x)); the PREEX2/EX2 pair performs the
 * final exponential (PREEX2 presumably prepares the operand format EX2
 * expects — hardware detail, confirm against nv50_pc docs).
 */
static struct nv_value *
bld_pow(struct bld_context *bld, struct nv_value *x, struct nv_value *e)
{
   struct nv_value *val;

   BLD_INSN_1_EX(val, LG2, F32, x, F32);
   BLD_INSN_2_EX(val, MUL, F32, e, F32, val, F32);
   val = bld_insn_1(bld, NV_OP_PREEX2, val);
   val = bld_insn_1(bld, NV_OP_EX2, val);

   return val;
}
491
492 static INLINE struct nv_value *
493 bld_load_imm_f32(struct bld_context *bld, float f)
494 {
495 return bld_insn_1(bld, NV_OP_MOV, bld_imm_f32(bld, f));
496 }
497
498 static INLINE struct nv_value *
499 bld_load_imm_u32(struct bld_context *bld, uint32_t u)
500 {
501 return bld_insn_1(bld, NV_OP_MOV, bld_imm_u32(bld, u));
502 }
503
/* Return an address-file value loaded with the constant "id", reusing
 * one of the four cached entries when both the id and the associated
 * indirect source value match.  If all four slots are occupied without
 * a match, slot 0 is recycled (i == 4 after the loop, i &= 3 -> 0).
 */
static struct nv_value *
bld_get_address(struct bld_context *bld, int id, struct nv_value *indirect)
{
   int i;
   struct nv_instruction *nvi;

   for (i = 0; i < 4; ++i) {
      if (!bld->saved_addr[i][0])
         break; /* first free slot; use it below */
      if (bld->saved_addr[i][1] == indirect) {
         /* the cached load's src[0] holds the immediate id */
         nvi = bld->saved_addr[i][0]->insn;
         if (nvi->src[0]->value->reg.imm.u32 == id)
            return bld->saved_addr[i][0];
      }
   }
   i &= 3;

   bld->saved_addr[i][0] = bld_load_imm_u32(bld, id);
   bld->saved_addr[i][0]->reg.file = NV_FILE_ADDR;
   bld->saved_addr[i][1] = indirect;
   return bld->saved_addr[i][0];
}
526
527
/* Obtain a flags (condition-code) value reflecting src, for use as a
 * branch/select/kil predicate.  If src's defining instruction cannot
 * provide flags here (it is a load or phi, or lives in another block),
 * re-evaluate src with a CVT in the current block first; then attach a
 * flags definition to the instruction if it does not already have one.
 */
static struct nv_value *
bld_predicate(struct bld_context *bld, struct nv_value *src)
{
   struct nv_instruction *nvi = src->insn;

   if (nvi->opcode == NV_OP_LDA ||
       nvi->opcode == NV_OP_PHI ||
       nvi->bb != bld->pc->current_block) {
      nvi = new_instruction(bld->pc, NV_OP_CVT);
      nv_reference(bld->pc, &nvi->src[0], src);
   }

   if (!nvi->flags_def) {
      nvi->flags_def = new_value(bld->pc, NV_FILE_FLAGS, NV_TYPE_U16);
      nvi->flags_def->insn = nvi;
   }
   return nvi->flags_def;
}
546
547 static void
548 bld_kil(struct bld_context *bld, struct nv_value *src)
549 {
550 struct nv_instruction *nvi;
551
552 src = bld_predicate(bld, src);
553 nvi = new_instruction(bld->pc, NV_OP_KIL);
554 nvi->fixed = 1;
555 nvi->flags_src = new_ref(bld->pc, src);
556 nvi->cc = NV_CC_LT;
557 }
558
/* Emit a flow-control instruction (branch etc.) to target with condition
 * code cc tested on src's flags (unconditional if src is NULL).  If
 * plan_reconverge is set, a fixed JOINAT is emitted first to mark the
 * reconvergence point for divergent control flow.
 */
static void
bld_flow(struct bld_context *bld, uint opcode, ubyte cc,
         struct nv_value *src, struct nv_basic_block *target,
         boolean plan_reconverge)
{
   struct nv_instruction *nvi;

   if (plan_reconverge)
      new_instruction(bld->pc, NV_OP_JOINAT)->fixed = 1;

   nvi = new_instruction(bld->pc, opcode);
   nvi->is_terminator = 1;
   nvi->cc = cc;
   nvi->target = target;
   if (src)
      nvi->flags_src = new_ref(bld->pc, src);
}
576
577 static ubyte
578 translate_setcc(unsigned opcode)
579 {
580 switch (opcode) {
581 case TGSI_OPCODE_SLT: return NV_CC_LT;
582 case TGSI_OPCODE_SGE: return NV_CC_GE;
583 case TGSI_OPCODE_SEQ: return NV_CC_EQ;
584 case TGSI_OPCODE_SGT: return NV_CC_GT;
585 case TGSI_OPCODE_SLE: return NV_CC_LE;
586 case TGSI_OPCODE_SNE: return NV_CC_NE | NV_CC_U;
587 case TGSI_OPCODE_STR: return NV_CC_TR;
588 case TGSI_OPCODE_SFL: return NV_CC_FL;
589
590 case TGSI_OPCODE_ISLT: return NV_CC_LT;
591 case TGSI_OPCODE_ISGE: return NV_CC_GE;
592 case TGSI_OPCODE_USEQ: return NV_CC_EQ;
593 case TGSI_OPCODE_USGE: return NV_CC_GE;
594 case TGSI_OPCODE_USLT: return NV_CC_LT;
595 case TGSI_OPCODE_USNE: return NV_CC_NE;
596 default:
597 assert(0);
598 return NV_CC_FL;
599 }
600 }
601
/* Map a TGSI opcode to the nv50 IR opcode that implements it; returns
 * NV_OP_NOP for opcodes handled specially (or not at all) elsewhere.
 * SUB maps to ADD — presumably the source negate modifier is applied by
 * the caller; confirm in bld_instruction.
 */
static uint
translate_opcode(uint opcode)
{
   switch (opcode) {
   case TGSI_OPCODE_ABS: return NV_OP_ABS;
   case TGSI_OPCODE_ADD:
   case TGSI_OPCODE_SUB:
   case TGSI_OPCODE_UADD: return NV_OP_ADD;
   case TGSI_OPCODE_AND: return NV_OP_AND;
   case TGSI_OPCODE_EX2: return NV_OP_EX2;
   case TGSI_OPCODE_CEIL: return NV_OP_CEIL;
   case TGSI_OPCODE_FLR: return NV_OP_FLOOR;
   case TGSI_OPCODE_TRUNC: return NV_OP_TRUNC;
   case TGSI_OPCODE_DDX: return NV_OP_DFDX;
   case TGSI_OPCODE_DDY: return NV_OP_DFDY;
   case TGSI_OPCODE_F2I:
   case TGSI_OPCODE_F2U:
   case TGSI_OPCODE_I2F:
   case TGSI_OPCODE_U2F: return NV_OP_CVT;
   case TGSI_OPCODE_INEG: return NV_OP_NEG;
   case TGSI_OPCODE_LG2: return NV_OP_LG2;
   case TGSI_OPCODE_ISHR:
   case TGSI_OPCODE_USHR: return NV_OP_SHR;
   case TGSI_OPCODE_MAD:
   case TGSI_OPCODE_UMAD: return NV_OP_MAD;
   case TGSI_OPCODE_MAX:
   case TGSI_OPCODE_IMAX:
   case TGSI_OPCODE_UMAX: return NV_OP_MAX;
   case TGSI_OPCODE_MIN:
   case TGSI_OPCODE_IMIN:
   case TGSI_OPCODE_UMIN: return NV_OP_MIN;
   case TGSI_OPCODE_MUL:
   case TGSI_OPCODE_UMUL: return NV_OP_MUL;
   case TGSI_OPCODE_OR: return NV_OP_OR;
   case TGSI_OPCODE_RCP: return NV_OP_RCP;
   case TGSI_OPCODE_RSQ: return NV_OP_RSQ;
   case TGSI_OPCODE_SAD: return NV_OP_SAD;
   case TGSI_OPCODE_SHL: return NV_OP_SHL;
   case TGSI_OPCODE_SLT:
   case TGSI_OPCODE_SGE:
   case TGSI_OPCODE_SEQ:
   case TGSI_OPCODE_SGT:
   case TGSI_OPCODE_SLE:
   case TGSI_OPCODE_SNE:
   case TGSI_OPCODE_ISLT:
   case TGSI_OPCODE_ISGE:
   case TGSI_OPCODE_USEQ:
   case TGSI_OPCODE_USGE:
   case TGSI_OPCODE_USLT:
   case TGSI_OPCODE_USNE: return NV_OP_SET;
   case TGSI_OPCODE_TEX: return NV_OP_TEX;
   case TGSI_OPCODE_TXP: return NV_OP_TEX;
   case TGSI_OPCODE_TXB: return NV_OP_TXB;
   case TGSI_OPCODE_TXL: return NV_OP_TXL;
   case TGSI_OPCODE_XOR: return NV_OP_XOR;
   default:
      return NV_OP_NOP;
   }
}
661
/* Infer the register type of an instruction's sources from its opcode;
 * anything not explicitly integer is treated as F32.
 */
static ubyte
infer_src_type(unsigned opcode)
{
   switch (opcode) {
   case TGSI_OPCODE_MOV:
   case TGSI_OPCODE_AND:
   case TGSI_OPCODE_OR:
   case TGSI_OPCODE_XOR:
   case TGSI_OPCODE_SAD:
   case TGSI_OPCODE_U2F:
   case TGSI_OPCODE_UADD:
   case TGSI_OPCODE_UDIV:
   case TGSI_OPCODE_UMOD:
   case TGSI_OPCODE_UMAD:
   case TGSI_OPCODE_UMUL:
   case TGSI_OPCODE_UMAX:
   case TGSI_OPCODE_UMIN:
   case TGSI_OPCODE_USEQ:
   case TGSI_OPCODE_USGE:
   case TGSI_OPCODE_USLT:
   case TGSI_OPCODE_USNE:
   case TGSI_OPCODE_USHR:
      return NV_TYPE_U32;
   case TGSI_OPCODE_I2F:
   case TGSI_OPCODE_IDIV:
   case TGSI_OPCODE_IMAX:
   case TGSI_OPCODE_IMIN:
   case TGSI_OPCODE_INEG:
   case TGSI_OPCODE_ISGE:
   case TGSI_OPCODE_ISHR:
   case TGSI_OPCODE_ISLT:
      return NV_TYPE_S32;
   default:
      return NV_TYPE_F32;
   }
}
698
/* Infer the register type of an instruction's destination from its
 * opcode; differs from infer_src_type for the conversions (F2I/F2U
 * produce integers from float sources, I2F/U2F the reverse).
 */
static ubyte
infer_dst_type(unsigned opcode)
{
   switch (opcode) {
   case TGSI_OPCODE_MOV:
   case TGSI_OPCODE_F2U:
   case TGSI_OPCODE_AND:
   case TGSI_OPCODE_OR:
   case TGSI_OPCODE_XOR:
   case TGSI_OPCODE_SAD:
   case TGSI_OPCODE_UADD:
   case TGSI_OPCODE_UDIV:
   case TGSI_OPCODE_UMOD:
   case TGSI_OPCODE_UMAD:
   case TGSI_OPCODE_UMUL:
   case TGSI_OPCODE_UMAX:
   case TGSI_OPCODE_UMIN:
   case TGSI_OPCODE_USEQ:
   case TGSI_OPCODE_USGE:
   case TGSI_OPCODE_USLT:
   case TGSI_OPCODE_USNE:
   case TGSI_OPCODE_USHR:
      return NV_TYPE_U32;
   case TGSI_OPCODE_F2I:
   case TGSI_OPCODE_IDIV:
   case TGSI_OPCODE_IMAX:
   case TGSI_OPCODE_IMIN:
   case TGSI_OPCODE_INEG:
   case TGSI_OPCODE_ISGE:
   case TGSI_OPCODE_ISHR:
   case TGSI_OPCODE_ISLT:
      return NV_TYPE_S32;
   default:
      return NV_TYPE_F32;
   }
}
735
/* Store value into component chan of the instruction's Dst[0], applying
 * the saturation mode and routing the value to the proper register file
 * (output / temporary / address).
 */
static void
emit_store(struct bld_context *bld, const struct tgsi_full_instruction *inst,
           unsigned chan, struct nv_value *value)
{
   const struct tgsi_full_dst_register *reg = &inst->Dst[0];

   assert(chan < 4);

   if (inst->Instruction.Opcode != TGSI_OPCODE_MOV)
      value->reg.type = infer_dst_type(inst->Instruction.Opcode);

   switch (inst->Instruction.Saturate) {
   case TGSI_SAT_NONE:
      break;
   case TGSI_SAT_ZERO_ONE:
      BLD_INSN_1_EX(value, SAT, F32, value, F32);
      break;
   case TGSI_SAT_MINUS_PLUS_ONE:
      /* clamp to [-1, 1] with a MAX/MIN pair against float immediates */
      value = bld_insn_2(bld, NV_OP_MAX, value, bld_load_imm_f32(bld, -1.0f));
      value = bld_insn_2(bld, NV_OP_MIN, value, bld_load_imm_f32(bld, 1.0f));
      value->reg.type = NV_TYPE_F32;
      break;
   }

   switch (reg->Register.File) {
   case TGSI_FILE_OUTPUT:
      value = bld_insn_1(bld, NV_OP_MOV, value);
      value->reg.file = bld->ti->output_file;

      if (bld->ti->p->type == PIPE_SHADER_FRAGMENT) {
         /* fragment outputs are collected and exported at program end */
         STORE_OUTR(reg->Register.Index, chan, value);
      } else {
         value->insn->fixed = 1;
         value->reg.id = bld->ti->output_map[reg->Register.Index][chan];
      }
      break;
   case TGSI_FILE_TEMPORARY:
      assert(reg->Register.Index < BLD_MAX_TEMPS);
      value->reg.file = NV_FILE_GPR;
      /* ensure the recorded value was defined in the current block */
      if (value->insn->bb != bld->pc->current_block)
         value = bld_insn_1(bld, NV_OP_MOV, value);
      STORE_TEMP(reg->Register.Index, chan, value);
      break;
   case TGSI_FILE_ADDRESS:
      assert(reg->Register.Index < BLD_MAX_ADDRS);
      value->reg.file = NV_FILE_ADDR;
      STORE_ADDR(reg->Register.Index, chan, value);
      break;
   }
}
786
787 static INLINE uint32_t
788 bld_is_output_written(struct bld_context *bld, int i, int c)
789 {
790 if (c < 0)
791 return bld->outputs_written[i / 8] & (0xf << ((i * 4) % 32));
792 return bld->outputs_written[i / 8] & (1 << ((i * 4 + c) % 32));
793 }
794
/* Emit an EXPORT instruction per written output register, sourcing the
 * final SSA values of its written components (fragment shaders only —
 * see the PIPE_SHADER_FRAGMENT path in emit_store).
 */
static void
bld_export_outputs(struct bld_context *bld)
{
   struct nv_value *vals[4];
   struct nv_instruction *nvi;
   int i, c, n;

   bld_push_values(&bld->ovs[0][0], PIPE_MAX_SHADER_OUTPUTS);

   for (i = 0; i < PIPE_MAX_SHADER_OUTPUTS; ++i) {
      if (!bld_is_output_written(bld, i, -1))
         continue;
      for (n = 0, c = 0; c < 4; ++c) {
         if (!bld_is_output_written(bld, i, c))
            continue;
         vals[n] = bld_fetch_global(bld, &bld->ovs[i][c]);
         assert(vals[n]);
         /* MOV into the hardware output register */
         vals[n] = bld_insn_1(bld, NV_OP_MOV, vals[n]);
         vals[n++]->reg.id = bld->ti->output_map[i][c];
      }
      assert(n);

      (nvi = new_instruction(bld->pc, NV_OP_EXPORT))->fixed = 1;

      for (c = 0; c < n; ++c)
         nvi->src[c] = new_ref(bld->pc, vals[c]);
   }
}
823
/* Begin emitting into basic block b: push all live register values onto
 * their stacks and invalidate the per-block caches (saved addresses and
 * interpolated inputs are only valid within one block).
 */
static void
bld_new_block(struct bld_context *bld, struct nv_basic_block *b)
{
   int i;

   bld_push_values(&bld->tvs[0][0], BLD_MAX_TEMPS);
   bld_push_values(&bld->avs[0][0], BLD_MAX_ADDRS);
   bld_push_values(&bld->pvs[0][0], BLD_MAX_PREDS);
   bld_push_values(&bld->ovs[0][0], PIPE_MAX_SHADER_OUTPUTS);

   bld->pc->current_block = b;

   for (i = 0; i < 4; ++i)
      bld->saved_addr[i][0] = NULL;

   for (i = 0; i < 128; ++i)
      bld->saved_inputs[i] = NULL;
}
842
843 static struct nv_value *
844 bld_saved_input(struct bld_context *bld, unsigned i, unsigned c)
845 {
846 unsigned idx = bld->ti->input_map[i][c];
847
848 if (bld->ti->p->type != PIPE_SHADER_FRAGMENT)
849 return NULL;
850 if (bld->saved_inputs[idx])
851 return bld->saved_inputs[idx];
852 return NULL;
853 }
854
855 static struct nv_value *
856 bld_interpolate(struct bld_context *bld, unsigned mode, struct nv_value *val)
857 {
858 if (mode & (NV50_INTERP_LINEAR | NV50_INTERP_FLAT))
859 val = bld_insn_1(bld, NV_OP_LINTERP, val);
860 else
861 val = bld_insn_2(bld, NV_OP_PINTERP, val, bld->frgcrd[3]);
862
863 val->insn->flat = (mode & NV50_INTERP_FLAT) ? 1 : 0;
864 val->insn->centroid = (mode & NV50_INTERP_CENTROID) ? 1 : 0;
865 return val;
866 }
867
/* Load source operand s, component chan, of a TGSI instruction:
 * resolves the register file, swizzle, indirect addressing, fragment
 * input interpolation, and the abs/neg source modifiers.  Returns an
 * immediate 0 (with a warning) if the source was never defined.
 */
static struct nv_value *
emit_fetch(struct bld_context *bld, const struct tgsi_full_instruction *insn,
           const unsigned s, const unsigned chan)
{
   const struct tgsi_full_src_register *src = &insn->Src[s];
   struct nv_value *res;
   unsigned idx, swz, dim_idx, ind_idx, ind_swz;
   ubyte type = infer_src_type(insn->Instruction.Opcode);

   idx = src->Register.Index;
   swz = tgsi_util_get_full_src_register_swizzle(src, chan);
   dim_idx = -1;
   ind_idx = -1;
   ind_swz = 0;

   if (src->Register.Indirect) {
      ind_idx = src->Indirect.Index;
      ind_swz = tgsi_util_get_src_register_swizzle(&src->Indirect, 0);
   }

   switch (src->Register.File) {
   case TGSI_FILE_CONSTANT:
      /* constant buffer index: 0 is reserved, 1 is the default buffer */
      dim_idx = src->Dimension.Index ? src->Dimension.Index + 2 : 1;
      assert(dim_idx < 14);
      assert(dim_idx == 1); /* for now */

      res = new_value(bld->pc, NV_FILE_MEM_C(dim_idx), type);
      res->reg.type = type;
      res->reg.id = (idx * 4 + swz) & 127;
      res = bld_insn_1(bld, NV_OP_LDA, res);

      if (src->Register.Indirect)
         res->insn->src[4] = new_ref(bld->pc, FETCH_ADDR(ind_idx, ind_swz));
      /* NOTE(review): if both Indirect and idx >= 32 apply, this second
       * assignment overwrites the indirect address ref set above —
       * confirm whether that combination can occur.
       */
      if (idx >= (128 / 4))
         res->insn->src[4] =
            new_ref(bld->pc, bld_get_address(bld, (idx * 16) & ~0x1ff, NULL));
      break;
   case TGSI_FILE_IMMEDIATE:
      assert(idx < bld->ti->immd32_nr);
      res = bld_load_imm_u32(bld, bld->ti->immd32[idx * 4 + swz]);
      res->reg.type = type;
      break;
   case TGSI_FILE_INPUT:
      /* reuse an already interpolated/loaded input where possible
       * (TXP must not: load_proj_tex_coords mutates the interp insns)
       */
      res = bld_saved_input(bld, idx, swz);
      if (res && (insn->Instruction.Opcode != TGSI_OPCODE_TXP))
         return res;

      res = new_value(bld->pc, bld->ti->input_file, type);
      res->reg.id = bld->ti->input_map[idx][swz];

      if (res->reg.file == NV_FILE_MEM_V) {
         /* varying: needs interpolation */
         res = bld_interpolate(bld, bld->ti->interp_mode[idx], res);
      } else {
         assert(src->Dimension.Dimension == 0);
         res = bld_insn_1(bld, NV_OP_LDA, res);
      }
      assert(res->reg.type == type);

      bld->saved_inputs[bld->ti->input_map[idx][swz]] = res;
      break;
   case TGSI_FILE_TEMPORARY:
      /* this should be load from l[], with reload elimination later on */
      res = bld_fetch_global(bld, &bld->tvs[idx][swz]);
      break;
   case TGSI_FILE_ADDRESS:
      res = bld_fetch_global(bld, &bld->avs[idx][swz]);
      break;
   case TGSI_FILE_PREDICATE:
      res = bld_fetch_global(bld, &bld->pvs[idx][swz]);
      break;
   default:
      NOUVEAU_ERR("illegal/unhandled src reg file: %d\n", src->Register.File);
      abort();
      break;
   }
   if (!res) {
      debug_printf("WARNING: undefined source value in TGSI instruction\n");
      return bld_load_imm_u32(bld, 0);
   }

   /* apply the TGSI source modifier */
   switch (tgsi_util_get_full_src_register_sign_mode(src, chan)) {
   case TGSI_UTIL_SIGN_KEEP:
      break;
   case TGSI_UTIL_SIGN_CLEAR:
      res = bld_insn_1(bld, NV_OP_ABS, res);
      break;
   case TGSI_UTIL_SIGN_TOGGLE:
      res = bld_insn_1(bld, NV_OP_NEG, res);
      break;
   case TGSI_UTIL_SIGN_SET:
      res = bld_insn_1(bld, NV_OP_ABS, res);
      res = bld_insn_1(bld, NV_OP_NEG, res);
      break;
   default:
      NOUVEAU_ERR("illegal/unhandled src reg sign mode\n");
      abort();
      break;
   }

   return res;
}
969
/* Emit TGSI LIT: dst = (1, max(x,0), x > 0 ? max(y,0)^clamp(w) : 0, 1),
 * with w clamped to roughly (-128, 128).  The x > 0 test reuses the
 * flags of the MAX that computes dst.y via a predicated MOV + SELECT.
 *
 * Note: bit 2 (z) of the write mask is included in (3 << 1), so val0
 * and zero are always initialized before the z-branch uses them.
 */
static void
bld_lit(struct bld_context *bld, struct nv_value *dst0[4],
        const struct tgsi_full_instruction *insn)
{
   struct nv_value *val0, *zero;
   unsigned mask = insn->Dst[0].Register.WriteMask;

   if (mask & ((1 << 0) | (1 << 3)))
      dst0[3] = dst0[0] = bld_load_imm_f32(bld, 1.0f);

   if (mask & (3 << 1)) {
      zero = bld_load_imm_f32(bld, 0.0f);
      val0 = bld_insn_2(bld, NV_OP_MAX, emit_fetch(bld, insn, 0, 0), zero);

      if (mask & (1 << 1))
         dst0[1] = val0;
   }

   if (mask & (1 << 2)) {
      struct nv_value *val1, *val3, *src1, *src3;
      struct nv_value *pos128 = bld_load_imm_f32(bld,  127.999999f);
      struct nv_value *neg128 = bld_load_imm_f32(bld, -127.999999f);

      src1 = emit_fetch(bld, insn, 0, 1);
      src3 = emit_fetch(bld, insn, 0, 3);

      /* attach a flags def to the MAX so we can test src.x <= 0 below */
      val0->insn->flags_def = new_value(bld->pc, NV_FILE_FLAGS, NV_TYPE_U16);
      val0->insn->flags_def->insn = val0->insn;

      val1 = bld_insn_2(bld, NV_OP_MAX, src1, zero);
      val3 = bld_insn_2(bld, NV_OP_MAX, src3, neg128);
      val3 = bld_insn_2(bld, NV_OP_MIN, val3, pos128);
      val3 = bld_pow(bld, val1, val3);

      /* predicated zero when src.x <= 0, merged with a SELECT */
      dst0[2] = bld_insn_1(bld, NV_OP_MOV, zero);
      dst0[2]->insn->cc = NV_CC_LE;
      dst0[2]->insn->flags_src = new_ref(bld->pc, val0->insn->flags_def);

      dst0[2] = bld_insn_2(bld, NV_OP_SELECT, val3, dst0[2]);
   }
}
1011
1012 static INLINE void
1013 get_tex_dim(const struct tgsi_full_instruction *insn, int *dim, int *arg)
1014 {
1015 switch (insn->Texture.Texture) {
1016 case TGSI_TEXTURE_1D:
1017 *arg = *dim = 1;
1018 break;
1019 case TGSI_TEXTURE_SHADOW1D:
1020 *dim = 1;
1021 *arg = 2;
1022 break;
1023 case TGSI_TEXTURE_UNKNOWN:
1024 case TGSI_TEXTURE_2D:
1025 case TGSI_TEXTURE_RECT:
1026 *arg = *dim = 2;
1027 break;
1028 case TGSI_TEXTURE_SHADOW2D:
1029 case TGSI_TEXTURE_SHADOWRECT:
1030 *dim = 2;
1031 *arg = 3;
1032 break;
1033 case TGSI_TEXTURE_3D:
1034 case TGSI_TEXTURE_CUBE:
1035 *dim = *arg = 3;
1036 break;
1037 default:
1038 assert(0);
1039 break;
1040 }
1041 }
1042
/* Load the first dim texture coordinates for TXP, divided by the q
 * component (src.w): coordinates that are perspective interpolants get
 * 1/q folded into their PINTERP, any others are multiplied explicitly.
 */
static void
load_proj_tex_coords(struct bld_context *bld,
                     struct nv_value *t[4], int dim,
                     const struct tgsi_full_instruction *insn)
{
   int c, mask = 0;

   t[3] = emit_fetch(bld, insn, 0, 3);

   /* q itself must be interpolated linearly, not perspective-corrected */
   if (t[3]->insn->opcode == NV_OP_PINTERP) {
      t[3]->insn->opcode = NV_OP_LINTERP;
      nv_reference(bld->pc, &t[3]->insn->src[1], NULL);
   }

   t[3] = bld_insn_1(bld, NV_OP_RCP, t[3]);

   for (c = 0; c < dim; ++c) {
      t[c] = emit_fetch(bld, insn, 0, c);
      if (t[c]->insn->opcode == NV_OP_LINTERP)
         t[c]->insn->opcode = NV_OP_PINTERP;

      if (t[c]->insn->opcode == NV_OP_PINTERP)
         nv_reference(bld->pc, &t[c]->insn->src[1], t[3]);
      else
         mask |= 1 << c; /* not an interpolant: multiply by 1/q below */
   }

   for (c = 0; mask; ++c, mask >>= 1) {
      if (!(mask & 1))
         continue;
      t[c] = bld_insn_2(bld, NV_OP_MUL, t[c], t[3]);
   }
}
1076
/* Emit a texture lookup (TEX/TXP/TXB/TXL): gather the coordinate,
 * shadow-compare and bias/lod sources, copy them into fresh F32 GPRs
 * (presumably because the hardware requires contiguous registers —
 * confirm), and build the TEX instruction with 4 destinations.
 */
static void
bld_tex(struct bld_context *bld, struct nv_value *dst0[4],
        const struct tgsi_full_instruction *insn)
{
   struct nv_value *t[4];
   struct nv_instruction *nvi;
   uint opcode = translate_opcode(insn->Instruction.Opcode);
   int arg, dim, c;

   get_tex_dim(insn, &dim, &arg);

   /* NOTE(review): cube-map coordinate setup is not implemented; this
    * empty branch falls through to the generic coordinate fetch below.
    */
   if (insn->Texture.Texture == TGSI_TEXTURE_CUBE) {
   }
   // else
   if (insn->Instruction.Opcode == TGSI_OPCODE_TXP) {
      load_proj_tex_coords(bld, t, dim, insn);
   } else
      for (c = 0; c < dim; ++c)
         t[c] = emit_fetch(bld, insn, 0, c);

   /* shadow compare value follows the coordinates */
   if (arg != dim)
      t[dim] = emit_fetch(bld, insn, 0, 2);

   /* explicit bias (TXB) or lod (TXL) comes from .w */
   if (insn->Instruction.Opcode == TGSI_OPCODE_TXB ||
       insn->Instruction.Opcode == TGSI_OPCODE_TXL) {
      t[arg++] = emit_fetch(bld, insn, 0, 3);
   }

   for (c = 0; c < arg; ++c) {
      t[c] = bld_insn_1(bld, NV_OP_MOV, t[c]);
      t[c]->reg.type = NV_TYPE_F32;
   }

   nvi = new_instruction(bld->pc, opcode);

   for (c = 0; c < 4; ++c) {
      nvi->def[c] = dst0[c] = new_value(bld->pc, NV_FILE_GPR, NV_TYPE_F32);
      nvi->def[c]->insn = nvi;
   }
   for (c = 0; c < arg; ++c)
      nvi->src[c] = new_ref(bld->pc, t[c]);

   nvi->tex_t = insn->Src[1].Register.Index; /* sampler/texture unit */
   nvi->tex_s = 0;
   nvi->tex_mask = 0xf;
   nvi->tex_cube = (insn->Texture.Texture == TGSI_TEXTURE_CUBE) ? 1 : 0;
   nvi->tex_live = 0;
   nvi->tex_argc = arg;
}
1126
/* Iterate over the channels (0..3) of instruction destination 0 that are
 * enabled in its write mask.  Expands to a for+if, so `break`/`continue`
 * inside the body act on the channel loop.
 */
#define FOR_EACH_DST0_ENABLED_CHANNEL(chan, inst) \
   for (chan = 0; chan < 4; ++chan) \
      if ((inst)->Dst[0].Register.WriteMask & (1 << chan))
1130
1131 static void
1132 bld_instruction(struct bld_context *bld,
1133 const struct tgsi_full_instruction *insn)
1134 {
1135 struct nv_value *src0;
1136 struct nv_value *src1;
1137 struct nv_value *src2;
1138 struct nv_value *dst0[4];
1139 struct nv_value *temp;
1140 int c;
1141 uint opcode = translate_opcode(insn->Instruction.Opcode);
1142
1143 tgsi_dump_instruction(insn, 1);
1144
1145 switch (insn->Instruction.Opcode) {
1146 case TGSI_OPCODE_ADD:
1147 case TGSI_OPCODE_MAX:
1148 case TGSI_OPCODE_MIN:
1149 case TGSI_OPCODE_MUL:
1150 FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
1151 src0 = emit_fetch(bld, insn, 0, c);
1152 src1 = emit_fetch(bld, insn, 1, c);
1153 dst0[c] = bld_insn_2(bld, opcode, src0, src1);
1154 }
1155 break;
1156 case TGSI_OPCODE_CMP:
1157 FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
1158 src0 = emit_fetch(bld, insn, 0, c);
1159 src1 = emit_fetch(bld, insn, 1, c);
1160 src2 = emit_fetch(bld, insn, 2, c);
1161 src0 = bld_predicate(bld, src0);
1162
1163 src1 = bld_insn_1(bld, NV_OP_MOV, src1);
1164 src1->insn->flags_src = new_ref(bld->pc, src0);
1165 src1->insn->cc = NV_CC_LT;
1166
1167 src2 = bld_insn_1(bld, NV_OP_MOV, src2);
1168 src2->insn->flags_src = new_ref(bld->pc, src0);
1169 src2->insn->cc = NV_CC_GE;
1170
1171 dst0[c] = bld_insn_2(bld, NV_OP_SELECT, src1, src2);
1172 }
1173 break;
1174 case TGSI_OPCODE_COS:
1175 src0 = emit_fetch(bld, insn, 0, 0);
1176 temp = bld_insn_1(bld, NV_OP_PRESIN, src0);
1177 if (insn->Dst[0].Register.WriteMask & 7)
1178 temp = bld_insn_1(bld, NV_OP_COS, temp);
1179 for (c = 0; c < 3; ++c)
1180 if (insn->Dst[0].Register.WriteMask & (1 << c))
1181 dst0[c] = temp;
1182 if (!(insn->Dst[0].Register.WriteMask & (1 << 3)))
1183 break;
1184 /* XXX: if src0.x is src0.w, don't emit new insns */
1185 src0 = emit_fetch(bld, insn, 0, 3);
1186 temp = bld_insn_1(bld, NV_OP_PRESIN, src0);
1187 dst0[3] = bld_insn_1(bld, NV_OP_COS, temp);
1188 break;
1189 case TGSI_OPCODE_DP3:
1190 src0 = emit_fetch(bld, insn, 0, 0);
1191 src1 = emit_fetch(bld, insn, 1, 0);
1192 temp = bld_insn_2(bld, NV_OP_MUL, src0, src1);
1193 for (c = 1; c < 3; ++c) {
1194 src0 = emit_fetch(bld, insn, 0, c);
1195 src1 = emit_fetch(bld, insn, 1, c);
1196 temp = bld_insn_3(bld, NV_OP_MAD, src0, src1, temp);
1197 }
1198 FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
1199 dst0[c] = temp;
1200 break;
1201 case TGSI_OPCODE_DP4:
1202 src0 = emit_fetch(bld, insn, 0, 0);
1203 src1 = emit_fetch(bld, insn, 1, 0);
1204 temp = bld_insn_2(bld, NV_OP_MUL, src0, src1);
1205 for (c = 1; c < 4; ++c) {
1206 src0 = emit_fetch(bld, insn, 0, c);
1207 src1 = emit_fetch(bld, insn, 1, c);
1208 temp = bld_insn_3(bld, NV_OP_MAD, src0, src1, temp);
1209 }
1210 FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
1211 dst0[c] = temp;
1212 break;
1213 case TGSI_OPCODE_EX2:
1214 src0 = emit_fetch(bld, insn, 0, 0);
1215 temp = bld_insn_1(bld, NV_OP_PREEX2, src0);
1216 temp = bld_insn_1(bld, NV_OP_EX2, temp);
1217 FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
1218 dst0[c] = temp;
1219 break;
1220 case TGSI_OPCODE_FRC:
1221 FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
1222 src0 = emit_fetch(bld, insn, 0, c);
1223 dst0[c] = bld_insn_1(bld, NV_OP_FLOOR, src0);
1224 dst0[c] = bld_insn_2(bld, NV_OP_SUB, src0, dst0[c]);
1225 }
1226 break;
1227 case TGSI_OPCODE_KIL:
1228 for (c = 0; c < 4; ++c) {
1229 src0 = emit_fetch(bld, insn, 0, c);
1230 bld_kil(bld, src0);
1231 }
1232 break;
1233 case TGSI_OPCODE_IF:
1234 {
1235 struct nv_basic_block *b = new_basic_block(bld->pc);
1236
1237 nvbb_attach_block(bld->pc->current_block, b, CFG_EDGE_FORWARD);
1238
1239 bld->join_bb[bld->cond_lvl] = bld->pc->current_block;
1240 bld->cond_bb[bld->cond_lvl] = bld->pc->current_block;
1241
1242 src1 = bld_predicate(bld, emit_fetch(bld, insn, 0, 0));
1243
1244 bld_flow(bld, NV_OP_BRA, NV_CC_EQ, src1, NULL, FALSE);
1245
1246 ++bld->cond_lvl;
1247 bld_new_block(bld, b);
1248 }
1249 break;
1250 case TGSI_OPCODE_ELSE:
1251 {
1252 struct nv_basic_block *b = new_basic_block(bld->pc);
1253
1254 --bld->cond_lvl;
1255 nvbb_attach_block(bld->join_bb[bld->cond_lvl], b, CFG_EDGE_FORWARD);
1256
1257 bld->cond_bb[bld->cond_lvl]->exit->target = b;
1258 bld->cond_bb[bld->cond_lvl] = bld->pc->current_block;
1259
1260 new_instruction(bld->pc, NV_OP_BRA)->is_terminator = 1;
1261
1262 ++bld->cond_lvl;
1263 bld_new_block(bld, b);
1264 }
1265 break;
1266 case TGSI_OPCODE_ENDIF:
1267 {
1268 struct nv_basic_block *b = new_basic_block(bld->pc);
1269
1270 --bld->cond_lvl;
1271 nvbb_attach_block(bld->pc->current_block, b, CFG_EDGE_FORWARD);
1272 nvbb_attach_block(bld->cond_bb[bld->cond_lvl], b, CFG_EDGE_FORWARD);
1273
1274 bld->cond_bb[bld->cond_lvl]->exit->target = b;
1275
1276 if (0 && bld->join_bb[bld->cond_lvl]) {
1277 bld->join_bb[bld->cond_lvl]->exit->prev->target = b;
1278
1279 new_instruction(bld->pc, NV_OP_NOP)->is_join = TRUE;
1280 }
1281
1282 bld_new_block(bld, b);
1283 }
1284 break;
1285 case TGSI_OPCODE_BGNLOOP:
1286 {
1287 struct nv_basic_block *bl = new_basic_block(bld->pc);
1288 struct nv_basic_block *bb = new_basic_block(bld->pc);
1289
1290 bld->loop_bb[bld->loop_lvl] = bl;
1291 bld->brkt_bb[bld->loop_lvl] = bb;
1292
1293 bld_flow(bld, NV_OP_BREAKADDR, NV_CC_TR, NULL, bb, FALSE);
1294
1295 nvbb_attach_block(bld->pc->current_block, bl, CFG_EDGE_LOOP_ENTER);
1296
1297 bld_new_block(bld, bld->loop_bb[bld->loop_lvl++]);
1298
1299 if (bld->loop_lvl == bld->pc->loop_nesting_bound)
1300 bld->pc->loop_nesting_bound++;
1301
1302 bld_clear_def_use(&bld->tvs[0][0], BLD_MAX_TEMPS, bld->loop_lvl);
1303 bld_clear_def_use(&bld->avs[0][0], BLD_MAX_ADDRS, bld->loop_lvl);
1304 bld_clear_def_use(&bld->pvs[0][0], BLD_MAX_PREDS, bld->loop_lvl);
1305 }
1306 break;
1307 case TGSI_OPCODE_BRK:
1308 {
1309 struct nv_basic_block *bb = bld->brkt_bb[bld->loop_lvl - 1];
1310
1311 bld_flow(bld, NV_OP_BREAK, NV_CC_TR, NULL, bb, FALSE);
1312
1313 /* XXX: don't do this for redundant BRKs */
1314 nvbb_attach_block(bld->pc->current_block, bb, CFG_EDGE_LOOP_LEAVE);
1315 }
1316 break;
1317 case TGSI_OPCODE_CONT:
1318 {
1319 struct nv_basic_block *bb = bld->loop_bb[bld->loop_lvl - 1];
1320
1321 bld_flow(bld, NV_OP_BRA, NV_CC_TR, NULL, bb, FALSE);
1322
1323 nvbb_attach_block(bld->pc->current_block, bb, CFG_EDGE_BACK);
1324 }
1325 break;
1326 case TGSI_OPCODE_ENDLOOP:
1327 {
1328 struct nv_basic_block *bb = bld->loop_bb[--bld->loop_lvl];
1329
1330 bld_flow(bld, NV_OP_BRA, NV_CC_TR, NULL, bb, FALSE);
1331
1332 nvbb_attach_block(bld->pc->current_block, bb, CFG_EDGE_BACK);
1333
1334 bld_loop_end(bld, bb); /* replace loop-side operand of the phis */
1335
1336 bld_new_block(bld, bld->brkt_bb[bld->loop_lvl]);
1337 }
1338 break;
1339 case TGSI_OPCODE_ABS:
1340 case TGSI_OPCODE_CEIL:
1341 case TGSI_OPCODE_FLR:
1342 case TGSI_OPCODE_TRUNC:
1343 case TGSI_OPCODE_DDX:
1344 case TGSI_OPCODE_DDY:
1345 FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
1346 src0 = emit_fetch(bld, insn, 0, c);
1347 dst0[c] = bld_insn_1(bld, opcode, src0);
1348 }
1349 break;
1350 case TGSI_OPCODE_LIT:
1351 bld_lit(bld, dst0, insn);
1352 break;
1353 case TGSI_OPCODE_LRP:
1354 FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
1355 src0 = emit_fetch(bld, insn, 0, c);
1356 src1 = emit_fetch(bld, insn, 1, c);
1357 src2 = emit_fetch(bld, insn, 2, c);
1358 dst0[c] = bld_insn_2(bld, NV_OP_SUB, src1, src2);
1359 dst0[c] = bld_insn_3(bld, NV_OP_MAD, dst0[c], src0, src2);
1360 }
1361 break;
1362 case TGSI_OPCODE_MOV:
1363 FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
1364 dst0[c] = emit_fetch(bld, insn, 0, c);
1365 break;
1366 case TGSI_OPCODE_MAD:
1367 FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
1368 src0 = emit_fetch(bld, insn, 0, c);
1369 src1 = emit_fetch(bld, insn, 1, c);
1370 src2 = emit_fetch(bld, insn, 2, c);
1371 dst0[c] = bld_insn_3(bld, opcode, src0, src1, src2);
1372 }
1373 break;
1374 case TGSI_OPCODE_POW:
1375 src0 = emit_fetch(bld, insn, 0, 0);
1376 src1 = emit_fetch(bld, insn, 1, 0);
1377 temp = bld_pow(bld, src0, src1);
1378 FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
1379 dst0[c] = temp;
1380 break;
1381 case TGSI_OPCODE_RCP:
1382 case TGSI_OPCODE_LG2:
1383 src0 = emit_fetch(bld, insn, 0, 0);
1384 temp = bld_insn_1(bld, opcode, src0);
1385 FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
1386 dst0[c] = temp;
1387 break;
1388 case TGSI_OPCODE_RSQ:
1389 src0 = emit_fetch(bld, insn, 0, 0);
1390 temp = bld_insn_1(bld, NV_OP_ABS, src0);
1391 temp = bld_insn_1(bld, NV_OP_RSQ, temp);
1392 FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
1393 dst0[c] = temp;
1394 break;
1395 case TGSI_OPCODE_SLT:
1396 case TGSI_OPCODE_SGE:
1397 case TGSI_OPCODE_SEQ:
1398 case TGSI_OPCODE_SGT:
1399 case TGSI_OPCODE_SLE:
1400 case TGSI_OPCODE_SNE:
1401 case TGSI_OPCODE_ISLT:
1402 case TGSI_OPCODE_ISGE:
1403 case TGSI_OPCODE_USEQ:
1404 case TGSI_OPCODE_USGE:
1405 case TGSI_OPCODE_USLT:
1406 case TGSI_OPCODE_USNE:
1407 FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
1408 src0 = emit_fetch(bld, insn, 0, c);
1409 src1 = emit_fetch(bld, insn, 1, c);
1410 dst0[c] = bld_insn_2(bld, NV_OP_SET, src0, src1);
1411 dst0[c]->insn->set_cond = translate_setcc(insn->Instruction.Opcode);
1412 dst0[c]->reg.type = infer_dst_type(insn->Instruction.Opcode);
1413
1414 dst0[c]->insn->src[0]->typecast =
1415 dst0[c]->insn->src[1]->typecast =
1416 infer_src_type(insn->Instruction.Opcode);
1417
1418 if (dst0[c]->reg.type != NV_TYPE_F32)
1419 break;
1420 dst0[c] = bld_insn_1(bld, NV_OP_ABS, dst0[c]);
1421 dst0[c]->insn->src[0]->typecast = NV_TYPE_S32;
1422 dst0[c]->reg.type = NV_TYPE_S32;
1423 dst0[c] = bld_insn_1(bld, NV_OP_CVT, dst0[c]);
1424 dst0[c]->reg.type = NV_TYPE_F32;
1425 }
1426 break;
1427 case TGSI_OPCODE_SUB:
1428 FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
1429 src0 = emit_fetch(bld, insn, 0, c);
1430 src1 = emit_fetch(bld, insn, 1, c);
1431 dst0[c] = bld_insn_2(bld, NV_OP_ADD, src0, src1);
1432 dst0[c]->insn->src[1]->mod ^= NV_MOD_NEG;
1433 }
1434 break;
1435 case TGSI_OPCODE_TEX:
1436 case TGSI_OPCODE_TXB:
1437 case TGSI_OPCODE_TXL:
1438 case TGSI_OPCODE_TXP:
1439 bld_tex(bld, dst0, insn);
1440 break;
1441 case TGSI_OPCODE_XPD:
1442 FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
1443 if (c == 3) {
1444 dst0[3] = bld_imm_f32(bld, 1.0f);
1445 break;
1446 }
1447 src0 = emit_fetch(bld, insn, 0, (c + 1) % 3);
1448 src1 = emit_fetch(bld, insn, 1, (c + 2) % 3);
1449 dst0[c] = bld_insn_2(bld, NV_OP_MUL, src0, src1);
1450
1451 src0 = emit_fetch(bld, insn, 0, (c + 2) % 3);
1452 src1 = emit_fetch(bld, insn, 1, (c + 1) % 3);
1453 dst0[c] = bld_insn_3(bld, NV_OP_MAD, src0, src1, dst0[c]);
1454
1455 dst0[c]->insn->src[2]->mod ^= NV_MOD_NEG;
1456 }
1457 break;
1458 case TGSI_OPCODE_END:
1459 if (bld->ti->p->type == PIPE_SHADER_FRAGMENT)
1460 bld_export_outputs(bld);
1461 break;
1462 default:
1463 NOUVEAU_ERR("nv_bld: unhandled opcode %u\n", insn->Instruction.Opcode);
1464 abort();
1465 break;
1466 }
1467
1468 FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
1469 emit_store(bld, insn, c, dst0[c]);
1470 }
1471
1472 static INLINE void
1473 bld_free_value_trackers(struct bld_value_stack *base, int n)
1474 {
1475 int i, c;
1476
1477 for (i = 0; i < n; ++i)
1478 for (c = 0; c < 4; ++c)
1479 if (base[i * 4 + c].body)
1480 FREE(base[i * 4 + c].body);
1481 }
1482
1483 int
1484 nv50_tgsi_to_nc(struct nv_pc *pc, struct nv50_translation_info *ti)
1485 {
1486 struct bld_context *bld = CALLOC_STRUCT(bld_context);
1487 int c;
1488
1489 pc->root = pc->current_block = new_basic_block(pc);
1490
1491 bld->pc = pc;
1492 bld->ti = ti;
1493
1494 pc->loop_nesting_bound = 1;
1495
1496 c = util_bitcount(bld->ti->p->fp.interp >> 24);
1497 if (c && ti->p->type == PIPE_SHADER_FRAGMENT) {
1498 bld->frgcrd[3] = new_value(pc, NV_FILE_MEM_V, NV_TYPE_F32);
1499 bld->frgcrd[3]->reg.id = c - 1;
1500 bld->frgcrd[3] = bld_insn_1(bld, NV_OP_LINTERP, bld->frgcrd[3]);
1501 bld->frgcrd[3] = bld_insn_1(bld, NV_OP_RCP, bld->frgcrd[3]);
1502 }
1503
1504 tgsi_parse_init(&bld->parse[0], ti->p->pipe.tokens);
1505
1506 while (!tgsi_parse_end_of_tokens(&bld->parse[bld->call_lvl])) {
1507 const union tgsi_full_token *tok = &bld->parse[bld->call_lvl].FullToken;
1508
1509 tgsi_parse_token(&bld->parse[bld->call_lvl]);
1510
1511 switch (tok->Token.Type) {
1512 case TGSI_TOKEN_TYPE_INSTRUCTION:
1513 bld_instruction(bld, &tok->FullInstruction);
1514 break;
1515 default:
1516 break;
1517 }
1518 }
1519
1520 bld_free_value_trackers(&bld->tvs[0][0], BLD_MAX_TEMPS);
1521 bld_free_value_trackers(&bld->avs[0][0], BLD_MAX_ADDRS);
1522 bld_free_value_trackers(&bld->pvs[0][0], BLD_MAX_PREDS);
1523
1524 bld_free_value_trackers(&bld->ovs[0][0], PIPE_MAX_SHADER_OUTPUTS);
1525
1526 FREE(bld);
1527 return 0;
1528 }
1529
1530 /* If a variable is assigned in a loop, replace all references to the value
1531 * from outside the loop with a phi value.
1532 */
1533 static void
1534 bld_replace_value(struct nv_pc *pc, struct nv_basic_block *b,
1535 struct nv_value *old_val,
1536 struct nv_value *new_val)
1537 {
1538 struct nv_instruction *nvi;
1539
1540 for (nvi = b->entry; nvi; nvi = nvi->next) {
1541 int s;
1542 for (s = 0; s < 5; ++s) {
1543 if (!nvi->src[s])
1544 continue;
1545 if (nvi->src[s]->value == old_val)
1546 nv_reference(pc, &nvi->src[s], new_val);
1547 }
1548 if (nvi->flags_src && nvi->flags_src->value == old_val)
1549 nv_reference(pc, &nvi->flags_src, new_val);
1550 }
1551
1552 b->pass_seq = pc->pass_seq;
1553
1554 if (b->out[0] && b->out[0]->pass_seq < pc->pass_seq)
1555 bld_replace_value(pc, b->out[0], old_val, new_val);
1556
1557 if (b->out[1] && b->out[1]->pass_seq < pc->pass_seq)
1558 bld_replace_value(pc, b->out[1], old_val, new_val);
1559 }