2 * Copyright 2010 Christoph Bumiller
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 /* XXX: need to clean this up so we get the typecasting right more naturally */
27 #include "nv50_context.h"
30 #include "pipe/p_shader_tokens.h"
31 #include "tgsi/tgsi_parse.h"
32 #include "tgsi/tgsi_util.h"
34 #include "util/u_simple_list.h"
35 #include "tgsi/tgsi_dump.h"
37 #define BLD_MAX_TEMPS 64
38 #define BLD_MAX_ADDRS 4
39 #define BLD_MAX_PREDS 4
40 #define BLD_MAX_IMMDS 128
42 #define BLD_MAX_COND_NESTING 4
43 #define BLD_MAX_LOOP_NESTING 4
44 #define BLD_MAX_CALL_NESTING 2
/* collects all values assigned to the same TGSI register */
struct bld_value_stack {
   /* NOTE(review): only 'body' and 'loop_use' survived extraction; the other
    * members are reconstructed from their uses below — confirm order/types.
    */
   struct nv_value *top;    /* current value; pushed into 'body' on BB change */
   struct nv_value **body;  /* older values, array grown in chunks of 8 */
   unsigned size;           /* number of entries in 'body' */
   uint16_t loop_use;       /* 1 bit per loop level, indicates if used/defd */
   uint16_t loop_def;       /* 1 bit per loop level, indicates if defined */
};
56 bld_vals_push_val(struct bld_value_stack
*stk
, struct nv_value
*val
)
58 assert(!stk
->size
|| (stk
->body
[stk
->size
- 1] != val
));
60 if (!(stk
->size
% 8)) {
61 unsigned old_sz
= (stk
->size
+ 0) * sizeof(struct nv_value
*);
62 unsigned new_sz
= (stk
->size
+ 8) * sizeof(struct nv_value
*);
63 stk
->body
= (struct nv_value
**)REALLOC(stk
->body
, old_sz
, new_sz
);
65 stk
->body
[stk
->size
++] = val
;
69 bld_vals_del_val(struct bld_value_stack
*stk
, struct nv_value
*val
)
73 for (i
= stk
->size
- 1; i
>= 0; --i
)
74 if (stk
->body
[i
] == val
)
79 if (i
!= stk
->size
- 1)
80 stk
->body
[i
] = stk
->body
[stk
->size
- 1];
82 --stk
->size
; /* XXX: old size in REALLOC */
87 bld_vals_push(struct bld_value_stack
*stk
)
89 bld_vals_push_val(stk
, stk
->top
);
94 bld_push_values(struct bld_value_stack
*stacks
, int n
)
98 for (i
= 0; i
< n
; ++i
)
99 for (c
= 0; c
< 4; ++c
)
100 if (stacks
[i
* 4 + c
].top
)
101 bld_vals_push(&stacks
[i
* 4 + c
]);
105 struct nv50_translation_info
*ti
;
108 struct nv_basic_block
*b
;
110 struct tgsi_parse_context parse
[BLD_MAX_CALL_NESTING
];
113 struct nv_basic_block
*cond_bb
[BLD_MAX_COND_NESTING
];
114 struct nv_basic_block
*join_bb
[BLD_MAX_COND_NESTING
];
115 struct nv_basic_block
*else_bb
[BLD_MAX_COND_NESTING
];
117 struct nv_basic_block
*loop_bb
[BLD_MAX_LOOP_NESTING
];
118 struct nv_basic_block
*brkt_bb
[BLD_MAX_LOOP_NESTING
];
121 struct bld_value_stack tvs
[BLD_MAX_TEMPS
][4]; /* TGSI_FILE_TEMPORARY */
122 struct bld_value_stack avs
[BLD_MAX_ADDRS
][4]; /* TGSI_FILE_ADDRESS */
123 struct bld_value_stack pvs
[BLD_MAX_PREDS
][4]; /* TGSI_FILE_PREDICATE */
124 struct bld_value_stack ovs
[PIPE_MAX_SHADER_OUTPUTS
][4];
126 uint32_t outputs_written
[(PIPE_MAX_SHADER_OUTPUTS
+ 31) / 32];
128 struct nv_value
*frgcrd
[4];
129 struct nv_value
*sysval
[4];
132 struct nv_value
*saved_addr
[4][2];
133 struct nv_value
*saved_inputs
[128];
134 struct nv_value
*saved_immd
[BLD_MAX_IMMDS
];
139 bld_stack_file(struct bld_context
*bld
, struct bld_value_stack
*stk
)
141 if (stk
< &bld
->avs
[0][0])
144 if (stk
< &bld
->pvs
[0][0])
147 if (stk
< &bld
->ovs
[0][0])
148 return NV_FILE_FLAGS
;
153 static INLINE
struct nv_value
*
154 bld_fetch(struct bld_context
*bld
, struct bld_value_stack
*stk
, int i
, int c
)
156 stk
[i
* 4 + c
].loop_use
|= 1 << bld
->loop_lvl
;
158 return stk
[i
* 4 + c
].top
;
161 static struct nv_value
*
162 bld_loop_phi(struct bld_context
*, struct bld_value_stack
*, struct nv_value
*);
164 /* If a variable is defined in a loop without prior use, we don't need
165 * a phi in the loop header to account for backwards flow.
167 * However, if this variable is then also used outside the loop, we do
168 * need a phi after all. But we must not use this phi's def inside the
169 * loop, so we can eliminate the phi if it is unused later.
172 bld_store(struct bld_context
*bld
, struct bld_value_stack
*stk
, int i
, int c
,
173 struct nv_value
*val
)
175 const uint16_t m
= 1 << bld
->loop_lvl
;
177 stk
= &stk
[i
* 4 + c
];
179 if (bld
->loop_lvl
&& !(m
& (stk
->loop_def
| stk
->loop_use
)))
180 bld_loop_phi(bld
, stk
, val
);
183 stk
->loop_def
|= 1 << bld
->loop_lvl
;
187 bld_clear_def_use(struct bld_value_stack
*stk
, int n
, int lvl
)
190 const uint16_t mask
= ~(1 << lvl
);
192 for (i
= 0; i
< n
* 4; ++i
) {
193 stk
[i
].loop_def
&= mask
;
194 stk
[i
].loop_use
&= mask
;
198 #define FETCH_TEMP(i, c) bld_fetch(bld, &bld->tvs[0][0], i, c)
199 #define STORE_TEMP(i, c, v) bld_store(bld, &bld->tvs[0][0], i, c, (v))
200 #define FETCH_ADDR(i, c) bld_fetch(bld, &bld->avs[0][0], i, c)
201 #define STORE_ADDR(i, c, v) bld_store(bld, &bld->avs[0][0], i, c, (v))
202 #define FETCH_PRED(i, c) bld_fetch(bld, &bld->pvs[0][0], i, c)
203 #define STORE_PRED(i, c, v) bld_store(bld, &bld->pvs[0][0], i, c, (v))
205 #define STORE_OUTR(i, c, v) \
207 bld->ovs[i][c].top = (v); \
208 bld->outputs_written[(i) / 8] |= 1 << (((i) * 4 + (c)) % 32); \
212 bld_warn_uninitialized(struct bld_context
*bld
, int kind
,
213 struct bld_value_stack
*stk
, struct nv_basic_block
*b
)
215 long i
= (stk
- &bld
->tvs
[0][0]) / 4;
216 long c
= (stk
- &bld
->tvs
[0][0]) & 3;
221 debug_printf("WARNING: TEMP[%li].%c %s used uninitialized in BB:%i\n",
222 i
, (int)('x' + c
), kind
? "may be" : "is", b
->id
);
225 static INLINE
struct nv_value
*
226 bld_def(struct nv_instruction
*i
, int c
, struct nv_value
*value
)
233 static INLINE
struct nv_value
*
234 find_by_bb(struct bld_value_stack
*stack
, struct nv_basic_block
*b
)
238 if (stack
->top
&& stack
->top
->insn
->bb
== b
)
241 for (i
= stack
->size
- 1; i
>= 0; --i
)
242 if (stack
->body
[i
]->insn
->bb
== b
)
243 return stack
->body
[i
];
247 /* fetch value from stack that was defined in the specified basic block,
248 * or search for first definitions in all of its predecessors
251 fetch_by_bb(struct bld_value_stack
*stack
,
252 struct nv_value
**vals
, int *n
,
253 struct nv_basic_block
*b
)
256 struct nv_value
*val
;
258 assert(*n
< 16); /* MAX_COND_NESTING */
260 val
= find_by_bb(stack
, b
);
262 for (i
= 0; i
< *n
; ++i
)
268 for (i
= 0; i
< b
->num_in
; ++i
)
269 if (b
->in_kind
[i
] != CFG_EDGE_BACK
)
270 fetch_by_bb(stack
, vals
, n
, b
->in
[i
]);
273 static INLINE
struct nv_value
*
274 bld_load_imm_u32(struct bld_context
*bld
, uint32_t u
);
276 static INLINE
struct nv_value
*
277 bld_undef(struct bld_context
*bld
, ubyte file
)
279 struct nv_instruction
*nvi
= new_instruction(bld
->pc
, NV_OP_UNDEF
);
281 return bld_def(nvi
, 0, new_value(bld
->pc
, file
, NV_TYPE_U32
));
284 static struct nv_value
*
285 bld_phi(struct bld_context
*bld
, struct nv_basic_block
*b
,
286 struct bld_value_stack
*stack
)
288 struct nv_basic_block
*in
;
289 struct nv_value
*vals
[16], *val
;
290 struct nv_instruction
*phi
;
295 fetch_by_bb(stack
, vals
, &n
, b
);
298 bld_warn_uninitialized(bld
, 0, stack
, b
);
303 if (nvbb_dominated_by(b
, vals
[0]->insn
->bb
))
306 bld_warn_uninitialized(bld
, 1, stack
, b
);
308 /* back-tracking to insert missing value of other path */
311 if (in
->num_in
== 1) {
314 if (!nvbb_reachable_by(in
->in
[0], vals
[0]->insn
->bb
, b
))
317 if (!nvbb_reachable_by(in
->in
[1], vals
[0]->insn
->bb
, b
))
323 bld
->pc
->current_block
= in
;
325 /* should make this a no-op */
326 bld_vals_push_val(stack
, bld_undef(bld
, vals
[0]->reg
.file
));
330 for (i
= 0; i
< n
; ++i
) {
331 /* if value dominates b, continue to the redefinitions */
332 if (nvbb_dominated_by(b
, vals
[i
]->insn
->bb
))
335 /* if value dominates any in-block, b should be the dom frontier */
336 for (j
= 0; j
< b
->num_in
; ++j
)
337 if (nvbb_dominated_by(b
->in
[j
], vals
[i
]->insn
->bb
))
339 /* otherwise, find the dominance frontier and put the phi there */
340 if (j
== b
->num_in
) {
341 in
= nvbb_dom_frontier(vals
[i
]->insn
->bb
);
342 val
= bld_phi(bld
, in
, stack
);
343 bld_vals_push_val(stack
, val
);
349 bld
->pc
->current_block
= b
;
354 phi
= new_instruction(bld
->pc
, NV_OP_PHI
);
356 bld_def(phi
, 0, new_value(bld
->pc
, vals
[0]->reg
.file
, vals
[0]->reg
.type
));
357 for (i
= 0; i
< n
; ++i
)
358 phi
->src
[i
] = new_ref(bld
->pc
, vals
[i
]);
363 static struct nv_value
*
364 bld_loop_phi(struct bld_context
*bld
, struct bld_value_stack
*stack
,
365 struct nv_value
*def
)
367 struct nv_basic_block
*bb
= bld
->pc
->current_block
;
368 struct nv_instruction
*phi
;
369 struct nv_value
*val
;
371 val
= bld_phi(bld
, bld
->pc
->current_block
, stack
);
373 bld
->pc
->current_block
= bld
->loop_bb
[bld
->loop_lvl
- 1]->in
[0];
375 val
= bld_undef(bld
, bld_stack_file(bld
, stack
));
378 bld
->pc
->current_block
= bld
->loop_bb
[bld
->loop_lvl
- 1];
380 phi
= new_instruction(bld
->pc
, NV_OP_PHI
);
382 bld_def(phi
, 0, new_value_like(bld
->pc
, val
));
386 bld_vals_push_val(stack
, phi
->def
[0]);
388 phi
->target
= (struct nv_basic_block
*)stack
; /* cheat */
390 nv_reference(bld
->pc
, &phi
->src
[0], val
);
391 nv_reference(bld
->pc
, &phi
->src
[1], def
);
393 bld
->pc
->current_block
= bb
;
398 static INLINE
struct nv_value
*
399 bld_fetch_global(struct bld_context
*bld
, struct bld_value_stack
*stack
)
401 const uint16_t m
= 1 << bld
->loop_lvl
;
402 const uint16_t use
= stack
->loop_use
;
404 stack
->loop_use
|= m
;
406 /* If neither used nor def'd inside the loop, build a phi in foresight,
407 * so we don't have to replace stuff later on, which requires tracking.
409 if (bld
->loop_lvl
&& !((use
| stack
->loop_def
) & m
))
410 return bld_loop_phi(bld
, stack
, NULL
);
412 return bld_phi(bld
, bld
->pc
->current_block
, stack
);
415 static INLINE
struct nv_value
*
416 bld_imm_u32(struct bld_context
*bld
, uint32_t u
)
419 unsigned n
= bld
->num_immds
;
421 for (i
= 0; i
< n
; ++i
)
422 if (bld
->saved_immd
[i
]->reg
.imm
.u32
== u
)
423 return bld
->saved_immd
[i
];
424 assert(n
< BLD_MAX_IMMDS
);
428 bld
->saved_immd
[n
] = new_value(bld
->pc
, NV_FILE_IMM
, NV_TYPE_U32
);
429 bld
->saved_immd
[n
]->reg
.imm
.u32
= u
;
430 return bld
->saved_immd
[n
];
434 bld_replace_value(struct nv_pc
*, struct nv_basic_block
*, struct nv_value
*,
437 /* Replace the source of the phi in the loop header by the last assignment,
438 * or eliminate the phi function if there is no assignment inside the loop.
440 * Redundancy situation 1 - (used) but (not redefined) value:
441 * %3 = phi %0, %3 = %3 is used
442 * %3 = phi %0, %4 = is new definition
444 * Redundancy situation 2 - (not used) but (redefined) value:
445 * %3 = phi %0, %2 = %2 is used, %3 could be used outside, deleted by DCE
448 bld_loop_end(struct bld_context
*bld
, struct nv_basic_block
*bb
)
450 struct nv_instruction
*phi
, *next
;
451 struct nv_value
*val
;
452 struct bld_value_stack
*stk
;
455 for (phi
= bb
->phi
; phi
&& phi
->opcode
== NV_OP_PHI
; phi
= next
) {
458 stk
= (struct bld_value_stack
*)phi
->target
;
461 val
= bld_fetch_global(bld
, stk
);
463 nv_reference(bld
->pc
, &phi
->src
[1], val
);
466 if (phi
->src
[0]->value
== phi
->def
[0] ||
467 phi
->src
[0]->value
== phi
->src
[1]->value
)
470 if (phi
->src
[1]->value
== phi
->def
[0])
474 bld_vals_del_val(stk
, phi
->def
[0]);
477 bld_replace_value(bld
->pc
, bb
, phi
->def
[0], phi
->src
[s
]->value
);
484 static INLINE
struct nv_value
*
485 bld_imm_f32(struct bld_context
*bld
, float f
)
487 return bld_imm_u32(bld
, fui(f
));
490 #define SET_TYPE(v, t) ((v)->reg.type = NV_TYPE_##t)
492 static struct nv_value
*
493 bld_insn_1(struct bld_context
*bld
, uint opcode
, struct nv_value
*src0
)
495 struct nv_instruction
*insn
= new_instruction(bld
->pc
, opcode
);
498 nv_reference(bld
->pc
, &insn
->src
[0], src0
); /* NOTE: new_ref would suffice */
500 return bld_def(insn
, 0, new_value(bld
->pc
, NV_FILE_GPR
, src0
->reg
.type
));
503 static struct nv_value
*
504 bld_insn_2(struct bld_context
*bld
, uint opcode
,
505 struct nv_value
*src0
, struct nv_value
*src1
)
507 struct nv_instruction
*insn
= new_instruction(bld
->pc
, opcode
);
509 nv_reference(bld
->pc
, &insn
->src
[0], src0
);
510 nv_reference(bld
->pc
, &insn
->src
[1], src1
);
512 return bld_def(insn
, 0, new_value(bld
->pc
, NV_FILE_GPR
, src0
->reg
.type
));
515 static struct nv_value
*
516 bld_insn_3(struct bld_context
*bld
, uint opcode
,
517 struct nv_value
*src0
, struct nv_value
*src1
,
518 struct nv_value
*src2
)
520 struct nv_instruction
*insn
= new_instruction(bld
->pc
, opcode
);
522 nv_reference(bld
->pc
, &insn
->src
[0], src0
);
523 nv_reference(bld
->pc
, &insn
->src
[1], src1
);
524 nv_reference(bld
->pc
, &insn
->src
[2], src2
);
526 return bld_def(insn
, 0, new_value(bld
->pc
, NV_FILE_GPR
, src0
->reg
.type
));
529 #define BLD_INSN_1_EX(d, op, dt, s0, s0t) \
531 (d) = bld_insn_1(bld, (NV_OP_##op), (s0)); \
532 (d)->reg.type = NV_TYPE_##dt; \
533 (d)->insn->src[0]->typecast = NV_TYPE_##s0t; \
536 #define BLD_INSN_2_EX(d, op, dt, s0, s0t, s1, s1t) \
538 (d) = bld_insn_2(bld, (NV_OP_##op), (s0), (s1)); \
539 (d)->reg.type = NV_TYPE_##dt; \
540 (d)->insn->src[0]->typecast = NV_TYPE_##s0t; \
541 (d)->insn->src[1]->typecast = NV_TYPE_##s1t; \
544 static struct nv_value
*
545 bld_pow(struct bld_context
*bld
, struct nv_value
*x
, struct nv_value
*e
)
547 struct nv_value
*val
;
549 BLD_INSN_1_EX(val
, LG2
, F32
, x
, F32
);
550 BLD_INSN_2_EX(val
, MUL
, F32
, e
, F32
, val
, F32
);
551 val
= bld_insn_1(bld
, NV_OP_PREEX2
, val
);
552 val
= bld_insn_1(bld
, NV_OP_EX2
, val
);
557 static INLINE
struct nv_value
*
558 bld_load_imm_f32(struct bld_context
*bld
, float f
)
560 return bld_insn_1(bld
, NV_OP_MOV
, bld_imm_f32(bld
, f
));
563 static INLINE
struct nv_value
*
564 bld_load_imm_u32(struct bld_context
*bld
, uint32_t u
)
566 return bld_insn_1(bld
, NV_OP_MOV
, bld_imm_u32(bld
, u
));
569 static struct nv_value
*
570 bld_get_address(struct bld_context
*bld
, int id
, struct nv_value
*indirect
)
573 struct nv_instruction
*nvi
;
575 for (i
= 0; i
< 4; ++i
) {
576 if (!bld
->saved_addr
[i
][0])
578 if (bld
->saved_addr
[i
][1] == indirect
) {
579 nvi
= bld
->saved_addr
[i
][0]->insn
;
580 if (nvi
->src
[0]->value
->reg
.imm
.u32
== id
)
581 return bld
->saved_addr
[i
][0];
586 bld
->saved_addr
[i
][0] = bld_load_imm_u32(bld
, id
);
587 bld
->saved_addr
[i
][0]->reg
.file
= NV_FILE_ADDR
;
588 bld
->saved_addr
[i
][1] = indirect
;
589 return bld
->saved_addr
[i
][0];
593 static struct nv_value
*
594 bld_predicate(struct bld_context
*bld
, struct nv_value
*src
, boolean bool_only
)
596 struct nv_instruction
*nvi
= src
->insn
;
598 if (nvi
->opcode
== NV_OP_LDA
||
599 nvi
->opcode
== NV_OP_PHI
||
600 nvi
->bb
!= bld
->pc
->current_block
) {
601 nvi
= new_instruction(bld
->pc
, NV_OP_CVT
);
602 nv_reference(bld
->pc
, &nvi
->src
[0], src
);
605 while (nvi
->opcode
== NV_OP_ABS
|| nvi
->opcode
== NV_OP_CVT
||
606 nvi
->opcode
== NV_OP_NEG
) {
607 /* TGSI SET gets conversion to f32, we only need source 0/~0 */
608 if (!nvi
->def
[0]->insn
->flags_src
)
609 nvi
= nvi
->src
[0]->value
->insn
;
613 if (!nvi
->flags_def
) {
614 nvi
->flags_def
= new_value(bld
->pc
, NV_FILE_FLAGS
, NV_TYPE_U16
);
615 nvi
->flags_def
->insn
= nvi
;
617 return nvi
->flags_def
;
621 bld_kil(struct bld_context
*bld
, struct nv_value
*src
)
623 struct nv_instruction
*nvi
;
625 src
= bld_predicate(bld
, src
, FALSE
);
626 nvi
= new_instruction(bld
->pc
, NV_OP_KIL
);
628 nvi
->flags_src
= new_ref(bld
->pc
, src
);
633 bld_flow(struct bld_context
*bld
, uint opcode
, ubyte cc
,
634 struct nv_value
*src
, struct nv_basic_block
*target
,
635 boolean plan_reconverge
)
637 struct nv_instruction
*nvi
;
640 new_instruction(bld
->pc
, NV_OP_JOINAT
)->fixed
= 1;
642 nvi
= new_instruction(bld
->pc
, opcode
);
643 nvi
->is_terminator
= 1;
645 nvi
->target
= target
;
647 nvi
->flags_src
= new_ref(bld
->pc
, src
);
651 translate_setcc(unsigned opcode
)
654 case TGSI_OPCODE_SLT
: return NV_CC_LT
;
655 case TGSI_OPCODE_SGE
: return NV_CC_GE
;
656 case TGSI_OPCODE_SEQ
: return NV_CC_EQ
;
657 case TGSI_OPCODE_SGT
: return NV_CC_GT
;
658 case TGSI_OPCODE_SLE
: return NV_CC_LE
;
659 case TGSI_OPCODE_SNE
: return NV_CC_NE
| NV_CC_U
;
660 case TGSI_OPCODE_STR
: return NV_CC_TR
;
661 case TGSI_OPCODE_SFL
: return NV_CC_FL
;
663 case TGSI_OPCODE_ISLT
: return NV_CC_LT
;
664 case TGSI_OPCODE_ISGE
: return NV_CC_GE
;
665 case TGSI_OPCODE_USEQ
: return NV_CC_EQ
;
666 case TGSI_OPCODE_USGE
: return NV_CC_GE
;
667 case TGSI_OPCODE_USLT
: return NV_CC_LT
;
668 case TGSI_OPCODE_USNE
: return NV_CC_NE
;
676 translate_opcode(uint opcode
)
679 case TGSI_OPCODE_ABS
: return NV_OP_ABS
;
680 case TGSI_OPCODE_ADD
:
681 case TGSI_OPCODE_SUB
:
682 case TGSI_OPCODE_UADD
: return NV_OP_ADD
;
683 case TGSI_OPCODE_AND
: return NV_OP_AND
;
684 case TGSI_OPCODE_EX2
: return NV_OP_EX2
;
685 case TGSI_OPCODE_CEIL
: return NV_OP_CEIL
;
686 case TGSI_OPCODE_FLR
: return NV_OP_FLOOR
;
687 case TGSI_OPCODE_TRUNC
: return NV_OP_TRUNC
;
688 case TGSI_OPCODE_COS
: return NV_OP_COS
;
689 case TGSI_OPCODE_SIN
: return NV_OP_SIN
;
690 case TGSI_OPCODE_DDX
: return NV_OP_DFDX
;
691 case TGSI_OPCODE_DDY
: return NV_OP_DFDY
;
692 case TGSI_OPCODE_F2I
:
693 case TGSI_OPCODE_F2U
:
694 case TGSI_OPCODE_I2F
:
695 case TGSI_OPCODE_U2F
: return NV_OP_CVT
;
696 case TGSI_OPCODE_INEG
: return NV_OP_NEG
;
697 case TGSI_OPCODE_LG2
: return NV_OP_LG2
;
698 case TGSI_OPCODE_ISHR
:
699 case TGSI_OPCODE_USHR
: return NV_OP_SHR
;
700 case TGSI_OPCODE_MAD
:
701 case TGSI_OPCODE_UMAD
: return NV_OP_MAD
;
702 case TGSI_OPCODE_MAX
:
703 case TGSI_OPCODE_IMAX
:
704 case TGSI_OPCODE_UMAX
: return NV_OP_MAX
;
705 case TGSI_OPCODE_MIN
:
706 case TGSI_OPCODE_IMIN
:
707 case TGSI_OPCODE_UMIN
: return NV_OP_MIN
;
708 case TGSI_OPCODE_MUL
:
709 case TGSI_OPCODE_UMUL
: return NV_OP_MUL
;
710 case TGSI_OPCODE_OR
: return NV_OP_OR
;
711 case TGSI_OPCODE_RCP
: return NV_OP_RCP
;
712 case TGSI_OPCODE_RSQ
: return NV_OP_RSQ
;
713 case TGSI_OPCODE_SAD
: return NV_OP_SAD
;
714 case TGSI_OPCODE_SHL
: return NV_OP_SHL
;
715 case TGSI_OPCODE_SLT
:
716 case TGSI_OPCODE_SGE
:
717 case TGSI_OPCODE_SEQ
:
718 case TGSI_OPCODE_SGT
:
719 case TGSI_OPCODE_SLE
:
720 case TGSI_OPCODE_SNE
:
721 case TGSI_OPCODE_ISLT
:
722 case TGSI_OPCODE_ISGE
:
723 case TGSI_OPCODE_USEQ
:
724 case TGSI_OPCODE_USGE
:
725 case TGSI_OPCODE_USLT
:
726 case TGSI_OPCODE_USNE
: return NV_OP_SET
;
727 case TGSI_OPCODE_TEX
: return NV_OP_TEX
;
728 case TGSI_OPCODE_TXP
: return NV_OP_TEX
;
729 case TGSI_OPCODE_TXB
: return NV_OP_TXB
;
730 case TGSI_OPCODE_TXL
: return NV_OP_TXL
;
731 case TGSI_OPCODE_XOR
: return NV_OP_XOR
;
738 infer_src_type(unsigned opcode
)
741 case TGSI_OPCODE_MOV
:
742 case TGSI_OPCODE_AND
:
744 case TGSI_OPCODE_XOR
:
745 case TGSI_OPCODE_SAD
:
746 case TGSI_OPCODE_U2F
:
747 case TGSI_OPCODE_UADD
:
748 case TGSI_OPCODE_UDIV
:
749 case TGSI_OPCODE_UMOD
:
750 case TGSI_OPCODE_UMAD
:
751 case TGSI_OPCODE_UMUL
:
752 case TGSI_OPCODE_UMAX
:
753 case TGSI_OPCODE_UMIN
:
754 case TGSI_OPCODE_USEQ
:
755 case TGSI_OPCODE_USGE
:
756 case TGSI_OPCODE_USLT
:
757 case TGSI_OPCODE_USNE
:
758 case TGSI_OPCODE_USHR
:
760 case TGSI_OPCODE_I2F
:
761 case TGSI_OPCODE_IDIV
:
762 case TGSI_OPCODE_IMAX
:
763 case TGSI_OPCODE_IMIN
:
764 case TGSI_OPCODE_INEG
:
765 case TGSI_OPCODE_ISGE
:
766 case TGSI_OPCODE_ISHR
:
767 case TGSI_OPCODE_ISLT
:
775 infer_dst_type(unsigned opcode
)
778 case TGSI_OPCODE_MOV
:
779 case TGSI_OPCODE_F2U
:
780 case TGSI_OPCODE_AND
:
782 case TGSI_OPCODE_XOR
:
783 case TGSI_OPCODE_SAD
:
784 case TGSI_OPCODE_UADD
:
785 case TGSI_OPCODE_UDIV
:
786 case TGSI_OPCODE_UMOD
:
787 case TGSI_OPCODE_UMAD
:
788 case TGSI_OPCODE_UMUL
:
789 case TGSI_OPCODE_UMAX
:
790 case TGSI_OPCODE_UMIN
:
791 case TGSI_OPCODE_USEQ
:
792 case TGSI_OPCODE_USGE
:
793 case TGSI_OPCODE_USLT
:
794 case TGSI_OPCODE_USNE
:
795 case TGSI_OPCODE_USHR
:
797 case TGSI_OPCODE_F2I
:
798 case TGSI_OPCODE_IDIV
:
799 case TGSI_OPCODE_IMAX
:
800 case TGSI_OPCODE_IMIN
:
801 case TGSI_OPCODE_INEG
:
802 case TGSI_OPCODE_ISGE
:
803 case TGSI_OPCODE_ISHR
:
804 case TGSI_OPCODE_ISLT
:
812 emit_store(struct bld_context
*bld
, const struct tgsi_full_instruction
*inst
,
813 unsigned chan
, struct nv_value
*value
)
815 const struct tgsi_full_dst_register
*reg
= &inst
->Dst
[0];
819 if (inst
->Instruction
.Opcode
!= TGSI_OPCODE_MOV
)
820 value
->reg
.type
= infer_dst_type(inst
->Instruction
.Opcode
);
822 switch (inst
->Instruction
.Saturate
) {
825 case TGSI_SAT_ZERO_ONE
:
826 BLD_INSN_1_EX(value
, SAT
, F32
, value
, F32
);
828 case TGSI_SAT_MINUS_PLUS_ONE
:
829 value
= bld_insn_2(bld
, NV_OP_MAX
, value
, bld_load_imm_f32(bld
, -1.0f
));
830 value
= bld_insn_2(bld
, NV_OP_MIN
, value
, bld_load_imm_f32(bld
, 1.0f
));
831 value
->reg
.type
= NV_TYPE_F32
;
835 switch (reg
->Register
.File
) {
836 case TGSI_FILE_OUTPUT
:
837 value
= bld_insn_1(bld
, NV_OP_MOV
, value
);
838 value
->reg
.file
= bld
->ti
->output_file
;
840 if (bld
->ti
->p
->type
== PIPE_SHADER_FRAGMENT
) {
841 STORE_OUTR(reg
->Register
.Index
, chan
, value
);
843 value
->insn
->fixed
= 1;
844 value
->reg
.id
= bld
->ti
->output_map
[reg
->Register
.Index
][chan
];
847 case TGSI_FILE_TEMPORARY
:
848 assert(reg
->Register
.Index
< BLD_MAX_TEMPS
);
849 value
->reg
.file
= NV_FILE_GPR
;
850 if (value
->insn
->bb
!= bld
->pc
->current_block
)
851 value
= bld_insn_1(bld
, NV_OP_MOV
, value
);
852 STORE_TEMP(reg
->Register
.Index
, chan
, value
);
854 case TGSI_FILE_ADDRESS
:
855 assert(reg
->Register
.Index
< BLD_MAX_ADDRS
);
856 value
->reg
.file
= NV_FILE_ADDR
;
857 STORE_ADDR(reg
->Register
.Index
, chan
, value
);
862 static INLINE
uint32_t
863 bld_is_output_written(struct bld_context
*bld
, int i
, int c
)
866 return bld
->outputs_written
[i
/ 8] & (0xf << ((i
* 4) % 32));
867 return bld
->outputs_written
[i
/ 8] & (1 << ((i
* 4 + c
) % 32));
871 bld_export_outputs(struct bld_context
*bld
)
873 struct nv_value
*vals
[4];
874 struct nv_instruction
*nvi
;
877 bld_push_values(&bld
->ovs
[0][0], PIPE_MAX_SHADER_OUTPUTS
);
879 for (i
= 0; i
< PIPE_MAX_SHADER_OUTPUTS
; ++i
) {
880 if (!bld_is_output_written(bld
, i
, -1))
882 for (n
= 0, c
= 0; c
< 4; ++c
) {
883 if (!bld_is_output_written(bld
, i
, c
))
885 vals
[n
] = bld_fetch_global(bld
, &bld
->ovs
[i
][c
]);
887 vals
[n
] = bld_insn_1(bld
, NV_OP_MOV
, vals
[n
]);
888 vals
[n
++]->reg
.id
= bld
->ti
->output_map
[i
][c
];
892 (nvi
= new_instruction(bld
->pc
, NV_OP_EXPORT
))->fixed
= 1;
894 for (c
= 0; c
< n
; ++c
)
895 nvi
->src
[c
] = new_ref(bld
->pc
, vals
[c
]);
900 bld_new_block(struct bld_context
*bld
, struct nv_basic_block
*b
)
904 bld_push_values(&bld
->tvs
[0][0], BLD_MAX_TEMPS
);
905 bld_push_values(&bld
->avs
[0][0], BLD_MAX_ADDRS
);
906 bld_push_values(&bld
->pvs
[0][0], BLD_MAX_PREDS
);
907 bld_push_values(&bld
->ovs
[0][0], PIPE_MAX_SHADER_OUTPUTS
);
909 bld
->pc
->current_block
= b
;
911 for (i
= 0; i
< 4; ++i
)
912 bld
->saved_addr
[i
][0] = NULL
;
914 for (i
= 0; i
< 128; ++i
)
915 bld
->saved_inputs
[i
] = NULL
;
918 static struct nv_value
*
919 bld_saved_input(struct bld_context
*bld
, unsigned i
, unsigned c
)
921 unsigned idx
= bld
->ti
->input_map
[i
][c
];
923 if (bld
->ti
->p
->type
!= PIPE_SHADER_FRAGMENT
)
925 if (bld
->saved_inputs
[idx
])
926 return bld
->saved_inputs
[idx
];
930 static struct nv_value
*
931 bld_interpolate(struct bld_context
*bld
, unsigned mode
, struct nv_value
*val
)
933 if (mode
& (NV50_INTERP_LINEAR
| NV50_INTERP_FLAT
))
934 val
= bld_insn_1(bld
, NV_OP_LINTERP
, val
);
936 val
= bld_insn_2(bld
, NV_OP_PINTERP
, val
, bld
->frgcrd
[3]);
938 val
->insn
->flat
= (mode
& NV50_INTERP_FLAT
) ? 1 : 0;
939 val
->insn
->centroid
= (mode
& NV50_INTERP_CENTROID
) ? 1 : 0;
943 static struct nv_value
*
944 emit_fetch(struct bld_context
*bld
, const struct tgsi_full_instruction
*insn
,
945 const unsigned s
, const unsigned chan
)
947 const struct tgsi_full_src_register
*src
= &insn
->Src
[s
];
948 struct nv_value
*res
;
949 unsigned idx
, swz
, dim_idx
, ind_idx
, ind_swz
;
950 ubyte type
= infer_src_type(insn
->Instruction
.Opcode
);
952 idx
= src
->Register
.Index
;
953 swz
= tgsi_util_get_full_src_register_swizzle(src
, chan
);
958 if (src
->Register
.Indirect
) {
959 ind_idx
= src
->Indirect
.Index
;
960 ind_swz
= tgsi_util_get_src_register_swizzle(&src
->Indirect
, 0);
963 switch (src
->Register
.File
) {
964 case TGSI_FILE_CONSTANT
:
965 dim_idx
= src
->Dimension
.Index
? src
->Dimension
.Index
+ 2 : 1;
966 assert(dim_idx
< 14);
967 assert(dim_idx
== 1); /* for now */
969 res
= new_value(bld
->pc
, NV_FILE_MEM_C(dim_idx
), type
);
970 res
->reg
.type
= type
;
971 res
->reg
.id
= (idx
* 4 + swz
) & 127;
972 res
= bld_insn_1(bld
, NV_OP_LDA
, res
);
974 if (src
->Register
.Indirect
)
975 res
->insn
->src
[4] = new_ref(bld
->pc
, FETCH_ADDR(ind_idx
, ind_swz
));
976 if (idx
>= (128 / 4))
978 new_ref(bld
->pc
, bld_get_address(bld
, (idx
* 16) & ~0x1ff, NULL
));
980 case TGSI_FILE_IMMEDIATE
:
981 assert(idx
< bld
->ti
->immd32_nr
);
982 res
= bld_load_imm_u32(bld
, bld
->ti
->immd32
[idx
* 4 + swz
]);
983 res
->reg
.type
= type
;
985 case TGSI_FILE_INPUT
:
986 res
= bld_saved_input(bld
, idx
, swz
);
987 if (res
&& (insn
->Instruction
.Opcode
!= TGSI_OPCODE_TXP
))
990 res
= new_value(bld
->pc
, bld
->ti
->input_file
, type
);
991 res
->reg
.id
= bld
->ti
->input_map
[idx
][swz
];
993 if (res
->reg
.file
== NV_FILE_MEM_V
) {
994 res
= bld_interpolate(bld
, bld
->ti
->interp_mode
[idx
], res
);
996 assert(src
->Dimension
.Dimension
== 0);
997 res
= bld_insn_1(bld
, NV_OP_LDA
, res
);
999 assert(res
->reg
.type
== type
);
1001 bld
->saved_inputs
[bld
->ti
->input_map
[idx
][swz
]] = res
;
1003 case TGSI_FILE_TEMPORARY
:
1004 /* this should be load from l[], with reload elimination later on */
1005 res
= bld_fetch_global(bld
, &bld
->tvs
[idx
][swz
]);
1007 case TGSI_FILE_ADDRESS
:
1008 res
= bld_fetch_global(bld
, &bld
->avs
[idx
][swz
]);
1010 case TGSI_FILE_PREDICATE
:
1011 res
= bld_fetch_global(bld
, &bld
->pvs
[idx
][swz
]);
1014 NOUVEAU_ERR("illegal/unhandled src reg file: %d\n", src
->Register
.File
);
1019 debug_printf("WARNING: undefined source value in TGSI instruction\n");
1020 return bld_load_imm_u32(bld
, 0);
1023 switch (tgsi_util_get_full_src_register_sign_mode(src
, chan
)) {
1024 case TGSI_UTIL_SIGN_KEEP
:
1026 case TGSI_UTIL_SIGN_CLEAR
:
1027 res
= bld_insn_1(bld
, NV_OP_ABS
, res
);
1029 case TGSI_UTIL_SIGN_TOGGLE
:
1030 res
= bld_insn_1(bld
, NV_OP_NEG
, res
);
1032 case TGSI_UTIL_SIGN_SET
:
1033 res
= bld_insn_1(bld
, NV_OP_ABS
, res
);
1034 res
= bld_insn_1(bld
, NV_OP_NEG
, res
);
1037 NOUVEAU_ERR("illegal/unhandled src reg sign mode\n");
1046 bld_lit(struct bld_context
*bld
, struct nv_value
*dst0
[4],
1047 const struct tgsi_full_instruction
*insn
)
1049 struct nv_value
*val0
, *zero
;
1050 unsigned mask
= insn
->Dst
[0].Register
.WriteMask
;
1052 if (mask
& ((1 << 0) | (1 << 3)))
1053 dst0
[3] = dst0
[0] = bld_load_imm_f32(bld
, 1.0f
);
1055 if (mask
& (3 << 1)) {
1056 zero
= bld_load_imm_f32(bld
, 0.0f
);
1057 val0
= bld_insn_2(bld
, NV_OP_MAX
, emit_fetch(bld
, insn
, 0, 0), zero
);
1059 if (mask
& (1 << 1))
1063 if (mask
& (1 << 2)) {
1064 struct nv_value
*val1
, *val3
, *src1
, *src3
;
1065 struct nv_value
*pos128
= bld_load_imm_f32(bld
, 127.999999f
);
1066 struct nv_value
*neg128
= bld_load_imm_f32(bld
, -127.999999f
);
1068 src1
= emit_fetch(bld
, insn
, 0, 1);
1069 src3
= emit_fetch(bld
, insn
, 0, 3);
1071 val0
->insn
->flags_def
= new_value(bld
->pc
, NV_FILE_FLAGS
, NV_TYPE_U16
);
1072 val0
->insn
->flags_def
->insn
= val0
->insn
;
1074 val1
= bld_insn_2(bld
, NV_OP_MAX
, src1
, zero
);
1075 val3
= bld_insn_2(bld
, NV_OP_MAX
, src3
, neg128
);
1076 val3
= bld_insn_2(bld
, NV_OP_MIN
, val3
, pos128
);
1077 val3
= bld_pow(bld
, val1
, val3
);
1079 dst0
[2] = bld_insn_1(bld
, NV_OP_MOV
, zero
);
1080 dst0
[2]->insn
->cc
= NV_CC_LE
;
1081 dst0
[2]->insn
->flags_src
= new_ref(bld
->pc
, val0
->insn
->flags_def
);
1083 dst0
[2] = bld_insn_2(bld
, NV_OP_SELECT
, val3
, dst0
[2]);
1088 get_tex_dim(const struct tgsi_full_instruction
*insn
, int *dim
, int *arg
)
1090 switch (insn
->Texture
.Texture
) {
1091 case TGSI_TEXTURE_1D
:
1094 case TGSI_TEXTURE_SHADOW1D
:
1098 case TGSI_TEXTURE_UNKNOWN
:
1099 case TGSI_TEXTURE_2D
:
1100 case TGSI_TEXTURE_RECT
:
1103 case TGSI_TEXTURE_SHADOW2D
:
1104 case TGSI_TEXTURE_SHADOWRECT
:
1108 case TGSI_TEXTURE_3D
:
1109 case TGSI_TEXTURE_CUBE
:
1119 load_proj_tex_coords(struct bld_context
*bld
,
1120 struct nv_value
*t
[4], int dim
,
1121 const struct tgsi_full_instruction
*insn
)
1125 t
[3] = emit_fetch(bld
, insn
, 0, 3);
1127 if (t
[3]->insn
->opcode
== NV_OP_PINTERP
) {
1128 t
[3]->insn
->opcode
= NV_OP_LINTERP
;
1129 nv_reference(bld
->pc
, &t
[3]->insn
->src
[1], NULL
);
1132 t
[3] = bld_insn_1(bld
, NV_OP_RCP
, t
[3]);
1134 for (c
= 0; c
< dim
; ++c
) {
1135 t
[c
] = emit_fetch(bld
, insn
, 0, c
);
1136 if (t
[c
]->insn
->opcode
== NV_OP_LINTERP
)
1137 t
[c
]->insn
->opcode
= NV_OP_PINTERP
;
1139 if (t
[c
]->insn
->opcode
== NV_OP_PINTERP
)
1140 nv_reference(bld
->pc
, &t
[c
]->insn
->src
[1], t
[3]);
1145 for (c
= 0; mask
; ++c
, mask
>>= 1) {
1148 t
[c
] = bld_insn_2(bld
, NV_OP_MUL
, t
[c
], t
[3]);
1153 bld_tex(struct bld_context
*bld
, struct nv_value
*dst0
[4],
1154 const struct tgsi_full_instruction
*insn
)
1156 struct nv_value
*t
[4];
1157 struct nv_instruction
*nvi
;
1158 uint opcode
= translate_opcode(insn
->Instruction
.Opcode
);
1161 get_tex_dim(insn
, &dim
, &arg
);
1163 if (insn
->Texture
.Texture
== TGSI_TEXTURE_CUBE
) {
1166 if (insn
->Instruction
.Opcode
== TGSI_OPCODE_TXP
) {
1167 load_proj_tex_coords(bld
, t
, dim
, insn
);
1169 for (c
= 0; c
< dim
; ++c
)
1170 t
[c
] = emit_fetch(bld
, insn
, 0, c
);
1173 t
[dim
] = emit_fetch(bld
, insn
, 0, 2);
1175 if (insn
->Instruction
.Opcode
== TGSI_OPCODE_TXB
||
1176 insn
->Instruction
.Opcode
== TGSI_OPCODE_TXL
) {
1177 t
[arg
++] = emit_fetch(bld
, insn
, 0, 3);
1180 for (c
= 0; c
< arg
; ++c
) {
1181 t
[c
] = bld_insn_1(bld
, NV_OP_MOV
, t
[c
]);
1182 t
[c
]->reg
.type
= NV_TYPE_F32
;
1185 nvi
= new_instruction(bld
->pc
, opcode
);
1187 for (c
= 0; c
< 4; ++c
) {
1188 nvi
->def
[c
] = dst0
[c
] = new_value(bld
->pc
, NV_FILE_GPR
, NV_TYPE_F32
);
1189 nvi
->def
[c
]->insn
= nvi
;
1191 for (c
= 0; c
< arg
; ++c
)
1192 nvi
->src
[c
] = new_ref(bld
->pc
, t
[c
]);
1194 nvi
->tex_t
= insn
->Src
[1].Register
.Index
;
1196 nvi
->tex_mask
= 0xf;
1197 nvi
->tex_cube
= (insn
->Texture
.Texture
== TGSI_TEXTURE_CUBE
) ? 1 : 0;
1199 nvi
->tex_argc
= arg
;
1202 #define FOR_EACH_DST0_ENABLED_CHANNEL(chan, inst) \
1203 for (chan = 0; chan < 4; ++chan) \
1204 if ((inst)->Dst[0].Register.WriteMask & (1 << chan))
1207 bld_instruction(struct bld_context
*bld
,
1208 const struct tgsi_full_instruction
*insn
)
1210 struct nv_value
*src0
;
1211 struct nv_value
*src1
;
1212 struct nv_value
*src2
;
1213 struct nv_value
*dst0
[4];
1214 struct nv_value
*temp
;
1216 uint opcode
= translate_opcode(insn
->Instruction
.Opcode
);
1218 tgsi_dump_instruction(insn
, 1);
1220 switch (insn
->Instruction
.Opcode
) {
1221 case TGSI_OPCODE_ADD
:
1222 case TGSI_OPCODE_MAX
:
1223 case TGSI_OPCODE_MIN
:
1224 case TGSI_OPCODE_MUL
:
1225 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
) {
1226 src0
= emit_fetch(bld
, insn
, 0, c
);
1227 src1
= emit_fetch(bld
, insn
, 1, c
);
1228 dst0
[c
] = bld_insn_2(bld
, opcode
, src0
, src1
);
1231 case TGSI_OPCODE_ARL
:
1232 src1
= bld_imm_u32(bld
, 4);
1233 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
) {
1234 src0
= emit_fetch(bld
, insn
, 0, c
);
1235 (temp
= bld_insn_1(bld
, NV_OP_FLOOR
, temp
))->reg
.type
= NV_TYPE_S32
;
1236 dst0
[c
] = bld_insn_2(bld
, NV_OP_SHL
, temp
, src1
);
1239 case TGSI_OPCODE_CMP
:
1240 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
) {
1241 src0
= emit_fetch(bld
, insn
, 0, c
);
1242 src1
= emit_fetch(bld
, insn
, 1, c
);
1243 src2
= emit_fetch(bld
, insn
, 2, c
);
1244 src0
= bld_predicate(bld
, src0
, FALSE
);
1246 src1
= bld_insn_1(bld
, NV_OP_MOV
, src1
);
1247 src1
->insn
->flags_src
= new_ref(bld
->pc
, src0
);
1248 src1
->insn
->cc
= NV_CC_LT
;
1250 src2
= bld_insn_1(bld
, NV_OP_MOV
, src2
);
1251 src2
->insn
->flags_src
= new_ref(bld
->pc
, src0
);
1252 src2
->insn
->cc
= NV_CC_GE
;
1254 dst0
[c
] = bld_insn_2(bld
, NV_OP_SELECT
, src1
, src2
);
1257 case TGSI_OPCODE_COS
:
1258 case TGSI_OPCODE_SIN
:
1259 src0
= emit_fetch(bld
, insn
, 0, 0);
1260 temp
= bld_insn_1(bld
, NV_OP_PRESIN
, src0
);
1261 if (insn
->Dst
[0].Register
.WriteMask
& 7)
1262 temp
= bld_insn_1(bld
, opcode
, temp
);
1263 for (c
= 0; c
< 3; ++c
)
1264 if (insn
->Dst
[0].Register
.WriteMask
& (1 << c
))
1266 if (!(insn
->Dst
[0].Register
.WriteMask
& (1 << 3)))
1268 src0
= emit_fetch(bld
, insn
, 0, 3);
1269 temp
= bld_insn_1(bld
, NV_OP_PRESIN
, src0
);
1270 dst0
[3] = bld_insn_1(bld
, opcode
, temp
);
1272 case TGSI_OPCODE_DP3
:
1273 src0
= emit_fetch(bld
, insn
, 0, 0);
1274 src1
= emit_fetch(bld
, insn
, 1, 0);
1275 temp
= bld_insn_2(bld
, NV_OP_MUL
, src0
, src1
);
1276 for (c
= 1; c
< 3; ++c
) {
1277 src0
= emit_fetch(bld
, insn
, 0, c
);
1278 src1
= emit_fetch(bld
, insn
, 1, c
);
1279 temp
= bld_insn_3(bld
, NV_OP_MAD
, src0
, src1
, temp
);
1281 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
)
1284 case TGSI_OPCODE_DP4
:
1285 src0
= emit_fetch(bld
, insn
, 0, 0);
1286 src1
= emit_fetch(bld
, insn
, 1, 0);
1287 temp
= bld_insn_2(bld
, NV_OP_MUL
, src0
, src1
);
1288 for (c
= 1; c
< 4; ++c
) {
1289 src0
= emit_fetch(bld
, insn
, 0, c
);
1290 src1
= emit_fetch(bld
, insn
, 1, c
);
1291 temp
= bld_insn_3(bld
, NV_OP_MAD
, src0
, src1
, temp
);
1293 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
)
1296 case TGSI_OPCODE_EX2
:
1297 src0
= emit_fetch(bld
, insn
, 0, 0);
1298 temp
= bld_insn_1(bld
, NV_OP_PREEX2
, src0
);
1299 temp
= bld_insn_1(bld
, NV_OP_EX2
, temp
);
1300 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
)
1303 case TGSI_OPCODE_FRC
:
1304 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
) {
1305 src0
= emit_fetch(bld
, insn
, 0, c
);
1306 dst0
[c
] = bld_insn_1(bld
, NV_OP_FLOOR
, src0
);
1307 dst0
[c
] = bld_insn_2(bld
, NV_OP_SUB
, src0
, dst0
[c
]);
1310 case TGSI_OPCODE_KIL
:
1311 for (c
= 0; c
< 4; ++c
) {
1312 src0
= emit_fetch(bld
, insn
, 0, c
);
1316 case TGSI_OPCODE_KILP
:
1317 (new_instruction(bld
->pc
, NV_OP_KIL
))->fixed
= 1;
1319 case TGSI_OPCODE_IF
:
1321 struct nv_basic_block
*b
= new_basic_block(bld
->pc
);
1323 nvbb_attach_block(bld
->pc
->current_block
, b
, CFG_EDGE_FORWARD
);
1325 bld
->join_bb
[bld
->cond_lvl
] = bld
->pc
->current_block
;
1326 bld
->cond_bb
[bld
->cond_lvl
] = bld
->pc
->current_block
;
1328 src1
= bld_predicate(bld
, emit_fetch(bld
, insn
, 0, 0), TRUE
);
1330 bld_flow(bld
, NV_OP_BRA
, NV_CC_EQ
, src1
, NULL
, (bld
->cond_lvl
== 0));
1333 bld_new_block(bld
, b
);
1336 case TGSI_OPCODE_ELSE
:
1338 struct nv_basic_block
*b
= new_basic_block(bld
->pc
);
1341 nvbb_attach_block(bld
->join_bb
[bld
->cond_lvl
], b
, CFG_EDGE_FORWARD
);
1343 bld
->cond_bb
[bld
->cond_lvl
]->exit
->target
= b
;
1344 bld
->cond_bb
[bld
->cond_lvl
] = bld
->pc
->current_block
;
1346 new_instruction(bld
->pc
, NV_OP_BRA
)->is_terminator
= 1;
1349 bld_new_block(bld
, b
);
1352 case TGSI_OPCODE_ENDIF
:
1354 struct nv_basic_block
*b
= new_basic_block(bld
->pc
);
1357 nvbb_attach_block(bld
->pc
->current_block
, b
, CFG_EDGE_FORWARD
);
1358 nvbb_attach_block(bld
->cond_bb
[bld
->cond_lvl
], b
, CFG_EDGE_FORWARD
);
1360 bld
->cond_bb
[bld
->cond_lvl
]->exit
->target
= b
;
1362 bld_new_block(bld
, b
);
1364 if (!bld
->cond_lvl
&& bld
->join_bb
[bld
->cond_lvl
]) {
1365 bld
->join_bb
[bld
->cond_lvl
]->exit
->prev
->target
= b
;
1366 new_instruction(bld
->pc
, NV_OP_JOIN
)->is_join
= TRUE
;
1370 case TGSI_OPCODE_BGNLOOP
:
1372 struct nv_basic_block
*bl
= new_basic_block(bld
->pc
);
1373 struct nv_basic_block
*bb
= new_basic_block(bld
->pc
);
1375 bld
->loop_bb
[bld
->loop_lvl
] = bl
;
1376 bld
->brkt_bb
[bld
->loop_lvl
] = bb
;
1378 bld_flow(bld
, NV_OP_BREAKADDR
, NV_CC_TR
, NULL
, bb
, FALSE
);
1380 nvbb_attach_block(bld
->pc
->current_block
, bl
, CFG_EDGE_LOOP_ENTER
);
1382 bld_new_block(bld
, bld
->loop_bb
[bld
->loop_lvl
++]);
1384 if (bld
->loop_lvl
== bld
->pc
->loop_nesting_bound
)
1385 bld
->pc
->loop_nesting_bound
++;
1387 bld_clear_def_use(&bld
->tvs
[0][0], BLD_MAX_TEMPS
, bld
->loop_lvl
);
1388 bld_clear_def_use(&bld
->avs
[0][0], BLD_MAX_ADDRS
, bld
->loop_lvl
);
1389 bld_clear_def_use(&bld
->pvs
[0][0], BLD_MAX_PREDS
, bld
->loop_lvl
);
1392 case TGSI_OPCODE_BRK
:
1394 struct nv_basic_block
*bb
= bld
->brkt_bb
[bld
->loop_lvl
- 1];
1396 bld_flow(bld
, NV_OP_BREAK
, NV_CC_TR
, NULL
, bb
, FALSE
);
1398 /* XXX: don't do this for redundant BRKs */
1399 nvbb_attach_block(bld
->pc
->current_block
, bb
, CFG_EDGE_LOOP_LEAVE
);
1402 case TGSI_OPCODE_CONT
:
1404 struct nv_basic_block
*bb
= bld
->loop_bb
[bld
->loop_lvl
- 1];
1406 bld_flow(bld
, NV_OP_BRA
, NV_CC_TR
, NULL
, bb
, FALSE
);
1408 nvbb_attach_block(bld
->pc
->current_block
, bb
, CFG_EDGE_BACK
);
1411 case TGSI_OPCODE_ENDLOOP
:
1413 struct nv_basic_block
*bb
= bld
->loop_bb
[--bld
->loop_lvl
];
1415 bld_flow(bld
, NV_OP_BRA
, NV_CC_TR
, NULL
, bb
, FALSE
);
1417 nvbb_attach_block(bld
->pc
->current_block
, bb
, CFG_EDGE_BACK
);
1419 bld_loop_end(bld
, bb
); /* replace loop-side operand of the phis */
1421 bld_new_block(bld
, bld
->brkt_bb
[bld
->loop_lvl
]);
1424 case TGSI_OPCODE_ABS
:
1425 case TGSI_OPCODE_CEIL
:
1426 case TGSI_OPCODE_FLR
:
1427 case TGSI_OPCODE_TRUNC
:
1428 case TGSI_OPCODE_DDX
:
1429 case TGSI_OPCODE_DDY
:
1430 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
) {
1431 src0
= emit_fetch(bld
, insn
, 0, c
);
1432 dst0
[c
] = bld_insn_1(bld
, opcode
, src0
);
1435 case TGSI_OPCODE_LIT
:
1436 bld_lit(bld
, dst0
, insn
);
1438 case TGSI_OPCODE_LRP
:
1439 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
) {
1440 src0
= emit_fetch(bld
, insn
, 0, c
);
1441 src1
= emit_fetch(bld
, insn
, 1, c
);
1442 src2
= emit_fetch(bld
, insn
, 2, c
);
1443 dst0
[c
] = bld_insn_2(bld
, NV_OP_SUB
, src1
, src2
);
1444 dst0
[c
] = bld_insn_3(bld
, NV_OP_MAD
, dst0
[c
], src0
, src2
);
1447 case TGSI_OPCODE_MOV
:
1448 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
)
1449 dst0
[c
] = emit_fetch(bld
, insn
, 0, c
);
1451 case TGSI_OPCODE_MAD
:
1452 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
) {
1453 src0
= emit_fetch(bld
, insn
, 0, c
);
1454 src1
= emit_fetch(bld
, insn
, 1, c
);
1455 src2
= emit_fetch(bld
, insn
, 2, c
);
1456 dst0
[c
] = bld_insn_3(bld
, opcode
, src0
, src1
, src2
);
1459 case TGSI_OPCODE_POW
:
1460 src0
= emit_fetch(bld
, insn
, 0, 0);
1461 src1
= emit_fetch(bld
, insn
, 1, 0);
1462 temp
= bld_pow(bld
, src0
, src1
);
1463 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
)
1466 case TGSI_OPCODE_RCP
:
1467 case TGSI_OPCODE_LG2
:
1468 src0
= emit_fetch(bld
, insn
, 0, 0);
1469 temp
= bld_insn_1(bld
, opcode
, src0
);
1470 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
)
1473 case TGSI_OPCODE_RSQ
:
1474 src0
= emit_fetch(bld
, insn
, 0, 0);
1475 temp
= bld_insn_1(bld
, NV_OP_ABS
, src0
);
1476 temp
= bld_insn_1(bld
, NV_OP_RSQ
, temp
);
1477 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
)
1480 case TGSI_OPCODE_SLT
:
1481 case TGSI_OPCODE_SGE
:
1482 case TGSI_OPCODE_SEQ
:
1483 case TGSI_OPCODE_SGT
:
1484 case TGSI_OPCODE_SLE
:
1485 case TGSI_OPCODE_SNE
:
1486 case TGSI_OPCODE_ISLT
:
1487 case TGSI_OPCODE_ISGE
:
1488 case TGSI_OPCODE_USEQ
:
1489 case TGSI_OPCODE_USGE
:
1490 case TGSI_OPCODE_USLT
:
1491 case TGSI_OPCODE_USNE
:
1492 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
) {
1493 src0
= emit_fetch(bld
, insn
, 0, c
);
1494 src1
= emit_fetch(bld
, insn
, 1, c
);
1495 dst0
[c
] = bld_insn_2(bld
, NV_OP_SET
, src0
, src1
);
1496 dst0
[c
]->insn
->set_cond
= translate_setcc(insn
->Instruction
.Opcode
);
1497 dst0
[c
]->reg
.type
= infer_dst_type(insn
->Instruction
.Opcode
);
1499 dst0
[c
]->insn
->src
[0]->typecast
=
1500 dst0
[c
]->insn
->src
[1]->typecast
=
1501 infer_src_type(insn
->Instruction
.Opcode
);
1503 if (dst0
[c
]->reg
.type
!= NV_TYPE_F32
)
1505 dst0
[c
] = bld_insn_1(bld
, NV_OP_ABS
, dst0
[c
]);
1506 dst0
[c
]->insn
->src
[0]->typecast
= NV_TYPE_S32
;
1507 dst0
[c
]->reg
.type
= NV_TYPE_S32
;
1508 dst0
[c
] = bld_insn_1(bld
, NV_OP_CVT
, dst0
[c
]);
1509 dst0
[c
]->reg
.type
= NV_TYPE_F32
;
1512 case TGSI_OPCODE_SCS
:
1513 if (insn
->Dst
[0].Register
.WriteMask
& 0x3) {
1514 src0
= emit_fetch(bld
, insn
, 0, 0);
1515 temp
= bld_insn_1(bld
, NV_OP_PRESIN
, src0
);
1516 if (insn
->Dst
[0].Register
.WriteMask
& 0x1)
1517 dst0
[0] = bld_insn_1(bld
, NV_OP_COS
, temp
);
1518 if (insn
->Dst
[0].Register
.WriteMask
& 0x2)
1519 dst0
[1] = bld_insn_1(bld
, NV_OP_SIN
, temp
);
1521 if (insn
->Dst
[0].Register
.WriteMask
& 0x4)
1522 dst0
[2] = bld_imm_f32(bld
, 0.0f
);
1523 if (insn
->Dst
[0].Register
.WriteMask
& 0x8)
1524 dst0
[3] = bld_imm_f32(bld
, 1.0f
);
1526 case TGSI_OPCODE_SUB
:
1527 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
) {
1528 src0
= emit_fetch(bld
, insn
, 0, c
);
1529 src1
= emit_fetch(bld
, insn
, 1, c
);
1530 dst0
[c
] = bld_insn_2(bld
, NV_OP_ADD
, src0
, src1
);
1531 dst0
[c
]->insn
->src
[1]->mod
^= NV_MOD_NEG
;
1534 case TGSI_OPCODE_TEX
:
1535 case TGSI_OPCODE_TXB
:
1536 case TGSI_OPCODE_TXL
:
1537 case TGSI_OPCODE_TXP
:
1538 bld_tex(bld
, dst0
, insn
);
1540 case TGSI_OPCODE_XPD
:
1541 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
) {
1543 dst0
[3] = bld_imm_f32(bld
, 1.0f
);
1546 src0
= emit_fetch(bld
, insn
, 0, (c
+ 1) % 3);
1547 src1
= emit_fetch(bld
, insn
, 1, (c
+ 2) % 3);
1548 dst0
[c
] = bld_insn_2(bld
, NV_OP_MUL
, src0
, src1
);
1550 src0
= emit_fetch(bld
, insn
, 0, (c
+ 2) % 3);
1551 src1
= emit_fetch(bld
, insn
, 1, (c
+ 1) % 3);
1552 dst0
[c
] = bld_insn_3(bld
, NV_OP_MAD
, src0
, src1
, dst0
[c
]);
1554 dst0
[c
]->insn
->src
[2]->mod
^= NV_MOD_NEG
;
1557 case TGSI_OPCODE_RET
:
1558 (new_instruction(bld
->pc
, NV_OP_RET
))->fixed
= 1;
1560 case TGSI_OPCODE_END
:
1561 if (bld
->ti
->p
->type
== PIPE_SHADER_FRAGMENT
)
1562 bld_export_outputs(bld
);
1565 NOUVEAU_ERR("unhandled opcode %u\n", insn
->Instruction
.Opcode
);
1570 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
)
1571 emit_store(bld
, insn
, c
, dst0
[c
]);
1575 bld_free_value_trackers(struct bld_value_stack
*base
, int n
)
1579 for (i
= 0; i
< n
; ++i
)
1580 for (c
= 0; c
< 4; ++c
)
1581 if (base
[i
* 4 + c
].body
)
1582 FREE(base
[i
* 4 + c
].body
);
1586 nv50_tgsi_to_nc(struct nv_pc
*pc
, struct nv50_translation_info
*ti
)
1588 struct bld_context
*bld
= CALLOC_STRUCT(bld_context
);
1591 pc
->root
= pc
->current_block
= new_basic_block(pc
);
1596 pc
->loop_nesting_bound
= 1;
1598 c
= util_bitcount(bld
->ti
->p
->fp
.interp
>> 24);
1599 if (c
&& ti
->p
->type
== PIPE_SHADER_FRAGMENT
) {
1600 bld
->frgcrd
[3] = new_value(pc
, NV_FILE_MEM_V
, NV_TYPE_F32
);
1601 bld
->frgcrd
[3]->reg
.id
= c
- 1;
1602 bld
->frgcrd
[3] = bld_insn_1(bld
, NV_OP_LINTERP
, bld
->frgcrd
[3]);
1603 bld
->frgcrd
[3] = bld_insn_1(bld
, NV_OP_RCP
, bld
->frgcrd
[3]);
1606 tgsi_parse_init(&bld
->parse
[0], ti
->p
->pipe
.tokens
);
1608 while (!tgsi_parse_end_of_tokens(&bld
->parse
[bld
->call_lvl
])) {
1609 const union tgsi_full_token
*tok
= &bld
->parse
[bld
->call_lvl
].FullToken
;
1611 tgsi_parse_token(&bld
->parse
[bld
->call_lvl
]);
1613 switch (tok
->Token
.Type
) {
1614 case TGSI_TOKEN_TYPE_INSTRUCTION
:
1615 bld_instruction(bld
, &tok
->FullInstruction
);
1622 bld_free_value_trackers(&bld
->tvs
[0][0], BLD_MAX_TEMPS
);
1623 bld_free_value_trackers(&bld
->avs
[0][0], BLD_MAX_ADDRS
);
1624 bld_free_value_trackers(&bld
->pvs
[0][0], BLD_MAX_PREDS
);
1626 bld_free_value_trackers(&bld
->ovs
[0][0], PIPE_MAX_SHADER_OUTPUTS
);
1632 /* If a variable is assigned in a loop, replace all references to the value
1633 * from outside the loop with a phi value.
1636 bld_replace_value(struct nv_pc
*pc
, struct nv_basic_block
*b
,
1637 struct nv_value
*old_val
,
1638 struct nv_value
*new_val
)
1640 struct nv_instruction
*nvi
;
1642 for (nvi
= b
->entry
; nvi
; nvi
= nvi
->next
) {
1644 for (s
= 0; s
< 5; ++s
) {
1647 if (nvi
->src
[s
]->value
== old_val
)
1648 nv_reference(pc
, &nvi
->src
[s
], new_val
);
1650 if (nvi
->flags_src
&& nvi
->flags_src
->value
== old_val
)
1651 nv_reference(pc
, &nvi
->flags_src
, new_val
);
1654 b
->pass_seq
= pc
->pass_seq
;
1656 if (b
->out
[0] && b
->out
[0]->pass_seq
< pc
->pass_seq
)
1657 bld_replace_value(pc
, b
->out
[0], old_val
, new_val
);
1659 if (b
->out
[1] && b
->out
[1]->pass_seq
< pc
->pass_seq
)
1660 bld_replace_value(pc
, b
->out
[1], old_val
, new_val
);