/*
 * Copyright 2010 Christoph Bumiller
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
/* #define NV50_TGSI2NC_DEBUG */

/* XXX: need to clean this up so we get the typecasting right more naturally */

#include "nv50_context.h"

#include "pipe/p_shader_tokens.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_util.h"

#include "util/u_simple_list.h"
#include "tgsi/tgsi_dump.h"

#define BLD_MAX_TEMPS 64
#define BLD_MAX_ADDRS 4
#define BLD_MAX_PREDS 4
#define BLD_MAX_IMMDS 128

#define BLD_MAX_COND_NESTING 4
#define BLD_MAX_LOOP_NESTING 4
#define BLD_MAX_CALL_NESTING 2
/* collects all values assigned to the same TGSI register */
struct bld_value_stack {
   struct nv_value *top;
   struct nv_value **body;
   unsigned size;
   uint16_t loop_use; /* 1 bit per loop level, indicates if used/defd */
   uint16_t loop_def;
};
static INLINE void
bld_vals_push_val(struct bld_value_stack *stk, struct nv_value *val)
{
   assert(!stk->size || (stk->body[stk->size - 1] != val));

   if (!(stk->size % 8)) {
      unsigned old_sz = (stk->size + 0) * sizeof(struct nv_value *);
      unsigned new_sz = (stk->size + 8) * sizeof(struct nv_value *);
      stk->body = (struct nv_value **)REALLOC(stk->body, old_sz, new_sz);
   }
   stk->body[stk->size++] = val;
}

static INLINE void
bld_vals_del_val(struct bld_value_stack *stk, struct nv_value *val)
{
   int i;

   for (i = stk->size - 1; i >= 0; --i)
      if (stk->body[i] == val)
         break;

   if (i != stk->size - 1)
      stk->body[i] = stk->body[stk->size - 1];

   --stk->size; /* XXX: old size in REALLOC */
}

static INLINE void
bld_vals_push(struct bld_value_stack *stk)
{
   bld_vals_push_val(stk, stk->top);
}
static INLINE void
bld_push_values(struct bld_value_stack *stacks, int n)
{
   int i, c;

   for (i = 0; i < n; ++i)
      for (c = 0; c < 4; ++c)
         if (stacks[i * 4 + c].top)
            bld_vals_push(&stacks[i * 4 + c]);
}
struct bld_context {
   struct nv50_translation_info *ti;

   struct nv_pc *pc;
   struct nv_basic_block *b;

   struct tgsi_parse_context parse[BLD_MAX_CALL_NESTING];
   int call_lvl;

   struct nv_basic_block *cond_bb[BLD_MAX_COND_NESTING];
   struct nv_basic_block *join_bb[BLD_MAX_COND_NESTING];
   struct nv_basic_block *else_bb[BLD_MAX_COND_NESTING];
   int cond_lvl;
   struct nv_basic_block *loop_bb[BLD_MAX_LOOP_NESTING];
   struct nv_basic_block *brkt_bb[BLD_MAX_LOOP_NESTING];
   int loop_lvl;

   ubyte out_kind; /* CFG_EDGE_FORWARD, or FAKE in case of BREAK/CONT */

   struct bld_value_stack tvs[BLD_MAX_TEMPS][4]; /* TGSI_FILE_TEMPORARY */
   struct bld_value_stack avs[BLD_MAX_ADDRS][4]; /* TGSI_FILE_ADDRESS */
   struct bld_value_stack pvs[BLD_MAX_PREDS][4]; /* TGSI_FILE_PREDICATE */
   struct bld_value_stack ovs[PIPE_MAX_SHADER_OUTPUTS][4];

   uint32_t outputs_written[(PIPE_MAX_SHADER_OUTPUTS + 31) / 32];

   struct nv_value *frgcrd[4];
   struct nv_value *sysval[4];

   struct nv_value *saved_addr[4][2];
   struct nv_value *saved_inputs[128];
   struct nv_value *saved_immd[BLD_MAX_IMMDS];
   unsigned num_immds;
};
static INLINE ubyte
bld_stack_file(struct bld_context *bld, struct bld_value_stack *stk)
{
   if (stk < &bld->avs[0][0])
      return NV_FILE_GPR;
   if (stk < &bld->pvs[0][0])
      return NV_FILE_ADDR;
   if (stk < &bld->ovs[0][0])
      return NV_FILE_FLAGS;
}
static INLINE struct nv_value *
bld_fetch(struct bld_context *bld, struct bld_value_stack *stk, int i, int c)
{
   stk[i * 4 + c].loop_use |= 1 << bld->loop_lvl;

   return stk[i * 4 + c].top;
}
static struct nv_value *
bld_loop_phi(struct bld_context *, struct bld_value_stack *, struct nv_value *);
/* If a variable is defined in a loop without prior use, we don't need
 * a phi in the loop header to account for backwards flow.
 *
 * However, if this variable is then also used outside the loop, we do
 * need a phi after all. But we must not use this phi's def inside the
 * loop, so we can eliminate the phi if it is unused later.
 */
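/* Illustrative example (added; not from the original source): for a TGSI
 * sequence like
 *
 *    BGNLOOP
 *      MOV TEMP[0].x, IMM[0]      # defined before any use inside the loop
 *    ENDLOOP
 *    MOV OUT[0].x, TEMP[0].xxxx   # used after the loop
 *
 * no loop-header phi is needed for the iterations themselves, but the use
 * after ENDLOOP still requires one; bld_store() below creates it eagerly
 * via bld_loop_phi(), and it can be dropped again if it ends up unused.
 */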
static INLINE void
bld_store(struct bld_context *bld, struct bld_value_stack *stk, int i, int c,
          struct nv_value *val)
{
   const uint16_t m = 1 << bld->loop_lvl;

   stk = &stk[i * 4 + c];

   if (bld->loop_lvl && !(m & (stk->loop_def | stk->loop_use)))
      bld_loop_phi(bld, stk, val);

   stk->top = val;
   stk->loop_def |= 1 << bld->loop_lvl;
}
static INLINE void
bld_clear_def_use(struct bld_value_stack *stk, int n, int lvl)
{
   int i;
   const uint16_t mask = ~(1 << lvl);

   for (i = 0; i < n * 4; ++i) {
      stk[i].loop_def &= mask;
      stk[i].loop_use &= mask;
   }
}

#define FETCH_TEMP(i, c)    bld_fetch(bld, &bld->tvs[0][0], i, c)
#define STORE_TEMP(i, c, v) bld_store(bld, &bld->tvs[0][0], i, c, (v))
#define FETCH_ADDR(i, c)    bld_fetch(bld, &bld->avs[0][0], i, c)
#define STORE_ADDR(i, c, v) bld_store(bld, &bld->avs[0][0], i, c, (v))
#define FETCH_PRED(i, c)    bld_fetch(bld, &bld->pvs[0][0], i, c)
#define STORE_PRED(i, c, v) bld_store(bld, &bld->pvs[0][0], i, c, (v))

#define STORE_OUTR(i, c, v)                                           \
   do {                                                               \
      bld->ovs[i][c].top = (v);                                       \
      bld->outputs_written[(i) / 8] |= 1 << (((i) * 4 + (c)) % 32);   \
   } while (0)
static void
bld_warn_uninitialized(struct bld_context *bld, int kind,
                       struct bld_value_stack *stk, struct nv_basic_block *b)
{
   long i = (stk - &bld->tvs[0][0]) / 4;
   long c = (stk - &bld->tvs[0][0]) & 3;

   debug_printf("WARNING: TEMP[%li].%c %s used uninitialized in BB:%i\n",
                i, (int)('x' + c), kind ? "may be" : "is", b->id);
}
static INLINE struct nv_value *
bld_def(struct nv_instruction *i, int c, struct nv_value *value)
{
   i->def[c] = value;
   value->insn = i;
   return value;
}
static INLINE struct nv_value *
find_by_bb(struct bld_value_stack *stack, struct nv_basic_block *b)
{
   int i;

   if (stack->top && stack->top->insn->bb == b)
      return stack->top;

   for (i = stack->size - 1; i >= 0; --i)
      if (stack->body[i]->insn->bb == b)
         return stack->body[i];
   return NULL;
}
/* fetch value from stack that was defined in the specified basic block,
 * or search for first definitions in all of its predecessors
 */
static void
fetch_by_bb(struct bld_value_stack *stack,
            struct nv_value **vals, int *n,
            struct nv_basic_block *b)
{
   int i;
   struct nv_value *val;

   assert(*n < 16); /* MAX_COND_NESTING */

   val = find_by_bb(stack, b);
   if (val) {
      for (i = 0; i < *n; ++i)
         if (vals[i] == val)
            return;
      vals[(*n)++] = val;
      return;
   }
   for (i = 0; i < b->num_in; ++i)
      if (!IS_WALL_EDGE(b->in_kind[i]))
         fetch_by_bb(stack, vals, n, b->in[i]);
}
static INLINE struct nv_value *
bld_load_imm_u32(struct bld_context *bld, uint32_t u);

static INLINE struct nv_value *
bld_undef(struct bld_context *bld, ubyte file)
{
   struct nv_instruction *nvi = new_instruction(bld->pc, NV_OP_UNDEF);

   return bld_def(nvi, 0, new_value(bld->pc, file, NV_TYPE_U32));
}
static struct nv_value *
bld_phi(struct bld_context *bld, struct nv_basic_block *b,
        struct bld_value_stack *stack)
{
   struct nv_basic_block *in;
   struct nv_value *vals[16], *val;
   struct nv_instruction *phi;
   int i, j, n;

   do {
      i = n = 0;
      fetch_by_bb(stack, vals, &n, b);

      if (!n) {
         bld_warn_uninitialized(bld, 0, stack, b);
         return NULL;
      }

      if (n == 1) {
         if (nvbb_dominated_by(b, vals[0]->insn->bb))
            break;

         bld_warn_uninitialized(bld, 1, stack, b);

         /* back-tracking to insert missing value of other path */
         in = b;
         while (in->in[1]) {
            if (in->num_in == 1) {
               in = in->in[0];
            } else {
               if (!nvbb_reachable_by(in->in[0], vals[0]->insn->bb, b))
                  in = in->in[0];
               else
               if (!nvbb_reachable_by(in->in[1], vals[0]->insn->bb, b))
                  in = in->in[1];
               else
                  in = in->in[0];
            }
         }
         bld->pc->current_block = in;

         /* should make this a no-op */
         bld_vals_push_val(stack, bld_undef(bld, vals[0]->reg.file));
         continue;
      }

      for (i = 0; i < n; ++i) {
         /* if value dominates b, continue to the redefinitions */
         if (nvbb_dominated_by(b, vals[i]->insn->bb))
            continue;

         /* if value dominates any in-block, b should be the dom frontier */
         for (j = 0; j < b->num_in; ++j)
            if (nvbb_dominated_by(b->in[j], vals[i]->insn->bb))
               break;
         /* otherwise, find the dominance frontier and put the phi there */
         if (j == b->num_in) {
            in = nvbb_dom_frontier(vals[i]->insn->bb);
            val = bld_phi(bld, in, stack);
            bld_vals_push_val(stack, val);
            break;
         }
      }
   } while (i < n);

   bld->pc->current_block = b;

   if (n == 1)
      return vals[0];

   phi = new_instruction(bld->pc, NV_OP_PHI);

   bld_def(phi, 0, new_value(bld->pc, vals[0]->reg.file, vals[0]->reg.type));
   for (i = 0; i < n; ++i)
      phi->src[i] = new_ref(bld->pc, vals[i]);

   return phi->def[0];
}
/* Insert a phi function in the loop header.
 * For nested loops, we need to insert phi functions in all the outer
 * loop headers if they don't have one yet.
 *
 * @def: redefinition from inside loop, or NULL if to be replaced later
 */
static struct nv_value *
bld_loop_phi(struct bld_context *bld, struct bld_value_stack *stack,
             struct nv_value *def)
{
   struct nv_instruction *phi;
   struct nv_basic_block *bb = bld->pc->current_block;
   struct nv_value *val = NULL;

   if (bld->loop_lvl > 1) {
      --bld->loop_lvl;
      if (!((stack->loop_def | stack->loop_use) & (1 << bld->loop_lvl)))
         val = bld_loop_phi(bld, stack, NULL);
      ++bld->loop_lvl;
   }

   if (!val) {
      val = bld_phi(bld, bld->pc->current_block, stack); /* old definition */
      if (!val) {
         bld->pc->current_block = bld->loop_bb[bld->loop_lvl - 1]->in[0];
         val = bld_undef(bld, bld_stack_file(bld, stack));
      }
   }

   bld->pc->current_block = bld->loop_bb[bld->loop_lvl - 1];

   phi = new_instruction(bld->pc, NV_OP_PHI);

   bld_def(phi, 0, new_value_like(bld->pc, val));
   if (!def)
      def = phi->def[0];

   bld_vals_push_val(stack, phi->def[0]);

   phi->target = (struct nv_basic_block *)stack; /* cheat */

   nv_reference(bld->pc, &phi->src[0], val);
   nv_reference(bld->pc, &phi->src[1], def);

   bld->pc->current_block = bb;

   return phi->def[0];
}
static INLINE struct nv_value *
bld_fetch_global(struct bld_context *bld, struct bld_value_stack *stack)
{
   const uint16_t m = 1 << bld->loop_lvl;
   const uint16_t use = stack->loop_use;

   stack->loop_use |= m;

   /* If neither used nor def'd inside the loop, build a phi in foresight,
    * so we don't have to replace stuff later on, which requires tracking.
    */
   if (bld->loop_lvl && !((use | stack->loop_def) & m))
      return bld_loop_phi(bld, stack, NULL);

   return bld_phi(bld, bld->pc->current_block, stack);
}
static INLINE struct nv_value *
bld_imm_u32(struct bld_context *bld, uint32_t u)
{
   int i;
   unsigned n = bld->num_immds;

   for (i = 0; i < n; ++i)
      if (bld->saved_immd[i]->reg.imm.u32 == u)
         return bld->saved_immd[i];
   assert(n < BLD_MAX_IMMDS);

   bld->num_immds++;

   bld->saved_immd[n] = new_value(bld->pc, NV_FILE_IMM, NV_TYPE_U32);
   bld->saved_immd[n]->reg.imm.u32 = u;
   return bld->saved_immd[n];
}

static void
bld_replace_value(struct nv_pc *, struct nv_basic_block *, struct nv_value *,
                  struct nv_value *);
/* Replace the source of the phi in the loop header by the last assignment,
 * or eliminate the phi function if there is no assignment inside the loop.
 *
 * Redundancy situation 1 - (used) but (not redefined) value:
 *  %3 = phi %0, %3 = %3 is used
 *  %3 = phi %0, %4 = is new definition
 *
 * Redundancy situation 2 - (not used) but (redefined) value:
 *  %3 = phi %0, %2 = %2 is used, %3 could be used outside, deleted by DCE
 */
static void
bld_loop_end(struct bld_context *bld, struct nv_basic_block *bb)
{
   struct nv_basic_block *save = bld->pc->current_block;
   struct nv_instruction *phi, *next;
   struct nv_value *val;
   struct bld_value_stack *stk;
   int i, s, n;

   for (phi = bb->phi; phi && phi->opcode == NV_OP_PHI; phi = next) {
      next = phi->next;

      stk = (struct bld_value_stack *)phi->target;

      for (s = 1, n = 0; n < bb->num_in; ++n) {
         if (bb->in_kind[n] != CFG_EDGE_BACK)
            continue;

         bld->pc->current_block = bb->in[n];
         val = bld_fetch_global(bld, stk);

         for (i = 0; i < 4; ++i)
            if (phi->src[i] && phi->src[i]->value == val)
               break;
         if (i == 4)
            nv_reference(bld->pc, &phi->src[s++], val);
      }
      bld->pc->current_block = save;

      if (phi->src[0]->value == phi->def[0] ||
          phi->src[0]->value == phi->src[1]->value)
         s = 1;
      else
      if (phi->src[1]->value == phi->def[0])
         s = 0;
      else
         continue;

      /* eliminate the phi */
      bld_vals_del_val(stk, phi->def[0]);

      ++bld->pc->pass_seq;
      bld_replace_value(bld->pc, bb, phi->def[0], phi->src[s]->value);
   }
}
static INLINE struct nv_value *
bld_imm_f32(struct bld_context *bld, float f)
{
   return bld_imm_u32(bld, fui(f));
}

#define SET_TYPE(v, t) ((v)->reg.type = NV_TYPE_##t)
static struct nv_value *
bld_insn_1(struct bld_context *bld, uint opcode, struct nv_value *src0)
{
   struct nv_instruction *insn = new_instruction(bld->pc, opcode);

   nv_reference(bld->pc, &insn->src[0], src0); /* NOTE: new_ref would suffice */

   return bld_def(insn, 0, new_value(bld->pc, NV_FILE_GPR, src0->reg.type));
}

static struct nv_value *
bld_insn_2(struct bld_context *bld, uint opcode,
           struct nv_value *src0, struct nv_value *src1)
{
   struct nv_instruction *insn = new_instruction(bld->pc, opcode);

   nv_reference(bld->pc, &insn->src[0], src0);
   nv_reference(bld->pc, &insn->src[1], src1);

   return bld_def(insn, 0, new_value(bld->pc, NV_FILE_GPR, src0->reg.type));
}

static struct nv_value *
bld_insn_3(struct bld_context *bld, uint opcode,
           struct nv_value *src0, struct nv_value *src1,
           struct nv_value *src2)
{
   struct nv_instruction *insn = new_instruction(bld->pc, opcode);

   nv_reference(bld->pc, &insn->src[0], src0);
   nv_reference(bld->pc, &insn->src[1], src1);
   nv_reference(bld->pc, &insn->src[2], src2);

   return bld_def(insn, 0, new_value(bld->pc, NV_FILE_GPR, src0->reg.type));
}
#define BLD_INSN_1_EX(d, op, dt, s0, s0t)              \
   do {                                                \
      (d) = bld_insn_1(bld, (NV_OP_##op), (s0));       \
      (d)->reg.type = NV_TYPE_##dt;                    \
      (d)->insn->src[0]->typecast = NV_TYPE_##s0t;     \
   } while (0)

#define BLD_INSN_2_EX(d, op, dt, s0, s0t, s1, s1t)     \
   do {                                                \
      (d) = bld_insn_2(bld, (NV_OP_##op), (s0), (s1)); \
      (d)->reg.type = NV_TYPE_##dt;                    \
      (d)->insn->src[0]->typecast = NV_TYPE_##s0t;     \
      (d)->insn->src[1]->typecast = NV_TYPE_##s1t;     \
   } while (0)
static struct nv_value *
bld_pow(struct bld_context *bld, struct nv_value *x, struct nv_value *e)
{
   struct nv_value *val;

   BLD_INSN_1_EX(val, LG2, F32, x, F32);
   BLD_INSN_2_EX(val, MUL, F32, e, F32, val, F32);
   val = bld_insn_1(bld, NV_OP_PREEX2, val);
   val = bld_insn_1(bld, NV_OP_EX2, val);

   return val;
}
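/* Note (added for clarity): bld_pow() computes pow(x, e) as 2^(e * log2(x)),
 * i.e. LG2, MUL, then PREEX2 followed by EX2 -- the same pre-op + EX2 pattern
 * used for TGSI_OPCODE_EX2 in bld_instruction() below.
 */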
static INLINE struct nv_value *
bld_load_imm_f32(struct bld_context *bld, float f)
{
   return bld_insn_1(bld, NV_OP_MOV, bld_imm_f32(bld, f));
}

static INLINE struct nv_value *
bld_load_imm_u32(struct bld_context *bld, uint32_t u)
{
   return bld_insn_1(bld, NV_OP_MOV, bld_imm_u32(bld, u));
}
static struct nv_value *
bld_get_address(struct bld_context *bld, int id, struct nv_value *indirect)
{
   int i;
   struct nv_instruction *nvi;

   for (i = 0; i < 4; ++i) {
      if (!bld->saved_addr[i][0])
         break;
      if (bld->saved_addr[i][1] == indirect) {
         nvi = bld->saved_addr[i][0]->insn;
         if (nvi->src[0]->value->reg.imm.u32 == id)
            return bld->saved_addr[i][0];
      }
   }

   bld->saved_addr[i][0] = bld_load_imm_u32(bld, id);
   bld->saved_addr[i][0]->reg.file = NV_FILE_ADDR;
   bld->saved_addr[i][1] = indirect;
   return bld->saved_addr[i][0];
}
static struct nv_value *
bld_predicate(struct bld_context *bld, struct nv_value *src, boolean bool_only)
{
   struct nv_instruction *s0i, *nvi = src->insn;

   if (!nvi)
      nvi = bld_insn_1(bld,
                       (src->reg.file == NV_FILE_IMM) ? NV_OP_MOV : NV_OP_LDA,
                       src)->insn;

   while (nvi->opcode == NV_OP_ABS || nvi->opcode == NV_OP_NEG ||
          nvi->opcode == NV_OP_CVT) {
      s0i = nvi->src[0]->value->insn;
      if (!s0i ||
          s0i->opcode == NV_OP_LDA ||
          s0i->opcode == NV_OP_MOV ||
          s0i->opcode == NV_OP_PHI)
         break;
      nvi = s0i;

      assert(!nvi->flags_src);
   }

   if (nvi->opcode == NV_OP_LDA ||
       nvi->opcode == NV_OP_MOV ||
       nvi->opcode == NV_OP_PHI || nvi->bb != bld->pc->current_block) {
      nvi = new_instruction(bld->pc, NV_OP_CVT);
      nv_reference(bld->pc, &nvi->src[0], src);
   }

   if (!nvi->flags_def) {
      nvi->flags_def = new_value(bld->pc, NV_FILE_FLAGS, NV_TYPE_U16);
      nvi->flags_def->insn = nvi;
   }
   return nvi->flags_def;
}
static void
bld_kil(struct bld_context *bld, struct nv_value *src)
{
   struct nv_instruction *nvi;

   src = bld_predicate(bld, src, FALSE);
   nvi = new_instruction(bld->pc, NV_OP_KIL);

   nvi->flags_src = new_ref(bld->pc, src);
}
static void
bld_flow(struct bld_context *bld, uint opcode, ubyte cc,
         struct nv_value *src, struct nv_basic_block *target,
         boolean plan_reconverge)
{
   struct nv_instruction *nvi;

   if (plan_reconverge)
      new_instruction(bld->pc, NV_OP_JOINAT)->fixed = 1;

   nvi = new_instruction(bld->pc, opcode);
   nvi->is_terminator = 1;
   nvi->cc = cc;
   nvi->target = target;
   if (src)
      nvi->flags_src = new_ref(bld->pc, src);
}
static ubyte
translate_setcc(unsigned opcode)
{
   switch (opcode) {
   case TGSI_OPCODE_SLT: return NV_CC_LT;
   case TGSI_OPCODE_SGE: return NV_CC_GE;
   case TGSI_OPCODE_SEQ: return NV_CC_EQ;
   case TGSI_OPCODE_SGT: return NV_CC_GT;
   case TGSI_OPCODE_SLE: return NV_CC_LE;
   case TGSI_OPCODE_SNE: return NV_CC_NE | NV_CC_U;
   case TGSI_OPCODE_STR: return NV_CC_TR;
   case TGSI_OPCODE_SFL: return NV_CC_FL;

   case TGSI_OPCODE_ISLT: return NV_CC_LT;
   case TGSI_OPCODE_ISGE: return NV_CC_GE;
   case TGSI_OPCODE_USEQ: return NV_CC_EQ;
   case TGSI_OPCODE_USGE: return NV_CC_GE;
   case TGSI_OPCODE_USLT: return NV_CC_LT;
   case TGSI_OPCODE_USNE: return NV_CC_NE;
   }
}
static uint
translate_opcode(uint opcode)
{
   switch (opcode) {
   case TGSI_OPCODE_ABS: return NV_OP_ABS;
   case TGSI_OPCODE_ADD:
   case TGSI_OPCODE_SUB:
   case TGSI_OPCODE_UADD: return NV_OP_ADD;
   case TGSI_OPCODE_AND: return NV_OP_AND;
   case TGSI_OPCODE_EX2: return NV_OP_EX2;
   case TGSI_OPCODE_CEIL: return NV_OP_CEIL;
   case TGSI_OPCODE_FLR: return NV_OP_FLOOR;
   case TGSI_OPCODE_TRUNC: return NV_OP_TRUNC;
   case TGSI_OPCODE_COS: return NV_OP_COS;
   case TGSI_OPCODE_SIN: return NV_OP_SIN;
   case TGSI_OPCODE_DDX: return NV_OP_DFDX;
   case TGSI_OPCODE_DDY: return NV_OP_DFDY;
   case TGSI_OPCODE_F2I:
   case TGSI_OPCODE_F2U:
   case TGSI_OPCODE_I2F:
   case TGSI_OPCODE_U2F: return NV_OP_CVT;
   case TGSI_OPCODE_INEG: return NV_OP_NEG;
   case TGSI_OPCODE_LG2: return NV_OP_LG2;
   case TGSI_OPCODE_ISHR:
   case TGSI_OPCODE_USHR: return NV_OP_SHR;
   case TGSI_OPCODE_MAD:
   case TGSI_OPCODE_UMAD: return NV_OP_MAD;
   case TGSI_OPCODE_MAX:
   case TGSI_OPCODE_IMAX:
   case TGSI_OPCODE_UMAX: return NV_OP_MAX;
   case TGSI_OPCODE_MIN:
   case TGSI_OPCODE_IMIN:
   case TGSI_OPCODE_UMIN: return NV_OP_MIN;
   case TGSI_OPCODE_MUL:
   case TGSI_OPCODE_UMUL: return NV_OP_MUL;
   case TGSI_OPCODE_OR: return NV_OP_OR;
   case TGSI_OPCODE_RCP: return NV_OP_RCP;
   case TGSI_OPCODE_RSQ: return NV_OP_RSQ;
   case TGSI_OPCODE_SAD: return NV_OP_SAD;
   case TGSI_OPCODE_SHL: return NV_OP_SHL;
   case TGSI_OPCODE_SLT:
   case TGSI_OPCODE_SGE:
   case TGSI_OPCODE_SEQ:
   case TGSI_OPCODE_SGT:
   case TGSI_OPCODE_SLE:
   case TGSI_OPCODE_SNE:
   case TGSI_OPCODE_ISLT:
   case TGSI_OPCODE_ISGE:
   case TGSI_OPCODE_USEQ:
   case TGSI_OPCODE_USGE:
   case TGSI_OPCODE_USLT:
   case TGSI_OPCODE_USNE: return NV_OP_SET;
   case TGSI_OPCODE_TEX: return NV_OP_TEX;
   case TGSI_OPCODE_TXP: return NV_OP_TEX;
   case TGSI_OPCODE_TXB: return NV_OP_TXB;
   case TGSI_OPCODE_TXL: return NV_OP_TXL;
   case TGSI_OPCODE_XOR: return NV_OP_XOR;
   }
}
static ubyte
infer_src_type(unsigned opcode)
{
   switch (opcode) {
   case TGSI_OPCODE_MOV:
   case TGSI_OPCODE_AND:
   case TGSI_OPCODE_XOR:
   case TGSI_OPCODE_SAD:
   case TGSI_OPCODE_U2F:
   case TGSI_OPCODE_UADD:
   case TGSI_OPCODE_UDIV:
   case TGSI_OPCODE_UMOD:
   case TGSI_OPCODE_UMAD:
   case TGSI_OPCODE_UMUL:
   case TGSI_OPCODE_UMAX:
   case TGSI_OPCODE_UMIN:
   case TGSI_OPCODE_USEQ:
   case TGSI_OPCODE_USGE:
   case TGSI_OPCODE_USLT:
   case TGSI_OPCODE_USNE:
   case TGSI_OPCODE_USHR:
      return NV_TYPE_U32;
   case TGSI_OPCODE_I2F:
   case TGSI_OPCODE_IDIV:
   case TGSI_OPCODE_IMAX:
   case TGSI_OPCODE_IMIN:
   case TGSI_OPCODE_INEG:
   case TGSI_OPCODE_ISGE:
   case TGSI_OPCODE_ISHR:
   case TGSI_OPCODE_ISLT:
      return NV_TYPE_S32;
   default:
      return NV_TYPE_F32;
   }
}
static ubyte
infer_dst_type(unsigned opcode)
{
   switch (opcode) {
   case TGSI_OPCODE_MOV:
   case TGSI_OPCODE_F2U:
   case TGSI_OPCODE_AND:
   case TGSI_OPCODE_XOR:
   case TGSI_OPCODE_SAD:
   case TGSI_OPCODE_UADD:
   case TGSI_OPCODE_UDIV:
   case TGSI_OPCODE_UMOD:
   case TGSI_OPCODE_UMAD:
   case TGSI_OPCODE_UMUL:
   case TGSI_OPCODE_UMAX:
   case TGSI_OPCODE_UMIN:
   case TGSI_OPCODE_USEQ:
   case TGSI_OPCODE_USGE:
   case TGSI_OPCODE_USLT:
   case TGSI_OPCODE_USNE:
   case TGSI_OPCODE_USHR:
      return NV_TYPE_U32;
   case TGSI_OPCODE_F2I:
   case TGSI_OPCODE_IDIV:
   case TGSI_OPCODE_IMAX:
   case TGSI_OPCODE_IMIN:
   case TGSI_OPCODE_INEG:
   case TGSI_OPCODE_ISGE:
   case TGSI_OPCODE_ISHR:
   case TGSI_OPCODE_ISLT:
      return NV_TYPE_S32;
   default:
      return NV_TYPE_F32;
   }
}
static void
emit_store(struct bld_context *bld, const struct tgsi_full_instruction *inst,
           unsigned chan, struct nv_value *value)
{
   const struct tgsi_full_dst_register *reg = &inst->Dst[0];

   if (inst->Instruction.Opcode != TGSI_OPCODE_MOV)
      value->reg.type = infer_dst_type(inst->Instruction.Opcode);

   switch (inst->Instruction.Saturate) {
   case TGSI_SAT_NONE:
      break;
   case TGSI_SAT_ZERO_ONE:
      BLD_INSN_1_EX(value, SAT, F32, value, F32);
      break;
   case TGSI_SAT_MINUS_PLUS_ONE:
      value = bld_insn_2(bld, NV_OP_MAX, value, bld_load_imm_f32(bld, -1.0f));
      value = bld_insn_2(bld, NV_OP_MIN, value, bld_load_imm_f32(bld, 1.0f));
      value->reg.type = NV_TYPE_F32;
      break;
   }

   switch (reg->Register.File) {
   case TGSI_FILE_OUTPUT:
      value = bld_insn_1(bld, NV_OP_MOV, value);
      value->reg.file = bld->ti->output_file;

      if (bld->ti->p->type == PIPE_SHADER_FRAGMENT) {
         STORE_OUTR(reg->Register.Index, chan, value);
      } else {
         value->insn->fixed = 1;
         value->reg.id = bld->ti->output_map[reg->Register.Index][chan];
      }
      break;
   case TGSI_FILE_TEMPORARY:
      assert(reg->Register.Index < BLD_MAX_TEMPS);
      value->reg.file = NV_FILE_GPR;
      if (value->insn->bb != bld->pc->current_block)
         value = bld_insn_1(bld, NV_OP_MOV, value);
      STORE_TEMP(reg->Register.Index, chan, value);
      break;
   case TGSI_FILE_ADDRESS:
      assert(reg->Register.Index < BLD_MAX_ADDRS);
      value->reg.file = NV_FILE_ADDR;
      STORE_ADDR(reg->Register.Index, chan, value);
      break;
   }
}
static INLINE uint32_t
bld_is_output_written(struct bld_context *bld, int i, int c)
{
   if (c < 0)
      return bld->outputs_written[i / 8] & (0xf << ((i * 4) % 32));
   return bld->outputs_written[i / 8] & (1 << ((i * 4 + c) % 32));
}
static void
bld_export_outputs(struct bld_context *bld)
{
   struct nv_value *vals[4];
   struct nv_instruction *nvi;
   int i, c, n;

   bld_push_values(&bld->ovs[0][0], PIPE_MAX_SHADER_OUTPUTS);

   for (i = 0; i < PIPE_MAX_SHADER_OUTPUTS; ++i) {
      if (!bld_is_output_written(bld, i, -1))
         continue;
      for (n = 0, c = 0; c < 4; ++c) {
         if (!bld_is_output_written(bld, i, c))
            continue;
         vals[n] = bld_fetch_global(bld, &bld->ovs[i][c]);

         vals[n] = bld_insn_1(bld, NV_OP_MOV, vals[n]);
         vals[n++]->reg.id = bld->ti->output_map[i][c];
      }

      (nvi = new_instruction(bld->pc, NV_OP_EXPORT))->fixed = 1;

      for (c = 0; c < n; ++c)
         nvi->src[c] = new_ref(bld->pc, vals[c]);
   }
}
static void
bld_new_block(struct bld_context *bld, struct nv_basic_block *b)
{
   int i;

   bld_push_values(&bld->tvs[0][0], BLD_MAX_TEMPS);
   bld_push_values(&bld->avs[0][0], BLD_MAX_ADDRS);
   bld_push_values(&bld->pvs[0][0], BLD_MAX_PREDS);
   bld_push_values(&bld->ovs[0][0], PIPE_MAX_SHADER_OUTPUTS);

   bld->pc->current_block = b;

   for (i = 0; i < 4; ++i)
      bld->saved_addr[i][0] = NULL;

   for (i = 0; i < 128; ++i)
      bld->saved_inputs[i] = NULL;

   bld->out_kind = CFG_EDGE_FORWARD;
}
static struct nv_value *
bld_saved_input(struct bld_context *bld, unsigned i, unsigned c)
{
   unsigned idx = bld->ti->input_map[i][c];

   if (bld->ti->p->type != PIPE_SHADER_FRAGMENT)
      return NULL;
   if (bld->saved_inputs[idx])
      return bld->saved_inputs[idx];
   return NULL;
}
static struct nv_value *
bld_interpolate(struct bld_context *bld, unsigned mode, struct nv_value *val)
{
   if (val->reg.id == 255) {
      /* gl_FrontFacing: 0/~0 to -1.0/+1.0 */
      val = bld_insn_1(bld, NV_OP_LINTERP, val);
      val = bld_insn_2(bld, NV_OP_SHL, val, bld_imm_u32(bld, 31));
      val->insn->src[0]->typecast = NV_TYPE_U32;
      val = bld_insn_2(bld, NV_OP_XOR, val, bld_imm_f32(bld, -1.0f));
      val->insn->src[0]->typecast = NV_TYPE_U32;
   } else
   if (mode & (NV50_INTERP_LINEAR | NV50_INTERP_FLAT))
      val = bld_insn_1(bld, NV_OP_LINTERP, val);
   else
      val = bld_insn_2(bld, NV_OP_PINTERP, val, bld->frgcrd[3]);

   val->insn->flat = (mode & NV50_INTERP_FLAT) ? 1 : 0;
   val->insn->centroid = (mode & NV50_INTERP_CENTROID) ? 1 : 0;

   return val;
}
static struct nv_value *
emit_fetch(struct bld_context *bld, const struct tgsi_full_instruction *insn,
           const unsigned s, const unsigned chan)
{
   const struct tgsi_full_src_register *src = &insn->Src[s];
   struct nv_value *res;
   unsigned idx, swz, dim_idx, ind_idx, ind_swz;
   ubyte type = infer_src_type(insn->Instruction.Opcode);

   idx = src->Register.Index;
   swz = tgsi_util_get_full_src_register_swizzle(src, chan);
   ind_idx = 0;
   ind_swz = 0;

   if (src->Register.Indirect) {
      ind_idx = src->Indirect.Index;
      ind_swz = tgsi_util_get_src_register_swizzle(&src->Indirect, 0);
   }

   switch (src->Register.File) {
   case TGSI_FILE_CONSTANT:
      dim_idx = src->Dimension.Index ? src->Dimension.Index + 2 : 1;
      assert(dim_idx < 14);
      assert(dim_idx == 1); /* for now */

      res = new_value(bld->pc, NV_FILE_MEM_C(dim_idx), type);
      res->reg.type = type;
      res->reg.id = (idx * 4 + swz) & 127;
      res = bld_insn_1(bld, NV_OP_LDA, res);

      if (src->Register.Indirect)
         res->insn->src[4] = new_ref(bld->pc, FETCH_ADDR(ind_idx, ind_swz));
      if (idx >= (128 / 4))
         res->insn->src[4] =
            new_ref(bld->pc, bld_get_address(bld, (idx * 16) & ~0x1ff, NULL));
      break;
   case TGSI_FILE_IMMEDIATE:
      assert(idx < bld->ti->immd32_nr);
      res = bld_load_imm_u32(bld, bld->ti->immd32[idx * 4 + swz]);

      switch (bld->ti->immd32_ty[idx]) {
      case TGSI_IMM_FLOAT32: res->reg.type = NV_TYPE_F32; break;
      case TGSI_IMM_UINT32: res->reg.type = NV_TYPE_U32; break;
      case TGSI_IMM_INT32: res->reg.type = NV_TYPE_S32; break;
      default:
         res->reg.type = type;
         break;
      }
      break;
   case TGSI_FILE_INPUT:
      res = bld_saved_input(bld, idx, swz);
      if (res && (insn->Instruction.Opcode != TGSI_OPCODE_TXP))
         return res;

      res = new_value(bld->pc, bld->ti->input_file, type);
      res->reg.id = bld->ti->input_map[idx][swz];

      if (res->reg.file == NV_FILE_MEM_V) {
         res = bld_interpolate(bld, bld->ti->interp_mode[idx], res);
      } else {
         assert(src->Dimension.Dimension == 0);
         res = bld_insn_1(bld, NV_OP_LDA, res);
         assert(res->reg.type == type);
      }
      bld->saved_inputs[bld->ti->input_map[idx][swz]] = res;
      break;
   case TGSI_FILE_TEMPORARY:
      /* this should be load from l[], with reload elimination later on */
      res = bld_fetch_global(bld, &bld->tvs[idx][swz]);
      break;
   case TGSI_FILE_ADDRESS:
      res = bld_fetch_global(bld, &bld->avs[idx][swz]);
      break;
   case TGSI_FILE_PREDICATE:
      res = bld_fetch_global(bld, &bld->pvs[idx][swz]);
      break;
   default:
      NOUVEAU_ERR("illegal/unhandled src reg file: %d\n", src->Register.File);
      break;
   }
   if (!res)
      return bld_undef(bld, NV_FILE_GPR);

   switch (tgsi_util_get_full_src_register_sign_mode(src, chan)) {
   case TGSI_UTIL_SIGN_KEEP:
      break;
   case TGSI_UTIL_SIGN_CLEAR:
      res = bld_insn_1(bld, NV_OP_ABS, res);
      break;
   case TGSI_UTIL_SIGN_TOGGLE:
      res = bld_insn_1(bld, NV_OP_NEG, res);
      break;
   case TGSI_UTIL_SIGN_SET:
      res = bld_insn_1(bld, NV_OP_ABS, res);
      res = bld_insn_1(bld, NV_OP_NEG, res);
      break;
   default:
      NOUVEAU_ERR("illegal/unhandled src reg sign mode\n");
      break;
   }
   return res;
}
static void
bld_lit(struct bld_context *bld, struct nv_value *dst0[4],
        const struct tgsi_full_instruction *insn)
{
   struct nv_value *val0, *zero;
   unsigned mask = insn->Dst[0].Register.WriteMask;

   if (mask & ((1 << 0) | (1 << 3)))
      dst0[3] = dst0[0] = bld_load_imm_f32(bld, 1.0f);

   if (mask & (3 << 1)) {
      zero = bld_load_imm_f32(bld, 0.0f);
      val0 = bld_insn_2(bld, NV_OP_MAX, emit_fetch(bld, insn, 0, 0), zero);

      if (mask & (1 << 1))
         dst0[1] = val0;

      if (mask & (1 << 2)) {
         struct nv_value *val1, *val3, *src1, *src3;
         struct nv_value *pos128 = bld_load_imm_f32(bld,  127.999999f);
         struct nv_value *neg128 = bld_load_imm_f32(bld, -127.999999f);

         src1 = emit_fetch(bld, insn, 0, 1);
         src3 = emit_fetch(bld, insn, 0, 3);

         val0->insn->flags_def = new_value(bld->pc, NV_FILE_FLAGS, NV_TYPE_U16);
         val0->insn->flags_def->insn = val0->insn;

         val1 = bld_insn_2(bld, NV_OP_MAX, src1, zero);
         val3 = bld_insn_2(bld, NV_OP_MAX, src3, neg128);
         val3 = bld_insn_2(bld, NV_OP_MIN, val3, pos128);
         val3 = bld_pow(bld, val1, val3);

         dst0[2] = bld_insn_1(bld, NV_OP_MOV, zero);
         dst0[2]->insn->cc = NV_CC_LE;
         dst0[2]->insn->flags_src = new_ref(bld->pc, val0->insn->flags_def);

         dst0[2] = bld_insn_2(bld, NV_OP_SELECT, val3, dst0[2]);
      }
   }
}
static void
get_tex_dim(const struct tgsi_full_instruction *insn, int *dim, int *arg)
{
   switch (insn->Texture.Texture) {
   case TGSI_TEXTURE_1D:
      *arg = *dim = 1;
      break;
   case TGSI_TEXTURE_SHADOW1D:
      *dim = 1;
      *arg = 3;
      break;
   case TGSI_TEXTURE_UNKNOWN:
   case TGSI_TEXTURE_2D:
   case TGSI_TEXTURE_RECT:
      *arg = *dim = 2;
      break;
   case TGSI_TEXTURE_SHADOW2D:
   case TGSI_TEXTURE_SHADOWRECT:
      *dim = 2;
      *arg = 3;
      break;
   case TGSI_TEXTURE_3D:
   case TGSI_TEXTURE_CUBE:
      *dim = *arg = 3;
      break;
   }
}
static void
load_proj_tex_coords(struct bld_context *bld,
                     struct nv_value *t[4], int dim,
                     const struct tgsi_full_instruction *insn)
{
   int c, mask = 0;

   t[3] = emit_fetch(bld, insn, 0, 3);

   if (t[3]->insn->opcode == NV_OP_PINTERP) {
      t[3]->insn->opcode = NV_OP_LINTERP;
      nv_reference(bld->pc, &t[3]->insn->src[1], NULL);
   }

   t[3] = bld_insn_1(bld, NV_OP_RCP, t[3]);

   for (c = 0; c < dim; ++c) {
      t[c] = emit_fetch(bld, insn, 0, c);
      if (t[c]->insn->opcode == NV_OP_LINTERP)
         t[c]->insn->opcode = NV_OP_PINTERP;

      if (t[c]->insn->opcode == NV_OP_PINTERP)
         nv_reference(bld->pc, &t[c]->insn->src[1], t[3]);
      else
         mask |= 1 << c;
   }

   for (c = 0; mask; ++c, mask >>= 1) {
      if (!(mask & 1))
         continue;
      t[c] = bld_insn_2(bld, NV_OP_MUL, t[c], t[3]);
   }
}
/* For a quad of threads / top left, top right, bottom left, bottom right
 * pixels, do a different operation, and take src0 from a specific thread.
 */
#define QOP(a, b, c, d) \
   ((QOP_##a << 0) | (QOP_##b << 2) | (QOP_##c << 4) | (QOP_##d << 6))
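/* Usage note (added, see bld_texbias_sequence() below): QOP() packs one
 * 2-bit quad operation per lane into a single byte, e.g.
 * QOP(SUBR, SUBR, SUBR, SUBR) selects the same operation for all four
 * pixels of the quad, and the lane argument of bld_quadop() picks which
 * pixel provides src0.
 */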
static INLINE struct nv_value *
bld_quadop(struct bld_context *bld, ubyte qop, struct nv_value *src0, int lane,
           struct nv_value *src1, boolean wp)
{
   struct nv_value *val = bld_insn_2(bld, NV_OP_QUADOP, src0, src1);
   val->insn->lanes = lane;
   val->insn->quadop = qop;
   if (wp) {
      val->insn->flags_def = new_value(bld->pc, NV_FILE_FLAGS, NV_TYPE_U16);
      val->insn->flags_def->insn = val->insn;
   }
   return val;
}
static INLINE struct nv_value *
bld_cmov(struct bld_context *bld,
         struct nv_value *src, ubyte cc, struct nv_value *cr)
{
   src = bld_insn_1(bld, NV_OP_MOV, src);

   src->insn->cc = cc;
   src->insn->flags_src = new_ref(bld->pc, cr);

   return src;
}
static struct nv_instruction *
emit_tex(struct bld_context *bld, uint opcode,
         struct nv_value *dst[4], struct nv_value *t_in[4],
         int argc, int tic, int tsc, int cube)
{
   struct nv_value *t[4];
   struct nv_instruction *nvi;
   int c;

   /* the inputs to a tex instruction must be separate values */
   for (c = 0; c < argc; ++c) {
      t[c] = bld_insn_1(bld, NV_OP_MOV, t_in[c]);
      t[c]->reg.type = NV_TYPE_F32;
      t[c]->insn->fixed = 1;
   }

   nvi = new_instruction(bld->pc, opcode);

   for (c = 0; c < 4; ++c)
      dst[c] = bld_def(nvi, c, new_value(bld->pc, NV_FILE_GPR, NV_TYPE_F32));

   for (c = 0; c < argc; ++c)
      nvi->src[c] = new_ref(bld->pc, t[c]);

   nvi->tex_mask = 0xf;
   nvi->tex_cube = cube;

   nvi->tex_argc = argc;

   return nvi;
}
static void
bld_texlod_sequence(struct bld_context *bld,
                    struct nv_value *dst[4], struct nv_value *t[4], int arg,
                    int tic, int tsc, int cube)
{
   emit_tex(bld, NV_OP_TXL, dst, t, arg, tic, tsc, cube); /* TODO */
}
/* The lanes of a quad are grouped by the bit in the condition register
 * they have set, which is selected by differing bias values.
 * Move the input values for TEX into a new register set for each group
 * and execute TEX only for a specific group.
 * We always need to use 4 new registers for the inputs/outputs because
 * the implicitly calculated derivatives must be correct.
 */
static void
bld_texbias_sequence(struct bld_context *bld,
                     struct nv_value *dst[4], struct nv_value *t[4], int arg,
                     int tic, int tsc, int cube)
{
   struct nv_instruction *sel, *tex;
   struct nv_value *bit[4], *cr[4], *res[4][4], *val;
   int l, c;

   const ubyte cc[4] = { NV_CC_EQ, NV_CC_S, NV_CC_C, NV_CC_O };

   for (l = 0; l < 4; ++l) {
      bit[l] = bld_load_imm_u32(bld, 1 << l);

      val = bld_quadop(bld, QOP(SUBR, SUBR, SUBR, SUBR),
                       t[arg - 1], l, t[arg - 1], TRUE);

      cr[l] = bld_cmov(bld, bit[l], NV_CC_EQ, val->insn->flags_def);

      cr[l]->reg.file = NV_FILE_FLAGS;
      cr[l]->reg.type = NV_TYPE_U16;
   }

   sel = new_instruction(bld->pc, NV_OP_SELECT);

   for (l = 0; l < 4; ++l)
      sel->src[l] = new_ref(bld->pc, cr[l]);

   bld_def(sel, 0, new_value(bld->pc, NV_FILE_FLAGS, NV_TYPE_U16));

   for (l = 0; l < 4; ++l) {
      tex = emit_tex(bld, NV_OP_TXB, dst, t, arg, tic, tsc, cube);

      tex->flags_src = new_ref(bld->pc, sel->def[0]);

      for (c = 0; c < 4; ++c)
         res[l][c] = tex->def[c];
   }

   for (l = 0; l < 4; ++l)
      for (c = 0; c < 4; ++c)
         res[l][c] = bld_cmov(bld, res[l][c], cc[l], sel->def[0]);

   for (c = 0; c < 4; ++c) {
      sel = new_instruction(bld->pc, NV_OP_SELECT);

      for (l = 0; l < 4; ++l)
         sel->src[l] = new_ref(bld->pc, res[l][c]);

      bld_def(sel, 0, (dst[c] = new_value(bld->pc, NV_FILE_GPR, NV_TYPE_F32)));
   }
}
static boolean
bld_is_constant(struct nv_value *val)
{
   if (val->reg.file == NV_FILE_IMM)
      return TRUE;
   return val->insn && nvcg_find_constant(val->insn->src[0]);
}
static void
bld_tex(struct bld_context *bld, struct nv_value *dst0[4],
        const struct tgsi_full_instruction *insn)
{
   struct nv_value *t[4], *s[3];
   uint opcode = translate_opcode(insn->Instruction.Opcode);
   int arg, dim, c;
   const int tic = insn->Src[1].Register.Index;
   const int tsc = 0;
   const int cube = (insn->Texture.Texture == TGSI_TEXTURE_CUBE) ? 1 : 0;

   get_tex_dim(insn, &dim, &arg);

   if (!cube && insn->Instruction.Opcode == TGSI_OPCODE_TXP)
      load_proj_tex_coords(bld, t, dim, insn);
   else
      for (c = 0; c < dim; ++c)
         t[c] = emit_fetch(bld, insn, 0, c);

   if (cube) {
      for (c = 0; c < 3; ++c)
         s[c] = bld_insn_1(bld, NV_OP_ABS, t[c]);

      s[0] = bld_insn_2(bld, NV_OP_MAX, s[0], s[1]);
      s[0] = bld_insn_2(bld, NV_OP_MAX, s[0], s[2]);
      s[0] = bld_insn_1(bld, NV_OP_RCP, s[0]);

      for (c = 0; c < 3; ++c)
         t[c] = bld_insn_2(bld, NV_OP_MUL, t[c], s[0]);
   }

   if (arg != dim)
      t[dim] = emit_fetch(bld, insn, 0, 2);

   if (opcode == NV_OP_TXB || opcode == NV_OP_TXL) {
      t[arg++] = emit_fetch(bld, insn, 0, 3);

      if ((bld->ti->p->type == PIPE_SHADER_FRAGMENT) &&
          !bld_is_constant(t[arg - 1])) {
         if (opcode == NV_OP_TXB)
            bld_texbias_sequence(bld, dst0, t, arg, tic, tsc, cube);
         else
            bld_texlod_sequence(bld, dst0, t, arg, tic, tsc, cube);
         return;
      }
   }

   emit_tex(bld, opcode, dst0, t, arg, tic, tsc, cube);
}
static INLINE struct nv_value *
bld_dot(struct bld_context *bld, const struct tgsi_full_instruction *insn,
        int n)
{
   struct nv_value *dotp, *src0, *src1;
   int c;

   src0 = emit_fetch(bld, insn, 0, 0);
   src1 = emit_fetch(bld, insn, 1, 0);
   dotp = bld_insn_2(bld, NV_OP_MUL, src0, src1);

   for (c = 1; c < n; ++c) {
      src0 = emit_fetch(bld, insn, 0, c);
      src1 = emit_fetch(bld, insn, 1, c);
      dotp = bld_insn_3(bld, NV_OP_MAD, src0, src1, dotp);
   }
   return dotp;
}
#define FOR_EACH_DST0_ENABLED_CHANNEL(chan, inst) \
   for (chan = 0; chan < 4; ++chan) \
      if ((inst)->Dst[0].Register.WriteMask & (1 << chan))
static void
bld_instruction(struct bld_context *bld,
                const struct tgsi_full_instruction *insn)
{
   struct nv_value *src0;
   struct nv_value *src1;
   struct nv_value *src2;
   struct nv_value *dst0[4];
   struct nv_value *temp;
   int c;
   uint opcode = translate_opcode(insn->Instruction.Opcode);

#ifdef NV50_TGSI2NC_DEBUG
   debug_printf("bld_instruction:"); tgsi_dump_instruction(insn, 1);
#endif

   switch (insn->Instruction.Opcode) {
   case TGSI_OPCODE_ADD:
   case TGSI_OPCODE_MAX:
   case TGSI_OPCODE_MIN:
   case TGSI_OPCODE_MUL:
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
         src0 = emit_fetch(bld, insn, 0, c);
         src1 = emit_fetch(bld, insn, 1, c);
         dst0[c] = bld_insn_2(bld, opcode, src0, src1);
      }
      break;
   case TGSI_OPCODE_ARL:
      src1 = bld_imm_u32(bld, 4);
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
         src0 = emit_fetch(bld, insn, 0, c);
         (temp = bld_insn_1(bld, NV_OP_FLOOR, src0))->reg.type = NV_TYPE_S32;
         dst0[c] = bld_insn_2(bld, NV_OP_SHL, temp, src1);
      }
      break;
   case TGSI_OPCODE_CMP:
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
         src0 = emit_fetch(bld, insn, 0, c);
         src1 = emit_fetch(bld, insn, 1, c);
         src2 = emit_fetch(bld, insn, 2, c);
         src0 = bld_predicate(bld, src0, FALSE);

         src1 = bld_insn_1(bld, NV_OP_MOV, src1);
         src1->insn->flags_src = new_ref(bld->pc, src0);
         src1->insn->cc = NV_CC_LT;

         src2 = bld_insn_1(bld, NV_OP_MOV, src2);
         src2->insn->flags_src = new_ref(bld->pc, src0);
         src2->insn->cc = NV_CC_GE;

         dst0[c] = bld_insn_2(bld, NV_OP_SELECT, src1, src2);
      }
      break;
   case TGSI_OPCODE_COS:
   case TGSI_OPCODE_SIN:
      src0 = emit_fetch(bld, insn, 0, 0);
      temp = bld_insn_1(bld, NV_OP_PRESIN, src0);
      if (insn->Dst[0].Register.WriteMask & 7)
         temp = bld_insn_1(bld, opcode, temp);
      for (c = 0; c < 3; ++c)
         if (insn->Dst[0].Register.WriteMask & (1 << c))
            dst0[c] = temp;
      if (!(insn->Dst[0].Register.WriteMask & (1 << 3)))
         break;
      src0 = emit_fetch(bld, insn, 0, 3);
      temp = bld_insn_1(bld, NV_OP_PRESIN, src0);
      dst0[3] = bld_insn_1(bld, opcode, temp);
      break;
   case TGSI_OPCODE_DP2:
      temp = bld_dot(bld, insn, 2);
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
         dst0[c] = temp;
      break;
   case TGSI_OPCODE_DP3:
      temp = bld_dot(bld, insn, 3);
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
         dst0[c] = temp;
      break;
   case TGSI_OPCODE_DP4:
      temp = bld_dot(bld, insn, 4);
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
         dst0[c] = temp;
      break;
   case TGSI_OPCODE_DPH:
      src0 = bld_dot(bld, insn, 3);
      src1 = emit_fetch(bld, insn, 1, 3);
      temp = bld_insn_2(bld, NV_OP_ADD, src0, src1);
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
         dst0[c] = temp;
      break;
   case TGSI_OPCODE_DST:
      if (insn->Dst[0].Register.WriteMask & 1)
         dst0[0] = bld_imm_f32(bld, 1.0f);
      if (insn->Dst[0].Register.WriteMask & 2) {
         src0 = emit_fetch(bld, insn, 0, 1);
         src1 = emit_fetch(bld, insn, 1, 1);
         dst0[1] = bld_insn_2(bld, NV_OP_MUL, src0, src1);
      }
      if (insn->Dst[0].Register.WriteMask & 4)
         dst0[2] = emit_fetch(bld, insn, 0, 2);
      if (insn->Dst[0].Register.WriteMask & 8)
         dst0[3] = emit_fetch(bld, insn, 1, 3);
      break;
   case TGSI_OPCODE_EX2:
      src0 = emit_fetch(bld, insn, 0, 0);
      temp = bld_insn_1(bld, NV_OP_PREEX2, src0);
      temp = bld_insn_1(bld, NV_OP_EX2, temp);
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
         dst0[c] = temp;
      break;
   case TGSI_OPCODE_FRC:
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
         src0 = emit_fetch(bld, insn, 0, c);
         dst0[c] = bld_insn_1(bld, NV_OP_FLOOR, src0);
         dst0[c] = bld_insn_2(bld, NV_OP_SUB, src0, dst0[c]);
      }
      break;
   case TGSI_OPCODE_KIL:
      for (c = 0; c < 4; ++c) {
         src0 = emit_fetch(bld, insn, 0, c);
         bld_kil(bld, src0);
      }
      break;
   case TGSI_OPCODE_KILP:
      (new_instruction(bld->pc, NV_OP_KIL))->fixed = 1;
      break;
   case TGSI_OPCODE_IF:
   {
      struct nv_basic_block *b = new_basic_block(bld->pc);

      nvbb_attach_block(bld->pc->current_block, b, CFG_EDGE_FORWARD);

      bld->join_bb[bld->cond_lvl] = bld->pc->current_block;
      bld->cond_bb[bld->cond_lvl] = bld->pc->current_block;

      src1 = bld_predicate(bld, emit_fetch(bld, insn, 0, 0), TRUE);

      bld_flow(bld, NV_OP_BRA, NV_CC_EQ, src1, NULL, (bld->cond_lvl == 0));

      ++bld->cond_lvl;
      bld_new_block(bld, b);
   }
      break;
   case TGSI_OPCODE_ELSE:
   {
      struct nv_basic_block *b = new_basic_block(bld->pc);

      --bld->cond_lvl;
      nvbb_attach_block(bld->join_bb[bld->cond_lvl], b, CFG_EDGE_FORWARD);

      bld->cond_bb[bld->cond_lvl]->exit->target = b;
      bld->cond_bb[bld->cond_lvl] = bld->pc->current_block;

      new_instruction(bld->pc, NV_OP_BRA)->is_terminator = 1;

      ++bld->cond_lvl;
      bld_new_block(bld, b);
   }
      break;
   case TGSI_OPCODE_ENDIF:
   {
      struct nv_basic_block *b = new_basic_block(bld->pc);

      --bld->cond_lvl;
      nvbb_attach_block(bld->pc->current_block, b, bld->out_kind);
      nvbb_attach_block(bld->cond_bb[bld->cond_lvl], b, CFG_EDGE_FORWARD);

      bld->cond_bb[bld->cond_lvl]->exit->target = b;

      bld_new_block(bld, b);

      if (!bld->cond_lvl && bld->join_bb[bld->cond_lvl]) {
         bld->join_bb[bld->cond_lvl]->exit->prev->target = b;
         new_instruction(bld->pc, NV_OP_JOIN)->is_join = TRUE;
      }
   }
      break;
   case TGSI_OPCODE_BGNLOOP:
   {
      struct nv_basic_block *bl = new_basic_block(bld->pc);
      struct nv_basic_block *bb = new_basic_block(bld->pc);

      bld->loop_bb[bld->loop_lvl] = bl;
      bld->brkt_bb[bld->loop_lvl] = bb;

      bld_flow(bld, NV_OP_BREAKADDR, NV_CC_TR, NULL, bb, FALSE);

      nvbb_attach_block(bld->pc->current_block, bl, CFG_EDGE_LOOP_ENTER);

      bld_new_block(bld, bld->loop_bb[bld->loop_lvl++]);

      if (bld->loop_lvl == bld->pc->loop_nesting_bound)
         bld->pc->loop_nesting_bound++;

      bld_clear_def_use(&bld->tvs[0][0], BLD_MAX_TEMPS, bld->loop_lvl);
      bld_clear_def_use(&bld->avs[0][0], BLD_MAX_ADDRS, bld->loop_lvl);
      bld_clear_def_use(&bld->pvs[0][0], BLD_MAX_PREDS, bld->loop_lvl);
   }
      break;
   case TGSI_OPCODE_BRK:
   {
      struct nv_basic_block *bb = bld->brkt_bb[bld->loop_lvl - 1];

      bld_flow(bld, NV_OP_BREAK, NV_CC_TR, NULL, bb, FALSE);

      if (bld->out_kind == CFG_EDGE_FORWARD) /* else we already had BRK/CONT */
         nvbb_attach_block(bld->pc->current_block, bb, CFG_EDGE_LOOP_LEAVE);

      bld->out_kind = CFG_EDGE_FAKE;
   }
      break;
   case TGSI_OPCODE_CONT:
   {
      struct nv_basic_block *bb = bld->loop_bb[bld->loop_lvl - 1];

      bld_flow(bld, NV_OP_BRA, NV_CC_TR, NULL, bb, FALSE);

      nvbb_attach_block(bld->pc->current_block, bb, CFG_EDGE_BACK);

      if ((bb = bld->join_bb[bld->cond_lvl - 1])) {
         bld->join_bb[bld->cond_lvl - 1] = NULL;
         nv_nvi_delete(bb->exit->prev);
      }
      bld->out_kind = CFG_EDGE_FAKE;
   }
      break;
   case TGSI_OPCODE_ENDLOOP:
   {
      struct nv_basic_block *bb = bld->loop_bb[bld->loop_lvl - 1];

      bld_flow(bld, NV_OP_BRA, NV_CC_TR, NULL, bb, FALSE);

      nvbb_attach_block(bld->pc->current_block, bb, CFG_EDGE_BACK);

      bld_loop_end(bld, bb); /* replace loop-side operand of the phis */

      bld_new_block(bld, bld->brkt_bb[--bld->loop_lvl]);
   }
      break;
   case TGSI_OPCODE_ABS:
   case TGSI_OPCODE_CEIL:
   case TGSI_OPCODE_FLR:
   case TGSI_OPCODE_TRUNC:
   case TGSI_OPCODE_DDX:
   case TGSI_OPCODE_DDY:
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
         src0 = emit_fetch(bld, insn, 0, c);
         dst0[c] = bld_insn_1(bld, opcode, src0);
      }
      break;
   case TGSI_OPCODE_LIT:
      bld_lit(bld, dst0, insn);
      break;
   case TGSI_OPCODE_LRP:
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
         src0 = emit_fetch(bld, insn, 0, c);
         src1 = emit_fetch(bld, insn, 1, c);
         src2 = emit_fetch(bld, insn, 2, c);
         dst0[c] = bld_insn_2(bld, NV_OP_SUB, src1, src2);
         dst0[c] = bld_insn_3(bld, NV_OP_MAD, dst0[c], src0, src2);
      }
      break;
   case TGSI_OPCODE_MOV:
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
         dst0[c] = emit_fetch(bld, insn, 0, c);
      break;
   case TGSI_OPCODE_MAD:
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
         src0 = emit_fetch(bld, insn, 0, c);
         src1 = emit_fetch(bld, insn, 1, c);
         src2 = emit_fetch(bld, insn, 2, c);
         dst0[c] = bld_insn_3(bld, opcode, src0, src1, src2);
      }
      break;
   case TGSI_OPCODE_POW:
      src0 = emit_fetch(bld, insn, 0, 0);
      src1 = emit_fetch(bld, insn, 1, 0);
      temp = bld_pow(bld, src0, src1);
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
         dst0[c] = temp;
      break;
   case TGSI_OPCODE_RCP:
   case TGSI_OPCODE_LG2:
      src0 = emit_fetch(bld, insn, 0, 0);
      temp = bld_insn_1(bld, opcode, src0);
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
         dst0[c] = temp;
      break;
   case TGSI_OPCODE_RSQ:
      src0 = emit_fetch(bld, insn, 0, 0);
      temp = bld_insn_1(bld, NV_OP_ABS, src0);
      temp = bld_insn_1(bld, NV_OP_RSQ, temp);
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
         dst0[c] = temp;
      break;
   case TGSI_OPCODE_SLT:
   case TGSI_OPCODE_SGE:
   case TGSI_OPCODE_SEQ:
   case TGSI_OPCODE_SGT:
   case TGSI_OPCODE_SLE:
   case TGSI_OPCODE_SNE:
   case TGSI_OPCODE_ISLT:
   case TGSI_OPCODE_ISGE:
   case TGSI_OPCODE_USEQ:
   case TGSI_OPCODE_USGE:
   case TGSI_OPCODE_USLT:
   case TGSI_OPCODE_USNE:
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
         src0 = emit_fetch(bld, insn, 0, c);
         src1 = emit_fetch(bld, insn, 1, c);
         dst0[c] = bld_insn_2(bld, NV_OP_SET, src0, src1);
         dst0[c]->insn->set_cond = translate_setcc(insn->Instruction.Opcode);
         dst0[c]->reg.type = infer_dst_type(insn->Instruction.Opcode);

         dst0[c]->insn->src[0]->typecast =
         dst0[c]->insn->src[1]->typecast =
            infer_src_type(insn->Instruction.Opcode);

         if (dst0[c]->reg.type != NV_TYPE_F32)
            continue;
         dst0[c] = bld_insn_1(bld, NV_OP_ABS, dst0[c]);
         dst0[c]->insn->src[0]->typecast = NV_TYPE_S32;
         dst0[c]->reg.type = NV_TYPE_S32;
         dst0[c] = bld_insn_1(bld, NV_OP_CVT, dst0[c]);
         dst0[c]->reg.type = NV_TYPE_F32;
      }
      break;
   case TGSI_OPCODE_SCS:
      if (insn->Dst[0].Register.WriteMask & 0x3) {
         src0 = emit_fetch(bld, insn, 0, 0);
         temp = bld_insn_1(bld, NV_OP_PRESIN, src0);
         if (insn->Dst[0].Register.WriteMask & 0x1)
            dst0[0] = bld_insn_1(bld, NV_OP_COS, temp);
         if (insn->Dst[0].Register.WriteMask & 0x2)
            dst0[1] = bld_insn_1(bld, NV_OP_SIN, temp);
      }
      if (insn->Dst[0].Register.WriteMask & 0x4)
         dst0[2] = bld_imm_f32(bld, 0.0f);
      if (insn->Dst[0].Register.WriteMask & 0x8)
         dst0[3] = bld_imm_f32(bld, 1.0f);
      break;
   case TGSI_OPCODE_SSG:
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
         src0 = emit_fetch(bld, insn, 0, c);
         src1 = bld_predicate(bld, src0, FALSE);
         temp = bld_insn_2(bld, NV_OP_AND, src0, bld_imm_u32(bld, 0x80000000));
         temp = bld_insn_2(bld, NV_OP_OR, temp, bld_imm_f32(bld, 1.0f));
         dst0[c] = bld_insn_2(bld, NV_OP_XOR, temp, temp);
         dst0[c]->insn->cc = NV_CC_EQ;
         nv_reference(bld->pc, &dst0[c]->insn->flags_src, src1);
      }
      break;
   case TGSI_OPCODE_SUB:
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
         src0 = emit_fetch(bld, insn, 0, c);
         src1 = emit_fetch(bld, insn, 1, c);
         dst0[c] = bld_insn_2(bld, NV_OP_ADD, src0, src1);
         dst0[c]->insn->src[1]->mod ^= NV_MOD_NEG;
      }
      break;
   case TGSI_OPCODE_TEX:
   case TGSI_OPCODE_TXB:
   case TGSI_OPCODE_TXL:
   case TGSI_OPCODE_TXP:
      bld_tex(bld, dst0, insn);
      break;
   case TGSI_OPCODE_XPD:
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
         if (c == 3) {
            dst0[3] = bld_imm_f32(bld, 1.0f);
            break;
         }
         src0 = emit_fetch(bld, insn, 1, (c + 1) % 3);
         src1 = emit_fetch(bld, insn, 0, (c + 2) % 3);
         dst0[c] = bld_insn_2(bld, NV_OP_MUL, src0, src1);

         src0 = emit_fetch(bld, insn, 0, (c + 1) % 3);
         src1 = emit_fetch(bld, insn, 1, (c + 2) % 3);
         dst0[c] = bld_insn_3(bld, NV_OP_MAD, src0, src1, dst0[c]);

         dst0[c]->insn->src[2]->mod ^= NV_MOD_NEG;
      }
      break;
   case TGSI_OPCODE_RET:
      (new_instruction(bld->pc, NV_OP_RET))->fixed = 1;
      break;
   case TGSI_OPCODE_END:
      if (bld->ti->p->type == PIPE_SHADER_FRAGMENT)
         bld_export_outputs(bld);
      break;
   default:
      NOUVEAU_ERR("unhandled opcode %u\n", insn->Instruction.Opcode);
      return;
   }

   FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
      emit_store(bld, insn, c, dst0[c]);
}
static void
bld_free_value_trackers(struct bld_value_stack *base, int n)
{
   int i, c;

   for (i = 0; i < n; ++i)
      for (c = 0; c < 4; ++c)
         if (base[i * 4 + c].body)
            FREE(base[i * 4 + c].body);
}
int
nv50_tgsi_to_nc(struct nv_pc *pc, struct nv50_translation_info *ti)
{
   struct bld_context *bld = CALLOC_STRUCT(bld_context);
   int c;

   pc->root = pc->current_block = new_basic_block(pc);

   bld->pc = pc;
   bld->ti = ti;

   pc->loop_nesting_bound = 1;

   c = util_bitcount(bld->ti->p->fp.interp >> 24);
   if (c && ti->p->type == PIPE_SHADER_FRAGMENT) {
      bld->frgcrd[3] = new_value(pc, NV_FILE_MEM_V, NV_TYPE_F32);
      bld->frgcrd[3]->reg.id = c - 1;
      bld->frgcrd[3] = bld_insn_1(bld, NV_OP_LINTERP, bld->frgcrd[3]);
      bld->frgcrd[3] = bld_insn_1(bld, NV_OP_RCP, bld->frgcrd[3]);
   }

   tgsi_parse_init(&bld->parse[0], ti->p->pipe.tokens);

   while (!tgsi_parse_end_of_tokens(&bld->parse[bld->call_lvl])) {
      const union tgsi_full_token *tok = &bld->parse[bld->call_lvl].FullToken;

      tgsi_parse_token(&bld->parse[bld->call_lvl]);

      switch (tok->Token.Type) {
      case TGSI_TOKEN_TYPE_INSTRUCTION:
         bld_instruction(bld, &tok->FullInstruction);
         break;
      default:
         break;
      }
   }

   bld_free_value_trackers(&bld->tvs[0][0], BLD_MAX_TEMPS);
   bld_free_value_trackers(&bld->avs[0][0], BLD_MAX_ADDRS);
   bld_free_value_trackers(&bld->pvs[0][0], BLD_MAX_PREDS);

   bld_free_value_trackers(&bld->ovs[0][0], PIPE_MAX_SHADER_OUTPUTS);

   FREE(bld);

   return 0;
}
/* If a variable is assigned in a loop, replace all references to the value
 * from outside the loop with a phi value.
 */
static void
bld_replace_value(struct nv_pc *pc, struct nv_basic_block *b,
                  struct nv_value *old_val,
                  struct nv_value *new_val)
{
   struct nv_instruction *nvi;
   int s;

   for (nvi = b->phi ? b->phi : b->entry; nvi; nvi = nvi->next) {
      for (s = 0; s < 5; ++s) {
         if (!nvi->src[s])
            continue;
         if (nvi->src[s]->value == old_val)
            nv_reference(pc, &nvi->src[s], new_val);
      }
      if (nvi->flags_src && nvi->flags_src->value == old_val)
         nv_reference(pc, &nvi->flags_src, new_val);
   }
   b->pass_seq = pc->pass_seq;

   if (b->out[0] && b->out[0]->pass_seq < pc->pass_seq)
      bld_replace_value(pc, b->out[0], old_val, new_val);

   if (b->out[1] && b->out[1]->pass_seq < pc->pass_seq)
      bld_replace_value(pc, b->out[1], old_val, new_val);
}