2 * Copyright 2010 Christoph Bumiller
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 /* #define NV50_TGSI2NC_DEBUG */
27 #include "nv50_context.h"
30 #include "pipe/p_shader_tokens.h"
31 #include "tgsi/tgsi_parse.h"
32 #include "tgsi/tgsi_util.h"
34 #include "tgsi/tgsi_dump.h"
36 #define BLD_MAX_TEMPS 64
37 #define BLD_MAX_ADDRS 4
38 #define BLD_MAX_PREDS 4
39 #define BLD_MAX_IMMDS 128
41 #define BLD_MAX_COND_NESTING 8
42 #define BLD_MAX_LOOP_NESTING 4
43 #define BLD_MAX_CALL_NESTING 2
/* collects all values assigned to the same TGSI register */
struct bld_value_stack {
   struct nv_value *top;    /* current (most recent) definition */
   struct nv_value **body;  /* previous definitions, grown in chunks of 8 */
   unsigned size;           /* number of entries in body */
   /* NOTE(review): size/loop_def fields reconstructed from uses below
    * (stk->size, stk->loop_def) — confirm against upstream.
    */
   uint16_t loop_use; /* 1 bit per loop level, indicates if used/defd */
   uint16_t loop_def; /* 1 bit per loop level, indicates if defined */
};
55 bld_vals_push_val(struct bld_value_stack
*stk
, struct nv_value
*val
)
57 assert(!stk
->size
|| (stk
->body
[stk
->size
- 1] != val
));
59 if (!(stk
->size
% 8)) {
60 unsigned old_sz
= (stk
->size
+ 0) * sizeof(struct nv_value
*);
61 unsigned new_sz
= (stk
->size
+ 8) * sizeof(struct nv_value
*);
62 stk
->body
= (struct nv_value
**)REALLOC(stk
->body
, old_sz
, new_sz
);
64 stk
->body
[stk
->size
++] = val
;
68 bld_vals_del_val(struct bld_value_stack
*stk
, struct nv_value
*val
)
72 for (i
= stk
->size
; i
> 0; --i
)
73 if (stk
->body
[i
- 1] == val
)
79 stk
->body
[i
- 1] = stk
->body
[stk
->size
- 1];
81 --stk
->size
; /* XXX: old size in REALLOC */
86 bld_vals_push(struct bld_value_stack
*stk
)
88 bld_vals_push_val(stk
, stk
->top
);
93 bld_push_values(struct bld_value_stack
*stacks
, int n
)
97 for (i
= 0; i
< n
; ++i
)
98 for (c
= 0; c
< 4; ++c
)
99 if (stacks
[i
* 4 + c
].top
)
100 bld_vals_push(&stacks
[i
* 4 + c
]);
104 struct nv50_translation_info
*ti
;
107 struct nv_basic_block
*b
;
109 struct tgsi_parse_context parse
[BLD_MAX_CALL_NESTING
];
112 struct nv_basic_block
*cond_bb
[BLD_MAX_COND_NESTING
];
113 struct nv_basic_block
*join_bb
[BLD_MAX_COND_NESTING
];
114 struct nv_basic_block
*else_bb
[BLD_MAX_COND_NESTING
];
116 struct nv_basic_block
*loop_bb
[BLD_MAX_LOOP_NESTING
];
117 struct nv_basic_block
*brkt_bb
[BLD_MAX_LOOP_NESTING
];
120 ubyte out_kind
; /* CFG_EDGE_FORWARD, or FAKE in case of BREAK/CONT */
122 struct bld_value_stack tvs
[BLD_MAX_TEMPS
][4]; /* TGSI_FILE_TEMPORARY */
123 struct bld_value_stack avs
[BLD_MAX_ADDRS
][4]; /* TGSI_FILE_ADDRESS */
124 struct bld_value_stack pvs
[BLD_MAX_PREDS
][4]; /* TGSI_FILE_PREDICATE */
125 struct bld_value_stack ovs
[PIPE_MAX_SHADER_OUTPUTS
][4];
127 uint32_t outputs_written
[(PIPE_MAX_SHADER_OUTPUTS
+ 7) / 8];
129 struct nv_value
*frgcrd
[4];
130 struct nv_value
*sysval
[4];
133 struct nv_value
*saved_addr
[4][2];
134 struct nv_value
*saved_inputs
[128];
135 struct nv_value
*saved_immd
[BLD_MAX_IMMDS
];
140 bld_stack_file(struct bld_context
*bld
, struct bld_value_stack
*stk
)
142 if (stk
< &bld
->avs
[0][0])
145 if (stk
< &bld
->pvs
[0][0])
148 if (stk
< &bld
->ovs
[0][0])
149 return NV_FILE_FLAGS
;
154 static INLINE
struct nv_value
*
155 bld_fetch(struct bld_context
*bld
, struct bld_value_stack
*stk
, int i
, int c
)
157 stk
[i
* 4 + c
].loop_use
|= 1 << bld
->loop_lvl
;
159 return stk
[i
* 4 + c
].top
;
/* forward declaration: defined after bld_phi, which it calls */
static struct nv_value *
bld_loop_phi(struct bld_context *, struct bld_value_stack *, struct nv_value *);
165 /* If a variable is defined in a loop without prior use, we don't need
166 * a phi in the loop header to account for backwards flow.
168 * However, if this variable is then also used outside the loop, we do
169 * need a phi after all. But we must not use this phi's def inside the
170 * loop, so we can eliminate the phi if it is unused later.
173 bld_store(struct bld_context
*bld
, struct bld_value_stack
*stk
, int i
, int c
,
174 struct nv_value
*val
)
176 const uint16_t m
= 1 << bld
->loop_lvl
;
178 stk
= &stk
[i
* 4 + c
];
180 if (bld
->loop_lvl
&& !(m
& (stk
->loop_def
| stk
->loop_use
)))
181 bld_loop_phi(bld
, stk
, val
);
184 stk
->loop_def
|= 1 << bld
->loop_lvl
;
188 bld_clear_def_use(struct bld_value_stack
*stk
, int n
, int lvl
)
191 const uint16_t mask
= ~(1 << lvl
);
193 for (i
= 0; i
< n
* 4; ++i
) {
194 stk
[i
].loop_def
&= mask
;
195 stk
[i
].loop_use
&= mask
;
/* Convenience accessors for the per-file value stacks. */
#define FETCH_TEMP(i, c)    bld_fetch(bld, &bld->tvs[0][0], i, c)
#define STORE_TEMP(i, c, v) bld_store(bld, &bld->tvs[0][0], i, c, (v))
#define FETCH_ADDR(i, c)    bld_fetch(bld, &bld->avs[0][0], i, c)
#define STORE_ADDR(i, c, v) bld_store(bld, &bld->avs[0][0], i, c, (v))
#define FETCH_PRED(i, c)    bld_fetch(bld, &bld->pvs[0][0], i, c)
#define STORE_PRED(i, c, v) bld_store(bld, &bld->pvs[0][0], i, c, (v))

/* Record an output write directly (outputs are not read back) and mark
 * the component in the outputs_written bitfield.
 */
#define STORE_OUTR(i, c, v)                                          \
   do {                                                              \
      bld->ovs[i][c].top = (v);                                      \
      bld->outputs_written[(i) / 8] |= 1 << (((i) * 4 + (c)) % 32);  \
   } while (0)
/* Debug helper: report a TEMP register that is (or may be) read before
 * being written in basic block @b. @kind != 0 means "may be".
 */
static void
bld_warn_uninitialized(struct bld_context *bld, int kind,
                       struct bld_value_stack *stk, struct nv_basic_block *b)
{
#ifdef NV50_TGSI2NC_DEBUG
   long i = (stk - &bld->tvs[0][0]) / 4;
   long c = (stk - &bld->tvs[0][0]) & 3;

   /* NOTE(review): missing lines reconstructed — upstream maps c == 3 to
    * -1 so ('x' + c) prints 'w'-style naming correctly; verify.
    */
   if (c == 3)
      c = -1;

   debug_printf("WARNING: TEMP[%li].%c %s used uninitialized in BB:%i\n",
                i, (int)('x' + c), kind ? "may be" : "is", b->id);
#endif
}
228 static INLINE
struct nv_value
*
229 bld_def(struct nv_instruction
*i
, int c
, struct nv_value
*value
)
236 static INLINE
struct nv_value
*
237 find_by_bb(struct bld_value_stack
*stack
, struct nv_basic_block
*b
)
241 if (stack
->top
&& stack
->top
->insn
->bb
== b
)
244 for (i
= stack
->size
- 1; i
>= 0; --i
)
245 if (stack
->body
[i
]->insn
->bb
== b
)
246 return stack
->body
[i
];
250 /* fetch value from stack that was defined in the specified basic block,
251 * or search for first definitions in all of its predecessors
254 fetch_by_bb(struct bld_value_stack
*stack
,
255 struct nv_value
**vals
, int *n
,
256 struct nv_basic_block
*b
)
259 struct nv_value
*val
;
261 assert(*n
< 16); /* MAX_COND_NESTING */
263 val
= find_by_bb(stack
, b
);
265 for (i
= 0; i
< *n
; ++i
)
271 for (i
= 0; i
< b
->num_in
; ++i
)
272 if (!IS_WALL_EDGE(b
->in_kind
[i
]))
273 fetch_by_bb(stack
, vals
, n
, b
->in
[i
]);
276 static INLINE
struct nv_value
*
277 bld_load_imm_u32(struct bld_context
*bld
, uint32_t u
);
279 static INLINE
struct nv_value
*
280 bld_undef(struct bld_context
*bld
, ubyte file
)
282 struct nv_instruction
*nvi
= new_instruction(bld
->pc
, NV_OP_UNDEF
);
284 return bld_def(nvi
, 0, new_value(bld
->pc
, file
, NV_TYPE_U32
));
287 static struct nv_value
*
288 bld_phi(struct bld_context
*bld
, struct nv_basic_block
*b
,
289 struct bld_value_stack
*stack
)
291 struct nv_basic_block
*in
;
292 struct nv_value
*vals
[16] = { 0 };
293 struct nv_value
*val
;
294 struct nv_instruction
*phi
;
299 fetch_by_bb(stack
, vals
, &n
, b
);
302 bld_warn_uninitialized(bld
, 0, stack
, b
);
307 if (nvbb_dominated_by(b
, vals
[0]->insn
->bb
))
310 bld_warn_uninitialized(bld
, 1, stack
, b
);
312 /* back-tracking to insert missing value of other path */
315 if (in
->num_in
== 1) {
318 if (!nvbb_reachable_by(in
->in
[0], vals
[0]->insn
->bb
, b
))
321 if (!nvbb_reachable_by(in
->in
[1], vals
[0]->insn
->bb
, b
))
327 bld
->pc
->current_block
= in
;
329 /* should make this a no-op */
330 bld_vals_push_val(stack
, bld_undef(bld
, vals
[0]->reg
.file
));
334 for (i
= 0; i
< n
; ++i
) {
335 /* if value dominates b, continue to the redefinitions */
336 if (nvbb_dominated_by(b
, vals
[i
]->insn
->bb
))
339 /* if value dominates any in-block, b should be the dom frontier */
340 for (j
= 0; j
< b
->num_in
; ++j
)
341 if (nvbb_dominated_by(b
->in
[j
], vals
[i
]->insn
->bb
))
343 /* otherwise, find the dominance frontier and put the phi there */
344 if (j
== b
->num_in
) {
345 in
= nvbb_dom_frontier(vals
[i
]->insn
->bb
);
346 val
= bld_phi(bld
, in
, stack
);
347 bld_vals_push_val(stack
, val
);
353 bld
->pc
->current_block
= b
;
358 phi
= new_instruction(bld
->pc
, NV_OP_PHI
);
360 bld_def(phi
, 0, new_value(bld
->pc
, vals
[0]->reg
.file
, vals
[0]->reg
.type
));
361 for (i
= 0; i
< n
; ++i
)
362 phi
->src
[i
] = new_ref(bld
->pc
, vals
[i
]);
367 /* Insert a phi function in the loop header.
368 * For nested loops, we need to insert phi functions in all the outer
369 * loop headers if they don't have one yet.
371 * @def: redefinition from inside loop, or NULL if to be replaced later
373 static struct nv_value
*
374 bld_loop_phi(struct bld_context
*bld
, struct bld_value_stack
*stack
,
375 struct nv_value
*def
)
377 struct nv_instruction
*phi
;
378 struct nv_basic_block
*bb
= bld
->pc
->current_block
;
379 struct nv_value
*val
= NULL
;
381 if (bld
->loop_lvl
> 1) {
383 if (!((stack
->loop_def
| stack
->loop_use
) & (1 << bld
->loop_lvl
)))
384 val
= bld_loop_phi(bld
, stack
, NULL
);
389 val
= bld_phi(bld
, bld
->pc
->current_block
, stack
); /* old definition */
391 bld
->pc
->current_block
= bld
->loop_bb
[bld
->loop_lvl
- 1]->in
[0];
392 val
= bld_undef(bld
, bld_stack_file(bld
, stack
));
395 bld
->pc
->current_block
= bld
->loop_bb
[bld
->loop_lvl
- 1];
397 phi
= new_instruction(bld
->pc
, NV_OP_PHI
);
399 bld_def(phi
, 0, new_value_like(bld
->pc
, val
));
403 bld_vals_push_val(stack
, phi
->def
[0]);
405 phi
->target
= (struct nv_basic_block
*)stack
; /* cheat */
407 nv_reference(bld
->pc
, &phi
->src
[0], val
);
408 nv_reference(bld
->pc
, &phi
->src
[1], def
);
410 bld
->pc
->current_block
= bb
;
415 static INLINE
struct nv_value
*
416 bld_fetch_global(struct bld_context
*bld
, struct bld_value_stack
*stack
)
418 const uint16_t m
= 1 << bld
->loop_lvl
;
419 const uint16_t use
= stack
->loop_use
;
421 stack
->loop_use
|= m
;
423 /* If neither used nor def'd inside the loop, build a phi in foresight,
424 * so we don't have to replace stuff later on, which requires tracking.
426 if (bld
->loop_lvl
&& !((use
| stack
->loop_def
) & m
))
427 return bld_loop_phi(bld
, stack
, NULL
);
429 return bld_phi(bld
, bld
->pc
->current_block
, stack
);
432 static INLINE
struct nv_value
*
433 bld_imm_u32(struct bld_context
*bld
, uint32_t u
)
436 unsigned n
= bld
->num_immds
;
438 for (i
= 0; i
< n
; ++i
)
439 if (bld
->saved_immd
[i
]->reg
.imm
.u32
== u
)
440 return bld
->saved_immd
[i
];
441 assert(n
< BLD_MAX_IMMDS
);
445 bld
->saved_immd
[n
] = new_value(bld
->pc
, NV_FILE_IMM
, NV_TYPE_U32
);
446 bld
->saved_immd
[n
]->reg
.imm
.u32
= u
;
447 return bld
->saved_immd
[n
];
/* forward declaration: replaces all uses of a value within a block graph */
static void
bld_replace_value(struct nv_pc *, struct nv_basic_block *, struct nv_value *,
                  struct nv_value *);
454 /* Replace the source of the phi in the loop header by the last assignment,
455 * or eliminate the phi function if there is no assignment inside the loop.
457 * Redundancy situation 1 - (used) but (not redefined) value:
458 * %3 = phi %0, %3 = %3 is used
459 * %3 = phi %0, %4 = is new definition
461 * Redundancy situation 2 - (not used) but (redefined) value:
462 * %3 = phi %0, %2 = %2 is used, %3 could be used outside, deleted by DCE
465 bld_loop_end(struct bld_context
*bld
, struct nv_basic_block
*bb
)
467 struct nv_basic_block
*save
= bld
->pc
->current_block
;
468 struct nv_instruction
*phi
, *next
;
469 struct nv_value
*val
;
470 struct bld_value_stack
*stk
;
473 for (phi
= bb
->phi
; phi
&& phi
->opcode
== NV_OP_PHI
; phi
= next
) {
476 stk
= (struct bld_value_stack
*)phi
->target
;
479 for (s
= 1, n
= 0; n
< bb
->num_in
; ++n
) {
480 if (bb
->in_kind
[n
] != CFG_EDGE_BACK
)
484 bld
->pc
->current_block
= bb
->in
[n
];
485 val
= bld_fetch_global(bld
, stk
);
487 for (i
= 0; i
< 4; ++i
)
488 if (phi
->src
[i
] && phi
->src
[i
]->value
== val
)
491 nv_reference(bld
->pc
, &phi
->src
[s
++], val
);
493 bld
->pc
->current_block
= save
;
495 if (phi
->src
[0]->value
== phi
->def
[0] ||
496 phi
->src
[0]->value
== phi
->src
[1]->value
)
499 if (phi
->src
[1]->value
== phi
->def
[0])
505 /* eliminate the phi */
506 bld_vals_del_val(stk
, phi
->def
[0]);
509 bld_replace_value(bld
->pc
, bb
, phi
->def
[0], phi
->src
[s
]->value
);
516 static INLINE
struct nv_value
*
517 bld_imm_f32(struct bld_context
*bld
, float f
)
519 return bld_imm_u32(bld
, fui(f
));
522 #define SET_TYPE(v, t) ((v)->reg.type = (v)->reg.as_type = (t))
524 static struct nv_value
*
525 bld_insn_1(struct bld_context
*bld
, uint opcode
, struct nv_value
*src0
)
527 struct nv_instruction
*insn
= new_instruction(bld
->pc
, opcode
);
529 nv_reference(bld
->pc
, &insn
->src
[0], src0
);
531 return bld_def(insn
, 0, new_value(bld
->pc
, NV_FILE_GPR
, src0
->reg
.as_type
));
534 static struct nv_value
*
535 bld_insn_2(struct bld_context
*bld
, uint opcode
,
536 struct nv_value
*src0
, struct nv_value
*src1
)
538 struct nv_instruction
*insn
= new_instruction(bld
->pc
, opcode
);
540 nv_reference(bld
->pc
, &insn
->src
[0], src0
);
541 nv_reference(bld
->pc
, &insn
->src
[1], src1
);
543 return bld_def(insn
, 0, new_value(bld
->pc
, NV_FILE_GPR
, src0
->reg
.as_type
));
546 static struct nv_value
*
547 bld_insn_3(struct bld_context
*bld
, uint opcode
,
548 struct nv_value
*src0
, struct nv_value
*src1
,
549 struct nv_value
*src2
)
551 struct nv_instruction
*insn
= new_instruction(bld
->pc
, opcode
);
553 nv_reference(bld
->pc
, &insn
->src
[0], src0
);
554 nv_reference(bld
->pc
, &insn
->src
[1], src1
);
555 nv_reference(bld
->pc
, &insn
->src
[2], src2
);
557 return bld_def(insn
, 0, new_value(bld
->pc
, NV_FILE_GPR
, src0
->reg
.as_type
));
560 static struct nv_value
*
561 bld_duplicate_insn(struct bld_context
*bld
, struct nv_instruction
*nvi
)
563 struct nv_instruction
*dupi
= new_instruction(bld
->pc
, nvi
->opcode
);
567 bld_def(dupi
, 0, new_value_like(bld
->pc
, nvi
->def
[0]));
569 if (nvi
->flags_def
) {
570 dupi
->flags_def
= new_value_like(bld
->pc
, nvi
->flags_def
);
571 dupi
->flags_def
->insn
= dupi
;
574 for (c
= 0; c
< 5; ++c
)
576 nv_reference(bld
->pc
, &dupi
->src
[c
], nvi
->src
[c
]->value
);
578 nv_reference(bld
->pc
, &dupi
->flags_src
, nvi
->flags_src
->value
);
581 dupi
->saturate
= nvi
->saturate
;
582 dupi
->centroid
= nvi
->centroid
;
583 dupi
->flat
= nvi
->flat
;
589 bld_lmem_store(struct bld_context
*bld
, struct nv_value
*ptr
, int ofst
,
590 struct nv_value
*val
)
592 struct nv_instruction
*insn
= new_instruction(bld
->pc
, NV_OP_STA
);
593 struct nv_value
*loc
;
595 loc
= new_value(bld
->pc
, NV_FILE_MEM_L
, NV_TYPE_U32
);
597 loc
->reg
.id
= ofst
* 4;
599 nv_reference(bld
->pc
, &insn
->src
[0], loc
);
600 nv_reference(bld
->pc
, &insn
->src
[1], val
);
601 nv_reference(bld
->pc
, &insn
->src
[4], ptr
);
604 static struct nv_value
*
605 bld_lmem_load(struct bld_context
*bld
, struct nv_value
*ptr
, int ofst
)
607 struct nv_value
*loc
, *val
;
609 loc
= new_value(bld
->pc
, NV_FILE_MEM_L
, NV_TYPE_U32
);
611 loc
->reg
.id
= ofst
* 4;
613 val
= bld_insn_1(bld
, NV_OP_LDA
, loc
);
615 nv_reference(bld
->pc
, &val
->insn
->src
[4], ptr
);
/* Emit a typed 1-source instruction: set the destination type and the
 * typecast of the source operand.
 */
#define BLD_INSN_1_EX(d, op, dt, s0, s0t)             \
   do {                                               \
      (d) = bld_insn_1(bld, (NV_OP_##op), (s0));      \
      SET_TYPE(d, NV_TYPE_##dt);                      \
      (d)->insn->src[0]->typecast = NV_TYPE_##s0t;    \
   } while (0)

/* Same for a 2-source instruction. */
#define BLD_INSN_2_EX(d, op, dt, s0, s0t, s1, s1t)       \
   do {                                                  \
      (d) = bld_insn_2(bld, (NV_OP_##op), (s0), (s1));   \
      SET_TYPE(d, NV_TYPE_##dt);                         \
      (d)->insn->src[0]->typecast = NV_TYPE_##s0t;       \
      (d)->insn->src[1]->typecast = NV_TYPE_##s1t;       \
   } while (0)
635 static struct nv_value
*
636 bld_pow(struct bld_context
*bld
, struct nv_value
*x
, struct nv_value
*e
)
638 struct nv_value
*val
;
640 BLD_INSN_1_EX(val
, LG2
, F32
, x
, F32
);
641 BLD_INSN_2_EX(val
, MUL
, F32
, e
, F32
, val
, F32
);
642 val
= bld_insn_1(bld
, NV_OP_PREEX2
, val
);
643 val
= bld_insn_1(bld
, NV_OP_EX2
, val
);
648 static INLINE
struct nv_value
*
649 bld_load_imm_f32(struct bld_context
*bld
, float f
)
651 struct nv_value
*imm
= bld_insn_1(bld
, NV_OP_MOV
, bld_imm_f32(bld
, f
));
653 SET_TYPE(imm
, NV_TYPE_F32
);
657 static INLINE
struct nv_value
*
658 bld_load_imm_u32(struct bld_context
*bld
, uint32_t u
)
660 return bld_insn_1(bld
, NV_OP_MOV
, bld_imm_u32(bld
, u
));
663 static struct nv_value
*
664 bld_get_address(struct bld_context
*bld
, int id
, struct nv_value
*indirect
)
667 struct nv_instruction
*nvi
;
668 struct nv_value
*val
;
670 for (i
= 0; i
< 4; ++i
) {
671 if (!bld
->saved_addr
[i
][0])
673 if (bld
->saved_addr
[i
][1] == indirect
) {
674 nvi
= bld
->saved_addr
[i
][0]->insn
;
675 if (nvi
->src
[0]->value
->reg
.imm
.u32
== id
)
676 return bld
->saved_addr
[i
][0];
681 val
= bld_imm_u32(bld
, id
);
683 val
= bld_insn_2(bld
, NV_OP_ADD
, indirect
, val
);
685 val
= bld_insn_1(bld
, NV_OP_MOV
, val
);
687 bld
->saved_addr
[i
][0] = val
;
688 bld
->saved_addr
[i
][0]->reg
.file
= NV_FILE_ADDR
;
689 bld
->saved_addr
[i
][0]->reg
.type
= NV_TYPE_U16
;
690 bld
->saved_addr
[i
][1] = indirect
;
691 return bld
->saved_addr
[i
][0];
695 static struct nv_value
*
696 bld_predicate(struct bld_context
*bld
, struct nv_value
*src
, boolean bool_only
)
698 struct nv_instruction
*s0i
, *nvi
= src
->insn
;
701 nvi
= bld_insn_1(bld
,
702 (src
->reg
.file
== NV_FILE_IMM
) ? NV_OP_MOV
: NV_OP_LDA
,
707 while (nvi
->opcode
== NV_OP_ABS
|| nvi
->opcode
== NV_OP_NEG
||
708 nvi
->opcode
== NV_OP_CVT
) {
709 s0i
= nvi
->src
[0]->value
->insn
;
710 if (!s0i
|| !nv50_op_can_write_flags(s0i
->opcode
))
713 assert(!nvi
->flags_src
);
717 if (!nv50_op_can_write_flags(nvi
->opcode
) ||
718 nvi
->bb
!= bld
->pc
->current_block
) {
719 nvi
= new_instruction(bld
->pc
, NV_OP_CVT
);
720 nv_reference(bld
->pc
, &nvi
->src
[0], src
);
723 if (!nvi
->flags_def
) {
724 nvi
->flags_def
= new_value(bld
->pc
, NV_FILE_FLAGS
, NV_TYPE_U16
);
725 nvi
->flags_def
->insn
= nvi
;
727 return nvi
->flags_def
;
731 bld_kil(struct bld_context
*bld
, struct nv_value
*src
)
733 struct nv_instruction
*nvi
;
735 src
= bld_predicate(bld
, src
, FALSE
);
736 nvi
= new_instruction(bld
->pc
, NV_OP_KIL
);
738 nvi
->flags_src
= new_ref(bld
->pc
, src
);
743 bld_flow(struct bld_context
*bld
, uint opcode
, ubyte cc
,
744 struct nv_value
*src
, struct nv_basic_block
*target
,
745 boolean plan_reconverge
)
747 struct nv_instruction
*nvi
;
750 new_instruction(bld
->pc
, NV_OP_JOINAT
)->fixed
= 1;
752 nvi
= new_instruction(bld
->pc
, opcode
);
753 nvi
->is_terminator
= 1;
755 nvi
->target
= target
;
757 nvi
->flags_src
= new_ref(bld
->pc
, src
);
761 translate_setcc(unsigned opcode
)
764 case TGSI_OPCODE_SLT
: return NV_CC_LT
;
765 case TGSI_OPCODE_SGE
: return NV_CC_GE
;
766 case TGSI_OPCODE_SEQ
: return NV_CC_EQ
;
767 case TGSI_OPCODE_SGT
: return NV_CC_GT
;
768 case TGSI_OPCODE_SLE
: return NV_CC_LE
;
769 case TGSI_OPCODE_SNE
: return NV_CC_NE
| NV_CC_U
;
770 case TGSI_OPCODE_STR
: return NV_CC_TR
;
771 case TGSI_OPCODE_SFL
: return NV_CC_FL
;
773 case TGSI_OPCODE_ISLT
: return NV_CC_LT
;
774 case TGSI_OPCODE_ISGE
: return NV_CC_GE
;
775 case TGSI_OPCODE_USEQ
: return NV_CC_EQ
;
776 case TGSI_OPCODE_USGE
: return NV_CC_GE
;
777 case TGSI_OPCODE_USLT
: return NV_CC_LT
;
778 case TGSI_OPCODE_USNE
: return NV_CC_NE
;
786 translate_opcode(uint opcode
)
789 case TGSI_OPCODE_ABS
: return NV_OP_ABS
;
790 case TGSI_OPCODE_ADD
:
791 case TGSI_OPCODE_SUB
:
792 case TGSI_OPCODE_UADD
: return NV_OP_ADD
;
793 case TGSI_OPCODE_AND
: return NV_OP_AND
;
794 case TGSI_OPCODE_EX2
: return NV_OP_EX2
;
795 case TGSI_OPCODE_CEIL
: return NV_OP_CEIL
;
796 case TGSI_OPCODE_FLR
: return NV_OP_FLOOR
;
797 case TGSI_OPCODE_TRUNC
: return NV_OP_TRUNC
;
798 case TGSI_OPCODE_COS
: return NV_OP_COS
;
799 case TGSI_OPCODE_SIN
: return NV_OP_SIN
;
800 case TGSI_OPCODE_DDX
: return NV_OP_DFDX
;
801 case TGSI_OPCODE_DDY
: return NV_OP_DFDY
;
802 case TGSI_OPCODE_F2I
:
803 case TGSI_OPCODE_F2U
:
804 case TGSI_OPCODE_I2F
:
805 case TGSI_OPCODE_U2F
: return NV_OP_CVT
;
806 case TGSI_OPCODE_INEG
: return NV_OP_NEG
;
807 case TGSI_OPCODE_LG2
: return NV_OP_LG2
;
808 case TGSI_OPCODE_ISHR
:
809 case TGSI_OPCODE_USHR
: return NV_OP_SHR
;
810 case TGSI_OPCODE_MAD
:
811 case TGSI_OPCODE_UMAD
: return NV_OP_MAD
;
812 case TGSI_OPCODE_MAX
:
813 case TGSI_OPCODE_IMAX
:
814 case TGSI_OPCODE_UMAX
: return NV_OP_MAX
;
815 case TGSI_OPCODE_MIN
:
816 case TGSI_OPCODE_IMIN
:
817 case TGSI_OPCODE_UMIN
: return NV_OP_MIN
;
818 case TGSI_OPCODE_MUL
:
819 case TGSI_OPCODE_UMUL
: return NV_OP_MUL
;
820 case TGSI_OPCODE_OR
: return NV_OP_OR
;
821 case TGSI_OPCODE_RCP
: return NV_OP_RCP
;
822 case TGSI_OPCODE_RSQ
: return NV_OP_RSQ
;
823 case TGSI_OPCODE_SAD
: return NV_OP_SAD
;
824 case TGSI_OPCODE_SHL
: return NV_OP_SHL
;
825 case TGSI_OPCODE_SLT
:
826 case TGSI_OPCODE_SGE
:
827 case TGSI_OPCODE_SEQ
:
828 case TGSI_OPCODE_SGT
:
829 case TGSI_OPCODE_SLE
:
830 case TGSI_OPCODE_SNE
:
831 case TGSI_OPCODE_ISLT
:
832 case TGSI_OPCODE_ISGE
:
833 case TGSI_OPCODE_USEQ
:
834 case TGSI_OPCODE_USGE
:
835 case TGSI_OPCODE_USLT
:
836 case TGSI_OPCODE_USNE
: return NV_OP_SET
;
837 case TGSI_OPCODE_TEX
: return NV_OP_TEX
;
838 case TGSI_OPCODE_TXP
: return NV_OP_TEX
;
839 case TGSI_OPCODE_TXB
: return NV_OP_TXB
;
840 case TGSI_OPCODE_TXL
: return NV_OP_TXL
;
841 case TGSI_OPCODE_XOR
: return NV_OP_XOR
;
848 infer_src_type(unsigned opcode
)
851 case TGSI_OPCODE_MOV
:
852 case TGSI_OPCODE_AND
:
854 case TGSI_OPCODE_XOR
:
855 case TGSI_OPCODE_SAD
:
856 case TGSI_OPCODE_U2F
:
857 case TGSI_OPCODE_UADD
:
858 case TGSI_OPCODE_UDIV
:
859 case TGSI_OPCODE_UMOD
:
860 case TGSI_OPCODE_UMAD
:
861 case TGSI_OPCODE_UMUL
:
862 case TGSI_OPCODE_UMAX
:
863 case TGSI_OPCODE_UMIN
:
864 case TGSI_OPCODE_USEQ
:
865 case TGSI_OPCODE_USGE
:
866 case TGSI_OPCODE_USLT
:
867 case TGSI_OPCODE_USNE
:
868 case TGSI_OPCODE_USHR
:
870 case TGSI_OPCODE_I2F
:
871 case TGSI_OPCODE_IDIV
:
872 case TGSI_OPCODE_IMAX
:
873 case TGSI_OPCODE_IMIN
:
874 case TGSI_OPCODE_INEG
:
875 case TGSI_OPCODE_ISGE
:
876 case TGSI_OPCODE_ISHR
:
877 case TGSI_OPCODE_ISLT
:
885 infer_dst_type(unsigned opcode
)
888 case TGSI_OPCODE_MOV
:
889 case TGSI_OPCODE_F2U
:
890 case TGSI_OPCODE_AND
:
892 case TGSI_OPCODE_XOR
:
893 case TGSI_OPCODE_SAD
:
894 case TGSI_OPCODE_UADD
:
895 case TGSI_OPCODE_UDIV
:
896 case TGSI_OPCODE_UMOD
:
897 case TGSI_OPCODE_UMAD
:
898 case TGSI_OPCODE_UMUL
:
899 case TGSI_OPCODE_UMAX
:
900 case TGSI_OPCODE_UMIN
:
901 case TGSI_OPCODE_USEQ
:
902 case TGSI_OPCODE_USGE
:
903 case TGSI_OPCODE_USLT
:
904 case TGSI_OPCODE_USNE
:
905 case TGSI_OPCODE_USHR
:
907 case TGSI_OPCODE_F2I
:
908 case TGSI_OPCODE_IDIV
:
909 case TGSI_OPCODE_IMAX
:
910 case TGSI_OPCODE_IMIN
:
911 case TGSI_OPCODE_INEG
:
912 case TGSI_OPCODE_ISGE
:
913 case TGSI_OPCODE_ISHR
:
914 case TGSI_OPCODE_ISLT
:
922 emit_store(struct bld_context
*bld
, const struct tgsi_full_instruction
*inst
,
923 unsigned chan
, struct nv_value
*value
)
925 struct nv_value
*ptr
;
926 const struct tgsi_full_dst_register
*reg
= &inst
->Dst
[0];
928 if (reg
->Register
.Indirect
) {
929 ptr
= FETCH_ADDR(reg
->Indirect
.Index
,
930 tgsi_util_get_src_register_swizzle(®
->Indirect
, 0));
937 if (inst
->Instruction
.Opcode
!= TGSI_OPCODE_MOV
)
938 value
->reg
.type
= infer_dst_type(inst
->Instruction
.Opcode
);
940 switch (inst
->Instruction
.Saturate
) {
943 case TGSI_SAT_ZERO_ONE
:
944 BLD_INSN_1_EX(value
, SAT
, F32
, value
, F32
);
946 case TGSI_SAT_MINUS_PLUS_ONE
:
947 value
->reg
.as_type
= NV_TYPE_F32
;
948 value
= bld_insn_2(bld
, NV_OP_MAX
, value
, bld_load_imm_f32(bld
, -1.0f
));
949 value
= bld_insn_2(bld
, NV_OP_MIN
, value
, bld_load_imm_f32(bld
, 1.0f
));
953 switch (reg
->Register
.File
) {
954 case TGSI_FILE_OUTPUT
:
955 if (!value
->insn
&& (bld
->ti
->output_file
== NV_FILE_OUT
))
956 value
= bld_insn_1(bld
, NV_OP_MOV
, value
);
957 value
= bld_insn_1(bld
, NV_OP_MOV
, value
);
958 value
->reg
.file
= bld
->ti
->output_file
;
960 if (bld
->ti
->p
->type
== PIPE_SHADER_FRAGMENT
) {
961 STORE_OUTR(reg
->Register
.Index
, chan
, value
);
963 value
->insn
->fixed
= 1;
964 value
->reg
.id
= bld
->ti
->output_map
[reg
->Register
.Index
][chan
];
967 case TGSI_FILE_TEMPORARY
:
968 assert(reg
->Register
.Index
< BLD_MAX_TEMPS
);
969 if (!value
->insn
|| (value
->insn
->bb
!= bld
->pc
->current_block
))
970 value
= bld_insn_1(bld
, NV_OP_MOV
, value
);
971 value
->reg
.file
= NV_FILE_GPR
;
973 if (bld
->ti
->store_to_memory
)
974 bld_lmem_store(bld
, ptr
, reg
->Register
.Index
* 4 + chan
, value
);
976 STORE_TEMP(reg
->Register
.Index
, chan
, value
);
978 case TGSI_FILE_ADDRESS
:
979 assert(reg
->Register
.Index
< BLD_MAX_ADDRS
);
980 value
->reg
.file
= NV_FILE_ADDR
;
981 value
->reg
.type
= NV_TYPE_U16
;
982 STORE_ADDR(reg
->Register
.Index
, chan
, value
);
987 static INLINE
uint32_t
988 bld_is_output_written(struct bld_context
*bld
, int i
, int c
)
991 return bld
->outputs_written
[i
/ 8] & (0xf << ((i
* 4) % 32));
992 return bld
->outputs_written
[i
/ 8] & (1 << ((i
* 4 + c
) % 32));
996 bld_export_outputs(struct bld_context
*bld
)
998 struct nv_value
*vals
[4];
999 struct nv_instruction
*nvi
;
1002 bld_push_values(&bld
->ovs
[0][0], PIPE_MAX_SHADER_OUTPUTS
);
1004 for (i
= 0; i
< PIPE_MAX_SHADER_OUTPUTS
; ++i
) {
1005 if (!bld_is_output_written(bld
, i
, -1))
1007 for (n
= 0, c
= 0; c
< 4; ++c
) {
1008 if (!bld_is_output_written(bld
, i
, c
))
1010 vals
[n
] = bld_fetch_global(bld
, &bld
->ovs
[i
][c
]);
1012 vals
[n
] = bld_insn_1(bld
, NV_OP_MOV
, vals
[n
]);
1013 vals
[n
++]->reg
.id
= bld
->ti
->output_map
[i
][c
];
1017 (nvi
= new_instruction(bld
->pc
, NV_OP_EXPORT
))->fixed
= 1;
1019 for (c
= 0; c
< n
; ++c
)
1020 nvi
->src
[c
] = new_ref(bld
->pc
, vals
[c
]);
1025 bld_new_block(struct bld_context
*bld
, struct nv_basic_block
*b
)
1029 bld_push_values(&bld
->tvs
[0][0], BLD_MAX_TEMPS
);
1030 bld_push_values(&bld
->avs
[0][0], BLD_MAX_ADDRS
);
1031 bld_push_values(&bld
->pvs
[0][0], BLD_MAX_PREDS
);
1032 bld_push_values(&bld
->ovs
[0][0], PIPE_MAX_SHADER_OUTPUTS
);
1034 bld
->pc
->current_block
= b
;
1036 for (i
= 0; i
< 4; ++i
)
1037 bld
->saved_addr
[i
][0] = NULL
;
1039 for (i
= 0; i
< 128; ++i
)
1040 bld
->saved_inputs
[i
] = NULL
;
1042 bld
->out_kind
= CFG_EDGE_FORWARD
;
1045 static struct nv_value
*
1046 bld_saved_input(struct bld_context
*bld
, unsigned i
, unsigned c
)
1048 unsigned idx
= bld
->ti
->input_map
[i
][c
];
1050 if (bld
->ti
->p
->type
!= PIPE_SHADER_FRAGMENT
)
1052 if (bld
->saved_inputs
[idx
])
1053 return bld
->saved_inputs
[idx
];
1057 static struct nv_value
*
1058 bld_interpolate(struct bld_context
*bld
, unsigned mode
, struct nv_value
*val
)
1060 if (val
->reg
.id
== 255) {
1061 /* gl_FrontFacing: 0/~0 to -1.0/+1.0 */
1062 val
= bld_insn_1(bld
, NV_OP_LINTERP
, val
);
1063 val
= bld_insn_2(bld
, NV_OP_SHL
, val
, bld_imm_u32(bld
, 31));
1064 val
->insn
->src
[0]->typecast
= NV_TYPE_U32
;
1065 val
= bld_insn_2(bld
, NV_OP_XOR
, val
, bld_imm_f32(bld
, -1.0f
));
1066 val
->insn
->src
[0]->typecast
= NV_TYPE_U32
;
1068 if (mode
& (NV50_INTERP_LINEAR
| NV50_INTERP_FLAT
))
1069 val
= bld_insn_1(bld
, NV_OP_LINTERP
, val
);
1071 val
= bld_insn_2(bld
, NV_OP_PINTERP
, val
, bld
->frgcrd
[3]);
1073 val
->insn
->flat
= (mode
& NV50_INTERP_FLAT
) ? 1 : 0;
1074 val
->insn
->centroid
= (mode
& NV50_INTERP_CENTROID
) ? 1 : 0;
1078 static struct nv_value
*
1079 emit_fetch(struct bld_context
*bld
, const struct tgsi_full_instruction
*insn
,
1080 const unsigned s
, const unsigned chan
)
1082 const struct tgsi_full_src_register
*src
= &insn
->Src
[s
];
1083 struct nv_value
*res
;
1084 struct nv_value
*ptr
= NULL
;
1085 unsigned idx
, swz
, dim_idx
, ind_idx
, ind_swz
, sgn
;
1086 ubyte type
= infer_src_type(insn
->Instruction
.Opcode
);
1088 idx
= src
->Register
.Index
;
1089 swz
= tgsi_util_get_full_src_register_swizzle(src
, chan
);
1094 if (src
->Register
.Indirect
) {
1095 ind_idx
= src
->Indirect
.Index
;
1096 ind_swz
= tgsi_util_get_src_register_swizzle(&src
->Indirect
, 0);
1098 ptr
= FETCH_ADDR(ind_idx
, ind_swz
);
1100 if (idx
>= (128 / 4) && src
->Register
.File
== TGSI_FILE_CONSTANT
)
1101 ptr
= bld_get_address(bld
, (idx
* 16) & ~0x1ff, ptr
);
1103 switch (src
->Register
.File
) {
1104 case TGSI_FILE_CONSTANT
:
1105 dim_idx
= src
->Dimension
.Index
? src
->Dimension
.Index
+ 2 : 1;
1106 assert(dim_idx
< 14);
1107 assert(dim_idx
== 1); /* for now */
1109 res
= new_value(bld
->pc
, NV_FILE_MEM_C(dim_idx
), type
);
1110 SET_TYPE(res
, type
);
1111 res
->reg
.id
= (idx
* 4 + swz
) & 127;
1112 res
= bld_insn_1(bld
, NV_OP_LDA
, res
);
1115 res
->insn
->src
[4] = new_ref(bld
->pc
, ptr
);
1117 case TGSI_FILE_IMMEDIATE
:
1118 assert(idx
< bld
->ti
->immd32_nr
);
1119 res
= bld_load_imm_u32(bld
, bld
->ti
->immd32
[idx
* 4 + swz
]);
1121 switch (bld
->ti
->immd32_ty
[idx
]) {
1122 case TGSI_IMM_FLOAT32
: SET_TYPE(res
, NV_TYPE_F32
); break;
1123 case TGSI_IMM_UINT32
: SET_TYPE(res
, NV_TYPE_U32
); break;
1124 case TGSI_IMM_INT32
: SET_TYPE(res
, NV_TYPE_S32
); break;
1126 SET_TYPE(res
, type
);
1130 case TGSI_FILE_INPUT
:
1131 res
= bld_saved_input(bld
, idx
, swz
);
1132 if (res
&& (insn
->Instruction
.Opcode
!= TGSI_OPCODE_TXP
))
1135 res
= new_value(bld
->pc
, bld
->ti
->input_file
, type
);
1136 res
->reg
.id
= bld
->ti
->input_map
[idx
][swz
];
1138 if (res
->reg
.file
== NV_FILE_MEM_V
) {
1139 res
= bld_interpolate(bld
, bld
->ti
->interp_mode
[idx
], res
);
1141 assert(src
->Dimension
.Dimension
== 0);
1142 res
= bld_insn_1(bld
, NV_OP_LDA
, res
);
1143 assert(res
->reg
.type
== type
);
1145 bld
->saved_inputs
[bld
->ti
->input_map
[idx
][swz
]] = res
;
1147 case TGSI_FILE_TEMPORARY
:
1148 if (bld
->ti
->store_to_memory
)
1149 res
= bld_lmem_load(bld
, ptr
, idx
* 4 + swz
);
1151 res
= bld_fetch_global(bld
, &bld
->tvs
[idx
][swz
]);
1153 case TGSI_FILE_ADDRESS
:
1154 res
= bld_fetch_global(bld
, &bld
->avs
[idx
][swz
]);
1156 case TGSI_FILE_PREDICATE
:
1157 res
= bld_fetch_global(bld
, &bld
->pvs
[idx
][swz
]);
1160 NOUVEAU_ERR("illegal/unhandled src reg file: %d\n", src
->Register
.File
);
1165 return bld_undef(bld
, NV_FILE_GPR
);
1167 sgn
= tgsi_util_get_full_src_register_sign_mode(src
, chan
);
1169 if (insn
->Instruction
.Opcode
!= TGSI_OPCODE_MOV
)
1170 res
->reg
.as_type
= type
;
1172 if (sgn
!= TGSI_UTIL_SIGN_KEEP
) /* apparently "MOV A, -B" assumes float */
1173 res
->reg
.as_type
= NV_TYPE_F32
;
1176 case TGSI_UTIL_SIGN_KEEP
:
1178 case TGSI_UTIL_SIGN_CLEAR
:
1179 res
= bld_insn_1(bld
, NV_OP_ABS
, res
);
1181 case TGSI_UTIL_SIGN_TOGGLE
:
1182 res
= bld_insn_1(bld
, NV_OP_NEG
, res
);
1184 case TGSI_UTIL_SIGN_SET
:
1185 res
= bld_insn_1(bld
, NV_OP_ABS
, res
);
1186 res
= bld_insn_1(bld
, NV_OP_NEG
, res
);
1189 NOUVEAU_ERR("illegal/unhandled src reg sign mode\n");
1198 bld_lit(struct bld_context
*bld
, struct nv_value
*dst0
[4],
1199 const struct tgsi_full_instruction
*insn
)
1201 struct nv_value
*val0
= NULL
;
1202 struct nv_value
*zero
= NULL
;
1203 unsigned mask
= insn
->Dst
[0].Register
.WriteMask
;
1205 if (mask
& ((1 << 0) | (1 << 3)))
1206 dst0
[3] = dst0
[0] = bld_load_imm_f32(bld
, 1.0f
);
1208 if (mask
& (3 << 1)) {
1209 zero
= bld_load_imm_f32(bld
, 0.0f
);
1210 val0
= bld_insn_2(bld
, NV_OP_MAX
, emit_fetch(bld
, insn
, 0, 0), zero
);
1212 if (mask
& (1 << 1))
1216 if (mask
& (1 << 2)) {
1217 struct nv_value
*val1
, *val3
, *src1
, *src3
;
1218 struct nv_value
*pos128
= bld_load_imm_f32(bld
, 127.999999f
);
1219 struct nv_value
*neg128
= bld_load_imm_f32(bld
, -127.999999f
);
1221 src1
= emit_fetch(bld
, insn
, 0, 1);
1222 src3
= emit_fetch(bld
, insn
, 0, 3);
1224 val0
->insn
->flags_def
= new_value(bld
->pc
, NV_FILE_FLAGS
, NV_TYPE_U16
);
1225 val0
->insn
->flags_def
->insn
= val0
->insn
;
1227 val1
= bld_insn_2(bld
, NV_OP_MAX
, src1
, zero
);
1228 val3
= bld_insn_2(bld
, NV_OP_MAX
, src3
, neg128
);
1229 val3
= bld_insn_2(bld
, NV_OP_MIN
, val3
, pos128
);
1230 val3
= bld_pow(bld
, val1
, val3
);
1232 dst0
[2] = bld_insn_1(bld
, NV_OP_MOV
, zero
);
1233 dst0
[2]->insn
->cc
= NV_CC_LE
;
1234 dst0
[2]->insn
->flags_src
= new_ref(bld
->pc
, val0
->insn
->flags_def
);
1236 dst0
[2] = bld_insn_2(bld
, NV_OP_SELECT
, val3
, dst0
[2]);
1241 get_tex_dim(const struct tgsi_full_instruction
*insn
, int *dim
, int *arg
)
1243 switch (insn
->Texture
.Texture
) {
1244 case TGSI_TEXTURE_1D
:
1247 case TGSI_TEXTURE_SHADOW1D
:
1251 case TGSI_TEXTURE_UNKNOWN
:
1252 case TGSI_TEXTURE_2D
:
1253 case TGSI_TEXTURE_RECT
:
1256 case TGSI_TEXTURE_SHADOW2D
:
1257 case TGSI_TEXTURE_SHADOWRECT
:
1261 case TGSI_TEXTURE_3D
:
1262 case TGSI_TEXTURE_CUBE
:
1272 load_proj_tex_coords(struct bld_context
*bld
,
1273 struct nv_value
*t
[4], int dim
, int arg
,
1274 const struct tgsi_full_instruction
*insn
)
1278 mask
= (1 << dim
) - 1;
1280 mask
|= 4; /* depth comparison value */
1282 t
[3] = emit_fetch(bld
, insn
, 0, 3);
1284 if (t
[3]->insn
->opcode
== NV_OP_PINTERP
) {
1285 t
[3] = bld_duplicate_insn(bld
, t
[3]->insn
);
1286 t
[3]->insn
->opcode
= NV_OP_LINTERP
;
1287 nv_reference(bld
->pc
, &t
[3]->insn
->src
[1], NULL
);
1290 t
[3] = bld_insn_1(bld
, NV_OP_RCP
, t
[3]);
1292 for (c
= 0; c
< 4; ++c
) {
1293 if (!(mask
& (1 << c
)))
1295 t
[c
] = emit_fetch(bld
, insn
, 0, c
);
1297 if (t
[c
]->insn
->opcode
!= NV_OP_LINTERP
&&
1298 t
[c
]->insn
->opcode
!= NV_OP_PINTERP
)
1300 t
[c
] = bld_duplicate_insn(bld
, t
[c
]->insn
);
1301 t
[c
]->insn
->opcode
= NV_OP_PINTERP
;
1302 nv_reference(bld
->pc
, &t
[c
]->insn
->src
[1], t
[3]);
1307 for (c
= 0; mask
; ++c
, mask
>>= 1) {
1310 t
[c
] = bld_insn_2(bld
, NV_OP_MUL
, t
[c
], t
[3]);
1314 /* For a quad of threads / top left, top right, bottom left, bottom right
1315 * pixels, do a different operation, and take src0 from a specific thread.
/* Pack four 2-bit quad operations (one per lane) into a single byte. */
#define QOP(a, b, c, d) \
   ((QOP_##a << 0) | (QOP_##b << 2) | (QOP_##c << 4) | (QOP_##d << 6))
1325 static INLINE
struct nv_value
*
1326 bld_quadop(struct bld_context
*bld
, ubyte qop
, struct nv_value
*src0
, int lane
,
1327 struct nv_value
*src1
, boolean wp
)
1329 struct nv_value
*val
= bld_insn_2(bld
, NV_OP_QUADOP
, src0
, src1
);
1330 val
->insn
->lanes
= lane
;
1331 val
->insn
->quadop
= qop
;
1333 val
->insn
->flags_def
= new_value(bld
->pc
, NV_FILE_FLAGS
, NV_TYPE_U16
);
1334 val
->insn
->flags_def
->insn
= val
->insn
;
1339 static INLINE
struct nv_value
*
1340 bld_cmov(struct bld_context
*bld
,
1341 struct nv_value
*src
, ubyte cc
, struct nv_value
*cr
)
1343 src
= bld_insn_1(bld
, NV_OP_MOV
, src
);
1346 src
->insn
->flags_src
= new_ref(bld
->pc
, cr
);
1351 static struct nv_instruction
*
1352 emit_tex(struct bld_context
*bld
, uint opcode
,
1353 struct nv_value
*dst
[4], struct nv_value
*t_in
[4],
1354 int argc
, int tic
, int tsc
, int cube
)
1356 struct nv_value
*t
[4];
1357 struct nv_instruction
*nvi
;
1360 /* the inputs to a tex instruction must be separate values */
1361 for (c
= 0; c
< argc
; ++c
) {
1362 t
[c
] = bld_insn_1(bld
, NV_OP_MOV
, t_in
[c
]);
1363 SET_TYPE(t
[c
], NV_TYPE_F32
);
1364 t
[c
]->insn
->fixed
= 1;
1367 nvi
= new_instruction(bld
->pc
, opcode
);
1369 for (c
= 0; c
< 4; ++c
)
1370 dst
[c
] = bld_def(nvi
, c
, new_value(bld
->pc
, NV_FILE_GPR
, NV_TYPE_F32
));
1372 for (c
= 0; c
< argc
; ++c
)
1373 nvi
->src
[c
] = new_ref(bld
->pc
, t
[c
]);
1377 nvi
->tex_mask
= 0xf;
1378 nvi
->tex_cube
= cube
;
1380 nvi
->tex_argc
= argc
;
1386 bld_texlod_sequence(struct bld_context
*bld
,
1387 struct nv_value
*dst
[4], struct nv_value
*t
[4], int arg
,
1388 int tic
, int tsc
, int cube
)
1390 emit_tex(bld
, NV_OP_TXL
, dst
, t
, arg
, tic
, tsc
, cube
); /* TODO */
1394 /* The lanes of a quad are grouped by the bit in the condition register
1395 * they have set, which is selected by differing bias values.
1396 * Move the input values for TEX into a new register set for each group
1397 * and execute TEX only for a specific group.
1398 * We always need to use 4 new registers for the inputs/outputs because
1399 * the implicitly calculated derivatives must be correct.
1402 bld_texbias_sequence(struct bld_context
*bld
,
1403 struct nv_value
*dst
[4], struct nv_value
*t
[4], int arg
,
1404 int tic
, int tsc
, int cube
)
1406 struct nv_instruction
*sel
, *tex
;
1407 struct nv_value
*bit
[4], *cr
[4], *res
[4][4], *val
;
1410 const ubyte cc
[4] = { NV_CC_EQ
, NV_CC_S
, NV_CC_C
, NV_CC_O
};
1412 for (l
= 0; l
< 4; ++l
) {
1413 bit
[l
] = bld_load_imm_u32(bld
, 1 << l
);
1415 val
= bld_quadop(bld
, QOP(SUBR
, SUBR
, SUBR
, SUBR
),
1416 t
[arg
- 1], l
, t
[arg
- 1], TRUE
);
1418 cr
[l
] = bld_cmov(bld
, bit
[l
], NV_CC_EQ
, val
->insn
->flags_def
);
1420 cr
[l
]->reg
.file
= NV_FILE_FLAGS
;
1421 SET_TYPE(cr
[l
], NV_TYPE_U16
);
1424 sel
= new_instruction(bld
->pc
, NV_OP_SELECT
);
1426 for (l
= 0; l
< 4; ++l
)
1427 sel
->src
[l
] = new_ref(bld
->pc
, cr
[l
]);
1429 bld_def(sel
, 0, new_value(bld
->pc
, NV_FILE_FLAGS
, NV_TYPE_U16
));
1431 for (l
= 0; l
< 4; ++l
) {
1432 tex
= emit_tex(bld
, NV_OP_TXB
, dst
, t
, arg
, tic
, tsc
, cube
);
1435 tex
->flags_src
= new_ref(bld
->pc
, sel
->def
[0]);
1437 for (c
= 0; c
< 4; ++c
)
1438 res
[l
][c
] = tex
->def
[c
];
1441 for (l
= 0; l
< 4; ++l
)
1442 for (c
= 0; c
< 4; ++c
)
1443 res
[l
][c
] = bld_cmov(bld
, res
[l
][c
], cc
[l
], sel
->def
[0]);
1445 for (c
= 0; c
< 4; ++c
) {
1446 sel
= new_instruction(bld
->pc
, NV_OP_SELECT
);
1448 for (l
= 0; l
< 4; ++l
)
1449 sel
->src
[l
] = new_ref(bld
->pc
, res
[l
][c
]);
1451 bld_def(sel
, 0, (dst
[c
] = new_value(bld
->pc
, NV_FILE_GPR
, NV_TYPE_F32
)));
1456 bld_is_constant(struct nv_value
*val
)
1458 if (val
->reg
.file
== NV_FILE_IMM
)
1460 return val
->insn
&& nvcg_find_constant(val
->insn
->src
[0]);
1464 bld_tex(struct bld_context
*bld
, struct nv_value
*dst0
[4],
1465 const struct tgsi_full_instruction
*insn
)
1467 struct nv_value
*t
[4], *s
[3];
1468 uint opcode
= translate_opcode(insn
->Instruction
.Opcode
);
1470 const int tic
= insn
->Src
[1].Register
.Index
;
1472 const int cube
= (insn
->Texture
.Texture
== TGSI_TEXTURE_CUBE
) ? 1 : 0;
1474 get_tex_dim(insn
, &dim
, &arg
);
1476 if (!cube
&& insn
->Instruction
.Opcode
== TGSI_OPCODE_TXP
)
1477 load_proj_tex_coords(bld
, t
, dim
, arg
, insn
);
1479 for (c
= 0; c
< dim
; ++c
)
1480 t
[c
] = emit_fetch(bld
, insn
, 0, c
);
1482 t
[dim
] = emit_fetch(bld
, insn
, 0, 2);
1487 for (c
= 0; c
< 3; ++c
)
1488 s
[c
] = bld_insn_1(bld
, NV_OP_ABS
, t
[c
]);
1490 s
[0] = bld_insn_2(bld
, NV_OP_MAX
, s
[0], s
[1]);
1491 s
[0] = bld_insn_2(bld
, NV_OP_MAX
, s
[0], s
[2]);
1492 s
[0] = bld_insn_1(bld
, NV_OP_RCP
, s
[0]);
1494 for (c
= 0; c
< 3; ++c
)
1495 t
[c
] = bld_insn_2(bld
, NV_OP_MUL
, t
[c
], s
[0]);
1498 if (opcode
== NV_OP_TXB
|| opcode
== NV_OP_TXL
) {
1499 t
[arg
++] = emit_fetch(bld
, insn
, 0, 3);
1501 if ((bld
->ti
->p
->type
== PIPE_SHADER_FRAGMENT
) &&
1502 !bld_is_constant(t
[arg
- 1])) {
1503 if (opcode
== NV_OP_TXB
)
1504 bld_texbias_sequence(bld
, dst0
, t
, arg
, tic
, tsc
, cube
);
1506 bld_texlod_sequence(bld
, dst0
, t
, arg
, tic
, tsc
, cube
);
1511 emit_tex(bld
, opcode
, dst0
, t
, arg
, tic
, tsc
, cube
);
1514 static INLINE
struct nv_value
*
1515 bld_dot(struct bld_context
*bld
, const struct tgsi_full_instruction
*insn
,
1518 struct nv_value
*dotp
, *src0
, *src1
;
1521 src0
= emit_fetch(bld
, insn
, 0, 0);
1522 src1
= emit_fetch(bld
, insn
, 1, 0);
1523 dotp
= bld_insn_2(bld
, NV_OP_MUL
, src0
, src1
);
1525 for (c
= 1; c
< n
; ++c
) {
1526 src0
= emit_fetch(bld
, insn
, 0, c
);
1527 src1
= emit_fetch(bld
, insn
, 1, c
);
1528 dotp
= bld_insn_3(bld
, NV_OP_MAD
, src0
, src1
, dotp
);
/* Iterate chan over the channels enabled in (inst)'s dst[0] write mask. */
#define FOR_EACH_DST0_ENABLED_CHANNEL(chan, inst) \
   for (chan = 0; chan < 4; ++chan) \
      if ((inst)->Dst[0].Register.WriteMask & (1 << chan))
1538 bld_instruction(struct bld_context
*bld
,
1539 const struct tgsi_full_instruction
*insn
)
1541 struct nv_value
*src0
;
1542 struct nv_value
*src1
;
1543 struct nv_value
*src2
;
1544 struct nv_value
*dst0
[4] = { 0 };
1545 struct nv_value
*temp
;
1547 uint opcode
= translate_opcode(insn
->Instruction
.Opcode
);
1549 #ifdef NV50_TGSI2NC_DEBUG
1550 debug_printf("bld_instruction:"); tgsi_dump_instruction(insn
, 1);
1553 switch (insn
->Instruction
.Opcode
) {
1554 case TGSI_OPCODE_ADD
:
1555 case TGSI_OPCODE_MAX
:
1556 case TGSI_OPCODE_MIN
:
1557 case TGSI_OPCODE_MUL
:
1558 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
) {
1559 src0
= emit_fetch(bld
, insn
, 0, c
);
1560 src1
= emit_fetch(bld
, insn
, 1, c
);
1561 dst0
[c
] = bld_insn_2(bld
, opcode
, src0
, src1
);
1564 case TGSI_OPCODE_ARL
:
1565 src1
= bld_imm_u32(bld
, 4);
1566 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
) {
1567 src0
= emit_fetch(bld
, insn
, 0, c
);
1568 temp
= bld_insn_1(bld
, NV_OP_FLOOR
, src0
);
1569 SET_TYPE(temp
, NV_TYPE_S32
);
1570 dst0
[c
] = bld_insn_2(bld
, NV_OP_SHL
, temp
, src1
);
1573 case TGSI_OPCODE_CMP
:
1574 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
) {
1575 src0
= emit_fetch(bld
, insn
, 0, c
);
1576 src1
= emit_fetch(bld
, insn
, 1, c
);
1577 src2
= emit_fetch(bld
, insn
, 2, c
);
1578 src0
= bld_predicate(bld
, src0
, FALSE
);
1580 src1
= bld_insn_1(bld
, NV_OP_MOV
, src1
);
1581 src1
->insn
->flags_src
= new_ref(bld
->pc
, src0
);
1582 src1
->insn
->cc
= NV_CC_LT
;
1584 src2
= bld_insn_1(bld
, NV_OP_MOV
, src2
);
1585 src2
->insn
->flags_src
= new_ref(bld
->pc
, src0
);
1586 src2
->insn
->cc
= NV_CC_GE
;
1588 dst0
[c
] = bld_insn_2(bld
, NV_OP_SELECT
, src1
, src2
);
1591 case TGSI_OPCODE_COS
:
1592 case TGSI_OPCODE_SIN
:
1593 src0
= emit_fetch(bld
, insn
, 0, 0);
1594 temp
= bld_insn_1(bld
, NV_OP_PRESIN
, src0
);
1595 if (insn
->Dst
[0].Register
.WriteMask
& 7)
1596 temp
= bld_insn_1(bld
, opcode
, temp
);
1597 for (c
= 0; c
< 3; ++c
)
1598 if (insn
->Dst
[0].Register
.WriteMask
& (1 << c
))
1600 if (!(insn
->Dst
[0].Register
.WriteMask
& (1 << 3)))
1602 src0
= emit_fetch(bld
, insn
, 0, 3);
1603 temp
= bld_insn_1(bld
, NV_OP_PRESIN
, src0
);
1604 dst0
[3] = bld_insn_1(bld
, opcode
, temp
);
1606 case TGSI_OPCODE_DP2
:
1607 temp
= bld_dot(bld
, insn
, 2);
1608 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
)
1611 case TGSI_OPCODE_DP3
:
1612 temp
= bld_dot(bld
, insn
, 3);
1613 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
)
1616 case TGSI_OPCODE_DP4
:
1617 temp
= bld_dot(bld
, insn
, 4);
1618 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
)
1621 case TGSI_OPCODE_DPH
:
1622 src0
= bld_dot(bld
, insn
, 3);
1623 src1
= emit_fetch(bld
, insn
, 1, 3);
1624 temp
= bld_insn_2(bld
, NV_OP_ADD
, src0
, src1
);
1625 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
)
1628 case TGSI_OPCODE_DST
:
1629 if (insn
->Dst
[0].Register
.WriteMask
& 1)
1630 dst0
[0] = bld_imm_f32(bld
, 1.0f
);
1631 if (insn
->Dst
[0].Register
.WriteMask
& 2) {
1632 src0
= emit_fetch(bld
, insn
, 0, 1);
1633 src1
= emit_fetch(bld
, insn
, 1, 1);
1634 dst0
[1] = bld_insn_2(bld
, NV_OP_MUL
, src0
, src1
);
1636 if (insn
->Dst
[0].Register
.WriteMask
& 4)
1637 dst0
[2] = emit_fetch(bld
, insn
, 0, 2);
1638 if (insn
->Dst
[0].Register
.WriteMask
& 8)
1639 dst0
[3] = emit_fetch(bld
, insn
, 1, 3);
1641 case TGSI_OPCODE_EXP
:
1642 src0
= emit_fetch(bld
, insn
, 0, 0);
1643 temp
= bld_insn_1(bld
, NV_OP_FLOOR
, src0
);
1645 if (insn
->Dst
[0].Register
.WriteMask
& 2)
1646 dst0
[1] = bld_insn_2(bld
, NV_OP_SUB
, src0
, temp
);
1647 if (insn
->Dst
[0].Register
.WriteMask
& 1) {
1648 temp
= bld_insn_1(bld
, NV_OP_PREEX2
, temp
);
1649 dst0
[0] = bld_insn_1(bld
, NV_OP_EX2
, temp
);
1651 if (insn
->Dst
[0].Register
.WriteMask
& 4) {
1652 temp
= bld_insn_1(bld
, NV_OP_PREEX2
, src0
);
1653 dst0
[2] = bld_insn_1(bld
, NV_OP_EX2
, temp
);
1655 if (insn
->Dst
[0].Register
.WriteMask
& 8)
1656 dst0
[3] = bld_imm_f32(bld
, 1.0f
);
1658 case TGSI_OPCODE_EX2
:
1659 src0
= emit_fetch(bld
, insn
, 0, 0);
1660 temp
= bld_insn_1(bld
, NV_OP_PREEX2
, src0
);
1661 temp
= bld_insn_1(bld
, NV_OP_EX2
, temp
);
1662 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
)
1665 case TGSI_OPCODE_FRC
:
1666 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
) {
1667 src0
= emit_fetch(bld
, insn
, 0, c
);
1668 dst0
[c
] = bld_insn_1(bld
, NV_OP_FLOOR
, src0
);
1669 dst0
[c
] = bld_insn_2(bld
, NV_OP_SUB
, src0
, dst0
[c
]);
1672 case TGSI_OPCODE_KIL
:
1673 for (c
= 0; c
< 4; ++c
) {
1674 src0
= emit_fetch(bld
, insn
, 0, c
);
1678 case TGSI_OPCODE_KILP
:
1679 (new_instruction(bld
->pc
, NV_OP_KIL
))->fixed
= 1;
1681 case TGSI_OPCODE_IF
:
1683 struct nv_basic_block
*b
= new_basic_block(bld
->pc
);
1685 assert(bld
->cond_lvl
< BLD_MAX_COND_NESTING
);
1687 nvbb_attach_block(bld
->pc
->current_block
, b
, CFG_EDGE_FORWARD
);
1689 bld
->join_bb
[bld
->cond_lvl
] = bld
->pc
->current_block
;
1690 bld
->cond_bb
[bld
->cond_lvl
] = bld
->pc
->current_block
;
1692 src1
= bld_predicate(bld
, emit_fetch(bld
, insn
, 0, 0), TRUE
);
1694 bld_flow(bld
, NV_OP_BRA
, NV_CC_EQ
, src1
, NULL
, (bld
->cond_lvl
== 0));
1697 bld_new_block(bld
, b
);
1700 case TGSI_OPCODE_ELSE
:
1702 struct nv_basic_block
*b
= new_basic_block(bld
->pc
);
1705 nvbb_attach_block(bld
->join_bb
[bld
->cond_lvl
], b
, CFG_EDGE_FORWARD
);
1707 bld
->cond_bb
[bld
->cond_lvl
]->exit
->target
= b
;
1708 bld
->cond_bb
[bld
->cond_lvl
] = bld
->pc
->current_block
;
1710 new_instruction(bld
->pc
, NV_OP_BRA
)->is_terminator
= 1;
1713 bld_new_block(bld
, b
);
1716 case TGSI_OPCODE_ENDIF
:
1718 struct nv_basic_block
*b
= new_basic_block(bld
->pc
);
1721 nvbb_attach_block(bld
->pc
->current_block
, b
, bld
->out_kind
);
1722 nvbb_attach_block(bld
->cond_bb
[bld
->cond_lvl
], b
, CFG_EDGE_FORWARD
);
1724 bld
->cond_bb
[bld
->cond_lvl
]->exit
->target
= b
;
1726 bld_new_block(bld
, b
);
1728 if (!bld
->cond_lvl
&& bld
->join_bb
[bld
->cond_lvl
]) {
1729 bld
->join_bb
[bld
->cond_lvl
]->exit
->prev
->target
= b
;
1730 new_instruction(bld
->pc
, NV_OP_JOIN
)->is_join
= TRUE
;
1734 case TGSI_OPCODE_BGNLOOP
:
1736 struct nv_basic_block
*bl
= new_basic_block(bld
->pc
);
1737 struct nv_basic_block
*bb
= new_basic_block(bld
->pc
);
1739 assert(bld
->loop_lvl
< BLD_MAX_LOOP_NESTING
);
1741 bld
->loop_bb
[bld
->loop_lvl
] = bl
;
1742 bld
->brkt_bb
[bld
->loop_lvl
] = bb
;
1744 bld_flow(bld
, NV_OP_BREAKADDR
, NV_CC_TR
, NULL
, bb
, FALSE
);
1746 nvbb_attach_block(bld
->pc
->current_block
, bl
, CFG_EDGE_LOOP_ENTER
);
1748 bld_new_block(bld
, bld
->loop_bb
[bld
->loop_lvl
++]);
1750 if (bld
->loop_lvl
== bld
->pc
->loop_nesting_bound
)
1751 bld
->pc
->loop_nesting_bound
++;
1753 bld_clear_def_use(&bld
->tvs
[0][0], BLD_MAX_TEMPS
, bld
->loop_lvl
);
1754 bld_clear_def_use(&bld
->avs
[0][0], BLD_MAX_ADDRS
, bld
->loop_lvl
);
1755 bld_clear_def_use(&bld
->pvs
[0][0], BLD_MAX_PREDS
, bld
->loop_lvl
);
1758 case TGSI_OPCODE_BRK
:
1760 struct nv_basic_block
*bb
= bld
->brkt_bb
[bld
->loop_lvl
- 1];
1762 bld_flow(bld
, NV_OP_BREAK
, NV_CC_TR
, NULL
, bb
, FALSE
);
1764 if (bld
->out_kind
== CFG_EDGE_FORWARD
) /* else we already had BRK/CONT */
1765 nvbb_attach_block(bld
->pc
->current_block
, bb
, CFG_EDGE_LOOP_LEAVE
);
1767 bld
->out_kind
= CFG_EDGE_FAKE
;
1770 case TGSI_OPCODE_CONT
:
1772 struct nv_basic_block
*bb
= bld
->loop_bb
[bld
->loop_lvl
- 1];
1774 bld_flow(bld
, NV_OP_BRA
, NV_CC_TR
, NULL
, bb
, FALSE
);
1776 nvbb_attach_block(bld
->pc
->current_block
, bb
, CFG_EDGE_BACK
);
1778 if ((bb
= bld
->join_bb
[bld
->cond_lvl
- 1])) {
1779 bld
->join_bb
[bld
->cond_lvl
- 1] = NULL
;
1780 nv_nvi_delete(bb
->exit
->prev
);
1782 bld
->out_kind
= CFG_EDGE_FAKE
;
1785 case TGSI_OPCODE_ENDLOOP
:
1787 struct nv_basic_block
*bb
= bld
->loop_bb
[bld
->loop_lvl
- 1];
1789 bld_flow(bld
, NV_OP_BRA
, NV_CC_TR
, NULL
, bb
, FALSE
);
1791 nvbb_attach_block(bld
->pc
->current_block
, bb
, CFG_EDGE_BACK
);
1793 bld_loop_end(bld
, bb
); /* replace loop-side operand of the phis */
1795 bld_new_block(bld
, bld
->brkt_bb
[--bld
->loop_lvl
]);
1798 case TGSI_OPCODE_ABS
:
1799 case TGSI_OPCODE_CEIL
:
1800 case TGSI_OPCODE_FLR
:
1801 case TGSI_OPCODE_TRUNC
:
1802 case TGSI_OPCODE_DDX
:
1803 case TGSI_OPCODE_DDY
:
1804 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
) {
1805 src0
= emit_fetch(bld
, insn
, 0, c
);
1806 dst0
[c
] = bld_insn_1(bld
, opcode
, src0
);
1809 case TGSI_OPCODE_LIT
:
1810 bld_lit(bld
, dst0
, insn
);
1812 case TGSI_OPCODE_LRP
:
1813 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
) {
1814 src0
= emit_fetch(bld
, insn
, 0, c
);
1815 src1
= emit_fetch(bld
, insn
, 1, c
);
1816 src2
= emit_fetch(bld
, insn
, 2, c
);
1817 dst0
[c
] = bld_insn_2(bld
, NV_OP_SUB
, src1
, src2
);
1818 dst0
[c
] = bld_insn_3(bld
, NV_OP_MAD
, dst0
[c
], src0
, src2
);
1821 case TGSI_OPCODE_MOV
:
1822 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
)
1823 dst0
[c
] = emit_fetch(bld
, insn
, 0, c
);
1825 case TGSI_OPCODE_MAD
:
1826 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
) {
1827 src0
= emit_fetch(bld
, insn
, 0, c
);
1828 src1
= emit_fetch(bld
, insn
, 1, c
);
1829 src2
= emit_fetch(bld
, insn
, 2, c
);
1830 dst0
[c
] = bld_insn_3(bld
, opcode
, src0
, src1
, src2
);
1833 case TGSI_OPCODE_POW
:
1834 src0
= emit_fetch(bld
, insn
, 0, 0);
1835 src1
= emit_fetch(bld
, insn
, 1, 0);
1836 temp
= bld_pow(bld
, src0
, src1
);
1837 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
)
1840 case TGSI_OPCODE_LOG
:
1841 src0
= emit_fetch(bld
, insn
, 0, 0);
1842 src0
= bld_insn_1(bld
, NV_OP_ABS
, src0
);
1843 temp
= bld_insn_1(bld
, NV_OP_LG2
, src0
);
1845 if (insn
->Dst
[0].Register
.WriteMask
& 3) {
1846 temp
= bld_insn_1(bld
, NV_OP_FLOOR
, temp
);
1849 if (insn
->Dst
[0].Register
.WriteMask
& 2) {
1850 temp
= bld_insn_1(bld
, NV_OP_PREEX2
, temp
);
1851 temp
= bld_insn_1(bld
, NV_OP_EX2
, temp
);
1852 temp
= bld_insn_1(bld
, NV_OP_RCP
, temp
);
1853 dst0
[1] = bld_insn_2(bld
, NV_OP_MUL
, src0
, temp
);
1855 if (insn
->Dst
[0].Register
.WriteMask
& 8)
1856 dst0
[3] = bld_imm_f32(bld
, 1.0f
);
1858 case TGSI_OPCODE_RCP
:
1859 case TGSI_OPCODE_LG2
:
1860 src0
= emit_fetch(bld
, insn
, 0, 0);
1861 temp
= bld_insn_1(bld
, opcode
, src0
);
1862 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
)
1865 case TGSI_OPCODE_RSQ
:
1866 src0
= emit_fetch(bld
, insn
, 0, 0);
1867 temp
= bld_insn_1(bld
, NV_OP_ABS
, src0
);
1868 temp
= bld_insn_1(bld
, NV_OP_RSQ
, temp
);
1869 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
)
1872 case TGSI_OPCODE_SLT
:
1873 case TGSI_OPCODE_SGE
:
1874 case TGSI_OPCODE_SEQ
:
1875 case TGSI_OPCODE_SGT
:
1876 case TGSI_OPCODE_SLE
:
1877 case TGSI_OPCODE_SNE
:
1878 case TGSI_OPCODE_ISLT
:
1879 case TGSI_OPCODE_ISGE
:
1880 case TGSI_OPCODE_USEQ
:
1881 case TGSI_OPCODE_USGE
:
1882 case TGSI_OPCODE_USLT
:
1883 case TGSI_OPCODE_USNE
:
1884 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
) {
1885 src0
= emit_fetch(bld
, insn
, 0, c
);
1886 src1
= emit_fetch(bld
, insn
, 1, c
);
1887 dst0
[c
] = bld_insn_2(bld
, NV_OP_SET
, src0
, src1
);
1888 dst0
[c
]->insn
->set_cond
= translate_setcc(insn
->Instruction
.Opcode
);
1889 SET_TYPE(dst0
[c
], infer_dst_type(insn
->Instruction
.Opcode
));
1891 dst0
[c
]->insn
->src
[0]->typecast
=
1892 dst0
[c
]->insn
->src
[1]->typecast
=
1893 infer_src_type(insn
->Instruction
.Opcode
);
1895 if (dst0
[c
]->reg
.type
!= NV_TYPE_F32
)
1897 dst0
[c
]->reg
.as_type
= NV_TYPE_S32
;
1898 dst0
[c
] = bld_insn_1(bld
, NV_OP_ABS
, dst0
[c
]);
1899 dst0
[c
] = bld_insn_1(bld
, NV_OP_CVT
, dst0
[c
]);
1900 SET_TYPE(dst0
[c
], NV_TYPE_F32
);
1903 case TGSI_OPCODE_SCS
:
1904 if (insn
->Dst
[0].Register
.WriteMask
& 0x3) {
1905 src0
= emit_fetch(bld
, insn
, 0, 0);
1906 temp
= bld_insn_1(bld
, NV_OP_PRESIN
, src0
);
1907 if (insn
->Dst
[0].Register
.WriteMask
& 0x1)
1908 dst0
[0] = bld_insn_1(bld
, NV_OP_COS
, temp
);
1909 if (insn
->Dst
[0].Register
.WriteMask
& 0x2)
1910 dst0
[1] = bld_insn_1(bld
, NV_OP_SIN
, temp
);
1912 if (insn
->Dst
[0].Register
.WriteMask
& 0x4)
1913 dst0
[2] = bld_imm_f32(bld
, 0.0f
);
1914 if (insn
->Dst
[0].Register
.WriteMask
& 0x8)
1915 dst0
[3] = bld_imm_f32(bld
, 1.0f
);
1917 case TGSI_OPCODE_SSG
:
1918 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
) {
1919 src0
= emit_fetch(bld
, insn
, 0, c
);
1920 src1
= bld_predicate(bld
, src0
, FALSE
);
1921 temp
= bld_insn_2(bld
, NV_OP_AND
, src0
, bld_imm_u32(bld
, 0x80000000));
1922 temp
= bld_insn_2(bld
, NV_OP_OR
, temp
, bld_imm_f32(bld
, 1.0f
));
1923 dst0
[c
] = bld_insn_2(bld
, NV_OP_XOR
, temp
, temp
);
1924 dst0
[c
]->insn
->cc
= NV_CC_EQ
;
1925 nv_reference(bld
->pc
, &dst0
[c
]->insn
->flags_src
, src1
);
1928 case TGSI_OPCODE_SUB
:
1929 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
) {
1930 src0
= emit_fetch(bld
, insn
, 0, c
);
1931 src1
= emit_fetch(bld
, insn
, 1, c
);
1932 dst0
[c
] = bld_insn_2(bld
, NV_OP_ADD
, src0
, src1
);
1933 dst0
[c
]->insn
->src
[1]->mod
^= NV_MOD_NEG
;
1936 case TGSI_OPCODE_TEX
:
1937 case TGSI_OPCODE_TXB
:
1938 case TGSI_OPCODE_TXL
:
1939 case TGSI_OPCODE_TXP
:
1940 bld_tex(bld
, dst0
, insn
);
1942 case TGSI_OPCODE_XPD
:
1943 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
) {
1945 dst0
[3] = bld_imm_f32(bld
, 1.0f
);
1948 src0
= emit_fetch(bld
, insn
, 1, (c
+ 1) % 3);
1949 src1
= emit_fetch(bld
, insn
, 0, (c
+ 2) % 3);
1950 dst0
[c
] = bld_insn_2(bld
, NV_OP_MUL
, src0
, src1
);
1952 src0
= emit_fetch(bld
, insn
, 0, (c
+ 1) % 3);
1953 src1
= emit_fetch(bld
, insn
, 1, (c
+ 2) % 3);
1954 dst0
[c
] = bld_insn_3(bld
, NV_OP_MAD
, src0
, src1
, dst0
[c
]);
1956 dst0
[c
]->insn
->src
[2]->mod
^= NV_MOD_NEG
;
1959 case TGSI_OPCODE_RET
:
1960 (new_instruction(bld
->pc
, NV_OP_RET
))->fixed
= 1;
1962 case TGSI_OPCODE_END
:
1963 if (bld
->ti
->p
->type
== PIPE_SHADER_FRAGMENT
)
1964 bld_export_outputs(bld
);
1967 NOUVEAU_ERR("unhandled opcode %u\n", insn
->Instruction
.Opcode
);
1972 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
)
1973 emit_store(bld
, insn
, c
, dst0
[c
]);
1977 bld_free_value_trackers(struct bld_value_stack
*base
, int n
)
1981 for (i
= 0; i
< n
; ++i
)
1982 for (c
= 0; c
< 4; ++c
)
1983 if (base
[i
* 4 + c
].body
)
1984 FREE(base
[i
* 4 + c
].body
);
1988 nv50_tgsi_to_nc(struct nv_pc
*pc
, struct nv50_translation_info
*ti
)
1990 struct bld_context
*bld
= CALLOC_STRUCT(bld_context
);
1994 pc
->root
[0] = pc
->current_block
= new_basic_block(pc
);
1999 pc
->loop_nesting_bound
= 1;
2001 c
= util_bitcount(bld
->ti
->p
->fp
.interp
>> 24);
2002 if (c
&& ti
->p
->type
== PIPE_SHADER_FRAGMENT
) {
2003 bld
->frgcrd
[3] = new_value(pc
, NV_FILE_MEM_V
, NV_TYPE_F32
);
2004 bld
->frgcrd
[3]->reg
.id
= c
- 1;
2005 bld
->frgcrd
[3] = bld_insn_1(bld
, NV_OP_LINTERP
, bld
->frgcrd
[3]);
2006 bld
->frgcrd
[3] = bld_insn_1(bld
, NV_OP_RCP
, bld
->frgcrd
[3]);
2009 for (ip
= 0; ip
< ti
->inst_nr
; ++ip
)
2010 bld_instruction(bld
, &ti
->insns
[ip
]);
2012 bld_free_value_trackers(&bld
->tvs
[0][0], BLD_MAX_TEMPS
);
2013 bld_free_value_trackers(&bld
->avs
[0][0], BLD_MAX_ADDRS
);
2014 bld_free_value_trackers(&bld
->pvs
[0][0], BLD_MAX_PREDS
);
2016 bld_free_value_trackers(&bld
->ovs
[0][0], PIPE_MAX_SHADER_OUTPUTS
);
2022 /* If a variable is assigned in a loop, replace all references to the value
2023 * from outside the loop with a phi value.
2026 bld_replace_value(struct nv_pc
*pc
, struct nv_basic_block
*b
,
2027 struct nv_value
*old_val
,
2028 struct nv_value
*new_val
)
2030 struct nv_instruction
*nvi
;
2032 for (nvi
= b
->phi
? b
->phi
: b
->entry
; nvi
; nvi
= nvi
->next
) {
2034 for (s
= 0; s
< 5; ++s
) {
2037 if (nvi
->src
[s
]->value
== old_val
)
2038 nv_reference(pc
, &nvi
->src
[s
], new_val
);
2040 if (nvi
->flags_src
&& nvi
->flags_src
->value
== old_val
)
2041 nv_reference(pc
, &nvi
->flags_src
, new_val
);
2044 b
->pass_seq
= pc
->pass_seq
;
2046 if (b
->out
[0] && b
->out
[0]->pass_seq
< pc
->pass_seq
)
2047 bld_replace_value(pc
, b
->out
[0], old_val
, new_val
);
2049 if (b
->out
[1] && b
->out
[1]->pass_seq
< pc
->pass_seq
)
2050 bld_replace_value(pc
, b
->out
[1], old_val
, new_val
);