2 * Copyright 2010 Christoph Bumiller
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
25 #include "nv50_context.h"
28 #include "pipe/p_shader_tokens.h"
29 #include "tgsi/tgsi_parse.h"
30 #include "tgsi/tgsi_util.h"
32 #include "tgsi/tgsi_dump.h"
34 #define BLD_MAX_TEMPS 64
35 #define BLD_MAX_ADDRS 4
36 #define BLD_MAX_PREDS 4
37 #define BLD_MAX_IMMDS 128
39 #define BLD_MAX_COND_NESTING 8
40 #define BLD_MAX_LOOP_NESTING 4
41 #define BLD_MAX_CALL_NESTING 2
43 /* collects all values assigned to the same TGSI register */
44 struct bld_value_stack
{
46 struct nv_value
**body
;
48 uint16_t loop_use
; /* 1 bit per loop level, indicates if used/defd */
53 bld_vals_push_val(struct bld_value_stack
*stk
, struct nv_value
*val
)
55 assert(!stk
->size
|| (stk
->body
[stk
->size
- 1] != val
));
57 if (!(stk
->size
% 8)) {
58 unsigned old_sz
= (stk
->size
+ 0) * sizeof(struct nv_value
*);
59 unsigned new_sz
= (stk
->size
+ 8) * sizeof(struct nv_value
*);
60 stk
->body
= (struct nv_value
**)REALLOC(stk
->body
, old_sz
, new_sz
);
62 stk
->body
[stk
->size
++] = val
;
66 bld_vals_del_val(struct bld_value_stack
*stk
, struct nv_value
*val
)
70 for (i
= stk
->size
; i
> 0; --i
)
71 if (stk
->body
[i
- 1] == val
)
77 stk
->body
[i
- 1] = stk
->body
[stk
->size
- 1];
79 --stk
->size
; /* XXX: old size in REALLOC */
84 bld_vals_push(struct bld_value_stack
*stk
)
86 bld_vals_push_val(stk
, stk
->top
);
91 bld_push_values(struct bld_value_stack
*stacks
, int n
)
95 for (i
= 0; i
< n
; ++i
)
96 for (c
= 0; c
< 4; ++c
)
97 if (stacks
[i
* 4 + c
].top
)
98 bld_vals_push(&stacks
[i
* 4 + c
]);
102 struct nv50_translation_info
*ti
;
105 struct nv_basic_block
*b
;
107 struct tgsi_parse_context parse
[BLD_MAX_CALL_NESTING
];
110 struct nv_basic_block
*cond_bb
[BLD_MAX_COND_NESTING
];
111 struct nv_basic_block
*join_bb
[BLD_MAX_COND_NESTING
];
112 struct nv_basic_block
*else_bb
[BLD_MAX_COND_NESTING
];
114 struct nv_basic_block
*loop_bb
[BLD_MAX_LOOP_NESTING
];
115 struct nv_basic_block
*brkt_bb
[BLD_MAX_LOOP_NESTING
];
118 ubyte out_kind
; /* CFG_EDGE_FORWARD, or FAKE in case of BREAK/CONT */
120 struct bld_value_stack tvs
[BLD_MAX_TEMPS
][4]; /* TGSI_FILE_TEMPORARY */
121 struct bld_value_stack avs
[BLD_MAX_ADDRS
][4]; /* TGSI_FILE_ADDRESS */
122 struct bld_value_stack pvs
[BLD_MAX_PREDS
][4]; /* TGSI_FILE_PREDICATE */
123 struct bld_value_stack ovs
[PIPE_MAX_SHADER_OUTPUTS
][4];
125 uint32_t outputs_written
[(PIPE_MAX_SHADER_OUTPUTS
+ 7) / 8];
127 struct nv_value
*frgcrd
[4];
128 struct nv_value
*sysval
[4];
131 struct nv_value
*saved_addr
[4][2];
132 struct nv_value
*saved_inputs
[128];
133 struct nv_value
*saved_immd
[BLD_MAX_IMMDS
];
138 bld_stack_file(struct bld_context
*bld
, struct bld_value_stack
*stk
)
140 if (stk
< &bld
->avs
[0][0])
143 if (stk
< &bld
->pvs
[0][0])
146 if (stk
< &bld
->ovs
[0][0])
147 return NV_FILE_FLAGS
;
152 static INLINE
struct nv_value
*
153 bld_fetch(struct bld_context
*bld
, struct bld_value_stack
*stk
, int i
, int c
)
155 stk
[i
* 4 + c
].loop_use
|= 1 << bld
->loop_lvl
;
157 return stk
[i
* 4 + c
].top
;
160 static struct nv_value
*
161 bld_loop_phi(struct bld_context
*, struct bld_value_stack
*, struct nv_value
*);
163 /* If a variable is defined in a loop without prior use, we don't need
164 * a phi in the loop header to account for backwards flow.
166 * However, if this variable is then also used outside the loop, we do
167 * need a phi after all. But we must not use this phi's def inside the
168 * loop, so we can eliminate the phi if it is unused later.
171 bld_store(struct bld_context
*bld
, struct bld_value_stack
*stk
, int i
, int c
,
172 struct nv_value
*val
)
174 const uint16_t m
= 1 << bld
->loop_lvl
;
176 stk
= &stk
[i
* 4 + c
];
178 if (bld
->loop_lvl
&& !(m
& (stk
->loop_def
| stk
->loop_use
)))
179 bld_loop_phi(bld
, stk
, val
);
182 stk
->loop_def
|= 1 << bld
->loop_lvl
;
186 bld_clear_def_use(struct bld_value_stack
*stk
, int n
, int lvl
)
189 const uint16_t mask
= ~(1 << lvl
);
191 for (i
= 0; i
< n
* 4; ++i
) {
192 stk
[i
].loop_def
&= mask
;
193 stk
[i
].loop_use
&= mask
;
197 #define FETCH_TEMP(i, c) bld_fetch(bld, &bld->tvs[0][0], i, c)
198 #define STORE_TEMP(i, c, v) bld_store(bld, &bld->tvs[0][0], i, c, (v))
199 #define FETCH_ADDR(i, c) bld_fetch(bld, &bld->avs[0][0], i, c)
200 #define STORE_ADDR(i, c, v) bld_store(bld, &bld->avs[0][0], i, c, (v))
201 #define FETCH_PRED(i, c) bld_fetch(bld, &bld->pvs[0][0], i, c)
202 #define STORE_PRED(i, c, v) bld_store(bld, &bld->pvs[0][0], i, c, (v))
204 #define STORE_OUTR(i, c, v) \
206 bld->ovs[i][c].top = (v); \
207 bld->outputs_written[(i) / 8] |= 1 << (((i) * 4 + (c)) % 32); \
211 bld_warn_uninitialized(struct bld_context
*bld
, int kind
,
212 struct bld_value_stack
*stk
, struct nv_basic_block
*b
)
214 #if NV50_DEBUG & NV50_DEBUG_PROG_IR
215 long i
= (stk
- &bld
->tvs
[0][0]) / 4;
216 long c
= (stk
- &bld
->tvs
[0][0]) & 3;
221 debug_printf("WARNING: TEMP[%li].%c %s used uninitialized in BB:%i\n",
222 i
, (int)('x' + c
), kind
? "may be" : "is", b
->id
);
226 static INLINE
struct nv_value
*
227 bld_def(struct nv_instruction
*i
, int c
, struct nv_value
*value
)
234 static INLINE
struct nv_value
*
235 find_by_bb(struct bld_value_stack
*stack
, struct nv_basic_block
*b
)
239 if (stack
->top
&& stack
->top
->insn
->bb
== b
)
242 for (i
= stack
->size
- 1; i
>= 0; --i
)
243 if (stack
->body
[i
]->insn
->bb
== b
)
244 return stack
->body
[i
];
248 /* fetch value from stack that was defined in the specified basic block,
249 * or search for first definitions in all of its predecessors
252 fetch_by_bb(struct bld_value_stack
*stack
,
253 struct nv_value
**vals
, int *n
,
254 struct nv_basic_block
*b
)
257 struct nv_value
*val
;
259 assert(*n
< 16); /* MAX_COND_NESTING */
261 val
= find_by_bb(stack
, b
);
263 for (i
= 0; i
< *n
; ++i
)
269 for (i
= 0; i
< b
->num_in
; ++i
)
270 if (!IS_WALL_EDGE(b
->in_kind
[i
]))
271 fetch_by_bb(stack
, vals
, n
, b
->in
[i
]);
274 static INLINE boolean
275 nvbb_is_terminated(struct nv_basic_block
*bb
)
277 return bb
->exit
&& bb
->exit
->is_terminator
;
280 static INLINE
struct nv_value
*
281 bld_load_imm_u32(struct bld_context
*bld
, uint32_t u
);
283 static INLINE
struct nv_value
*
284 bld_undef(struct bld_context
*bld
, ubyte file
)
286 struct nv_instruction
*nvi
= new_instruction(bld
->pc
, NV_OP_UNDEF
);
288 return bld_def(nvi
, 0, new_value(bld
->pc
, file
, NV_TYPE_U32
));
291 static struct nv_value
*
292 bld_phi(struct bld_context
*bld
, struct nv_basic_block
*b
,
293 struct bld_value_stack
*stack
)
295 struct nv_basic_block
*in
;
296 struct nv_value
*vals
[16] = { 0 };
297 struct nv_value
*val
;
298 struct nv_instruction
*phi
;
303 fetch_by_bb(stack
, vals
, &n
, b
);
306 bld_warn_uninitialized(bld
, 0, stack
, b
);
311 if (nvbb_dominated_by(b
, vals
[0]->insn
->bb
))
314 bld_warn_uninitialized(bld
, 1, stack
, b
);
316 /* back-tracking to insert missing value of other path */
319 if (in
->num_in
== 1) {
322 if (!nvbb_reachable_by(in
->in
[0], vals
[0]->insn
->bb
, b
))
325 if (!nvbb_reachable_by(in
->in
[1], vals
[0]->insn
->bb
, b
))
331 bld
->pc
->current_block
= in
;
333 /* should make this a no-op */
334 bld_vals_push_val(stack
, bld_undef(bld
, vals
[0]->reg
.file
));
338 for (i
= 0; i
< n
; ++i
) {
339 /* if value dominates b, continue to the redefinitions */
340 if (nvbb_dominated_by(b
, vals
[i
]->insn
->bb
))
343 /* if value dominates any in-block, b should be the dom frontier */
344 for (j
= 0; j
< b
->num_in
; ++j
)
345 if (nvbb_dominated_by(b
->in
[j
], vals
[i
]->insn
->bb
))
347 /* otherwise, find the dominance frontier and put the phi there */
348 if (j
== b
->num_in
) {
349 in
= nvbb_dom_frontier(vals
[i
]->insn
->bb
);
350 val
= bld_phi(bld
, in
, stack
);
351 bld_vals_push_val(stack
, val
);
357 bld
->pc
->current_block
= b
;
362 phi
= new_instruction(bld
->pc
, NV_OP_PHI
);
364 bld_def(phi
, 0, new_value(bld
->pc
, vals
[0]->reg
.file
, vals
[0]->reg
.type
));
365 for (i
= 0; i
< n
; ++i
)
366 phi
->src
[i
] = new_ref(bld
->pc
, vals
[i
]);
371 /* Insert a phi function in the loop header.
372 * For nested loops, we need to insert phi functions in all the outer
373 * loop headers if they don't have one yet.
375 * @def: redefinition from inside loop, or NULL if to be replaced later
377 static struct nv_value
*
378 bld_loop_phi(struct bld_context
*bld
, struct bld_value_stack
*stack
,
379 struct nv_value
*def
)
381 struct nv_instruction
*phi
;
382 struct nv_basic_block
*bb
= bld
->pc
->current_block
;
383 struct nv_value
*val
= NULL
;
385 if (bld
->loop_lvl
> 1) {
387 if (!((stack
->loop_def
| stack
->loop_use
) & (1 << bld
->loop_lvl
)))
388 val
= bld_loop_phi(bld
, stack
, NULL
);
393 val
= bld_phi(bld
, bld
->pc
->current_block
, stack
); /* old definition */
395 bld
->pc
->current_block
= bld
->loop_bb
[bld
->loop_lvl
- 1]->in
[0];
396 val
= bld_undef(bld
, bld_stack_file(bld
, stack
));
399 bld
->pc
->current_block
= bld
->loop_bb
[bld
->loop_lvl
- 1];
401 phi
= new_instruction(bld
->pc
, NV_OP_PHI
);
403 bld_def(phi
, 0, new_value_like(bld
->pc
, val
));
407 bld_vals_push_val(stack
, phi
->def
[0]);
409 phi
->target
= (struct nv_basic_block
*)stack
; /* cheat */
411 nv_reference(bld
->pc
, &phi
->src
[0], val
);
412 nv_reference(bld
->pc
, &phi
->src
[1], def
);
414 bld
->pc
->current_block
= bb
;
419 static INLINE
struct nv_value
*
420 bld_fetch_global(struct bld_context
*bld
, struct bld_value_stack
*stack
)
422 const uint16_t m
= 1 << bld
->loop_lvl
;
423 const uint16_t use
= stack
->loop_use
;
425 stack
->loop_use
|= m
;
427 /* If neither used nor def'd inside the loop, build a phi in foresight,
428 * so we don't have to replace stuff later on, which requires tracking.
430 if (bld
->loop_lvl
&& !((use
| stack
->loop_def
) & m
))
431 return bld_loop_phi(bld
, stack
, NULL
);
433 return bld_phi(bld
, bld
->pc
->current_block
, stack
);
436 static INLINE
struct nv_value
*
437 bld_imm_u32(struct bld_context
*bld
, uint32_t u
)
440 unsigned n
= bld
->num_immds
;
442 for (i
= 0; i
< n
; ++i
)
443 if (bld
->saved_immd
[i
]->reg
.imm
.u32
== u
)
444 return bld
->saved_immd
[i
];
445 assert(n
< BLD_MAX_IMMDS
);
449 bld
->saved_immd
[n
] = new_value(bld
->pc
, NV_FILE_IMM
, NV_TYPE_U32
);
450 bld
->saved_immd
[n
]->reg
.imm
.u32
= u
;
451 return bld
->saved_immd
[n
];
455 bld_replace_value(struct nv_pc
*, struct nv_basic_block
*, struct nv_value
*,
458 /* Replace the source of the phi in the loop header by the last assignment,
459 * or eliminate the phi function if there is no assignment inside the loop.
461 * Redundancy situation 1 - (used) but (not redefined) value:
462 * %3 = phi %0, %3 = %3 is used
463 * %3 = phi %0, %4 = is new definition
465 * Redundancy situation 2 - (not used) but (redefined) value:
466 * %3 = phi %0, %2 = %2 is used, %3 could be used outside, deleted by DCE
469 bld_loop_end(struct bld_context
*bld
, struct nv_basic_block
*bb
)
471 struct nv_basic_block
*save
= bld
->pc
->current_block
;
472 struct nv_instruction
*phi
, *next
;
473 struct nv_value
*val
;
474 struct bld_value_stack
*stk
;
477 for (phi
= bb
->phi
; phi
&& phi
->opcode
== NV_OP_PHI
; phi
= next
) {
480 stk
= (struct bld_value_stack
*)phi
->target
;
483 /* start with s == 1, src[0] is from outside the loop */
484 for (s
= 1, n
= 0; n
< bb
->num_in
; ++n
) {
485 if (bb
->in_kind
[n
] != CFG_EDGE_BACK
)
489 bld
->pc
->current_block
= bb
->in
[n
];
490 val
= bld_fetch_global(bld
, stk
);
492 for (i
= 0; i
< 4; ++i
)
493 if (phi
->src
[i
] && phi
->src
[i
]->value
== val
)
496 /* skip values we do not want to replace */
497 for (; phi
->src
[s
] && phi
->src
[s
]->value
!= phi
->def
[0]; ++s
);
498 nv_reference(bld
->pc
, &phi
->src
[s
++], val
);
501 bld
->pc
->current_block
= save
;
503 if (phi
->src
[0]->value
== phi
->def
[0] ||
504 phi
->src
[0]->value
== phi
->src
[1]->value
)
507 if (phi
->src
[1]->value
== phi
->def
[0])
513 /* eliminate the phi */
514 bld_vals_del_val(stk
, phi
->def
[0]);
517 bld_replace_value(bld
->pc
, bb
, phi
->def
[0], phi
->src
[s
]->value
);
524 static INLINE
struct nv_value
*
525 bld_imm_f32(struct bld_context
*bld
, float f
)
527 return bld_imm_u32(bld
, fui(f
));
530 #define SET_TYPE(v, t) ((v)->reg.type = (v)->reg.as_type = (t))
532 static struct nv_value
*
533 bld_insn_1(struct bld_context
*bld
, uint opcode
, struct nv_value
*src0
)
535 struct nv_instruction
*insn
= new_instruction(bld
->pc
, opcode
);
537 nv_reference(bld
->pc
, &insn
->src
[0], src0
);
539 return bld_def(insn
, 0, new_value(bld
->pc
, NV_FILE_GPR
, src0
->reg
.as_type
));
542 static struct nv_value
*
543 bld_insn_2(struct bld_context
*bld
, uint opcode
,
544 struct nv_value
*src0
, struct nv_value
*src1
)
546 struct nv_instruction
*insn
= new_instruction(bld
->pc
, opcode
);
548 nv_reference(bld
->pc
, &insn
->src
[0], src0
);
549 nv_reference(bld
->pc
, &insn
->src
[1], src1
);
551 return bld_def(insn
, 0, new_value(bld
->pc
, NV_FILE_GPR
, src0
->reg
.as_type
));
554 static struct nv_value
*
555 bld_insn_3(struct bld_context
*bld
, uint opcode
,
556 struct nv_value
*src0
, struct nv_value
*src1
,
557 struct nv_value
*src2
)
559 struct nv_instruction
*insn
= new_instruction(bld
->pc
, opcode
);
561 nv_reference(bld
->pc
, &insn
->src
[0], src0
);
562 nv_reference(bld
->pc
, &insn
->src
[1], src1
);
563 nv_reference(bld
->pc
, &insn
->src
[2], src2
);
565 return bld_def(insn
, 0, new_value(bld
->pc
, NV_FILE_GPR
, src0
->reg
.as_type
));
568 static struct nv_value
*
569 bld_duplicate_insn(struct bld_context
*bld
, struct nv_instruction
*nvi
)
571 struct nv_instruction
*dupi
= new_instruction(bld
->pc
, nvi
->opcode
);
575 bld_def(dupi
, 0, new_value_like(bld
->pc
, nvi
->def
[0]));
577 if (nvi
->flags_def
) {
578 dupi
->flags_def
= new_value_like(bld
->pc
, nvi
->flags_def
);
579 dupi
->flags_def
->insn
= dupi
;
582 for (c
= 0; c
< 5; ++c
)
584 nv_reference(bld
->pc
, &dupi
->src
[c
], nvi
->src
[c
]->value
);
586 nv_reference(bld
->pc
, &dupi
->flags_src
, nvi
->flags_src
->value
);
589 dupi
->saturate
= nvi
->saturate
;
590 dupi
->centroid
= nvi
->centroid
;
591 dupi
->flat
= nvi
->flat
;
597 bld_lmem_store(struct bld_context
*bld
, struct nv_value
*ptr
, int ofst
,
598 struct nv_value
*val
)
600 struct nv_instruction
*insn
= new_instruction(bld
->pc
, NV_OP_STA
);
601 struct nv_value
*loc
;
603 loc
= new_value(bld
->pc
, NV_FILE_MEM_L
, NV_TYPE_U32
);
605 loc
->reg
.id
= ofst
* 4;
607 nv_reference(bld
->pc
, &insn
->src
[0], loc
);
608 nv_reference(bld
->pc
, &insn
->src
[1], val
);
609 nv_reference(bld
->pc
, &insn
->src
[4], ptr
);
612 static struct nv_value
*
613 bld_lmem_load(struct bld_context
*bld
, struct nv_value
*ptr
, int ofst
)
615 struct nv_value
*loc
, *val
;
617 loc
= new_value(bld
->pc
, NV_FILE_MEM_L
, NV_TYPE_U32
);
619 loc
->reg
.id
= ofst
* 4;
621 val
= bld_insn_1(bld
, NV_OP_LDA
, loc
);
623 nv_reference(bld
->pc
, &val
->insn
->src
[4], ptr
);
628 #define BLD_INSN_1_EX(d, op, dt, s0, s0t) \
630 (d) = bld_insn_1(bld, (NV_OP_##op), (s0)); \
631 SET_TYPE(d, NV_TYPE_##dt); \
632 (d)->insn->src[0]->typecast = NV_TYPE_##s0t; \
635 #define BLD_INSN_2_EX(d, op, dt, s0, s0t, s1, s1t) \
637 (d) = bld_insn_2(bld, (NV_OP_##op), (s0), (s1)); \
638 SET_TYPE(d, NV_TYPE_##dt); \
639 (d)->insn->src[0]->typecast = NV_TYPE_##s0t; \
640 (d)->insn->src[1]->typecast = NV_TYPE_##s1t; \
643 static struct nv_value
*
644 bld_pow(struct bld_context
*bld
, struct nv_value
*x
, struct nv_value
*e
)
646 struct nv_value
*val
;
648 BLD_INSN_1_EX(val
, LG2
, F32
, x
, F32
);
649 BLD_INSN_2_EX(val
, MUL
, F32
, e
, F32
, val
, F32
);
650 val
= bld_insn_1(bld
, NV_OP_PREEX2
, val
);
651 val
= bld_insn_1(bld
, NV_OP_EX2
, val
);
656 static INLINE
struct nv_value
*
657 bld_load_imm_f32(struct bld_context
*bld
, float f
)
659 struct nv_value
*imm
= bld_insn_1(bld
, NV_OP_MOV
, bld_imm_f32(bld
, f
));
661 SET_TYPE(imm
, NV_TYPE_F32
);
665 static INLINE
struct nv_value
*
666 bld_load_imm_u32(struct bld_context
*bld
, uint32_t u
)
668 return bld_insn_1(bld
, NV_OP_MOV
, bld_imm_u32(bld
, u
));
671 static struct nv_value
*
672 bld_get_address(struct bld_context
*bld
, int id
, struct nv_value
*indirect
)
675 struct nv_instruction
*nvi
;
676 struct nv_value
*val
;
678 for (i
= 0; i
< 4; ++i
) {
679 if (!bld
->saved_addr
[i
][0])
681 if (bld
->saved_addr
[i
][1] == indirect
) {
682 nvi
= bld
->saved_addr
[i
][0]->insn
;
683 if (nvi
->src
[0]->value
->reg
.imm
.u32
== id
)
684 return bld
->saved_addr
[i
][0];
689 val
= bld_imm_u32(bld
, id
);
691 val
= bld_insn_2(bld
, NV_OP_ADD
, indirect
, val
);
693 val
= bld_insn_1(bld
, NV_OP_MOV
, val
);
695 bld
->saved_addr
[i
][0] = val
;
696 bld
->saved_addr
[i
][0]->reg
.file
= NV_FILE_ADDR
;
697 bld
->saved_addr
[i
][0]->reg
.type
= NV_TYPE_U16
;
698 bld
->saved_addr
[i
][1] = indirect
;
699 return bld
->saved_addr
[i
][0];
703 static struct nv_value
*
704 bld_predicate(struct bld_context
*bld
, struct nv_value
*src
, boolean bool_only
)
706 struct nv_instruction
*s0i
, *nvi
= src
->insn
;
709 nvi
= bld_insn_1(bld
,
710 (src
->reg
.file
== NV_FILE_IMM
) ? NV_OP_MOV
: NV_OP_LDA
,
715 while (nvi
->opcode
== NV_OP_ABS
|| nvi
->opcode
== NV_OP_NEG
||
716 nvi
->opcode
== NV_OP_CVT
) {
717 s0i
= nvi
->src
[0]->value
->insn
;
718 if (!s0i
|| !nv50_op_can_write_flags(s0i
->opcode
))
721 assert(!nvi
->flags_src
);
725 if (!nv50_op_can_write_flags(nvi
->opcode
) ||
726 nvi
->bb
!= bld
->pc
->current_block
) {
727 nvi
= new_instruction(bld
->pc
, NV_OP_CVT
);
728 nv_reference(bld
->pc
, &nvi
->src
[0], src
);
731 if (!nvi
->flags_def
) {
732 nvi
->flags_def
= new_value(bld
->pc
, NV_FILE_FLAGS
, NV_TYPE_U16
);
733 nvi
->flags_def
->insn
= nvi
;
735 return nvi
->flags_def
;
739 bld_kil(struct bld_context
*bld
, struct nv_value
*src
)
741 struct nv_instruction
*nvi
;
743 src
= bld_predicate(bld
, src
, FALSE
);
744 nvi
= new_instruction(bld
->pc
, NV_OP_KIL
);
746 nvi
->flags_src
= new_ref(bld
->pc
, src
);
751 bld_flow(struct bld_context
*bld
, uint opcode
, ubyte cc
,
752 struct nv_value
*src
, struct nv_basic_block
*target
,
753 boolean plan_reconverge
)
755 struct nv_instruction
*nvi
;
758 new_instruction(bld
->pc
, NV_OP_JOINAT
)->fixed
= 1;
760 nvi
= new_instruction(bld
->pc
, opcode
);
761 nvi
->is_terminator
= 1;
763 nvi
->target
= target
;
765 nvi
->flags_src
= new_ref(bld
->pc
, src
);
769 translate_setcc(unsigned opcode
)
772 case TGSI_OPCODE_SLT
: return NV_CC_LT
;
773 case TGSI_OPCODE_SGE
: return NV_CC_GE
;
774 case TGSI_OPCODE_SEQ
: return NV_CC_EQ
;
775 case TGSI_OPCODE_SGT
: return NV_CC_GT
;
776 case TGSI_OPCODE_SLE
: return NV_CC_LE
;
777 case TGSI_OPCODE_SNE
: return NV_CC_NE
| NV_CC_U
;
778 case TGSI_OPCODE_STR
: return NV_CC_TR
;
779 case TGSI_OPCODE_SFL
: return NV_CC_FL
;
781 case TGSI_OPCODE_ISLT
: return NV_CC_LT
;
782 case TGSI_OPCODE_ISGE
: return NV_CC_GE
;
783 case TGSI_OPCODE_USEQ
: return NV_CC_EQ
;
784 case TGSI_OPCODE_USGE
: return NV_CC_GE
;
785 case TGSI_OPCODE_USLT
: return NV_CC_LT
;
786 case TGSI_OPCODE_USNE
: return NV_CC_NE
;
794 translate_opcode(uint opcode
)
797 case TGSI_OPCODE_ABS
: return NV_OP_ABS
;
798 case TGSI_OPCODE_ADD
:
799 case TGSI_OPCODE_SUB
:
800 case TGSI_OPCODE_UADD
: return NV_OP_ADD
;
801 case TGSI_OPCODE_AND
: return NV_OP_AND
;
802 case TGSI_OPCODE_EX2
: return NV_OP_EX2
;
803 case TGSI_OPCODE_CEIL
: return NV_OP_CEIL
;
804 case TGSI_OPCODE_FLR
: return NV_OP_FLOOR
;
805 case TGSI_OPCODE_TRUNC
: return NV_OP_TRUNC
;
806 case TGSI_OPCODE_ROUND
: return NV_OP_ROUND
;
807 case TGSI_OPCODE_COS
: return NV_OP_COS
;
808 case TGSI_OPCODE_SIN
: return NV_OP_SIN
;
809 case TGSI_OPCODE_DDX
: return NV_OP_DFDX
;
810 case TGSI_OPCODE_DDY
: return NV_OP_DFDY
;
811 case TGSI_OPCODE_F2I
:
812 case TGSI_OPCODE_F2U
:
813 case TGSI_OPCODE_I2F
:
814 case TGSI_OPCODE_U2F
: return NV_OP_CVT
;
815 case TGSI_OPCODE_INEG
: return NV_OP_NEG
;
816 case TGSI_OPCODE_LG2
: return NV_OP_LG2
;
817 case TGSI_OPCODE_ISHR
:
818 case TGSI_OPCODE_USHR
: return NV_OP_SHR
;
819 case TGSI_OPCODE_MAD
:
820 case TGSI_OPCODE_UMAD
: return NV_OP_MAD
;
821 case TGSI_OPCODE_MAX
:
822 case TGSI_OPCODE_IMAX
:
823 case TGSI_OPCODE_UMAX
: return NV_OP_MAX
;
824 case TGSI_OPCODE_MIN
:
825 case TGSI_OPCODE_IMIN
:
826 case TGSI_OPCODE_UMIN
: return NV_OP_MIN
;
827 case TGSI_OPCODE_MUL
:
828 case TGSI_OPCODE_UMUL
: return NV_OP_MUL
;
829 case TGSI_OPCODE_OR
: return NV_OP_OR
;
830 case TGSI_OPCODE_RCP
: return NV_OP_RCP
;
831 case TGSI_OPCODE_RSQ
: return NV_OP_RSQ
;
832 case TGSI_OPCODE_SAD
: return NV_OP_SAD
;
833 case TGSI_OPCODE_SHL
: return NV_OP_SHL
;
834 case TGSI_OPCODE_SLT
:
835 case TGSI_OPCODE_SGE
:
836 case TGSI_OPCODE_SEQ
:
837 case TGSI_OPCODE_SGT
:
838 case TGSI_OPCODE_SLE
:
839 case TGSI_OPCODE_SNE
:
840 case TGSI_OPCODE_ISLT
:
841 case TGSI_OPCODE_ISGE
:
842 case TGSI_OPCODE_USEQ
:
843 case TGSI_OPCODE_USGE
:
844 case TGSI_OPCODE_USLT
:
845 case TGSI_OPCODE_USNE
: return NV_OP_SET
;
846 case TGSI_OPCODE_TEX
: return NV_OP_TEX
;
847 case TGSI_OPCODE_TXP
: return NV_OP_TEX
;
848 case TGSI_OPCODE_TXB
: return NV_OP_TXB
;
849 case TGSI_OPCODE_TXL
: return NV_OP_TXL
;
850 case TGSI_OPCODE_TXD
: return NV_OP_TEX
;
851 case TGSI_OPCODE_XOR
: return NV_OP_XOR
;
858 infer_src_type(unsigned opcode
)
861 case TGSI_OPCODE_MOV
:
862 case TGSI_OPCODE_AND
:
864 case TGSI_OPCODE_XOR
:
865 case TGSI_OPCODE_SAD
:
866 case TGSI_OPCODE_U2F
:
867 case TGSI_OPCODE_UADD
:
868 case TGSI_OPCODE_UDIV
:
869 case TGSI_OPCODE_UMOD
:
870 case TGSI_OPCODE_UMAD
:
871 case TGSI_OPCODE_UMUL
:
872 case TGSI_OPCODE_UMAX
:
873 case TGSI_OPCODE_UMIN
:
874 case TGSI_OPCODE_USEQ
:
875 case TGSI_OPCODE_USGE
:
876 case TGSI_OPCODE_USLT
:
877 case TGSI_OPCODE_USNE
:
878 case TGSI_OPCODE_USHR
:
880 case TGSI_OPCODE_I2F
:
881 case TGSI_OPCODE_IDIV
:
882 case TGSI_OPCODE_IMAX
:
883 case TGSI_OPCODE_IMIN
:
884 case TGSI_OPCODE_INEG
:
885 case TGSI_OPCODE_ISGE
:
886 case TGSI_OPCODE_ISHR
:
887 case TGSI_OPCODE_ISLT
:
895 infer_dst_type(unsigned opcode
)
898 case TGSI_OPCODE_MOV
:
899 case TGSI_OPCODE_F2U
:
900 case TGSI_OPCODE_AND
:
902 case TGSI_OPCODE_XOR
:
903 case TGSI_OPCODE_SAD
:
904 case TGSI_OPCODE_UADD
:
905 case TGSI_OPCODE_UDIV
:
906 case TGSI_OPCODE_UMOD
:
907 case TGSI_OPCODE_UMAD
:
908 case TGSI_OPCODE_UMUL
:
909 case TGSI_OPCODE_UMAX
:
910 case TGSI_OPCODE_UMIN
:
911 case TGSI_OPCODE_USEQ
:
912 case TGSI_OPCODE_USGE
:
913 case TGSI_OPCODE_USLT
:
914 case TGSI_OPCODE_USNE
:
915 case TGSI_OPCODE_USHR
:
917 case TGSI_OPCODE_F2I
:
918 case TGSI_OPCODE_IDIV
:
919 case TGSI_OPCODE_IMAX
:
920 case TGSI_OPCODE_IMIN
:
921 case TGSI_OPCODE_INEG
:
922 case TGSI_OPCODE_ISGE
:
923 case TGSI_OPCODE_ISHR
:
924 case TGSI_OPCODE_ISLT
:
932 emit_store(struct bld_context
*bld
, const struct tgsi_full_instruction
*inst
,
933 unsigned chan
, struct nv_value
*value
)
935 struct nv_value
*ptr
;
936 const struct tgsi_full_dst_register
*reg
= &inst
->Dst
[0];
938 if (reg
->Register
.Indirect
) {
939 ptr
= FETCH_ADDR(reg
->Indirect
.Index
,
940 tgsi_util_get_src_register_swizzle(®
->Indirect
, 0));
947 if (inst
->Instruction
.Opcode
!= TGSI_OPCODE_MOV
)
948 value
->reg
.type
= infer_dst_type(inst
->Instruction
.Opcode
);
950 switch (inst
->Instruction
.Saturate
) {
953 case TGSI_SAT_ZERO_ONE
:
954 BLD_INSN_1_EX(value
, SAT
, F32
, value
, F32
);
956 case TGSI_SAT_MINUS_PLUS_ONE
:
957 value
->reg
.as_type
= NV_TYPE_F32
;
958 value
= bld_insn_2(bld
, NV_OP_MAX
, value
, bld_load_imm_f32(bld
, -1.0f
));
959 value
= bld_insn_2(bld
, NV_OP_MIN
, value
, bld_load_imm_f32(bld
, 1.0f
));
963 switch (reg
->Register
.File
) {
964 case TGSI_FILE_OUTPUT
:
965 if (!value
->insn
&& (bld
->ti
->output_file
== NV_FILE_OUT
))
966 value
= bld_insn_1(bld
, NV_OP_MOV
, value
);
967 value
= bld_insn_1(bld
, NV_OP_MOV
, value
);
968 value
->reg
.file
= bld
->ti
->output_file
;
970 if (bld
->ti
->p
->type
== PIPE_SHADER_FRAGMENT
) {
971 STORE_OUTR(reg
->Register
.Index
, chan
, value
);
973 value
->insn
->fixed
= 1;
974 value
->reg
.id
= bld
->ti
->output_map
[reg
->Register
.Index
][chan
];
977 case TGSI_FILE_TEMPORARY
:
978 assert(reg
->Register
.Index
< BLD_MAX_TEMPS
);
979 if (!value
->insn
|| (value
->insn
->bb
!= bld
->pc
->current_block
))
980 value
= bld_insn_1(bld
, NV_OP_MOV
, value
);
981 value
->reg
.file
= NV_FILE_GPR
;
983 if (bld
->ti
->store_to_memory
)
984 bld_lmem_store(bld
, ptr
, reg
->Register
.Index
* 4 + chan
, value
);
986 STORE_TEMP(reg
->Register
.Index
, chan
, value
);
988 case TGSI_FILE_ADDRESS
:
989 assert(reg
->Register
.Index
< BLD_MAX_ADDRS
);
990 value
->reg
.file
= NV_FILE_ADDR
;
991 value
->reg
.type
= NV_TYPE_U16
;
992 STORE_ADDR(reg
->Register
.Index
, chan
, value
);
997 static INLINE
uint32_t
998 bld_is_output_written(struct bld_context
*bld
, int i
, int c
)
1001 return bld
->outputs_written
[i
/ 8] & (0xf << ((i
* 4) % 32));
1002 return bld
->outputs_written
[i
/ 8] & (1 << ((i
* 4 + c
) % 32));
1006 bld_export_outputs(struct bld_context
*bld
)
1008 struct nv_value
*vals
[4];
1009 struct nv_instruction
*nvi
;
1012 bld_push_values(&bld
->ovs
[0][0], PIPE_MAX_SHADER_OUTPUTS
);
1014 for (i
= 0; i
< PIPE_MAX_SHADER_OUTPUTS
; ++i
) {
1015 if (!bld_is_output_written(bld
, i
, -1))
1017 for (n
= 0, c
= 0; c
< 4; ++c
) {
1018 if (!bld_is_output_written(bld
, i
, c
))
1020 vals
[n
] = bld_fetch_global(bld
, &bld
->ovs
[i
][c
]);
1022 vals
[n
] = bld_insn_1(bld
, NV_OP_MOV
, vals
[n
]);
1023 vals
[n
++]->reg
.id
= bld
->ti
->output_map
[i
][c
];
1027 (nvi
= new_instruction(bld
->pc
, NV_OP_EXPORT
))->fixed
= 1;
1029 for (c
= 0; c
< n
; ++c
)
1030 nvi
->src
[c
] = new_ref(bld
->pc
, vals
[c
]);
1035 bld_new_block(struct bld_context
*bld
, struct nv_basic_block
*b
)
1039 bld_push_values(&bld
->tvs
[0][0], BLD_MAX_TEMPS
);
1040 bld_push_values(&bld
->avs
[0][0], BLD_MAX_ADDRS
);
1041 bld_push_values(&bld
->pvs
[0][0], BLD_MAX_PREDS
);
1042 bld_push_values(&bld
->ovs
[0][0], PIPE_MAX_SHADER_OUTPUTS
);
1044 bld
->pc
->current_block
= b
;
1046 for (i
= 0; i
< 4; ++i
)
1047 bld
->saved_addr
[i
][0] = NULL
;
1049 for (i
= 0; i
< 128; ++i
)
1050 bld
->saved_inputs
[i
] = NULL
;
1052 bld
->out_kind
= CFG_EDGE_FORWARD
;
1055 static struct nv_value
*
1056 bld_saved_input(struct bld_context
*bld
, unsigned i
, unsigned c
)
1058 unsigned idx
= bld
->ti
->input_map
[i
][c
];
1060 if (bld
->ti
->p
->type
!= PIPE_SHADER_FRAGMENT
)
1062 if (bld
->saved_inputs
[idx
])
1063 return bld
->saved_inputs
[idx
];
1067 static struct nv_value
*
1068 bld_interpolate(struct bld_context
*bld
, unsigned mode
, struct nv_value
*val
)
1070 if (val
->reg
.id
== 255) {
1071 /* gl_FrontFacing: 0/~0 to -1.0/+1.0 */
1072 val
= bld_insn_1(bld
, NV_OP_LINTERP
, val
);
1073 val
= bld_insn_2(bld
, NV_OP_SHL
, val
, bld_imm_u32(bld
, 31));
1074 val
->insn
->src
[0]->typecast
= NV_TYPE_U32
;
1075 val
= bld_insn_2(bld
, NV_OP_XOR
, val
, bld_imm_f32(bld
, -1.0f
));
1076 val
->insn
->src
[0]->typecast
= NV_TYPE_U32
;
1078 if (mode
& (NV50_INTERP_LINEAR
| NV50_INTERP_FLAT
))
1079 val
= bld_insn_1(bld
, NV_OP_LINTERP
, val
);
1081 val
= bld_insn_2(bld
, NV_OP_PINTERP
, val
, bld
->frgcrd
[3]);
1083 val
->insn
->flat
= (mode
& NV50_INTERP_FLAT
) ? 1 : 0;
1084 val
->insn
->centroid
= (mode
& NV50_INTERP_CENTROID
) ? 1 : 0;
1088 static struct nv_value
*
1089 emit_fetch(struct bld_context
*bld
, const struct tgsi_full_instruction
*insn
,
1090 const unsigned s
, const unsigned chan
)
1092 const struct tgsi_full_src_register
*src
= &insn
->Src
[s
];
1093 struct nv_value
*res
;
1094 struct nv_value
*ptr
= NULL
;
1095 unsigned idx
, swz
, dim_idx
, ind_idx
, ind_swz
, sgn
;
1096 ubyte type
= infer_src_type(insn
->Instruction
.Opcode
);
1098 idx
= src
->Register
.Index
;
1099 swz
= tgsi_util_get_full_src_register_swizzle(src
, chan
);
1104 if (src
->Register
.Indirect
) {
1105 ind_idx
= src
->Indirect
.Index
;
1106 ind_swz
= tgsi_util_get_src_register_swizzle(&src
->Indirect
, 0);
1108 ptr
= FETCH_ADDR(ind_idx
, ind_swz
);
1110 if (idx
>= (128 / 4) && src
->Register
.File
== TGSI_FILE_CONSTANT
)
1111 ptr
= bld_get_address(bld
, (idx
* 16) & ~0x1ff, ptr
);
1113 switch (src
->Register
.File
) {
1114 case TGSI_FILE_CONSTANT
:
1115 dim_idx
= src
->Dimension
.Index
;
1116 assert(dim_idx
< 15);
1118 res
= new_value(bld
->pc
, NV_FILE_MEM_C(dim_idx
), type
);
1119 SET_TYPE(res
, type
);
1120 res
->reg
.id
= (idx
* 4 + swz
) & 127;
1121 res
= bld_insn_1(bld
, NV_OP_LDA
, res
);
1124 res
->insn
->src
[4] = new_ref(bld
->pc
, ptr
);
1126 case TGSI_FILE_IMMEDIATE
:
1127 assert(idx
< bld
->ti
->immd32_nr
);
1128 res
= bld_load_imm_u32(bld
, bld
->ti
->immd32
[idx
* 4 + swz
]);
1130 switch (bld
->ti
->immd32_ty
[idx
]) {
1131 case TGSI_IMM_FLOAT32
: SET_TYPE(res
, NV_TYPE_F32
); break;
1132 case TGSI_IMM_UINT32
: SET_TYPE(res
, NV_TYPE_U32
); break;
1133 case TGSI_IMM_INT32
: SET_TYPE(res
, NV_TYPE_S32
); break;
1135 SET_TYPE(res
, type
);
1139 case TGSI_FILE_INPUT
:
1140 res
= bld_saved_input(bld
, idx
, swz
);
1141 if (res
&& (insn
->Instruction
.Opcode
!= TGSI_OPCODE_TXP
))
1144 res
= new_value(bld
->pc
, bld
->ti
->input_file
, type
);
1145 res
->reg
.id
= bld
->ti
->input_map
[idx
][swz
];
1147 if (res
->reg
.file
== NV_FILE_MEM_V
) {
1148 res
= bld_interpolate(bld
, bld
->ti
->interp_mode
[idx
], res
);
1150 assert(src
->Dimension
.Dimension
== 0);
1151 res
= bld_insn_1(bld
, NV_OP_LDA
, res
);
1152 assert(res
->reg
.type
== type
);
1154 bld
->saved_inputs
[bld
->ti
->input_map
[idx
][swz
]] = res
;
1156 case TGSI_FILE_TEMPORARY
:
1157 if (bld
->ti
->store_to_memory
)
1158 res
= bld_lmem_load(bld
, ptr
, idx
* 4 + swz
);
1160 res
= bld_fetch_global(bld
, &bld
->tvs
[idx
][swz
]);
1162 case TGSI_FILE_ADDRESS
:
1163 res
= bld_fetch_global(bld
, &bld
->avs
[idx
][swz
]);
1165 case TGSI_FILE_PREDICATE
:
1166 res
= bld_fetch_global(bld
, &bld
->pvs
[idx
][swz
]);
1168 case TGSI_FILE_SYSTEM_VALUE
:
1169 res
= new_value(bld
->pc
, bld
->ti
->input_file
, NV_TYPE_U32
);
1170 res
->reg
.id
= bld
->ti
->sysval_map
[idx
];
1171 res
= bld_insn_1(bld
, NV_OP_LDA
, res
);
1172 res
= bld_insn_1(bld
, NV_OP_CVT
, res
);
1173 res
->reg
.type
= NV_TYPE_F32
;
1176 NOUVEAU_ERR("illegal/unhandled src reg file: %d\n", src
->Register
.File
);
1181 return bld_undef(bld
, NV_FILE_GPR
);
1183 sgn
= tgsi_util_get_full_src_register_sign_mode(src
, chan
);
1185 if (insn
->Instruction
.Opcode
!= TGSI_OPCODE_MOV
)
1186 res
->reg
.as_type
= type
;
1188 if (sgn
!= TGSI_UTIL_SIGN_KEEP
) /* apparently "MOV A, -B" assumes float */
1189 res
->reg
.as_type
= NV_TYPE_F32
;
1192 case TGSI_UTIL_SIGN_KEEP
:
1194 case TGSI_UTIL_SIGN_CLEAR
:
1195 res
= bld_insn_1(bld
, NV_OP_ABS
, res
);
1197 case TGSI_UTIL_SIGN_TOGGLE
:
1198 res
= bld_insn_1(bld
, NV_OP_NEG
, res
);
1200 case TGSI_UTIL_SIGN_SET
:
1201 res
= bld_insn_1(bld
, NV_OP_ABS
, res
);
1202 res
= bld_insn_1(bld
, NV_OP_NEG
, res
);
1205 NOUVEAU_ERR("illegal/unhandled src reg sign mode\n");
1214 bld_lit(struct bld_context
*bld
, struct nv_value
*dst0
[4],
1215 const struct tgsi_full_instruction
*insn
)
1217 struct nv_value
*val0
= NULL
;
1218 struct nv_value
*zero
= NULL
;
1219 unsigned mask
= insn
->Dst
[0].Register
.WriteMask
;
1221 if (mask
& ((1 << 0) | (1 << 3)))
1222 dst0
[3] = dst0
[0] = bld_load_imm_f32(bld
, 1.0f
);
1224 if (mask
& (3 << 1)) {
1225 zero
= bld_load_imm_f32(bld
, 0.0f
);
1226 val0
= bld_insn_2(bld
, NV_OP_MAX
, emit_fetch(bld
, insn
, 0, 0), zero
);
1228 if (mask
& (1 << 1))
1232 if (mask
& (1 << 2)) {
1233 struct nv_value
*val1
, *val3
, *src1
, *src3
;
1234 struct nv_value
*pos128
= bld_load_imm_f32(bld
, 127.999999f
);
1235 struct nv_value
*neg128
= bld_load_imm_f32(bld
, -127.999999f
);
1237 src1
= emit_fetch(bld
, insn
, 0, 1);
1238 src3
= emit_fetch(bld
, insn
, 0, 3);
1240 val0
->insn
->flags_def
= new_value(bld
->pc
, NV_FILE_FLAGS
, NV_TYPE_U16
);
1241 val0
->insn
->flags_def
->insn
= val0
->insn
;
1243 val1
= bld_insn_2(bld
, NV_OP_MAX
, src1
, zero
);
1244 val3
= bld_insn_2(bld
, NV_OP_MAX
, src3
, neg128
);
1245 val3
= bld_insn_2(bld
, NV_OP_MIN
, val3
, pos128
);
1246 val3
= bld_pow(bld
, val1
, val3
);
1248 dst0
[2] = bld_insn_1(bld
, NV_OP_MOV
, zero
);
1249 dst0
[2]->insn
->cc
= NV_CC_LE
;
1250 dst0
[2]->insn
->flags_src
= new_ref(bld
->pc
, val0
->insn
->flags_def
);
1252 dst0
[2] = bld_insn_2(bld
, NV_OP_SELECT
, val3
, dst0
[2]);
1257 get_tex_dim(const struct tgsi_full_instruction
*insn
, int *dim
, int *arg
)
1259 switch (insn
->Texture
.Texture
) {
1260 case TGSI_TEXTURE_1D
:
1263 case TGSI_TEXTURE_SHADOW1D
:
1267 case TGSI_TEXTURE_UNKNOWN
:
1268 case TGSI_TEXTURE_2D
:
1269 case TGSI_TEXTURE_RECT
:
1272 case TGSI_TEXTURE_SHADOW2D
:
1273 case TGSI_TEXTURE_SHADOWRECT
:
1277 case TGSI_TEXTURE_3D
:
1278 case TGSI_TEXTURE_CUBE
:
1288 load_proj_tex_coords(struct bld_context
*bld
,
1289 struct nv_value
*t
[4], int dim
, int arg
,
1290 const struct tgsi_full_instruction
*insn
)
1294 mask
= (1 << dim
) - 1;
1296 mask
|= 4; /* depth comparison value */
1298 t
[3] = emit_fetch(bld
, insn
, 0, 3);
1300 if (t
[3]->insn
->opcode
== NV_OP_PINTERP
) {
1301 t
[3] = bld_duplicate_insn(bld
, t
[3]->insn
);
1302 t
[3]->insn
->opcode
= NV_OP_LINTERP
;
1303 nv_reference(bld
->pc
, &t
[3]->insn
->src
[1], NULL
);
1306 t
[3] = bld_insn_1(bld
, NV_OP_RCP
, t
[3]);
1308 for (c
= 0; c
< 4; ++c
) {
1309 if (!(mask
& (1 << c
)))
1311 t
[c
] = emit_fetch(bld
, insn
, 0, c
);
1313 if (t
[c
]->insn
->opcode
!= NV_OP_LINTERP
&&
1314 t
[c
]->insn
->opcode
!= NV_OP_PINTERP
)
1316 t
[c
] = bld_duplicate_insn(bld
, t
[c
]->insn
);
1317 t
[c
]->insn
->opcode
= NV_OP_PINTERP
;
1318 nv_reference(bld
->pc
, &t
[c
]->insn
->src
[1], t
[3]);
1323 for (c
= 0; mask
; ++c
, mask
>>= 1) {
1326 t
[c
] = bld_insn_2(bld
, NV_OP_MUL
, t
[c
], t
[3]);
1330 /* For a quad of threads / top left, top right, bottom left, bottom right
1331 * pixels, do a different operation, and take src0 from a specific thread.
1338 #define QOP(a, b, c, d) \
1339 ((QOP_##a << 0) | (QOP_##b << 2) | (QOP_##c << 4) | (QOP_##d << 6))
1341 static INLINE
struct nv_value
*
1342 bld_quadop(struct bld_context
*bld
, ubyte qop
, struct nv_value
*src0
, int lane
,
1343 struct nv_value
*src1
, boolean wp
)
1345 struct nv_value
*val
= bld_insn_2(bld
, NV_OP_QUADOP
, src0
, src1
);
1346 val
->insn
->lanes
= lane
;
1347 val
->insn
->quadop
= qop
;
1349 val
->insn
->flags_def
= new_value(bld
->pc
, NV_FILE_FLAGS
, NV_TYPE_U16
);
1350 val
->insn
->flags_def
->insn
= val
->insn
;
1355 static INLINE
struct nv_value
*
1356 bld_cmov(struct bld_context
*bld
,
1357 struct nv_value
*src
, ubyte cc
, struct nv_value
*cr
)
1359 src
= bld_insn_1(bld
, NV_OP_MOV
, src
);
1362 src
->insn
->flags_src
= new_ref(bld
->pc
, cr
);
/* Build one texture instruction and return it.
 *
 * bld:    builder context
 * opcode: NV texture opcode to emit (callers pass e.g. NV_OP_TXB/NV_OP_TXL)
 * dst:    receives the four newly defined F32 GPR result values
 * t_in:   the argc input values
 * argc:   number of inputs, also stored in nvi->tex_argc
 * tic, tsc: texture / sampler indices
 * cube:   stored in nvi->tex_cube
 *
 * NOTE(review): the statements consuming tic and tsc and the return are not
 * visible in this listing.
 */
1367 static struct nv_instruction
*
1368 emit_tex(struct bld_context
*bld
, uint opcode
,
1369 struct nv_value
*dst
[4], struct nv_value
*t_in
[4],
1370 int argc
, int tic
, int tsc
, int cube
)
1372 struct nv_value
*t
[4];
1373 struct nv_instruction
*nvi
;
1376 /* the inputs to a tex instruction must be separate values */
/* hence every input goes through its own MOV, typed F32 and marked fixed */
1377 for (c
= 0; c
< argc
; ++c
) {
1378 t
[c
] = bld_insn_1(bld
, NV_OP_MOV
, t_in
[c
]);
1379 SET_TYPE(t
[c
], NV_TYPE_F32
);
1380 t
[c
]->insn
->fixed
= 1;
1383 nvi
= new_instruction(bld
->pc
, opcode
);
/* the instruction always defines four fresh F32 GPR results */
1385 for (c
= 0; c
< 4; ++c
)
1386 dst
[c
] = bld_def(nvi
, c
, new_value(bld
->pc
, NV_FILE_GPR
, NV_TYPE_F32
));
/* hook the copied inputs up as sources */
1388 for (c
= 0; c
< argc
; ++c
)
1389 nvi
->src
[c
] = new_ref(bld
->pc
, t
[c
]);
/* all four result components are requested */
1393 nvi
->tex_mask
= 0xf;
1394 nvi
->tex_cube
= cube
;
1396 nvi
->tex_argc
= argc
;
1402 bld_texlod_sequence(struct bld_context
*bld
,
1403 struct nv_value
*dst
[4], struct nv_value
*t
[4], int arg
,
1404 int tic
, int tsc
, int cube
)
1406 emit_tex(bld
, NV_OP_TXL
, dst
, t
, arg
, tic
, tsc
, cube
); /* TODO */
/* The lanes of a quad are grouped by the bit in the condition register
 * they have set, which is selected by differing bias values.
 * Move the input values for TEX into a new register set for each group
 * and execute TEX only for a specific group.
 * We always need to use 4 new registers for the inputs/outputs because
 * the implicitly calculated derivatives must be correct.
 */
/* Emit a biased texture lookup (NV_OP_TXB) as a sequence of four lookups
 * whose results are merged per component.
 *
 * bld: builder context
 * dst: receives the four final F32 GPR values
 * t:   texture arguments; t[arg - 1] is the bias value
 * arg: number of arguments in t
 * tic, tsc, cube: passed through to emit_tex()
 *
 * bld_tex() calls this for fragment programs when the bias is not constant.
 */
1418 bld_texbias_sequence(struct bld_context
*bld
,
1419 struct nv_value
*dst
[4], struct nv_value
*t
[4], int arg
,
1420 int tic
, int tsc
, int cube
)
1422 struct nv_instruction
*sel
, *tex
;
1423 struct nv_value
*bit
[4], *cr
[4], *res
[4][4], *val
;
/* condition code used for the results of lookup number l */
1426 const ubyte cc
[4] = { NV_CC_EQ
, NV_CC_S
, NV_CC_C
, NV_CC_O
};
/* Step 1: for each l, build a flags value cr[l]: a quad-op SUBR of the
 * bias against itself with lanes = l and a flags result, then a MOV of
 * the immediate (1 << l) predicated on EQ of those flags. */
1428 for (l
= 0; l
< 4; ++l
) {
1429 bit
[l
] = bld_load_imm_u32(bld
, 1 << l
);
1431 val
= bld_quadop(bld
, QOP(SUBR
, SUBR
, SUBR
, SUBR
),
1432 t
[arg
- 1], l
, t
[arg
- 1], TRUE
);
1434 cr
[l
] = bld_cmov(bld
, bit
[l
], NV_CC_EQ
, val
->insn
->flags_def
);
/* the conditional MOV result is retyped as a U16 flags register */
1436 cr
[l
]->reg
.file
= NV_FILE_FLAGS
;
1437 SET_TYPE(cr
[l
], NV_TYPE_U16
);
/* Step 2: merge the four cr[] values with one SELECT that defines a new
 * U16 flags value (sel->def[0]). */
1440 sel
= new_instruction(bld
->pc
, NV_OP_SELECT
);
1442 for (l
= 0; l
< 4; ++l
)
1443 sel
->src
[l
] = new_ref(bld
->pc
, cr
[l
]);
1445 bld_def(sel
, 0, new_value(bld
->pc
, NV_FILE_FLAGS
, NV_TYPE_U16
));
/* Step 3: emit four TXB instructions, each reading the merged flags, and
 * remember all 4x4 results.
 * NOTE(review): where the condition code of each TXB is set is not visible
 * in this listing. */
1447 for (l
= 0; l
< 4; ++l
) {
1448 tex
= emit_tex(bld
, NV_OP_TXB
, dst
, t
, arg
, tic
, tsc
, cube
);
1451 tex
->flags_src
= new_ref(bld
->pc
, sel
->def
[0]);
1453 for (c
= 0; c
< 4; ++c
)
1454 res
[l
][c
] = tex
->def
[c
];
/* Step 4: copy every result with a MOV predicated on cc[l] of the merged
 * flags. */
1457 for (l
= 0; l
< 4; ++l
)
1458 for (c
= 0; c
< 4; ++c
)
1459 res
[l
][c
] = bld_cmov(bld
, res
[l
][c
], cc
[l
], sel
->def
[0]);
/* Step 5: per component, SELECT among the four predicated copies into a
 * fresh F32 GPR that becomes dst[c]. */
1461 for (c
= 0; c
< 4; ++c
) {
1462 sel
= new_instruction(bld
->pc
, NV_OP_SELECT
);
1464 for (l
= 0; l
< 4; ++l
)
1465 sel
->src
[l
] = new_ref(bld
->pc
, res
[l
][c
]);
1467 bld_def(sel
, 0, (dst
[c
] = new_value(bld
->pc
, NV_FILE_GPR
, NV_TYPE_F32
)));
1472 bld_is_constant(struct nv_value
*val
)
1474 if (val
->reg
.file
== NV_FILE_IMM
)
1476 return val
->insn
&& nvcg_find_constant(val
->insn
->src
[0]);
/* Build the IR for one TGSI texture instruction.
 *
 * bld:  builder context
 * dst0: receives the four result values
 * insn: the TGSI instruction; source 0 supplies coordinates (and the
 *       bias/LOD in its 4th component), source 1's register index selects
 *       the texture
 *
 * bld_instruction() calls this for TEX, TXB, TXL, TXP and TXD.
 */
1480 bld_tex(struct bld_context
*bld
, struct nv_value
*dst0
[4],
1481 const struct tgsi_full_instruction
*insn
)
1483 struct nv_value
*t
[4], *s
[3];
1484 uint opcode
= translate_opcode(insn
->Instruction
.Opcode
);
/* the same index is used for the texture (tic) and the sampler (tsc) */
1486 const int tic
= insn
->Src
[1].Register
.Index
;
1487 const int tsc
= tic
;
1488 const int cube
= (insn
->Texture
.Texture
== TGSI_TEXTURE_CUBE
) ? 1 : 0;
1490 get_tex_dim(insn
, &dim
, &arg
);
/* projective lookups on non-cube targets get their coordinates divided */
1492 if (!cube
&& insn
->Instruction
.Opcode
== TGSI_OPCODE_TXP
)
1493 load_proj_tex_coords(bld
, t
, dim
, arg
, insn
);
/* NOTE(review): the `else` introducing the plain fetch below and the
 * condition on the t[dim] fetch are not visible in this listing. */
1495 for (c
= 0; c
< dim
; ++c
)
1496 t
[c
] = emit_fetch(bld
, insn
, 0, c
);
1498 t
[dim
] = emit_fetch(bld
, insn
, 0, 2);
/* Scale the first three coordinates by 1 / max(|t0|, |t1|, |t2|).
 * NOTE(review): presumably done for cube maps only; the guarding condition
 * is not visible here -- confirm. */
1503 for (c
= 0; c
< 3; ++c
)
1504 s
[c
] = bld_insn_1(bld
, NV_OP_ABS
, t
[c
]);
1506 s
[0] = bld_insn_2(bld
, NV_OP_MAX
, s
[0], s
[1]);
1507 s
[0] = bld_insn_2(bld
, NV_OP_MAX
, s
[0], s
[2]);
1508 s
[0] = bld_insn_1(bld
, NV_OP_RCP
, s
[0]);
1510 for (c
= 0; c
< 3; ++c
)
1511 t
[c
] = bld_insn_2(bld
, NV_OP_MUL
, t
[c
], s
[0]);
/* TXB/TXL take the 4th component of source 0 as one more argument */
1514 if (opcode
== NV_OP_TXB
|| opcode
== NV_OP_TXL
) {
1515 t
[arg
++] = emit_fetch(bld
, insn
, 0, 3);
/* in fragment programs a non-constant bias/LOD needs a dedicated sequence */
1517 if ((bld
->ti
->p
->type
== PIPE_SHADER_FRAGMENT
) &&
1518 !bld_is_constant(t
[arg
- 1])) {
1519 if (opcode
== NV_OP_TXB
)
1520 bld_texbias_sequence(bld
, dst0
, t
, arg
, tic
, tsc
, cube
);
1522 bld_texlod_sequence(bld
, dst0
, t
, arg
, tic
, tsc
, cube
);
/* default path: a single texture instruction */
1527 emit_tex(bld
, opcode
, dst0
, t
, arg
, tic
, tsc
, cube
);
1530 static INLINE
struct nv_value
*
1531 bld_dot(struct bld_context
*bld
, const struct tgsi_full_instruction
*insn
,
1534 struct nv_value
*dotp
, *src0
, *src1
;
1537 src0
= emit_fetch(bld
, insn
, 0, 0);
1538 src1
= emit_fetch(bld
, insn
, 1, 0);
1539 dotp
= bld_insn_2(bld
, NV_OP_MUL
, src0
, src1
);
1541 for (c
= 1; c
< n
; ++c
) {
1542 src0
= emit_fetch(bld
, insn
, 0, c
);
1543 src1
= emit_fetch(bld
, insn
, 1, c
);
1544 dotp
= bld_insn_3(bld
, NV_OP_MAD
, src0
, src1
, dotp
);
/* Loop header that runs the following statement once for every channel
 * 0..3 whose bit is set in the write mask of the instruction's first
 * destination register.  `chan` must be an assignable integer variable;
 * after the loop it holds 4.
 */
#define FOR_EACH_DST0_ENABLED_CHANNEL(chan, inst)                   \
   for ((chan) = 0; (chan) < 4; ++(chan))                           \
      if (((inst)->Dst[0].Register.WriteMask >> (chan)) & 1)
/* Translate a single TGSI instruction into nv50 IR.
 *
 * bld:  builder context (current block, nesting levels, value trackers)
 * insn: the TGSI instruction to translate
 *
 * The function dispatches on insn->Instruction.Opcode.  Arithmetic and
 * texture cases leave one value per enabled destination channel in dst0[];
 * after the switch these are written back with emit_store().  The flow
 * control cases (IF/ELSE/ENDIF, BGNLOOP/BRK/CONT/ENDLOOP) create and attach
 * basic blocks.  For a vertex program with prog->vp.clpd_nr set, a direct
 * store to the POSITION output additionally emits one clip-distance output
 * per plane.
 *
 * NOTE(review): a number of statements (break/return lines, some
 * assignments and conditions) are not visible in this listing; the comments
 * below describe the visible code only.
 */
1554 bld_instruction(struct bld_context
*bld
,
1555 const struct tgsi_full_instruction
*insn
)
1557 struct nv50_program
*prog
= bld
->ti
->p
;
1558 const struct tgsi_full_dst_register
*dreg
= &insn
->Dst
[0];
1559 struct nv_value
*src0
;
1560 struct nv_value
*src1
;
1561 struct nv_value
*src2
;
1562 struct nv_value
*dst0
[4] = { 0 };
1563 struct nv_value
*temp
;
/* NV opcode for the cases that map one-to-one onto a hardware operation */
1565 uint opcode
= translate_opcode(insn
->Instruction
.Opcode
);
/* optional debug dump of the instruction being translated */
1567 #if NV50_DEBUG & NV50_DEBUG_PROG_IR
1568 debug_printf("bld_instruction:"); tgsi_dump_instruction(insn
, 1);
1571 switch (insn
->Instruction
.Opcode
) {
/* component-wise two-operand operations */
1572 case TGSI_OPCODE_ADD
:
1573 case TGSI_OPCODE_MAX
:
1574 case TGSI_OPCODE_MIN
:
1575 case TGSI_OPCODE_MUL
:
1576 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
) {
1577 src0
= emit_fetch(bld
, insn
, 0, c
);
1578 src1
= emit_fetch(bld
, insn
, 1, c
);
1579 dst0
[c
] = bld_insn_2(bld
, opcode
, src0
, src1
);
/* ARL: FLOOR of the source typed as S32, then shifted left by 4 */
1582 case TGSI_OPCODE_ARL
:
1583 src1
= bld_imm_u32(bld
, 4);
1584 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
) {
1585 src0
= emit_fetch(bld
, insn
, 0, c
);
1586 temp
= bld_insn_1(bld
, NV_OP_FLOOR
, src0
);
1587 SET_TYPE(temp
, NV_TYPE_S32
);
1588 dst0
[c
] = bld_insn_2(bld
, NV_OP_SHL
, temp
, src1
);
/* CMP: two MOVs predicated on the flags of src0 (LT takes src1, GE takes
 * src2), merged by a SELECT */
1591 case TGSI_OPCODE_CMP
:
1592 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
) {
1593 src0
= emit_fetch(bld
, insn
, 0, c
);
1594 src1
= emit_fetch(bld
, insn
, 1, c
);
1595 src2
= emit_fetch(bld
, insn
, 2, c
);
1596 src0
= bld_predicate(bld
, src0
, FALSE
);
1598 src1
= bld_insn_1(bld
, NV_OP_MOV
, src1
);
1599 src1
->insn
->flags_src
= new_ref(bld
->pc
, src0
);
1600 src1
->insn
->cc
= NV_CC_LT
;
1602 src2
= bld_insn_1(bld
, NV_OP_MOV
, src2
);
1603 src2
->insn
->flags_src
= new_ref(bld
->pc
, src0
);
1604 src2
->insn
->cc
= NV_CC_GE
;
1606 dst0
[c
] = bld_insn_2(bld
, NV_OP_SELECT
, src1
, src2
);
/* COS/SIN: the argument goes through PRESIN first; the first three
 * channels use component 0 of the source, the fourth uses component 3 */
1609 case TGSI_OPCODE_COS
:
1610 case TGSI_OPCODE_SIN
:
1611 src0
= emit_fetch(bld
, insn
, 0, 0);
1612 temp
= bld_insn_1(bld
, NV_OP_PRESIN
, src0
);
1613 if (insn
->Dst
[0].Register
.WriteMask
& 7)
1614 temp
= bld_insn_1(bld
, opcode
, temp
);
1615 for (c
= 0; c
< 3; ++c
)
1616 if (insn
->Dst
[0].Register
.WriteMask
& (1 << c
))
1618 if (!(insn
->Dst
[0].Register
.WriteMask
& (1 << 3)))
1620 src0
= emit_fetch(bld
, insn
, 0, 3);
1621 temp
= bld_insn_1(bld
, NV_OP_PRESIN
, src0
);
1622 dst0
[3] = bld_insn_1(bld
, opcode
, temp
);
/* dot products of 2, 3 and 4 components, built by bld_dot() */
1624 case TGSI_OPCODE_DP2
:
1625 temp
= bld_dot(bld
, insn
, 2);
1626 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
)
1629 case TGSI_OPCODE_DP3
:
1630 temp
= bld_dot(bld
, insn
, 3);
1631 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
)
1634 case TGSI_OPCODE_DP4
:
1635 temp
= bld_dot(bld
, insn
, 4);
1636 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
)
/* DPH: 3-component dot product plus component 3 of source 1 */
1639 case TGSI_OPCODE_DPH
:
1640 src0
= bld_dot(bld
, insn
, 3);
1641 src1
= emit_fetch(bld
, insn
, 1, 3);
1642 temp
= bld_insn_2(bld
, NV_OP_ADD
, src0
, src1
);
1643 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
)
/* DST: x = 1.0, y = src0.y * src1.y, z = src0.z, w = src1.w */
1646 case TGSI_OPCODE_DST
:
1647 if (insn
->Dst
[0].Register
.WriteMask
& 1)
1648 dst0
[0] = bld_imm_f32(bld
, 1.0f
);
1649 if (insn
->Dst
[0].Register
.WriteMask
& 2) {
1650 src0
= emit_fetch(bld
, insn
, 0, 1);
1651 src1
= emit_fetch(bld
, insn
, 1, 1);
1652 dst0
[1] = bld_insn_2(bld
, NV_OP_MUL
, src0
, src1
);
1654 if (insn
->Dst
[0].Register
.WriteMask
& 4)
1655 dst0
[2] = emit_fetch(bld
, insn
, 0, 2);
1656 if (insn
->Dst
[0].Register
.WriteMask
& 8)
1657 dst0
[3] = emit_fetch(bld
, insn
, 1, 3);
/* EXP: y = src - floor(src), x = EX2 of floor(src), z = EX2 of src,
 * w = 1.0 (EX2 arguments go through PREEX2 first) */
1659 case TGSI_OPCODE_EXP
:
1660 src0
= emit_fetch(bld
, insn
, 0, 0);
1661 temp
= bld_insn_1(bld
, NV_OP_FLOOR
, src0
);
1663 if (insn
->Dst
[0].Register
.WriteMask
& 2)
1664 dst0
[1] = bld_insn_2(bld
, NV_OP_SUB
, src0
, temp
);
1665 if (insn
->Dst
[0].Register
.WriteMask
& 1) {
1666 temp
= bld_insn_1(bld
, NV_OP_PREEX2
, temp
);
1667 dst0
[0] = bld_insn_1(bld
, NV_OP_EX2
, temp
);
1669 if (insn
->Dst
[0].Register
.WriteMask
& 4) {
1670 temp
= bld_insn_1(bld
, NV_OP_PREEX2
, src0
);
1671 dst0
[2] = bld_insn_1(bld
, NV_OP_EX2
, temp
);
1673 if (insn
->Dst
[0].Register
.WriteMask
& 8)
1674 dst0
[3] = bld_imm_f32(bld
, 1.0f
);
/* EX2 of component 0, computed once for all enabled channels */
1676 case TGSI_OPCODE_EX2
:
1677 src0
= emit_fetch(bld
, insn
, 0, 0);
1678 temp
= bld_insn_1(bld
, NV_OP_PREEX2
, src0
);
1679 temp
= bld_insn_1(bld
, NV_OP_EX2
, temp
);
1680 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
)
/* FRC: src - floor(src) */
1683 case TGSI_OPCODE_FRC
:
1684 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
) {
1685 src0
= emit_fetch(bld
, insn
, 0, c
);
1686 dst0
[c
] = bld_insn_1(bld
, NV_OP_FLOOR
, src0
);
1687 dst0
[c
] = bld_insn_2(bld
, NV_OP_SUB
, src0
, dst0
[c
]);
/* KIL: looks at all four source components.
 * NOTE(review): the per-component kill emission is not visible here. */
1690 case TGSI_OPCODE_KIL
:
1691 for (c
= 0; c
< 4; ++c
) {
1692 src0
= emit_fetch(bld
, insn
, 0, c
);
/* KILP: a single NV_OP_KIL instruction marked fixed */
1696 case TGSI_OPCODE_KILP
:
1697 (new_instruction(bld
->pc
, NV_OP_KIL
))->fixed
= 1;
/* IF: remember the current block as both join and condition block of this
 * nesting level, branch on the predicate of source 0, then continue in a
 * new block attached with a forward edge */
1699 case TGSI_OPCODE_IF
:
1701 struct nv_basic_block
*b
= new_basic_block(bld
->pc
);
1703 assert(bld
->cond_lvl
< BLD_MAX_COND_NESTING
);
1705 nvbb_attach_block(bld
->pc
->current_block
, b
, CFG_EDGE_FORWARD
);
1707 bld
->join_bb
[bld
->cond_lvl
] = bld
->pc
->current_block
;
1708 bld
->cond_bb
[bld
->cond_lvl
] = bld
->pc
->current_block
;
1710 src1
= bld_predicate(bld
, emit_fetch(bld
, insn
, 0, 0), TRUE
);
1712 bld_flow(bld
, NV_OP_BRA
, NV_CC_EQ
, src1
, NULL
, (bld
->cond_lvl
== 0));
1715 bld_new_block(bld
, b
);
/* ELSE: the new block becomes the target of the condition block's exit
 * branch; the current (then-side) block is terminated with a BRA and
 * recorded as the new condition block */
1718 case TGSI_OPCODE_ELSE
:
1720 struct nv_basic_block
*b
= new_basic_block(bld
->pc
);
1723 nvbb_attach_block(bld
->join_bb
[bld
->cond_lvl
], b
, CFG_EDGE_FORWARD
);
1725 bld
->cond_bb
[bld
->cond_lvl
]->exit
->target
= b
;
1726 bld
->cond_bb
[bld
->cond_lvl
] = bld
->pc
->current_block
;
1728 new_instruction(bld
->pc
, NV_OP_BRA
)->is_terminator
= 1;
1731 bld_new_block(bld
, b
);
/* ENDIF: both the current block and the recorded condition block lead to
 * a new block; at nesting level 0 with a join block recorded, a JOIN
 * instruction is emitted there */
1734 case TGSI_OPCODE_ENDIF
:
1736 struct nv_basic_block
*b
= new_basic_block(bld
->pc
);
1738 if (!nvbb_is_terminated(bld
->pc
->current_block
))
1739 bld_flow(bld
, NV_OP_BRA
, NV_CC_TR
, NULL
, b
, FALSE
);
1742 nvbb_attach_block(bld
->pc
->current_block
, b
, bld
->out_kind
);
1743 nvbb_attach_block(bld
->cond_bb
[bld
->cond_lvl
], b
, CFG_EDGE_FORWARD
);
1745 bld
->cond_bb
[bld
->cond_lvl
]->exit
->target
= b
;
1747 bld_new_block(bld
, b
);
1749 if (!bld
->cond_lvl
&& bld
->join_bb
[bld
->cond_lvl
]) {
1750 bld
->join_bb
[bld
->cond_lvl
]->exit
->prev
->target
= b
;
1751 new_instruction(bld
->pc
, NV_OP_JOIN
)->is_join
= TRUE
;
/* BGNLOOP: bl is the loop body block, bb the block reached by a break.
 * The loop level is incremented, the nesting bound is raised when reached,
 * and the def/use tracking of temps, addresses and predicates is cleared
 * for the new level */
1755 case TGSI_OPCODE_BGNLOOP
:
1757 struct nv_basic_block
*bl
= new_basic_block(bld
->pc
);
1758 struct nv_basic_block
*bb
= new_basic_block(bld
->pc
);
1760 assert(bld
->loop_lvl
< BLD_MAX_LOOP_NESTING
);
1762 bld
->loop_bb
[bld
->loop_lvl
] = bl
;
1763 bld
->brkt_bb
[bld
->loop_lvl
] = bb
;
1765 bld_flow(bld
, NV_OP_BREAKADDR
, NV_CC_TR
, NULL
, bb
, FALSE
);
1767 nvbb_attach_block(bld
->pc
->current_block
, bl
, CFG_EDGE_LOOP_ENTER
);
1769 bld_new_block(bld
, bld
->loop_bb
[bld
->loop_lvl
++]);
1771 if (bld
->loop_lvl
== bld
->pc
->loop_nesting_bound
)
1772 bld
->pc
->loop_nesting_bound
++;
1774 bld_clear_def_use(&bld
->tvs
[0][0], BLD_MAX_TEMPS
, bld
->loop_lvl
);
1775 bld_clear_def_use(&bld
->avs
[0][0], BLD_MAX_ADDRS
, bld
->loop_lvl
);
1776 bld_clear_def_use(&bld
->pvs
[0][0], BLD_MAX_PREDS
, bld
->loop_lvl
);
/* BRK: BREAK to the break target of the innermost loop */
1779 case TGSI_OPCODE_BRK
:
1781 struct nv_basic_block
*bb
= bld
->brkt_bb
[bld
->loop_lvl
- 1];
1783 bld_flow(bld
, NV_OP_BREAK
, NV_CC_TR
, NULL
, bb
, FALSE
);
1785 if (bld
->out_kind
== CFG_EDGE_FORWARD
) /* else we already had BRK/CONT */
1786 nvbb_attach_block(bld
->pc
->current_block
, bb
, CFG_EDGE_LOOP_LEAVE
);
1788 bld
->out_kind
= CFG_EDGE_FAKE
;
/* CONT: branch back to the body block of the innermost loop.
 * NOTE(review): join_bb is indexed with cond_lvl - 1 without a visible
 * check that cond_lvl is non-zero -- confirm this cannot underflow. */
1791 case TGSI_OPCODE_CONT
:
1793 struct nv_basic_block
*bb
= bld
->loop_bb
[bld
->loop_lvl
- 1];
1795 bld_flow(bld
, NV_OP_BRA
, NV_CC_TR
, NULL
, bb
, FALSE
);
1797 nvbb_attach_block(bld
->pc
->current_block
, bb
, CFG_EDGE_BACK
);
1799 if ((bb
= bld
->join_bb
[bld
->cond_lvl
- 1])) {
1800 bld
->join_bb
[bld
->cond_lvl
- 1] = NULL
;
1801 nv_nvi_delete(bb
->exit
->prev
);
1803 bld
->out_kind
= CFG_EDGE_FAKE
;
/* ENDLOOP: close the loop with a back edge to its body block, fix up the
 * phis, then continue in the break target and drop one loop level */
1806 case TGSI_OPCODE_ENDLOOP
:
1808 struct nv_basic_block
*bb
= bld
->loop_bb
[bld
->loop_lvl
- 1];
1810 if (!nvbb_is_terminated(bld
->pc
->current_block
))
1811 bld_flow(bld
, NV_OP_BRA
, NV_CC_TR
, NULL
, bb
, FALSE
);
1813 nvbb_attach_block(bld
->pc
->current_block
, bb
, CFG_EDGE_BACK
);
1815 bld_loop_end(bld
, bb
); /* replace loop-side operand of the phis */
1817 bld_new_block(bld
, bld
->brkt_bb
[--bld
->loop_lvl
]);
/* component-wise one-operand operations */
1820 case TGSI_OPCODE_ABS
:
1821 case TGSI_OPCODE_CEIL
:
1822 case TGSI_OPCODE_FLR
:
1823 case TGSI_OPCODE_TRUNC
:
1824 case TGSI_OPCODE_ROUND
:
1825 case TGSI_OPCODE_DDX
:
1826 case TGSI_OPCODE_DDY
:
1827 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
) {
1828 src0
= emit_fetch(bld
, insn
, 0, c
);
1829 dst0
[c
] = bld_insn_1(bld
, opcode
, src0
);
1832 case TGSI_OPCODE_LIT
:
1833 bld_lit(bld
, dst0
, insn
);
/* LRP: (src1 - src2) * src0 + src2 */
1835 case TGSI_OPCODE_LRP
:
1836 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
) {
1837 src0
= emit_fetch(bld
, insn
, 0, c
);
1838 src1
= emit_fetch(bld
, insn
, 1, c
);
1839 src2
= emit_fetch(bld
, insn
, 2, c
);
1840 dst0
[c
] = bld_insn_2(bld
, NV_OP_SUB
, src1
, src2
);
1841 dst0
[c
] = bld_insn_3(bld
, NV_OP_MAD
, dst0
[c
], src0
, src2
);
/* MOV: the fetched source value is stored directly, no instruction emitted
 * here */
1844 case TGSI_OPCODE_MOV
:
1845 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
)
1846 dst0
[c
] = emit_fetch(bld
, insn
, 0, c
);
1848 case TGSI_OPCODE_MAD
:
1849 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
) {
1850 src0
= emit_fetch(bld
, insn
, 0, c
);
1851 src1
= emit_fetch(bld
, insn
, 1, c
);
1852 src2
= emit_fetch(bld
, insn
, 2, c
);
1853 dst0
[c
] = bld_insn_3(bld
, opcode
, src0
, src1
, src2
);
/* POW on component 0 of both sources, via bld_pow() */
1856 case TGSI_OPCODE_POW
:
1857 src0
= emit_fetch(bld
, insn
, 0, 0);
1858 src1
= emit_fetch(bld
, insn
, 1, 0);
1859 temp
= bld_pow(bld
, src0
, src1
);
1860 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
)
/* LOG: works on |src0.x|; y = |src| * RCP(EX2(floor(LG2 |src|))),
 * w = 1.0.
 * NOTE(review): the stores for the x and z channels are not visible in
 * this listing. */
1863 case TGSI_OPCODE_LOG
:
1864 src0
= emit_fetch(bld
, insn
, 0, 0);
1865 src0
= bld_insn_1(bld
, NV_OP_ABS
, src0
);
1866 temp
= bld_insn_1(bld
, NV_OP_LG2
, src0
);
1868 if (insn
->Dst
[0].Register
.WriteMask
& 3) {
1869 temp
= bld_insn_1(bld
, NV_OP_FLOOR
, temp
);
1872 if (insn
->Dst
[0].Register
.WriteMask
& 2) {
1873 temp
= bld_insn_1(bld
, NV_OP_PREEX2
, temp
);
1874 temp
= bld_insn_1(bld
, NV_OP_EX2
, temp
);
1875 temp
= bld_insn_1(bld
, NV_OP_RCP
, temp
);
1876 dst0
[1] = bld_insn_2(bld
, NV_OP_MUL
, src0
, temp
);
1878 if (insn
->Dst
[0].Register
.WriteMask
& 8)
1879 dst0
[3] = bld_imm_f32(bld
, 1.0f
);
/* scalar one-operand operations on component 0 */
1881 case TGSI_OPCODE_RCP
:
1882 case TGSI_OPCODE_LG2
:
1883 src0
= emit_fetch(bld
, insn
, 0, 0);
1884 temp
= bld_insn_1(bld
, opcode
, src0
);
1885 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
)
/* RSQ is applied to the absolute value of component 0 */
1888 case TGSI_OPCODE_RSQ
:
1889 src0
= emit_fetch(bld
, insn
, 0, 0);
1890 temp
= bld_insn_1(bld
, NV_OP_ABS
, src0
);
1891 temp
= bld_insn_1(bld
, NV_OP_RSQ
, temp
);
1892 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
)
/* comparison/set operations: one NV_OP_SET per channel, with condition and
 * operand/result types derived from the TGSI opcode */
1895 case TGSI_OPCODE_SLT
:
1896 case TGSI_OPCODE_SGE
:
1897 case TGSI_OPCODE_SEQ
:
1898 case TGSI_OPCODE_SGT
:
1899 case TGSI_OPCODE_SLE
:
1900 case TGSI_OPCODE_SNE
:
1901 case TGSI_OPCODE_ISLT
:
1902 case TGSI_OPCODE_ISGE
:
1903 case TGSI_OPCODE_USEQ
:
1904 case TGSI_OPCODE_USGE
:
1905 case TGSI_OPCODE_USLT
:
1906 case TGSI_OPCODE_USNE
:
1907 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
) {
1908 src0
= emit_fetch(bld
, insn
, 0, c
);
1909 src1
= emit_fetch(bld
, insn
, 1, c
);
1910 dst0
[c
] = bld_insn_2(bld
, NV_OP_SET
, src0
, src1
);
1911 dst0
[c
]->insn
->set_cond
= translate_setcc(insn
->Instruction
.Opcode
);
1912 SET_TYPE(dst0
[c
], infer_dst_type(insn
->Instruction
.Opcode
));
1914 dst0
[c
]->insn
->src
[0]->typecast
=
1915 dst0
[c
]->insn
->src
[1]->typecast
=
1916 infer_src_type(insn
->Instruction
.Opcode
);
/* NOTE(review): the statement taken when the test below is true is not
 * visible; the lines after it treat the SET result as S32, take its
 * absolute value and convert it to F32. */
1918 if (dst0
[c
]->reg
.type
!= NV_TYPE_F32
)
1920 dst0
[c
]->reg
.as_type
= NV_TYPE_S32
;
1921 dst0
[c
] = bld_insn_1(bld
, NV_OP_ABS
, dst0
[c
]);
1922 dst0
[c
] = bld_insn_1(bld
, NV_OP_CVT
, dst0
[c
]);
1923 SET_TYPE(dst0
[c
], NV_TYPE_F32
);
/* SCS: x = COS, y = SIN of component 0 (after PRESIN), z = 0.0, w = 1.0 */
1926 case TGSI_OPCODE_SCS
:
1927 if (insn
->Dst
[0].Register
.WriteMask
& 0x3) {
1928 src0
= emit_fetch(bld
, insn
, 0, 0);
1929 temp
= bld_insn_1(bld
, NV_OP_PRESIN
, src0
);
1930 if (insn
->Dst
[0].Register
.WriteMask
& 0x1)
1931 dst0
[0] = bld_insn_1(bld
, NV_OP_COS
, temp
);
1932 if (insn
->Dst
[0].Register
.WriteMask
& 0x2)
1933 dst0
[1] = bld_insn_1(bld
, NV_OP_SIN
, temp
);
1935 if (insn
->Dst
[0].Register
.WriteMask
& 0x4)
1936 dst0
[2] = bld_imm_f32(bld
, 0.0f
);
1937 if (insn
->Dst
[0].Register
.WriteMask
& 0x8)
1938 dst0
[3] = bld_imm_f32(bld
, 1.0f
);
/* SSG: temp is 1.0f carrying the sign bit of the source; an XOR of temp
 * with itself (zero), predicated on EQ of the source's flags, is combined
 * with temp by a SELECT */
1940 case TGSI_OPCODE_SSG
:
1941 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
) {
1942 src0
= emit_fetch(bld
, insn
, 0, c
);
1943 src1
= bld_predicate(bld
, src0
, FALSE
);
1944 temp
= bld_insn_2(bld
, NV_OP_AND
, src0
, bld_imm_u32(bld
, 0x80000000));
1945 temp
= bld_insn_2(bld
, NV_OP_OR
, temp
, bld_imm_f32(bld
, 1.0f
));
1946 dst0
[c
] = bld_insn_2(bld
, NV_OP_XOR
, temp
, temp
);
1947 dst0
[c
]->insn
->cc
= NV_CC_EQ
;
1948 nv_reference(bld
->pc
, &dst0
[c
]->insn
->flags_src
, src1
);
1949 dst0
[c
] = bld_insn_2(bld
, NV_OP_SELECT
, dst0
[c
], temp
);
/* SUB: emitted as an ADD with the NEG modifier toggled on the second
 * source */
1952 case TGSI_OPCODE_SUB
:
1953 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
) {
1954 src0
= emit_fetch(bld
, insn
, 0, c
);
1955 src1
= emit_fetch(bld
, insn
, 1, c
);
1956 dst0
[c
] = bld_insn_2(bld
, NV_OP_ADD
, src0
, src1
);
1957 dst0
[c
]->insn
->src
[1]->mod
^= NV_MOD_NEG
;
/* all texture opcodes are handled by bld_tex() */
1960 case TGSI_OPCODE_TEX
:
1961 case TGSI_OPCODE_TXB
:
1962 case TGSI_OPCODE_TXL
:
1963 case TGSI_OPCODE_TXP
:
1964 case TGSI_OPCODE_TXD
: // fake
1965 bld_tex(bld
, dst0
, insn
);
/* XPD (cross product): a MUL of the "crossed" components followed by a MAD
 * whose third source (the MUL result) has its NEG modifier toggled;
 * w is set to 1.0.
 * NOTE(review): the test selecting the w channel is not visible here. */
1967 case TGSI_OPCODE_XPD
:
1968 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
) {
1970 dst0
[3] = bld_imm_f32(bld
, 1.0f
);
1973 src0
= emit_fetch(bld
, insn
, 1, (c
+ 1) % 3);
1974 src1
= emit_fetch(bld
, insn
, 0, (c
+ 2) % 3);
1975 dst0
[c
] = bld_insn_2(bld
, NV_OP_MUL
, src0
, src1
);
1977 src0
= emit_fetch(bld
, insn
, 0, (c
+ 1) % 3);
1978 src1
= emit_fetch(bld
, insn
, 1, (c
+ 2) % 3);
1979 dst0
[c
] = bld_insn_3(bld
, NV_OP_MAD
, src0
, src1
, dst0
[c
]);
1981 dst0
[c
]->insn
->src
[2]->mod
^= NV_MOD_NEG
;
/* RET: a single NV_OP_RET instruction marked fixed */
1984 case TGSI_OPCODE_RET
:
1985 (new_instruction(bld
->pc
, NV_OP_RET
))->fixed
= 1;
/* END: fragment programs export their outputs here */
1987 case TGSI_OPCODE_END
:
1988 if (bld
->ti
->p
->type
== PIPE_SHADER_FRAGMENT
)
1989 bld_export_outputs(bld
);
/* anything else is reported as unhandled */
1992 NOUVEAU_ERR("unhandled opcode %u\n", insn
->Instruction
.Opcode
);
/* write the per-channel results back to the destination register */
1997 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
)
1998 emit_store(bld
, insn
, c
, dst0
[c
]);
/* Vertex programs with clip planes: when the destination is the directly
 * addressed POSITION output, also compute one clip distance per plane */
2000 if (prog
->type
== PIPE_SHADER_VERTEX
&& prog
->vp
.clpd_nr
&&
2001 dreg
->Register
.File
== TGSI_FILE_OUTPUT
&& !dreg
->Register
.Indirect
&&
2002 prog
->out
[dreg
->Register
.Index
].sn
== TGSI_SEMANTIC_POSITION
) {
2005 for (p
= 0; p
< prog
->vp
.clpd_nr
; p
++) {
2006 struct nv_value
*clipd
= NULL
;
/* load the four values of plane p from NV_FILE_MEM_C(15), ids p*4 .. p*4+3,
 * and accumulate their products with dst0[c] (MUL first, MAD afterwards) */
2008 for (c
= 0; c
< 4; c
++) {
2009 temp
= new_value(bld
->pc
, NV_FILE_MEM_C(15), NV_TYPE_F32
);
2010 temp
->reg
.id
= p
* 4 + c
;
2011 temp
= bld_insn_1(bld
, NV_OP_LDA
, temp
);
2014 bld_insn_3(bld
, NV_OP_MAD
, dst0
[c
], temp
, clipd
) :
2015 bld_insn_2(bld
, NV_OP_MUL
, dst0
[c
], temp
);
/* move the sum to output register vp.clpd + p; the MOV is marked fixed */
2018 temp
= bld_insn_1(bld
, NV_OP_MOV
, clipd
);
2019 temp
->reg
.file
= NV_FILE_OUT
;
2020 temp
->reg
.id
= bld
->ti
->p
->vp
.clpd
+ p
;
2021 temp
->insn
->fixed
= 1;
2027 bld_free_value_trackers(struct bld_value_stack
*base
, int n
)
2031 for (i
= 0; i
< n
; ++i
)
2032 for (c
= 0; c
< 4; ++c
)
2033 if (base
[i
* 4 + c
].body
)
2034 FREE(base
[i
* 4 + c
].body
);
/* Translate the whole TGSI program described by ti into IR owned by pc.
 *
 * pc: program-construction context that receives the basic blocks
 * ti: translation info (program, instruction array, instruction count)
 *
 * A zero-initialised bld_context is allocated, the root basic block is
 * created, every instruction is passed to bld_instruction() in order, and
 * finally the arrays of the temp/address/predicate/output value trackers
 * are freed.
 *
 * NOTE(review): the initialisation of bld->pc / bld->ti, the release of bld
 * and the return value are not visible in this listing.
 */
2038 nv50_tgsi_to_nc(struct nv_pc
*pc
, struct nv50_translation_info
*ti
)
2040 struct bld_context
*bld
= CALLOC_STRUCT(bld_context
);
/* the new block is both the program root and the current insertion point */
2044 pc
->root
[0] = pc
->current_block
= new_basic_block(pc
);
2049 pc
->loop_nesting_bound
= 1;
/* c = number of bits set in fp.interp above bit 23 */
2051 c
= util_bitcount(bld
->ti
->p
->fp
.interp
>> 24);
/* Fragment programs with c != 0: frgcrd[3] becomes the reciprocal (RCP) of
 * a linearly interpolated (LINTERP) NV_FILE_MEM_V input with id c - 1 */
2052 if (c
&& ti
->p
->type
== PIPE_SHADER_FRAGMENT
) {
2053 bld
->frgcrd
[3] = new_value(pc
, NV_FILE_MEM_V
, NV_TYPE_F32
);
2054 bld
->frgcrd
[3]->reg
.id
= c
- 1;
2055 bld
->frgcrd
[3] = bld_insn_1(bld
, NV_OP_LINTERP
, bld
->frgcrd
[3]);
2056 bld
->frgcrd
[3] = bld_insn_1(bld
, NV_OP_RCP
, bld
->frgcrd
[3]);
/* translate the instructions one by one, in program order */
2059 for (ip
= 0; ip
< ti
->inst_nr
; ++ip
)
2060 bld_instruction(bld
, &ti
->insns
[ip
]);
/* release the per-register value arrays */
2062 bld_free_value_trackers(&bld
->tvs
[0][0], BLD_MAX_TEMPS
);
2063 bld_free_value_trackers(&bld
->avs
[0][0], BLD_MAX_ADDRS
);
2064 bld_free_value_trackers(&bld
->pvs
[0][0], BLD_MAX_PREDS
);
2066 bld_free_value_trackers(&bld
->ovs
[0][0], PIPE_MAX_SHADER_OUTPUTS
);
/* If a variable is assigned in a loop, replace all references to the value
 * from outside the loop with a phi value.
 */
2076 bld_replace_value(struct nv_pc
*pc
, struct nv_basic_block
*b
,
2077 struct nv_value
*old_val
,
2078 struct nv_value
*new_val
)
2080 struct nv_instruction
*nvi
;
2082 for (nvi
= b
->phi
? b
->phi
: b
->entry
; nvi
; nvi
= nvi
->next
) {
2084 for (s
= 0; s
< 5; ++s
) {
2087 if (nvi
->src
[s
]->value
== old_val
)
2088 nv_reference(pc
, &nvi
->src
[s
], new_val
);
2090 if (nvi
->flags_src
&& nvi
->flags_src
->value
== old_val
)
2091 nv_reference(pc
, &nvi
->flags_src
, new_val
);
2094 b
->pass_seq
= pc
->pass_seq
;
2096 if (b
->out
[0] && b
->out
[0]->pass_seq
< pc
->pass_seq
)
2097 bld_replace_value(pc
, b
->out
[0], old_val
, new_val
);
2099 if (b
->out
[1] && b
->out
[1]->pass_seq
< pc
->pass_seq
)
2100 bld_replace_value(pc
, b
->out
[1], old_val
, new_val
);