/*
 * Copyright 2010 Christoph Bumiller
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
/* #define NV50_TGSI2NC_DEBUG */

#include "nv50_context.h"
#include "nv50_pc.h"

#include "pipe/p_shader_tokens.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_util.h"

#include "util/u_simple_list.h"
#include "tgsi/tgsi_dump.h"

#define BLD_MAX_TEMPS 64
#define BLD_MAX_ADDRS 4
#define BLD_MAX_PREDS 4
#define BLD_MAX_IMMDS 128

#define BLD_MAX_COND_NESTING 4
#define BLD_MAX_LOOP_NESTING 4
#define BLD_MAX_CALL_NESTING 2
/* collects all values assigned to the same TGSI register */
struct bld_value_stack {
   struct nv_value *top;
   struct nv_value **body;
   unsigned size;
   uint16_t loop_use; /* 1 bit per loop level, indicates if used/defd */
   uint16_t loop_def;
};
static INLINE void
bld_vals_push_val(struct bld_value_stack *stk, struct nv_value *val)
{
   assert(!stk->size || (stk->body[stk->size - 1] != val));

   if (!(stk->size % 8)) {
      unsigned old_sz = (stk->size + 0) * sizeof(struct nv_value *);
      unsigned new_sz = (stk->size + 8) * sizeof(struct nv_value *);
      stk->body = (struct nv_value **)REALLOC(stk->body, old_sz, new_sz);
   }
   stk->body[stk->size++] = val;
}
static INLINE void
bld_vals_del_val(struct bld_value_stack *stk, struct nv_value *val)
{
   int i;

   for (i = stk->size - 1; i >= 0; --i)
      if (stk->body[i] == val)
         break;
   if (i < 0)
      return;

   if (i != stk->size - 1)
      stk->body[i] = stk->body[stk->size - 1];

   --stk->size; /* XXX: old size in REALLOC */
}
static INLINE void
bld_vals_push(struct bld_value_stack *stk)
{
   bld_vals_push_val(stk, stk->top);
   stk->top = NULL;
}
static INLINE void
bld_push_values(struct bld_value_stack *stacks, int n)
{
   int i, c;

   for (i = 0; i < n; ++i)
      for (c = 0; c < 4; ++c)
         if (stacks[i * 4 + c].top)
            bld_vals_push(&stacks[i * 4 + c]);
}
struct bld_context {
   struct nv50_translation_info *ti;

   struct nv_pc *pc;
   struct nv_basic_block *b;

   struct tgsi_parse_context parse[BLD_MAX_CALL_NESTING];
   int call_lvl;

   struct nv_basic_block *cond_bb[BLD_MAX_COND_NESTING];
   struct nv_basic_block *join_bb[BLD_MAX_COND_NESTING];
   struct nv_basic_block *else_bb[BLD_MAX_COND_NESTING];
   int cond_lvl;

   struct nv_basic_block *loop_bb[BLD_MAX_LOOP_NESTING];
   struct nv_basic_block *brkt_bb[BLD_MAX_LOOP_NESTING];
   int loop_lvl;

   ubyte out_kind; /* CFG_EDGE_FORWARD, or FAKE in case of BREAK/CONT */

   struct bld_value_stack tvs[BLD_MAX_TEMPS][4]; /* TGSI_FILE_TEMPORARY */
   struct bld_value_stack avs[BLD_MAX_ADDRS][4]; /* TGSI_FILE_ADDRESS */
   struct bld_value_stack pvs[BLD_MAX_PREDS][4]; /* TGSI_FILE_PREDICATE */
   struct bld_value_stack ovs[PIPE_MAX_SHADER_OUTPUTS][4];

   uint32_t outputs_written[(PIPE_MAX_SHADER_OUTPUTS + 31) / 32];

   struct nv_value *frgcrd[4];
   struct nv_value *sysval[4];

   /* wiped on entry to a new basic block */
   struct nv_value *saved_addr[4][2];
   struct nv_value *saved_inputs[128];
   struct nv_value *saved_immd[BLD_MAX_IMMDS];
   uint num_immds;
};
static INLINE ubyte
bld_stack_file(struct bld_context *bld, struct bld_value_stack *stk)
{
   if (stk < &bld->avs[0][0])
      return NV_FILE_GPR;
   if (stk < &bld->pvs[0][0])
      return NV_FILE_ADDR;
   if (stk < &bld->ovs[0][0])
      return NV_FILE_FLAGS;
   return NV_FILE_OUT;
}
static INLINE struct nv_value *
bld_fetch(struct bld_context *bld, struct bld_value_stack *stk, int i, int c)
{
   stk[i * 4 + c].loop_use |= 1 << bld->loop_lvl;
   return stk[i * 4 + c].top;
}
static struct nv_value *
bld_loop_phi(struct bld_context *, struct bld_value_stack *, struct nv_value *);

/* If a variable is defined in a loop without prior use, we don't need
 * a phi in the loop header to account for backwards flow.
 *
 * However, if this variable is then also used outside the loop, we do
 * need a phi after all. But we must not use this phi's def inside the
 * loop, so we can eliminate the phi if it is unused later.
 */
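/* Example (illustrative): in
 *    BGNLOOP
 *       MOV TEMP[0], ...    (definition before any use inside the loop)
 *    ENDLOOP
 * the store does not need a loop-header phi. If TEMP[0] is also read
 * after the loop, bld_phi merges the values at that join instead, and a
 * header phi that ends up unused can be eliminated again.
 */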
static INLINE void
bld_store(struct bld_context *bld, struct bld_value_stack *stk, int i, int c,
          struct nv_value *val)
{
   const uint16_t m = 1 << bld->loop_lvl;

   stk = &stk[i * 4 + c];

   if (bld->loop_lvl && !(m & (stk->loop_def | stk->loop_use)))
      bld_loop_phi(bld, stk, val);

   stk->top = val;
   stk->loop_def |= 1 << bld->loop_lvl;
}
static INLINE void
bld_clear_def_use(struct bld_value_stack *stk, int n, int lvl)
{
   int i;
   const uint16_t mask = ~(1 << lvl);

   for (i = 0; i < n * 4; ++i) {
      stk[i].loop_def &= mask;
      stk[i].loop_use &= mask;
   }
}
#define FETCH_TEMP(i, c)    bld_fetch(bld, &bld->tvs[0][0], i, c)
#define STORE_TEMP(i, c, v) bld_store(bld, &bld->tvs[0][0], i, c, (v))
#define FETCH_ADDR(i, c)    bld_fetch(bld, &bld->avs[0][0], i, c)
#define STORE_ADDR(i, c, v) bld_store(bld, &bld->avs[0][0], i, c, (v))
#define FETCH_PRED(i, c)    bld_fetch(bld, &bld->pvs[0][0], i, c)
#define STORE_PRED(i, c, v) bld_store(bld, &bld->pvs[0][0], i, c, (v))

#define STORE_OUTR(i, c, v)                                         \
   do {                                                             \
      bld->ovs[i][c].top = (v);                                     \
      bld->outputs_written[(i) / 8] |= 1 << (((i) * 4 + (c)) % 32); \
   } while (0)
static INLINE void
bld_warn_uninitialized(struct bld_context *bld, int kind,
                       struct bld_value_stack *stk, struct nv_basic_block *b)
{
#ifdef NV50_TGSI2NC_DEBUG
   long i = (stk - &bld->tvs[0][0]) / 4;
   long c = (stk - &bld->tvs[0][0]) & 3;

   if (c == 3)
      c = -1; /* 'x' - 1 == 'w' */

   debug_printf("WARNING: TEMP[%li].%c %s used uninitialized in BB:%i\n",
                i, (int)('x' + c), kind ? "may be" : "is", b->id);
#endif
}
static INLINE struct nv_value *
bld_def(struct nv_instruction *i, int c, struct nv_value *value)
{
   i->def[c] = value;
   value->insn = i;
   return value;
}
static INLINE struct nv_value *
find_by_bb(struct bld_value_stack *stack, struct nv_basic_block *b)
{
   int i;

   if (stack->top && stack->top->insn->bb == b)
      return stack->top;

   for (i = stack->size - 1; i >= 0; --i)
      if (stack->body[i]->insn->bb == b)
         return stack->body[i];
   return NULL;
}
/* fetch value from stack that was defined in the specified basic block,
 * or search for first definitions in all of its predecessors
 */
static void
fetch_by_bb(struct bld_value_stack *stack,
            struct nv_value **vals, int *n,
            struct nv_basic_block *b)
{
   int i;
   struct nv_value *val;

   assert(*n < 16); /* MAX_COND_NESTING */

   val = find_by_bb(stack, b);
   if (val) {
      for (i = 0; i < *n; ++i)
         if (vals[i] == val)
            return;
      vals[(*n)++] = val;
      return;
   }
   for (i = 0; i < b->num_in; ++i)
      if (!IS_WALL_EDGE(b->in_kind[i]))
         fetch_by_bb(stack, vals, n, b->in[i]);
}
static INLINE struct nv_value *
bld_load_imm_u32(struct bld_context *bld, uint32_t u);

static INLINE struct nv_value *
bld_undef(struct bld_context *bld, ubyte file)
{
   struct nv_instruction *nvi = new_instruction(bld->pc, NV_OP_UNDEF);

   return bld_def(nvi, 0, new_value(bld->pc, file, NV_TYPE_U32));
}
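/* Fetch the current value of a TGSI register slot for basic block @b:
 * reaching definitions are gathered over all non-wall predecessor edges;
 * a single definition dominating @b is returned directly, and competing
 * definitions are merged with an NV_OP_PHI, placed either at @b or at
 * the dominance frontier of a value's defining block.
 */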
static struct nv_value *
bld_phi(struct bld_context *bld, struct nv_basic_block *b,
        struct bld_value_stack *stack)
{
   struct nv_basic_block *in;
   struct nv_value *vals[16], *val;
   struct nv_instruction *phi;
   int i, j, n;

   do {
      i = n = 0;
      fetch_by_bb(stack, vals, &n, b);

      if (!n) {
         bld_warn_uninitialized(bld, 0, stack, b);
         return NULL;
      }

      if (n == 1) {
         if (nvbb_dominated_by(b, vals[0]->insn->bb))
            break;

         bld_warn_uninitialized(bld, 1, stack, b);

         /* back-tracking to insert missing value of other path */
         in = b;
         while (in->in[0]) {
            if (in->num_in == 1) {
               in = in->in[0];
            } else {
               if (!nvbb_reachable_by(in->in[0], vals[0]->insn->bb, b))
                  in = in->in[0];
               else
               if (!nvbb_reachable_by(in->in[1], vals[0]->insn->bb, b))
                  in = in->in[1];
               else
                  in = in->in[0];
            }
         }
         bld->pc->current_block = in;

         /* should make this a no-op */
         bld_vals_push_val(stack, bld_undef(bld, vals[0]->reg.file));
         continue;
      }

      for (i = 0; i < n; ++i) {
         /* if value dominates b, continue to the redefinitions */
         if (nvbb_dominated_by(b, vals[i]->insn->bb))
            continue;

         /* if value dominates any in-block, b should be the dom frontier */
         for (j = 0; j < b->num_in; ++j)
            if (nvbb_dominated_by(b->in[j], vals[i]->insn->bb))
               break;
         /* otherwise, find the dominance frontier and put the phi there */
         if (j == b->num_in) {
            in = nvbb_dom_frontier(vals[i]->insn->bb);
            val = bld_phi(bld, in, stack);
            bld_vals_push_val(stack, val);
            break;
         }
      }
   } while (i < n);

   bld->pc->current_block = b;

   if (n == 1)
      return vals[0];

   phi = new_instruction(bld->pc, NV_OP_PHI);

   bld_def(phi, 0, new_value(bld->pc, vals[0]->reg.file, vals[0]->reg.type));
   for (i = 0; i < n; ++i)
      phi->src[i] = new_ref(bld->pc, vals[i]);

   return phi->def[0];
}
/* Insert a phi function in the loop header.
 * For nested loops, we need to insert phi functions in all the outer
 * loop headers if they don't have one yet.
 *
 * @def: redefinition from inside loop, or NULL if to be replaced later
 */
static struct nv_value *
bld_loop_phi(struct bld_context *bld, struct bld_value_stack *stack,
             struct nv_value *def)
{
   struct nv_instruction *phi;
   struct nv_basic_block *bb = bld->pc->current_block;
   struct nv_value *val = NULL;

   if (bld->loop_lvl > 1) {
      --bld->loop_lvl;
      if (!((stack->loop_def | stack->loop_use) & (1 << bld->loop_lvl)))
         val = bld_loop_phi(bld, stack, NULL);
      ++bld->loop_lvl;
   }

   if (!val) {
      val = bld_phi(bld, bld->pc->current_block, stack); /* old definition */
      if (!val) {
         bld->pc->current_block = bld->loop_bb[bld->loop_lvl - 1]->in[0];
         val = bld_undef(bld, bld_stack_file(bld, stack));
      }
   }

   bld->pc->current_block = bld->loop_bb[bld->loop_lvl - 1];

   phi = new_instruction(bld->pc, NV_OP_PHI);

   bld_def(phi, 0, new_value_like(bld->pc, val));
   if (!def)
      def = phi->def[0];

   bld_vals_push_val(stack, phi->def[0]);

   phi->target = (struct nv_basic_block *)stack; /* cheat */

   nv_reference(bld->pc, &phi->src[0], val);
   nv_reference(bld->pc, &phi->src[1], def);

   bld->pc->current_block = bb;

   return phi->def[0];
}
static INLINE struct nv_value *
bld_fetch_global(struct bld_context *bld, struct bld_value_stack *stack)
{
   const uint16_t m = 1 << bld->loop_lvl;
   const uint16_t use = stack->loop_use;

   stack->loop_use |= m;

   /* If neither used nor def'd inside the loop, build a phi in foresight,
    * so we don't have to replace stuff later on, which requires tracking.
    */
   if (bld->loop_lvl && !((use | stack->loop_def) & m))
      return bld_loop_phi(bld, stack, NULL);

   return bld_phi(bld, bld->pc->current_block, stack);
}
static INLINE struct nv_value *
bld_imm_u32(struct bld_context *bld, uint32_t u)
{
   int i;
   unsigned n = bld->num_immds;

   for (i = 0; i < n; ++i)
      if (bld->saved_immd[i]->reg.imm.u32 == u)
         return bld->saved_immd[i];
   assert(n < BLD_MAX_IMMDS);

   bld->num_immds++;

   bld->saved_immd[n] = new_value(bld->pc, NV_FILE_IMM, NV_TYPE_U32);
   bld->saved_immd[n]->reg.imm.u32 = u;
   return bld->saved_immd[n];
}
static void
bld_replace_value(struct nv_pc *, struct nv_basic_block *, struct nv_value *,
                  struct nv_value *);

/* Replace the source of the phi in the loop header by the last assignment,
 * or eliminate the phi function if there is no assignment inside the loop.
 *
 * Redundancy situation 1 - (used) but (not redefined) value:
 *   %3 = phi %0, %3 = %3 is used
 *   %3 = phi %0, %4 = is new definition
 *
 * Redundancy situation 2 - (not used) but (redefined) value:
 *   %3 = phi %0, %2 = %2 is used, %3 could be used outside, deleted by DCE
 */
static void
bld_loop_end(struct bld_context *bld, struct nv_basic_block *bb)
{
   struct nv_basic_block *save = bld->pc->current_block;
   struct nv_instruction *phi, *next;
   struct nv_value *val;
   struct bld_value_stack *stk;
   int i, s, n;

   for (phi = bb->phi; phi && phi->opcode == NV_OP_PHI; phi = next) {
      next = phi->next;

      stk = (struct bld_value_stack *)phi->target;
      phi->target = NULL;

      for (s = 1, n = 0; n < bb->num_in; ++n) {
         if (bb->in_kind[n] != CFG_EDGE_BACK)
            continue;

         assert(s < 4);
         bld->pc->current_block = bb->in[n];
         val = bld_fetch_global(bld, stk);

         for (i = 0; i < 4; ++i)
            if (phi->src[i] && phi->src[i]->value == val)
               break;
         if (i == 4)
            nv_reference(bld->pc, &phi->src[s++], val);
      }
      bld->pc->current_block = save;

      if (phi->src[0]->value == phi->def[0] ||
          phi->src[0]->value == phi->src[1]->value)
         s = 1;
      else
      if (phi->src[1]->value == phi->def[0])
         s = 0;
      else
         continue;

      /* eliminate the phi */
      bld_vals_del_val(stk, phi->def[0]);

      ++bld->pc->pass_seq;
      bld_replace_value(bld->pc, bb, phi->def[0], phi->src[s]->value);

      nv_nvi_delete(phi);
   }
}
static INLINE struct nv_value *
bld_imm_f32(struct bld_context *bld, float f)
{
   return bld_imm_u32(bld, fui(f));
}

#define SET_TYPE(v, t) ((v)->reg.type = (v)->reg.as_type = (t))
static struct nv_value *
bld_insn_1(struct bld_context *bld, uint opcode, struct nv_value *src0)
{
   struct nv_instruction *insn = new_instruction(bld->pc, opcode);

   nv_reference(bld->pc, &insn->src[0], src0);

   return bld_def(insn, 0, new_value(bld->pc, NV_FILE_GPR, src0->reg.as_type));
}

static struct nv_value *
bld_insn_2(struct bld_context *bld, uint opcode,
           struct nv_value *src0, struct nv_value *src1)
{
   struct nv_instruction *insn = new_instruction(bld->pc, opcode);

   nv_reference(bld->pc, &insn->src[0], src0);
   nv_reference(bld->pc, &insn->src[1], src1);

   return bld_def(insn, 0, new_value(bld->pc, NV_FILE_GPR, src0->reg.as_type));
}

static struct nv_value *
bld_insn_3(struct bld_context *bld, uint opcode,
           struct nv_value *src0, struct nv_value *src1,
           struct nv_value *src2)
{
   struct nv_instruction *insn = new_instruction(bld->pc, opcode);

   nv_reference(bld->pc, &insn->src[0], src0);
   nv_reference(bld->pc, &insn->src[1], src1);
   nv_reference(bld->pc, &insn->src[2], src2);

   return bld_def(insn, 0, new_value(bld->pc, NV_FILE_GPR, src0->reg.as_type));
}
static struct nv_value *
bld_duplicate_insn(struct bld_context *bld, struct nv_instruction *nvi)
{
   struct nv_instruction *dupi = new_instruction(bld->pc, nvi->opcode);
   int c;

   if (nvi->def[0])
      bld_def(dupi, 0, new_value_like(bld->pc, nvi->def[0]));

   if (nvi->flags_def) {
      dupi->flags_def = new_value_like(bld->pc, nvi->flags_def);
      dupi->flags_def->insn = dupi;
   }

   for (c = 0; c < 5; ++c)
      if (nvi->src[c])
         nv_reference(bld->pc, &dupi->src[c], nvi->src[c]->value);
   if (nvi->flags_src)
      nv_reference(bld->pc, &dupi->flags_src, nvi->flags_src->value);

   dupi->cc = nvi->cc;
   dupi->saturate = nvi->saturate;
   dupi->centroid = nvi->centroid;
   dupi->flat = nvi->flat;

   return dupi->def[0];
}
static void
bld_lmem_store(struct bld_context *bld, struct nv_value *ptr, int ofst,
               struct nv_value *val)
{
   struct nv_instruction *insn = new_instruction(bld->pc, NV_OP_STA);
   struct nv_value *loc;

   loc = new_value(bld->pc, NV_FILE_MEM_L, NV_TYPE_U32);

   loc->reg.id = ofst * 4;

   nv_reference(bld->pc, &insn->src[0], loc);
   nv_reference(bld->pc, &insn->src[1], val);
   nv_reference(bld->pc, &insn->src[4], ptr);
}

static struct nv_value *
bld_lmem_load(struct bld_context *bld, struct nv_value *ptr, int ofst)
{
   struct nv_value *loc, *val;

   loc = new_value(bld->pc, NV_FILE_MEM_L, NV_TYPE_U32);

   loc->reg.id = ofst * 4;

   val = bld_insn_1(bld, NV_OP_LDA, loc);

   nv_reference(bld->pc, &val->insn->src[4], ptr);

   return val;
}
#define BLD_INSN_1_EX(d, op, dt, s0, s0t)           \
   do {                                             \
      (d) = bld_insn_1(bld, (NV_OP_##op), (s0));    \
      SET_TYPE(d, NV_TYPE_##dt);                    \
      (d)->insn->src[0]->typecast = NV_TYPE_##s0t;  \
   } while (0)

#define BLD_INSN_2_EX(d, op, dt, s0, s0t, s1, s1t)     \
   do {                                                \
      (d) = bld_insn_2(bld, (NV_OP_##op), (s0), (s1)); \
      SET_TYPE(d, NV_TYPE_##dt);                       \
      (d)->insn->src[0]->typecast = NV_TYPE_##s0t;     \
      (d)->insn->src[1]->typecast = NV_TYPE_##s1t;     \
   } while (0)
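/* pow(x, e) is built from the identity x^e == 2^(e * log2(x)), valid for
 * x > 0; NV_OP_PREEX2 conditions the multiply result for the EX2 unit.
 */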
static struct nv_value *
bld_pow(struct bld_context *bld, struct nv_value *x, struct nv_value *e)
{
   struct nv_value *val;

   BLD_INSN_1_EX(val, LG2, F32, x, F32);
   BLD_INSN_2_EX(val, MUL, F32, e, F32, val, F32);
   val = bld_insn_1(bld, NV_OP_PREEX2, val);
   val = bld_insn_1(bld, NV_OP_EX2, val);

   return val;
}
static INLINE struct nv_value *
bld_load_imm_f32(struct bld_context *bld, float f)
{
   return bld_insn_1(bld, NV_OP_MOV, bld_imm_f32(bld, f));
}

static INLINE struct nv_value *
bld_load_imm_u32(struct bld_context *bld, uint32_t u)
{
   return bld_insn_1(bld, NV_OP_MOV, bld_imm_u32(bld, u));
}
static struct nv_value *
bld_get_address(struct bld_context *bld, int id, struct nv_value *indirect)
{
   int i;
   struct nv_instruction *nvi;

   for (i = 0; i < 4; ++i) {
      if (!bld->saved_addr[i][0])
         break;
      if (bld->saved_addr[i][1] == indirect) {
         nvi = bld->saved_addr[i][0]->insn;
         if (nvi->src[0]->value->reg.imm.u32 == id)
            return bld->saved_addr[i][0];
      }
   }
   assert(i < 4);

   bld->saved_addr[i][0] = bld_load_imm_u32(bld, id);
   bld->saved_addr[i][0]->reg.file = NV_FILE_ADDR;
   bld->saved_addr[i][0]->reg.type = NV_TYPE_U16;
   bld->saved_addr[i][1] = indirect;
   return bld->saved_addr[i][0];
}
static struct nv_value *
bld_predicate(struct bld_context *bld, struct nv_value *src, boolean bool_only)
{
   struct nv_instruction *s0i, *nvi = src->insn;

   if (!nvi) {
      nvi = bld_insn_1(bld,
                       (src->reg.file == NV_FILE_IMM) ? NV_OP_MOV : NV_OP_LDA,
                       src)->insn;
   }

   if (bool_only) {
      while (nvi->opcode == NV_OP_ABS || nvi->opcode == NV_OP_NEG ||
             nvi->opcode == NV_OP_CVT) {
         s0i = nvi->src[0]->value->insn;
         if (!s0i ||
             s0i->opcode == NV_OP_LDA ||
             s0i->opcode == NV_OP_MOV ||
             s0i->opcode == NV_OP_PHI)
            break;
         nvi = s0i;
         assert(!nvi->flags_src);
      }
   }

   if (nvi->opcode == NV_OP_LDA ||
       nvi->opcode == NV_OP_MOV ||
       nvi->opcode == NV_OP_PHI || nvi->bb != bld->pc->current_block) {
      nvi = new_instruction(bld->pc, NV_OP_CVT);
      nv_reference(bld->pc, &nvi->src[0], src);
   }

   if (!nvi->flags_def) {
      nvi->flags_def = new_value(bld->pc, NV_FILE_FLAGS, NV_TYPE_U16);
      nvi->flags_def->insn = nvi;
   }
   return nvi->flags_def;
}
static void
bld_kil(struct bld_context *bld, struct nv_value *src)
{
   struct nv_instruction *nvi;

   src = bld_predicate(bld, src, FALSE);
   nvi = new_instruction(bld->pc, NV_OP_KIL);
   nvi->fixed = 1;
   nvi->flags_src = new_ref(bld->pc, src);
   nvi->cc = NV_CC_LT;
}
static void
bld_flow(struct bld_context *bld, uint opcode, ubyte cc,
         struct nv_value *src, struct nv_basic_block *target,
         boolean plan_reconverge)
{
   struct nv_instruction *nvi;

   if (plan_reconverge)
      new_instruction(bld->pc, NV_OP_JOINAT)->fixed = 1;

   nvi = new_instruction(bld->pc, opcode);
   nvi->is_terminator = 1;
   nvi->cc = cc;
   nvi->target = target;
   if (src)
      nvi->flags_src = new_ref(bld->pc, src);
}
static ubyte
translate_setcc(unsigned opcode)
{
   switch (opcode) {
   case TGSI_OPCODE_SLT: return NV_CC_LT;
   case TGSI_OPCODE_SGE: return NV_CC_GE;
   case TGSI_OPCODE_SEQ: return NV_CC_EQ;
   case TGSI_OPCODE_SGT: return NV_CC_GT;
   case TGSI_OPCODE_SLE: return NV_CC_LE;
   case TGSI_OPCODE_SNE: return NV_CC_NE | NV_CC_U;
   case TGSI_OPCODE_STR: return NV_CC_TR;
   case TGSI_OPCODE_SFL: return NV_CC_FL;

   case TGSI_OPCODE_ISLT: return NV_CC_LT;
   case TGSI_OPCODE_ISGE: return NV_CC_GE;
   case TGSI_OPCODE_USEQ: return NV_CC_EQ;
   case TGSI_OPCODE_USGE: return NV_CC_GE;
   case TGSI_OPCODE_USLT: return NV_CC_LT;
   case TGSI_OPCODE_USNE: return NV_CC_NE;
   default:
      assert(0);
      return NV_CC_FL;
   }
}
static uint
translate_opcode(uint opcode)
{
   switch (opcode) {
   case TGSI_OPCODE_ABS: return NV_OP_ABS;
   case TGSI_OPCODE_ADD:
   case TGSI_OPCODE_SUB:
   case TGSI_OPCODE_UADD: return NV_OP_ADD;
   case TGSI_OPCODE_AND: return NV_OP_AND;
   case TGSI_OPCODE_EX2: return NV_OP_EX2;
   case TGSI_OPCODE_CEIL: return NV_OP_CEIL;
   case TGSI_OPCODE_FLR: return NV_OP_FLOOR;
   case TGSI_OPCODE_TRUNC: return NV_OP_TRUNC;
   case TGSI_OPCODE_COS: return NV_OP_COS;
   case TGSI_OPCODE_SIN: return NV_OP_SIN;
   case TGSI_OPCODE_DDX: return NV_OP_DFDX;
   case TGSI_OPCODE_DDY: return NV_OP_DFDY;
   case TGSI_OPCODE_F2I:
   case TGSI_OPCODE_F2U:
   case TGSI_OPCODE_I2F:
   case TGSI_OPCODE_U2F: return NV_OP_CVT;
   case TGSI_OPCODE_INEG: return NV_OP_NEG;
   case TGSI_OPCODE_LG2: return NV_OP_LG2;
   case TGSI_OPCODE_ISHR:
   case TGSI_OPCODE_USHR: return NV_OP_SHR;
   case TGSI_OPCODE_MAD:
   case TGSI_OPCODE_UMAD: return NV_OP_MAD;
   case TGSI_OPCODE_MAX:
   case TGSI_OPCODE_IMAX:
   case TGSI_OPCODE_UMAX: return NV_OP_MAX;
   case TGSI_OPCODE_MIN:
   case TGSI_OPCODE_IMIN:
   case TGSI_OPCODE_UMIN: return NV_OP_MIN;
   case TGSI_OPCODE_MUL:
   case TGSI_OPCODE_UMUL: return NV_OP_MUL;
   case TGSI_OPCODE_OR: return NV_OP_OR;
   case TGSI_OPCODE_RCP: return NV_OP_RCP;
   case TGSI_OPCODE_RSQ: return NV_OP_RSQ;
   case TGSI_OPCODE_SAD: return NV_OP_SAD;
   case TGSI_OPCODE_SHL: return NV_OP_SHL;
   case TGSI_OPCODE_SLT:
   case TGSI_OPCODE_SGE:
   case TGSI_OPCODE_SEQ:
   case TGSI_OPCODE_SGT:
   case TGSI_OPCODE_SLE:
   case TGSI_OPCODE_SNE:
   case TGSI_OPCODE_ISLT:
   case TGSI_OPCODE_ISGE:
   case TGSI_OPCODE_USEQ:
   case TGSI_OPCODE_USGE:
   case TGSI_OPCODE_USLT:
   case TGSI_OPCODE_USNE: return NV_OP_SET;
   case TGSI_OPCODE_TEX: return NV_OP_TEX;
   case TGSI_OPCODE_TXP: return NV_OP_TEX;
   case TGSI_OPCODE_TXB: return NV_OP_TXB;
   case TGSI_OPCODE_TXL: return NV_OP_TXL;
   case TGSI_OPCODE_XOR: return NV_OP_XOR;
   default:
      return NV_OP_NOP;
   }
}
static ubyte
infer_src_type(unsigned opcode)
{
   switch (opcode) {
   case TGSI_OPCODE_MOV:
   case TGSI_OPCODE_AND:
   case TGSI_OPCODE_OR:
   case TGSI_OPCODE_XOR:
   case TGSI_OPCODE_SAD:
   case TGSI_OPCODE_U2F:
   case TGSI_OPCODE_UADD:
   case TGSI_OPCODE_UDIV:
   case TGSI_OPCODE_UMOD:
   case TGSI_OPCODE_UMAD:
   case TGSI_OPCODE_UMUL:
   case TGSI_OPCODE_UMAX:
   case TGSI_OPCODE_UMIN:
   case TGSI_OPCODE_USEQ:
   case TGSI_OPCODE_USGE:
   case TGSI_OPCODE_USLT:
   case TGSI_OPCODE_USNE:
   case TGSI_OPCODE_USHR:
      return NV_TYPE_U32;
   case TGSI_OPCODE_I2F:
   case TGSI_OPCODE_IDIV:
   case TGSI_OPCODE_IMAX:
   case TGSI_OPCODE_IMIN:
   case TGSI_OPCODE_INEG:
   case TGSI_OPCODE_ISGE:
   case TGSI_OPCODE_ISHR:
   case TGSI_OPCODE_ISLT:
      return NV_TYPE_S32;
   default:
      return NV_TYPE_F32;
   }
}

static ubyte
infer_dst_type(unsigned opcode)
{
   switch (opcode) {
   case TGSI_OPCODE_MOV:
   case TGSI_OPCODE_F2U:
   case TGSI_OPCODE_AND:
   case TGSI_OPCODE_OR:
   case TGSI_OPCODE_XOR:
   case TGSI_OPCODE_SAD:
   case TGSI_OPCODE_UADD:
   case TGSI_OPCODE_UDIV:
   case TGSI_OPCODE_UMOD:
   case TGSI_OPCODE_UMAD:
   case TGSI_OPCODE_UMUL:
   case TGSI_OPCODE_UMAX:
   case TGSI_OPCODE_UMIN:
   case TGSI_OPCODE_USEQ:
   case TGSI_OPCODE_USGE:
   case TGSI_OPCODE_USLT:
   case TGSI_OPCODE_USNE:
   case TGSI_OPCODE_USHR:
      return NV_TYPE_U32;
   case TGSI_OPCODE_F2I:
   case TGSI_OPCODE_IDIV:
   case TGSI_OPCODE_IMAX:
   case TGSI_OPCODE_IMIN:
   case TGSI_OPCODE_INEG:
   case TGSI_OPCODE_ISGE:
   case TGSI_OPCODE_ISHR:
   case TGSI_OPCODE_ISLT:
      return NV_TYPE_S32;
   default:
      return NV_TYPE_F32;
   }
}
static void
emit_store(struct bld_context *bld, const struct tgsi_full_instruction *inst,
           unsigned chan, struct nv_value *value)
{
   struct nv_value *ptr;
   const struct tgsi_full_dst_register *reg = &inst->Dst[0];

   if (reg->Register.Indirect) {
      ptr = FETCH_ADDR(reg->Indirect.Index,
                       tgsi_util_get_src_register_swizzle(&reg->Indirect, 0));
   } else {
      ptr = NULL;
   }

   if (inst->Instruction.Opcode != TGSI_OPCODE_MOV)
      value->reg.type = infer_dst_type(inst->Instruction.Opcode);

   switch (inst->Instruction.Saturate) {
   case TGSI_SAT_NONE:
      break;
   case TGSI_SAT_ZERO_ONE:
      BLD_INSN_1_EX(value, SAT, F32, value, F32);
      break;
   case TGSI_SAT_MINUS_PLUS_ONE:
      value->reg.as_type = NV_TYPE_F32;
      value = bld_insn_2(bld, NV_OP_MAX, value, bld_load_imm_f32(bld, -1.0f));
      value = bld_insn_2(bld, NV_OP_MIN, value, bld_load_imm_f32(bld, 1.0f));
      break;
   }

   switch (reg->Register.File) {
   case TGSI_FILE_OUTPUT:
      value = bld_insn_1(bld, NV_OP_MOV, value);
      value->reg.file = bld->ti->output_file;

      if (bld->ti->p->type == PIPE_SHADER_FRAGMENT) {
         STORE_OUTR(reg->Register.Index, chan, value);
      } else {
         value->insn->fixed = 1;
         value->reg.id = bld->ti->output_map[reg->Register.Index][chan];
      }
      break;
   case TGSI_FILE_TEMPORARY:
      assert(reg->Register.Index < BLD_MAX_TEMPS);
      value->reg.file = NV_FILE_GPR;
      if (value->insn->bb != bld->pc->current_block)
         value = bld_insn_1(bld, NV_OP_MOV, value);

      if (bld->ti->store_to_memory)
         bld_lmem_store(bld, ptr, reg->Register.Index * 4 + chan, value);
      else
         STORE_TEMP(reg->Register.Index, chan, value);
      break;
   case TGSI_FILE_ADDRESS:
      assert(reg->Register.Index < BLD_MAX_ADDRS);
      value->reg.file = NV_FILE_ADDR;
      value->reg.type = NV_TYPE_U16;
      STORE_ADDR(reg->Register.Index, chan, value);
      break;
   default:
      assert(0);
      break;
   }
}
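/* outputs_written stores one bit per output component, i.e. four bits
 * per output register: bit ((i * 4 + c) % 32) of word (i / 8).
 * A negative c asks whether any component of output i was written.
 */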
static INLINE uint32_t
bld_is_output_written(struct bld_context *bld, int i, int c)
{
   if (c < 0)
      return bld->outputs_written[i / 8] & (0xf << ((i * 4) % 32));
   return bld->outputs_written[i / 8] & (1 << ((i * 4 + c) % 32));
}
static void
bld_export_outputs(struct bld_context *bld)
{
   struct nv_value *vals[4];
   struct nv_instruction *nvi;
   int i, c, n;

   bld_push_values(&bld->ovs[0][0], PIPE_MAX_SHADER_OUTPUTS);

   for (i = 0; i < PIPE_MAX_SHADER_OUTPUTS; ++i) {
      if (!bld_is_output_written(bld, i, -1))
         continue;
      for (n = 0, c = 0; c < 4; ++c) {
         if (!bld_is_output_written(bld, i, c))
            continue;
         vals[n] = bld_fetch_global(bld, &bld->ovs[i][c]);
         assert(vals[n]);
         vals[n] = bld_insn_1(bld, NV_OP_MOV, vals[n]);
         vals[n++]->reg.id = bld->ti->output_map[i][c];
      }
      assert(n);

      (nvi = new_instruction(bld->pc, NV_OP_EXPORT))->fixed = 1;

      for (c = 0; c < n; ++c)
         nvi->src[c] = new_ref(bld->pc, vals[c]);
   }
}
static void
bld_new_block(struct bld_context *bld, struct nv_basic_block *b)
{
   int i;

   bld_push_values(&bld->tvs[0][0], BLD_MAX_TEMPS);
   bld_push_values(&bld->avs[0][0], BLD_MAX_ADDRS);
   bld_push_values(&bld->pvs[0][0], BLD_MAX_PREDS);
   bld_push_values(&bld->ovs[0][0], PIPE_MAX_SHADER_OUTPUTS);

   bld->pc->current_block = b;

   for (i = 0; i < 4; ++i)
      bld->saved_addr[i][0] = NULL;

   for (i = 0; i < 128; ++i)
      bld->saved_inputs[i] = NULL;

   bld->out_kind = CFG_EDGE_FORWARD;
}
static struct nv_value *
bld_saved_input(struct bld_context *bld, unsigned i, unsigned c)
{
   unsigned idx = bld->ti->input_map[i][c];

   if (bld->ti->p->type != PIPE_SHADER_FRAGMENT)
      return NULL;
   if (bld->saved_inputs[idx])
      return bld->saved_inputs[idx];
   return NULL;
}
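/* Interpolate a fragment input, or convert gl_FrontFacing (input id 255):
 * the face bit arrives as 0 / ~0; shifted left by 31 and XORed with the
 * bit pattern of -1.0f this becomes -1.0f / +1.0f respectively.
 */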
static struct nv_value *
bld_interpolate(struct bld_context *bld, unsigned mode, struct nv_value *val)
{
   if (val->reg.id == 255) {
      /* gl_FrontFacing: 0/~0 to -1.0/+1.0 */
      val = bld_insn_1(bld, NV_OP_LINTERP, val);
      val = bld_insn_2(bld, NV_OP_SHL, val, bld_imm_u32(bld, 31));
      val->insn->src[0]->typecast = NV_TYPE_U32;
      val = bld_insn_2(bld, NV_OP_XOR, val, bld_imm_f32(bld, -1.0f));
      val->insn->src[0]->typecast = NV_TYPE_U32;
   } else
   if (mode & (NV50_INTERP_LINEAR | NV50_INTERP_FLAT))
      val = bld_insn_1(bld, NV_OP_LINTERP, val);
   else
      val = bld_insn_2(bld, NV_OP_PINTERP, val, bld->frgcrd[3]);

   val->insn->flat = (mode & NV50_INTERP_FLAT) ? 1 : 0;
   val->insn->centroid = (mode & NV50_INTERP_CENTROID) ? 1 : 0;
   return val;
}
static struct nv_value *
emit_fetch(struct bld_context *bld, const struct tgsi_full_instruction *insn,
           const unsigned s, const unsigned chan)
{
   const struct tgsi_full_src_register *src = &insn->Src[s];
   struct nv_value *res;
   struct nv_value *ptr = NULL;
   unsigned idx, swz, dim_idx, ind_idx, ind_swz;
   ubyte type = infer_src_type(insn->Instruction.Opcode);

   idx = src->Register.Index;
   swz = tgsi_util_get_full_src_register_swizzle(src, chan);

   if (src->Register.Indirect) {
      ind_idx = src->Indirect.Index;
      ind_swz = tgsi_util_get_src_register_swizzle(&src->Indirect, 0);

      ptr = FETCH_ADDR(ind_idx, ind_swz);
   }
   if (idx >= (128 / 4) && src->Register.File == TGSI_FILE_CONSTANT)
      ptr = bld_get_address(bld, (idx * 16) & ~0x1ff, ptr);

   switch (src->Register.File) {
   case TGSI_FILE_CONSTANT:
      dim_idx = src->Dimension.Index ? src->Dimension.Index + 2 : 1;
      assert(dim_idx < 14);
      assert(dim_idx == 1); /* for now */

      res = new_value(bld->pc, NV_FILE_MEM_C(dim_idx), type);
      SET_TYPE(res, type);
      res->reg.id = (idx * 4 + swz) & 127;
      res = bld_insn_1(bld, NV_OP_LDA, res);

      if (ptr)
         res->insn->src[4] = new_ref(bld->pc, ptr);
      break;
   case TGSI_FILE_IMMEDIATE:
      assert(idx < bld->ti->immd32_nr);
      res = bld_load_imm_u32(bld, bld->ti->immd32[idx * 4 + swz]);

      switch (bld->ti->immd32_ty[idx]) {
      case TGSI_IMM_FLOAT32: SET_TYPE(res, NV_TYPE_F32); break;
      case TGSI_IMM_UINT32: SET_TYPE(res, NV_TYPE_U32); break;
      case TGSI_IMM_INT32: SET_TYPE(res, NV_TYPE_S32); break;
      default:
         SET_TYPE(res, type);
         break;
      }
      break;
   case TGSI_FILE_INPUT:
      res = bld_saved_input(bld, idx, swz);
      if (res && (insn->Instruction.Opcode != TGSI_OPCODE_TXP))
         return res;

      res = new_value(bld->pc, bld->ti->input_file, type);
      res->reg.id = bld->ti->input_map[idx][swz];

      if (res->reg.file == NV_FILE_MEM_V) {
         res = bld_interpolate(bld, bld->ti->interp_mode[idx], res);
      } else {
         assert(src->Dimension.Dimension == 0);
         res = bld_insn_1(bld, NV_OP_LDA, res);
         assert(res->reg.type == type);
      }
      bld->saved_inputs[bld->ti->input_map[idx][swz]] = res;
      break;
   case TGSI_FILE_TEMPORARY:
      if (bld->ti->store_to_memory)
         res = bld_lmem_load(bld, ptr, idx * 4 + swz);
      else
         res = bld_fetch_global(bld, &bld->tvs[idx][swz]);
      break;
   case TGSI_FILE_ADDRESS:
      res = bld_fetch_global(bld, &bld->avs[idx][swz]);
      break;
   case TGSI_FILE_PREDICATE:
      res = bld_fetch_global(bld, &bld->pvs[idx][swz]);
      break;
   default:
      NOUVEAU_ERR("illegal/unhandled src reg file: %d\n", src->Register.File);
      abort();
      break;
   }
   if (!res)
      return bld_undef(bld, NV_FILE_GPR);

   if (insn->Instruction.Opcode != TGSI_OPCODE_MOV)
      res->reg.as_type = type;

   switch (tgsi_util_get_full_src_register_sign_mode(src, chan)) {
   case TGSI_UTIL_SIGN_KEEP:
      break;
   case TGSI_UTIL_SIGN_CLEAR:
      res = bld_insn_1(bld, NV_OP_ABS, res);
      break;
   case TGSI_UTIL_SIGN_TOGGLE:
      res = bld_insn_1(bld, NV_OP_NEG, res);
      break;
   case TGSI_UTIL_SIGN_SET:
      res = bld_insn_1(bld, NV_OP_ABS, res);
      res = bld_insn_1(bld, NV_OP_NEG, res);
      break;
   default:
      NOUVEAU_ERR("illegal/unhandled src reg sign mode\n");
      abort();
      break;
   }
   return res;
}
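/* TGSI LIT: dst = (1, max(src.x, 0), spec, 1) with
 * spec = (src.x > 0) ? pow(max(src.y, 0), clamp(src.w, -128, 128)) : 0.
 * The flags def on the MAX plus the predicated MOV/SELECT below realize
 * the conditional without control flow.
 */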
static void
bld_lit(struct bld_context *bld, struct nv_value *dst0[4],
        const struct tgsi_full_instruction *insn)
{
   struct nv_value *val0, *zero;
   unsigned mask = insn->Dst[0].Register.WriteMask;

   if (mask & ((1 << 0) | (1 << 3)))
      dst0[3] = dst0[0] = bld_load_imm_f32(bld, 1.0f);

   if (mask & (3 << 1)) {
      zero = bld_load_imm_f32(bld, 0.0f);
      val0 = bld_insn_2(bld, NV_OP_MAX, emit_fetch(bld, insn, 0, 0), zero);

      if (mask & (1 << 1))
         dst0[1] = val0;
   }

   if (mask & (1 << 2)) {
      struct nv_value *val1, *val3, *src1, *src3;
      struct nv_value *pos128 = bld_load_imm_f32(bld,  127.999999f);
      struct nv_value *neg128 = bld_load_imm_f32(bld, -127.999999f);

      src1 = emit_fetch(bld, insn, 0, 1);
      src3 = emit_fetch(bld, insn, 0, 3);

      val0->insn->flags_def = new_value(bld->pc, NV_FILE_FLAGS, NV_TYPE_U16);
      val0->insn->flags_def->insn = val0->insn;

      val1 = bld_insn_2(bld, NV_OP_MAX, src1, zero);
      val3 = bld_insn_2(bld, NV_OP_MAX, src3, neg128);
      val3 = bld_insn_2(bld, NV_OP_MIN, val3, pos128);
      val3 = bld_pow(bld, val1, val3);

      dst0[2] = bld_insn_1(bld, NV_OP_MOV, zero);
      dst0[2]->insn->cc = NV_CC_LE;
      dst0[2]->insn->flags_src = new_ref(bld->pc, val0->insn->flags_def);

      dst0[2] = bld_insn_2(bld, NV_OP_SELECT, val3, dst0[2]);
   }
}
static void
get_tex_dim(const struct tgsi_full_instruction *insn, int *dim, int *arg)
{
   switch (insn->Texture.Texture) {
   case TGSI_TEXTURE_1D:
      *arg = *dim = 1;
      break;
   case TGSI_TEXTURE_SHADOW1D:
      *dim = 1;
      *arg = 2;
      break;
   case TGSI_TEXTURE_UNKNOWN:
   case TGSI_TEXTURE_2D:
   case TGSI_TEXTURE_RECT:
      *arg = *dim = 2;
      break;
   case TGSI_TEXTURE_SHADOW2D:
   case TGSI_TEXTURE_SHADOWRECT:
      *dim = 2;
      *arg = 3;
      break;
   case TGSI_TEXTURE_3D:
   case TGSI_TEXTURE_CUBE:
      *arg = *dim = 3;
      break;
   default:
      assert(0);
      break;
   }
}
static void
load_proj_tex_coords(struct bld_context *bld,
                     struct nv_value *t[4], int dim,
                     const struct tgsi_full_instruction *insn)
{
   int c, mask = 0;

   t[3] = emit_fetch(bld, insn, 0, 3);

   if (t[3]->insn->opcode == NV_OP_PINTERP) {
      t[3] = bld_duplicate_insn(bld, t[3]->insn);
      t[3]->insn->opcode = NV_OP_LINTERP;
      nv_reference(bld->pc, &t[3]->insn->src[1], NULL);
   }

   t[3] = bld_insn_1(bld, NV_OP_RCP, t[3]);

   for (c = 0; c < dim; ++c) {
      t[c] = emit_fetch(bld, insn, 0, c);

      if (t[c]->insn->opcode == NV_OP_LINTERP ||
          t[c]->insn->opcode == NV_OP_PINTERP) {
         t[c] = bld_duplicate_insn(bld, t[c]->insn);
         t[c]->insn->opcode = NV_OP_PINTERP;
         nv_reference(bld->pc, &t[c]->insn->src[1], t[3]);
      } else {
         mask |= 1 << c;
      }
   }

   for (c = 0; mask; ++c, mask >>= 1) {
      if (!(mask & 1))
         continue;
      t[c] = bld_insn_2(bld, NV_OP_MUL, t[c], t[3]);
   }
}
/* For a quad of threads / top left, top right, bottom left, bottom right
 * pixels, do a different operation, and take src0 from a specific thread.
 */
#define QOP_ADD  0
#define QOP_SUBR 1
#define QOP_SUB  2
#define QOP_MOV2 3

#define QOP(a, b, c, d) \
   ((QOP_##a << 0) | (QOP_##b << 2) | (QOP_##c << 4) | (QOP_##d << 6))
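/* Example: QOP(SUBR, SUBR, SUBR, SUBR), as used by bld_texbias_sequence,
 * selects the same quad-op (a reversed subtract) for all four pixel
 * positions; with write-to-flags enabled, the EQ condition afterwards
 * marks the pixels whose source value matches the broadcast lane.
 */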
static INLINE struct nv_value *
bld_quadop(struct bld_context *bld, ubyte qop, struct nv_value *src0, int lane,
           struct nv_value *src1, boolean wp)
{
   struct nv_value *val = bld_insn_2(bld, NV_OP_QUADOP, src0, src1);
   val->insn->lanes = lane;
   val->insn->quadop = qop;
   if (wp) {
      val->insn->flags_def = new_value(bld->pc, NV_FILE_FLAGS, NV_TYPE_U16);
      val->insn->flags_def->insn = val->insn;
   }
   return val;
}
static INLINE struct nv_value *
bld_cmov(struct bld_context *bld,
         struct nv_value *src, ubyte cc, struct nv_value *cr)
{
   src = bld_insn_1(bld, NV_OP_MOV, src);

   src->insn->cc = cc;
   src->insn->flags_src = new_ref(bld->pc, cr);

   return src;
}
static struct nv_instruction *
emit_tex(struct bld_context *bld, uint opcode,
         struct nv_value *dst[4], struct nv_value *t_in[4],
         int argc, int tic, int tsc, int cube)
{
   struct nv_value *t[4];
   struct nv_instruction *nvi;
   int c;

   /* the inputs to a tex instruction must be separate values */
   for (c = 0; c < argc; ++c) {
      t[c] = bld_insn_1(bld, NV_OP_MOV, t_in[c]);
      SET_TYPE(t[c], NV_TYPE_F32);
      t[c]->insn->fixed = 1;
   }

   nvi = new_instruction(bld->pc, opcode);

   for (c = 0; c < 4; ++c)
      dst[c] = bld_def(nvi, c, new_value(bld->pc, NV_FILE_GPR, NV_TYPE_F32));

   for (c = 0; c < argc; ++c)
      nvi->src[c] = new_ref(bld->pc, t[c]);

   nvi->tex_t = tic;
   nvi->tex_s = tsc;
   nvi->tex_mask = 0xf;
   nvi->tex_cube = cube;
   nvi->tex_live = 0;
   nvi->tex_argc = argc;

   return nvi;
}
static void
bld_texlod_sequence(struct bld_context *bld,
                    struct nv_value *dst[4], struct nv_value *t[4], int arg,
                    int tic, int tsc, int cube)
{
   emit_tex(bld, NV_OP_TXL, dst, t, arg, tic, tsc, cube); /* TODO */
}
/* The lanes of a quad are grouped by the bit in the condition register
 * they have set, which is selected by differing bias values.
 * Move the input values for TEX into a new register set for each group
 * and execute TEX only for a specific group.
 * We always need to use 4 new registers for the inputs/outputs because
 * the implicitly calculated derivatives must be correct.
 */
static void
bld_texbias_sequence(struct bld_context *bld,
                     struct nv_value *dst[4], struct nv_value *t[4], int arg,
                     int tic, int tsc, int cube)
{
   struct nv_instruction *sel, *tex;
   struct nv_value *bit[4], *cr[4], *res[4][4], *val;
   int l, c;

   const ubyte cc[4] = { NV_CC_EQ, NV_CC_S, NV_CC_C, NV_CC_O };

   for (l = 0; l < 4; ++l) {
      bit[l] = bld_load_imm_u32(bld, 1 << l);

      val = bld_quadop(bld, QOP(SUBR, SUBR, SUBR, SUBR),
                       t[arg - 1], l, t[arg - 1], TRUE);

      cr[l] = bld_cmov(bld, bit[l], NV_CC_EQ, val->insn->flags_def);

      cr[l]->reg.file = NV_FILE_FLAGS;
      SET_TYPE(cr[l], NV_TYPE_U16);
   }

   sel = new_instruction(bld->pc, NV_OP_SELECT);

   for (l = 0; l < 4; ++l)
      sel->src[l] = new_ref(bld->pc, cr[l]);

   bld_def(sel, 0, new_value(bld->pc, NV_FILE_FLAGS, NV_TYPE_U16));

   for (l = 0; l < 4; ++l) {
      tex = emit_tex(bld, NV_OP_TXB, dst, t, arg, tic, tsc, cube);

      tex->cc = cc[l];
      tex->flags_src = new_ref(bld->pc, sel->def[0]);

      for (c = 0; c < 4; ++c)
         res[l][c] = tex->def[c];
   }

   for (l = 0; l < 4; ++l)
      for (c = 0; c < 4; ++c)
         res[l][c] = bld_cmov(bld, res[l][c], cc[l], sel->def[0]);

   for (c = 0; c < 4; ++c) {
      sel = new_instruction(bld->pc, NV_OP_SELECT);

      for (l = 0; l < 4; ++l)
         sel->src[l] = new_ref(bld->pc, res[l][c]);

      bld_def(sel, 0, (dst[c] = new_value(bld->pc, NV_FILE_GPR, NV_TYPE_F32)));
   }
}
static boolean
bld_is_constant(struct nv_value *val)
{
   if (val->reg.file == NV_FILE_IMM)
      return TRUE;
   return val->insn && nvcg_find_constant(val->insn->src[0]);
}
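/* Emit a texture lookup. Cube map coordinates are scaled by
 * 1 / max(|x|, |y|, |z|) first, so the major axis component reaches
 * +-1.0 as the sampler expects; TXP instead divides the coordinates
 * by src.w via load_proj_tex_coords.
 */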
static void
bld_tex(struct bld_context *bld, struct nv_value *dst0[4],
        const struct tgsi_full_instruction *insn)
{
   struct nv_value *t[4], *s[3];
   uint opcode = translate_opcode(insn->Instruction.Opcode);
   int arg, dim, c;
   const int tic = insn->Src[1].Register.Index;
   const int tsc = tic;
   const int cube = (insn->Texture.Texture == TGSI_TEXTURE_CUBE) ? 1 : 0;

   get_tex_dim(insn, &dim, &arg);

   if (!cube && insn->Instruction.Opcode == TGSI_OPCODE_TXP)
      load_proj_tex_coords(bld, t, dim, insn);
   else
      for (c = 0; c < dim; ++c)
         t[c] = emit_fetch(bld, insn, 0, c);

   if (cube) {
      assert(dim >= 3);
      for (c = 0; c < 3; ++c)
         s[c] = bld_insn_1(bld, NV_OP_ABS, t[c]);

      s[0] = bld_insn_2(bld, NV_OP_MAX, s[0], s[1]);
      s[0] = bld_insn_2(bld, NV_OP_MAX, s[0], s[2]);
      s[0] = bld_insn_1(bld, NV_OP_RCP, s[0]);

      for (c = 0; c < 3; ++c)
         t[c] = bld_insn_2(bld, NV_OP_MUL, t[c], s[0]);
   }

   if (arg != dim)
      t[dim] = emit_fetch(bld, insn, 0, 2);

   if (opcode == NV_OP_TXB || opcode == NV_OP_TXL) {
      t[arg++] = emit_fetch(bld, insn, 0, 3);

      if ((bld->ti->p->type == PIPE_SHADER_FRAGMENT) &&
          !bld_is_constant(t[arg - 1])) {
         if (opcode == NV_OP_TXB)
            bld_texbias_sequence(bld, dst0, t, arg, tic, tsc, cube);
         else
            bld_texlod_sequence(bld, dst0, t, arg, tic, tsc, cube);
         return;
      }
   }
   emit_tex(bld, opcode, dst0, t, arg, tic, tsc, cube);
}
static INLINE struct nv_value *
bld_dot(struct bld_context *bld, const struct tgsi_full_instruction *insn,
        int n)
{
   struct nv_value *dotp, *src0, *src1;
   int c;

   src0 = emit_fetch(bld, insn, 0, 0);
   src1 = emit_fetch(bld, insn, 1, 0);
   dotp = bld_insn_2(bld, NV_OP_MUL, src0, src1);

   for (c = 1; c < n; ++c) {
      src0 = emit_fetch(bld, insn, 0, c);
      src1 = emit_fetch(bld, insn, 1, c);
      dotp = bld_insn_3(bld, NV_OP_MAD, src0, src1, dotp);
   }
   return dotp;
}

#define FOR_EACH_DST0_ENABLED_CHANNEL(chan, inst) \
   for (chan = 0; chan < 4; ++chan)               \
      if ((inst)->Dst[0].Register.WriteMask & (1 << chan))
static void
bld_instruction(struct bld_context *bld,
                const struct tgsi_full_instruction *insn)
{
   struct nv_value *src0;
   struct nv_value *src1;
   struct nv_value *src2;
   struct nv_value *dst0[4];
   struct nv_value *temp;
   int c;
   uint opcode = translate_opcode(insn->Instruction.Opcode);

#ifdef NV50_TGSI2NC_DEBUG
   debug_printf("bld_instruction:"); tgsi_dump_instruction(insn, 1);
#endif

   switch (insn->Instruction.Opcode) {
   case TGSI_OPCODE_ADD:
   case TGSI_OPCODE_MAX:
   case TGSI_OPCODE_MIN:
   case TGSI_OPCODE_MUL:
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
         src0 = emit_fetch(bld, insn, 0, c);
         src1 = emit_fetch(bld, insn, 1, c);
         dst0[c] = bld_insn_2(bld, opcode, src0, src1);
      }
      break;
   case TGSI_OPCODE_ARL:
      src1 = bld_imm_u32(bld, 4);
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
         src0 = emit_fetch(bld, insn, 0, c);
         temp = bld_insn_1(bld, NV_OP_FLOOR, src0);
         SET_TYPE(temp, NV_TYPE_S32);
         dst0[c] = bld_insn_2(bld, NV_OP_SHL, temp, src1);
      }
      break;
   case TGSI_OPCODE_CMP:
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
         src0 = emit_fetch(bld, insn, 0, c);
         src1 = emit_fetch(bld, insn, 1, c);
         src2 = emit_fetch(bld, insn, 2, c);
         src0 = bld_predicate(bld, src0, FALSE);

         src1 = bld_insn_1(bld, NV_OP_MOV, src1);
         src1->insn->flags_src = new_ref(bld->pc, src0);
         src1->insn->cc = NV_CC_LT;

         src2 = bld_insn_1(bld, NV_OP_MOV, src2);
         src2->insn->flags_src = new_ref(bld->pc, src0);
         src2->insn->cc = NV_CC_GE;

         dst0[c] = bld_insn_2(bld, NV_OP_SELECT, src1, src2);
      }
      break;
   case TGSI_OPCODE_COS:
   case TGSI_OPCODE_SIN:
      src0 = emit_fetch(bld, insn, 0, 0);
      temp = bld_insn_1(bld, NV_OP_PRESIN, src0);
      if (insn->Dst[0].Register.WriteMask & 7)
         temp = bld_insn_1(bld, opcode, temp);
      for (c = 0; c < 3; ++c)
         if (insn->Dst[0].Register.WriteMask & (1 << c))
            dst0[c] = temp;
      if (!(insn->Dst[0].Register.WriteMask & (1 << 3)))
         break;
      src0 = emit_fetch(bld, insn, 0, 3);
      temp = bld_insn_1(bld, NV_OP_PRESIN, src0);
      dst0[3] = bld_insn_1(bld, opcode, temp);
      break;
   case TGSI_OPCODE_DP2:
      temp = bld_dot(bld, insn, 2);
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
         dst0[c] = temp;
      break;
   case TGSI_OPCODE_DP3:
      temp = bld_dot(bld, insn, 3);
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
         dst0[c] = temp;
      break;
   case TGSI_OPCODE_DP4:
      temp = bld_dot(bld, insn, 4);
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
         dst0[c] = temp;
      break;
   case TGSI_OPCODE_DPH:
      src0 = bld_dot(bld, insn, 3);
      src1 = emit_fetch(bld, insn, 1, 3);
      temp = bld_insn_2(bld, NV_OP_ADD, src0, src1);
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
         dst0[c] = temp;
      break;
   case TGSI_OPCODE_DST:
      if (insn->Dst[0].Register.WriteMask & 1)
         dst0[0] = bld_imm_f32(bld, 1.0f);
      if (insn->Dst[0].Register.WriteMask & 2) {
         src0 = emit_fetch(bld, insn, 0, 1);
         src1 = emit_fetch(bld, insn, 1, 1);
         dst0[1] = bld_insn_2(bld, NV_OP_MUL, src0, src1);
      }
      if (insn->Dst[0].Register.WriteMask & 4)
         dst0[2] = emit_fetch(bld, insn, 0, 2);
      if (insn->Dst[0].Register.WriteMask & 8)
         dst0[3] = emit_fetch(bld, insn, 1, 3);
      break;
   case TGSI_OPCODE_EX2:
      src0 = emit_fetch(bld, insn, 0, 0);
      temp = bld_insn_1(bld, NV_OP_PREEX2, src0);
      temp = bld_insn_1(bld, NV_OP_EX2, temp);
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
         dst0[c] = temp;
      break;
   case TGSI_OPCODE_FRC:
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
         src0 = emit_fetch(bld, insn, 0, c);
         dst0[c] = bld_insn_1(bld, NV_OP_FLOOR, src0);
         dst0[c] = bld_insn_2(bld, NV_OP_SUB, src0, dst0[c]);
      }
      break;
   case TGSI_OPCODE_KIL:
      for (c = 0; c < 4; ++c) {
         src0 = emit_fetch(bld, insn, 0, c);
         bld_kil(bld, src0);
      }
      break;
   case TGSI_OPCODE_KILP:
      (new_instruction(bld->pc, NV_OP_KIL))->fixed = 1;
      break;
   case TGSI_OPCODE_IF:
   {
      struct nv_basic_block *b = new_basic_block(bld->pc);

      assert(bld->cond_lvl < BLD_MAX_COND_NESTING);

      nvbb_attach_block(bld->pc->current_block, b, CFG_EDGE_FORWARD);

      bld->join_bb[bld->cond_lvl] = bld->pc->current_block;
      bld->cond_bb[bld->cond_lvl] = bld->pc->current_block;

      src1 = bld_predicate(bld, emit_fetch(bld, insn, 0, 0), TRUE);

      bld_flow(bld, NV_OP_BRA, NV_CC_EQ, src1, NULL, (bld->cond_lvl == 0));

      ++bld->cond_lvl;
      bld_new_block(bld, b);
   }
      break;
   case TGSI_OPCODE_ELSE:
   {
      struct nv_basic_block *b = new_basic_block(bld->pc);

      --bld->cond_lvl;
      nvbb_attach_block(bld->join_bb[bld->cond_lvl], b, CFG_EDGE_FORWARD);

      bld->cond_bb[bld->cond_lvl]->exit->target = b;
      bld->cond_bb[bld->cond_lvl] = bld->pc->current_block;

      new_instruction(bld->pc, NV_OP_BRA)->is_terminator = 1;

      ++bld->cond_lvl;
      bld_new_block(bld, b);
   }
      break;
   case TGSI_OPCODE_ENDIF:
   {
      struct nv_basic_block *b = new_basic_block(bld->pc);

      --bld->cond_lvl;
      nvbb_attach_block(bld->pc->current_block, b, bld->out_kind);
      nvbb_attach_block(bld->cond_bb[bld->cond_lvl], b, CFG_EDGE_FORWARD);

      bld->cond_bb[bld->cond_lvl]->exit->target = b;

      bld_new_block(bld, b);

      if (!bld->cond_lvl && bld->join_bb[bld->cond_lvl]) {
         bld->join_bb[bld->cond_lvl]->exit->prev->target = b;
         new_instruction(bld->pc, NV_OP_JOIN)->is_join = TRUE;
      }
   }
      break;
   case TGSI_OPCODE_BGNLOOP:
   {
      struct nv_basic_block *bl = new_basic_block(bld->pc);
      struct nv_basic_block *bb = new_basic_block(bld->pc);

      assert(bld->loop_lvl < BLD_MAX_LOOP_NESTING);

      bld->loop_bb[bld->loop_lvl] = bl;
      bld->brkt_bb[bld->loop_lvl] = bb;

      bld_flow(bld, NV_OP_BREAKADDR, NV_CC_TR, NULL, bb, FALSE);

      nvbb_attach_block(bld->pc->current_block, bl, CFG_EDGE_LOOP_ENTER);

      bld_new_block(bld, bld->loop_bb[bld->loop_lvl++]);

      if (bld->loop_lvl == bld->pc->loop_nesting_bound)
         bld->pc->loop_nesting_bound++;

      bld_clear_def_use(&bld->tvs[0][0], BLD_MAX_TEMPS, bld->loop_lvl);
      bld_clear_def_use(&bld->avs[0][0], BLD_MAX_ADDRS, bld->loop_lvl);
      bld_clear_def_use(&bld->pvs[0][0], BLD_MAX_PREDS, bld->loop_lvl);
   }
      break;
   case TGSI_OPCODE_BRK:
   {
      struct nv_basic_block *bb = bld->brkt_bb[bld->loop_lvl - 1];

      bld_flow(bld, NV_OP_BREAK, NV_CC_TR, NULL, bb, FALSE);

      if (bld->out_kind == CFG_EDGE_FORWARD) /* else we already had BRK/CONT */
         nvbb_attach_block(bld->pc->current_block, bb, CFG_EDGE_LOOP_LEAVE);

      bld->out_kind = CFG_EDGE_FAKE;
   }
      break;
   case TGSI_OPCODE_CONT:
   {
      struct nv_basic_block *bb = bld->loop_bb[bld->loop_lvl - 1];

      bld_flow(bld, NV_OP_BRA, NV_CC_TR, NULL, bb, FALSE);

      nvbb_attach_block(bld->pc->current_block, bb, CFG_EDGE_BACK);

      if ((bb = bld->join_bb[bld->cond_lvl - 1])) {
         bld->join_bb[bld->cond_lvl - 1] = NULL;
         nv_nvi_delete(bb->exit->prev);
      }
      bld->out_kind = CFG_EDGE_FAKE;
   }
      break;
   case TGSI_OPCODE_ENDLOOP:
   {
      struct nv_basic_block *bb = bld->loop_bb[bld->loop_lvl - 1];

      bld_flow(bld, NV_OP_BRA, NV_CC_TR, NULL, bb, FALSE);

      nvbb_attach_block(bld->pc->current_block, bb, CFG_EDGE_BACK);

      bld_loop_end(bld, bb); /* replace loop-side operand of the phis */

      bld_new_block(bld, bld->brkt_bb[--bld->loop_lvl]);
   }
      break;
   case TGSI_OPCODE_ABS:
   case TGSI_OPCODE_CEIL:
   case TGSI_OPCODE_FLR:
   case TGSI_OPCODE_TRUNC:
   case TGSI_OPCODE_DDX:
   case TGSI_OPCODE_DDY:
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
         src0 = emit_fetch(bld, insn, 0, c);
         dst0[c] = bld_insn_1(bld, opcode, src0);
      }
      break;
   case TGSI_OPCODE_LIT:
      bld_lit(bld, dst0, insn);
      break;
   case TGSI_OPCODE_LRP:
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
         src0 = emit_fetch(bld, insn, 0, c);
         src1 = emit_fetch(bld, insn, 1, c);
         src2 = emit_fetch(bld, insn, 2, c);
         dst0[c] = bld_insn_2(bld, NV_OP_SUB, src1, src2);
         dst0[c] = bld_insn_3(bld, NV_OP_MAD, dst0[c], src0, src2);
      }
      break;
   case TGSI_OPCODE_MOV:
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
         dst0[c] = emit_fetch(bld, insn, 0, c);
      break;
   case TGSI_OPCODE_MAD:
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
         src0 = emit_fetch(bld, insn, 0, c);
         src1 = emit_fetch(bld, insn, 1, c);
         src2 = emit_fetch(bld, insn, 2, c);
         dst0[c] = bld_insn_3(bld, opcode, src0, src1, src2);
      }
      break;
   case TGSI_OPCODE_POW:
      src0 = emit_fetch(bld, insn, 0, 0);
      src1 = emit_fetch(bld, insn, 1, 0);
      temp = bld_pow(bld, src0, src1);
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
         dst0[c] = temp;
      break;
   case TGSI_OPCODE_RCP:
   case TGSI_OPCODE_LG2:
      src0 = emit_fetch(bld, insn, 0, 0);
      temp = bld_insn_1(bld, opcode, src0);
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
         dst0[c] = temp;
      break;
   case TGSI_OPCODE_RSQ:
      src0 = emit_fetch(bld, insn, 0, 0);
      temp = bld_insn_1(bld, NV_OP_ABS, src0);
      temp = bld_insn_1(bld, NV_OP_RSQ, temp);
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
         dst0[c] = temp;
      break;
   case TGSI_OPCODE_SLT:
   case TGSI_OPCODE_SGE:
   case TGSI_OPCODE_SEQ:
   case TGSI_OPCODE_SGT:
   case TGSI_OPCODE_SLE:
   case TGSI_OPCODE_SNE:
   case TGSI_OPCODE_ISLT:
   case TGSI_OPCODE_ISGE:
   case TGSI_OPCODE_USEQ:
   case TGSI_OPCODE_USGE:
   case TGSI_OPCODE_USLT:
   case TGSI_OPCODE_USNE:
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
         src0 = emit_fetch(bld, insn, 0, c);
         src1 = emit_fetch(bld, insn, 1, c);
         dst0[c] = bld_insn_2(bld, NV_OP_SET, src0, src1);
         dst0[c]->insn->set_cond = translate_setcc(insn->Instruction.Opcode);
         SET_TYPE(dst0[c], infer_dst_type(insn->Instruction.Opcode));

         dst0[c]->insn->src[0]->typecast =
         dst0[c]->insn->src[1]->typecast =
            infer_src_type(insn->Instruction.Opcode);

         if (dst0[c]->reg.type != NV_TYPE_F32)
            continue;
         dst0[c]->reg.as_type = NV_TYPE_S32;
         dst0[c] = bld_insn_1(bld, NV_OP_ABS, dst0[c]);
         dst0[c] = bld_insn_1(bld, NV_OP_CVT, dst0[c]);
         SET_TYPE(dst0[c], NV_TYPE_F32);
      }
      break;
   case TGSI_OPCODE_SCS:
      if (insn->Dst[0].Register.WriteMask & 0x3) {
         src0 = emit_fetch(bld, insn, 0, 0);
         temp = bld_insn_1(bld, NV_OP_PRESIN, src0);
         if (insn->Dst[0].Register.WriteMask & 0x1)
            dst0[0] = bld_insn_1(bld, NV_OP_COS, temp);
         if (insn->Dst[0].Register.WriteMask & 0x2)
            dst0[1] = bld_insn_1(bld, NV_OP_SIN, temp);
      }
      if (insn->Dst[0].Register.WriteMask & 0x4)
         dst0[2] = bld_imm_f32(bld, 0.0f);
      if (insn->Dst[0].Register.WriteMask & 0x8)
         dst0[3] = bld_imm_f32(bld, 1.0f);
      break;
   case TGSI_OPCODE_SSG:
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
         src0 = emit_fetch(bld, insn, 0, c);
         src1 = bld_predicate(bld, src0, FALSE);
         temp = bld_insn_2(bld, NV_OP_AND, src0, bld_imm_u32(bld, 0x80000000));
         temp = bld_insn_2(bld, NV_OP_OR, temp, bld_imm_f32(bld, 1.0f));
         dst0[c] = bld_insn_2(bld, NV_OP_XOR, temp, temp);
         dst0[c]->insn->cc = NV_CC_EQ;
         nv_reference(bld->pc, &dst0[c]->insn->flags_src, src1);
         dst0[c] = bld_insn_2(bld, NV_OP_SELECT, dst0[c], temp);
      }
      break;
   case TGSI_OPCODE_SUB:
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
         src0 = emit_fetch(bld, insn, 0, c);
         src1 = emit_fetch(bld, insn, 1, c);
         dst0[c] = bld_insn_2(bld, NV_OP_ADD, src0, src1);
         dst0[c]->insn->src[1]->mod ^= NV_MOD_NEG;
      }
      break;
   case TGSI_OPCODE_TEX:
   case TGSI_OPCODE_TXB:
   case TGSI_OPCODE_TXL:
   case TGSI_OPCODE_TXP:
      bld_tex(bld, dst0, insn);
      break;
   case TGSI_OPCODE_XPD:
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
         if (c == 3) {
            dst0[3] = bld_imm_f32(bld, 1.0f);
            break;
         }
         src0 = emit_fetch(bld, insn, 1, (c + 1) % 3);
         src1 = emit_fetch(bld, insn, 0, (c + 2) % 3);
         dst0[c] = bld_insn_2(bld, NV_OP_MUL, src0, src1);

         src0 = emit_fetch(bld, insn, 0, (c + 1) % 3);
         src1 = emit_fetch(bld, insn, 1, (c + 2) % 3);
         dst0[c] = bld_insn_3(bld, NV_OP_MAD, src0, src1, dst0[c]);

         dst0[c]->insn->src[2]->mod ^= NV_MOD_NEG;
      }
      break;
   case TGSI_OPCODE_RET:
      (new_instruction(bld->pc, NV_OP_RET))->fixed = 1;
      break;
   case TGSI_OPCODE_END:
      if (bld->ti->p->type == PIPE_SHADER_FRAGMENT)
         bld_export_outputs(bld);
      break;
   default:
      NOUVEAU_ERR("unhandled opcode %u\n", insn->Instruction.Opcode);
      abort();
      return;
   }

   FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
      emit_store(bld, insn, c, dst0[c]);
}
static void
bld_free_value_trackers(struct bld_value_stack *base, int n)
{
   int i, c;

   for (i = 0; i < n; ++i)
      for (c = 0; c < 4; ++c)
         if (base[i * 4 + c].body)
            FREE(base[i * 4 + c].body);
}
int
nv50_tgsi_to_nc(struct nv_pc *pc, struct nv50_translation_info *ti)
{
   struct bld_context *bld = CALLOC_STRUCT(bld_context);
   int c;
   unsigned ip;

   pc->root[0] = pc->current_block = new_basic_block(pc);

   bld->pc = pc;
   bld->ti = ti;

   pc->loop_nesting_bound = 1;

   c = util_bitcount(bld->ti->p->fp.interp >> 24);
   if (c && ti->p->type == PIPE_SHADER_FRAGMENT) {
      bld->frgcrd[3] = new_value(pc, NV_FILE_MEM_V, NV_TYPE_F32);
      bld->frgcrd[3]->reg.id = c - 1;
      bld->frgcrd[3] = bld_insn_1(bld, NV_OP_LINTERP, bld->frgcrd[3]);
      bld->frgcrd[3] = bld_insn_1(bld, NV_OP_RCP, bld->frgcrd[3]);
   }

   for (ip = 0; ip < ti->inst_nr; ++ip)
      bld_instruction(bld, &ti->insns[ip]);

   bld_free_value_trackers(&bld->tvs[0][0], BLD_MAX_TEMPS);
   bld_free_value_trackers(&bld->avs[0][0], BLD_MAX_ADDRS);
   bld_free_value_trackers(&bld->pvs[0][0], BLD_MAX_PREDS);

   bld_free_value_trackers(&bld->ovs[0][0], PIPE_MAX_SHADER_OUTPUTS);

   FREE(bld);
   return 0;
}
/* If a variable is assigned in a loop, replace all references to the value
 * from outside the loop with a phi value.
 */
static void
bld_replace_value(struct nv_pc *pc, struct nv_basic_block *b,
                  struct nv_value *old_val,
                  struct nv_value *new_val)
{
   struct nv_instruction *nvi;
   int s;

   for (nvi = b->phi ? b->phi : b->entry; nvi; nvi = nvi->next) {
      for (s = 0; s < 5; ++s) {
         if (!nvi->src[s])
            continue;
         if (nvi->src[s]->value == old_val)
            nv_reference(pc, &nvi->src[s], new_val);
      }
      if (nvi->flags_src && nvi->flags_src->value == old_val)
         nv_reference(pc, &nvi->flags_src, new_val);
   }
   b->pass_seq = pc->pass_seq;

   if (b->out[0] && b->out[0]->pass_seq < pc->pass_seq)
      bld_replace_value(pc, b->out[0], old_val, new_val);

   if (b->out[1] && b->out[1]->pass_seq < pc->pass_seq)
      bld_replace_value(pc, b->out[1], old_val, new_val);
}