2faabc8d7d4b40a7df8c6f833f4a6f3ab8982a65
2 * Copyright 2010 Christoph Bumiller
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 /* #define NV50_TGSI2NC_DEBUG */
27 #include "nv50_context.h"
30 #include "pipe/p_shader_tokens.h"
31 #include "tgsi/tgsi_parse.h"
32 #include "tgsi/tgsi_util.h"
34 #include "tgsi/tgsi_dump.h"
36 #define BLD_MAX_TEMPS 64
37 #define BLD_MAX_ADDRS 4
38 #define BLD_MAX_PREDS 4
39 #define BLD_MAX_IMMDS 128
41 #define BLD_MAX_COND_NESTING 8
42 #define BLD_MAX_LOOP_NESTING 4
43 #define BLD_MAX_CALL_NESTING 2
45 /* collects all values assigned to the same TGSI register */
46 struct bld_value_stack
{
48 struct nv_value
**body
;
50 uint16_t loop_use
; /* 1 bit per loop level, indicates if used/defd */
55 bld_vals_push_val(struct bld_value_stack
*stk
, struct nv_value
*val
)
57 assert(!stk
->size
|| (stk
->body
[stk
->size
- 1] != val
));
59 if (!(stk
->size
% 8)) {
60 unsigned old_sz
= (stk
->size
+ 0) * sizeof(struct nv_value
*);
61 unsigned new_sz
= (stk
->size
+ 8) * sizeof(struct nv_value
*);
62 stk
->body
= (struct nv_value
**)REALLOC(stk
->body
, old_sz
, new_sz
);
64 stk
->body
[stk
->size
++] = val
;
68 bld_vals_del_val(struct bld_value_stack
*stk
, struct nv_value
*val
)
72 for (i
= stk
->size
; i
> 0; --i
)
73 if (stk
->body
[i
- 1] == val
)
79 stk
->body
[i
- 1] = stk
->body
[stk
->size
- 1];
81 --stk
->size
; /* XXX: old size in REALLOC */
86 bld_vals_push(struct bld_value_stack
*stk
)
88 bld_vals_push_val(stk
, stk
->top
);
93 bld_push_values(struct bld_value_stack
*stacks
, int n
)
97 for (i
= 0; i
< n
; ++i
)
98 for (c
= 0; c
< 4; ++c
)
99 if (stacks
[i
* 4 + c
].top
)
100 bld_vals_push(&stacks
[i
* 4 + c
]);
104 struct nv50_translation_info
*ti
;
107 struct nv_basic_block
*b
;
109 struct tgsi_parse_context parse
[BLD_MAX_CALL_NESTING
];
112 struct nv_basic_block
*cond_bb
[BLD_MAX_COND_NESTING
];
113 struct nv_basic_block
*join_bb
[BLD_MAX_COND_NESTING
];
114 struct nv_basic_block
*else_bb
[BLD_MAX_COND_NESTING
];
116 struct nv_basic_block
*loop_bb
[BLD_MAX_LOOP_NESTING
];
117 struct nv_basic_block
*brkt_bb
[BLD_MAX_LOOP_NESTING
];
120 ubyte out_kind
; /* CFG_EDGE_FORWARD, or FAKE in case of BREAK/CONT */
122 struct bld_value_stack tvs
[BLD_MAX_TEMPS
][4]; /* TGSI_FILE_TEMPORARY */
123 struct bld_value_stack avs
[BLD_MAX_ADDRS
][4]; /* TGSI_FILE_ADDRESS */
124 struct bld_value_stack pvs
[BLD_MAX_PREDS
][4]; /* TGSI_FILE_PREDICATE */
125 struct bld_value_stack ovs
[PIPE_MAX_SHADER_OUTPUTS
][4];
127 uint32_t outputs_written
[(PIPE_MAX_SHADER_OUTPUTS
+ 7) / 8];
129 struct nv_value
*frgcrd
[4];
130 struct nv_value
*sysval
[4];
133 struct nv_value
*saved_addr
[4][2];
134 struct nv_value
*saved_inputs
[128];
135 struct nv_value
*saved_immd
[BLD_MAX_IMMDS
];
140 bld_stack_file(struct bld_context
*bld
, struct bld_value_stack
*stk
)
142 if (stk
< &bld
->avs
[0][0])
145 if (stk
< &bld
->pvs
[0][0])
148 if (stk
< &bld
->ovs
[0][0])
149 return NV_FILE_FLAGS
;
154 static INLINE
struct nv_value
*
155 bld_fetch(struct bld_context
*bld
, struct bld_value_stack
*stk
, int i
, int c
)
157 stk
[i
* 4 + c
].loop_use
|= 1 << bld
->loop_lvl
;
159 return stk
[i
* 4 + c
].top
;
162 static struct nv_value
*
163 bld_loop_phi(struct bld_context
*, struct bld_value_stack
*, struct nv_value
*);
165 /* If a variable is defined in a loop without prior use, we don't need
166 * a phi in the loop header to account for backwards flow.
168 * However, if this variable is then also used outside the loop, we do
169 * need a phi after all. But we must not use this phi's def inside the
170 * loop, so we can eliminate the phi if it is unused later.
173 bld_store(struct bld_context
*bld
, struct bld_value_stack
*stk
, int i
, int c
,
174 struct nv_value
*val
)
176 const uint16_t m
= 1 << bld
->loop_lvl
;
178 stk
= &stk
[i
* 4 + c
];
180 if (bld
->loop_lvl
&& !(m
& (stk
->loop_def
| stk
->loop_use
)))
181 bld_loop_phi(bld
, stk
, val
);
184 stk
->loop_def
|= 1 << bld
->loop_lvl
;
188 bld_clear_def_use(struct bld_value_stack
*stk
, int n
, int lvl
)
191 const uint16_t mask
= ~(1 << lvl
);
193 for (i
= 0; i
< n
* 4; ++i
) {
194 stk
[i
].loop_def
&= mask
;
195 stk
[i
].loop_use
&= mask
;
199 #define FETCH_TEMP(i, c) bld_fetch(bld, &bld->tvs[0][0], i, c)
200 #define STORE_TEMP(i, c, v) bld_store(bld, &bld->tvs[0][0], i, c, (v))
201 #define FETCH_ADDR(i, c) bld_fetch(bld, &bld->avs[0][0], i, c)
202 #define STORE_ADDR(i, c, v) bld_store(bld, &bld->avs[0][0], i, c, (v))
203 #define FETCH_PRED(i, c) bld_fetch(bld, &bld->pvs[0][0], i, c)
204 #define STORE_PRED(i, c, v) bld_store(bld, &bld->pvs[0][0], i, c, (v))
206 #define STORE_OUTR(i, c, v) \
208 bld->ovs[i][c].top = (v); \
209 bld->outputs_written[(i) / 8] |= 1 << (((i) * 4 + (c)) % 32); \
213 bld_warn_uninitialized(struct bld_context
*bld
, int kind
,
214 struct bld_value_stack
*stk
, struct nv_basic_block
*b
)
216 #ifdef NV50_TGSI2NC_DEBUG
217 long i
= (stk
- &bld
->tvs
[0][0]) / 4;
218 long c
= (stk
- &bld
->tvs
[0][0]) & 3;
223 debug_printf("WARNING: TEMP[%li].%c %s used uninitialized in BB:%i\n",
224 i
, (int)('x' + c
), kind
? "may be" : "is", b
->id
);
228 static INLINE
struct nv_value
*
229 bld_def(struct nv_instruction
*i
, int c
, struct nv_value
*value
)
236 static INLINE
struct nv_value
*
237 find_by_bb(struct bld_value_stack
*stack
, struct nv_basic_block
*b
)
241 if (stack
->top
&& stack
->top
->insn
->bb
== b
)
244 for (i
= stack
->size
- 1; i
>= 0; --i
)
245 if (stack
->body
[i
]->insn
->bb
== b
)
246 return stack
->body
[i
];
250 /* fetch value from stack that was defined in the specified basic block,
251 * or search for first definitions in all of its predecessors
254 fetch_by_bb(struct bld_value_stack
*stack
,
255 struct nv_value
**vals
, int *n
,
256 struct nv_basic_block
*b
)
259 struct nv_value
*val
;
261 assert(*n
< 16); /* MAX_COND_NESTING */
263 val
= find_by_bb(stack
, b
);
265 for (i
= 0; i
< *n
; ++i
)
271 for (i
= 0; i
< b
->num_in
; ++i
)
272 if (!IS_WALL_EDGE(b
->in_kind
[i
]))
273 fetch_by_bb(stack
, vals
, n
, b
->in
[i
]);
276 static INLINE
struct nv_value
*
277 bld_load_imm_u32(struct bld_context
*bld
, uint32_t u
);
279 static INLINE
struct nv_value
*
280 bld_undef(struct bld_context
*bld
, ubyte file
)
282 struct nv_instruction
*nvi
= new_instruction(bld
->pc
, NV_OP_UNDEF
);
284 return bld_def(nvi
, 0, new_value(bld
->pc
, file
, NV_TYPE_U32
));
287 static struct nv_value
*
288 bld_phi(struct bld_context
*bld
, struct nv_basic_block
*b
,
289 struct bld_value_stack
*stack
)
291 struct nv_basic_block
*in
;
292 struct nv_value
*vals
[16], *val
;
293 struct nv_instruction
*phi
;
298 fetch_by_bb(stack
, vals
, &n
, b
);
301 bld_warn_uninitialized(bld
, 0, stack
, b
);
306 if (nvbb_dominated_by(b
, vals
[0]->insn
->bb
))
309 bld_warn_uninitialized(bld
, 1, stack
, b
);
311 /* back-tracking to insert missing value of other path */
314 if (in
->num_in
== 1) {
317 if (!nvbb_reachable_by(in
->in
[0], vals
[0]->insn
->bb
, b
))
320 if (!nvbb_reachable_by(in
->in
[1], vals
[0]->insn
->bb
, b
))
326 bld
->pc
->current_block
= in
;
328 /* should make this a no-op */
329 bld_vals_push_val(stack
, bld_undef(bld
, vals
[0]->reg
.file
));
333 for (i
= 0; i
< n
; ++i
) {
334 /* if value dominates b, continue to the redefinitions */
335 if (nvbb_dominated_by(b
, vals
[i
]->insn
->bb
))
338 /* if value dominates any in-block, b should be the dom frontier */
339 for (j
= 0; j
< b
->num_in
; ++j
)
340 if (nvbb_dominated_by(b
->in
[j
], vals
[i
]->insn
->bb
))
342 /* otherwise, find the dominance frontier and put the phi there */
343 if (j
== b
->num_in
) {
344 in
= nvbb_dom_frontier(vals
[i
]->insn
->bb
);
345 val
= bld_phi(bld
, in
, stack
);
346 bld_vals_push_val(stack
, val
);
352 bld
->pc
->current_block
= b
;
357 phi
= new_instruction(bld
->pc
, NV_OP_PHI
);
359 bld_def(phi
, 0, new_value(bld
->pc
, vals
[0]->reg
.file
, vals
[0]->reg
.type
));
360 for (i
= 0; i
< n
; ++i
)
361 phi
->src
[i
] = new_ref(bld
->pc
, vals
[i
]);
366 /* Insert a phi function in the loop header.
367 * For nested loops, we need to insert phi functions in all the outer
368 * loop headers if they don't have one yet.
370 * @def: redefinition from inside loop, or NULL if to be replaced later
372 static struct nv_value
*
373 bld_loop_phi(struct bld_context
*bld
, struct bld_value_stack
*stack
,
374 struct nv_value
*def
)
376 struct nv_instruction
*phi
;
377 struct nv_basic_block
*bb
= bld
->pc
->current_block
;
378 struct nv_value
*val
= NULL
;
380 if (bld
->loop_lvl
> 1) {
382 if (!((stack
->loop_def
| stack
->loop_use
) & (1 << bld
->loop_lvl
)))
383 val
= bld_loop_phi(bld
, stack
, NULL
);
388 val
= bld_phi(bld
, bld
->pc
->current_block
, stack
); /* old definition */
390 bld
->pc
->current_block
= bld
->loop_bb
[bld
->loop_lvl
- 1]->in
[0];
391 val
= bld_undef(bld
, bld_stack_file(bld
, stack
));
394 bld
->pc
->current_block
= bld
->loop_bb
[bld
->loop_lvl
- 1];
396 phi
= new_instruction(bld
->pc
, NV_OP_PHI
);
398 bld_def(phi
, 0, new_value_like(bld
->pc
, val
));
402 bld_vals_push_val(stack
, phi
->def
[0]);
404 phi
->target
= (struct nv_basic_block
*)stack
; /* cheat */
406 nv_reference(bld
->pc
, &phi
->src
[0], val
);
407 nv_reference(bld
->pc
, &phi
->src
[1], def
);
409 bld
->pc
->current_block
= bb
;
414 static INLINE
struct nv_value
*
415 bld_fetch_global(struct bld_context
*bld
, struct bld_value_stack
*stack
)
417 const uint16_t m
= 1 << bld
->loop_lvl
;
418 const uint16_t use
= stack
->loop_use
;
420 stack
->loop_use
|= m
;
422 /* If neither used nor def'd inside the loop, build a phi in foresight,
423 * so we don't have to replace stuff later on, which requires tracking.
425 if (bld
->loop_lvl
&& !((use
| stack
->loop_def
) & m
))
426 return bld_loop_phi(bld
, stack
, NULL
);
428 return bld_phi(bld
, bld
->pc
->current_block
, stack
);
431 static INLINE
struct nv_value
*
432 bld_imm_u32(struct bld_context
*bld
, uint32_t u
)
435 unsigned n
= bld
->num_immds
;
437 for (i
= 0; i
< n
; ++i
)
438 if (bld
->saved_immd
[i
]->reg
.imm
.u32
== u
)
439 return bld
->saved_immd
[i
];
440 assert(n
< BLD_MAX_IMMDS
);
444 bld
->saved_immd
[n
] = new_value(bld
->pc
, NV_FILE_IMM
, NV_TYPE_U32
);
445 bld
->saved_immd
[n
]->reg
.imm
.u32
= u
;
446 return bld
->saved_immd
[n
];
450 bld_replace_value(struct nv_pc
*, struct nv_basic_block
*, struct nv_value
*,
453 /* Replace the source of the phi in the loop header by the last assignment,
454 * or eliminate the phi function if there is no assignment inside the loop.
456 * Redundancy situation 1 - (used) but (not redefined) value:
457 * %3 = phi %0, %3 = %3 is used
458 * %3 = phi %0, %4 = is new definition
460 * Redundancy situation 2 - (not used) but (redefined) value:
461 * %3 = phi %0, %2 = %2 is used, %3 could be used outside, deleted by DCE
464 bld_loop_end(struct bld_context
*bld
, struct nv_basic_block
*bb
)
466 struct nv_basic_block
*save
= bld
->pc
->current_block
;
467 struct nv_instruction
*phi
, *next
;
468 struct nv_value
*val
;
469 struct bld_value_stack
*stk
;
472 for (phi
= bb
->phi
; phi
&& phi
->opcode
== NV_OP_PHI
; phi
= next
) {
475 stk
= (struct bld_value_stack
*)phi
->target
;
478 for (s
= 1, n
= 0; n
< bb
->num_in
; ++n
) {
479 if (bb
->in_kind
[n
] != CFG_EDGE_BACK
)
483 bld
->pc
->current_block
= bb
->in
[n
];
484 val
= bld_fetch_global(bld
, stk
);
486 for (i
= 0; i
< 4; ++i
)
487 if (phi
->src
[i
] && phi
->src
[i
]->value
== val
)
490 nv_reference(bld
->pc
, &phi
->src
[s
++], val
);
492 bld
->pc
->current_block
= save
;
494 if (phi
->src
[0]->value
== phi
->def
[0] ||
495 phi
->src
[0]->value
== phi
->src
[1]->value
)
498 if (phi
->src
[1]->value
== phi
->def
[0])
504 /* eliminate the phi */
505 bld_vals_del_val(stk
, phi
->def
[0]);
508 bld_replace_value(bld
->pc
, bb
, phi
->def
[0], phi
->src
[s
]->value
);
515 static INLINE
struct nv_value
*
516 bld_imm_f32(struct bld_context
*bld
, float f
)
518 return bld_imm_u32(bld
, fui(f
));
521 #define SET_TYPE(v, t) ((v)->reg.type = (v)->reg.as_type = (t))
523 static struct nv_value
*
524 bld_insn_1(struct bld_context
*bld
, uint opcode
, struct nv_value
*src0
)
526 struct nv_instruction
*insn
= new_instruction(bld
->pc
, opcode
);
528 nv_reference(bld
->pc
, &insn
->src
[0], src0
);
530 return bld_def(insn
, 0, new_value(bld
->pc
, NV_FILE_GPR
, src0
->reg
.as_type
));
533 static struct nv_value
*
534 bld_insn_2(struct bld_context
*bld
, uint opcode
,
535 struct nv_value
*src0
, struct nv_value
*src1
)
537 struct nv_instruction
*insn
= new_instruction(bld
->pc
, opcode
);
539 nv_reference(bld
->pc
, &insn
->src
[0], src0
);
540 nv_reference(bld
->pc
, &insn
->src
[1], src1
);
542 return bld_def(insn
, 0, new_value(bld
->pc
, NV_FILE_GPR
, src0
->reg
.as_type
));
545 static struct nv_value
*
546 bld_insn_3(struct bld_context
*bld
, uint opcode
,
547 struct nv_value
*src0
, struct nv_value
*src1
,
548 struct nv_value
*src2
)
550 struct nv_instruction
*insn
= new_instruction(bld
->pc
, opcode
);
552 nv_reference(bld
->pc
, &insn
->src
[0], src0
);
553 nv_reference(bld
->pc
, &insn
->src
[1], src1
);
554 nv_reference(bld
->pc
, &insn
->src
[2], src2
);
556 return bld_def(insn
, 0, new_value(bld
->pc
, NV_FILE_GPR
, src0
->reg
.as_type
));
559 static struct nv_value
*
560 bld_duplicate_insn(struct bld_context
*bld
, struct nv_instruction
*nvi
)
562 struct nv_instruction
*dupi
= new_instruction(bld
->pc
, nvi
->opcode
);
566 bld_def(dupi
, 0, new_value_like(bld
->pc
, nvi
->def
[0]));
568 if (nvi
->flags_def
) {
569 dupi
->flags_def
= new_value_like(bld
->pc
, nvi
->flags_def
);
570 dupi
->flags_def
->insn
= dupi
;
573 for (c
= 0; c
< 5; ++c
)
575 nv_reference(bld
->pc
, &dupi
->src
[c
], nvi
->src
[c
]->value
);
577 nv_reference(bld
->pc
, &dupi
->flags_src
, nvi
->flags_src
->value
);
580 dupi
->saturate
= nvi
->saturate
;
581 dupi
->centroid
= nvi
->centroid
;
582 dupi
->flat
= nvi
->flat
;
588 bld_lmem_store(struct bld_context
*bld
, struct nv_value
*ptr
, int ofst
,
589 struct nv_value
*val
)
591 struct nv_instruction
*insn
= new_instruction(bld
->pc
, NV_OP_STA
);
592 struct nv_value
*loc
;
594 loc
= new_value(bld
->pc
, NV_FILE_MEM_L
, NV_TYPE_U32
);
596 loc
->reg
.id
= ofst
* 4;
598 nv_reference(bld
->pc
, &insn
->src
[0], loc
);
599 nv_reference(bld
->pc
, &insn
->src
[1], val
);
600 nv_reference(bld
->pc
, &insn
->src
[4], ptr
);
603 static struct nv_value
*
604 bld_lmem_load(struct bld_context
*bld
, struct nv_value
*ptr
, int ofst
)
606 struct nv_value
*loc
, *val
;
608 loc
= new_value(bld
->pc
, NV_FILE_MEM_L
, NV_TYPE_U32
);
610 loc
->reg
.id
= ofst
* 4;
612 val
= bld_insn_1(bld
, NV_OP_LDA
, loc
);
614 nv_reference(bld
->pc
, &val
->insn
->src
[4], ptr
);
619 #define BLD_INSN_1_EX(d, op, dt, s0, s0t) \
621 (d) = bld_insn_1(bld, (NV_OP_##op), (s0)); \
622 SET_TYPE(d, NV_TYPE_##dt); \
623 (d)->insn->src[0]->typecast = NV_TYPE_##s0t; \
626 #define BLD_INSN_2_EX(d, op, dt, s0, s0t, s1, s1t) \
628 (d) = bld_insn_2(bld, (NV_OP_##op), (s0), (s1)); \
629 SET_TYPE(d, NV_TYPE_##dt); \
630 (d)->insn->src[0]->typecast = NV_TYPE_##s0t; \
631 (d)->insn->src[1]->typecast = NV_TYPE_##s1t; \
634 static struct nv_value
*
635 bld_pow(struct bld_context
*bld
, struct nv_value
*x
, struct nv_value
*e
)
637 struct nv_value
*val
;
639 BLD_INSN_1_EX(val
, LG2
, F32
, x
, F32
);
640 BLD_INSN_2_EX(val
, MUL
, F32
, e
, F32
, val
, F32
);
641 val
= bld_insn_1(bld
, NV_OP_PREEX2
, val
);
642 val
= bld_insn_1(bld
, NV_OP_EX2
, val
);
647 static INLINE
struct nv_value
*
648 bld_load_imm_f32(struct bld_context
*bld
, float f
)
650 struct nv_value
*imm
= bld_insn_1(bld
, NV_OP_MOV
, bld_imm_f32(bld
, f
));
652 SET_TYPE(imm
, NV_TYPE_F32
);
656 static INLINE
struct nv_value
*
657 bld_load_imm_u32(struct bld_context
*bld
, uint32_t u
)
659 return bld_insn_1(bld
, NV_OP_MOV
, bld_imm_u32(bld
, u
));
662 static struct nv_value
*
663 bld_get_address(struct bld_context
*bld
, int id
, struct nv_value
*indirect
)
666 struct nv_instruction
*nvi
;
667 struct nv_value
*val
;
669 for (i
= 0; i
< 4; ++i
) {
670 if (!bld
->saved_addr
[i
][0])
672 if (bld
->saved_addr
[i
][1] == indirect
) {
673 nvi
= bld
->saved_addr
[i
][0]->insn
;
674 if (nvi
->src
[0]->value
->reg
.imm
.u32
== id
)
675 return bld
->saved_addr
[i
][0];
680 val
= bld_imm_u32(bld
, id
);
682 val
= bld_insn_2(bld
, NV_OP_ADD
, indirect
, val
);
684 val
= bld_insn_1(bld
, NV_OP_MOV
, val
);
686 bld
->saved_addr
[i
][0] = val
;
687 bld
->saved_addr
[i
][0]->reg
.file
= NV_FILE_ADDR
;
688 bld
->saved_addr
[i
][0]->reg
.type
= NV_TYPE_U16
;
689 bld
->saved_addr
[i
][1] = indirect
;
690 return bld
->saved_addr
[i
][0];
694 static struct nv_value
*
695 bld_predicate(struct bld_context
*bld
, struct nv_value
*src
, boolean bool_only
)
697 struct nv_instruction
*s0i
, *nvi
= src
->insn
;
700 nvi
= bld_insn_1(bld
,
701 (src
->reg
.file
== NV_FILE_IMM
) ? NV_OP_MOV
: NV_OP_LDA
,
706 while (nvi
->opcode
== NV_OP_ABS
|| nvi
->opcode
== NV_OP_NEG
||
707 nvi
->opcode
== NV_OP_CVT
) {
708 s0i
= nvi
->src
[0]->value
->insn
;
709 if (!s0i
|| !nv50_op_can_write_flags(s0i
->opcode
))
712 assert(!nvi
->flags_src
);
716 if (!nv50_op_can_write_flags(nvi
->opcode
) ||
717 nvi
->bb
!= bld
->pc
->current_block
) {
718 nvi
= new_instruction(bld
->pc
, NV_OP_CVT
);
719 nv_reference(bld
->pc
, &nvi
->src
[0], src
);
722 if (!nvi
->flags_def
) {
723 nvi
->flags_def
= new_value(bld
->pc
, NV_FILE_FLAGS
, NV_TYPE_U16
);
724 nvi
->flags_def
->insn
= nvi
;
726 return nvi
->flags_def
;
730 bld_kil(struct bld_context
*bld
, struct nv_value
*src
)
732 struct nv_instruction
*nvi
;
734 src
= bld_predicate(bld
, src
, FALSE
);
735 nvi
= new_instruction(bld
->pc
, NV_OP_KIL
);
737 nvi
->flags_src
= new_ref(bld
->pc
, src
);
742 bld_flow(struct bld_context
*bld
, uint opcode
, ubyte cc
,
743 struct nv_value
*src
, struct nv_basic_block
*target
,
744 boolean plan_reconverge
)
746 struct nv_instruction
*nvi
;
749 new_instruction(bld
->pc
, NV_OP_JOINAT
)->fixed
= 1;
751 nvi
= new_instruction(bld
->pc
, opcode
);
752 nvi
->is_terminator
= 1;
754 nvi
->target
= target
;
756 nvi
->flags_src
= new_ref(bld
->pc
, src
);
760 translate_setcc(unsigned opcode
)
763 case TGSI_OPCODE_SLT
: return NV_CC_LT
;
764 case TGSI_OPCODE_SGE
: return NV_CC_GE
;
765 case TGSI_OPCODE_SEQ
: return NV_CC_EQ
;
766 case TGSI_OPCODE_SGT
: return NV_CC_GT
;
767 case TGSI_OPCODE_SLE
: return NV_CC_LE
;
768 case TGSI_OPCODE_SNE
: return NV_CC_NE
| NV_CC_U
;
769 case TGSI_OPCODE_STR
: return NV_CC_TR
;
770 case TGSI_OPCODE_SFL
: return NV_CC_FL
;
772 case TGSI_OPCODE_ISLT
: return NV_CC_LT
;
773 case TGSI_OPCODE_ISGE
: return NV_CC_GE
;
774 case TGSI_OPCODE_USEQ
: return NV_CC_EQ
;
775 case TGSI_OPCODE_USGE
: return NV_CC_GE
;
776 case TGSI_OPCODE_USLT
: return NV_CC_LT
;
777 case TGSI_OPCODE_USNE
: return NV_CC_NE
;
785 translate_opcode(uint opcode
)
788 case TGSI_OPCODE_ABS
: return NV_OP_ABS
;
789 case TGSI_OPCODE_ADD
:
790 case TGSI_OPCODE_SUB
:
791 case TGSI_OPCODE_UADD
: return NV_OP_ADD
;
792 case TGSI_OPCODE_AND
: return NV_OP_AND
;
793 case TGSI_OPCODE_EX2
: return NV_OP_EX2
;
794 case TGSI_OPCODE_CEIL
: return NV_OP_CEIL
;
795 case TGSI_OPCODE_FLR
: return NV_OP_FLOOR
;
796 case TGSI_OPCODE_TRUNC
: return NV_OP_TRUNC
;
797 case TGSI_OPCODE_COS
: return NV_OP_COS
;
798 case TGSI_OPCODE_SIN
: return NV_OP_SIN
;
799 case TGSI_OPCODE_DDX
: return NV_OP_DFDX
;
800 case TGSI_OPCODE_DDY
: return NV_OP_DFDY
;
801 case TGSI_OPCODE_F2I
:
802 case TGSI_OPCODE_F2U
:
803 case TGSI_OPCODE_I2F
:
804 case TGSI_OPCODE_U2F
: return NV_OP_CVT
;
805 case TGSI_OPCODE_INEG
: return NV_OP_NEG
;
806 case TGSI_OPCODE_LG2
: return NV_OP_LG2
;
807 case TGSI_OPCODE_ISHR
:
808 case TGSI_OPCODE_USHR
: return NV_OP_SHR
;
809 case TGSI_OPCODE_MAD
:
810 case TGSI_OPCODE_UMAD
: return NV_OP_MAD
;
811 case TGSI_OPCODE_MAX
:
812 case TGSI_OPCODE_IMAX
:
813 case TGSI_OPCODE_UMAX
: return NV_OP_MAX
;
814 case TGSI_OPCODE_MIN
:
815 case TGSI_OPCODE_IMIN
:
816 case TGSI_OPCODE_UMIN
: return NV_OP_MIN
;
817 case TGSI_OPCODE_MUL
:
818 case TGSI_OPCODE_UMUL
: return NV_OP_MUL
;
819 case TGSI_OPCODE_OR
: return NV_OP_OR
;
820 case TGSI_OPCODE_RCP
: return NV_OP_RCP
;
821 case TGSI_OPCODE_RSQ
: return NV_OP_RSQ
;
822 case TGSI_OPCODE_SAD
: return NV_OP_SAD
;
823 case TGSI_OPCODE_SHL
: return NV_OP_SHL
;
824 case TGSI_OPCODE_SLT
:
825 case TGSI_OPCODE_SGE
:
826 case TGSI_OPCODE_SEQ
:
827 case TGSI_OPCODE_SGT
:
828 case TGSI_OPCODE_SLE
:
829 case TGSI_OPCODE_SNE
:
830 case TGSI_OPCODE_ISLT
:
831 case TGSI_OPCODE_ISGE
:
832 case TGSI_OPCODE_USEQ
:
833 case TGSI_OPCODE_USGE
:
834 case TGSI_OPCODE_USLT
:
835 case TGSI_OPCODE_USNE
: return NV_OP_SET
;
836 case TGSI_OPCODE_TEX
: return NV_OP_TEX
;
837 case TGSI_OPCODE_TXP
: return NV_OP_TEX
;
838 case TGSI_OPCODE_TXB
: return NV_OP_TXB
;
839 case TGSI_OPCODE_TXL
: return NV_OP_TXL
;
840 case TGSI_OPCODE_XOR
: return NV_OP_XOR
;
847 infer_src_type(unsigned opcode
)
850 case TGSI_OPCODE_MOV
:
851 case TGSI_OPCODE_AND
:
853 case TGSI_OPCODE_XOR
:
854 case TGSI_OPCODE_SAD
:
855 case TGSI_OPCODE_U2F
:
856 case TGSI_OPCODE_UADD
:
857 case TGSI_OPCODE_UDIV
:
858 case TGSI_OPCODE_UMOD
:
859 case TGSI_OPCODE_UMAD
:
860 case TGSI_OPCODE_UMUL
:
861 case TGSI_OPCODE_UMAX
:
862 case TGSI_OPCODE_UMIN
:
863 case TGSI_OPCODE_USEQ
:
864 case TGSI_OPCODE_USGE
:
865 case TGSI_OPCODE_USLT
:
866 case TGSI_OPCODE_USNE
:
867 case TGSI_OPCODE_USHR
:
869 case TGSI_OPCODE_I2F
:
870 case TGSI_OPCODE_IDIV
:
871 case TGSI_OPCODE_IMAX
:
872 case TGSI_OPCODE_IMIN
:
873 case TGSI_OPCODE_INEG
:
874 case TGSI_OPCODE_ISGE
:
875 case TGSI_OPCODE_ISHR
:
876 case TGSI_OPCODE_ISLT
:
884 infer_dst_type(unsigned opcode
)
887 case TGSI_OPCODE_MOV
:
888 case TGSI_OPCODE_F2U
:
889 case TGSI_OPCODE_AND
:
891 case TGSI_OPCODE_XOR
:
892 case TGSI_OPCODE_SAD
:
893 case TGSI_OPCODE_UADD
:
894 case TGSI_OPCODE_UDIV
:
895 case TGSI_OPCODE_UMOD
:
896 case TGSI_OPCODE_UMAD
:
897 case TGSI_OPCODE_UMUL
:
898 case TGSI_OPCODE_UMAX
:
899 case TGSI_OPCODE_UMIN
:
900 case TGSI_OPCODE_USEQ
:
901 case TGSI_OPCODE_USGE
:
902 case TGSI_OPCODE_USLT
:
903 case TGSI_OPCODE_USNE
:
904 case TGSI_OPCODE_USHR
:
906 case TGSI_OPCODE_F2I
:
907 case TGSI_OPCODE_IDIV
:
908 case TGSI_OPCODE_IMAX
:
909 case TGSI_OPCODE_IMIN
:
910 case TGSI_OPCODE_INEG
:
911 case TGSI_OPCODE_ISGE
:
912 case TGSI_OPCODE_ISHR
:
913 case TGSI_OPCODE_ISLT
:
921 emit_store(struct bld_context
*bld
, const struct tgsi_full_instruction
*inst
,
922 unsigned chan
, struct nv_value
*value
)
924 struct nv_value
*ptr
;
925 const struct tgsi_full_dst_register
*reg
= &inst
->Dst
[0];
927 if (reg
->Register
.Indirect
) {
928 ptr
= FETCH_ADDR(reg
->Indirect
.Index
,
929 tgsi_util_get_src_register_swizzle(®
->Indirect
, 0));
936 if (inst
->Instruction
.Opcode
!= TGSI_OPCODE_MOV
)
937 value
->reg
.type
= infer_dst_type(inst
->Instruction
.Opcode
);
939 switch (inst
->Instruction
.Saturate
) {
942 case TGSI_SAT_ZERO_ONE
:
943 BLD_INSN_1_EX(value
, SAT
, F32
, value
, F32
);
945 case TGSI_SAT_MINUS_PLUS_ONE
:
946 value
->reg
.as_type
= NV_TYPE_F32
;
947 value
= bld_insn_2(bld
, NV_OP_MAX
, value
, bld_load_imm_f32(bld
, -1.0f
));
948 value
= bld_insn_2(bld
, NV_OP_MIN
, value
, bld_load_imm_f32(bld
, 1.0f
));
952 switch (reg
->Register
.File
) {
953 case TGSI_FILE_OUTPUT
:
954 if (!value
->insn
&& (bld
->ti
->output_file
== NV_FILE_OUT
))
955 value
= bld_insn_1(bld
, NV_OP_MOV
, value
);
956 value
= bld_insn_1(bld
, NV_OP_MOV
, value
);
957 value
->reg
.file
= bld
->ti
->output_file
;
959 if (bld
->ti
->p
->type
== PIPE_SHADER_FRAGMENT
) {
960 STORE_OUTR(reg
->Register
.Index
, chan
, value
);
962 value
->insn
->fixed
= 1;
963 value
->reg
.id
= bld
->ti
->output_map
[reg
->Register
.Index
][chan
];
966 case TGSI_FILE_TEMPORARY
:
967 assert(reg
->Register
.Index
< BLD_MAX_TEMPS
);
968 if (!value
->insn
|| (value
->insn
->bb
!= bld
->pc
->current_block
))
969 value
= bld_insn_1(bld
, NV_OP_MOV
, value
);
970 value
->reg
.file
= NV_FILE_GPR
;
972 if (bld
->ti
->store_to_memory
)
973 bld_lmem_store(bld
, ptr
, reg
->Register
.Index
* 4 + chan
, value
);
975 STORE_TEMP(reg
->Register
.Index
, chan
, value
);
977 case TGSI_FILE_ADDRESS
:
978 assert(reg
->Register
.Index
< BLD_MAX_ADDRS
);
979 value
->reg
.file
= NV_FILE_ADDR
;
980 value
->reg
.type
= NV_TYPE_U16
;
981 STORE_ADDR(reg
->Register
.Index
, chan
, value
);
986 static INLINE
uint32_t
987 bld_is_output_written(struct bld_context
*bld
, int i
, int c
)
990 return bld
->outputs_written
[i
/ 8] & (0xf << ((i
* 4) % 32));
991 return bld
->outputs_written
[i
/ 8] & (1 << ((i
* 4 + c
) % 32));
995 bld_export_outputs(struct bld_context
*bld
)
997 struct nv_value
*vals
[4];
998 struct nv_instruction
*nvi
;
1001 bld_push_values(&bld
->ovs
[0][0], PIPE_MAX_SHADER_OUTPUTS
);
1003 for (i
= 0; i
< PIPE_MAX_SHADER_OUTPUTS
; ++i
) {
1004 if (!bld_is_output_written(bld
, i
, -1))
1006 for (n
= 0, c
= 0; c
< 4; ++c
) {
1007 if (!bld_is_output_written(bld
, i
, c
))
1009 vals
[n
] = bld_fetch_global(bld
, &bld
->ovs
[i
][c
]);
1011 vals
[n
] = bld_insn_1(bld
, NV_OP_MOV
, vals
[n
]);
1012 vals
[n
++]->reg
.id
= bld
->ti
->output_map
[i
][c
];
1016 (nvi
= new_instruction(bld
->pc
, NV_OP_EXPORT
))->fixed
= 1;
1018 for (c
= 0; c
< n
; ++c
)
1019 nvi
->src
[c
] = new_ref(bld
->pc
, vals
[c
]);
1024 bld_new_block(struct bld_context
*bld
, struct nv_basic_block
*b
)
1028 bld_push_values(&bld
->tvs
[0][0], BLD_MAX_TEMPS
);
1029 bld_push_values(&bld
->avs
[0][0], BLD_MAX_ADDRS
);
1030 bld_push_values(&bld
->pvs
[0][0], BLD_MAX_PREDS
);
1031 bld_push_values(&bld
->ovs
[0][0], PIPE_MAX_SHADER_OUTPUTS
);
1033 bld
->pc
->current_block
= b
;
1035 for (i
= 0; i
< 4; ++i
)
1036 bld
->saved_addr
[i
][0] = NULL
;
1038 for (i
= 0; i
< 128; ++i
)
1039 bld
->saved_inputs
[i
] = NULL
;
1041 bld
->out_kind
= CFG_EDGE_FORWARD
;
1044 static struct nv_value
*
1045 bld_saved_input(struct bld_context
*bld
, unsigned i
, unsigned c
)
1047 unsigned idx
= bld
->ti
->input_map
[i
][c
];
1049 if (bld
->ti
->p
->type
!= PIPE_SHADER_FRAGMENT
)
1051 if (bld
->saved_inputs
[idx
])
1052 return bld
->saved_inputs
[idx
];
1056 static struct nv_value
*
1057 bld_interpolate(struct bld_context
*bld
, unsigned mode
, struct nv_value
*val
)
1059 if (val
->reg
.id
== 255) {
1060 /* gl_FrontFacing: 0/~0 to -1.0/+1.0 */
1061 val
= bld_insn_1(bld
, NV_OP_LINTERP
, val
);
1062 val
= bld_insn_2(bld
, NV_OP_SHL
, val
, bld_imm_u32(bld
, 31));
1063 val
->insn
->src
[0]->typecast
= NV_TYPE_U32
;
1064 val
= bld_insn_2(bld
, NV_OP_XOR
, val
, bld_imm_f32(bld
, -1.0f
));
1065 val
->insn
->src
[0]->typecast
= NV_TYPE_U32
;
1067 if (mode
& (NV50_INTERP_LINEAR
| NV50_INTERP_FLAT
))
1068 val
= bld_insn_1(bld
, NV_OP_LINTERP
, val
);
1070 val
= bld_insn_2(bld
, NV_OP_PINTERP
, val
, bld
->frgcrd
[3]);
1072 val
->insn
->flat
= (mode
& NV50_INTERP_FLAT
) ? 1 : 0;
1073 val
->insn
->centroid
= (mode
& NV50_INTERP_CENTROID
) ? 1 : 0;
1077 static struct nv_value
*
1078 emit_fetch(struct bld_context
*bld
, const struct tgsi_full_instruction
*insn
,
1079 const unsigned s
, const unsigned chan
)
1081 const struct tgsi_full_src_register
*src
= &insn
->Src
[s
];
1082 struct nv_value
*res
;
1083 struct nv_value
*ptr
= NULL
;
1084 unsigned idx
, swz
, dim_idx
, ind_idx
, ind_swz
, sgn
;
1085 ubyte type
= infer_src_type(insn
->Instruction
.Opcode
);
1087 idx
= src
->Register
.Index
;
1088 swz
= tgsi_util_get_full_src_register_swizzle(src
, chan
);
1093 if (src
->Register
.Indirect
) {
1094 ind_idx
= src
->Indirect
.Index
;
1095 ind_swz
= tgsi_util_get_src_register_swizzle(&src
->Indirect
, 0);
1097 ptr
= FETCH_ADDR(ind_idx
, ind_swz
);
1099 if (idx
>= (128 / 4) && src
->Register
.File
== TGSI_FILE_CONSTANT
)
1100 ptr
= bld_get_address(bld
, (idx
* 16) & ~0x1ff, ptr
);
1102 switch (src
->Register
.File
) {
1103 case TGSI_FILE_CONSTANT
:
1104 dim_idx
= src
->Dimension
.Index
? src
->Dimension
.Index
+ 2 : 1;
1105 assert(dim_idx
< 14);
1106 assert(dim_idx
== 1); /* for now */
1108 res
= new_value(bld
->pc
, NV_FILE_MEM_C(dim_idx
), type
);
1109 SET_TYPE(res
, type
);
1110 res
->reg
.id
= (idx
* 4 + swz
) & 127;
1111 res
= bld_insn_1(bld
, NV_OP_LDA
, res
);
1114 res
->insn
->src
[4] = new_ref(bld
->pc
, ptr
);
1116 case TGSI_FILE_IMMEDIATE
:
1117 assert(idx
< bld
->ti
->immd32_nr
);
1118 res
= bld_load_imm_u32(bld
, bld
->ti
->immd32
[idx
* 4 + swz
]);
1120 switch (bld
->ti
->immd32_ty
[idx
]) {
1121 case TGSI_IMM_FLOAT32
: SET_TYPE(res
, NV_TYPE_F32
); break;
1122 case TGSI_IMM_UINT32
: SET_TYPE(res
, NV_TYPE_U32
); break;
1123 case TGSI_IMM_INT32
: SET_TYPE(res
, NV_TYPE_S32
); break;
1125 SET_TYPE(res
, type
);
1129 case TGSI_FILE_INPUT
:
1130 res
= bld_saved_input(bld
, idx
, swz
);
1131 if (res
&& (insn
->Instruction
.Opcode
!= TGSI_OPCODE_TXP
))
1134 res
= new_value(bld
->pc
, bld
->ti
->input_file
, type
);
1135 res
->reg
.id
= bld
->ti
->input_map
[idx
][swz
];
1137 if (res
->reg
.file
== NV_FILE_MEM_V
) {
1138 res
= bld_interpolate(bld
, bld
->ti
->interp_mode
[idx
], res
);
1140 assert(src
->Dimension
.Dimension
== 0);
1141 res
= bld_insn_1(bld
, NV_OP_LDA
, res
);
1142 assert(res
->reg
.type
== type
);
1144 bld
->saved_inputs
[bld
->ti
->input_map
[idx
][swz
]] = res
;
1146 case TGSI_FILE_TEMPORARY
:
1147 if (bld
->ti
->store_to_memory
)
1148 res
= bld_lmem_load(bld
, ptr
, idx
* 4 + swz
);
1150 res
= bld_fetch_global(bld
, &bld
->tvs
[idx
][swz
]);
1152 case TGSI_FILE_ADDRESS
:
1153 res
= bld_fetch_global(bld
, &bld
->avs
[idx
][swz
]);
1155 case TGSI_FILE_PREDICATE
:
1156 res
= bld_fetch_global(bld
, &bld
->pvs
[idx
][swz
]);
1159 NOUVEAU_ERR("illegal/unhandled src reg file: %d\n", src
->Register
.File
);
1164 return bld_undef(bld
, NV_FILE_GPR
);
1166 sgn
= tgsi_util_get_full_src_register_sign_mode(src
, chan
);
1168 if (insn
->Instruction
.Opcode
!= TGSI_OPCODE_MOV
)
1169 res
->reg
.as_type
= type
;
1171 if (sgn
!= TGSI_UTIL_SIGN_KEEP
) /* apparently "MOV A, -B" assumes float */
1172 res
->reg
.as_type
= NV_TYPE_F32
;
1175 case TGSI_UTIL_SIGN_KEEP
:
1177 case TGSI_UTIL_SIGN_CLEAR
:
1178 res
= bld_insn_1(bld
, NV_OP_ABS
, res
);
1180 case TGSI_UTIL_SIGN_TOGGLE
:
1181 res
= bld_insn_1(bld
, NV_OP_NEG
, res
);
1183 case TGSI_UTIL_SIGN_SET
:
1184 res
= bld_insn_1(bld
, NV_OP_ABS
, res
);
1185 res
= bld_insn_1(bld
, NV_OP_NEG
, res
);
1188 NOUVEAU_ERR("illegal/unhandled src reg sign mode\n");
1197 bld_lit(struct bld_context
*bld
, struct nv_value
*dst0
[4],
1198 const struct tgsi_full_instruction
*insn
)
1200 struct nv_value
*val0
, *zero
;
1201 unsigned mask
= insn
->Dst
[0].Register
.WriteMask
;
1203 if (mask
& ((1 << 0) | (1 << 3)))
1204 dst0
[3] = dst0
[0] = bld_load_imm_f32(bld
, 1.0f
);
1206 if (mask
& (3 << 1)) {
1207 zero
= bld_load_imm_f32(bld
, 0.0f
);
1208 val0
= bld_insn_2(bld
, NV_OP_MAX
, emit_fetch(bld
, insn
, 0, 0), zero
);
1210 if (mask
& (1 << 1))
1214 if (mask
& (1 << 2)) {
1215 struct nv_value
*val1
, *val3
, *src1
, *src3
;
1216 struct nv_value
*pos128
= bld_load_imm_f32(bld
, 127.999999f
);
1217 struct nv_value
*neg128
= bld_load_imm_f32(bld
, -127.999999f
);
1219 src1
= emit_fetch(bld
, insn
, 0, 1);
1220 src3
= emit_fetch(bld
, insn
, 0, 3);
1222 val0
->insn
->flags_def
= new_value(bld
->pc
, NV_FILE_FLAGS
, NV_TYPE_U16
);
1223 val0
->insn
->flags_def
->insn
= val0
->insn
;
1225 val1
= bld_insn_2(bld
, NV_OP_MAX
, src1
, zero
);
1226 val3
= bld_insn_2(bld
, NV_OP_MAX
, src3
, neg128
);
1227 val3
= bld_insn_2(bld
, NV_OP_MIN
, val3
, pos128
);
1228 val3
= bld_pow(bld
, val1
, val3
);
1230 dst0
[2] = bld_insn_1(bld
, NV_OP_MOV
, zero
);
1231 dst0
[2]->insn
->cc
= NV_CC_LE
;
1232 dst0
[2]->insn
->flags_src
= new_ref(bld
->pc
, val0
->insn
->flags_def
);
1234 dst0
[2] = bld_insn_2(bld
, NV_OP_SELECT
, val3
, dst0
[2]);
1239 get_tex_dim(const struct tgsi_full_instruction
*insn
, int *dim
, int *arg
)
1241 switch (insn
->Texture
.Texture
) {
1242 case TGSI_TEXTURE_1D
:
1245 case TGSI_TEXTURE_SHADOW1D
:
1249 case TGSI_TEXTURE_UNKNOWN
:
1250 case TGSI_TEXTURE_2D
:
1251 case TGSI_TEXTURE_RECT
:
1254 case TGSI_TEXTURE_SHADOW2D
:
1255 case TGSI_TEXTURE_SHADOWRECT
:
1259 case TGSI_TEXTURE_3D
:
1260 case TGSI_TEXTURE_CUBE
:
1270 load_proj_tex_coords(struct bld_context
*bld
,
1271 struct nv_value
*t
[4], int dim
, int arg
,
1272 const struct tgsi_full_instruction
*insn
)
1276 mask
= (1 << dim
) - 1;
1278 mask
|= 4; /* depth comparison value */
1280 t
[3] = emit_fetch(bld
, insn
, 0, 3);
1282 if (t
[3]->insn
->opcode
== NV_OP_PINTERP
) {
1283 t
[3] = bld_duplicate_insn(bld
, t
[3]->insn
);
1284 t
[3]->insn
->opcode
= NV_OP_LINTERP
;
1285 nv_reference(bld
->pc
, &t
[3]->insn
->src
[1], NULL
);
1288 t
[3] = bld_insn_1(bld
, NV_OP_RCP
, t
[3]);
1290 for (c
= 0; c
< 4; ++c
) {
1291 if (!(mask
& (1 << c
)))
1293 t
[c
] = emit_fetch(bld
, insn
, 0, c
);
1295 if (t
[c
]->insn
->opcode
!= NV_OP_LINTERP
&&
1296 t
[c
]->insn
->opcode
!= NV_OP_PINTERP
)
1298 t
[c
] = bld_duplicate_insn(bld
, t
[c
]->insn
);
1299 t
[c
]->insn
->opcode
= NV_OP_PINTERP
;
1300 nv_reference(bld
->pc
, &t
[c
]->insn
->src
[1], t
[3]);
1305 for (c
= 0; mask
; ++c
, mask
>>= 1) {
1308 t
[c
] = bld_insn_2(bld
, NV_OP_MUL
, t
[c
], t
[3]);
1312 /* For a quad of threads / top left, top right, bottom left, bottom right
1313 * pixels, do a different operation, and take src0 from a specific thread.
1320 #define QOP(a, b, c, d) \
1321 ((QOP_##a << 0) | (QOP_##b << 2) | (QOP_##c << 4) | (QOP_##d << 6))
1323 static INLINE
struct nv_value
*
1324 bld_quadop(struct bld_context
*bld
, ubyte qop
, struct nv_value
*src0
, int lane
,
1325 struct nv_value
*src1
, boolean wp
)
1327 struct nv_value
*val
= bld_insn_2(bld
, NV_OP_QUADOP
, src0
, src1
);
1328 val
->insn
->lanes
= lane
;
1329 val
->insn
->quadop
= qop
;
1331 val
->insn
->flags_def
= new_value(bld
->pc
, NV_FILE_FLAGS
, NV_TYPE_U16
);
1332 val
->insn
->flags_def
->insn
= val
->insn
;
1337 static INLINE
struct nv_value
*
1338 bld_cmov(struct bld_context
*bld
,
1339 struct nv_value
*src
, ubyte cc
, struct nv_value
*cr
)
1341 src
= bld_insn_1(bld
, NV_OP_MOV
, src
);
1344 src
->insn
->flags_src
= new_ref(bld
->pc
, cr
);
1349 static struct nv_instruction
*
1350 emit_tex(struct bld_context
*bld
, uint opcode
,
1351 struct nv_value
*dst
[4], struct nv_value
*t_in
[4],
1352 int argc
, int tic
, int tsc
, int cube
)
1354 struct nv_value
*t
[4];
1355 struct nv_instruction
*nvi
;
1358 /* the inputs to a tex instruction must be separate values */
1359 for (c
= 0; c
< argc
; ++c
) {
1360 t
[c
] = bld_insn_1(bld
, NV_OP_MOV
, t_in
[c
]);
1361 SET_TYPE(t
[c
], NV_TYPE_F32
);
1362 t
[c
]->insn
->fixed
= 1;
1365 nvi
= new_instruction(bld
->pc
, opcode
);
1367 for (c
= 0; c
< 4; ++c
)
1368 dst
[c
] = bld_def(nvi
, c
, new_value(bld
->pc
, NV_FILE_GPR
, NV_TYPE_F32
));
1370 for (c
= 0; c
< argc
; ++c
)
1371 nvi
->src
[c
] = new_ref(bld
->pc
, t
[c
]);
1375 nvi
->tex_mask
= 0xf;
1376 nvi
->tex_cube
= cube
;
1378 nvi
->tex_argc
= argc
;
1384 bld_texlod_sequence(struct bld_context
*bld
,
1385 struct nv_value
*dst
[4], struct nv_value
*t
[4], int arg
,
1386 int tic
, int tsc
, int cube
)
1388 emit_tex(bld
, NV_OP_TXL
, dst
, t
, arg
, tic
, tsc
, cube
); /* TODO */
1392 /* The lanes of a quad are grouped by the bit in the condition register
1393 * they have set, which is selected by differing bias values.
1394 * Move the input values for TEX into a new register set for each group
1395 * and execute TEX only for a specific group.
1396 * We always need to use 4 new registers for the inputs/outputs because
1397 * the implicitly calculated derivatives must be correct.
1400 bld_texbias_sequence(struct bld_context
*bld
,
1401 struct nv_value
*dst
[4], struct nv_value
*t
[4], int arg
,
1402 int tic
, int tsc
, int cube
)
1404 struct nv_instruction
*sel
, *tex
;
1405 struct nv_value
*bit
[4], *cr
[4], *res
[4][4], *val
;
1408 const ubyte cc
[4] = { NV_CC_EQ
, NV_CC_S
, NV_CC_C
, NV_CC_O
};
1410 for (l
= 0; l
< 4; ++l
) {
1411 bit
[l
] = bld_load_imm_u32(bld
, 1 << l
);
1413 val
= bld_quadop(bld
, QOP(SUBR
, SUBR
, SUBR
, SUBR
),
1414 t
[arg
- 1], l
, t
[arg
- 1], TRUE
);
1416 cr
[l
] = bld_cmov(bld
, bit
[l
], NV_CC_EQ
, val
->insn
->flags_def
);
1418 cr
[l
]->reg
.file
= NV_FILE_FLAGS
;
1419 SET_TYPE(cr
[l
], NV_TYPE_U16
);
1422 sel
= new_instruction(bld
->pc
, NV_OP_SELECT
);
1424 for (l
= 0; l
< 4; ++l
)
1425 sel
->src
[l
] = new_ref(bld
->pc
, cr
[l
]);
1427 bld_def(sel
, 0, new_value(bld
->pc
, NV_FILE_FLAGS
, NV_TYPE_U16
));
1429 for (l
= 0; l
< 4; ++l
) {
1430 tex
= emit_tex(bld
, NV_OP_TXB
, dst
, t
, arg
, tic
, tsc
, cube
);
1433 tex
->flags_src
= new_ref(bld
->pc
, sel
->def
[0]);
1435 for (c
= 0; c
< 4; ++c
)
1436 res
[l
][c
] = tex
->def
[c
];
1439 for (l
= 0; l
< 4; ++l
)
1440 for (c
= 0; c
< 4; ++c
)
1441 res
[l
][c
] = bld_cmov(bld
, res
[l
][c
], cc
[l
], sel
->def
[0]);
1443 for (c
= 0; c
< 4; ++c
) {
1444 sel
= new_instruction(bld
->pc
, NV_OP_SELECT
);
1446 for (l
= 0; l
< 4; ++l
)
1447 sel
->src
[l
] = new_ref(bld
->pc
, res
[l
][c
]);
1449 bld_def(sel
, 0, (dst
[c
] = new_value(bld
->pc
, NV_FILE_GPR
, NV_TYPE_F32
)));
1454 bld_is_constant(struct nv_value
*val
)
1456 if (val
->reg
.file
== NV_FILE_IMM
)
1458 return val
->insn
&& nvcg_find_constant(val
->insn
->src
[0]);
1462 bld_tex(struct bld_context
*bld
, struct nv_value
*dst0
[4],
1463 const struct tgsi_full_instruction
*insn
)
1465 struct nv_value
*t
[4], *s
[3];
1466 uint opcode
= translate_opcode(insn
->Instruction
.Opcode
);
1468 const int tic
= insn
->Src
[1].Register
.Index
;
1470 const int cube
= (insn
->Texture
.Texture
== TGSI_TEXTURE_CUBE
) ? 1 : 0;
1472 get_tex_dim(insn
, &dim
, &arg
);
1474 if (!cube
&& insn
->Instruction
.Opcode
== TGSI_OPCODE_TXP
)
1475 load_proj_tex_coords(bld
, t
, dim
, arg
, insn
);
1477 for (c
= 0; c
< dim
; ++c
)
1478 t
[c
] = emit_fetch(bld
, insn
, 0, c
);
1480 t
[dim
] = emit_fetch(bld
, insn
, 0, 2);
1485 for (c
= 0; c
< 3; ++c
)
1486 s
[c
] = bld_insn_1(bld
, NV_OP_ABS
, t
[c
]);
1488 s
[0] = bld_insn_2(bld
, NV_OP_MAX
, s
[0], s
[1]);
1489 s
[0] = bld_insn_2(bld
, NV_OP_MAX
, s
[0], s
[2]);
1490 s
[0] = bld_insn_1(bld
, NV_OP_RCP
, s
[0]);
1492 for (c
= 0; c
< 3; ++c
)
1493 t
[c
] = bld_insn_2(bld
, NV_OP_MUL
, t
[c
], s
[0]);
1496 if (opcode
== NV_OP_TXB
|| opcode
== NV_OP_TXL
) {
1497 t
[arg
++] = emit_fetch(bld
, insn
, 0, 3);
1499 if ((bld
->ti
->p
->type
== PIPE_SHADER_FRAGMENT
) &&
1500 !bld_is_constant(t
[arg
- 1])) {
1501 if (opcode
== NV_OP_TXB
)
1502 bld_texbias_sequence(bld
, dst0
, t
, arg
, tic
, tsc
, cube
);
1504 bld_texlod_sequence(bld
, dst0
, t
, arg
, tic
, tsc
, cube
);
1509 emit_tex(bld
, opcode
, dst0
, t
, arg
, tic
, tsc
, cube
);
1512 static INLINE
struct nv_value
*
1513 bld_dot(struct bld_context
*bld
, const struct tgsi_full_instruction
*insn
,
1516 struct nv_value
*dotp
, *src0
, *src1
;
1519 src0
= emit_fetch(bld
, insn
, 0, 0);
1520 src1
= emit_fetch(bld
, insn
, 1, 0);
1521 dotp
= bld_insn_2(bld
, NV_OP_MUL
, src0
, src1
);
1523 for (c
= 1; c
< n
; ++c
) {
1524 src0
= emit_fetch(bld
, insn
, 0, c
);
1525 src1
= emit_fetch(bld
, insn
, 1, c
);
1526 dotp
= bld_insn_3(bld
, NV_OP_MAD
, src0
, src1
, dotp
);
/* Iterate chan over the channels (0..3) enabled in the write mask of the
 * first destination of inst. The statement following the macro is the loop
 * body; a `break` inside it leaves the whole channel loop.
 */
#define FOR_EACH_DST0_ENABLED_CHANNEL(chan, inst)                 \
   for (chan = 0; chan < 4; ++chan)                               \
      if ((inst)->Dst[0].Register.WriteMask & (1 << chan))
1536 bld_instruction(struct bld_context
*bld
,
1537 const struct tgsi_full_instruction
*insn
)
1539 struct nv_value
*src0
;
1540 struct nv_value
*src1
;
1541 struct nv_value
*src2
;
1542 struct nv_value
*dst0
[4];
1543 struct nv_value
*temp
;
1545 uint opcode
= translate_opcode(insn
->Instruction
.Opcode
);
1547 #ifdef NV50_TGSI2NC_DEBUG
1548 debug_printf("bld_instruction:"); tgsi_dump_instruction(insn
, 1);
1551 switch (insn
->Instruction
.Opcode
) {
1552 case TGSI_OPCODE_ADD
:
1553 case TGSI_OPCODE_MAX
:
1554 case TGSI_OPCODE_MIN
:
1555 case TGSI_OPCODE_MUL
:
1556 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
) {
1557 src0
= emit_fetch(bld
, insn
, 0, c
);
1558 src1
= emit_fetch(bld
, insn
, 1, c
);
1559 dst0
[c
] = bld_insn_2(bld
, opcode
, src0
, src1
);
1562 case TGSI_OPCODE_ARL
:
1563 src1
= bld_imm_u32(bld
, 4);
1564 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
) {
1565 src0
= emit_fetch(bld
, insn
, 0, c
);
1566 temp
= bld_insn_1(bld
, NV_OP_FLOOR
, src0
);
1567 SET_TYPE(temp
, NV_TYPE_S32
);
1568 dst0
[c
] = bld_insn_2(bld
, NV_OP_SHL
, temp
, src1
);
1571 case TGSI_OPCODE_CMP
:
1572 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
) {
1573 src0
= emit_fetch(bld
, insn
, 0, c
);
1574 src1
= emit_fetch(bld
, insn
, 1, c
);
1575 src2
= emit_fetch(bld
, insn
, 2, c
);
1576 src0
= bld_predicate(bld
, src0
, FALSE
);
1578 src1
= bld_insn_1(bld
, NV_OP_MOV
, src1
);
1579 src1
->insn
->flags_src
= new_ref(bld
->pc
, src0
);
1580 src1
->insn
->cc
= NV_CC_LT
;
1582 src2
= bld_insn_1(bld
, NV_OP_MOV
, src2
);
1583 src2
->insn
->flags_src
= new_ref(bld
->pc
, src0
);
1584 src2
->insn
->cc
= NV_CC_GE
;
1586 dst0
[c
] = bld_insn_2(bld
, NV_OP_SELECT
, src1
, src2
);
1589 case TGSI_OPCODE_COS
:
1590 case TGSI_OPCODE_SIN
:
1591 src0
= emit_fetch(bld
, insn
, 0, 0);
1592 temp
= bld_insn_1(bld
, NV_OP_PRESIN
, src0
);
1593 if (insn
->Dst
[0].Register
.WriteMask
& 7)
1594 temp
= bld_insn_1(bld
, opcode
, temp
);
1595 for (c
= 0; c
< 3; ++c
)
1596 if (insn
->Dst
[0].Register
.WriteMask
& (1 << c
))
1598 if (!(insn
->Dst
[0].Register
.WriteMask
& (1 << 3)))
1600 src0
= emit_fetch(bld
, insn
, 0, 3);
1601 temp
= bld_insn_1(bld
, NV_OP_PRESIN
, src0
);
1602 dst0
[3] = bld_insn_1(bld
, opcode
, temp
);
1604 case TGSI_OPCODE_DP2
:
1605 temp
= bld_dot(bld
, insn
, 2);
1606 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
)
1609 case TGSI_OPCODE_DP3
:
1610 temp
= bld_dot(bld
, insn
, 3);
1611 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
)
1614 case TGSI_OPCODE_DP4
:
1615 temp
= bld_dot(bld
, insn
, 4);
1616 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
)
1619 case TGSI_OPCODE_DPH
:
1620 src0
= bld_dot(bld
, insn
, 3);
1621 src1
= emit_fetch(bld
, insn
, 1, 3);
1622 temp
= bld_insn_2(bld
, NV_OP_ADD
, src0
, src1
);
1623 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
)
1626 case TGSI_OPCODE_DST
:
1627 if (insn
->Dst
[0].Register
.WriteMask
& 1)
1628 dst0
[0] = bld_imm_f32(bld
, 1.0f
);
1629 if (insn
->Dst
[0].Register
.WriteMask
& 2) {
1630 src0
= emit_fetch(bld
, insn
, 0, 1);
1631 src1
= emit_fetch(bld
, insn
, 1, 1);
1632 dst0
[1] = bld_insn_2(bld
, NV_OP_MUL
, src0
, src1
);
1634 if (insn
->Dst
[0].Register
.WriteMask
& 4)
1635 dst0
[2] = emit_fetch(bld
, insn
, 0, 2);
1636 if (insn
->Dst
[0].Register
.WriteMask
& 8)
1637 dst0
[3] = emit_fetch(bld
, insn
, 1, 3);
1639 case TGSI_OPCODE_EXP
:
1640 src0
= emit_fetch(bld
, insn
, 0, 0);
1641 temp
= bld_insn_1(bld
, NV_OP_FLOOR
, src0
);
1643 if (insn
->Dst
[0].Register
.WriteMask
& 2)
1644 dst0
[1] = bld_insn_2(bld
, NV_OP_SUB
, src0
, temp
);
1645 if (insn
->Dst
[0].Register
.WriteMask
& 1) {
1646 temp
= bld_insn_1(bld
, NV_OP_PREEX2
, temp
);
1647 dst0
[0] = bld_insn_1(bld
, NV_OP_EX2
, temp
);
1649 if (insn
->Dst
[0].Register
.WriteMask
& 4) {
1650 temp
= bld_insn_1(bld
, NV_OP_PREEX2
, src0
);
1651 dst0
[2] = bld_insn_1(bld
, NV_OP_EX2
, temp
);
1653 if (insn
->Dst
[0].Register
.WriteMask
& 8)
1654 dst0
[3] = bld_imm_f32(bld
, 1.0f
);
1656 case TGSI_OPCODE_EX2
:
1657 src0
= emit_fetch(bld
, insn
, 0, 0);
1658 temp
= bld_insn_1(bld
, NV_OP_PREEX2
, src0
);
1659 temp
= bld_insn_1(bld
, NV_OP_EX2
, temp
);
1660 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
)
1663 case TGSI_OPCODE_FRC
:
1664 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
) {
1665 src0
= emit_fetch(bld
, insn
, 0, c
);
1666 dst0
[c
] = bld_insn_1(bld
, NV_OP_FLOOR
, src0
);
1667 dst0
[c
] = bld_insn_2(bld
, NV_OP_SUB
, src0
, dst0
[c
]);
1670 case TGSI_OPCODE_KIL
:
1671 for (c
= 0; c
< 4; ++c
) {
1672 src0
= emit_fetch(bld
, insn
, 0, c
);
1676 case TGSI_OPCODE_KILP
:
1677 (new_instruction(bld
->pc
, NV_OP_KIL
))->fixed
= 1;
1679 case TGSI_OPCODE_IF
:
1681 struct nv_basic_block
*b
= new_basic_block(bld
->pc
);
1683 assert(bld
->cond_lvl
< BLD_MAX_COND_NESTING
);
1685 nvbb_attach_block(bld
->pc
->current_block
, b
, CFG_EDGE_FORWARD
);
1687 bld
->join_bb
[bld
->cond_lvl
] = bld
->pc
->current_block
;
1688 bld
->cond_bb
[bld
->cond_lvl
] = bld
->pc
->current_block
;
1690 src1
= bld_predicate(bld
, emit_fetch(bld
, insn
, 0, 0), TRUE
);
1692 bld_flow(bld
, NV_OP_BRA
, NV_CC_EQ
, src1
, NULL
, (bld
->cond_lvl
== 0));
1695 bld_new_block(bld
, b
);
1698 case TGSI_OPCODE_ELSE
:
1700 struct nv_basic_block
*b
= new_basic_block(bld
->pc
);
1703 nvbb_attach_block(bld
->join_bb
[bld
->cond_lvl
], b
, CFG_EDGE_FORWARD
);
1705 bld
->cond_bb
[bld
->cond_lvl
]->exit
->target
= b
;
1706 bld
->cond_bb
[bld
->cond_lvl
] = bld
->pc
->current_block
;
1708 new_instruction(bld
->pc
, NV_OP_BRA
)->is_terminator
= 1;
1711 bld_new_block(bld
, b
);
1714 case TGSI_OPCODE_ENDIF
:
1716 struct nv_basic_block
*b
= new_basic_block(bld
->pc
);
1719 nvbb_attach_block(bld
->pc
->current_block
, b
, bld
->out_kind
);
1720 nvbb_attach_block(bld
->cond_bb
[bld
->cond_lvl
], b
, CFG_EDGE_FORWARD
);
1722 bld
->cond_bb
[bld
->cond_lvl
]->exit
->target
= b
;
1724 bld_new_block(bld
, b
);
1726 if (!bld
->cond_lvl
&& bld
->join_bb
[bld
->cond_lvl
]) {
1727 bld
->join_bb
[bld
->cond_lvl
]->exit
->prev
->target
= b
;
1728 new_instruction(bld
->pc
, NV_OP_JOIN
)->is_join
= TRUE
;
1732 case TGSI_OPCODE_BGNLOOP
:
1734 struct nv_basic_block
*bl
= new_basic_block(bld
->pc
);
1735 struct nv_basic_block
*bb
= new_basic_block(bld
->pc
);
1737 assert(bld
->loop_lvl
< BLD_MAX_LOOP_NESTING
);
1739 bld
->loop_bb
[bld
->loop_lvl
] = bl
;
1740 bld
->brkt_bb
[bld
->loop_lvl
] = bb
;
1742 bld_flow(bld
, NV_OP_BREAKADDR
, NV_CC_TR
, NULL
, bb
, FALSE
);
1744 nvbb_attach_block(bld
->pc
->current_block
, bl
, CFG_EDGE_LOOP_ENTER
);
1746 bld_new_block(bld
, bld
->loop_bb
[bld
->loop_lvl
++]);
1748 if (bld
->loop_lvl
== bld
->pc
->loop_nesting_bound
)
1749 bld
->pc
->loop_nesting_bound
++;
1751 bld_clear_def_use(&bld
->tvs
[0][0], BLD_MAX_TEMPS
, bld
->loop_lvl
);
1752 bld_clear_def_use(&bld
->avs
[0][0], BLD_MAX_ADDRS
, bld
->loop_lvl
);
1753 bld_clear_def_use(&bld
->pvs
[0][0], BLD_MAX_PREDS
, bld
->loop_lvl
);
1756 case TGSI_OPCODE_BRK
:
1758 struct nv_basic_block
*bb
= bld
->brkt_bb
[bld
->loop_lvl
- 1];
1760 bld_flow(bld
, NV_OP_BREAK
, NV_CC_TR
, NULL
, bb
, FALSE
);
1762 if (bld
->out_kind
== CFG_EDGE_FORWARD
) /* else we already had BRK/CONT */
1763 nvbb_attach_block(bld
->pc
->current_block
, bb
, CFG_EDGE_LOOP_LEAVE
);
1765 bld
->out_kind
= CFG_EDGE_FAKE
;
1768 case TGSI_OPCODE_CONT
:
1770 struct nv_basic_block
*bb
= bld
->loop_bb
[bld
->loop_lvl
- 1];
1772 bld_flow(bld
, NV_OP_BRA
, NV_CC_TR
, NULL
, bb
, FALSE
);
1774 nvbb_attach_block(bld
->pc
->current_block
, bb
, CFG_EDGE_BACK
);
1776 if ((bb
= bld
->join_bb
[bld
->cond_lvl
- 1])) {
1777 bld
->join_bb
[bld
->cond_lvl
- 1] = NULL
;
1778 nv_nvi_delete(bb
->exit
->prev
);
1780 bld
->out_kind
= CFG_EDGE_FAKE
;
1783 case TGSI_OPCODE_ENDLOOP
:
1785 struct nv_basic_block
*bb
= bld
->loop_bb
[bld
->loop_lvl
- 1];
1787 bld_flow(bld
, NV_OP_BRA
, NV_CC_TR
, NULL
, bb
, FALSE
);
1789 nvbb_attach_block(bld
->pc
->current_block
, bb
, CFG_EDGE_BACK
);
1791 bld_loop_end(bld
, bb
); /* replace loop-side operand of the phis */
1793 bld_new_block(bld
, bld
->brkt_bb
[--bld
->loop_lvl
]);
1796 case TGSI_OPCODE_ABS
:
1797 case TGSI_OPCODE_CEIL
:
1798 case TGSI_OPCODE_FLR
:
1799 case TGSI_OPCODE_TRUNC
:
1800 case TGSI_OPCODE_DDX
:
1801 case TGSI_OPCODE_DDY
:
1802 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
) {
1803 src0
= emit_fetch(bld
, insn
, 0, c
);
1804 dst0
[c
] = bld_insn_1(bld
, opcode
, src0
);
1807 case TGSI_OPCODE_LIT
:
1808 bld_lit(bld
, dst0
, insn
);
1810 case TGSI_OPCODE_LRP
:
1811 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
) {
1812 src0
= emit_fetch(bld
, insn
, 0, c
);
1813 src1
= emit_fetch(bld
, insn
, 1, c
);
1814 src2
= emit_fetch(bld
, insn
, 2, c
);
1815 dst0
[c
] = bld_insn_2(bld
, NV_OP_SUB
, src1
, src2
);
1816 dst0
[c
] = bld_insn_3(bld
, NV_OP_MAD
, dst0
[c
], src0
, src2
);
1819 case TGSI_OPCODE_MOV
:
1820 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
)
1821 dst0
[c
] = emit_fetch(bld
, insn
, 0, c
);
1823 case TGSI_OPCODE_MAD
:
1824 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
) {
1825 src0
= emit_fetch(bld
, insn
, 0, c
);
1826 src1
= emit_fetch(bld
, insn
, 1, c
);
1827 src2
= emit_fetch(bld
, insn
, 2, c
);
1828 dst0
[c
] = bld_insn_3(bld
, opcode
, src0
, src1
, src2
);
1831 case TGSI_OPCODE_POW
:
1832 src0
= emit_fetch(bld
, insn
, 0, 0);
1833 src1
= emit_fetch(bld
, insn
, 1, 0);
1834 temp
= bld_pow(bld
, src0
, src1
);
1835 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
)
1838 case TGSI_OPCODE_LOG
:
1839 src0
= emit_fetch(bld
, insn
, 0, 0);
1840 src0
= bld_insn_1(bld
, NV_OP_ABS
, src0
);
1841 temp
= bld_insn_1(bld
, NV_OP_LG2
, src0
);
1843 if (insn
->Dst
[0].Register
.WriteMask
& 3) {
1844 temp
= bld_insn_1(bld
, NV_OP_FLOOR
, temp
);
1847 if (insn
->Dst
[0].Register
.WriteMask
& 2) {
1848 temp
= bld_insn_1(bld
, NV_OP_PREEX2
, temp
);
1849 temp
= bld_insn_1(bld
, NV_OP_EX2
, temp
);
1850 temp
= bld_insn_1(bld
, NV_OP_RCP
, temp
);
1851 dst0
[1] = bld_insn_2(bld
, NV_OP_MUL
, src0
, temp
);
1853 if (insn
->Dst
[0].Register
.WriteMask
& 8)
1854 dst0
[3] = bld_imm_f32(bld
, 1.0f
);
1856 case TGSI_OPCODE_RCP
:
1857 case TGSI_OPCODE_LG2
:
1858 src0
= emit_fetch(bld
, insn
, 0, 0);
1859 temp
= bld_insn_1(bld
, opcode
, src0
);
1860 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
)
1863 case TGSI_OPCODE_RSQ
:
1864 src0
= emit_fetch(bld
, insn
, 0, 0);
1865 temp
= bld_insn_1(bld
, NV_OP_ABS
, src0
);
1866 temp
= bld_insn_1(bld
, NV_OP_RSQ
, temp
);
1867 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
)
1870 case TGSI_OPCODE_SLT
:
1871 case TGSI_OPCODE_SGE
:
1872 case TGSI_OPCODE_SEQ
:
1873 case TGSI_OPCODE_SGT
:
1874 case TGSI_OPCODE_SLE
:
1875 case TGSI_OPCODE_SNE
:
1876 case TGSI_OPCODE_ISLT
:
1877 case TGSI_OPCODE_ISGE
:
1878 case TGSI_OPCODE_USEQ
:
1879 case TGSI_OPCODE_USGE
:
1880 case TGSI_OPCODE_USLT
:
1881 case TGSI_OPCODE_USNE
:
1882 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
) {
1883 src0
= emit_fetch(bld
, insn
, 0, c
);
1884 src1
= emit_fetch(bld
, insn
, 1, c
);
1885 dst0
[c
] = bld_insn_2(bld
, NV_OP_SET
, src0
, src1
);
1886 dst0
[c
]->insn
->set_cond
= translate_setcc(insn
->Instruction
.Opcode
);
1887 SET_TYPE(dst0
[c
], infer_dst_type(insn
->Instruction
.Opcode
));
1889 dst0
[c
]->insn
->src
[0]->typecast
=
1890 dst0
[c
]->insn
->src
[1]->typecast
=
1891 infer_src_type(insn
->Instruction
.Opcode
);
1893 if (dst0
[c
]->reg
.type
!= NV_TYPE_F32
)
1895 dst0
[c
]->reg
.as_type
= NV_TYPE_S32
;
1896 dst0
[c
] = bld_insn_1(bld
, NV_OP_ABS
, dst0
[c
]);
1897 dst0
[c
] = bld_insn_1(bld
, NV_OP_CVT
, dst0
[c
]);
1898 SET_TYPE(dst0
[c
], NV_TYPE_F32
);
1901 case TGSI_OPCODE_SCS
:
1902 if (insn
->Dst
[0].Register
.WriteMask
& 0x3) {
1903 src0
= emit_fetch(bld
, insn
, 0, 0);
1904 temp
= bld_insn_1(bld
, NV_OP_PRESIN
, src0
);
1905 if (insn
->Dst
[0].Register
.WriteMask
& 0x1)
1906 dst0
[0] = bld_insn_1(bld
, NV_OP_COS
, temp
);
1907 if (insn
->Dst
[0].Register
.WriteMask
& 0x2)
1908 dst0
[1] = bld_insn_1(bld
, NV_OP_SIN
, temp
);
1910 if (insn
->Dst
[0].Register
.WriteMask
& 0x4)
1911 dst0
[2] = bld_imm_f32(bld
, 0.0f
);
1912 if (insn
->Dst
[0].Register
.WriteMask
& 0x8)
1913 dst0
[3] = bld_imm_f32(bld
, 1.0f
);
1915 case TGSI_OPCODE_SSG
:
1916 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
) {
1917 src0
= emit_fetch(bld
, insn
, 0, c
);
1918 src1
= bld_predicate(bld
, src0
, FALSE
);
1919 temp
= bld_insn_2(bld
, NV_OP_AND
, src0
, bld_imm_u32(bld
, 0x80000000));
1920 temp
= bld_insn_2(bld
, NV_OP_OR
, temp
, bld_imm_f32(bld
, 1.0f
));
1921 dst0
[c
] = bld_insn_2(bld
, NV_OP_XOR
, temp
, temp
);
1922 dst0
[c
]->insn
->cc
= NV_CC_EQ
;
1923 nv_reference(bld
->pc
, &dst0
[c
]->insn
->flags_src
, src1
);
1926 case TGSI_OPCODE_SUB
:
1927 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
) {
1928 src0
= emit_fetch(bld
, insn
, 0, c
);
1929 src1
= emit_fetch(bld
, insn
, 1, c
);
1930 dst0
[c
] = bld_insn_2(bld
, NV_OP_ADD
, src0
, src1
);
1931 dst0
[c
]->insn
->src
[1]->mod
^= NV_MOD_NEG
;
1934 case TGSI_OPCODE_TEX
:
1935 case TGSI_OPCODE_TXB
:
1936 case TGSI_OPCODE_TXL
:
1937 case TGSI_OPCODE_TXP
:
1938 bld_tex(bld
, dst0
, insn
);
1940 case TGSI_OPCODE_XPD
:
1941 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
) {
1943 dst0
[3] = bld_imm_f32(bld
, 1.0f
);
1946 src0
= emit_fetch(bld
, insn
, 1, (c
+ 1) % 3);
1947 src1
= emit_fetch(bld
, insn
, 0, (c
+ 2) % 3);
1948 dst0
[c
] = bld_insn_2(bld
, NV_OP_MUL
, src0
, src1
);
1950 src0
= emit_fetch(bld
, insn
, 0, (c
+ 1) % 3);
1951 src1
= emit_fetch(bld
, insn
, 1, (c
+ 2) % 3);
1952 dst0
[c
] = bld_insn_3(bld
, NV_OP_MAD
, src0
, src1
, dst0
[c
]);
1954 dst0
[c
]->insn
->src
[2]->mod
^= NV_MOD_NEG
;
1957 case TGSI_OPCODE_RET
:
1958 (new_instruction(bld
->pc
, NV_OP_RET
))->fixed
= 1;
1960 case TGSI_OPCODE_END
:
1961 if (bld
->ti
->p
->type
== PIPE_SHADER_FRAGMENT
)
1962 bld_export_outputs(bld
);
1965 NOUVEAU_ERR("unhandled opcode %u\n", insn
->Instruction
.Opcode
);
1970 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
)
1971 emit_store(bld
, insn
, c
, dst0
[c
]);
1975 bld_free_value_trackers(struct bld_value_stack
*base
, int n
)
1979 for (i
= 0; i
< n
; ++i
)
1980 for (c
= 0; c
< 4; ++c
)
1981 if (base
[i
* 4 + c
].body
)
1982 FREE(base
[i
* 4 + c
].body
);
1986 nv50_tgsi_to_nc(struct nv_pc
*pc
, struct nv50_translation_info
*ti
)
1988 struct bld_context
*bld
= CALLOC_STRUCT(bld_context
);
1992 pc
->root
[0] = pc
->current_block
= new_basic_block(pc
);
1997 pc
->loop_nesting_bound
= 1;
1999 c
= util_bitcount(bld
->ti
->p
->fp
.interp
>> 24);
2000 if (c
&& ti
->p
->type
== PIPE_SHADER_FRAGMENT
) {
2001 bld
->frgcrd
[3] = new_value(pc
, NV_FILE_MEM_V
, NV_TYPE_F32
);
2002 bld
->frgcrd
[3]->reg
.id
= c
- 1;
2003 bld
->frgcrd
[3] = bld_insn_1(bld
, NV_OP_LINTERP
, bld
->frgcrd
[3]);
2004 bld
->frgcrd
[3] = bld_insn_1(bld
, NV_OP_RCP
, bld
->frgcrd
[3]);
2007 for (ip
= 0; ip
< ti
->inst_nr
; ++ip
)
2008 bld_instruction(bld
, &ti
->insns
[ip
]);
2010 bld_free_value_trackers(&bld
->tvs
[0][0], BLD_MAX_TEMPS
);
2011 bld_free_value_trackers(&bld
->avs
[0][0], BLD_MAX_ADDRS
);
2012 bld_free_value_trackers(&bld
->pvs
[0][0], BLD_MAX_PREDS
);
2014 bld_free_value_trackers(&bld
->ovs
[0][0], PIPE_MAX_SHADER_OUTPUTS
);
/* If a variable is assigned in a loop, replace all references to the value
 * from outside the loop with a phi value.
 */
2024 bld_replace_value(struct nv_pc
*pc
, struct nv_basic_block
*b
,
2025 struct nv_value
*old_val
,
2026 struct nv_value
*new_val
)
2028 struct nv_instruction
*nvi
;
2030 for (nvi
= b
->phi
? b
->phi
: b
->entry
; nvi
; nvi
= nvi
->next
) {
2032 for (s
= 0; s
< 5; ++s
) {
2035 if (nvi
->src
[s
]->value
== old_val
)
2036 nv_reference(pc
, &nvi
->src
[s
], new_val
);
2038 if (nvi
->flags_src
&& nvi
->flags_src
->value
== old_val
)
2039 nv_reference(pc
, &nvi
->flags_src
, new_val
);
2042 b
->pass_seq
= pc
->pass_seq
;
2044 if (b
->out
[0] && b
->out
[0]->pass_seq
< pc
->pass_seq
)
2045 bld_replace_value(pc
, b
->out
[0], old_val
, new_val
);
2047 if (b
->out
[1] && b
->out
[1]->pass_seq
< pc
->pass_seq
)
2048 bld_replace_value(pc
, b
->out
[1], old_val
, new_val
);