2 * Copyright 2010 Christoph Bumiller
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
25 #define NOUVEAU_DEBUG 1
27 #include "pipe/p_shader_tokens.h"
28 #include "tgsi/tgsi_parse.h"
29 #include "tgsi/tgsi_util.h"
30 #include "tgsi/tgsi_dump.h"
31 #include "util/u_dynarray.h"
34 #include "nvc0_program.h"
36 /* Arbitrary internal limits. */
37 #define BLD_MAX_TEMPS 64
38 #define BLD_MAX_ADDRS 4
39 #define BLD_MAX_PREDS 4
40 #define BLD_MAX_IMMDS 128
41 #define BLD_MAX_OUTPS PIPE_MAX_SHADER_OUTPUTS
43 #define BLD_MAX_COND_NESTING 8
44 #define BLD_MAX_LOOP_NESTING 4
45 #define BLD_MAX_CALL_NESTING 2
47 /* This structure represents a TGSI register. */
49 struct nv_value
*current
;
50 /* collect all SSA values assigned to it */
51 struct util_dynarray vals
;
52 /* 1 bit per loop level, indicates if used/defd, reset when loop ends */
57 static INLINE
struct nv_value
**
58 bld_register_access(struct bld_register
*reg
, unsigned i
)
60 return util_dynarray_element(®
->vals
, struct nv_value
*, i
);
64 bld_register_add_val(struct bld_register
*reg
, struct nv_value
*val
)
66 struct nv_basic_block
*bb
= val
->insn
->bb
;
69 (util_dynarray_top(®
->vals
, struct nv_value
*))->insn
->bb
== bb
)
70 *(util_dynarray_top_ptr(®
->vals
, struct nv_value
*)) = val
;
72 util_dynarray_append(®
->vals
, struct nv_value
*, val
);
76 bld_register_del_val(struct bld_register
*reg
, struct nv_value
*val
)
80 for (i
= reg
->vals
.size
/ sizeof(struct nv_value
*); i
> 0; --i
)
81 if (*bld_register_access(reg
, i
- 1) == val
)
86 if (i
!= reg
->vals
.size
/ sizeof(struct nv_value
*))
87 *bld_register_access(reg
, i
- 1) = util_dynarray_pop(®
->vals
,
90 reg
->vals
.size
-= sizeof(struct nv_value
*);
96 struct nvc0_translation_info
*ti
;
99 struct nv_basic_block
*b
;
101 struct tgsi_parse_context parse
[BLD_MAX_CALL_NESTING
];
104 struct nv_basic_block
*cond_bb
[BLD_MAX_COND_NESTING
];
105 struct nv_basic_block
*join_bb
[BLD_MAX_COND_NESTING
];
106 struct nv_basic_block
*else_bb
[BLD_MAX_COND_NESTING
];
108 struct nv_basic_block
*loop_bb
[BLD_MAX_LOOP_NESTING
];
109 struct nv_basic_block
*brkt_bb
[BLD_MAX_LOOP_NESTING
];
112 ubyte out_kind
; /* CFG_EDGE_FORWARD, or FAKE in case of BREAK/CONT */
114 struct bld_register tvs
[BLD_MAX_TEMPS
][4]; /* TGSI_FILE_TEMPORARY */
115 struct bld_register avs
[BLD_MAX_ADDRS
][4]; /* TGSI_FILE_ADDRESS */
116 struct bld_register pvs
[BLD_MAX_PREDS
][4]; /* TGSI_FILE_PREDICATE */
117 struct bld_register ovs
[BLD_MAX_OUTPS
][4]; /* TGSI_FILE_OUTPUT, FP only */
119 uint32_t outputs_written
[(PIPE_MAX_SHADER_OUTPUTS
+ 7) / 8];
122 struct nv_value
*zero
;
123 struct nv_value
*frag_coord
[4];
126 struct nv_value
*saved_sysvals
[4];
127 struct nv_value
*saved_addr
[4][2];
128 struct nv_value
*saved_inputs
[PIPE_MAX_SHADER_INPUTS
][4];
129 struct nv_value
*saved_immd
[BLD_MAX_IMMDS
];
134 bld_register_file(struct bld_context
*bld
, struct bld_register
*reg
)
136 if (reg
< &bld
->avs
[0][0]) return NV_FILE_GPR
;
138 if (reg
< &bld
->pvs
[0][0]) return NV_FILE_GPR
;
140 if (reg
< &bld
->ovs
[0][0]) return NV_FILE_PRED
;
142 return NV_FILE_MEM_V
;
145 static INLINE
struct nv_value
*
146 bld_fetch(struct bld_context
*bld
, struct bld_register
*regs
, int i
, int c
)
148 regs
[i
* 4 + c
].loop_use
|= 1 << bld
->loop_lvl
;
149 return regs
[i
* 4 + c
].current
;
152 static struct nv_value
*
153 bld_loop_phi(struct bld_context
*, struct bld_register
*, struct nv_value
*);
155 /* If a variable is defined in a loop without prior use, we don't need
156 * a phi in the loop header to account for backwards flow.
158 * However, if this variable is then also used outside the loop, we do
159 * need a phi after all. But we must not use this phi's def inside the
160 * loop, so we can eliminate the phi if it is unused later.
163 bld_store(struct bld_context
*bld
,
164 struct bld_register
*regs
, int i
, int c
, struct nv_value
*val
)
166 const uint16_t m
= 1 << bld
->loop_lvl
;
167 struct bld_register
*reg
= ®s
[i
* 4 + c
];
169 if (bld
->loop_lvl
&& !(m
& (reg
->loop_def
| reg
->loop_use
)))
170 bld_loop_phi(bld
, reg
, val
);
173 bld_register_add_val(reg
, reg
->current
);
175 reg
->loop_def
|= 1 << bld
->loop_lvl
;
178 #define FETCH_TEMP(i, c) bld_fetch(bld, &bld->tvs[0][0], i, c)
179 #define STORE_TEMP(i, c, v) bld_store(bld, &bld->tvs[0][0], i, c, (v))
180 #define FETCH_ADDR(i, c) bld_fetch(bld, &bld->avs[0][0], i, c)
181 #define STORE_ADDR(i, c, v) bld_store(bld, &bld->avs[0][0], i, c, (v))
182 #define FETCH_PRED(i, c) bld_fetch(bld, &bld->pvs[0][0], i, c)
183 #define STORE_PRED(i, c, v) bld_store(bld, &bld->pvs[0][0], i, c, (v))
184 #define STORE_OUTP(i, c, v) \
186 bld_store(bld, &bld->ovs[0][0], i, c, (v)); \
187 bld->outputs_written[(i) / 8] |= 1 << (((i) * 4 + (c)) % 32); \
191 bld_clear_def_use(struct bld_register
*regs
, int n
, int lvl
)
194 const uint16_t mask
= ~(1 << lvl
);
196 for (i
= 0; i
< n
* 4; ++i
) {
197 regs
[i
].loop_def
&= mask
;
198 regs
[i
].loop_use
&= mask
;
/* Debug helper: warn about a TGSI TEMP that is (or may be, kind != 0)
 * read before being written.  The register index and component are
 * recovered from the pointer offset into bld->tvs.
 */
static void
bld_warn_uninitialized(struct bld_context *bld, int kind,
                       struct bld_register *reg, struct nv_basic_block *b)
{
#ifdef NOUVEAU_DEBUG
   long i = (reg - &bld->tvs[0][0]) / 4;
   long c = (reg - &bld->tvs[0][0]) & 3;

   /* map component 3 to 'w' ('x' + -1), others to x/y/z */
   if (c == 3)
      c = -1;
   debug_printf("WARNING: TEMP[%li].%c %s used uninitialized in BB:%i\n",
                i, (int)('x' + c), kind ? "may be" : "is", b->id);
#endif
}
217 static INLINE
struct nv_value
*
218 bld_def(struct nv_instruction
*i
, int c
, struct nv_value
*value
)
225 static INLINE
struct nv_value
*
226 find_by_bb(struct bld_register
*reg
, struct nv_basic_block
*b
)
230 if (reg
->current
&& reg
->current
->insn
->bb
== b
)
233 for (i
= 0; i
< reg
->vals
.size
/ sizeof(struct nv_value
*); ++i
)
234 if ((*bld_register_access(reg
, i
))->insn
->bb
== b
)
235 return *bld_register_access(reg
, i
);
239 /* Fetch value from register that was defined in the specified BB,
240 * or search for first definitions in all of its predecessors.
243 fetch_by_bb(struct bld_register
*reg
,
244 struct nv_value
**vals
, int *n
,
245 struct nv_basic_block
*b
)
248 struct nv_value
*val
;
250 assert(*n
< 16); /* MAX_COND_NESTING */
252 val
= find_by_bb(reg
, b
);
254 for (i
= 0; i
< *n
; ++i
)
260 for (i
= 0; i
< b
->num_in
; ++i
)
261 if (!IS_WALL_EDGE(b
->in_kind
[i
]))
262 fetch_by_bb(reg
, vals
, n
, b
->in
[i
]);
265 static INLINE
struct nv_value
*
266 bld_load_imm_u32(struct bld_context
*bld
, uint32_t u
);
268 static INLINE
struct nv_value
*
269 bld_undef(struct bld_context
*bld
, ubyte file
)
271 struct nv_instruction
*nvi
= new_instruction(bld
->pc
, NV_OP_UNDEF
);
273 return bld_def(nvi
, 0, new_value(bld
->pc
, file
, 4));
276 static struct nv_value
*
277 bld_phi(struct bld_context
*bld
, struct nv_basic_block
*b
,
278 struct bld_register
*reg
)
280 struct nv_basic_block
*in
;
281 struct nv_value
*vals
[16] = { NULL
};
282 struct nv_value
*val
;
283 struct nv_instruction
*phi
;
288 fetch_by_bb(reg
, vals
, &n
, b
);
291 bld_warn_uninitialized(bld
, 0, reg
, b
);
296 if (nvc0_bblock_dominated_by(b
, vals
[0]->insn
->bb
))
299 bld_warn_uninitialized(bld
, 1, reg
, b
);
301 /* back-tracking to insert missing value of other path */
304 if (in
->num_in
== 1) {
307 if (!nvc0_bblock_reachable_by(in
->in
[0], vals
[0]->insn
->bb
, b
))
310 if (!nvc0_bblock_reachable_by(in
->in
[1], vals
[0]->insn
->bb
, b
))
316 bld
->pc
->current_block
= in
;
318 /* should make this a no-op */
319 bld_register_add_val(reg
, bld_undef(bld
, vals
[0]->reg
.file
));
323 for (i
= 0; i
< n
; ++i
) {
324 /* if value dominates b, continue to the redefinitions */
325 if (nvc0_bblock_dominated_by(b
, vals
[i
]->insn
->bb
))
328 /* if value dominates any in-block, b should be the dom frontier */
329 for (j
= 0; j
< b
->num_in
; ++j
)
330 if (nvc0_bblock_dominated_by(b
->in
[j
], vals
[i
]->insn
->bb
))
332 /* otherwise, find the dominance frontier and put the phi there */
333 if (j
== b
->num_in
) {
334 in
= nvc0_bblock_dom_frontier(vals
[i
]->insn
->bb
);
335 val
= bld_phi(bld
, in
, reg
);
336 bld_register_add_val(reg
, val
);
342 bld
->pc
->current_block
= b
;
347 phi
= new_instruction(bld
->pc
, NV_OP_PHI
);
349 bld_def(phi
, 0, new_value(bld
->pc
, vals
[0]->reg
.file
, vals
[0]->reg
.size
));
350 for (i
= 0; i
< n
; ++i
)
351 nv_reference(bld
->pc
, phi
, i
, vals
[i
]);
356 /* Insert a phi function in the loop header.
357 * For nested loops, we need to insert phi functions in all the outer
358 * loop headers if they don't have one yet.
360 * @def: redefinition from inside loop, or NULL if to be replaced later
362 static struct nv_value
*
363 bld_loop_phi(struct bld_context
*bld
, struct bld_register
*reg
,
364 struct nv_value
*def
)
366 struct nv_instruction
*phi
;
367 struct nv_basic_block
*bb
= bld
->pc
->current_block
;
368 struct nv_value
*val
= NULL
;
370 if (bld
->loop_lvl
> 1) {
372 if (!((reg
->loop_def
| reg
->loop_use
) & (1 << bld
->loop_lvl
)))
373 val
= bld_loop_phi(bld
, reg
, NULL
);
378 val
= bld_phi(bld
, bld
->pc
->current_block
, reg
); /* old definition */
380 bld
->pc
->current_block
= bld
->loop_bb
[bld
->loop_lvl
- 1]->in
[0];
381 val
= bld_undef(bld
, bld_register_file(bld
, reg
));
384 bld
->pc
->current_block
= bld
->loop_bb
[bld
->loop_lvl
- 1];
386 phi
= new_instruction(bld
->pc
, NV_OP_PHI
);
388 bld_def(phi
, 0, new_value_like(bld
->pc
, val
));
392 bld_register_add_val(reg
, phi
->def
[0]);
394 phi
->target
= (struct nv_basic_block
*)reg
; /* cheat */
396 nv_reference(bld
->pc
, phi
, 0, val
);
397 nv_reference(bld
->pc
, phi
, 1, def
);
399 bld
->pc
->current_block
= bb
;
404 static INLINE
struct nv_value
*
405 bld_fetch_global(struct bld_context
*bld
, struct bld_register
*reg
)
407 const uint16_t m
= 1 << bld
->loop_lvl
;
408 const uint16_t use
= reg
->loop_use
;
412 /* If neither used nor def'd inside the loop, build a phi in foresight,
413 * so we don't have to replace stuff later on, which requires tracking.
415 if (bld
->loop_lvl
&& !((use
| reg
->loop_def
) & m
))
416 return bld_loop_phi(bld
, reg
, NULL
);
418 return bld_phi(bld
, bld
->pc
->current_block
, reg
);
421 static INLINE
struct nv_value
*
422 bld_imm_u32(struct bld_context
*bld
, uint32_t u
)
425 unsigned n
= bld
->num_immds
;
427 for (i
= 0; i
< n
; ++i
)
428 if (bld
->saved_immd
[i
]->reg
.imm
.u32
== u
)
429 return bld
->saved_immd
[i
];
431 assert(n
< BLD_MAX_IMMDS
);
434 bld
->saved_immd
[n
] = new_value(bld
->pc
, NV_FILE_IMM
, 4);
435 bld
->saved_immd
[n
]->reg
.imm
.u32
= u
;
436 return bld
->saved_immd
[n
];
440 bld_replace_value(struct nv_pc
*, struct nv_basic_block
*, struct nv_value
*,
443 /* Replace the source of the phi in the loop header by the last assignment,
444 * or eliminate the phi function if there is no assignment inside the loop.
446 * Redundancy situation 1 - (used) but (not redefined) value:
447 * %3 = phi %0, %3 = %3 is used
448 * %3 = phi %0, %4 = is new definition
450 * Redundancy situation 2 - (not used) but (redefined) value:
451 * %3 = phi %0, %2 = %2 is used, %3 could be used outside, deleted by DCE
454 bld_loop_end(struct bld_context
*bld
, struct nv_basic_block
*bb
)
456 struct nv_basic_block
*save
= bld
->pc
->current_block
;
457 struct nv_instruction
*phi
, *next
;
458 struct nv_value
*val
;
459 struct bld_register
*reg
;
462 for (phi
= bb
->phi
; phi
&& phi
->opcode
== NV_OP_PHI
; phi
= next
) {
465 reg
= (struct bld_register
*)phi
->target
;
468 for (s
= 1, n
= 0; n
< bb
->num_in
; ++n
) {
469 if (bb
->in_kind
[n
] != CFG_EDGE_BACK
)
473 bld
->pc
->current_block
= bb
->in
[n
];
474 val
= bld_fetch_global(bld
, reg
);
476 for (i
= 0; i
< 4; ++i
)
477 if (phi
->src
[i
] && phi
->src
[i
]->value
== val
)
480 nv_reference(bld
->pc
, phi
, s
++, val
);
482 bld
->pc
->current_block
= save
;
484 if (phi
->src
[0]->value
== phi
->def
[0] ||
485 phi
->src
[0]->value
== phi
->src
[1]->value
)
488 if (phi
->src
[1]->value
== phi
->def
[0])
494 /* eliminate the phi */
495 bld_register_del_val(reg
, phi
->def
[0]);
498 bld_replace_value(bld
->pc
, bb
, phi
->def
[0], phi
->src
[s
]->value
);
500 nvc0_insn_delete(phi
);
505 static INLINE
struct nv_value
*
506 bld_imm_f32(struct bld_context
*bld
, float f
)
508 return bld_imm_u32(bld
, fui(f
));
511 static struct nv_value
*
512 bld_insn_1(struct bld_context
*bld
, uint opcode
, struct nv_value
*src0
)
514 struct nv_instruction
*insn
= new_instruction(bld
->pc
, opcode
);
516 nv_reference(bld
->pc
, insn
, 0, src0
);
518 return bld_def(insn
, 0, new_value(bld
->pc
, NV_FILE_GPR
, src0
->reg
.size
));
521 static struct nv_value
*
522 bld_insn_2(struct bld_context
*bld
, uint opcode
,
523 struct nv_value
*src0
, struct nv_value
*src1
)
525 struct nv_instruction
*insn
= new_instruction(bld
->pc
, opcode
);
527 nv_reference(bld
->pc
, insn
, 0, src0
);
528 nv_reference(bld
->pc
, insn
, 1, src1
);
530 return bld_def(insn
, 0, new_value(bld
->pc
, NV_FILE_GPR
, src0
->reg
.size
));
533 static struct nv_value
*
534 bld_insn_3(struct bld_context
*bld
, uint opcode
,
535 struct nv_value
*src0
, struct nv_value
*src1
,
536 struct nv_value
*src2
)
538 struct nv_instruction
*insn
= new_instruction(bld
->pc
, opcode
);
540 nv_reference(bld
->pc
, insn
, 0, src0
);
541 nv_reference(bld
->pc
, insn
, 1, src1
);
542 nv_reference(bld
->pc
, insn
, 2, src2
);
544 return bld_def(insn
, 0, new_value(bld
->pc
, NV_FILE_GPR
, src0
->reg
.size
));
548 bld_src_predicate(struct bld_context
*bld
,
549 struct nv_instruction
*nvi
, int s
, struct nv_value
*val
)
552 nv_reference(bld
->pc
, nvi
, s
, val
);
556 bld_src_pointer(struct bld_context
*bld
,
557 struct nv_instruction
*nvi
, int s
, struct nv_value
*val
)
560 nv_reference(bld
->pc
, nvi
, s
, val
);
564 bld_lmem_store(struct bld_context
*bld
, struct nv_value
*ptr
, int ofst
,
565 struct nv_value
*val
)
567 struct nv_instruction
*insn
= new_instruction(bld
->pc
, NV_OP_ST
);
568 struct nv_value
*loc
;
570 loc
= new_value(bld
->pc
, NV_FILE_MEM_L
, nv_type_sizeof(NV_TYPE_U32
));
572 loc
->reg
.address
= ofst
* 4;
574 nv_reference(bld
->pc
, insn
, 0, loc
);
575 nv_reference(bld
->pc
, insn
, 1, val
);
577 bld_src_pointer(bld
, insn
, 2, ptr
);
580 static struct nv_value
*
581 bld_lmem_load(struct bld_context
*bld
, struct nv_value
*ptr
, int ofst
)
583 struct nv_value
*loc
, *val
;
585 loc
= new_value(bld
->pc
, NV_FILE_MEM_L
, nv_type_sizeof(NV_TYPE_U32
));
587 loc
->reg
.address
= ofst
* 4;
589 val
= bld_insn_1(bld
, NV_OP_LD
, loc
);
591 bld_src_pointer(bld
, val
->insn
, 1, ptr
);
596 static struct nv_value
*
597 bld_pow(struct bld_context
*bld
, struct nv_value
*x
, struct nv_value
*e
)
599 struct nv_value
*val
;
601 val
= bld_insn_1(bld
, NV_OP_LG2
, x
);
602 val
= bld_insn_2(bld
, NV_OP_MUL_F32
, e
, val
);
604 val
= bld_insn_1(bld
, NV_OP_PREEX2
, val
);
605 val
= bld_insn_1(bld
, NV_OP_EX2
, val
);
610 static INLINE
struct nv_value
*
611 bld_load_imm_f32(struct bld_context
*bld
, float f
)
615 return bld_insn_1(bld
, NV_OP_MOV
, bld_imm_f32(bld
, f
));
618 static INLINE
struct nv_value
*
619 bld_load_imm_u32(struct bld_context
*bld
, uint32_t u
)
623 return bld_insn_1(bld
, NV_OP_MOV
, bld_imm_u32(bld
, u
));
626 static INLINE
struct nv_value
*
627 bld_setp(struct bld_context
*bld
, uint op
, uint8_t cc
,
628 struct nv_value
*src0
, struct nv_value
*src1
)
630 struct nv_value
*val
= bld_insn_2(bld
, op
, src0
, src1
);
632 val
->reg
.file
= NV_FILE_PRED
;
634 val
->insn
->set_cond
= cc
& 0xf;
638 static INLINE
struct nv_value
*
639 bld_cvt(struct bld_context
*bld
, uint8_t dt
, uint8_t st
, struct nv_value
*src
)
641 struct nv_value
*val
= bld_insn_1(bld
, NV_OP_CVT
, src
);
642 val
->insn
->ext
.cvt
.d
= dt
;
643 val
->insn
->ext
.cvt
.s
= st
;
648 bld_kil(struct bld_context
*bld
, struct nv_value
*src
)
650 struct nv_instruction
*nvi
;
652 src
= bld_setp(bld
, NV_OP_SET_F32
, NV_CC_LT
, src
, bld
->zero
);
654 nvi
= new_instruction(bld
->pc
, NV_OP_KIL
);
657 bld_src_predicate(bld
, nvi
, 0, src
);
661 bld_flow(struct bld_context
*bld
, uint opcode
,
662 struct nv_value
*pred
, uint8_t cc
, struct nv_basic_block
*target
,
665 struct nv_instruction
*nvi
;
668 new_instruction(bld
->pc
, NV_OP_JOINAT
)->fixed
= 1;
670 nvi
= new_instruction(bld
->pc
, opcode
);
671 nvi
->target
= target
;
675 bld_src_predicate(bld
, nvi
, 0, pred
);
680 translate_setcc(unsigned opcode
)
683 case TGSI_OPCODE_SLT
: return NV_CC_LT
;
684 case TGSI_OPCODE_SGE
: return NV_CC_GE
;
685 case TGSI_OPCODE_SEQ
: return NV_CC_EQ
;
686 case TGSI_OPCODE_SGT
: return NV_CC_GT
;
687 case TGSI_OPCODE_SLE
: return NV_CC_LE
;
688 case TGSI_OPCODE_SNE
: return NV_CC_NE
| NV_CC_U
;
689 case TGSI_OPCODE_STR
: return NV_CC_TR
;
690 case TGSI_OPCODE_SFL
: return NV_CC_FL
;
692 case TGSI_OPCODE_ISLT
: return NV_CC_LT
;
693 case TGSI_OPCODE_ISGE
: return NV_CC_GE
;
694 case TGSI_OPCODE_USEQ
: return NV_CC_EQ
;
695 case TGSI_OPCODE_USGE
: return NV_CC_GE
;
696 case TGSI_OPCODE_USLT
: return NV_CC_LT
;
697 case TGSI_OPCODE_USNE
: return NV_CC_NE
;
705 translate_opcode(uint opcode
)
708 case TGSI_OPCODE_ABS
: return NV_OP_ABS_F32
;
709 case TGSI_OPCODE_ADD
: return NV_OP_ADD_F32
;
710 case TGSI_OPCODE_SUB
: return NV_OP_SUB_F32
;
711 case TGSI_OPCODE_UADD
: return NV_OP_ADD_B32
;
712 case TGSI_OPCODE_AND
: return NV_OP_AND
;
713 case TGSI_OPCODE_EX2
: return NV_OP_EX2
;
714 case TGSI_OPCODE_CEIL
: return NV_OP_CEIL
;
715 case TGSI_OPCODE_FLR
: return NV_OP_FLOOR
;
716 case TGSI_OPCODE_TRUNC
: return NV_OP_TRUNC
;
717 case TGSI_OPCODE_COS
: return NV_OP_COS
;
718 case TGSI_OPCODE_SIN
: return NV_OP_SIN
;
719 case TGSI_OPCODE_DDX
: return NV_OP_DFDX
;
720 case TGSI_OPCODE_DDY
: return NV_OP_DFDY
;
721 case TGSI_OPCODE_F2I
:
722 case TGSI_OPCODE_F2U
:
723 case TGSI_OPCODE_I2F
:
724 case TGSI_OPCODE_U2F
: return NV_OP_CVT
;
725 case TGSI_OPCODE_INEG
: return NV_OP_NEG_S32
;
726 case TGSI_OPCODE_LG2
: return NV_OP_LG2
;
727 case TGSI_OPCODE_ISHR
: return NV_OP_SAR
;
728 case TGSI_OPCODE_USHR
: return NV_OP_SHR
;
729 case TGSI_OPCODE_MAD
: return NV_OP_MAD_F32
;
730 case TGSI_OPCODE_MAX
: return NV_OP_MAX_F32
;
731 case TGSI_OPCODE_IMAX
: return NV_OP_MAX_S32
;
732 case TGSI_OPCODE_UMAX
: return NV_OP_MAX_U32
;
733 case TGSI_OPCODE_MIN
: return NV_OP_MIN_F32
;
734 case TGSI_OPCODE_IMIN
: return NV_OP_MIN_S32
;
735 case TGSI_OPCODE_UMIN
: return NV_OP_MIN_U32
;
736 case TGSI_OPCODE_MUL
: return NV_OP_MUL_F32
;
737 case TGSI_OPCODE_UMUL
: return NV_OP_MUL_B32
;
738 case TGSI_OPCODE_OR
: return NV_OP_OR
;
739 case TGSI_OPCODE_RCP
: return NV_OP_RCP
;
740 case TGSI_OPCODE_RSQ
: return NV_OP_RSQ
;
741 case TGSI_OPCODE_SAD
: return NV_OP_SAD
;
742 case TGSI_OPCODE_SHL
: return NV_OP_SHL
;
743 case TGSI_OPCODE_SLT
:
744 case TGSI_OPCODE_SGE
:
745 case TGSI_OPCODE_SEQ
:
746 case TGSI_OPCODE_SGT
:
747 case TGSI_OPCODE_SLE
:
748 case TGSI_OPCODE_SNE
: return NV_OP_FSET_F32
;
749 case TGSI_OPCODE_ISLT
:
750 case TGSI_OPCODE_ISGE
: return NV_OP_SET_S32
;
751 case TGSI_OPCODE_USEQ
:
752 case TGSI_OPCODE_USGE
:
753 case TGSI_OPCODE_USLT
:
754 case TGSI_OPCODE_USNE
: return NV_OP_SET_U32
;
755 case TGSI_OPCODE_TEX
: return NV_OP_TEX
;
756 case TGSI_OPCODE_TXP
: return NV_OP_TEX
;
757 case TGSI_OPCODE_TXB
: return NV_OP_TXB
;
758 case TGSI_OPCODE_TXL
: return NV_OP_TXL
;
759 case TGSI_OPCODE_XOR
: return NV_OP_XOR
;
767 infer_src_type(unsigned opcode
)
770 case TGSI_OPCODE_MOV
:
771 case TGSI_OPCODE_AND
:
773 case TGSI_OPCODE_XOR
:
774 case TGSI_OPCODE_SAD
:
775 case TGSI_OPCODE_U2F
:
776 case TGSI_OPCODE_UADD
:
777 case TGSI_OPCODE_UDIV
:
778 case TGSI_OPCODE_UMOD
:
779 case TGSI_OPCODE_UMAD
:
780 case TGSI_OPCODE_UMUL
:
781 case TGSI_OPCODE_UMAX
:
782 case TGSI_OPCODE_UMIN
:
783 case TGSI_OPCODE_USEQ
:
784 case TGSI_OPCODE_USGE
:
785 case TGSI_OPCODE_USLT
:
786 case TGSI_OPCODE_USNE
:
787 case TGSI_OPCODE_USHR
:
789 case TGSI_OPCODE_I2F
:
790 case TGSI_OPCODE_IDIV
:
791 case TGSI_OPCODE_IMAX
:
792 case TGSI_OPCODE_IMIN
:
793 case TGSI_OPCODE_INEG
:
794 case TGSI_OPCODE_ISGE
:
795 case TGSI_OPCODE_ISHR
:
796 case TGSI_OPCODE_ISLT
:
804 infer_dst_type(unsigned opcode
)
807 case TGSI_OPCODE_MOV
:
808 case TGSI_OPCODE_F2U
:
809 case TGSI_OPCODE_AND
:
811 case TGSI_OPCODE_XOR
:
812 case TGSI_OPCODE_SAD
:
813 case TGSI_OPCODE_UADD
:
814 case TGSI_OPCODE_UDIV
:
815 case TGSI_OPCODE_UMOD
:
816 case TGSI_OPCODE_UMAD
:
817 case TGSI_OPCODE_UMUL
:
818 case TGSI_OPCODE_UMAX
:
819 case TGSI_OPCODE_UMIN
:
820 case TGSI_OPCODE_USEQ
:
821 case TGSI_OPCODE_USGE
:
822 case TGSI_OPCODE_USLT
:
823 case TGSI_OPCODE_USNE
:
824 case TGSI_OPCODE_USHR
:
826 case TGSI_OPCODE_F2I
:
827 case TGSI_OPCODE_IDIV
:
828 case TGSI_OPCODE_IMAX
:
829 case TGSI_OPCODE_IMIN
:
830 case TGSI_OPCODE_INEG
:
831 case TGSI_OPCODE_ISGE
:
832 case TGSI_OPCODE_ISHR
:
833 case TGSI_OPCODE_ISLT
:
842 emit_store(struct bld_context
*bld
, const struct tgsi_full_instruction
*inst
,
843 unsigned chan
, struct nv_value
*res
)
845 const struct tgsi_full_dst_register
*reg
= &inst
->Dst
[0];
846 struct nv_instruction
*nvi
;
847 struct nv_value
*mem
;
848 struct nv_value
*ptr
= NULL
;
851 idx
= reg
->Register
.Index
;
854 if (reg
->Register
.Indirect
)
855 ptr
= FETCH_ADDR(reg
->Indirect
.Index
,
856 tgsi_util_get_src_register_swizzle(®
->Indirect
, 0));
858 switch (inst
->Instruction
.Saturate
) {
861 case TGSI_SAT_ZERO_ONE
:
862 res
= bld_insn_1(bld
, NV_OP_SAT
, res
);
864 case TGSI_SAT_MINUS_PLUS_ONE
:
865 res
= bld_insn_2(bld
, NV_OP_MAX_F32
, res
, bld_load_imm_f32(bld
, -1.0f
));
866 res
= bld_insn_2(bld
, NV_OP_MIN_F32
, res
, bld_load_imm_f32(bld
, 1.0f
));
870 switch (reg
->Register
.File
) {
871 case TGSI_FILE_OUTPUT
:
873 res
= bld_insn_1(bld
, NV_OP_MOV
, res
);
875 if (bld
->pc
->is_fragprog
) {
877 STORE_OUTP(idx
, chan
, res
);
879 nvi
= new_instruction(bld
->pc
, NV_OP_EXPORT
);
880 mem
= new_value(bld
->pc
, bld
->ti
->output_file
, res
->reg
.size
);
881 nv_reference(bld
->pc
, nvi
, 0, mem
);
882 nv_reference(bld
->pc
, nvi
, 1, res
);
884 mem
->reg
.address
= bld
->ti
->output_loc
[idx
][chan
];
886 mem
->reg
.address
= 0x80 + idx
* 16 + chan
* 4;
890 case TGSI_FILE_TEMPORARY
:
891 assert(idx
< BLD_MAX_TEMPS
);
892 if (!res
->insn
|| res
->insn
->bb
!= bld
->pc
->current_block
)
893 res
= bld_insn_1(bld
, NV_OP_MOV
, res
);
895 assert(res
->reg
.file
== NV_FILE_GPR
);
897 if (bld
->ti
->require_stores
)
898 bld_lmem_store(bld
, ptr
, idx
* 4 + chan
, res
);
900 STORE_TEMP(idx
, chan
, res
);
902 case TGSI_FILE_ADDRESS
:
903 assert(idx
< BLD_MAX_ADDRS
);
904 STORE_ADDR(idx
, chan
, res
);
909 static INLINE
uint32_t
910 bld_is_output_written(struct bld_context
*bld
, int i
, int c
)
913 return bld
->outputs_written
[i
/ 8] & (0xf << ((i
* 4) % 32));
914 return bld
->outputs_written
[i
/ 8] & (1 << ((i
* 4 + c
) % 32));
918 bld_append_vp_ucp(struct bld_context
*bld
)
920 struct nv_value
*res
[6];
921 struct nv_value
*ucp
, *vtx
, *out
;
922 struct nv_instruction
*insn
;
925 assert(bld
->ti
->prog
->vp
.num_ucps
<= 6);
927 for (c
= 0; c
< 4; ++c
) {
928 vtx
= bld_fetch_global(bld
, &bld
->ovs
[bld
->hpos_index
][c
]);
930 for (i
= 0; i
< bld
->ti
->prog
->vp
.num_ucps
; ++i
) {
931 ucp
= new_value(bld
->pc
, NV_FILE_MEM_C(15), 4);
932 ucp
->reg
.address
= i
* 16 + c
* 4;
935 res
[i
] = bld_insn_2(bld
, NV_OP_MUL_F32
, vtx
, ucp
);
937 res
[i
] = bld_insn_3(bld
, NV_OP_MAD_F32
, vtx
, ucp
, res
[i
]);
941 for (i
= 0; i
< bld
->ti
->prog
->vp
.num_ucps
; ++i
) {
942 (out
= new_value(bld
->pc
, NV_FILE_MEM_V
, 4))->reg
.address
= 0x2c0 + i
* 4;
943 (insn
= new_instruction(bld
->pc
, NV_OP_EXPORT
))->fixed
= 1;
944 nv_reference(bld
->pc
, insn
, 0, out
);
945 nv_reference(bld
->pc
, insn
, 1, res
[i
]);
950 bld_export_fp_outputs(struct bld_context
*bld
)
952 struct nv_value
*vals
[4];
953 struct nv_instruction
*nvi
;
956 for (i
= 0; i
< PIPE_MAX_SHADER_OUTPUTS
; ++i
) {
957 if (!bld_is_output_written(bld
, i
, -1))
959 for (n
= 0, c
= 0; c
< 4; ++c
) {
960 if (!bld_is_output_written(bld
, i
, c
))
962 vals
[n
] = bld_fetch_global(bld
, &bld
->ovs
[i
][c
]);
964 vals
[n
] = bld_insn_1(bld
, NV_OP_MOV
, vals
[n
]);
965 vals
[n
++]->reg
.id
= bld
->ti
->output_loc
[i
][c
];
969 (nvi
= new_instruction(bld
->pc
, NV_OP_EXPORT
))->fixed
= 1;
970 for (c
= 0; c
< n
; ++c
)
971 nv_reference(bld
->pc
, nvi
, c
, vals
[c
]);
976 bld_new_block(struct bld_context
*bld
, struct nv_basic_block
*b
)
980 bld
->pc
->current_block
= b
;
982 for (i
= 0; i
< 4; ++i
)
983 bld
->saved_addr
[i
][0] = NULL
;
984 for (i
= 0; i
< PIPE_MAX_SHADER_INPUTS
; ++i
)
985 for (c
= 0; c
< 4; ++c
)
986 bld
->saved_inputs
[i
][c
] = NULL
;
988 bld
->out_kind
= CFG_EDGE_FORWARD
;
991 static struct nv_value
*
992 bld_interp(struct bld_context
*bld
, unsigned mode
, struct nv_value
*val
)
994 unsigned cent
= mode
& NVC0_INTERP_CENTROID
;
996 mode
&= ~NVC0_INTERP_CENTROID
;
998 if (val
->reg
.address
== 0x3fc) {
999 /* gl_FrontFacing: 0/~0 to -1.0/+1.0 */
1000 val
= bld_insn_1(bld
, NV_OP_LINTERP
, val
);
1001 val
->insn
->flat
= 1;
1002 val
= bld_insn_2(bld
, NV_OP_SHL
, val
, bld_imm_u32(bld
, 31));
1003 val
= bld_insn_2(bld
, NV_OP_XOR
, val
, bld_imm_f32(bld
, -1.0f
));
1006 if (mode
== NVC0_INTERP_PERSPECTIVE
) {
1007 val
= bld_insn_2(bld
, NV_OP_PINTERP
, val
, bld
->frag_coord
[3]);
1009 val
= bld_insn_1(bld
, NV_OP_LINTERP
, val
);
1012 val
->insn
->flat
= mode
== NVC0_INTERP_FLAT
? 1 : 0;
1013 val
->insn
->centroid
= cent
? 1 : 0;
1017 static struct nv_value
*
1018 emit_fetch(struct bld_context
*bld
, const struct tgsi_full_instruction
*insn
,
1019 const unsigned s
, const unsigned chan
)
1021 const struct tgsi_full_src_register
*src
= &insn
->Src
[s
];
1022 struct nv_value
*res
= NULL
;
1023 struct nv_value
*ptr
= NULL
;
1024 int idx
, ind_idx
, dim_idx
;
1025 unsigned swz
, ind_swz
, sgn
;
1027 idx
= src
->Register
.Index
;
1028 swz
= tgsi_util_get_full_src_register_swizzle(src
, chan
);
1030 if (src
->Register
.Indirect
) {
1031 ind_idx
= src
->Indirect
.Index
;
1032 ind_swz
= tgsi_util_get_src_register_swizzle(&src
->Indirect
, 0);
1034 ptr
= FETCH_ADDR(ind_idx
, ind_swz
);
1037 if (src
->Register
.Dimension
)
1038 dim_idx
= src
->Dimension
.Index
;
1042 switch (src
->Register
.File
) {
1043 case TGSI_FILE_CONSTANT
:
1044 assert(dim_idx
< 14);
1045 res
= new_value(bld
->pc
, NV_FILE_MEM_C(dim_idx
), 4);
1046 res
->reg
.address
= idx
* 16 + swz
* 4;
1047 res
= bld_insn_1(bld
, NV_OP_LD
, res
);
1049 bld_src_pointer(bld
, res
->insn
, 1, ptr
);
1051 case TGSI_FILE_IMMEDIATE
: /* XXX: type for MOV TEMP[0], -IMM[0] */
1052 assert(idx
< bld
->ti
->immd32_nr
);
1053 res
= bld_load_imm_u32(bld
, bld
->ti
->immd32
[idx
* 4 + swz
]);
1055 case TGSI_FILE_INPUT
:
1056 assert(!src
->Register
.Dimension
);
1058 res
= bld
->saved_inputs
[idx
][swz
];
1062 res
= new_value(bld
->pc
, bld
->ti
->input_file
, 4);
1064 res
->reg
.address
= 0x80 + idx
* 16 + swz
* 4;
1066 res
->reg
.address
= bld
->ti
->input_loc
[idx
][swz
];
1068 if (bld
->pc
->is_fragprog
)
1069 res
= bld_interp(bld
, bld
->ti
->interp_mode
[idx
], res
);
1071 res
= bld_insn_1(bld
, NV_OP_VFETCH
, res
);
1074 bld_src_pointer(bld
, res
->insn
, res
->insn
->src
[1] ? 2 : 1, ptr
);
1076 bld
->saved_inputs
[idx
][swz
] = res
;
1078 case TGSI_FILE_TEMPORARY
:
1079 if (bld
->ti
->require_stores
)
1080 res
= bld_lmem_load(bld
, ptr
, idx
* 4 + swz
);
1082 res
= bld_fetch_global(bld
, &bld
->tvs
[idx
][swz
]);
1084 case TGSI_FILE_ADDRESS
:
1085 res
= bld_fetch_global(bld
, &bld
->avs
[idx
][swz
]);
1087 case TGSI_FILE_PREDICATE
:
1088 res
= bld_fetch_global(bld
, &bld
->pvs
[idx
][swz
]);
1090 case TGSI_FILE_SYSTEM_VALUE
:
1091 assert(bld
->ti
->sysval_loc
[idx
] < 0xf00); /* >= would mean special reg */
1092 res
= new_value(bld
->pc
,
1093 bld
->pc
->is_fragprog
? NV_FILE_MEM_V
: NV_FILE_MEM_A
, 4);
1094 res
->reg
.address
= bld
->ti
->sysval_loc
[idx
];
1096 if (res
->reg
.file
== NV_FILE_MEM_A
)
1097 res
= bld_insn_1(bld
, NV_OP_VFETCH
, res
);
1099 res
= bld_interp(bld
, NVC0_INTERP_FLAT
, res
);
1101 /* mesa doesn't do real integers yet :-(and in GL this should be S32) */
1102 res
= bld_cvt(bld
, NV_TYPE_F32
, NV_TYPE_U32
, res
);
1105 NOUVEAU_ERR("illegal/unhandled src reg file: %d\n", src
->Register
.File
);
1110 return bld_undef(bld
, NV_FILE_GPR
);
1112 sgn
= tgsi_util_get_full_src_register_sign_mode(src
, chan
);
1115 case TGSI_UTIL_SIGN_KEEP
:
1117 case TGSI_UTIL_SIGN_CLEAR
:
1118 res
= bld_insn_1(bld
, NV_OP_ABS_F32
, res
);
1120 case TGSI_UTIL_SIGN_TOGGLE
:
1121 res
= bld_insn_1(bld
, NV_OP_NEG_F32
, res
);
1123 case TGSI_UTIL_SIGN_SET
:
1124 res
= bld_insn_1(bld
, NV_OP_ABS_F32
, res
);
1125 res
= bld_insn_1(bld
, NV_OP_NEG_F32
, res
);
1128 NOUVEAU_ERR("illegal/unhandled src reg sign mode\n");
1137 bld_lit(struct bld_context
*bld
, struct nv_value
*dst0
[4],
1138 const struct tgsi_full_instruction
*insn
)
1140 struct nv_value
*val0
= NULL
;
1141 unsigned mask
= insn
->Dst
[0].Register
.WriteMask
;
1143 if (mask
& ((1 << 0) | (1 << 3)))
1144 dst0
[3] = dst0
[0] = bld_load_imm_f32(bld
, 1.0f
);
1146 if (mask
& (3 << 1)) {
1147 val0
= bld_insn_2(bld
, NV_OP_MAX
, emit_fetch(bld
, insn
, 0, 0), bld
->zero
);
1148 if (mask
& (1 << 1))
1152 if (mask
& (1 << 2)) {
1153 struct nv_value
*val1
, *val3
, *src1
, *src3
, *pred
;
1154 struct nv_value
*pos128
= bld_load_imm_f32(bld
, 127.999999f
);
1155 struct nv_value
*neg128
= bld_load_imm_f32(bld
, -127.999999f
);
1157 src1
= emit_fetch(bld
, insn
, 0, 1);
1158 src3
= emit_fetch(bld
, insn
, 0, 3);
1160 pred
= bld_setp(bld
, NV_OP_SET_F32
, NV_CC_LE
, val0
, bld
->zero
);
1162 val1
= bld_insn_2(bld
, NV_OP_MAX_F32
, src1
, bld
->zero
);
1163 val3
= bld_insn_2(bld
, NV_OP_MAX_F32
, src3
, neg128
);
1164 val3
= bld_insn_2(bld
, NV_OP_MIN_F32
, val3
, pos128
);
1165 val3
= bld_pow(bld
, val1
, val3
);
1167 dst0
[2] = bld_insn_1(bld
, NV_OP_MOV
, bld
->zero
);
1168 bld_src_predicate(bld
, dst0
[2]->insn
, 1, pred
);
1170 dst0
[2] = bld_insn_2(bld
, NV_OP_SELECT
, val3
, dst0
[2]);
1175 describe_texture_target(unsigned target
, int *dim
,
1176 int *array
, int *cube
, int *shadow
)
1178 *array
= *cube
= *shadow
= 0;
1181 case TGSI_TEXTURE_1D
:
1184 case TGSI_TEXTURE_SHADOW1D
:
1187 case TGSI_TEXTURE_UNKNOWN
:
1188 case TGSI_TEXTURE_2D
:
1189 case TGSI_TEXTURE_RECT
:
1192 case TGSI_TEXTURE_SHADOW2D
:
1193 case TGSI_TEXTURE_SHADOWRECT
:
1197 case TGSI_TEXTURE_3D
:
1200 case TGSI_TEXTURE_CUBE
:
1204 case TGSI_TEXTURE_1D_ARRAY
:
1207 case TGSI_TEXTURE_2D_ARRAY
:
1212 case TGSI_TEXTURE_SHADOW1D_ARRAY:
1213 *dim = *array = *shadow = 1;
1215 case TGSI_TEXTURE_SHADOW2D_ARRAY:
1217 *array = *shadow = 1;
1219 case TGSI_TEXTURE_CUBE_ARRAY:
1230 static struct nv_value
*
1231 bld_clone(struct bld_context
*bld
, struct nv_instruction
*nvi
)
1233 struct nv_instruction
*dupi
= new_instruction(bld
->pc
, nvi
->opcode
);
1234 struct nv_instruction
*next
, *prev
;
1245 for (c
= 0; c
< 5 && nvi
->def
[c
]; ++c
)
1246 bld_def(dupi
, c
, new_value_like(bld
->pc
, nvi
->def
[c
]));
1248 for (c
= 0; c
< 6 && nvi
->src
[c
]; ++c
) {
1249 dupi
->src
[c
] = NULL
;
1250 nv_reference(bld
->pc
, dupi
, c
, nvi
->src
[c
]->value
);
1253 return dupi
->def
[0];
1256 /* NOTE: proj(t0) = (t0 / w) / (tc3 / w) = tc0 / tc2 handled by optimizer */
1258 load_proj_tex_coords(struct bld_context
*bld
,
1259 struct nv_value
*t
[4], int dim
, int shadow
,
1260 const struct tgsi_full_instruction
*insn
)
1263 unsigned mask
= (1 << dim
) - 1;
1266 mask
|= 4; /* depth comparison value */
1268 t
[3] = emit_fetch(bld
, insn
, 0, 3);
1269 if (t
[3]->insn
->opcode
== NV_OP_PINTERP
) {
1270 t
[3] = bld_clone(bld
, t
[3]->insn
);
1271 t
[3]->insn
->opcode
= NV_OP_LINTERP
;
1272 nv_reference(bld
->pc
, t
[3]->insn
, 1, NULL
);
1274 t
[3] = bld_insn_1(bld
, NV_OP_RCP
, t
[3]);
1276 for (c
= 0; c
< 4; ++c
) {
1277 if (!(mask
& (1 << c
)))
1279 t
[c
] = emit_fetch(bld
, insn
, 0, c
);
1281 if (t
[c
]->insn
->opcode
!= NV_OP_PINTERP
)
1285 t
[c
] = bld_clone(bld
, t
[c
]->insn
);
1286 nv_reference(bld
->pc
, t
[c
]->insn
, 1, t
[3]);
1291 t
[3] = emit_fetch(bld
, insn
, 0, 3);
1292 t
[3] = bld_insn_1(bld
, NV_OP_RCP
, t
[3]);
1294 for (c
= 0; c
< 4; ++c
)
1295 if (mask
& (1 << c
))
1296 t
[c
] = bld_insn_2(bld
, NV_OP_MUL_F32
, t
[c
], t
[3]);
1299 /* For a quad of threads / top left, top right, bottom left, bottom right
1300 * pixels, do a different operation, and take src0 from a specific thread.
1307 #define QOP(a, b, c, d) \
1308 ((QOP_##a << 0) | (QOP_##b << 2) | (QOP_##c << 4) | (QOP_##d << 6))
/* Emit a QUADOP instruction: performs the per-pixel sub-operations encoded
 * in qop across the 2x2 pixel quad, taking src0 from the thread selected
 * by lane.
 *
 * NOTE(review): the tail of this function (lines 1317, 1319-1321) is
 * missing from this extract; the `wp` guard and the return statement were
 * reconstructed -- verify against the complete file.
 */
static INLINE struct nv_value *
bld_quadop(struct bld_context *bld, ubyte qop, struct nv_value *src0, int lane,
           struct nv_value *src1, boolean wp)
{
   struct nv_value *val = bld_insn_2(bld, NV_OP_QUADOP, src0, src1);
   val->insn->lanes = lane;  /* thread to take src0 from */
   val->insn->quadop = qop;  /* packed per-pixel sub-opcodes (see QOP()) */
   if (wp) {
      /* predicate writes for quadops are not implemented */
      assert(!"quadop predicate write");
   }
   return val;
}
/* order of TGSI operands: x y z layer shadow lod/bias */
/* order of native operands: layer x y z | lod/bias shadow */
/* Emit a native texture fetch: BIND the coordinate operands (and, when
 * present, the lod/bias and shadow operands) into adjacent registers as
 * the hardware requires, then emit the texture instruction itself,
 * defining all four destination values.
 *
 * NOTE(review): several source lines are missing from this extract (local
 * declarations, the array/lodbias/shadow guard lines, the final return).
 * The structural tokens below were reconstructed -- verify against the
 * complete file.
 */
static struct nv_instruction *
emit_tex(struct bld_context *bld, uint opcode, int tic, int tsc,
         struct nv_value *dst[4], struct nv_value *arg[4],
         int dim, int array, int cube, int shadow)
{
   struct nv_value *src[4];
   struct nv_instruction *nvi, *bnd;
   int c, s = 0;
   boolean lodbias = opcode == NV_OP_TXB || opcode == NV_OP_TXL;

   /* the layer index arrives as a float in TGSI but is read as integer */
   if (array)
      arg[dim] = bld_cvt(bld, NV_TYPE_U32, NV_TYPE_F32, arg[dim]);

   /* bind { layer x y z } and { lod/bias shadow } to adjacent regs */

   bnd = new_instruction(bld->pc, NV_OP_BIND);
   if (array) {
      /* layer index goes first in the native operand order */
      src[s] = new_value(bld->pc, NV_FILE_GPR, 4);
      bld_def(bnd, s, src[s]);
      nv_reference(bld->pc, bnd, s++, arg[dim + cube]);
   }
   for (c = 0; c < dim + cube; ++c, ++s) {
      src[s] = bld_def(bnd, s, new_value(bld->pc, NV_FILE_GPR, 4));
      nv_reference(bld->pc, bnd, s, arg[c]);
   }

   if (shadow || lodbias) {
      /* second bind: lod/bias first, then the shadow reference value */
      bnd = new_instruction(bld->pc, NV_OP_BIND);

      if (lodbias) {
         src[s] = new_value(bld->pc, NV_FILE_GPR, 4);
         bld_def(bnd, 0, src[s++]);
         nv_reference(bld->pc, bnd, 0, arg[dim + cube + array + shadow]);
      }
      if (shadow) {
         src[s] = new_value(bld->pc, NV_FILE_GPR, 4);
         bld_def(bnd, lodbias, src[s++]);
         nv_reference(bld->pc, bnd, lodbias, arg[dim + cube + array]);
      }
   }

   /* the texture instruction itself: 4 defs, the bound values as sources */
   nvi = new_instruction(bld->pc, opcode);
   for (c = 0; c < 4; ++c)
      dst[c] = bld_def(nvi, c, new_value(bld->pc, NV_FILE_GPR, 4));
   for (c = 0; c < s; ++c)
      nv_reference(bld->pc, nvi, c, src[c]);

   nvi->ext.tex.t = tic; /* texture (TIC) index */
   nvi->ext.tex.s = tsc; /* sampler (TSC) index */
   nvi->tex_mask = 0xf;  /* all four components fetched */
   nvi->tex_cube = cube;
   /* (gap: line 1377 not in extract) */
   nvi->tex_cube = cube; /* NOTE(review): duplicate of the assignment two
                          * lines up -- looks redundant; verify whether the
                          * missing line 1377 was meant to set a different
                          * field (e.g. tex_dim) */
   nvi->tex_shadow = shadow;
   nvi->tex_array = array;

   return nvi;
}
/* Translate a TGSI TEX/TXB/TXL/TXP instruction: fetch the coordinate
 * operands, apply projective division (TXP) and cube-map coordinate
 * normalization where needed, append the lod/bias argument, and hand the
 * assembled argument list to emit_tex().
 *
 * NOTE(review): the return type and the else/if(shadow)/if(cube)/
 * if(lodbias) guard lines are missing from this extract and were
 * reconstructed -- verify against the complete file.
 */
static void
bld_tex(struct bld_context *bld, struct nv_value *dst0[4],
        const struct tgsi_full_instruction *insn)
{
   struct nv_value *t[4], *s[3];
   uint opcode = translate_opcode(insn->Instruction.Opcode);
   int c, dim, array, cube, shadow;
   const int lodbias = opcode == NV_OP_TXB || opcode == NV_OP_TXL;
   const int tic = insn->Src[1].Register.Index;
   const int tsc = tic; /* sampler index taken from the same slot */

   describe_texture_target(insn->Texture.Texture,
                           &dim, &array, &cube, &shadow);

   /* at most 5 arguments fit: coords + layer + shadow ref + lod/bias */
   assert(dim + array + shadow + lodbias <= 5);

   if (!cube && !array && insn->Instruction.Opcode == TGSI_OPCODE_TXP)
      load_proj_tex_coords(bld, t, dim, shadow, insn);
   else {
      for (c = 0; c < dim + cube + array; ++c)
         t[c] = emit_fetch(bld, insn, 0, c);
      if (shadow)
         t[c] = emit_fetch(bld, insn, 0, MAX2(c, 2));
   }

   if (cube) {
      /* normalize cube coordinates by 1 / max(|x|, |y|, |z|) */
      for (c = 0; c < 3; ++c)
         s[c] = bld_insn_1(bld, NV_OP_ABS_F32, t[c]);

      s[0] = bld_insn_2(bld, NV_OP_MAX_F32, s[0], s[1]);
      s[0] = bld_insn_2(bld, NV_OP_MAX_F32, s[0], s[2]);
      s[0] = bld_insn_1(bld, NV_OP_RCP, s[0]);

      for (c = 0; c < 3; ++c)
         t[c] = bld_insn_2(bld, NV_OP_MUL_F32, t[c], s[0]);
   }

   /* explicit lod / lod bias is the w component of source 0 */
   if (lodbias)
      t[dim + cube + array + shadow] = emit_fetch(bld, insn, 0, 3);

   emit_tex(bld, opcode, tic, tsc, dst0, t, dim, array, cube, shadow);
}
/* Compute an n-component dot product of sources 0 and 1 as one MUL
 * followed by n-1 MADs; returns the accumulated scalar value.
 *
 * NOTE(review): the line carrying the `int n` parameter, the local
 * declarations and the final `return dotp;` are missing from this extract
 * and were reconstructed -- verify against the complete file.
 */
static INLINE struct nv_value *
bld_dot(struct bld_context *bld, const struct tgsi_full_instruction *insn,
        int n)
{
   struct nv_value *dotp, *src0, *src1;
   int c;

   /* first component: plain multiply */
   src0 = emit_fetch(bld, insn, 0, 0);
   src1 = emit_fetch(bld, insn, 1, 0);
   dotp = bld_insn_2(bld, NV_OP_MUL_F32, src0, src1);

   /* remaining components: multiply-accumulate into dotp */
   for (c = 1; c < n; ++c) {
      src0 = emit_fetch(bld, insn, 0, c);
      src1 = emit_fetch(bld, insn, 1, c);
      dotp = bld_insn_3(bld, NV_OP_MAD_F32, src0, src1, dotp);
   }

   return dotp;
}
/* Iterate chan over the four components, executing the controlled
 * statement only for channels enabled in dst0's write mask. */
#define FOR_EACH_DST0_ENABLED_CHANNEL(chan, inst) \
   for (chan = 0; chan < 4; ++chan) \
      if ((inst)->Dst[0].Register.WriteMask & (1 << chan))
/* Translate one TGSI instruction into IR SSA values (dst0[] per channel),
 * build the structured control flow (IF/ELSE/ENDIF, loops) as basic
 * blocks, then store the results; non-fragment-program outputs are merged
 * with NV_OP_BIND and exported as vectors.
 *
 * NOTE(review): this extract is missing many original source lines -- the
 * return type, `int c;`, the `#endif`, most `break;`s and closing braces,
 * the scalar broadcast statements after FOR_EACH_DST0_ENABLED_CHANNEL
 * (presumably `dst0[c] = temp;`), and several bookkeeping statements.
 * Structural tokens were reconstructed and the remaining logic gaps are
 * marked with "(gap: ...)" comments -- verify against the complete file.
 */
static void
bld_instruction(struct bld_context *bld,
                const struct tgsi_full_instruction *insn)
{
   struct nv_value *src0;
   struct nv_value *src1;
   struct nv_value *src2;
   struct nv_value *dst0[4] = { NULL };
   struct nv_value *temp;
   int c;
   uint opcode = translate_opcode(insn->Instruction.Opcode);
   uint8_t mask = insn->Dst[0].Register.WriteMask;

#ifdef NOUVEAU_DEBUG
   debug_printf("bld_instruction:"); tgsi_dump_instruction(insn, 1);
#endif

   switch (insn->Instruction.Opcode) {
   case TGSI_OPCODE_ADD:
   case TGSI_OPCODE_MAX:
   case TGSI_OPCODE_MIN:
   case TGSI_OPCODE_MUL:
      /* simple per-channel two-operand ops; opcode already translated */
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
         src0 = emit_fetch(bld, insn, 0, c);
         src1 = emit_fetch(bld, insn, 1, c);
         dst0[c] = bld_insn_2(bld, opcode, src0, src1);
      }
      break;
   case TGSI_OPCODE_ARL:
      /* address load: floor, convert to S32, shift left by 4 (vec4 step) */
      src1 = bld_imm_u32(bld, 4);
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
         src0 = emit_fetch(bld, insn, 0, c);
         src0 = bld_insn_1(bld, NV_OP_FLOOR, src0);
         src0->insn->ext.cvt.d = NV_TYPE_S32;
         src0->insn->ext.cvt.s = NV_TYPE_F32;
         dst0[c] = bld_insn_2(bld, NV_OP_SHL, src0, src1);
      }
      break;
   case TGSI_OPCODE_CMP:
      /* dst = (src0 < 0) ? src1 : src2, via a predicate and SELP */
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
         src0 = emit_fetch(bld, insn, 0, c);
         src0 = bld_setp(bld, NV_OP_SET_F32, NV_CC_LT, src0, bld->zero);
         src1 = emit_fetch(bld, insn, 1, c);
         src2 = emit_fetch(bld, insn, 2, c);
         dst0[c] = bld_insn_3(bld, NV_OP_SELP, src1, src2, src0);
      }
      break;
   case TGSI_OPCODE_COS:
   case TGSI_OPCODE_SIN:
      /* xyz share sin/cos of x; w gets sin/cos of its own component */
      src0 = emit_fetch(bld, insn, 0, 0);
      temp = bld_insn_1(bld, NV_OP_PRESIN, src0);
      if (insn->Dst[0].Register.WriteMask & 7)
         temp = bld_insn_1(bld, opcode, temp);
      for (c = 0; c < 3; ++c)
         if (insn->Dst[0].Register.WriteMask & (1 << c))
            dst0[c] = temp; /* (line 1506 reconstructed) */
      if (!(insn->Dst[0].Register.WriteMask & (1 << 3)))
         break; /* (line 1508 reconstructed) */
      src0 = emit_fetch(bld, insn, 0, 3);
      temp = bld_insn_1(bld, NV_OP_PRESIN, src0);
      dst0[3] = bld_insn_1(bld, opcode, temp);
      break;
   case TGSI_OPCODE_DP2:
      temp = bld_dot(bld, insn, 2);
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
         dst0[c] = temp; /* (broadcast line reconstructed) */
      break;
   case TGSI_OPCODE_DP3:
      temp = bld_dot(bld, insn, 3);
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
         dst0[c] = temp; /* (broadcast line reconstructed) */
      break;
   case TGSI_OPCODE_DP4:
      temp = bld_dot(bld, insn, 4);
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
         dst0[c] = temp; /* (broadcast line reconstructed) */
      break;
   case TGSI_OPCODE_DPH:
      /* homogeneous dot product: dp3 + src1.w */
      src0 = bld_dot(bld, insn, 3);
      src1 = emit_fetch(bld, insn, 1, 3);
      temp = bld_insn_2(bld, NV_OP_ADD_F32, src0, src1);
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
         dst0[c] = temp; /* (broadcast line reconstructed) */
      break;
   case TGSI_OPCODE_DST:
      /* distance vector: (1, y0*y1, z0, w1) */
      if (insn->Dst[0].Register.WriteMask & 1)
         dst0[0] = bld_imm_f32(bld, 1.0f);
      if (insn->Dst[0].Register.WriteMask & 2) {
         src0 = emit_fetch(bld, insn, 0, 1);
         src1 = emit_fetch(bld, insn, 1, 1);
         dst0[1] = bld_insn_2(bld, NV_OP_MUL_F32, src0, src1);
      }
      if (insn->Dst[0].Register.WriteMask & 4)
         dst0[2] = emit_fetch(bld, insn, 0, 2);
      if (insn->Dst[0].Register.WriteMask & 8)
         dst0[3] = emit_fetch(bld, insn, 1, 3);
      break;
   case TGSI_OPCODE_EXP:
      /* (2^floor(x), x - floor(x), 2^x, 1) */
      src0 = emit_fetch(bld, insn, 0, 0);
      temp = bld_insn_1(bld, NV_OP_FLOOR, src0);

      if (insn->Dst[0].Register.WriteMask & 2)
         dst0[1] = bld_insn_2(bld, NV_OP_SUB_F32, src0, temp);
      if (insn->Dst[0].Register.WriteMask & 1) {
         temp = bld_insn_1(bld, NV_OP_PREEX2, temp);
         dst0[0] = bld_insn_1(bld, NV_OP_EX2, temp);
      }
      if (insn->Dst[0].Register.WriteMask & 4) {
         temp = bld_insn_1(bld, NV_OP_PREEX2, src0);
         dst0[2] = bld_insn_1(bld, NV_OP_EX2, temp);
      }
      if (insn->Dst[0].Register.WriteMask & 8)
         dst0[3] = bld_imm_f32(bld, 1.0f);
      break;
   case TGSI_OPCODE_EX2:
      src0 = emit_fetch(bld, insn, 0, 0);
      temp = bld_insn_1(bld, NV_OP_PREEX2, src0);
      temp = bld_insn_1(bld, NV_OP_EX2, temp);
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
         dst0[c] = temp; /* (broadcast line reconstructed) */
      break;
   case TGSI_OPCODE_FRC:
      /* fractional part: x - floor(x) */
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
         src0 = emit_fetch(bld, insn, 0, c);
         dst0[c] = bld_insn_1(bld, NV_OP_FLOOR, src0);
         dst0[c] = bld_insn_2(bld, NV_OP_SUB_F32, src0, dst0[c]);
      }
      break;
   case TGSI_OPCODE_KIL:
      /* kill fragment if any source component is negative */
      for (c = 0; c < 4; ++c)
         bld_kil(bld, emit_fetch(bld, insn, 0, c));
      break;
   case TGSI_OPCODE_KILP:
      /* unconditional kill */
      (new_instruction(bld->pc, NV_OP_KIL))->fixed = 1;
      break;
   case TGSI_OPCODE_IF:
   {
      struct nv_basic_block *b = new_basic_block(bld->pc);
      struct nv_value *pred = emit_fetch(bld, insn, 0, 0);

      assert(bld->cond_lvl < BLD_MAX_COND_NESTING);

      nvc0_bblock_attach(bld->pc->current_block, b, CFG_EDGE_FORWARD);

      /* remember where to join / where the conditional branch lives */
      bld->join_bb[bld->cond_lvl] = bld->pc->current_block;
      bld->cond_bb[bld->cond_lvl] = bld->pc->current_block;

      if (pred->insn && NV_BASEOP(pred->insn->opcode) == NV_OP_SET) {
         /* re-emit the comparison writing a predicate register directly */
         pred = bld_clone(bld, pred->insn);
         /* (gap: line 1600 not in extract) */
         pred->reg.file = NV_FILE_PRED;
         if (pred->insn->opcode == NV_OP_FSET_F32)
            pred->insn->opcode = NV_OP_SET_F32;
      } else {
         /* general case: predicate = (pred != 0), unordered too
          * (gap: line 1606 missing -- trailing operands reconstructed) */
         pred = bld_setp(bld, NV_OP_SET_U32, NV_CC_NE | NV_CC_U,
                         pred, bld->zero);
      }

      bld_flow(bld, NV_OP_BRA, pred, NV_CC_NOT_P, NULL,
               (bld->cond_lvl == 0));

      /* (gap: lines 1611-1612 -- presumably ++bld->cond_lvl;) */
      bld_new_block(bld, b);
   }
      break;
   case TGSI_OPCODE_ELSE:
   {
      struct nv_basic_block *b = new_basic_block(bld->pc);

      /* (gap: lines 1619-1620 -- presumably --bld->cond_lvl;) */
      nvc0_bblock_attach(bld->join_bb[bld->cond_lvl], b, CFG_EDGE_FORWARD);

      /* retarget the IF's branch to the else block; the branch out of the
       * then block becomes the one to patch at ENDIF */
      bld->cond_bb[bld->cond_lvl]->exit->target = b;
      bld->cond_bb[bld->cond_lvl] = bld->pc->current_block;

      new_instruction(bld->pc, NV_OP_BRA)->terminator = 1;

      /* (gap: lines 1627-1628 -- presumably ++bld->cond_lvl;) */
      bld_new_block(bld, b);
   }
      break;
   case TGSI_OPCODE_ENDIF:
   {
      struct nv_basic_block *b = new_basic_block(bld->pc);

      /* (gap: lines 1635-1636 -- presumably --bld->cond_lvl;) */
      nvc0_bblock_attach(bld->pc->current_block, b, bld->out_kind);
      nvc0_bblock_attach(bld->cond_bb[bld->cond_lvl], b, CFG_EDGE_FORWARD);

      bld->cond_bb[bld->cond_lvl]->exit->target = b;

      bld_new_block(bld, b);

      /* outermost conditional: emit the JOIN point */
      if (!bld->cond_lvl && bld->join_bb[bld->cond_lvl]) {
         bld->join_bb[bld->cond_lvl]->exit->prev->target = b;
         new_instruction(bld->pc, NV_OP_JOIN)->join = 1;
      }
   }
      break;
   case TGSI_OPCODE_BGNLOOP:
   {
      struct nv_basic_block *bl = new_basic_block(bld->pc); /* loop header */
      struct nv_basic_block *bb = new_basic_block(bld->pc); /* break target */

      assert(bld->loop_lvl < BLD_MAX_LOOP_NESTING);

      bld->loop_bb[bld->loop_lvl] = bl;
      bld->brkt_bb[bld->loop_lvl] = bb;

      nvc0_bblock_attach(bld->pc->current_block, bl, CFG_EDGE_LOOP_ENTER);

      bld_new_block(bld, bld->loop_bb[bld->loop_lvl++]);

      if (bld->loop_lvl == bld->pc->loop_nesting_bound)
         bld->pc->loop_nesting_bound++;

      /* forget def/use info of the new loop level for phi construction */
      bld_clear_def_use(&bld->tvs[0][0], BLD_MAX_TEMPS, bld->loop_lvl);
      bld_clear_def_use(&bld->avs[0][0], BLD_MAX_ADDRS, bld->loop_lvl);
      bld_clear_def_use(&bld->pvs[0][0], BLD_MAX_PREDS, bld->loop_lvl);
   }
      break;
   case TGSI_OPCODE_BRK:
   {
      struct nv_basic_block *bb = bld->brkt_bb[bld->loop_lvl - 1];

      bld_flow(bld, NV_OP_BRA, NULL, NV_CC_P, bb, FALSE);

      if (bld->out_kind == CFG_EDGE_FORWARD) /* else we already had BRK/CONT */
         nvc0_bblock_attach(bld->pc->current_block, bb, CFG_EDGE_LOOP_LEAVE);

      bld->out_kind = CFG_EDGE_FAKE;
   }
      break;
   case TGSI_OPCODE_CONT:
   {
      struct nv_basic_block *bb = bld->loop_bb[bld->loop_lvl - 1];

      bld_flow(bld, NV_OP_BRA, NULL, NV_CC_P, bb, FALSE);

      nvc0_bblock_attach(bld->pc->current_block, bb, CFG_EDGE_BACK);

      /* drop a pending join: the conditional no longer reconverges here */
      if ((bb = bld->join_bb[bld->cond_lvl - 1])) {
         bld->join_bb[bld->cond_lvl - 1] = NULL;
         nvc0_insn_delete(bb->exit->prev);
      }
      bld->out_kind = CFG_EDGE_FAKE;
   }
      break;
   case TGSI_OPCODE_ENDLOOP:
   {
      struct nv_basic_block *bb = bld->loop_bb[bld->loop_lvl - 1];

      if (bld->out_kind != CFG_EDGE_FAKE) { /* else we already had BRK/CONT */
         bld_flow(bld, NV_OP_BRA, NULL, NV_CC_P, bb, FALSE);

         nvc0_bblock_attach(bld->pc->current_block, bb, CFG_EDGE_BACK);
      }
      bld_loop_end(bld, bb); /* replace loop-side operand of the phis */

      bld_new_block(bld, bld->brkt_bb[--bld->loop_lvl]);
   }
      break;
   case TGSI_OPCODE_ABS:
   case TGSI_OPCODE_CEIL:
   case TGSI_OPCODE_FLR:
   case TGSI_OPCODE_TRUNC:
   case TGSI_OPCODE_DDX:
   case TGSI_OPCODE_DDY:
      /* simple per-channel one-operand ops */
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
         src0 = emit_fetch(bld, insn, 0, c);
         dst0[c] = bld_insn_1(bld, opcode, src0);
      }
      break;
   case TGSI_OPCODE_LIT:
      bld_lit(bld, dst0, insn);
      break;
   case TGSI_OPCODE_LRP:
      /* lerp: src0 * (src1 - src2) + src2 */
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
         src0 = emit_fetch(bld, insn, 0, c);
         src1 = emit_fetch(bld, insn, 1, c);
         src2 = emit_fetch(bld, insn, 2, c);
         dst0[c] = bld_insn_2(bld, NV_OP_SUB_F32, src1, src2);
         dst0[c] = bld_insn_3(bld, NV_OP_MAD_F32, dst0[c], src0, src2);
      }
      break;
   case TGSI_OPCODE_MOV:
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
         dst0[c] = emit_fetch(bld, insn, 0, c);
      break;
   case TGSI_OPCODE_MAD:
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
         src0 = emit_fetch(bld, insn, 0, c);
         src1 = emit_fetch(bld, insn, 1, c);
         src2 = emit_fetch(bld, insn, 2, c);
         dst0[c] = bld_insn_3(bld, opcode, src0, src1, src2);
      }
      break;
   case TGSI_OPCODE_POW:
      src0 = emit_fetch(bld, insn, 0, 0);
      src1 = emit_fetch(bld, insn, 1, 0);
      temp = bld_pow(bld, src0, src1);
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
         dst0[c] = temp; /* (broadcast line reconstructed) */
      break;
   case TGSI_OPCODE_LOG:
      /* (floor(lg2|x|), |x| / 2^floor(lg2|x|), lg2|x|, 1) */
      src0 = emit_fetch(bld, insn, 0, 0);
      src0 = bld_insn_1(bld, NV_OP_ABS_F32, src0);
      temp = bld_insn_1(bld, NV_OP_LG2, src0);
      /* (gap: line 1760 -- presumably dst0[2] = temp;) */
      if (insn->Dst[0].Register.WriteMask & 3) {
         temp = bld_insn_1(bld, NV_OP_FLOOR, temp);
         /* (gap: lines 1763-1764 -- presumably dst0[0] = temp;) */
      }
      if (insn->Dst[0].Register.WriteMask & 2) {
         temp = bld_insn_1(bld, NV_OP_PREEX2, temp);
         temp = bld_insn_1(bld, NV_OP_EX2, temp);
         temp = bld_insn_1(bld, NV_OP_RCP, temp);
         dst0[1] = bld_insn_2(bld, NV_OP_MUL_F32, src0, temp);
      }
      if (insn->Dst[0].Register.WriteMask & 8)
         dst0[3] = bld_imm_f32(bld, 1.0f);
      break;
   case TGSI_OPCODE_RCP:
   case TGSI_OPCODE_LG2:
      src0 = emit_fetch(bld, insn, 0, 0);
      temp = bld_insn_1(bld, opcode, src0);
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
         dst0[c] = temp; /* (broadcast line reconstructed) */
      break;
   case TGSI_OPCODE_RSQ:
      src0 = emit_fetch(bld, insn, 0, 0);
      temp = bld_insn_1(bld, NV_OP_ABS_F32, src0);
      temp = bld_insn_1(bld, NV_OP_RSQ, temp);
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
         dst0[c] = temp; /* (broadcast line reconstructed) */
      break;
   case TGSI_OPCODE_SLT:
   case TGSI_OPCODE_SGE:
   case TGSI_OPCODE_SEQ:
   case TGSI_OPCODE_SGT:
   case TGSI_OPCODE_SLE:
   case TGSI_OPCODE_SNE:
   case TGSI_OPCODE_ISLT:
   case TGSI_OPCODE_ISGE:
   case TGSI_OPCODE_USEQ:
   case TGSI_OPCODE_USGE:
   case TGSI_OPCODE_USLT:
   case TGSI_OPCODE_USNE:
      /* comparisons: emit the set op, then patch in the condition code */
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
         src0 = emit_fetch(bld, insn, 0, c);
         src1 = emit_fetch(bld, insn, 1, c);
         dst0[c] = bld_insn_2(bld, opcode, src0, src1);
         dst0[c]->insn->set_cond =
            translate_setcc(insn->Instruction.Opcode);
      }
      break;
   case TGSI_OPCODE_SCS:
      /* (cos(x), sin(x), 0, 1) */
      if (insn->Dst[0].Register.WriteMask & 0x3) {
         src0 = emit_fetch(bld, insn, 0, 0);
         temp = bld_insn_1(bld, NV_OP_PRESIN, src0);
         if (insn->Dst[0].Register.WriteMask & 0x1)
            dst0[0] = bld_insn_1(bld, NV_OP_COS, temp);
         if (insn->Dst[0].Register.WriteMask & 0x2)
            dst0[1] = bld_insn_1(bld, NV_OP_SIN, temp);
      }
      if (insn->Dst[0].Register.WriteMask & 0x4)
         dst0[2] = bld_imm_f32(bld, 0.0f);
      if (insn->Dst[0].Register.WriteMask & 0x8)
         dst0[3] = bld_imm_f32(bld, 1.0f);
      break;
   case TGSI_OPCODE_SSG:
      /* sign: copy the sign bit onto 1.0f, predicated to 0 when x == 0 */
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) { /* XXX: set lt, set gt, sub */
         src0 = emit_fetch(bld, insn, 0, c);
         src1 = bld_setp(bld, NV_OP_SET_F32, NV_CC_EQ, src0, bld->zero);
         temp = bld_insn_2(bld, NV_OP_AND, src0,
                           bld_imm_u32(bld, 0x80000000));
         temp = bld_insn_2(bld, NV_OP_OR, temp, bld_imm_f32(bld, 1.0f));
         dst0[c] = bld_insn_1(bld, NV_OP_MOV, temp);
         bld_src_predicate(bld, dst0[c]->insn, 1, src1);
      }
      break;
   case TGSI_OPCODE_SUB:
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
         src0 = emit_fetch(bld, insn, 0, c);
         src1 = emit_fetch(bld, insn, 1, c);
         dst0[c] = bld_insn_2(bld, NV_OP_SUB_F32, src0, src1);
      }
      break;
   case TGSI_OPCODE_TEX:
   case TGSI_OPCODE_TXB:
   case TGSI_OPCODE_TXL:
   case TGSI_OPCODE_TXP:
      bld_tex(bld, dst0, insn);
      break;
   case TGSI_OPCODE_XPD:
      /* cross product; w channel is set to 1 */
      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
         /* (gap: line 1846 -- presumably `if (c == 3) {`) */
         dst0[3] = bld_imm_f32(bld, 1.0f);
         /* (gap: lines 1848-1849 -- presumably break out of the loop) */

         src0 = emit_fetch(bld, insn, 1, (c + 1) % 3);
         src1 = emit_fetch(bld, insn, 0, (c + 2) % 3);
         dst0[c] = bld_insn_2(bld, NV_OP_MUL_F32, src0, src1);

         src0 = emit_fetch(bld, insn, 0, (c + 1) % 3);
         src1 = emit_fetch(bld, insn, 1, (c + 2) % 3);
         dst0[c] = bld_insn_3(bld, NV_OP_MAD_F32, src0, src1, dst0[c]);

         /* negate the accumulated term: a*b - c*d */
         dst0[c]->insn->src[2]->mod ^= NV_MOD_NEG;
      }
      break;
   case TGSI_OPCODE_RET:
      (new_instruction(bld->pc, NV_OP_RET))->fixed = 1;
      break;
   case TGSI_OPCODE_END:
      /* VP outputs are exported in-place as scalars, optimization later */
      if (bld->pc->is_fragprog)
         bld_export_fp_outputs(bld);
      if (bld->ti->append_ucp)
         bld_append_vp_ucp(bld);
      break; /* (lines 1870-1871 reconstructed) */
   default:
      NOUVEAU_ERR("unhandled opcode %u\n", insn->Instruction.Opcode);
      /* (gap: lines 1873-1876 not in extract -- presumably abort/return) */
   }

   /* Vertex-program outputs: merge adjacent components with NV_OP_BIND and
    * emit a vector EXPORT; fragment program results are stored below. */
   if (insn->Dst[0].Register.File == TGSI_FILE_OUTPUT &&
       !bld->pc->is_fragprog) {
      struct nv_instruction *mi = NULL;
      uint size; /* export size in bytes (declaration reconstructed) */

      if (bld->ti->append_ucp) {
         /* remember the position output for user clip plane emission */
         if (bld->ti->output_loc[insn->Dst[0].Register.Index][0] == 0x70) {
            bld->hpos_index = insn->Dst[0].Register.Index;
            for (c = 0; c < 4; ++c)
               if (mask & (1 << c))
                  STORE_OUTP(insn->Dst[0].Register.Index, c, dst0[c]);
         }
      }

      /* immediates and $r63 (zero) cannot be bound/exported directly */
      for (c = 0; c < 4; ++c)
         if (mask & (1 << c))
            if ((dst0[c]->reg.file == NV_FILE_IMM) ||
                (dst0[c]->reg.file == NV_FILE_GPR &&
                 dst0[c]->reg.id == 63))
               dst0[c] = bld_insn_1(bld, NV_OP_MOV, dst0[c]);

      /* (gap: lines 1896-1897 -- presumably c = 0;) */

      if ((mask & 0x3) == 0x3) {
         /* (gap: lines 1899-1900 -- presumably clear the xy mask bits and
          * set size = 8) */
         mi = bld_insn_2(bld, NV_OP_BIND, dst0[0], dst0[1])->insn;
      }
      if ((mask & 0xc) == 0xc) {
         /* (gap: lines 1904-1906 / 1909-1911 -- the mask/size bookkeeping
          * and the if/else structure below were reconstructed: extend the
          * existing bind to 16 bytes, else bind zw on their own) */
         if (mi) {
            nv_reference(bld->pc, mi, 2, dst0[2]);
            nv_reference(bld->pc, mi, 3, dst0[3]);
         } else {
            mi = bld_insn_2(bld, NV_OP_BIND, dst0[2], dst0[3])->insn;
         }
      } else
      if (mi && (mask & 0x4)) {
         /* (gap: lines 1916-1917 -- presumably set size = 12 and clear the
          * z mask bit) */
         nv_reference(bld->pc, mi, 2, dst0[2]);
      }

      if (mi) { /* (guard reconstructed; line ~1921 missing) */
         struct nv_instruction *ex =
            new_instruction(bld->pc, NV_OP_EXPORT);
         int s;

         nv_reference(bld->pc, ex, 0,
                      new_value(bld->pc, NV_FILE_MEM_V, 4));
         nv_reference(bld->pc, ex, 1, mi->def[0]);

         /* additional bind results feed the remaining export operands */
         for (s = 1; s < size / 4; ++s) {
            bld_def(mi, s, new_value(bld->pc, NV_FILE_GPR, 4));
            nv_reference(bld->pc, ex, s + 1, mi->def[s]);
         }

         /* (gap: lines 1931-1933 not in extract) */
         ex->src[0]->value->reg.size = size;
         ex->src[0]->value->reg.address =
            bld->ti->output_loc[insn->Dst[0].Register.Index][c];
      }
   }

   /* store the per-channel results to their TGSI destination */
   for (c = 0; c < 4; ++c)
      if (mask & (1 << c))
         emit_store(bld, insn, c, dst0[c]);
}
1946 bld_free_registers(struct bld_register
*base
, int n
)
1950 for (i
= 0; i
< n
; ++i
)
1951 for (c
= 0; c
< 4; ++c
)
1952 util_dynarray_fini(&base
[i
* 4 + c
].vals
);
/* Entry point of the TGSI -> IR translation: set up the builder context,
 * translate every parsed instruction, then release the per-register value
 * arrays.
 *
 * NOTE(review): this extract is missing the function head (return type,
 * presumably int), several setup lines (e.g. the CALLOC failure check and
 * bld->pc / bld->ti assignments, `int ip;`) and the tail (code emission /
 * FREE / return) -- reconstructed or marked below; verify against the
 * complete file.
 */
int
nvc0_tgsi_to_nc(struct nv_pc *pc, struct nvc0_translation_info *ti)
{
   struct bld_context *bld = CALLOC_STRUCT(bld_context);
   int ip; /* (declaration reconstructed) */

   /* (gap: lines 1959-1965 not in extract -- presumably the allocation
    * check and bld->pc/bld->ti initialization) */

   pc->root[0] = pc->current_block = new_basic_block(pc);

   pc->loop_nesting_bound = 1;

   /* $r63 always reads as zero */
   bld->zero = new_value(pc, NV_FILE_GPR, 4);
   bld->zero->reg.id = 63;

   if (pc->is_fragprog) {
      /* interpolate 1/w (input at fixed address 0x7c) for perspective
       * correction of the other inputs */
      struct nv_value *mem = new_value(pc, NV_FILE_MEM_V, 4);
      mem->reg.address = 0x7c;

      bld->frag_coord[3] = bld_insn_1(bld, NV_OP_LINTERP, mem);
      bld->frag_coord[3] = bld_insn_1(bld, NV_OP_RCP, bld->frag_coord[3]);
   }

   for (ip = 0; ip < ti->num_insns; ++ip)
      bld_instruction(bld, &ti->insns[ip]);

   bld_free_registers(&bld->tvs[0][0], BLD_MAX_TEMPS);
   bld_free_registers(&bld->avs[0][0], BLD_MAX_ADDRS);
   bld_free_registers(&bld->pvs[0][0], BLD_MAX_PREDS);
   bld_free_registers(&bld->ovs[0][0], PIPE_MAX_SHADER_OUTPUTS);

   /* (gap: lines 1986-1990 not in extract -- presumably FREE(bld) and the
    * return of the translation status) */
}
/* If a variable is assigned in a loop, replace all references to the value
 * from outside the loop with a phi value.
 *
 * Walks the CFG depth-first from block b, rewriting every source operand
 * equal to old_val to reference new_val instead; pass_seq is used as a
 * visited marker so CFG cycles terminate.
 *
 * NOTE(review): the return type and the `int s;` declaration are missing
 * from this extract and were reconstructed.
 */
static void
bld_replace_value(struct nv_pc *pc, struct nv_basic_block *b,
                  struct nv_value *old_val,
                  struct nv_value *new_val)
{
   struct nv_instruction *nvi;
   int s;

   /* rewrite sources in this block, phi instructions first when present */
   for (nvi = b->phi ? b->phi : b->entry; nvi; nvi = nvi->next) {
      for (s = 0; s < 6 && nvi->src[s]; ++s)
         if (nvi->src[s]->value == old_val)
            nv_reference(pc, nvi, s, new_val);
   }

   /* mark visited for this pass, then recurse into unvisited successors */
   b->pass_seq = pc->pass_seq;

   if (b->out[0] && b->out[0]->pass_seq < pc->pass_seq)
      bld_replace_value(pc, b->out[0], old_val, new_val);

   if (b->out[1] && b->out[1]->pass_seq < pc->pass_seq)
      bld_replace_value(pc, b->out[1], old_val, new_val);
}