2 * Copyright 2010 Christoph Bumiller
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 /* XXX: need to clean this up so we get the typecasting right more naturally */
26 * In bld_store_loop_var, only replace values that belong to the TGSI register
28 * For TGSI MOV, we only associate the source value with the value tracker of
29 * the destination, instead of generating an actual MOV.
31 * Possible solution: generate PHI functions in loop headers in advance.
34 * In fetch_by_bb, when going back through a break-block, we miss all of the
35 * definitions from inside the loop.
40 #include "nv50_context.h"
43 #include "pipe/p_shader_tokens.h"
44 #include "tgsi/tgsi_parse.h"
45 #include "tgsi/tgsi_util.h"
47 #include "util/u_simple_list.h"
48 #include "tgsi/tgsi_dump.h"
50 #define BLD_MAX_TEMPS 64
51 #define BLD_MAX_ADDRS 4
52 #define BLD_MAX_PREDS 4
53 #define BLD_MAX_IMMDS 128
55 #define BLD_MAX_COND_NESTING 4
56 #define BLD_MAX_LOOP_NESTING 4
57 #define BLD_MAX_CALL_NESTING 2
59 /* collects all values assigned to the same TGSI register */
/* Records every nv_value ever assigned to one channel of a TGSI register,
 * so a prior definition can later be located per basic block (find_by_bb).
 * NOTE(review): the extraction appears to have dropped members that the
 * rest of this file references (top, size, loop_def) -- confirm against
 * the original source before relying on this declaration. */
60 struct bld_value_stack
{
62 struct nv_value
/* growable history array; reallocated in chunks of 8 pointers */
**body
;
64 uint16_t loop_use
; /* 1 bit per loop level, indicates if used/defd */
69 bld_vals_push_val(struct bld_value_stack
*stk
, struct nv_value
*val
)
71 assert(!stk
->size
|| (stk
->body
[stk
->size
- 1] != val
));
73 if (!(stk
->size
% 8)) {
74 unsigned old_sz
= (stk
->size
+ 0) * sizeof(struct nv_value
*);
75 unsigned new_sz
= (stk
->size
+ 8) * sizeof(struct nv_value
*);
76 stk
->body
= (struct nv_value
**)REALLOC(stk
->body
, old_sz
, new_sz
);
78 stk
->body
[stk
->size
++] = val
;
/* Push the stack's current top value onto its history array.
 * NOTE(review): the extraction dropped this function's return-type line,
 * braces, and apparently a trailing statement (original line 85) --
 * presumably it also resets the top-of-stack; confirm against the
 * original source. */
82 bld_vals_push(struct bld_value_stack
*stk
)
84 bld_vals_push_val(stk
, stk
->top
);
89 bld_push_values(struct bld_value_stack
*stacks
, int n
)
93 for (i
= 0; i
< n
; ++i
)
94 for (c
= 0; c
< 4; ++c
)
95 if (stacks
[i
* 4 + c
].top
)
96 bld_vals_push(&stacks
[i
* 4 + c
]);
/* Per-shader translation state (interior of struct bld_context).
 * NOTE(review): the extraction dropped several members referenced
 * elsewhere in this file (e.g. pc, cond_lvl, loop_lvl, num_immds) and
 * the struct's opening line -- confirm against the original source. */
100 struct nv50_translation_info
*ti
; /* TGSI-level info gathered by the scan pass */
103 struct nv_basic_block
*b
;
105 struct tgsi_parse_context parse
[BLD_MAX_CALL_NESTING
];
/* per-level basic blocks for IF/ELSE/ENDIF reconvergence */
108 struct nv_basic_block
*cond_bb
[BLD_MAX_COND_NESTING
];
109 struct nv_basic_block
*join_bb
[BLD_MAX_COND_NESTING
];
110 struct nv_basic_block
*else_bb
[BLD_MAX_COND_NESTING
];
/* per-level loop header and break-target blocks */
112 struct nv_basic_block
*loop_bb
[BLD_MAX_LOOP_NESTING
];
113 struct nv_basic_block
*brkt_bb
[BLD_MAX_LOOP_NESTING
];
/* SSA value trackers, one 4-channel stack per TGSI register */
116 struct bld_value_stack tvs
[BLD_MAX_TEMPS
][4]; /* TGSI_FILE_TEMPORARY */
117 struct bld_value_stack avs
[BLD_MAX_ADDRS
][4]; /* TGSI_FILE_ADDRESS */
118 struct bld_value_stack pvs
[BLD_MAX_PREDS
][4]; /* TGSI_FILE_PREDICATE */
119 struct bld_value_stack ovs
[PIPE_MAX_SHADER_OUTPUTS
][4];
/* 1 bit per output channel, see STORE_OUTR / bld_is_output_written */
121 uint32_t outputs_written
[PIPE_MAX_SHADER_OUTPUTS
/ 32];
123 struct nv_value
*frgcrd
[4]; /* fragment coordinate, [3] used for PINTERP */
124 struct nv_value
*sysval
[4];
/* small caches to avoid re-emitting loads within a block */
127 struct nv_value
*saved_addr
[4][2];
128 struct nv_value
*saved_inputs
[128];
129 struct nv_value
*saved_immd
[BLD_MAX_IMMDS
];
133 static INLINE
struct nv_value
*
134 bld_fetch(struct bld_context
*bld
, struct bld_value_stack
*stk
, int i
, int c
)
136 stk
[i
* 4 + c
].loop_use
|= 1 << bld
->loop_lvl
;
138 return stk
[i
* 4 + c
].top
;
142 bld_store_loop_var(struct bld_context
*, struct bld_value_stack
*);
145 bld_store(struct bld_context
*bld
, struct bld_value_stack
*stk
, int i
, int c
,
146 struct nv_value
*val
)
148 bld_store_loop_var(bld
, &stk
[i
* 4 + c
]);
150 stk
[i
* 4 + c
].top
= val
;
154 bld_clear_def_use(struct bld_value_stack
*stk
, int n
, int lvl
)
157 const uint16_t mask
= ~(1 << lvl
);
159 for (i
= 0; i
< n
* 4; ++i
) {
160 stk
[i
].loop_def
&= mask
;
161 stk
[i
].loop_use
&= mask
;
165 #define FETCH_TEMP(i, c) bld_fetch(bld, &bld->tvs[0][0], i, c)
166 #define STORE_TEMP(i, c, v) bld_store(bld, &bld->tvs[0][0], i, c, (v))
167 #define FETCH_ADDR(i, c) bld_fetch(bld, &bld->avs[0][0], i, c)
168 #define STORE_ADDR(i, c, v) bld_store(bld, &bld->avs[0][0], i, c, (v))
169 #define FETCH_PRED(i, c) bld_fetch(bld, &bld->pvs[0][0], i, c)
170 #define STORE_PRED(i, c, v) bld_store(bld, &bld->pvs[0][0], i, c, (v))
172 #define STORE_OUTR(i, c, v) \
174 bld->ovs[i][c].top = (v); \
175 bld->outputs_written[(i) / 8] |= 1 << (((i) * 4 + (c)) % 32); \
179 bld_warn_uninitialized(struct bld_context
*bld
, int kind
,
180 struct bld_value_stack
*stk
, struct nv_basic_block
*b
)
182 long i
= (stk
- &bld
->tvs
[0][0]) / 4;
183 long c
= (stk
- &bld
->tvs
[0][0]) & 3;
185 debug_printf("WARNING: TEMP[%li].%c %s used uninitialized in BB:%i\n",
186 i
, (int)('x' + c
), kind
? "may be" : "is", b
->id
);
/* Register @value as defined by slot @c of instruction @i.
 * NOTE(review): the function body lines were dropped by the extraction;
 * presumably it links value->insn / i->def[c] and returns the value --
 * confirm against the original source. */
189 static INLINE
struct nv_value
*
190 bld_def(struct nv_instruction
*i
, int c
, struct nv_value
*value
)
/* Find a value of this stack that was defined in basic block @b: check
 * the current top first, then scan the history newest-to-oldest.
 * NOTE(review): the extraction dropped the `return` lines for the hit
 * cases and the final not-found return -- confirm against the original. */
197 static INLINE
struct nv_value
*
198 find_by_bb(struct bld_value_stack
*stack
, struct nv_basic_block
*b
)
/* fast path: current top was defined in b */
202 if (stack
->top
&& stack
->top
->insn
->bb
== b
)
/* otherwise scan history from the most recent definition backwards */
205 for (i
= stack
->size
- 1; i
>= 0; --i
)
206 if (stack
->body
[i
]->insn
->bb
== b
)
207 return stack
->body
[i
];
211 /* fetch value from stack that was defined in the specified basic block,
212 * or search for first definitions in all of its predecessors
/* Collects candidate reaching definitions into vals[0..*n-1]; back edges
 * are not followed (see the file-header XXX about missing loop defs).
 * NOTE(review): the extraction dropped the found-value handling
 * (duplicate check / append / early return) between the two loops --
 * confirm against the original source. */
215 fetch_by_bb(struct bld_value_stack
*stack
,
216 struct nv_value
**vals
, int *n
,
217 struct nv_basic_block
*b
)
220 struct nv_value
*val
;
222 assert(*n
< 16); /* MAX_COND_NESTING */
224 val
= find_by_bb(stack
, b
);
226 for (i
= 0; i
< *n
; ++i
)
/* recurse into predecessors, skipping loop back edges */
232 for (i
= 0; i
< b
->num_in
; ++i
)
233 if (b
->in_kind
[i
] != CFG_EDGE_BACK
)
234 fetch_by_bb(stack
, vals
, n
, b
->in
[i
]);
237 static INLINE
struct nv_value
*
238 bld_load_imm_u32(struct bld_context
*bld
, uint32_t u
);
240 static struct nv_value
*
241 bld_phi(struct bld_context
*bld
, struct nv_basic_block
*b
,
242 struct bld_value_stack
*stack
)
244 struct nv_basic_block
*in
;
245 struct nv_value
*vals
[16], *val
;
246 struct nv_instruction
*phi
;
251 fetch_by_bb(stack
, vals
, &n
, b
);
254 bld_warn_uninitialized(bld
, 0, stack
, b
);
259 if (nvbb_dominated_by(b
, vals
[0]->insn
->bb
))
262 bld_warn_uninitialized(bld
, 1, stack
, b
);
264 /* back-tracking to insert missing value of other path */
267 if (in
->num_in
== 1) {
270 if (!nvbb_reachable_by(in
->in
[0], vals
[0]->insn
->bb
, b
)) {
274 if (!nvbb_reachable_by(in
->in
[1], vals
[0]->insn
->bb
, b
)) {
281 bld
->pc
->current_block
= in
;
283 /* should make this a no-op */
284 bld_vals_push_val(stack
, bld_load_imm_u32(bld
, 0));
288 for (i
= 0; i
< n
; ++i
) {
289 /* if value dominates b, continue to the redefinitions */
290 if (nvbb_dominated_by(b
, vals
[i
]->insn
->bb
))
293 /* if value dominates any in-block, b should be the dom frontier */
294 for (j
= 0; j
< b
->num_in
; ++j
)
295 if (nvbb_dominated_by(b
->in
[j
], vals
[i
]->insn
->bb
))
297 /* otherwise, find the dominance frontier and put the phi there */
298 if (j
== b
->num_in
) {
299 in
= nvbb_dom_frontier(vals
[i
]->insn
->bb
);
300 val
= bld_phi(bld
, in
, stack
);
301 bld_vals_push_val(stack
, val
);
307 bld
->pc
->current_block
= b
;
312 phi
= new_instruction(bld
->pc
, NV_OP_PHI
);
314 bld_def(phi
, 0, new_value(bld
->pc
, vals
[0]->reg
.file
, vals
[0]->reg
.type
));
315 for (i
= 0; i
< n
; ++i
)
316 phi
->src
[i
] = new_ref(bld
->pc
, vals
[i
]);
321 static INLINE
struct nv_value
*
322 bld_fetch_global(struct bld_context
*bld
, struct bld_value_stack
*stack
)
324 stack
->loop_use
|= 1 << bld
->loop_lvl
;
325 return bld_phi(bld
, bld
->pc
->current_block
, stack
);
328 static INLINE
struct nv_value
*
329 bld_imm_u32(struct bld_context
*bld
, uint32_t u
)
332 unsigned n
= bld
->num_immds
;
334 for (i
= 0; i
< n
; ++i
)
335 if (bld
->saved_immd
[i
]->reg
.imm
.u32
== u
)
336 return bld
->saved_immd
[i
];
337 assert(n
< BLD_MAX_IMMDS
);
341 bld
->saved_immd
[n
] = new_value(bld
->pc
, NV_FILE_IMM
, NV_TYPE_U32
);
342 bld
->saved_immd
[n
]->reg
.imm
.u32
= u
;
343 return bld
->saved_immd
[n
];
347 bld_replace_value(struct nv_pc
*, struct nv_basic_block
*, struct nv_value
*,
350 /* When setting a variable inside a loop, and we have used it before in the
351 * loop, we need to insert a phi function in the loop header.
354 bld_store_loop_var(struct bld_context
*bld
, struct bld_value_stack
*stk
)
356 struct nv_basic_block
*bb
;
357 struct nv_instruction
*phi
;
358 struct nv_value
*val
;
360 uint16_t loop_def
= stk
->loop_def
;
362 if (!(ll
= bld
->loop_lvl
))
364 stk
->loop_def
|= 1 << ll
;
366 if ((~stk
->loop_use
| loop_def
) & (1 << ll
))
370 debug_printf("TEMP[%li].%c used before loop redef (def=%x/use=%x)\n",
371 (stk
- &bld
->tvs
[0][0]) / 4,
372 (int)('x' + ((stk
- &bld
->tvs
[0][0]) & 3)),
373 loop_def
, stk
->loop_use
);
376 stk
->loop_def
|= 1 << ll
;
378 assert(bld
->loop_bb
[ll
- 1]->num_in
== 1);
380 /* get last assignment from outside this loop, could be from bld_phi */
381 val
= stk
->body
[stk
->size
- 1];
383 /* create the phi in the loop entry block */
385 bb
= bld
->pc
->current_block
;
386 bld
->pc
->current_block
= bld
->loop_bb
[ll
- 1];
388 phi
= new_instruction(bld
->pc
, NV_OP_PHI
);
390 bld_def(phi
, 0, new_value(bld
->pc
, val
->reg
.file
, val
->reg
.type
));
393 bld_replace_value(bld
->pc
, bld
->loop_bb
[ll
- 1], val
, phi
->def
[0]);
396 bld_vals_push_val(stk
, phi
->def
[0]);
398 phi
->target
= (struct nv_basic_block
*)stk
; /* cheat */
400 nv_reference(bld
->pc
, &phi
->src
[0], val
);
401 nv_reference(bld
->pc
, &phi
->src
[1], phi
->def
[0]);
403 bld
->pc
->current_block
= bb
;
407 bld_loop_end(struct bld_context
*bld
, struct nv_basic_block
*bb
)
409 struct nv_instruction
*phi
;
410 struct nv_value
*val
;
412 for (phi
= bb
->phi
; phi
&& phi
->opcode
== NV_OP_PHI
; phi
= phi
->next
) {
413 val
= bld_fetch_global(bld
, (struct bld_value_stack
*)phi
->target
);
414 nv_reference(bld
->pc
, &phi
->src
[1], val
);
419 static INLINE
struct nv_value
*
420 bld_imm_f32(struct bld_context
*bld
, float f
)
422 return bld_imm_u32(bld
, fui(f
));
425 #define SET_TYPE(v, t) ((v)->reg.type = NV_TYPE_##t)
427 static struct nv_value
*
428 bld_insn_1(struct bld_context
*bld
, uint opcode
, struct nv_value
*src0
)
430 struct nv_instruction
*insn
= new_instruction(bld
->pc
, opcode
);
433 nv_reference(bld
->pc
, &insn
->src
[0], src0
); /* NOTE: new_ref would suffice */
435 return bld_def(insn
, 0, new_value(bld
->pc
, NV_FILE_GPR
, src0
->reg
.type
));
438 static struct nv_value
*
439 bld_insn_2(struct bld_context
*bld
, uint opcode
,
440 struct nv_value
*src0
, struct nv_value
*src1
)
442 struct nv_instruction
*insn
= new_instruction(bld
->pc
, opcode
);
444 nv_reference(bld
->pc
, &insn
->src
[0], src0
);
445 nv_reference(bld
->pc
, &insn
->src
[1], src1
);
447 return bld_def(insn
, 0, new_value(bld
->pc
, NV_FILE_GPR
, src0
->reg
.type
));
450 static struct nv_value
*
451 bld_insn_3(struct bld_context
*bld
, uint opcode
,
452 struct nv_value
*src0
, struct nv_value
*src1
,
453 struct nv_value
*src2
)
455 struct nv_instruction
*insn
= new_instruction(bld
->pc
, opcode
);
457 nv_reference(bld
->pc
, &insn
->src
[0], src0
);
458 nv_reference(bld
->pc
, &insn
->src
[1], src1
);
459 nv_reference(bld
->pc
, &insn
->src
[2], src2
);
461 return bld_def(insn
, 0, new_value(bld
->pc
, NV_FILE_GPR
, src0
->reg
.type
));
464 #define BLD_INSN_1_EX(d, op, dt, s0, s0t) \
466 (d) = bld_insn_1(bld, (NV_OP_##op), (s0)); \
467 (d)->reg.type = NV_TYPE_##dt; \
468 (d)->insn->src[0]->typecast = NV_TYPE_##s0t; \
471 #define BLD_INSN_2_EX(d, op, dt, s0, s0t, s1, s1t) \
473 (d) = bld_insn_2(bld, (NV_OP_##op), (s0), (s1)); \
474 (d)->reg.type = NV_TYPE_##dt; \
475 (d)->insn->src[0]->typecast = NV_TYPE_##s0t; \
476 (d)->insn->src[1]->typecast = NV_TYPE_##s1t; \
/* Compute x^e as EX2(PREEX2(e * LG2(x))), all in F32.
 * NOTE(review): the trailing `return val;` appears to have been dropped
 * by the extraction -- confirm against the original source. */
479 static struct nv_value
*
480 bld_pow(struct bld_context
*bld
, struct nv_value
*x
, struct nv_value
*e
)
482 struct nv_value
*val
;
/* val = log2(x) */
484 BLD_INSN_1_EX(val
, LG2
, F32
, x
, F32
);
/* val = e * log2(x) */
485 BLD_INSN_2_EX(val
, MUL
, F32
, e
, F32
, val
, F32
);
/* val = 2^(e * log2(x)), PREEX2 is the hw range-reduction step */
486 val
= bld_insn_1(bld
, NV_OP_PREEX2
, val
);
487 val
= bld_insn_1(bld
, NV_OP_EX2
, val
);
492 static INLINE
struct nv_value
*
493 bld_load_imm_f32(struct bld_context
*bld
, float f
)
495 return bld_insn_1(bld
, NV_OP_MOV
, bld_imm_f32(bld
, f
));
498 static INLINE
struct nv_value
*
499 bld_load_imm_u32(struct bld_context
*bld
, uint32_t u
)
501 return bld_insn_1(bld
, NV_OP_MOV
, bld_imm_u32(bld
, u
));
504 static struct nv_value
*
505 bld_get_address(struct bld_context
*bld
, int id
, struct nv_value
*indirect
)
508 struct nv_instruction
*nvi
;
510 for (i
= 0; i
< 4; ++i
) {
511 if (!bld
->saved_addr
[i
][0])
513 if (bld
->saved_addr
[i
][1] == indirect
) {
514 nvi
= bld
->saved_addr
[i
][0]->insn
;
515 if (nvi
->src
[0]->value
->reg
.imm
.u32
== id
)
516 return bld
->saved_addr
[i
][0];
521 bld
->saved_addr
[i
][0] = bld_load_imm_u32(bld
, id
);
522 bld
->saved_addr
[i
][0]->reg
.file
= NV_FILE_ADDR
;
523 bld
->saved_addr
[i
][1] = indirect
;
524 return bld
->saved_addr
[i
][0];
/* Produce a flags (condition-code) value derived from @src: if src's
 * defining instruction cannot carry a flags def (loads, phis, or it lives
 * in a different block), emit a CVT in the current block instead; then
 * attach a flags output if the chosen instruction has none yet.
 * NOTE(review): extraction dropped original lines 538-539 (inside or
 * right after the CVT branch) -- confirm nothing substantive is missing. */
528 static struct nv_value
*
529 bld_predicate(struct bld_context
*bld
, struct nv_value
*src
)
531 struct nv_instruction
*nvi
= src
->insn
;
/* these can't host a flags_def in place; re-emit as CVT here */
533 if (nvi
->opcode
== NV_OP_LDA
||
534 nvi
->opcode
== NV_OP_PHI
||
535 nvi
->bb
!= bld
->pc
->current_block
) {
536 nvi
= new_instruction(bld
->pc
, NV_OP_CVT
);
537 nv_reference(bld
->pc
, &nvi
->src
[0], src
);
/* lazily create the U16 flags result */
540 if (!nvi
->flags_def
) {
541 nvi
->flags_def
= new_value(bld
->pc
, NV_FILE_FLAGS
, NV_TYPE_U16
);
542 nvi
->flags_def
->insn
= nvi
;
544 return nvi
->flags_def
;
/* Emit a conditional fragment KIL predicated on @src's condition codes.
 * NOTE(review): extraction dropped lines 554 and 556 (likely the
 * instruction's fixed flag and condition code) and the return-type
 * line -- confirm against the original source. */
548 bld_kil(struct bld_context
*bld
, struct nv_value
*src
)
550 struct nv_instruction
*nvi
;
/* turn src into a flags value first */
552 src
= bld_predicate(bld
, src
);
553 nvi
= new_instruction(bld
->pc
, NV_OP_KIL
);
555 nvi
->flags_src
= new_ref(bld
->pc
, src
);
560 bld_flow(struct bld_context
*bld
, uint opcode
, ubyte cc
,
561 struct nv_value
*src
, struct nv_basic_block
*target
,
562 boolean plan_reconverge
)
564 struct nv_instruction
*nvi
;
567 new_instruction(bld
->pc
, NV_OP_JOINAT
)->fixed
= 1;
569 nvi
= new_instruction(bld
->pc
, opcode
);
570 nvi
->is_terminator
= 1;
572 nvi
->target
= target
;
574 nvi
->flags_src
= new_ref(bld
->pc
, src
);
578 translate_setcc(unsigned opcode
)
581 case TGSI_OPCODE_SLT
: return NV_CC_LT
;
582 case TGSI_OPCODE_SGE
: return NV_CC_GE
;
583 case TGSI_OPCODE_SEQ
: return NV_CC_EQ
;
584 case TGSI_OPCODE_SGT
: return NV_CC_GT
;
585 case TGSI_OPCODE_SLE
: return NV_CC_LE
;
586 case TGSI_OPCODE_SNE
: return NV_CC_NE
| NV_CC_U
;
587 case TGSI_OPCODE_STR
: return NV_CC_TR
;
588 case TGSI_OPCODE_SFL
: return NV_CC_FL
;
590 case TGSI_OPCODE_ISLT
: return NV_CC_LT
;
591 case TGSI_OPCODE_ISGE
: return NV_CC_GE
;
592 case TGSI_OPCODE_USEQ
: return NV_CC_EQ
;
593 case TGSI_OPCODE_USGE
: return NV_CC_GE
;
594 case TGSI_OPCODE_USLT
: return NV_CC_LT
;
595 case TGSI_OPCODE_USNE
: return NV_CC_NE
;
603 translate_opcode(uint opcode
)
606 case TGSI_OPCODE_ABS
: return NV_OP_ABS
;
607 case TGSI_OPCODE_ADD
:
608 case TGSI_OPCODE_SUB
:
609 case TGSI_OPCODE_UADD
: return NV_OP_ADD
;
610 case TGSI_OPCODE_AND
: return NV_OP_AND
;
611 case TGSI_OPCODE_EX2
: return NV_OP_EX2
;
612 case TGSI_OPCODE_CEIL
: return NV_OP_CEIL
;
613 case TGSI_OPCODE_FLR
: return NV_OP_FLOOR
;
614 case TGSI_OPCODE_TRUNC
: return NV_OP_TRUNC
;
615 case TGSI_OPCODE_DDX
: return NV_OP_DFDX
;
616 case TGSI_OPCODE_DDY
: return NV_OP_DFDY
;
617 case TGSI_OPCODE_F2I
:
618 case TGSI_OPCODE_F2U
:
619 case TGSI_OPCODE_I2F
:
620 case TGSI_OPCODE_U2F
: return NV_OP_CVT
;
621 case TGSI_OPCODE_INEG
: return NV_OP_NEG
;
622 case TGSI_OPCODE_LG2
: return NV_OP_LG2
;
623 case TGSI_OPCODE_ISHR
:
624 case TGSI_OPCODE_USHR
: return NV_OP_SHR
;
625 case TGSI_OPCODE_MAD
:
626 case TGSI_OPCODE_UMAD
: return NV_OP_MAD
;
627 case TGSI_OPCODE_MAX
:
628 case TGSI_OPCODE_IMAX
:
629 case TGSI_OPCODE_UMAX
: return NV_OP_MAX
;
630 case TGSI_OPCODE_MIN
:
631 case TGSI_OPCODE_IMIN
:
632 case TGSI_OPCODE_UMIN
: return NV_OP_MIN
;
633 case TGSI_OPCODE_MUL
:
634 case TGSI_OPCODE_UMUL
: return NV_OP_MUL
;
635 case TGSI_OPCODE_OR
: return NV_OP_OR
;
636 case TGSI_OPCODE_RCP
: return NV_OP_RCP
;
637 case TGSI_OPCODE_RSQ
: return NV_OP_RSQ
;
638 case TGSI_OPCODE_SAD
: return NV_OP_SAD
;
639 case TGSI_OPCODE_SHL
: return NV_OP_SHL
;
640 case TGSI_OPCODE_SLT
:
641 case TGSI_OPCODE_SGE
:
642 case TGSI_OPCODE_SEQ
:
643 case TGSI_OPCODE_SGT
:
644 case TGSI_OPCODE_SLE
:
645 case TGSI_OPCODE_SNE
:
646 case TGSI_OPCODE_ISLT
:
647 case TGSI_OPCODE_ISGE
:
648 case TGSI_OPCODE_USEQ
:
649 case TGSI_OPCODE_USGE
:
650 case TGSI_OPCODE_USLT
:
651 case TGSI_OPCODE_USNE
: return NV_OP_SET
;
652 case TGSI_OPCODE_TEX
: return NV_OP_TEX
;
653 case TGSI_OPCODE_TXP
: return NV_OP_TEX
;
654 case TGSI_OPCODE_TXB
: return NV_OP_TXB
;
655 case TGSI_OPCODE_TXL
: return NV_OP_TXL
;
656 case TGSI_OPCODE_XOR
: return NV_OP_XOR
;
663 infer_src_type(unsigned opcode
)
666 case TGSI_OPCODE_MOV
:
667 case TGSI_OPCODE_AND
:
669 case TGSI_OPCODE_XOR
:
670 case TGSI_OPCODE_SAD
:
671 case TGSI_OPCODE_U2F
:
672 case TGSI_OPCODE_UADD
:
673 case TGSI_OPCODE_UDIV
:
674 case TGSI_OPCODE_UMOD
:
675 case TGSI_OPCODE_UMAD
:
676 case TGSI_OPCODE_UMUL
:
677 case TGSI_OPCODE_UMAX
:
678 case TGSI_OPCODE_UMIN
:
679 case TGSI_OPCODE_USEQ
:
680 case TGSI_OPCODE_USGE
:
681 case TGSI_OPCODE_USLT
:
682 case TGSI_OPCODE_USNE
:
683 case TGSI_OPCODE_USHR
:
685 case TGSI_OPCODE_I2F
:
686 case TGSI_OPCODE_IDIV
:
687 case TGSI_OPCODE_IMAX
:
688 case TGSI_OPCODE_IMIN
:
689 case TGSI_OPCODE_INEG
:
690 case TGSI_OPCODE_ISGE
:
691 case TGSI_OPCODE_ISHR
:
692 case TGSI_OPCODE_ISLT
:
700 infer_dst_type(unsigned opcode
)
703 case TGSI_OPCODE_MOV
:
704 case TGSI_OPCODE_F2U
:
705 case TGSI_OPCODE_AND
:
707 case TGSI_OPCODE_XOR
:
708 case TGSI_OPCODE_SAD
:
709 case TGSI_OPCODE_UADD
:
710 case TGSI_OPCODE_UDIV
:
711 case TGSI_OPCODE_UMOD
:
712 case TGSI_OPCODE_UMAD
:
713 case TGSI_OPCODE_UMUL
:
714 case TGSI_OPCODE_UMAX
:
715 case TGSI_OPCODE_UMIN
:
716 case TGSI_OPCODE_USEQ
:
717 case TGSI_OPCODE_USGE
:
718 case TGSI_OPCODE_USLT
:
719 case TGSI_OPCODE_USNE
:
720 case TGSI_OPCODE_USHR
:
722 case TGSI_OPCODE_F2I
:
723 case TGSI_OPCODE_IDIV
:
724 case TGSI_OPCODE_IMAX
:
725 case TGSI_OPCODE_IMIN
:
726 case TGSI_OPCODE_INEG
:
727 case TGSI_OPCODE_ISGE
:
728 case TGSI_OPCODE_ISHR
:
729 case TGSI_OPCODE_ISLT
:
737 emit_store(struct bld_context
*bld
, const struct tgsi_full_instruction
*inst
,
738 unsigned chan
, struct nv_value
*value
)
740 const struct tgsi_full_dst_register
*reg
= &inst
->Dst
[0];
744 if (inst
->Instruction
.Opcode
!= TGSI_OPCODE_MOV
)
745 value
->reg
.type
= infer_dst_type(inst
->Instruction
.Opcode
);
747 switch (inst
->Instruction
.Saturate
) {
750 case TGSI_SAT_ZERO_ONE
:
751 BLD_INSN_1_EX(value
, SAT
, F32
, value
, F32
);
753 case TGSI_SAT_MINUS_PLUS_ONE
:
754 value
= bld_insn_2(bld
, NV_OP_MAX
, value
, bld_load_imm_f32(bld
, -1.0f
));
755 value
= bld_insn_2(bld
, NV_OP_MIN
, value
, bld_load_imm_f32(bld
, 1.0f
));
756 value
->reg
.type
= NV_TYPE_F32
;
760 switch (reg
->Register
.File
) {
761 case TGSI_FILE_OUTPUT
:
762 value
= bld_insn_1(bld
, NV_OP_MOV
, value
);
763 value
->reg
.file
= bld
->ti
->output_file
;
765 if (bld
->ti
->p
->type
== PIPE_SHADER_FRAGMENT
) {
766 STORE_OUTR(reg
->Register
.Index
, chan
, value
);
768 value
->insn
->fixed
= 1;
769 value
->reg
.id
= bld
->ti
->output_map
[reg
->Register
.Index
][chan
];
772 case TGSI_FILE_TEMPORARY
:
773 assert(reg
->Register
.Index
< BLD_MAX_TEMPS
);
774 value
->reg
.file
= NV_FILE_GPR
;
775 if (value
->insn
->bb
!= bld
->pc
->current_block
)
776 value
= bld_insn_1(bld
, NV_OP_MOV
, value
);
777 STORE_TEMP(reg
->Register
.Index
, chan
, value
);
779 case TGSI_FILE_ADDRESS
:
780 assert(reg
->Register
.Index
< BLD_MAX_ADDRS
);
781 value
->reg
.file
= NV_FILE_ADDR
;
782 STORE_ADDR(reg
->Register
.Index
, chan
, value
);
/* Test the outputs_written bitfield: with c < 0 test whether any channel
 * of output @i was written (0xf group mask), otherwise test channel @c.
 * NOTE(review): the guarding `if (c < 0)` line between the two returns
 * appears to have been dropped by the extraction -- as shown the second
 * return would be unreachable; confirm against the original source. */
787 static INLINE
uint32_t
788 bld_is_output_written(struct bld_context
*bld
, int i
, int c
)
/* whole-register test (any of the 4 channels) */
791 return bld
->outputs_written
[i
/ 8] & (0xf << ((i
* 4) % 32));
/* single-channel test */
792 return bld
->outputs_written
[i
/ 8] & (1 << ((i
* 4 + c
) % 32));
796 bld_export_outputs(struct bld_context
*bld
)
798 struct nv_value
*vals
[4];
799 struct nv_instruction
*nvi
;
802 bld_push_values(&bld
->ovs
[0][0], PIPE_MAX_SHADER_OUTPUTS
);
804 for (i
= 0; i
< PIPE_MAX_SHADER_OUTPUTS
; ++i
) {
805 if (!bld_is_output_written(bld
, i
, -1))
807 for (n
= 0, c
= 0; c
< 4; ++c
) {
808 if (!bld_is_output_written(bld
, i
, c
))
810 vals
[n
] = bld_fetch_global(bld
, &bld
->ovs
[i
][c
]);
812 vals
[n
] = bld_insn_1(bld
, NV_OP_MOV
, vals
[n
]);
813 vals
[n
++]->reg
.id
= bld
->ti
->output_map
[i
][c
];
817 (nvi
= new_instruction(bld
->pc
, NV_OP_EXPORT
))->fixed
= 1;
819 for (c
= 0; c
< n
; ++c
)
820 nvi
->src
[c
] = new_ref(bld
->pc
, vals
[c
]);
825 bld_new_block(struct bld_context
*bld
, struct nv_basic_block
*b
)
829 bld_push_values(&bld
->tvs
[0][0], BLD_MAX_TEMPS
);
830 bld_push_values(&bld
->avs
[0][0], BLD_MAX_ADDRS
);
831 bld_push_values(&bld
->pvs
[0][0], BLD_MAX_PREDS
);
832 bld_push_values(&bld
->ovs
[0][0], PIPE_MAX_SHADER_OUTPUTS
);
834 bld
->pc
->current_block
= b
;
836 for (i
= 0; i
< 4; ++i
)
837 bld
->saved_addr
[i
][0] = NULL
;
839 for (i
= 0; i
< 128; ++i
)
840 bld
->saved_inputs
[i
] = NULL
;
843 static struct nv_value
*
844 bld_saved_input(struct bld_context
*bld
, unsigned i
, unsigned c
)
846 unsigned idx
= bld
->ti
->input_map
[i
][c
];
848 if (bld
->ti
->p
->type
!= PIPE_SHADER_FRAGMENT
)
850 if (bld
->saved_inputs
[idx
])
851 return bld
->saved_inputs
[idx
];
855 static struct nv_value
*
856 bld_interpolate(struct bld_context
*bld
, unsigned mode
, struct nv_value
*val
)
858 if (mode
& (NV50_INTERP_LINEAR
| NV50_INTERP_FLAT
))
859 val
= bld_insn_1(bld
, NV_OP_LINTERP
, val
);
861 val
= bld_insn_2(bld
, NV_OP_PINTERP
, val
, bld
->frgcrd
[3]);
863 val
->insn
->flat
= (mode
& NV50_INTERP_FLAT
) ? 1 : 0;
864 val
->insn
->centroid
= (mode
& NV50_INTERP_CENTROID
) ? 1 : 0;
868 static struct nv_value
*
869 emit_fetch(struct bld_context
*bld
, const struct tgsi_full_instruction
*insn
,
870 const unsigned s
, const unsigned chan
)
872 const struct tgsi_full_src_register
*src
= &insn
->Src
[s
];
873 struct nv_value
*res
;
874 unsigned idx
, swz
, dim_idx
, ind_idx
, ind_swz
;
875 ubyte type
= infer_src_type(insn
->Instruction
.Opcode
);
877 idx
= src
->Register
.Index
;
878 swz
= tgsi_util_get_full_src_register_swizzle(src
, chan
);
883 if (src
->Register
.Indirect
) {
884 ind_idx
= src
->Indirect
.Index
;
885 ind_swz
= tgsi_util_get_src_register_swizzle(&src
->Indirect
, 0);
888 switch (src
->Register
.File
) {
889 case TGSI_FILE_CONSTANT
:
890 dim_idx
= src
->Dimension
.Index
? src
->Dimension
.Index
+ 2 : 1;
891 assert(dim_idx
< 14);
892 assert(dim_idx
== 1); /* for now */
894 res
= new_value(bld
->pc
, NV_FILE_MEM_C(dim_idx
), type
);
895 res
->reg
.type
= type
;
896 res
->reg
.id
= (idx
* 4 + swz
) & 127;
897 res
= bld_insn_1(bld
, NV_OP_LDA
, res
);
899 if (src
->Register
.Indirect
)
900 res
->insn
->src
[4] = new_ref(bld
->pc
, FETCH_ADDR(ind_idx
, ind_swz
));
901 if (idx
>= (128 / 4))
903 new_ref(bld
->pc
, bld_get_address(bld
, (idx
* 16) & ~0x1ff, NULL
));
905 case TGSI_FILE_IMMEDIATE
:
906 assert(idx
< bld
->ti
->immd32_nr
);
907 res
= bld_load_imm_u32(bld
, bld
->ti
->immd32
[idx
* 4 + swz
]);
908 res
->reg
.type
= type
;
910 case TGSI_FILE_INPUT
:
911 res
= bld_saved_input(bld
, idx
, swz
);
912 if (res
&& (insn
->Instruction
.Opcode
!= TGSI_OPCODE_TXP
))
915 res
= new_value(bld
->pc
, bld
->ti
->input_file
, type
);
916 res
->reg
.id
= bld
->ti
->input_map
[idx
][swz
];
918 if (res
->reg
.file
== NV_FILE_MEM_V
) {
919 res
= bld_interpolate(bld
, bld
->ti
->interp_mode
[idx
], res
);
921 assert(src
->Dimension
.Dimension
== 0);
922 res
= bld_insn_1(bld
, NV_OP_LDA
, res
);
924 assert(res
->reg
.type
== type
);
926 bld
->saved_inputs
[bld
->ti
->input_map
[idx
][swz
]] = res
;
928 case TGSI_FILE_TEMPORARY
:
929 /* this should be load from l[], with reload elimination later on */
930 res
= bld_fetch_global(bld
, &bld
->tvs
[idx
][swz
]);
932 case TGSI_FILE_ADDRESS
:
933 res
= bld_fetch_global(bld
, &bld
->avs
[idx
][swz
]);
935 case TGSI_FILE_PREDICATE
:
936 res
= bld_fetch_global(bld
, &bld
->pvs
[idx
][swz
]);
939 NOUVEAU_ERR("illegal/unhandled src reg file: %d\n", src
->Register
.File
);
944 debug_printf("WARNING: undefined source value in TGSI instruction\n");
945 return bld_load_imm_u32(bld
, 0);
948 switch (tgsi_util_get_full_src_register_sign_mode(src
, chan
)) {
949 case TGSI_UTIL_SIGN_KEEP
:
951 case TGSI_UTIL_SIGN_CLEAR
:
952 res
= bld_insn_1(bld
, NV_OP_ABS
, res
);
954 case TGSI_UTIL_SIGN_TOGGLE
:
955 res
= bld_insn_1(bld
, NV_OP_NEG
, res
);
957 case TGSI_UTIL_SIGN_SET
:
958 res
= bld_insn_1(bld
, NV_OP_ABS
, res
);
959 res
= bld_insn_1(bld
, NV_OP_NEG
, res
);
962 NOUVEAU_ERR("illegal/unhandled src reg sign mode\n");
971 bld_lit(struct bld_context
*bld
, struct nv_value
*dst0
[4],
972 const struct tgsi_full_instruction
*insn
)
974 struct nv_value
*val0
, *zero
;
975 unsigned mask
= insn
->Dst
[0].Register
.WriteMask
;
977 if (mask
& ((1 << 0) | (1 << 3)))
978 dst0
[3] = dst0
[0] = bld_load_imm_f32(bld
, 1.0f
);
980 if (mask
& (3 << 1)) {
981 zero
= bld_load_imm_f32(bld
, 0.0f
);
982 val0
= bld_insn_2(bld
, NV_OP_MAX
, emit_fetch(bld
, insn
, 0, 0), zero
);
988 if (mask
& (1 << 2)) {
989 struct nv_value
*val1
, *val3
, *src1
, *src3
;
990 struct nv_value
*pos128
= bld_load_imm_f32(bld
, 127.999999f
);
991 struct nv_value
*neg128
= bld_load_imm_f32(bld
, -127.999999f
);
993 src1
= emit_fetch(bld
, insn
, 0, 1);
994 src3
= emit_fetch(bld
, insn
, 0, 3);
996 val0
->insn
->flags_def
= new_value(bld
->pc
, NV_FILE_FLAGS
, NV_TYPE_U16
);
997 val0
->insn
->flags_def
->insn
= val0
->insn
;
999 val1
= bld_insn_2(bld
, NV_OP_MAX
, src1
, zero
);
1000 val3
= bld_insn_2(bld
, NV_OP_MAX
, src3
, neg128
);
1001 val3
= bld_insn_2(bld
, NV_OP_MIN
, val3
, pos128
);
1002 val3
= bld_pow(bld
, val1
, val3
);
1004 dst0
[2] = bld_insn_1(bld
, NV_OP_MOV
, zero
);
1005 dst0
[2]->insn
->cc
= NV_CC_LE
;
1006 dst0
[2]->insn
->flags_src
= new_ref(bld
->pc
, val0
->insn
->flags_def
);
1008 dst0
[2] = bld_insn_2(bld
, NV_OP_SELECT
, val3
, dst0
[2]);
1013 get_tex_dim(const struct tgsi_full_instruction
*insn
, int *dim
, int *arg
)
1015 switch (insn
->Texture
.Texture
) {
1016 case TGSI_TEXTURE_1D
:
1019 case TGSI_TEXTURE_SHADOW1D
:
1023 case TGSI_TEXTURE_UNKNOWN
:
1024 case TGSI_TEXTURE_2D
:
1025 case TGSI_TEXTURE_RECT
:
1028 case TGSI_TEXTURE_SHADOW2D
:
1029 case TGSI_TEXTURE_SHADOWRECT
:
1033 case TGSI_TEXTURE_3D
:
1034 case TGSI_TEXTURE_CUBE
:
1044 load_proj_tex_coords(struct bld_context
*bld
,
1045 struct nv_value
*t
[4], int dim
,
1046 const struct tgsi_full_instruction
*insn
)
1050 t
[3] = emit_fetch(bld
, insn
, 0, 3);
1052 if (t
[3]->insn
->opcode
== NV_OP_PINTERP
) {
1053 t
[3]->insn
->opcode
= NV_OP_LINTERP
;
1054 nv_reference(bld
->pc
, &t
[3]->insn
->src
[1], NULL
);
1057 t
[3] = bld_insn_1(bld
, NV_OP_RCP
, t
[3]);
1059 for (c
= 0; c
< dim
; ++c
) {
1060 t
[c
] = emit_fetch(bld
, insn
, 0, c
);
1061 if (t
[c
]->insn
->opcode
== NV_OP_LINTERP
)
1062 t
[c
]->insn
->opcode
= NV_OP_PINTERP
;
1064 if (t
[c
]->insn
->opcode
== NV_OP_PINTERP
)
1065 nv_reference(bld
->pc
, &t
[c
]->insn
->src
[1], t
[3]);
1070 for (c
= 0; mask
; ++c
, mask
>>= 1) {
1073 t
[c
] = bld_insn_2(bld
, NV_OP_MUL
, t
[c
], t
[3]);
1078 bld_tex(struct bld_context
*bld
, struct nv_value
*dst0
[4],
1079 const struct tgsi_full_instruction
*insn
)
1081 struct nv_value
*t
[4];
1082 struct nv_instruction
*nvi
;
1083 uint opcode
= translate_opcode(insn
->Instruction
.Opcode
);
1086 get_tex_dim(insn
, &dim
, &arg
);
1088 if (insn
->Texture
.Texture
== TGSI_TEXTURE_CUBE
) {
1091 if (insn
->Instruction
.Opcode
== TGSI_OPCODE_TXP
) {
1092 load_proj_tex_coords(bld
, t
, dim
, insn
);
1094 for (c
= 0; c
< dim
; ++c
)
1095 t
[c
] = emit_fetch(bld
, insn
, 0, c
);
1098 t
[dim
] = emit_fetch(bld
, insn
, 0, 2);
1100 if (insn
->Instruction
.Opcode
== TGSI_OPCODE_TXB
||
1101 insn
->Instruction
.Opcode
== TGSI_OPCODE_TXL
) {
1102 t
[arg
++] = emit_fetch(bld
, insn
, 0, 3);
1105 for (c
= 0; c
< arg
; ++c
) {
1106 t
[c
] = bld_insn_1(bld
, NV_OP_MOV
, t
[c
]);
1107 t
[c
]->reg
.type
= NV_TYPE_F32
;
1110 nvi
= new_instruction(bld
->pc
, opcode
);
1112 for (c
= 0; c
< 4; ++c
) {
1113 nvi
->def
[c
] = dst0
[c
] = new_value(bld
->pc
, NV_FILE_GPR
, NV_TYPE_F32
);
1114 nvi
->def
[c
]->insn
= nvi
;
1116 for (c
= 0; c
< arg
; ++c
)
1117 nvi
->src
[c
] = new_ref(bld
->pc
, t
[c
]);
1119 nvi
->tex_t
= insn
->Src
[1].Register
.Index
;
1121 nvi
->tex_mask
= 0xf;
1122 nvi
->tex_cube
= (insn
->Texture
.Texture
== TGSI_TEXTURE_CUBE
) ? 1 : 0;
1124 nvi
->tex_argc
= arg
;
1127 #define FOR_EACH_DST0_ENABLED_CHANNEL(chan, inst) \
1128 for (chan = 0; chan < 4; ++chan) \
1129 if ((inst)->Dst[0].Register.WriteMask & (1 << chan))
1132 bld_instruction(struct bld_context
*bld
,
1133 const struct tgsi_full_instruction
*insn
)
1135 struct nv_value
*src0
;
1136 struct nv_value
*src1
;
1137 struct nv_value
*src2
;
1138 struct nv_value
*dst0
[4];
1139 struct nv_value
*temp
;
1141 uint opcode
= translate_opcode(insn
->Instruction
.Opcode
);
1143 tgsi_dump_instruction(insn
, 1);
1145 switch (insn
->Instruction
.Opcode
) {
1146 case TGSI_OPCODE_ADD
:
1147 case TGSI_OPCODE_MAX
:
1148 case TGSI_OPCODE_MIN
:
1149 case TGSI_OPCODE_MUL
:
1150 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
) {
1151 src0
= emit_fetch(bld
, insn
, 0, c
);
1152 src1
= emit_fetch(bld
, insn
, 1, c
);
1153 dst0
[c
] = bld_insn_2(bld
, opcode
, src0
, src1
);
1156 case TGSI_OPCODE_CMP
:
1157 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
) {
1158 src0
= emit_fetch(bld
, insn
, 0, c
);
1159 src1
= emit_fetch(bld
, insn
, 1, c
);
1160 src2
= emit_fetch(bld
, insn
, 2, c
);
1161 src0
= bld_predicate(bld
, src0
);
1163 src1
= bld_insn_1(bld
, NV_OP_MOV
, src1
);
1164 src1
->insn
->flags_src
= new_ref(bld
->pc
, src0
);
1165 src1
->insn
->cc
= NV_CC_LT
;
1167 src2
= bld_insn_1(bld
, NV_OP_MOV
, src2
);
1168 src2
->insn
->flags_src
= new_ref(bld
->pc
, src0
);
1169 src2
->insn
->cc
= NV_CC_GE
;
1171 dst0
[c
] = bld_insn_2(bld
, NV_OP_SELECT
, src1
, src2
);
1174 case TGSI_OPCODE_COS
:
1175 src0
= emit_fetch(bld
, insn
, 0, 0);
1176 temp
= bld_insn_1(bld
, NV_OP_PRESIN
, src0
);
1177 if (insn
->Dst
[0].Register
.WriteMask
& 7)
1178 temp
= bld_insn_1(bld
, NV_OP_COS
, temp
);
1179 for (c
= 0; c
< 3; ++c
)
1180 if (insn
->Dst
[0].Register
.WriteMask
& (1 << c
))
1182 if (!(insn
->Dst
[0].Register
.WriteMask
& (1 << 3)))
1184 /* XXX: if src0.x is src0.w, don't emit new insns */
1185 src0
= emit_fetch(bld
, insn
, 0, 3);
1186 temp
= bld_insn_1(bld
, NV_OP_PRESIN
, src0
);
1187 dst0
[3] = bld_insn_1(bld
, NV_OP_COS
, temp
);
1189 case TGSI_OPCODE_DP3
:
1190 src0
= emit_fetch(bld
, insn
, 0, 0);
1191 src1
= emit_fetch(bld
, insn
, 1, 0);
1192 temp
= bld_insn_2(bld
, NV_OP_MUL
, src0
, src1
);
1193 for (c
= 1; c
< 3; ++c
) {
1194 src0
= emit_fetch(bld
, insn
, 0, c
);
1195 src1
= emit_fetch(bld
, insn
, 1, c
);
1196 temp
= bld_insn_3(bld
, NV_OP_MAD
, src0
, src1
, temp
);
1198 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
)
1201 case TGSI_OPCODE_DP4
:
1202 src0
= emit_fetch(bld
, insn
, 0, 0);
1203 src1
= emit_fetch(bld
, insn
, 1, 0);
1204 temp
= bld_insn_2(bld
, NV_OP_MUL
, src0
, src1
);
1205 for (c
= 1; c
< 4; ++c
) {
1206 src0
= emit_fetch(bld
, insn
, 0, c
);
1207 src1
= emit_fetch(bld
, insn
, 1, c
);
1208 temp
= bld_insn_3(bld
, NV_OP_MAD
, src0
, src1
, temp
);
1210 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
)
1213 case TGSI_OPCODE_EX2
:
1214 src0
= emit_fetch(bld
, insn
, 0, 0);
1215 temp
= bld_insn_1(bld
, NV_OP_PREEX2
, src0
);
1216 temp
= bld_insn_1(bld
, NV_OP_EX2
, temp
);
1217 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
)
1220 case TGSI_OPCODE_FRC
:
1221 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
) {
1222 src0
= emit_fetch(bld
, insn
, 0, c
);
1223 dst0
[c
] = bld_insn_1(bld
, NV_OP_FLOOR
, src0
);
1224 dst0
[c
] = bld_insn_2(bld
, NV_OP_SUB
, src0
, dst0
[c
]);
1227 case TGSI_OPCODE_KIL
:
1228 for (c
= 0; c
< 4; ++c
) {
1229 src0
= emit_fetch(bld
, insn
, 0, c
);
1233 case TGSI_OPCODE_IF
:
1235 struct nv_basic_block
*b
= new_basic_block(bld
->pc
);
1237 nvbb_attach_block(bld
->pc
->current_block
, b
, CFG_EDGE_FORWARD
);
1239 bld
->join_bb
[bld
->cond_lvl
] = bld
->pc
->current_block
;
1240 bld
->cond_bb
[bld
->cond_lvl
] = bld
->pc
->current_block
;
1242 src1
= bld_predicate(bld
, emit_fetch(bld
, insn
, 0, 0));
1244 bld_flow(bld
, NV_OP_BRA
, NV_CC_EQ
, src1
, NULL
, FALSE
);
1247 bld_new_block(bld
, b
);
1250 case TGSI_OPCODE_ELSE
:
1252 struct nv_basic_block
*b
= new_basic_block(bld
->pc
);
1255 nvbb_attach_block(bld
->join_bb
[bld
->cond_lvl
], b
, CFG_EDGE_FORWARD
);
1257 bld
->cond_bb
[bld
->cond_lvl
]->exit
->target
= b
;
1258 bld
->cond_bb
[bld
->cond_lvl
] = bld
->pc
->current_block
;
1260 new_instruction(bld
->pc
, NV_OP_BRA
)->is_terminator
= 1;
1263 bld_new_block(bld
, b
);
1266 case TGSI_OPCODE_ENDIF
:
1268 struct nv_basic_block
*b
= new_basic_block(bld
->pc
);
1271 nvbb_attach_block(bld
->pc
->current_block
, b
, CFG_EDGE_FORWARD
);
1272 nvbb_attach_block(bld
->cond_bb
[bld
->cond_lvl
], b
, CFG_EDGE_FORWARD
);
1274 bld
->cond_bb
[bld
->cond_lvl
]->exit
->target
= b
;
1276 if (0 && bld
->join_bb
[bld
->cond_lvl
]) {
1277 bld
->join_bb
[bld
->cond_lvl
]->exit
->prev
->target
= b
;
1279 new_instruction(bld
->pc
, NV_OP_NOP
)->is_join
= TRUE
;
1282 bld_new_block(bld
, b
);
1285 case TGSI_OPCODE_BGNLOOP
:
1287 struct nv_basic_block
*bl
= new_basic_block(bld
->pc
);
1288 struct nv_basic_block
*bb
= new_basic_block(bld
->pc
);
1290 bld
->loop_bb
[bld
->loop_lvl
] = bl
;
1291 bld
->brkt_bb
[bld
->loop_lvl
] = bb
;
1293 bld_flow(bld
, NV_OP_BREAKADDR
, NV_CC_TR
, NULL
, bb
, FALSE
);
1295 nvbb_attach_block(bld
->pc
->current_block
, bl
, CFG_EDGE_LOOP_ENTER
);
1297 bld_new_block(bld
, bld
->loop_bb
[bld
->loop_lvl
++]);
1299 if (bld
->loop_lvl
== bld
->pc
->loop_nesting_bound
)
1300 bld
->pc
->loop_nesting_bound
++;
1302 bld_clear_def_use(&bld
->tvs
[0][0], BLD_MAX_TEMPS
, bld
->loop_lvl
);
1303 bld_clear_def_use(&bld
->avs
[0][0], BLD_MAX_ADDRS
, bld
->loop_lvl
);
1304 bld_clear_def_use(&bld
->pvs
[0][0], BLD_MAX_PREDS
, bld
->loop_lvl
);
1307 case TGSI_OPCODE_BRK
:
1309 struct nv_basic_block
*bb
= bld
->brkt_bb
[bld
->loop_lvl
- 1];
1311 bld_flow(bld
, NV_OP_BREAK
, NV_CC_TR
, NULL
, bb
, FALSE
);
1313 /* XXX: don't do this for redundant BRKs */
1314 nvbb_attach_block(bld
->pc
->current_block
, bb
, CFG_EDGE_LOOP_LEAVE
);
1317 case TGSI_OPCODE_CONT
:
1319 struct nv_basic_block
*bb
= bld
->loop_bb
[bld
->loop_lvl
- 1];
1321 bld_flow(bld
, NV_OP_BRA
, NV_CC_TR
, NULL
, bb
, FALSE
);
1323 nvbb_attach_block(bld
->pc
->current_block
, bb
, CFG_EDGE_BACK
);
1326 case TGSI_OPCODE_ENDLOOP
:
1328 struct nv_basic_block
*bb
= bld
->loop_bb
[--bld
->loop_lvl
];
1330 bld_flow(bld
, NV_OP_BRA
, NV_CC_TR
, NULL
, bb
, FALSE
);
1332 nvbb_attach_block(bld
->pc
->current_block
, bb
, CFG_EDGE_BACK
);
1334 bld_loop_end(bld
, bb
); /* replace loop-side operand of the phis */
1336 bld_new_block(bld
, bld
->brkt_bb
[bld
->loop_lvl
]);
1339 case TGSI_OPCODE_ABS
:
1340 case TGSI_OPCODE_CEIL
:
1341 case TGSI_OPCODE_FLR
:
1342 case TGSI_OPCODE_TRUNC
:
1343 case TGSI_OPCODE_DDX
:
1344 case TGSI_OPCODE_DDY
:
1345 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
) {
1346 src0
= emit_fetch(bld
, insn
, 0, c
);
1347 dst0
[c
] = bld_insn_1(bld
, opcode
, src0
);
1350 case TGSI_OPCODE_LIT
:
1351 bld_lit(bld
, dst0
, insn
);
1353 case TGSI_OPCODE_LRP
:
1354 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
) {
1355 src0
= emit_fetch(bld
, insn
, 0, c
);
1356 src1
= emit_fetch(bld
, insn
, 1, c
);
1357 src2
= emit_fetch(bld
, insn
, 2, c
);
1358 dst0
[c
] = bld_insn_2(bld
, NV_OP_SUB
, src1
, src2
);
1359 dst0
[c
] = bld_insn_3(bld
, NV_OP_MAD
, dst0
[c
], src0
, src2
);
1362 case TGSI_OPCODE_MOV
:
1363 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
)
1364 dst0
[c
] = emit_fetch(bld
, insn
, 0, c
);
1366 case TGSI_OPCODE_MAD
:
1367 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
) {
1368 src0
= emit_fetch(bld
, insn
, 0, c
);
1369 src1
= emit_fetch(bld
, insn
, 1, c
);
1370 src2
= emit_fetch(bld
, insn
, 2, c
);
1371 dst0
[c
] = bld_insn_3(bld
, opcode
, src0
, src1
, src2
);
1374 case TGSI_OPCODE_POW
:
1375 src0
= emit_fetch(bld
, insn
, 0, 0);
1376 src1
= emit_fetch(bld
, insn
, 1, 0);
1377 temp
= bld_pow(bld
, src0
, src1
);
1378 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
)
1381 case TGSI_OPCODE_RCP
:
1382 case TGSI_OPCODE_LG2
:
1383 src0
= emit_fetch(bld
, insn
, 0, 0);
1384 temp
= bld_insn_1(bld
, opcode
, src0
);
1385 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
)
1388 case TGSI_OPCODE_RSQ
:
1389 src0
= emit_fetch(bld
, insn
, 0, 0);
1390 temp
= bld_insn_1(bld
, NV_OP_ABS
, src0
);
1391 temp
= bld_insn_1(bld
, NV_OP_RSQ
, temp
);
1392 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
)
1395 case TGSI_OPCODE_SLT
:
1396 case TGSI_OPCODE_SGE
:
1397 case TGSI_OPCODE_SEQ
:
1398 case TGSI_OPCODE_SGT
:
1399 case TGSI_OPCODE_SLE
:
1400 case TGSI_OPCODE_SNE
:
1401 case TGSI_OPCODE_ISLT
:
1402 case TGSI_OPCODE_ISGE
:
1403 case TGSI_OPCODE_USEQ
:
1404 case TGSI_OPCODE_USGE
:
1405 case TGSI_OPCODE_USLT
:
1406 case TGSI_OPCODE_USNE
:
1407 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
) {
1408 src0
= emit_fetch(bld
, insn
, 0, c
);
1409 src1
= emit_fetch(bld
, insn
, 1, c
);
1410 dst0
[c
] = bld_insn_2(bld
, NV_OP_SET
, src0
, src1
);
1411 dst0
[c
]->insn
->set_cond
= translate_setcc(insn
->Instruction
.Opcode
);
1412 dst0
[c
]->reg
.type
= infer_dst_type(insn
->Instruction
.Opcode
);
1414 dst0
[c
]->insn
->src
[0]->typecast
=
1415 dst0
[c
]->insn
->src
[1]->typecast
=
1416 infer_src_type(insn
->Instruction
.Opcode
);
1418 if (dst0
[c
]->reg
.type
!= NV_TYPE_F32
)
1420 dst0
[c
] = bld_insn_1(bld
, NV_OP_ABS
, dst0
[c
]);
1421 dst0
[c
]->insn
->src
[0]->typecast
= NV_TYPE_S32
;
1422 dst0
[c
]->reg
.type
= NV_TYPE_S32
;
1423 dst0
[c
] = bld_insn_1(bld
, NV_OP_CVT
, dst0
[c
]);
1424 dst0
[c
]->reg
.type
= NV_TYPE_F32
;
1427 case TGSI_OPCODE_SUB
:
1428 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
) {
1429 src0
= emit_fetch(bld
, insn
, 0, c
);
1430 src1
= emit_fetch(bld
, insn
, 1, c
);
1431 dst0
[c
] = bld_insn_2(bld
, NV_OP_ADD
, src0
, src1
);
1432 dst0
[c
]->insn
->src
[1]->mod
^= NV_MOD_NEG
;
1435 case TGSI_OPCODE_TEX
:
1436 case TGSI_OPCODE_TXB
:
1437 case TGSI_OPCODE_TXL
:
1438 case TGSI_OPCODE_TXP
:
1439 bld_tex(bld
, dst0
, insn
);
1441 case TGSI_OPCODE_XPD
:
1442 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
) {
1444 dst0
[3] = bld_imm_f32(bld
, 1.0f
);
1447 src0
= emit_fetch(bld
, insn
, 0, (c
+ 1) % 3);
1448 src1
= emit_fetch(bld
, insn
, 1, (c
+ 2) % 3);
1449 dst0
[c
] = bld_insn_2(bld
, NV_OP_MUL
, src0
, src1
);
1451 src0
= emit_fetch(bld
, insn
, 0, (c
+ 2) % 3);
1452 src1
= emit_fetch(bld
, insn
, 1, (c
+ 1) % 3);
1453 dst0
[c
] = bld_insn_3(bld
, NV_OP_MAD
, src0
, src1
, dst0
[c
]);
1455 dst0
[c
]->insn
->src
[2]->mod
^= NV_MOD_NEG
;
1458 case TGSI_OPCODE_END
:
1459 if (bld
->ti
->p
->type
== PIPE_SHADER_FRAGMENT
)
1460 bld_export_outputs(bld
);
1463 NOUVEAU_ERR("nv_bld: unhandled opcode %u\n", insn
->Instruction
.Opcode
);
1468 FOR_EACH_DST0_ENABLED_CHANNEL(c
, insn
)
1469 emit_store(bld
, insn
, c
, dst0
[c
]);
/* Free the heap-allocated value stacks of an array of value trackers.
 * base points at n trackers of 4 channels each, i.e. n * 4 contiguous
 * bld_value_stack entries; only stacks whose body array was ever
 * allocated are freed.
 * NOTE(review): the storage-class/return-type line and the local
 * declarations of i and c are not visible in this extraction.
 */
1473 bld_free_value_trackers(struct bld_value_stack
*base
, int n
)
/* Walk all n * 4 channel stacks and release any allocated body array. */
1477 for (i
= 0; i
< n
; ++i
)
1478 for (c
= 0; c
< 4; ++c
)
1479 if (base
[i
* 4 + c
].body
)
1480 FREE(base
[i
* 4 + c
].body
);
/* Entry point: translate the parsed TGSI program described by ti into
 * nv50 intermediate code, emitting into the program container pc.
 * NOTE(review): this extraction omits several body lines (builder field
 * setup near the top, allocation-failure handling, the non-instruction
 * token cases, the parse-loop epilogue and the final cleanup/return);
 * the comments below describe only the statements that are visible.
 */
1484 nv50_tgsi_to_nc(struct nv_pc
*pc
, struct nv50_translation_info
*ti
)
/* Zero-initialized builder context holding all translation state. */
1486 struct bld_context
*bld
= CALLOC_STRUCT(bld_context
);
/* Create the root basic block and make it the current emission target. */
1489 pc
->root
= pc
->current_block
= new_basic_block(pc
);
/* Start with a nesting bound of 1; BGNLOOP handling in bld_instruction
 * raises it as deeper loop levels are entered. */
1494 pc
->loop_nesting_bound
= 1;
/* Count the inputs flagged in the top byte of fp.interp.
 * NOTE(review): assumes bld->ti was set to ti earlier (the assignment
 * is not visible in this extraction) — confirm against the full file. */
1496 c
= util_bitcount(bld
->ti
->p
->fp
.interp
>> 24);
/* For fragment programs with such inputs: interpolate input c - 1 with
 * LINTERP and take its reciprocal, caching the result in frgcrd[3].
 * NOTE(review): presumably this is 1/w for perspective correction —
 * confirm. */
1497 if (c
&& ti
->p
->type
== PIPE_SHADER_FRAGMENT
) {
1498 bld
->frgcrd
[3] = new_value(pc
, NV_FILE_MEM_V
, NV_TYPE_F32
);
1499 bld
->frgcrd
[3]->reg
.id
= c
- 1;
1500 bld
->frgcrd
[3] = bld_insn_1(bld
, NV_OP_LINTERP
, bld
->frgcrd
[3]);
1501 bld
->frgcrd
[3] = bld_insn_1(bld
, NV_OP_RCP
, bld
->frgcrd
[3]);
/* Begin parsing the TGSI token stream at call level 0. */
1504 tgsi_parse_init(&bld
->parse
[0], ti
->p
->pipe
.tokens
);
/* Main translation loop: consume tokens at the current call level
 * until the stream is exhausted. */
1506 while (!tgsi_parse_end_of_tokens(&bld
->parse
[bld
->call_lvl
])) {
1507 const union tgsi_full_token
*tok
= &bld
->parse
[bld
->call_lvl
].FullToken
;
1509 tgsi_parse_token(&bld
->parse
[bld
->call_lvl
]);
/* Dispatch on token type; in the visible part only instruction tokens
 * are handled — each one is translated by bld_instruction. */
1511 switch (tok
->Token
.Type
) {
1512 case TGSI_TOKEN_TYPE_INSTRUCTION
:
1513 bld_instruction(bld
, &tok
->FullInstruction
);
/* Release the per-register value stacks for temporaries, address
 * registers, predicates, and shader outputs. */
1520 bld_free_value_trackers(&bld
->tvs
[0][0], BLD_MAX_TEMPS
);
1521 bld_free_value_trackers(&bld
->avs
[0][0], BLD_MAX_ADDRS
);
1522 bld_free_value_trackers(&bld
->pvs
[0][0], BLD_MAX_PREDS
);
1524 bld_free_value_trackers(&bld
->ovs
[0][0], PIPE_MAX_SHADER_OUTPUTS
);
1530 /* If a variable is assigned inside a loop, replace all references to its
1531 * pre-loop value (the definition from outside the loop) with a phi value.
1534 bld_replace_value(struct nv_pc
*pc
, struct nv_basic_block
*b
,
1535 struct nv_value
*old_val
,
1536 struct nv_value
*new_val
)
1538 struct nv_instruction
*nvi
;
1540 for (nvi
= b
->entry
; nvi
; nvi
= nvi
->next
) {
1542 for (s
= 0; s
< 5; ++s
) {
1545 if (nvi
->src
[s
]->value
== old_val
)
1546 nv_reference(pc
, &nvi
->src
[s
], new_val
);
1548 if (nvi
->flags_src
&& nvi
->flags_src
->value
== old_val
)
1549 nv_reference(pc
, &nvi
->flags_src
, new_val
);
1552 b
->pass_seq
= pc
->pass_seq
;
1554 if (b
->out
[0] && b
->out
[0]->pass_seq
< pc
->pass_seq
)
1555 bld_replace_value(pc
, b
->out
[0], old_val
, new_val
);
1557 if (b
->out
[1] && b
->out
[1]->pass_seq
< pc
->pass_seq
)
1558 bld_replace_value(pc
, b
->out
[1], old_val
, new_val
);