2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **********************************************************************/
29 * Keith Whitwell <keith@tungstengraphics.com>
32 #include "pipe/p_shader_tokens.h"
34 #include "util/u_memory.h"
35 #include "util/u_math.h"
37 #include "tgsi/tgsi_parse.h"
38 #include "tgsi/tgsi_dump.h"
39 #include "tgsi/tgsi_info.h"
41 #include "brw_context.h"
43 #include "brw_debug.h"
47 static struct brw_reg
get_tmp( struct brw_vs_compile
*c
)
49 struct brw_reg tmp
= brw_vec8_grf(c
->last_tmp
, 0);
51 if (++c
->last_tmp
> c
->prog_data
.total_grf
)
52 c
->prog_data
.total_grf
= c
->last_tmp
;
57 static void release_tmp( struct brw_vs_compile
*c
, struct brw_reg tmp
)
59 if (tmp
.nr
== c
->last_tmp
-1)
63 static void release_tmps( struct brw_vs_compile
*c
)
65 c
->last_tmp
= c
->first_tmp
;
71 * Preallocate GRF register before code emit.
72 * Do things as simply as possible. Allocate and populate all regs
75 static void brw_vs_alloc_regs( struct brw_vs_compile
*c
)
77 GLuint i
, reg
= 0, mrf
;
78 int attributes_in_vue
;
80 /* Determine whether to use a real constant buffer or use a block
81 * of GRF registers for constants. The latter is faster but only
82 * works if everything fits in the GRF.
83 * XXX this heuristic/check may need some fine tuning...
85 if (c
->vp
->info
.file_max
[TGSI_FILE_CONSTANT
] +
86 c
->vp
->info
.file_max
[TGSI_FILE_IMMEDIATE
] +
87 c
->vp
->info
.file_max
[TGSI_FILE_TEMPORARY
] + 21 > BRW_MAX_GRF
)
88 c
->vp
->use_const_buffer
= GL_TRUE
;
90 /* XXX: immediates can go elsewhere if necessary:
92 assert(c
->vp
->info
.file_max
[TGSI_FILE_IMMEDIATE
] +
93 c
->vp
->info
.file_max
[TGSI_FILE_TEMPORARY
] + 21 <= BRW_MAX_GRF
);
95 c
->vp
->use_const_buffer
= GL_FALSE
;
98 /*printf("use_const_buffer = %d\n", c->vp->use_const_buffer);*/
100 /* r0 -- reserved as usual
102 c
->r0
= brw_vec8_grf(reg
, 0);
105 /* User clip planes from curbe:
107 if (c
->key
.nr_userclip
) {
108 for (i
= 0; i
< c
->key
.nr_userclip
; i
++) {
109 c
->userplane
[i
] = stride( brw_vec4_grf(reg
+3+i
/2, (i
%2) * 4), 0, 4, 1);
112 /* Deal with curbe alignment:
114 reg
+= ((6 + c
->key
.nr_userclip
+ 3) / 4) * 2;
117 /* Vertex program parameters from curbe:
119 if (c
->vp
->use_const_buffer
) {
120 /* get constants from a real constant buffer */
121 c
->prog_data
.curb_read_length
= 0;
122 c
->prog_data
.nr_params
= 4; /* XXX 0 causes a bug elsewhere... */
125 /* use a section of the GRF for constants */
126 GLuint nr_params
= c
->vp
->info
.file_max
[TGSI_FILE_CONSTANT
] + 1;
127 for (i
= 0; i
< nr_params
; i
++) {
128 c
->regs
[TGSI_FILE_CONSTANT
][i
] = stride( brw_vec4_grf(reg
+i
/2, (i
%2) * 4), 0, 4, 1);
130 reg
+= (nr_params
+ 1) / 2;
131 c
->prog_data
.curb_read_length
= reg
- 1;
132 c
->prog_data
.nr_params
= nr_params
* 4;
135 /* Allocate input regs:
137 c
->nr_inputs
= c
->vp
->info
.num_inputs
;
138 for (i
= 0; i
< c
->nr_inputs
; i
++) {
139 c
->regs
[TGSI_FILE_INPUT
][i
] = brw_vec8_grf(reg
, 0);
143 /* If there are no inputs, we'll still be reading one attribute's worth
144 * because it's required -- see urb_read_length setting.
146 if (c
->nr_inputs
== 0)
149 /* Allocate a GRF and load immediate values by hand with 4 MOVs!!!
151 * XXX: Try to encode float immediates as brw immediates
152 * XXX: Put immediates into the CURBE.
153 * XXX: Make sure ureg sets minimal immediate size and respect it
156 for (i
= 0; i
< c
->nr_immediates
; i
++) {
160 r
= brw_vec8_grf(reg
, 0);
162 for (j
= 0; j
< 4; j
++) {
164 brw_writemask(r
, (1<<j
)),
165 brw_imm_f(c
->immediate
[i
][j
]));
172 /* Allocate outputs. The non-position outputs go straight into message regs.
174 c
->nr_outputs
= c
->prog_data
.nr_outputs
;
175 c
->first_output
= reg
;
176 c
->first_overflow_output
= 0;
178 if (c
->chipset
.is_igdng
)
183 /* XXX: need to access vertex output semantics here:
185 for (i
= 0; i
< c
->prog_data
.nr_outputs
; i
++) {
186 assert(i
< Elements(c
->regs
[TGSI_FILE_OUTPUT
]));
188 /* XXX: Hardwire position to zero:
191 c
->regs
[TGSI_FILE_OUTPUT
][i
] = brw_vec8_grf(reg
, 0);
194 /* XXX: disable psiz:
197 c
->regs
[TGSI_FILE_OUTPUT
][i
] = brw_vec8_grf(reg
, 0);
199 mrf
++; /* just a placeholder? XXX fix later stages & remove this */
202 c
->regs
[TGSI_FILE_OUTPUT
][i
] = brw_message_reg(mrf
);
206 /* too many vertex results to fit in MRF, use GRF for overflow */
207 if (!c
->first_overflow_output
)
208 c
->first_overflow_output
= i
;
209 c
->regs
[TGSI_FILE_OUTPUT
][i
] = brw_vec8_grf(reg
, 0);
214 /* Allocate program temporaries:
217 for (i
= 0; i
< c
->vp
->info
.file_max
[TGSI_FILE_TEMPORARY
]+1; i
++) {
218 c
->regs
[TGSI_FILE_TEMPORARY
][i
] = brw_vec8_grf(reg
, 0);
222 /* Address reg(s). Don't try to use the internal address reg until
225 for (i
= 0; i
< c
->vp
->info
.file_max
[TGSI_FILE_ADDRESS
]+1; i
++) {
226 c
->regs
[TGSI_FILE_ADDRESS
][i
] = brw_reg(BRW_GENERAL_REGISTER_FILE
,
230 BRW_VERTICAL_STRIDE_8
,
232 BRW_HORIZONTAL_STRIDE_1
,
238 if (c
->vp
->use_const_buffer
) {
239 for (i
= 0; i
< 3; i
++) {
240 c
->current_const
[i
].index
= -1;
241 c
->current_const
[i
].reg
= brw_vec8_grf(reg
, 0);
247 for (i
= 0; i
< 128; i
++) {
248 if (c
->output_regs
[i
].used_in_src
) {
249 c
->output_regs
[i
].reg
= brw_vec8_grf(reg
, 0);
255 c
->stack
= brw_uw16_reg(BRW_GENERAL_REGISTER_FILE
, reg
, 0);
258 /* Some opcodes need an internal temporary:
261 c
->last_tmp
= reg
; /* for allocation purposes */
263 /* Each input reg holds data from two vertices. The
264 * urb_read_length is the number of registers read from *each*
265 * vertex urb, so is half the amount:
267 c
->prog_data
.urb_read_length
= (c
->nr_inputs
+ 1) / 2;
269 /* Setting this field to 0 leads to undefined behavior according to the
270 * VS_STATE docs. Our VUEs will always have at least one attribute
271 * sitting in them, even if it's padding.
273 if (c
->prog_data
.urb_read_length
== 0)
274 c
->prog_data
.urb_read_length
= 1;
276 /* The VS VUEs are shared by VF (outputting our inputs) and VS, so size
277 * them to fit the biggest thing they need to.
279 attributes_in_vue
= MAX2(c
->nr_outputs
, c
->nr_inputs
);
281 if (c
->chipset
.is_igdng
)
282 c
->prog_data
.urb_entry_size
= (attributes_in_vue
+ 6 + 3) / 4;
284 c
->prog_data
.urb_entry_size
= (attributes_in_vue
+ 2 + 3) / 4;
286 c
->prog_data
.total_grf
= reg
;
288 if (BRW_DEBUG
& DEBUG_VS
) {
289 debug_printf("%s NumAddrRegs %d\n", __FUNCTION__
,
290 c
->vp
->info
.file_max
[TGSI_FILE_ADDRESS
]+1);
291 debug_printf("%s NumTemps %d\n", __FUNCTION__
,
292 c
->vp
->info
.file_max
[TGSI_FILE_TEMPORARY
]+1);
293 debug_printf("%s reg = %d\n", __FUNCTION__
, reg
);
299 * If an instruction uses a temp reg both as a src and the dest, we
300 * sometimes need to allocate an intermediate temporary.
302 static void unalias1( struct brw_vs_compile
*c
,
305 void (*func
)( struct brw_vs_compile
*,
309 if (dst
.file
== arg0
.file
&& dst
.nr
== arg0
.nr
) {
310 struct brw_compile
*p
= &c
->func
;
311 struct brw_reg tmp
= brw_writemask(get_tmp(c
), dst
.dw1
.bits
.writemask
);
313 brw_MOV(p
, dst
, tmp
);
323 * Checkes if 2-operand instruction needs an intermediate temporary.
325 static void unalias2( struct brw_vs_compile
*c
,
329 void (*func
)( struct brw_vs_compile
*,
334 if ((dst
.file
== arg0
.file
&& dst
.nr
== arg0
.nr
) ||
335 (dst
.file
== arg1
.file
&& dst
.nr
== arg1
.nr
)) {
336 struct brw_compile
*p
= &c
->func
;
337 struct brw_reg tmp
= brw_writemask(get_tmp(c
), dst
.dw1
.bits
.writemask
);
338 func(c
, tmp
, arg0
, arg1
);
339 brw_MOV(p
, dst
, tmp
);
343 func(c
, dst
, arg0
, arg1
);
349 * Checkes if 3-operand instruction needs an intermediate temporary.
351 static void unalias3( struct brw_vs_compile
*c
,
356 void (*func
)( struct brw_vs_compile
*,
362 if ((dst
.file
== arg0
.file
&& dst
.nr
== arg0
.nr
) ||
363 (dst
.file
== arg1
.file
&& dst
.nr
== arg1
.nr
) ||
364 (dst
.file
== arg2
.file
&& dst
.nr
== arg2
.nr
)) {
365 struct brw_compile
*p
= &c
->func
;
366 struct brw_reg tmp
= brw_writemask(get_tmp(c
), dst
.dw1
.bits
.writemask
);
367 func(c
, tmp
, arg0
, arg1
, arg2
);
368 brw_MOV(p
, dst
, tmp
);
372 func(c
, dst
, arg0
, arg1
, arg2
);
376 static void emit_sop( struct brw_compile
*p
,
382 brw_MOV(p
, dst
, brw_imm_f(0.0f
));
383 brw_CMP(p
, brw_null_reg(), cond
, arg0
, arg1
);
384 brw_MOV(p
, dst
, brw_imm_f(1.0f
));
385 brw_set_predicate_control_flag_value(p
, 0xff);
388 static void emit_seq( struct brw_compile
*p
,
391 struct brw_reg arg1
)
393 emit_sop(p
, dst
, arg0
, arg1
, BRW_CONDITIONAL_EQ
);
396 static void emit_sne( struct brw_compile
*p
,
399 struct brw_reg arg1
)
401 emit_sop(p
, dst
, arg0
, arg1
, BRW_CONDITIONAL_NEQ
);
403 static void emit_slt( struct brw_compile
*p
,
406 struct brw_reg arg1
)
408 emit_sop(p
, dst
, arg0
, arg1
, BRW_CONDITIONAL_L
);
411 static void emit_sle( struct brw_compile
*p
,
414 struct brw_reg arg1
)
416 emit_sop(p
, dst
, arg0
, arg1
, BRW_CONDITIONAL_LE
);
419 static void emit_sgt( struct brw_compile
*p
,
422 struct brw_reg arg1
)
424 emit_sop(p
, dst
, arg0
, arg1
, BRW_CONDITIONAL_G
);
427 static void emit_sge( struct brw_compile
*p
,
430 struct brw_reg arg1
)
432 emit_sop(p
, dst
, arg0
, arg1
, BRW_CONDITIONAL_GE
);
435 static void emit_max( struct brw_compile
*p
,
438 struct brw_reg arg1
)
440 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, arg0
, arg1
);
441 brw_SEL(p
, dst
, arg1
, arg0
);
442 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
445 static void emit_min( struct brw_compile
*p
,
448 struct brw_reg arg1
)
450 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, arg0
, arg1
);
451 brw_SEL(p
, dst
, arg0
, arg1
);
452 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
456 static void emit_math1( struct brw_vs_compile
*c
,
462 /* There are various odd behaviours with SEND on the simulator. In
463 * addition there are documented issues with the fact that the GEN4
464 * processor doesn't do dependency control properly on SEND
465 * results. So, on balance, this kludge to get around failures
466 * with writemasked math results looks like it might be necessary
467 * whether that turns out to be a simulator bug or not:
469 struct brw_compile
*p
= &c
->func
;
470 struct brw_reg tmp
= dst
;
471 GLboolean need_tmp
= (dst
.dw1
.bits
.writemask
!= 0xf ||
472 dst
.file
!= BRW_GENERAL_REGISTER_FILE
);
480 BRW_MATH_SATURATE_NONE
,
483 BRW_MATH_DATA_SCALAR
,
487 brw_MOV(p
, dst
, tmp
);
493 static void emit_math2( struct brw_vs_compile
*c
,
500 struct brw_compile
*p
= &c
->func
;
501 struct brw_reg tmp
= dst
;
502 GLboolean need_tmp
= (dst
.dw1
.bits
.writemask
!= 0xf ||
503 dst
.file
!= BRW_GENERAL_REGISTER_FILE
);
508 brw_MOV(p
, brw_message_reg(3), arg1
);
513 BRW_MATH_SATURATE_NONE
,
516 BRW_MATH_DATA_SCALAR
,
520 brw_MOV(p
, dst
, tmp
);
526 static void emit_exp_noalias( struct brw_vs_compile
*c
,
528 struct brw_reg arg0
)
530 struct brw_compile
*p
= &c
->func
;
533 if (dst
.dw1
.bits
.writemask
& BRW_WRITEMASK_X
) {
534 struct brw_reg tmp
= get_tmp(c
);
535 struct brw_reg tmp_d
= retype(tmp
, BRW_REGISTER_TYPE_D
);
537 /* tmp_d = floor(arg0.x) */
538 brw_RNDD(p
, tmp_d
, brw_swizzle1(arg0
, 0));
540 /* result[0] = 2.0 ^ tmp */
542 /* Adjust exponent for floating point:
545 brw_ADD(p
, brw_writemask(tmp_d
, BRW_WRITEMASK_X
), tmp_d
, brw_imm_d(127));
547 /* Install exponent and sign.
548 * Excess drops off the edge:
550 brw_SHL(p
, brw_writemask(retype(dst
, BRW_REGISTER_TYPE_D
), BRW_WRITEMASK_X
),
551 tmp_d
, brw_imm_d(23));
556 if (dst
.dw1
.bits
.writemask
& BRW_WRITEMASK_Y
) {
557 /* result[1] = arg0.x - floor(arg0.x) */
558 brw_FRC(p
, brw_writemask(dst
, BRW_WRITEMASK_Y
), brw_swizzle1(arg0
, 0));
561 if (dst
.dw1
.bits
.writemask
& BRW_WRITEMASK_Z
) {
562 /* As with the LOG instruction, we might be better off just
563 * doing a taylor expansion here, seeing as we have to do all
566 * If mathbox partial precision is too low, consider also:
567 * result[3] = result[0] * EXP(result[1])
570 BRW_MATH_FUNCTION_EXP
,
571 brw_writemask(dst
, BRW_WRITEMASK_Z
),
572 brw_swizzle1(arg0
, 0),
573 BRW_MATH_PRECISION_FULL
);
576 if (dst
.dw1
.bits
.writemask
& BRW_WRITEMASK_W
) {
577 /* result[3] = 1.0; */
578 brw_MOV(p
, brw_writemask(dst
, BRW_WRITEMASK_W
), brw_imm_f(1));
583 static void emit_log_noalias( struct brw_vs_compile
*c
,
585 struct brw_reg arg0
)
587 struct brw_compile
*p
= &c
->func
;
588 struct brw_reg tmp
= dst
;
589 struct brw_reg tmp_ud
= retype(tmp
, BRW_REGISTER_TYPE_UD
);
590 struct brw_reg arg0_ud
= retype(arg0
, BRW_REGISTER_TYPE_UD
);
591 GLboolean need_tmp
= (dst
.dw1
.bits
.writemask
!= 0xf ||
592 dst
.file
!= BRW_GENERAL_REGISTER_FILE
);
596 tmp_ud
= retype(tmp
, BRW_REGISTER_TYPE_UD
);
599 /* Perform mant = frexpf(fabsf(x), &exp), adjust exp and mnt
602 * These almost look like they could be joined up, but not really
605 * result[0].f = (x.i & ((1<<31)-1) >> 23) - 127
606 * result[1].i = (x.i & ((1<<23)-1) + (127<<23)
608 if (dst
.dw1
.bits
.writemask
& BRW_WRITEMASK_XZ
) {
610 brw_writemask(tmp_ud
, BRW_WRITEMASK_X
),
611 brw_swizzle1(arg0_ud
, 0),
612 brw_imm_ud((1U<<31)-1));
615 brw_writemask(tmp_ud
, BRW_WRITEMASK_X
),
620 brw_writemask(tmp
, BRW_WRITEMASK_X
),
621 retype(tmp_ud
, BRW_REGISTER_TYPE_D
), /* does it matter? */
625 if (dst
.dw1
.bits
.writemask
& BRW_WRITEMASK_YZ
) {
627 brw_writemask(tmp_ud
, BRW_WRITEMASK_Y
),
628 brw_swizzle1(arg0_ud
, 0),
629 brw_imm_ud((1<<23)-1));
632 brw_writemask(tmp_ud
, BRW_WRITEMASK_Y
),
634 brw_imm_ud(127<<23));
637 if (dst
.dw1
.bits
.writemask
& BRW_WRITEMASK_Z
) {
638 /* result[2] = result[0] + LOG2(result[1]); */
640 /* Why bother? The above is just a hint how to do this with a
641 * taylor series. Maybe we *should* use a taylor series as by
642 * the time all the above has been done it's almost certainly
643 * quicker than calling the mathbox, even with low precision.
646 * - result[0] + mathbox.LOG2(result[1])
647 * - mathbox.LOG2(arg0.x)
648 * - result[0] + inline_taylor_approx(result[1])
651 BRW_MATH_FUNCTION_LOG
,
652 brw_writemask(tmp
, BRW_WRITEMASK_Z
),
653 brw_swizzle1(tmp
, 1),
654 BRW_MATH_PRECISION_FULL
);
657 brw_writemask(tmp
, BRW_WRITEMASK_Z
),
658 brw_swizzle1(tmp
, 2),
659 brw_swizzle1(tmp
, 0));
662 if (dst
.dw1
.bits
.writemask
& BRW_WRITEMASK_W
) {
663 /* result[3] = 1.0; */
664 brw_MOV(p
, brw_writemask(tmp
, BRW_WRITEMASK_W
), brw_imm_f(1));
668 brw_MOV(p
, dst
, tmp
);
674 /* Need to unalias - consider swizzles: r0 = DST r0.xxxx r1
676 static void emit_dst_noalias( struct brw_vs_compile
*c
,
681 struct brw_compile
*p
= &c
->func
;
683 /* There must be a better way to do this:
685 if (dst
.dw1
.bits
.writemask
& BRW_WRITEMASK_X
)
686 brw_MOV(p
, brw_writemask(dst
, BRW_WRITEMASK_X
), brw_imm_f(1.0));
687 if (dst
.dw1
.bits
.writemask
& BRW_WRITEMASK_Y
)
688 brw_MUL(p
, brw_writemask(dst
, BRW_WRITEMASK_Y
), arg0
, arg1
);
689 if (dst
.dw1
.bits
.writemask
& BRW_WRITEMASK_Z
)
690 brw_MOV(p
, brw_writemask(dst
, BRW_WRITEMASK_Z
), arg0
);
691 if (dst
.dw1
.bits
.writemask
& BRW_WRITEMASK_W
)
692 brw_MOV(p
, brw_writemask(dst
, BRW_WRITEMASK_W
), arg1
);
696 static void emit_xpd( struct brw_compile
*p
,
701 brw_MUL(p
, brw_null_reg(), brw_swizzle(t
, 1,2,0,3), brw_swizzle(u
,2,0,1,3));
702 brw_MAC(p
, dst
, negate(brw_swizzle(t
, 2,0,1,3)), brw_swizzle(u
,1,2,0,3));
706 static void emit_lit_noalias( struct brw_vs_compile
*c
,
708 struct brw_reg arg0
)
710 struct brw_compile
*p
= &c
->func
;
711 struct brw_instruction
*if_insn
;
712 struct brw_reg tmp
= dst
;
713 GLboolean need_tmp
= (dst
.file
!= BRW_GENERAL_REGISTER_FILE
);
718 brw_MOV(p
, brw_writemask(dst
, BRW_WRITEMASK_YZ
), brw_imm_f(0));
719 brw_MOV(p
, brw_writemask(dst
, BRW_WRITEMASK_XW
), brw_imm_f(1));
721 /* Need to use BRW_EXECUTE_8 and also do an 8-wide compare in order
722 * to get all channels active inside the IF. In the clipping code
723 * we run with NoMask, so it's not an option and we can use
724 * BRW_EXECUTE_1 for all comparisons.
726 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_G
, brw_swizzle1(arg0
,0), brw_imm_f(0));
727 if_insn
= brw_IF(p
, BRW_EXECUTE_8
);
729 brw_MOV(p
, brw_writemask(dst
, BRW_WRITEMASK_Y
), brw_swizzle1(arg0
,0));
731 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_G
, brw_swizzle1(arg0
,1), brw_imm_f(0));
732 brw_MOV(p
, brw_writemask(tmp
, BRW_WRITEMASK_Z
), brw_swizzle1(arg0
,1));
733 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
736 BRW_MATH_FUNCTION_POW
,
737 brw_writemask(dst
, BRW_WRITEMASK_Z
),
738 brw_swizzle1(tmp
, 2),
739 brw_swizzle1(arg0
, 3),
740 BRW_MATH_PRECISION_PARTIAL
);
743 brw_ENDIF(p
, if_insn
);
748 static void emit_lrp_noalias(struct brw_vs_compile
*c
,
754 struct brw_compile
*p
= &c
->func
;
756 brw_ADD(p
, dst
, negate(arg0
), brw_imm_f(1.0));
757 brw_MUL(p
, brw_null_reg(), dst
, arg2
);
758 brw_MAC(p
, dst
, arg0
, arg1
);
761 /** 3 or 4-component vector normalization */
762 static void emit_nrm( struct brw_vs_compile
*c
,
767 struct brw_compile
*p
= &c
->func
;
768 struct brw_reg tmp
= get_tmp(c
);
770 /* tmp = dot(arg0, arg0) */
772 brw_DP3(p
, tmp
, arg0
, arg0
);
774 brw_DP4(p
, tmp
, arg0
, arg0
);
776 /* tmp = 1 / sqrt(tmp) */
777 emit_math1(c
, BRW_MATH_FUNCTION_RSQ
, tmp
, tmp
, BRW_MATH_PRECISION_FULL
);
779 /* dst = arg0 * tmp */
780 brw_MUL(p
, dst
, arg0
, tmp
);
786 static struct brw_reg
787 get_constant(struct brw_vs_compile
*c
,
792 struct brw_compile
*p
= &c
->func
;
793 struct brw_reg const_reg
;
794 struct brw_reg const2_reg
;
796 assert(argIndex
< 3);
798 if (c
->current_const
[argIndex
].index
!= index
|| relAddr
) {
799 struct brw_reg addrReg
= c
->regs
[TGSI_FILE_ADDRESS
][0];
801 c
->current_const
[argIndex
].index
= index
;
804 printf(" fetch const[%d] for arg %d into reg %d\n",
805 src
.Index
, argIndex
, c
->current_const
[argIndex
].reg
.nr
);
807 /* need to fetch the constant now */
809 c
->current_const
[argIndex
].reg
,/* writeback dest */
811 relAddr
, /* relative indexing? */
812 addrReg
, /* address register */
813 16 * index
, /* byte offset */
814 SURF_INDEX_VERT_CONST_BUFFER
/* binding table index */
819 const2_reg
= get_tmp(c
);
821 /* use upper half of address reg for second read */
822 addrReg
= stride(addrReg
, 0, 4, 0);
826 const2_reg
, /* writeback dest */
828 relAddr
, /* relative indexing? */
829 addrReg
, /* address register */
830 16 * index
, /* byte offset */
831 SURF_INDEX_VERT_CONST_BUFFER
836 const_reg
= c
->current_const
[argIndex
].reg
;
839 /* merge the two Owords into the constant register */
840 /* const_reg[7..4] = const2_reg[7..4] */
842 suboffset(stride(const_reg
, 0, 4, 1), 4),
843 suboffset(stride(const2_reg
, 0, 4, 1), 4));
844 release_tmp(c
, const2_reg
);
847 /* replicate lower four floats into upper half (to get XYZWXYZW) */
848 const_reg
= stride(const_reg
, 0, 4, 0);
857 /* TODO: relative addressing!
859 static struct brw_reg
get_reg( struct brw_vs_compile
*c
,
860 enum tgsi_file_type file
,
864 case TGSI_FILE_TEMPORARY
:
865 case TGSI_FILE_INPUT
:
866 case TGSI_FILE_OUTPUT
:
867 case TGSI_FILE_CONSTANT
:
868 assert(c
->regs
[file
][index
].nr
!= 0);
869 return c
->regs
[file
][index
];
871 case TGSI_FILE_ADDRESS
:
873 return c
->regs
[file
][index
];
875 case TGSI_FILE_NULL
: /* undef values */
876 return brw_null_reg();
880 return brw_null_reg();
886 * Indirect addressing: get reg[[arg] + offset].
888 static struct brw_reg
deref( struct brw_vs_compile
*c
,
892 struct brw_compile
*p
= &c
->func
;
893 struct brw_reg tmp
= vec4(get_tmp(c
));
894 struct brw_reg addr_reg
= c
->regs
[TGSI_FILE_ADDRESS
][0];
895 struct brw_reg vp_address
= retype(vec1(addr_reg
), BRW_REGISTER_TYPE_UW
);
896 GLuint byte_offset
= arg
.nr
* 32 + arg
.subnr
+ offset
* 16;
897 struct brw_reg indirect
= brw_vec4_indirect(0,0);
900 brw_push_insn_state(p
);
901 brw_set_access_mode(p
, BRW_ALIGN_1
);
903 /* This is pretty clunky - load the address register twice and
904 * fetch each 4-dword value in turn. There must be a way to do
905 * this in a single pass, but I couldn't get it to work.
907 brw_ADD(p
, brw_address_reg(0), vp_address
, brw_imm_d(byte_offset
));
908 brw_MOV(p
, tmp
, indirect
);
910 brw_ADD(p
, brw_address_reg(0), suboffset(vp_address
, 8), brw_imm_d(byte_offset
));
911 brw_MOV(p
, suboffset(tmp
, 4), indirect
);
913 brw_pop_insn_state(p
);
916 /* NOTE: tmp not released */
922 * Get brw reg corresponding to the instruction's [argIndex] src reg.
923 * TODO: relative addressing!
925 static struct brw_reg
926 get_src_reg( struct brw_vs_compile
*c
,
934 case TGSI_FILE_TEMPORARY
:
935 case TGSI_FILE_INPUT
:
936 case TGSI_FILE_OUTPUT
:
938 return deref(c
, c
->regs
[file
][0], index
);
941 assert(c
->regs
[file
][index
].nr
!= 0);
942 return c
->regs
[file
][index
];
945 case TGSI_FILE_IMMEDIATE
:
946 return c
->regs
[file
][index
];
948 case TGSI_FILE_CONSTANT
:
949 if (c
->vp
->use_const_buffer
) {
950 return get_constant(c
, argIndex
, index
, relAddr
);
953 return deref(c
, c
->regs
[TGSI_FILE_CONSTANT
][0], index
);
956 assert(c
->regs
[TGSI_FILE_CONSTANT
][index
].nr
!= 0);
957 return c
->regs
[TGSI_FILE_CONSTANT
][index
];
959 case TGSI_FILE_ADDRESS
:
961 return c
->regs
[file
][index
];
964 /* this is a normal case since we loop over all three src args */
965 return brw_null_reg();
969 return brw_null_reg();
974 static void emit_arl( struct brw_vs_compile
*c
,
976 struct brw_reg arg0
)
978 struct brw_compile
*p
= &c
->func
;
979 struct brw_reg tmp
= dst
;
980 GLboolean need_tmp
= (dst
.file
!= BRW_GENERAL_REGISTER_FILE
);
985 brw_RNDD(p
, tmp
, arg0
); /* tmp = round(arg0) */
986 brw_MUL(p
, dst
, tmp
, brw_imm_d(16)); /* dst = tmp * 16 */
994 * Return the brw reg for the given instruction's src argument.
996 static struct brw_reg
get_arg( struct brw_vs_compile
*c
,
997 const struct tgsi_full_src_register
*src
,
1002 if (src
->SrcRegister
.File
== TGSI_FILE_NULL
)
1003 return brw_null_reg();
1005 reg
= get_src_reg(c
, argIndex
,
1006 src
->SrcRegister
.File
,
1007 src
->SrcRegister
.Index
,
1008 src
->SrcRegister
.Indirect
);
1010 /* Convert 3-bit swizzle to 2-bit.
1012 reg
.dw1
.bits
.swizzle
= BRW_SWIZZLE4(src
->SrcRegister
.SwizzleX
,
1013 src
->SrcRegister
.SwizzleY
,
1014 src
->SrcRegister
.SwizzleZ
,
1015 src
->SrcRegister
.SwizzleW
);
1017 reg
.negate
= src
->SrcRegister
.Negate
? 1 : 0;
1027 * Get brw register for the given program dest register.
1029 static struct brw_reg
get_dst( struct brw_vs_compile
*c
,
1032 unsigned writemask
)
1037 case TGSI_FILE_TEMPORARY
:
1038 case TGSI_FILE_OUTPUT
:
1039 assert(c
->regs
[file
][index
].nr
!= 0);
1040 reg
= c
->regs
[file
][index
];
1042 case TGSI_FILE_ADDRESS
:
1044 reg
= c
->regs
[file
][index
];
1046 case TGSI_FILE_NULL
:
1047 /* we may hit this for OPCODE_END, OPCODE_KIL, etc */
1048 reg
= brw_null_reg();
1052 reg
= brw_null_reg();
1055 reg
.dw1
.bits
.writemask
= writemask
;
1064 * Post-vertex-program processing. Send the results to the URB.
1066 static void emit_vertex_write( struct brw_vs_compile
*c
)
1068 struct brw_compile
*p
= &c
->func
;
1069 struct brw_reg m0
= brw_message_reg(0);
1070 struct brw_reg pos
= c
->regs
[TGSI_FILE_OUTPUT
][VERT_RESULT_HPOS
];
1073 GLuint len_vertext_header
= 2;
1075 if (c
->key
.copy_edgeflag
) {
1078 get_reg(c
, TGSI_FILE_OUTPUT
, 0),
1079 get_reg(c
, TGSI_FILE_INPUT
, 0));
1082 /* Build ndc coords */
1084 /* ndc = 1.0 / pos.w */
1085 emit_math1(c
, BRW_MATH_FUNCTION_INV
, ndc
, brw_swizzle1(pos
, 3), BRW_MATH_PRECISION_FULL
);
1086 /* ndc.xyz = pos * ndc */
1087 brw_MUL(p
, brw_writemask(ndc
, BRW_WRITEMASK_XYZ
), pos
, ndc
);
1089 /* Update the header for point size, user clipping flags, and -ve rhw
1092 if (c
->prog_data
.writes_psiz
||
1093 c
->key
.nr_userclip
||
1096 struct brw_reg header1
= retype(get_tmp(c
), BRW_REGISTER_TYPE_UD
);
1099 brw_MOV(p
, header1
, brw_imm_ud(0));
1101 brw_set_access_mode(p
, BRW_ALIGN_16
);
1103 if (c
->prog_data
.writes_psiz
) {
1104 struct brw_reg psiz
= c
->regs
[TGSI_FILE_OUTPUT
][VERT_RESULT_PSIZ
];
1105 brw_MUL(p
, brw_writemask(header1
, BRW_WRITEMASK_W
), brw_swizzle1(psiz
, 0), brw_imm_f(1<<11));
1106 brw_AND(p
, brw_writemask(header1
, BRW_WRITEMASK_W
), header1
, brw_imm_ud(0x7ff<<8));
1109 for (i
= 0; i
< c
->key
.nr_userclip
; i
++) {
1110 brw_set_conditionalmod(p
, BRW_CONDITIONAL_L
);
1111 brw_DP4(p
, brw_null_reg(), pos
, c
->userplane
[i
]);
1112 brw_OR(p
, brw_writemask(header1
, BRW_WRITEMASK_W
), header1
, brw_imm_ud(1<<i
));
1113 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
1116 /* i965 clipping workaround:
1117 * 1) Test for -ve rhw
1119 * set ndc = (0,0,0,0)
1122 * Later, clipping will detect ucp[6] and ensure the primitive is
1123 * clipped against all fixed planes.
1125 if (c
->chipset
.is_965
) {
1127 vec8(brw_null_reg()),
1129 brw_swizzle1(ndc
, 3),
1132 brw_OR(p
, brw_writemask(header1
, BRW_WRITEMASK_W
), header1
, brw_imm_ud(1<<6));
1133 brw_MOV(p
, ndc
, brw_imm_f(0));
1134 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
1137 brw_set_access_mode(p
, BRW_ALIGN_1
); /* why? */
1138 brw_MOV(p
, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD
), header1
);
1139 brw_set_access_mode(p
, BRW_ALIGN_16
);
1141 release_tmp(c
, header1
);
1144 brw_MOV(p
, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD
), brw_imm_ud(0));
1147 /* Emit the (interleaved) headers for the two vertices - an 8-reg
1148 * of zeros followed by two sets of NDC coordinates:
1150 brw_set_access_mode(p
, BRW_ALIGN_1
);
1151 brw_MOV(p
, offset(m0
, 2), ndc
);
1153 if (c
->chipset
.is_igdng
) {
1154 /* There are 20 DWs (D0-D19) in VUE vertex header on IGDNG */
1155 brw_MOV(p
, offset(m0
, 3), pos
); /* a portion of vertex header */
1156 /* m4, m5 contain the distances from vertex to the user clip planeXXX.
1157 * Seems it is useless for us.
1158 * m6 is used for aligning, so that the remainder of vertex element is
1161 brw_MOV(p
, offset(m0
, 7), pos
); /* the remainder of vertex element */
1162 len_vertext_header
= 6;
1164 brw_MOV(p
, offset(m0
, 3), pos
);
1165 len_vertext_header
= 2;
1168 eot
= (c
->first_overflow_output
== 0);
1171 brw_null_reg(), /* dest */
1172 0, /* starting mrf reg nr */
1176 MIN2(c
->nr_outputs
+ 1 + len_vertext_header
, (BRW_MAX_MRF
-1)), /* msg len */
1177 0, /* response len */
1179 eot
, /* writes complete */
1180 0, /* urb destination offset */
1181 BRW_URB_SWIZZLE_INTERLEAVE
);
1183 if (c
->first_overflow_output
> 0) {
1184 /* Not all of the vertex outputs/results fit into the MRF.
1185 * Move the overflowed attributes from the GRF to the MRF and
1186 * issue another brw_urb_WRITE().
1188 /* XXX I'm not 100% sure about which MRF regs to use here. Starting
1192 for (i
= c
->first_overflow_output
; i
< c
->prog_data
.nr_outputs
; i
++) {
1193 /* move from GRF to MRF */
1194 brw_MOV(p
, brw_message_reg(4+mrf
), c
->regs
[TGSI_FILE_OUTPUT
][i
]);
1199 brw_null_reg(), /* dest */
1200 4, /* starting mrf reg nr */
1204 mrf
+1, /* msg len */
1205 0, /* response len */
1207 1, /* writes complete */
1208 BRW_MAX_MRF
-1, /* urb destination offset */
1209 BRW_URB_SWIZZLE_INTERLEAVE
);
1215 * Called after code generation to resolve subroutine calls and the
1217 * \param end_inst points to brw code for END instruction
1218 * \param last_inst points to last instruction emitted before vertex write
1221 post_vs_emit( struct brw_vs_compile
*c
,
1222 struct brw_instruction
*end_inst
,
1223 struct brw_instruction
*last_inst
)
1227 brw_resolve_cals(&c
->func
);
1229 /* patch up the END code to jump past subroutines, etc */
1230 offset
= last_inst
- end_inst
;
1232 brw_set_src1(end_inst
, brw_imm_d(offset
* 16));
1234 end_inst
->header
.opcode
= BRW_OPCODE_NOP
;
1239 get_predicate(const struct tgsi_full_instruction
*inst
)
1241 /* XXX: disabling for now
1244 if (inst
->dst
.CondMask
== COND_TR
)
1245 return BRW_PREDICATE_NONE
;
1247 /* All of GLSL only produces predicates for COND_NE and one channel per
1248 * vector. Fail badly if someone starts doing something else, as it might
1249 * mean infinite looping or something.
1251 * We'd like to support all the condition codes, but our hardware doesn't
1252 * quite match the Mesa IR, which is modeled after the NV extensions. For
1253 * those, the instruction may update the condition codes or not, then any
1254 * later instruction may use one of those condition codes. For gen4, the
1255 * instruction may update the flags register based on one of the condition
1256 * codes output by the instruction, and then further instructions may
1257 * predicate on that. We can probably support this, but it won't
1258 * necessarily be easy.
1260 /* assert(inst->dst.CondMask == COND_NE); */
1262 switch (inst
->dst
.CondSwizzle
) {
1264 return BRW_PREDICATE_ALIGN16_REPLICATE_X
;
1266 return BRW_PREDICATE_ALIGN16_REPLICATE_Y
;
1268 return BRW_PREDICATE_ALIGN16_REPLICATE_Z
;
1270 return BRW_PREDICATE_ALIGN16_REPLICATE_W
;
1272 debug_printf("Unexpected predicate: 0x%08x\n",
1273 inst
->dst
.CondMask
);
1274 return BRW_PREDICATE_NORMAL
;
1277 return BRW_PREDICATE_NORMAL
;
1281 static void emit_insn(struct brw_vs_compile
*c
,
1282 const struct tgsi_full_instruction
*inst
)
1284 unsigned opcode
= inst
->Instruction
.Opcode
;
1285 unsigned label
= inst
->InstructionExtLabel
.Label
;
1286 struct brw_compile
*p
= &c
->func
;
1287 struct brw_reg args
[3], dst
;
1291 printf("%d: ", insn
);
1292 _mesa_print_instruction(inst
);
1295 /* Get argument regs.
1297 for (i
= 0; i
< 3; i
++) {
1298 args
[i
] = get_arg(c
, &inst
->FullSrcRegisters
[i
], i
);
1301 /* Get dest regs. Note that it is possible for a reg to be both
1302 * dst and arg, given the static allocation of registers. So
1303 * care needs to be taken emitting multi-operation instructions.
1306 inst
->FullDstRegisters
[0].DstRegister
.File
,
1307 inst
->FullDstRegisters
[0].DstRegister
.Index
,
1308 inst
->FullDstRegisters
[0].DstRegister
.WriteMask
);
1312 if (inst
->Instruction
.Saturate
!= TGSI_SAT_NONE
) {
1313 debug_printf("Unsupported saturate in vertex shader");
1317 case TGSI_OPCODE_ABS
:
1318 brw_MOV(p
, dst
, brw_abs(args
[0]));
1320 case TGSI_OPCODE_ADD
:
1321 brw_ADD(p
, dst
, args
[0], args
[1]);
1323 case TGSI_OPCODE_COS
:
1324 emit_math1(c
, BRW_MATH_FUNCTION_COS
, dst
, args
[0], BRW_MATH_PRECISION_FULL
);
1326 case TGSI_OPCODE_DP3
:
1327 brw_DP3(p
, dst
, args
[0], args
[1]);
1329 case TGSI_OPCODE_DP4
:
1330 brw_DP4(p
, dst
, args
[0], args
[1]);
1332 case TGSI_OPCODE_DPH
:
1333 brw_DPH(p
, dst
, args
[0], args
[1]);
1335 case TGSI_OPCODE_NRM
:
1336 emit_nrm(c
, dst
, args
[0], 3);
1338 case TGSI_OPCODE_NRM4
:
1339 emit_nrm(c
, dst
, args
[0], 4);
1341 case TGSI_OPCODE_DST
:
1342 unalias2(c
, dst
, args
[0], args
[1], emit_dst_noalias
);
1344 case TGSI_OPCODE_EXP
:
1345 unalias1(c
, dst
, args
[0], emit_exp_noalias
);
1347 case TGSI_OPCODE_EX2
:
1348 emit_math1(c
, BRW_MATH_FUNCTION_EXP
, dst
, args
[0], BRW_MATH_PRECISION_FULL
);
1350 case TGSI_OPCODE_ARL
:
1351 emit_arl(c
, dst
, args
[0]);
1353 case TGSI_OPCODE_FLR
:
1354 brw_RNDD(p
, dst
, args
[0]);
1356 case TGSI_OPCODE_FRC
:
1357 brw_FRC(p
, dst
, args
[0]);
1359 case TGSI_OPCODE_LOG
:
1360 unalias1(c
, dst
, args
[0], emit_log_noalias
);
1362 case TGSI_OPCODE_LG2
:
1363 emit_math1(c
, BRW_MATH_FUNCTION_LOG
, dst
, args
[0], BRW_MATH_PRECISION_FULL
);
1365 case TGSI_OPCODE_LIT
:
1366 unalias1(c
, dst
, args
[0], emit_lit_noalias
);
1368 case TGSI_OPCODE_LRP
:
1369 unalias3(c
, dst
, args
[0], args
[1], args
[2], emit_lrp_noalias
);
1371 case TGSI_OPCODE_MAD
:
1372 brw_MOV(p
, brw_acc_reg(), args
[2]);
1373 brw_MAC(p
, dst
, args
[0], args
[1]);
1375 case TGSI_OPCODE_MAX
:
1376 emit_max(p
, dst
, args
[0], args
[1]);
1378 case TGSI_OPCODE_MIN
:
1379 emit_min(p
, dst
, args
[0], args
[1]);
1381 case TGSI_OPCODE_MOV
:
1382 brw_MOV(p
, dst
, args
[0]);
1384 case TGSI_OPCODE_MUL
:
1385 brw_MUL(p
, dst
, args
[0], args
[1]);
1387 case TGSI_OPCODE_POW
:
1388 emit_math2(c
, BRW_MATH_FUNCTION_POW
, dst
, args
[0], args
[1], BRW_MATH_PRECISION_FULL
);
1390 case TGSI_OPCODE_RCP
:
1391 emit_math1(c
, BRW_MATH_FUNCTION_INV
, dst
, args
[0], BRW_MATH_PRECISION_FULL
);
1393 case TGSI_OPCODE_RSQ
:
1394 emit_math1(c
, BRW_MATH_FUNCTION_RSQ
, dst
, args
[0], BRW_MATH_PRECISION_FULL
);
1396 case TGSI_OPCODE_SEQ
:
1397 emit_seq(p
, dst
, args
[0], args
[1]);
1399 case TGSI_OPCODE_SIN
:
1400 emit_math1(c
, BRW_MATH_FUNCTION_SIN
, dst
, args
[0], BRW_MATH_PRECISION_FULL
);
1402 case TGSI_OPCODE_SNE
:
1403 emit_sne(p
, dst
, args
[0], args
[1]);
1405 case TGSI_OPCODE_SGE
:
1406 emit_sge(p
, dst
, args
[0], args
[1]);
1408 case TGSI_OPCODE_SGT
:
1409 emit_sgt(p
, dst
, args
[0], args
[1]);
1411 case TGSI_OPCODE_SLT
:
1412 emit_slt(p
, dst
, args
[0], args
[1]);
1414 case TGSI_OPCODE_SLE
:
1415 emit_sle(p
, dst
, args
[0], args
[1]);
1417 case TGSI_OPCODE_SUB
:
1418 brw_ADD(p
, dst
, args
[0], negate(args
[1]));
1420 case TGSI_OPCODE_TRUNC
:
1421 /* round toward zero */
1422 brw_RNDZ(p
, dst
, args
[0]);
1424 case TGSI_OPCODE_XPD
:
1425 emit_xpd(p
, dst
, args
[0], args
[1]);
1427 case TGSI_OPCODE_IF
:
1428 assert(c
->if_depth
< MAX_IF_DEPTH
);
1429 c
->if_inst
[c
->if_depth
] = brw_IF(p
, BRW_EXECUTE_8
);
1430 /* Note that brw_IF smashes the predicate_control field. */
1431 c
->if_inst
[c
->if_depth
]->header
.predicate_control
= get_predicate(inst
);
1434 case TGSI_OPCODE_ELSE
:
1435 c
->if_inst
[c
->if_depth
-1] = brw_ELSE(p
, c
->if_inst
[c
->if_depth
-1]);
1437 case TGSI_OPCODE_ENDIF
:
1438 assert(c
->if_depth
> 0);
1439 brw_ENDIF(p
, c
->if_inst
[--c
->if_depth
]);
1441 case TGSI_OPCODE_BGNLOOP
:
1442 c
->loop_inst
[c
->loop_depth
++] = brw_DO(p
, BRW_EXECUTE_8
);
1444 case TGSI_OPCODE_BRK
:
1445 brw_set_predicate_control(p
, get_predicate(inst
));
1447 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
1449 case TGSI_OPCODE_CONT
:
1450 brw_set_predicate_control(p
, get_predicate(inst
));
1452 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
1454 case TGSI_OPCODE_ENDLOOP
:
1456 struct brw_instruction
*inst0
, *inst1
;
1461 if (c
->chipset
.is_igdng
)
1464 inst0
= inst1
= brw_WHILE(p
, c
->loop_inst
[c
->loop_depth
]);
1465 /* patch all the BREAK/CONT instructions from last BEGINLOOP */
1466 while (inst0
> c
->loop_inst
[c
->loop_depth
]) {
1468 if (inst0
->header
.opcode
== TGSI_OPCODE_BRK
) {
1469 inst0
->bits3
.if_else
.jump_count
= br
* (inst1
- inst0
+ 1);
1470 inst0
->bits3
.if_else
.pop_count
= 0;
1472 else if (inst0
->header
.opcode
== TGSI_OPCODE_CONT
) {
1473 inst0
->bits3
.if_else
.jump_count
= br
* (inst1
- inst0
);
1474 inst0
->bits3
.if_else
.pop_count
= 0;
1479 case TGSI_OPCODE_BRA
:
1480 brw_set_predicate_control(p
, get_predicate(inst
));
1481 brw_ADD(p
, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
1482 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
1484 case TGSI_OPCODE_CAL
:
1485 brw_set_access_mode(p
, BRW_ALIGN_1
);
1486 brw_ADD(p
, deref_1d(c
->stack_index
, 0), brw_ip_reg(), brw_imm_d(3*16));
1487 brw_set_access_mode(p
, BRW_ALIGN_16
);
1488 brw_ADD(p
, get_addr_reg(c
->stack_index
),
1489 get_addr_reg(c
->stack_index
), brw_imm_d(4));
1490 brw_save_call(p
, label
, p
->nr_insn
);
1491 brw_ADD(p
, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
1493 case TGSI_OPCODE_RET
:
1494 brw_ADD(p
, get_addr_reg(c
->stack_index
),
1495 get_addr_reg(c
->stack_index
), brw_imm_d(-4));
1496 brw_set_access_mode(p
, BRW_ALIGN_1
);
1497 brw_MOV(p
, brw_ip_reg(), deref_1d(c
->stack_index
, 0));
1498 brw_set_access_mode(p
, BRW_ALIGN_16
);
1500 case TGSI_OPCODE_END
:
1501 c
->end_offset
= p
->nr_insn
;
1502 /* this instruction will get patched later to jump past subroutine
1505 brw_ADD(p
, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
1507 case TGSI_OPCODE_BGNSUB
:
1508 brw_save_label(p
, p
->nr_insn
, p
->nr_insn
);
1510 case TGSI_OPCODE_ENDSUB
:
1514 debug_printf("Unsupported opcode %i (%s) in vertex shader",
1516 tgsi_get_opcode_name(opcode
));
1519 /* Set the predication update on the last instruction of the native
1520 * instruction sequence.
1522 * This would be problematic if it was set on a math instruction,
1523 * but that shouldn't be the case with the current GLSL compiler.
1528 if (inst
->CondUpdate
) {
1529 struct brw_instruction
*hw_insn
= &p
->store
[p
->nr_insn
- 1];
1531 assert(hw_insn
->header
.destreg__conditionalmod
== 0);
1532 hw_insn
->header
.destreg__conditionalmod
= BRW_CONDITIONAL_NZ
;
1540 /* Emit the vertex program instructions here.
1542 void brw_vs_emit(struct brw_vs_compile
*c
)
1544 struct brw_compile
*p
= &c
->func
;
1545 const struct tgsi_token
*tokens
= c
->vp
->tokens
;
1546 struct brw_instruction
*end_inst
, *last_inst
;
1547 struct tgsi_parse_context parse
;
1548 struct tgsi_full_instruction
*inst
;
1549 boolean done
= FALSE
;
1552 if (BRW_DEBUG
& DEBUG_VS
)
1553 tgsi_dump(c
->vp
->tokens
, 0);
1555 c
->stack_index
= brw_indirect(0, 0);
1557 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1558 brw_set_access_mode(p
, BRW_ALIGN_16
);
1561 tgsi_parse_init( &parse
, tokens
);
1562 while( !tgsi_parse_end_of_tokens( &parse
) ) {
1563 tgsi_parse_token( &parse
);
1565 switch( parse
.FullToken
.Token
.Type
) {
1566 case TGSI_TOKEN_TYPE_DECLARATION
:
1567 /* Nothing to do -- using info from tgsi_scan().
1571 case TGSI_TOKEN_TYPE_IMMEDIATE
: {
1572 static const float id
[4] = {0,0,0,1};
1573 const float *imm
= &parse
.FullToken
.FullImmediate
.u
[i
].Float
;
1574 unsigned size
= parse
.FullToken
.FullImmediate
.Immediate
.NrTokens
- 1;
1576 for (i
= 0; i
< size
; i
++)
1577 c
->immediate
[c
->nr_immediates
][i
] = imm
[i
];
1580 c
->immediate
[c
->nr_immediates
][i
] = id
[i
];
1586 case TGSI_TOKEN_TYPE_INSTRUCTION
:
1592 /* Static register allocation
1594 brw_vs_alloc_regs(c
);
1595 brw_MOV(p
, get_addr_reg(c
->stack_index
), brw_address(c
->stack
));
1599 tgsi_parse_init( &parse
, tokens
);
1600 while( !tgsi_parse_end_of_tokens( &parse
) ) {
1601 tgsi_parse_token( &parse
);
1603 switch( parse
.FullToken
.Token
.Type
) {
1604 case TGSI_TOKEN_TYPE_DECLARATION
:
1605 case TGSI_TOKEN_TYPE_IMMEDIATE
:
1608 case TGSI_TOKEN_TYPE_INSTRUCTION
:
1609 inst
= &parse
.FullToken
.FullInstruction
;
1610 emit_insn( c
, inst
);
1617 tgsi_parse_free( &parse
);
1619 end_inst
= &p
->store
[c
->end_offset
];
1620 last_inst
= &p
->store
[p
->nr_insn
];
1622 /* The END instruction will be patched to jump to this code */
1623 emit_vertex_write(c
);
1625 post_vs_emit(c
, end_inst
, last_inst
);
1627 if (BRW_DEBUG
& DEBUG_VS
) {
1628 debug_printf("vs-native:\n");
1629 brw_disasm(stderr
, p
->store
, p
->nr_insn
);