2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **********************************************************************/
29 * Keith Whitwell <keith@tungstengraphics.com>
33 #include "main/macros.h"
34 #include "shader/program.h"
35 #include "shader/prog_parameter.h"
36 #include "shader/prog_print.h"
37 #include "brw_context.h"
42 /* Do things as simply as possible. Allocate and populate all regs
45 static void brw_vs_alloc_regs( struct brw_vs_compile
*c
)
47 GLuint i
, reg
= 0, mrf
;
50 /* r0 -- reserved as usual
52 c
->r0
= brw_vec8_grf(reg
, 0); reg
++;
54 /* User clip planes from curbe:
56 if (c
->key
.nr_userclip
) {
57 for (i
= 0; i
< c
->key
.nr_userclip
; i
++) {
58 c
->userplane
[i
] = stride( brw_vec4_grf(reg
+3+i
/2, (i
%2) * 4), 0, 4, 1);
61 /* Deal with curbe alignment:
63 reg
+= ((6+c
->key
.nr_userclip
+3)/4)*2;
66 /* Vertex program parameters from curbe:
68 nr_params
= c
->vp
->program
.Base
.Parameters
->NumParameters
;
69 for (i
= 0; i
< nr_params
; i
++) {
70 c
->regs
[PROGRAM_STATE_VAR
][i
] = stride( brw_vec4_grf(reg
+i
/2, (i
%2) * 4), 0, 4, 1);
72 reg
+= (nr_params
+1)/2;
74 c
->prog_data
.curb_read_length
= reg
- 1;
78 /* Allocate input regs:
81 for (i
= 0; i
< VERT_ATTRIB_MAX
; i
++) {
82 if (c
->prog_data
.inputs_read
& (1<<i
)) {
84 c
->regs
[PROGRAM_INPUT
][i
] = brw_vec8_grf(reg
, 0);
90 /* Allocate outputs: TODO: could organize the non-position outputs
91 * to go straight into message regs.
94 c
->first_output
= reg
;
96 for (i
= 0; i
< VERT_RESULT_MAX
; i
++) {
97 if (c
->prog_data
.outputs_written
& (1<<i
)) {
99 if (i
== VERT_RESULT_HPOS
) {
100 c
->regs
[PROGRAM_OUTPUT
][i
] = brw_vec8_grf(reg
, 0);
103 else if (i
== VERT_RESULT_PSIZ
) {
104 c
->regs
[PROGRAM_OUTPUT
][i
] = brw_vec8_grf(reg
, 0);
106 mrf
++; /* just a placeholder? XXX fix later stages & remove this */
109 c
->regs
[PROGRAM_OUTPUT
][i
] = brw_message_reg(mrf
);
115 /* Allocate program temporaries:
117 for (i
= 0; i
< c
->vp
->program
.Base
.NumTemporaries
; i
++) {
118 c
->regs
[PROGRAM_TEMPORARY
][i
] = brw_vec8_grf(reg
, 0);
122 /* Address reg(s). Don't try to use the internal address reg until
125 for (i
= 0; i
< c
->vp
->program
.Base
.NumAddressRegs
; i
++) {
126 c
->regs
[PROGRAM_ADDRESS
][i
] = brw_reg(BRW_GENERAL_REGISTER_FILE
,
130 BRW_VERTICAL_STRIDE_8
,
132 BRW_HORIZONTAL_STRIDE_1
,
138 for (i
= 0; i
< 128; i
++) {
139 if (c
->output_regs
[i
].used_in_src
) {
140 c
->output_regs
[i
].reg
= brw_vec8_grf(reg
, 0);
145 c
->stack
= brw_uw16_reg(BRW_GENERAL_REGISTER_FILE
, reg
, 0);
149 /* Some opcodes need an internal temporary:
152 c
->last_tmp
= reg
; /* for allocation purposes */
154 /* Each input reg holds data from two vertices. The
155 * urb_read_length is the number of registers read from *each*
156 * vertex urb, so is half the amount:
158 c
->prog_data
.urb_read_length
= (c
->nr_inputs
+1)/2;
160 c
->prog_data
.urb_entry_size
= (c
->nr_outputs
+2+3)/4;
161 c
->prog_data
.total_grf
= reg
;
165 static struct brw_reg
get_tmp( struct brw_vs_compile
*c
)
167 struct brw_reg tmp
= brw_vec8_grf(c
->last_tmp
, 0);
169 if (++c
->last_tmp
> c
->prog_data
.total_grf
)
170 c
->prog_data
.total_grf
= c
->last_tmp
;
175 static void release_tmp( struct brw_vs_compile
*c
, struct brw_reg tmp
)
177 if (tmp
.nr
== c
->last_tmp
-1)
181 static void release_tmps( struct brw_vs_compile
*c
)
183 c
->last_tmp
= c
->first_tmp
;
187 static void unalias1( struct brw_vs_compile
*c
,
190 void (*func
)( struct brw_vs_compile
*,
194 if (dst
.file
== arg0
.file
&& dst
.nr
== arg0
.nr
) {
195 struct brw_compile
*p
= &c
->func
;
196 struct brw_reg tmp
= brw_writemask(get_tmp(c
), dst
.dw1
.bits
.writemask
);
198 brw_MOV(p
, dst
, tmp
);
205 static void unalias2( struct brw_vs_compile
*c
,
209 void (*func
)( struct brw_vs_compile
*,
214 if ((dst
.file
== arg0
.file
&& dst
.nr
== arg0
.nr
) ||
215 (dst
.file
== arg1
.file
&& dst
.nr
== arg1
.nr
)) {
216 struct brw_compile
*p
= &c
->func
;
217 struct brw_reg tmp
= brw_writemask(get_tmp(c
), dst
.dw1
.bits
.writemask
);
218 func(c
, tmp
, arg0
, arg1
);
219 brw_MOV(p
, dst
, tmp
);
222 func(c
, dst
, arg0
, arg1
);
226 static void emit_sop( struct brw_compile
*p
,
232 brw_push_insn_state(p
);
233 brw_CMP(p
, brw_null_reg(), cond
, arg0
, arg1
);
234 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
235 brw_MOV(p
, dst
, brw_imm_f(1.0f
));
236 brw_set_predicate_control(p
, BRW_PREDICATE_NORMAL
);
237 brw_MOV(p
, dst
, brw_imm_f(0.0f
));
238 brw_pop_insn_state(p
);
241 static void emit_seq( struct brw_compile
*p
,
244 struct brw_reg arg1
)
246 emit_sop(p
, dst
, arg0
, arg1
, BRW_CONDITIONAL_EQ
);
249 static void emit_sne( struct brw_compile
*p
,
252 struct brw_reg arg1
)
254 emit_sop(p
, dst
, arg0
, arg1
, BRW_CONDITIONAL_NEQ
);
256 static void emit_slt( struct brw_compile
*p
,
259 struct brw_reg arg1
)
261 emit_sop(p
, dst
, arg0
, arg1
, BRW_CONDITIONAL_L
);
264 static void emit_sle( struct brw_compile
*p
,
267 struct brw_reg arg1
)
269 emit_sop(p
, dst
, arg0
, arg1
, BRW_CONDITIONAL_LE
);
272 static void emit_sgt( struct brw_compile
*p
,
275 struct brw_reg arg1
)
277 emit_sop(p
, dst
, arg0
, arg1
, BRW_CONDITIONAL_G
);
280 static void emit_sge( struct brw_compile
*p
,
283 struct brw_reg arg1
)
285 emit_sop(p
, dst
, arg0
, arg1
, BRW_CONDITIONAL_GE
);
288 static void emit_max( struct brw_compile
*p
,
291 struct brw_reg arg1
)
293 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, arg0
, arg1
);
294 brw_SEL(p
, dst
, arg1
, arg0
);
295 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
298 static void emit_min( struct brw_compile
*p
,
301 struct brw_reg arg1
)
303 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, arg0
, arg1
);
304 brw_SEL(p
, dst
, arg0
, arg1
);
305 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
309 static void emit_math1( struct brw_vs_compile
*c
,
315 /* There are various odd behaviours with SEND on the simulator. In
316 * addition there are documented issues with the fact that the GEN4
317 * processor doesn't do dependency control properly on SEND
318 * results. So, on balance, this kludge to get around failures
319 * with writemasked math results looks like it might be necessary
320 * whether that turns out to be a simulator bug or not:
322 struct brw_compile
*p
= &c
->func
;
323 struct brw_reg tmp
= dst
;
324 GLboolean need_tmp
= (dst
.dw1
.bits
.writemask
!= 0xf ||
325 dst
.file
!= BRW_GENERAL_REGISTER_FILE
);
333 BRW_MATH_SATURATE_NONE
,
336 BRW_MATH_DATA_SCALAR
,
340 brw_MOV(p
, dst
, tmp
);
345 static void emit_math2( struct brw_vs_compile
*c
,
352 struct brw_compile
*p
= &c
->func
;
353 struct brw_reg tmp
= dst
;
354 GLboolean need_tmp
= (dst
.dw1
.bits
.writemask
!= 0xf ||
355 dst
.file
!= BRW_GENERAL_REGISTER_FILE
);
360 brw_MOV(p
, brw_message_reg(3), arg1
);
365 BRW_MATH_SATURATE_NONE
,
368 BRW_MATH_DATA_SCALAR
,
372 brw_MOV(p
, dst
, tmp
);
379 static void emit_exp_noalias( struct brw_vs_compile
*c
,
381 struct brw_reg arg0
)
383 struct brw_compile
*p
= &c
->func
;
386 if (dst
.dw1
.bits
.writemask
& WRITEMASK_X
) {
387 struct brw_reg tmp
= get_tmp(c
);
388 struct brw_reg tmp_d
= retype(tmp
, BRW_REGISTER_TYPE_D
);
390 /* tmp_d = floor(arg0.x) */
391 brw_RNDD(p
, tmp_d
, brw_swizzle1(arg0
, 0));
393 /* result[0] = 2.0 ^ tmp */
395 /* Adjust exponent for floating point:
398 brw_ADD(p
, brw_writemask(tmp_d
, WRITEMASK_X
), tmp_d
, brw_imm_d(127));
400 /* Install exponent and sign.
401 * Excess drops off the edge:
403 brw_SHL(p
, brw_writemask(retype(dst
, BRW_REGISTER_TYPE_D
), WRITEMASK_X
),
404 tmp_d
, brw_imm_d(23));
409 if (dst
.dw1
.bits
.writemask
& WRITEMASK_Y
) {
410 /* result[1] = arg0.x - floor(arg0.x) */
411 brw_FRC(p
, brw_writemask(dst
, WRITEMASK_Y
), brw_swizzle1(arg0
, 0));
414 if (dst
.dw1
.bits
.writemask
& WRITEMASK_Z
) {
415 /* As with the LOG instruction, we might be better off just
416 * doing a taylor expansion here, seeing as we have to do all
419 * If mathbox partial precision is too low, consider also:
420 * result[3] = result[0] * EXP(result[1])
423 BRW_MATH_FUNCTION_EXP
,
424 brw_writemask(dst
, WRITEMASK_Z
),
425 brw_swizzle1(arg0
, 0),
426 BRW_MATH_PRECISION_PARTIAL
);
429 if (dst
.dw1
.bits
.writemask
& WRITEMASK_W
) {
430 /* result[3] = 1.0; */
431 brw_MOV(p
, brw_writemask(dst
, WRITEMASK_W
), brw_imm_f(1));
436 static void emit_log_noalias( struct brw_vs_compile
*c
,
438 struct brw_reg arg0
)
440 struct brw_compile
*p
= &c
->func
;
441 struct brw_reg tmp
= dst
;
442 struct brw_reg tmp_ud
= retype(tmp
, BRW_REGISTER_TYPE_UD
);
443 struct brw_reg arg0_ud
= retype(arg0
, BRW_REGISTER_TYPE_UD
);
444 GLboolean need_tmp
= (dst
.dw1
.bits
.writemask
!= 0xf ||
445 dst
.file
!= BRW_GENERAL_REGISTER_FILE
);
449 tmp_ud
= retype(tmp
, BRW_REGISTER_TYPE_UD
);
452 /* Perform mant = frexpf(fabsf(x), &exp), adjust exp and mnt
455 * These almost look likey they could be joined up, but not really
458 * result[0].f = (x.i & ((1<<31)-1) >> 23) - 127
459 * result[1].i = (x.i & ((1<<23)-1) + (127<<23)
461 if (dst
.dw1
.bits
.writemask
& WRITEMASK_XZ
) {
463 brw_writemask(tmp_ud
, WRITEMASK_X
),
464 brw_swizzle1(arg0_ud
, 0),
465 brw_imm_ud((1U<<31)-1));
468 brw_writemask(tmp_ud
, WRITEMASK_X
),
473 brw_writemask(tmp
, WRITEMASK_X
),
474 retype(tmp_ud
, BRW_REGISTER_TYPE_D
), /* does it matter? */
478 if (dst
.dw1
.bits
.writemask
& WRITEMASK_YZ
) {
480 brw_writemask(tmp_ud
, WRITEMASK_Y
),
481 brw_swizzle1(arg0_ud
, 0),
482 brw_imm_ud((1<<23)-1));
485 brw_writemask(tmp_ud
, WRITEMASK_Y
),
487 brw_imm_ud(127<<23));
490 if (dst
.dw1
.bits
.writemask
& WRITEMASK_Z
) {
491 /* result[2] = result[0] + LOG2(result[1]); */
493 /* Why bother? The above is just a hint how to do this with a
494 * taylor series. Maybe we *should* use a taylor series as by
495 * the time all the above has been done it's almost certainly
496 * quicker than calling the mathbox, even with low precision.
499 * - result[0] + mathbox.LOG2(result[1])
500 * - mathbox.LOG2(arg0.x)
501 * - result[0] + inline_taylor_approx(result[1])
504 BRW_MATH_FUNCTION_LOG
,
505 brw_writemask(tmp
, WRITEMASK_Z
),
506 brw_swizzle1(tmp
, 1),
507 BRW_MATH_PRECISION_FULL
);
510 brw_writemask(tmp
, WRITEMASK_Z
),
511 brw_swizzle1(tmp
, 2),
512 brw_swizzle1(tmp
, 0));
515 if (dst
.dw1
.bits
.writemask
& WRITEMASK_W
) {
516 /* result[3] = 1.0; */
517 brw_MOV(p
, brw_writemask(tmp
, WRITEMASK_W
), brw_imm_f(1));
521 brw_MOV(p
, dst
, tmp
);
529 /* Need to unalias - consider swizzles: r0 = DST r0.xxxx r1
531 static void emit_dst_noalias( struct brw_vs_compile
*c
,
536 struct brw_compile
*p
= &c
->func
;
538 /* There must be a better way to do this:
540 if (dst
.dw1
.bits
.writemask
& WRITEMASK_X
)
541 brw_MOV(p
, brw_writemask(dst
, WRITEMASK_X
), brw_imm_f(1.0));
542 if (dst
.dw1
.bits
.writemask
& WRITEMASK_Y
)
543 brw_MUL(p
, brw_writemask(dst
, WRITEMASK_Y
), arg0
, arg1
);
544 if (dst
.dw1
.bits
.writemask
& WRITEMASK_Z
)
545 brw_MOV(p
, brw_writemask(dst
, WRITEMASK_Z
), arg0
);
546 if (dst
.dw1
.bits
.writemask
& WRITEMASK_W
)
547 brw_MOV(p
, brw_writemask(dst
, WRITEMASK_W
), arg1
);
550 static void emit_xpd( struct brw_compile
*p
,
555 brw_MUL(p
, brw_null_reg(), brw_swizzle(t
, 1,2,0,3), brw_swizzle(u
,2,0,1,3));
556 brw_MAC(p
, dst
, negate(brw_swizzle(t
, 2,0,1,3)), brw_swizzle(u
,1,2,0,3));
561 static void emit_lit_noalias( struct brw_vs_compile
*c
,
563 struct brw_reg arg0
)
565 struct brw_compile
*p
= &c
->func
;
566 struct brw_instruction
*if_insn
;
567 struct brw_reg tmp
= dst
;
568 GLboolean need_tmp
= (dst
.file
!= BRW_GENERAL_REGISTER_FILE
);
573 brw_MOV(p
, brw_writemask(dst
, WRITEMASK_YZ
), brw_imm_f(0));
574 brw_MOV(p
, brw_writemask(dst
, WRITEMASK_XW
), brw_imm_f(1));
576 /* Need to use BRW_EXECUTE_8 and also do an 8-wide compare in order
577 * to get all channels active inside the IF. In the clipping code
578 * we run with NoMask, so it's not an option and we can use
579 * BRW_EXECUTE_1 for all comparisions.
581 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_G
, brw_swizzle1(arg0
,0), brw_imm_f(0));
582 if_insn
= brw_IF(p
, BRW_EXECUTE_8
);
584 brw_MOV(p
, brw_writemask(dst
, WRITEMASK_Y
), brw_swizzle1(arg0
,0));
586 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_G
, brw_swizzle1(arg0
,1), brw_imm_f(0));
587 brw_MOV(p
, brw_writemask(tmp
, WRITEMASK_Z
), brw_swizzle1(arg0
,1));
588 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
591 BRW_MATH_FUNCTION_POW
,
592 brw_writemask(dst
, WRITEMASK_Z
),
593 brw_swizzle1(tmp
, 2),
594 brw_swizzle1(arg0
, 3),
595 BRW_MATH_PRECISION_PARTIAL
);
598 brw_ENDIF(p
, if_insn
);
605 /* TODO: relative addressing!
607 static struct brw_reg
get_reg( struct brw_vs_compile
*c
,
613 case PROGRAM_TEMPORARY
:
616 assert(c
->regs
[file
][index
].nr
!= 0);
617 return c
->regs
[file
][index
];
618 case PROGRAM_STATE_VAR
:
619 case PROGRAM_CONSTANT
:
620 case PROGRAM_UNIFORM
:
621 assert(c
->regs
[PROGRAM_STATE_VAR
][index
].nr
!= 0);
622 return c
->regs
[PROGRAM_STATE_VAR
][index
];
623 case PROGRAM_ADDRESS
:
625 return c
->regs
[file
][index
];
627 case PROGRAM_UNDEFINED
: /* undef values */
628 return brw_null_reg();
630 case PROGRAM_LOCAL_PARAM
:
631 case PROGRAM_ENV_PARAM
:
632 case PROGRAM_WRITE_ONLY
:
635 return brw_null_reg();
641 static struct brw_reg
deref( struct brw_vs_compile
*c
,
645 struct brw_compile
*p
= &c
->func
;
646 struct brw_reg tmp
= vec4(get_tmp(c
));
647 struct brw_reg vp_address
= retype(vec1(get_reg(c
, PROGRAM_ADDRESS
, 0)), BRW_REGISTER_TYPE_UW
);
648 GLuint byte_offset
= arg
.nr
* 32 + arg
.subnr
+ offset
* 16;
649 struct brw_reg indirect
= brw_vec4_indirect(0,0);
652 brw_push_insn_state(p
);
653 brw_set_access_mode(p
, BRW_ALIGN_1
);
655 /* This is pretty clunky - load the address register twice and
656 * fetch each 4-dword value in turn. There must be a way to do
657 * this in a single pass, but I couldn't get it to work.
659 brw_ADD(p
, brw_address_reg(0), vp_address
, brw_imm_d(byte_offset
));
660 brw_MOV(p
, tmp
, indirect
);
662 brw_ADD(p
, brw_address_reg(0), suboffset(vp_address
, 8), brw_imm_d(byte_offset
));
663 brw_MOV(p
, suboffset(tmp
, 4), indirect
);
665 brw_pop_insn_state(p
);
672 static void emit_arl( struct brw_vs_compile
*c
,
674 struct brw_reg arg0
)
676 struct brw_compile
*p
= &c
->func
;
677 struct brw_reg tmp
= dst
;
678 GLboolean need_tmp
= (dst
.file
!= BRW_GENERAL_REGISTER_FILE
);
683 brw_RNDD(p
, tmp
, arg0
);
684 brw_MUL(p
, dst
, tmp
, brw_imm_d(16));
691 /* Will return mangled results for SWZ op. The emit_swz() function
692 * ignores this result and recalculates taking extended swizzles into
695 static struct brw_reg
get_arg( struct brw_vs_compile
*c
,
696 struct prog_src_register
*src
)
700 if (src
->File
== PROGRAM_UNDEFINED
)
701 return brw_null_reg();
704 reg
= deref(c
, c
->regs
[PROGRAM_STATE_VAR
][0], src
->Index
);
706 reg
= get_reg(c
, src
->File
, src
->Index
);
708 /* Convert 3-bit swizzle to 2-bit.
710 reg
.dw1
.bits
.swizzle
= BRW_SWIZZLE4(GET_SWZ(src
->Swizzle
, 0),
711 GET_SWZ(src
->Swizzle
, 1),
712 GET_SWZ(src
->Swizzle
, 2),
713 GET_SWZ(src
->Swizzle
, 3));
715 /* Note this is ok for non-swizzle instructions:
717 reg
.negate
= src
->NegateBase
? 1 : 0;
723 static struct brw_reg
get_dst( struct brw_vs_compile
*c
,
724 struct prog_dst_register dst
)
726 struct brw_reg reg
= get_reg(c
, dst
.File
, dst
.Index
);
728 reg
.dw1
.bits
.writemask
= dst
.WriteMask
;
736 static void emit_swz( struct brw_vs_compile
*c
,
738 struct prog_src_register src
)
740 struct brw_compile
*p
= &c
->func
;
741 GLuint zeros_mask
= 0;
742 GLuint ones_mask
= 0;
745 GLboolean need_tmp
= (src
.NegateBase
&&
746 dst
.file
!= BRW_GENERAL_REGISTER_FILE
);
747 struct brw_reg tmp
= dst
;
753 for (i
= 0; i
< 4; i
++) {
754 if (dst
.dw1
.bits
.writemask
& (1<<i
)) {
755 GLubyte s
= GET_SWZ(src
.Swizzle
, i
);
774 /* Do src first, in case dst aliases src:
780 arg0
= deref(c
, c
->regs
[PROGRAM_STATE_VAR
][0], src
.Index
);
782 arg0
= get_reg(c
, src
.File
, src
.Index
);
784 arg0
= brw_swizzle(arg0
,
785 src_swz
[0], src_swz
[1],
786 src_swz
[2], src_swz
[3]);
788 brw_MOV(p
, brw_writemask(tmp
, src_mask
), arg0
);
792 brw_MOV(p
, brw_writemask(tmp
, zeros_mask
), brw_imm_f(0));
795 brw_MOV(p
, brw_writemask(tmp
, ones_mask
), brw_imm_f(1));
798 brw_MOV(p
, brw_writemask(tmp
, src
.NegateBase
), negate(tmp
));
801 brw_MOV(p
, dst
, tmp
);
808 /* Post-vertex-program processing. Send the results to the URB.
810 static void emit_vertex_write( struct brw_vs_compile
*c
)
812 struct brw_compile
*p
= &c
->func
;
813 struct brw_reg m0
= brw_message_reg(0);
814 struct brw_reg pos
= c
->regs
[PROGRAM_OUTPUT
][VERT_RESULT_HPOS
];
817 if (c
->key
.copy_edgeflag
) {
819 get_reg(c
, PROGRAM_OUTPUT
, VERT_RESULT_EDGE
),
820 get_reg(c
, PROGRAM_INPUT
, VERT_ATTRIB_EDGEFLAG
));
824 /* Build ndc coords? TODO: Shortcircuit when w is known to be one.
826 if (!c
->key
.know_w_is_one
) {
828 emit_math1(c
, BRW_MATH_FUNCTION_INV
, ndc
, brw_swizzle1(pos
, 3), BRW_MATH_PRECISION_FULL
);
829 brw_MUL(p
, brw_writemask(ndc
, WRITEMASK_XYZ
), pos
, ndc
);
835 /* This includes the workaround for -ve rhw, so is no longer an
838 if ((c
->prog_data
.outputs_written
& (1<<VERT_RESULT_PSIZ
)) ||
839 c
->key
.nr_userclip
||
840 !c
->key
.know_w_is_one
)
842 struct brw_reg header1
= retype(get_tmp(c
), BRW_REGISTER_TYPE_UD
);
845 brw_MOV(p
, header1
, brw_imm_ud(0));
847 brw_set_access_mode(p
, BRW_ALIGN_16
);
849 if (c
->prog_data
.outputs_written
& (1<<VERT_RESULT_PSIZ
)) {
850 struct brw_reg psiz
= c
->regs
[PROGRAM_OUTPUT
][VERT_RESULT_PSIZ
];
851 brw_MUL(p
, brw_writemask(header1
, WRITEMASK_W
), brw_swizzle1(psiz
, 0), brw_imm_f(1<<11));
852 brw_AND(p
, brw_writemask(header1
, WRITEMASK_W
), header1
, brw_imm_ud(0x7ff<<8));
856 for (i
= 0; i
< c
->key
.nr_userclip
; i
++) {
857 brw_set_conditionalmod(p
, BRW_CONDITIONAL_L
);
858 brw_DP4(p
, brw_null_reg(), pos
, c
->userplane
[i
]);
859 brw_OR(p
, brw_writemask(header1
, WRITEMASK_W
), header1
, brw_imm_ud(1<<i
));
860 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
864 /* i965 clipping workaround:
865 * 1) Test for -ve rhw
867 * set ndc = (0,0,0,0)
870 * Later, clipping will detect ucp[6] and ensure the primitive is
871 * clipped against all fixed planes.
873 if (!c
->key
.know_w_is_one
) {
875 vec8(brw_null_reg()),
877 brw_swizzle1(ndc
, 3),
880 brw_OR(p
, brw_writemask(header1
, WRITEMASK_W
), header1
, brw_imm_ud(1<<6));
881 brw_MOV(p
, ndc
, brw_imm_f(0));
882 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
885 brw_set_access_mode(p
, BRW_ALIGN_1
); /* why? */
886 brw_MOV(p
, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD
), header1
);
887 brw_set_access_mode(p
, BRW_ALIGN_16
);
889 release_tmp(c
, header1
);
892 brw_MOV(p
, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD
), brw_imm_ud(0));
896 /* Emit the (interleaved) headers for the two vertices - an 8-reg
897 * of zeros followed by two sets of NDC coordinates:
899 brw_set_access_mode(p
, BRW_ALIGN_1
);
900 brw_MOV(p
, offset(m0
, 2), ndc
);
901 brw_MOV(p
, offset(m0
, 3), pos
);
905 brw_null_reg(), /* dest */
906 0, /* starting mrf reg nr */
910 c
->nr_outputs
+ 3, /* msg len */
911 0, /* response len */
913 1, /* writes complete */
914 0, /* urb destination offset */
915 BRW_URB_SWIZZLE_INTERLEAVE
);
920 post_vs_emit( struct brw_vs_compile
*c
, struct brw_instruction
*end_inst
)
922 GLuint nr_insns
= c
->vp
->program
.Base
.NumInstructions
;
923 GLuint insn
, target_insn
;
924 struct prog_instruction
*inst1
, *inst2
;
925 struct brw_instruction
*brw_inst1
, *brw_inst2
;
927 for (insn
= 0; insn
< nr_insns
; insn
++) {
928 inst1
= &c
->vp
->program
.Base
.Instructions
[insn
];
929 brw_inst1
= inst1
->Data
;
930 switch (inst1
->Opcode
) {
933 target_insn
= inst1
->BranchTarget
;
934 inst2
= &c
->vp
->program
.Base
.Instructions
[target_insn
];
935 brw_inst2
= inst2
->Data
;
936 offset
= brw_inst2
- brw_inst1
;
937 brw_set_src1(brw_inst1
, brw_imm_d(offset
*16));
940 offset
= end_inst
- brw_inst1
;
941 brw_set_src1(brw_inst1
, brw_imm_d(offset
*16));
949 /* Emit the vertex program instructions here.
951 void brw_vs_emit(struct brw_vs_compile
*c
)
954 struct brw_compile
*p
= &c
->func
;
955 GLuint nr_insns
= c
->vp
->program
.Base
.NumInstructions
;
956 GLuint insn
, if_insn
= 0;
957 struct brw_instruction
*end_inst
;
958 struct brw_instruction
*if_inst
[MAX_IFSN
];
959 struct brw_indirect stack_index
= brw_indirect(0, 0);
964 if (INTEL_DEBUG
& DEBUG_VS
) {
965 _mesa_printf("\n\n\nvs-emit:\n");
966 _mesa_print_program(&c
->vp
->program
.Base
);
970 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
971 brw_set_access_mode(p
, BRW_ALIGN_16
);
973 /* Message registers can't be read, so copy the output into GRF register
974 if they are used in source registers */
975 for (insn
= 0; insn
< nr_insns
; insn
++) {
977 struct prog_instruction
*inst
= &c
->vp
->program
.Base
.Instructions
[insn
];
978 for (i
= 0; i
< 3; i
++) {
979 struct prog_src_register
*src
= &inst
->SrcReg
[i
];
980 GLuint index
= src
->Index
;
981 GLuint file
= src
->File
;
982 if (file
== PROGRAM_OUTPUT
&& index
!= VERT_RESULT_HPOS
)
983 c
->output_regs
[index
].used_in_src
= GL_TRUE
;
987 /* Static register allocation
989 brw_vs_alloc_regs(c
);
990 brw_MOV(p
, get_addr_reg(stack_index
), brw_address(c
->stack
));
992 for (insn
= 0; insn
< nr_insns
; insn
++) {
994 struct prog_instruction
*inst
= &c
->vp
->program
.Base
.Instructions
[insn
];
995 struct brw_reg args
[3], dst
;
998 /* Get argument regs. SWZ is special and does this itself.
1000 inst
->Data
= &p
->store
[p
->nr_insn
];
1001 if (inst
->Opcode
!= OPCODE_SWZ
)
1002 for (i
= 0; i
< 3; i
++) {
1003 struct prog_src_register
*src
= &inst
->SrcReg
[i
];
1006 if (file
== PROGRAM_OUTPUT
&&c
->output_regs
[index
].used_in_src
)
1007 args
[i
] = c
->output_regs
[index
].reg
;
1009 args
[i
] = get_arg(c
, src
);
1012 /* Get dest regs. Note that it is possible for a reg to be both
1013 * dst and arg, given the static allocation of registers. So
1014 * care needs to be taken emitting multi-operation instructions.
1016 index
= inst
->DstReg
.Index
;
1017 file
= inst
->DstReg
.File
;
1018 if (file
== PROGRAM_OUTPUT
&& c
->output_regs
[index
].used_in_src
)
1019 dst
= c
->output_regs
[index
].reg
;
1021 dst
= get_dst(c
, inst
->DstReg
);
1023 switch (inst
->Opcode
) {
1025 brw_MOV(p
, dst
, brw_abs(args
[0]));
1028 brw_ADD(p
, dst
, args
[0], args
[1]);
1031 brw_DP3(p
, dst
, args
[0], args
[1]);
1034 brw_DP4(p
, dst
, args
[0], args
[1]);
1037 brw_DPH(p
, dst
, args
[0], args
[1]);
1040 unalias2(c
, dst
, args
[0], args
[1], emit_dst_noalias
);
1043 unalias1(c
, dst
, args
[0], emit_exp_noalias
);
1046 emit_math1(c
, BRW_MATH_FUNCTION_EXP
, dst
, args
[0], BRW_MATH_PRECISION_FULL
);
1049 emit_arl(c
, dst
, args
[0]);
1052 brw_RNDD(p
, dst
, args
[0]);
1055 brw_FRC(p
, dst
, args
[0]);
1058 unalias1(c
, dst
, args
[0], emit_log_noalias
);
1061 emit_math1(c
, BRW_MATH_FUNCTION_LOG
, dst
, args
[0], BRW_MATH_PRECISION_FULL
);
1064 unalias1(c
, dst
, args
[0], emit_lit_noalias
);
1067 brw_MOV(p
, brw_acc_reg(), args
[2]);
1068 brw_MAC(p
, dst
, args
[0], args
[1]);
1071 emit_max(p
, dst
, args
[0], args
[1]);
1074 emit_min(p
, dst
, args
[0], args
[1]);
1077 brw_MOV(p
, dst
, args
[0]);
1080 brw_MUL(p
, dst
, args
[0], args
[1]);
1083 emit_math2(c
, BRW_MATH_FUNCTION_POW
, dst
, args
[0], args
[1], BRW_MATH_PRECISION_FULL
);
1086 emit_math1(c
, BRW_MATH_FUNCTION_INV
, dst
, args
[0], BRW_MATH_PRECISION_FULL
);
1089 emit_math1(c
, BRW_MATH_FUNCTION_RSQ
, dst
, args
[0], BRW_MATH_PRECISION_FULL
);
1093 emit_seq(p
, dst
, args
[0], args
[1]);
1096 emit_sne(p
, dst
, args
[0], args
[1]);
1099 emit_sge(p
, dst
, args
[0], args
[1]);
1102 emit_sgt(p
, dst
, args
[0], args
[1]);
1105 emit_slt(p
, dst
, args
[0], args
[1]);
1108 emit_sle(p
, dst
, args
[0], args
[1]);
1111 brw_ADD(p
, dst
, args
[0], negate(args
[1]));
1114 /* The args[0] value can't be used here as it won't have
1115 * correctly encoded the full swizzle:
1117 emit_swz(c
, dst
, inst
->SrcReg
[0] );
1120 emit_xpd(p
, dst
, args
[0], args
[1]);
1123 assert(if_insn
< MAX_IFSN
);
1124 if_inst
[if_insn
++] = brw_IF(p
, BRW_EXECUTE_8
);
1127 if_inst
[if_insn
-1] = brw_ELSE(p
, if_inst
[if_insn
-1]);
1130 assert(if_insn
> 0);
1131 brw_ENDIF(p
, if_inst
[--if_insn
]);
1134 brw_set_predicate_control(p
, BRW_PREDICATE_NORMAL
);
1135 brw_ADD(p
, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
1136 brw_set_predicate_control_flag_value(p
, 0xff);
1139 brw_set_access_mode(p
, BRW_ALIGN_1
);
1140 brw_ADD(p
, deref_1uw(stack_index
, 0), brw_ip_reg(), brw_imm_d(3*16));
1141 brw_set_access_mode(p
, BRW_ALIGN_16
);
1142 brw_ADD(p
, get_addr_reg(stack_index
),
1143 get_addr_reg(stack_index
), brw_imm_d(4));
1144 inst
->Data
= &p
->store
[p
->nr_insn
];
1145 brw_ADD(p
, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
1148 brw_ADD(p
, get_addr_reg(stack_index
),
1149 get_addr_reg(stack_index
), brw_imm_d(-4));
1150 brw_set_access_mode(p
, BRW_ALIGN_1
);
1151 brw_MOV(p
, brw_ip_reg(), deref_1uw(stack_index
, 0));
1152 brw_set_access_mode(p
, BRW_ALIGN_16
);
1154 brw_ADD(p
, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
1161 _mesa_printf("Unsupport opcode %d in vertex shader\n", inst
->Opcode
);
1165 if (inst
->DstReg
.File
== PROGRAM_OUTPUT
1166 &&inst
->DstReg
.Index
!= VERT_RESULT_HPOS
1167 &&c
->output_regs
[inst
->DstReg
.Index
].used_in_src
)
1168 brw_MOV(p
, get_dst(c
, inst
->DstReg
), dst
);
1173 end_inst
= &p
->store
[p
->nr_insn
];
1174 emit_vertex_write(c
);
1175 post_vs_emit(c
, end_inst
);
1176 for (insn
= 0; insn
< nr_insns
; insn
++)
1177 c
->vp
->program
.Base
.Instructions
[insn
].Data
= NULL
;