2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **********************************************************************/
29 * Keith Whitwell <keith@tungstengraphics.com>
33 #include "main/macros.h"
34 #include "shader/program.h"
35 #include "shader/prog_parameter.h"
36 #include "shader/prog_print.h"
37 #include "brw_context.h"
42 /* Do things as simply as possible. Allocate and populate all regs
45 static void brw_vs_alloc_regs( struct brw_vs_compile
*c
)
47 GLuint i
, reg
= 0, mrf
;
50 /* r0 -- reserved as usual
52 c
->r0
= brw_vec8_grf(reg
, 0); reg
++;
54 /* User clip planes from curbe:
56 if (c
->key
.nr_userclip
) {
57 for (i
= 0; i
< c
->key
.nr_userclip
; i
++) {
58 c
->userplane
[i
] = stride( brw_vec4_grf(reg
+3+i
/2, (i
%2) * 4), 0, 4, 1);
61 /* Deal with curbe alignment:
63 reg
+= ((6+c
->key
.nr_userclip
+3)/4)*2;
66 /* Vertex program parameters from curbe:
68 nr_params
= c
->vp
->program
.Base
.Parameters
->NumParameters
;
69 for (i
= 0; i
< nr_params
; i
++) {
70 c
->regs
[PROGRAM_STATE_VAR
][i
] = stride( brw_vec4_grf(reg
+i
/2, (i
%2) * 4), 0, 4, 1);
72 reg
+= (nr_params
+1)/2;
74 c
->prog_data
.curb_read_length
= reg
- 1;
78 /* Allocate input regs:
81 for (i
= 0; i
< VERT_ATTRIB_MAX
; i
++) {
82 if (c
->prog_data
.inputs_read
& (1<<i
)) {
84 c
->regs
[PROGRAM_INPUT
][i
] = brw_vec8_grf(reg
, 0);
90 /* Allocate outputs: TODO: could organize the non-position outputs
91 * to go straight into message regs.
94 c
->first_output
= reg
;
96 for (i
= 0; i
< VERT_RESULT_MAX
; i
++) {
97 if (c
->prog_data
.outputs_written
& (1<<i
)) {
99 if (i
== VERT_RESULT_HPOS
) {
100 c
->regs
[PROGRAM_OUTPUT
][i
] = brw_vec8_grf(reg
, 0);
103 else if (i
== VERT_RESULT_PSIZ
) {
104 c
->regs
[PROGRAM_OUTPUT
][i
] = brw_vec8_grf(reg
, 0);
106 mrf
++; /* just a placeholder? XXX fix later stages & remove this */
109 c
->regs
[PROGRAM_OUTPUT
][i
] = brw_message_reg(mrf
);
115 /* Allocate program temporaries:
117 for (i
= 0; i
< c
->vp
->program
.Base
.NumTemporaries
; i
++) {
118 c
->regs
[PROGRAM_TEMPORARY
][i
] = brw_vec8_grf(reg
, 0);
122 /* Address reg(s). Don't try to use the internal address reg until
125 for (i
= 0; i
< c
->vp
->program
.Base
.NumAddressRegs
; i
++) {
126 c
->regs
[PROGRAM_ADDRESS
][i
] = brw_reg(BRW_GENERAL_REGISTER_FILE
,
130 BRW_VERTICAL_STRIDE_8
,
132 BRW_HORIZONTAL_STRIDE_1
,
138 for (i
= 0; i
< 128; i
++) {
139 if (c
->output_regs
[i
].used_in_src
) {
140 c
->output_regs
[i
].reg
= brw_vec8_grf(reg
, 0);
145 c
->stack
= brw_uw16_reg(BRW_GENERAL_REGISTER_FILE
, reg
, 0);
149 /* Some opcodes need an internal temporary:
152 c
->last_tmp
= reg
; /* for allocation purposes */
154 /* Each input reg holds data from two vertices. The
155 * urb_read_length is the number of registers read from *each*
156 * vertex urb, so is half the amount:
158 c
->prog_data
.urb_read_length
= (c
->nr_inputs
+1)/2;
160 c
->prog_data
.urb_entry_size
= (c
->nr_outputs
+2+3)/4;
161 c
->prog_data
.total_grf
= reg
;
165 static struct brw_reg
get_tmp( struct brw_vs_compile
*c
)
167 struct brw_reg tmp
= brw_vec8_grf(c
->last_tmp
, 0);
169 if (++c
->last_tmp
> c
->prog_data
.total_grf
)
170 c
->prog_data
.total_grf
= c
->last_tmp
;
175 static void release_tmp( struct brw_vs_compile
*c
, struct brw_reg tmp
)
177 if (tmp
.nr
== c
->last_tmp
-1)
181 static void release_tmps( struct brw_vs_compile
*c
)
183 c
->last_tmp
= c
->first_tmp
;
187 static void unalias1( struct brw_vs_compile
*c
,
190 void (*func
)( struct brw_vs_compile
*,
194 if (dst
.file
== arg0
.file
&& dst
.nr
== arg0
.nr
) {
195 struct brw_compile
*p
= &c
->func
;
196 struct brw_reg tmp
= brw_writemask(get_tmp(c
), dst
.dw1
.bits
.writemask
);
198 brw_MOV(p
, dst
, tmp
);
205 static void unalias2( struct brw_vs_compile
*c
,
209 void (*func
)( struct brw_vs_compile
*,
214 if ((dst
.file
== arg0
.file
&& dst
.nr
== arg0
.nr
) ||
215 (dst
.file
== arg1
.file
&& dst
.nr
== arg1
.nr
)) {
216 struct brw_compile
*p
= &c
->func
;
217 struct brw_reg tmp
= brw_writemask(get_tmp(c
), dst
.dw1
.bits
.writemask
);
218 func(c
, tmp
, arg0
, arg1
);
219 brw_MOV(p
, dst
, tmp
);
222 func(c
, dst
, arg0
, arg1
);
226 static void emit_sop( struct brw_compile
*p
,
232 brw_MOV(p
, dst
, brw_imm_f(0.0f
));
233 brw_CMP(p
, brw_null_reg(), cond
, arg0
, arg1
);
234 brw_MOV(p
, dst
, brw_imm_f(1.0f
));
235 brw_set_predicate_control_flag_value(p
, 0xff);
238 static void emit_seq( struct brw_compile
*p
,
241 struct brw_reg arg1
)
243 emit_sop(p
, dst
, arg0
, arg1
, BRW_CONDITIONAL_EQ
);
246 static void emit_sne( struct brw_compile
*p
,
249 struct brw_reg arg1
)
251 emit_sop(p
, dst
, arg0
, arg1
, BRW_CONDITIONAL_NEQ
);
253 static void emit_slt( struct brw_compile
*p
,
256 struct brw_reg arg1
)
258 emit_sop(p
, dst
, arg0
, arg1
, BRW_CONDITIONAL_L
);
261 static void emit_sle( struct brw_compile
*p
,
264 struct brw_reg arg1
)
266 emit_sop(p
, dst
, arg0
, arg1
, BRW_CONDITIONAL_LE
);
269 static void emit_sgt( struct brw_compile
*p
,
272 struct brw_reg arg1
)
274 emit_sop(p
, dst
, arg0
, arg1
, BRW_CONDITIONAL_G
);
277 static void emit_sge( struct brw_compile
*p
,
280 struct brw_reg arg1
)
282 emit_sop(p
, dst
, arg0
, arg1
, BRW_CONDITIONAL_GE
);
285 static void emit_max( struct brw_compile
*p
,
288 struct brw_reg arg1
)
290 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, arg0
, arg1
);
291 brw_SEL(p
, dst
, arg1
, arg0
);
292 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
295 static void emit_min( struct brw_compile
*p
,
298 struct brw_reg arg1
)
300 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, arg0
, arg1
);
301 brw_SEL(p
, dst
, arg0
, arg1
);
302 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
306 static void emit_math1( struct brw_vs_compile
*c
,
312 /* There are various odd behaviours with SEND on the simulator. In
313 * addition there are documented issues with the fact that the GEN4
314 * processor doesn't do dependency control properly on SEND
315 * results. So, on balance, this kludge to get around failures
316 * with writemasked math results looks like it might be necessary
317 * whether that turns out to be a simulator bug or not:
319 struct brw_compile
*p
= &c
->func
;
320 struct brw_reg tmp
= dst
;
321 GLboolean need_tmp
= (dst
.dw1
.bits
.writemask
!= 0xf ||
322 dst
.file
!= BRW_GENERAL_REGISTER_FILE
);
330 BRW_MATH_SATURATE_NONE
,
333 BRW_MATH_DATA_SCALAR
,
337 brw_MOV(p
, dst
, tmp
);
342 static void emit_math2( struct brw_vs_compile
*c
,
349 struct brw_compile
*p
= &c
->func
;
350 struct brw_reg tmp
= dst
;
351 GLboolean need_tmp
= (dst
.dw1
.bits
.writemask
!= 0xf ||
352 dst
.file
!= BRW_GENERAL_REGISTER_FILE
);
357 brw_MOV(p
, brw_message_reg(3), arg1
);
362 BRW_MATH_SATURATE_NONE
,
365 BRW_MATH_DATA_SCALAR
,
369 brw_MOV(p
, dst
, tmp
);
376 static void emit_exp_noalias( struct brw_vs_compile
*c
,
378 struct brw_reg arg0
)
380 struct brw_compile
*p
= &c
->func
;
383 if (dst
.dw1
.bits
.writemask
& WRITEMASK_X
) {
384 struct brw_reg tmp
= get_tmp(c
);
385 struct brw_reg tmp_d
= retype(tmp
, BRW_REGISTER_TYPE_D
);
387 /* tmp_d = floor(arg0.x) */
388 brw_RNDD(p
, tmp_d
, brw_swizzle1(arg0
, 0));
390 /* result[0] = 2.0 ^ tmp */
392 /* Adjust exponent for floating point:
395 brw_ADD(p
, brw_writemask(tmp_d
, WRITEMASK_X
), tmp_d
, brw_imm_d(127));
397 /* Install exponent and sign.
398 * Excess drops off the edge:
400 brw_SHL(p
, brw_writemask(retype(dst
, BRW_REGISTER_TYPE_D
), WRITEMASK_X
),
401 tmp_d
, brw_imm_d(23));
406 if (dst
.dw1
.bits
.writemask
& WRITEMASK_Y
) {
407 /* result[1] = arg0.x - floor(arg0.x) */
408 brw_FRC(p
, brw_writemask(dst
, WRITEMASK_Y
), brw_swizzle1(arg0
, 0));
411 if (dst
.dw1
.bits
.writemask
& WRITEMASK_Z
) {
412 /* As with the LOG instruction, we might be better off just
413 * doing a taylor expansion here, seeing as we have to do all
416 * If mathbox partial precision is too low, consider also:
417 * result[3] = result[0] * EXP(result[1])
420 BRW_MATH_FUNCTION_EXP
,
421 brw_writemask(dst
, WRITEMASK_Z
),
422 brw_swizzle1(arg0
, 0),
423 BRW_MATH_PRECISION_PARTIAL
);
426 if (dst
.dw1
.bits
.writemask
& WRITEMASK_W
) {
427 /* result[3] = 1.0; */
428 brw_MOV(p
, brw_writemask(dst
, WRITEMASK_W
), brw_imm_f(1));
433 static void emit_log_noalias( struct brw_vs_compile
*c
,
435 struct brw_reg arg0
)
437 struct brw_compile
*p
= &c
->func
;
438 struct brw_reg tmp
= dst
;
439 struct brw_reg tmp_ud
= retype(tmp
, BRW_REGISTER_TYPE_UD
);
440 struct brw_reg arg0_ud
= retype(arg0
, BRW_REGISTER_TYPE_UD
);
441 GLboolean need_tmp
= (dst
.dw1
.bits
.writemask
!= 0xf ||
442 dst
.file
!= BRW_GENERAL_REGISTER_FILE
);
446 tmp_ud
= retype(tmp
, BRW_REGISTER_TYPE_UD
);
449 /* Perform mant = frexpf(fabsf(x), &exp), adjust exp and mnt
452 * These almost look like they could be joined up, but not really
455 * result[0].f = (x.i & ((1<<31)-1) >> 23) - 127
456 * result[1].i = (x.i & ((1<<23)-1) + (127<<23)
458 if (dst
.dw1
.bits
.writemask
& WRITEMASK_XZ
) {
460 brw_writemask(tmp_ud
, WRITEMASK_X
),
461 brw_swizzle1(arg0_ud
, 0),
462 brw_imm_ud((1U<<31)-1));
465 brw_writemask(tmp_ud
, WRITEMASK_X
),
470 brw_writemask(tmp
, WRITEMASK_X
),
471 retype(tmp_ud
, BRW_REGISTER_TYPE_D
), /* does it matter? */
475 if (dst
.dw1
.bits
.writemask
& WRITEMASK_YZ
) {
477 brw_writemask(tmp_ud
, WRITEMASK_Y
),
478 brw_swizzle1(arg0_ud
, 0),
479 brw_imm_ud((1<<23)-1));
482 brw_writemask(tmp_ud
, WRITEMASK_Y
),
484 brw_imm_ud(127<<23));
487 if (dst
.dw1
.bits
.writemask
& WRITEMASK_Z
) {
488 /* result[2] = result[0] + LOG2(result[1]); */
490 /* Why bother? The above is just a hint how to do this with a
491 * taylor series. Maybe we *should* use a taylor series as by
492 * the time all the above has been done it's almost certainly
493 * quicker than calling the mathbox, even with low precision.
496 * - result[0] + mathbox.LOG2(result[1])
497 * - mathbox.LOG2(arg0.x)
498 * - result[0] + inline_taylor_approx(result[1])
501 BRW_MATH_FUNCTION_LOG
,
502 brw_writemask(tmp
, WRITEMASK_Z
),
503 brw_swizzle1(tmp
, 1),
504 BRW_MATH_PRECISION_FULL
);
507 brw_writemask(tmp
, WRITEMASK_Z
),
508 brw_swizzle1(tmp
, 2),
509 brw_swizzle1(tmp
, 0));
512 if (dst
.dw1
.bits
.writemask
& WRITEMASK_W
) {
513 /* result[3] = 1.0; */
514 brw_MOV(p
, brw_writemask(tmp
, WRITEMASK_W
), brw_imm_f(1));
518 brw_MOV(p
, dst
, tmp
);
526 /* Need to unalias - consider swizzles: r0 = DST r0.xxxx r1
528 static void emit_dst_noalias( struct brw_vs_compile
*c
,
533 struct brw_compile
*p
= &c
->func
;
535 /* There must be a better way to do this:
537 if (dst
.dw1
.bits
.writemask
& WRITEMASK_X
)
538 brw_MOV(p
, brw_writemask(dst
, WRITEMASK_X
), brw_imm_f(1.0));
539 if (dst
.dw1
.bits
.writemask
& WRITEMASK_Y
)
540 brw_MUL(p
, brw_writemask(dst
, WRITEMASK_Y
), arg0
, arg1
);
541 if (dst
.dw1
.bits
.writemask
& WRITEMASK_Z
)
542 brw_MOV(p
, brw_writemask(dst
, WRITEMASK_Z
), arg0
);
543 if (dst
.dw1
.bits
.writemask
& WRITEMASK_W
)
544 brw_MOV(p
, brw_writemask(dst
, WRITEMASK_W
), arg1
);
547 static void emit_xpd( struct brw_compile
*p
,
552 brw_MUL(p
, brw_null_reg(), brw_swizzle(t
, 1,2,0,3), brw_swizzle(u
,2,0,1,3));
553 brw_MAC(p
, dst
, negate(brw_swizzle(t
, 2,0,1,3)), brw_swizzle(u
,1,2,0,3));
558 static void emit_lit_noalias( struct brw_vs_compile
*c
,
560 struct brw_reg arg0
)
562 struct brw_compile
*p
= &c
->func
;
563 struct brw_instruction
*if_insn
;
564 struct brw_reg tmp
= dst
;
565 GLboolean need_tmp
= (dst
.file
!= BRW_GENERAL_REGISTER_FILE
);
570 brw_MOV(p
, brw_writemask(dst
, WRITEMASK_YZ
), brw_imm_f(0));
571 brw_MOV(p
, brw_writemask(dst
, WRITEMASK_XW
), brw_imm_f(1));
573 /* Need to use BRW_EXECUTE_8 and also do an 8-wide compare in order
574 * to get all channels active inside the IF. In the clipping code
575 * we run with NoMask, so it's not an option and we can use
576 * BRW_EXECUTE_1 for all comparisons.
578 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_G
, brw_swizzle1(arg0
,0), brw_imm_f(0));
579 if_insn
= brw_IF(p
, BRW_EXECUTE_8
);
581 brw_MOV(p
, brw_writemask(dst
, WRITEMASK_Y
), brw_swizzle1(arg0
,0));
583 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_G
, brw_swizzle1(arg0
,1), brw_imm_f(0));
584 brw_MOV(p
, brw_writemask(tmp
, WRITEMASK_Z
), brw_swizzle1(arg0
,1));
585 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
588 BRW_MATH_FUNCTION_POW
,
589 brw_writemask(dst
, WRITEMASK_Z
),
590 brw_swizzle1(tmp
, 2),
591 brw_swizzle1(arg0
, 3),
592 BRW_MATH_PRECISION_PARTIAL
);
595 brw_ENDIF(p
, if_insn
);
602 /* TODO: relative addressing!
604 static struct brw_reg
get_reg( struct brw_vs_compile
*c
,
610 case PROGRAM_TEMPORARY
:
613 assert(c
->regs
[file
][index
].nr
!= 0);
614 return c
->regs
[file
][index
];
615 case PROGRAM_STATE_VAR
:
616 case PROGRAM_CONSTANT
:
617 case PROGRAM_UNIFORM
:
618 assert(c
->regs
[PROGRAM_STATE_VAR
][index
].nr
!= 0);
619 return c
->regs
[PROGRAM_STATE_VAR
][index
];
620 case PROGRAM_ADDRESS
:
622 return c
->regs
[file
][index
];
624 case PROGRAM_UNDEFINED
: /* undef values */
625 return brw_null_reg();
627 case PROGRAM_LOCAL_PARAM
:
628 case PROGRAM_ENV_PARAM
:
629 case PROGRAM_WRITE_ONLY
:
632 return brw_null_reg();
638 static struct brw_reg
deref( struct brw_vs_compile
*c
,
642 struct brw_compile
*p
= &c
->func
;
643 struct brw_reg tmp
= vec4(get_tmp(c
));
644 struct brw_reg vp_address
= retype(vec1(get_reg(c
, PROGRAM_ADDRESS
, 0)), BRW_REGISTER_TYPE_UW
);
645 GLuint byte_offset
= arg
.nr
* 32 + arg
.subnr
+ offset
* 16;
646 struct brw_reg indirect
= brw_vec4_indirect(0,0);
649 brw_push_insn_state(p
);
650 brw_set_access_mode(p
, BRW_ALIGN_1
);
652 /* This is pretty clunky - load the address register twice and
653 * fetch each 4-dword value in turn. There must be a way to do
654 * this in a single pass, but I couldn't get it to work.
656 brw_ADD(p
, brw_address_reg(0), vp_address
, brw_imm_d(byte_offset
));
657 brw_MOV(p
, tmp
, indirect
);
659 brw_ADD(p
, brw_address_reg(0), suboffset(vp_address
, 8), brw_imm_d(byte_offset
));
660 brw_MOV(p
, suboffset(tmp
, 4), indirect
);
662 brw_pop_insn_state(p
);
669 static void emit_arl( struct brw_vs_compile
*c
,
671 struct brw_reg arg0
)
673 struct brw_compile
*p
= &c
->func
;
674 struct brw_reg tmp
= dst
;
675 GLboolean need_tmp
= (dst
.file
!= BRW_GENERAL_REGISTER_FILE
);
680 brw_RNDD(p
, tmp
, arg0
);
681 brw_MUL(p
, dst
, tmp
, brw_imm_d(16));
688 /* Will return mangled results for SWZ op. The emit_swz() function
689 * ignores this result and recalculates taking extended swizzles into
692 static struct brw_reg
get_arg( struct brw_vs_compile
*c
,
693 struct prog_src_register
*src
)
697 if (src
->File
== PROGRAM_UNDEFINED
)
698 return brw_null_reg();
701 reg
= deref(c
, c
->regs
[PROGRAM_STATE_VAR
][0], src
->Index
);
703 reg
= get_reg(c
, src
->File
, src
->Index
);
705 /* Convert 3-bit swizzle to 2-bit.
707 reg
.dw1
.bits
.swizzle
= BRW_SWIZZLE4(GET_SWZ(src
->Swizzle
, 0),
708 GET_SWZ(src
->Swizzle
, 1),
709 GET_SWZ(src
->Swizzle
, 2),
710 GET_SWZ(src
->Swizzle
, 3));
712 /* Note this is ok for non-swizzle instructions:
714 reg
.negate
= src
->NegateBase
? 1 : 0;
720 static struct brw_reg
get_dst( struct brw_vs_compile
*c
,
721 struct prog_dst_register dst
)
723 struct brw_reg reg
= get_reg(c
, dst
.File
, dst
.Index
);
725 reg
.dw1
.bits
.writemask
= dst
.WriteMask
;
733 static void emit_swz( struct brw_vs_compile
*c
,
735 struct prog_src_register src
)
737 struct brw_compile
*p
= &c
->func
;
738 GLuint zeros_mask
= 0;
739 GLuint ones_mask
= 0;
742 GLboolean need_tmp
= (src
.NegateBase
&&
743 dst
.file
!= BRW_GENERAL_REGISTER_FILE
);
744 struct brw_reg tmp
= dst
;
750 for (i
= 0; i
< 4; i
++) {
751 if (dst
.dw1
.bits
.writemask
& (1<<i
)) {
752 GLubyte s
= GET_SWZ(src
.Swizzle
, i
);
771 /* Do src first, in case dst aliases src:
777 arg0
= deref(c
, c
->regs
[PROGRAM_STATE_VAR
][0], src
.Index
);
779 arg0
= get_reg(c
, src
.File
, src
.Index
);
781 arg0
= brw_swizzle(arg0
,
782 src_swz
[0], src_swz
[1],
783 src_swz
[2], src_swz
[3]);
785 brw_MOV(p
, brw_writemask(tmp
, src_mask
), arg0
);
789 brw_MOV(p
, brw_writemask(tmp
, zeros_mask
), brw_imm_f(0));
792 brw_MOV(p
, brw_writemask(tmp
, ones_mask
), brw_imm_f(1));
795 brw_MOV(p
, brw_writemask(tmp
, src
.NegateBase
), negate(tmp
));
798 brw_MOV(p
, dst
, tmp
);
805 /* Post-vertex-program processing. Send the results to the URB.
807 static void emit_vertex_write( struct brw_vs_compile
*c
)
809 struct brw_compile
*p
= &c
->func
;
810 struct brw_reg m0
= brw_message_reg(0);
811 struct brw_reg pos
= c
->regs
[PROGRAM_OUTPUT
][VERT_RESULT_HPOS
];
814 if (c
->key
.copy_edgeflag
) {
816 get_reg(c
, PROGRAM_OUTPUT
, VERT_RESULT_EDGE
),
817 get_reg(c
, PROGRAM_INPUT
, VERT_ATTRIB_EDGEFLAG
));
821 /* Build ndc coords? TODO: Shortcircuit when w is known to be one.
823 if (!c
->key
.know_w_is_one
) {
825 emit_math1(c
, BRW_MATH_FUNCTION_INV
, ndc
, brw_swizzle1(pos
, 3), BRW_MATH_PRECISION_FULL
);
826 brw_MUL(p
, brw_writemask(ndc
, WRITEMASK_XYZ
), pos
, ndc
);
832 /* This includes the workaround for -ve rhw, so is no longer an
835 if ((c
->prog_data
.outputs_written
& (1<<VERT_RESULT_PSIZ
)) ||
836 c
->key
.nr_userclip
||
837 !c
->key
.know_w_is_one
)
839 struct brw_reg header1
= retype(get_tmp(c
), BRW_REGISTER_TYPE_UD
);
842 brw_MOV(p
, header1
, brw_imm_ud(0));
844 brw_set_access_mode(p
, BRW_ALIGN_16
);
846 if (c
->prog_data
.outputs_written
& (1<<VERT_RESULT_PSIZ
)) {
847 struct brw_reg psiz
= c
->regs
[PROGRAM_OUTPUT
][VERT_RESULT_PSIZ
];
848 brw_MUL(p
, brw_writemask(header1
, WRITEMASK_W
), brw_swizzle1(psiz
, 0), brw_imm_f(1<<11));
849 brw_AND(p
, brw_writemask(header1
, WRITEMASK_W
), header1
, brw_imm_ud(0x7ff<<8));
853 for (i
= 0; i
< c
->key
.nr_userclip
; i
++) {
854 brw_set_conditionalmod(p
, BRW_CONDITIONAL_L
);
855 brw_DP4(p
, brw_null_reg(), pos
, c
->userplane
[i
]);
856 brw_OR(p
, brw_writemask(header1
, WRITEMASK_W
), header1
, brw_imm_ud(1<<i
));
857 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
861 /* i965 clipping workaround:
862 * 1) Test for -ve rhw
864 * set ndc = (0,0,0,0)
867 * Later, clipping will detect ucp[6] and ensure the primitive is
868 * clipped against all fixed planes.
870 if (!(BRW_IS_GM45(p
->brw
) || BRW_IS_G4X(p
->brw
)) && !c
->key
.know_w_is_one
) {
872 vec8(brw_null_reg()),
874 brw_swizzle1(ndc
, 3),
877 brw_OR(p
, brw_writemask(header1
, WRITEMASK_W
), header1
, brw_imm_ud(1<<6));
878 brw_MOV(p
, ndc
, brw_imm_f(0));
879 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
882 brw_set_access_mode(p
, BRW_ALIGN_1
); /* why? */
883 brw_MOV(p
, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD
), header1
);
884 brw_set_access_mode(p
, BRW_ALIGN_16
);
886 release_tmp(c
, header1
);
889 brw_MOV(p
, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD
), brw_imm_ud(0));
893 /* Emit the (interleaved) headers for the two vertices - an 8-reg
894 * of zeros followed by two sets of NDC coordinates:
896 brw_set_access_mode(p
, BRW_ALIGN_1
);
897 brw_MOV(p
, offset(m0
, 2), ndc
);
898 brw_MOV(p
, offset(m0
, 3), pos
);
902 brw_null_reg(), /* dest */
903 0, /* starting mrf reg nr */
907 c
->nr_outputs
+ 3, /* msg len */
908 0, /* response len */
910 1, /* writes complete */
911 0, /* urb destination offset */
912 BRW_URB_SWIZZLE_INTERLEAVE
);
917 post_vs_emit( struct brw_vs_compile
*c
, struct brw_instruction
*end_inst
)
919 GLuint nr_insns
= c
->vp
->program
.Base
.NumInstructions
;
920 GLuint insn
, target_insn
;
921 struct prog_instruction
*inst1
, *inst2
;
922 struct brw_instruction
*brw_inst1
, *brw_inst2
;
924 for (insn
= 0; insn
< nr_insns
; insn
++) {
925 inst1
= &c
->vp
->program
.Base
.Instructions
[insn
];
926 brw_inst1
= inst1
->Data
;
927 switch (inst1
->Opcode
) {
930 target_insn
= inst1
->BranchTarget
;
931 inst2
= &c
->vp
->program
.Base
.Instructions
[target_insn
];
932 brw_inst2
= inst2
->Data
;
933 offset
= brw_inst2
- brw_inst1
;
934 brw_set_src1(brw_inst1
, brw_imm_d(offset
*16));
937 offset
= end_inst
- brw_inst1
;
938 brw_set_src1(brw_inst1
, brw_imm_d(offset
*16));
946 /* Emit the vertex program instructions here.
948 void brw_vs_emit(struct brw_vs_compile
*c
)
951 struct brw_compile
*p
= &c
->func
;
952 GLuint nr_insns
= c
->vp
->program
.Base
.NumInstructions
;
953 GLuint insn
, if_insn
= 0;
954 struct brw_instruction
*end_inst
;
955 struct brw_instruction
*if_inst
[MAX_IFSN
];
956 struct brw_indirect stack_index
= brw_indirect(0, 0);
961 if (INTEL_DEBUG
& DEBUG_VS
) {
962 _mesa_printf("\n\n\nvs-emit:\n");
963 _mesa_print_program(&c
->vp
->program
.Base
);
967 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
968 brw_set_access_mode(p
, BRW_ALIGN_16
);
970 /* Message registers can't be read, so copy the output into GRF register
971 if they are used in source registers */
972 for (insn
= 0; insn
< nr_insns
; insn
++) {
974 struct prog_instruction
*inst
= &c
->vp
->program
.Base
.Instructions
[insn
];
975 for (i
= 0; i
< 3; i
++) {
976 struct prog_src_register
*src
= &inst
->SrcReg
[i
];
977 GLuint index
= src
->Index
;
978 GLuint file
= src
->File
;
979 if (file
== PROGRAM_OUTPUT
&& index
!= VERT_RESULT_HPOS
)
980 c
->output_regs
[index
].used_in_src
= GL_TRUE
;
984 /* Static register allocation
986 brw_vs_alloc_regs(c
);
987 brw_MOV(p
, get_addr_reg(stack_index
), brw_address(c
->stack
));
989 for (insn
= 0; insn
< nr_insns
; insn
++) {
991 struct prog_instruction
*inst
= &c
->vp
->program
.Base
.Instructions
[insn
];
992 struct brw_reg args
[3], dst
;
995 /* Get argument regs. SWZ is special and does this itself.
997 inst
->Data
= &p
->store
[p
->nr_insn
];
998 if (inst
->Opcode
!= OPCODE_SWZ
)
999 for (i
= 0; i
< 3; i
++) {
1000 struct prog_src_register
*src
= &inst
->SrcReg
[i
];
1003 if (file
== PROGRAM_OUTPUT
&&c
->output_regs
[index
].used_in_src
)
1004 args
[i
] = c
->output_regs
[index
].reg
;
1006 args
[i
] = get_arg(c
, src
);
1009 /* Get dest regs. Note that it is possible for a reg to be both
1010 * dst and arg, given the static allocation of registers. So
1011 * care needs to be taken emitting multi-operation instructions.
1013 index
= inst
->DstReg
.Index
;
1014 file
= inst
->DstReg
.File
;
1015 if (file
== PROGRAM_OUTPUT
&& c
->output_regs
[index
].used_in_src
)
1016 dst
= c
->output_regs
[index
].reg
;
1018 dst
= get_dst(c
, inst
->DstReg
);
1020 switch (inst
->Opcode
) {
1022 brw_MOV(p
, dst
, brw_abs(args
[0]));
1025 brw_ADD(p
, dst
, args
[0], args
[1]);
1028 brw_DP3(p
, dst
, args
[0], args
[1]);
1031 brw_DP4(p
, dst
, args
[0], args
[1]);
1034 brw_DPH(p
, dst
, args
[0], args
[1]);
1037 unalias2(c
, dst
, args
[0], args
[1], emit_dst_noalias
);
1040 unalias1(c
, dst
, args
[0], emit_exp_noalias
);
1043 emit_math1(c
, BRW_MATH_FUNCTION_EXP
, dst
, args
[0], BRW_MATH_PRECISION_FULL
);
1046 emit_arl(c
, dst
, args
[0]);
1049 brw_RNDD(p
, dst
, args
[0]);
1052 brw_FRC(p
, dst
, args
[0]);
1055 unalias1(c
, dst
, args
[0], emit_log_noalias
);
1058 emit_math1(c
, BRW_MATH_FUNCTION_LOG
, dst
, args
[0], BRW_MATH_PRECISION_FULL
);
1061 unalias1(c
, dst
, args
[0], emit_lit_noalias
);
1064 brw_MOV(p
, brw_acc_reg(), args
[2]);
1065 brw_MAC(p
, dst
, args
[0], args
[1]);
1068 emit_max(p
, dst
, args
[0], args
[1]);
1071 emit_min(p
, dst
, args
[0], args
[1]);
1074 brw_MOV(p
, dst
, args
[0]);
1077 brw_MUL(p
, dst
, args
[0], args
[1]);
1080 emit_math2(c
, BRW_MATH_FUNCTION_POW
, dst
, args
[0], args
[1], BRW_MATH_PRECISION_FULL
);
1083 emit_math1(c
, BRW_MATH_FUNCTION_INV
, dst
, args
[0], BRW_MATH_PRECISION_FULL
);
1086 emit_math1(c
, BRW_MATH_FUNCTION_RSQ
, dst
, args
[0], BRW_MATH_PRECISION_FULL
);
1090 emit_seq(p
, dst
, args
[0], args
[1]);
1093 emit_sne(p
, dst
, args
[0], args
[1]);
1096 emit_sge(p
, dst
, args
[0], args
[1]);
1099 emit_sgt(p
, dst
, args
[0], args
[1]);
1102 emit_slt(p
, dst
, args
[0], args
[1]);
1105 emit_sle(p
, dst
, args
[0], args
[1]);
1108 brw_ADD(p
, dst
, args
[0], negate(args
[1]));
1111 /* The args[0] value can't be used here as it won't have
1112 * correctly encoded the full swizzle:
1114 emit_swz(c
, dst
, inst
->SrcReg
[0] );
1117 emit_xpd(p
, dst
, args
[0], args
[1]);
1120 assert(if_insn
< MAX_IFSN
);
1121 if_inst
[if_insn
++] = brw_IF(p
, BRW_EXECUTE_8
);
1124 if_inst
[if_insn
-1] = brw_ELSE(p
, if_inst
[if_insn
-1]);
1127 assert(if_insn
> 0);
1128 brw_ENDIF(p
, if_inst
[--if_insn
]);
1131 brw_set_predicate_control(p
, BRW_PREDICATE_NORMAL
);
1132 brw_ADD(p
, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
1133 brw_set_predicate_control_flag_value(p
, 0xff);
1136 brw_set_access_mode(p
, BRW_ALIGN_1
);
1137 brw_ADD(p
, deref_1d(stack_index
, 0), brw_ip_reg(), brw_imm_d(3*16));
1138 brw_set_access_mode(p
, BRW_ALIGN_16
);
1139 brw_ADD(p
, get_addr_reg(stack_index
),
1140 get_addr_reg(stack_index
), brw_imm_d(4));
1141 inst
->Data
= &p
->store
[p
->nr_insn
];
1142 brw_ADD(p
, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
1145 brw_ADD(p
, get_addr_reg(stack_index
),
1146 get_addr_reg(stack_index
), brw_imm_d(-4));
1147 brw_set_access_mode(p
, BRW_ALIGN_1
);
1148 brw_MOV(p
, brw_ip_reg(), deref_1d(stack_index
, 0));
1149 brw_set_access_mode(p
, BRW_ALIGN_16
);
1151 brw_ADD(p
, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
1158 _mesa_printf("Unsupport opcode %d in vertex shader\n", inst
->Opcode
);
1162 if (inst
->DstReg
.File
== PROGRAM_OUTPUT
1163 &&inst
->DstReg
.Index
!= VERT_RESULT_HPOS
1164 &&c
->output_regs
[inst
->DstReg
.Index
].used_in_src
)
1165 brw_MOV(p
, get_dst(c
, inst
->DstReg
), dst
);
1170 end_inst
= &p
->store
[p
->nr_insn
];
1171 emit_vertex_write(c
);
1172 post_vs_emit(c
, end_inst
);
1173 for (insn
= 0; insn
< nr_insns
; insn
++)
1174 c
->vp
->program
.Base
.Instructions
[insn
].Data
= NULL
;