2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **********************************************************************/
29 * Keith Whitwell <keith@tungstengraphics.com>
37 #include "intel_batchbuffer.h"
39 #include "brw_defines.h"
40 #include "brw_context.h"
/* Return the 4-wide GRF region for attribute `attr` of the vertex whose
 * registers start at `vert`.
 *
 * c->attr_to_idx[attr] gives the attribute's slot: slot / 2 is the register
 * offset added to vert.nr and slot % 2 selects sub-register 0 or 4, so two
 * attributes share each register.
 */
46 static struct brw_reg
get_vert_attr(struct brw_sf_compile
*c
,
50 GLuint off
= c
->attr_to_idx
[attr
] / 2;
51 GLuint sub
= c
->attr_to_idx
[attr
] % 2;
53 return brw_vec4_grf(vert
.nr
+ off
, sub
* 4);
/* Return 1 when bit `attr` is set in c->key.attrs, 0 otherwise. */
56 static GLboolean
have_attr(struct brw_sf_compile
*c
,
59 return (c
->key
.attrs
& (1<<attr
)) ? 1 : 0;
64 /***********************************************************************
/* For each of the two colour slots (i = 0, 1): when the key has both
 * VERT_RESULT_COL0+i and VERT_RESULT_BFC0+i, pair up the registers that
 * get_vert_attr() returns for those two attributes of `vert`.
 *
 * NOTE(review): the call that consumes the pair is not visible in this
 * excerpt; from the function name it presumably copies the BFC value over
 * the COL value -- confirm against the full file.
 */
67 static void copy_bfc( struct brw_sf_compile
*c
,
70 struct brw_compile
*p
= &c
->func
;
73 for (i
= 0; i
< 2; i
++) {
74 if (have_attr(c
, VERT_RESULT_COL0
+i
) &&
75 have_attr(c
, VERT_RESULT_BFC0
+i
))
77 get_vert_attr(c
, vert
, VERT_RESULT_COL0
+i
),
78 get_vert_attr(c
, vert
, VERT_RESULT_BFC0
+i
));
/* Emit the two-sided colour fix-up.
 *
 * c->det is compared against 0.0 with BRW_CONDITIONAL_G when
 * key.frontface_ccw is set and BRW_CONDITIONAL_L otherwise.  The CMP is
 * done 4-wide on the null register and is followed by an IF with
 * BRW_EXECUTE_4.  Inside the IF, copy_bfc() is called per vertex: the
 * switch on c->nr_verts has no break between its cases, so case 3 runs on
 * into cases 2 and 1 and covers vert[2], vert[1] and vert[0].  The whole
 * sequence sits between brw_push_insn_state() and brw_pop_insn_state().
 *
 * Before emitting anything the function tests for SF_UNFILLED_TRIS and for
 * the absence of both the COL0/BFC0 and the COL1/BFC1 pairs.
 * NOTE(review): the statements guarded by those two tests are not visible
 * in this excerpt (presumably early returns) -- confirm.
 */
83 static void do_twoside_color( struct brw_sf_compile
*c
)
85 struct brw_compile
*p
= &c
->func
;
86 struct brw_instruction
*if_insn
;
87 GLuint backface_conditional
= c
->key
.frontface_ccw
? BRW_CONDITIONAL_G
: BRW_CONDITIONAL_L
;
89 /* Already done in clip program:
91 if (c
->key
.primitive
== SF_UNFILLED_TRIS
)
94 /* XXX: What happens if BFC isn't present? This could only happen
95 * for user-supplied vertex programs, as t_vp_build.c always does
98 if (!(have_attr(c
, VERT_RESULT_COL0
) && have_attr(c
, VERT_RESULT_BFC0
)) &&
99 !(have_attr(c
, VERT_RESULT_COL1
) && have_attr(c
, VERT_RESULT_BFC1
)))
102 /* Need to use BRW_EXECUTE_4 and also do an 4-wide compare in order
103 * to get all channels active inside the IF. In the clipping code
104 * we run with NoMask, so it's not an option and we can use
105 * BRW_EXECUTE_1 for all comparisions.
107 brw_push_insn_state(p
);
108 brw_CMP(p
, vec4(brw_null_reg()), backface_conditional
, c
->det
, brw_imm_f(0));
109 if_insn
= brw_IF(p
, BRW_EXECUTE_4
);
111 switch (c
->nr_verts
) {
112 case 3: copy_bfc(c
, c
->vert
[2]);
113 case 2: copy_bfc(c
, c
->vert
[1]);
114 case 1: copy_bfc(c
, c
->vert
[0]);
117 brw_ENDIF(p
, if_insn
);
118 brw_pop_insn_state(p
);
123 /***********************************************************************
/* Mask with the VERT_RESULT_COL0 and VERT_RESULT_COL1 bits set. */
127 #define VERT_RESULT_COLOR_BITS ((1<<VERT_RESULT_COL0) | \
128 (1<<VERT_RESULT_COL1))
/* Walk attributes VERT_RESULT_COL0 through VERT_RESULT_COL1 (inclusive) and
 * pair get_vert_attr(c, dst, i) with get_vert_attr(c, src, i).
 *
 * NOTE(review): the instruction that uses each pair, and any per-attribute
 * presence check, are not visible in this excerpt; by name this copies the
 * colours of `src` into `dst` -- confirm against the full file.
 */
130 static void copy_colors( struct brw_sf_compile
*c
,
134 struct brw_compile
*p
= &c
->func
;
137 for (i
= VERT_RESULT_COL0
; i
<= VERT_RESULT_COL1
; i
++) {
140 get_vert_attr(c
, dst
, i
),
141 get_vert_attr(c
, src
, i
));
147 /* Need to use a computed jump to copy flatshaded attributes as the
148 * vertices are ordered according to y-coordinate before reaching this
149 * point, so the provoking vertex (PV) could be anywhere.
/* Emit the flat-shading colour copy for a triangle.
 *
 * nr is the number of bits set in key.attrs & VERT_RESULT_COLOR_BITS.
 * c->pv is multiplied by nr*2+1 and then used as the offset of a JMPI on
 * the IP register.  Three groups of copy_colors() calls follow:
 *   - (vert[1], vert[0]) and (vert[2], vert[0]), then JMPI by nr*4+1
 *   - (vert[0], vert[1]) and (vert[2], vert[1]), then JMPI by nr*2
 *   - (vert[0], vert[2]) and (vert[1], vert[2])
 * NOTE(review): the jump offsets only line up with the group boundaries if
 * every copy_colors() call emits exactly nr instructions -- confirm.
 *
 * The key.primitive == SF_UNFILLED_TRIS test guards a statement that is not
 * visible in this excerpt.  The emitted code sits between
 * brw_push_insn_state() and brw_pop_insn_state().
 */
151 static void do_flatshade_triangle( struct brw_sf_compile
*c
)
153 struct brw_compile
*p
= &c
->func
;
154 struct brw_reg ip
= brw_ip_reg();
155 GLuint nr
= brw_count_bits(c
->key
.attrs
& VERT_RESULT_COLOR_BITS
);
159 /* Already done in clip program:
161 if (c
->key
.primitive
== SF_UNFILLED_TRIS
)
164 brw_push_insn_state(p
);
166 brw_MUL(p
, c
->pv
, c
->pv
, brw_imm_ud(nr
*2+1));
167 brw_JMPI(p
, ip
, ip
, c
->pv
);
169 copy_colors(c
, c
->vert
[1], c
->vert
[0]);
170 copy_colors(c
, c
->vert
[2], c
->vert
[0]);
171 brw_JMPI(p
, ip
, ip
, brw_imm_ud(nr
*4+1));
173 copy_colors(c
, c
->vert
[0], c
->vert
[1]);
174 copy_colors(c
, c
->vert
[2], c
->vert
[1]);
175 brw_JMPI(p
, ip
, ip
, brw_imm_ud(nr
*2));
177 copy_colors(c
, c
->vert
[0], c
->vert
[2]);
178 copy_colors(c
, c
->vert
[1], c
->vert
[2]);
180 brw_pop_insn_state(p
);
/* Emit the flat-shading colour copy for a line.
 *
 * nr is the number of bits set in key.attrs & VERT_RESULT_COLOR_BITS.
 * c->pv is multiplied by nr+1 and then used as the offset of a JMPI on the
 * IP register.  Two copy_colors() calls follow, (vert[1], vert[0]) and
 * (vert[0], vert[1]), separated by a JMPI by nr.
 * NOTE(review): the jump offsets only line up if every copy_colors() call
 * emits exactly nr instructions -- confirm.
 *
 * The key.primitive == SF_UNFILLED_TRIS test guards a statement that is not
 * visible in this excerpt.  The emitted code sits between
 * brw_push_insn_state() and brw_pop_insn_state().
 */
184 static void do_flatshade_line( struct brw_sf_compile
*c
)
186 struct brw_compile
*p
= &c
->func
;
187 struct brw_reg ip
= brw_ip_reg();
188 GLuint nr
= brw_count_bits(c
->key
.attrs
& VERT_RESULT_COLOR_BITS
);
193 /* Already done in clip program:
195 if (c
->key
.primitive
== SF_UNFILLED_TRIS
)
198 brw_push_insn_state(p
);
200 brw_MUL(p
, c
->pv
, c
->pv
, brw_imm_ud(nr
+1));
201 brw_JMPI(p
, ip
, ip
, c
->pv
);
202 copy_colors(c
, c
->vert
[1], c
->vert
[0]);
204 brw_JMPI(p
, ip
, ip
, brw_imm_ud(nr
));
205 copy_colors(c
, c
->vert
[0], c
->vert
[1]);
207 brw_pop_insn_state(p
);
212 /***********************************************************************
/* Lay out the registers used by the SF program.
 *
 *  - GRF 1, sub-registers 1..6: pv (retyped to UD), det, dx0, dx2, dy0, dy2.
 *  - GRF 2, sub-registers 0..5: z[0], inv_w[0], z[1], inv_w[1], z[2],
 *    inv_w[2].
 *  - c->vert[i]: one vec8 region per vertex, nr_attr_regs registers apart.
 *  - inv_det, a1_sub_a0, a2_sub_a0, tmp: one register each, allocated after
 *    the last vertex register.
 *  - prog_data.total_grf: the register number reached after the
 *    temporaries.
 *  - m1Cx, m2Cy, m3C0: message registers 1, 2 and 3.
 *
 * NOTE(review): the declaration and starting value of `reg` are not visible
 * in this excerpt.
 */
217 static void alloc_regs( struct brw_sf_compile
*c
)
221 /* Values computed by fixed function unit:
223 c
->pv
= retype(brw_vec1_grf(1, 1), BRW_REGISTER_TYPE_UD
);
224 c
->det
= brw_vec1_grf(1, 2);
225 c
->dx0
= brw_vec1_grf(1, 3);
226 c
->dx2
= brw_vec1_grf(1, 4);
227 c
->dy0
= brw_vec1_grf(1, 5);
228 c
->dy2
= brw_vec1_grf(1, 6);
230 /* z and 1/w passed in seperately:
232 c
->z
[0] = brw_vec1_grf(2, 0);
233 c
->inv_w
[0] = brw_vec1_grf(2, 1);
234 c
->z
[1] = brw_vec1_grf(2, 2);
235 c
->inv_w
[1] = brw_vec1_grf(2, 3);
236 c
->z
[2] = brw_vec1_grf(2, 4);
237 c
->inv_w
[2] = brw_vec1_grf(2, 5);
242 for (i
= 0; i
< c
->nr_verts
; i
++) {
243 c
->vert
[i
] = brw_vec8_grf(reg
, 0);
244 reg
+= c
->nr_attr_regs
;
247 /* Temporaries, allocated after last vertex reg.
249 c
->inv_det
= brw_vec1_grf(reg
, 0); reg
++;
250 c
->a1_sub_a0
= brw_vec8_grf(reg
, 0); reg
++;
251 c
->a2_sub_a0
= brw_vec8_grf(reg
, 0); reg
++;
252 c
->tmp
= brw_vec8_grf(reg
, 0); reg
++;
254 /* Note grf allocation:
256 c
->prog_data
.total_grf
= reg
;
259 /* Outputs of this program - interpolation coefficients for
262 c
->m1Cx
= brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE
, 1, 0);
263 c
->m2Cy
= brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE
, 2, 0);
264 c
->m3C0
= brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE
, 3, 0);
/* For every vertex, emit one 2-wide MOV from the region starting at
 * c->z[i] into c->vert[i] at sub-offset 2, between brw_push_insn_state()
 * and brw_pop_insn_state().  alloc_regs() places inv_w[i] in the
 * sub-register directly after z[i], so the 2-wide MOV carries both values.
 */
268 static void copy_z_inv_w( struct brw_sf_compile
*c
)
270 struct brw_compile
*p
= &c
->func
;
273 brw_push_insn_state(p
);
275 /* Copy both scalars with a single MOV:
277 for (i
= 0; i
< c
->nr_verts
; i
++)
278 brw_MOV(p
, vec2(suboffset(c
->vert
[i
], 2)), vec2(c
->z
[i
]));
280 brw_pop_insn_state(p
);
/* Emit a math operation with BRW_MATH_FUNCTION_INV, no saturation, scalar
 * data and full precision.
 *
 * NOTE(review): the emitting call and its register operands are not visible
 * in this excerpt; by name this computes 1/det -- confirm against the full
 * file.
 */
284 static void invert_det( struct brw_sf_compile
*c
)
286 /* Looks like we invert all 8 elements just to get 1/det in
291 BRW_MATH_FUNCTION_INV
,
292 BRW_MATH_SATURATE_NONE
,
295 BRW_MATH_DATA_SCALAR
,
296 BRW_MATH_PRECISION_FULL
);
300 #define NON_PERPECTIVE_ATTRS (FRAG_BIT_WPOS | \
/* Work out how the attributes held in setup register `reg` must be treated.
 *
 *  - is_last_attr is true when reg == c->nr_setup_regs - 1.
 *  - persp_mask is key.attrs with the NON_PERPECTIVE_ATTRS bits cleared.
 *  - linear_mask is key.attrs with FRAG_BIT_COL0 and FRAG_BIT_COL1 cleared
 *    under key.do_flat_shading, and plain key.attrs in the other assignment
 *    (presumably the else branch, whose keyword is not visible here).
 *
 * The attribute at idx_to_attr[reg*2] is tested against both masks, and so
 * is idx_to_attr[reg*2+1] when reg*2+1 < c->nr_setup_attrs.
 *
 * NOTE(review): the parameter list after `c`, the statements run when a
 * test succeeds and the return statement are not visible in this excerpt.
 * Callers invoke this as calculate_masks(c, i, &pc, &pc_persp, &pc_linear)
 * and keep the result in a GLboolean named `last`.
 */
304 static GLboolean
calculate_masks( struct brw_sf_compile
*c
,
310 GLboolean is_last_attr
= (reg
== c
->nr_setup_regs
- 1);
311 GLuint persp_mask
= c
->key
.attrs
& ~NON_PERPECTIVE_ATTRS
;
314 if (c
->key
.do_flat_shading
)
315 linear_mask
= c
->key
.attrs
& ~(FRAG_BIT_COL0
|FRAG_BIT_COL1
);
317 linear_mask
= c
->key
.attrs
;
323 if (persp_mask
& (1 << c
->idx_to_attr
[reg
*2]))
326 if (linear_mask
& (1 << c
->idx_to_attr
[reg
*2]))
329 /* Maybe only processs one attribute on the final round:
331 if (reg
*2+1 < c
->nr_setup_attrs
) {
334 if (persp_mask
& (1 << c
->idx_to_attr
[reg
*2+1]))
337 if (linear_mask
& (1 << c
->idx_to_attr
[reg
*2+1]))
/* Emit the triangle setup program.
 *
 * Tests key.do_twoside_color (the guarded statement is not visible in this
 * excerpt) and calls do_flatshade_triangle() when key.do_flat_shading is
 * set.  Then, for each setup register i in [0, nr_setup_regs):
 *   - a0, a1, a2 are c->vert[0..2] offset by i; calculate_masks() supplies
 *     the pc, pc_persp and pc_linear flag values and `last`.
 *   - with flag value pc_persp: a0, a1, a2 are multiplied in place by
 *     inv_w[0], inv_w[1], inv_w[2].
 *   - with flag value pc_linear: a1_sub_a0 = a1 - a0 and
 *     a2_sub_a0 = a2 - a0.  m1Cx is built from a1_sub_a0 * dy2 and
 *     a2_sub_a0 * (-dy0), m2Cy from a2_sub_a0 * dx0 and a1_sub_a0 * (-dx2);
 *     each is a MUL into the null register followed by a MAC into tmp
 *     (presumably accumulating onto the MUL result), then tmp * inv_det.
 *   - with flag value pc: m3C0 = a0, the start point for interpolation.
 *   - a URB write follows; its visible arguments are r0, a zero response
 *     length, `last` and BRW_URB_SWIZZLE_TRANSPOSE.
 *
 * NOTE(review): any conditions around these steps, the URB write call
 * itself and the use of `allocate` are not visible in this excerpt.
 */
346 void brw_emit_tri_setup( struct brw_sf_compile
*c
, GLboolean allocate
)
348 struct brw_compile
*p
= &c
->func
;
359 if (c
->key
.do_twoside_color
)
362 if (c
->key
.do_flat_shading
)
363 do_flatshade_triangle(c
);
366 for (i
= 0; i
< c
->nr_setup_regs
; i
++)
368 /* Pair of incoming attributes:
370 struct brw_reg a0
= offset(c
->vert
[0], i
);
371 struct brw_reg a1
= offset(c
->vert
[1], i
);
372 struct brw_reg a2
= offset(c
->vert
[2], i
);
373 GLushort pc
, pc_persp
, pc_linear
;
374 GLboolean last
= calculate_masks(c
, i
, &pc
, &pc_persp
, &pc_linear
);
378 brw_set_predicate_control_flag_value(p
, pc_persp
);
379 brw_MUL(p
, a0
, a0
, c
->inv_w
[0]);
380 brw_MUL(p
, a1
, a1
, c
->inv_w
[1]);
381 brw_MUL(p
, a2
, a2
, c
->inv_w
[2]);
385 /* Calculate coefficients for interpolated values:
389 brw_set_predicate_control_flag_value(p
, pc_linear
);
391 brw_ADD(p
, c
->a1_sub_a0
, a1
, negate(a0
));
392 brw_ADD(p
, c
->a2_sub_a0
, a2
, negate(a0
));
396 brw_MUL(p
, brw_null_reg(), c
->a1_sub_a0
, c
->dy2
);
397 brw_MAC(p
, c
->tmp
, c
->a2_sub_a0
, negate(c
->dy0
));
398 brw_MUL(p
, c
->m1Cx
, c
->tmp
, c
->inv_det
);
402 brw_MUL(p
, brw_null_reg(), c
->a2_sub_a0
, c
->dx0
);
403 brw_MAC(p
, c
->tmp
, c
->a1_sub_a0
, negate(c
->dx2
));
404 brw_MUL(p
, c
->m2Cy
, c
->tmp
, c
->inv_det
);
408 brw_set_predicate_control_flag_value(p
, pc
);
409 /* start point for interpolation
411 brw_MOV(p
, c
->m3C0
, a0
);
413 /* Copy m0..m3 to URB. m0 is implicitly copied from r0 in
414 * the send instruction:
419 brw_vec8_grf(0, 0), /* r0, will be copied to m0 */
423 0, /* response len */
425 last
, /* writes complete */
427 BRW_URB_SWIZZLE_TRANSPOSE
); /* XXX: Swizzle control "SF to windower" */
/* Emit the line setup program.
 *
 * Calls do_flatshade_line() when key.do_flat_shading is set.  Then, for
 * each setup register i in [0, nr_setup_regs):
 *   - a0, a1 are c->vert[0..1] offset by i; calculate_masks() supplies the
 *     pc, pc_persp and pc_linear flag values and `last`.
 *   - with flag value pc_persp: a0 and a1 are multiplied in place by
 *     inv_w[0] and inv_w[1].
 *   - with flag value pc_linear: a1_sub_a0 = a1 - a0;
 *     m1Cx = (a1_sub_a0 * dx0) * inv_det and
 *     m2Cy = (a1_sub_a0 * dy0) * inv_det, each going through tmp.
 *   - with flag value pc: m3C0 = a0, the start point for interpolation.
 *   - a URB write follows; its visible arguments are a zero response
 *     length, `last`, a destination offset of i*4 and
 *     BRW_URB_SWIZZLE_TRANSPOSE.
 *
 * NOTE(review): any conditions around these steps, the URB write call
 * itself and the use of `allocate` are not visible in this excerpt.
 */
434 void brw_emit_line_setup( struct brw_sf_compile
*c
, GLboolean allocate
)
436 struct brw_compile
*p
= &c
->func
;
448 if (c
->key
.do_flat_shading
)
449 do_flatshade_line(c
);
451 for (i
= 0; i
< c
->nr_setup_regs
; i
++)
453 /* Pair of incoming attributes:
455 struct brw_reg a0
= offset(c
->vert
[0], i
);
456 struct brw_reg a1
= offset(c
->vert
[1], i
);
457 GLushort pc
, pc_persp
, pc_linear
;
458 GLboolean last
= calculate_masks(c
, i
, &pc
, &pc_persp
, &pc_linear
);
462 brw_set_predicate_control_flag_value(p
, pc_persp
);
463 brw_MUL(p
, a0
, a0
, c
->inv_w
[0]);
464 brw_MUL(p
, a1
, a1
, c
->inv_w
[1]);
467 /* Calculate coefficients for position, color:
470 brw_set_predicate_control_flag_value(p
, pc_linear
);
472 brw_ADD(p
, c
->a1_sub_a0
, a1
, negate(a0
));
474 brw_MUL(p
, c
->tmp
, c
->a1_sub_a0
, c
->dx0
);
475 brw_MUL(p
, c
->m1Cx
, c
->tmp
, c
->inv_det
);
477 brw_MUL(p
, c
->tmp
, c
->a1_sub_a0
, c
->dy0
);
478 brw_MUL(p
, c
->m2Cy
, c
->tmp
, c
->inv_det
);
482 brw_set_predicate_control_flag_value(p
, pc
);
484 /* start point for interpolation
486 brw_MOV(p
, c
->m3C0
, a0
);
488 /* Copy m0..m3 to URB.
497 0, /* response len */
499 last
, /* writes complete */
500 i
*4, /* urb destination offset */
501 BRW_URB_SWIZZLE_TRANSPOSE
);
/* Emit the point-sprite setup program.
 *
 * For each setup register i in [0, nr_setup_regs), `tex` points at
 * c->point_attrs[idx_to_attr[2*i]] and a0 is c->vert[0] offset by i;
 * calculate_masks() supplies the pc, pc_persp and pc_linear flag values and
 * `last`.
 *   - When tex->CoordReplace is clear, a0 is multiplied by inv_w[0] with
 *     flag value pc_persp.
 *   - When tex->CoordReplace is set, a math INV is emitted (1.0/PointWidth
 *     per the in-code comment), then m1Cx = tmp * inv_w[0] with element 1
 *     zeroed, and m2Cy = tmp * (-inv_w[0]) for GL_UPPER_LEFT or
 *     tmp * inv_w[0] in the other sequence, with element 0 zeroed.
 *   - A further sequence zeroes m1Cx and m2Cy entirely.
 *   - With flag value pc, m3C0 is set by one of three sequences:
 *     inv_w[0] * 1.0 with element 0 zeroed (CoordReplace and
 *     GL_UPPER_LEFT), 0.0, or a0.
 *   - a URB write follows; its visible arguments are a zero response
 *     length, `last`, a destination offset of i*4 and
 *     BRW_URB_SWIZZLE_TRANSPOSE.
 *
 * NOTE(review): the else keywords and closing braces that select between
 * these sequences, the math call's operands, the URB write call itself and
 * the use of `allocate` are not visible in this excerpt -- confirm the
 * branch structure against the full file.
 */
506 void brw_emit_point_sprite_setup( struct brw_sf_compile
*c
, GLboolean allocate
)
508 struct brw_compile
*p
= &c
->func
;
517 for (i
= 0; i
< c
->nr_setup_regs
; i
++)
519 struct brw_sf_point_tex
*tex
= &c
->point_attrs
[c
->idx_to_attr
[2*i
]];
520 struct brw_reg a0
= offset(c
->vert
[0], i
);
521 GLushort pc
, pc_persp
, pc_linear
;
522 GLboolean last
= calculate_masks(c
, i
, &pc
, &pc_persp
, &pc_linear
);
526 if (!tex
->CoordReplace
) {
527 brw_set_predicate_control_flag_value(p
, pc_persp
);
528 brw_MUL(p
, a0
, a0
, c
->inv_w
[0]);
532 if (tex
->CoordReplace
) {
533 /* Calculate 1.0/PointWidth */
536 BRW_MATH_FUNCTION_INV
,
537 BRW_MATH_SATURATE_NONE
,
540 BRW_MATH_DATA_SCALAR
,
541 BRW_MATH_PRECISION_FULL
);
543 if (c
->key
.SpriteOrigin
== GL_UPPER_LEFT
) {
544 brw_MUL(p
, c
->m1Cx
, c
->tmp
, c
->inv_w
[0]);
545 brw_MOV(p
, vec1(suboffset(c
->m1Cx
, 1)), brw_imm_f(0.0));
546 brw_MUL(p
, c
->m2Cy
, c
->tmp
, negate(c
->inv_w
[0]));
547 brw_MOV(p
, vec1(suboffset(c
->m2Cy
, 0)), brw_imm_f(0.0));
549 brw_MUL(p
, c
->m1Cx
, c
->tmp
, c
->inv_w
[0]);
550 brw_MOV(p
, vec1(suboffset(c
->m1Cx
, 1)), brw_imm_f(0.0));
551 brw_MUL(p
, c
->m2Cy
, c
->tmp
, c
->inv_w
[0]);
552 brw_MOV(p
, vec1(suboffset(c
->m2Cy
, 0)), brw_imm_f(0.0));
555 brw_MOV(p
, c
->m1Cx
, brw_imm_ud(0));
556 brw_MOV(p
, c
->m2Cy
, brw_imm_ud(0));
560 brw_set_predicate_control_flag_value(p
, pc
);
561 if (tex
->CoordReplace
) {
562 if (c
->key
.SpriteOrigin
== GL_UPPER_LEFT
) {
563 brw_MUL(p
, c
->m3C0
, c
->inv_w
[0], brw_imm_f(1.0));
564 brw_MOV(p
, vec1(suboffset(c
->m3C0
, 0)), brw_imm_f(0.0));
567 brw_MOV(p
, c
->m3C0
, brw_imm_f(0.0));
569 brw_MOV(p
, c
->m3C0
, a0
); /* constant value */
572 /* Copy m0..m3 to URB.
581 0, /* response len */
583 last
, /* writes complete */
584 i
*4, /* urb destination offset */
585 BRW_URB_SWIZZLE_TRANSPOSE
);
590 /* Points setup - several simplifications as all attributes are
591 * constant across the face of the point (point sprites excluded!)
/* Emit the (non-sprite) point setup program.
 *
 * m1Cx and m2Cy are zeroed once, before the loop.  Then, for each setup
 * register i in [0, nr_setup_regs):
 *   - a0 is c->vert[0] offset by i; calculate_masks() supplies the pc,
 *     pc_persp and pc_linear flag values and `last`.
 *   - with flag value pc_persp: a0 is multiplied in place by inv_w[0].
 *   - with flag value pc: m3C0 = a0, the constant value for the point.
 *   - a URB write follows; its visible arguments are a zero response
 *     length, `last`, a destination offset of i*4 and
 *     BRW_URB_SWIZZLE_TRANSPOSE.
 *
 * NOTE(review): any conditions around these steps, the URB write call
 * itself and the use of `allocate` are not visible in this excerpt.
 */
593 void brw_emit_point_setup( struct brw_sf_compile
*c
, GLboolean allocate
)
595 struct brw_compile
*p
= &c
->func
;
605 brw_MOV(p
, c
->m1Cx
, brw_imm_ud(0)); /* zero - move out of loop */
606 brw_MOV(p
, c
->m2Cy
, brw_imm_ud(0)); /* zero - move out of loop */
608 for (i
= 0; i
< c
->nr_setup_regs
; i
++)
610 struct brw_reg a0
= offset(c
->vert
[0], i
);
611 GLushort pc
, pc_persp
, pc_linear
;
612 GLboolean last
= calculate_masks(c
, i
, &pc
, &pc_persp
, &pc_linear
);
616 /* This seems odd as the values are all constant, but the
617 * fragment shader will be expecting it:
619 brw_set_predicate_control_flag_value(p
, pc_persp
);
620 brw_MUL(p
, a0
, a0
, c
->inv_w
[0]);
624 /* The delta values are always zero, just send the starting
625 * coordinate. Again, this is to fit in with the interpolation
626 * code in the fragment shader.
629 brw_set_predicate_control_flag_value(p
, pc
);
631 brw_MOV(p
, c
->m3C0
, a0
); /* constant value */
633 /* Copy m0..m3 to URB.
642 0, /* response len */
644 last
, /* writes complete */
645 i
*4, /* urb destination offset */
646 BRW_URB_SWIZZLE_TRANSPOSE
);
/* Emit one program that handles every primitive type by testing the
 * payload when the program runs.
 *
 * primmask (element 0 of c->tmp, retyped to UD) is set to
 * 1 << payload_prim, where payload_prim is the UW at GRF 1.0.  Three tests
 * follow; each is an AND into a null UD register with the Z conditional
 * modifier, then a JMPI emitted with a zero offset and later handed to
 * brw_land_fwd_jump() (presumably to patch the jump so it lands after the
 * guarded section):
 *   1. primmask & the triangle-type primitive bits
 *        -> brw_emit_tri_setup()
 *   2. primmask & the line-type primitive bits
 *        -> brw_emit_line_setup()
 *   3. payload_attr & (1 << BRW_SPRITE_POINT_ENABLE)
 *        -> brw_emit_point_sprite_setup()
 * brw_emit_point_setup() is emitted last with no test.  All four emitters
 * are called with allocate == GL_FALSE.
 *
 * p->flag_value is saved before and restored after each guarded section,
 * which is also wrapped in brw_push_insn_state()/brw_pop_insn_state().  The
 * in-code note gives the reason: the thread is killed in the subroutine and
 * building the subroutine changes the flag (fix #13240).
 *
 * NOTE(review): the declaration of `saveflag` and any statements before the
 * primmask computation are not visible in this excerpt.
 */
651 void brw_emit_anyprim_setup( struct brw_sf_compile
*c
)
653 struct brw_compile
*p
= &c
->func
;
654 struct brw_reg ip
= brw_ip_reg();
655 struct brw_reg payload_prim
= brw_uw1_reg(BRW_GENERAL_REGISTER_FILE
, 1, 0);
656 struct brw_reg payload_attr
= get_element_ud(brw_vec1_reg(BRW_GENERAL_REGISTER_FILE
, 1, 0), 0);
657 struct brw_reg primmask
;
658 struct brw_instruction
*jmp
;
659 struct brw_reg v1_null_ud
= vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD
));
666 primmask
= retype(get_element(c
->tmp
, 0), BRW_REGISTER_TYPE_UD
);
668 brw_MOV(p
, primmask
, brw_imm_ud(1));
669 brw_SHL(p
, primmask
, primmask
, payload_prim
);
671 brw_set_conditionalmod(p
, BRW_CONDITIONAL_Z
);
672 brw_AND(p
, v1_null_ud
, primmask
, brw_imm_ud((1<<_3DPRIM_TRILIST
) |
673 (1<<_3DPRIM_TRISTRIP
) |
674 (1<<_3DPRIM_TRIFAN
) |
675 (1<<_3DPRIM_TRISTRIP_REVERSE
) |
676 (1<<_3DPRIM_POLYGON
) |
677 (1<<_3DPRIM_RECTLIST
) |
678 (1<<_3DPRIM_TRIFAN_NOSTIPPLE
)));
679 jmp
= brw_JMPI(p
, ip
, ip
, brw_imm_w(0));
681 saveflag
= p
->flag_value
;
682 brw_push_insn_state(p
);
683 brw_emit_tri_setup( c
, GL_FALSE
);
684 brw_pop_insn_state(p
);
685 p
->flag_value
= saveflag
;
686 /* note - thread killed in subroutine, so must
687 * restore the flag which is changed when building
688 * the subroutine. fix #13240
691 brw_land_fwd_jump(p
, jmp
);
693 brw_set_conditionalmod(p
, BRW_CONDITIONAL_Z
);
694 brw_AND(p
, v1_null_ud
, primmask
, brw_imm_ud((1<<_3DPRIM_LINELIST
) |
695 (1<<_3DPRIM_LINESTRIP
) |
696 (1<<_3DPRIM_LINELOOP
) |
697 (1<<_3DPRIM_LINESTRIP_CONT
) |
698 (1<<_3DPRIM_LINESTRIP_BF
) |
699 (1<<_3DPRIM_LINESTRIP_CONT_BF
)));
700 jmp
= brw_JMPI(p
, ip
, ip
, brw_imm_w(0));
702 saveflag
= p
->flag_value
;
703 brw_push_insn_state(p
);
704 brw_emit_line_setup( c
, GL_FALSE
);
705 brw_pop_insn_state(p
);
706 p
->flag_value
= saveflag
;
707 /* note - thread killed in subroutine */
709 brw_land_fwd_jump(p
, jmp
);
711 brw_set_conditionalmod(p
, BRW_CONDITIONAL_Z
);
712 brw_AND(p
, v1_null_ud
, payload_attr
, brw_imm_ud(1<<BRW_SPRITE_POINT_ENABLE
));
713 jmp
= brw_JMPI(p
, ip
, ip
, brw_imm_w(0));
715 saveflag
= p
->flag_value
;
716 brw_push_insn_state(p
);
717 brw_emit_point_sprite_setup( c
, GL_FALSE
);
718 brw_pop_insn_state(p
);
719 p
->flag_value
= saveflag
;
721 brw_land_fwd_jump(p
, jmp
);
723 brw_emit_point_setup( c
, GL_FALSE
);