2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **********************************************************************/
29 * Keith Whitwell <keith@tungstengraphics.com>
33 #include "main/glheader.h"
34 #include "main/macros.h"
35 #include "main/enums.h"
37 #include "intel_batchbuffer.h"
39 #include "brw_defines.h"
40 #include "brw_context.h"
46 static struct brw_reg
get_vert_attr(struct brw_sf_compile
*c
,
50 GLuint off
= c
->attr_to_idx
[attr
] / 2;
51 GLuint sub
= c
->attr_to_idx
[attr
] % 2;
53 return brw_vec4_grf(vert
.nr
+ off
, sub
* 4);
56 static GLboolean
have_attr(struct brw_sf_compile
*c
,
59 return (c
->key
.attrs
& BITFIELD64_BIT(attr
)) ? 1 : 0;
62 /***********************************************************************
65 static void copy_bfc( struct brw_sf_compile
*c
,
68 struct brw_compile
*p
= &c
->func
;
71 for (i
= 0; i
< 2; i
++) {
72 if (have_attr(c
, VERT_RESULT_COL0
+i
) &&
73 have_attr(c
, VERT_RESULT_BFC0
+i
))
75 get_vert_attr(c
, vert
, VERT_RESULT_COL0
+i
),
76 get_vert_attr(c
, vert
, VERT_RESULT_BFC0
+i
));
81 static void do_twoside_color( struct brw_sf_compile
*c
)
83 struct brw_compile
*p
= &c
->func
;
84 struct brw_instruction
*if_insn
;
85 GLuint backface_conditional
= c
->key
.frontface_ccw
? BRW_CONDITIONAL_G
: BRW_CONDITIONAL_L
;
87 /* Already done in clip program:
89 if (c
->key
.primitive
== SF_UNFILLED_TRIS
)
92 /* XXX: What happens if BFC isn't present? This could only happen
93 * for user-supplied vertex programs, as t_vp_build.c always does
96 if (!(have_attr(c
, VERT_RESULT_COL0
) && have_attr(c
, VERT_RESULT_BFC0
)) &&
97 !(have_attr(c
, VERT_RESULT_COL1
) && have_attr(c
, VERT_RESULT_BFC1
)))
100 /* Need to use BRW_EXECUTE_4 and also do an 4-wide compare in order
101 * to get all channels active inside the IF. In the clipping code
102 * we run with NoMask, so it's not an option and we can use
103 * BRW_EXECUTE_1 for all comparisions.
105 brw_push_insn_state(p
);
106 brw_CMP(p
, vec4(brw_null_reg()), backface_conditional
, c
->det
, brw_imm_f(0));
107 if_insn
= brw_IF(p
, BRW_EXECUTE_4
);
109 switch (c
->nr_verts
) {
110 case 3: copy_bfc(c
, c
->vert
[2]);
111 case 2: copy_bfc(c
, c
->vert
[1]);
112 case 1: copy_bfc(c
, c
->vert
[0]);
115 brw_ENDIF(p
, if_insn
);
116 brw_pop_insn_state(p
);
121 /***********************************************************************
125 #define VERT_RESULT_COLOR_BITS (BITFIELD64_BIT(VERT_RESULT_COL0) | \
126 BITFIELD64_BIT(VERT_RESULT_COL1))
128 static void copy_colors( struct brw_sf_compile
*c
,
132 struct brw_compile
*p
= &c
->func
;
135 for (i
= VERT_RESULT_COL0
; i
<= VERT_RESULT_COL1
; i
++) {
138 get_vert_attr(c
, dst
, i
),
139 get_vert_attr(c
, src
, i
));
145 /* Need to use a computed jump to copy flatshaded attributes as the
146 * vertices are ordered according to y-coordinate before reaching this
147 * point, so the PV could be anywhere.
149 static void do_flatshade_triangle( struct brw_sf_compile
*c
)
151 struct brw_compile
*p
= &c
->func
;
152 struct intel_context
*intel
= &p
->brw
->intel
;
153 struct brw_reg ip
= brw_ip_reg();
154 GLuint nr
= brw_count_bits(c
->key
.attrs
& VERT_RESULT_COLOR_BITS
);
160 /* Already done in clip program:
162 if (c
->key
.primitive
== SF_UNFILLED_TRIS
)
165 if (intel
->is_ironlake
)
168 brw_push_insn_state(p
);
170 brw_MUL(p
, c
->pv
, c
->pv
, brw_imm_d(jmpi
*(nr
*2+1)));
171 brw_JMPI(p
, ip
, ip
, c
->pv
);
173 copy_colors(c
, c
->vert
[1], c
->vert
[0]);
174 copy_colors(c
, c
->vert
[2], c
->vert
[0]);
175 brw_JMPI(p
, ip
, ip
, brw_imm_d(jmpi
*(nr
*4+1)));
177 copy_colors(c
, c
->vert
[0], c
->vert
[1]);
178 copy_colors(c
, c
->vert
[2], c
->vert
[1]);
179 brw_JMPI(p
, ip
, ip
, brw_imm_d(jmpi
*nr
*2));
181 copy_colors(c
, c
->vert
[0], c
->vert
[2]);
182 copy_colors(c
, c
->vert
[1], c
->vert
[2]);
184 brw_pop_insn_state(p
);
188 static void do_flatshade_line( struct brw_sf_compile
*c
)
190 struct brw_compile
*p
= &c
->func
;
191 struct intel_context
*intel
= &p
->brw
->intel
;
192 struct brw_reg ip
= brw_ip_reg();
193 GLuint nr
= brw_count_bits(c
->key
.attrs
& VERT_RESULT_COLOR_BITS
);
199 /* Already done in clip program:
201 if (c
->key
.primitive
== SF_UNFILLED_TRIS
)
204 if (intel
->is_ironlake
)
207 brw_push_insn_state(p
);
209 brw_MUL(p
, c
->pv
, c
->pv
, brw_imm_d(jmpi
*(nr
+1)));
210 brw_JMPI(p
, ip
, ip
, c
->pv
);
211 copy_colors(c
, c
->vert
[1], c
->vert
[0]);
213 brw_JMPI(p
, ip
, ip
, brw_imm_ud(jmpi
*nr
));
214 copy_colors(c
, c
->vert
[0], c
->vert
[1]);
216 brw_pop_insn_state(p
);
221 /***********************************************************************
226 static void alloc_regs( struct brw_sf_compile
*c
)
230 /* Values computed by fixed function unit:
232 c
->pv
= retype(brw_vec1_grf(1, 1), BRW_REGISTER_TYPE_D
);
233 c
->det
= brw_vec1_grf(1, 2);
234 c
->dx0
= brw_vec1_grf(1, 3);
235 c
->dx2
= brw_vec1_grf(1, 4);
236 c
->dy0
= brw_vec1_grf(1, 5);
237 c
->dy2
= brw_vec1_grf(1, 6);
239 /* z and 1/w passed in seperately:
241 c
->z
[0] = brw_vec1_grf(2, 0);
242 c
->inv_w
[0] = brw_vec1_grf(2, 1);
243 c
->z
[1] = brw_vec1_grf(2, 2);
244 c
->inv_w
[1] = brw_vec1_grf(2, 3);
245 c
->z
[2] = brw_vec1_grf(2, 4);
246 c
->inv_w
[2] = brw_vec1_grf(2, 5);
251 for (i
= 0; i
< c
->nr_verts
; i
++) {
252 c
->vert
[i
] = brw_vec8_grf(reg
, 0);
253 reg
+= c
->nr_attr_regs
;
256 /* Temporaries, allocated after last vertex reg.
258 c
->inv_det
= brw_vec1_grf(reg
, 0); reg
++;
259 c
->a1_sub_a0
= brw_vec8_grf(reg
, 0); reg
++;
260 c
->a2_sub_a0
= brw_vec8_grf(reg
, 0); reg
++;
261 c
->tmp
= brw_vec8_grf(reg
, 0); reg
++;
263 /* Note grf allocation:
265 c
->prog_data
.total_grf
= reg
;
268 /* Outputs of this program - interpolation coefficients for
271 c
->m1Cx
= brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE
, 1, 0);
272 c
->m2Cy
= brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE
, 2, 0);
273 c
->m3C0
= brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE
, 3, 0);
277 static void copy_z_inv_w( struct brw_sf_compile
*c
)
279 struct brw_compile
*p
= &c
->func
;
282 brw_push_insn_state(p
);
284 /* Copy both scalars with a single MOV:
286 for (i
= 0; i
< c
->nr_verts
; i
++)
287 brw_MOV(p
, vec2(suboffset(c
->vert
[i
], 2)), vec2(c
->z
[i
]));
289 brw_pop_insn_state(p
);
293 static void invert_det( struct brw_sf_compile
*c
)
295 /* Looks like we invert all 8 elements just to get 1/det in
300 BRW_MATH_FUNCTION_INV
,
301 BRW_MATH_SATURATE_NONE
,
304 BRW_MATH_DATA_SCALAR
,
305 BRW_MATH_PRECISION_FULL
);
310 static GLboolean
calculate_masks( struct brw_sf_compile
*c
,
316 GLboolean is_last_attr
= (reg
== c
->nr_setup_regs
- 1);
317 GLbitfield64 persp_mask
;
318 GLbitfield64 linear_mask
;
320 if (c
->key
.do_flat_shading
|| c
->key
.linear_color
)
321 persp_mask
= c
->key
.attrs
& ~(FRAG_BIT_WPOS
|
325 persp_mask
= c
->key
.attrs
& ~(FRAG_BIT_WPOS
);
327 if (c
->key
.do_flat_shading
)
328 linear_mask
= c
->key
.attrs
& ~(FRAG_BIT_COL0
|FRAG_BIT_COL1
);
330 linear_mask
= c
->key
.attrs
;
336 if (persp_mask
& BITFIELD64_BIT(c
->idx_to_attr
[reg
*2]))
339 if (linear_mask
& BITFIELD64_BIT(c
->idx_to_attr
[reg
*2]))
342 /* Maybe only processs one attribute on the final round:
344 if (reg
*2+1 < c
->nr_setup_attrs
) {
347 if (persp_mask
& BITFIELD64_BIT(c
->idx_to_attr
[reg
*2+1]))
350 if (linear_mask
& BITFIELD64_BIT(c
->idx_to_attr
[reg
*2+1]))
359 void brw_emit_tri_setup( struct brw_sf_compile
*c
, GLboolean allocate
)
361 struct brw_compile
*p
= &c
->func
;
372 if (c
->key
.do_twoside_color
)
375 if (c
->key
.do_flat_shading
)
376 do_flatshade_triangle(c
);
379 for (i
= 0; i
< c
->nr_setup_regs
; i
++)
381 /* Pair of incoming attributes:
383 struct brw_reg a0
= offset(c
->vert
[0], i
);
384 struct brw_reg a1
= offset(c
->vert
[1], i
);
385 struct brw_reg a2
= offset(c
->vert
[2], i
);
386 GLushort pc
, pc_persp
, pc_linear
;
387 GLboolean last
= calculate_masks(c
, i
, &pc
, &pc_persp
, &pc_linear
);
391 brw_set_predicate_control_flag_value(p
, pc_persp
);
392 brw_MUL(p
, a0
, a0
, c
->inv_w
[0]);
393 brw_MUL(p
, a1
, a1
, c
->inv_w
[1]);
394 brw_MUL(p
, a2
, a2
, c
->inv_w
[2]);
398 /* Calculate coefficients for interpolated values:
402 brw_set_predicate_control_flag_value(p
, pc_linear
);
404 brw_ADD(p
, c
->a1_sub_a0
, a1
, negate(a0
));
405 brw_ADD(p
, c
->a2_sub_a0
, a2
, negate(a0
));
409 brw_MUL(p
, brw_null_reg(), c
->a1_sub_a0
, c
->dy2
);
410 brw_MAC(p
, c
->tmp
, c
->a2_sub_a0
, negate(c
->dy0
));
411 brw_MUL(p
, c
->m1Cx
, c
->tmp
, c
->inv_det
);
415 brw_MUL(p
, brw_null_reg(), c
->a2_sub_a0
, c
->dx0
);
416 brw_MAC(p
, c
->tmp
, c
->a1_sub_a0
, negate(c
->dx2
));
417 brw_MUL(p
, c
->m2Cy
, c
->tmp
, c
->inv_det
);
421 brw_set_predicate_control_flag_value(p
, pc
);
422 /* start point for interpolation
424 brw_MOV(p
, c
->m3C0
, a0
);
426 /* Copy m0..m3 to URB. m0 is implicitly copied from r0 in
427 * the send instruction:
432 brw_vec8_grf(0, 0), /* r0, will be copied to m0 */
436 0, /* response len */
438 last
, /* writes complete */
440 BRW_URB_SWIZZLE_TRANSPOSE
); /* XXX: Swizzle control "SF to windower" */
447 void brw_emit_line_setup( struct brw_sf_compile
*c
, GLboolean allocate
)
449 struct brw_compile
*p
= &c
->func
;
461 if (c
->key
.do_flat_shading
)
462 do_flatshade_line(c
);
464 for (i
= 0; i
< c
->nr_setup_regs
; i
++)
466 /* Pair of incoming attributes:
468 struct brw_reg a0
= offset(c
->vert
[0], i
);
469 struct brw_reg a1
= offset(c
->vert
[1], i
);
470 GLushort pc
, pc_persp
, pc_linear
;
471 GLboolean last
= calculate_masks(c
, i
, &pc
, &pc_persp
, &pc_linear
);
475 brw_set_predicate_control_flag_value(p
, pc_persp
);
476 brw_MUL(p
, a0
, a0
, c
->inv_w
[0]);
477 brw_MUL(p
, a1
, a1
, c
->inv_w
[1]);
480 /* Calculate coefficients for position, color:
483 brw_set_predicate_control_flag_value(p
, pc_linear
);
485 brw_ADD(p
, c
->a1_sub_a0
, a1
, negate(a0
));
487 brw_MUL(p
, c
->tmp
, c
->a1_sub_a0
, c
->dx0
);
488 brw_MUL(p
, c
->m1Cx
, c
->tmp
, c
->inv_det
);
490 brw_MUL(p
, c
->tmp
, c
->a1_sub_a0
, c
->dy0
);
491 brw_MUL(p
, c
->m2Cy
, c
->tmp
, c
->inv_det
);
495 brw_set_predicate_control_flag_value(p
, pc
);
497 /* start point for interpolation
499 brw_MOV(p
, c
->m3C0
, a0
);
501 /* Copy m0..m3 to URB.
510 0, /* response len */
512 last
, /* writes complete */
513 i
*4, /* urb destination offset */
514 BRW_URB_SWIZZLE_TRANSPOSE
);
519 void brw_emit_point_sprite_setup( struct brw_sf_compile
*c
, GLboolean allocate
)
521 struct brw_compile
*p
= &c
->func
;
530 for (i
= 0; i
< c
->nr_setup_regs
; i
++)
532 struct brw_sf_point_tex
*tex
= &c
->point_attrs
[c
->idx_to_attr
[2*i
]];
533 struct brw_reg a0
= offset(c
->vert
[0], i
);
534 GLushort pc
, pc_persp
, pc_linear
;
535 GLboolean last
= calculate_masks(c
, i
, &pc
, &pc_persp
, &pc_linear
);
539 if (!tex
->CoordReplace
) {
540 brw_set_predicate_control_flag_value(p
, pc_persp
);
541 brw_MUL(p
, a0
, a0
, c
->inv_w
[0]);
545 if (tex
->CoordReplace
) {
546 /* Caculate 1.0/PointWidth */
549 BRW_MATH_FUNCTION_INV
,
550 BRW_MATH_SATURATE_NONE
,
553 BRW_MATH_DATA_SCALAR
,
554 BRW_MATH_PRECISION_FULL
);
556 if (c
->key
.sprite_origin_lower_left
) {
557 brw_MUL(p
, c
->m1Cx
, c
->tmp
, c
->inv_w
[0]);
558 brw_MOV(p
, vec1(suboffset(c
->m1Cx
, 1)), brw_imm_f(0.0));
559 brw_MUL(p
, c
->m2Cy
, c
->tmp
, negate(c
->inv_w
[0]));
560 brw_MOV(p
, vec1(suboffset(c
->m2Cy
, 0)), brw_imm_f(0.0));
562 brw_MUL(p
, c
->m1Cx
, c
->tmp
, c
->inv_w
[0]);
563 brw_MOV(p
, vec1(suboffset(c
->m1Cx
, 1)), brw_imm_f(0.0));
564 brw_MUL(p
, c
->m2Cy
, c
->tmp
, c
->inv_w
[0]);
565 brw_MOV(p
, vec1(suboffset(c
->m2Cy
, 0)), brw_imm_f(0.0));
568 brw_MOV(p
, c
->m1Cx
, brw_imm_ud(0));
569 brw_MOV(p
, c
->m2Cy
, brw_imm_ud(0));
573 brw_set_predicate_control_flag_value(p
, pc
);
574 if (tex
->CoordReplace
) {
575 if (c
->key
.sprite_origin_lower_left
) {
576 brw_MUL(p
, c
->m3C0
, c
->inv_w
[0], brw_imm_f(1.0));
577 brw_MOV(p
, vec1(suboffset(c
->m3C0
, 0)), brw_imm_f(0.0));
580 brw_MOV(p
, c
->m3C0
, brw_imm_f(0.0));
582 brw_MOV(p
, c
->m3C0
, a0
); /* constant value */
585 /* Copy m0..m3 to URB.
594 0, /* response len */
596 last
, /* writes complete */
597 i
*4, /* urb destination offset */
598 BRW_URB_SWIZZLE_TRANSPOSE
);
603 /* Points setup - several simplifications as all attributes are
604 * constant across the face of the point (point sprites excluded!)
606 void brw_emit_point_setup( struct brw_sf_compile
*c
, GLboolean allocate
)
608 struct brw_compile
*p
= &c
->func
;
618 brw_MOV(p
, c
->m1Cx
, brw_imm_ud(0)); /* zero - move out of loop */
619 brw_MOV(p
, c
->m2Cy
, brw_imm_ud(0)); /* zero - move out of loop */
621 for (i
= 0; i
< c
->nr_setup_regs
; i
++)
623 struct brw_reg a0
= offset(c
->vert
[0], i
);
624 GLushort pc
, pc_persp
, pc_linear
;
625 GLboolean last
= calculate_masks(c
, i
, &pc
, &pc_persp
, &pc_linear
);
629 /* This seems odd as the values are all constant, but the
630 * fragment shader will be expecting it:
632 brw_set_predicate_control_flag_value(p
, pc_persp
);
633 brw_MUL(p
, a0
, a0
, c
->inv_w
[0]);
637 /* The delta values are always zero, just send the starting
638 * coordinate. Again, this is to fit in with the interpolation
639 * code in the fragment shader.
642 brw_set_predicate_control_flag_value(p
, pc
);
644 brw_MOV(p
, c
->m3C0
, a0
); /* constant value */
646 /* Copy m0..m3 to URB.
655 0, /* response len */
657 last
, /* writes complete */
658 i
*4, /* urb destination offset */
659 BRW_URB_SWIZZLE_TRANSPOSE
);
664 void brw_emit_anyprim_setup( struct brw_sf_compile
*c
)
666 struct brw_compile
*p
= &c
->func
;
667 struct brw_reg ip
= brw_ip_reg();
668 struct brw_reg payload_prim
= brw_uw1_reg(BRW_GENERAL_REGISTER_FILE
, 1, 0);
669 struct brw_reg payload_attr
= get_element_ud(brw_vec1_reg(BRW_GENERAL_REGISTER_FILE
, 1, 0), 0);
670 struct brw_reg primmask
;
671 struct brw_instruction
*jmp
;
672 struct brw_reg v1_null_ud
= vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD
));
679 primmask
= retype(get_element(c
->tmp
, 0), BRW_REGISTER_TYPE_UD
);
681 brw_MOV(p
, primmask
, brw_imm_ud(1));
682 brw_SHL(p
, primmask
, primmask
, payload_prim
);
684 brw_set_conditionalmod(p
, BRW_CONDITIONAL_Z
);
685 brw_AND(p
, v1_null_ud
, primmask
, brw_imm_ud((1<<_3DPRIM_TRILIST
) |
686 (1<<_3DPRIM_TRISTRIP
) |
687 (1<<_3DPRIM_TRIFAN
) |
688 (1<<_3DPRIM_TRISTRIP_REVERSE
) |
689 (1<<_3DPRIM_POLYGON
) |
690 (1<<_3DPRIM_RECTLIST
) |
691 (1<<_3DPRIM_TRIFAN_NOSTIPPLE
)));
692 jmp
= brw_JMPI(p
, ip
, ip
, brw_imm_d(0));
694 saveflag
= p
->flag_value
;
695 brw_push_insn_state(p
);
696 brw_emit_tri_setup( c
, GL_FALSE
);
697 brw_pop_insn_state(p
);
698 p
->flag_value
= saveflag
;
699 /* note - thread killed in subroutine, so must
700 * restore the flag which is changed when building
701 * the subroutine. fix #13240
704 brw_land_fwd_jump(p
, jmp
);
706 brw_set_conditionalmod(p
, BRW_CONDITIONAL_Z
);
707 brw_AND(p
, v1_null_ud
, primmask
, brw_imm_ud((1<<_3DPRIM_LINELIST
) |
708 (1<<_3DPRIM_LINESTRIP
) |
709 (1<<_3DPRIM_LINELOOP
) |
710 (1<<_3DPRIM_LINESTRIP_CONT
) |
711 (1<<_3DPRIM_LINESTRIP_BF
) |
712 (1<<_3DPRIM_LINESTRIP_CONT_BF
)));
713 jmp
= brw_JMPI(p
, ip
, ip
, brw_imm_d(0));
715 saveflag
= p
->flag_value
;
716 brw_push_insn_state(p
);
717 brw_emit_line_setup( c
, GL_FALSE
);
718 brw_pop_insn_state(p
);
719 p
->flag_value
= saveflag
;
720 /* note - thread killed in subroutine */
722 brw_land_fwd_jump(p
, jmp
);
724 brw_set_conditionalmod(p
, BRW_CONDITIONAL_Z
);
725 brw_AND(p
, v1_null_ud
, payload_attr
, brw_imm_ud(1<<BRW_SPRITE_POINT_ENABLE
));
726 jmp
= brw_JMPI(p
, ip
, ip
, brw_imm_d(0));
728 saveflag
= p
->flag_value
;
729 brw_push_insn_state(p
);
730 brw_emit_point_sprite_setup( c
, GL_FALSE
);
731 brw_pop_insn_state(p
);
732 p
->flag_value
= saveflag
;
734 brw_land_fwd_jump(p
, jmp
);
736 brw_emit_point_setup( c
, GL_FALSE
);