/*
 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
/*
 * Authors:
 *   Keith Whitwell <keith@tungstengraphics.com>
 */
33 #include "main/macros.h"
34 #include "brw_context.h"
37 /* Not quite sure how correct this is - need to understand horiz
38 * vs. vertical strides a little better.
40 static INLINE
struct brw_reg
sechalf( struct brw_reg reg
)
49 * R0.0 -- pixel mask, one bit for each of 4 pixels in 4 tiles,
50 * corresponding to each of the 16 execution channels.
52 * R1.0 -- triangle vertex 0.X
53 * R1.1 -- triangle vertex 0.Y
54 * R1.2 -- tile 0 x,y coords (2 packed uwords)
55 * R1.3 -- tile 1 x,y coords (2 packed uwords)
56 * R1.4 -- tile 2 x,y coords (2 packed uwords)
57 * R1.5 -- tile 3 x,y coords (2 packed uwords)
64 static void emit_pixel_xy(struct brw_compile
*p
,
65 const struct brw_reg
*dst
,
68 struct brw_reg r1
= brw_vec1_grf(1, 0);
69 struct brw_reg r1_uw
= retype(r1
, BRW_REGISTER_TYPE_UW
);
71 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
73 /* Calculate pixel centers by adding 1 or 0 to each of the
74 * micro-tile coordinates passed in r1.
76 if (mask
& WRITEMASK_X
) {
78 vec16(retype(dst
[0], BRW_REGISTER_TYPE_UW
)),
79 stride(suboffset(r1_uw
, 4), 2, 4, 0),
80 brw_imm_v(0x10101010));
83 if (mask
& WRITEMASK_Y
) {
85 vec16(retype(dst
[1], BRW_REGISTER_TYPE_UW
)),
86 stride(suboffset(r1_uw
,5), 2, 4, 0),
87 brw_imm_v(0x11001100));
90 brw_set_compression_control(p
, BRW_COMPRESSION_COMPRESSED
);
95 static void emit_delta_xy(struct brw_compile
*p
,
96 const struct brw_reg
*dst
,
98 const struct brw_reg
*arg0
)
100 struct brw_reg r1
= brw_vec1_grf(1, 0);
102 /* Calc delta X,Y by subtracting origin in r1 from the pixel
105 if (mask
& WRITEMASK_X
) {
108 retype(arg0
[0], BRW_REGISTER_TYPE_UW
),
112 if (mask
& WRITEMASK_Y
) {
115 retype(arg0
[1], BRW_REGISTER_TYPE_UW
),
116 negate(suboffset(r1
,1)));
121 static void emit_wpos_xy(struct brw_wm_compile
*c
,
122 const struct brw_reg
*dst
,
124 const struct brw_reg
*arg0
)
126 struct brw_compile
*p
= &c
->func
;
128 /* Calculate the pixel offset from window bottom left into destination
131 if (mask
& WRITEMASK_X
) {
132 /* X' = X - origin */
135 retype(arg0
[0], BRW_REGISTER_TYPE_W
),
136 brw_imm_d(0 - c
->key
.origin_x
));
139 if (mask
& WRITEMASK_Y
) {
140 /* Y' = height - (Y - origin_y) = height + origin_y - Y */
143 negate(retype(arg0
[1], BRW_REGISTER_TYPE_W
)),
144 brw_imm_d(c
->key
.origin_y
+ c
->key
.drawable_height
- 1));
149 static void emit_pixel_w( struct brw_compile
*p
,
150 const struct brw_reg
*dst
,
152 const struct brw_reg
*arg0
,
153 const struct brw_reg
*deltas
)
155 /* Don't need this if all you are doing is interpolating color, for
158 if (mask
& WRITEMASK_W
) {
159 struct brw_reg interp3
= brw_vec1_grf(arg0
[0].nr
+1, 4);
161 /* Calc 1/w - just linterp wpos[3] optimized by putting the
162 * result straight into a message reg.
164 brw_LINE(p
, brw_null_reg(), interp3
, deltas
[0]);
165 brw_MAC(p
, brw_message_reg(2), suboffset(interp3
, 1), deltas
[1]);
168 brw_math_16( p
, dst
[3],
169 BRW_MATH_FUNCTION_INV
,
170 BRW_MATH_SATURATE_NONE
,
172 BRW_MATH_PRECISION_FULL
);
178 static void emit_linterp( struct brw_compile
*p
,
179 const struct brw_reg
*dst
,
181 const struct brw_reg
*arg0
,
182 const struct brw_reg
*deltas
)
184 struct brw_reg interp
[4];
185 GLuint nr
= arg0
[0].nr
;
188 interp
[0] = brw_vec1_grf(nr
, 0);
189 interp
[1] = brw_vec1_grf(nr
, 4);
190 interp
[2] = brw_vec1_grf(nr
+1, 0);
191 interp
[3] = brw_vec1_grf(nr
+1, 4);
193 for (i
= 0; i
< 4; i
++) {
195 brw_LINE(p
, brw_null_reg(), interp
[i
], deltas
[0]);
196 brw_MAC(p
, dst
[i
], suboffset(interp
[i
],1), deltas
[1]);
202 static void emit_pinterp( struct brw_compile
*p
,
203 const struct brw_reg
*dst
,
205 const struct brw_reg
*arg0
,
206 const struct brw_reg
*deltas
,
207 const struct brw_reg
*w
)
209 struct brw_reg interp
[4];
210 GLuint nr
= arg0
[0].nr
;
213 interp
[0] = brw_vec1_grf(nr
, 0);
214 interp
[1] = brw_vec1_grf(nr
, 4);
215 interp
[2] = brw_vec1_grf(nr
+1, 0);
216 interp
[3] = brw_vec1_grf(nr
+1, 4);
218 for (i
= 0; i
< 4; i
++) {
220 brw_LINE(p
, brw_null_reg(), interp
[i
], deltas
[0]);
221 brw_MAC(p
, dst
[i
], suboffset(interp
[i
],1), deltas
[1]);
224 for (i
= 0; i
< 4; i
++) {
226 brw_MUL(p
, dst
[i
], dst
[i
], w
[3]);
232 static void emit_cinterp( struct brw_compile
*p
,
233 const struct brw_reg
*dst
,
235 const struct brw_reg
*arg0
)
237 struct brw_reg interp
[4];
238 GLuint nr
= arg0
[0].nr
;
241 interp
[0] = brw_vec1_grf(nr
, 0);
242 interp
[1] = brw_vec1_grf(nr
, 4);
243 interp
[2] = brw_vec1_grf(nr
+1, 0);
244 interp
[3] = brw_vec1_grf(nr
+1, 4);
246 for (i
= 0; i
< 4; i
++) {
248 brw_MOV(p
, dst
[i
], suboffset(interp
[i
],3)); /* TODO: optimize away like other moves */
253 /* Sets the destination channels to 1.0 or 0.0 according to glFrontFacing. */
254 static void emit_frontfacing( struct brw_compile
*p
,
255 const struct brw_reg
*dst
,
258 struct brw_reg r1_6ud
= retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD
);
261 if (!(mask
& WRITEMASK_XYZW
))
264 for (i
= 0; i
< 4; i
++) {
266 brw_MOV(p
, dst
[i
], brw_imm_f(0.0));
270 /* bit 31 is "primitive is back face", so checking < (1 << 31) gives
273 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, r1_6ud
, brw_imm_ud(1 << 31));
274 for (i
= 0; i
< 4; i
++) {
276 brw_MOV(p
, dst
[i
], brw_imm_f(1.0));
279 brw_set_predicate_control_flag_value(p
, 0xff);
282 /* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input
285 * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br
287 * and we're trying to produce:
290 * dst: (ss0.tr - ss0.tl) (ss0.tl - ss0.bl)
291 * (ss0.tr - ss0.tl) (ss0.tr - ss0.br)
292 * (ss0.br - ss0.bl) (ss0.tl - ss0.bl)
293 * (ss0.br - ss0.bl) (ss0.tr - ss0.br)
294 * (ss1.tr - ss1.tl) (ss1.tl - ss1.bl)
295 * (ss1.tr - ss1.tl) (ss1.tr - ss1.br)
296 * (ss1.br - ss1.bl) (ss1.tl - ss1.bl)
297 * (ss1.br - ss1.bl) (ss1.tr - ss1.br)
299 * and add another set of two more subspans if in 16-pixel dispatch mode.
301 * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result
302 * for each pair, and vertstride = 2 jumps us 2 elements after processing a
303 * pair. But for DDY, it's harder, as we want to produce the pairs swizzled
304 * between each other. We could probably do it like ddx and swizzle the right
305 * order later, but bail for now and just produce
306 * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4)
308 void emit_ddxy(struct brw_compile
*p
,
309 const struct brw_reg
*dst
,
312 const struct brw_reg
*arg0
)
315 struct brw_reg src0
, src1
;
318 brw_set_saturate(p
, 1);
319 for (i
= 0; i
< 4; i
++ ) {
322 src0
= brw_reg(arg0
[i
].file
, arg0
[i
].nr
, 1,
324 BRW_VERTICAL_STRIDE_2
,
326 BRW_HORIZONTAL_STRIDE_0
,
327 BRW_SWIZZLE_XYZW
, WRITEMASK_XYZW
);
328 src1
= brw_reg(arg0
[i
].file
, arg0
[i
].nr
, 0,
330 BRW_VERTICAL_STRIDE_2
,
332 BRW_HORIZONTAL_STRIDE_0
,
333 BRW_SWIZZLE_XYZW
, WRITEMASK_XYZW
);
335 src0
= brw_reg(arg0
[i
].file
, arg0
[i
].nr
, 0,
337 BRW_VERTICAL_STRIDE_4
,
339 BRW_HORIZONTAL_STRIDE_0
,
340 BRW_SWIZZLE_XYZW
, WRITEMASK_XYZW
);
341 src1
= brw_reg(arg0
[i
].file
, arg0
[i
].nr
, 2,
343 BRW_VERTICAL_STRIDE_4
,
345 BRW_HORIZONTAL_STRIDE_0
,
346 BRW_SWIZZLE_XYZW
, WRITEMASK_XYZW
);
348 brw_ADD(p
, dst
[i
], src0
, negate(src1
));
352 brw_set_saturate(p
, 0);
355 void emit_alu1(struct brw_compile
*p
,
356 struct brw_instruction
*(*func
)(struct brw_compile
*,
359 const struct brw_reg
*dst
,
361 const struct brw_reg
*arg0
)
366 brw_set_saturate(p
, 1);
368 for (i
= 0; i
< 4; i
++) {
370 func(p
, dst
[i
], arg0
[i
]);
375 brw_set_saturate(p
, 0);
379 void emit_alu2(struct brw_compile
*p
,
380 struct brw_instruction
*(*func
)(struct brw_compile
*,
384 const struct brw_reg
*dst
,
386 const struct brw_reg
*arg0
,
387 const struct brw_reg
*arg1
)
392 brw_set_saturate(p
, 1);
394 for (i
= 0; i
< 4; i
++) {
396 func(p
, dst
[i
], arg0
[i
], arg1
[i
]);
401 brw_set_saturate(p
, 0);
405 static void emit_mad( struct brw_compile
*p
,
406 const struct brw_reg
*dst
,
408 const struct brw_reg
*arg0
,
409 const struct brw_reg
*arg1
,
410 const struct brw_reg
*arg2
)
414 for (i
= 0; i
< 4; i
++) {
416 brw_MUL(p
, dst
[i
], arg0
[i
], arg1
[i
]);
418 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
419 brw_ADD(p
, dst
[i
], dst
[i
], arg2
[i
]);
420 brw_set_saturate(p
, 0);
425 static void emit_lrp( struct brw_compile
*p
,
426 const struct brw_reg
*dst
,
428 const struct brw_reg
*arg0
,
429 const struct brw_reg
*arg1
,
430 const struct brw_reg
*arg2
)
434 /* Uses dst as a temporary:
436 for (i
= 0; i
< 4; i
++) {
438 /* Can I use the LINE instruction for this?
440 brw_ADD(p
, dst
[i
], negate(arg0
[i
]), brw_imm_f(1.0));
441 brw_MUL(p
, brw_null_reg(), dst
[i
], arg2
[i
]);
443 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
444 brw_MAC(p
, dst
[i
], arg0
[i
], arg1
[i
]);
445 brw_set_saturate(p
, 0);
450 static void emit_sop( struct brw_compile
*p
,
451 const struct brw_reg
*dst
,
454 const struct brw_reg
*arg0
,
455 const struct brw_reg
*arg1
)
459 for (i
= 0; i
< 4; i
++) {
461 brw_MOV(p
, dst
[i
], brw_imm_f(0));
462 brw_CMP(p
, brw_null_reg(), cond
, arg0
[i
], arg1
[i
]);
463 brw_MOV(p
, dst
[i
], brw_imm_f(1.0));
464 brw_set_predicate_control_flag_value(p
, 0xff);
469 static void emit_slt( struct brw_compile
*p
,
470 const struct brw_reg
*dst
,
472 const struct brw_reg
*arg0
,
473 const struct brw_reg
*arg1
)
475 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_L
, arg0
, arg1
);
478 static void emit_sle( struct brw_compile
*p
,
479 const struct brw_reg
*dst
,
481 const struct brw_reg
*arg0
,
482 const struct brw_reg
*arg1
)
484 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_LE
, arg0
, arg1
);
487 static void emit_sgt( struct brw_compile
*p
,
488 const struct brw_reg
*dst
,
490 const struct brw_reg
*arg0
,
491 const struct brw_reg
*arg1
)
493 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_G
, arg0
, arg1
);
496 static void emit_sge( struct brw_compile
*p
,
497 const struct brw_reg
*dst
,
499 const struct brw_reg
*arg0
,
500 const struct brw_reg
*arg1
)
502 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_GE
, arg0
, arg1
);
505 static void emit_seq( struct brw_compile
*p
,
506 const struct brw_reg
*dst
,
508 const struct brw_reg
*arg0
,
509 const struct brw_reg
*arg1
)
511 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_EQ
, arg0
, arg1
);
514 static void emit_sne( struct brw_compile
*p
,
515 const struct brw_reg
*dst
,
517 const struct brw_reg
*arg0
,
518 const struct brw_reg
*arg1
)
520 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_NEQ
, arg0
, arg1
);
523 static void emit_cmp( struct brw_compile
*p
,
524 const struct brw_reg
*dst
,
526 const struct brw_reg
*arg0
,
527 const struct brw_reg
*arg1
,
528 const struct brw_reg
*arg2
)
532 for (i
= 0; i
< 4; i
++) {
534 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
535 brw_MOV(p
, dst
[i
], arg2
[i
]);
536 brw_set_saturate(p
, 0);
538 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, arg0
[i
], brw_imm_f(0));
540 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
541 brw_MOV(p
, dst
[i
], arg1
[i
]);
542 brw_set_saturate(p
, 0);
543 brw_set_predicate_control_flag_value(p
, 0xff);
548 static void emit_max( struct brw_compile
*p
,
549 const struct brw_reg
*dst
,
551 const struct brw_reg
*arg0
,
552 const struct brw_reg
*arg1
)
556 for (i
= 0; i
< 4; i
++) {
558 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
559 brw_MOV(p
, dst
[i
], arg0
[i
]);
560 brw_set_saturate(p
, 0);
562 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, arg0
[i
], arg1
[i
]);
564 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
565 brw_MOV(p
, dst
[i
], arg1
[i
]);
566 brw_set_saturate(p
, 0);
567 brw_set_predicate_control_flag_value(p
, 0xff);
572 static void emit_min( struct brw_compile
*p
,
573 const struct brw_reg
*dst
,
575 const struct brw_reg
*arg0
,
576 const struct brw_reg
*arg1
)
580 for (i
= 0; i
< 4; i
++) {
582 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
583 brw_MOV(p
, dst
[i
], arg1
[i
]);
584 brw_set_saturate(p
, 0);
586 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, arg0
[i
], arg1
[i
]);
588 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
589 brw_MOV(p
, dst
[i
], arg0
[i
]);
590 brw_set_saturate(p
, 0);
591 brw_set_predicate_control_flag_value(p
, 0xff);
597 static void emit_dp3( struct brw_compile
*p
,
598 const struct brw_reg
*dst
,
600 const struct brw_reg
*arg0
,
601 const struct brw_reg
*arg1
)
603 int dst_chan
= _mesa_ffs(mask
& WRITEMASK_XYZW
) - 1;
605 if (!(mask
& WRITEMASK_XYZW
))
606 return; /* Do not emit dead code */
608 assert(is_power_of_two(mask
& WRITEMASK_XYZW
));
610 brw_MUL(p
, brw_null_reg(), arg0
[0], arg1
[0]);
611 brw_MAC(p
, brw_null_reg(), arg0
[1], arg1
[1]);
613 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
614 brw_MAC(p
, dst
[dst_chan
], arg0
[2], arg1
[2]);
615 brw_set_saturate(p
, 0);
619 static void emit_dp4( struct brw_compile
*p
,
620 const struct brw_reg
*dst
,
622 const struct brw_reg
*arg0
,
623 const struct brw_reg
*arg1
)
625 int dst_chan
= _mesa_ffs(mask
& WRITEMASK_XYZW
) - 1;
627 if (!(mask
& WRITEMASK_XYZW
))
628 return; /* Do not emit dead code */
630 assert(is_power_of_two(mask
& WRITEMASK_XYZW
));
632 brw_MUL(p
, brw_null_reg(), arg0
[0], arg1
[0]);
633 brw_MAC(p
, brw_null_reg(), arg0
[1], arg1
[1]);
634 brw_MAC(p
, brw_null_reg(), arg0
[2], arg1
[2]);
636 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
637 brw_MAC(p
, dst
[dst_chan
], arg0
[3], arg1
[3]);
638 brw_set_saturate(p
, 0);
642 static void emit_dph( struct brw_compile
*p
,
643 const struct brw_reg
*dst
,
645 const struct brw_reg
*arg0
,
646 const struct brw_reg
*arg1
)
648 const int dst_chan
= _mesa_ffs(mask
& WRITEMASK_XYZW
) - 1;
650 if (!(mask
& WRITEMASK_XYZW
))
651 return; /* Do not emit dead code */
653 assert(is_power_of_two(mask
& WRITEMASK_XYZW
));
655 brw_MUL(p
, brw_null_reg(), arg0
[0], arg1
[0]);
656 brw_MAC(p
, brw_null_reg(), arg0
[1], arg1
[1]);
657 brw_MAC(p
, dst
[dst_chan
], arg0
[2], arg1
[2]);
659 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
660 brw_ADD(p
, dst
[dst_chan
], dst
[dst_chan
], arg1
[3]);
661 brw_set_saturate(p
, 0);
665 static void emit_xpd( struct brw_compile
*p
,
666 const struct brw_reg
*dst
,
668 const struct brw_reg
*arg0
,
669 const struct brw_reg
*arg1
)
673 assert(!(mask
& WRITEMASK_W
) == WRITEMASK_X
);
675 for (i
= 0 ; i
< 3; i
++) {
680 brw_MUL(p
, brw_null_reg(), negate(arg0
[i2
]), arg1
[i1
]);
682 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
683 brw_MAC(p
, dst
[i
], arg0
[i1
], arg1
[i2
]);
684 brw_set_saturate(p
, 0);
690 static void emit_math1( struct brw_compile
*p
,
692 const struct brw_reg
*dst
,
694 const struct brw_reg
*arg0
)
696 int dst_chan
= _mesa_ffs(mask
& WRITEMASK_XYZW
) - 1;
698 if (!(mask
& WRITEMASK_XYZW
))
699 return; /* Do not emit dead code */
701 assert(is_power_of_two(mask
& WRITEMASK_XYZW
));
703 brw_MOV(p
, brw_message_reg(2), arg0
[0]);
705 /* Send two messages to perform all 16 operations:
710 (mask
& SATURATE
) ? BRW_MATH_SATURATE_SATURATE
: BRW_MATH_SATURATE_NONE
,
713 BRW_MATH_PRECISION_FULL
);
717 static void emit_math2( struct brw_compile
*p
,
719 const struct brw_reg
*dst
,
721 const struct brw_reg
*arg0
,
722 const struct brw_reg
*arg1
)
724 int dst_chan
= _mesa_ffs(mask
& WRITEMASK_XYZW
) - 1;
726 if (!(mask
& WRITEMASK_XYZW
))
727 return; /* Do not emit dead code */
729 assert(is_power_of_two(mask
& WRITEMASK_XYZW
));
731 brw_push_insn_state(p
);
733 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
734 brw_MOV(p
, brw_message_reg(2), arg0
[0]);
735 brw_set_compression_control(p
, BRW_COMPRESSION_2NDHALF
);
736 brw_MOV(p
, brw_message_reg(4), sechalf(arg0
[0]));
738 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
739 brw_MOV(p
, brw_message_reg(3), arg1
[0]);
740 brw_set_compression_control(p
, BRW_COMPRESSION_2NDHALF
);
741 brw_MOV(p
, brw_message_reg(5), sechalf(arg1
[0]));
744 /* Send two messages to perform all 16 operations:
746 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
750 (mask
& SATURATE
) ? BRW_MATH_SATURATE_SATURATE
: BRW_MATH_SATURATE_NONE
,
753 BRW_MATH_DATA_VECTOR
,
754 BRW_MATH_PRECISION_FULL
);
756 brw_set_compression_control(p
, BRW_COMPRESSION_2NDHALF
);
758 offset(dst
[dst_chan
],1),
760 (mask
& SATURATE
) ? BRW_MATH_SATURATE_SATURATE
: BRW_MATH_SATURATE_NONE
,
763 BRW_MATH_DATA_VECTOR
,
764 BRW_MATH_PRECISION_FULL
);
766 brw_pop_insn_state(p
);
771 static void emit_tex( struct brw_wm_compile
*c
,
772 const struct brw_wm_instruction
*inst
,
775 struct brw_reg
*arg
)
777 struct brw_compile
*p
= &c
->func
;
778 GLuint msgLength
, responseLength
;
783 /* How many input regs are there?
785 switch (inst
->tex_idx
) {
786 case TEXTURE_1D_INDEX
:
790 case TEXTURE_2D_INDEX
:
791 case TEXTURE_RECT_INDEX
:
795 case TEXTURE_3D_INDEX
:
796 case TEXTURE_CUBE_INDEX
:
797 emit
= WRITEMASK_XYZ
;
801 /* unexpected target */
805 if (inst
->tex_shadow
) {
812 for (i
= 0; i
< nr
; i
++) {
813 static const GLuint swz
[4] = {0,1,2,2};
815 brw_MOV(p
, brw_message_reg(msgLength
+1), arg
[swz
[i
]]);
817 brw_MOV(p
, brw_message_reg(msgLength
+1), brw_imm_f(0));
821 responseLength
= 8; /* always */
823 if (BRW_IS_IGDNG(p
->brw
)) {
824 if (inst
->tex_shadow
)
825 msg_type
= BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE_IGDNG
;
827 msg_type
= BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_IGDNG
;
829 if (inst
->tex_shadow
)
830 msg_type
= BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE
;
832 msg_type
= BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE
;
836 retype(vec16(dst
[0]), BRW_REGISTER_TYPE_UW
),
838 retype(c
->payload
.depth
[0].hw_reg
, BRW_REGISTER_TYPE_UW
),
839 SURF_INDEX_TEXTURE(inst
->tex_unit
),
840 inst
->tex_unit
, /* sampler */
847 BRW_SAMPLER_SIMD_MODE_SIMD16
);
851 static void emit_txb( struct brw_wm_compile
*c
,
852 const struct brw_wm_instruction
*inst
,
855 struct brw_reg
*arg
)
857 struct brw_compile
*p
= &c
->func
;
860 /* Shadow ignored for txb.
862 switch (inst
->tex_idx
) {
863 case TEXTURE_1D_INDEX
:
864 brw_MOV(p
, brw_message_reg(2), arg
[0]);
865 brw_MOV(p
, brw_message_reg(4), brw_imm_f(0));
866 brw_MOV(p
, brw_message_reg(6), brw_imm_f(0));
868 case TEXTURE_2D_INDEX
:
869 case TEXTURE_RECT_INDEX
:
870 brw_MOV(p
, brw_message_reg(2), arg
[0]);
871 brw_MOV(p
, brw_message_reg(4), arg
[1]);
872 brw_MOV(p
, brw_message_reg(6), brw_imm_f(0));
874 case TEXTURE_3D_INDEX
:
875 case TEXTURE_CUBE_INDEX
:
876 brw_MOV(p
, brw_message_reg(2), arg
[0]);
877 brw_MOV(p
, brw_message_reg(4), arg
[1]);
878 brw_MOV(p
, brw_message_reg(6), arg
[2]);
881 /* unexpected target */
885 brw_MOV(p
, brw_message_reg(8), arg
[3]);
888 if (BRW_IS_IGDNG(p
->brw
))
889 msg_type
= BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS_IGDNG
;
891 msg_type
= BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS
;
894 retype(vec16(dst
[0]), BRW_REGISTER_TYPE_UW
),
896 retype(c
->payload
.depth
[0].hw_reg
, BRW_REGISTER_TYPE_UW
),
897 SURF_INDEX_TEXTURE(inst
->tex_unit
),
898 inst
->tex_unit
, /* sampler */
901 8, /* responseLength */
905 BRW_SAMPLER_SIMD_MODE_SIMD16
);
909 static void emit_lit( struct brw_compile
*p
,
910 const struct brw_reg
*dst
,
912 const struct brw_reg
*arg0
)
914 assert((mask
& WRITEMASK_XW
) == 0);
916 if (mask
& WRITEMASK_Y
) {
917 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
918 brw_MOV(p
, dst
[1], arg0
[0]);
919 brw_set_saturate(p
, 0);
922 if (mask
& WRITEMASK_Z
) {
923 emit_math2(p
, BRW_MATH_FUNCTION_POW
,
925 WRITEMASK_X
| (mask
& SATURATE
),
930 /* Ordinarily you'd use an iff statement to skip or shortcircuit
931 * some of the POW calculations above, but 16-wide iff statements
932 * seem to lock c1 hardware, so this is a nasty workaround:
934 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_LE
, arg0
[0], brw_imm_f(0));
936 if (mask
& WRITEMASK_Y
)
937 brw_MOV(p
, dst
[1], brw_imm_f(0));
939 if (mask
& WRITEMASK_Z
)
940 brw_MOV(p
, dst
[2], brw_imm_f(0));
942 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
946 /* Kill pixel - set execution mask to zero for those pixels which
949 static void emit_kil( struct brw_wm_compile
*c
,
950 struct brw_reg
*arg0
)
952 struct brw_compile
*p
= &c
->func
;
953 struct brw_reg r0uw
= retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW
);
956 /* XXX - usually won't need 4 compares!
958 for (i
= 0; i
< 4; i
++) {
959 brw_push_insn_state(p
);
960 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_GE
, arg0
[i
], brw_imm_f(0));
961 brw_set_predicate_control_flag_value(p
, 0xff);
962 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
963 brw_AND(p
, r0uw
, brw_flag_reg(), r0uw
);
964 brw_pop_insn_state(p
);
968 /* KIL_NV kills the pixels that are currently executing, not based on a test
971 static void emit_kil_nv( struct brw_wm_compile
*c
)
973 struct brw_compile
*p
= &c
->func
;
974 struct brw_reg r0uw
= retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW
);
976 brw_push_insn_state(p
);
977 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
978 brw_NOT(p
, c
->emit_mask_reg
, brw_mask_reg(1)); //IMASK
979 brw_AND(p
, r0uw
, c
->emit_mask_reg
, r0uw
);
980 brw_pop_insn_state(p
);
983 static void fire_fb_write( struct brw_wm_compile
*c
,
989 struct brw_compile
*p
= &c
->func
;
991 /* Pass through control information:
993 /* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */
995 brw_push_insn_state(p
);
996 brw_set_mask_control(p
, BRW_MASK_DISABLE
); /* ? */
997 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
999 brw_message_reg(base_reg
+ 1),
1000 brw_vec8_grf(1, 0));
1001 brw_pop_insn_state(p
);
1004 /* Send framebuffer write message: */
1005 /* send (16) null.0<1>:uw m0 r0.0<8;8,1>:uw 0x85a04000:ud { Align1 EOT } */
1007 retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW
),
1009 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW
),
1017 static void emit_aa( struct brw_wm_compile
*c
,
1018 struct brw_reg
*arg1
,
1021 struct brw_compile
*p
= &c
->func
;
1022 GLuint comp
= c
->key
.aa_dest_stencil_reg
/ 2;
1023 GLuint off
= c
->key
.aa_dest_stencil_reg
% 2;
1024 struct brw_reg aa
= offset(arg1
[comp
], off
);
1026 brw_push_insn_state(p
);
1027 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
); /* ?? */
1028 brw_MOV(p
, brw_message_reg(reg
), aa
);
1029 brw_pop_insn_state(p
);
1033 /* Post-fragment-program processing. Send the results to the
1035 * \param arg0 the fragment color
1036 * \param arg1 the pass-through depth value
1037 * \param arg2 the shader-computed depth value
1039 static void emit_fb_write( struct brw_wm_compile
*c
,
1040 struct brw_reg
*arg0
,
1041 struct brw_reg
*arg1
,
1042 struct brw_reg
*arg2
,
1046 struct brw_compile
*p
= &c
->func
;
1050 /* Reserve a space for AA - may not be needed:
1052 if (c
->key
.aa_dest_stencil_reg
)
1055 /* I don't really understand how this achieves the color interleave
1056 * (ie RGBARGBA) in the result: [Do the saturation here]
1059 brw_push_insn_state(p
);
1061 for (channel
= 0; channel
< 4; channel
++) {
1062 /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */
1063 /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */
1065 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1067 brw_message_reg(nr
+ channel
),
1070 brw_set_compression_control(p
, BRW_COMPRESSION_2NDHALF
);
1072 brw_message_reg(nr
+ channel
+ 4),
1073 sechalf(arg0
[channel
]));
1076 /* skip over the regs populated above:
1080 brw_pop_insn_state(p
);
1083 if (c
->key
.source_depth_to_render_target
)
1085 if (c
->key
.computes_depth
)
1086 brw_MOV(p
, brw_message_reg(nr
), arg2
[2]);
1088 brw_MOV(p
, brw_message_reg(nr
), arg1
[1]); /* ? */
1093 if (c
->key
.dest_depth_reg
)
1095 GLuint comp
= c
->key
.dest_depth_reg
/ 2;
1096 GLuint off
= c
->key
.dest_depth_reg
% 2;
1099 brw_push_insn_state(p
);
1100 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1102 brw_MOV(p
, brw_message_reg(nr
), offset(arg1
[comp
],1));
1104 brw_MOV(p
, brw_message_reg(nr
+1), arg1
[comp
+1]);
1105 brw_pop_insn_state(p
);
1108 brw_MOV(p
, brw_message_reg(nr
), arg1
[comp
]);
1113 if (!c
->key
.runtime_check_aads_emit
) {
1114 if (c
->key
.aa_dest_stencil_reg
)
1115 emit_aa(c
, arg1
, 2);
1117 fire_fb_write(c
, 0, nr
, target
, eot
);
1120 struct brw_reg v1_null_ud
= vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD
));
1121 struct brw_reg ip
= brw_ip_reg();
1122 struct brw_instruction
*jmp
;
1124 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1125 brw_set_conditionalmod(p
, BRW_CONDITIONAL_Z
);
1128 get_element_ud(brw_vec8_grf(1,0), 6),
1131 jmp
= brw_JMPI(p
, ip
, ip
, brw_imm_d(0));
1133 emit_aa(c
, arg1
, 2);
1134 fire_fb_write(c
, 0, nr
, target
, eot
);
1135 /* note - thread killed in subroutine */
1137 brw_land_fwd_jump(p
, jmp
);
1139 /* ELSE: Shuffle up one register to fill in the hole left for AA:
1141 fire_fb_write(c
, 1, nr
-1, target
, eot
);
1147 * Move a GPR to scratch memory.
1149 static void emit_spill( struct brw_wm_compile
*c
,
1153 struct brw_compile
*p
= &c
->func
;
1156 mov (16) m2.0<1>:ud r2.0<8;8,1>:ud { Align1 Compr }
1158 brw_MOV(p
, brw_message_reg(2), reg
);
1161 mov (1) r0.2<1>:d 0x00000080:d { Align1 NoMask }
1162 send (16) null.0<1>:uw m1 r0.0<8;8,1>:uw 0x053003ff:ud { Align1 }
1165 retype(vec16(brw_vec8_grf(0, 0)), BRW_REGISTER_TYPE_UW
),
1171 * Load a GPR from scratch memory.
1173 static void emit_unspill( struct brw_wm_compile
*c
,
1177 struct brw_compile
*p
= &c
->func
;
1179 /* Slot 0 is the undef value.
1182 brw_MOV(p
, reg
, brw_imm_f(0));
1187 mov (1) r0.2<1>:d 0x000000c0:d { Align1 NoMask }
1188 send (16) r110.0<1>:uw m1 r0.0<8;8,1>:uw 0x041243ff:ud { Align1 }
1192 retype(vec16(reg
), BRW_REGISTER_TYPE_UW
),
1198 * Retrieve up to 4 GEN4 register pairs for the given wm reg:
1199 * Args with unspill_reg != 0 will be loaded from scratch memory.
1201 static void get_argument_regs( struct brw_wm_compile
*c
,
1202 struct brw_wm_ref
*arg
[],
1203 struct brw_reg
*regs
)
1207 for (i
= 0; i
< 4; i
++) {
1209 if (arg
[i
]->unspill_reg
)
1211 brw_vec8_grf(arg
[i
]->unspill_reg
, 0),
1212 arg
[i
]->value
->spill_slot
);
1214 regs
[i
] = arg
[i
]->hw_reg
;
1217 regs
[i
] = brw_null_reg();
1224 * For values that have a spill_slot!=0, write those regs to scratch memory.
1226 static void spill_values( struct brw_wm_compile
*c
,
1227 struct brw_wm_value
*values
,
1232 for (i
= 0; i
< nr
; i
++)
1233 if (values
[i
].spill_slot
)
1234 emit_spill(c
, values
[i
].hw_reg
, values
[i
].spill_slot
);
1238 /* Emit the fragment program instructions here.
1240 void brw_wm_emit( struct brw_wm_compile
*c
)
1242 struct brw_compile
*p
= &c
->func
;
1245 brw_set_compression_control(p
, BRW_COMPRESSION_COMPRESSED
);
1247 /* Check if any of the payload regs need to be spilled:
1249 spill_values(c
, c
->payload
.depth
, 4);
1250 spill_values(c
, c
->creg
, c
->nr_creg
);
1251 spill_values(c
, c
->payload
.input_interp
, FRAG_ATTRIB_MAX
);
1254 for (insn
= 0; insn
< c
->nr_insns
; insn
++) {
1256 struct brw_wm_instruction
*inst
= &c
->instruction
[insn
];
1257 struct brw_reg args
[3][4], dst
[4];
1258 GLuint i
, dst_flags
;
1260 /* Get argument regs:
1262 for (i
= 0; i
< 3; i
++)
1263 get_argument_regs(c
, inst
->src
[i
], args
[i
]);
1267 for (i
= 0; i
< 4; i
++)
1269 dst
[i
] = inst
->dst
[i
]->hw_reg
;
1271 dst
[i
] = brw_null_reg();
1275 dst_flags
= inst
->writemask
;
1277 dst_flags
|= SATURATE
;
1279 switch (inst
->opcode
) {
1280 /* Generated instructions for calculating triangle interpolants:
1283 emit_pixel_xy(p
, dst
, dst_flags
);
1287 emit_delta_xy(p
, dst
, dst_flags
, args
[0]);
1291 emit_wpos_xy(c
, dst
, dst_flags
, args
[0]);
1295 emit_pixel_w(p
, dst
, dst_flags
, args
[0], args
[1]);
1299 emit_linterp(p
, dst
, dst_flags
, args
[0], args
[1]);
1303 emit_pinterp(p
, dst
, dst_flags
, args
[0], args
[1], args
[2]);
1307 emit_cinterp(p
, dst
, dst_flags
, args
[0]);
1311 emit_fb_write(c
, args
[0], args
[1], args
[2], inst
->target
, inst
->eot
);
1314 case WM_FRONTFACING
:
1315 emit_frontfacing(p
, dst
, dst_flags
);
1318 /* Straightforward arithmetic:
1321 emit_alu2(p
, brw_ADD
, dst
, dst_flags
, args
[0], args
[1]);
1325 emit_alu1(p
, brw_FRC
, dst
, dst_flags
, args
[0]);
1329 emit_alu1(p
, brw_RNDD
, dst
, dst_flags
, args
[0]);
1333 emit_ddxy(p
, dst
, dst_flags
, GL_TRUE
, args
[0]);
1337 emit_ddxy(p
, dst
, dst_flags
, GL_FALSE
, args
[0]);
1341 emit_dp3(p
, dst
, dst_flags
, args
[0], args
[1]);
1345 emit_dp4(p
, dst
, dst_flags
, args
[0], args
[1]);
1349 emit_dph(p
, dst
, dst_flags
, args
[0], args
[1]);
1353 emit_alu1(p
, brw_RNDZ
, dst
, dst_flags
, args
[0]);
1357 emit_lrp(p
, dst
, dst_flags
, args
[0], args
[1], args
[2]);
1361 emit_mad(p
, dst
, dst_flags
, args
[0], args
[1], args
[2]);
1366 emit_alu1(p
, brw_MOV
, dst
, dst_flags
, args
[0]);
1370 emit_alu2(p
, brw_MUL
, dst
, dst_flags
, args
[0], args
[1]);
1374 emit_xpd(p
, dst
, dst_flags
, args
[0], args
[1]);
1377 /* Higher math functions:
1380 emit_math1(p
, BRW_MATH_FUNCTION_INV
, dst
, dst_flags
, args
[0]);
1384 emit_math1(p
, BRW_MATH_FUNCTION_RSQ
, dst
, dst_flags
, args
[0]);
1388 emit_math1(p
, BRW_MATH_FUNCTION_SIN
, dst
, dst_flags
, args
[0]);
1392 emit_math1(p
, BRW_MATH_FUNCTION_COS
, dst
, dst_flags
, args
[0]);
1396 emit_math1(p
, BRW_MATH_FUNCTION_EXP
, dst
, dst_flags
, args
[0]);
1400 emit_math1(p
, BRW_MATH_FUNCTION_LOG
, dst
, dst_flags
, args
[0]);
1404 /* There is an scs math function, but it would need some
1405 * fixup for 16-element execution.
1407 if (dst_flags
& WRITEMASK_X
)
1408 emit_math1(p
, BRW_MATH_FUNCTION_COS
, dst
, (dst_flags
&SATURATE
)|WRITEMASK_X
, args
[0]);
1409 if (dst_flags
& WRITEMASK_Y
)
1410 emit_math1(p
, BRW_MATH_FUNCTION_SIN
, dst
+1, (dst_flags
&SATURATE
)|WRITEMASK_X
, args
[0]);
1414 emit_math2(p
, BRW_MATH_FUNCTION_POW
, dst
, dst_flags
, args
[0], args
[1]);
1420 emit_cmp(p
, dst
, dst_flags
, args
[0], args
[1], args
[2]);
1424 emit_max(p
, dst
, dst_flags
, args
[0], args
[1]);
1428 emit_min(p
, dst
, dst_flags
, args
[0], args
[1]);
1432 emit_slt(p
, dst
, dst_flags
, args
[0], args
[1]);
1436 emit_sle(p
, dst
, dst_flags
, args
[0], args
[1]);
1439 emit_sgt(p
, dst
, dst_flags
, args
[0], args
[1]);
1442 emit_sge(p
, dst
, dst_flags
, args
[0], args
[1]);
1445 emit_seq(p
, dst
, dst_flags
, args
[0], args
[1]);
1448 emit_sne(p
, dst
, dst_flags
, args
[0], args
[1]);
1452 emit_lit(p
, dst
, dst_flags
, args
[0]);
1455 /* Texturing operations:
1458 emit_tex(c
, inst
, dst
, dst_flags
, args
[0]);
1462 emit_txb(c
, inst
, dst
, dst_flags
, args
[0]);
1466 emit_kil(c
, args
[0]);
1474 _mesa_printf("Unsupported opcode %i (%s) in fragment shader\n",
1475 inst
->opcode
, inst
->opcode
< MAX_OPCODE
?
1476 _mesa_opcode_string(inst
->opcode
) :
1480 for (i
= 0; i
< 4; i
++)
1481 if (inst
->dst
[i
] && inst
->dst
[i
]->spill_slot
)
1483 inst
->dst
[i
]->hw_reg
,
1484 inst
->dst
[i
]->spill_slot
);
1487 if (INTEL_DEBUG
& DEBUG_WM
) {
1490 _mesa_printf("wm-native:\n");
1491 for (i
= 0; i
< p
->nr_insn
; i
++)
1492 brw_disasm(stderr
, &p
->store
[i
]);