2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **********************************************************************/
29 * Keith Whitwell <keith@tungstengraphics.com>
33 #include "main/macros.h"
34 #include "brw_context.h"
37 /* Not quite sure how correct this is - need to understand horiz
38 * vs. vertical strides a little better.
40 static INLINE
struct brw_reg
sechalf( struct brw_reg reg
)
49 * R0.0 -- pixel mask, one bit for each of 4 pixels in 4 tiles,
50 * corresponding to each of the 16 execution channels.
52 * R1.0 -- triangle vertex 0.X
53 * R1.1 -- triangle vertex 0.Y
54 * R1.2 -- tile 0 x,y coords (2 packed uwords)
55 * R1.3 -- tile 1 x,y coords (2 packed uwords)
56 * R1.4 -- tile 2 x,y coords (2 packed uwords)
57 * R1.5 -- tile 3 x,y coords (2 packed uwords)
64 static void emit_pixel_xy(struct brw_compile
*p
,
65 const struct brw_reg
*dst
,
68 struct brw_reg r1
= brw_vec1_grf(1, 0);
69 struct brw_reg r1_uw
= retype(r1
, BRW_REGISTER_TYPE_UW
);
71 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
73 /* Calculate pixel centers by adding 1 or 0 to each of the
74 * micro-tile coordinates passed in r1.
76 if (mask
& WRITEMASK_X
) {
78 vec16(retype(dst
[0], BRW_REGISTER_TYPE_UW
)),
79 stride(suboffset(r1_uw
, 4), 2, 4, 0),
80 brw_imm_v(0x10101010));
83 if (mask
& WRITEMASK_Y
) {
85 vec16(retype(dst
[1], BRW_REGISTER_TYPE_UW
)),
86 stride(suboffset(r1_uw
,5), 2, 4, 0),
87 brw_imm_v(0x11001100));
90 brw_set_compression_control(p
, BRW_COMPRESSION_COMPRESSED
);
95 static void emit_delta_xy(struct brw_compile
*p
,
96 const struct brw_reg
*dst
,
98 const struct brw_reg
*arg0
)
100 struct brw_reg r1
= brw_vec1_grf(1, 0);
102 /* Calc delta X,Y by subtracting origin in r1 from the pixel
105 if (mask
& WRITEMASK_X
) {
108 retype(arg0
[0], BRW_REGISTER_TYPE_UW
),
112 if (mask
& WRITEMASK_Y
) {
115 retype(arg0
[1], BRW_REGISTER_TYPE_UW
),
116 negate(suboffset(r1
,1)));
121 static void emit_wpos_xy(struct brw_wm_compile
*c
,
122 const struct brw_reg
*dst
,
124 const struct brw_reg
*arg0
)
126 struct brw_compile
*p
= &c
->func
;
128 /* Calculate the pixel offset from window bottom left into destination
131 if (mask
& WRITEMASK_X
) {
132 /* X' = X - origin */
135 retype(arg0
[0], BRW_REGISTER_TYPE_W
),
136 brw_imm_d(0 - c
->key
.origin_x
));
139 if (mask
& WRITEMASK_Y
) {
140 /* Y' = height - (Y - origin_y) = height + origin_y - Y */
143 negate(retype(arg0
[1], BRW_REGISTER_TYPE_W
)),
144 brw_imm_d(c
->key
.origin_y
+ c
->key
.drawable_height
- 1));
149 static void emit_pixel_w( struct brw_compile
*p
,
150 const struct brw_reg
*dst
,
152 const struct brw_reg
*arg0
,
153 const struct brw_reg
*deltas
)
155 /* Don't need this if all you are doing is interpolating color, for
158 if (mask
& WRITEMASK_W
) {
159 struct brw_reg interp3
= brw_vec1_grf(arg0
[0].nr
+1, 4);
161 /* Calc 1/w - just linterp wpos[3] optimized by putting the
162 * result straight into a message reg.
164 brw_LINE(p
, brw_null_reg(), interp3
, deltas
[0]);
165 brw_MAC(p
, brw_message_reg(2), suboffset(interp3
, 1), deltas
[1]);
168 brw_math_16( p
, dst
[3],
169 BRW_MATH_FUNCTION_INV
,
170 BRW_MATH_SATURATE_NONE
,
172 BRW_MATH_PRECISION_FULL
);
178 static void emit_linterp( struct brw_compile
*p
,
179 const struct brw_reg
*dst
,
181 const struct brw_reg
*arg0
,
182 const struct brw_reg
*deltas
)
184 struct brw_reg interp
[4];
185 GLuint nr
= arg0
[0].nr
;
188 interp
[0] = brw_vec1_grf(nr
, 0);
189 interp
[1] = brw_vec1_grf(nr
, 4);
190 interp
[2] = brw_vec1_grf(nr
+1, 0);
191 interp
[3] = brw_vec1_grf(nr
+1, 4);
193 for (i
= 0; i
< 4; i
++) {
195 brw_LINE(p
, brw_null_reg(), interp
[i
], deltas
[0]);
196 brw_MAC(p
, dst
[i
], suboffset(interp
[i
],1), deltas
[1]);
202 static void emit_pinterp( struct brw_compile
*p
,
203 const struct brw_reg
*dst
,
205 const struct brw_reg
*arg0
,
206 const struct brw_reg
*deltas
,
207 const struct brw_reg
*w
)
209 struct brw_reg interp
[4];
210 GLuint nr
= arg0
[0].nr
;
213 interp
[0] = brw_vec1_grf(nr
, 0);
214 interp
[1] = brw_vec1_grf(nr
, 4);
215 interp
[2] = brw_vec1_grf(nr
+1, 0);
216 interp
[3] = brw_vec1_grf(nr
+1, 4);
218 for (i
= 0; i
< 4; i
++) {
220 brw_LINE(p
, brw_null_reg(), interp
[i
], deltas
[0]);
221 brw_MAC(p
, dst
[i
], suboffset(interp
[i
],1), deltas
[1]);
224 for (i
= 0; i
< 4; i
++) {
226 brw_MUL(p
, dst
[i
], dst
[i
], w
[3]);
232 static void emit_cinterp( struct brw_compile
*p
,
233 const struct brw_reg
*dst
,
235 const struct brw_reg
*arg0
)
237 struct brw_reg interp
[4];
238 GLuint nr
= arg0
[0].nr
;
241 interp
[0] = brw_vec1_grf(nr
, 0);
242 interp
[1] = brw_vec1_grf(nr
, 4);
243 interp
[2] = brw_vec1_grf(nr
+1, 0);
244 interp
[3] = brw_vec1_grf(nr
+1, 4);
246 for (i
= 0; i
< 4; i
++) {
248 brw_MOV(p
, dst
[i
], suboffset(interp
[i
],3)); /* TODO: optimize away like other moves */
253 /* Sets the destination channels to 1.0 or 0.0 according to glFrontFacing. */
254 static void emit_frontfacing( struct brw_compile
*p
,
255 const struct brw_reg
*dst
,
258 struct brw_reg r1_6ud
= retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD
);
261 if (!(mask
& WRITEMASK_XYZW
))
264 for (i
= 0; i
< 4; i
++) {
266 brw_MOV(p
, dst
[i
], brw_imm_f(0.0));
270 /* bit 31 is "primitive is back face", so checking < (1 << 31) gives
273 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, r1_6ud
, brw_imm_ud(1 << 31));
274 for (i
= 0; i
< 4; i
++) {
276 brw_MOV(p
, dst
[i
], brw_imm_f(1.0));
279 brw_set_predicate_control_flag_value(p
, 0xff);
282 /* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input
285 * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br
287 * and we're trying to produce:
290 * dst: (ss0.tr - ss0.tl) (ss0.tl - ss0.bl)
291 * (ss0.tr - ss0.tl) (ss0.tr - ss0.br)
292 * (ss0.br - ss0.bl) (ss0.tl - ss0.bl)
293 * (ss0.br - ss0.bl) (ss0.tr - ss0.br)
294 * (ss1.tr - ss1.tl) (ss1.tl - ss1.bl)
295 * (ss1.tr - ss1.tl) (ss1.tr - ss1.br)
296 * (ss1.br - ss1.bl) (ss1.tl - ss1.bl)
297 * (ss1.br - ss1.bl) (ss1.tr - ss1.br)
299 * and add another set of two more subspans if in 16-pixel dispatch mode.
301 * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result
302 * for each pair, and vertstride = 2 jumps us 2 elements after processing a
303 * pair. But for DDY, it's harder, as we want to produce the pairs swizzled
304 * between each other. We could probably do it like ddx and swizzle the right
305 * order later, but bail for now and just produce
306 * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4)
308 void emit_ddxy(struct brw_compile
*p
,
309 const struct brw_reg
*dst
,
312 const struct brw_reg
*arg0
)
315 struct brw_reg src0
, src1
;
318 brw_set_saturate(p
, 1);
319 for (i
= 0; i
< 4; i
++ ) {
322 src0
= brw_reg(arg0
[i
].file
, arg0
[i
].nr
, 1,
324 BRW_VERTICAL_STRIDE_2
,
326 BRW_HORIZONTAL_STRIDE_0
,
327 BRW_SWIZZLE_XYZW
, WRITEMASK_XYZW
);
328 src1
= brw_reg(arg0
[i
].file
, arg0
[i
].nr
, 0,
330 BRW_VERTICAL_STRIDE_2
,
332 BRW_HORIZONTAL_STRIDE_0
,
333 BRW_SWIZZLE_XYZW
, WRITEMASK_XYZW
);
335 src0
= brw_reg(arg0
[i
].file
, arg0
[i
].nr
, 0,
337 BRW_VERTICAL_STRIDE_4
,
339 BRW_HORIZONTAL_STRIDE_0
,
340 BRW_SWIZZLE_XYZW
, WRITEMASK_XYZW
);
341 src1
= brw_reg(arg0
[i
].file
, arg0
[i
].nr
, 2,
343 BRW_VERTICAL_STRIDE_4
,
345 BRW_HORIZONTAL_STRIDE_0
,
346 BRW_SWIZZLE_XYZW
, WRITEMASK_XYZW
);
348 brw_ADD(p
, dst
[i
], src0
, negate(src1
));
352 brw_set_saturate(p
, 0);
355 static void emit_alu1( struct brw_compile
*p
,
356 struct brw_instruction
*(*func
)(struct brw_compile
*,
359 const struct brw_reg
*dst
,
361 const struct brw_reg
*arg0
)
366 brw_set_saturate(p
, 1);
368 for (i
= 0; i
< 4; i
++) {
370 func(p
, dst
[i
], arg0
[i
]);
375 brw_set_saturate(p
, 0);
379 static void emit_alu2( struct brw_compile
*p
,
380 struct brw_instruction
*(*func
)(struct brw_compile
*,
384 const struct brw_reg
*dst
,
386 const struct brw_reg
*arg0
,
387 const struct brw_reg
*arg1
)
392 brw_set_saturate(p
, 1);
394 for (i
= 0; i
< 4; i
++) {
396 func(p
, dst
[i
], arg0
[i
], arg1
[i
]);
401 brw_set_saturate(p
, 0);
405 static void emit_mad( struct brw_compile
*p
,
406 const struct brw_reg
*dst
,
408 const struct brw_reg
*arg0
,
409 const struct brw_reg
*arg1
,
410 const struct brw_reg
*arg2
)
414 for (i
= 0; i
< 4; i
++) {
416 brw_MUL(p
, dst
[i
], arg0
[i
], arg1
[i
]);
418 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
419 brw_ADD(p
, dst
[i
], dst
[i
], arg2
[i
]);
420 brw_set_saturate(p
, 0);
425 static void emit_trunc( struct brw_compile
*p
,
426 const struct brw_reg
*dst
,
428 const struct brw_reg
*arg0
)
432 for (i
= 0; i
< 4; i
++) {
434 brw_RNDZ(p
, dst
[i
], arg0
[i
]);
439 static void emit_lrp( struct brw_compile
*p
,
440 const struct brw_reg
*dst
,
442 const struct brw_reg
*arg0
,
443 const struct brw_reg
*arg1
,
444 const struct brw_reg
*arg2
)
448 /* Uses dst as a temporary:
450 for (i
= 0; i
< 4; i
++) {
452 /* Can I use the LINE instruction for this?
454 brw_ADD(p
, dst
[i
], negate(arg0
[i
]), brw_imm_f(1.0));
455 brw_MUL(p
, brw_null_reg(), dst
[i
], arg2
[i
]);
457 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
458 brw_MAC(p
, dst
[i
], arg0
[i
], arg1
[i
]);
459 brw_set_saturate(p
, 0);
464 static void emit_sop( struct brw_compile
*p
,
465 const struct brw_reg
*dst
,
468 const struct brw_reg
*arg0
,
469 const struct brw_reg
*arg1
)
473 for (i
= 0; i
< 4; i
++) {
475 brw_MOV(p
, dst
[i
], brw_imm_f(0));
476 brw_CMP(p
, brw_null_reg(), cond
, arg0
[i
], arg1
[i
]);
477 brw_MOV(p
, dst
[i
], brw_imm_f(1.0));
478 brw_set_predicate_control_flag_value(p
, 0xff);
483 static void emit_slt( struct brw_compile
*p
,
484 const struct brw_reg
*dst
,
486 const struct brw_reg
*arg0
,
487 const struct brw_reg
*arg1
)
489 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_L
, arg0
, arg1
);
492 static void emit_sle( struct brw_compile
*p
,
493 const struct brw_reg
*dst
,
495 const struct brw_reg
*arg0
,
496 const struct brw_reg
*arg1
)
498 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_LE
, arg0
, arg1
);
501 static void emit_sgt( struct brw_compile
*p
,
502 const struct brw_reg
*dst
,
504 const struct brw_reg
*arg0
,
505 const struct brw_reg
*arg1
)
507 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_G
, arg0
, arg1
);
510 static void emit_sge( struct brw_compile
*p
,
511 const struct brw_reg
*dst
,
513 const struct brw_reg
*arg0
,
514 const struct brw_reg
*arg1
)
516 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_GE
, arg0
, arg1
);
519 static void emit_seq( struct brw_compile
*p
,
520 const struct brw_reg
*dst
,
522 const struct brw_reg
*arg0
,
523 const struct brw_reg
*arg1
)
525 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_EQ
, arg0
, arg1
);
528 static void emit_sne( struct brw_compile
*p
,
529 const struct brw_reg
*dst
,
531 const struct brw_reg
*arg0
,
532 const struct brw_reg
*arg1
)
534 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_NEQ
, arg0
, arg1
);
537 static void emit_cmp( struct brw_compile
*p
,
538 const struct brw_reg
*dst
,
540 const struct brw_reg
*arg0
,
541 const struct brw_reg
*arg1
,
542 const struct brw_reg
*arg2
)
546 for (i
= 0; i
< 4; i
++) {
548 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
549 brw_MOV(p
, dst
[i
], arg2
[i
]);
550 brw_set_saturate(p
, 0);
552 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, arg0
[i
], brw_imm_f(0));
554 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
555 brw_MOV(p
, dst
[i
], arg1
[i
]);
556 brw_set_saturate(p
, 0);
557 brw_set_predicate_control_flag_value(p
, 0xff);
562 static void emit_max( struct brw_compile
*p
,
563 const struct brw_reg
*dst
,
565 const struct brw_reg
*arg0
,
566 const struct brw_reg
*arg1
)
570 for (i
= 0; i
< 4; i
++) {
572 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
573 brw_MOV(p
, dst
[i
], arg0
[i
]);
574 brw_set_saturate(p
, 0);
576 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, arg0
[i
], arg1
[i
]);
578 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
579 brw_MOV(p
, dst
[i
], arg1
[i
]);
580 brw_set_saturate(p
, 0);
581 brw_set_predicate_control_flag_value(p
, 0xff);
586 static void emit_min( struct brw_compile
*p
,
587 const struct brw_reg
*dst
,
589 const struct brw_reg
*arg0
,
590 const struct brw_reg
*arg1
)
594 for (i
= 0; i
< 4; i
++) {
596 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
597 brw_MOV(p
, dst
[i
], arg1
[i
]);
598 brw_set_saturate(p
, 0);
600 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, arg0
[i
], arg1
[i
]);
602 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
603 brw_MOV(p
, dst
[i
], arg0
[i
]);
604 brw_set_saturate(p
, 0);
605 brw_set_predicate_control_flag_value(p
, 0xff);
611 static void emit_dp3( struct brw_compile
*p
,
612 const struct brw_reg
*dst
,
614 const struct brw_reg
*arg0
,
615 const struct brw_reg
*arg1
)
617 int dst_chan
= _mesa_ffs(mask
& WRITEMASK_XYZW
) - 1;
619 if (!(mask
& WRITEMASK_XYZW
))
620 return; /* Do not emit dead code */
622 assert(is_power_of_two(mask
& WRITEMASK_XYZW
));
624 brw_MUL(p
, brw_null_reg(), arg0
[0], arg1
[0]);
625 brw_MAC(p
, brw_null_reg(), arg0
[1], arg1
[1]);
627 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
628 brw_MAC(p
, dst
[dst_chan
], arg0
[2], arg1
[2]);
629 brw_set_saturate(p
, 0);
633 static void emit_dp4( struct brw_compile
*p
,
634 const struct brw_reg
*dst
,
636 const struct brw_reg
*arg0
,
637 const struct brw_reg
*arg1
)
639 int dst_chan
= _mesa_ffs(mask
& WRITEMASK_XYZW
) - 1;
641 if (!(mask
& WRITEMASK_XYZW
))
642 return; /* Do not emit dead code */
644 assert(is_power_of_two(mask
& WRITEMASK_XYZW
));
646 brw_MUL(p
, brw_null_reg(), arg0
[0], arg1
[0]);
647 brw_MAC(p
, brw_null_reg(), arg0
[1], arg1
[1]);
648 brw_MAC(p
, brw_null_reg(), arg0
[2], arg1
[2]);
650 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
651 brw_MAC(p
, dst
[dst_chan
], arg0
[3], arg1
[3]);
652 brw_set_saturate(p
, 0);
656 static void emit_dph( struct brw_compile
*p
,
657 const struct brw_reg
*dst
,
659 const struct brw_reg
*arg0
,
660 const struct brw_reg
*arg1
)
662 const int dst_chan
= _mesa_ffs(mask
& WRITEMASK_XYZW
) - 1;
664 if (!(mask
& WRITEMASK_XYZW
))
665 return; /* Do not emit dead code */
667 assert(is_power_of_two(mask
& WRITEMASK_XYZW
));
669 brw_MUL(p
, brw_null_reg(), arg0
[0], arg1
[0]);
670 brw_MAC(p
, brw_null_reg(), arg0
[1], arg1
[1]);
671 brw_MAC(p
, dst
[dst_chan
], arg0
[2], arg1
[2]);
673 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
674 brw_ADD(p
, dst
[dst_chan
], dst
[dst_chan
], arg1
[3]);
675 brw_set_saturate(p
, 0);
679 static void emit_xpd( struct brw_compile
*p
,
680 const struct brw_reg
*dst
,
682 const struct brw_reg
*arg0
,
683 const struct brw_reg
*arg1
)
687 assert(!(mask
& WRITEMASK_W
) == WRITEMASK_X
);
689 for (i
= 0 ; i
< 3; i
++) {
694 brw_MUL(p
, brw_null_reg(), negate(arg0
[i2
]), arg1
[i1
]);
696 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
697 brw_MAC(p
, dst
[i
], arg0
[i1
], arg1
[i2
]);
698 brw_set_saturate(p
, 0);
704 static void emit_math1( struct brw_compile
*p
,
706 const struct brw_reg
*dst
,
708 const struct brw_reg
*arg0
)
710 int dst_chan
= _mesa_ffs(mask
& WRITEMASK_XYZW
) - 1;
712 if (!(mask
& WRITEMASK_XYZW
))
713 return; /* Do not emit dead code */
715 assert(is_power_of_two(mask
& WRITEMASK_XYZW
));
717 brw_MOV(p
, brw_message_reg(2), arg0
[0]);
719 /* Send two messages to perform all 16 operations:
724 (mask
& SATURATE
) ? BRW_MATH_SATURATE_SATURATE
: BRW_MATH_SATURATE_NONE
,
727 BRW_MATH_PRECISION_FULL
);
731 static void emit_math2( struct brw_compile
*p
,
733 const struct brw_reg
*dst
,
735 const struct brw_reg
*arg0
,
736 const struct brw_reg
*arg1
)
738 int dst_chan
= _mesa_ffs(mask
& WRITEMASK_XYZW
) - 1;
740 if (!(mask
& WRITEMASK_XYZW
))
741 return; /* Do not emit dead code */
743 assert(is_power_of_two(mask
& WRITEMASK_XYZW
));
745 brw_push_insn_state(p
);
747 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
748 brw_MOV(p
, brw_message_reg(2), arg0
[0]);
749 brw_set_compression_control(p
, BRW_COMPRESSION_2NDHALF
);
750 brw_MOV(p
, brw_message_reg(4), sechalf(arg0
[0]));
752 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
753 brw_MOV(p
, brw_message_reg(3), arg1
[0]);
754 brw_set_compression_control(p
, BRW_COMPRESSION_2NDHALF
);
755 brw_MOV(p
, brw_message_reg(5), sechalf(arg1
[0]));
758 /* Send two messages to perform all 16 operations:
760 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
764 (mask
& SATURATE
) ? BRW_MATH_SATURATE_SATURATE
: BRW_MATH_SATURATE_NONE
,
767 BRW_MATH_DATA_VECTOR
,
768 BRW_MATH_PRECISION_FULL
);
770 brw_set_compression_control(p
, BRW_COMPRESSION_2NDHALF
);
772 offset(dst
[dst_chan
],1),
774 (mask
& SATURATE
) ? BRW_MATH_SATURATE_SATURATE
: BRW_MATH_SATURATE_NONE
,
777 BRW_MATH_DATA_VECTOR
,
778 BRW_MATH_PRECISION_FULL
);
780 brw_pop_insn_state(p
);
785 static void emit_tex( struct brw_wm_compile
*c
,
786 const struct brw_wm_instruction
*inst
,
789 struct brw_reg
*arg
)
791 struct brw_compile
*p
= &c
->func
;
792 GLuint msgLength
, responseLength
;
797 /* How many input regs are there?
799 switch (inst
->tex_idx
) {
800 case TEXTURE_1D_INDEX
:
804 case TEXTURE_2D_INDEX
:
805 case TEXTURE_RECT_INDEX
:
809 case TEXTURE_3D_INDEX
:
810 case TEXTURE_CUBE_INDEX
:
811 emit
= WRITEMASK_XYZ
;
815 /* unexpected target */
819 if (inst
->tex_shadow
) {
826 for (i
= 0; i
< nr
; i
++) {
827 static const GLuint swz
[4] = {0,1,2,2};
829 brw_MOV(p
, brw_message_reg(msgLength
+1), arg
[swz
[i
]]);
831 brw_MOV(p
, brw_message_reg(msgLength
+1), brw_imm_f(0));
835 responseLength
= 8; /* always */
837 if (BRW_IS_IGDNG(p
->brw
)) {
838 if (inst
->tex_shadow
)
839 msg_type
= BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE_IGDNG
;
841 msg_type
= BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_IGDNG
;
843 if (inst
->tex_shadow
)
844 msg_type
= BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE
;
846 msg_type
= BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE
;
850 retype(vec16(dst
[0]), BRW_REGISTER_TYPE_UW
),
852 retype(c
->payload
.depth
[0].hw_reg
, BRW_REGISTER_TYPE_UW
),
853 SURF_INDEX_TEXTURE(inst
->tex_unit
),
854 inst
->tex_unit
, /* sampler */
861 BRW_SAMPLER_SIMD_MODE_SIMD16
);
865 static void emit_txb( struct brw_wm_compile
*c
,
866 const struct brw_wm_instruction
*inst
,
869 struct brw_reg
*arg
)
871 struct brw_compile
*p
= &c
->func
;
874 /* Shadow ignored for txb.
876 switch (inst
->tex_idx
) {
877 case TEXTURE_1D_INDEX
:
878 brw_MOV(p
, brw_message_reg(2), arg
[0]);
879 brw_MOV(p
, brw_message_reg(4), brw_imm_f(0));
880 brw_MOV(p
, brw_message_reg(6), brw_imm_f(0));
882 case TEXTURE_2D_INDEX
:
883 case TEXTURE_RECT_INDEX
:
884 brw_MOV(p
, brw_message_reg(2), arg
[0]);
885 brw_MOV(p
, brw_message_reg(4), arg
[1]);
886 brw_MOV(p
, brw_message_reg(6), brw_imm_f(0));
888 case TEXTURE_3D_INDEX
:
889 case TEXTURE_CUBE_INDEX
:
890 brw_MOV(p
, brw_message_reg(2), arg
[0]);
891 brw_MOV(p
, brw_message_reg(4), arg
[1]);
892 brw_MOV(p
, brw_message_reg(6), arg
[2]);
895 /* unexpected target */
899 brw_MOV(p
, brw_message_reg(8), arg
[3]);
902 if (BRW_IS_IGDNG(p
->brw
))
903 msg_type
= BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS_IGDNG
;
905 msg_type
= BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS
;
908 retype(vec16(dst
[0]), BRW_REGISTER_TYPE_UW
),
910 retype(c
->payload
.depth
[0].hw_reg
, BRW_REGISTER_TYPE_UW
),
911 SURF_INDEX_TEXTURE(inst
->tex_unit
),
912 inst
->tex_unit
, /* sampler */
915 8, /* responseLength */
919 BRW_SAMPLER_SIMD_MODE_SIMD16
);
923 static void emit_lit( struct brw_compile
*p
,
924 const struct brw_reg
*dst
,
926 const struct brw_reg
*arg0
)
928 assert((mask
& WRITEMASK_XW
) == 0);
930 if (mask
& WRITEMASK_Y
) {
931 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
932 brw_MOV(p
, dst
[1], arg0
[0]);
933 brw_set_saturate(p
, 0);
936 if (mask
& WRITEMASK_Z
) {
937 emit_math2(p
, BRW_MATH_FUNCTION_POW
,
939 WRITEMASK_X
| (mask
& SATURATE
),
944 /* Ordinarily you'd use an iff statement to skip or shortcircuit
945 * some of the POW calculations above, but 16-wide iff statements
946 * seem to lock c1 hardware, so this is a nasty workaround:
948 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_LE
, arg0
[0], brw_imm_f(0));
950 if (mask
& WRITEMASK_Y
)
951 brw_MOV(p
, dst
[1], brw_imm_f(0));
953 if (mask
& WRITEMASK_Z
)
954 brw_MOV(p
, dst
[2], brw_imm_f(0));
956 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
960 /* Kill pixel - set execution mask to zero for those pixels which
963 static void emit_kil( struct brw_wm_compile
*c
,
964 struct brw_reg
*arg0
)
966 struct brw_compile
*p
= &c
->func
;
967 struct brw_reg r0uw
= retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW
);
970 /* XXX - usually won't need 4 compares!
972 for (i
= 0; i
< 4; i
++) {
973 brw_push_insn_state(p
);
974 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_GE
, arg0
[i
], brw_imm_f(0));
975 brw_set_predicate_control_flag_value(p
, 0xff);
976 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
977 brw_AND(p
, r0uw
, brw_flag_reg(), r0uw
);
978 brw_pop_insn_state(p
);
982 /* KIL_NV kills the pixels that are currently executing, not based on a test
985 static void emit_kil_nv( struct brw_wm_compile
*c
)
987 struct brw_compile
*p
= &c
->func
;
988 struct brw_reg r0uw
= retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW
);
990 brw_push_insn_state(p
);
991 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
992 brw_NOT(p
, c
->emit_mask_reg
, brw_mask_reg(1)); //IMASK
993 brw_AND(p
, r0uw
, c
->emit_mask_reg
, r0uw
);
994 brw_pop_insn_state(p
);
997 static void fire_fb_write( struct brw_wm_compile
*c
,
1003 struct brw_compile
*p
= &c
->func
;
1005 /* Pass through control information:
1007 /* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */
1009 brw_push_insn_state(p
);
1010 brw_set_mask_control(p
, BRW_MASK_DISABLE
); /* ? */
1011 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1013 brw_message_reg(base_reg
+ 1),
1014 brw_vec8_grf(1, 0));
1015 brw_pop_insn_state(p
);
1018 /* Send framebuffer write message: */
1019 /* send (16) null.0<1>:uw m0 r0.0<8;8,1>:uw 0x85a04000:ud { Align1 EOT } */
1021 retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW
),
1023 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW
),
1031 static void emit_aa( struct brw_wm_compile
*c
,
1032 struct brw_reg
*arg1
,
1035 struct brw_compile
*p
= &c
->func
;
1036 GLuint comp
= c
->key
.aa_dest_stencil_reg
/ 2;
1037 GLuint off
= c
->key
.aa_dest_stencil_reg
% 2;
1038 struct brw_reg aa
= offset(arg1
[comp
], off
);
1040 brw_push_insn_state(p
);
1041 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
); /* ?? */
1042 brw_MOV(p
, brw_message_reg(reg
), aa
);
1043 brw_pop_insn_state(p
);
1047 /* Post-fragment-program processing. Send the results to the
1049 * \param arg0 the fragment color
1050 * \param arg1 the pass-through depth value
1051 * \param arg2 the shader-computed depth value
1053 static void emit_fb_write( struct brw_wm_compile
*c
,
1054 struct brw_reg
*arg0
,
1055 struct brw_reg
*arg1
,
1056 struct brw_reg
*arg2
,
1060 struct brw_compile
*p
= &c
->func
;
1064 /* Reserve a space for AA - may not be needed:
1066 if (c
->key
.aa_dest_stencil_reg
)
1069 /* I don't really understand how this achieves the color interleave
1070 * (ie RGBARGBA) in the result: [Do the saturation here]
1073 brw_push_insn_state(p
);
1075 for (channel
= 0; channel
< 4; channel
++) {
1076 /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */
1077 /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */
1079 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1081 brw_message_reg(nr
+ channel
),
1084 brw_set_compression_control(p
, BRW_COMPRESSION_2NDHALF
);
1086 brw_message_reg(nr
+ channel
+ 4),
1087 sechalf(arg0
[channel
]));
1090 /* skip over the regs populated above:
1094 brw_pop_insn_state(p
);
1097 if (c
->key
.source_depth_to_render_target
)
1099 if (c
->key
.computes_depth
)
1100 brw_MOV(p
, brw_message_reg(nr
), arg2
[2]);
1102 brw_MOV(p
, brw_message_reg(nr
), arg1
[1]); /* ? */
1107 if (c
->key
.dest_depth_reg
)
1109 GLuint comp
= c
->key
.dest_depth_reg
/ 2;
1110 GLuint off
= c
->key
.dest_depth_reg
% 2;
1113 brw_push_insn_state(p
);
1114 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1116 brw_MOV(p
, brw_message_reg(nr
), offset(arg1
[comp
],1));
1118 brw_MOV(p
, brw_message_reg(nr
+1), arg1
[comp
+1]);
1119 brw_pop_insn_state(p
);
1122 brw_MOV(p
, brw_message_reg(nr
), arg1
[comp
]);
1127 if (!c
->key
.runtime_check_aads_emit
) {
1128 if (c
->key
.aa_dest_stencil_reg
)
1129 emit_aa(c
, arg1
, 2);
1131 fire_fb_write(c
, 0, nr
, target
, eot
);
1134 struct brw_reg v1_null_ud
= vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD
));
1135 struct brw_reg ip
= brw_ip_reg();
1136 struct brw_instruction
*jmp
;
1138 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1139 brw_set_conditionalmod(p
, BRW_CONDITIONAL_Z
);
1142 get_element_ud(brw_vec8_grf(1,0), 6),
1145 jmp
= brw_JMPI(p
, ip
, ip
, brw_imm_d(0));
1147 emit_aa(c
, arg1
, 2);
1148 fire_fb_write(c
, 0, nr
, target
, eot
);
1149 /* note - thread killed in subroutine */
1151 brw_land_fwd_jump(p
, jmp
);
1153 /* ELSE: Shuffle up one register to fill in the hole left for AA:
1155 fire_fb_write(c
, 1, nr
-1, target
, eot
);
1161 * Move a GPR to scratch memory.
1163 static void emit_spill( struct brw_wm_compile
*c
,
1167 struct brw_compile
*p
= &c
->func
;
1170 mov (16) m2.0<1>:ud r2.0<8;8,1>:ud { Align1 Compr }
1172 brw_MOV(p
, brw_message_reg(2), reg
);
1175 mov (1) r0.2<1>:d 0x00000080:d { Align1 NoMask }
1176 send (16) null.0<1>:uw m1 r0.0<8;8,1>:uw 0x053003ff:ud { Align1 }
1179 retype(vec16(brw_vec8_grf(0, 0)), BRW_REGISTER_TYPE_UW
),
1185 * Load a GPR from scratch memory.
1187 static void emit_unspill( struct brw_wm_compile
*c
,
1191 struct brw_compile
*p
= &c
->func
;
1193 /* Slot 0 is the undef value.
1196 brw_MOV(p
, reg
, brw_imm_f(0));
1201 mov (1) r0.2<1>:d 0x000000c0:d { Align1 NoMask }
1202 send (16) r110.0<1>:uw m1 r0.0<8;8,1>:uw 0x041243ff:ud { Align1 }
1206 retype(vec16(reg
), BRW_REGISTER_TYPE_UW
),
1212 * Retrieve up to 4 GEN4 register pairs for the given wm reg:
1213 * Args with unspill_reg != 0 will be loaded from scratch memory.
1215 static void get_argument_regs( struct brw_wm_compile
*c
,
1216 struct brw_wm_ref
*arg
[],
1217 struct brw_reg
*regs
)
1221 for (i
= 0; i
< 4; i
++) {
1223 if (arg
[i
]->unspill_reg
)
1225 brw_vec8_grf(arg
[i
]->unspill_reg
, 0),
1226 arg
[i
]->value
->spill_slot
);
1228 regs
[i
] = arg
[i
]->hw_reg
;
1231 regs
[i
] = brw_null_reg();
1238 * For values that have a spill_slot!=0, write those regs to scratch memory.
1240 static void spill_values( struct brw_wm_compile
*c
,
1241 struct brw_wm_value
*values
,
1246 for (i
= 0; i
< nr
; i
++)
1247 if (values
[i
].spill_slot
)
1248 emit_spill(c
, values
[i
].hw_reg
, values
[i
].spill_slot
);
1252 /* Emit the fragment program instructions here.
1254 void brw_wm_emit( struct brw_wm_compile
*c
)
1256 struct brw_compile
*p
= &c
->func
;
1259 brw_set_compression_control(p
, BRW_COMPRESSION_COMPRESSED
);
1261 /* Check if any of the payload regs need to be spilled:
1263 spill_values(c
, c
->payload
.depth
, 4);
1264 spill_values(c
, c
->creg
, c
->nr_creg
);
1265 spill_values(c
, c
->payload
.input_interp
, FRAG_ATTRIB_MAX
);
1268 for (insn
= 0; insn
< c
->nr_insns
; insn
++) {
1270 struct brw_wm_instruction
*inst
= &c
->instruction
[insn
];
1271 struct brw_reg args
[3][4], dst
[4];
1272 GLuint i
, dst_flags
;
1274 /* Get argument regs:
1276 for (i
= 0; i
< 3; i
++)
1277 get_argument_regs(c
, inst
->src
[i
], args
[i
]);
1281 for (i
= 0; i
< 4; i
++)
1283 dst
[i
] = inst
->dst
[i
]->hw_reg
;
1285 dst
[i
] = brw_null_reg();
1289 dst_flags
= inst
->writemask
;
1291 dst_flags
|= SATURATE
;
1293 switch (inst
->opcode
) {
1294 /* Generated instructions for calculating triangle interpolants:
1297 emit_pixel_xy(p
, dst
, dst_flags
);
1301 emit_delta_xy(p
, dst
, dst_flags
, args
[0]);
1305 emit_wpos_xy(c
, dst
, dst_flags
, args
[0]);
1309 emit_pixel_w(p
, dst
, dst_flags
, args
[0], args
[1]);
1313 emit_linterp(p
, dst
, dst_flags
, args
[0], args
[1]);
1317 emit_pinterp(p
, dst
, dst_flags
, args
[0], args
[1], args
[2]);
1321 emit_cinterp(p
, dst
, dst_flags
, args
[0]);
1325 emit_fb_write(c
, args
[0], args
[1], args
[2], inst
->target
, inst
->eot
);
1328 case WM_FRONTFACING
:
1329 emit_frontfacing(p
, dst
, dst_flags
);
1332 /* Straightforward arithmetic:
1335 emit_alu2(p
, brw_ADD
, dst
, dst_flags
, args
[0], args
[1]);
1339 emit_alu1(p
, brw_FRC
, dst
, dst_flags
, args
[0]);
1343 emit_alu1(p
, brw_RNDD
, dst
, dst_flags
, args
[0]);
1347 emit_ddxy(p
, dst
, dst_flags
, GL_TRUE
, args
[0]);
1351 emit_ddxy(p
, dst
, dst_flags
, GL_FALSE
, args
[0]);
1355 emit_dp3(p
, dst
, dst_flags
, args
[0], args
[1]);
1359 emit_dp4(p
, dst
, dst_flags
, args
[0], args
[1]);
1363 emit_dph(p
, dst
, dst_flags
, args
[0], args
[1]);
1367 emit_trunc(p
, dst
, dst_flags
, args
[0]);
1371 emit_lrp(p
, dst
, dst_flags
, args
[0], args
[1], args
[2]);
1375 emit_mad(p
, dst
, dst_flags
, args
[0], args
[1], args
[2]);
1380 emit_alu1(p
, brw_MOV
, dst
, dst_flags
, args
[0]);
1384 emit_alu2(p
, brw_MUL
, dst
, dst_flags
, args
[0], args
[1]);
1388 emit_xpd(p
, dst
, dst_flags
, args
[0], args
[1]);
1391 /* Higher math functions:
1394 emit_math1(p
, BRW_MATH_FUNCTION_INV
, dst
, dst_flags
, args
[0]);
1398 emit_math1(p
, BRW_MATH_FUNCTION_RSQ
, dst
, dst_flags
, args
[0]);
1402 emit_math1(p
, BRW_MATH_FUNCTION_SIN
, dst
, dst_flags
, args
[0]);
1406 emit_math1(p
, BRW_MATH_FUNCTION_COS
, dst
, dst_flags
, args
[0]);
1410 emit_math1(p
, BRW_MATH_FUNCTION_EXP
, dst
, dst_flags
, args
[0]);
1414 emit_math1(p
, BRW_MATH_FUNCTION_LOG
, dst
, dst_flags
, args
[0]);
1418 /* There is an scs math function, but it would need some
1419 * fixup for 16-element execution.
1421 if (dst_flags
& WRITEMASK_X
)
1422 emit_math1(p
, BRW_MATH_FUNCTION_COS
, dst
, (dst_flags
&SATURATE
)|WRITEMASK_X
, args
[0]);
1423 if (dst_flags
& WRITEMASK_Y
)
1424 emit_math1(p
, BRW_MATH_FUNCTION_SIN
, dst
+1, (dst_flags
&SATURATE
)|WRITEMASK_X
, args
[0]);
1428 emit_math2(p
, BRW_MATH_FUNCTION_POW
, dst
, dst_flags
, args
[0], args
[1]);
1434 emit_cmp(p
, dst
, dst_flags
, args
[0], args
[1], args
[2]);
1438 emit_max(p
, dst
, dst_flags
, args
[0], args
[1]);
1442 emit_min(p
, dst
, dst_flags
, args
[0], args
[1]);
1446 emit_slt(p
, dst
, dst_flags
, args
[0], args
[1]);
1450 emit_sle(p
, dst
, dst_flags
, args
[0], args
[1]);
1453 emit_sgt(p
, dst
, dst_flags
, args
[0], args
[1]);
1456 emit_sge(p
, dst
, dst_flags
, args
[0], args
[1]);
1459 emit_seq(p
, dst
, dst_flags
, args
[0], args
[1]);
1462 emit_sne(p
, dst
, dst_flags
, args
[0], args
[1]);
1466 emit_lit(p
, dst
, dst_flags
, args
[0]);
1469 /* Texturing operations:
1472 emit_tex(c
, inst
, dst
, dst_flags
, args
[0]);
1476 emit_txb(c
, inst
, dst
, dst_flags
, args
[0]);
1480 emit_kil(c
, args
[0]);
1488 _mesa_printf("Unsupported opcode %i (%s) in fragment shader\n",
1489 inst
->opcode
, inst
->opcode
< MAX_OPCODE
?
1490 _mesa_opcode_string(inst
->opcode
) :
1494 for (i
= 0; i
< 4; i
++)
1495 if (inst
->dst
[i
] && inst
->dst
[i
]->spill_slot
)
1497 inst
->dst
[i
]->hw_reg
,
1498 inst
->dst
[i
]->spill_slot
);
1501 if (INTEL_DEBUG
& DEBUG_WM
) {
1504 _mesa_printf("wm-native:\n");
1505 for (i
= 0; i
< p
->nr_insn
; i
++)
1506 brw_disasm(stderr
, &p
->store
[i
]);