2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **********************************************************************/
29 * Keith Whitwell <keith@tungstengraphics.com>
33 #include "main/macros.h"
34 #include "brw_context.h"
37 /* Not quite sure how correct this is - need to understand horiz
38 * vs. vertical strides a little better.
/*
 * sechalf: takes a brw_reg by value and returns a brw_reg.
 * NOTE(review): the function body is not present in this extract, so what
 * it does to the register cannot be confirmed here.  Elsewhere in this file
 * its result is used as the source of MOVs issued right after
 * brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF) (emit_math2,
 * emit_fb_write), so it presumably addresses the second half of a 16-wide
 * value -- confirm against the full source.
 */
40 static INLINE
struct brw_reg
sechalf( struct brw_reg reg
)
50 * R0.0 -- pixel mask, one bit for each of 4 pixels in 4 tiles,
51 * corresponding to each of the 16 execution channels.
53 * R1.0 -- triangle vertex 0.X
54 * R1.1 -- triangle vertex 0.Y
55 * R1.2 -- tile 0 x,y coords (2 packed uwords)
56 * R1.3 -- tile 1 x,y coords (2 packed uwords)
57 * R1.4 -- tile 2 x,y coords (2 packed uwords)
58 * R1.5 -- tile 3 x,y coords (2 packed uwords)
/*
 * emit_pixel_xy: builds per-pixel X and Y values from the tile coordinates
 * held in r1 (GRF 1), working on unsigned-word views of the registers.
 *
 * c    - compile context; c->func is the instruction emitter and
 *        c->dispatch_width == 16 selects vec16 views of dst[0] and dst[1]
 *        (vec8 views are the other assignment visible here).
 * dst  - destination registers; dst[0] and dst[1] are retyped to
 *        BRW_REGISTER_TYPE_UW as dst0_uw and dst1_uw.
 * mask - tested against WRITEMASK_X and WRITEMASK_Y to pick what is emitted.
 *
 * For X the visible operands are r1 (as UW) at suboffset 4 with
 * stride(..., 2, 4, 0) and brw_imm_v(0x10101010); for Y they are suboffset 5
 * and brw_imm_v(0x11001100).  Instruction state is pushed on entry,
 * compression is set to BRW_COMPRESSION_NONE, and the state is popped at the
 * end.
 * NOTE(review): the lines naming the emitted opcode and its destination are
 * missing from this extract; the surviving comment says the step adds 1 or 0
 * to each micro-tile coordinate.
 */
64 void emit_pixel_xy(struct brw_wm_compile
*c
,
65 const struct brw_reg
*dst
,
68 struct brw_compile
*p
= &c
->func
;
69 struct brw_reg r1
= brw_vec1_grf(1, 0);
70 struct brw_reg r1_uw
= retype(r1
, BRW_REGISTER_TYPE_UW
);
71 struct brw_reg dst0_uw
, dst1_uw
;
73 brw_push_insn_state(p
);
74 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
76 if (c
->dispatch_width
== 16) {
77 dst0_uw
= vec16(retype(dst
[0], BRW_REGISTER_TYPE_UW
));
78 dst1_uw
= vec16(retype(dst
[1], BRW_REGISTER_TYPE_UW
));
80 dst0_uw
= vec8(retype(dst
[0], BRW_REGISTER_TYPE_UW
));
81 dst1_uw
= vec8(retype(dst
[1], BRW_REGISTER_TYPE_UW
));
84 /* Calculate pixel centers by adding 1 or 0 to each of the
85 * micro-tile coordinates passed in r1.
87 if (mask
& WRITEMASK_X
) {
90 stride(suboffset(r1_uw
, 4), 2, 4, 0),
91 brw_imm_v(0x10101010));
94 if (mask
& WRITEMASK_Y
) {
97 stride(suboffset(r1_uw
,5), 2, 4, 0),
98 brw_imm_v(0x11001100));
100 brw_pop_insn_state(p
);
/*
 * emit_delta_xy: per the surviving comment, computes delta X and Y by
 * subtracting the origin held in r1 (GRF 1) from the pixel coordinates.
 *
 * p    - instruction emitter.
 * dst  - destination registers (the destination operands are on lines
 *        missing from this extract).
 * arg0 - arg0[0] and arg0[1] are read retyped as BRW_REGISTER_TYPE_UW for
 *        the X and Y cases respectively.
 *
 * mask selects the X and/or Y computation via WRITEMASK_X and WRITEMASK_Y.
 * For Y the second operand is negate(suboffset(r1, 1)); the matching X
 * operand line is not visible here.
 */
104 void emit_delta_xy(struct brw_compile
*p
,
105 const struct brw_reg
*dst
,
107 const struct brw_reg
*arg0
)
109 struct brw_reg r1
= brw_vec1_grf(1, 0);
111 /* Calc delta X,Y by subtracting origin in r1 from the pixel
114 if (mask
& WRITEMASK_X
) {
117 retype(arg0
[0], BRW_REGISTER_TYPE_UW
),
121 if (mask
& WRITEMASK_Y
) {
124 retype(arg0
[1], BRW_REGISTER_TYPE_UW
),
125 negate(suboffset(r1
,1)));
/*
 * emit_wpos_xy: converts the pixel coordinates in arg0[0] and arg0[1] (read
 * retyped as BRW_REGISTER_TYPE_W) into window-position X and Y, honouring
 * the fragment program's PixelCenterInteger and OriginUpperLeft settings.
 *
 * c    - compile context; reads c->fp->program.PixelCenterInteger,
 *        c->fp->program.OriginUpperLeft and c->key.drawable_height.
 * dst  - destination registers (the destination operands are on lines
 *        missing from this extract).
 * arg0 - source pixel X (arg0[0]) and Y (arg0[1]).
 *
 * mask selects X and/or Y.  The X case and the OriginUpperLeft Y case each
 * have two variants chosen by PixelCenterInteger: one whose visible operand
 * list ends at the retyped source, and one that passes a further operand
 * which is not visible here.  The last Y variant follows the surviving
 * comment, Y' = (height - 1) - Y + center: it uses the negated arg0[1] and
 * the float immediate drawable_height - 1 + center_offset, where
 * center_offset is 0.0 when PixelCenterInteger is set and 0.5 otherwise.
 * NOTE(review): the emitted opcodes are on missing lines.
 */
130 void emit_wpos_xy(struct brw_wm_compile
*c
,
131 const struct brw_reg
*dst
,
133 const struct brw_reg
*arg0
)
135 struct brw_compile
*p
= &c
->func
;
137 /* Calculate the pixel offset from window bottom left into destination
140 if (mask
& WRITEMASK_X
) {
141 if (c
->fp
->program
.PixelCenterInteger
) {
145 retype(arg0
[0], BRW_REGISTER_TYPE_W
));
150 retype(arg0
[0], BRW_REGISTER_TYPE_W
),
155 if (mask
& WRITEMASK_Y
) {
156 if (c
->fp
->program
.OriginUpperLeft
) {
157 if (c
->fp
->program
.PixelCenterInteger
) {
161 retype(arg0
[1], BRW_REGISTER_TYPE_W
));
166 retype(arg0
[1], BRW_REGISTER_TYPE_W
),
170 float center_offset
= c
->fp
->program
.PixelCenterInteger
? 0.0 : 0.5;
172 /* Y' = (height - 1) - Y + center */
175 negate(retype(arg0
[1], BRW_REGISTER_TYPE_W
)),
176 brw_imm_f(c
->key
.drawable_height
- 1 + center_offset
));
/*
 * emit_pixel_w: per the surviving comment, computes 1/w by linearly
 * interpolating wpos[3] straight into a message register, then takes the
 * reciprocal with the math unit (BRW_MATH_FUNCTION_INV).
 *
 * c      - compile context (emitter in c->func, c->dispatch_width).
 * dst    - dst[3] is the destination of the 16-wide math call.
 * arg0   - arg0[0].nr + 1, subregister 4, is the interpolation operand
 *          (interp3).
 * deltas - deltas[0] is the brw_LINE operand, deltas[1] the brw_MAC operand.
 *
 * Everything is skipped unless mask has WRITEMASK_W.  brw_LINE targets the
 * null register; brw_MAC writes message register 2 using interp3 at
 * suboffset 1.  The reciprocal is issued with BRW_MATH_SATURATE_NONE and
 * BRW_MATH_PRECISION_FULL: through brw_math_16 when dispatch_width is 16,
 * otherwise through a call whose name is on a line missing from this
 * extract (it additionally passes BRW_MATH_DATA_VECTOR).
 */
182 void emit_pixel_w(struct brw_wm_compile
*c
,
183 const struct brw_reg
*dst
,
185 const struct brw_reg
*arg0
,
186 const struct brw_reg
*deltas
)
188 struct brw_compile
*p
= &c
->func
;
190 /* Don't need this if all you are doing is interpolating color, for
193 if (mask
& WRITEMASK_W
) {
194 struct brw_reg interp3
= brw_vec1_grf(arg0
[0].nr
+1, 4);
196 /* Calc 1/w - just linterp wpos[3] optimized by putting the
197 * result straight into a message reg.
199 brw_LINE(p
, brw_null_reg(), interp3
, deltas
[0]);
200 brw_MAC(p
, brw_message_reg(2), suboffset(interp3
, 1), deltas
[1]);
203 if (c
->dispatch_width
== 16) {
204 brw_math_16(p
, dst
[3],
205 BRW_MATH_FUNCTION_INV
,
206 BRW_MATH_SATURATE_NONE
,
208 BRW_MATH_PRECISION_FULL
);
211 BRW_MATH_FUNCTION_INV
,
212 BRW_MATH_SATURATE_NONE
,
214 BRW_MATH_DATA_VECTOR
,
215 BRW_MATH_PRECISION_FULL
);
/*
 * emit_linterp: linear interpolation of up to four channels into dst[0..3].
 *
 * p      - instruction emitter.
 * dst    - dst[i] receives channel i.
 * arg0   - arg0[0].nr is the first of two consecutive GRFs read as the
 *          interp[] operands: channels 0 and 1 at subregisters 0 and 4 of
 *          register nr, channels 2 and 3 at subregisters 0 and 4 of
 *          register nr + 1.
 * deltas - deltas[0] is the brw_LINE operand, deltas[1] the brw_MAC operand.
 *
 * Per channel: brw_LINE(interp[i], deltas[0]) into the null register, then
 * brw_MAC into dst[i] with interp[i] at suboffset 1 and deltas[1].
 * NOTE(review): any per-channel writemask test sits on lines missing from
 * this extract.
 */
221 void emit_linterp(struct brw_compile
*p
,
222 const struct brw_reg
*dst
,
224 const struct brw_reg
*arg0
,
225 const struct brw_reg
*deltas
)
227 struct brw_reg interp
[4];
228 GLuint nr
= arg0
[0].nr
;
231 interp
[0] = brw_vec1_grf(nr
, 0);
232 interp
[1] = brw_vec1_grf(nr
, 4);
233 interp
[2] = brw_vec1_grf(nr
+1, 0);
234 interp
[3] = brw_vec1_grf(nr
+1, 4);
236 for (i
= 0; i
< 4; i
++) {
238 brw_LINE(p
, brw_null_reg(), interp
[i
], deltas
[0]);
239 brw_MAC(p
, dst
[i
], suboffset(interp
[i
],1), deltas
[1]);
/*
 * emit_pinterp: same LINE/MAC interpolation as emit_linterp, followed by a
 * second pass that multiplies each result by w[3].
 *
 * p      - instruction emitter.
 * dst    - dst[i] receives channel i.
 * arg0   - arg0[0].nr is the first of two consecutive GRFs read as the
 *          interp[] operands (subregisters 0 and 4 of nr and of nr + 1).
 * deltas - deltas[0] is the brw_LINE operand, deltas[1] the brw_MAC operand.
 * w      - only w[3] is read; it is the multiplier in the second loop.
 *
 * First loop, per channel: brw_LINE into the null register, then brw_MAC
 * into dst[i].  Second loop, per channel: brw_MUL(dst[i], dst[i], w[3]).
 * NOTE(review): any per-channel writemask tests sit on lines missing from
 * this extract.
 */
245 void emit_pinterp(struct brw_compile
*p
,
246 const struct brw_reg
*dst
,
248 const struct brw_reg
*arg0
,
249 const struct brw_reg
*deltas
,
250 const struct brw_reg
*w
)
252 struct brw_reg interp
[4];
253 GLuint nr
= arg0
[0].nr
;
256 interp
[0] = brw_vec1_grf(nr
, 0);
257 interp
[1] = brw_vec1_grf(nr
, 4);
258 interp
[2] = brw_vec1_grf(nr
+1, 0);
259 interp
[3] = brw_vec1_grf(nr
+1, 4);
261 for (i
= 0; i
< 4; i
++) {
263 brw_LINE(p
, brw_null_reg(), interp
[i
], deltas
[0]);
264 brw_MAC(p
, dst
[i
], suboffset(interp
[i
],1), deltas
[1]);
267 for (i
= 0; i
< 4; i
++) {
269 brw_MUL(p
, dst
[i
], dst
[i
], w
[3]);
/*
 * emit_cinterp: copies one value per channel into dst[i] with a plain
 * brw_MOV, with no LINE/MAC step.
 *
 * p    - instruction emitter.
 * dst  - dst[i] receives channel i.
 * arg0 - arg0[0].nr is the first of two consecutive GRFs read as the
 *        interp[] operands (subregisters 0 and 4 of nr and of nr + 1); the
 *        MOV source is interp[i] at suboffset 3.
 *
 * NOTE(review): any per-channel writemask test sits on lines missing from
 * this extract.  The original TODO notes these moves could be optimised
 * away.
 */
275 void emit_cinterp(struct brw_compile
*p
,
276 const struct brw_reg
*dst
,
278 const struct brw_reg
*arg0
)
280 struct brw_reg interp
[4];
281 GLuint nr
= arg0
[0].nr
;
284 interp
[0] = brw_vec1_grf(nr
, 0);
285 interp
[1] = brw_vec1_grf(nr
, 4);
286 interp
[2] = brw_vec1_grf(nr
+1, 0);
287 interp
[3] = brw_vec1_grf(nr
+1, 4);
289 for (i
= 0; i
< 4; i
++) {
291 brw_MOV(p
, dst
[i
], suboffset(interp
[i
],3)); /* TODO: optimize away like other moves */
/*
 * emit_frontfacing: writes 0.0 to the destination channels, compares
 * r1.6 (as an unsigned dword) against 1 << 31 with BRW_CONDITIONAL_L, then
 * writes 1.0 to the channels again, and finally resets the predicate flag
 * value to 0xff.
 *
 * p    - instruction emitter.
 * dst  - dst[0..3] are the destinations of both MOV loops.
 * mask - nothing is done when no WRITEMASK_XYZW bit is set (the statement
 *        under that test is on a line missing from this extract).
 *
 * The surviving comment states bit 31 of r1.6 means the primitive is back
 * facing, so the "less than" compare is true for front faces.
 * NOTE(review): per-channel writemask tests, if any, are on missing lines.
 * NOTE(review): 1 << 31 shifts into the sign bit of a 32-bit int, which is
 * undefined behaviour in ISO C; 1u << 31 would express the same constant
 * safely.
 */
296 /* Sets the destination channels to 1.0 or 0.0 according to glFrontFacing. */
297 void emit_frontfacing(struct brw_compile
*p
,
298 const struct brw_reg
*dst
,
301 struct brw_reg r1_6ud
= retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD
);
304 if (!(mask
& WRITEMASK_XYZW
))
307 for (i
= 0; i
< 4; i
++) {
309 brw_MOV(p
, dst
[i
], brw_imm_f(0.0));
313 /* bit 31 is "primitive is back face", so checking < (1 << 31) gives
316 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, r1_6ud
, brw_imm_ud(1 << 31));
317 for (i
= 0; i
< 4; i
++) {
319 brw_MOV(p
, dst
[i
], brw_imm_f(1.0));
322 brw_set_predicate_control_flag_value(p
, 0xff);
/*
 * emit_ddxy: emits the screen-space derivative (DDX or DDY) of arg0 into
 * dst, one brw_ADD(dst[i], src0, negate(src1)) per channel.
 *
 * p    - instruction emitter.
 * dst  - dst[i] receives the difference for channel i.
 * arg0 - arg0[i].file and arg0[i].nr locate the source register; the
 *        subregister and region are rebuilt here with brw_reg().
 *
 * Two operand pairs are constructed in the loop:
 *   - subregister 1 minus subregister 0, BRW_VERTICAL_STRIDE_2,
 *     BRW_HORIZONTAL_STRIDE_0;
 *   - subregister 0 minus subregister 2, BRW_VERTICAL_STRIDE_4,
 *     BRW_HORIZONTAL_STRIDE_0.
 * Going by the layout comment kept below (arg0 ordered tl, tr, bl, br per
 * subspan), the first pair matches tr - tl (DDX) and the second tl - bl
 * (DDY).  Callers in this file pass GL_TRUE and GL_FALSE as the fourth
 * argument, presumably selecting between them.
 * NOTE(review): the selecting condition, the per-channel writemask test and
 * the condition guarding brw_set_saturate(p, 1) are on lines missing from
 * this extract.  Saturation is switched back off at the end.
 */
325 /* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input
328 * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br
330 * and we're trying to produce:
333 * dst: (ss0.tr - ss0.tl) (ss0.tl - ss0.bl)
334 * (ss0.tr - ss0.tl) (ss0.tr - ss0.br)
335 * (ss0.br - ss0.bl) (ss0.tl - ss0.bl)
336 * (ss0.br - ss0.bl) (ss0.tr - ss0.br)
337 * (ss1.tr - ss1.tl) (ss1.tl - ss1.bl)
338 * (ss1.tr - ss1.tl) (ss1.tr - ss1.br)
339 * (ss1.br - ss1.bl) (ss1.tl - ss1.bl)
340 * (ss1.br - ss1.bl) (ss1.tr - ss1.br)
342 * and add another set of two more subspans if in 16-pixel dispatch mode.
344 * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result
345 * for each pair, and vertstride = 2 jumps us 2 elements after processing a
346 * pair. But for DDY, it's harder, as we want to produce the pairs swizzled
347 * between each other. We could probably do it like ddx and swizzle the right
348 * order later, but bail for now and just produce
349 * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4)
351 void emit_ddxy(struct brw_compile
*p
,
352 const struct brw_reg
*dst
,
355 const struct brw_reg
*arg0
)
358 struct brw_reg src0
, src1
;
361 brw_set_saturate(p
, 1);
362 for (i
= 0; i
< 4; i
++ ) {
365 src0
= brw_reg(arg0
[i
].file
, arg0
[i
].nr
, 1,
367 BRW_VERTICAL_STRIDE_2
,
369 BRW_HORIZONTAL_STRIDE_0
,
370 BRW_SWIZZLE_XYZW
, WRITEMASK_XYZW
);
371 src1
= brw_reg(arg0
[i
].file
, arg0
[i
].nr
, 0,
373 BRW_VERTICAL_STRIDE_2
,
375 BRW_HORIZONTAL_STRIDE_0
,
376 BRW_SWIZZLE_XYZW
, WRITEMASK_XYZW
);
378 src0
= brw_reg(arg0
[i
].file
, arg0
[i
].nr
, 0,
380 BRW_VERTICAL_STRIDE_4
,
382 BRW_HORIZONTAL_STRIDE_0
,
383 BRW_SWIZZLE_XYZW
, WRITEMASK_XYZW
);
384 src1
= brw_reg(arg0
[i
].file
, arg0
[i
].nr
, 2,
386 BRW_VERTICAL_STRIDE_4
,
388 BRW_HORIZONTAL_STRIDE_0
,
389 BRW_SWIZZLE_XYZW
, WRITEMASK_XYZW
);
391 brw_ADD(p
, dst
[i
], src0
, negate(src1
));
395 brw_set_saturate(p
, 0);
/*
 * emit_alu1: applies a one-source instruction emitter to each channel.
 *
 * p    - instruction emitter.
 * func - callback invoked as func(p, dst[i], arg0[i]); callers in this file
 *        pass brw_FRC, brw_RNDD, brw_RNDZ and brw_MOV.
 * dst  - dst[i] is the destination for channel i.
 * arg0 - arg0[i] is the source for channel i.
 *
 * Saturation is switched on before the loop and off after it.
 * NOTE(review): the conditions guarding the two brw_set_saturate calls and
 * any per-channel writemask test are on lines missing from this extract.
 */
398 void emit_alu1(struct brw_compile
*p
,
399 struct brw_instruction
*(*func
)(struct brw_compile
*,
402 const struct brw_reg
*dst
,
404 const struct brw_reg
*arg0
)
409 brw_set_saturate(p
, 1);
411 for (i
= 0; i
< 4; i
++) {
413 func(p
, dst
[i
], arg0
[i
]);
418 brw_set_saturate(p
, 0);
/*
 * emit_alu2: applies a two-source instruction emitter to each channel.
 *
 * p    - instruction emitter.
 * func - callback invoked as func(p, dst[i], arg0[i], arg1[i]); callers in
 *        this file pass brw_ADD and brw_MUL.
 * dst  - dst[i] is the destination for channel i.
 * arg0 - first source, per channel.
 * arg1 - second source, per channel.
 *
 * Saturation is switched on before the loop and off after it.
 * NOTE(review): the conditions guarding the two brw_set_saturate calls and
 * any per-channel writemask test are on lines missing from this extract.
 */
422 void emit_alu2(struct brw_compile
*p
,
423 struct brw_instruction
*(*func
)(struct brw_compile
*,
427 const struct brw_reg
*dst
,
429 const struct brw_reg
*arg0
,
430 const struct brw_reg
*arg1
)
435 brw_set_saturate(p
, 1);
437 for (i
= 0; i
< 4; i
++) {
439 func(p
, dst
[i
], arg0
[i
], arg1
[i
]);
444 brw_set_saturate(p
, 0);
/*
 * emit_mad: multiply-add, dst[i] = arg0[i] * arg1[i] + arg2[i].
 *
 * p    - instruction emitter.
 * dst  - dst[i] holds the product first, then the final sum.
 * arg0 - first multiplicand, per channel.
 * arg1 - second multiplicand, per channel.
 * arg2 - addend, per channel.
 *
 * Emitted as brw_MUL followed by brw_ADD.  Saturation follows the SATURATE
 * bit of mask and is enabled only around the ADD, so the intermediate
 * product is never clamped; it is switched off again afterwards.
 * NOTE(review): any per-channel writemask test is on a line missing from
 * this extract.
 */
448 void emit_mad(struct brw_compile
*p
,
449 const struct brw_reg
*dst
,
451 const struct brw_reg
*arg0
,
452 const struct brw_reg
*arg1
,
453 const struct brw_reg
*arg2
)
457 for (i
= 0; i
< 4; i
++) {
459 brw_MUL(p
, dst
[i
], arg0
[i
], arg1
[i
]);
461 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
462 brw_ADD(p
, dst
[i
], dst
[i
], arg2
[i
]);
463 brw_set_saturate(p
, 0);
/*
 * emit_lrp: linear blend, dst[i] = arg0[i] * arg1[i] + (1 - arg0[i]) * arg2[i].
 *
 * p    - instruction emitter.
 * dst  - dst[i] is used as a temporary for (1 - arg0[i]) before receiving
 *        the final value.
 * arg0 - blend weight, per channel.
 * arg1 - value weighted by arg0.
 * arg2 - value weighted by (1 - arg0).
 *
 * Sequence per channel: brw_ADD(dst, -arg0, 1.0); brw_MUL of dst and arg2
 * into the null register; brw_MAC(dst, arg0, arg1).  The MUL/MAC pairing
 * relies on the MUL result being carried into the MAC (accumulator
 * behaviour is not shown in this file -- confirm in the hardware docs).
 * Saturation follows the SATURATE bit of mask and is enabled only around
 * the final MAC.
 * NOTE(review): any per-channel writemask test is on a line missing from
 * this extract.
 */
468 void emit_lrp(struct brw_compile
*p
,
469 const struct brw_reg
*dst
,
471 const struct brw_reg
*arg0
,
472 const struct brw_reg
*arg1
,
473 const struct brw_reg
*arg2
)
477 /* Uses dst as a temporary:
479 for (i
= 0; i
< 4; i
++) {
481 /* Can I use the LINE instruction for this?
483 brw_ADD(p
, dst
[i
], negate(arg0
[i
]), brw_imm_f(1.0));
484 brw_MUL(p
, brw_null_reg(), dst
[i
], arg2
[i
]);
486 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
487 brw_MAC(p
, dst
[i
], arg0
[i
], arg1
[i
]);
488 brw_set_saturate(p
, 0);
/*
 * emit_sop: shared body of the "set on comparison" opcodes; writes 1.0 to
 * dst[i] where the comparison of arg0[i] with arg1[i] holds and 0.0
 * elsewhere.
 *
 * p    - instruction emitter.
 * dst  - dst[i] receives 0.0 or 1.0.
 * arg0 - left-hand comparison operand, per channel.
 * arg1 - right-hand comparison operand, per channel.
 *
 * The condition code (cond) is passed by the emit_slt .. emit_sne wrappers
 * in this file as the fourth argument.  Per channel, inside a pushed
 * instruction state: brw_CMP into the null register; an unpredicated
 * MOV of 0; then, with BRW_PREDICATE_NORMAL, a MOV of 1.0; state popped.
 * NOTE(review): any per-channel writemask test is on a line missing from
 * this extract.
 */
493 void emit_sop(struct brw_compile
*p
,
494 const struct brw_reg
*dst
,
497 const struct brw_reg
*arg0
,
498 const struct brw_reg
*arg1
)
502 for (i
= 0; i
< 4; i
++) {
504 brw_push_insn_state(p
);
505 brw_CMP(p
, brw_null_reg(), cond
, arg0
[i
], arg1
[i
]);
506 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
507 brw_MOV(p
, dst
[i
], brw_imm_f(0));
508 brw_set_predicate_control(p
, BRW_PREDICATE_NORMAL
);
509 brw_MOV(p
, dst
[i
], brw_imm_f(1.0));
510 brw_pop_insn_state(p
);
/*
 * emit_slt: set-on-less-than.  Forwards p, dst, mask, arg0 and arg1 to
 * emit_sop with BRW_CONDITIONAL_L, so dst[i] becomes 1.0 where
 * arg0[i] < arg1[i] and 0.0 otherwise.
 */
515 static void emit_slt( struct brw_compile
*p
,
516 const struct brw_reg
*dst
,
518 const struct brw_reg
*arg0
,
519 const struct brw_reg
*arg1
)
521 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_L
, arg0
, arg1
);
/*
 * emit_sle: set-on-less-or-equal.  Forwards p, dst, mask, arg0 and arg1 to
 * emit_sop with BRW_CONDITIONAL_LE, so dst[i] becomes 1.0 where
 * arg0[i] <= arg1[i] and 0.0 otherwise.
 */
524 static void emit_sle( struct brw_compile
*p
,
525 const struct brw_reg
*dst
,
527 const struct brw_reg
*arg0
,
528 const struct brw_reg
*arg1
)
530 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_LE
, arg0
, arg1
);
/*
 * emit_sgt: set-on-greater-than.  Forwards p, dst, mask, arg0 and arg1 to
 * emit_sop with BRW_CONDITIONAL_G, so dst[i] becomes 1.0 where
 * arg0[i] > arg1[i] and 0.0 otherwise.
 */
533 static void emit_sgt( struct brw_compile
*p
,
534 const struct brw_reg
*dst
,
536 const struct brw_reg
*arg0
,
537 const struct brw_reg
*arg1
)
539 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_G
, arg0
, arg1
);
/*
 * emit_sge: set-on-greater-or-equal.  Forwards p, dst, mask, arg0 and arg1
 * to emit_sop with BRW_CONDITIONAL_GE, so dst[i] becomes 1.0 where
 * arg0[i] >= arg1[i] and 0.0 otherwise.
 */
542 static void emit_sge( struct brw_compile
*p
,
543 const struct brw_reg
*dst
,
545 const struct brw_reg
*arg0
,
546 const struct brw_reg
*arg1
)
548 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_GE
, arg0
, arg1
);
/*
 * emit_seq: set-on-equal.  Forwards p, dst, mask, arg0 and arg1 to emit_sop
 * with BRW_CONDITIONAL_EQ, so dst[i] becomes 1.0 where arg0[i] == arg1[i]
 * and 0.0 otherwise.
 */
551 static void emit_seq( struct brw_compile
*p
,
552 const struct brw_reg
*dst
,
554 const struct brw_reg
*arg0
,
555 const struct brw_reg
*arg1
)
557 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_EQ
, arg0
, arg1
);
/*
 * emit_sne: set-on-not-equal.  Forwards p, dst, mask, arg0 and arg1 to
 * emit_sop with BRW_CONDITIONAL_NEQ, so dst[i] becomes 1.0 where
 * arg0[i] != arg1[i] and 0.0 otherwise.
 */
560 static void emit_sne( struct brw_compile
*p
,
561 const struct brw_reg
*dst
,
563 const struct brw_reg
*arg0
,
564 const struct brw_reg
*arg1
)
566 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_NEQ
, arg0
, arg1
);
/*
 * emit_cmp: per-channel select, dst[i] = (arg0[i] < 0) ? arg1[i] : arg2[i].
 *
 * p    - instruction emitter.
 * dst  - dst[i] receives the selected value.
 * arg0 - value compared against the float immediate 0.
 * arg1 - value used where arg0 is negative.
 * arg2 - value used otherwise.
 *
 * Per channel: MOV arg2 into dst, brw_CMP(BRW_CONDITIONAL_L, arg0, 0) into
 * the null register, MOV arg1 into dst (this second MOV is the one issued
 * after the compare), then the predicate flag value is reset to 0xff.
 * Both MOVs are bracketed by saturate on/off according to the SATURATE bit
 * of mask.
 * NOTE(review): any per-channel writemask test is on a line missing from
 * this extract.
 */
569 static void emit_cmp( struct brw_compile
*p
,
570 const struct brw_reg
*dst
,
572 const struct brw_reg
*arg0
,
573 const struct brw_reg
*arg1
,
574 const struct brw_reg
*arg2
)
578 for (i
= 0; i
< 4; i
++) {
580 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
581 brw_MOV(p
, dst
[i
], arg2
[i
]);
582 brw_set_saturate(p
, 0);
584 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, arg0
[i
], brw_imm_f(0));
586 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
587 brw_MOV(p
, dst
[i
], arg1
[i
]);
588 brw_set_saturate(p
, 0);
589 brw_set_predicate_control_flag_value(p
, 0xff);
/*
 * emit_max: per-channel maximum of arg0[i] and arg1[i] into dst[i].
 *
 * p    - instruction emitter.
 * dst  - dst[i] receives the larger value.
 * arg0 - first operand; copied to dst unconditionally first.
 * arg1 - second operand; copied to dst after the compare.
 *
 * Per channel: MOV arg0 into dst, brw_CMP(BRW_CONDITIONAL_L, arg0, arg1)
 * into the null register, MOV arg1 into dst, then the predicate flag value
 * is reset to 0xff.  Both MOVs are bracketed by saturate on/off according
 * to the SATURATE bit of mask.
 * NOTE(review): any per-channel writemask test is on a line missing from
 * this extract.
 */
594 void emit_max(struct brw_compile
*p
,
595 const struct brw_reg
*dst
,
597 const struct brw_reg
*arg0
,
598 const struct brw_reg
*arg1
)
602 for (i
= 0; i
< 4; i
++) {
604 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
605 brw_MOV(p
, dst
[i
], arg0
[i
]);
606 brw_set_saturate(p
, 0);
608 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, arg0
[i
], arg1
[i
]);
610 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
611 brw_MOV(p
, dst
[i
], arg1
[i
]);
612 brw_set_saturate(p
, 0);
613 brw_set_predicate_control_flag_value(p
, 0xff);
/*
 * emit_min: per-channel minimum of arg0[i] and arg1[i] into dst[i].
 *
 * p    - instruction emitter.
 * dst  - dst[i] receives the smaller value.
 * arg0 - first operand; copied to dst after the compare.
 * arg1 - second operand; copied to dst unconditionally first.
 *
 * Per channel: MOV arg1 into dst, brw_CMP(BRW_CONDITIONAL_L, arg0, arg1)
 * into the null register, MOV arg0 into dst, then the predicate flag value
 * is reset to 0xff.  Both MOVs are bracketed by saturate on/off according
 * to the SATURATE bit of mask.
 * NOTE(review): any per-channel writemask test is on a line missing from
 * this extract.
 */
618 void emit_min(struct brw_compile
*p
,
619 const struct brw_reg
*dst
,
621 const struct brw_reg
*arg0
,
622 const struct brw_reg
*arg1
)
626 for (i
= 0; i
< 4; i
++) {
628 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
629 brw_MOV(p
, dst
[i
], arg1
[i
]);
630 brw_set_saturate(p
, 0);
632 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, arg0
[i
], arg1
[i
]);
634 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
635 brw_MOV(p
, dst
[i
], arg0
[i
]);
636 brw_set_saturate(p
, 0);
637 brw_set_predicate_control_flag_value(p
, 0xff);
/*
 * emit_dp3: three-component dot product of arg0 and arg1, written to a
 * single destination channel.
 *
 * p    - instruction emitter.
 * dst  - only dst[dst_chan] is written, where dst_chan is the index of the
 *        lowest WRITEMASK_XYZW bit set in mask.
 * arg0 - arg0[0..2] are read.
 * arg1 - arg1[0..2] are read.
 *
 * Returns without emitting anything when mask selects no channel, and
 * asserts that exactly one channel is selected.  Emitted as brw_MUL and
 * brw_MAC into the null register for components 0 and 1, then a final
 * brw_MAC into dst[dst_chan] for component 2; saturation follows the
 * SATURATE bit of mask and is enabled only around that final MAC.
 */
643 void emit_dp3(struct brw_compile
*p
,
644 const struct brw_reg
*dst
,
646 const struct brw_reg
*arg0
,
647 const struct brw_reg
*arg1
)
649 int dst_chan
= _mesa_ffs(mask
& WRITEMASK_XYZW
) - 1;
651 if (!(mask
& WRITEMASK_XYZW
))
652 return; /* Do not emit dead code */
654 assert(is_power_of_two(mask
& WRITEMASK_XYZW
));
656 brw_MUL(p
, brw_null_reg(), arg0
[0], arg1
[0]);
657 brw_MAC(p
, brw_null_reg(), arg0
[1], arg1
[1]);
659 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
660 brw_MAC(p
, dst
[dst_chan
], arg0
[2], arg1
[2]);
661 brw_set_saturate(p
, 0);
/*
 * emit_dp4: four-component dot product of arg0 and arg1, written to a
 * single destination channel.
 *
 * p    - instruction emitter.
 * dst  - only dst[dst_chan] is written, where dst_chan is the index of the
 *        lowest WRITEMASK_XYZW bit set in mask.
 * arg0 - arg0[0..3] are read.
 * arg1 - arg1[0..3] are read.
 *
 * Returns without emitting anything when mask selects no channel, and
 * asserts that exactly one channel is selected.  Emitted as brw_MUL plus
 * two brw_MACs into the null register for components 0 to 2, then a final
 * brw_MAC into dst[dst_chan] for component 3; saturation follows the
 * SATURATE bit of mask and is enabled only around that final MAC.
 */
665 void emit_dp4(struct brw_compile
*p
,
666 const struct brw_reg
*dst
,
668 const struct brw_reg
*arg0
,
669 const struct brw_reg
*arg1
)
671 int dst_chan
= _mesa_ffs(mask
& WRITEMASK_XYZW
) - 1;
673 if (!(mask
& WRITEMASK_XYZW
))
674 return; /* Do not emit dead code */
676 assert(is_power_of_two(mask
& WRITEMASK_XYZW
));
678 brw_MUL(p
, brw_null_reg(), arg0
[0], arg1
[0]);
679 brw_MAC(p
, brw_null_reg(), arg0
[1], arg1
[1]);
680 brw_MAC(p
, brw_null_reg(), arg0
[2], arg1
[2]);
682 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
683 brw_MAC(p
, dst
[dst_chan
], arg0
[3], arg1
[3]);
684 brw_set_saturate(p
, 0);
/*
 * emit_dph: homogeneous dot product,
 * arg0[0]*arg1[0] + arg0[1]*arg1[1] + arg0[2]*arg1[2] + arg1[3],
 * written to a single destination channel.
 *
 * p    - instruction emitter.
 * dst  - only dst[dst_chan] is written, where dst_chan is the index of the
 *        lowest WRITEMASK_XYZW bit set in mask.
 * arg0 - arg0[0..2] are read.
 * arg1 - arg1[0..3] are read; arg1[3] is added unscaled.
 *
 * Returns without emitting anything when mask selects no channel, and
 * asserts that exactly one channel is selected.  The three-component sum is
 * accumulated with brw_MUL / brw_MAC and stored unsaturated in
 * dst[dst_chan]; the closing brw_ADD of arg1[3] is the only instruction
 * emitted with saturation (per the SATURATE bit of mask).
 */
688 void emit_dph(struct brw_compile
*p
,
689 const struct brw_reg
*dst
,
691 const struct brw_reg
*arg0
,
692 const struct brw_reg
*arg1
)
694 const int dst_chan
= _mesa_ffs(mask
& WRITEMASK_XYZW
) - 1;
696 if (!(mask
& WRITEMASK_XYZW
))
697 return; /* Do not emit dead code */
699 assert(is_power_of_two(mask
& WRITEMASK_XYZW
));
701 brw_MUL(p
, brw_null_reg(), arg0
[0], arg1
[0]);
702 brw_MAC(p
, brw_null_reg(), arg0
[1], arg1
[1]);
703 brw_MAC(p
, dst
[dst_chan
], arg0
[2], arg1
[2]);
705 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
706 brw_ADD(p
, dst
[dst_chan
], dst
[dst_chan
], arg1
[3]);
707 brw_set_saturate(p
, 0);
/*
 * emit_xpd: cross product of arg0 and arg1 into dst[0..2].
 *
 * p    - instruction emitter.
 * dst  - dst[i] for i in 0..2; the W channel is never written, and the
 *        function asserts that mask does not request it.
 * arg0 - first vector.
 * arg1 - second vector.
 *
 * Per output component: brw_MUL of -arg0[i2] and arg1[i1] into the null
 * register, then brw_MAC(dst[i], arg0[i1], arg1[i2]), i.e.
 * arg0[i1]*arg1[i2] - arg0[i2]*arg1[i1].  Saturation follows the SATURATE
 * bit of mask and is enabled only around the MAC.
 * NOTE(review): the definitions of i1 and i2 and any per-channel writemask
 * test are on lines missing from this extract.
 */
711 void emit_xpd(struct brw_compile
*p
,
712 const struct brw_reg
*dst
,
714 const struct brw_reg
*arg0
,
715 const struct brw_reg
*arg1
)
719 assert((mask
& WRITEMASK_W
) != WRITEMASK_W
);
721 for (i
= 0 ; i
< 3; i
++) {
726 brw_MUL(p
, brw_null_reg(), negate(arg0
[i2
]), arg1
[i1
]);
728 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
729 brw_MAC(p
, dst
[i
], arg0
[i1
], arg1
[i2
]);
730 brw_set_saturate(p
, 0);
/*
 * emit_math1: issues a one-operand math-unit operation on arg0[0] and
 * writes the result to a single destination channel.
 *
 * c    - compile context (emitter in c->func, c->dispatch_width).
 * dst  - only dst[dst_chan] is written, where dst_chan is the index of the
 *        lowest WRITEMASK_XYZW bit set in mask; in 16-wide dispatch a
 *        second message targets offset(dst[dst_chan], 1).
 * arg0 - arg0[0] is copied to message register 2 as the operand.
 *
 * Callers in this file pass the math function (BRW_MATH_FUNCTION_INV, RSQ,
 * SIN, COS, EXP, LOG) as the second argument.  Returns without emitting
 * anything when mask selects no channel, and asserts exactly one channel is
 * selected.  The SATURATE bit of mask is translated to
 * BRW_MATH_SATURATE_SATURATE or BRW_MATH_SATURATE_NONE.  The math messages
 * are sent inside a pushed instruction state with compression disabled;
 * when dispatch_width is 16 a second message is sent under
 * BRW_COMPRESSION_2NDHALF.  Both use BRW_MATH_DATA_VECTOR and
 * BRW_MATH_PRECISION_FULL.
 * NOTE(review): the math call heads and their remaining operands are on
 * lines missing from this extract.
 */
736 void emit_math1(struct brw_wm_compile
*c
,
738 const struct brw_reg
*dst
,
740 const struct brw_reg
*arg0
)
742 struct brw_compile
*p
= &c
->func
;
743 int dst_chan
= _mesa_ffs(mask
& WRITEMASK_XYZW
) - 1;
744 GLuint saturate
= ((mask
& SATURATE
) ?
745 BRW_MATH_SATURATE_SATURATE
:
746 BRW_MATH_SATURATE_NONE
);
748 if (!(mask
& WRITEMASK_XYZW
))
749 return; /* Do not emit dead code */
751 assert(is_power_of_two(mask
& WRITEMASK_XYZW
));
753 /* If compressed, this will write message reg 2,3 from arg0.x's 16
756 brw_MOV(p
, brw_message_reg(2), arg0
[0]);
758 /* Send two messages to perform all 16 operations:
760 brw_push_insn_state(p
);
761 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
768 BRW_MATH_DATA_VECTOR
,
769 BRW_MATH_PRECISION_FULL
);
771 if (c
->dispatch_width
== 16) {
772 brw_set_compression_control(p
, BRW_COMPRESSION_2NDHALF
);
774 offset(dst
[dst_chan
],1),
779 BRW_MATH_DATA_VECTOR
,
780 BRW_MATH_PRECISION_FULL
);
782 brw_pop_insn_state(p
);
/*
 * emit_math2: issues a two-operand math-unit operation on arg0[0] and
 * arg1[0] and writes the result to a single destination channel.
 *
 * c    - compile context (emitter in c->func, c->dispatch_width).
 * dst  - only dst[dst_chan] is written, where dst_chan is the index of the
 *        lowest WRITEMASK_XYZW bit set in mask; in 16-wide dispatch a
 *        second message targets offset(dst[dst_chan], 1).
 * arg0 - first operand; arg0[0] goes to message register 2 and, in 16-wide
 *        dispatch, sechalf(arg0[0]) goes to message register 4.
 * arg1 - second operand; arg1[0] goes to message register 3 and, in 16-wide
 *        dispatch, sechalf(arg1[0]) goes to message register 5.
 *
 * Callers in this file pass BRW_MATH_FUNCTION_POW as the second argument.
 * Returns without emitting anything when mask selects no channel, and
 * asserts exactly one channel is selected.  All work happens inside a
 * pushed instruction state; each first-half MOV and message is issued with
 * BRW_COMPRESSION_NONE and each second-half one with
 * BRW_COMPRESSION_2NDHALF.  The SATURATE bit of mask is translated to
 * BRW_MATH_SATURATE_SATURATE or BRW_MATH_SATURATE_NONE; both messages use
 * BRW_MATH_DATA_VECTOR and BRW_MATH_PRECISION_FULL.
 * NOTE(review): the math call heads and their remaining operands are on
 * lines missing from this extract.
 */
786 void emit_math2(struct brw_wm_compile
*c
,
788 const struct brw_reg
*dst
,
790 const struct brw_reg
*arg0
,
791 const struct brw_reg
*arg1
)
793 struct brw_compile
*p
= &c
->func
;
794 int dst_chan
= _mesa_ffs(mask
& WRITEMASK_XYZW
) - 1;
795 GLuint saturate
= ((mask
& SATURATE
) ?
796 BRW_MATH_SATURATE_SATURATE
:
797 BRW_MATH_SATURATE_NONE
);
799 if (!(mask
& WRITEMASK_XYZW
))
800 return; /* Do not emit dead code */
802 assert(is_power_of_two(mask
& WRITEMASK_XYZW
));
804 brw_push_insn_state(p
);
806 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
807 brw_MOV(p
, brw_message_reg(2), arg0
[0]);
808 if (c
->dispatch_width
== 16) {
809 brw_set_compression_control(p
, BRW_COMPRESSION_2NDHALF
);
810 brw_MOV(p
, brw_message_reg(4), sechalf(arg0
[0]));
813 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
814 brw_MOV(p
, brw_message_reg(3), arg1
[0]);
815 if (c
->dispatch_width
== 16) {
816 brw_set_compression_control(p
, BRW_COMPRESSION_2NDHALF
);
817 brw_MOV(p
, brw_message_reg(5), sechalf(arg1
[0]));
820 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
827 BRW_MATH_DATA_VECTOR
,
828 BRW_MATH_PRECISION_FULL
);
830 /* Send two messages to perform all 16 operations:
832 if (c
->dispatch_width
== 16) {
833 brw_set_compression_control(p
, BRW_COMPRESSION_2NDHALF
);
835 offset(dst
[dst_chan
],1),
840 BRW_MATH_DATA_VECTOR
,
841 BRW_MATH_PRECISION_FULL
);
843 brw_pop_insn_state(p
);
/*
 * emit_tex: loads texture coordinates (and, for shadow lookups, the
 * reference value) into consecutive message registers starting at m2 and
 * issues a sampler message.
 *
 * c             - compile context; c->dispatch_width picks SIMD16 or SIMD8
 *                 (destination view vec16 or vec8, simd_mode accordingly).
 * depth_payload - passed to the sampler call retyped as UW.
 *
 * Other inputs, as used by the visible code and by the call in brw_wm_emit:
 * dst (dst[0] is the response destination), dst_flags (its WRITEMASK_XYZW
 * bits are forwarded), arg (coordinates; arg[2] doubles as the shadow
 * reference), a texture target index switched on TEXTURE_*_INDEX, a
 * sampler number used for SURF_INDEX_TEXTURE, and a shadow flag.
 *
 * Coordinates: nr_texcoords registers are written, each either arg[i] or
 * the float immediate 0, advancing cur_mrf by mrf_per_channel each time.
 * Per the surviving comments, pre-Ironlake 8-wide sampling always takes
 * u,v,r, and shadow comparisons must supply u,v,r.  In the shadow path
 * Ironlake first gets a zero "cube map array index" register, pre-Ironlake
 * 8-wide gets a zero "LOD bias" register, and then arg[2] is written.
 * Message type: the IGDNG sample or sample-compare type on Ironlake,
 * otherwise BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE for 16-wide shadow
 * lookups and BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE for the rest (older parts
 * infer compare and width from message length, per the comment).
 * NOTE(review): the values of mrf_per_channel, response_length, emit and
 * nr_texcoords, the switch head, the branch conditions inside the loops and
 * the sampler call head are on lines missing from this extract.
 */
847 void emit_tex(struct brw_wm_compile
*c
,
851 struct brw_reg depth_payload
,
856 struct brw_compile
*p
= &c
->func
;
857 struct intel_context
*intel
= &p
->brw
->intel
;
858 struct brw_reg dst_retyped
;
859 GLuint cur_mrf
= 2, response_length
;
860 GLuint i
, nr_texcoords
;
863 GLuint mrf_per_channel
;
866 if (c
->dispatch_width
== 16) {
869 dst_retyped
= retype(vec16(dst
[0]), BRW_REGISTER_TYPE_UW
);
870 simd_mode
= BRW_SAMPLER_SIMD_MODE_SIMD16
;
874 dst_retyped
= retype(vec8(dst
[0]), BRW_REGISTER_TYPE_UW
);
875 simd_mode
= BRW_SAMPLER_SIMD_MODE_SIMD8
;
878 /* How many input regs are there?
881 case TEXTURE_1D_INDEX
:
885 case TEXTURE_2D_INDEX
:
886 case TEXTURE_RECT_INDEX
:
890 case TEXTURE_3D_INDEX
:
891 case TEXTURE_CUBE_INDEX
:
892 emit
= WRITEMASK_XYZ
;
896 /* unexpected target */
900 /* Pre-Ironlake, the 8-wide sampler always took u,v,r. */
901 if (!intel
->is_ironlake
&& c
->dispatch_width
== 8)
904 /* For shadow comparisons, we have to supply u,v,r. */
908 /* Emit the texcoords. */
909 for (i
= 0; i
< nr_texcoords
; i
++) {
911 brw_MOV(p
, brw_message_reg(cur_mrf
), arg
[i
]);
913 brw_MOV(p
, brw_message_reg(cur_mrf
), brw_imm_f(0));
914 cur_mrf
+= mrf_per_channel
;
917 /* Fill in the shadow comparison reference value. */
919 if (intel
->is_ironlake
) {
920 /* Fill in the cube map array index value. */
921 brw_MOV(p
, brw_message_reg(cur_mrf
), brw_imm_f(0));
922 cur_mrf
+= mrf_per_channel
;
923 } else if (c
->dispatch_width
== 8) {
924 /* Fill in the LOD bias value. */
925 brw_MOV(p
, brw_message_reg(cur_mrf
), brw_imm_f(0));
926 cur_mrf
+= mrf_per_channel
;
928 brw_MOV(p
, brw_message_reg(cur_mrf
), arg
[2]);
929 cur_mrf
+= mrf_per_channel
;
932 if (intel
->is_ironlake
) {
934 msg_type
= BRW_SAMPLER_MESSAGE_SAMPLE_COMPARE_IGDNG
;
936 msg_type
= BRW_SAMPLER_MESSAGE_SAMPLE_IGDNG
;
938 /* Note that G45 and older determines shadow compare and dispatch width
939 * from message length for most messages.
941 if (c
->dispatch_width
== 16 && shadow
)
942 msg_type
= BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE
;
944 msg_type
= BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE
;
950 retype(depth_payload
, BRW_REGISTER_TYPE_UW
),
951 SURF_INDEX_TEXTURE(sampler
),
953 dst_flags
& WRITEMASK_XYZW
,
/*
 * emit_txb: LOD-biased texture sample.  Loads three coordinate registers
 * plus arg[3] into message registers starting at m2 and issues a sampler
 * message.
 *
 * c             - compile context (emitter in c->func, c->dispatch_width).
 * depth_payload - passed to the sampler call retyped as UW.
 *
 * Other inputs, as used by the visible code and by the call in brw_wm_emit:
 * dst (dst[0] is the response destination), dst_flags (its WRITEMASK_XYZW
 * bits are forwarded), arg (arg[0..2] coordinates, arg[3] written as the
 * fourth message channel), a texture target index switched on
 * TEXTURE_*_INDEX, and a sampler number used for SURF_INDEX_TEXTURE.
 *
 * Message type and destination width: when dispatch is 16-wide or the chip
 * is not Ironlake, the destination is a vec16 view and the message type is
 * BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_IGDNG on Ironlake or
 * BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS otherwise; the remaining case uses
 * BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_IGDNG with a vec8 view.  The surviving
 * comment explains that G45 and older lack an 8-wide LOD-bias message, so
 * the 16-wide one is used with the second halves left undefined.
 * Coordinates: 1D writes arg[0], 0, 0; 2D and RECT write arg[0], arg[1], 0;
 * 3D and CUBE write arg[0..2]; each lands at m(2 + k * mrf_per_channel).
 * msgLength is 2 + 4 * mrf_per_channel - 1.
 * NOTE(review): the values of mrf_per_channel and response_length, the
 * switch head and the sampler call head are on lines missing from this
 * extract.  The last visible sampler argument is
 * BRW_SAMPLER_SIMD_MODE_SIMD16 even though a vec8 destination path exists;
 * confirm against the full source whether that is intended.
 */
963 void emit_txb(struct brw_wm_compile
*c
,
967 struct brw_reg depth_payload
,
971 struct brw_compile
*p
= &c
->func
;
972 struct intel_context
*intel
= &p
->brw
->intel
;
975 GLuint mrf_per_channel
;
976 GLuint response_length
;
977 struct brw_reg dst_retyped
;
979 /* The G45 and older chipsets don't support 8-wide dispatch for LOD biased
980 * samples, so we'll use the 16-wide instruction, leave the second halves
981 * undefined, and trust the execution mask to keep the undefined pixels
984 if (c
->dispatch_width
== 16 || !intel
->is_ironlake
) {
985 if (intel
->is_ironlake
)
986 msg_type
= BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_IGDNG
;
988 msg_type
= BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS
;
990 dst_retyped
= retype(vec16(dst
[0]), BRW_REGISTER_TYPE_UW
);
993 msg_type
= BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_IGDNG
;
995 dst_retyped
= retype(vec8(dst
[0]), BRW_REGISTER_TYPE_UW
);
999 /* Shadow ignored for txb. */
1001 case TEXTURE_1D_INDEX
:
1002 brw_MOV(p
, brw_message_reg(2 + 0 * mrf_per_channel
), arg
[0]);
1003 brw_MOV(p
, brw_message_reg(2 + 1 * mrf_per_channel
), brw_imm_f(0));
1004 brw_MOV(p
, brw_message_reg(2 + 2 * mrf_per_channel
), brw_imm_f(0));
1006 case TEXTURE_2D_INDEX
:
1007 case TEXTURE_RECT_INDEX
:
1008 brw_MOV(p
, brw_message_reg(2 + 0 * mrf_per_channel
), arg
[0]);
1009 brw_MOV(p
, brw_message_reg(2 + 1 * mrf_per_channel
), arg
[1]);
1010 brw_MOV(p
, brw_message_reg(2 + 2 * mrf_per_channel
), brw_imm_f(0));
1012 case TEXTURE_3D_INDEX
:
1013 case TEXTURE_CUBE_INDEX
:
1014 brw_MOV(p
, brw_message_reg(2 + 0 * mrf_per_channel
), arg
[0]);
1015 brw_MOV(p
, brw_message_reg(2 + 1 * mrf_per_channel
), arg
[1]);
1016 brw_MOV(p
, brw_message_reg(2 + 2 * mrf_per_channel
), arg
[2]);
1019 /* unexpected target */
1023 brw_MOV(p
, brw_message_reg(2 + 3 * mrf_per_channel
), arg
[3]);
1024 msgLength
= 2 + 4 * mrf_per_channel
- 1;
1029 retype(depth_payload
, BRW_REGISTER_TYPE_UW
),
1030 SURF_INDEX_TEXTURE(sampler
),
1032 dst_flags
& WRITEMASK_XYZW
,
1038 BRW_SAMPLER_SIMD_MODE_SIMD16
);
/*
 * emit_lit: lighting-coefficient helper that produces only the Y and Z
 * result channels; the function asserts that mask requests neither X nor W.
 *
 * c    - compile context (emitter in c->func; also forwarded to emit_math2).
 * dst  - dst[1] (Y) and dst[2] (Z) are written.
 * arg0 - arg0[0] is the value copied to Y and tested against zero.
 *
 * Y: dst[1] = arg0[0], saturated according to the SATURATE bit of mask.
 * Z: computed by emit_math2 with BRW_MATH_FUNCTION_POW, writing one channel
 *    (WRITEMASK_X plus the SATURATE bit); the destination and operand
 *    arguments of that call are on lines missing from this extract.
 * Afterwards brw_CMP(BRW_CONDITIONAL_LE, arg0[0], 0) is emitted and the
 * requested Y and Z channels are overwritten with 0 by the MOVs that
 * follow; predication is then set back to BRW_PREDICATE_NONE.  The
 * surviving comment explains this compare-and-overwrite replaces a
 * conditional block because 16-wide conditionals appeared to hang the
 * hardware.
 */
1042 static void emit_lit(struct brw_wm_compile
*c
,
1043 const struct brw_reg
*dst
,
1045 const struct brw_reg
*arg0
)
1047 struct brw_compile
*p
= &c
->func
;
1049 assert((mask
& WRITEMASK_XW
) == 0);
1051 if (mask
& WRITEMASK_Y
) {
1052 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
1053 brw_MOV(p
, dst
[1], arg0
[0]);
1054 brw_set_saturate(p
, 0);
1057 if (mask
& WRITEMASK_Z
) {
1058 emit_math2(c
, BRW_MATH_FUNCTION_POW
,
1060 WRITEMASK_X
| (mask
& SATURATE
),
1065 /* Ordinarily you'd use an iff statement to skip or shortcircuit
1066 * some of the POW calculations above, but 16-wide iff statements
1067 * seem to lock c1 hardware, so this is a nasty workaround:
1069 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_LE
, arg0
[0], brw_imm_f(0));
1071 if (mask
& WRITEMASK_Y
)
1072 brw_MOV(p
, dst
[1], brw_imm_f(0));
1074 if (mask
& WRITEMASK_Z
)
1075 brw_MOV(p
, dst
[2], brw_imm_f(0));
1077 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
/*
 * emit_kil: conditional pixel kill.  For each of the four channels of arg0
 * it compares the channel against 0 with BRW_CONDITIONAL_GE and ANDs the
 * resulting flag register into r0.0 (viewed as an unsigned word).
 *
 * c    - compile context (emitter in c->func).
 * arg0 - the four values tested.
 *
 * Each iteration runs inside a pushed instruction state: after the compare
 * the predicate flag value is reset to 0xff and compression is disabled
 * before the AND, so the AND itself is a single unpredicated instruction.
 * The payload comment earlier in this file describes R0.0 as the pixel
 * mask, so clearing bits there removes the failing pixels.
 */
1081 /* Kill pixel - set execution mask to zero for those pixels which
1084 static void emit_kil( struct brw_wm_compile
*c
,
1085 struct brw_reg
*arg0
)
1087 struct brw_compile
*p
= &c
->func
;
1088 struct brw_reg r0uw
= retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW
);
1091 /* XXX - usually won't need 4 compares!
1093 for (i
= 0; i
< 4; i
++) {
1094 brw_push_insn_state(p
);
1095 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_GE
, arg0
[i
], brw_imm_f(0));
1096 brw_set_predicate_control_flag_value(p
, 0xff);
1097 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1098 brw_AND(p
, r0uw
, brw_flag_reg(), r0uw
);
1099 brw_pop_insn_state(p
);
/*
 * emit_kil_nv: unconditional kill of the pixels currently executing.
 *
 * c - compile context; c->emit_mask_reg is used as scratch.
 *
 * With masking disabled (BRW_MASK_DISABLE) inside a pushed instruction
 * state, it stores the bitwise NOT of brw_mask_reg(1) (annotated IMASK) in
 * c->emit_mask_reg and ANDs that into r0.0 viewed as an unsigned word,
 * clearing the bits of the channels that are executing.
 */
1103 /* KIL_NV kills the pixels that are currently executing, not based on a test
1106 static void emit_kil_nv( struct brw_wm_compile
*c
)
1108 struct brw_compile
*p
= &c
->func
;
1109 struct brw_reg r0uw
= retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW
);
1111 brw_push_insn_state(p
);
1112 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1113 brw_NOT(p
, c
->emit_mask_reg
, brw_mask_reg(1)); /* IMASK */
1114 brw_AND(p
, r0uw
, c
->emit_mask_reg
, r0uw
);
1115 brw_pop_insn_state(p
);
/*
 * fire_fb_write: copies the r1 payload header into the message and sends
 * the framebuffer write.
 *
 * c - compile context (emitter in c->func, c->dispatch_width).
 *
 * Callers in this file invoke it as fire_fb_write(c, base_reg, nr, target,
 * eot).  The message destination is the null register viewed as UW, vec16
 * for 16-wide dispatch and vec8 otherwise.  Inside a pushed state with
 * masking and compression disabled, r1 (brw_vec8_grf(1, 0)) is written to
 * message register base_reg + 1 as pass-through control information.  The
 * send uses r0 (brw_vec8_grf(0, 0)) retyped as UW as one of its operands.
 * NOTE(review): the remaining parameters, the MOV head and the send call
 * with its other arguments are on lines missing from this extract.
 */
1118 static void fire_fb_write( struct brw_wm_compile
*c
,
1124 struct brw_compile
*p
= &c
->func
;
1127 if (c
->dispatch_width
== 16)
1128 dst
= retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW
);
1130 dst
= retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW
);
1132 /* Pass through control information:
1134 /* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */
1136 brw_push_insn_state(p
);
1137 brw_set_mask_control(p
, BRW_MASK_DISABLE
); /* ? */
1138 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1140 brw_message_reg(base_reg
+ 1),
1141 brw_vec8_grf(1, 0));
1142 brw_pop_insn_state(p
);
1145 /* Send framebuffer write message: */
1146 /* send (16) null.0<1>:uw m0 r0.0<8;8,1>:uw 0x85a04000:ud { Align1 EOT } */
1150 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW
),
/*
 * emit_aa: copies the antialiasing/destination-stencil payload register
 * into a message register.
 *
 * c    - compile context; c->key.aa_dest_stencil_reg selects the source.
 * arg1 - payload registers; the source is arg1[aa_dest_stencil_reg / 2]
 *        advanced by offset(..., aa_dest_stencil_reg % 2).
 *
 * The destination message register number (reg) is the third argument at
 * the call sites in this file (always 2 there).  The MOV is emitted inside
 * a pushed instruction state with compression disabled.
 */
1158 static void emit_aa( struct brw_wm_compile
*c
,
1159 struct brw_reg
*arg1
,
1162 struct brw_compile
*p
= &c
->func
;
1163 GLuint comp
= c
->key
.aa_dest_stencil_reg
/ 2;
1164 GLuint off
= c
->key
.aa_dest_stencil_reg
% 2;
1165 struct brw_reg aa
= offset(arg1
[comp
], off
);
1167 brw_push_insn_state(p
);
1168 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
); /* ?? */
1169 brw_MOV(p
, brw_message_reg(reg
), aa
);
1170 brw_pop_insn_state(p
);
/*
 * emit_fb_write: assembles the framebuffer-write message (colour, optional
 * depth, optional destination depth, optional AA data) in message
 * registers and sends it through fire_fb_write.
 *
 * c    - compile context; reads c->dispatch_width and the key fields
 *        aa_dest_stencil_reg, source_depth_to_render_target,
 *        computes_depth, dest_depth_reg and runtime_check_aads_emit.
 * arg0 - fragment colour, one register per channel.
 * arg1 - pass-through payload registers (depth, destination depth, AA).
 * arg2 - shader-computed depth; arg2[2] is used when computes_depth is set.
 *
 * The call in brw_wm_emit also passes the render target and an
 * end-of-thread flag, which are forwarded to fire_fb_write.
 *
 * Colour: for each of the four channels, 16-wide dispatch on hardware with
 * has_compr4 uses a single MOV to message register nr + channel with bit 7
 * set (COMPR4: second half lands at destination + 4, per the comment);
 * otherwise an uncompressed MOV to nr + channel is followed, in 16-wide
 * dispatch, by a BRW_COMPRESSION_2NDHALF MOV of sechalf(arg0[channel]) to
 * nr + channel + 4.
 * Depth: when source_depth_to_render_target is set, either arg2[2] or
 * arg1[1] is written to message register nr.
 * Destination depth: when dest_depth_reg is set, data from
 * arg1[dest_depth_reg / 2] is copied, with a two-MOV uncompressed variant
 * (offset(arg1[comp], 1) and arg1[comp + 1]) and a single-MOV variant.
 * Send: without runtime_check_aads_emit, emit_aa fills message register 2
 * when aa_dest_stencil_reg is set and fire_fb_write(c, 0, nr, ...) is
 * called.  With it, a conditional (BRW_CONDITIONAL_Z) instruction reading
 * element 6 of r1 guards a forward brw_JMPI: one path emits the AA data and
 * fires from base 0, the other fires from base 1 with nr - 1, skipping the
 * slot reserved for AA.
 * NOTE(review): how nr is initialised and advanced, the MOV heads in the
 * colour loop, the condition choosing between the destination-depth
 * variants and the instruction that sets the jump condition are on lines
 * missing from this extract.
 */
1174 /* Post-fragment-program processing. Send the results to the
1176 * \param arg0 the fragment color
1177 * \param arg1 the pass-through depth value
1178 * \param arg2 the shader-computed depth value
1180 void emit_fb_write(struct brw_wm_compile
*c
,
1181 struct brw_reg
*arg0
,
1182 struct brw_reg
*arg1
,
1183 struct brw_reg
*arg2
,
1187 struct brw_compile
*p
= &c
->func
;
1188 struct brw_context
*brw
= p
->brw
;
1192 /* Reserve a space for AA - may not be needed:
1194 if (c
->key
.aa_dest_stencil_reg
)
1197 /* I don't really understand how this achieves the color interleave
1198 * (ie RGBARGBA) in the result: [Do the saturation here]
1200 brw_push_insn_state(p
);
1202 for (channel
= 0; channel
< 4; channel
++) {
1203 if (c
->dispatch_width
== 16 && brw
->has_compr4
) {
1204 /* By setting the high bit of the MRF register number, we indicate
1205 * that we want COMPR4 mode - instead of doing the usual destination
1206 * + 1 for the second half we get destination + 4.
1209 brw_message_reg(nr
+ channel
+ (1 << 7)),
1212 /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */
1213 /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */
1214 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1216 brw_message_reg(nr
+ channel
),
1219 if (c
->dispatch_width
== 16) {
1220 brw_set_compression_control(p
, BRW_COMPRESSION_2NDHALF
);
1222 brw_message_reg(nr
+ channel
+ 4),
1223 sechalf(arg0
[channel
]));
1227 /* skip over the regs populated above:
1230 brw_pop_insn_state(p
);
1232 if (c
->key
.source_depth_to_render_target
)
1234 if (c
->key
.computes_depth
)
1235 brw_MOV(p
, brw_message_reg(nr
), arg2
[2]);
1237 brw_MOV(p
, brw_message_reg(nr
), arg1
[1]); /* ? */
1242 if (c
->key
.dest_depth_reg
)
1244 GLuint comp
= c
->key
.dest_depth_reg
/ 2;
1245 GLuint off
= c
->key
.dest_depth_reg
% 2;
1248 brw_push_insn_state(p
);
1249 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1251 brw_MOV(p
, brw_message_reg(nr
), offset(arg1
[comp
],1));
1253 brw_MOV(p
, brw_message_reg(nr
+1), arg1
[comp
+1]);
1254 brw_pop_insn_state(p
);
1257 brw_MOV(p
, brw_message_reg(nr
), arg1
[comp
]);
1262 if (!c
->key
.runtime_check_aads_emit
) {
1263 if (c
->key
.aa_dest_stencil_reg
)
1264 emit_aa(c
, arg1
, 2);
1266 fire_fb_write(c
, 0, nr
, target
, eot
);
1269 struct brw_reg v1_null_ud
= vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD
));
1270 struct brw_reg ip
= brw_ip_reg();
1271 struct brw_instruction
*jmp
;
1273 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1274 brw_set_conditionalmod(p
, BRW_CONDITIONAL_Z
);
1277 get_element_ud(brw_vec8_grf(1,0), 6),
1280 jmp
= brw_JMPI(p
, ip
, ip
, brw_imm_w(0));
1282 emit_aa(c
, arg1
, 2);
1283 fire_fb_write(c
, 0, nr
, target
, eot
);
1284 /* note - thread killed in subroutine */
1286 brw_land_fwd_jump(p
, jmp
);
1288 /* ELSE: Shuffle up one register to fill in the hole left for AA:
1290 fire_fb_write(c
, 1, nr
-1, target
, eot
);
/*
 * emit_spill: writes a general register out to scratch memory.
 *
 * c - compile context (emitter in c->func).
 *
 * spill_values in this file calls it as emit_spill(c, hw_reg, spill_slot).
 * The register is first copied to message register 2; the scratch write
 * that follows takes r0 (brw_vec8_grf(0, 0) widened to vec16 and retyped as
 * UW) as one of its operands.
 * NOTE(review): the scratch-write call head and its other arguments,
 * including how the slot becomes an offset, are on lines missing from this
 * extract.
 */
1295 * Move a GPR to scratch memory.
1297 static void emit_spill( struct brw_wm_compile
*c
,
1301 struct brw_compile
*p
= &c
->func
;
1304 mov (16) m2.0<1>:ud r2.0<8;8,1>:ud { Align1 Compr }
1306 brw_MOV(p
, brw_message_reg(2), reg
);
1309 mov (1) r0.2<1>:d 0x00000080:d { Align1 NoMask }
1310 send (16) null.0<1>:uw m1 r0.0<8;8,1>:uw 0x053003ff:ud { Align1 }
1313 retype(vec16(brw_vec8_grf(0, 0)), BRW_REGISTER_TYPE_UW
),
/*
 * emit_unspill: reloads a general register from scratch memory.
 *
 * c - compile context (emitter in c->func).
 *
 * The visible code has two outcomes: a MOV of the float immediate 0 into
 * reg, placed under the comment "Slot 0 is the undef value", and a scratch
 * read whose operand list includes reg widened to vec16 and retyped as UW.
 * NOTE(review): the remaining parameters, the test that selects between
 * the two outcomes and the scratch-read call head are on lines missing
 * from this extract.
 */
1319 * Load a GPR from scratch memory.
1321 static void emit_unspill( struct brw_wm_compile
*c
,
1325 struct brw_compile
*p
= &c
->func
;
1327 /* Slot 0 is the undef value.
1330 brw_MOV(p
, reg
, brw_imm_f(0));
1335 mov (1) r0.2<1>:d 0x000000c0:d { Align1 NoMask }
1336 send (16) r110.0<1>:uw m1 r0.0<8;8,1>:uw 0x041243ff:ud { Align1 }
1340 retype(vec16(reg
), BRW_REGISTER_TYPE_UW
),
/*
 * get_argument_regs: resolves the four channel references of one
 * instruction source into hardware registers.
 *
 * c    - compile context.
 * arg  - array of four brw_wm_ref pointers.
 * regs - output; regs[i] is set to arg[i]->hw_reg, or to brw_null_reg() on
 *        the other visible assignment.
 *
 * When arg[i]->unspill_reg is non-zero, a call taking
 * brw_vec8_grf(arg[i]->unspill_reg, 0) and arg[i]->value->spill_slot is
 * made first (per the header comment, this reloads the value from scratch
 * memory).
 * NOTE(review): the callee name and the test that selects the null-register
 * assignment (presumably a NULL arg[i]) are on lines missing from this
 * extract.
 */
1346 * Retrieve up to 4 GEN4 register pairs for the given wm reg:
1347 * Args with unspill_reg != 0 will be loaded from scratch memory.
1349 static void get_argument_regs( struct brw_wm_compile
*c
,
1350 struct brw_wm_ref
*arg
[],
1351 struct brw_reg
*regs
)
1355 for (i
= 0; i
< 4; i
++) {
1357 if (arg
[i
]->unspill_reg
)
1359 brw_vec8_grf(arg
[i
]->unspill_reg
, 0),
1360 arg
[i
]->value
->spill_slot
);
1362 regs
[i
] = arg
[i
]->hw_reg
;
1365 regs
[i
] = brw_null_reg();
/*
 * spill_values: walks an array of brw_wm_value entries and calls
 * emit_spill(c, hw_reg, spill_slot) for every entry whose spill_slot is
 * non-zero.
 *
 * c      - compile context, forwarded to emit_spill.
 * values - array of values to inspect.
 *
 * The element count (nr) bounds the loop; brw_wm_emit passes it as the
 * third argument.
 */
1372 * For values that have a spill_slot!=0, write those regs to scratch memory.
1374 static void spill_values( struct brw_wm_compile
*c
,
1375 struct brw_wm_value
*values
,
1380 for (i
= 0; i
< nr
; i
++)
1381 if (values
[i
].spill_slot
)
1382 emit_spill(c
, values
[i
].hw_reg
, values
[i
].spill_slot
);
/*
 * brw_wm_emit: top-level code generator.  Walks c->instruction[0 ..
 * c->nr_insns) and emits native instructions for each one by dispatching
 * on inst->opcode to the emit_* helpers in this file.
 *
 * c - compile context; c->func is the instruction emitter.
 *
 * Setup: compression is set to BRW_COMPRESSION_COMPRESSED, then
 * spill_values is run over the depth payload (4 entries), the constant
 * registers (c->creg, c->nr_creg entries) and the interpolation payload
 * (FRAG_ATTRIB_MAX entries).
 * Per instruction: the three sources are resolved with get_argument_regs
 * into args[0..2]; dst[i] is inst->dst[i]->hw_reg or the null register;
 * dst_flags starts as inst->writemask and may gain the SATURATE bit; the
 * opcode switch then calls the matching helper.  SCS is expanded into
 * separate COS (X) and SIN (Y) emit_math1 calls, each writing one channel.
 * Unknown opcodes are reported with _mesa_printf.  After the switch, each
 * destination that has a spill_slot is passed, with its hw_reg and slot, to
 * a call whose name is not visible (presumably emit_spill).
 * When INTEL_DEBUG has DEBUG_WM set, the generated instructions in
 * p->store[0 .. p->nr_insn) are disassembled to stderr.
 * NOTE(review): most case labels, the break statements, the condition that
 * adds SATURATE, the NULL test on inst->dst[i] and the trailing arguments
 * of the emit_tex call are on lines missing from this extract.
 */
1386 /* Emit the fragment program instructions here.
1388 void brw_wm_emit( struct brw_wm_compile
*c
)
1390 struct brw_compile
*p
= &c
->func
;
1393 brw_set_compression_control(p
, BRW_COMPRESSION_COMPRESSED
);
1395 /* Check if any of the payload regs need to be spilled:
1397 spill_values(c
, c
->payload
.depth
, 4);
1398 spill_values(c
, c
->creg
, c
->nr_creg
);
1399 spill_values(c
, c
->payload
.input_interp
, FRAG_ATTRIB_MAX
);
1402 for (insn
= 0; insn
< c
->nr_insns
; insn
++) {
1404 struct brw_wm_instruction
*inst
= &c
->instruction
[insn
];
1405 struct brw_reg args
[3][4], dst
[4];
1406 GLuint i
, dst_flags
;
1408 /* Get argument regs:
1410 for (i
= 0; i
< 3; i
++)
1411 get_argument_regs(c
, inst
->src
[i
], args
[i
]);
1415 for (i
= 0; i
< 4; i
++)
1417 dst
[i
] = inst
->dst
[i
]->hw_reg
;
1419 dst
[i
] = brw_null_reg();
1423 dst_flags
= inst
->writemask
;
1425 dst_flags
|= SATURATE
;
1427 switch (inst
->opcode
) {
1428 /* Generated instructions for calculating triangle interpolants:
1431 emit_pixel_xy(c
, dst
, dst_flags
);
1435 emit_delta_xy(p
, dst
, dst_flags
, args
[0]);
1439 emit_wpos_xy(c
, dst
, dst_flags
, args
[0]);
1443 emit_pixel_w(c
, dst
, dst_flags
, args
[0], args
[1]);
1447 emit_linterp(p
, dst
, dst_flags
, args
[0], args
[1]);
1451 emit_pinterp(p
, dst
, dst_flags
, args
[0], args
[1], args
[2]);
1455 emit_cinterp(p
, dst
, dst_flags
, args
[0]);
1459 emit_fb_write(c
, args
[0], args
[1], args
[2], inst
->target
, inst
->eot
);
1462 case WM_FRONTFACING
:
1463 emit_frontfacing(p
, dst
, dst_flags
);
1466 /* Straightforward arithmetic:
1469 emit_alu2(p
, brw_ADD
, dst
, dst_flags
, args
[0], args
[1]);
1473 emit_alu1(p
, brw_FRC
, dst
, dst_flags
, args
[0]);
1477 emit_alu1(p
, brw_RNDD
, dst
, dst_flags
, args
[0]);
1481 emit_ddxy(p
, dst
, dst_flags
, GL_TRUE
, args
[0]);
1485 emit_ddxy(p
, dst
, dst_flags
, GL_FALSE
, args
[0]);
1489 emit_dp3(p
, dst
, dst_flags
, args
[0], args
[1]);
1493 emit_dp4(p
, dst
, dst_flags
, args
[0], args
[1]);
1497 emit_dph(p
, dst
, dst_flags
, args
[0], args
[1]);
1501 emit_alu1(p
, brw_RNDZ
, dst
, dst_flags
, args
[0]);
1505 emit_lrp(p
, dst
, dst_flags
, args
[0], args
[1], args
[2]);
1509 emit_mad(p
, dst
, dst_flags
, args
[0], args
[1], args
[2]);
1514 emit_alu1(p
, brw_MOV
, dst
, dst_flags
, args
[0]);
1518 emit_alu2(p
, brw_MUL
, dst
, dst_flags
, args
[0], args
[1]);
1522 emit_xpd(p
, dst
, dst_flags
, args
[0], args
[1]);
1525 /* Higher math functions:
1528 emit_math1(c
, BRW_MATH_FUNCTION_INV
, dst
, dst_flags
, args
[0]);
1532 emit_math1(c
, BRW_MATH_FUNCTION_RSQ
, dst
, dst_flags
, args
[0]);
1536 emit_math1(c
, BRW_MATH_FUNCTION_SIN
, dst
, dst_flags
, args
[0]);
1540 emit_math1(c
, BRW_MATH_FUNCTION_COS
, dst
, dst_flags
, args
[0]);
1544 emit_math1(c
, BRW_MATH_FUNCTION_EXP
, dst
, dst_flags
, args
[0]);
1548 emit_math1(c
, BRW_MATH_FUNCTION_LOG
, dst
, dst_flags
, args
[0]);
1552 /* There is an scs math function, but it would need some
1553 * fixup for 16-element execution.
1555 if (dst_flags
& WRITEMASK_X
)
1556 emit_math1(c
, BRW_MATH_FUNCTION_COS
, dst
, (dst_flags
&SATURATE
)|WRITEMASK_X
, args
[0]);
1557 if (dst_flags
& WRITEMASK_Y
)
1558 emit_math1(c
, BRW_MATH_FUNCTION_SIN
, dst
+1, (dst_flags
&SATURATE
)|WRITEMASK_X
, args
[0]);
1562 emit_math2(c
, BRW_MATH_FUNCTION_POW
, dst
, dst_flags
, args
[0], args
[1]);
1568 emit_cmp(p
, dst
, dst_flags
, args
[0], args
[1], args
[2]);
1572 emit_max(p
, dst
, dst_flags
, args
[0], args
[1]);
1576 emit_min(p
, dst
, dst_flags
, args
[0], args
[1]);
1580 emit_slt(p
, dst
, dst_flags
, args
[0], args
[1]);
1584 emit_sle(p
, dst
, dst_flags
, args
[0], args
[1]);
1587 emit_sgt(p
, dst
, dst_flags
, args
[0], args
[1]);
1590 emit_sge(p
, dst
, dst_flags
, args
[0], args
[1]);
1593 emit_seq(p
, dst
, dst_flags
, args
[0], args
[1]);
1596 emit_sne(p
, dst
, dst_flags
, args
[0], args
[1]);
1600 emit_lit(c
, dst
, dst_flags
, args
[0]);
1603 /* Texturing operations:
1606 emit_tex(c
, dst
, dst_flags
, args
[0], c
->payload
.depth
[0].hw_reg
,
1607 inst
->tex_idx
, inst
->tex_unit
,
1612 emit_txb(c
, dst
, dst_flags
, args
[0], c
->payload
.depth
[0].hw_reg
,
1613 inst
->tex_idx
, inst
->tex_unit
);
1617 emit_kil(c
, args
[0]);
1625 _mesa_printf("Unsupported opcode %i (%s) in fragment shader\n",
1626 inst
->opcode
, inst
->opcode
< MAX_OPCODE
?
1627 _mesa_opcode_string(inst
->opcode
) :
1631 for (i
= 0; i
< 4; i
++)
1632 if (inst
->dst
[i
] && inst
->dst
[i
]->spill_slot
)
1634 inst
->dst
[i
]->hw_reg
,
1635 inst
->dst
[i
]->spill_slot
);
1638 if (INTEL_DEBUG
& DEBUG_WM
) {
1641 _mesa_printf("wm-native:\n");
1642 for (i
= 0; i
< p
->nr_insn
; i
++)
1643 brw_disasm(stderr
, &p
->store
[i
]);