2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **********************************************************************/
29 * Keith Whitwell <keith@tungstengraphics.com>
33 #include "main/macros.h"
34 #include "brw_context.h"
37 static GLboolean
can_do_pln(struct intel_context
*intel
,
38 const struct brw_reg
*deltas
)
40 struct brw_context
*brw
= brw_context(&intel
->ctx
);
45 if (deltas
[1].nr
!= deltas
[0].nr
+ 1)
48 if (intel
->gen
< 6 && ((deltas
[0].nr
& 1) != 0))
54 /* Not quite sure how correct this is - need to understand horiz
55 * vs. vertical strides a little better.
57 static INLINE
struct brw_reg
sechalf( struct brw_reg reg
)
67 * R0.0 -- pixel mask, one bit for each of 4 pixels in 4 tiles,
68 * corresponding to each of the 16 execution channels.
70 * R1.0 -- triangle vertex 0.X
71 * R1.1 -- triangle vertex 0.Y
72 * R1.2 -- tile 0 x,y coords (2 packed uwords)
73 * R1.3 -- tile 1 x,y coords (2 packed uwords)
74 * R1.4 -- tile 2 x,y coords (2 packed uwords)
75 * R1.5 -- tile 3 x,y coords (2 packed uwords)
81 void emit_pixel_xy(struct brw_wm_compile
*c
,
82 const struct brw_reg
*dst
,
85 struct brw_compile
*p
= &c
->func
;
86 struct brw_reg r1
= brw_vec1_grf(1, 0);
87 struct brw_reg r1_uw
= retype(r1
, BRW_REGISTER_TYPE_UW
);
88 struct brw_reg dst0_uw
, dst1_uw
;
90 brw_push_insn_state(p
);
91 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
93 if (c
->dispatch_width
== 16) {
94 dst0_uw
= vec16(retype(dst
[0], BRW_REGISTER_TYPE_UW
));
95 dst1_uw
= vec16(retype(dst
[1], BRW_REGISTER_TYPE_UW
));
97 dst0_uw
= vec8(retype(dst
[0], BRW_REGISTER_TYPE_UW
));
98 dst1_uw
= vec8(retype(dst
[1], BRW_REGISTER_TYPE_UW
));
101 /* Calculate pixel centers by adding 1 or 0 to each of the
102 * micro-tile coordinates passed in r1.
104 if (mask
& WRITEMASK_X
) {
107 stride(suboffset(r1_uw
, 4), 2, 4, 0),
108 brw_imm_v(0x10101010));
111 if (mask
& WRITEMASK_Y
) {
114 stride(suboffset(r1_uw
,5), 2, 4, 0),
115 brw_imm_v(0x11001100));
117 brw_pop_insn_state(p
);
121 void emit_delta_xy(struct brw_compile
*p
,
122 const struct brw_reg
*dst
,
124 const struct brw_reg
*arg0
)
126 struct brw_reg r1
= brw_vec1_grf(1, 0);
128 /* Calc delta X,Y by subtracting origin in r1 from the pixel
131 if (mask
& WRITEMASK_X
) {
134 retype(arg0
[0], BRW_REGISTER_TYPE_UW
),
138 if (mask
& WRITEMASK_Y
) {
141 retype(arg0
[1], BRW_REGISTER_TYPE_UW
),
142 negate(suboffset(r1
,1)));
147 void emit_wpos_xy(struct brw_wm_compile
*c
,
148 const struct brw_reg
*dst
,
150 const struct brw_reg
*arg0
)
152 struct brw_compile
*p
= &c
->func
;
154 /* Calculate the pixel offset from window bottom left into destination
157 if (mask
& WRITEMASK_X
) {
158 if (c
->fp
->program
.PixelCenterInteger
) {
162 retype(arg0
[0], BRW_REGISTER_TYPE_W
));
167 retype(arg0
[0], BRW_REGISTER_TYPE_W
),
172 if (mask
& WRITEMASK_Y
) {
173 if (c
->fp
->program
.OriginUpperLeft
) {
174 if (c
->fp
->program
.PixelCenterInteger
) {
178 retype(arg0
[1], BRW_REGISTER_TYPE_W
));
183 retype(arg0
[1], BRW_REGISTER_TYPE_W
),
187 float center_offset
= c
->fp
->program
.PixelCenterInteger
? 0.0 : 0.5;
189 /* Y' = (height - 1) - Y + center */
192 negate(retype(arg0
[1], BRW_REGISTER_TYPE_W
)),
193 brw_imm_f(c
->key
.drawable_height
- 1 + center_offset
));
199 void emit_pixel_w(struct brw_wm_compile
*c
,
200 const struct brw_reg
*dst
,
202 const struct brw_reg
*arg0
,
203 const struct brw_reg
*deltas
)
205 struct brw_compile
*p
= &c
->func
;
206 struct intel_context
*intel
= &p
->brw
->intel
;
208 /* Don't need this if all you are doing is interpolating color, for
211 if (mask
& WRITEMASK_W
) {
212 struct brw_reg interp3
= brw_vec1_grf(arg0
[0].nr
+1, 4);
214 /* Calc 1/w - just linterp wpos[3] optimized by putting the
215 * result straight into a message reg.
217 if (can_do_pln(intel
, deltas
)) {
218 brw_PLN(p
, brw_message_reg(2), interp3
, deltas
[0]);
220 brw_LINE(p
, brw_null_reg(), interp3
, deltas
[0]);
221 brw_MAC(p
, brw_message_reg(2), suboffset(interp3
, 1), deltas
[1]);
225 if (c
->dispatch_width
== 16) {
226 brw_math_16(p
, dst
[3],
227 BRW_MATH_FUNCTION_INV
,
228 BRW_MATH_SATURATE_NONE
,
230 BRW_MATH_PRECISION_FULL
);
233 BRW_MATH_FUNCTION_INV
,
234 BRW_MATH_SATURATE_NONE
,
236 BRW_MATH_DATA_VECTOR
,
237 BRW_MATH_PRECISION_FULL
);
243 void emit_linterp(struct brw_compile
*p
,
244 const struct brw_reg
*dst
,
246 const struct brw_reg
*arg0
,
247 const struct brw_reg
*deltas
)
249 struct intel_context
*intel
= &p
->brw
->intel
;
250 struct brw_reg interp
[4];
251 GLuint nr
= arg0
[0].nr
;
254 interp
[0] = brw_vec1_grf(nr
, 0);
255 interp
[1] = brw_vec1_grf(nr
, 4);
256 interp
[2] = brw_vec1_grf(nr
+1, 0);
257 interp
[3] = brw_vec1_grf(nr
+1, 4);
259 for (i
= 0; i
< 4; i
++) {
261 if (can_do_pln(intel
, deltas
)) {
262 brw_PLN(p
, dst
[i
], interp
[i
], deltas
[0]);
264 brw_LINE(p
, brw_null_reg(), interp
[i
], deltas
[0]);
265 brw_MAC(p
, dst
[i
], suboffset(interp
[i
],1), deltas
[1]);
272 void emit_pinterp(struct brw_compile
*p
,
273 const struct brw_reg
*dst
,
275 const struct brw_reg
*arg0
,
276 const struct brw_reg
*deltas
,
277 const struct brw_reg
*w
)
279 struct intel_context
*intel
= &p
->brw
->intel
;
280 struct brw_reg interp
[4];
281 GLuint nr
= arg0
[0].nr
;
284 interp
[0] = brw_vec1_grf(nr
, 0);
285 interp
[1] = brw_vec1_grf(nr
, 4);
286 interp
[2] = brw_vec1_grf(nr
+1, 0);
287 interp
[3] = brw_vec1_grf(nr
+1, 4);
289 for (i
= 0; i
< 4; i
++) {
291 if (can_do_pln(intel
, deltas
)) {
292 brw_PLN(p
, dst
[i
], interp
[i
], deltas
[0]);
294 brw_LINE(p
, brw_null_reg(), interp
[i
], deltas
[0]);
295 brw_MAC(p
, dst
[i
], suboffset(interp
[i
],1), deltas
[1]);
299 for (i
= 0; i
< 4; i
++) {
301 brw_MUL(p
, dst
[i
], dst
[i
], w
[3]);
307 void emit_cinterp(struct brw_compile
*p
,
308 const struct brw_reg
*dst
,
310 const struct brw_reg
*arg0
)
312 struct brw_reg interp
[4];
313 GLuint nr
= arg0
[0].nr
;
316 interp
[0] = brw_vec1_grf(nr
, 0);
317 interp
[1] = brw_vec1_grf(nr
, 4);
318 interp
[2] = brw_vec1_grf(nr
+1, 0);
319 interp
[3] = brw_vec1_grf(nr
+1, 4);
321 for (i
= 0; i
< 4; i
++) {
323 brw_MOV(p
, dst
[i
], suboffset(interp
[i
],3)); /* TODO: optimize away like other moves */
328 /* Sets the destination channels to 1.0 or 0.0 according to glFrontFacing. */
329 void emit_frontfacing(struct brw_compile
*p
,
330 const struct brw_reg
*dst
,
333 struct brw_reg r1_6ud
= retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD
);
336 if (!(mask
& WRITEMASK_XYZW
))
339 for (i
= 0; i
< 4; i
++) {
341 brw_MOV(p
, dst
[i
], brw_imm_f(0.0));
345 /* bit 31 is "primitive is back face", so checking < (1 << 31) gives
348 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, r1_6ud
, brw_imm_ud(1 << 31));
349 for (i
= 0; i
< 4; i
++) {
351 brw_MOV(p
, dst
[i
], brw_imm_f(1.0));
354 brw_set_predicate_control_flag_value(p
, 0xff);
357 /* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input
360 * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br
362 * and we're trying to produce:
365 * dst: (ss0.tr - ss0.tl) (ss0.tl - ss0.bl)
366 * (ss0.tr - ss0.tl) (ss0.tr - ss0.br)
367 * (ss0.br - ss0.bl) (ss0.tl - ss0.bl)
368 * (ss0.br - ss0.bl) (ss0.tr - ss0.br)
369 * (ss1.tr - ss1.tl) (ss1.tl - ss1.bl)
370 * (ss1.tr - ss1.tl) (ss1.tr - ss1.br)
371 * (ss1.br - ss1.bl) (ss1.tl - ss1.bl)
372 * (ss1.br - ss1.bl) (ss1.tr - ss1.br)
374 * and add another set of two more subspans if in 16-pixel dispatch mode.
376 * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result
377 * for each pair, and vertstride = 2 jumps us 2 elements after processing a
378 * pair. But for DDY, it's harder, as we want to produce the pairs swizzled
379 * between each other. We could probably do it like ddx and swizzle the right
380 * order later, but bail for now and just produce
381 * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4)
383 void emit_ddxy(struct brw_compile
*p
,
384 const struct brw_reg
*dst
,
387 const struct brw_reg
*arg0
)
390 struct brw_reg src0
, src1
;
393 brw_set_saturate(p
, 1);
394 for (i
= 0; i
< 4; i
++ ) {
397 src0
= brw_reg(arg0
[i
].file
, arg0
[i
].nr
, 1,
399 BRW_VERTICAL_STRIDE_2
,
401 BRW_HORIZONTAL_STRIDE_0
,
402 BRW_SWIZZLE_XYZW
, WRITEMASK_XYZW
);
403 src1
= brw_reg(arg0
[i
].file
, arg0
[i
].nr
, 0,
405 BRW_VERTICAL_STRIDE_2
,
407 BRW_HORIZONTAL_STRIDE_0
,
408 BRW_SWIZZLE_XYZW
, WRITEMASK_XYZW
);
410 src0
= brw_reg(arg0
[i
].file
, arg0
[i
].nr
, 0,
412 BRW_VERTICAL_STRIDE_4
,
414 BRW_HORIZONTAL_STRIDE_0
,
415 BRW_SWIZZLE_XYZW
, WRITEMASK_XYZW
);
416 src1
= brw_reg(arg0
[i
].file
, arg0
[i
].nr
, 2,
418 BRW_VERTICAL_STRIDE_4
,
420 BRW_HORIZONTAL_STRIDE_0
,
421 BRW_SWIZZLE_XYZW
, WRITEMASK_XYZW
);
423 brw_ADD(p
, dst
[i
], src0
, negate(src1
));
427 brw_set_saturate(p
, 0);
430 void emit_alu1(struct brw_compile
*p
,
431 struct brw_instruction
*(*func
)(struct brw_compile
*,
434 const struct brw_reg
*dst
,
436 const struct brw_reg
*arg0
)
441 brw_set_saturate(p
, 1);
443 for (i
= 0; i
< 4; i
++) {
445 func(p
, dst
[i
], arg0
[i
]);
450 brw_set_saturate(p
, 0);
454 void emit_alu2(struct brw_compile
*p
,
455 struct brw_instruction
*(*func
)(struct brw_compile
*,
459 const struct brw_reg
*dst
,
461 const struct brw_reg
*arg0
,
462 const struct brw_reg
*arg1
)
467 brw_set_saturate(p
, 1);
469 for (i
= 0; i
< 4; i
++) {
471 func(p
, dst
[i
], arg0
[i
], arg1
[i
]);
476 brw_set_saturate(p
, 0);
480 void emit_mad(struct brw_compile
*p
,
481 const struct brw_reg
*dst
,
483 const struct brw_reg
*arg0
,
484 const struct brw_reg
*arg1
,
485 const struct brw_reg
*arg2
)
489 for (i
= 0; i
< 4; i
++) {
491 brw_MUL(p
, dst
[i
], arg0
[i
], arg1
[i
]);
493 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
494 brw_ADD(p
, dst
[i
], dst
[i
], arg2
[i
]);
495 brw_set_saturate(p
, 0);
500 void emit_lrp(struct brw_compile
*p
,
501 const struct brw_reg
*dst
,
503 const struct brw_reg
*arg0
,
504 const struct brw_reg
*arg1
,
505 const struct brw_reg
*arg2
)
509 /* Uses dst as a temporary:
511 for (i
= 0; i
< 4; i
++) {
513 /* Can I use the LINE instruction for this?
515 brw_ADD(p
, dst
[i
], negate(arg0
[i
]), brw_imm_f(1.0));
516 brw_MUL(p
, brw_null_reg(), dst
[i
], arg2
[i
]);
518 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
519 brw_MAC(p
, dst
[i
], arg0
[i
], arg1
[i
]);
520 brw_set_saturate(p
, 0);
525 void emit_sop(struct brw_compile
*p
,
526 const struct brw_reg
*dst
,
529 const struct brw_reg
*arg0
,
530 const struct brw_reg
*arg1
)
534 for (i
= 0; i
< 4; i
++) {
536 brw_push_insn_state(p
);
537 brw_CMP(p
, brw_null_reg(), cond
, arg0
[i
], arg1
[i
]);
538 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
539 brw_MOV(p
, dst
[i
], brw_imm_f(0));
540 brw_set_predicate_control(p
, BRW_PREDICATE_NORMAL
);
541 brw_MOV(p
, dst
[i
], brw_imm_f(1.0));
542 brw_pop_insn_state(p
);
547 static void emit_slt( struct brw_compile
*p
,
548 const struct brw_reg
*dst
,
550 const struct brw_reg
*arg0
,
551 const struct brw_reg
*arg1
)
553 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_L
, arg0
, arg1
);
556 static void emit_sle( struct brw_compile
*p
,
557 const struct brw_reg
*dst
,
559 const struct brw_reg
*arg0
,
560 const struct brw_reg
*arg1
)
562 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_LE
, arg0
, arg1
);
565 static void emit_sgt( struct brw_compile
*p
,
566 const struct brw_reg
*dst
,
568 const struct brw_reg
*arg0
,
569 const struct brw_reg
*arg1
)
571 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_G
, arg0
, arg1
);
574 static void emit_sge( struct brw_compile
*p
,
575 const struct brw_reg
*dst
,
577 const struct brw_reg
*arg0
,
578 const struct brw_reg
*arg1
)
580 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_GE
, arg0
, arg1
);
583 static void emit_seq( struct brw_compile
*p
,
584 const struct brw_reg
*dst
,
586 const struct brw_reg
*arg0
,
587 const struct brw_reg
*arg1
)
589 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_EQ
, arg0
, arg1
);
592 static void emit_sne( struct brw_compile
*p
,
593 const struct brw_reg
*dst
,
595 const struct brw_reg
*arg0
,
596 const struct brw_reg
*arg1
)
598 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_NEQ
, arg0
, arg1
);
601 void emit_cmp(struct brw_compile
*p
,
602 const struct brw_reg
*dst
,
604 const struct brw_reg
*arg0
,
605 const struct brw_reg
*arg1
,
606 const struct brw_reg
*arg2
)
610 for (i
= 0; i
< 4; i
++) {
612 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
613 brw_MOV(p
, dst
[i
], arg2
[i
]);
614 brw_set_saturate(p
, 0);
616 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, arg0
[i
], brw_imm_f(0));
618 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
619 brw_MOV(p
, dst
[i
], arg1
[i
]);
620 brw_set_saturate(p
, 0);
621 brw_set_predicate_control_flag_value(p
, 0xff);
626 void emit_max(struct brw_compile
*p
,
627 const struct brw_reg
*dst
,
629 const struct brw_reg
*arg0
,
630 const struct brw_reg
*arg1
)
634 for (i
= 0; i
< 4; i
++) {
636 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_GE
, arg0
[i
], arg1
[i
]);
638 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
639 brw_SEL(p
, dst
[i
], arg0
[i
], arg1
[i
]);
640 brw_set_saturate(p
, 0);
641 brw_set_predicate_control_flag_value(p
, 0xff);
646 void emit_min(struct brw_compile
*p
,
647 const struct brw_reg
*dst
,
649 const struct brw_reg
*arg0
,
650 const struct brw_reg
*arg1
)
654 for (i
= 0; i
< 4; i
++) {
656 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, arg0
[i
], arg1
[i
]);
658 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
659 brw_SEL(p
, dst
[i
], arg0
[i
], arg1
[i
]);
660 brw_set_saturate(p
, 0);
661 brw_set_predicate_control_flag_value(p
, 0xff);
667 void emit_dp3(struct brw_compile
*p
,
668 const struct brw_reg
*dst
,
670 const struct brw_reg
*arg0
,
671 const struct brw_reg
*arg1
)
673 int dst_chan
= _mesa_ffs(mask
& WRITEMASK_XYZW
) - 1;
675 if (!(mask
& WRITEMASK_XYZW
))
676 return; /* Do not emit dead code */
678 assert(is_power_of_two(mask
& WRITEMASK_XYZW
));
680 brw_MUL(p
, brw_null_reg(), arg0
[0], arg1
[0]);
681 brw_MAC(p
, brw_null_reg(), arg0
[1], arg1
[1]);
683 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
684 brw_MAC(p
, dst
[dst_chan
], arg0
[2], arg1
[2]);
685 brw_set_saturate(p
, 0);
689 void emit_dp4(struct brw_compile
*p
,
690 const struct brw_reg
*dst
,
692 const struct brw_reg
*arg0
,
693 const struct brw_reg
*arg1
)
695 int dst_chan
= _mesa_ffs(mask
& WRITEMASK_XYZW
) - 1;
697 if (!(mask
& WRITEMASK_XYZW
))
698 return; /* Do not emit dead code */
700 assert(is_power_of_two(mask
& WRITEMASK_XYZW
));
702 brw_MUL(p
, brw_null_reg(), arg0
[0], arg1
[0]);
703 brw_MAC(p
, brw_null_reg(), arg0
[1], arg1
[1]);
704 brw_MAC(p
, brw_null_reg(), arg0
[2], arg1
[2]);
706 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
707 brw_MAC(p
, dst
[dst_chan
], arg0
[3], arg1
[3]);
708 brw_set_saturate(p
, 0);
712 void emit_dph(struct brw_compile
*p
,
713 const struct brw_reg
*dst
,
715 const struct brw_reg
*arg0
,
716 const struct brw_reg
*arg1
)
718 const int dst_chan
= _mesa_ffs(mask
& WRITEMASK_XYZW
) - 1;
720 if (!(mask
& WRITEMASK_XYZW
))
721 return; /* Do not emit dead code */
723 assert(is_power_of_two(mask
& WRITEMASK_XYZW
));
725 brw_MUL(p
, brw_null_reg(), arg0
[0], arg1
[0]);
726 brw_MAC(p
, brw_null_reg(), arg0
[1], arg1
[1]);
727 brw_MAC(p
, dst
[dst_chan
], arg0
[2], arg1
[2]);
729 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
730 brw_ADD(p
, dst
[dst_chan
], dst
[dst_chan
], arg1
[3]);
731 brw_set_saturate(p
, 0);
735 void emit_xpd(struct brw_compile
*p
,
736 const struct brw_reg
*dst
,
738 const struct brw_reg
*arg0
,
739 const struct brw_reg
*arg1
)
743 assert((mask
& WRITEMASK_W
) != WRITEMASK_W
);
745 for (i
= 0 ; i
< 3; i
++) {
750 brw_MUL(p
, brw_null_reg(), negate(arg0
[i2
]), arg1
[i1
]);
752 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
753 brw_MAC(p
, dst
[i
], arg0
[i1
], arg1
[i2
]);
754 brw_set_saturate(p
, 0);
760 void emit_math1(struct brw_wm_compile
*c
,
762 const struct brw_reg
*dst
,
764 const struct brw_reg
*arg0
)
766 struct brw_compile
*p
= &c
->func
;
767 int dst_chan
= _mesa_ffs(mask
& WRITEMASK_XYZW
) - 1;
768 GLuint saturate
= ((mask
& SATURATE
) ?
769 BRW_MATH_SATURATE_SATURATE
:
770 BRW_MATH_SATURATE_NONE
);
772 if (!(mask
& WRITEMASK_XYZW
))
773 return; /* Do not emit dead code */
775 assert(is_power_of_two(mask
& WRITEMASK_XYZW
));
777 /* If compressed, this will write message reg 2,3 from arg0.x's 16
780 brw_MOV(p
, brw_message_reg(2), arg0
[0]);
782 /* Send two messages to perform all 16 operations:
784 brw_push_insn_state(p
);
785 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
792 BRW_MATH_DATA_VECTOR
,
793 BRW_MATH_PRECISION_FULL
);
795 if (c
->dispatch_width
== 16) {
796 brw_set_compression_control(p
, BRW_COMPRESSION_2NDHALF
);
798 offset(dst
[dst_chan
],1),
803 BRW_MATH_DATA_VECTOR
,
804 BRW_MATH_PRECISION_FULL
);
806 brw_pop_insn_state(p
);
810 void emit_math2(struct brw_wm_compile
*c
,
812 const struct brw_reg
*dst
,
814 const struct brw_reg
*arg0
,
815 const struct brw_reg
*arg1
)
817 struct brw_compile
*p
= &c
->func
;
818 int dst_chan
= _mesa_ffs(mask
& WRITEMASK_XYZW
) - 1;
819 GLuint saturate
= ((mask
& SATURATE
) ?
820 BRW_MATH_SATURATE_SATURATE
:
821 BRW_MATH_SATURATE_NONE
);
823 if (!(mask
& WRITEMASK_XYZW
))
824 return; /* Do not emit dead code */
826 assert(is_power_of_two(mask
& WRITEMASK_XYZW
));
828 brw_push_insn_state(p
);
830 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
831 brw_MOV(p
, brw_message_reg(2), arg0
[0]);
832 if (c
->dispatch_width
== 16) {
833 brw_set_compression_control(p
, BRW_COMPRESSION_2NDHALF
);
834 brw_MOV(p
, brw_message_reg(4), sechalf(arg0
[0]));
837 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
838 brw_MOV(p
, brw_message_reg(3), arg1
[0]);
839 if (c
->dispatch_width
== 16) {
840 brw_set_compression_control(p
, BRW_COMPRESSION_2NDHALF
);
841 brw_MOV(p
, brw_message_reg(5), sechalf(arg1
[0]));
844 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
851 BRW_MATH_DATA_VECTOR
,
852 BRW_MATH_PRECISION_FULL
);
854 /* Send two messages to perform all 16 operations:
856 if (c
->dispatch_width
== 16) {
857 brw_set_compression_control(p
, BRW_COMPRESSION_2NDHALF
);
859 offset(dst
[dst_chan
],1),
864 BRW_MATH_DATA_VECTOR
,
865 BRW_MATH_PRECISION_FULL
);
867 brw_pop_insn_state(p
);
871 void emit_tex(struct brw_wm_compile
*c
,
875 struct brw_reg depth_payload
,
880 struct brw_compile
*p
= &c
->func
;
881 struct intel_context
*intel
= &p
->brw
->intel
;
882 struct brw_reg dst_retyped
;
883 GLuint cur_mrf
= 2, response_length
;
884 GLuint i
, nr_texcoords
;
887 GLuint mrf_per_channel
;
890 if (c
->dispatch_width
== 16) {
893 dst_retyped
= retype(vec16(dst
[0]), BRW_REGISTER_TYPE_UW
);
894 simd_mode
= BRW_SAMPLER_SIMD_MODE_SIMD16
;
898 dst_retyped
= retype(vec8(dst
[0]), BRW_REGISTER_TYPE_UW
);
899 simd_mode
= BRW_SAMPLER_SIMD_MODE_SIMD8
;
902 /* How many input regs are there?
905 case TEXTURE_1D_INDEX
:
909 case TEXTURE_2D_INDEX
:
910 case TEXTURE_RECT_INDEX
:
914 case TEXTURE_3D_INDEX
:
915 case TEXTURE_CUBE_INDEX
:
916 emit
= WRITEMASK_XYZ
;
920 /* unexpected target */
924 /* Pre-Ironlake, the 8-wide sampler always took u,v,r. */
925 if (!intel
->is_ironlake
&& c
->dispatch_width
== 8)
928 /* For shadow comparisons, we have to supply u,v,r. */
932 /* Emit the texcoords. */
933 for (i
= 0; i
< nr_texcoords
; i
++) {
935 brw_MOV(p
, brw_message_reg(cur_mrf
), arg
[i
]);
937 brw_MOV(p
, brw_message_reg(cur_mrf
), brw_imm_f(0));
938 cur_mrf
+= mrf_per_channel
;
941 /* Fill in the shadow comparison reference value. */
943 if (intel
->is_ironlake
) {
944 /* Fill in the cube map array index value. */
945 brw_MOV(p
, brw_message_reg(cur_mrf
), brw_imm_f(0));
946 cur_mrf
+= mrf_per_channel
;
947 } else if (c
->dispatch_width
== 8) {
948 /* Fill in the LOD bias value. */
949 brw_MOV(p
, brw_message_reg(cur_mrf
), brw_imm_f(0));
950 cur_mrf
+= mrf_per_channel
;
952 brw_MOV(p
, brw_message_reg(cur_mrf
), arg
[2]);
953 cur_mrf
+= mrf_per_channel
;
956 if (intel
->is_ironlake
) {
958 msg_type
= BRW_SAMPLER_MESSAGE_SAMPLE_COMPARE_IGDNG
;
960 msg_type
= BRW_SAMPLER_MESSAGE_SAMPLE_IGDNG
;
962 /* Note that G45 and older determines shadow compare and dispatch width
963 * from message length for most messages.
965 if (c
->dispatch_width
== 16 && shadow
)
966 msg_type
= BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE
;
968 msg_type
= BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE
;
974 retype(depth_payload
, BRW_REGISTER_TYPE_UW
),
975 SURF_INDEX_TEXTURE(sampler
),
977 dst_flags
& WRITEMASK_XYZW
,
987 void emit_txb(struct brw_wm_compile
*c
,
991 struct brw_reg depth_payload
,
995 struct brw_compile
*p
= &c
->func
;
996 struct intel_context
*intel
= &p
->brw
->intel
;
999 GLuint mrf_per_channel
;
1000 GLuint response_length
;
1001 struct brw_reg dst_retyped
;
1003 /* The G45 and older chipsets don't support 8-wide dispatch for LOD biased
1004 * samples, so we'll use the 16-wide instruction, leave the second halves
1005 * undefined, and trust the execution mask to keep the undefined pixels
1008 if (c
->dispatch_width
== 16 || !intel
->is_ironlake
) {
1009 if (intel
->is_ironlake
)
1010 msg_type
= BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_IGDNG
;
1012 msg_type
= BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS
;
1013 mrf_per_channel
= 2;
1014 dst_retyped
= retype(vec16(dst
[0]), BRW_REGISTER_TYPE_UW
);
1015 response_length
= 8;
1017 msg_type
= BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_IGDNG
;
1018 mrf_per_channel
= 1;
1019 dst_retyped
= retype(vec8(dst
[0]), BRW_REGISTER_TYPE_UW
);
1020 response_length
= 4;
1023 /* Shadow ignored for txb. */
1025 case TEXTURE_1D_INDEX
:
1026 brw_MOV(p
, brw_message_reg(2 + 0 * mrf_per_channel
), arg
[0]);
1027 brw_MOV(p
, brw_message_reg(2 + 1 * mrf_per_channel
), brw_imm_f(0));
1028 brw_MOV(p
, brw_message_reg(2 + 2 * mrf_per_channel
), brw_imm_f(0));
1030 case TEXTURE_2D_INDEX
:
1031 case TEXTURE_RECT_INDEX
:
1032 brw_MOV(p
, brw_message_reg(2 + 0 * mrf_per_channel
), arg
[0]);
1033 brw_MOV(p
, brw_message_reg(2 + 1 * mrf_per_channel
), arg
[1]);
1034 brw_MOV(p
, brw_message_reg(2 + 2 * mrf_per_channel
), brw_imm_f(0));
1036 case TEXTURE_3D_INDEX
:
1037 case TEXTURE_CUBE_INDEX
:
1038 brw_MOV(p
, brw_message_reg(2 + 0 * mrf_per_channel
), arg
[0]);
1039 brw_MOV(p
, brw_message_reg(2 + 1 * mrf_per_channel
), arg
[1]);
1040 brw_MOV(p
, brw_message_reg(2 + 2 * mrf_per_channel
), arg
[2]);
1043 /* unexpected target */
1047 brw_MOV(p
, brw_message_reg(2 + 3 * mrf_per_channel
), arg
[3]);
1048 msgLength
= 2 + 4 * mrf_per_channel
- 1;
1053 retype(depth_payload
, BRW_REGISTER_TYPE_UW
),
1054 SURF_INDEX_TEXTURE(sampler
),
1056 dst_flags
& WRITEMASK_XYZW
,
1062 BRW_SAMPLER_SIMD_MODE_SIMD16
);
1066 static void emit_lit(struct brw_wm_compile
*c
,
1067 const struct brw_reg
*dst
,
1069 const struct brw_reg
*arg0
)
1071 struct brw_compile
*p
= &c
->func
;
1073 assert((mask
& WRITEMASK_XW
) == 0);
1075 if (mask
& WRITEMASK_Y
) {
1076 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
1077 brw_MOV(p
, dst
[1], arg0
[0]);
1078 brw_set_saturate(p
, 0);
1081 if (mask
& WRITEMASK_Z
) {
1082 emit_math2(c
, BRW_MATH_FUNCTION_POW
,
1084 WRITEMASK_X
| (mask
& SATURATE
),
1089 /* Ordinarily you'd use an iff statement to skip or shortcircuit
1090 * some of the POW calculations above, but 16-wide iff statements
1091 * seem to lock c1 hardware, so this is a nasty workaround:
1093 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_LE
, arg0
[0], brw_imm_f(0));
1095 if (mask
& WRITEMASK_Y
)
1096 brw_MOV(p
, dst
[1], brw_imm_f(0));
1098 if (mask
& WRITEMASK_Z
)
1099 brw_MOV(p
, dst
[2], brw_imm_f(0));
1101 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
1105 /* Kill pixel - set execution mask to zero for those pixels which
1108 static void emit_kil( struct brw_wm_compile
*c
,
1109 struct brw_reg
*arg0
)
1111 struct brw_compile
*p
= &c
->func
;
1112 struct brw_reg r0uw
= retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW
);
1115 for (i
= 0; i
< 4; i
++) {
1116 /* Check if we've already done the comparison for this reg
1117 * -- common when someone does KIL TEMP.wwww.
1119 for (j
= 0; j
< i
; j
++) {
1120 if (memcmp(&arg0
[j
], &arg0
[i
], sizeof(arg0
[0])) == 0)
1126 brw_push_insn_state(p
);
1127 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_GE
, arg0
[i
], brw_imm_f(0));
1128 brw_set_predicate_control_flag_value(p
, 0xff);
1129 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1130 brw_AND(p
, r0uw
, brw_flag_reg(), r0uw
);
1131 brw_pop_insn_state(p
);
1135 /* KIL_NV kills the pixels that are currently executing, not based on a test
1138 static void emit_kil_nv( struct brw_wm_compile
*c
)
1140 struct brw_compile
*p
= &c
->func
;
1141 struct brw_reg r0uw
= retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW
);
1143 brw_push_insn_state(p
);
1144 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1145 brw_NOT(p
, c
->emit_mask_reg
, brw_mask_reg(1)); /* IMASK */
1146 brw_AND(p
, r0uw
, c
->emit_mask_reg
, r0uw
);
1147 brw_pop_insn_state(p
);
1150 static void fire_fb_write( struct brw_wm_compile
*c
,
1156 struct brw_compile
*p
= &c
->func
;
1159 if (c
->dispatch_width
== 16)
1160 dst
= retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW
);
1162 dst
= retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW
);
1164 /* Pass through control information:
1166 /* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */
1168 brw_push_insn_state(p
);
1169 brw_set_mask_control(p
, BRW_MASK_DISABLE
); /* ? */
1170 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1172 brw_message_reg(base_reg
+ 1),
1173 brw_vec8_grf(1, 0));
1174 brw_pop_insn_state(p
);
1177 /* Send framebuffer write message: */
1178 /* send (16) null.0<1>:uw m0 r0.0<8;8,1>:uw 0x85a04000:ud { Align1 EOT } */
1182 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW
),
1190 static void emit_aa( struct brw_wm_compile
*c
,
1191 struct brw_reg
*arg1
,
1194 struct brw_compile
*p
= &c
->func
;
1195 GLuint comp
= c
->key
.aa_dest_stencil_reg
/ 2;
1196 GLuint off
= c
->key
.aa_dest_stencil_reg
% 2;
1197 struct brw_reg aa
= offset(arg1
[comp
], off
);
1199 brw_push_insn_state(p
);
1200 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
); /* ?? */
1201 brw_MOV(p
, brw_message_reg(reg
), aa
);
1202 brw_pop_insn_state(p
);
1206 /* Post-fragment-program processing. Send the results to the
1208 * \param arg0 the fragment color
1209 * \param arg1 the pass-through depth value
1210 * \param arg2 the shader-computed depth value
1212 void emit_fb_write(struct brw_wm_compile
*c
,
1213 struct brw_reg
*arg0
,
1214 struct brw_reg
*arg1
,
1215 struct brw_reg
*arg2
,
1219 struct brw_compile
*p
= &c
->func
;
1220 struct brw_context
*brw
= p
->brw
;
1224 /* Reserve a space for AA - may not be needed:
1226 if (c
->key
.aa_dest_stencil_reg
)
1229 /* I don't really understand how this achieves the color interleave
1230 * (ie RGBARGBA) in the result: [Do the saturation here]
1232 brw_push_insn_state(p
);
1234 for (channel
= 0; channel
< 4; channel
++) {
1235 if (c
->dispatch_width
== 16 && brw
->has_compr4
) {
1236 /* By setting the high bit of the MRF register number, we indicate
1237 * that we want COMPR4 mode - instead of doing the usual destination
1238 * + 1 for the second half we get destination + 4.
1241 brw_message_reg(nr
+ channel
+ (1 << 7)),
1244 /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */
1245 /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */
1246 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1248 brw_message_reg(nr
+ channel
),
1251 if (c
->dispatch_width
== 16) {
1252 brw_set_compression_control(p
, BRW_COMPRESSION_2NDHALF
);
1254 brw_message_reg(nr
+ channel
+ 4),
1255 sechalf(arg0
[channel
]));
1259 /* skip over the regs populated above:
1262 brw_pop_insn_state(p
);
1264 if (c
->key
.source_depth_to_render_target
)
1266 if (c
->key
.computes_depth
)
1267 brw_MOV(p
, brw_message_reg(nr
), arg2
[2]);
1269 brw_MOV(p
, brw_message_reg(nr
), arg1
[1]); /* ? */
1274 if (c
->key
.dest_depth_reg
)
1276 GLuint comp
= c
->key
.dest_depth_reg
/ 2;
1277 GLuint off
= c
->key
.dest_depth_reg
% 2;
1280 brw_push_insn_state(p
);
1281 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1283 brw_MOV(p
, brw_message_reg(nr
), offset(arg1
[comp
],1));
1285 brw_MOV(p
, brw_message_reg(nr
+1), arg1
[comp
+1]);
1286 brw_pop_insn_state(p
);
1289 brw_MOV(p
, brw_message_reg(nr
), arg1
[comp
]);
1294 if (!c
->key
.runtime_check_aads_emit
) {
1295 if (c
->key
.aa_dest_stencil_reg
)
1296 emit_aa(c
, arg1
, 2);
1298 fire_fb_write(c
, 0, nr
, target
, eot
);
1301 struct brw_reg v1_null_ud
= vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD
));
1302 struct brw_reg ip
= brw_ip_reg();
1303 struct brw_instruction
*jmp
;
1305 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1306 brw_set_conditionalmod(p
, BRW_CONDITIONAL_Z
);
1309 get_element_ud(brw_vec8_grf(1,0), 6),
1312 jmp
= brw_JMPI(p
, ip
, ip
, brw_imm_w(0));
1314 emit_aa(c
, arg1
, 2);
1315 fire_fb_write(c
, 0, nr
, target
, eot
);
1316 /* note - thread killed in subroutine */
1318 brw_land_fwd_jump(p
, jmp
);
1320 /* ELSE: Shuffle up one register to fill in the hole left for AA:
1322 fire_fb_write(c
, 1, nr
-1, target
, eot
);
1327 * Move a GPR to scratch memory.
1329 static void emit_spill( struct brw_wm_compile
*c
,
1333 struct brw_compile
*p
= &c
->func
;
1336 mov (16) m2.0<1>:ud r2.0<8;8,1>:ud { Align1 Compr }
1338 brw_MOV(p
, brw_message_reg(2), reg
);
1341 mov (1) r0.2<1>:d 0x00000080:d { Align1 NoMask }
1342 send (16) null.0<1>:uw m1 r0.0<8;8,1>:uw 0x053003ff:ud { Align1 }
1345 retype(vec16(brw_vec8_grf(0, 0)), BRW_REGISTER_TYPE_UW
),
1351 * Load a GPR from scratch memory.
1353 static void emit_unspill( struct brw_wm_compile
*c
,
1357 struct brw_compile
*p
= &c
->func
;
1359 /* Slot 0 is the undef value.
1362 brw_MOV(p
, reg
, brw_imm_f(0));
1367 mov (1) r0.2<1>:d 0x000000c0:d { Align1 NoMask }
1368 send (16) r110.0<1>:uw m1 r0.0<8;8,1>:uw 0x041243ff:ud { Align1 }
1372 retype(vec16(reg
), BRW_REGISTER_TYPE_UW
),
1378 * Retrieve up to 4 GEN4 register pairs for the given wm reg:
1379 * Args with unspill_reg != 0 will be loaded from scratch memory.
1381 static void get_argument_regs( struct brw_wm_compile
*c
,
1382 struct brw_wm_ref
*arg
[],
1383 struct brw_reg
*regs
)
1387 for (i
= 0; i
< 4; i
++) {
1389 if (arg
[i
]->unspill_reg
)
1391 brw_vec8_grf(arg
[i
]->unspill_reg
, 0),
1392 arg
[i
]->value
->spill_slot
);
1394 regs
[i
] = arg
[i
]->hw_reg
;
1397 regs
[i
] = brw_null_reg();
1404 * For values that have a spill_slot!=0, write those regs to scratch memory.
1406 static void spill_values( struct brw_wm_compile
*c
,
1407 struct brw_wm_value
*values
,
1412 for (i
= 0; i
< nr
; i
++)
1413 if (values
[i
].spill_slot
)
1414 emit_spill(c
, values
[i
].hw_reg
, values
[i
].spill_slot
);
1418 /* Emit the fragment program instructions here.
1420 void brw_wm_emit( struct brw_wm_compile
*c
)
1422 struct brw_compile
*p
= &c
->func
;
1425 brw_set_compression_control(p
, BRW_COMPRESSION_COMPRESSED
);
1427 /* Check if any of the payload regs need to be spilled:
1429 spill_values(c
, c
->payload
.depth
, 4);
1430 spill_values(c
, c
->creg
, c
->nr_creg
);
1431 spill_values(c
, c
->payload
.input_interp
, FRAG_ATTRIB_MAX
);
1434 for (insn
= 0; insn
< c
->nr_insns
; insn
++) {
1436 struct brw_wm_instruction
*inst
= &c
->instruction
[insn
];
1437 struct brw_reg args
[3][4], dst
[4];
1438 GLuint i
, dst_flags
;
1440 /* Get argument regs:
1442 for (i
= 0; i
< 3; i
++)
1443 get_argument_regs(c
, inst
->src
[i
], args
[i
]);
1447 for (i
= 0; i
< 4; i
++)
1449 dst
[i
] = inst
->dst
[i
]->hw_reg
;
1451 dst
[i
] = brw_null_reg();
1455 dst_flags
= inst
->writemask
;
1457 dst_flags
|= SATURATE
;
1459 switch (inst
->opcode
) {
1460 /* Generated instructions for calculating triangle interpolants:
1463 emit_pixel_xy(c
, dst
, dst_flags
);
1467 emit_delta_xy(p
, dst
, dst_flags
, args
[0]);
1471 emit_wpos_xy(c
, dst
, dst_flags
, args
[0]);
1475 emit_pixel_w(c
, dst
, dst_flags
, args
[0], args
[1]);
1479 emit_linterp(p
, dst
, dst_flags
, args
[0], args
[1]);
1483 emit_pinterp(p
, dst
, dst_flags
, args
[0], args
[1], args
[2]);
1487 emit_cinterp(p
, dst
, dst_flags
, args
[0]);
1491 emit_fb_write(c
, args
[0], args
[1], args
[2], inst
->target
, inst
->eot
);
1494 case WM_FRONTFACING
:
1495 emit_frontfacing(p
, dst
, dst_flags
);
1498 /* Straightforward arithmetic:
1501 emit_alu2(p
, brw_ADD
, dst
, dst_flags
, args
[0], args
[1]);
1505 emit_alu1(p
, brw_FRC
, dst
, dst_flags
, args
[0]);
1509 emit_alu1(p
, brw_RNDD
, dst
, dst_flags
, args
[0]);
1513 emit_ddxy(p
, dst
, dst_flags
, GL_TRUE
, args
[0]);
1517 emit_ddxy(p
, dst
, dst_flags
, GL_FALSE
, args
[0]);
1521 emit_dp3(p
, dst
, dst_flags
, args
[0], args
[1]);
1525 emit_dp4(p
, dst
, dst_flags
, args
[0], args
[1]);
1529 emit_dph(p
, dst
, dst_flags
, args
[0], args
[1]);
1533 emit_alu1(p
, brw_RNDZ
, dst
, dst_flags
, args
[0]);
1537 emit_lrp(p
, dst
, dst_flags
, args
[0], args
[1], args
[2]);
1541 emit_mad(p
, dst
, dst_flags
, args
[0], args
[1], args
[2]);
1546 emit_alu1(p
, brw_MOV
, dst
, dst_flags
, args
[0]);
1550 emit_alu2(p
, brw_MUL
, dst
, dst_flags
, args
[0], args
[1]);
1554 emit_xpd(p
, dst
, dst_flags
, args
[0], args
[1]);
1557 /* Higher math functions:
1560 emit_math1(c
, BRW_MATH_FUNCTION_INV
, dst
, dst_flags
, args
[0]);
1564 emit_math1(c
, BRW_MATH_FUNCTION_RSQ
, dst
, dst_flags
, args
[0]);
1568 emit_math1(c
, BRW_MATH_FUNCTION_SIN
, dst
, dst_flags
, args
[0]);
1572 emit_math1(c
, BRW_MATH_FUNCTION_COS
, dst
, dst_flags
, args
[0]);
1576 emit_math1(c
, BRW_MATH_FUNCTION_EXP
, dst
, dst_flags
, args
[0]);
1580 emit_math1(c
, BRW_MATH_FUNCTION_LOG
, dst
, dst_flags
, args
[0]);
1584 /* There is an scs math function, but it would need some
1585 * fixup for 16-element execution.
1587 if (dst_flags
& WRITEMASK_X
)
1588 emit_math1(c
, BRW_MATH_FUNCTION_COS
, dst
, (dst_flags
&SATURATE
)|WRITEMASK_X
, args
[0]);
1589 if (dst_flags
& WRITEMASK_Y
)
1590 emit_math1(c
, BRW_MATH_FUNCTION_SIN
, dst
+1, (dst_flags
&SATURATE
)|WRITEMASK_X
, args
[0]);
1594 emit_math2(c
, BRW_MATH_FUNCTION_POW
, dst
, dst_flags
, args
[0], args
[1]);
1600 emit_cmp(p
, dst
, dst_flags
, args
[0], args
[1], args
[2]);
1604 emit_max(p
, dst
, dst_flags
, args
[0], args
[1]);
1608 emit_min(p
, dst
, dst_flags
, args
[0], args
[1]);
1612 emit_slt(p
, dst
, dst_flags
, args
[0], args
[1]);
1616 emit_sle(p
, dst
, dst_flags
, args
[0], args
[1]);
1619 emit_sgt(p
, dst
, dst_flags
, args
[0], args
[1]);
1622 emit_sge(p
, dst
, dst_flags
, args
[0], args
[1]);
1625 emit_seq(p
, dst
, dst_flags
, args
[0], args
[1]);
1628 emit_sne(p
, dst
, dst_flags
, args
[0], args
[1]);
1632 emit_lit(c
, dst
, dst_flags
, args
[0]);
1635 /* Texturing operations:
1638 emit_tex(c
, dst
, dst_flags
, args
[0], c
->payload
.depth
[0].hw_reg
,
1639 inst
->tex_idx
, inst
->tex_unit
,
1644 emit_txb(c
, dst
, dst_flags
, args
[0], c
->payload
.depth
[0].hw_reg
,
1645 inst
->tex_idx
, inst
->tex_unit
);
1649 emit_kil(c
, args
[0]);
1657 printf("Unsupported opcode %i (%s) in fragment shader\n",
1658 inst
->opcode
, inst
->opcode
< MAX_OPCODE
?
1659 _mesa_opcode_string(inst
->opcode
) :
1663 for (i
= 0; i
< 4; i
++)
1664 if (inst
->dst
[i
] && inst
->dst
[i
]->spill_slot
)
1666 inst
->dst
[i
]->hw_reg
,
1667 inst
->dst
[i
]->spill_slot
);
1670 if (INTEL_DEBUG
& DEBUG_WM
) {
1673 printf("wm-native:\n");
1674 for (i
= 0; i
< p
->nr_insn
; i
++)
1675 brw_disasm(stderr
, &p
->store
[i
]);