2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **********************************************************************/
29 * Keith Whitwell <keith@tungstengraphics.com>
33 #include "main/macros.h"
34 #include "brw_context.h"
37 static GLboolean
can_do_pln(struct intel_context
*intel
,
38 const struct brw_reg
*deltas
)
40 struct brw_context
*brw
= brw_context(&intel
->ctx
);
45 if (deltas
[1].nr
!= deltas
[0].nr
+ 1)
48 if (intel
->gen
< 6 && ((deltas
[0].nr
& 1) != 0))
54 /* Not quite sure how correct this is - need to understand horiz
55 * vs. vertical strides a little better.
57 static INLINE
struct brw_reg
sechalf( struct brw_reg reg
)
/**
 * Computes the screen-space x,y position of the pixels.
 *
 * This will be used by emit_delta_xy() or emit_wpos_xy() for
 * interpolation of attributes.
 *
 * R0.0 -- pixel mask, one bit for each of 4 pixels in 4 tiles,
 *         corresponding to each of the 16 execution channels.
 *
 * R1.0 -- triangle vertex 0.X
 * R1.1 -- triangle vertex 0.Y
 * R1.2 -- tile 0 x,y coords (2 packed uwords)
 * R1.3 -- tile 1 x,y coords (2 packed uwords)
 * R1.4 -- tile 2 x,y coords (2 packed uwords)
 * R1.5 -- tile 3 x,y coords (2 packed uwords)
 */
86 void emit_pixel_xy(struct brw_wm_compile
*c
,
87 const struct brw_reg
*dst
,
90 struct brw_compile
*p
= &c
->func
;
91 struct brw_reg r1
= brw_vec1_grf(1, 0);
92 struct brw_reg r1_uw
= retype(r1
, BRW_REGISTER_TYPE_UW
);
93 struct brw_reg dst0_uw
, dst1_uw
;
95 brw_push_insn_state(p
);
96 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
98 if (c
->dispatch_width
== 16) {
99 dst0_uw
= vec16(retype(dst
[0], BRW_REGISTER_TYPE_UW
));
100 dst1_uw
= vec16(retype(dst
[1], BRW_REGISTER_TYPE_UW
));
102 dst0_uw
= vec8(retype(dst
[0], BRW_REGISTER_TYPE_UW
));
103 dst1_uw
= vec8(retype(dst
[1], BRW_REGISTER_TYPE_UW
));
106 /* Calculate pixel centers by adding 1 or 0 to each of the
107 * micro-tile coordinates passed in r1.
109 if (mask
& WRITEMASK_X
) {
112 stride(suboffset(r1_uw
, 4), 2, 4, 0),
113 brw_imm_v(0x10101010));
116 if (mask
& WRITEMASK_Y
) {
119 stride(suboffset(r1_uw
,5), 2, 4, 0),
120 brw_imm_v(0x11001100));
122 brw_pop_insn_state(p
);
/**
 * Computes the screen-space x,y distance of the pixels from the start
 * vertex.
 *
 * This will be used in linterp or pinterp with the start vertex value
 * and the Cx, Cy, and C0 coefficients passed in from the setup engine
 * to produce interpolated attribute values.
 */
133 void emit_delta_xy(struct brw_compile
*p
,
134 const struct brw_reg
*dst
,
136 const struct brw_reg
*arg0
)
138 struct brw_reg r1
= brw_vec1_grf(1, 0);
143 assert(mask
== WRITEMASK_XY
);
145 /* Calc delta X,Y by subtracting origin in r1 from the pixel
146 * centers produced by emit_pixel_xy().
150 retype(arg0
[0], BRW_REGISTER_TYPE_UW
),
154 retype(arg0
[1], BRW_REGISTER_TYPE_UW
),
155 negate(suboffset(r1
,1)));
/**
 * Computes the pixel offset from the window origin for gl_FragCoord().
 */
161 void emit_wpos_xy(struct brw_wm_compile
*c
,
162 const struct brw_reg
*dst
,
164 const struct brw_reg
*arg0
)
166 struct brw_compile
*p
= &c
->func
;
168 if (mask
& WRITEMASK_X
) {
169 if (c
->fp
->program
.PixelCenterInteger
) {
173 retype(arg0
[0], BRW_REGISTER_TYPE_W
));
178 retype(arg0
[0], BRW_REGISTER_TYPE_W
),
183 if (mask
& WRITEMASK_Y
) {
184 if (c
->fp
->program
.OriginUpperLeft
) {
185 if (c
->fp
->program
.PixelCenterInteger
) {
189 retype(arg0
[1], BRW_REGISTER_TYPE_W
));
194 retype(arg0
[1], BRW_REGISTER_TYPE_W
),
198 float center_offset
= c
->fp
->program
.PixelCenterInteger
? 0.0 : 0.5;
200 /* Y' = (height - 1) - Y + center */
203 negate(retype(arg0
[1], BRW_REGISTER_TYPE_W
)),
204 brw_imm_f(c
->key
.drawable_height
- 1 + center_offset
));
210 void emit_pixel_w(struct brw_wm_compile
*c
,
211 const struct brw_reg
*dst
,
213 const struct brw_reg
*arg0
,
214 const struct brw_reg
*deltas
)
216 struct brw_compile
*p
= &c
->func
;
217 struct intel_context
*intel
= &p
->brw
->intel
;
219 /* Don't need this if all you are doing is interpolating color, for
222 if (mask
& WRITEMASK_W
) {
223 struct brw_reg interp3
= brw_vec1_grf(arg0
[0].nr
+1, 4);
225 /* Calc 1/w - just linterp wpos[3] optimized by putting the
226 * result straight into a message reg.
228 if (can_do_pln(intel
, deltas
)) {
229 brw_PLN(p
, brw_message_reg(2), interp3
, deltas
[0]);
231 brw_LINE(p
, brw_null_reg(), interp3
, deltas
[0]);
232 brw_MAC(p
, brw_message_reg(2), suboffset(interp3
, 1), deltas
[1]);
236 if (c
->dispatch_width
== 16) {
237 brw_math_16(p
, dst
[3],
238 BRW_MATH_FUNCTION_INV
,
239 BRW_MATH_SATURATE_NONE
,
241 BRW_MATH_PRECISION_FULL
);
244 BRW_MATH_FUNCTION_INV
,
245 BRW_MATH_SATURATE_NONE
,
247 BRW_MATH_DATA_VECTOR
,
248 BRW_MATH_PRECISION_FULL
);
254 void emit_linterp(struct brw_compile
*p
,
255 const struct brw_reg
*dst
,
257 const struct brw_reg
*arg0
,
258 const struct brw_reg
*deltas
)
260 struct intel_context
*intel
= &p
->brw
->intel
;
261 struct brw_reg interp
[4];
262 GLuint nr
= arg0
[0].nr
;
265 interp
[0] = brw_vec1_grf(nr
, 0);
266 interp
[1] = brw_vec1_grf(nr
, 4);
267 interp
[2] = brw_vec1_grf(nr
+1, 0);
268 interp
[3] = brw_vec1_grf(nr
+1, 4);
270 for (i
= 0; i
< 4; i
++) {
272 if (can_do_pln(intel
, deltas
)) {
273 brw_PLN(p
, dst
[i
], interp
[i
], deltas
[0]);
275 brw_LINE(p
, brw_null_reg(), interp
[i
], deltas
[0]);
276 brw_MAC(p
, dst
[i
], suboffset(interp
[i
],1), deltas
[1]);
283 void emit_pinterp(struct brw_compile
*p
,
284 const struct brw_reg
*dst
,
286 const struct brw_reg
*arg0
,
287 const struct brw_reg
*deltas
,
288 const struct brw_reg
*w
)
290 struct intel_context
*intel
= &p
->brw
->intel
;
291 struct brw_reg interp
[4];
292 GLuint nr
= arg0
[0].nr
;
295 interp
[0] = brw_vec1_grf(nr
, 0);
296 interp
[1] = brw_vec1_grf(nr
, 4);
297 interp
[2] = brw_vec1_grf(nr
+1, 0);
298 interp
[3] = brw_vec1_grf(nr
+1, 4);
300 for (i
= 0; i
< 4; i
++) {
302 if (can_do_pln(intel
, deltas
)) {
303 brw_PLN(p
, dst
[i
], interp
[i
], deltas
[0]);
305 brw_LINE(p
, brw_null_reg(), interp
[i
], deltas
[0]);
306 brw_MAC(p
, dst
[i
], suboffset(interp
[i
],1), deltas
[1]);
310 for (i
= 0; i
< 4; i
++) {
312 brw_MUL(p
, dst
[i
], dst
[i
], w
[3]);
318 void emit_cinterp(struct brw_compile
*p
,
319 const struct brw_reg
*dst
,
321 const struct brw_reg
*arg0
)
323 struct brw_reg interp
[4];
324 GLuint nr
= arg0
[0].nr
;
327 interp
[0] = brw_vec1_grf(nr
, 0);
328 interp
[1] = brw_vec1_grf(nr
, 4);
329 interp
[2] = brw_vec1_grf(nr
+1, 0);
330 interp
[3] = brw_vec1_grf(nr
+1, 4);
332 for (i
= 0; i
< 4; i
++) {
334 brw_MOV(p
, dst
[i
], suboffset(interp
[i
],3)); /* TODO: optimize away like other moves */
339 /* Sets the destination channels to 1.0 or 0.0 according to glFrontFacing. */
340 void emit_frontfacing(struct brw_compile
*p
,
341 const struct brw_reg
*dst
,
344 struct brw_reg r1_6ud
= retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD
);
347 if (!(mask
& WRITEMASK_XYZW
))
350 for (i
= 0; i
< 4; i
++) {
352 brw_MOV(p
, dst
[i
], brw_imm_f(0.0));
356 /* bit 31 is "primitive is back face", so checking < (1 << 31) gives
359 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, r1_6ud
, brw_imm_ud(1 << 31));
360 for (i
= 0; i
< 4; i
++) {
362 brw_MOV(p
, dst
[i
], brw_imm_f(1.0));
365 brw_set_predicate_control_flag_value(p
, 0xff);
368 /* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input
371 * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br
373 * and we're trying to produce:
376 * dst: (ss0.tr - ss0.tl) (ss0.tl - ss0.bl)
377 * (ss0.tr - ss0.tl) (ss0.tr - ss0.br)
378 * (ss0.br - ss0.bl) (ss0.tl - ss0.bl)
379 * (ss0.br - ss0.bl) (ss0.tr - ss0.br)
380 * (ss1.tr - ss1.tl) (ss1.tl - ss1.bl)
381 * (ss1.tr - ss1.tl) (ss1.tr - ss1.br)
382 * (ss1.br - ss1.bl) (ss1.tl - ss1.bl)
383 * (ss1.br - ss1.bl) (ss1.tr - ss1.br)
385 * and add another set of two more subspans if in 16-pixel dispatch mode.
387 * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result
388 * for each pair, and vertstride = 2 jumps us 2 elements after processing a
389 * pair. But for DDY, it's harder, as we want to produce the pairs swizzled
390 * between each other. We could probably do it like ddx and swizzle the right
391 * order later, but bail for now and just produce
392 * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4)
394 void emit_ddxy(struct brw_compile
*p
,
395 const struct brw_reg
*dst
,
398 const struct brw_reg
*arg0
)
401 struct brw_reg src0
, src1
;
404 brw_set_saturate(p
, 1);
405 for (i
= 0; i
< 4; i
++ ) {
408 src0
= brw_reg(arg0
[i
].file
, arg0
[i
].nr
, 1,
410 BRW_VERTICAL_STRIDE_2
,
412 BRW_HORIZONTAL_STRIDE_0
,
413 BRW_SWIZZLE_XYZW
, WRITEMASK_XYZW
);
414 src1
= brw_reg(arg0
[i
].file
, arg0
[i
].nr
, 0,
416 BRW_VERTICAL_STRIDE_2
,
418 BRW_HORIZONTAL_STRIDE_0
,
419 BRW_SWIZZLE_XYZW
, WRITEMASK_XYZW
);
421 src0
= brw_reg(arg0
[i
].file
, arg0
[i
].nr
, 0,
423 BRW_VERTICAL_STRIDE_4
,
425 BRW_HORIZONTAL_STRIDE_0
,
426 BRW_SWIZZLE_XYZW
, WRITEMASK_XYZW
);
427 src1
= brw_reg(arg0
[i
].file
, arg0
[i
].nr
, 2,
429 BRW_VERTICAL_STRIDE_4
,
431 BRW_HORIZONTAL_STRIDE_0
,
432 BRW_SWIZZLE_XYZW
, WRITEMASK_XYZW
);
434 brw_ADD(p
, dst
[i
], src0
, negate(src1
));
438 brw_set_saturate(p
, 0);
441 void emit_alu1(struct brw_compile
*p
,
442 struct brw_instruction
*(*func
)(struct brw_compile
*,
445 const struct brw_reg
*dst
,
447 const struct brw_reg
*arg0
)
452 brw_set_saturate(p
, 1);
454 for (i
= 0; i
< 4; i
++) {
456 func(p
, dst
[i
], arg0
[i
]);
461 brw_set_saturate(p
, 0);
465 void emit_alu2(struct brw_compile
*p
,
466 struct brw_instruction
*(*func
)(struct brw_compile
*,
470 const struct brw_reg
*dst
,
472 const struct brw_reg
*arg0
,
473 const struct brw_reg
*arg1
)
478 brw_set_saturate(p
, 1);
480 for (i
= 0; i
< 4; i
++) {
482 func(p
, dst
[i
], arg0
[i
], arg1
[i
]);
487 brw_set_saturate(p
, 0);
491 void emit_mad(struct brw_compile
*p
,
492 const struct brw_reg
*dst
,
494 const struct brw_reg
*arg0
,
495 const struct brw_reg
*arg1
,
496 const struct brw_reg
*arg2
)
500 for (i
= 0; i
< 4; i
++) {
502 brw_MUL(p
, dst
[i
], arg0
[i
], arg1
[i
]);
504 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
505 brw_ADD(p
, dst
[i
], dst
[i
], arg2
[i
]);
506 brw_set_saturate(p
, 0);
511 void emit_lrp(struct brw_compile
*p
,
512 const struct brw_reg
*dst
,
514 const struct brw_reg
*arg0
,
515 const struct brw_reg
*arg1
,
516 const struct brw_reg
*arg2
)
520 /* Uses dst as a temporary:
522 for (i
= 0; i
< 4; i
++) {
524 /* Can I use the LINE instruction for this?
526 brw_ADD(p
, dst
[i
], negate(arg0
[i
]), brw_imm_f(1.0));
527 brw_MUL(p
, brw_null_reg(), dst
[i
], arg2
[i
]);
529 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
530 brw_MAC(p
, dst
[i
], arg0
[i
], arg1
[i
]);
531 brw_set_saturate(p
, 0);
536 void emit_sop(struct brw_compile
*p
,
537 const struct brw_reg
*dst
,
540 const struct brw_reg
*arg0
,
541 const struct brw_reg
*arg1
)
545 for (i
= 0; i
< 4; i
++) {
547 brw_push_insn_state(p
);
548 brw_CMP(p
, brw_null_reg(), cond
, arg1
[i
], arg0
[i
]);
549 brw_SEL(p
, dst
[i
], brw_null_reg(), brw_imm_f(1.0));
550 brw_pop_insn_state(p
);
555 static void emit_slt( struct brw_compile
*p
,
556 const struct brw_reg
*dst
,
558 const struct brw_reg
*arg0
,
559 const struct brw_reg
*arg1
)
561 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_L
, arg0
, arg1
);
564 static void emit_sle( struct brw_compile
*p
,
565 const struct brw_reg
*dst
,
567 const struct brw_reg
*arg0
,
568 const struct brw_reg
*arg1
)
570 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_LE
, arg0
, arg1
);
573 static void emit_sgt( struct brw_compile
*p
,
574 const struct brw_reg
*dst
,
576 const struct brw_reg
*arg0
,
577 const struct brw_reg
*arg1
)
579 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_G
, arg0
, arg1
);
582 static void emit_sge( struct brw_compile
*p
,
583 const struct brw_reg
*dst
,
585 const struct brw_reg
*arg0
,
586 const struct brw_reg
*arg1
)
588 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_GE
, arg0
, arg1
);
591 static void emit_seq( struct brw_compile
*p
,
592 const struct brw_reg
*dst
,
594 const struct brw_reg
*arg0
,
595 const struct brw_reg
*arg1
)
597 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_EQ
, arg0
, arg1
);
600 static void emit_sne( struct brw_compile
*p
,
601 const struct brw_reg
*dst
,
603 const struct brw_reg
*arg0
,
604 const struct brw_reg
*arg1
)
606 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_NEQ
, arg0
, arg1
);
609 void emit_cmp(struct brw_compile
*p
,
610 const struct brw_reg
*dst
,
612 const struct brw_reg
*arg0
,
613 const struct brw_reg
*arg1
,
614 const struct brw_reg
*arg2
)
618 for (i
= 0; i
< 4; i
++) {
620 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
621 brw_MOV(p
, dst
[i
], arg2
[i
]);
622 brw_set_saturate(p
, 0);
624 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, arg0
[i
], brw_imm_f(0));
626 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
627 brw_MOV(p
, dst
[i
], arg1
[i
]);
628 brw_set_saturate(p
, 0);
629 brw_set_predicate_control_flag_value(p
, 0xff);
634 void emit_max(struct brw_compile
*p
,
635 const struct brw_reg
*dst
,
637 const struct brw_reg
*arg0
,
638 const struct brw_reg
*arg1
)
642 for (i
= 0; i
< 4; i
++) {
644 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_GE
, arg0
[i
], arg1
[i
]);
646 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
647 brw_SEL(p
, dst
[i
], arg0
[i
], arg1
[i
]);
648 brw_set_saturate(p
, 0);
649 brw_set_predicate_control_flag_value(p
, 0xff);
654 void emit_min(struct brw_compile
*p
,
655 const struct brw_reg
*dst
,
657 const struct brw_reg
*arg0
,
658 const struct brw_reg
*arg1
)
662 for (i
= 0; i
< 4; i
++) {
664 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, arg0
[i
], arg1
[i
]);
666 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
667 brw_SEL(p
, dst
[i
], arg0
[i
], arg1
[i
]);
668 brw_set_saturate(p
, 0);
669 brw_set_predicate_control_flag_value(p
, 0xff);
675 void emit_dp3(struct brw_compile
*p
,
676 const struct brw_reg
*dst
,
678 const struct brw_reg
*arg0
,
679 const struct brw_reg
*arg1
)
681 int dst_chan
= _mesa_ffs(mask
& WRITEMASK_XYZW
) - 1;
683 if (!(mask
& WRITEMASK_XYZW
))
684 return; /* Do not emit dead code */
686 assert(is_power_of_two(mask
& WRITEMASK_XYZW
));
688 brw_MUL(p
, brw_null_reg(), arg0
[0], arg1
[0]);
689 brw_MAC(p
, brw_null_reg(), arg0
[1], arg1
[1]);
691 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
692 brw_MAC(p
, dst
[dst_chan
], arg0
[2], arg1
[2]);
693 brw_set_saturate(p
, 0);
697 void emit_dp4(struct brw_compile
*p
,
698 const struct brw_reg
*dst
,
700 const struct brw_reg
*arg0
,
701 const struct brw_reg
*arg1
)
703 int dst_chan
= _mesa_ffs(mask
& WRITEMASK_XYZW
) - 1;
705 if (!(mask
& WRITEMASK_XYZW
))
706 return; /* Do not emit dead code */
708 assert(is_power_of_two(mask
& WRITEMASK_XYZW
));
710 brw_MUL(p
, brw_null_reg(), arg0
[0], arg1
[0]);
711 brw_MAC(p
, brw_null_reg(), arg0
[1], arg1
[1]);
712 brw_MAC(p
, brw_null_reg(), arg0
[2], arg1
[2]);
714 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
715 brw_MAC(p
, dst
[dst_chan
], arg0
[3], arg1
[3]);
716 brw_set_saturate(p
, 0);
720 void emit_dph(struct brw_compile
*p
,
721 const struct brw_reg
*dst
,
723 const struct brw_reg
*arg0
,
724 const struct brw_reg
*arg1
)
726 const int dst_chan
= _mesa_ffs(mask
& WRITEMASK_XYZW
) - 1;
728 if (!(mask
& WRITEMASK_XYZW
))
729 return; /* Do not emit dead code */
731 assert(is_power_of_two(mask
& WRITEMASK_XYZW
));
733 brw_MUL(p
, brw_null_reg(), arg0
[0], arg1
[0]);
734 brw_MAC(p
, brw_null_reg(), arg0
[1], arg1
[1]);
735 brw_MAC(p
, dst
[dst_chan
], arg0
[2], arg1
[2]);
737 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
738 brw_ADD(p
, dst
[dst_chan
], dst
[dst_chan
], arg1
[3]);
739 brw_set_saturate(p
, 0);
743 void emit_xpd(struct brw_compile
*p
,
744 const struct brw_reg
*dst
,
746 const struct brw_reg
*arg0
,
747 const struct brw_reg
*arg1
)
751 assert((mask
& WRITEMASK_W
) != WRITEMASK_W
);
753 for (i
= 0 ; i
< 3; i
++) {
758 brw_MUL(p
, brw_null_reg(), negate(arg0
[i2
]), arg1
[i1
]);
760 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
761 brw_MAC(p
, dst
[i
], arg0
[i1
], arg1
[i2
]);
762 brw_set_saturate(p
, 0);
768 void emit_math1(struct brw_wm_compile
*c
,
770 const struct brw_reg
*dst
,
772 const struct brw_reg
*arg0
)
774 struct brw_compile
*p
= &c
->func
;
775 int dst_chan
= _mesa_ffs(mask
& WRITEMASK_XYZW
) - 1;
776 GLuint saturate
= ((mask
& SATURATE
) ?
777 BRW_MATH_SATURATE_SATURATE
:
778 BRW_MATH_SATURATE_NONE
);
780 if (!(mask
& WRITEMASK_XYZW
))
781 return; /* Do not emit dead code */
783 assert(is_power_of_two(mask
& WRITEMASK_XYZW
));
785 /* If compressed, this will write message reg 2,3 from arg0.x's 16
788 brw_MOV(p
, brw_message_reg(2), arg0
[0]);
790 /* Send two messages to perform all 16 operations:
792 brw_push_insn_state(p
);
793 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
800 BRW_MATH_DATA_VECTOR
,
801 BRW_MATH_PRECISION_FULL
);
803 if (c
->dispatch_width
== 16) {
804 brw_set_compression_control(p
, BRW_COMPRESSION_2NDHALF
);
806 offset(dst
[dst_chan
],1),
811 BRW_MATH_DATA_VECTOR
,
812 BRW_MATH_PRECISION_FULL
);
814 brw_pop_insn_state(p
);
818 void emit_math2(struct brw_wm_compile
*c
,
820 const struct brw_reg
*dst
,
822 const struct brw_reg
*arg0
,
823 const struct brw_reg
*arg1
)
825 struct brw_compile
*p
= &c
->func
;
826 int dst_chan
= _mesa_ffs(mask
& WRITEMASK_XYZW
) - 1;
827 GLuint saturate
= ((mask
& SATURATE
) ?
828 BRW_MATH_SATURATE_SATURATE
:
829 BRW_MATH_SATURATE_NONE
);
831 if (!(mask
& WRITEMASK_XYZW
))
832 return; /* Do not emit dead code */
834 assert(is_power_of_two(mask
& WRITEMASK_XYZW
));
836 brw_push_insn_state(p
);
838 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
839 brw_MOV(p
, brw_message_reg(2), arg0
[0]);
840 if (c
->dispatch_width
== 16) {
841 brw_set_compression_control(p
, BRW_COMPRESSION_2NDHALF
);
842 brw_MOV(p
, brw_message_reg(4), sechalf(arg0
[0]));
845 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
846 brw_MOV(p
, brw_message_reg(3), arg1
[0]);
847 if (c
->dispatch_width
== 16) {
848 brw_set_compression_control(p
, BRW_COMPRESSION_2NDHALF
);
849 brw_MOV(p
, brw_message_reg(5), sechalf(arg1
[0]));
852 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
859 BRW_MATH_DATA_VECTOR
,
860 BRW_MATH_PRECISION_FULL
);
862 /* Send two messages to perform all 16 operations:
864 if (c
->dispatch_width
== 16) {
865 brw_set_compression_control(p
, BRW_COMPRESSION_2NDHALF
);
867 offset(dst
[dst_chan
],1),
872 BRW_MATH_DATA_VECTOR
,
873 BRW_MATH_PRECISION_FULL
);
875 brw_pop_insn_state(p
);
879 void emit_tex(struct brw_wm_compile
*c
,
883 struct brw_reg depth_payload
,
888 struct brw_compile
*p
= &c
->func
;
889 struct intel_context
*intel
= &p
->brw
->intel
;
890 struct brw_reg dst_retyped
;
891 GLuint cur_mrf
= 2, response_length
;
892 GLuint i
, nr_texcoords
;
895 GLuint mrf_per_channel
;
898 if (c
->dispatch_width
== 16) {
901 dst_retyped
= retype(vec16(dst
[0]), BRW_REGISTER_TYPE_UW
);
902 simd_mode
= BRW_SAMPLER_SIMD_MODE_SIMD16
;
906 dst_retyped
= retype(vec8(dst
[0]), BRW_REGISTER_TYPE_UW
);
907 simd_mode
= BRW_SAMPLER_SIMD_MODE_SIMD8
;
910 /* How many input regs are there?
913 case TEXTURE_1D_INDEX
:
917 case TEXTURE_2D_INDEX
:
918 case TEXTURE_RECT_INDEX
:
922 case TEXTURE_3D_INDEX
:
923 case TEXTURE_CUBE_INDEX
:
924 emit
= WRITEMASK_XYZ
;
928 /* unexpected target */
932 /* Pre-Ironlake, the 8-wide sampler always took u,v,r. */
933 if (!intel
->is_ironlake
&& c
->dispatch_width
== 8)
936 /* For shadow comparisons, we have to supply u,v,r. */
940 /* Emit the texcoords. */
941 for (i
= 0; i
< nr_texcoords
; i
++) {
943 brw_MOV(p
, brw_message_reg(cur_mrf
), arg
[i
]);
945 brw_MOV(p
, brw_message_reg(cur_mrf
), brw_imm_f(0));
946 cur_mrf
+= mrf_per_channel
;
949 /* Fill in the shadow comparison reference value. */
951 if (intel
->is_ironlake
) {
952 /* Fill in the cube map array index value. */
953 brw_MOV(p
, brw_message_reg(cur_mrf
), brw_imm_f(0));
954 cur_mrf
+= mrf_per_channel
;
955 } else if (c
->dispatch_width
== 8) {
956 /* Fill in the LOD bias value. */
957 brw_MOV(p
, brw_message_reg(cur_mrf
), brw_imm_f(0));
958 cur_mrf
+= mrf_per_channel
;
960 brw_MOV(p
, brw_message_reg(cur_mrf
), arg
[2]);
961 cur_mrf
+= mrf_per_channel
;
964 if (intel
->is_ironlake
) {
966 msg_type
= BRW_SAMPLER_MESSAGE_SAMPLE_COMPARE_IGDNG
;
968 msg_type
= BRW_SAMPLER_MESSAGE_SAMPLE_IGDNG
;
970 /* Note that G45 and older determines shadow compare and dispatch width
971 * from message length for most messages.
973 if (c
->dispatch_width
== 16 && shadow
)
974 msg_type
= BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE
;
976 msg_type
= BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE
;
982 retype(depth_payload
, BRW_REGISTER_TYPE_UW
),
983 SURF_INDEX_TEXTURE(sampler
),
985 dst_flags
& WRITEMASK_XYZW
,
995 void emit_txb(struct brw_wm_compile
*c
,
999 struct brw_reg depth_payload
,
1003 struct brw_compile
*p
= &c
->func
;
1004 struct intel_context
*intel
= &p
->brw
->intel
;
1007 GLuint mrf_per_channel
;
1008 GLuint response_length
;
1009 struct brw_reg dst_retyped
;
1011 /* The G45 and older chipsets don't support 8-wide dispatch for LOD biased
1012 * samples, so we'll use the 16-wide instruction, leave the second halves
1013 * undefined, and trust the execution mask to keep the undefined pixels
1016 if (c
->dispatch_width
== 16 || !intel
->is_ironlake
) {
1017 if (intel
->is_ironlake
)
1018 msg_type
= BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_IGDNG
;
1020 msg_type
= BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS
;
1021 mrf_per_channel
= 2;
1022 dst_retyped
= retype(vec16(dst
[0]), BRW_REGISTER_TYPE_UW
);
1023 response_length
= 8;
1025 msg_type
= BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_IGDNG
;
1026 mrf_per_channel
= 1;
1027 dst_retyped
= retype(vec8(dst
[0]), BRW_REGISTER_TYPE_UW
);
1028 response_length
= 4;
1031 /* Shadow ignored for txb. */
1033 case TEXTURE_1D_INDEX
:
1034 brw_MOV(p
, brw_message_reg(2 + 0 * mrf_per_channel
), arg
[0]);
1035 brw_MOV(p
, brw_message_reg(2 + 1 * mrf_per_channel
), brw_imm_f(0));
1036 brw_MOV(p
, brw_message_reg(2 + 2 * mrf_per_channel
), brw_imm_f(0));
1038 case TEXTURE_2D_INDEX
:
1039 case TEXTURE_RECT_INDEX
:
1040 brw_MOV(p
, brw_message_reg(2 + 0 * mrf_per_channel
), arg
[0]);
1041 brw_MOV(p
, brw_message_reg(2 + 1 * mrf_per_channel
), arg
[1]);
1042 brw_MOV(p
, brw_message_reg(2 + 2 * mrf_per_channel
), brw_imm_f(0));
1044 case TEXTURE_3D_INDEX
:
1045 case TEXTURE_CUBE_INDEX
:
1046 brw_MOV(p
, brw_message_reg(2 + 0 * mrf_per_channel
), arg
[0]);
1047 brw_MOV(p
, brw_message_reg(2 + 1 * mrf_per_channel
), arg
[1]);
1048 brw_MOV(p
, brw_message_reg(2 + 2 * mrf_per_channel
), arg
[2]);
1051 /* unexpected target */
1055 brw_MOV(p
, brw_message_reg(2 + 3 * mrf_per_channel
), arg
[3]);
1056 msgLength
= 2 + 4 * mrf_per_channel
- 1;
1061 retype(depth_payload
, BRW_REGISTER_TYPE_UW
),
1062 SURF_INDEX_TEXTURE(sampler
),
1064 dst_flags
& WRITEMASK_XYZW
,
1070 BRW_SAMPLER_SIMD_MODE_SIMD16
);
1074 static void emit_lit(struct brw_wm_compile
*c
,
1075 const struct brw_reg
*dst
,
1077 const struct brw_reg
*arg0
)
1079 struct brw_compile
*p
= &c
->func
;
1081 assert((mask
& WRITEMASK_XW
) == 0);
1083 if (mask
& WRITEMASK_Y
) {
1084 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
1085 brw_MOV(p
, dst
[1], arg0
[0]);
1086 brw_set_saturate(p
, 0);
1089 if (mask
& WRITEMASK_Z
) {
1090 emit_math2(c
, BRW_MATH_FUNCTION_POW
,
1092 WRITEMASK_X
| (mask
& SATURATE
),
1097 /* Ordinarily you'd use an iff statement to skip or shortcircuit
1098 * some of the POW calculations above, but 16-wide iff statements
1099 * seem to lock c1 hardware, so this is a nasty workaround:
1101 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_LE
, arg0
[0], brw_imm_f(0));
1103 if (mask
& WRITEMASK_Y
)
1104 brw_MOV(p
, dst
[1], brw_imm_f(0));
1106 if (mask
& WRITEMASK_Z
)
1107 brw_MOV(p
, dst
[2], brw_imm_f(0));
1109 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
1113 /* Kill pixel - set execution mask to zero for those pixels which
1116 static void emit_kil( struct brw_wm_compile
*c
,
1117 struct brw_reg
*arg0
)
1119 struct brw_compile
*p
= &c
->func
;
1120 struct brw_reg r0uw
= retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW
);
1123 for (i
= 0; i
< 4; i
++) {
1124 /* Check if we've already done the comparison for this reg
1125 * -- common when someone does KIL TEMP.wwww.
1127 for (j
= 0; j
< i
; j
++) {
1128 if (memcmp(&arg0
[j
], &arg0
[i
], sizeof(arg0
[0])) == 0)
1134 brw_push_insn_state(p
);
1135 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_GE
, arg0
[i
], brw_imm_f(0));
1136 brw_set_predicate_control_flag_value(p
, 0xff);
1137 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1138 brw_AND(p
, r0uw
, brw_flag_reg(), r0uw
);
1139 brw_pop_insn_state(p
);
1143 /* KIL_NV kills the pixels that are currently executing, not based on a test
1146 static void emit_kil_nv( struct brw_wm_compile
*c
)
1148 struct brw_compile
*p
= &c
->func
;
1149 struct brw_reg r0uw
= retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW
);
1151 brw_push_insn_state(p
);
1152 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1153 brw_NOT(p
, c
->emit_mask_reg
, brw_mask_reg(1)); /* IMASK */
1154 brw_AND(p
, r0uw
, c
->emit_mask_reg
, r0uw
);
1155 brw_pop_insn_state(p
);
1158 static void fire_fb_write( struct brw_wm_compile
*c
,
1164 struct brw_compile
*p
= &c
->func
;
1167 if (c
->dispatch_width
== 16)
1168 dst
= retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW
);
1170 dst
= retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW
);
1172 /* Pass through control information:
1174 /* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */
1176 brw_push_insn_state(p
);
1177 brw_set_mask_control(p
, BRW_MASK_DISABLE
); /* ? */
1178 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1180 brw_message_reg(base_reg
+ 1),
1181 brw_vec8_grf(1, 0));
1182 brw_pop_insn_state(p
);
1185 /* Send framebuffer write message: */
1186 /* send (16) null.0<1>:uw m0 r0.0<8;8,1>:uw 0x85a04000:ud { Align1 EOT } */
1190 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW
),
1198 static void emit_aa( struct brw_wm_compile
*c
,
1199 struct brw_reg
*arg1
,
1202 struct brw_compile
*p
= &c
->func
;
1203 GLuint comp
= c
->key
.aa_dest_stencil_reg
/ 2;
1204 GLuint off
= c
->key
.aa_dest_stencil_reg
% 2;
1205 struct brw_reg aa
= offset(arg1
[comp
], off
);
1207 brw_push_insn_state(p
);
1208 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
); /* ?? */
1209 brw_MOV(p
, brw_message_reg(reg
), aa
);
1210 brw_pop_insn_state(p
);
1214 /* Post-fragment-program processing. Send the results to the
1216 * \param arg0 the fragment color
1217 * \param arg1 the pass-through depth value
1218 * \param arg2 the shader-computed depth value
1220 void emit_fb_write(struct brw_wm_compile
*c
,
1221 struct brw_reg
*arg0
,
1222 struct brw_reg
*arg1
,
1223 struct brw_reg
*arg2
,
1227 struct brw_compile
*p
= &c
->func
;
1228 struct brw_context
*brw
= p
->brw
;
1232 /* Reserve a space for AA - may not be needed:
1234 if (c
->key
.aa_dest_stencil_reg
)
1237 /* I don't really understand how this achieves the color interleave
1238 * (ie RGBARGBA) in the result: [Do the saturation here]
1240 brw_push_insn_state(p
);
1242 for (channel
= 0; channel
< 4; channel
++) {
1243 if (c
->dispatch_width
== 16 && brw
->has_compr4
) {
1244 /* By setting the high bit of the MRF register number, we indicate
1245 * that we want COMPR4 mode - instead of doing the usual destination
1246 * + 1 for the second half we get destination + 4.
1249 brw_message_reg(nr
+ channel
+ (1 << 7)),
1252 /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */
1253 /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */
1254 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1256 brw_message_reg(nr
+ channel
),
1259 if (c
->dispatch_width
== 16) {
1260 brw_set_compression_control(p
, BRW_COMPRESSION_2NDHALF
);
1262 brw_message_reg(nr
+ channel
+ 4),
1263 sechalf(arg0
[channel
]));
1267 /* skip over the regs populated above:
1270 brw_pop_insn_state(p
);
1272 if (c
->key
.source_depth_to_render_target
)
1274 if (c
->key
.computes_depth
)
1275 brw_MOV(p
, brw_message_reg(nr
), arg2
[2]);
1277 brw_MOV(p
, brw_message_reg(nr
), arg1
[1]); /* ? */
1282 if (c
->key
.dest_depth_reg
)
1284 GLuint comp
= c
->key
.dest_depth_reg
/ 2;
1285 GLuint off
= c
->key
.dest_depth_reg
% 2;
1288 brw_push_insn_state(p
);
1289 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1291 brw_MOV(p
, brw_message_reg(nr
), offset(arg1
[comp
],1));
1293 brw_MOV(p
, brw_message_reg(nr
+1), arg1
[comp
+1]);
1294 brw_pop_insn_state(p
);
1297 brw_MOV(p
, brw_message_reg(nr
), arg1
[comp
]);
1302 if (!c
->key
.runtime_check_aads_emit
) {
1303 if (c
->key
.aa_dest_stencil_reg
)
1304 emit_aa(c
, arg1
, 2);
1306 fire_fb_write(c
, 0, nr
, target
, eot
);
1309 struct brw_reg v1_null_ud
= vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD
));
1310 struct brw_reg ip
= brw_ip_reg();
1311 struct brw_instruction
*jmp
;
1313 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1314 brw_set_conditionalmod(p
, BRW_CONDITIONAL_Z
);
1317 get_element_ud(brw_vec8_grf(1,0), 6),
1320 jmp
= brw_JMPI(p
, ip
, ip
, brw_imm_w(0));
1322 emit_aa(c
, arg1
, 2);
1323 fire_fb_write(c
, 0, nr
, target
, eot
);
1324 /* note - thread killed in subroutine */
1326 brw_land_fwd_jump(p
, jmp
);
1328 /* ELSE: Shuffle up one register to fill in the hole left for AA:
1330 fire_fb_write(c
, 1, nr
-1, target
, eot
);
1335 * Move a GPR to scratch memory.
1337 static void emit_spill( struct brw_wm_compile
*c
,
1341 struct brw_compile
*p
= &c
->func
;
1344 mov (16) m2.0<1>:ud r2.0<8;8,1>:ud { Align1 Compr }
1346 brw_MOV(p
, brw_message_reg(2), reg
);
1349 mov (1) r0.2<1>:d 0x00000080:d { Align1 NoMask }
1350 send (16) null.0<1>:uw m1 r0.0<8;8,1>:uw 0x053003ff:ud { Align1 }
1353 retype(vec16(brw_vec8_grf(0, 0)), BRW_REGISTER_TYPE_UW
),
1359 * Load a GPR from scratch memory.
1361 static void emit_unspill( struct brw_wm_compile
*c
,
1365 struct brw_compile
*p
= &c
->func
;
1367 /* Slot 0 is the undef value.
1370 brw_MOV(p
, reg
, brw_imm_f(0));
1375 mov (1) r0.2<1>:d 0x000000c0:d { Align1 NoMask }
1376 send (16) r110.0<1>:uw m1 r0.0<8;8,1>:uw 0x041243ff:ud { Align1 }
1380 retype(vec16(reg
), BRW_REGISTER_TYPE_UW
),
1386 * Retrieve up to 4 GEN4 register pairs for the given wm reg:
1387 * Args with unspill_reg != 0 will be loaded from scratch memory.
1389 static void get_argument_regs( struct brw_wm_compile
*c
,
1390 struct brw_wm_ref
*arg
[],
1391 struct brw_reg
*regs
)
1395 for (i
= 0; i
< 4; i
++) {
1397 if (arg
[i
]->unspill_reg
)
1399 brw_vec8_grf(arg
[i
]->unspill_reg
, 0),
1400 arg
[i
]->value
->spill_slot
);
1402 regs
[i
] = arg
[i
]->hw_reg
;
1405 regs
[i
] = brw_null_reg();
1412 * For values that have a spill_slot!=0, write those regs to scratch memory.
1414 static void spill_values( struct brw_wm_compile
*c
,
1415 struct brw_wm_value
*values
,
1420 for (i
= 0; i
< nr
; i
++)
1421 if (values
[i
].spill_slot
)
1422 emit_spill(c
, values
[i
].hw_reg
, values
[i
].spill_slot
);
1426 /* Emit the fragment program instructions here.
1428 void brw_wm_emit( struct brw_wm_compile
*c
)
1430 struct brw_compile
*p
= &c
->func
;
1433 brw_set_compression_control(p
, BRW_COMPRESSION_COMPRESSED
);
1435 /* Check if any of the payload regs need to be spilled:
1437 spill_values(c
, c
->payload
.depth
, 4);
1438 spill_values(c
, c
->creg
, c
->nr_creg
);
1439 spill_values(c
, c
->payload
.input_interp
, FRAG_ATTRIB_MAX
);
1442 for (insn
= 0; insn
< c
->nr_insns
; insn
++) {
1444 struct brw_wm_instruction
*inst
= &c
->instruction
[insn
];
1445 struct brw_reg args
[3][4], dst
[4];
1446 GLuint i
, dst_flags
;
1448 /* Get argument regs:
1450 for (i
= 0; i
< 3; i
++)
1451 get_argument_regs(c
, inst
->src
[i
], args
[i
]);
1455 for (i
= 0; i
< 4; i
++)
1457 dst
[i
] = inst
->dst
[i
]->hw_reg
;
1459 dst
[i
] = brw_null_reg();
1463 dst_flags
= inst
->writemask
;
1465 dst_flags
|= SATURATE
;
1467 switch (inst
->opcode
) {
1468 /* Generated instructions for calculating triangle interpolants:
1471 emit_pixel_xy(c
, dst
, dst_flags
);
1475 emit_delta_xy(p
, dst
, dst_flags
, args
[0]);
1479 emit_wpos_xy(c
, dst
, dst_flags
, args
[0]);
1483 emit_pixel_w(c
, dst
, dst_flags
, args
[0], args
[1]);
1487 emit_linterp(p
, dst
, dst_flags
, args
[0], args
[1]);
1491 emit_pinterp(p
, dst
, dst_flags
, args
[0], args
[1], args
[2]);
1495 emit_cinterp(p
, dst
, dst_flags
, args
[0]);
1499 emit_fb_write(c
, args
[0], args
[1], args
[2], inst
->target
, inst
->eot
);
1502 case WM_FRONTFACING
:
1503 emit_frontfacing(p
, dst
, dst_flags
);
1506 /* Straightforward arithmetic:
1509 emit_alu2(p
, brw_ADD
, dst
, dst_flags
, args
[0], args
[1]);
1513 emit_alu1(p
, brw_FRC
, dst
, dst_flags
, args
[0]);
1517 emit_alu1(p
, brw_RNDD
, dst
, dst_flags
, args
[0]);
1521 emit_ddxy(p
, dst
, dst_flags
, GL_TRUE
, args
[0]);
1525 emit_ddxy(p
, dst
, dst_flags
, GL_FALSE
, args
[0]);
1529 emit_dp3(p
, dst
, dst_flags
, args
[0], args
[1]);
1533 emit_dp4(p
, dst
, dst_flags
, args
[0], args
[1]);
1537 emit_dph(p
, dst
, dst_flags
, args
[0], args
[1]);
1541 emit_alu1(p
, brw_RNDZ
, dst
, dst_flags
, args
[0]);
1545 emit_lrp(p
, dst
, dst_flags
, args
[0], args
[1], args
[2]);
1549 emit_mad(p
, dst
, dst_flags
, args
[0], args
[1], args
[2]);
1554 emit_alu1(p
, brw_MOV
, dst
, dst_flags
, args
[0]);
1558 emit_alu2(p
, brw_MUL
, dst
, dst_flags
, args
[0], args
[1]);
1562 emit_xpd(p
, dst
, dst_flags
, args
[0], args
[1]);
1565 /* Higher math functions:
1568 emit_math1(c
, BRW_MATH_FUNCTION_INV
, dst
, dst_flags
, args
[0]);
1572 emit_math1(c
, BRW_MATH_FUNCTION_RSQ
, dst
, dst_flags
, args
[0]);
1576 emit_math1(c
, BRW_MATH_FUNCTION_SIN
, dst
, dst_flags
, args
[0]);
1580 emit_math1(c
, BRW_MATH_FUNCTION_COS
, dst
, dst_flags
, args
[0]);
1584 emit_math1(c
, BRW_MATH_FUNCTION_EXP
, dst
, dst_flags
, args
[0]);
1588 emit_math1(c
, BRW_MATH_FUNCTION_LOG
, dst
, dst_flags
, args
[0]);
1592 /* There is an scs math function, but it would need some
1593 * fixup for 16-element execution.
1595 if (dst_flags
& WRITEMASK_X
)
1596 emit_math1(c
, BRW_MATH_FUNCTION_COS
, dst
, (dst_flags
&SATURATE
)|WRITEMASK_X
, args
[0]);
1597 if (dst_flags
& WRITEMASK_Y
)
1598 emit_math1(c
, BRW_MATH_FUNCTION_SIN
, dst
+1, (dst_flags
&SATURATE
)|WRITEMASK_X
, args
[0]);
1602 emit_math2(c
, BRW_MATH_FUNCTION_POW
, dst
, dst_flags
, args
[0], args
[1]);
1608 emit_cmp(p
, dst
, dst_flags
, args
[0], args
[1], args
[2]);
1612 emit_max(p
, dst
, dst_flags
, args
[0], args
[1]);
1616 emit_min(p
, dst
, dst_flags
, args
[0], args
[1]);
1620 emit_slt(p
, dst
, dst_flags
, args
[0], args
[1]);
1624 emit_sle(p
, dst
, dst_flags
, args
[0], args
[1]);
1627 emit_sgt(p
, dst
, dst_flags
, args
[0], args
[1]);
1630 emit_sge(p
, dst
, dst_flags
, args
[0], args
[1]);
1633 emit_seq(p
, dst
, dst_flags
, args
[0], args
[1]);
1636 emit_sne(p
, dst
, dst_flags
, args
[0], args
[1]);
1640 emit_lit(c
, dst
, dst_flags
, args
[0]);
1643 /* Texturing operations:
1646 emit_tex(c
, dst
, dst_flags
, args
[0], c
->payload
.depth
[0].hw_reg
,
1647 inst
->tex_idx
, inst
->tex_unit
,
1652 emit_txb(c
, dst
, dst_flags
, args
[0], c
->payload
.depth
[0].hw_reg
,
1653 inst
->tex_idx
, inst
->tex_unit
);
1657 emit_kil(c
, args
[0]);
1665 printf("Unsupported opcode %i (%s) in fragment shader\n",
1666 inst
->opcode
, inst
->opcode
< MAX_OPCODE
?
1667 _mesa_opcode_string(inst
->opcode
) :
1671 for (i
= 0; i
< 4; i
++)
1672 if (inst
->dst
[i
] && inst
->dst
[i
]->spill_slot
)
1674 inst
->dst
[i
]->hw_reg
,
1675 inst
->dst
[i
]->spill_slot
);
1678 if (INTEL_DEBUG
& DEBUG_WM
) {
1681 printf("wm-native:\n");
1682 for (i
= 0; i
< p
->nr_insn
; i
++)
1683 brw_disasm(stderr
, &p
->store
[i
]);