2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **********************************************************************/
29 * Keith Whitwell <keith@tungstengraphics.com>
33 #include "main/macros.h"
34 #include "brw_context.h"
/* can_do_pln: predicate consulted by emit_pixel_w(), emit_linterp() and
 * emit_pinterp() before they choose brw_PLN over a brw_LINE + brw_MAC pair.
 *
 * Tests visible in this listing:
 *   - deltas[1].nr != deltas[0].nr + 1  (the two delta registers are not
 *     consecutive)
 *   - intel->gen < 6 and deltas[0].nr is odd
 *
 * NOTE(review): the statements guarded by these tests, the use of the local
 * `brw`, and the final return are not part of this listing; presumably each
 * test rejects PLN -- confirm against the complete file.
 */
37 static GLboolean
can_do_pln(struct intel_context
*intel
,
38 const struct brw_reg
*deltas
)
40 struct brw_context
*brw
= brw_context(&intel
->ctx
);
45 if (deltas
[1].nr
!= deltas
[0].nr
+ 1)
48 if (intel
->gen
< 6 && ((deltas
[0].nr
& 1) != 0))
54 /* Not quite sure how correct this is - need to understand horiz
55 * vs. vertical strides a little better.
/* sechalf: takes a brw_reg by value and returns a brw_reg.  In this file it
 * is applied to source operands of instructions emitted under
 * BRW_COMPRESSION_2NDHALF when dispatch_width is 16 (see emit_math2() and
 * emit_fb_write()).
 *
 * NOTE(review): the body is not part of this listing; presumably it returns
 * the register holding the second eight channels -- confirm.
 */
57 static INLINE
struct brw_reg
sechalf( struct brw_reg reg
)
66 * Computes the screen-space x,y position of the pixels.
68 * This will be used by emit_delta_xy() or emit_wpos_xy() for
69 * interpolation of attributes.
73 * R0.0 -- pixel mask, one bit for each of 4 pixels in 4 tiles,
74 * corresponding to each of the 16 execution channels.
76 * R1.0 -- triangle vertex 0.X
77 * R1.1 -- triangle vertex 0.Y
78 * R1.2 -- tile 0 x,y coords (2 packed uwords)
79 * R1.3 -- tile 1 x,y coords (2 packed uwords)
80 * R1.4 -- tile 2 x,y coords (2 packed uwords)
81 * R1.5 -- tile 3 x,y coords (2 packed uwords)
/* emit_pixel_xy: emits the per-pixel X/Y computation into dst[0] / dst[1].
 *
 * Visible behaviour:
 *   - instruction state is pushed, compression is disabled, and the state is
 *     popped at the end;
 *   - dst[0] and dst[1] are retyped to UW and widened with vec16() when
 *     c->dispatch_width is 16, vec8() in the other branch;
 *   - when WRITEMASK_X is set, the source operands are a strided view of r1
 *     (UW sub-register 4) and the packed vector immediate 0x10101010;
 *   - when WRITEMASK_Y is set, the operands are r1 UW sub-register 5 and
 *     0x11001100.
 *
 * NOTE(review): the `mask` parameter line and the opcode/destination lines of
 * the two instructions are not in this listing; the in-body comment says the
 * immediates are added to the micro-tile coordinates.
 */
86 void emit_pixel_xy(struct brw_wm_compile
*c
,
87 const struct brw_reg
*dst
,
90 struct brw_compile
*p
= &c
->func
;
91 struct brw_reg r1
= brw_vec1_grf(1, 0);
92 struct brw_reg r1_uw
= retype(r1
, BRW_REGISTER_TYPE_UW
);
93 struct brw_reg dst0_uw
, dst1_uw
;
95 brw_push_insn_state(p
);
96 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
98 if (c
->dispatch_width
== 16) {
99 dst0_uw
= vec16(retype(dst
[0], BRW_REGISTER_TYPE_UW
));
100 dst1_uw
= vec16(retype(dst
[1], BRW_REGISTER_TYPE_UW
));
102 dst0_uw
= vec8(retype(dst
[0], BRW_REGISTER_TYPE_UW
));
103 dst1_uw
= vec8(retype(dst
[1], BRW_REGISTER_TYPE_UW
));
106 /* Calculate pixel centers by adding 1 or 0 to each of the
107 * micro-tile coordinates passed in r1.
109 if (mask
& WRITEMASK_X
) {
112 stride(suboffset(r1_uw
, 4), 2, 4, 0),
113 brw_imm_v(0x10101010));
116 if (mask
& WRITEMASK_Y
) {
119 stride(suboffset(r1_uw
,5), 2, 4, 0),
120 brw_imm_v(0x11001100));
122 brw_pop_insn_state(p
);
126 * Computes the screen-space x,y distance of the pixels from the start
129 * This will be used in linterp or pinterp with the start vertex value
130 * and the Cx, Cy, and C0 coefficients passed in from the setup engine
131 * to produce interpolated attribute values.
/* emit_delta_xy: emits the X/Y deltas relative to the origin held in r1.
 *
 * Visible behaviour: asserts that mask is exactly WRITEMASK_XY, then uses
 * arg0[0] and arg0[1] viewed as UW as sources; for Y the second source is
 * the negated sub-register 1 of r1.
 *
 * NOTE(review): the `mask` parameter line, the opcode/destination lines and
 * the second source of the X instruction are not in this listing.
 */
133 void emit_delta_xy(struct brw_compile
*p
,
134 const struct brw_reg
*dst
,
136 const struct brw_reg
*arg0
)
138 struct brw_reg r1
= brw_vec1_grf(1, 0);
143 assert(mask
== WRITEMASK_XY
);
145 /* Calc delta X,Y by subtracting origin in r1 from the pixel
146 * centers produced by emit_pixel_xy().
150 retype(arg0
[0], BRW_REGISTER_TYPE_UW
),
154 retype(arg0
[1], BRW_REGISTER_TYPE_UW
),
155 negate(suboffset(r1
,1)));
159 * Computes the pixel offset from the window origin for gl_FragCoord().
/* emit_wpos_xy: emits the window-position X and Y from arg0[0] / arg0[1],
 * both read as signed words (BRW_REGISTER_TYPE_W).
 *
 * Visible behaviour:
 *   - X (WRITEMASK_X): two variants selected by
 *     c->fp->program.PixelCenterInteger; one takes arg0[0] as its only
 *     visible source, the other takes arg0[0] plus a further operand.
 *   - Y (WRITEMASK_Y): when c->fp->program.OriginUpperLeft is set, the same
 *     two variants are applied to arg0[1].  In the remaining branch the
 *     sources are the negated arg0[1] and the float immediate
 *     drawable_height - 1 + center_offset, where center_offset is 0.0 for
 *     integer pixel centers and 0.5 otherwise.
 *
 * NOTE(review): the `mask` parameter line, the `else` lines, and the
 * opcode/destination lines are not in this listing.
 */
161 void emit_wpos_xy(struct brw_wm_compile
*c
,
162 const struct brw_reg
*dst
,
164 const struct brw_reg
*arg0
)
166 struct brw_compile
*p
= &c
->func
;
168 if (mask
& WRITEMASK_X
) {
169 if (c
->fp
->program
.PixelCenterInteger
) {
173 retype(arg0
[0], BRW_REGISTER_TYPE_W
));
178 retype(arg0
[0], BRW_REGISTER_TYPE_W
),
183 if (mask
& WRITEMASK_Y
) {
184 if (c
->fp
->program
.OriginUpperLeft
) {
185 if (c
->fp
->program
.PixelCenterInteger
) {
189 retype(arg0
[1], BRW_REGISTER_TYPE_W
));
194 retype(arg0
[1], BRW_REGISTER_TYPE_W
),
198 float center_offset
= c
->fp
->program
.PixelCenterInteger
? 0.0 : 0.5;
200 /* Y' = (height - 1) - Y + center */
203 negate(retype(arg0
[1], BRW_REGISTER_TYPE_W
)),
204 brw_imm_f(c
->key
.drawable_height
- 1 + center_offset
));
/* emit_pixel_w: when WRITEMASK_W is set, interpolates the 1/w term into
 * message register 2 and then issues a BRW_MATH_FUNCTION_INV math operation.
 *
 * Visible behaviour:
 *   - interp3 is a one-element GRF view at register arg0[0].nr + 1,
 *     sub-register 4;
 *   - if can_do_pln(intel, deltas) holds, brw_PLN writes message register 2
 *     from interp3 and deltas[0]; the alternative is brw_LINE into the null
 *     register followed by brw_MAC with suboffset(interp3, 1) and deltas[1];
 *   - for dispatch_width 16 the inverse is emitted with brw_math_16() into
 *     dst[3]; the other branch passes BRW_MATH_DATA_VECTOR as well.
 *   Both use BRW_MATH_SATURATE_NONE and BRW_MATH_PRECISION_FULL.
 *
 * NOTE(review): the `mask` parameter line, the `else` lines and several math
 * call arguments are not in this listing.
 */
210 void emit_pixel_w(struct brw_wm_compile
*c
,
211 const struct brw_reg
*dst
,
213 const struct brw_reg
*arg0
,
214 const struct brw_reg
*deltas
)
216 struct brw_compile
*p
= &c
->func
;
217 struct intel_context
*intel
= &p
->brw
->intel
;
219 /* Don't need this if all you are doing is interpolating color, for
222 if (mask
& WRITEMASK_W
) {
223 struct brw_reg interp3
= brw_vec1_grf(arg0
[0].nr
+1, 4);
225 /* Calc 1/w - just linterp wpos[3] optimized by putting the
226 * result straight into a message reg.
228 if (can_do_pln(intel
, deltas
)) {
229 brw_PLN(p
, brw_message_reg(2), interp3
, deltas
[0]);
231 brw_LINE(p
, brw_null_reg(), interp3
, deltas
[0]);
232 brw_MAC(p
, brw_message_reg(2), suboffset(interp3
, 1), deltas
[1]);
236 if (c
->dispatch_width
== 16) {
237 brw_math_16(p
, dst
[3],
238 BRW_MATH_FUNCTION_INV
,
239 BRW_MATH_SATURATE_NONE
,
241 BRW_MATH_PRECISION_FULL
);
244 BRW_MATH_FUNCTION_INV
,
245 BRW_MATH_SATURATE_NONE
,
247 BRW_MATH_DATA_VECTOR
,
248 BRW_MATH_PRECISION_FULL
);
/* emit_linterp: interpolates four attribute channels into dst[0..3].
 *
 * The interpolation coefficients are read as one-element GRF views starting
 * at register nr = arg0[0].nr: (nr, 0), (nr, 4), (nr + 1, 0), (nr + 1, 4).
 * For each channel i, if can_do_pln(intel, deltas) holds a single brw_PLN
 * writes dst[i] from interp[i] and deltas[0]; the alternative is brw_LINE
 * into the null register followed by brw_MAC into dst[i] using
 * suboffset(interp[i], 1) and deltas[1].
 *
 * NOTE(review): the `mask` parameter line, the declaration of `i`, the `else`
 * line and any per-channel guard inside the loop are not in this listing.
 */
254 void emit_linterp(struct brw_compile
*p
,
255 const struct brw_reg
*dst
,
257 const struct brw_reg
*arg0
,
258 const struct brw_reg
*deltas
)
260 struct intel_context
*intel
= &p
->brw
->intel
;
261 struct brw_reg interp
[4];
262 GLuint nr
= arg0
[0].nr
;
265 interp
[0] = brw_vec1_grf(nr
, 0);
266 interp
[1] = brw_vec1_grf(nr
, 4);
267 interp
[2] = brw_vec1_grf(nr
+1, 0);
268 interp
[3] = brw_vec1_grf(nr
+1, 4);
270 for (i
= 0; i
< 4; i
++) {
272 if (can_do_pln(intel
, deltas
)) {
273 brw_PLN(p
, dst
[i
], interp
[i
], deltas
[0]);
275 brw_LINE(p
, brw_null_reg(), interp
[i
], deltas
[0]);
276 brw_MAC(p
, dst
[i
], suboffset(interp
[i
],1), deltas
[1]);
/* emit_pinterp: interpolates four attribute channels like emit_linterp() and
 * then multiplies each result by w[3].
 *
 * First loop: per channel i, brw_PLN into dst[i] when can_do_pln(intel,
 * deltas) holds, otherwise brw_LINE into the null register followed by
 * brw_MAC into dst[i].  Coefficients come from one-element GRF views at
 * (nr, 0), (nr, 4), (nr + 1, 0), (nr + 1, 4) with nr = arg0[0].nr.
 * Second loop: brw_MUL(dst[i], dst[i], w[3]).
 *
 * NOTE(review): the `mask` parameter line, the declaration of `i`, the `else`
 * line and any per-channel guards are not in this listing.
 */
283 void emit_pinterp(struct brw_compile
*p
,
284 const struct brw_reg
*dst
,
286 const struct brw_reg
*arg0
,
287 const struct brw_reg
*deltas
,
288 const struct brw_reg
*w
)
290 struct intel_context
*intel
= &p
->brw
->intel
;
291 struct brw_reg interp
[4];
292 GLuint nr
= arg0
[0].nr
;
295 interp
[0] = brw_vec1_grf(nr
, 0);
296 interp
[1] = brw_vec1_grf(nr
, 4);
297 interp
[2] = brw_vec1_grf(nr
+1, 0);
298 interp
[3] = brw_vec1_grf(nr
+1, 4);
300 for (i
= 0; i
< 4; i
++) {
302 if (can_do_pln(intel
, deltas
)) {
303 brw_PLN(p
, dst
[i
], interp
[i
], deltas
[0]);
305 brw_LINE(p
, brw_null_reg(), interp
[i
], deltas
[0]);
306 brw_MAC(p
, dst
[i
], suboffset(interp
[i
],1), deltas
[1]);
310 for (i
= 0; i
< 4; i
++) {
312 brw_MUL(p
, dst
[i
], dst
[i
], w
[3]);
/* emit_cinterp: copies a constant (non-interpolated) value into each of
 * dst[0..3].
 *
 * Per channel i the source of the brw_MOV is suboffset(interp[i], 3), where
 * interp[] are one-element GRF views at (nr, 0), (nr, 4), (nr + 1, 0),
 * (nr + 1, 4) and nr = arg0[0].nr.
 *
 * NOTE(review): the `mask` parameter line, the declaration of `i` and any
 * per-channel guard inside the loop are not in this listing.
 */
318 void emit_cinterp(struct brw_compile
*p
,
319 const struct brw_reg
*dst
,
321 const struct brw_reg
*arg0
)
323 struct brw_reg interp
[4];
324 GLuint nr
= arg0
[0].nr
;
327 interp
[0] = brw_vec1_grf(nr
, 0);
328 interp
[1] = brw_vec1_grf(nr
, 4);
329 interp
[2] = brw_vec1_grf(nr
+1, 0);
330 interp
[3] = brw_vec1_grf(nr
+1, 4);
332 for (i
= 0; i
< 4; i
++) {
334 brw_MOV(p
, dst
[i
], suboffset(interp
[i
],3)); /* TODO: optimize away like other moves */
339 /* Sets the destination channels to 1.0 or 0.0 according to glFrontFacing. */
/* emit_frontfacing: writes 0.0 to the destination channels, compares r1.6
 * (read as UD) against 1 << 31 with BRW_CONDITIONAL_L, writes 1.0 to the
 * destination channels after the compare, and finally resets the predicate
 * flag value to 0xff.  Nothing is emitted past the check on
 * mask & WRITEMASK_XYZW when no channel is selected (the guarded statement
 * is not in this listing).
 *
 * NOTE(review): `1 << 31` shifts a signed int into its sign bit, which the
 * C standard leaves undefined; `1u << 31` expresses the same constant
 * without that problem.
 * NOTE(review): the `mask` parameter line, the declaration of `i` and any
 * per-channel guards inside the loops are not in this listing.
 */
340 void emit_frontfacing(struct brw_compile
*p
,
341 const struct brw_reg
*dst
,
344 struct brw_reg r1_6ud
= retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD
);
347 if (!(mask
& WRITEMASK_XYZW
))
350 for (i
= 0; i
< 4; i
++) {
352 brw_MOV(p
, dst
[i
], brw_imm_f(0.0));
356 /* bit 31 is "primitive is back face", so checking < (1 << 31) gives
359 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, r1_6ud
, brw_imm_ud(1 << 31));
360 for (i
= 0; i
< 4; i
++) {
362 brw_MOV(p
, dst
[i
], brw_imm_f(1.0));
365 brw_set_predicate_control_flag_value(p
, 0xff);
368 /* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input
371 * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br
373 * and we're trying to produce:
376 * dst: (ss0.tr - ss0.tl) (ss0.tl - ss0.bl)
377 * (ss0.tr - ss0.tl) (ss0.tr - ss0.br)
378 * (ss0.br - ss0.bl) (ss0.tl - ss0.bl)
379 * (ss0.br - ss0.bl) (ss0.tr - ss0.br)
380 * (ss1.tr - ss1.tl) (ss1.tl - ss1.bl)
381 * (ss1.tr - ss1.tl) (ss1.tr - ss1.br)
382 * (ss1.br - ss1.bl) (ss1.tl - ss1.bl)
383 * (ss1.br - ss1.bl) (ss1.tr - ss1.br)
385 * and add another set of two more subspans if in 16-pixel dispatch mode.
387 * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result
388 * for each pair, and vertstride = 2 jumps us 2 elements after processing a
389 * pair. But for DDY, it's harder, as we want to produce the pairs swizzled
390 * between each other. We could probably do it like ddx and swizzle the right
391 * order later, but bail for now and just produce
392 * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4)
/* emit_ddxy: emits a screen-space derivative for each of four channels as
 * dst[i] = src0 - src1 (brw_ADD with a negated second source).
 *
 * Two register-region setups for src0/src1 are visible, both built on
 * arg0[i].file / arg0[i].nr with BRW_HORIZONTAL_STRIDE_0:
 *   - sub-registers 1 and 0 with BRW_VERTICAL_STRIDE_2;
 *   - sub-registers 0 and 2 with BRW_VERTICAL_STRIDE_4.
 * Saturation is switched on before the loop and off after it.
 *
 * NOTE(review): the `mask` and selector parameter lines, the conditions
 * choosing between the two setups and guarding saturation, and several
 * brw_reg() arguments are not in this listing; presumably the first setup is
 * the DDX case and the second the DDY case -- confirm.
 */
394 void emit_ddxy(struct brw_compile
*p
,
395 const struct brw_reg
*dst
,
398 const struct brw_reg
*arg0
)
401 struct brw_reg src0
, src1
;
404 brw_set_saturate(p
, 1);
405 for (i
= 0; i
< 4; i
++ ) {
408 src0
= brw_reg(arg0
[i
].file
, arg0
[i
].nr
, 1,
410 BRW_VERTICAL_STRIDE_2
,
412 BRW_HORIZONTAL_STRIDE_0
,
413 BRW_SWIZZLE_XYZW
, WRITEMASK_XYZW
);
414 src1
= brw_reg(arg0
[i
].file
, arg0
[i
].nr
, 0,
416 BRW_VERTICAL_STRIDE_2
,
418 BRW_HORIZONTAL_STRIDE_0
,
419 BRW_SWIZZLE_XYZW
, WRITEMASK_XYZW
);
421 src0
= brw_reg(arg0
[i
].file
, arg0
[i
].nr
, 0,
423 BRW_VERTICAL_STRIDE_4
,
425 BRW_HORIZONTAL_STRIDE_0
,
426 BRW_SWIZZLE_XYZW
, WRITEMASK_XYZW
);
427 src1
= brw_reg(arg0
[i
].file
, arg0
[i
].nr
, 2,
429 BRW_VERTICAL_STRIDE_4
,
431 BRW_HORIZONTAL_STRIDE_0
,
432 BRW_SWIZZLE_XYZW
, WRITEMASK_XYZW
);
434 brw_ADD(p
, dst
[i
], src0
, negate(src1
));
438 brw_set_saturate(p
, 0);
/* emit_alu1: applies a one-source instruction emitter `func` to each of four
 * channels, calling func(p, dst[i], arg0[i]).  Saturation is switched on
 * before the loop and off after it.  brw_wm_emit() passes brw_FRC and
 * brw_RNDD as `func`.
 *
 * NOTE(review): the remaining lines of the `func` prototype, the `mask`
 * parameter line, the conditions guarding the saturate calls and any
 * per-channel guard are not in this listing.
 */
441 void emit_alu1(struct brw_compile
*p
,
442 struct brw_instruction
*(*func
)(struct brw_compile
*,
445 const struct brw_reg
*dst
,
447 const struct brw_reg
*arg0
)
452 brw_set_saturate(p
, 1);
454 for (i
= 0; i
< 4; i
++) {
456 func(p
, dst
[i
], arg0
[i
]);
461 brw_set_saturate(p
, 0);
/* emit_alu2: applies a two-source instruction emitter `func` to each of four
 * channels, calling func(p, dst[i], arg0[i], arg1[i]).  Saturation is
 * switched on before the loop and off after it.  brw_wm_emit() passes
 * brw_ADD as `func`.
 *
 * NOTE(review): the remaining lines of the `func` prototype, the `mask`
 * parameter line, the conditions guarding the saturate calls and any
 * per-channel guard are not in this listing.
 */
465 void emit_alu2(struct brw_compile
*p
,
466 struct brw_instruction
*(*func
)(struct brw_compile
*,
470 const struct brw_reg
*dst
,
472 const struct brw_reg
*arg0
,
473 const struct brw_reg
*arg1
)
478 brw_set_saturate(p
, 1);
480 for (i
= 0; i
< 4; i
++) {
482 func(p
, dst
[i
], arg0
[i
], arg1
[i
]);
487 brw_set_saturate(p
, 0);
/* emit_mad: per channel, emits dst[i] = arg0[i] * arg1[i] followed by
 * dst[i] = dst[i] + arg2[i].  Only the add is emitted with saturation, and
 * only when the SATURATE bit of mask is set; saturation is cleared again
 * right after it.
 *
 * NOTE(review): the `mask` parameter line, the declaration of `i` and any
 * per-channel guard inside the loop are not in this listing.
 */
491 void emit_mad(struct brw_compile
*p
,
492 const struct brw_reg
*dst
,
494 const struct brw_reg
*arg0
,
495 const struct brw_reg
*arg1
,
496 const struct brw_reg
*arg2
)
500 for (i
= 0; i
< 4; i
++) {
502 brw_MUL(p
, dst
[i
], arg0
[i
], arg1
[i
]);
504 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
505 brw_ADD(p
, dst
[i
], dst
[i
], arg2
[i
]);
506 brw_set_saturate(p
, 0);
/* emit_lrp: per channel, emits a three-instruction sequence that uses dst[i]
 * as a temporary:
 *   brw_ADD  dst[i] = -arg0[i] + 1.0
 *   brw_MUL  null   = dst[i] * arg2[i]
 *   brw_MAC  dst[i] <- arg0[i], arg1[i]
 * Only the MAC is emitted with saturation (when mask has SATURATE set).
 *
 * NOTE(review): the MUL targets the null register and is followed by MAC,
 * so the product presumably reaches the MAC through the accumulator, giving
 * (1 - arg0) * arg2 + arg0 * arg1 -- confirm against the EU documentation.
 * The `mask` parameter line, the declaration of `i` and any per-channel
 * guard are not in this listing.
 */
511 void emit_lrp(struct brw_compile
*p
,
512 const struct brw_reg
*dst
,
514 const struct brw_reg
*arg0
,
515 const struct brw_reg
*arg1
,
516 const struct brw_reg
*arg2
)
520 /* Uses dst as a temporary:
522 for (i
= 0; i
< 4; i
++) {
524 /* Can I use the LINE instruction for this?
526 brw_ADD(p
, dst
[i
], negate(arg0
[i
]), brw_imm_f(1.0));
527 brw_MUL(p
, brw_null_reg(), dst
[i
], arg2
[i
]);
529 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
530 brw_MAC(p
, dst
[i
], arg0
[i
], arg1
[i
]);
531 brw_set_saturate(p
, 0);
/* emit_sop: shared implementation of the "set on comparison" opcodes
 * (called by emit_slt, emit_sle, emit_sgt, emit_sge, emit_seq, emit_sne).
 *
 * Per channel, inside a pushed instruction state:
 *   brw_CMP  null, cond, arg0[i], arg1[i]
 *   brw_MOV  dst[i] = 0      with predication disabled
 *   brw_MOV  dst[i] = 1.0    with normal predication
 * so the channel ends up 1.0 where the comparison held and 0 elsewhere.
 *
 * NOTE(review): the `mask` and `cond` parameter lines, the declaration of
 * `i` and any per-channel guard are not in this listing.
 */
536 void emit_sop(struct brw_compile
*p
,
537 const struct brw_reg
*dst
,
540 const struct brw_reg
*arg0
,
541 const struct brw_reg
*arg1
)
545 for (i
= 0; i
< 4; i
++) {
547 brw_push_insn_state(p
);
548 brw_CMP(p
, brw_null_reg(), cond
, arg0
[i
], arg1
[i
]);
549 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
550 brw_MOV(p
, dst
[i
], brw_imm_f(0));
551 brw_set_predicate_control(p
, BRW_PREDICATE_NORMAL
);
552 brw_MOV(p
, dst
[i
], brw_imm_f(1.0));
553 brw_pop_insn_state(p
);
558 static void emit_slt( struct brw_compile
*p
,
559 const struct brw_reg
*dst
,
561 const struct brw_reg
*arg0
,
562 const struct brw_reg
*arg1
)
564 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_L
, arg0
, arg1
);
567 static void emit_sle( struct brw_compile
*p
,
568 const struct brw_reg
*dst
,
570 const struct brw_reg
*arg0
,
571 const struct brw_reg
*arg1
)
573 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_LE
, arg0
, arg1
);
576 static void emit_sgt( struct brw_compile
*p
,
577 const struct brw_reg
*dst
,
579 const struct brw_reg
*arg0
,
580 const struct brw_reg
*arg1
)
582 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_G
, arg0
, arg1
);
585 static void emit_sge( struct brw_compile
*p
,
586 const struct brw_reg
*dst
,
588 const struct brw_reg
*arg0
,
589 const struct brw_reg
*arg1
)
591 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_GE
, arg0
, arg1
);
594 static void emit_seq( struct brw_compile
*p
,
595 const struct brw_reg
*dst
,
597 const struct brw_reg
*arg0
,
598 const struct brw_reg
*arg1
)
600 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_EQ
, arg0
, arg1
);
603 static void emit_sne( struct brw_compile
*p
,
604 const struct brw_reg
*dst
,
606 const struct brw_reg
*arg0
,
607 const struct brw_reg
*arg1
)
609 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_NEQ
, arg0
, arg1
);
/* emit_cmp: per channel, selects between arg1 and arg2 based on the sign of
 * arg0:
 *   brw_MOV  dst[i] = arg2[i]
 *   brw_CMP  null, BRW_CONDITIONAL_L, arg0[i], 0
 *   brw_MOV  dst[i] = arg1[i]      (emitted after the compare)
 * Both moves are bracketed by brw_set_saturate() driven by the SATURATE bit
 * of mask, and the predicate flag value is reset to 0xff afterwards.
 *
 * NOTE(review): the second move is presumably predicated by the compare, so
 * dst = (arg0 < 0) ? arg1 : arg2 -- confirm.  The `mask` parameter line, the
 * declaration of `i` and any per-channel guard are not in this listing.
 */
612 void emit_cmp(struct brw_compile
*p
,
613 const struct brw_reg
*dst
,
615 const struct brw_reg
*arg0
,
616 const struct brw_reg
*arg1
,
617 const struct brw_reg
*arg2
)
621 for (i
= 0; i
< 4; i
++) {
623 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
624 brw_MOV(p
, dst
[i
], arg2
[i
]);
625 brw_set_saturate(p
, 0);
627 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, arg0
[i
], brw_imm_f(0));
629 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
630 brw_MOV(p
, dst
[i
], arg1
[i
]);
631 brw_set_saturate(p
, 0);
632 brw_set_predicate_control_flag_value(p
, 0xff);
/* emit_max: per channel, emits brw_CMP with BRW_CONDITIONAL_GE on
 * arg0[i], arg1[i] followed by brw_SEL(dst[i], arg0[i], arg1[i]).  The SEL
 * is emitted with saturation when mask has SATURATE set; afterwards
 * saturation is cleared and the predicate flag value reset to 0xff.
 *
 * NOTE(review): the `mask` parameter line, the declaration of `i` and any
 * per-channel guard are not in this listing.
 */
637 void emit_max(struct brw_compile
*p
,
638 const struct brw_reg
*dst
,
640 const struct brw_reg
*arg0
,
641 const struct brw_reg
*arg1
)
645 for (i
= 0; i
< 4; i
++) {
647 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_GE
, arg0
[i
], arg1
[i
]);
649 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
650 brw_SEL(p
, dst
[i
], arg0
[i
], arg1
[i
]);
651 brw_set_saturate(p
, 0);
652 brw_set_predicate_control_flag_value(p
, 0xff);
/* emit_min: per channel, emits brw_CMP with BRW_CONDITIONAL_L on
 * arg0[i], arg1[i] followed by brw_SEL(dst[i], arg0[i], arg1[i]).  The SEL
 * is emitted with saturation when mask has SATURATE set; afterwards
 * saturation is cleared and the predicate flag value reset to 0xff.
 *
 * NOTE(review): the `mask` parameter line, the declaration of `i` and any
 * per-channel guard are not in this listing.
 */
657 void emit_min(struct brw_compile
*p
,
658 const struct brw_reg
*dst
,
660 const struct brw_reg
*arg0
,
661 const struct brw_reg
*arg1
)
665 for (i
= 0; i
< 4; i
++) {
667 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, arg0
[i
], arg1
[i
]);
669 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
670 brw_SEL(p
, dst
[i
], arg0
[i
], arg1
[i
]);
671 brw_set_saturate(p
, 0);
672 brw_set_predicate_control_flag_value(p
, 0xff);
678 void emit_dp3(struct brw_compile
*p
,
679 const struct brw_reg
*dst
,
681 const struct brw_reg
*arg0
,
682 const struct brw_reg
*arg1
)
684 int dst_chan
= _mesa_ffs(mask
& WRITEMASK_XYZW
) - 1;
686 if (!(mask
& WRITEMASK_XYZW
))
687 return; /* Do not emit dead code */
689 assert(is_power_of_two(mask
& WRITEMASK_XYZW
));
691 brw_MUL(p
, brw_null_reg(), arg0
[0], arg1
[0]);
692 brw_MAC(p
, brw_null_reg(), arg0
[1], arg1
[1]);
694 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
695 brw_MAC(p
, dst
[dst_chan
], arg0
[2], arg1
[2]);
696 brw_set_saturate(p
, 0);
700 void emit_dp4(struct brw_compile
*p
,
701 const struct brw_reg
*dst
,
703 const struct brw_reg
*arg0
,
704 const struct brw_reg
*arg1
)
706 int dst_chan
= _mesa_ffs(mask
& WRITEMASK_XYZW
) - 1;
708 if (!(mask
& WRITEMASK_XYZW
))
709 return; /* Do not emit dead code */
711 assert(is_power_of_two(mask
& WRITEMASK_XYZW
));
713 brw_MUL(p
, brw_null_reg(), arg0
[0], arg1
[0]);
714 brw_MAC(p
, brw_null_reg(), arg0
[1], arg1
[1]);
715 brw_MAC(p
, brw_null_reg(), arg0
[2], arg1
[2]);
717 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
718 brw_MAC(p
, dst
[dst_chan
], arg0
[3], arg1
[3]);
719 brw_set_saturate(p
, 0);
723 void emit_dph(struct brw_compile
*p
,
724 const struct brw_reg
*dst
,
726 const struct brw_reg
*arg0
,
727 const struct brw_reg
*arg1
)
729 const int dst_chan
= _mesa_ffs(mask
& WRITEMASK_XYZW
) - 1;
731 if (!(mask
& WRITEMASK_XYZW
))
732 return; /* Do not emit dead code */
734 assert(is_power_of_two(mask
& WRITEMASK_XYZW
));
736 brw_MUL(p
, brw_null_reg(), arg0
[0], arg1
[0]);
737 brw_MAC(p
, brw_null_reg(), arg0
[1], arg1
[1]);
738 brw_MAC(p
, dst
[dst_chan
], arg0
[2], arg1
[2]);
740 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
741 brw_ADD(p
, dst
[dst_chan
], dst
[dst_chan
], arg1
[3]);
742 brw_set_saturate(p
, 0);
/* emit_xpd: cross product over the first three channels.
 *
 * Asserts that WRITEMASK_W is not set in mask.  For each i in 0..2:
 *   brw_MUL  null   = -arg0[i2] * arg1[i1]
 *   brw_MAC  dst[i] <- arg0[i1], arg1[i2]   (saturated when mask has
 *                                            SATURATE set)
 *
 * NOTE(review): the definitions of i1 and i2, the `mask` parameter line and
 * any per-channel guard are not in this listing; for a cross product they
 * would be the two other component indices -- confirm.
 */
746 void emit_xpd(struct brw_compile
*p
,
747 const struct brw_reg
*dst
,
749 const struct brw_reg
*arg0
,
750 const struct brw_reg
*arg1
)
754 assert((mask
& WRITEMASK_W
) != WRITEMASK_W
);
756 for (i
= 0 ; i
< 3; i
++) {
761 brw_MUL(p
, brw_null_reg(), negate(arg0
[i2
]), arg1
[i1
]);
763 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
764 brw_MAC(p
, dst
[i
], arg0
[i1
], arg1
[i2
]);
765 brw_set_saturate(p
, 0);
/* emit_math1: emits a one-operand math operation whose result goes to the
 * single destination channel selected by mask.
 *
 * Visible behaviour:
 *   - returns early when mask selects no XYZW channel, and asserts that
 *     exactly one is selected;
 *   - the saturate mode is BRW_MATH_SATURATE_SATURATE when mask has SATURATE
 *     set, BRW_MATH_SATURATE_NONE otherwise;
 *   - arg0[0] is moved to message register 2;
 *   - with compression disabled a math operation is issued using
 *     BRW_MATH_DATA_VECTOR and BRW_MATH_PRECISION_FULL; for dispatch_width
 *     16 a second one is issued under BRW_COMPRESSION_2NDHALF targeting
 *     offset(dst[dst_chan], 1).
 *
 * NOTE(review): the function-selector and `mask` parameter lines and most
 * arguments of the two math calls are not in this listing.
 */
771 void emit_math1(struct brw_wm_compile
*c
,
773 const struct brw_reg
*dst
,
775 const struct brw_reg
*arg0
)
777 struct brw_compile
*p
= &c
->func
;
778 int dst_chan
= _mesa_ffs(mask
& WRITEMASK_XYZW
) - 1;
779 GLuint saturate
= ((mask
& SATURATE
) ?
780 BRW_MATH_SATURATE_SATURATE
:
781 BRW_MATH_SATURATE_NONE
);
783 if (!(mask
& WRITEMASK_XYZW
))
784 return; /* Do not emit dead code */
786 assert(is_power_of_two(mask
& WRITEMASK_XYZW
));
788 /* If compressed, this will write message reg 2,3 from arg0.x's 16
791 brw_MOV(p
, brw_message_reg(2), arg0
[0]);
793 /* Send two messages to perform all 16 operations:
795 brw_push_insn_state(p
);
796 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
803 BRW_MATH_DATA_VECTOR
,
804 BRW_MATH_PRECISION_FULL
);
806 if (c
->dispatch_width
== 16) {
807 brw_set_compression_control(p
, BRW_COMPRESSION_2NDHALF
);
809 offset(dst
[dst_chan
],1),
814 BRW_MATH_DATA_VECTOR
,
815 BRW_MATH_PRECISION_FULL
);
817 brw_pop_insn_state(p
);
/* emit_math2: emits a two-operand math operation whose result goes to the
 * single destination channel selected by mask.
 *
 * Visible behaviour:
 *   - returns early when mask selects no XYZW channel, and asserts that
 *     exactly one is selected;
 *   - inside a pushed instruction state, arg0[0] is moved to message
 *     register 2 and arg1[0] to message register 3 with compression
 *     disabled; for dispatch_width 16 the second halves (sechalf()) go to
 *     message registers 4 and 5 under BRW_COMPRESSION_2NDHALF;
 *   - a math operation is issued with BRW_MATH_DATA_VECTOR and
 *     BRW_MATH_PRECISION_FULL; for dispatch_width 16 a second one is issued
 *     under BRW_COMPRESSION_2NDHALF targeting offset(dst[dst_chan], 1).
 *
 * emit_lit() calls this with BRW_MATH_FUNCTION_POW.
 *
 * NOTE(review): the function-selector and `mask` parameter lines and most
 * arguments of the two math calls are not in this listing.
 */
821 void emit_math2(struct brw_wm_compile
*c
,
823 const struct brw_reg
*dst
,
825 const struct brw_reg
*arg0
,
826 const struct brw_reg
*arg1
)
828 struct brw_compile
*p
= &c
->func
;
829 int dst_chan
= _mesa_ffs(mask
& WRITEMASK_XYZW
) - 1;
830 GLuint saturate
= ((mask
& SATURATE
) ?
831 BRW_MATH_SATURATE_SATURATE
:
832 BRW_MATH_SATURATE_NONE
);
834 if (!(mask
& WRITEMASK_XYZW
))
835 return; /* Do not emit dead code */
837 assert(is_power_of_two(mask
& WRITEMASK_XYZW
));
839 brw_push_insn_state(p
);
841 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
842 brw_MOV(p
, brw_message_reg(2), arg0
[0]);
843 if (c
->dispatch_width
== 16) {
844 brw_set_compression_control(p
, BRW_COMPRESSION_2NDHALF
);
845 brw_MOV(p
, brw_message_reg(4), sechalf(arg0
[0]));
848 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
849 brw_MOV(p
, brw_message_reg(3), arg1
[0]);
850 if (c
->dispatch_width
== 16) {
851 brw_set_compression_control(p
, BRW_COMPRESSION_2NDHALF
);
852 brw_MOV(p
, brw_message_reg(5), sechalf(arg1
[0]));
855 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
862 BRW_MATH_DATA_VECTOR
,
863 BRW_MATH_PRECISION_FULL
);
865 /* Send two messages to perform all 16 operations:
867 if (c
->dispatch_width
== 16) {
868 brw_set_compression_control(p
, BRW_COMPRESSION_2NDHALF
);
870 offset(dst
[dst_chan
],1),
875 BRW_MATH_DATA_VECTOR
,
876 BRW_MATH_PRECISION_FULL
);
878 brw_pop_insn_state(p
);
/* emit_tex: builds the message payload for a texture sample and issues the
 * sampler message.
 *
 * Visible behaviour:
 *   - the destination is dst[0] retyped to UW, vec16 with
 *     BRW_SAMPLER_SIMD_MODE_SIMD16 for dispatch_width 16, vec8 with
 *     BRW_SAMPLER_SIMD_MODE_SIMD8 in the other branch;
 *   - a switch over the texture target (1D; 2D/RECT; 3D/CUBE) chooses which
 *     coordinates to emit (WRITEMASK_XYZ is visible for 3D/CUBE);
 *   - coordinates are moved into message registers starting at cur_mrf = 2,
 *     advancing by mrf_per_channel; a float 0 is moved in place of a
 *     coordinate in the alternative path;
 *   - in the shadow-comparison section, Ironlake gets a 0 for the cube map
 *     array index, 8-wide pre-Ironlake gets a 0 for the LOD bias, and arg[2]
 *     is moved as the reference value;
 *   - msg_type is one of the *_IGDNG sample / sample-compare messages on
 *     Ironlake, or the SIMD16 sample / sample-compare messages otherwise;
 *   - the send uses depth_payload retyped to UW, SURF_INDEX_TEXTURE(sampler)
 *     and dst_flags & WRITEMASK_XYZW.
 *
 * NOTE(review): several parameter lines, most `else`/condition lines, the
 * assignments of nr_texcoords, mrf_per_channel and response_length, and the
 * send call itself are not in this listing.
 */
882 void emit_tex(struct brw_wm_compile
*c
,
886 struct brw_reg depth_payload
,
891 struct brw_compile
*p
= &c
->func
;
892 struct intel_context
*intel
= &p
->brw
->intel
;
893 struct brw_reg dst_retyped
;
894 GLuint cur_mrf
= 2, response_length
;
895 GLuint i
, nr_texcoords
;
898 GLuint mrf_per_channel
;
901 if (c
->dispatch_width
== 16) {
904 dst_retyped
= retype(vec16(dst
[0]), BRW_REGISTER_TYPE_UW
);
905 simd_mode
= BRW_SAMPLER_SIMD_MODE_SIMD16
;
909 dst_retyped
= retype(vec8(dst
[0]), BRW_REGISTER_TYPE_UW
);
910 simd_mode
= BRW_SAMPLER_SIMD_MODE_SIMD8
;
913 /* How many input regs are there?
916 case TEXTURE_1D_INDEX
:
920 case TEXTURE_2D_INDEX
:
921 case TEXTURE_RECT_INDEX
:
925 case TEXTURE_3D_INDEX
:
926 case TEXTURE_CUBE_INDEX
:
927 emit
= WRITEMASK_XYZ
;
931 /* unexpected target */
935 /* Pre-Ironlake, the 8-wide sampler always took u,v,r. */
936 if (!intel
->is_ironlake
&& c
->dispatch_width
== 8)
939 /* For shadow comparisons, we have to supply u,v,r. */
943 /* Emit the texcoords. */
944 for (i
= 0; i
< nr_texcoords
; i
++) {
946 brw_MOV(p
, brw_message_reg(cur_mrf
), arg
[i
]);
948 brw_MOV(p
, brw_message_reg(cur_mrf
), brw_imm_f(0));
949 cur_mrf
+= mrf_per_channel
;
952 /* Fill in the shadow comparison reference value. */
954 if (intel
->is_ironlake
) {
955 /* Fill in the cube map array index value. */
956 brw_MOV(p
, brw_message_reg(cur_mrf
), brw_imm_f(0));
957 cur_mrf
+= mrf_per_channel
;
958 } else if (c
->dispatch_width
== 8) {
959 /* Fill in the LOD bias value. */
960 brw_MOV(p
, brw_message_reg(cur_mrf
), brw_imm_f(0));
961 cur_mrf
+= mrf_per_channel
;
963 brw_MOV(p
, brw_message_reg(cur_mrf
), arg
[2]);
964 cur_mrf
+= mrf_per_channel
;
967 if (intel
->is_ironlake
) {
969 msg_type
= BRW_SAMPLER_MESSAGE_SAMPLE_COMPARE_IGDNG
;
971 msg_type
= BRW_SAMPLER_MESSAGE_SAMPLE_IGDNG
;
973 /* Note that G45 and older determines shadow compare and dispatch width
974 * from message length for most messages.
976 if (c
->dispatch_width
== 16 && shadow
)
977 msg_type
= BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE
;
979 msg_type
= BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE
;
985 retype(depth_payload
, BRW_REGISTER_TYPE_UW
),
986 SURF_INDEX_TEXTURE(sampler
),
988 dst_flags
& WRITEMASK_XYZW
,
/* emit_txb: builds the message payload for a LOD-biased texture sample and
 * issues the sampler message.
 *
 * Visible behaviour:
 *   - when dispatch_width is 16 or the chip is not Ironlake, the 16-wide
 *     layout is used: mrf_per_channel = 2, destination vec16,
 *     response_length = 8, and msg_type is SAMPLE_BIAS_IGDNG on Ironlake or
 *     SIMD16_SAMPLE_BIAS otherwise;
 *   - in the other branch: SAMPLE_BIAS_IGDNG, mrf_per_channel = 1,
 *     destination vec8, response_length = 4;
 *   - the coordinates go to message registers 2 + k * mrf_per_channel for
 *     k = 0..2, with a float 0 standing in for components the target does
 *     not use (1D: one coordinate; 2D/RECT: two; 3D/CUBE: three);
 *   - arg[3] goes to message register 2 + 3 * mrf_per_channel, and
 *     msgLength = 2 + 4 * mrf_per_channel - 1.
 *
 * NOTE(review): the last visible argument of the send is always
 * BRW_SAMPLER_SIMD_MODE_SIMD16, including in the branch that selects a vec8
 * destination with response_length 4 -- confirm that this is intended.
 * Several parameter lines, the switch header, `else`/`break` lines and the
 * send call itself are not in this listing.
 */
998 void emit_txb(struct brw_wm_compile
*c
,
1001 struct brw_reg
*arg
,
1002 struct brw_reg depth_payload
,
1006 struct brw_compile
*p
= &c
->func
;
1007 struct intel_context
*intel
= &p
->brw
->intel
;
1010 GLuint mrf_per_channel
;
1011 GLuint response_length
;
1012 struct brw_reg dst_retyped
;
1014 /* The G45 and older chipsets don't support 8-wide dispatch for LOD biased
1015 * samples, so we'll use the 16-wide instruction, leave the second halves
1016 * undefined, and trust the execution mask to keep the undefined pixels
1019 if (c
->dispatch_width
== 16 || !intel
->is_ironlake
) {
1020 if (intel
->is_ironlake
)
1021 msg_type
= BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_IGDNG
;
1023 msg_type
= BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS
;
1024 mrf_per_channel
= 2;
1025 dst_retyped
= retype(vec16(dst
[0]), BRW_REGISTER_TYPE_UW
);
1026 response_length
= 8;
1028 msg_type
= BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_IGDNG
;
1029 mrf_per_channel
= 1;
1030 dst_retyped
= retype(vec8(dst
[0]), BRW_REGISTER_TYPE_UW
);
1031 response_length
= 4;
1034 /* Shadow ignored for txb. */
1036 case TEXTURE_1D_INDEX
:
1037 brw_MOV(p
, brw_message_reg(2 + 0 * mrf_per_channel
), arg
[0]);
1038 brw_MOV(p
, brw_message_reg(2 + 1 * mrf_per_channel
), brw_imm_f(0));
1039 brw_MOV(p
, brw_message_reg(2 + 2 * mrf_per_channel
), brw_imm_f(0));
1041 case TEXTURE_2D_INDEX
:
1042 case TEXTURE_RECT_INDEX
:
1043 brw_MOV(p
, brw_message_reg(2 + 0 * mrf_per_channel
), arg
[0]);
1044 brw_MOV(p
, brw_message_reg(2 + 1 * mrf_per_channel
), arg
[1]);
1045 brw_MOV(p
, brw_message_reg(2 + 2 * mrf_per_channel
), brw_imm_f(0));
1047 case TEXTURE_3D_INDEX
:
1048 case TEXTURE_CUBE_INDEX
:
1049 brw_MOV(p
, brw_message_reg(2 + 0 * mrf_per_channel
), arg
[0]);
1050 brw_MOV(p
, brw_message_reg(2 + 1 * mrf_per_channel
), arg
[1]);
1051 brw_MOV(p
, brw_message_reg(2 + 2 * mrf_per_channel
), arg
[2]);
1054 /* unexpected target */
1058 brw_MOV(p
, brw_message_reg(2 + 3 * mrf_per_channel
), arg
[3]);
1059 msgLength
= 2 + 4 * mrf_per_channel
- 1;
1064 retype(depth_payload
, BRW_REGISTER_TYPE_UW
),
1065 SURF_INDEX_TEXTURE(sampler
),
1067 dst_flags
& WRITEMASK_XYZW
,
1073 BRW_SAMPLER_SIMD_MODE_SIMD16
);
/* emit_lit: emits the Y and Z results of the LIT opcode (the assert requires
 * that neither X nor W is in mask).
 *
 * Visible behaviour:
 *   - Y: dst[1] = arg0[0], saturated when mask has SATURATE set;
 *   - Z: a BRW_MATH_FUNCTION_POW is emitted through emit_math2() with
 *     writemask WRITEMASK_X plus the SATURATE bit of mask;
 *   - afterwards arg0[0] is compared against 0 with BRW_CONDITIONAL_LE,
 *     0 is moved into dst[1] and/or dst[2] for the channels in mask, and
 *     predication is switched back to BRW_PREDICATE_NONE.
 *
 * NOTE(review): the zeroing moves are presumably predicated by the compare
 * (arg0[0] <= 0) -- confirm.  The `mask` parameter line and the destination
 * and operand arguments of the emit_math2() call are not in this listing.
 */
1077 static void emit_lit(struct brw_wm_compile
*c
,
1078 const struct brw_reg
*dst
,
1080 const struct brw_reg
*arg0
)
1082 struct brw_compile
*p
= &c
->func
;
1084 assert((mask
& WRITEMASK_XW
) == 0);
1086 if (mask
& WRITEMASK_Y
) {
1087 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
1088 brw_MOV(p
, dst
[1], arg0
[0]);
1089 brw_set_saturate(p
, 0);
1092 if (mask
& WRITEMASK_Z
) {
1093 emit_math2(c
, BRW_MATH_FUNCTION_POW
,
1095 WRITEMASK_X
| (mask
& SATURATE
),
1100 /* Ordinarily you'd use an iff statement to skip or shortcircuit
1101 * some of the POW calculations above, but 16-wide iff statements
1102 * seem to lock c1 hardware, so this is a nasty workaround:
1104 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_LE
, arg0
[0], brw_imm_f(0));
1106 if (mask
& WRITEMASK_Y
)
1107 brw_MOV(p
, dst
[1], brw_imm_f(0));
1109 if (mask
& WRITEMASK_Z
)
1110 brw_MOV(p
, dst
[2], brw_imm_f(0));
1112 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
1116 /* Kill pixel - set execution mask to zero for those pixels which
/* emit_kil: updates the pixel mask in r0.0 (read as UW) from a comparison
 * of each of the four arg0 channels against 0.
 *
 * For each channel i, an inner loop over j < i uses memcmp() to detect an
 * arg0[j] identical to arg0[i], so the same register is not tested twice.
 * The emitted sequence, inside a pushed instruction state, is:
 *   brw_CMP  null, BRW_CONDITIONAL_GE, arg0[i], 0
 *   predicate flag value reset to 0xff, compression disabled
 *   brw_AND  r0uw = flag register & r0uw
 *
 * NOTE(review): the declarations of `i` and `j` and the statements that act
 * on a memcmp() match are not in this listing.
 */
1119 static void emit_kil( struct brw_wm_compile
*c
,
1120 struct brw_reg
*arg0
)
1122 struct brw_compile
*p
= &c
->func
;
1123 struct brw_reg r0uw
= retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW
);
1126 for (i
= 0; i
< 4; i
++) {
1127 /* Check if we've already done the comparison for this reg
1128 * -- common when someone does KIL TEMP.wwww.
1130 for (j
= 0; j
< i
; j
++) {
1131 if (memcmp(&arg0
[j
], &arg0
[i
], sizeof(arg0
[0])) == 0)
1137 brw_push_insn_state(p
);
1138 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_GE
, arg0
[i
], brw_imm_f(0));
1139 brw_set_predicate_control_flag_value(p
, 0xff);
1140 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1141 brw_AND(p
, r0uw
, brw_flag_reg(), r0uw
);
1142 brw_pop_insn_state(p
);
1146 /* KIL_NV kills the pixels that are currently executing, not based on a test
1149 static void emit_kil_nv( struct brw_wm_compile
*c
)
1151 struct brw_compile
*p
= &c
->func
;
1152 struct brw_reg r0uw
= retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW
);
1154 brw_push_insn_state(p
);
1155 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1156 brw_NOT(p
, c
->emit_mask_reg
, brw_mask_reg(1)); /* IMASK */
1157 brw_AND(p
, r0uw
, c
->emit_mask_reg
, r0uw
);
1158 brw_pop_insn_state(p
);
/* fire_fb_write: copies the r1 control information into the message and
 * sends the framebuffer write.
 *
 * Visible behaviour:
 *   - the send destination is the null register retyped to UW, vec16 for
 *     dispatch_width 16 and vec8 in the other branch;
 *   - with mask control and compression disabled (inside a pushed state),
 *     r1 (brw_vec8_grf(1, 0)) is copied to message register base_reg + 1;
 *   - the write message uses r0 (brw_vec8_grf(0, 0)) retyped to UW as a
 *     source.
 *
 * emit_fb_write() calls this as fire_fb_write(c, base_reg, nr, target, eot).
 *
 * NOTE(review): the remaining parameter lines, the declaration of `dst`, and
 * the opcode lines of the copy and of the send are not in this listing.
 */
1161 static void fire_fb_write( struct brw_wm_compile
*c
,
1167 struct brw_compile
*p
= &c
->func
;
1170 if (c
->dispatch_width
== 16)
1171 dst
= retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW
);
1173 dst
= retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW
);
1175 /* Pass through control information:
1177 /* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */
1179 brw_push_insn_state(p
);
1180 brw_set_mask_control(p
, BRW_MASK_DISABLE
); /* ? */
1181 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1183 brw_message_reg(base_reg
+ 1),
1184 brw_vec8_grf(1, 0));
1185 brw_pop_insn_state(p
);
1188 /* Send framebuffer write message: */
1189 /* send (16) null.0<1>:uw m0 r0.0<8;8,1>:uw 0x85a04000:ud { Align1 EOT } */
1193 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW
),
1201 static void emit_aa( struct brw_wm_compile
*c
,
1202 struct brw_reg
*arg1
,
1205 struct brw_compile
*p
= &c
->func
;
1206 GLuint comp
= c
->key
.aa_dest_stencil_reg
/ 2;
1207 GLuint off
= c
->key
.aa_dest_stencil_reg
% 2;
1208 struct brw_reg aa
= offset(arg1
[comp
], off
);
1210 brw_push_insn_state(p
);
1211 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
); /* ?? */
1212 brw_MOV(p
, brw_message_reg(reg
), aa
);
1213 brw_pop_insn_state(p
);
1217 /* Post-fragment-program processing. Send the results to the
1219 * \param arg0 the fragment color
1220 * \param arg1 the pass-through depth value
1221 * \param arg2 the shader-computed depth value
/* emit_fb_write: assembles the framebuffer-write message payload from the
 * shader outputs and fires the write.
 *
 * Visible behaviour:
 *   - a message slot is reserved when c->key.aa_dest_stencil_reg is set;
 *   - the four colour channels of arg0 are moved to consecutive message
 *     registers starting at `nr`.  For dispatch_width 16 with
 *     brw->has_compr4, bit 7 of the message register number is set to select
 *     COMPR4 mode; in the other path the first half is written uncompressed
 *     to nr + channel and, for dispatch_width 16, the second half
 *     (sechalf(arg0[channel])) to nr + channel + 4 under
 *     BRW_COMPRESSION_2NDHALF;
 *   - with c->key.source_depth_to_render_target, the depth value moved is
 *     arg2[2] when c->key.computes_depth is set and arg1[1] otherwise;
 *   - with c->key.dest_depth_reg, the destination depth is copied from arg1
 *     using comp = dest_depth_reg / 2 and off = dest_depth_reg % 2;
 *   - without c->key.runtime_check_aads_emit, emit_aa(c, arg1, 2) is called
 *     when an AA register is configured, followed by
 *     fire_fb_write(c, 0, nr, target, eot);
 *   - with the runtime check, a conditional jump (BRW_CONDITIONAL_Z on
 *     element 6 of r1) separates a path that emits AA data and writes from
 *     base 0 from a path that writes with base 1 and length nr - 1.
 *
 * NOTE(review): the `target`/`eot` parameter lines, the declarations of `nr`
 * and `channel`, most `else` lines and several opcode lines are not in this
 * listing.
 */
1223 void emit_fb_write(struct brw_wm_compile
*c
,
1224 struct brw_reg
*arg0
,
1225 struct brw_reg
*arg1
,
1226 struct brw_reg
*arg2
,
1230 struct brw_compile
*p
= &c
->func
;
1231 struct brw_context
*brw
= p
->brw
;
1235 /* Reserve a space for AA - may not be needed:
1237 if (c
->key
.aa_dest_stencil_reg
)
1240 /* I don't really understand how this achieves the color interleave
1241 * (ie RGBARGBA) in the result: [Do the saturation here]
1243 brw_push_insn_state(p
);
1245 for (channel
= 0; channel
< 4; channel
++) {
1246 if (c
->dispatch_width
== 16 && brw
->has_compr4
) {
1247 /* By setting the high bit of the MRF register number, we indicate
1248 * that we want COMPR4 mode - instead of doing the usual destination
1249 * + 1 for the second half we get destination + 4.
1252 brw_message_reg(nr
+ channel
+ (1 << 7)),
1255 /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */
1256 /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */
1257 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1259 brw_message_reg(nr
+ channel
),
1262 if (c
->dispatch_width
== 16) {
1263 brw_set_compression_control(p
, BRW_COMPRESSION_2NDHALF
);
1265 brw_message_reg(nr
+ channel
+ 4),
1266 sechalf(arg0
[channel
]));
1270 /* skip over the regs populated above:
1273 brw_pop_insn_state(p
);
1275 if (c
->key
.source_depth_to_render_target
)
1277 if (c
->key
.computes_depth
)
1278 brw_MOV(p
, brw_message_reg(nr
), arg2
[2]);
1280 brw_MOV(p
, brw_message_reg(nr
), arg1
[1]); /* ? */
1285 if (c
->key
.dest_depth_reg
)
1287 GLuint comp
= c
->key
.dest_depth_reg
/ 2;
1288 GLuint off
= c
->key
.dest_depth_reg
% 2;
1291 brw_push_insn_state(p
);
1292 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1294 brw_MOV(p
, brw_message_reg(nr
), offset(arg1
[comp
],1));
1296 brw_MOV(p
, brw_message_reg(nr
+1), arg1
[comp
+1]);
1297 brw_pop_insn_state(p
);
1300 brw_MOV(p
, brw_message_reg(nr
), arg1
[comp
]);
1305 if (!c
->key
.runtime_check_aads_emit
) {
1306 if (c
->key
.aa_dest_stencil_reg
)
1307 emit_aa(c
, arg1
, 2);
1309 fire_fb_write(c
, 0, nr
, target
, eot
);
1312 struct brw_reg v1_null_ud
= vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD
));
1313 struct brw_reg ip
= brw_ip_reg();
1314 struct brw_instruction
*jmp
;
1316 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1317 brw_set_conditionalmod(p
, BRW_CONDITIONAL_Z
);
1320 get_element_ud(brw_vec8_grf(1,0), 6),
1323 jmp
= brw_JMPI(p
, ip
, ip
, brw_imm_w(0));
1325 emit_aa(c
, arg1
, 2);
1326 fire_fb_write(c
, 0, nr
, target
, eot
);
1327 /* note - thread killed in subroutine */
1329 brw_land_fwd_jump(p
, jmp
);
1331 /* ELSE: Shuffle up one register to fill in the hole left for AA:
1333 fire_fb_write(c
, 1, nr
-1, target
, eot
);
1338 * Move a GPR to scratch memory.
/* emit_spill: writes one register to a scratch slot.
 *
 * Visible behaviour: `reg` is moved to message register 2, and the
 * following write uses r0 (brw_vec8_grf(0, 0)) widened to vec16 and retyped
 * to UW as a source.  spill_values() calls this as
 * emit_spill(c, hw_reg, spill_slot).
 *
 * NOTE(review): the `reg` and slot parameter lines, the comment delimiters
 * around the two assembly examples, and the scratch-write call itself are
 * not in this listing.
 */
1340 static void emit_spill( struct brw_wm_compile
*c
,
1344 struct brw_compile
*p
= &c
->func
;
1347 mov (16) m2.0<1>:ud r2.0<8;8,1>:ud { Align1 Compr }
1349 brw_MOV(p
, brw_message_reg(2), reg
);
1352 mov (1) r0.2<1>:d 0x00000080:d { Align1 NoMask }
1353 send (16) null.0<1>:uw m1 r0.0<8;8,1>:uw 0x053003ff:ud { Align1 }
1356 retype(vec16(brw_vec8_grf(0, 0)), BRW_REGISTER_TYPE_UW
),
1362 * Load a GPR from scratch memory.
/* emit_unspill: loads one register from a scratch slot.
 *
 * Visible behaviour: one path moves a float 0 into `reg` (the in-body
 * comment calls slot 0 the undef value); the read path passes `reg` widened
 * to vec16 and retyped to UW.  get_argument_regs() calls this with
 * brw_vec8_grf(unspill_reg, 0) and the value's spill_slot.
 *
 * NOTE(review): the `reg` and slot parameter lines, the condition selecting
 * the zero-fill path, the comment delimiters around the assembly example,
 * and the scratch-read call itself are not in this listing.
 */
1364 static void emit_unspill( struct brw_wm_compile
*c
,
1368 struct brw_compile
*p
= &c
->func
;
1370 /* Slot 0 is the undef value.
1373 brw_MOV(p
, reg
, brw_imm_f(0));
1378 mov (1) r0.2<1>:d 0x000000c0:d { Align1 NoMask }
1379 send (16) r110.0<1>:uw m1 r0.0<8;8,1>:uw 0x041243ff:ud { Align1 }
1383 retype(vec16(reg
), BRW_REGISTER_TYPE_UW
),
1389 * Retrieve up to 4 GEN4 register pairs for the given wm reg:
1390 * Args with unspill_reg != 0 will be loaded from scratch memory.
/* get_argument_regs: fills regs[0..3] with the hardware registers of the
 * four references in arg[].
 *
 * For each i: when arg[i]->unspill_reg is non-zero, a reload is emitted into
 * brw_vec8_grf(arg[i]->unspill_reg, 0) from arg[i]->value->spill_slot;
 * regs[i] is then set to arg[i]->hw_reg.  In the alternative path regs[i] is
 * set to the null register.
 *
 * NOTE(review): the declaration of `i`, the condition choosing between the
 * two paths (presumably a NULL check on arg[i]) and the name of the reload
 * call are not in this listing -- confirm.
 */
1392 static void get_argument_regs( struct brw_wm_compile
*c
,
1393 struct brw_wm_ref
*arg
[],
1394 struct brw_reg
*regs
)
1398 for (i
= 0; i
< 4; i
++) {
1400 if (arg
[i
]->unspill_reg
)
1402 brw_vec8_grf(arg
[i
]->unspill_reg
, 0),
1403 arg
[i
]->value
->spill_slot
);
1405 regs
[i
] = arg
[i
]->hw_reg
;
1408 regs
[i
] = brw_null_reg();
1415 * For values that have a spill_slot!=0, write those regs to scratch memory.
1417 static void spill_values( struct brw_wm_compile
*c
,
1418 struct brw_wm_value
*values
,
1423 for (i
= 0; i
< nr
; i
++)
1424 if (values
[i
].spill_slot
)
1425 emit_spill(c
, values
[i
].hw_reg
, values
[i
].spill_slot
);
1429 /* Emit the fragment program instructions here.
1431 void brw_wm_emit( struct brw_wm_compile
*c
)
1433 struct brw_compile
*p
= &c
->func
;
1436 brw_set_compression_control(p
, BRW_COMPRESSION_COMPRESSED
);
1438 /* Check if any of the payload regs need to be spilled:
1440 spill_values(c
, c
->payload
.depth
, 4);
1441 spill_values(c
, c
->creg
, c
->nr_creg
);
1442 spill_values(c
, c
->payload
.input_interp
, FRAG_ATTRIB_MAX
);
1445 for (insn
= 0; insn
< c
->nr_insns
; insn
++) {
1447 struct brw_wm_instruction
*inst
= &c
->instruction
[insn
];
1448 struct brw_reg args
[3][4], dst
[4];
1449 GLuint i
, dst_flags
;
1451 /* Get argument regs:
1453 for (i
= 0; i
< 3; i
++)
1454 get_argument_regs(c
, inst
->src
[i
], args
[i
]);
1458 for (i
= 0; i
< 4; i
++)
1460 dst
[i
] = inst
->dst
[i
]->hw_reg
;
1462 dst
[i
] = brw_null_reg();
1466 dst_flags
= inst
->writemask
;
1468 dst_flags
|= SATURATE
;
1470 switch (inst
->opcode
) {
1471 /* Generated instructions for calculating triangle interpolants:
1474 emit_pixel_xy(c
, dst
, dst_flags
);
1478 emit_delta_xy(p
, dst
, dst_flags
, args
[0]);
1482 emit_wpos_xy(c
, dst
, dst_flags
, args
[0]);
1486 emit_pixel_w(c
, dst
, dst_flags
, args
[0], args
[1]);
1490 emit_linterp(p
, dst
, dst_flags
, args
[0], args
[1]);
1494 emit_pinterp(p
, dst
, dst_flags
, args
[0], args
[1], args
[2]);
1498 emit_cinterp(p
, dst
, dst_flags
, args
[0]);
1502 emit_fb_write(c
, args
[0], args
[1], args
[2], inst
->target
, inst
->eot
);
1505 case WM_FRONTFACING
:
1506 emit_frontfacing(p
, dst
, dst_flags
);
1509 /* Straightforward arithmetic:
1512 emit_alu2(p
, brw_ADD
, dst
, dst_flags
, args
[0], args
[1]);
1516 emit_alu1(p
, brw_FRC
, dst
, dst_flags
, args
[0]);
1520 emit_alu1(p
, brw_RNDD
, dst
, dst_flags
, args
[0]);
1524 emit_ddxy(p
, dst
, dst_flags
, GL_TRUE
, args
[0]);
1528 emit_ddxy(p
, dst
, dst_flags
, GL_FALSE
, args
[0]);
1532 emit_dp3(p
, dst
, dst_flags
, args
[0], args
[1]);
1536 emit_dp4(p
, dst
, dst_flags
, args
[0], args
[1]);
1540 emit_dph(p
, dst
, dst_flags
, args
[0], args
[1]);
1544 emit_alu1(p
, brw_RNDZ
, dst
, dst_flags
, args
[0]);
1548 emit_lrp(p
, dst
, dst_flags
, args
[0], args
[1], args
[2]);
1552 emit_mad(p
, dst
, dst_flags
, args
[0], args
[1], args
[2]);
1557 emit_alu1(p
, brw_MOV
, dst
, dst_flags
, args
[0]);
1561 emit_alu2(p
, brw_MUL
, dst
, dst_flags
, args
[0], args
[1]);
1565 emit_xpd(p
, dst
, dst_flags
, args
[0], args
[1]);
1568 /* Higher math functions:
1571 emit_math1(c
, BRW_MATH_FUNCTION_INV
, dst
, dst_flags
, args
[0]);
1575 emit_math1(c
, BRW_MATH_FUNCTION_RSQ
, dst
, dst_flags
, args
[0]);
1579 emit_math1(c
, BRW_MATH_FUNCTION_SIN
, dst
, dst_flags
, args
[0]);
1583 emit_math1(c
, BRW_MATH_FUNCTION_COS
, dst
, dst_flags
, args
[0]);
1587 emit_math1(c
, BRW_MATH_FUNCTION_EXP
, dst
, dst_flags
, args
[0]);
1591 emit_math1(c
, BRW_MATH_FUNCTION_LOG
, dst
, dst_flags
, args
[0]);
1595 /* There is an scs math function, but it would need some
1596 * fixup for 16-element execution.
1598 if (dst_flags
& WRITEMASK_X
)
1599 emit_math1(c
, BRW_MATH_FUNCTION_COS
, dst
, (dst_flags
&SATURATE
)|WRITEMASK_X
, args
[0]);
1600 if (dst_flags
& WRITEMASK_Y
)
1601 emit_math1(c
, BRW_MATH_FUNCTION_SIN
, dst
+1, (dst_flags
&SATURATE
)|WRITEMASK_X
, args
[0]);
1605 emit_math2(c
, BRW_MATH_FUNCTION_POW
, dst
, dst_flags
, args
[0], args
[1]);
1611 emit_cmp(p
, dst
, dst_flags
, args
[0], args
[1], args
[2]);
1615 emit_max(p
, dst
, dst_flags
, args
[0], args
[1]);
1619 emit_min(p
, dst
, dst_flags
, args
[0], args
[1]);
1623 emit_slt(p
, dst
, dst_flags
, args
[0], args
[1]);
1627 emit_sle(p
, dst
, dst_flags
, args
[0], args
[1]);
1630 emit_sgt(p
, dst
, dst_flags
, args
[0], args
[1]);
1633 emit_sge(p
, dst
, dst_flags
, args
[0], args
[1]);
1636 emit_seq(p
, dst
, dst_flags
, args
[0], args
[1]);
1639 emit_sne(p
, dst
, dst_flags
, args
[0], args
[1]);
1643 emit_lit(c
, dst
, dst_flags
, args
[0]);
1646 /* Texturing operations:
1649 emit_tex(c
, dst
, dst_flags
, args
[0], c
->payload
.depth
[0].hw_reg
,
1650 inst
->tex_idx
, inst
->tex_unit
,
1655 emit_txb(c
, dst
, dst_flags
, args
[0], c
->payload
.depth
[0].hw_reg
,
1656 inst
->tex_idx
, inst
->tex_unit
);
1660 emit_kil(c
, args
[0]);
1668 printf("Unsupported opcode %i (%s) in fragment shader\n",
1669 inst
->opcode
, inst
->opcode
< MAX_OPCODE
?
1670 _mesa_opcode_string(inst
->opcode
) :
1674 for (i
= 0; i
< 4; i
++)
1675 if (inst
->dst
[i
] && inst
->dst
[i
]->spill_slot
)
1677 inst
->dst
[i
]->hw_reg
,
1678 inst
->dst
[i
]->spill_slot
);
1681 if (INTEL_DEBUG
& DEBUG_WM
) {
1684 printf("wm-native:\n");
1685 for (i
= 0; i
< p
->nr_insn
; i
++)
1686 brw_disasm(stderr
, &p
->store
[i
]);