2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **********************************************************************/
29 * Keith Whitwell <keith@tungstengraphics.com>
33 #include "main/macros.h"
34 #include "brw_context.h"
37 static GLboolean
can_do_pln(struct intel_context
*intel
,
38 const struct brw_reg
*deltas
)
40 struct brw_context
*brw
= brw_context(&intel
->ctx
);
45 if (deltas
[1].nr
!= deltas
[0].nr
+ 1)
48 if (intel
->gen
< 6 && ((deltas
[0].nr
& 1) != 0))
54 /* Not quite sure how correct this is - need to understand horiz
55 * vs. vertical strides a little better.
57 static INLINE
struct brw_reg
sechalf( struct brw_reg reg
)
64 /* Return the SrcReg index of the channels that can be immediate float operands
65 * instead of usage of PROGRAM_CONSTANT values through push/pull.
68 brw_wm_arg_can_be_immediate(enum prog_opcode opcode
, int arg
)
70 int opcode_array
[] = {
89 /* These opcodes get broken down in a way that allows two
90 * args to be immediates.
92 if (opcode
== OPCODE_MAD
|| opcode
== OPCODE_LRP
) {
93 if (arg
== 1 || arg
== 2)
97 if (opcode
>= ARRAY_SIZE(opcode_array
)) /* valid indices are 0 .. ARRAY_SIZE - 1; opcode == ARRAY_SIZE would read one past the end */
100 return arg
== opcode_array
[opcode
] - 1;
104 * Computes the screen-space x,y position of the pixels.
106 * This will be used by emit_delta_xy() or emit_wpos_xy() for
107 * interpolation of attributes.
111 * R0.0 -- pixel mask, one bit for each of 4 pixels in 4 tiles,
112 * corresponding to each of the 16 execution channels.
114 * R1.0 -- triangle vertex 0.X
115 * R1.1 -- triangle vertex 0.Y
116 * R1.2 -- tile 0 x,y coords (2 packed uwords)
117 * R1.3 -- tile 1 x,y coords (2 packed uwords)
118 * R1.4 -- tile 2 x,y coords (2 packed uwords)
119 * R1.5 -- tile 3 x,y coords (2 packed uwords)
124 void emit_pixel_xy(struct brw_wm_compile
*c
,
125 const struct brw_reg
*dst
,
128 struct brw_compile
*p
= &c
->func
;
129 struct brw_reg r1
= brw_vec1_grf(1, 0);
130 struct brw_reg r1_uw
= retype(r1
, BRW_REGISTER_TYPE_UW
);
131 struct brw_reg dst0_uw
, dst1_uw
;
133 brw_push_insn_state(p
);
134 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
136 if (c
->dispatch_width
== 16) {
137 dst0_uw
= vec16(retype(dst
[0], BRW_REGISTER_TYPE_UW
));
138 dst1_uw
= vec16(retype(dst
[1], BRW_REGISTER_TYPE_UW
));
140 dst0_uw
= vec8(retype(dst
[0], BRW_REGISTER_TYPE_UW
));
141 dst1_uw
= vec8(retype(dst
[1], BRW_REGISTER_TYPE_UW
));
144 /* Calculate pixel centers by adding 1 or 0 to each of the
145 * micro-tile coordinates passed in r1.
147 if (mask
& WRITEMASK_X
) {
150 stride(suboffset(r1_uw
, 4), 2, 4, 0),
151 brw_imm_v(0x10101010));
154 if (mask
& WRITEMASK_Y
) {
157 stride(suboffset(r1_uw
,5), 2, 4, 0),
158 brw_imm_v(0x11001100));
160 brw_pop_insn_state(p
);
164 * Computes the screen-space x,y distance of the pixels from the start
167 * This will be used in linterp or pinterp with the start vertex value
168 * and the Cx, Cy, and C0 coefficients passed in from the setup engine
169 * to produce interpolated attribute values.
171 void emit_delta_xy(struct brw_compile
*p
,
172 const struct brw_reg
*dst
,
174 const struct brw_reg
*arg0
)
176 struct brw_reg r1
= brw_vec1_grf(1, 0);
181 assert(mask
== WRITEMASK_XY
);
183 /* Calc delta X,Y by subtracting origin in r1 from the pixel
184 * centers produced by emit_pixel_xy().
188 retype(arg0
[0], BRW_REGISTER_TYPE_UW
),
192 retype(arg0
[1], BRW_REGISTER_TYPE_UW
),
193 negate(suboffset(r1
,1)));
197 * Computes the pixel offset from the window origin for gl_FragCoord().
199 void emit_wpos_xy(struct brw_wm_compile
*c
,
200 const struct brw_reg
*dst
,
202 const struct brw_reg
*arg0
)
204 struct brw_compile
*p
= &c
->func
;
206 if (mask
& WRITEMASK_X
) {
207 if (c
->fp
->program
.PixelCenterInteger
) {
211 retype(arg0
[0], BRW_REGISTER_TYPE_W
));
216 retype(arg0
[0], BRW_REGISTER_TYPE_W
),
221 if (mask
& WRITEMASK_Y
) {
222 if (c
->fp
->program
.OriginUpperLeft
) {
223 if (c
->fp
->program
.PixelCenterInteger
) {
227 retype(arg0
[1], BRW_REGISTER_TYPE_W
));
232 retype(arg0
[1], BRW_REGISTER_TYPE_W
),
236 float center_offset
= c
->fp
->program
.PixelCenterInteger
? 0.0 : 0.5;
238 /* Y' = (height - 1) - Y + center */
241 negate(retype(arg0
[1], BRW_REGISTER_TYPE_W
)),
242 brw_imm_f(c
->key
.drawable_height
- 1 + center_offset
));
248 void emit_pixel_w(struct brw_wm_compile
*c
,
249 const struct brw_reg
*dst
,
251 const struct brw_reg
*arg0
,
252 const struct brw_reg
*deltas
)
254 struct brw_compile
*p
= &c
->func
;
255 struct intel_context
*intel
= &p
->brw
->intel
;
257 /* Don't need this if all you are doing is interpolating color, for
260 if (mask
& WRITEMASK_W
) {
261 struct brw_reg interp3
= brw_vec1_grf(arg0
[0].nr
+1, 4);
263 /* Calc 1/w - just linterp wpos[3] optimized by putting the
264 * result straight into a message reg.
266 if (can_do_pln(intel
, deltas
)) {
267 brw_PLN(p
, brw_message_reg(2), interp3
, deltas
[0]);
269 brw_LINE(p
, brw_null_reg(), interp3
, deltas
[0]);
270 brw_MAC(p
, brw_message_reg(2), suboffset(interp3
, 1), deltas
[1]);
274 if (c
->dispatch_width
== 16) {
275 brw_math_16(p
, dst
[3],
276 BRW_MATH_FUNCTION_INV
,
277 BRW_MATH_SATURATE_NONE
,
279 BRW_MATH_PRECISION_FULL
);
282 BRW_MATH_FUNCTION_INV
,
283 BRW_MATH_SATURATE_NONE
,
285 BRW_MATH_DATA_VECTOR
,
286 BRW_MATH_PRECISION_FULL
);
292 void emit_linterp(struct brw_compile
*p
,
293 const struct brw_reg
*dst
,
295 const struct brw_reg
*arg0
,
296 const struct brw_reg
*deltas
)
298 struct intel_context
*intel
= &p
->brw
->intel
;
299 struct brw_reg interp
[4];
300 GLuint nr
= arg0
[0].nr
;
303 interp
[0] = brw_vec1_grf(nr
, 0);
304 interp
[1] = brw_vec1_grf(nr
, 4);
305 interp
[2] = brw_vec1_grf(nr
+1, 0);
306 interp
[3] = brw_vec1_grf(nr
+1, 4);
308 for (i
= 0; i
< 4; i
++) {
310 if (can_do_pln(intel
, deltas
)) {
311 brw_PLN(p
, dst
[i
], interp
[i
], deltas
[0]);
313 brw_LINE(p
, brw_null_reg(), interp
[i
], deltas
[0]);
314 brw_MAC(p
, dst
[i
], suboffset(interp
[i
],1), deltas
[1]);
321 void emit_pinterp(struct brw_compile
*p
,
322 const struct brw_reg
*dst
,
324 const struct brw_reg
*arg0
,
325 const struct brw_reg
*deltas
,
326 const struct brw_reg
*w
)
328 struct intel_context
*intel
= &p
->brw
->intel
;
329 struct brw_reg interp
[4];
330 GLuint nr
= arg0
[0].nr
;
333 interp
[0] = brw_vec1_grf(nr
, 0);
334 interp
[1] = brw_vec1_grf(nr
, 4);
335 interp
[2] = brw_vec1_grf(nr
+1, 0);
336 interp
[3] = brw_vec1_grf(nr
+1, 4);
338 for (i
= 0; i
< 4; i
++) {
340 if (can_do_pln(intel
, deltas
)) {
341 brw_PLN(p
, dst
[i
], interp
[i
], deltas
[0]);
343 brw_LINE(p
, brw_null_reg(), interp
[i
], deltas
[0]);
344 brw_MAC(p
, dst
[i
], suboffset(interp
[i
],1), deltas
[1]);
348 for (i
= 0; i
< 4; i
++) {
350 brw_MUL(p
, dst
[i
], dst
[i
], w
[3]);
356 void emit_cinterp(struct brw_compile
*p
,
357 const struct brw_reg
*dst
,
359 const struct brw_reg
*arg0
)
361 struct brw_reg interp
[4];
362 GLuint nr
= arg0
[0].nr
;
365 interp
[0] = brw_vec1_grf(nr
, 0);
366 interp
[1] = brw_vec1_grf(nr
, 4);
367 interp
[2] = brw_vec1_grf(nr
+1, 0);
368 interp
[3] = brw_vec1_grf(nr
+1, 4);
370 for (i
= 0; i
< 4; i
++) {
372 brw_MOV(p
, dst
[i
], suboffset(interp
[i
],3)); /* TODO: optimize away like other moves */
377 /* Sets the destination channels to 1.0 or 0.0 according to glFrontFacing. */
378 void emit_frontfacing(struct brw_compile
*p
,
379 const struct brw_reg
*dst
,
382 struct brw_reg r1_6ud
= retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD
);
385 if (!(mask
& WRITEMASK_XYZW
))
388 for (i
= 0; i
< 4; i
++) {
390 brw_MOV(p
, dst
[i
], brw_imm_f(0.0));
394 /* bit 31 is "primitive is back face", so checking < (1 << 31) gives
397 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, r1_6ud
, brw_imm_ud(1u << 31)); /* unsigned constant: shifting a signed 1 into bit 31 is undefined behavior */
398 for (i
= 0; i
< 4; i
++) {
400 brw_MOV(p
, dst
[i
], brw_imm_f(1.0));
403 brw_set_predicate_control_flag_value(p
, 0xff);
406 /* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input
409 * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br
411 * and we're trying to produce:
414 * dst: (ss0.tr - ss0.tl) (ss0.tl - ss0.bl)
415 * (ss0.tr - ss0.tl) (ss0.tr - ss0.br)
416 * (ss0.br - ss0.bl) (ss0.tl - ss0.bl)
417 * (ss0.br - ss0.bl) (ss0.tr - ss0.br)
418 * (ss1.tr - ss1.tl) (ss1.tl - ss1.bl)
419 * (ss1.tr - ss1.tl) (ss1.tr - ss1.br)
420 * (ss1.br - ss1.bl) (ss1.tl - ss1.bl)
421 * (ss1.br - ss1.bl) (ss1.tr - ss1.br)
423 * and add another set of two more subspans if in 16-pixel dispatch mode.
425 * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result
426 * for each pair, and vertstride = 2 jumps us 2 elements after processing a
427 * pair. But for DDY, it's harder, as we want to produce the pairs swizzled
428 * between each other. We could probably do it like ddx and swizzle the right
429 * order later, but bail for now and just produce
430 * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4)
432 void emit_ddxy(struct brw_compile
*p
,
433 const struct brw_reg
*dst
,
436 const struct brw_reg
*arg0
)
439 struct brw_reg src0
, src1
;
442 brw_set_saturate(p
, 1);
443 for (i
= 0; i
< 4; i
++ ) {
446 src0
= brw_reg(arg0
[i
].file
, arg0
[i
].nr
, 1,
448 BRW_VERTICAL_STRIDE_2
,
450 BRW_HORIZONTAL_STRIDE_0
,
451 BRW_SWIZZLE_XYZW
, WRITEMASK_XYZW
);
452 src1
= brw_reg(arg0
[i
].file
, arg0
[i
].nr
, 0,
454 BRW_VERTICAL_STRIDE_2
,
456 BRW_HORIZONTAL_STRIDE_0
,
457 BRW_SWIZZLE_XYZW
, WRITEMASK_XYZW
);
459 src0
= brw_reg(arg0
[i
].file
, arg0
[i
].nr
, 0,
461 BRW_VERTICAL_STRIDE_4
,
463 BRW_HORIZONTAL_STRIDE_0
,
464 BRW_SWIZZLE_XYZW
, WRITEMASK_XYZW
);
465 src1
= brw_reg(arg0
[i
].file
, arg0
[i
].nr
, 2,
467 BRW_VERTICAL_STRIDE_4
,
469 BRW_HORIZONTAL_STRIDE_0
,
470 BRW_SWIZZLE_XYZW
, WRITEMASK_XYZW
);
472 brw_ADD(p
, dst
[i
], src0
, negate(src1
));
476 brw_set_saturate(p
, 0);
479 void emit_alu1(struct brw_compile
*p
,
480 struct brw_instruction
*(*func
)(struct brw_compile
*,
483 const struct brw_reg
*dst
,
485 const struct brw_reg
*arg0
)
490 brw_set_saturate(p
, 1);
492 for (i
= 0; i
< 4; i
++) {
494 func(p
, dst
[i
], arg0
[i
]);
499 brw_set_saturate(p
, 0);
503 void emit_alu2(struct brw_compile
*p
,
504 struct brw_instruction
*(*func
)(struct brw_compile
*,
508 const struct brw_reg
*dst
,
510 const struct brw_reg
*arg0
,
511 const struct brw_reg
*arg1
)
516 brw_set_saturate(p
, 1);
518 for (i
= 0; i
< 4; i
++) {
520 func(p
, dst
[i
], arg0
[i
], arg1
[i
]);
525 brw_set_saturate(p
, 0);
529 void emit_mad(struct brw_compile
*p
,
530 const struct brw_reg
*dst
,
532 const struct brw_reg
*arg0
,
533 const struct brw_reg
*arg1
,
534 const struct brw_reg
*arg2
)
538 for (i
= 0; i
< 4; i
++) {
540 brw_MUL(p
, dst
[i
], arg0
[i
], arg1
[i
]);
542 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
543 brw_ADD(p
, dst
[i
], dst
[i
], arg2
[i
]);
544 brw_set_saturate(p
, 0);
549 void emit_lrp(struct brw_compile
*p
,
550 const struct brw_reg
*dst
,
552 const struct brw_reg
*arg0
,
553 const struct brw_reg
*arg1
,
554 const struct brw_reg
*arg2
)
558 /* Uses dst as a temporary:
560 for (i
= 0; i
< 4; i
++) {
562 /* Can I use the LINE instruction for this?
564 brw_ADD(p
, dst
[i
], negate(arg0
[i
]), brw_imm_f(1.0));
565 brw_MUL(p
, brw_null_reg(), dst
[i
], arg2
[i
]);
567 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
568 brw_MAC(p
, dst
[i
], arg0
[i
], arg1
[i
]);
569 brw_set_saturate(p
, 0);
574 void emit_sop(struct brw_compile
*p
,
575 const struct brw_reg
*dst
,
578 const struct brw_reg
*arg0
,
579 const struct brw_reg
*arg1
)
583 for (i
= 0; i
< 4; i
++) {
585 brw_push_insn_state(p
);
586 brw_CMP(p
, brw_null_reg(), cond
, arg0
[i
], arg1
[i
]);
587 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
588 brw_MOV(p
, dst
[i
], brw_imm_f(0));
589 brw_set_predicate_control(p
, BRW_PREDICATE_NORMAL
);
590 brw_MOV(p
, dst
[i
], brw_imm_f(1.0));
591 brw_pop_insn_state(p
);
596 static void emit_slt( struct brw_compile
*p
,
597 const struct brw_reg
*dst
,
599 const struct brw_reg
*arg0
,
600 const struct brw_reg
*arg1
)
602 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_L
, arg0
, arg1
);
605 static void emit_sle( struct brw_compile
*p
,
606 const struct brw_reg
*dst
,
608 const struct brw_reg
*arg0
,
609 const struct brw_reg
*arg1
)
611 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_LE
, arg0
, arg1
);
614 static void emit_sgt( struct brw_compile
*p
,
615 const struct brw_reg
*dst
,
617 const struct brw_reg
*arg0
,
618 const struct brw_reg
*arg1
)
620 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_G
, arg0
, arg1
);
623 static void emit_sge( struct brw_compile
*p
,
624 const struct brw_reg
*dst
,
626 const struct brw_reg
*arg0
,
627 const struct brw_reg
*arg1
)
629 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_GE
, arg0
, arg1
);
632 static void emit_seq( struct brw_compile
*p
,
633 const struct brw_reg
*dst
,
635 const struct brw_reg
*arg0
,
636 const struct brw_reg
*arg1
)
638 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_EQ
, arg0
, arg1
);
641 static void emit_sne( struct brw_compile
*p
,
642 const struct brw_reg
*dst
,
644 const struct brw_reg
*arg0
,
645 const struct brw_reg
*arg1
)
647 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_NEQ
, arg0
, arg1
);
650 void emit_cmp(struct brw_compile
*p
,
651 const struct brw_reg
*dst
,
653 const struct brw_reg
*arg0
,
654 const struct brw_reg
*arg1
,
655 const struct brw_reg
*arg2
)
659 for (i
= 0; i
< 4; i
++) {
661 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, arg0
[i
], brw_imm_f(0));
663 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
664 brw_SEL(p
, dst
[i
], arg1
[i
], arg2
[i
]);
665 brw_set_saturate(p
, 0);
666 brw_set_predicate_control_flag_value(p
, 0xff);
671 void emit_sign(struct brw_compile
*p
,
672 const struct brw_reg
*dst
,
674 const struct brw_reg
*arg0
)
678 for (i
= 0; i
< 4; i
++) {
680 brw_MOV(p
, dst
[i
], brw_imm_f(0.0));
682 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, arg0
[i
], brw_imm_f(0));
683 brw_MOV(p
, dst
[i
], brw_imm_f(-1.0));
684 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
686 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_G
, arg0
[i
], brw_imm_f(0));
687 brw_MOV(p
, dst
[i
], brw_imm_f(1.0));
688 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
693 void emit_max(struct brw_compile
*p
,
694 const struct brw_reg
*dst
,
696 const struct brw_reg
*arg0
,
697 const struct brw_reg
*arg1
)
701 for (i
= 0; i
< 4; i
++) {
703 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_GE
, arg0
[i
], arg1
[i
]);
705 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
706 brw_SEL(p
, dst
[i
], arg0
[i
], arg1
[i
]);
707 brw_set_saturate(p
, 0);
708 brw_set_predicate_control_flag_value(p
, 0xff);
713 void emit_min(struct brw_compile
*p
,
714 const struct brw_reg
*dst
,
716 const struct brw_reg
*arg0
,
717 const struct brw_reg
*arg1
)
721 for (i
= 0; i
< 4; i
++) {
723 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, arg0
[i
], arg1
[i
]);
725 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
726 brw_SEL(p
, dst
[i
], arg0
[i
], arg1
[i
]);
727 brw_set_saturate(p
, 0);
728 brw_set_predicate_control_flag_value(p
, 0xff);
734 void emit_dp2(struct brw_compile
*p
,
735 const struct brw_reg
*dst
,
737 const struct brw_reg
*arg0
,
738 const struct brw_reg
*arg1
)
740 int dst_chan
= _mesa_ffs(mask
& WRITEMASK_XYZW
) - 1;
742 if (!(mask
& WRITEMASK_XYZW
))
743 return; /* Do not emit dead code */
745 assert(is_power_of_two(mask
& WRITEMASK_XYZW
));
747 brw_MUL(p
, brw_null_reg(), arg0
[0], arg1
[0]);
749 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
750 brw_MAC(p
, dst
[dst_chan
], arg0
[1], arg1
[1]);
751 brw_set_saturate(p
, 0);
755 void emit_dp3(struct brw_compile
*p
,
756 const struct brw_reg
*dst
,
758 const struct brw_reg
*arg0
,
759 const struct brw_reg
*arg1
)
761 int dst_chan
= _mesa_ffs(mask
& WRITEMASK_XYZW
) - 1;
763 if (!(mask
& WRITEMASK_XYZW
))
764 return; /* Do not emit dead code */
766 assert(is_power_of_two(mask
& WRITEMASK_XYZW
));
768 brw_MUL(p
, brw_null_reg(), arg0
[0], arg1
[0]);
769 brw_MAC(p
, brw_null_reg(), arg0
[1], arg1
[1]);
771 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
772 brw_MAC(p
, dst
[dst_chan
], arg0
[2], arg1
[2]);
773 brw_set_saturate(p
, 0);
777 void emit_dp4(struct brw_compile
*p
,
778 const struct brw_reg
*dst
,
780 const struct brw_reg
*arg0
,
781 const struct brw_reg
*arg1
)
783 int dst_chan
= _mesa_ffs(mask
& WRITEMASK_XYZW
) - 1;
785 if (!(mask
& WRITEMASK_XYZW
))
786 return; /* Do not emit dead code */
788 assert(is_power_of_two(mask
& WRITEMASK_XYZW
));
790 brw_MUL(p
, brw_null_reg(), arg0
[0], arg1
[0]);
791 brw_MAC(p
, brw_null_reg(), arg0
[1], arg1
[1]);
792 brw_MAC(p
, brw_null_reg(), arg0
[2], arg1
[2]);
794 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
795 brw_MAC(p
, dst
[dst_chan
], arg0
[3], arg1
[3]);
796 brw_set_saturate(p
, 0);
800 void emit_dph(struct brw_compile
*p
,
801 const struct brw_reg
*dst
,
803 const struct brw_reg
*arg0
,
804 const struct brw_reg
*arg1
)
806 const int dst_chan
= _mesa_ffs(mask
& WRITEMASK_XYZW
) - 1;
808 if (!(mask
& WRITEMASK_XYZW
))
809 return; /* Do not emit dead code */
811 assert(is_power_of_two(mask
& WRITEMASK_XYZW
));
813 brw_MUL(p
, brw_null_reg(), arg0
[0], arg1
[0]);
814 brw_MAC(p
, brw_null_reg(), arg0
[1], arg1
[1]);
815 brw_MAC(p
, dst
[dst_chan
], arg0
[2], arg1
[2]);
817 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
818 brw_ADD(p
, dst
[dst_chan
], dst
[dst_chan
], arg1
[3]);
819 brw_set_saturate(p
, 0);
823 void emit_xpd(struct brw_compile
*p
,
824 const struct brw_reg
*dst
,
826 const struct brw_reg
*arg0
,
827 const struct brw_reg
*arg1
)
831 assert((mask
& WRITEMASK_W
) != WRITEMASK_W
);
833 for (i
= 0 ; i
< 3; i
++) {
838 brw_MUL(p
, brw_null_reg(), negate(arg0
[i2
]), arg1
[i1
]);
840 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
841 brw_MAC(p
, dst
[i
], arg0
[i1
], arg1
[i2
]);
842 brw_set_saturate(p
, 0);
848 void emit_math1(struct brw_wm_compile
*c
,
850 const struct brw_reg
*dst
,
852 const struct brw_reg
*arg0
)
854 struct brw_compile
*p
= &c
->func
;
855 int dst_chan
= _mesa_ffs(mask
& WRITEMASK_XYZW
) - 1;
856 GLuint saturate
= ((mask
& SATURATE
) ?
857 BRW_MATH_SATURATE_SATURATE
:
858 BRW_MATH_SATURATE_NONE
);
860 if (!(mask
& WRITEMASK_XYZW
))
861 return; /* Do not emit dead code */
863 assert(is_power_of_two(mask
& WRITEMASK_XYZW
));
865 /* If compressed, this will write message reg 2,3 from arg0.x's 16
868 brw_MOV(p
, brw_message_reg(2), arg0
[0]);
870 /* Send two messages to perform all 16 operations:
872 brw_push_insn_state(p
);
873 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
880 BRW_MATH_DATA_VECTOR
,
881 BRW_MATH_PRECISION_FULL
);
883 if (c
->dispatch_width
== 16) {
884 brw_set_compression_control(p
, BRW_COMPRESSION_2NDHALF
);
886 offset(dst
[dst_chan
],1),
891 BRW_MATH_DATA_VECTOR
,
892 BRW_MATH_PRECISION_FULL
);
894 brw_pop_insn_state(p
);
898 void emit_math2(struct brw_wm_compile
*c
,
900 const struct brw_reg
*dst
,
902 const struct brw_reg
*arg0
,
903 const struct brw_reg
*arg1
)
905 struct brw_compile
*p
= &c
->func
;
906 int dst_chan
= _mesa_ffs(mask
& WRITEMASK_XYZW
) - 1;
907 GLuint saturate
= ((mask
& SATURATE
) ?
908 BRW_MATH_SATURATE_SATURATE
:
909 BRW_MATH_SATURATE_NONE
);
911 if (!(mask
& WRITEMASK_XYZW
))
912 return; /* Do not emit dead code */
914 assert(is_power_of_two(mask
& WRITEMASK_XYZW
));
916 brw_push_insn_state(p
);
918 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
919 brw_MOV(p
, brw_message_reg(2), arg0
[0]);
920 if (c
->dispatch_width
== 16) {
921 brw_set_compression_control(p
, BRW_COMPRESSION_2NDHALF
);
922 brw_MOV(p
, brw_message_reg(4), sechalf(arg0
[0]));
925 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
926 brw_MOV(p
, brw_message_reg(3), arg1
[0]);
927 if (c
->dispatch_width
== 16) {
928 brw_set_compression_control(p
, BRW_COMPRESSION_2NDHALF
);
929 brw_MOV(p
, brw_message_reg(5), sechalf(arg1
[0]));
932 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
939 BRW_MATH_DATA_VECTOR
,
940 BRW_MATH_PRECISION_FULL
);
942 /* Send two messages to perform all 16 operations:
944 if (c
->dispatch_width
== 16) {
945 brw_set_compression_control(p
, BRW_COMPRESSION_2NDHALF
);
947 offset(dst
[dst_chan
],1),
952 BRW_MATH_DATA_VECTOR
,
953 BRW_MATH_PRECISION_FULL
);
955 brw_pop_insn_state(p
);
959 void emit_tex(struct brw_wm_compile
*c
,
963 struct brw_reg depth_payload
,
968 struct brw_compile
*p
= &c
->func
;
969 struct intel_context
*intel
= &p
->brw
->intel
;
970 struct brw_reg dst_retyped
;
971 GLuint cur_mrf
= 2, response_length
;
972 GLuint i
, nr_texcoords
;
975 GLuint mrf_per_channel
;
978 if (c
->dispatch_width
== 16) {
981 dst_retyped
= retype(vec16(dst
[0]), BRW_REGISTER_TYPE_UW
);
982 simd_mode
= BRW_SAMPLER_SIMD_MODE_SIMD16
;
986 dst_retyped
= retype(vec8(dst
[0]), BRW_REGISTER_TYPE_UW
);
987 simd_mode
= BRW_SAMPLER_SIMD_MODE_SIMD8
;
990 /* How many input regs are there?
993 case TEXTURE_1D_INDEX
:
997 case TEXTURE_2D_INDEX
:
998 case TEXTURE_RECT_INDEX
:
1002 case TEXTURE_3D_INDEX
:
1003 case TEXTURE_CUBE_INDEX
:
1004 emit
= WRITEMASK_XYZ
;
1008 /* unexpected target */
1012 /* Pre-Ironlake, the 8-wide sampler always took u,v,r. */
1013 if (intel
->gen
< 5 && c
->dispatch_width
== 8)
1016 /* For shadow comparisons, we have to supply u,v,r. */
1020 /* Emit the texcoords. */
1021 for (i
= 0; i
< nr_texcoords
; i
++) {
1023 brw_MOV(p
, brw_message_reg(cur_mrf
), arg
[i
]);
1025 brw_MOV(p
, brw_message_reg(cur_mrf
), brw_imm_f(0));
1026 cur_mrf
+= mrf_per_channel
;
1029 /* Fill in the shadow comparison reference value. */
1031 if (intel
->gen
== 5) {
1032 /* Fill in the cube map array index value. */
1033 brw_MOV(p
, brw_message_reg(cur_mrf
), brw_imm_f(0));
1034 cur_mrf
+= mrf_per_channel
;
1035 } else if (c
->dispatch_width
== 8) {
1036 /* Fill in the LOD bias value. */
1037 brw_MOV(p
, brw_message_reg(cur_mrf
), brw_imm_f(0));
1038 cur_mrf
+= mrf_per_channel
;
1040 brw_MOV(p
, brw_message_reg(cur_mrf
), arg
[2]);
1041 cur_mrf
+= mrf_per_channel
;
1044 if (intel
->gen
== 5) {
1046 msg_type
= BRW_SAMPLER_MESSAGE_SAMPLE_COMPARE_GEN5
;
1048 msg_type
= BRW_SAMPLER_MESSAGE_SAMPLE_GEN5
;
1050 /* Note that G45 and older determines shadow compare and dispatch width
1051 * from message length for most messages.
1053 if (c
->dispatch_width
== 16 && shadow
)
1054 msg_type
= BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE
;
1056 msg_type
= BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE
;
1062 retype(depth_payload
, BRW_REGISTER_TYPE_UW
),
1063 SURF_INDEX_TEXTURE(sampler
),
1065 dst_flags
& WRITEMASK_XYZW
,
1075 void emit_txb(struct brw_wm_compile
*c
,
1076 struct brw_reg
*dst
,
1078 struct brw_reg
*arg
,
1079 struct brw_reg depth_payload
,
1083 struct brw_compile
*p
= &c
->func
;
1084 struct intel_context
*intel
= &p
->brw
->intel
;
1087 GLuint mrf_per_channel
;
1088 GLuint response_length
;
1089 struct brw_reg dst_retyped
;
1091 /* The G45 and older chipsets don't support 8-wide dispatch for LOD biased
1092 * samples, so we'll use the 16-wide instruction, leave the second halves
1093 * undefined, and trust the execution mask to keep the undefined pixels
1096 if (c
->dispatch_width
== 16 || intel
->gen
< 5) {
1097 if (intel
->gen
== 5)
1098 msg_type
= BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_GEN5
;
1100 msg_type
= BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS
;
1101 mrf_per_channel
= 2;
1102 dst_retyped
= retype(vec16(dst
[0]), BRW_REGISTER_TYPE_UW
);
1103 response_length
= 8;
1105 msg_type
= BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_GEN5
;
1106 mrf_per_channel
= 1;
1107 dst_retyped
= retype(vec8(dst
[0]), BRW_REGISTER_TYPE_UW
);
1108 response_length
= 4;
1111 /* Shadow ignored for txb. */
1113 case TEXTURE_1D_INDEX
:
1114 brw_MOV(p
, brw_message_reg(2 + 0 * mrf_per_channel
), arg
[0]);
1115 brw_MOV(p
, brw_message_reg(2 + 1 * mrf_per_channel
), brw_imm_f(0));
1116 brw_MOV(p
, brw_message_reg(2 + 2 * mrf_per_channel
), brw_imm_f(0));
1118 case TEXTURE_2D_INDEX
:
1119 case TEXTURE_RECT_INDEX
:
1120 brw_MOV(p
, brw_message_reg(2 + 0 * mrf_per_channel
), arg
[0]);
1121 brw_MOV(p
, brw_message_reg(2 + 1 * mrf_per_channel
), arg
[1]);
1122 brw_MOV(p
, brw_message_reg(2 + 2 * mrf_per_channel
), brw_imm_f(0));
1124 case TEXTURE_3D_INDEX
:
1125 case TEXTURE_CUBE_INDEX
:
1126 brw_MOV(p
, brw_message_reg(2 + 0 * mrf_per_channel
), arg
[0]);
1127 brw_MOV(p
, brw_message_reg(2 + 1 * mrf_per_channel
), arg
[1]);
1128 brw_MOV(p
, brw_message_reg(2 + 2 * mrf_per_channel
), arg
[2]);
1131 /* unexpected target */
1135 brw_MOV(p
, brw_message_reg(2 + 3 * mrf_per_channel
), arg
[3]);
1136 msgLength
= 2 + 4 * mrf_per_channel
- 1;
1141 retype(depth_payload
, BRW_REGISTER_TYPE_UW
),
1142 SURF_INDEX_TEXTURE(sampler
),
1144 dst_flags
& WRITEMASK_XYZW
,
1150 BRW_SAMPLER_SIMD_MODE_SIMD16
);
1154 static void emit_lit(struct brw_wm_compile
*c
,
1155 const struct brw_reg
*dst
,
1157 const struct brw_reg
*arg0
)
1159 struct brw_compile
*p
= &c
->func
;
1161 assert((mask
& WRITEMASK_XW
) == 0);
1163 if (mask
& WRITEMASK_Y
) {
1164 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
1165 brw_MOV(p
, dst
[1], arg0
[0]);
1166 brw_set_saturate(p
, 0);
1169 if (mask
& WRITEMASK_Z
) {
1170 emit_math2(c
, BRW_MATH_FUNCTION_POW
,
1172 WRITEMASK_X
| (mask
& SATURATE
),
1177 /* Ordinarily you'd use an iff statement to skip or shortcircuit
1178 * some of the POW calculations above, but 16-wide iff statements
1179 * seem to lock c1 hardware, so this is a nasty workaround:
1181 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_LE
, arg0
[0], brw_imm_f(0));
1183 if (mask
& WRITEMASK_Y
)
1184 brw_MOV(p
, dst
[1], brw_imm_f(0));
1186 if (mask
& WRITEMASK_Z
)
1187 brw_MOV(p
, dst
[2], brw_imm_f(0));
1189 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
1193 /* Kill pixel - set execution mask to zero for those pixels which
1196 static void emit_kil( struct brw_wm_compile
*c
,
1197 struct brw_reg
*arg0
)
1199 struct brw_compile
*p
= &c
->func
;
1200 struct brw_reg r0uw
= retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW
);
1203 for (i
= 0; i
< 4; i
++) {
1204 /* Check if we've already done the comparison for this reg
1205 * -- common when someone does KIL TEMP.wwww.
1207 for (j
= 0; j
< i
; j
++) {
1208 if (memcmp(&arg0
[j
], &arg0
[i
], sizeof(arg0
[0])) == 0)
1214 brw_push_insn_state(p
);
1215 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_GE
, arg0
[i
], brw_imm_f(0));
1216 brw_set_predicate_control_flag_value(p
, 0xff);
1217 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1218 brw_AND(p
, r0uw
, brw_flag_reg(), r0uw
);
1219 brw_pop_insn_state(p
);
1223 /* KIL_NV kills the pixels that are currently executing, not based on a test
1226 static void emit_kil_nv( struct brw_wm_compile
*c
)
1228 struct brw_compile
*p
= &c
->func
;
1229 struct brw_reg r0uw
= retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW
);
1231 brw_push_insn_state(p
);
1232 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1233 brw_NOT(p
, c
->emit_mask_reg
, brw_mask_reg(1)); /* IMASK */
1234 brw_AND(p
, r0uw
, c
->emit_mask_reg
, r0uw
);
1235 brw_pop_insn_state(p
);
1238 static void fire_fb_write( struct brw_wm_compile
*c
,
1244 struct brw_compile
*p
= &c
->func
;
1247 if (c
->dispatch_width
== 16)
1248 dst
= retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW
);
1250 dst
= retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW
);
1252 /* Pass through control information:
1254 /* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */
1256 brw_push_insn_state(p
);
1257 brw_set_mask_control(p
, BRW_MASK_DISABLE
); /* ? */
1258 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1260 brw_message_reg(base_reg
+ 1),
1261 brw_vec8_grf(1, 0));
1262 brw_pop_insn_state(p
);
1265 /* Send framebuffer write message: */
1266 /* send (16) null.0<1>:uw m0 r0.0<8;8,1>:uw 0x85a04000:ud { Align1 EOT } */
1270 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW
),
1278 static void emit_aa( struct brw_wm_compile
*c
,
1279 struct brw_reg
*arg1
,
1282 struct brw_compile
*p
= &c
->func
;
1283 GLuint comp
= c
->key
.aa_dest_stencil_reg
/ 2;
1284 GLuint off
= c
->key
.aa_dest_stencil_reg
% 2;
1285 struct brw_reg aa
= offset(arg1
[comp
], off
);
1287 brw_push_insn_state(p
);
1288 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
); /* ?? */
1289 brw_MOV(p
, brw_message_reg(reg
), aa
);
1290 brw_pop_insn_state(p
);
1294 /* Post-fragment-program processing. Send the results to the
1296 * \param arg0 the fragment color
1297 * \param arg1 the pass-through depth value
1298 * \param arg2 the shader-computed depth value
1300 void emit_fb_write(struct brw_wm_compile
*c
,
1301 struct brw_reg
*arg0
,
1302 struct brw_reg
*arg1
,
1303 struct brw_reg
*arg2
,
1307 struct brw_compile
*p
= &c
->func
;
1308 struct brw_context
*brw
= p
->brw
;
1312 /* Reserve a space for AA - may not be needed:
1314 if (c
->key
.aa_dest_stencil_reg
)
1317 /* I don't really understand how this achieves the color interleave
1318 * (ie RGBARGBA) in the result: [Do the saturation here]
1320 brw_push_insn_state(p
);
1322 for (channel
= 0; channel
< 4; channel
++) {
1323 if (c
->dispatch_width
== 16 && brw
->has_compr4
) {
1324 /* By setting the high bit of the MRF register number, we indicate
1325 * that we want COMPR4 mode - instead of doing the usual destination
1326 * + 1 for the second half we get destination + 4.
1329 brw_message_reg(nr
+ channel
+ BRW_MRF_COMPR4
),
1332 /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */
1333 /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */
1334 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1336 brw_message_reg(nr
+ channel
),
1339 if (c
->dispatch_width
== 16) {
1340 brw_set_compression_control(p
, BRW_COMPRESSION_2NDHALF
);
1342 brw_message_reg(nr
+ channel
+ 4),
1343 sechalf(arg0
[channel
]));
1347 /* skip over the regs populated above:
1350 brw_pop_insn_state(p
);
1352 if (c
->key
.source_depth_to_render_target
)
1354 if (c
->key
.computes_depth
)
1355 brw_MOV(p
, brw_message_reg(nr
), arg2
[2]);
1357 brw_MOV(p
, brw_message_reg(nr
), arg1
[1]); /* ? */
1362 if (c
->key
.dest_depth_reg
)
1364 GLuint comp
= c
->key
.dest_depth_reg
/ 2;
1365 GLuint off
= c
->key
.dest_depth_reg
% 2;
1368 brw_push_insn_state(p
);
1369 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1371 brw_MOV(p
, brw_message_reg(nr
), offset(arg1
[comp
],1));
1373 brw_MOV(p
, brw_message_reg(nr
+1), arg1
[comp
+1]);
1374 brw_pop_insn_state(p
);
1377 brw_MOV(p
, brw_message_reg(nr
), arg1
[comp
]);
1382 if (!c
->key
.runtime_check_aads_emit
) {
1383 if (c
->key
.aa_dest_stencil_reg
)
1384 emit_aa(c
, arg1
, 2);
1386 fire_fb_write(c
, 0, nr
, target
, eot
);
1389 struct brw_reg v1_null_ud
= vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD
));
1390 struct brw_reg ip
= brw_ip_reg();
1391 struct brw_instruction
*jmp
;
1393 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1394 brw_set_conditionalmod(p
, BRW_CONDITIONAL_Z
);
1397 get_element_ud(brw_vec8_grf(1,0), 6),
1400 jmp
= brw_JMPI(p
, ip
, ip
, brw_imm_w(0));
1402 emit_aa(c
, arg1
, 2);
1403 fire_fb_write(c
, 0, nr
, target
, eot
);
1404 /* note - thread killed in subroutine */
1406 brw_land_fwd_jump(p
, jmp
);
1408 /* ELSE: Shuffle up one register to fill in the hole left for AA:
1410 fire_fb_write(c
, 1, nr
-1, target
, eot
);
1415 * Move a GPR to scratch memory.
/*
 * emit_spill: emit the instructions that store one register to scratch.
 *
 * c - compile state; the instruction emitter is taken from c->func.
 *
 * Visible steps: the value in `reg` is first copied into message
 * register 2 with brw_MOV, then a further call is made with g0 (viewed
 * as 16 x UW) as one of its arguments.
 *
 * NOTE(review): the declarations of `reg` and of the scratch-slot
 * parameter, and the name of the final call, are not visible in this
 * excerpt.  spill_values() calls this as (c, hw_reg, spill_slot) --
 * confirm the full signature against the complete file.
 */
1417 static void emit_spill( struct brw_wm_compile
*c
,
1421 struct brw_compile
*p
= &c
->func
;
1424 mov (16) m2.0<1>:ud r2.0<8;8,1>:ud { Align1 Compr }
/* Stage the value to be spilled in message register 2. */
1426 brw_MOV(p
, brw_message_reg(2), reg
);
1429 mov (1) r0.2<1>:d 0x00000080:d { Align1 NoMask }
1430 send (16) null.0<1>:uw m1 r0.0<8;8,1>:uw 0x053003ff:ud { Align1 }
/* g0 retyped to 16 x UW is handed to the (not visible) write call. */
1433 retype(vec16(brw_vec8_grf(0, 0)), BRW_REGISTER_TYPE_UW
),
1439 * Load a GPR from scratch memory.
/*
 * emit_unspill: emit the instructions that reload `reg` from scratch.
 *
 * c - compile state; the instruction emitter is taken from c->func.
 *
 * Visible steps: a brw_MOV of the float immediate 0 into `reg` (the
 * existing comment ties this to slot 0 being the "undef" value), and a
 * further call that receives `reg` viewed as 16 x UW.
 *
 * NOTE(review): the condition guarding the zero-fill MOV, the
 * declarations of `reg` and the slot parameter, and the name of the
 * final call are not visible in this excerpt -- presumably the MOV is
 * only emitted for slot 0 and the other path performs the scratch read;
 * confirm against the complete file.
 */
1441 static void emit_unspill( struct brw_wm_compile
*c
,
1445 struct brw_compile
*p
= &c
->func
;
1447 /* Slot 0 is the undef value.
1450 brw_MOV(p
, reg
, brw_imm_f(0));
1455 mov (1) r0.2<1>:d 0x000000c0:d { Align1 NoMask }
1456 send (16) r110.0<1>:uw m1 r0.0<8;8,1>:uw 0x041243ff:ud { Align1 }
1460 retype(vec16(reg
), BRW_REGISTER_TYPE_UW
),
1466 * Retrieve up to 4 GEN4 register pairs for the given wm reg:
1467 * Args with unspill_reg != 0 will be loaded from scratch memory.
/*
 * get_argument_regs: fill regs[0..3] with the hardware registers for the
 * four entries of `arg`.
 *
 * c    - compile state, forwarded to the unspill call.
 * arg  - array of four brw_wm_ref pointers describing one source operand.
 * regs - output array of four brw_reg values.
 *
 * For each entry: when arg[i]->unspill_reg is non-zero, a call is made
 * with the GRF numbered unspill_reg and arg[i]->value->spill_slot, after
 * which regs[i] receives arg[i]->hw_reg.  A second assignment stores
 * brw_null_reg() in regs[i].
 *
 * NOTE(review): the outer test that selects between the two regs[i]
 * assignments (presumably a NULL check on arg[i]), the `else`, and the
 * name of the call taking the unspill arguments (presumably
 * emit_unspill) are not visible in this excerpt -- confirm.
 */
1469 static void get_argument_regs( struct brw_wm_compile
*c
,
1470 struct brw_wm_ref
*arg
[],
1471 struct brw_reg
*regs
)
/* One iteration per channel of the operand. */
1475 for (i
= 0; i
< 4; i
++) {
/* A non-zero unspill_reg means the value must be reloaded first. */
1477 if (arg
[i
]->unspill_reg
)
1479 brw_vec8_grf(arg
[i
]->unspill_reg
, 0),
1480 arg
[i
]->value
->spill_slot
);
1482 regs
[i
] = arg
[i
]->hw_reg
;
/* Fallback value for the other branch (its condition is not visible). */
1485 regs
[i
] = brw_null_reg();
1492 * For values that have a spill_slot!=0, write those regs to scratch memory.
/*
 * spill_values: walk an array of brw_wm_value entries and emit a spill
 * for every entry whose spill_slot is non-zero.
 *
 * c      - compile state, forwarded to emit_spill().
 * values - array of values to inspect.
 *
 * The loop runs for indices 0 .. nr-1; each qualifying entry is passed to
 * emit_spill() as (c, values[i].hw_reg, values[i].spill_slot).
 *
 * NOTE(review): the declarations of `nr` (the element count, presumably
 * the third parameter) and of `i` are not visible in this excerpt.
 */
1494 static void spill_values( struct brw_wm_compile
*c
,
1495 struct brw_wm_value
*values
,
1500 for (i
= 0; i
< nr
; i
++)
/* A spill_slot of 0 means the value was never assigned scratch space. */
1501 if (values
[i
].spill_slot
)
1502 emit_spill(c
, values
[i
].hw_reg
, values
[i
].spill_slot
);
1506 /* Emit the fragment program instructions here.
1508 void brw_wm_emit( struct brw_wm_compile
*c
)
1510 struct brw_compile
*p
= &c
->func
;
1513 brw_set_compression_control(p
, BRW_COMPRESSION_COMPRESSED
);
1515 /* Check if any of the payload regs need to be spilled:
1517 spill_values(c
, c
->payload
.depth
, 4);
1518 spill_values(c
, c
->creg
, c
->nr_creg
);
1519 spill_values(c
, c
->payload
.input_interp
, FRAG_ATTRIB_MAX
);
1522 for (insn
= 0; insn
< c
->nr_insns
; insn
++) {
1524 struct brw_wm_instruction
*inst
= &c
->instruction
[insn
];
1525 struct brw_reg args
[3][4], dst
[4];
1526 GLuint i
, dst_flags
;
1528 /* Get argument regs:
1530 for (i
= 0; i
< 3; i
++)
1531 get_argument_regs(c
, inst
->src
[i
], args
[i
]);
1535 for (i
= 0; i
< 4; i
++)
1537 dst
[i
] = inst
->dst
[i
]->hw_reg
;
1539 dst
[i
] = brw_null_reg();
1543 dst_flags
= inst
->writemask
;
1545 dst_flags
|= SATURATE
;
1547 switch (inst
->opcode
) {
1548 /* Generated instructions for calculating triangle interpolants:
1551 emit_pixel_xy(c
, dst
, dst_flags
);
1555 emit_delta_xy(p
, dst
, dst_flags
, args
[0]);
1559 emit_wpos_xy(c
, dst
, dst_flags
, args
[0]);
1563 emit_pixel_w(c
, dst
, dst_flags
, args
[0], args
[1]);
1567 emit_linterp(p
, dst
, dst_flags
, args
[0], args
[1]);
1571 emit_pinterp(p
, dst
, dst_flags
, args
[0], args
[1], args
[2]);
1575 emit_cinterp(p
, dst
, dst_flags
, args
[0]);
1579 emit_fb_write(c
, args
[0], args
[1], args
[2], inst
->target
, inst
->eot
);
1582 case WM_FRONTFACING
:
1583 emit_frontfacing(p
, dst
, dst_flags
);
1586 /* Straightforward arithmetic:
1589 emit_alu2(p
, brw_ADD
, dst
, dst_flags
, args
[0], args
[1]);
1593 emit_alu1(p
, brw_FRC
, dst
, dst_flags
, args
[0]);
1597 emit_alu1(p
, brw_RNDD
, dst
, dst_flags
, args
[0]);
1601 emit_ddxy(p
, dst
, dst_flags
, GL_TRUE
, args
[0]);
1605 emit_ddxy(p
, dst
, dst_flags
, GL_FALSE
, args
[0]);
1609 emit_dp2(p
, dst
, dst_flags
, args
[0], args
[1]);
1613 emit_dp3(p
, dst
, dst_flags
, args
[0], args
[1]);
1617 emit_dp4(p
, dst
, dst_flags
, args
[0], args
[1]);
1621 emit_dph(p
, dst
, dst_flags
, args
[0], args
[1]);
1625 emit_alu1(p
, brw_RNDZ
, dst
, dst_flags
, args
[0]);
1629 emit_lrp(p
, dst
, dst_flags
, args
[0], args
[1], args
[2]);
1633 emit_mad(p
, dst
, dst_flags
, args
[0], args
[1], args
[2]);
1638 emit_alu1(p
, brw_MOV
, dst
, dst_flags
, args
[0]);
1642 emit_alu2(p
, brw_MUL
, dst
, dst_flags
, args
[0], args
[1]);
1646 emit_xpd(p
, dst
, dst_flags
, args
[0], args
[1]);
1649 /* Higher math functions:
1652 emit_math1(c
, BRW_MATH_FUNCTION_INV
, dst
, dst_flags
, args
[0]);
1656 emit_math1(c
, BRW_MATH_FUNCTION_RSQ
, dst
, dst_flags
, args
[0]);
1660 emit_math1(c
, BRW_MATH_FUNCTION_SIN
, dst
, dst_flags
, args
[0]);
1664 emit_math1(c
, BRW_MATH_FUNCTION_COS
, dst
, dst_flags
, args
[0]);
1668 emit_math1(c
, BRW_MATH_FUNCTION_EXP
, dst
, dst_flags
, args
[0]);
1672 emit_math1(c
, BRW_MATH_FUNCTION_LOG
, dst
, dst_flags
, args
[0]);
1676 /* There is an scs math function, but it would need some
1677 * fixup for 16-element execution.
1679 if (dst_flags
& WRITEMASK_X
)
1680 emit_math1(c
, BRW_MATH_FUNCTION_COS
, dst
, (dst_flags
&SATURATE
)|WRITEMASK_X
, args
[0]);
1681 if (dst_flags
& WRITEMASK_Y
)
1682 emit_math1(c
, BRW_MATH_FUNCTION_SIN
, dst
+1, (dst_flags
&SATURATE
)|WRITEMASK_X
, args
[0]);
1686 emit_math2(c
, BRW_MATH_FUNCTION_POW
, dst
, dst_flags
, args
[0], args
[1]);
1692 emit_cmp(p
, dst
, dst_flags
, args
[0], args
[1], args
[2]);
1696 emit_max(p
, dst
, dst_flags
, args
[0], args
[1]);
1700 emit_min(p
, dst
, dst_flags
, args
[0], args
[1]);
1704 emit_slt(p
, dst
, dst_flags
, args
[0], args
[1]);
1708 emit_sle(p
, dst
, dst_flags
, args
[0], args
[1]);
1711 emit_sgt(p
, dst
, dst_flags
, args
[0], args
[1]);
1714 emit_sge(p
, dst
, dst_flags
, args
[0], args
[1]);
1717 emit_seq(p
, dst
, dst_flags
, args
[0], args
[1]);
1720 emit_sne(p
, dst
, dst_flags
, args
[0], args
[1]);
1724 emit_sign(p
, dst
, dst_flags
, args
[0]);
1728 emit_lit(c
, dst
, dst_flags
, args
[0]);
1731 /* Texturing operations:
1734 emit_tex(c
, dst
, dst_flags
, args
[0], c
->payload
.depth
[0].hw_reg
,
1735 inst
->tex_idx
, inst
->tex_unit
,
1740 emit_txb(c
, dst
, dst_flags
, args
[0], c
->payload
.depth
[0].hw_reg
,
1741 inst
->tex_idx
, inst
->tex_unit
);
1745 emit_kil(c
, args
[0]);
1753 printf("Unsupported opcode %i (%s) in fragment shader\n",
1754 inst
->opcode
, inst
->opcode
< MAX_OPCODE
?
1755 _mesa_opcode_string(inst
->opcode
) :
1759 for (i
= 0; i
< 4; i
++)
1760 if (inst
->dst
[i
] && inst
->dst
[i
]->spill_slot
)
1762 inst
->dst
[i
]->hw_reg
,
1763 inst
->dst
[i
]->spill_slot
);
1766 /* Only properly tested on ILK */
1767 if (p
->brw
->intel
.gen
== 5) {
1768 brw_remove_duplicate_mrf_moves(p
);
1769 if (c
->dispatch_width
== 16)
1770 brw_remove_grf_to_mrf_moves(p
);
1773 if (INTEL_DEBUG
& DEBUG_WM
) {
1776 printf("wm-native:\n");
1777 for (i
= 0; i
< p
->nr_insn
; i
++)
1778 brw_disasm(stderr
, &p
->store
[i
], p
->brw
->intel
.gen
);