2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **********************************************************************/
29 * Keith Whitwell <keith@tungstengraphics.com>
33 #include "main/macros.h"
34 #include "brw_context.h"
37 /* Not quite sure how correct this is - need to understand horiz
38 * vs. vertical strides a little better.
40 static INLINE
struct brw_reg
sechalf( struct brw_reg reg
)
/* R0.0 -- pixel mask, one bit for each of 4 pixels in 4 tiles,
 *         corresponding to each of the 16 execution channels.
 *
 * R1.0 -- triangle vertex 0.X
 * R1.1 -- triangle vertex 0.Y
 * R1.2 -- tile 0 x,y coords (2 packed uwords)
 * R1.3 -- tile 1 x,y coords (2 packed uwords)
 * R1.4 -- tile 2 x,y coords (2 packed uwords)
 * R1.5 -- tile 3 x,y coords (2 packed uwords)
 */
64 void emit_pixel_xy(struct brw_wm_compile
*c
,
65 const struct brw_reg
*dst
,
68 struct brw_compile
*p
= &c
->func
;
69 struct brw_reg r1
= brw_vec1_grf(1, 0);
70 struct brw_reg r1_uw
= retype(r1
, BRW_REGISTER_TYPE_UW
);
71 struct brw_reg dst0_uw
, dst1_uw
;
73 brw_push_insn_state(p
);
74 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
76 if (c
->dispatch_width
== 16) {
77 dst0_uw
= vec16(retype(dst
[0], BRW_REGISTER_TYPE_UW
));
78 dst1_uw
= vec16(retype(dst
[1], BRW_REGISTER_TYPE_UW
));
80 dst0_uw
= vec8(retype(dst
[0], BRW_REGISTER_TYPE_UW
));
81 dst1_uw
= vec8(retype(dst
[1], BRW_REGISTER_TYPE_UW
));
84 /* Calculate pixel centers by adding 1 or 0 to each of the
85 * micro-tile coordinates passed in r1.
87 if (mask
& WRITEMASK_X
) {
90 stride(suboffset(r1_uw
, 4), 2, 4, 0),
91 brw_imm_v(0x10101010));
94 if (mask
& WRITEMASK_Y
) {
97 stride(suboffset(r1_uw
,5), 2, 4, 0),
98 brw_imm_v(0x11001100));
100 brw_pop_insn_state(p
);
104 void emit_delta_xy(struct brw_compile
*p
,
105 const struct brw_reg
*dst
,
107 const struct brw_reg
*arg0
)
109 struct brw_reg r1
= brw_vec1_grf(1, 0);
111 /* Calc delta X,Y by subtracting origin in r1 from the pixel
114 if (mask
& WRITEMASK_X
) {
117 retype(arg0
[0], BRW_REGISTER_TYPE_UW
),
121 if (mask
& WRITEMASK_Y
) {
124 retype(arg0
[1], BRW_REGISTER_TYPE_UW
),
125 negate(suboffset(r1
,1)));
130 void emit_wpos_xy(struct brw_wm_compile
*c
,
131 const struct brw_reg
*dst
,
133 const struct brw_reg
*arg0
)
135 struct brw_compile
*p
= &c
->func
;
137 /* Calculate the pixel offset from window bottom left into destination
140 if (mask
& WRITEMASK_X
) {
141 /* X' = X - origin */
144 retype(arg0
[0], BRW_REGISTER_TYPE_W
),
145 brw_imm_d(0 - c
->key
.origin_x
));
148 if (mask
& WRITEMASK_Y
) {
149 /* Y' = height - (Y - origin_y) = height + origin_y - Y */
152 negate(retype(arg0
[1], BRW_REGISTER_TYPE_W
)),
153 brw_imm_d(c
->key
.origin_y
+ c
->key
.drawable_height
- 1));
158 void emit_pixel_w(struct brw_wm_compile
*c
,
159 const struct brw_reg
*dst
,
161 const struct brw_reg
*arg0
,
162 const struct brw_reg
*deltas
)
164 struct brw_compile
*p
= &c
->func
;
166 /* Don't need this if all you are doing is interpolating color, for
169 if (mask
& WRITEMASK_W
) {
170 struct brw_reg interp3
= brw_vec1_grf(arg0
[0].nr
+1, 4);
172 /* Calc 1/w - just linterp wpos[3] optimized by putting the
173 * result straight into a message reg.
175 brw_LINE(p
, brw_null_reg(), interp3
, deltas
[0]);
176 brw_MAC(p
, brw_message_reg(2), suboffset(interp3
, 1), deltas
[1]);
179 if (c
->dispatch_width
== 16) {
180 brw_math_16(p
, dst
[3],
181 BRW_MATH_FUNCTION_INV
,
182 BRW_MATH_SATURATE_NONE
,
184 BRW_MATH_PRECISION_FULL
);
187 BRW_MATH_FUNCTION_INV
,
188 BRW_MATH_SATURATE_NONE
,
190 BRW_MATH_DATA_VECTOR
,
191 BRW_MATH_PRECISION_FULL
);
197 void emit_linterp(struct brw_compile
*p
,
198 const struct brw_reg
*dst
,
200 const struct brw_reg
*arg0
,
201 const struct brw_reg
*deltas
)
203 struct brw_reg interp
[4];
204 GLuint nr
= arg0
[0].nr
;
207 interp
[0] = brw_vec1_grf(nr
, 0);
208 interp
[1] = brw_vec1_grf(nr
, 4);
209 interp
[2] = brw_vec1_grf(nr
+1, 0);
210 interp
[3] = brw_vec1_grf(nr
+1, 4);
212 for (i
= 0; i
< 4; i
++) {
214 brw_LINE(p
, brw_null_reg(), interp
[i
], deltas
[0]);
215 brw_MAC(p
, dst
[i
], suboffset(interp
[i
],1), deltas
[1]);
221 void emit_pinterp(struct brw_compile
*p
,
222 const struct brw_reg
*dst
,
224 const struct brw_reg
*arg0
,
225 const struct brw_reg
*deltas
,
226 const struct brw_reg
*w
)
228 struct brw_reg interp
[4];
229 GLuint nr
= arg0
[0].nr
;
232 interp
[0] = brw_vec1_grf(nr
, 0);
233 interp
[1] = brw_vec1_grf(nr
, 4);
234 interp
[2] = brw_vec1_grf(nr
+1, 0);
235 interp
[3] = brw_vec1_grf(nr
+1, 4);
237 for (i
= 0; i
< 4; i
++) {
239 brw_LINE(p
, brw_null_reg(), interp
[i
], deltas
[0]);
240 brw_MAC(p
, dst
[i
], suboffset(interp
[i
],1), deltas
[1]);
243 for (i
= 0; i
< 4; i
++) {
245 brw_MUL(p
, dst
[i
], dst
[i
], w
[3]);
251 void emit_cinterp(struct brw_compile
*p
,
252 const struct brw_reg
*dst
,
254 const struct brw_reg
*arg0
)
256 struct brw_reg interp
[4];
257 GLuint nr
= arg0
[0].nr
;
260 interp
[0] = brw_vec1_grf(nr
, 0);
261 interp
[1] = brw_vec1_grf(nr
, 4);
262 interp
[2] = brw_vec1_grf(nr
+1, 0);
263 interp
[3] = brw_vec1_grf(nr
+1, 4);
265 for (i
= 0; i
< 4; i
++) {
267 brw_MOV(p
, dst
[i
], suboffset(interp
[i
],3)); /* TODO: optimize away like other moves */
272 /* Sets the destination channels to 1.0 or 0.0 according to glFrontFacing. */
273 void emit_frontfacing(struct brw_compile
*p
,
274 const struct brw_reg
*dst
,
277 struct brw_reg r1_6ud
= retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD
);
280 if (!(mask
& WRITEMASK_XYZW
))
283 for (i
= 0; i
< 4; i
++) {
285 brw_MOV(p
, dst
[i
], brw_imm_f(0.0));
289 /* bit 31 is "primitive is back face", so checking < (1 << 31) gives
292 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, r1_6ud
, brw_imm_ud(1 << 31));
293 for (i
= 0; i
< 4; i
++) {
295 brw_MOV(p
, dst
[i
], brw_imm_f(1.0));
298 brw_set_predicate_control_flag_value(p
, 0xff);
301 /* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input
304 * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br
306 * and we're trying to produce:
309 * dst: (ss0.tr - ss0.tl) (ss0.tl - ss0.bl)
310 * (ss0.tr - ss0.tl) (ss0.tr - ss0.br)
311 * (ss0.br - ss0.bl) (ss0.tl - ss0.bl)
312 * (ss0.br - ss0.bl) (ss0.tr - ss0.br)
313 * (ss1.tr - ss1.tl) (ss1.tl - ss1.bl)
314 * (ss1.tr - ss1.tl) (ss1.tr - ss1.br)
315 * (ss1.br - ss1.bl) (ss1.tl - ss1.bl)
316 * (ss1.br - ss1.bl) (ss1.tr - ss1.br)
318 * and add another set of two more subspans if in 16-pixel dispatch mode.
320 * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result
321 * for each pair, and vertstride = 2 jumps us 2 elements after processing a
322 * pair. But for DDY, it's harder, as we want to produce the pairs swizzled
323 * between each other. We could probably do it like ddx and swizzle the right
324 * order later, but bail for now and just produce
325 * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4)
327 void emit_ddxy(struct brw_compile
*p
,
328 const struct brw_reg
*dst
,
331 const struct brw_reg
*arg0
)
334 struct brw_reg src0
, src1
;
337 brw_set_saturate(p
, 1);
338 for (i
= 0; i
< 4; i
++ ) {
341 src0
= brw_reg(arg0
[i
].file
, arg0
[i
].nr
, 1,
343 BRW_VERTICAL_STRIDE_2
,
345 BRW_HORIZONTAL_STRIDE_0
,
346 BRW_SWIZZLE_XYZW
, WRITEMASK_XYZW
);
347 src1
= brw_reg(arg0
[i
].file
, arg0
[i
].nr
, 0,
349 BRW_VERTICAL_STRIDE_2
,
351 BRW_HORIZONTAL_STRIDE_0
,
352 BRW_SWIZZLE_XYZW
, WRITEMASK_XYZW
);
354 src0
= brw_reg(arg0
[i
].file
, arg0
[i
].nr
, 0,
356 BRW_VERTICAL_STRIDE_4
,
358 BRW_HORIZONTAL_STRIDE_0
,
359 BRW_SWIZZLE_XYZW
, WRITEMASK_XYZW
);
360 src1
= brw_reg(arg0
[i
].file
, arg0
[i
].nr
, 2,
362 BRW_VERTICAL_STRIDE_4
,
364 BRW_HORIZONTAL_STRIDE_0
,
365 BRW_SWIZZLE_XYZW
, WRITEMASK_XYZW
);
367 brw_ADD(p
, dst
[i
], src0
, negate(src1
));
371 brw_set_saturate(p
, 0);
374 void emit_alu1(struct brw_compile
*p
,
375 struct brw_instruction
*(*func
)(struct brw_compile
*,
378 const struct brw_reg
*dst
,
380 const struct brw_reg
*arg0
)
385 brw_set_saturate(p
, 1);
387 for (i
= 0; i
< 4; i
++) {
389 func(p
, dst
[i
], arg0
[i
]);
394 brw_set_saturate(p
, 0);
398 void emit_alu2(struct brw_compile
*p
,
399 struct brw_instruction
*(*func
)(struct brw_compile
*,
403 const struct brw_reg
*dst
,
405 const struct brw_reg
*arg0
,
406 const struct brw_reg
*arg1
)
411 brw_set_saturate(p
, 1);
413 for (i
= 0; i
< 4; i
++) {
415 func(p
, dst
[i
], arg0
[i
], arg1
[i
]);
420 brw_set_saturate(p
, 0);
424 void emit_mad(struct brw_compile
*p
,
425 const struct brw_reg
*dst
,
427 const struct brw_reg
*arg0
,
428 const struct brw_reg
*arg1
,
429 const struct brw_reg
*arg2
)
433 for (i
= 0; i
< 4; i
++) {
435 brw_MUL(p
, dst
[i
], arg0
[i
], arg1
[i
]);
437 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
438 brw_ADD(p
, dst
[i
], dst
[i
], arg2
[i
]);
439 brw_set_saturate(p
, 0);
444 void emit_lrp(struct brw_compile
*p
,
445 const struct brw_reg
*dst
,
447 const struct brw_reg
*arg0
,
448 const struct brw_reg
*arg1
,
449 const struct brw_reg
*arg2
)
453 /* Uses dst as a temporary:
455 for (i
= 0; i
< 4; i
++) {
457 /* Can I use the LINE instruction for this?
459 brw_ADD(p
, dst
[i
], negate(arg0
[i
]), brw_imm_f(1.0));
460 brw_MUL(p
, brw_null_reg(), dst
[i
], arg2
[i
]);
462 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
463 brw_MAC(p
, dst
[i
], arg0
[i
], arg1
[i
]);
464 brw_set_saturate(p
, 0);
469 void emit_sop(struct brw_compile
*p
,
470 const struct brw_reg
*dst
,
473 const struct brw_reg
*arg0
,
474 const struct brw_reg
*arg1
)
478 for (i
= 0; i
< 4; i
++) {
480 brw_push_insn_state(p
);
481 brw_CMP(p
, brw_null_reg(), cond
, arg0
[i
], arg1
[i
]);
482 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
483 brw_MOV(p
, dst
[i
], brw_imm_f(0));
484 brw_set_predicate_control(p
, BRW_PREDICATE_NORMAL
);
485 brw_MOV(p
, dst
[i
], brw_imm_f(1.0));
486 brw_pop_insn_state(p
);
491 static void emit_slt( struct brw_compile
*p
,
492 const struct brw_reg
*dst
,
494 const struct brw_reg
*arg0
,
495 const struct brw_reg
*arg1
)
497 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_L
, arg0
, arg1
);
500 static void emit_sle( struct brw_compile
*p
,
501 const struct brw_reg
*dst
,
503 const struct brw_reg
*arg0
,
504 const struct brw_reg
*arg1
)
506 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_LE
, arg0
, arg1
);
509 static void emit_sgt( struct brw_compile
*p
,
510 const struct brw_reg
*dst
,
512 const struct brw_reg
*arg0
,
513 const struct brw_reg
*arg1
)
515 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_G
, arg0
, arg1
);
518 static void emit_sge( struct brw_compile
*p
,
519 const struct brw_reg
*dst
,
521 const struct brw_reg
*arg0
,
522 const struct brw_reg
*arg1
)
524 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_GE
, arg0
, arg1
);
527 static void emit_seq( struct brw_compile
*p
,
528 const struct brw_reg
*dst
,
530 const struct brw_reg
*arg0
,
531 const struct brw_reg
*arg1
)
533 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_EQ
, arg0
, arg1
);
536 static void emit_sne( struct brw_compile
*p
,
537 const struct brw_reg
*dst
,
539 const struct brw_reg
*arg0
,
540 const struct brw_reg
*arg1
)
542 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_NEQ
, arg0
, arg1
);
545 static void emit_cmp( struct brw_compile
*p
,
546 const struct brw_reg
*dst
,
548 const struct brw_reg
*arg0
,
549 const struct brw_reg
*arg1
,
550 const struct brw_reg
*arg2
)
554 for (i
= 0; i
< 4; i
++) {
556 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
557 brw_MOV(p
, dst
[i
], arg2
[i
]);
558 brw_set_saturate(p
, 0);
560 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, arg0
[i
], brw_imm_f(0));
562 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
563 brw_MOV(p
, dst
[i
], arg1
[i
]);
564 brw_set_saturate(p
, 0);
565 brw_set_predicate_control_flag_value(p
, 0xff);
570 void emit_max(struct brw_compile
*p
,
571 const struct brw_reg
*dst
,
573 const struct brw_reg
*arg0
,
574 const struct brw_reg
*arg1
)
578 for (i
= 0; i
< 4; i
++) {
580 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
581 brw_MOV(p
, dst
[i
], arg0
[i
]);
582 brw_set_saturate(p
, 0);
584 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, arg0
[i
], arg1
[i
]);
586 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
587 brw_MOV(p
, dst
[i
], arg1
[i
]);
588 brw_set_saturate(p
, 0);
589 brw_set_predicate_control_flag_value(p
, 0xff);
594 void emit_min(struct brw_compile
*p
,
595 const struct brw_reg
*dst
,
597 const struct brw_reg
*arg0
,
598 const struct brw_reg
*arg1
)
602 for (i
= 0; i
< 4; i
++) {
604 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
605 brw_MOV(p
, dst
[i
], arg1
[i
]);
606 brw_set_saturate(p
, 0);
608 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, arg0
[i
], arg1
[i
]);
610 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
611 brw_MOV(p
, dst
[i
], arg0
[i
]);
612 brw_set_saturate(p
, 0);
613 brw_set_predicate_control_flag_value(p
, 0xff);
619 void emit_dp3(struct brw_compile
*p
,
620 const struct brw_reg
*dst
,
622 const struct brw_reg
*arg0
,
623 const struct brw_reg
*arg1
)
625 int dst_chan
= _mesa_ffs(mask
& WRITEMASK_XYZW
) - 1;
627 if (!(mask
& WRITEMASK_XYZW
))
628 return; /* Do not emit dead code */
630 assert(is_power_of_two(mask
& WRITEMASK_XYZW
));
632 brw_MUL(p
, brw_null_reg(), arg0
[0], arg1
[0]);
633 brw_MAC(p
, brw_null_reg(), arg0
[1], arg1
[1]);
635 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
636 brw_MAC(p
, dst
[dst_chan
], arg0
[2], arg1
[2]);
637 brw_set_saturate(p
, 0);
641 void emit_dp4(struct brw_compile
*p
,
642 const struct brw_reg
*dst
,
644 const struct brw_reg
*arg0
,
645 const struct brw_reg
*arg1
)
647 int dst_chan
= _mesa_ffs(mask
& WRITEMASK_XYZW
) - 1;
649 if (!(mask
& WRITEMASK_XYZW
))
650 return; /* Do not emit dead code */
652 assert(is_power_of_two(mask
& WRITEMASK_XYZW
));
654 brw_MUL(p
, brw_null_reg(), arg0
[0], arg1
[0]);
655 brw_MAC(p
, brw_null_reg(), arg0
[1], arg1
[1]);
656 brw_MAC(p
, brw_null_reg(), arg0
[2], arg1
[2]);
658 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
659 brw_MAC(p
, dst
[dst_chan
], arg0
[3], arg1
[3]);
660 brw_set_saturate(p
, 0);
664 void emit_dph(struct brw_compile
*p
,
665 const struct brw_reg
*dst
,
667 const struct brw_reg
*arg0
,
668 const struct brw_reg
*arg1
)
670 const int dst_chan
= _mesa_ffs(mask
& WRITEMASK_XYZW
) - 1;
672 if (!(mask
& WRITEMASK_XYZW
))
673 return; /* Do not emit dead code */
675 assert(is_power_of_two(mask
& WRITEMASK_XYZW
));
677 brw_MUL(p
, brw_null_reg(), arg0
[0], arg1
[0]);
678 brw_MAC(p
, brw_null_reg(), arg0
[1], arg1
[1]);
679 brw_MAC(p
, dst
[dst_chan
], arg0
[2], arg1
[2]);
681 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
682 brw_ADD(p
, dst
[dst_chan
], dst
[dst_chan
], arg1
[3]);
683 brw_set_saturate(p
, 0);
687 void emit_xpd(struct brw_compile
*p
,
688 const struct brw_reg
*dst
,
690 const struct brw_reg
*arg0
,
691 const struct brw_reg
*arg1
)
695 assert(!(mask
& WRITEMASK_W
) == WRITEMASK_X
);
697 for (i
= 0 ; i
< 3; i
++) {
702 brw_MUL(p
, brw_null_reg(), negate(arg0
[i2
]), arg1
[i1
]);
704 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
705 brw_MAC(p
, dst
[i
], arg0
[i1
], arg1
[i2
]);
706 brw_set_saturate(p
, 0);
712 void emit_math1(struct brw_wm_compile
*c
,
714 const struct brw_reg
*dst
,
716 const struct brw_reg
*arg0
)
718 struct brw_compile
*p
= &c
->func
;
719 int dst_chan
= _mesa_ffs(mask
& WRITEMASK_XYZW
) - 1;
720 GLuint saturate
= ((mask
& SATURATE
) ?
721 BRW_MATH_SATURATE_SATURATE
:
722 BRW_MATH_SATURATE_NONE
);
724 if (!(mask
& WRITEMASK_XYZW
))
725 return; /* Do not emit dead code */
727 assert(is_power_of_two(mask
& WRITEMASK_XYZW
));
729 /* If compressed, this will write message reg 2,3 from arg0.x's 16
732 brw_MOV(p
, brw_message_reg(2), arg0
[0]);
734 /* Send two messages to perform all 16 operations:
736 brw_push_insn_state(p
);
737 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
744 BRW_MATH_DATA_VECTOR
,
745 BRW_MATH_PRECISION_FULL
);
747 if (c
->dispatch_width
== 16) {
748 brw_set_compression_control(p
, BRW_COMPRESSION_2NDHALF
);
750 offset(dst
[dst_chan
],1),
755 BRW_MATH_DATA_VECTOR
,
756 BRW_MATH_PRECISION_FULL
);
758 brw_pop_insn_state(p
);
762 void emit_math2(struct brw_wm_compile
*c
,
764 const struct brw_reg
*dst
,
766 const struct brw_reg
*arg0
,
767 const struct brw_reg
*arg1
)
769 struct brw_compile
*p
= &c
->func
;
770 int dst_chan
= _mesa_ffs(mask
& WRITEMASK_XYZW
) - 1;
771 GLuint saturate
= ((mask
& SATURATE
) ?
772 BRW_MATH_SATURATE_SATURATE
:
773 BRW_MATH_SATURATE_NONE
);
775 if (!(mask
& WRITEMASK_XYZW
))
776 return; /* Do not emit dead code */
778 assert(is_power_of_two(mask
& WRITEMASK_XYZW
));
780 brw_push_insn_state(p
);
782 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
783 brw_MOV(p
, brw_message_reg(2), arg0
[0]);
784 if (c
->dispatch_width
== 16) {
785 brw_set_compression_control(p
, BRW_COMPRESSION_2NDHALF
);
786 brw_MOV(p
, brw_message_reg(4), sechalf(arg0
[0]));
789 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
790 brw_MOV(p
, brw_message_reg(3), arg1
[0]);
791 if (c
->dispatch_width
== 16) {
792 brw_set_compression_control(p
, BRW_COMPRESSION_2NDHALF
);
793 brw_MOV(p
, brw_message_reg(5), sechalf(arg1
[0]));
796 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
803 BRW_MATH_DATA_VECTOR
,
804 BRW_MATH_PRECISION_FULL
);
806 /* Send two messages to perform all 16 operations:
808 if (c
->dispatch_width
== 16) {
809 brw_set_compression_control(p
, BRW_COMPRESSION_2NDHALF
);
811 offset(dst
[dst_chan
],1),
816 BRW_MATH_DATA_VECTOR
,
817 BRW_MATH_PRECISION_FULL
);
819 brw_pop_insn_state(p
);
823 void emit_tex(struct brw_wm_compile
*c
,
827 struct brw_reg depth_payload
,
832 struct brw_compile
*p
= &c
->func
;
833 struct brw_reg dst_retyped
;
834 GLuint cur_mrf
= 2, response_length
;
835 GLuint i
, nr_texcoords
;
838 GLuint mrf_per_channel
;
841 if (c
->dispatch_width
== 16) {
844 dst_retyped
= retype(vec16(dst
[0]), BRW_REGISTER_TYPE_UW
);
845 simd_mode
= BRW_SAMPLER_SIMD_MODE_SIMD16
;
849 dst_retyped
= retype(vec8(dst
[0]), BRW_REGISTER_TYPE_UW
);
850 simd_mode
= BRW_SAMPLER_SIMD_MODE_SIMD8
;
853 /* How many input regs are there?
856 case TEXTURE_1D_INDEX
:
860 case TEXTURE_2D_INDEX
:
861 case TEXTURE_RECT_INDEX
:
865 case TEXTURE_3D_INDEX
:
866 case TEXTURE_CUBE_INDEX
:
867 emit
= WRITEMASK_XYZ
;
871 /* unexpected target */
875 /* Pre-Ironlake, the 8-wide sampler always took u,v,r. */
876 if (!BRW_IS_IGDNG(p
->brw
) && c
->dispatch_width
== 8)
879 /* For shadow comparisons, we have to supply u,v,r. */
883 /* Emit the texcoords. */
884 for (i
= 0; i
< nr_texcoords
; i
++) {
886 brw_MOV(p
, brw_message_reg(cur_mrf
), arg
[i
]);
888 brw_MOV(p
, brw_message_reg(cur_mrf
), brw_imm_f(0));
889 cur_mrf
+= mrf_per_channel
;
892 /* Fill in the shadow comparison reference value. */
894 if (BRW_IS_IGDNG(p
->brw
)) {
895 /* Fill in the cube map array index value. */
896 brw_MOV(p
, brw_message_reg(cur_mrf
), brw_imm_f(0));
897 cur_mrf
+= mrf_per_channel
;
898 } else if (c
->dispatch_width
== 8) {
899 /* Fill in the LOD bias value. */
900 brw_MOV(p
, brw_message_reg(cur_mrf
), brw_imm_f(0));
901 cur_mrf
+= mrf_per_channel
;
903 brw_MOV(p
, brw_message_reg(cur_mrf
), arg
[2]);
904 cur_mrf
+= mrf_per_channel
;
907 if (BRW_IS_IGDNG(p
->brw
)) {
909 msg_type
= BRW_SAMPLER_MESSAGE_SAMPLE_COMPARE_IGDNG
;
911 msg_type
= BRW_SAMPLER_MESSAGE_SAMPLE_IGDNG
;
913 /* Note that G45 and older determines shadow compare and dispatch width
914 * from message length for most messages.
916 if (c
->dispatch_width
== 16 && shadow
)
917 msg_type
= BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE
;
919 msg_type
= BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE
;
925 retype(depth_payload
, BRW_REGISTER_TYPE_UW
),
926 SURF_INDEX_TEXTURE(sampler
),
928 dst_flags
& WRITEMASK_XYZW
,
938 void emit_txb(struct brw_wm_compile
*c
,
942 struct brw_reg depth_payload
,
946 struct brw_compile
*p
= &c
->func
;
949 GLuint mrf_per_channel
;
950 GLuint response_length
;
951 struct brw_reg dst_retyped
;
953 /* The G45 and older chipsets don't support 8-wide dispatch for LOD biased
954 * samples, so we'll use the 16-wide instruction, leave the second halves
955 * undefined, and trust the execution mask to keep the undefined pixels
958 if (c
->dispatch_width
== 16 || !BRW_IS_IGDNG(p
->brw
)) {
959 if (BRW_IS_IGDNG(p
->brw
))
960 msg_type
= BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_IGDNG
;
962 msg_type
= BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS
;
964 dst_retyped
= retype(vec16(dst
[0]), BRW_REGISTER_TYPE_UW
);
967 msg_type
= BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_IGDNG
;
969 dst_retyped
= retype(vec8(dst
[0]), BRW_REGISTER_TYPE_UW
);
973 /* Shadow ignored for txb. */
975 case TEXTURE_1D_INDEX
:
976 brw_MOV(p
, brw_message_reg(2 + 0 * mrf_per_channel
), arg
[0]);
977 brw_MOV(p
, brw_message_reg(2 + 1 * mrf_per_channel
), brw_imm_f(0));
978 brw_MOV(p
, brw_message_reg(2 + 2 * mrf_per_channel
), brw_imm_f(0));
980 case TEXTURE_2D_INDEX
:
981 case TEXTURE_RECT_INDEX
:
982 brw_MOV(p
, brw_message_reg(2 + 0 * mrf_per_channel
), arg
[0]);
983 brw_MOV(p
, brw_message_reg(2 + 1 * mrf_per_channel
), arg
[1]);
984 brw_MOV(p
, brw_message_reg(2 + 2 * mrf_per_channel
), brw_imm_f(0));
986 case TEXTURE_3D_INDEX
:
987 case TEXTURE_CUBE_INDEX
:
988 brw_MOV(p
, brw_message_reg(2 + 0 * mrf_per_channel
), arg
[0]);
989 brw_MOV(p
, brw_message_reg(2 + 1 * mrf_per_channel
), arg
[1]);
990 brw_MOV(p
, brw_message_reg(2 + 2 * mrf_per_channel
), arg
[2]);
993 /* unexpected target */
997 brw_MOV(p
, brw_message_reg(2 + 3 * mrf_per_channel
), arg
[3]);
998 msgLength
= 2 + 4 * mrf_per_channel
- 1;
1003 retype(depth_payload
, BRW_REGISTER_TYPE_UW
),
1004 SURF_INDEX_TEXTURE(sampler
),
1006 dst_flags
& WRITEMASK_XYZW
,
1012 BRW_SAMPLER_SIMD_MODE_SIMD16
);
1016 static void emit_lit(struct brw_wm_compile
*c
,
1017 const struct brw_reg
*dst
,
1019 const struct brw_reg
*arg0
)
1021 struct brw_compile
*p
= &c
->func
;
1023 assert((mask
& WRITEMASK_XW
) == 0);
1025 if (mask
& WRITEMASK_Y
) {
1026 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
1027 brw_MOV(p
, dst
[1], arg0
[0]);
1028 brw_set_saturate(p
, 0);
1031 if (mask
& WRITEMASK_Z
) {
1032 emit_math2(c
, BRW_MATH_FUNCTION_POW
,
1034 WRITEMASK_X
| (mask
& SATURATE
),
1039 /* Ordinarily you'd use an iff statement to skip or shortcircuit
1040 * some of the POW calculations above, but 16-wide iff statements
1041 * seem to lock c1 hardware, so this is a nasty workaround:
1043 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_LE
, arg0
[0], brw_imm_f(0));
1045 if (mask
& WRITEMASK_Y
)
1046 brw_MOV(p
, dst
[1], brw_imm_f(0));
1048 if (mask
& WRITEMASK_Z
)
1049 brw_MOV(p
, dst
[2], brw_imm_f(0));
1051 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
1055 /* Kill pixel - set execution mask to zero for those pixels which
1058 static void emit_kil( struct brw_wm_compile
*c
,
1059 struct brw_reg
*arg0
)
1061 struct brw_compile
*p
= &c
->func
;
1062 struct brw_reg r0uw
= retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW
);
1065 /* XXX - usually won't need 4 compares!
1067 for (i
= 0; i
< 4; i
++) {
1068 brw_push_insn_state(p
);
1069 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_GE
, arg0
[i
], brw_imm_f(0));
1070 brw_set_predicate_control_flag_value(p
, 0xff);
1071 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1072 brw_AND(p
, r0uw
, brw_flag_reg(), r0uw
);
1073 brw_pop_insn_state(p
);
1077 /* KIL_NV kills the pixels that are currently executing, not based on a test
1080 static void emit_kil_nv( struct brw_wm_compile
*c
)
1082 struct brw_compile
*p
= &c
->func
;
1083 struct brw_reg r0uw
= retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW
);
1085 brw_push_insn_state(p
);
1086 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1087 brw_NOT(p
, c
->emit_mask_reg
, brw_mask_reg(1)); //IMASK
1088 brw_AND(p
, r0uw
, c
->emit_mask_reg
, r0uw
);
1089 brw_pop_insn_state(p
);
1092 static void fire_fb_write( struct brw_wm_compile
*c
,
1098 struct brw_compile
*p
= &c
->func
;
1101 if (c
->dispatch_width
== 16)
1102 dst
= retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW
);
1104 dst
= retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW
);
1106 /* Pass through control information:
1108 /* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */
1110 brw_push_insn_state(p
);
1111 brw_set_mask_control(p
, BRW_MASK_DISABLE
); /* ? */
1112 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1114 brw_message_reg(base_reg
+ 1),
1115 brw_vec8_grf(1, 0));
1116 brw_pop_insn_state(p
);
1119 /* Send framebuffer write message: */
1120 /* send (16) null.0<1>:uw m0 r0.0<8;8,1>:uw 0x85a04000:ud { Align1 EOT } */
1124 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW
),
1132 static void emit_aa( struct brw_wm_compile
*c
,
1133 struct brw_reg
*arg1
,
1136 struct brw_compile
*p
= &c
->func
;
1137 GLuint comp
= c
->key
.aa_dest_stencil_reg
/ 2;
1138 GLuint off
= c
->key
.aa_dest_stencil_reg
% 2;
1139 struct brw_reg aa
= offset(arg1
[comp
], off
);
1141 brw_push_insn_state(p
);
1142 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
); /* ?? */
1143 brw_MOV(p
, brw_message_reg(reg
), aa
);
1144 brw_pop_insn_state(p
);
1148 /* Post-fragment-program processing. Send the results to the
1150 * \param arg0 the fragment color
1151 * \param arg1 the pass-through depth value
1152 * \param arg2 the shader-computed depth value
1154 void emit_fb_write(struct brw_wm_compile
*c
,
1155 struct brw_reg
*arg0
,
1156 struct brw_reg
*arg1
,
1157 struct brw_reg
*arg2
,
1161 struct brw_compile
*p
= &c
->func
;
1162 struct brw_context
*brw
= p
->brw
;
1166 /* Reserve a space for AA - may not be needed:
1168 if (c
->key
.aa_dest_stencil_reg
)
1171 /* I don't really understand how this achieves the color interleave
1172 * (ie RGBARGBA) in the result: [Do the saturation here]
1174 brw_push_insn_state(p
);
1176 for (channel
= 0; channel
< 4; channel
++) {
1177 if (c
->dispatch_width
== 16 && (BRW_IS_G4X(brw
) || BRW_IS_IGDNG(brw
))) {
1178 /* By setting the high bit of the MRF register number, we indicate
1179 * that we want COMPR4 mode - instead of doing the usual destination
1180 * + 1 for the second half we get destination + 4.
1183 brw_message_reg(nr
+ channel
+ (1 << 7)),
1186 /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */
1187 /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */
1188 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1190 brw_message_reg(nr
+ channel
),
1193 if (c
->dispatch_width
== 16) {
1194 brw_set_compression_control(p
, BRW_COMPRESSION_2NDHALF
);
1196 brw_message_reg(nr
+ channel
+ 4),
1197 sechalf(arg0
[channel
]));
1201 /* skip over the regs populated above:
1204 brw_pop_insn_state(p
);
1206 if (c
->key
.source_depth_to_render_target
)
1208 if (c
->key
.computes_depth
)
1209 brw_MOV(p
, brw_message_reg(nr
), arg2
[2]);
1211 brw_MOV(p
, brw_message_reg(nr
), arg1
[1]); /* ? */
1216 if (c
->key
.dest_depth_reg
)
1218 GLuint comp
= c
->key
.dest_depth_reg
/ 2;
1219 GLuint off
= c
->key
.dest_depth_reg
% 2;
1222 brw_push_insn_state(p
);
1223 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1225 brw_MOV(p
, brw_message_reg(nr
), offset(arg1
[comp
],1));
1227 brw_MOV(p
, brw_message_reg(nr
+1), arg1
[comp
+1]);
1228 brw_pop_insn_state(p
);
1231 brw_MOV(p
, brw_message_reg(nr
), arg1
[comp
]);
1236 if (!c
->key
.runtime_check_aads_emit
) {
1237 if (c
->key
.aa_dest_stencil_reg
)
1238 emit_aa(c
, arg1
, 2);
1240 fire_fb_write(c
, 0, nr
, target
, eot
);
1243 struct brw_reg v1_null_ud
= vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD
));
1244 struct brw_reg ip
= brw_ip_reg();
1245 struct brw_instruction
*jmp
;
1247 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1248 brw_set_conditionalmod(p
, BRW_CONDITIONAL_Z
);
1251 get_element_ud(brw_vec8_grf(1,0), 6),
1254 jmp
= brw_JMPI(p
, ip
, ip
, brw_imm_w(0));
1256 emit_aa(c
, arg1
, 2);
1257 fire_fb_write(c
, 0, nr
, target
, eot
);
1258 /* note - thread killed in subroutine */
1260 brw_land_fwd_jump(p
, jmp
);
1262 /* ELSE: Shuffle up one register to fill in the hole left for AA:
1264 fire_fb_write(c
, 1, nr
-1, target
, eot
);
1269 * Move a GPR to scratch memory.
1271 static void emit_spill( struct brw_wm_compile
*c
,
1275 struct brw_compile
*p
= &c
->func
;
1278 mov (16) m2.0<1>:ud r2.0<8;8,1>:ud { Align1 Compr }
1280 brw_MOV(p
, brw_message_reg(2), reg
);
1283 mov (1) r0.2<1>:d 0x00000080:d { Align1 NoMask }
1284 send (16) null.0<1>:uw m1 r0.0<8;8,1>:uw 0x053003ff:ud { Align1 }
1287 retype(vec16(brw_vec8_grf(0, 0)), BRW_REGISTER_TYPE_UW
),
1293 * Load a GPR from scratch memory.
1295 static void emit_unspill( struct brw_wm_compile
*c
,
1299 struct brw_compile
*p
= &c
->func
;
1301 /* Slot 0 is the undef value.
1304 brw_MOV(p
, reg
, brw_imm_f(0));
1309 mov (1) r0.2<1>:d 0x000000c0:d { Align1 NoMask }
1310 send (16) r110.0<1>:uw m1 r0.0<8;8,1>:uw 0x041243ff:ud { Align1 }
1314 retype(vec16(reg
), BRW_REGISTER_TYPE_UW
),
1320 * Retrieve up to 4 GEN4 register pairs for the given wm reg:
1321 * Args with unspill_reg != 0 will be loaded from scratch memory.
1323 static void get_argument_regs( struct brw_wm_compile
*c
,
1324 struct brw_wm_ref
*arg
[],
1325 struct brw_reg
*regs
)
1329 for (i
= 0; i
< 4; i
++) {
1331 if (arg
[i
]->unspill_reg
)
1333 brw_vec8_grf(arg
[i
]->unspill_reg
, 0),
1334 arg
[i
]->value
->spill_slot
);
1336 regs
[i
] = arg
[i
]->hw_reg
;
1339 regs
[i
] = brw_null_reg();
1346 * For values that have a spill_slot!=0, write those regs to scratch memory.
1348 static void spill_values( struct brw_wm_compile
*c
,
1349 struct brw_wm_value
*values
,
1354 for (i
= 0; i
< nr
; i
++)
1355 if (values
[i
].spill_slot
)
1356 emit_spill(c
, values
[i
].hw_reg
, values
[i
].spill_slot
);
1360 /* Emit the fragment program instructions here.
1362 void brw_wm_emit( struct brw_wm_compile
*c
)
1364 struct brw_compile
*p
= &c
->func
;
1367 brw_set_compression_control(p
, BRW_COMPRESSION_COMPRESSED
);
1369 /* Check if any of the payload regs need to be spilled:
1371 spill_values(c
, c
->payload
.depth
, 4);
1372 spill_values(c
, c
->creg
, c
->nr_creg
);
1373 spill_values(c
, c
->payload
.input_interp
, FRAG_ATTRIB_MAX
);
1376 for (insn
= 0; insn
< c
->nr_insns
; insn
++) {
1378 struct brw_wm_instruction
*inst
= &c
->instruction
[insn
];
1379 struct brw_reg args
[3][4], dst
[4];
1380 GLuint i
, dst_flags
;
1382 /* Get argument regs:
1384 for (i
= 0; i
< 3; i
++)
1385 get_argument_regs(c
, inst
->src
[i
], args
[i
]);
1389 for (i
= 0; i
< 4; i
++)
1391 dst
[i
] = inst
->dst
[i
]->hw_reg
;
1393 dst
[i
] = brw_null_reg();
1397 dst_flags
= inst
->writemask
;
1399 dst_flags
|= SATURATE
;
1401 switch (inst
->opcode
) {
1402 /* Generated instructions for calculating triangle interpolants:
1405 emit_pixel_xy(c
, dst
, dst_flags
);
1409 emit_delta_xy(p
, dst
, dst_flags
, args
[0]);
1413 emit_wpos_xy(c
, dst
, dst_flags
, args
[0]);
1417 emit_pixel_w(c
, dst
, dst_flags
, args
[0], args
[1]);
1421 emit_linterp(p
, dst
, dst_flags
, args
[0], args
[1]);
1425 emit_pinterp(p
, dst
, dst_flags
, args
[0], args
[1], args
[2]);
1429 emit_cinterp(p
, dst
, dst_flags
, args
[0]);
1433 emit_fb_write(c
, args
[0], args
[1], args
[2], inst
->target
, inst
->eot
);
1436 case WM_FRONTFACING
:
1437 emit_frontfacing(p
, dst
, dst_flags
);
1440 /* Straightforward arithmetic:
1443 emit_alu2(p
, brw_ADD
, dst
, dst_flags
, args
[0], args
[1]);
1447 emit_alu1(p
, brw_FRC
, dst
, dst_flags
, args
[0]);
1451 emit_alu1(p
, brw_RNDD
, dst
, dst_flags
, args
[0]);
1455 emit_ddxy(p
, dst
, dst_flags
, GL_TRUE
, args
[0]);
1459 emit_ddxy(p
, dst
, dst_flags
, GL_FALSE
, args
[0]);
1463 emit_dp3(p
, dst
, dst_flags
, args
[0], args
[1]);
1467 emit_dp4(p
, dst
, dst_flags
, args
[0], args
[1]);
1471 emit_dph(p
, dst
, dst_flags
, args
[0], args
[1]);
1475 emit_alu1(p
, brw_RNDZ
, dst
, dst_flags
, args
[0]);
1479 emit_lrp(p
, dst
, dst_flags
, args
[0], args
[1], args
[2]);
1483 emit_mad(p
, dst
, dst_flags
, args
[0], args
[1], args
[2]);
1488 emit_alu1(p
, brw_MOV
, dst
, dst_flags
, args
[0]);
1492 emit_alu2(p
, brw_MUL
, dst
, dst_flags
, args
[0], args
[1]);
1496 emit_xpd(p
, dst
, dst_flags
, args
[0], args
[1]);
1499 /* Higher math functions:
1502 emit_math1(c
, BRW_MATH_FUNCTION_INV
, dst
, dst_flags
, args
[0]);
1506 emit_math1(c
, BRW_MATH_FUNCTION_RSQ
, dst
, dst_flags
, args
[0]);
1510 emit_math1(c
, BRW_MATH_FUNCTION_SIN
, dst
, dst_flags
, args
[0]);
1514 emit_math1(c
, BRW_MATH_FUNCTION_COS
, dst
, dst_flags
, args
[0]);
1518 emit_math1(c
, BRW_MATH_FUNCTION_EXP
, dst
, dst_flags
, args
[0]);
1522 emit_math1(c
, BRW_MATH_FUNCTION_LOG
, dst
, dst_flags
, args
[0]);
1526 /* There is an scs math function, but it would need some
1527 * fixup for 16-element execution.
1529 if (dst_flags
& WRITEMASK_X
)
1530 emit_math1(c
, BRW_MATH_FUNCTION_COS
, dst
, (dst_flags
&SATURATE
)|WRITEMASK_X
, args
[0]);
1531 if (dst_flags
& WRITEMASK_Y
)
1532 emit_math1(c
, BRW_MATH_FUNCTION_SIN
, dst
+1, (dst_flags
&SATURATE
)|WRITEMASK_X
, args
[0]);
1536 emit_math2(c
, BRW_MATH_FUNCTION_POW
, dst
, dst_flags
, args
[0], args
[1]);
1542 emit_cmp(p
, dst
, dst_flags
, args
[0], args
[1], args
[2]);
1546 emit_max(p
, dst
, dst_flags
, args
[0], args
[1]);
1550 emit_min(p
, dst
, dst_flags
, args
[0], args
[1]);
1554 emit_slt(p
, dst
, dst_flags
, args
[0], args
[1]);
1558 emit_sle(p
, dst
, dst_flags
, args
[0], args
[1]);
1561 emit_sgt(p
, dst
, dst_flags
, args
[0], args
[1]);
1564 emit_sge(p
, dst
, dst_flags
, args
[0], args
[1]);
1567 emit_seq(p
, dst
, dst_flags
, args
[0], args
[1]);
1570 emit_sne(p
, dst
, dst_flags
, args
[0], args
[1]);
1574 emit_lit(c
, dst
, dst_flags
, args
[0]);
1577 /* Texturing operations:
1580 emit_tex(c
, dst
, dst_flags
, args
[0], c
->payload
.depth
[0].hw_reg
,
1581 inst
->tex_idx
, inst
->tex_unit
,
1586 emit_txb(c
, dst
, dst_flags
, args
[0], c
->payload
.depth
[0].hw_reg
,
1587 inst
->tex_idx
, inst
->tex_unit
);
1591 emit_kil(c
, args
[0]);
1599 _mesa_printf("Unsupported opcode %i (%s) in fragment shader\n",
1600 inst
->opcode
, inst
->opcode
< MAX_OPCODE
?
1601 _mesa_opcode_string(inst
->opcode
) :
1605 for (i
= 0; i
< 4; i
++)
1606 if (inst
->dst
[i
] && inst
->dst
[i
]->spill_slot
)
1608 inst
->dst
[i
]->hw_reg
,
1609 inst
->dst
[i
]->spill_slot
);
1612 if (INTEL_DEBUG
& DEBUG_WM
) {
1615 _mesa_printf("wm-native:\n");
1616 for (i
= 0; i
< p
->nr_insn
; i
++)
1617 brw_disasm(stderr
, &p
->store
[i
]);