2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **********************************************************************/
29 * Keith Whitwell <keith@tungstengraphics.com>
32 #include "util/u_math.h"
33 #include "tgsi/tgsi_info.h"
35 #include "brw_context.h"
37 #include "brw_debug.h"
39 /* Not quite sure how correct this is - need to understand horiz
40 * vs. vertical strides a little better.
42 static INLINE
struct brw_reg
sechalf( struct brw_reg reg
)
51 * R0.0 -- pixel mask, one bit for each of 4 pixels in 4 quads,
52 * corresponding to each of the 16 execution channels.
54 * R1.0 -- triangle vertex 0.X
55 * R1.1 -- triangle vertex 0.Y
56 * R1.2 -- quad 0 x,y coords (2 packed uwords)
57 * R1.3 -- quad 1 x,y coords (2 packed uwords)
58 * R1.4 -- quad 2 x,y coords (2 packed uwords)
59 * R1.5 -- quad 3 x,y coords (2 packed uwords)
66 static void emit_pixel_xy(struct brw_compile
*p
,
67 const struct brw_reg
*dst
,
70 struct brw_reg r1
= brw_vec1_grf(1, 0);
71 struct brw_reg r1_uw
= retype(r1
, BRW_REGISTER_TYPE_UW
);
73 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
75 /* Calculate pixel centers by adding 1 or 0 to each of the
76 * micro-tile coordinates passed in r1.
78 if (mask
& BRW_WRITEMASK_X
) {
80 vec16(retype(dst
[0], BRW_REGISTER_TYPE_UW
)),
81 stride(suboffset(r1_uw
, 4), 2, 4, 0),
82 brw_imm_v(0x10101010));
85 if (mask
& BRW_WRITEMASK_Y
) {
87 vec16(retype(dst
[1], BRW_REGISTER_TYPE_UW
)),
88 stride(suboffset(r1_uw
,5), 2, 4, 0),
89 brw_imm_v(0x11001100));
92 brw_set_compression_control(p
, BRW_COMPRESSION_COMPRESSED
);
97 static void emit_delta_xy(struct brw_compile
*p
,
98 const struct brw_reg
*dst
,
100 const struct brw_reg
*arg0
)
102 struct brw_reg r1
= brw_vec1_grf(1, 0);
104 /* Calc delta X,Y by subtracting origin in r1 from the pixel
107 if (mask
& BRW_WRITEMASK_X
) {
110 retype(arg0
[0], BRW_REGISTER_TYPE_UW
),
114 if (mask
& BRW_WRITEMASK_Y
) {
117 retype(arg0
[1], BRW_REGISTER_TYPE_UW
),
118 negate(suboffset(r1
,1)));
123 static void emit_wpos_xy(struct brw_wm_compile
*c
,
124 const struct brw_reg
*dst
,
126 const struct brw_reg
*arg0
)
128 struct brw_compile
*p
= &c
->func
;
130 if (mask
& BRW_WRITEMASK_X
) {
134 retype(arg0
[0], BRW_REGISTER_TYPE_W
));
137 /* XXX: is this needed any more, or is this a NOOP?
139 if (mask
& BRW_WRITEMASK_Y
) {
141 /* Y' = height - 1 - Y */
144 negate(retype(arg0
[1], BRW_REGISTER_TYPE_W
)),
145 brw_imm_d(c
->key
.drawable_height
- 1));
149 retype(arg0
[0], BRW_REGISTER_TYPE_W
));
155 static void emit_pixel_w( struct brw_compile
*p
,
156 const struct brw_reg
*dst
,
158 const struct brw_reg
*arg0
,
159 const struct brw_reg
*deltas
)
161 /* Don't need this if all you are doing is interpolating color, for
164 if (mask
& BRW_WRITEMASK_W
) {
165 struct brw_reg interp3
= brw_vec1_grf(arg0
[0].nr
+1, 4);
167 /* Calc 1/w - just linterp wpos[3] optimized by putting the
168 * result straight into a message reg.
170 brw_LINE(p
, brw_null_reg(), interp3
, deltas
[0]);
171 brw_MAC(p
, brw_message_reg(2), suboffset(interp3
, 1), deltas
[1]);
174 brw_math_16( p
, dst
[3],
175 BRW_MATH_FUNCTION_INV
,
176 BRW_MATH_SATURATE_NONE
,
178 BRW_MATH_PRECISION_FULL
);
184 static void emit_linterp( struct brw_compile
*p
,
185 const struct brw_reg
*dst
,
187 const struct brw_reg
*arg0
,
188 const struct brw_reg
*deltas
)
190 struct brw_reg interp
[4];
191 GLuint nr
= arg0
[0].nr
;
194 interp
[0] = brw_vec1_grf(nr
, 0);
195 interp
[1] = brw_vec1_grf(nr
, 4);
196 interp
[2] = brw_vec1_grf(nr
+1, 0);
197 interp
[3] = brw_vec1_grf(nr
+1, 4);
199 for (i
= 0; i
< 4; i
++) {
201 brw_LINE(p
, brw_null_reg(), interp
[i
], deltas
[0]);
202 brw_MAC(p
, dst
[i
], suboffset(interp
[i
],1), deltas
[1]);
208 static void emit_pinterp( struct brw_compile
*p
,
209 const struct brw_reg
*dst
,
211 const struct brw_reg
*arg0
,
212 const struct brw_reg
*deltas
,
213 const struct brw_reg
*w
)
215 struct brw_reg interp
[4];
216 GLuint nr
= arg0
[0].nr
;
219 interp
[0] = brw_vec1_grf(nr
, 0);
220 interp
[1] = brw_vec1_grf(nr
, 4);
221 interp
[2] = brw_vec1_grf(nr
+1, 0);
222 interp
[3] = brw_vec1_grf(nr
+1, 4);
224 for (i
= 0; i
< 4; i
++) {
226 brw_LINE(p
, brw_null_reg(), interp
[i
], deltas
[0]);
227 brw_MAC(p
, dst
[i
], suboffset(interp
[i
],1), deltas
[1]);
230 for (i
= 0; i
< 4; i
++) {
232 brw_MUL(p
, dst
[i
], dst
[i
], w
[3]);
238 static void emit_cinterp( struct brw_compile
*p
,
239 const struct brw_reg
*dst
,
241 const struct brw_reg
*arg0
)
243 struct brw_reg interp
[4];
244 GLuint nr
= arg0
[0].nr
;
247 interp
[0] = brw_vec1_grf(nr
, 0);
248 interp
[1] = brw_vec1_grf(nr
, 4);
249 interp
[2] = brw_vec1_grf(nr
+1, 0);
250 interp
[3] = brw_vec1_grf(nr
+1, 4);
252 for (i
= 0; i
< 4; i
++) {
254 brw_MOV(p
, dst
[i
], suboffset(interp
[i
],3)); /* TODO: optimize away like other moves */
259 /* Sets the destination channels to 1.0 or 0.0 according to gl_FrontFacing. */
260 static void emit_frontfacing( struct brw_compile
*p
,
261 const struct brw_reg
*dst
,
264 struct brw_reg r1_6ud
= retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD
);
267 if (!(mask
& BRW_WRITEMASK_XYZW
))
270 for (i
= 0; i
< 4; i
++) {
272 brw_MOV(p
, dst
[i
], brw_imm_f(0.0));
276 /* bit 31 is "primitive is back face", so checking < (1 << 31) gives
279 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, r1_6ud
, brw_imm_ud(1 << 31));
280 for (i
= 0; i
< 4; i
++) {
282 brw_MOV(p
, dst
[i
], brw_imm_f(1.0));
285 brw_set_predicate_control_flag_value(p
, 0xff);
288 /* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input
291 * arg0: q0.tl q0.tr q0.bl q0.br q1.tl q1.tr q1.bl q1.br
293 * and we're trying to produce:
296 * dst: (q0.tr - q0.tl) (q0.tl - q0.bl)
297 * (q0.tr - q0.tl) (q0.tr - q0.br)
298 * (q0.br - q0.bl) (q0.tl - q0.bl)
299 * (q0.br - q0.bl) (q0.tr - q0.br)
300 * (q1.tr - q1.tl) (q1.tl - q1.bl)
301 * (q1.tr - q1.tl) (q1.tr - q1.br)
302 * (q1.br - q1.bl) (q1.tl - q1.bl)
303 * (q1.br - q1.bl) (q1.tr - q1.br)
305 * and add two more quads if in 16-pixel dispatch mode.
307 * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result
308 * for each pair, and vertstride = 2 jumps us 2 elements after processing a
309 * pair. But for DDY, it's harder, as we want to produce the pairs swizzled
310 * between each other. We could probably do it like ddx and swizzle the right
311 * order later, but bail for now and just produce
312 * ((q0.tl - q0.bl)x4 (q1.tl - q1.bl)x4)
314 void emit_ddxy(struct brw_compile
*p
,
315 const struct brw_reg
*dst
,
318 const struct brw_reg
*arg0
)
321 struct brw_reg src0
, src1
;
324 brw_set_saturate(p
, 1);
325 for (i
= 0; i
< 4; i
++ ) {
328 src0
= brw_reg(arg0
[i
].file
, arg0
[i
].nr
, 1,
330 BRW_VERTICAL_STRIDE_2
,
332 BRW_HORIZONTAL_STRIDE_0
,
333 BRW_SWIZZLE_XYZW
, BRW_WRITEMASK_XYZW
);
334 src1
= brw_reg(arg0
[i
].file
, arg0
[i
].nr
, 0,
336 BRW_VERTICAL_STRIDE_2
,
338 BRW_HORIZONTAL_STRIDE_0
,
339 BRW_SWIZZLE_XYZW
, BRW_WRITEMASK_XYZW
);
341 src0
= brw_reg(arg0
[i
].file
, arg0
[i
].nr
, 0,
343 BRW_VERTICAL_STRIDE_4
,
345 BRW_HORIZONTAL_STRIDE_0
,
346 BRW_SWIZZLE_XYZW
, BRW_WRITEMASK_XYZW
);
347 src1
= brw_reg(arg0
[i
].file
, arg0
[i
].nr
, 2,
349 BRW_VERTICAL_STRIDE_4
,
351 BRW_HORIZONTAL_STRIDE_0
,
352 BRW_SWIZZLE_XYZW
, BRW_WRITEMASK_XYZW
);
354 brw_ADD(p
, dst
[i
], src0
, negate(src1
));
358 brw_set_saturate(p
, 0);
361 static void emit_alu1( struct brw_compile
*p
,
362 struct brw_instruction
*(*func
)(struct brw_compile
*,
365 const struct brw_reg
*dst
,
367 const struct brw_reg
*arg0
)
372 brw_set_saturate(p
, 1);
374 for (i
= 0; i
< 4; i
++) {
376 func(p
, dst
[i
], arg0
[i
]);
381 brw_set_saturate(p
, 0);
385 static void emit_alu2( struct brw_compile
*p
,
386 struct brw_instruction
*(*func
)(struct brw_compile
*,
390 const struct brw_reg
*dst
,
392 const struct brw_reg
*arg0
,
393 const struct brw_reg
*arg1
)
398 brw_set_saturate(p
, 1);
400 for (i
= 0; i
< 4; i
++) {
402 func(p
, dst
[i
], arg0
[i
], arg1
[i
]);
407 brw_set_saturate(p
, 0);
411 static void emit_mad( struct brw_compile
*p
,
412 const struct brw_reg
*dst
,
414 const struct brw_reg
*arg0
,
415 const struct brw_reg
*arg1
,
416 const struct brw_reg
*arg2
)
420 for (i
= 0; i
< 4; i
++) {
422 brw_MUL(p
, dst
[i
], arg0
[i
], arg1
[i
]);
424 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
425 brw_ADD(p
, dst
[i
], dst
[i
], arg2
[i
]);
426 brw_set_saturate(p
, 0);
431 static void emit_trunc( struct brw_compile
*p
,
432 const struct brw_reg
*dst
,
434 const struct brw_reg
*arg0
)
438 for (i
= 0; i
< 4; i
++) {
440 brw_RNDZ(p
, dst
[i
], arg0
[i
]);
445 static void emit_lrp( struct brw_compile
*p
,
446 const struct brw_reg
*dst
,
448 const struct brw_reg
*arg0
,
449 const struct brw_reg
*arg1
,
450 const struct brw_reg
*arg2
)
454 /* Uses dst as a temporary:
456 for (i
= 0; i
< 4; i
++) {
458 /* Can I use the LINE instruction for this?
460 brw_ADD(p
, dst
[i
], negate(arg0
[i
]), brw_imm_f(1.0));
461 brw_MUL(p
, brw_null_reg(), dst
[i
], arg2
[i
]);
463 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
464 brw_MAC(p
, dst
[i
], arg0
[i
], arg1
[i
]);
465 brw_set_saturate(p
, 0);
470 static void emit_sop( struct brw_compile
*p
,
471 const struct brw_reg
*dst
,
474 const struct brw_reg
*arg0
,
475 const struct brw_reg
*arg1
)
479 for (i
= 0; i
< 4; i
++) {
481 brw_MOV(p
, dst
[i
], brw_imm_f(0));
482 brw_CMP(p
, brw_null_reg(), cond
, arg0
[i
], arg1
[i
]);
483 brw_MOV(p
, dst
[i
], brw_imm_f(1.0));
484 brw_set_predicate_control_flag_value(p
, 0xff);
489 static void emit_slt( struct brw_compile
*p
,
490 const struct brw_reg
*dst
,
492 const struct brw_reg
*arg0
,
493 const struct brw_reg
*arg1
)
495 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_L
, arg0
, arg1
);
498 static void emit_sle( struct brw_compile
*p
,
499 const struct brw_reg
*dst
,
501 const struct brw_reg
*arg0
,
502 const struct brw_reg
*arg1
)
504 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_LE
, arg0
, arg1
);
507 static void emit_sgt( struct brw_compile
*p
,
508 const struct brw_reg
*dst
,
510 const struct brw_reg
*arg0
,
511 const struct brw_reg
*arg1
)
513 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_G
, arg0
, arg1
);
516 static void emit_sge( struct brw_compile
*p
,
517 const struct brw_reg
*dst
,
519 const struct brw_reg
*arg0
,
520 const struct brw_reg
*arg1
)
522 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_GE
, arg0
, arg1
);
525 static void emit_seq( struct brw_compile
*p
,
526 const struct brw_reg
*dst
,
528 const struct brw_reg
*arg0
,
529 const struct brw_reg
*arg1
)
531 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_EQ
, arg0
, arg1
);
534 static void emit_sne( struct brw_compile
*p
,
535 const struct brw_reg
*dst
,
537 const struct brw_reg
*arg0
,
538 const struct brw_reg
*arg1
)
540 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_NEQ
, arg0
, arg1
);
543 static void emit_cmp( struct brw_compile
*p
,
544 const struct brw_reg
*dst
,
546 const struct brw_reg
*arg0
,
547 const struct brw_reg
*arg1
,
548 const struct brw_reg
*arg2
)
552 for (i
= 0; i
< 4; i
++) {
554 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
555 brw_MOV(p
, dst
[i
], arg2
[i
]);
556 brw_set_saturate(p
, 0);
558 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, arg0
[i
], brw_imm_f(0));
560 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
561 brw_MOV(p
, dst
[i
], arg1
[i
]);
562 brw_set_saturate(p
, 0);
563 brw_set_predicate_control_flag_value(p
, 0xff);
568 static void emit_max( struct brw_compile
*p
,
569 const struct brw_reg
*dst
,
571 const struct brw_reg
*arg0
,
572 const struct brw_reg
*arg1
)
576 for (i
= 0; i
< 4; i
++) {
578 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
579 brw_MOV(p
, dst
[i
], arg0
[i
]);
580 brw_set_saturate(p
, 0);
582 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, arg0
[i
], arg1
[i
]);
584 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
585 brw_MOV(p
, dst
[i
], arg1
[i
]);
586 brw_set_saturate(p
, 0);
587 brw_set_predicate_control_flag_value(p
, 0xff);
592 static void emit_min( struct brw_compile
*p
,
593 const struct brw_reg
*dst
,
595 const struct brw_reg
*arg0
,
596 const struct brw_reg
*arg1
)
600 for (i
= 0; i
< 4; i
++) {
602 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
603 brw_MOV(p
, dst
[i
], arg1
[i
]);
604 brw_set_saturate(p
, 0);
606 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, arg0
[i
], arg1
[i
]);
608 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
609 brw_MOV(p
, dst
[i
], arg0
[i
]);
610 brw_set_saturate(p
, 0);
611 brw_set_predicate_control_flag_value(p
, 0xff);
617 static void emit_dp3( struct brw_compile
*p
,
618 const struct brw_reg
*dst
,
620 const struct brw_reg
*arg0
,
621 const struct brw_reg
*arg1
)
623 int dst_chan
= ffs(mask
& BRW_WRITEMASK_XYZW
) - 1;
625 if (!(mask
& BRW_WRITEMASK_XYZW
))
626 return; /* Do not emit dead code */
628 assert(util_is_power_of_two(mask
& BRW_WRITEMASK_XYZW
));
630 brw_MUL(p
, brw_null_reg(), arg0
[0], arg1
[0]);
631 brw_MAC(p
, brw_null_reg(), arg0
[1], arg1
[1]);
633 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
634 brw_MAC(p
, dst
[dst_chan
], arg0
[2], arg1
[2]);
635 brw_set_saturate(p
, 0);
639 static void emit_dp4( struct brw_compile
*p
,
640 const struct brw_reg
*dst
,
642 const struct brw_reg
*arg0
,
643 const struct brw_reg
*arg1
)
645 int dst_chan
= ffs(mask
& BRW_WRITEMASK_XYZW
) - 1;
647 if (!(mask
& BRW_WRITEMASK_XYZW
))
648 return; /* Do not emit dead code */
650 assert(util_is_power_of_two(mask
& BRW_WRITEMASK_XYZW
));
652 brw_MUL(p
, brw_null_reg(), arg0
[0], arg1
[0]);
653 brw_MAC(p
, brw_null_reg(), arg0
[1], arg1
[1]);
654 brw_MAC(p
, brw_null_reg(), arg0
[2], arg1
[2]);
656 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
657 brw_MAC(p
, dst
[dst_chan
], arg0
[3], arg1
[3]);
658 brw_set_saturate(p
, 0);
662 static void emit_dph( struct brw_compile
*p
,
663 const struct brw_reg
*dst
,
665 const struct brw_reg
*arg0
,
666 const struct brw_reg
*arg1
)
668 const int dst_chan
= ffs(mask
& BRW_WRITEMASK_XYZW
) - 1;
670 if (!(mask
& BRW_WRITEMASK_XYZW
))
671 return; /* Do not emit dead code */
673 assert(util_is_power_of_two(mask
& BRW_WRITEMASK_XYZW
));
675 brw_MUL(p
, brw_null_reg(), arg0
[0], arg1
[0]);
676 brw_MAC(p
, brw_null_reg(), arg0
[1], arg1
[1]);
677 brw_MAC(p
, dst
[dst_chan
], arg0
[2], arg1
[2]);
679 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
680 brw_ADD(p
, dst
[dst_chan
], dst
[dst_chan
], arg1
[3]);
681 brw_set_saturate(p
, 0);
685 static void emit_xpd( struct brw_compile
*p
,
686 const struct brw_reg
*dst
,
688 const struct brw_reg
*arg0
,
689 const struct brw_reg
*arg1
)
693 assert(!(mask
& BRW_WRITEMASK_W
) == BRW_WRITEMASK_X
);
695 for (i
= 0 ; i
< 3; i
++) {
700 brw_MUL(p
, brw_null_reg(), negate(arg0
[i2
]), arg1
[i1
]);
702 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
703 brw_MAC(p
, dst
[i
], arg0
[i1
], arg1
[i2
]);
704 brw_set_saturate(p
, 0);
710 static void emit_math1( struct brw_compile
*p
,
712 const struct brw_reg
*dst
,
714 const struct brw_reg
*arg0
)
716 int dst_chan
= ffs(mask
& BRW_WRITEMASK_XYZW
) - 1;
718 if (!(mask
& BRW_WRITEMASK_XYZW
))
719 return; /* Do not emit dead code */
721 assert(util_is_power_of_two(mask
& BRW_WRITEMASK_XYZW
));
723 brw_MOV(p
, brw_message_reg(2), arg0
[0]);
725 /* Send two messages to perform all 16 operations:
730 (mask
& SATURATE
) ? BRW_MATH_SATURATE_SATURATE
: BRW_MATH_SATURATE_NONE
,
733 BRW_MATH_PRECISION_FULL
);
737 static void emit_math2( struct brw_compile
*p
,
739 const struct brw_reg
*dst
,
741 const struct brw_reg
*arg0
,
742 const struct brw_reg
*arg1
)
744 int dst_chan
= ffs(mask
& BRW_WRITEMASK_XYZW
) - 1;
746 if (!(mask
& BRW_WRITEMASK_XYZW
))
747 return; /* Do not emit dead code */
749 assert(util_is_power_of_two(mask
& BRW_WRITEMASK_XYZW
));
751 brw_push_insn_state(p
);
753 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
754 brw_MOV(p
, brw_message_reg(2), arg0
[0]);
755 brw_set_compression_control(p
, BRW_COMPRESSION_2NDHALF
);
756 brw_MOV(p
, brw_message_reg(4), sechalf(arg0
[0]));
758 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
759 brw_MOV(p
, brw_message_reg(3), arg1
[0]);
760 brw_set_compression_control(p
, BRW_COMPRESSION_2NDHALF
);
761 brw_MOV(p
, brw_message_reg(5), sechalf(arg1
[0]));
764 /* Send two messages to perform all 16 operations:
766 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
770 (mask
& SATURATE
) ? BRW_MATH_SATURATE_SATURATE
: BRW_MATH_SATURATE_NONE
,
773 BRW_MATH_DATA_VECTOR
,
774 BRW_MATH_PRECISION_FULL
);
776 brw_set_compression_control(p
, BRW_COMPRESSION_2NDHALF
);
778 offset(dst
[dst_chan
],1),
780 (mask
& SATURATE
) ? BRW_MATH_SATURATE_SATURATE
: BRW_MATH_SATURATE_NONE
,
783 BRW_MATH_DATA_VECTOR
,
784 BRW_MATH_PRECISION_FULL
);
786 brw_pop_insn_state(p
);
791 static void emit_tex( struct brw_wm_compile
*c
,
792 const struct brw_wm_instruction
*inst
,
795 struct brw_reg
*arg
)
797 struct brw_compile
*p
= &c
->func
;
798 GLuint msgLength
, responseLength
;
802 GLboolean shadow
= FALSE
;
804 /* How many input regs are there?
806 switch (inst
->tex_target
) {
807 case TGSI_TEXTURE_1D
:
808 emit
= BRW_WRITEMASK_X
;
811 case TGSI_TEXTURE_SHADOW1D
:
812 emit
= BRW_WRITEMASK_XW
;
816 case TGSI_TEXTURE_2D
:
817 emit
= BRW_WRITEMASK_XY
;
820 case TGSI_TEXTURE_SHADOW2D
:
821 case TGSI_TEXTURE_SHADOWRECT
:
822 emit
= BRW_WRITEMASK_XYW
;
826 case TGSI_TEXTURE_3D
:
827 case TGSI_TEXTURE_CUBE
:
828 emit
= BRW_WRITEMASK_XYZ
;
832 /* unexpected target */
838 for (i
= 0; i
< nr
; i
++) {
839 static const GLuint swz
[4] = {0,1,2,2};
841 brw_MOV(p
, brw_message_reg(msgLength
+1), arg
[swz
[i
]]);
843 brw_MOV(p
, brw_message_reg(msgLength
+1), brw_imm_f(0));
847 responseLength
= 8; /* always */
849 if (BRW_IS_IGDNG(p
->brw
)) {
851 msg_type
= BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE_IGDNG
;
853 msg_type
= BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_IGDNG
;
856 msg_type
= BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE
;
858 msg_type
= BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE
;
862 retype(vec16(dst
[0]), BRW_REGISTER_TYPE_UW
),
864 retype(c
->payload
.depth
[0].hw_reg
, BRW_REGISTER_TYPE_UW
),
865 SURF_INDEX_TEXTURE(inst
->tex_unit
),
866 inst
->tex_unit
, /* sampler */
873 BRW_SAMPLER_SIMD_MODE_SIMD16
);
877 static void emit_txb( struct brw_wm_compile
*c
,
878 const struct brw_wm_instruction
*inst
,
881 struct brw_reg
*arg
)
883 struct brw_compile
*p
= &c
->func
;
886 /* Shadow ignored for txb.
888 switch (inst
->tex_target
) {
889 case TGSI_TEXTURE_1D
:
890 case TGSI_TEXTURE_SHADOW1D
:
891 brw_MOV(p
, brw_message_reg(2), arg
[0]);
892 brw_MOV(p
, brw_message_reg(4), brw_imm_f(0));
893 brw_MOV(p
, brw_message_reg(6), brw_imm_f(0));
895 case TGSI_TEXTURE_2D
:
896 case TGSI_TEXTURE_RECT
:
897 case TGSI_TEXTURE_SHADOW2D
:
898 case TGSI_TEXTURE_SHADOWRECT
:
899 brw_MOV(p
, brw_message_reg(2), arg
[0]);
900 brw_MOV(p
, brw_message_reg(4), arg
[1]);
901 brw_MOV(p
, brw_message_reg(6), brw_imm_f(0));
903 case TGSI_TEXTURE_3D
:
904 case TGSI_TEXTURE_CUBE
:
905 brw_MOV(p
, brw_message_reg(2), arg
[0]);
906 brw_MOV(p
, brw_message_reg(4), arg
[1]);
907 brw_MOV(p
, brw_message_reg(6), arg
[2]);
910 /* unexpected target */
914 brw_MOV(p
, brw_message_reg(8), arg
[3]);
917 if (BRW_IS_IGDNG(p
->brw
))
918 msg_type
= BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS_IGDNG
;
920 msg_type
= BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS
;
923 retype(vec16(dst
[0]), BRW_REGISTER_TYPE_UW
),
925 retype(c
->payload
.depth
[0].hw_reg
, BRW_REGISTER_TYPE_UW
),
926 SURF_INDEX_TEXTURE(inst
->tex_unit
),
927 inst
->tex_unit
, /* sampler */
930 8, /* responseLength */
934 BRW_SAMPLER_SIMD_MODE_SIMD16
);
938 static void emit_lit( struct brw_compile
*p
,
939 const struct brw_reg
*dst
,
941 const struct brw_reg
*arg0
)
943 assert((mask
& BRW_WRITEMASK_XW
) == 0);
945 if (mask
& BRW_WRITEMASK_Y
) {
946 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
947 brw_MOV(p
, dst
[1], arg0
[0]);
948 brw_set_saturate(p
, 0);
951 if (mask
& BRW_WRITEMASK_Z
) {
952 emit_math2(p
, BRW_MATH_FUNCTION_POW
,
954 BRW_WRITEMASK_X
| (mask
& SATURATE
),
959 /* Ordinarily you'd use an iff statement to skip or shortcircuit
960 * some of the POW calculations above, but 16-wide iff statements
961 * seem to lock c1 hardware, so this is a nasty workaround:
963 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_LE
, arg0
[0], brw_imm_f(0));
965 if (mask
& BRW_WRITEMASK_Y
)
966 brw_MOV(p
, dst
[1], brw_imm_f(0));
968 if (mask
& BRW_WRITEMASK_Z
)
969 brw_MOV(p
, dst
[2], brw_imm_f(0));
971 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
975 /* Kill pixel - set execution mask to zero for those pixels which
978 static void emit_kil( struct brw_wm_compile
*c
,
979 struct brw_reg
*arg0
)
981 struct brw_compile
*p
= &c
->func
;
982 struct brw_reg r0uw
= retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW
);
985 /* XXX - usually won't need 4 compares!
987 for (i
= 0; i
< 4; i
++) {
988 brw_push_insn_state(p
);
989 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_GE
, arg0
[i
], brw_imm_f(0));
990 brw_set_predicate_control_flag_value(p
, 0xff);
991 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
992 brw_AND(p
, r0uw
, brw_flag_reg(), r0uw
);
993 brw_pop_insn_state(p
);
997 /* KILLP kills the pixels that are currently executing, not based on a test
1000 static void emit_killp( struct brw_wm_compile
*c
)
1002 struct brw_compile
*p
= &c
->func
;
1003 struct brw_reg r0uw
= retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW
);
1005 brw_push_insn_state(p
);
1006 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1007 brw_NOT(p
, c
->emit_mask_reg
, brw_mask_reg(1)); //IMASK
1008 brw_AND(p
, r0uw
, c
->emit_mask_reg
, r0uw
);
1009 brw_pop_insn_state(p
);
1012 static void fire_fb_write( struct brw_wm_compile
*c
,
1018 struct brw_compile
*p
= &c
->func
;
1020 /* Pass through control information:
1022 /* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */
1024 brw_push_insn_state(p
);
1025 brw_set_mask_control(p
, BRW_MASK_DISABLE
); /* ? */
1026 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1028 brw_message_reg(base_reg
+ 1),
1029 brw_vec8_grf(1, 0));
1030 brw_pop_insn_state(p
);
1033 /* Send framebuffer write message: */
1034 /* send (16) null.0<1>:uw m0 r0.0<8;8,1>:uw 0x85a04000:ud { Align1 EOT } */
1036 retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW
),
1038 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW
),
1046 static void emit_aa( struct brw_wm_compile
*c
,
1047 struct brw_reg
*arg1
,
1050 struct brw_compile
*p
= &c
->func
;
1051 GLuint comp
= c
->key
.aa_dest_stencil_reg
/ 2;
1052 GLuint off
= c
->key
.aa_dest_stencil_reg
% 2;
1053 struct brw_reg aa
= offset(arg1
[comp
], off
);
1055 brw_push_insn_state(p
);
1056 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
); /* ?? */
1057 brw_MOV(p
, brw_message_reg(reg
), aa
);
1058 brw_pop_insn_state(p
);
1062 /* Post-fragment-program processing. Send the results to the
1064 * \param arg0 the fragment color
1065 * \param arg1 the pass-through depth value
1066 * \param arg2 the shader-computed depth value
1068 static void emit_fb_write( struct brw_wm_compile
*c
,
1069 struct brw_reg
*arg0
,
1070 struct brw_reg
*arg1
,
1071 struct brw_reg
*arg2
,
1075 struct brw_compile
*p
= &c
->func
;
1079 /* Reserve a space for AA - may not be needed:
1081 if (c
->key
.aa_dest_stencil_reg
)
1084 /* I don't really understand how this achieves the color interleave
1085 * (ie RGBARGBA) in the result: [Do the saturation here]
1088 brw_push_insn_state(p
);
1090 for (channel
= 0; channel
< 4; channel
++) {
1091 /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */
1092 /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */
1094 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1096 brw_message_reg(nr
+ channel
),
1099 brw_set_compression_control(p
, BRW_COMPRESSION_2NDHALF
);
1101 brw_message_reg(nr
+ channel
+ 4),
1102 sechalf(arg0
[channel
]));
1105 /* skip over the regs populated above:
1109 brw_pop_insn_state(p
);
1112 if (c
->key
.source_depth_to_render_target
)
1114 if (c
->key
.computes_depth
)
1115 brw_MOV(p
, brw_message_reg(nr
), arg2
[2]);
1117 brw_MOV(p
, brw_message_reg(nr
), arg1
[1]); /* ? */
1122 if (c
->key
.dest_depth_reg
)
1124 GLuint comp
= c
->key
.dest_depth_reg
/ 2;
1125 GLuint off
= c
->key
.dest_depth_reg
% 2;
1128 brw_push_insn_state(p
);
1129 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1131 brw_MOV(p
, brw_message_reg(nr
), offset(arg1
[comp
],1));
1133 brw_MOV(p
, brw_message_reg(nr
+1), arg1
[comp
+1]);
1134 brw_pop_insn_state(p
);
1137 brw_MOV(p
, brw_message_reg(nr
), arg1
[comp
]);
1142 if (!c
->key
.runtime_check_aads_emit
) {
1143 if (c
->key
.aa_dest_stencil_reg
)
1144 emit_aa(c
, arg1
, 2);
1146 fire_fb_write(c
, 0, nr
, target
, eot
);
1149 struct brw_reg v1_null_ud
= vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD
));
1150 struct brw_reg ip
= brw_ip_reg();
1151 struct brw_instruction
*jmp
;
1153 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1154 brw_set_conditionalmod(p
, BRW_CONDITIONAL_Z
);
1157 get_element_ud(brw_vec8_grf(1,0), 6),
1160 jmp
= brw_JMPI(p
, ip
, ip
, brw_imm_d(0));
1162 emit_aa(c
, arg1
, 2);
1163 fire_fb_write(c
, 0, nr
, target
, eot
);
1164 /* note - thread killed in subroutine */
1166 brw_land_fwd_jump(p
, jmp
);
1168 /* ELSE: Shuffle up one register to fill in the hole left for AA:
1170 fire_fb_write(c
, 1, nr
-1, target
, eot
);
1176 * Move a GPR to scratch memory.
1178 static void emit_spill( struct brw_wm_compile
*c
,
1182 struct brw_compile
*p
= &c
->func
;
1185 mov (16) m2.0<1>:ud r2.0<8;8,1>:ud { Align1 Compr }
1187 brw_MOV(p
, brw_message_reg(2), reg
);
1190 mov (1) r0.2<1>:d 0x00000080:d { Align1 NoMask }
1191 send (16) null.0<1>:uw m1 r0.0<8;8,1>:uw 0x053003ff:ud { Align1 }
1194 retype(vec16(brw_vec8_grf(0, 0)), BRW_REGISTER_TYPE_UW
),
1200 * Load a GPR from scratch memory.
1202 static void emit_unspill( struct brw_wm_compile
*c
,
1206 struct brw_compile
*p
= &c
->func
;
1208 /* Slot 0 is the undef value.
1211 brw_MOV(p
, reg
, brw_imm_f(0));
1216 mov (1) r0.2<1>:d 0x000000c0:d { Align1 NoMask }
1217 send (16) r110.0<1>:uw m1 r0.0<8;8,1>:uw 0x041243ff:ud { Align1 }
1221 retype(vec16(reg
), BRW_REGISTER_TYPE_UW
),
1227 * Retrieve up to 4 GEN4 register pairs for the given wm reg:
1228 * Args with unspill_reg != 0 will be loaded from scratch memory.
1230 static void get_argument_regs( struct brw_wm_compile
*c
,
1231 struct brw_wm_ref
*arg
[],
1232 struct brw_reg
*regs
)
1236 for (i
= 0; i
< 4; i
++) {
1238 if (arg
[i
]->unspill_reg
)
1240 brw_vec8_grf(arg
[i
]->unspill_reg
, 0),
1241 arg
[i
]->value
->spill_slot
);
1243 regs
[i
] = arg
[i
]->hw_reg
;
1246 regs
[i
] = brw_null_reg();
1253 * For values that have a spill_slot!=0, write those regs to scratch memory.
1255 static void spill_values( struct brw_wm_compile
*c
,
1256 struct brw_wm_value
*values
,
1261 for (i
= 0; i
< nr
; i
++)
1262 if (values
[i
].spill_slot
)
1263 emit_spill(c
, values
[i
].hw_reg
, values
[i
].spill_slot
);
1267 /* Emit the fragment program instructions here.
1269 void brw_wm_emit( struct brw_wm_compile
*c
)
1271 struct brw_compile
*p
= &c
->func
;
1274 brw_set_compression_control(p
, BRW_COMPRESSION_COMPRESSED
);
1276 /* Check if any of the payload regs need to be spilled:
1278 spill_values(c
, c
->payload
.depth
, 4);
1279 spill_values(c
, c
->creg
, c
->nr_creg
);
1280 spill_values(c
, c
->payload
.input_interp
, PIPE_MAX_SHADER_INPUTS
);
1283 for (insn
= 0; insn
< c
->nr_insns
; insn
++) {
1285 struct brw_wm_instruction
*inst
= &c
->instruction
[insn
];
1286 struct brw_reg args
[3][4], dst
[4];
1287 GLuint i
, dst_flags
;
1289 /* Get argument regs:
1291 for (i
= 0; i
< 3; i
++)
1292 get_argument_regs(c
, inst
->src
[i
], args
[i
]);
1296 for (i
= 0; i
< 4; i
++)
1298 dst
[i
] = inst
->dst
[i
]->hw_reg
;
1300 dst
[i
] = brw_null_reg();
1304 dst_flags
= inst
->writemask
;
1306 dst_flags
|= SATURATE
;
1308 switch (inst
->opcode
) {
1309 /* Generated instructions for calculating triangle interpolants:
1312 emit_pixel_xy(p
, dst
, dst_flags
);
1316 emit_delta_xy(p
, dst
, dst_flags
, args
[0]);
1320 emit_wpos_xy(c
, dst
, dst_flags
, args
[0]);
1324 emit_pixel_w(p
, dst
, dst_flags
, args
[0], args
[1]);
1328 emit_linterp(p
, dst
, dst_flags
, args
[0], args
[1]);
1332 emit_pinterp(p
, dst
, dst_flags
, args
[0], args
[1], args
[2]);
1336 emit_cinterp(p
, dst
, dst_flags
, args
[0]);
1340 emit_fb_write(c
, args
[0], args
[1], args
[2], inst
->target
, inst
->eot
);
1343 case WM_FRONTFACING
:
1344 emit_frontfacing(p
, dst
, dst_flags
);
1347 /* Straightforward arithmetic:
1349 case TGSI_OPCODE_ADD
:
1350 emit_alu2(p
, brw_ADD
, dst
, dst_flags
, args
[0], args
[1]);
1353 case TGSI_OPCODE_FRC
:
1354 emit_alu1(p
, brw_FRC
, dst
, dst_flags
, args
[0]);
1357 case TGSI_OPCODE_FLR
:
1358 emit_alu1(p
, brw_RNDD
, dst
, dst_flags
, args
[0]);
1361 case TGSI_OPCODE_DDX
:
1362 emit_ddxy(p
, dst
, dst_flags
, GL_TRUE
, args
[0]);
1365 case TGSI_OPCODE_DDY
:
1366 emit_ddxy(p
, dst
, dst_flags
, GL_FALSE
, args
[0]);
1369 case TGSI_OPCODE_DP3
:
1370 emit_dp3(p
, dst
, dst_flags
, args
[0], args
[1]);
1373 case TGSI_OPCODE_DP4
:
1374 emit_dp4(p
, dst
, dst_flags
, args
[0], args
[1]);
1377 case TGSI_OPCODE_DPH
:
1378 emit_dph(p
, dst
, dst_flags
, args
[0], args
[1]);
1381 case TGSI_OPCODE_TRUNC
:
1382 emit_trunc(p
, dst
, dst_flags
, args
[0]);
1385 case TGSI_OPCODE_LRP
:
1386 emit_lrp(p
, dst
, dst_flags
, args
[0], args
[1], args
[2]);
1389 case TGSI_OPCODE_MAD
:
1390 emit_mad(p
, dst
, dst_flags
, args
[0], args
[1], args
[2]);
1393 case TGSI_OPCODE_MOV
:
1394 emit_alu1(p
, brw_MOV
, dst
, dst_flags
, args
[0]);
1397 case TGSI_OPCODE_MUL
:
1398 emit_alu2(p
, brw_MUL
, dst
, dst_flags
, args
[0], args
[1]);
1401 case TGSI_OPCODE_XPD
:
1402 emit_xpd(p
, dst
, dst_flags
, args
[0], args
[1]);
1405 /* Higher math functions:
1407 case TGSI_OPCODE_RCP
:
1408 emit_math1(p
, BRW_MATH_FUNCTION_INV
, dst
, dst_flags
, args
[0]);
1411 case TGSI_OPCODE_RSQ
:
1412 emit_math1(p
, BRW_MATH_FUNCTION_RSQ
, dst
, dst_flags
, args
[0]);
1415 case TGSI_OPCODE_SIN
:
1416 emit_math1(p
, BRW_MATH_FUNCTION_SIN
, dst
, dst_flags
, args
[0]);
1419 case TGSI_OPCODE_COS
:
1420 emit_math1(p
, BRW_MATH_FUNCTION_COS
, dst
, dst_flags
, args
[0]);
1423 case TGSI_OPCODE_EX2
:
1424 emit_math1(p
, BRW_MATH_FUNCTION_EXP
, dst
, dst_flags
, args
[0]);
1427 case TGSI_OPCODE_LG2
:
1428 emit_math1(p
, BRW_MATH_FUNCTION_LOG
, dst
, dst_flags
, args
[0]);
1431 case TGSI_OPCODE_SCS
:
1432 /* There is an scs math function, but it would need some
1433 * fixup for 16-element execution.
1435 if (dst_flags
& BRW_WRITEMASK_X
)
1436 emit_math1(p
, BRW_MATH_FUNCTION_COS
, dst
, (dst_flags
&SATURATE
)|BRW_WRITEMASK_X
, args
[0]);
1437 if (dst_flags
& BRW_WRITEMASK_Y
)
1438 emit_math1(p
, BRW_MATH_FUNCTION_SIN
, dst
+1, (dst_flags
&SATURATE
)|BRW_WRITEMASK_X
, args
[0]);
1441 case TGSI_OPCODE_POW
:
1442 emit_math2(p
, BRW_MATH_FUNCTION_POW
, dst
, dst_flags
, args
[0], args
[1]);
1447 case TGSI_OPCODE_CMP
:
1448 emit_cmp(p
, dst
, dst_flags
, args
[0], args
[1], args
[2]);
1451 case TGSI_OPCODE_MAX
:
1452 emit_max(p
, dst
, dst_flags
, args
[0], args
[1]);
1455 case TGSI_OPCODE_MIN
:
1456 emit_min(p
, dst
, dst_flags
, args
[0], args
[1]);
1459 case TGSI_OPCODE_SLT
:
1460 emit_slt(p
, dst
, dst_flags
, args
[0], args
[1]);
1463 case TGSI_OPCODE_SLE
:
1464 emit_sle(p
, dst
, dst_flags
, args
[0], args
[1]);
1466 case TGSI_OPCODE_SGT
:
1467 emit_sgt(p
, dst
, dst_flags
, args
[0], args
[1]);
1469 case TGSI_OPCODE_SGE
:
1470 emit_sge(p
, dst
, dst_flags
, args
[0], args
[1]);
1472 case TGSI_OPCODE_SEQ
:
1473 emit_seq(p
, dst
, dst_flags
, args
[0], args
[1]);
1475 case TGSI_OPCODE_SNE
:
1476 emit_sne(p
, dst
, dst_flags
, args
[0], args
[1]);
1479 case TGSI_OPCODE_LIT
:
1480 emit_lit(p
, dst
, dst_flags
, args
[0]);
1483 /* Texturing operations:
1485 case TGSI_OPCODE_TEX
:
1486 emit_tex(c
, inst
, dst
, dst_flags
, args
[0]);
1489 case TGSI_OPCODE_TXB
:
1490 emit_txb(c
, inst
, dst
, dst_flags
, args
[0]);
1493 case TGSI_OPCODE_KIL
:
1494 emit_kil(c
, args
[0]);
1497 case TGSI_OPCODE_KILP
:
1502 debug_printf("Unsupported opcode %i (%s) in fragment shader\n",
1504 tgsi_get_opcode_info(inst
->opcode
)->mnemonic
);
1507 for (i
= 0; i
< 4; i
++)
1508 if (inst
->dst
[i
] && inst
->dst
[i
]->spill_slot
)
1510 inst
->dst
[i
]->hw_reg
,
1511 inst
->dst
[i
]->spill_slot
);
1514 if (BRW_DEBUG
& DEBUG_WM
) {
1515 debug_printf("wm-native:\n");
1516 brw_disasm(stderr
, p
->store
, p
->nr_insn
);