2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **********************************************************************/
29 * Keith Whitwell <keith@tungstengraphics.com>
33 #include "main/macros.h"
34 #include "brw_context.h"
37 #define SATURATE (1<<5)
/* Not quite sure how correct this is - need to understand horizontal
 * vs. vertical strides a little better.
 */
/* sechalf: takes a register description by value and returns a register
 * description.  Elsewhere in this file its result is used as the source of
 * the MOV issued under BRW_COMPRESSION_2NDHALF (see emit_math2 and
 * emit_fb_write).
 * NOTE(review): the function body is not visible in the reviewed text --
 * confirm how the second-half register is derived.
 */
42 static INLINE
struct brw_reg
sechalf( struct brw_reg reg
)
/* Thread payload registers referenced below:
 *
 * R0.0 -- pixel mask, one bit for each of 4 pixels in 4 tiles,
 *         corresponding to each of the 16 execution channels.
 * R1.0 -- triangle vertex 0.X
 * R1.1 -- triangle vertex 0.Y
 * R1.2 -- tile 0 x,y coords (2 packed uwords)
 * R1.3 -- tile 1 x,y coords (2 packed uwords)
 * R1.4 -- tile 2 x,y coords (2 packed uwords)
 * R1.5 -- tile 3 x,y coords (2 packed uwords)
 */
66 static void emit_pixel_xy(struct brw_compile
*p
,
67 const struct brw_reg
*dst
,
70 struct brw_reg r1
= brw_vec1_grf(1, 0);
71 struct brw_reg r1_uw
= retype(r1
, BRW_REGISTER_TYPE_UW
);
73 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
75 /* Calculate pixel centers by adding 1 or 0 to each of the
76 * micro-tile coordinates passed in r1.
78 if (mask
& WRITEMASK_X
) {
80 vec16(retype(dst
[0], BRW_REGISTER_TYPE_UW
)),
81 stride(suboffset(r1_uw
, 4), 2, 4, 0),
82 brw_imm_v(0x10101010));
85 if (mask
& WRITEMASK_Y
) {
87 vec16(retype(dst
[1], BRW_REGISTER_TYPE_UW
)),
88 stride(suboffset(r1_uw
,5), 2, 4, 0),
89 brw_imm_v(0x11001100));
92 brw_set_compression_control(p
, BRW_COMPRESSION_COMPRESSED
);
97 static void emit_delta_xy(struct brw_compile
*p
,
98 const struct brw_reg
*dst
,
100 const struct brw_reg
*arg0
)
102 struct brw_reg r1
= brw_vec1_grf(1, 0);
104 /* Calc delta X,Y by subtracting origin in r1 from the pixel
107 if (mask
& WRITEMASK_X
) {
110 retype(arg0
[0], BRW_REGISTER_TYPE_UW
),
114 if (mask
& WRITEMASK_Y
) {
117 retype(arg0
[1], BRW_REGISTER_TYPE_UW
),
118 negate(suboffset(r1
,1)));
123 static void emit_wpos_xy(struct brw_wm_compile
*c
,
124 const struct brw_reg
*dst
,
126 const struct brw_reg
*arg0
)
128 struct brw_compile
*p
= &c
->func
;
130 /* Calculate the pixel offset from window bottom left into destination
133 if (mask
& WRITEMASK_X
) {
134 /* X' = X - origin */
137 retype(arg0
[0], BRW_REGISTER_TYPE_W
),
138 brw_imm_d(0 - c
->key
.origin_x
));
141 if (mask
& WRITEMASK_Y
) {
142 /* Y' = height - (Y - origin_y) = height + origin_y - Y */
145 negate(retype(arg0
[1], BRW_REGISTER_TYPE_W
)),
146 brw_imm_d(c
->key
.origin_y
+ c
->key
.drawable_height
- 1));
151 static void emit_pixel_w( struct brw_compile
*p
,
152 const struct brw_reg
*dst
,
154 const struct brw_reg
*arg0
,
155 const struct brw_reg
*deltas
)
157 /* Don't need this if all you are doing is interpolating color, for
160 if (mask
& WRITEMASK_W
) {
161 struct brw_reg interp3
= brw_vec1_grf(arg0
[0].nr
+1, 4);
163 /* Calc 1/w - just linterp wpos[3] optimized by putting the
164 * result straight into a message reg.
166 brw_LINE(p
, brw_null_reg(), interp3
, deltas
[0]);
167 brw_MAC(p
, brw_message_reg(2), suboffset(interp3
, 1), deltas
[1]);
170 brw_math_16( p
, dst
[3],
171 BRW_MATH_FUNCTION_INV
,
172 BRW_MATH_SATURATE_NONE
,
174 BRW_MATH_PRECISION_FULL
);
180 static void emit_linterp( struct brw_compile
*p
,
181 const struct brw_reg
*dst
,
183 const struct brw_reg
*arg0
,
184 const struct brw_reg
*deltas
)
186 struct brw_reg interp
[4];
187 GLuint nr
= arg0
[0].nr
;
190 interp
[0] = brw_vec1_grf(nr
, 0);
191 interp
[1] = brw_vec1_grf(nr
, 4);
192 interp
[2] = brw_vec1_grf(nr
+1, 0);
193 interp
[3] = brw_vec1_grf(nr
+1, 4);
195 for (i
= 0; i
< 4; i
++) {
197 brw_LINE(p
, brw_null_reg(), interp
[i
], deltas
[0]);
198 brw_MAC(p
, dst
[i
], suboffset(interp
[i
],1), deltas
[1]);
204 static void emit_pinterp( struct brw_compile
*p
,
205 const struct brw_reg
*dst
,
207 const struct brw_reg
*arg0
,
208 const struct brw_reg
*deltas
,
209 const struct brw_reg
*w
)
211 struct brw_reg interp
[4];
212 GLuint nr
= arg0
[0].nr
;
215 interp
[0] = brw_vec1_grf(nr
, 0);
216 interp
[1] = brw_vec1_grf(nr
, 4);
217 interp
[2] = brw_vec1_grf(nr
+1, 0);
218 interp
[3] = brw_vec1_grf(nr
+1, 4);
220 for (i
= 0; i
< 4; i
++) {
222 brw_LINE(p
, brw_null_reg(), interp
[i
], deltas
[0]);
223 brw_MAC(p
, dst
[i
], suboffset(interp
[i
],1), deltas
[1]);
226 for (i
= 0; i
< 4; i
++) {
228 brw_MUL(p
, dst
[i
], dst
[i
], w
[3]);
234 static void emit_cinterp( struct brw_compile
*p
,
235 const struct brw_reg
*dst
,
237 const struct brw_reg
*arg0
)
239 struct brw_reg interp
[4];
240 GLuint nr
= arg0
[0].nr
;
243 interp
[0] = brw_vec1_grf(nr
, 0);
244 interp
[1] = brw_vec1_grf(nr
, 4);
245 interp
[2] = brw_vec1_grf(nr
+1, 0);
246 interp
[3] = brw_vec1_grf(nr
+1, 4);
248 for (i
= 0; i
< 4; i
++) {
250 brw_MOV(p
, dst
[i
], suboffset(interp
[i
],3)); /* TODO: optimize away like other moves */
255 /* Sets the destination channels to 1.0 or 0.0 according to glFrontFacing. */
256 static void emit_frontfacing( struct brw_compile
*p
,
257 const struct brw_reg
*dst
,
260 struct brw_reg r1_6ud
= retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD
);
263 if (!(mask
& WRITEMASK_XYZW
))
266 for (i
= 0; i
< 4; i
++) {
268 brw_MOV(p
, dst
[i
], brw_imm_f(0.0));
272 /* bit 31 is "primitive is back face", so checking < (1 << 31) gives
275 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, r1_6ud
, brw_imm_ud(1 << 31));
276 for (i
= 0; i
< 4; i
++) {
278 brw_MOV(p
, dst
[i
], brw_imm_f(1.0));
281 brw_set_predicate_control_flag_value(p
, 0xff);
284 static void emit_alu1( struct brw_compile
*p
,
285 struct brw_instruction
*(*func
)(struct brw_compile
*,
288 const struct brw_reg
*dst
,
290 const struct brw_reg
*arg0
)
295 brw_set_saturate(p
, 1);
297 for (i
= 0; i
< 4; i
++) {
299 func(p
, dst
[i
], arg0
[i
]);
304 brw_set_saturate(p
, 0);
308 static void emit_alu2( struct brw_compile
*p
,
309 struct brw_instruction
*(*func
)(struct brw_compile
*,
313 const struct brw_reg
*dst
,
315 const struct brw_reg
*arg0
,
316 const struct brw_reg
*arg1
)
321 brw_set_saturate(p
, 1);
323 for (i
= 0; i
< 4; i
++) {
325 func(p
, dst
[i
], arg0
[i
], arg1
[i
]);
330 brw_set_saturate(p
, 0);
334 static void emit_mad( struct brw_compile
*p
,
335 const struct brw_reg
*dst
,
337 const struct brw_reg
*arg0
,
338 const struct brw_reg
*arg1
,
339 const struct brw_reg
*arg2
)
343 for (i
= 0; i
< 4; i
++) {
345 brw_MUL(p
, dst
[i
], arg0
[i
], arg1
[i
]);
347 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
348 brw_ADD(p
, dst
[i
], dst
[i
], arg2
[i
]);
349 brw_set_saturate(p
, 0);
354 static void emit_trunc( struct brw_compile
*p
,
355 const struct brw_reg
*dst
,
357 const struct brw_reg
*arg0
)
361 for (i
= 0; i
< 4; i
++) {
363 brw_RNDZ(p
, dst
[i
], arg0
[i
]);
368 static void emit_lrp( struct brw_compile
*p
,
369 const struct brw_reg
*dst
,
371 const struct brw_reg
*arg0
,
372 const struct brw_reg
*arg1
,
373 const struct brw_reg
*arg2
)
377 /* Uses dst as a temporary:
379 for (i
= 0; i
< 4; i
++) {
381 /* Can I use the LINE instruction for this?
383 brw_ADD(p
, dst
[i
], negate(arg0
[i
]), brw_imm_f(1.0));
384 brw_MUL(p
, brw_null_reg(), dst
[i
], arg2
[i
]);
386 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
387 brw_MAC(p
, dst
[i
], arg0
[i
], arg1
[i
]);
388 brw_set_saturate(p
, 0);
393 static void emit_sop( struct brw_compile
*p
,
394 const struct brw_reg
*dst
,
397 const struct brw_reg
*arg0
,
398 const struct brw_reg
*arg1
)
402 for (i
= 0; i
< 4; i
++) {
404 brw_MOV(p
, dst
[i
], brw_imm_f(0));
405 brw_CMP(p
, brw_null_reg(), cond
, arg0
[i
], arg1
[i
]);
406 brw_MOV(p
, dst
[i
], brw_imm_f(1.0));
407 brw_set_predicate_control_flag_value(p
, 0xff);
412 static void emit_slt( struct brw_compile
*p
,
413 const struct brw_reg
*dst
,
415 const struct brw_reg
*arg0
,
416 const struct brw_reg
*arg1
)
418 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_L
, arg0
, arg1
);
421 static void emit_sle( struct brw_compile
*p
,
422 const struct brw_reg
*dst
,
424 const struct brw_reg
*arg0
,
425 const struct brw_reg
*arg1
)
427 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_LE
, arg0
, arg1
);
430 static void emit_sgt( struct brw_compile
*p
,
431 const struct brw_reg
*dst
,
433 const struct brw_reg
*arg0
,
434 const struct brw_reg
*arg1
)
436 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_G
, arg0
, arg1
);
439 static void emit_sge( struct brw_compile
*p
,
440 const struct brw_reg
*dst
,
442 const struct brw_reg
*arg0
,
443 const struct brw_reg
*arg1
)
445 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_GE
, arg0
, arg1
);
448 static void emit_seq( struct brw_compile
*p
,
449 const struct brw_reg
*dst
,
451 const struct brw_reg
*arg0
,
452 const struct brw_reg
*arg1
)
454 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_EQ
, arg0
, arg1
);
457 static void emit_sne( struct brw_compile
*p
,
458 const struct brw_reg
*dst
,
460 const struct brw_reg
*arg0
,
461 const struct brw_reg
*arg1
)
463 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_NEQ
, arg0
, arg1
);
466 static void emit_cmp( struct brw_compile
*p
,
467 const struct brw_reg
*dst
,
469 const struct brw_reg
*arg0
,
470 const struct brw_reg
*arg1
,
471 const struct brw_reg
*arg2
)
475 for (i
= 0; i
< 4; i
++) {
477 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
478 brw_MOV(p
, dst
[i
], arg2
[i
]);
479 brw_set_saturate(p
, 0);
481 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, arg0
[i
], brw_imm_f(0));
483 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
484 brw_MOV(p
, dst
[i
], arg1
[i
]);
485 brw_set_saturate(p
, 0);
486 brw_set_predicate_control_flag_value(p
, 0xff);
491 static void emit_max( struct brw_compile
*p
,
492 const struct brw_reg
*dst
,
494 const struct brw_reg
*arg0
,
495 const struct brw_reg
*arg1
)
499 for (i
= 0; i
< 4; i
++) {
501 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
502 brw_MOV(p
, dst
[i
], arg0
[i
]);
503 brw_set_saturate(p
, 0);
505 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, arg0
[i
], arg1
[i
]);
507 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
508 brw_MOV(p
, dst
[i
], arg1
[i
]);
509 brw_set_saturate(p
, 0);
510 brw_set_predicate_control_flag_value(p
, 0xff);
515 static void emit_min( struct brw_compile
*p
,
516 const struct brw_reg
*dst
,
518 const struct brw_reg
*arg0
,
519 const struct brw_reg
*arg1
)
523 for (i
= 0; i
< 4; i
++) {
525 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
526 brw_MOV(p
, dst
[i
], arg1
[i
]);
527 brw_set_saturate(p
, 0);
529 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, arg0
[i
], arg1
[i
]);
531 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
532 brw_MOV(p
, dst
[i
], arg0
[i
]);
533 brw_set_saturate(p
, 0);
534 brw_set_predicate_control_flag_value(p
, 0xff);
540 static void emit_dp3( struct brw_compile
*p
,
541 const struct brw_reg
*dst
,
543 const struct brw_reg
*arg0
,
544 const struct brw_reg
*arg1
)
546 int dst_chan
= _mesa_ffs(mask
& WRITEMASK_XYZW
) - 1;
548 if (!(mask
& WRITEMASK_XYZW
))
549 return; /* Do not emit dead code */
551 assert(is_power_of_two(mask
& WRITEMASK_XYZW
));
553 brw_MUL(p
, brw_null_reg(), arg0
[0], arg1
[0]);
554 brw_MAC(p
, brw_null_reg(), arg0
[1], arg1
[1]);
556 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
557 brw_MAC(p
, dst
[dst_chan
], arg0
[2], arg1
[2]);
558 brw_set_saturate(p
, 0);
562 static void emit_dp4( struct brw_compile
*p
,
563 const struct brw_reg
*dst
,
565 const struct brw_reg
*arg0
,
566 const struct brw_reg
*arg1
)
568 int dst_chan
= _mesa_ffs(mask
& WRITEMASK_XYZW
) - 1;
570 if (!(mask
& WRITEMASK_XYZW
))
571 return; /* Do not emit dead code */
573 assert(is_power_of_two(mask
& WRITEMASK_XYZW
));
575 brw_MUL(p
, brw_null_reg(), arg0
[0], arg1
[0]);
576 brw_MAC(p
, brw_null_reg(), arg0
[1], arg1
[1]);
577 brw_MAC(p
, brw_null_reg(), arg0
[2], arg1
[2]);
579 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
580 brw_MAC(p
, dst
[dst_chan
], arg0
[3], arg1
[3]);
581 brw_set_saturate(p
, 0);
585 static void emit_dph( struct brw_compile
*p
,
586 const struct brw_reg
*dst
,
588 const struct brw_reg
*arg0
,
589 const struct brw_reg
*arg1
)
591 const int dst_chan
= _mesa_ffs(mask
& WRITEMASK_XYZW
) - 1;
593 if (!(mask
& WRITEMASK_XYZW
))
594 return; /* Do not emit dead code */
596 assert(is_power_of_two(mask
& WRITEMASK_XYZW
));
598 brw_MUL(p
, brw_null_reg(), arg0
[0], arg1
[0]);
599 brw_MAC(p
, brw_null_reg(), arg0
[1], arg1
[1]);
600 brw_MAC(p
, dst
[dst_chan
], arg0
[2], arg1
[2]);
602 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
603 brw_ADD(p
, dst
[dst_chan
], dst
[dst_chan
], arg1
[3]);
604 brw_set_saturate(p
, 0);
608 static void emit_xpd( struct brw_compile
*p
,
609 const struct brw_reg
*dst
,
611 const struct brw_reg
*arg0
,
612 const struct brw_reg
*arg1
)
616 assert(!(mask
& WRITEMASK_W
) == WRITEMASK_X
);
618 for (i
= 0 ; i
< 3; i
++) {
623 brw_MUL(p
, brw_null_reg(), negate(arg0
[i2
]), arg1
[i1
]);
625 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
626 brw_MAC(p
, dst
[i
], arg0
[i1
], arg1
[i2
]);
627 brw_set_saturate(p
, 0);
633 static void emit_math1( struct brw_compile
*p
,
635 const struct brw_reg
*dst
,
637 const struct brw_reg
*arg0
)
639 int dst_chan
= _mesa_ffs(mask
& WRITEMASK_XYZW
) - 1;
641 if (!(mask
& WRITEMASK_XYZW
))
642 return; /* Do not emit dead code */
644 assert(is_power_of_two(mask
& WRITEMASK_XYZW
));
646 brw_MOV(p
, brw_message_reg(2), arg0
[0]);
648 /* Send two messages to perform all 16 operations:
653 (mask
& SATURATE
) ? BRW_MATH_SATURATE_SATURATE
: BRW_MATH_SATURATE_NONE
,
656 BRW_MATH_PRECISION_FULL
);
660 static void emit_math2( struct brw_compile
*p
,
662 const struct brw_reg
*dst
,
664 const struct brw_reg
*arg0
,
665 const struct brw_reg
*arg1
)
667 int dst_chan
= _mesa_ffs(mask
& WRITEMASK_XYZW
) - 1;
669 if (!(mask
& WRITEMASK_XYZW
))
670 return; /* Do not emit dead code */
672 assert(is_power_of_two(mask
& WRITEMASK_XYZW
));
674 brw_push_insn_state(p
);
676 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
677 brw_MOV(p
, brw_message_reg(2), arg0
[0]);
678 brw_set_compression_control(p
, BRW_COMPRESSION_2NDHALF
);
679 brw_MOV(p
, brw_message_reg(4), sechalf(arg0
[0]));
681 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
682 brw_MOV(p
, brw_message_reg(3), arg1
[0]);
683 brw_set_compression_control(p
, BRW_COMPRESSION_2NDHALF
);
684 brw_MOV(p
, brw_message_reg(5), sechalf(arg1
[0]));
687 /* Send two messages to perform all 16 operations:
689 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
693 (mask
& SATURATE
) ? BRW_MATH_SATURATE_SATURATE
: BRW_MATH_SATURATE_NONE
,
696 BRW_MATH_DATA_VECTOR
,
697 BRW_MATH_PRECISION_FULL
);
699 brw_set_compression_control(p
, BRW_COMPRESSION_2NDHALF
);
701 offset(dst
[dst_chan
],1),
703 (mask
& SATURATE
) ? BRW_MATH_SATURATE_SATURATE
: BRW_MATH_SATURATE_NONE
,
706 BRW_MATH_DATA_VECTOR
,
707 BRW_MATH_PRECISION_FULL
);
709 brw_pop_insn_state(p
);
714 static void emit_tex( struct brw_wm_compile
*c
,
715 const struct brw_wm_instruction
*inst
,
718 struct brw_reg
*arg
)
720 struct brw_compile
*p
= &c
->func
;
721 GLuint msgLength
, responseLength
;
726 /* How many input regs are there?
728 switch (inst
->tex_idx
) {
729 case TEXTURE_1D_INDEX
:
733 case TEXTURE_2D_INDEX
:
734 case TEXTURE_RECT_INDEX
:
738 case TEXTURE_3D_INDEX
:
739 case TEXTURE_CUBE_INDEX
:
740 emit
= WRITEMASK_XYZ
;
744 /* unexpected target */
748 if (inst
->tex_shadow
) {
755 for (i
= 0; i
< nr
; i
++) {
756 static const GLuint swz
[4] = {0,1,2,2};
758 brw_MOV(p
, brw_message_reg(msgLength
+1), arg
[swz
[i
]]);
760 brw_MOV(p
, brw_message_reg(msgLength
+1), brw_imm_f(0));
764 responseLength
= 8; /* always */
766 if (BRW_IS_IGDNG(p
->brw
)) {
767 if (inst
->tex_shadow
)
768 msg_type
= BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE_IGDNG
;
770 msg_type
= BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_IGDNG
;
772 if (inst
->tex_shadow
)
773 msg_type
= BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE
;
775 msg_type
= BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE
;
779 retype(vec16(dst
[0]), BRW_REGISTER_TYPE_UW
),
781 retype(c
->payload
.depth
[0].hw_reg
, BRW_REGISTER_TYPE_UW
),
782 SURF_INDEX_TEXTURE(inst
->tex_unit
),
783 inst
->tex_unit
, /* sampler */
790 BRW_SAMPLER_SIMD_MODE_SIMD16
);
794 static void emit_txb( struct brw_wm_compile
*c
,
795 const struct brw_wm_instruction
*inst
,
798 struct brw_reg
*arg
)
800 struct brw_compile
*p
= &c
->func
;
803 /* Shadow ignored for txb.
805 switch (inst
->tex_idx
) {
806 case TEXTURE_1D_INDEX
:
807 brw_MOV(p
, brw_message_reg(2), arg
[0]);
808 brw_MOV(p
, brw_message_reg(4), brw_imm_f(0));
809 brw_MOV(p
, brw_message_reg(6), brw_imm_f(0));
811 case TEXTURE_2D_INDEX
:
812 case TEXTURE_RECT_INDEX
:
813 brw_MOV(p
, brw_message_reg(2), arg
[0]);
814 brw_MOV(p
, brw_message_reg(4), arg
[1]);
815 brw_MOV(p
, brw_message_reg(6), brw_imm_f(0));
817 case TEXTURE_3D_INDEX
:
818 case TEXTURE_CUBE_INDEX
:
819 brw_MOV(p
, brw_message_reg(2), arg
[0]);
820 brw_MOV(p
, brw_message_reg(4), arg
[1]);
821 brw_MOV(p
, brw_message_reg(6), arg
[2]);
824 /* unexpected target */
828 brw_MOV(p
, brw_message_reg(8), arg
[3]);
831 if (BRW_IS_IGDNG(p
->brw
))
832 msg_type
= BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS_IGDNG
;
834 msg_type
= BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS
;
837 retype(vec16(dst
[0]), BRW_REGISTER_TYPE_UW
),
839 retype(c
->payload
.depth
[0].hw_reg
, BRW_REGISTER_TYPE_UW
),
840 SURF_INDEX_TEXTURE(inst
->tex_unit
),
841 inst
->tex_unit
, /* sampler */
844 8, /* responseLength */
848 BRW_SAMPLER_SIMD_MODE_SIMD16
);
852 static void emit_lit( struct brw_compile
*p
,
853 const struct brw_reg
*dst
,
855 const struct brw_reg
*arg0
)
857 assert((mask
& WRITEMASK_XW
) == 0);
859 if (mask
& WRITEMASK_Y
) {
860 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
861 brw_MOV(p
, dst
[1], arg0
[0]);
862 brw_set_saturate(p
, 0);
865 if (mask
& WRITEMASK_Z
) {
866 emit_math2(p
, BRW_MATH_FUNCTION_POW
,
868 WRITEMASK_X
| (mask
& SATURATE
),
873 /* Ordinarily you'd use an iff statement to skip or shortcircuit
874 * some of the POW calculations above, but 16-wide iff statements
875 * seem to lock c1 hardware, so this is a nasty workaround:
877 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_LE
, arg0
[0], brw_imm_f(0));
879 if (mask
& WRITEMASK_Y
)
880 brw_MOV(p
, dst
[1], brw_imm_f(0));
882 if (mask
& WRITEMASK_Z
)
883 brw_MOV(p
, dst
[2], brw_imm_f(0));
885 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
889 /* Kill pixel - set execution mask to zero for those pixels which
892 static void emit_kil( struct brw_wm_compile
*c
,
893 struct brw_reg
*arg0
)
895 struct brw_compile
*p
= &c
->func
;
896 struct brw_reg r0uw
= retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW
);
899 /* XXX - usually won't need 4 compares!
901 for (i
= 0; i
< 4; i
++) {
902 brw_push_insn_state(p
);
903 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_GE
, arg0
[i
], brw_imm_f(0));
904 brw_set_predicate_control_flag_value(p
, 0xff);
905 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
906 brw_AND(p
, r0uw
, brw_flag_reg(), r0uw
);
907 brw_pop_insn_state(p
);
/* fire_fb_write: issue the framebuffer write message.
 *
 * Visible behaviour: with instruction state pushed, mask control disabled
 * and compression off, an instruction targeting message register
 * base_reg + 1 is emitted (the inline comment describes it as a mov of r1
 * into m1); state is then popped and the send is emitted with a null UW
 * destination and r0 as a UW source.
 *
 * Called from emit_fb_write as fire_fb_write(c, base, nr, target, eot).
 * NOTE(review): the remaining parameter declarations and the names of the
 * two emitting calls are not visible in the reviewed text -- confirm
 * against the full file.
 */
912 static void fire_fb_write( struct brw_wm_compile
*c
,
918 struct brw_compile
*p
= &c
->func
;
920 /* Pass through control information:
922 /* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */
924 brw_push_insn_state(p
);
925 brw_set_mask_control(p
, BRW_MASK_DISABLE
); /* ? */
926 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
928 brw_message_reg(base_reg
+ 1),
930 brw_pop_insn_state(p
);
933 /* Send framebuffer write message: */
934 /* send (16) null.0<1>:uw m0 r0.0<8;8,1>:uw 0x85a04000:ud { Align1 EOT } */
936 retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW
),
938 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW
),
946 static void emit_aa( struct brw_wm_compile
*c
,
947 struct brw_reg
*arg1
,
950 struct brw_compile
*p
= &c
->func
;
951 GLuint comp
= c
->key
.aa_dest_stencil_reg
/ 2;
952 GLuint off
= c
->key
.aa_dest_stencil_reg
% 2;
953 struct brw_reg aa
= offset(arg1
[comp
], off
);
955 brw_push_insn_state(p
);
956 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
); /* ?? */
957 brw_MOV(p
, brw_message_reg(reg
), aa
);
958 brw_pop_insn_state(p
);
962 /* Post-fragment-program processing. Send the results to the
964 * \param arg0 the fragment color
965 * \param arg1 the pass-through depth value
966 * \param arg2 the shader-computed depth value
968 static void emit_fb_write( struct brw_wm_compile
*c
,
969 struct brw_reg
*arg0
,
970 struct brw_reg
*arg1
,
971 struct brw_reg
*arg2
,
975 struct brw_compile
*p
= &c
->func
;
979 /* Reserve a space for AA - may not be needed:
981 if (c
->key
.aa_dest_stencil_reg
)
984 /* I don't really understand how this achieves the color interleave
985 * (ie RGBARGBA) in the result: [Do the saturation here]
988 brw_push_insn_state(p
);
990 for (channel
= 0; channel
< 4; channel
++) {
991 /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */
992 /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */
994 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
996 brw_message_reg(nr
+ channel
),
999 brw_set_compression_control(p
, BRW_COMPRESSION_2NDHALF
);
1001 brw_message_reg(nr
+ channel
+ 4),
1002 sechalf(arg0
[channel
]));
1005 /* skip over the regs populated above:
1009 brw_pop_insn_state(p
);
1012 if (c
->key
.source_depth_to_render_target
)
1014 if (c
->key
.computes_depth
)
1015 brw_MOV(p
, brw_message_reg(nr
), arg2
[2]);
1017 brw_MOV(p
, brw_message_reg(nr
), arg1
[1]); /* ? */
1022 if (c
->key
.dest_depth_reg
)
1024 GLuint comp
= c
->key
.dest_depth_reg
/ 2;
1025 GLuint off
= c
->key
.dest_depth_reg
% 2;
1028 brw_push_insn_state(p
);
1029 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1031 brw_MOV(p
, brw_message_reg(nr
), offset(arg1
[comp
],1));
1033 brw_MOV(p
, brw_message_reg(nr
+1), arg1
[comp
+1]);
1034 brw_pop_insn_state(p
);
1037 brw_MOV(p
, brw_message_reg(nr
), arg1
[comp
]);
1042 if (!c
->key
.runtime_check_aads_emit
) {
1043 if (c
->key
.aa_dest_stencil_reg
)
1044 emit_aa(c
, arg1
, 2);
1046 fire_fb_write(c
, 0, nr
, target
, eot
);
1049 struct brw_reg v1_null_ud
= vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD
));
1050 struct brw_reg ip
= brw_ip_reg();
1051 struct brw_instruction
*jmp
;
1053 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1054 brw_set_conditionalmod(p
, BRW_CONDITIONAL_Z
);
1057 get_element_ud(brw_vec8_grf(1,0), 6),
1060 jmp
= brw_JMPI(p
, ip
, ip
, brw_imm_d(0));
1062 emit_aa(c
, arg1
, 2);
1063 fire_fb_write(c
, 0, nr
, target
, eot
);
1064 /* note - thread killed in subroutine */
1066 brw_land_fwd_jump(p
, jmp
);
1068 /* ELSE: Shuffle up one register to fill in the hole left for AA:
1070 fire_fb_write(c
, 1, nr
-1, target
, eot
);
/**
 * Move a GPR to scratch memory.
 */
/* emit_spill: write a register out to scratch memory.
 *
 * Visible behaviour: reg is first copied into message register 2; the
 * trailing lines are the arguments of the scratch write, whose first
 * argument is r0 retyped to UW and widened to 16 elements.
 *
 * Called as emit_spill(c, hw_reg, spill_slot) elsewhere in this file.
 * NOTE(review): the parameter declarations for the register and slot, and
 * the name of the write call, are not visible in the reviewed text --
 * confirm against the full file.
 */
1078 static void emit_spill( struct brw_wm_compile
*c
,
1082 struct brw_compile
*p
= &c
->func
;
1085 mov (16) m2.0<1>:ud r2.0<8;8,1>:ud { Align1 Compr }
1087 brw_MOV(p
, brw_message_reg(2), reg
);
1090 mov (1) r0.2<1>:d 0x00000080:d { Align1 NoMask }
1091 send (16) null.0<1>:uw m1 r0.0<8;8,1>:uw 0x053003ff:ud { Align1 }
1094 retype(vec16(brw_vec8_grf(0, 0)), BRW_REGISTER_TYPE_UW
),
/**
 * Load a GPR from scratch memory.
 */
/* emit_unspill: load a register back from scratch memory.
 *
 * Visible behaviour: slot 0 is the "undef" value, for which reg is simply
 * set to 0.0f; otherwise a scratch read is issued whose destination is reg
 * retyped to UW and widened to 16 elements.
 *
 * NOTE(review): the parameter declarations for the register and slot, the
 * slot test itself and the name of the read call are not visible in the
 * reviewed text -- confirm against the full file.
 */
1102 static void emit_unspill( struct brw_wm_compile
*c
,
1106 struct brw_compile
*p
= &c
->func
;
1108 /* Slot 0 is the undef value.
1111 brw_MOV(p
, reg
, brw_imm_f(0));
1116 mov (1) r0.2<1>:d 0x000000c0:d { Align1 NoMask }
1117 send (16) r110.0<1>:uw m1 r0.0<8;8,1>:uw 0x041243ff:ud { Align1 }
1121 retype(vec16(reg
), BRW_REGISTER_TYPE_UW
),
1127 * Retrieve up to 4 GEN4 register pairs for the given wm reg:
1128 * Args with unspill_reg != 0 will be loaded from scratch memory.
1130 static void get_argument_regs( struct brw_wm_compile
*c
,
1131 struct brw_wm_ref
*arg
[],
1132 struct brw_reg
*regs
)
1136 for (i
= 0; i
< 4; i
++) {
1138 if (arg
[i
]->unspill_reg
)
1140 brw_vec8_grf(arg
[i
]->unspill_reg
, 0),
1141 arg
[i
]->value
->spill_slot
);
1143 regs
[i
] = arg
[i
]->hw_reg
;
1146 regs
[i
] = brw_null_reg();
1153 * For values that have a spill_slot!=0, write those regs to scratch memory.
1155 static void spill_values( struct brw_wm_compile
*c
,
1156 struct brw_wm_value
*values
,
1161 for (i
= 0; i
< nr
; i
++)
1162 if (values
[i
].spill_slot
)
1163 emit_spill(c
, values
[i
].hw_reg
, values
[i
].spill_slot
);
/* Emit the fragment program instructions here.
 */
1169 void brw_wm_emit( struct brw_wm_compile
*c
)
1171 struct brw_compile
*p
= &c
->func
;
1174 brw_set_compression_control(p
, BRW_COMPRESSION_COMPRESSED
);
1176 /* Check if any of the payload regs need to be spilled:
1178 spill_values(c
, c
->payload
.depth
, 4);
1179 spill_values(c
, c
->creg
, c
->nr_creg
);
1180 spill_values(c
, c
->payload
.input_interp
, FRAG_ATTRIB_MAX
);
1183 for (insn
= 0; insn
< c
->nr_insns
; insn
++) {
1185 struct brw_wm_instruction
*inst
= &c
->instruction
[insn
];
1186 struct brw_reg args
[3][4], dst
[4];
1187 GLuint i
, dst_flags
;
1189 /* Get argument regs:
1191 for (i
= 0; i
< 3; i
++)
1192 get_argument_regs(c
, inst
->src
[i
], args
[i
]);
1196 for (i
= 0; i
< 4; i
++)
1198 dst
[i
] = inst
->dst
[i
]->hw_reg
;
1200 dst
[i
] = brw_null_reg();
1204 dst_flags
= inst
->writemask
;
1206 dst_flags
|= SATURATE
;
1208 switch (inst
->opcode
) {
1209 /* Generated instructions for calculating triangle interpolants:
1212 emit_pixel_xy(p
, dst
, dst_flags
);
1216 emit_delta_xy(p
, dst
, dst_flags
, args
[0]);
1220 emit_wpos_xy(c
, dst
, dst_flags
, args
[0]);
1224 emit_pixel_w(p
, dst
, dst_flags
, args
[0], args
[1]);
1228 emit_linterp(p
, dst
, dst_flags
, args
[0], args
[1]);
1232 emit_pinterp(p
, dst
, dst_flags
, args
[0], args
[1], args
[2]);
1236 emit_cinterp(p
, dst
, dst_flags
, args
[0]);
1240 emit_fb_write(c
, args
[0], args
[1], args
[2], inst
->target
, inst
->eot
);
1243 case WM_FRONTFACING
:
1244 emit_frontfacing(p
, dst
, dst_flags
);
1247 /* Straightforward arithmetic:
1250 emit_alu2(p
, brw_ADD
, dst
, dst_flags
, args
[0], args
[1]);
1254 emit_alu1(p
, brw_FRC
, dst
, dst_flags
, args
[0]);
1258 emit_alu1(p
, brw_RNDD
, dst
, dst_flags
, args
[0]);
1262 emit_dp3(p
, dst
, dst_flags
, args
[0], args
[1]);
1266 emit_dp4(p
, dst
, dst_flags
, args
[0], args
[1]);
1270 emit_dph(p
, dst
, dst_flags
, args
[0], args
[1]);
1274 emit_trunc(p
, dst
, dst_flags
, args
[0]);
1278 emit_lrp(p
, dst
, dst_flags
, args
[0], args
[1], args
[2]);
1282 emit_mad(p
, dst
, dst_flags
, args
[0], args
[1], args
[2]);
1287 emit_alu1(p
, brw_MOV
, dst
, dst_flags
, args
[0]);
1291 emit_alu2(p
, brw_MUL
, dst
, dst_flags
, args
[0], args
[1]);
1295 emit_xpd(p
, dst
, dst_flags
, args
[0], args
[1]);
1298 /* Higher math functions:
1301 emit_math1(p
, BRW_MATH_FUNCTION_INV
, dst
, dst_flags
, args
[0]);
1305 emit_math1(p
, BRW_MATH_FUNCTION_RSQ
, dst
, dst_flags
, args
[0]);
1309 emit_math1(p
, BRW_MATH_FUNCTION_SIN
, dst
, dst_flags
, args
[0]);
1313 emit_math1(p
, BRW_MATH_FUNCTION_COS
, dst
, dst_flags
, args
[0]);
1317 emit_math1(p
, BRW_MATH_FUNCTION_EXP
, dst
, dst_flags
, args
[0]);
1321 emit_math1(p
, BRW_MATH_FUNCTION_LOG
, dst
, dst_flags
, args
[0]);
1325 /* There is an scs math function, but it would need some
1326 * fixup for 16-element execution.
1328 if (dst_flags
& WRITEMASK_X
)
1329 emit_math1(p
, BRW_MATH_FUNCTION_COS
, dst
, (dst_flags
&SATURATE
)|WRITEMASK_X
, args
[0]);
1330 if (dst_flags
& WRITEMASK_Y
)
1331 emit_math1(p
, BRW_MATH_FUNCTION_SIN
, dst
+1, (dst_flags
&SATURATE
)|WRITEMASK_X
, args
[0]);
1335 emit_math2(p
, BRW_MATH_FUNCTION_POW
, dst
, dst_flags
, args
[0], args
[1]);
1341 emit_cmp(p
, dst
, dst_flags
, args
[0], args
[1], args
[2]);
1345 emit_max(p
, dst
, dst_flags
, args
[0], args
[1]);
1349 emit_min(p
, dst
, dst_flags
, args
[0], args
[1]);
1353 emit_slt(p
, dst
, dst_flags
, args
[0], args
[1]);
1357 emit_sle(p
, dst
, dst_flags
, args
[0], args
[1]);
1360 emit_sgt(p
, dst
, dst_flags
, args
[0], args
[1]);
1363 emit_sge(p
, dst
, dst_flags
, args
[0], args
[1]);
1366 emit_seq(p
, dst
, dst_flags
, args
[0], args
[1]);
1369 emit_sne(p
, dst
, dst_flags
, args
[0], args
[1]);
1373 emit_lit(p
, dst
, dst_flags
, args
[0]);
1376 /* Texturing operations:
1379 emit_tex(c
, inst
, dst
, dst_flags
, args
[0]);
1383 emit_txb(c
, inst
, dst
, dst_flags
, args
[0]);
1387 emit_kil(c
, args
[0]);
1391 _mesa_printf("Unsupported opcode %i (%s) in fragment shader\n",
1392 inst
->opcode
, inst
->opcode
< MAX_OPCODE
?
1393 _mesa_opcode_string(inst
->opcode
) :
1397 for (i
= 0; i
< 4; i
++)
1398 if (inst
->dst
[i
] && inst
->dst
[i
]->spill_slot
)
1400 inst
->dst
[i
]->hw_reg
,
1401 inst
->dst
[i
]->spill_slot
);
1404 if (INTEL_DEBUG
& DEBUG_WM
) {
1407 _mesa_printf("wm-native:\n");
1408 for (i
= 0; i
< p
->nr_insn
; i
++)
1409 brw_disasm(stderr
, &p
->store
[i
]);