2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **********************************************************************/
29 * Keith Whitwell <keith@tungstengraphics.com>
33 #include "main/macros.h"
34 #include "brw_context.h"
37 #define SATURATE (1<<5)
39 /* Not quite sure how correct this is - need to understand horiz
40 * vs. vertical strides a little better.
42 static INLINE
struct brw_reg
sechalf( struct brw_reg reg
)
51 * R0.0 -- pixel mask, one bit for each of 4 pixels in 4 tiles,
52 * corresponding to each of the 16 execution channels.
54 * R1.0 -- triangle vertex 0.X
55 * R1.1 -- triangle vertex 0.Y
56 * R1.2 -- tile 0 x,y coords (2 packed uwords)
57 * R1.3 -- tile 1 x,y coords (2 packed uwords)
58 * R1.4 -- tile 2 x,y coords (2 packed uwords)
59 * R1.5 -- tile 3 x,y coords (2 packed uwords)
66 static void emit_pixel_xy(struct brw_compile
*p
,
67 const struct brw_reg
*dst
,
70 struct brw_reg r1
= brw_vec1_grf(1, 0);
71 struct brw_reg r1_uw
= retype(r1
, BRW_REGISTER_TYPE_UW
);
73 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
75 /* Calculate pixel centers by adding 1 or 0 to each of the
76 * micro-tile coordinates passed in r1.
78 if (mask
& WRITEMASK_X
) {
80 vec16(retype(dst
[0], BRW_REGISTER_TYPE_UW
)),
81 stride(suboffset(r1_uw
, 4), 2, 4, 0),
82 brw_imm_v(0x10101010));
85 if (mask
& WRITEMASK_Y
) {
87 vec16(retype(dst
[1], BRW_REGISTER_TYPE_UW
)),
88 stride(suboffset(r1_uw
,5), 2, 4, 0),
89 brw_imm_v(0x11001100));
92 brw_set_compression_control(p
, BRW_COMPRESSION_COMPRESSED
);
97 static void emit_delta_xy(struct brw_compile
*p
,
98 const struct brw_reg
*dst
,
100 const struct brw_reg
*arg0
)
102 struct brw_reg r1
= brw_vec1_grf(1, 0);
104 /* Calc delta X,Y by subtracting origin in r1 from the pixel
107 if (mask
& WRITEMASK_X
) {
110 retype(arg0
[0], BRW_REGISTER_TYPE_UW
),
114 if (mask
& WRITEMASK_Y
) {
117 retype(arg0
[1], BRW_REGISTER_TYPE_UW
),
118 negate(suboffset(r1
,1)));
123 static void emit_wpos_xy(struct brw_wm_compile
*c
,
124 const struct brw_reg
*dst
,
126 const struct brw_reg
*arg0
)
128 struct brw_compile
*p
= &c
->func
;
130 /* Calculate the pixel offset from window bottom left into destination
133 if (mask
& WRITEMASK_X
) {
134 /* X' = X - origin */
137 retype(arg0
[0], BRW_REGISTER_TYPE_W
),
138 brw_imm_d(0 - c
->key
.origin_x
));
141 if (mask
& WRITEMASK_Y
) {
142 /* Y' = height - (Y - origin_y) = height + origin_y - Y */
145 negate(retype(arg0
[1], BRW_REGISTER_TYPE_W
)),
146 brw_imm_d(c
->key
.origin_y
+ c
->key
.drawable_height
- 1));
151 static void emit_pixel_w( struct brw_compile
*p
,
152 const struct brw_reg
*dst
,
154 const struct brw_reg
*arg0
,
155 const struct brw_reg
*deltas
)
157 /* Don't need this if all you are doing is interpolating color, for
160 if (mask
& WRITEMASK_W
) {
161 struct brw_reg interp3
= brw_vec1_grf(arg0
[0].nr
+1, 4);
163 /* Calc 1/w - just linterp wpos[3] optimized by putting the
164 * result straight into a message reg.
166 brw_LINE(p
, brw_null_reg(), interp3
, deltas
[0]);
167 brw_MAC(p
, brw_message_reg(2), suboffset(interp3
, 1), deltas
[1]);
170 brw_math_16( p
, dst
[3],
171 BRW_MATH_FUNCTION_INV
,
172 BRW_MATH_SATURATE_NONE
,
174 BRW_MATH_PRECISION_FULL
);
180 static void emit_linterp( struct brw_compile
*p
,
181 const struct brw_reg
*dst
,
183 const struct brw_reg
*arg0
,
184 const struct brw_reg
*deltas
)
186 struct brw_reg interp
[4];
187 GLuint nr
= arg0
[0].nr
;
190 interp
[0] = brw_vec1_grf(nr
, 0);
191 interp
[1] = brw_vec1_grf(nr
, 4);
192 interp
[2] = brw_vec1_grf(nr
+1, 0);
193 interp
[3] = brw_vec1_grf(nr
+1, 4);
195 for (i
= 0; i
< 4; i
++) {
197 brw_LINE(p
, brw_null_reg(), interp
[i
], deltas
[0]);
198 brw_MAC(p
, dst
[i
], suboffset(interp
[i
],1), deltas
[1]);
204 static void emit_pinterp( struct brw_compile
*p
,
205 const struct brw_reg
*dst
,
207 const struct brw_reg
*arg0
,
208 const struct brw_reg
*deltas
,
209 const struct brw_reg
*w
)
211 struct brw_reg interp
[4];
212 GLuint nr
= arg0
[0].nr
;
215 interp
[0] = brw_vec1_grf(nr
, 0);
216 interp
[1] = brw_vec1_grf(nr
, 4);
217 interp
[2] = brw_vec1_grf(nr
+1, 0);
218 interp
[3] = brw_vec1_grf(nr
+1, 4);
220 for (i
= 0; i
< 4; i
++) {
222 brw_LINE(p
, brw_null_reg(), interp
[i
], deltas
[0]);
223 brw_MAC(p
, dst
[i
], suboffset(interp
[i
],1), deltas
[1]);
226 for (i
= 0; i
< 4; i
++) {
228 brw_MUL(p
, dst
[i
], dst
[i
], w
[3]);
234 static void emit_cinterp( struct brw_compile
*p
,
235 const struct brw_reg
*dst
,
237 const struct brw_reg
*arg0
)
239 struct brw_reg interp
[4];
240 GLuint nr
= arg0
[0].nr
;
243 interp
[0] = brw_vec1_grf(nr
, 0);
244 interp
[1] = brw_vec1_grf(nr
, 4);
245 interp
[2] = brw_vec1_grf(nr
+1, 0);
246 interp
[3] = brw_vec1_grf(nr
+1, 4);
248 for (i
= 0; i
< 4; i
++) {
250 brw_MOV(p
, dst
[i
], suboffset(interp
[i
],3)); /* TODO: optimize away like other moves */
255 /* Sets the destination channels to 1.0 or 0.0 according to glFrontFacing. */
256 static void emit_frontfacing( struct brw_compile
*p
,
257 const struct brw_reg
*dst
,
260 struct brw_reg r1_6ud
= retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD
);
263 if (!(mask
& WRITEMASK_XYZW
))
266 for (i
= 0; i
< 4; i
++) {
268 brw_MOV(p
, dst
[i
], brw_imm_f(0.0));
272 /* bit 31 is "primitive is back face", so checking < (1 << 31) gives
275 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, r1_6ud
, brw_imm_ud(1 << 31));
276 for (i
= 0; i
< 4; i
++) {
278 brw_MOV(p
, dst
[i
], brw_imm_f(1.0));
281 brw_set_predicate_control_flag_value(p
, 0xff);
284 static void emit_alu1( struct brw_compile
*p
,
285 struct brw_instruction
*(*func
)(struct brw_compile
*,
288 const struct brw_reg
*dst
,
290 const struct brw_reg
*arg0
)
295 brw_set_saturate(p
, 1);
297 for (i
= 0; i
< 4; i
++) {
299 func(p
, dst
[i
], arg0
[i
]);
304 brw_set_saturate(p
, 0);
308 static void emit_alu2( struct brw_compile
*p
,
309 struct brw_instruction
*(*func
)(struct brw_compile
*,
313 const struct brw_reg
*dst
,
315 const struct brw_reg
*arg0
,
316 const struct brw_reg
*arg1
)
321 brw_set_saturate(p
, 1);
323 for (i
= 0; i
< 4; i
++) {
325 func(p
, dst
[i
], arg0
[i
], arg1
[i
]);
330 brw_set_saturate(p
, 0);
334 static void emit_mad( struct brw_compile
*p
,
335 const struct brw_reg
*dst
,
337 const struct brw_reg
*arg0
,
338 const struct brw_reg
*arg1
,
339 const struct brw_reg
*arg2
)
343 for (i
= 0; i
< 4; i
++) {
345 brw_MUL(p
, dst
[i
], arg0
[i
], arg1
[i
]);
347 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
348 brw_ADD(p
, dst
[i
], dst
[i
], arg2
[i
]);
349 brw_set_saturate(p
, 0);
354 static void emit_trunc( struct brw_compile
*p
,
355 const struct brw_reg
*dst
,
357 const struct brw_reg
*arg0
)
361 for (i
= 0; i
< 4; i
++) {
363 brw_RNDZ(p
, dst
[i
], arg0
[i
]);
368 static void emit_lrp( struct brw_compile
*p
,
369 const struct brw_reg
*dst
,
371 const struct brw_reg
*arg0
,
372 const struct brw_reg
*arg1
,
373 const struct brw_reg
*arg2
)
377 /* Uses dst as a temporary:
379 for (i
= 0; i
< 4; i
++) {
381 /* Can I use the LINE instruction for this?
383 brw_ADD(p
, dst
[i
], negate(arg0
[i
]), brw_imm_f(1.0));
384 brw_MUL(p
, brw_null_reg(), dst
[i
], arg2
[i
]);
386 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
387 brw_MAC(p
, dst
[i
], arg0
[i
], arg1
[i
]);
388 brw_set_saturate(p
, 0);
393 static void emit_sop( struct brw_compile
*p
,
394 const struct brw_reg
*dst
,
397 const struct brw_reg
*arg0
,
398 const struct brw_reg
*arg1
)
402 for (i
= 0; i
< 4; i
++) {
404 brw_MOV(p
, dst
[i
], brw_imm_f(0));
405 brw_CMP(p
, brw_null_reg(), cond
, arg0
[i
], arg1
[i
]);
406 brw_MOV(p
, dst
[i
], brw_imm_f(1.0));
407 brw_set_predicate_control_flag_value(p
, 0xff);
412 static void emit_slt( struct brw_compile
*p
,
413 const struct brw_reg
*dst
,
415 const struct brw_reg
*arg0
,
416 const struct brw_reg
*arg1
)
418 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_L
, arg0
, arg1
);
421 static void emit_sle( struct brw_compile
*p
,
422 const struct brw_reg
*dst
,
424 const struct brw_reg
*arg0
,
425 const struct brw_reg
*arg1
)
427 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_LE
, arg0
, arg1
);
430 static void emit_sgt( struct brw_compile
*p
,
431 const struct brw_reg
*dst
,
433 const struct brw_reg
*arg0
,
434 const struct brw_reg
*arg1
)
436 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_G
, arg0
, arg1
);
439 static void emit_sge( struct brw_compile
*p
,
440 const struct brw_reg
*dst
,
442 const struct brw_reg
*arg0
,
443 const struct brw_reg
*arg1
)
445 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_GE
, arg0
, arg1
);
448 static void emit_seq( struct brw_compile
*p
,
449 const struct brw_reg
*dst
,
451 const struct brw_reg
*arg0
,
452 const struct brw_reg
*arg1
)
454 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_EQ
, arg0
, arg1
);
457 static void emit_sne( struct brw_compile
*p
,
458 const struct brw_reg
*dst
,
460 const struct brw_reg
*arg0
,
461 const struct brw_reg
*arg1
)
463 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_NEQ
, arg0
, arg1
);
466 static void emit_cmp( struct brw_compile
*p
,
467 const struct brw_reg
*dst
,
469 const struct brw_reg
*arg0
,
470 const struct brw_reg
*arg1
,
471 const struct brw_reg
*arg2
)
475 for (i
= 0; i
< 4; i
++) {
477 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
478 brw_MOV(p
, dst
[i
], arg2
[i
]);
479 brw_set_saturate(p
, 0);
481 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, arg0
[i
], brw_imm_f(0));
483 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
484 brw_MOV(p
, dst
[i
], arg1
[i
]);
485 brw_set_saturate(p
, 0);
486 brw_set_predicate_control_flag_value(p
, 0xff);
491 static void emit_max( struct brw_compile
*p
,
492 const struct brw_reg
*dst
,
494 const struct brw_reg
*arg0
,
495 const struct brw_reg
*arg1
)
499 for (i
= 0; i
< 4; i
++) {
501 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
502 brw_MOV(p
, dst
[i
], arg0
[i
]);
503 brw_set_saturate(p
, 0);
505 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, arg0
[i
], arg1
[i
]);
507 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
508 brw_MOV(p
, dst
[i
], arg1
[i
]);
509 brw_set_saturate(p
, 0);
510 brw_set_predicate_control_flag_value(p
, 0xff);
515 static void emit_min( struct brw_compile
*p
,
516 const struct brw_reg
*dst
,
518 const struct brw_reg
*arg0
,
519 const struct brw_reg
*arg1
)
523 for (i
= 0; i
< 4; i
++) {
525 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
526 brw_MOV(p
, dst
[i
], arg1
[i
]);
527 brw_set_saturate(p
, 0);
529 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, arg0
[i
], arg1
[i
]);
531 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
532 brw_MOV(p
, dst
[i
], arg0
[i
]);
533 brw_set_saturate(p
, 0);
534 brw_set_predicate_control_flag_value(p
, 0xff);
540 static void emit_dp3( struct brw_compile
*p
,
541 const struct brw_reg
*dst
,
543 const struct brw_reg
*arg0
,
544 const struct brw_reg
*arg1
)
546 int dst_chan
= _mesa_ffs(mask
& WRITEMASK_XYZW
) - 1;
548 if (!(mask
& WRITEMASK_XYZW
))
549 return; /* Do not emit dead code */
551 assert(is_power_of_two(mask
& WRITEMASK_XYZW
));
553 brw_MUL(p
, brw_null_reg(), arg0
[0], arg1
[0]);
554 brw_MAC(p
, brw_null_reg(), arg0
[1], arg1
[1]);
556 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
557 brw_MAC(p
, dst
[dst_chan
], arg0
[2], arg1
[2]);
558 brw_set_saturate(p
, 0);
562 static void emit_dp4( struct brw_compile
*p
,
563 const struct brw_reg
*dst
,
565 const struct brw_reg
*arg0
,
566 const struct brw_reg
*arg1
)
568 int dst_chan
= _mesa_ffs(mask
& WRITEMASK_XYZW
) - 1;
570 if (!(mask
& WRITEMASK_XYZW
))
571 return; /* Do not emit dead code */
573 assert(is_power_of_two(mask
& WRITEMASK_XYZW
));
575 brw_MUL(p
, brw_null_reg(), arg0
[0], arg1
[0]);
576 brw_MAC(p
, brw_null_reg(), arg0
[1], arg1
[1]);
577 brw_MAC(p
, brw_null_reg(), arg0
[2], arg1
[2]);
579 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
580 brw_MAC(p
, dst
[dst_chan
], arg0
[3], arg1
[3]);
581 brw_set_saturate(p
, 0);
585 static void emit_dph( struct brw_compile
*p
,
586 const struct brw_reg
*dst
,
588 const struct brw_reg
*arg0
,
589 const struct brw_reg
*arg1
)
591 const int dst_chan
= _mesa_ffs(mask
& WRITEMASK_XYZW
) - 1;
593 if (!(mask
& WRITEMASK_XYZW
))
594 return; /* Do not emit dead code */
596 assert(is_power_of_two(mask
& WRITEMASK_XYZW
));
598 brw_MUL(p
, brw_null_reg(), arg0
[0], arg1
[0]);
599 brw_MAC(p
, brw_null_reg(), arg0
[1], arg1
[1]);
600 brw_MAC(p
, dst
[dst_chan
], arg0
[2], arg1
[2]);
602 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
603 brw_ADD(p
, dst
[dst_chan
], dst
[dst_chan
], arg1
[3]);
604 brw_set_saturate(p
, 0);
608 static void emit_xpd( struct brw_compile
*p
,
609 const struct brw_reg
*dst
,
611 const struct brw_reg
*arg0
,
612 const struct brw_reg
*arg1
)
616 assert(!(mask
& WRITEMASK_W
) == WRITEMASK_X
);
618 for (i
= 0 ; i
< 3; i
++) {
623 brw_MUL(p
, brw_null_reg(), negate(arg0
[i2
]), arg1
[i1
]);
625 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
626 brw_MAC(p
, dst
[i
], arg0
[i1
], arg1
[i2
]);
627 brw_set_saturate(p
, 0);
633 static void emit_math1( struct brw_compile
*p
,
635 const struct brw_reg
*dst
,
637 const struct brw_reg
*arg0
)
639 int dst_chan
= _mesa_ffs(mask
& WRITEMASK_XYZW
) - 1;
641 if (!(mask
& WRITEMASK_XYZW
))
642 return; /* Do not emit dead code */
644 assert(is_power_of_two(mask
& WRITEMASK_XYZW
));
646 brw_MOV(p
, brw_message_reg(2), arg0
[0]);
648 /* Send two messages to perform all 16 operations:
653 (mask
& SATURATE
) ? BRW_MATH_SATURATE_SATURATE
: BRW_MATH_SATURATE_NONE
,
656 BRW_MATH_PRECISION_FULL
);
660 static void emit_math2( struct brw_compile
*p
,
662 const struct brw_reg
*dst
,
664 const struct brw_reg
*arg0
,
665 const struct brw_reg
*arg1
)
667 int dst_chan
= _mesa_ffs(mask
& WRITEMASK_XYZW
) - 1;
669 if (!(mask
& WRITEMASK_XYZW
))
670 return; /* Do not emit dead code */
672 assert(is_power_of_two(mask
& WRITEMASK_XYZW
));
674 brw_push_insn_state(p
);
676 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
677 brw_MOV(p
, brw_message_reg(2), arg0
[0]);
678 brw_set_compression_control(p
, BRW_COMPRESSION_2NDHALF
);
679 brw_MOV(p
, brw_message_reg(4), sechalf(arg0
[0]));
681 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
682 brw_MOV(p
, brw_message_reg(3), arg1
[0]);
683 brw_set_compression_control(p
, BRW_COMPRESSION_2NDHALF
);
684 brw_MOV(p
, brw_message_reg(5), sechalf(arg1
[0]));
687 /* Send two messages to perform all 16 operations:
689 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
693 (mask
& SATURATE
) ? BRW_MATH_SATURATE_SATURATE
: BRW_MATH_SATURATE_NONE
,
696 BRW_MATH_DATA_VECTOR
,
697 BRW_MATH_PRECISION_FULL
);
699 brw_set_compression_control(p
, BRW_COMPRESSION_2NDHALF
);
701 offset(dst
[dst_chan
],1),
703 (mask
& SATURATE
) ? BRW_MATH_SATURATE_SATURATE
: BRW_MATH_SATURATE_NONE
,
706 BRW_MATH_DATA_VECTOR
,
707 BRW_MATH_PRECISION_FULL
);
709 brw_pop_insn_state(p
);
714 static void emit_tex( struct brw_wm_compile
*c
,
715 const struct brw_wm_instruction
*inst
,
718 struct brw_reg
*arg
)
720 struct brw_compile
*p
= &c
->func
;
721 GLuint msgLength
, responseLength
;
726 /* How many input regs are there?
728 switch (inst
->tex_idx
) {
729 case TEXTURE_1D_INDEX
:
733 case TEXTURE_2D_INDEX
:
734 case TEXTURE_RECT_INDEX
:
739 emit
= WRITEMASK_XYZ
;
744 if (inst
->tex_shadow
) {
751 for (i
= 0; i
< nr
; i
++) {
752 static const GLuint swz
[4] = {0,1,2,2};
754 brw_MOV(p
, brw_message_reg(msgLength
+1), arg
[swz
[i
]]);
756 brw_MOV(p
, brw_message_reg(msgLength
+1), brw_imm_f(0));
760 responseLength
= 8; /* always */
762 if (BRW_IS_IGDNG(p
->brw
)) {
763 if (inst
->tex_shadow
)
764 msg_type
= BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE_IGDNG
;
766 msg_type
= BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_IGDNG
;
768 if (inst
->tex_shadow
)
769 msg_type
= BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE
;
771 msg_type
= BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE
;
775 retype(vec16(dst
[0]), BRW_REGISTER_TYPE_UW
),
777 retype(c
->payload
.depth
[0].hw_reg
, BRW_REGISTER_TYPE_UW
),
778 SURF_INDEX_TEXTURE(inst
->tex_unit
),
779 inst
->tex_unit
, /* sampler */
786 BRW_SAMPLER_SIMD_MODE_SIMD16
);
790 static void emit_txb( struct brw_wm_compile
*c
,
791 const struct brw_wm_instruction
*inst
,
794 struct brw_reg
*arg
)
796 struct brw_compile
*p
= &c
->func
;
799 /* Shadow ignored for txb.
801 switch (inst
->tex_idx
) {
802 case TEXTURE_1D_INDEX
:
803 brw_MOV(p
, brw_message_reg(2), arg
[0]);
804 brw_MOV(p
, brw_message_reg(4), brw_imm_f(0));
805 brw_MOV(p
, brw_message_reg(6), brw_imm_f(0));
807 case TEXTURE_2D_INDEX
:
808 case TEXTURE_RECT_INDEX
:
809 brw_MOV(p
, brw_message_reg(2), arg
[0]);
810 brw_MOV(p
, brw_message_reg(4), arg
[1]);
811 brw_MOV(p
, brw_message_reg(6), brw_imm_f(0));
814 brw_MOV(p
, brw_message_reg(2), arg
[0]);
815 brw_MOV(p
, brw_message_reg(4), arg
[1]);
816 brw_MOV(p
, brw_message_reg(6), arg
[2]);
820 brw_MOV(p
, brw_message_reg(8), arg
[3]);
823 if (BRW_IS_IGDNG(p
->brw
))
824 msg_type
= BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS_IGDNG
;
826 msg_type
= BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS
;
829 retype(vec16(dst
[0]), BRW_REGISTER_TYPE_UW
),
831 retype(c
->payload
.depth
[0].hw_reg
, BRW_REGISTER_TYPE_UW
),
832 SURF_INDEX_TEXTURE(inst
->tex_unit
),
833 inst
->tex_unit
, /* sampler */
836 8, /* responseLength */
840 BRW_SAMPLER_SIMD_MODE_SIMD16
);
844 static void emit_lit( struct brw_compile
*p
,
845 const struct brw_reg
*dst
,
847 const struct brw_reg
*arg0
)
849 assert((mask
& WRITEMASK_XW
) == 0);
851 if (mask
& WRITEMASK_Y
) {
852 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
853 brw_MOV(p
, dst
[1], arg0
[0]);
854 brw_set_saturate(p
, 0);
857 if (mask
& WRITEMASK_Z
) {
858 emit_math2(p
, BRW_MATH_FUNCTION_POW
,
860 WRITEMASK_X
| (mask
& SATURATE
),
865 /* Ordinarily you'd use an iff statement to skip or shortcircuit
866 * some of the POW calculations above, but 16-wide iff statements
867 * seem to lock c1 hardware, so this is a nasty workaround:
869 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_LE
, arg0
[0], brw_imm_f(0));
871 if (mask
& WRITEMASK_Y
)
872 brw_MOV(p
, dst
[1], brw_imm_f(0));
874 if (mask
& WRITEMASK_Z
)
875 brw_MOV(p
, dst
[2], brw_imm_f(0));
877 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
881 /* Kill pixel - set execution mask to zero for those pixels which
884 static void emit_kil( struct brw_wm_compile
*c
,
885 struct brw_reg
*arg0
)
887 struct brw_compile
*p
= &c
->func
;
888 struct brw_reg r0uw
= retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW
);
891 /* XXX - usually won't need 4 compares!
893 for (i
= 0; i
< 4; i
++) {
894 brw_push_insn_state(p
);
895 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_GE
, arg0
[i
], brw_imm_f(0));
896 brw_set_predicate_control_flag_value(p
, 0xff);
897 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
898 brw_AND(p
, r0uw
, brw_flag_reg(), r0uw
);
899 brw_pop_insn_state(p
);
904 static void fire_fb_write( struct brw_wm_compile
*c
,
910 struct brw_compile
*p
= &c
->func
;
912 /* Pass through control information:
914 /* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */
916 brw_push_insn_state(p
);
917 brw_set_mask_control(p
, BRW_MASK_DISABLE
); /* ? */
918 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
920 brw_message_reg(base_reg
+ 1),
922 brw_pop_insn_state(p
);
925 /* Send framebuffer write message: */
926 /* send (16) null.0<1>:uw m0 r0.0<8;8,1>:uw 0x85a04000:ud { Align1 EOT } */
928 retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW
),
930 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW
),
938 static void emit_aa( struct brw_wm_compile
*c
,
939 struct brw_reg
*arg1
,
942 struct brw_compile
*p
= &c
->func
;
943 GLuint comp
= c
->key
.aa_dest_stencil_reg
/ 2;
944 GLuint off
= c
->key
.aa_dest_stencil_reg
% 2;
945 struct brw_reg aa
= offset(arg1
[comp
], off
);
947 brw_push_insn_state(p
);
948 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
); /* ?? */
949 brw_MOV(p
, brw_message_reg(reg
), aa
);
950 brw_pop_insn_state(p
);
954 /* Post-fragment-program processing. Send the results to the
956 * \param arg0 the fragment color
957 * \param arg1 the pass-through depth value
958 * \param arg2 the shader-computed depth value
960 static void emit_fb_write( struct brw_wm_compile
*c
,
961 struct brw_reg
*arg0
,
962 struct brw_reg
*arg1
,
963 struct brw_reg
*arg2
,
967 struct brw_compile
*p
= &c
->func
;
971 /* Reserve a space for AA - may not be needed:
973 if (c
->key
.aa_dest_stencil_reg
)
976 /* I don't really understand how this achieves the color interleave
977 * (ie RGBARGBA) in the result: [Do the saturation here]
980 brw_push_insn_state(p
);
982 for (channel
= 0; channel
< 4; channel
++) {
983 /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */
984 /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */
986 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
988 brw_message_reg(nr
+ channel
),
991 brw_set_compression_control(p
, BRW_COMPRESSION_2NDHALF
);
993 brw_message_reg(nr
+ channel
+ 4),
994 sechalf(arg0
[channel
]));
997 /* skip over the regs populated above:
1001 brw_pop_insn_state(p
);
1004 if (c
->key
.source_depth_to_render_target
)
1006 if (c
->key
.computes_depth
)
1007 brw_MOV(p
, brw_message_reg(nr
), arg2
[2]);
1009 brw_MOV(p
, brw_message_reg(nr
), arg1
[1]); /* ? */
1014 if (c
->key
.dest_depth_reg
)
1016 GLuint comp
= c
->key
.dest_depth_reg
/ 2;
1017 GLuint off
= c
->key
.dest_depth_reg
% 2;
1020 brw_push_insn_state(p
);
1021 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1023 brw_MOV(p
, brw_message_reg(nr
), offset(arg1
[comp
],1));
1025 brw_MOV(p
, brw_message_reg(nr
+1), arg1
[comp
+1]);
1026 brw_pop_insn_state(p
);
1029 brw_MOV(p
, brw_message_reg(nr
), arg1
[comp
]);
1034 if (!c
->key
.runtime_check_aads_emit
) {
1035 if (c
->key
.aa_dest_stencil_reg
)
1036 emit_aa(c
, arg1
, 2);
1038 fire_fb_write(c
, 0, nr
, target
, eot
);
1041 struct brw_reg v1_null_ud
= vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD
));
1042 struct brw_reg ip
= brw_ip_reg();
1043 struct brw_instruction
*jmp
;
1045 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1046 brw_set_conditionalmod(p
, BRW_CONDITIONAL_Z
);
1049 get_element_ud(brw_vec8_grf(1,0), 6),
1052 jmp
= brw_JMPI(p
, ip
, ip
, brw_imm_d(0));
1054 emit_aa(c
, arg1
, 2);
1055 fire_fb_write(c
, 0, nr
, target
, eot
);
1056 /* note - thread killed in subroutine */
1058 brw_land_fwd_jump(p
, jmp
);
1060 /* ELSE: Shuffle up one register to fill in the hole left for AA:
1062 fire_fb_write(c
, 1, nr
-1, target
, eot
);
1068 * Move a GPR to scratch memory.
1070 static void emit_spill( struct brw_wm_compile
*c
,
1074 struct brw_compile
*p
= &c
->func
;
1077 mov (16) m2.0<1>:ud r2.0<8;8,1>:ud { Align1 Compr }
1079 brw_MOV(p
, brw_message_reg(2), reg
);
1082 mov (1) r0.2<1>:d 0x00000080:d { Align1 NoMask }
1083 send (16) null.0<1>:uw m1 r0.0<8;8,1>:uw 0x053003ff:ud { Align1 }
1086 retype(vec16(brw_vec8_grf(0, 0)), BRW_REGISTER_TYPE_UW
),
1092 * Load a GPR from scratch memory.
1094 static void emit_unspill( struct brw_wm_compile
*c
,
1098 struct brw_compile
*p
= &c
->func
;
1100 /* Slot 0 is the undef value.
1103 brw_MOV(p
, reg
, brw_imm_f(0));
1108 mov (1) r0.2<1>:d 0x000000c0:d { Align1 NoMask }
1109 send (16) r110.0<1>:uw m1 r0.0<8;8,1>:uw 0x041243ff:ud { Align1 }
1113 retype(vec16(reg
), BRW_REGISTER_TYPE_UW
),
1119 * Retrieve up to 4 GEN4 register pairs for the given wm reg:
1120 * Args with unspill_reg != 0 will be loaded from scratch memory.
1122 static void get_argument_regs( struct brw_wm_compile
*c
,
1123 struct brw_wm_ref
*arg
[],
1124 struct brw_reg
*regs
)
1128 for (i
= 0; i
< 4; i
++) {
1130 if (arg
[i
]->unspill_reg
)
1132 brw_vec8_grf(arg
[i
]->unspill_reg
, 0),
1133 arg
[i
]->value
->spill_slot
);
1135 regs
[i
] = arg
[i
]->hw_reg
;
1138 regs
[i
] = brw_null_reg();
1145 * For values that have a spill_slot!=0, write those regs to scratch memory.
1147 static void spill_values( struct brw_wm_compile
*c
,
1148 struct brw_wm_value
*values
,
1153 for (i
= 0; i
< nr
; i
++)
1154 if (values
[i
].spill_slot
)
1155 emit_spill(c
, values
[i
].hw_reg
, values
[i
].spill_slot
);
1159 /* Emit the fragment program instructions here.
1161 void brw_wm_emit( struct brw_wm_compile
*c
)
1163 struct brw_compile
*p
= &c
->func
;
1166 brw_set_compression_control(p
, BRW_COMPRESSION_COMPRESSED
);
1168 /* Check if any of the payload regs need to be spilled:
1170 spill_values(c
, c
->payload
.depth
, 4);
1171 spill_values(c
, c
->creg
, c
->nr_creg
);
1172 spill_values(c
, c
->payload
.input_interp
, FRAG_ATTRIB_MAX
);
1175 for (insn
= 0; insn
< c
->nr_insns
; insn
++) {
1177 struct brw_wm_instruction
*inst
= &c
->instruction
[insn
];
1178 struct brw_reg args
[3][4], dst
[4];
1179 GLuint i
, dst_flags
;
1181 /* Get argument regs:
1183 for (i
= 0; i
< 3; i
++)
1184 get_argument_regs(c
, inst
->src
[i
], args
[i
]);
1188 for (i
= 0; i
< 4; i
++)
1190 dst
[i
] = inst
->dst
[i
]->hw_reg
;
1192 dst
[i
] = brw_null_reg();
1196 dst_flags
= inst
->writemask
;
1198 dst_flags
|= SATURATE
;
1200 switch (inst
->opcode
) {
1201 /* Generated instructions for calculating triangle interpolants:
1204 emit_pixel_xy(p
, dst
, dst_flags
);
1208 emit_delta_xy(p
, dst
, dst_flags
, args
[0]);
1212 emit_wpos_xy(c
, dst
, dst_flags
, args
[0]);
1216 emit_pixel_w(p
, dst
, dst_flags
, args
[0], args
[1]);
1220 emit_linterp(p
, dst
, dst_flags
, args
[0], args
[1]);
1224 emit_pinterp(p
, dst
, dst_flags
, args
[0], args
[1], args
[2]);
1228 emit_cinterp(p
, dst
, dst_flags
, args
[0]);
1232 emit_fb_write(c
, args
[0], args
[1], args
[2], inst
->target
, inst
->eot
);
1235 case WM_FRONTFACING
:
1236 emit_frontfacing(p
, dst
, dst_flags
);
1239 /* Straightforward arithmetic:
1242 emit_alu2(p
, brw_ADD
, dst
, dst_flags
, args
[0], args
[1]);
1246 emit_alu1(p
, brw_FRC
, dst
, dst_flags
, args
[0]);
1250 emit_alu1(p
, brw_RNDD
, dst
, dst_flags
, args
[0]);
1254 emit_dp3(p
, dst
, dst_flags
, args
[0], args
[1]);
1258 emit_dp4(p
, dst
, dst_flags
, args
[0], args
[1]);
1262 emit_dph(p
, dst
, dst_flags
, args
[0], args
[1]);
1266 emit_trunc(p
, dst
, dst_flags
, args
[0]);
1270 emit_lrp(p
, dst
, dst_flags
, args
[0], args
[1], args
[2]);
1274 emit_mad(p
, dst
, dst_flags
, args
[0], args
[1], args
[2]);
1279 emit_alu1(p
, brw_MOV
, dst
, dst_flags
, args
[0]);
1283 emit_alu2(p
, brw_MUL
, dst
, dst_flags
, args
[0], args
[1]);
1287 emit_xpd(p
, dst
, dst_flags
, args
[0], args
[1]);
1290 /* Higher math functions:
1293 emit_math1(p
, BRW_MATH_FUNCTION_INV
, dst
, dst_flags
, args
[0]);
1297 emit_math1(p
, BRW_MATH_FUNCTION_RSQ
, dst
, dst_flags
, args
[0]);
1301 emit_math1(p
, BRW_MATH_FUNCTION_SIN
, dst
, dst_flags
, args
[0]);
1305 emit_math1(p
, BRW_MATH_FUNCTION_COS
, dst
, dst_flags
, args
[0]);
1309 emit_math1(p
, BRW_MATH_FUNCTION_EXP
, dst
, dst_flags
, args
[0]);
1313 emit_math1(p
, BRW_MATH_FUNCTION_LOG
, dst
, dst_flags
, args
[0]);
1317 /* There is an scs math function, but it would need some
1318 * fixup for 16-element execution.
1320 if (dst_flags
& WRITEMASK_X
)
1321 emit_math1(p
, BRW_MATH_FUNCTION_COS
, dst
, (dst_flags
&SATURATE
)|WRITEMASK_X
, args
[0]);
1322 if (dst_flags
& WRITEMASK_Y
)
1323 emit_math1(p
, BRW_MATH_FUNCTION_SIN
, dst
+1, (dst_flags
&SATURATE
)|WRITEMASK_X
, args
[0]);
1327 emit_math2(p
, BRW_MATH_FUNCTION_POW
, dst
, dst_flags
, args
[0], args
[1]);
1333 emit_cmp(p
, dst
, dst_flags
, args
[0], args
[1], args
[2]);
1337 emit_max(p
, dst
, dst_flags
, args
[0], args
[1]);
1341 emit_min(p
, dst
, dst_flags
, args
[0], args
[1]);
1345 emit_slt(p
, dst
, dst_flags
, args
[0], args
[1]);
1349 emit_sle(p
, dst
, dst_flags
, args
[0], args
[1]);
1352 emit_sgt(p
, dst
, dst_flags
, args
[0], args
[1]);
1355 emit_sge(p
, dst
, dst_flags
, args
[0], args
[1]);
1358 emit_seq(p
, dst
, dst_flags
, args
[0], args
[1]);
1361 emit_sne(p
, dst
, dst_flags
, args
[0], args
[1]);
1365 emit_lit(p
, dst
, dst_flags
, args
[0]);
1368 /* Texturing operations:
1371 emit_tex(c
, inst
, dst
, dst_flags
, args
[0]);
1375 emit_txb(c
, inst
, dst
, dst_flags
, args
[0]);
1379 emit_kil(c
, args
[0]);
1383 _mesa_printf("Unsupported opcode %i (%s) in fragment shader\n",
1384 inst
->opcode
, inst
->opcode
< MAX_OPCODE
?
1385 _mesa_opcode_string(inst
->opcode
) :
1389 for (i
= 0; i
< 4; i
++)
1390 if (inst
->dst
[i
] && inst
->dst
[i
]->spill_slot
)
1392 inst
->dst
[i
]->hw_reg
,
1393 inst
->dst
[i
]->spill_slot
);
1396 if (INTEL_DEBUG
& DEBUG_WM
) {
1399 _mesa_printf("wm-native:\n");
1400 for (i
= 0; i
< p
->nr_insn
; i
++)
1401 brw_disasm(stderr
, &p
->store
[i
]);