2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **********************************************************************/
29 * Keith Whitwell <keith@tungstengraphics.com>
33 #include "main/macros.h"
34 #include "brw_context.h"
37 #define SATURATE (1<<5)
39 /* Not quite sure how correct this is - need to understand horiz
40 * vs. vertical strides a little better.
42 static INLINE
struct brw_reg
sechalf( struct brw_reg reg
)
51 * R0.0 -- pixel mask, one bit for each of 4 pixels in 4 tiles,
52 * corresponding to each of the 16 execution channels.
54 * R1.0 -- triangle vertex 0.X
55 * R1.1 -- triangle vertex 0.Y
56 * R1.2 -- tile 0 x,y coords (2 packed uwords)
57 * R1.3 -- tile 1 x,y coords (2 packed uwords)
58 * R1.4 -- tile 2 x,y coords (2 packed uwords)
59 * R1.5 -- tile 3 x,y coords (2 packed uwords)
66 static void emit_pixel_xy(struct brw_compile
*p
,
67 const struct brw_reg
*dst
,
69 const struct brw_reg
*arg0
)
71 struct brw_reg r1
= brw_vec1_grf(1, 0);
72 struct brw_reg r1_uw
= retype(r1
, BRW_REGISTER_TYPE_UW
);
74 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
76 /* Calculate pixel centers by adding 1 or 0 to each of the
77 * micro-tile coordinates passed in r1.
79 if (mask
& WRITEMASK_X
) {
81 vec16(retype(dst
[0], BRW_REGISTER_TYPE_UW
)),
82 stride(suboffset(r1_uw
, 4), 2, 4, 0),
83 brw_imm_v(0x10101010));
86 if (mask
& WRITEMASK_Y
) {
88 vec16(retype(dst
[1], BRW_REGISTER_TYPE_UW
)),
89 stride(suboffset(r1_uw
,5), 2, 4, 0),
90 brw_imm_v(0x11001100));
93 brw_set_compression_control(p
, BRW_COMPRESSION_COMPRESSED
);
98 static void emit_delta_xy(struct brw_compile
*p
,
99 const struct brw_reg
*dst
,
101 const struct brw_reg
*arg0
,
102 const struct brw_reg
*arg1
)
104 struct brw_reg r1
= brw_vec1_grf(1, 0);
106 /* Calc delta X,Y by subtracting origin in r1 from the pixel
109 if (mask
& WRITEMASK_X
) {
112 retype(arg0
[0], BRW_REGISTER_TYPE_UW
),
116 if (mask
& WRITEMASK_Y
) {
119 retype(arg0
[1], BRW_REGISTER_TYPE_UW
),
120 negate(suboffset(r1
,1)));
125 static void emit_wpos_xy(struct brw_wm_compile
*c
,
126 const struct brw_reg
*dst
,
128 const struct brw_reg
*arg0
)
130 struct brw_compile
*p
= &c
->func
;
132 /* Calculate the pixel offset from window bottom left into destination
135 if (mask
& WRITEMASK_X
) {
136 /* X' = X - origin */
139 retype(arg0
[0], BRW_REGISTER_TYPE_W
),
140 brw_imm_d(0 - c
->key
.origin_x
));
143 if (mask
& WRITEMASK_Y
) {
144 /* Y' = height - (Y - origin_y) = height + origin_y - Y */
147 negate(retype(arg0
[1], BRW_REGISTER_TYPE_W
)),
148 brw_imm_d(c
->key
.origin_y
+ c
->key
.drawable_height
- 1));
153 static void emit_pixel_w( struct brw_compile
*p
,
154 const struct brw_reg
*dst
,
156 const struct brw_reg
*arg0
,
157 const struct brw_reg
*deltas
)
159 /* Don't need this if all you are doing is interpolating color, for
162 if (mask
& WRITEMASK_W
) {
163 struct brw_reg interp3
= brw_vec1_grf(arg0
[0].nr
+1, 4);
165 /* Calc 1/w - just linterp wpos[3] optimized by putting the
166 * result straight into a message reg.
168 brw_LINE(p
, brw_null_reg(), interp3
, deltas
[0]);
169 brw_MAC(p
, brw_message_reg(2), suboffset(interp3
, 1), deltas
[1]);
172 brw_math_16( p
, dst
[3],
173 BRW_MATH_FUNCTION_INV
,
174 BRW_MATH_SATURATE_NONE
,
176 BRW_MATH_PRECISION_FULL
);
182 static void emit_linterp( struct brw_compile
*p
,
183 const struct brw_reg
*dst
,
185 const struct brw_reg
*arg0
,
186 const struct brw_reg
*deltas
)
188 struct brw_reg interp
[4];
189 GLuint nr
= arg0
[0].nr
;
192 interp
[0] = brw_vec1_grf(nr
, 0);
193 interp
[1] = brw_vec1_grf(nr
, 4);
194 interp
[2] = brw_vec1_grf(nr
+1, 0);
195 interp
[3] = brw_vec1_grf(nr
+1, 4);
197 for (i
= 0; i
< 4; i
++) {
199 brw_LINE(p
, brw_null_reg(), interp
[i
], deltas
[0]);
200 brw_MAC(p
, dst
[i
], suboffset(interp
[i
],1), deltas
[1]);
206 static void emit_pinterp( struct brw_compile
*p
,
207 const struct brw_reg
*dst
,
209 const struct brw_reg
*arg0
,
210 const struct brw_reg
*deltas
,
211 const struct brw_reg
*w
)
213 struct brw_reg interp
[4];
214 GLuint nr
= arg0
[0].nr
;
217 interp
[0] = brw_vec1_grf(nr
, 0);
218 interp
[1] = brw_vec1_grf(nr
, 4);
219 interp
[2] = brw_vec1_grf(nr
+1, 0);
220 interp
[3] = brw_vec1_grf(nr
+1, 4);
222 for (i
= 0; i
< 4; i
++) {
224 brw_LINE(p
, brw_null_reg(), interp
[i
], deltas
[0]);
225 brw_MAC(p
, dst
[i
], suboffset(interp
[i
],1), deltas
[1]);
228 for (i
= 0; i
< 4; i
++) {
230 brw_MUL(p
, dst
[i
], dst
[i
], w
[3]);
236 static void emit_cinterp( struct brw_compile
*p
,
237 const struct brw_reg
*dst
,
239 const struct brw_reg
*arg0
)
241 struct brw_reg interp
[4];
242 GLuint nr
= arg0
[0].nr
;
245 interp
[0] = brw_vec1_grf(nr
, 0);
246 interp
[1] = brw_vec1_grf(nr
, 4);
247 interp
[2] = brw_vec1_grf(nr
+1, 0);
248 interp
[3] = brw_vec1_grf(nr
+1, 4);
250 for (i
= 0; i
< 4; i
++) {
252 brw_MOV(p
, dst
[i
], suboffset(interp
[i
],3)); /* TODO: optimize away like other moves */
258 static void emit_alu1( struct brw_compile
*p
,
259 struct brw_instruction
*(*func
)(struct brw_compile
*,
262 const struct brw_reg
*dst
,
264 const struct brw_reg
*arg0
)
269 brw_set_saturate(p
, 1);
271 for (i
= 0; i
< 4; i
++) {
273 func(p
, dst
[i
], arg0
[i
]);
278 brw_set_saturate(p
, 0);
282 static void emit_alu2( struct brw_compile
*p
,
283 struct brw_instruction
*(*func
)(struct brw_compile
*,
287 const struct brw_reg
*dst
,
289 const struct brw_reg
*arg0
,
290 const struct brw_reg
*arg1
)
295 brw_set_saturate(p
, 1);
297 for (i
= 0; i
< 4; i
++) {
299 func(p
, dst
[i
], arg0
[i
], arg1
[i
]);
304 brw_set_saturate(p
, 0);
308 static void emit_mad( struct brw_compile
*p
,
309 const struct brw_reg
*dst
,
311 const struct brw_reg
*arg0
,
312 const struct brw_reg
*arg1
,
313 const struct brw_reg
*arg2
)
317 for (i
= 0; i
< 4; i
++) {
319 brw_MUL(p
, dst
[i
], arg0
[i
], arg1
[i
]);
321 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
322 brw_ADD(p
, dst
[i
], dst
[i
], arg2
[i
]);
323 brw_set_saturate(p
, 0);
329 static void emit_lrp( struct brw_compile
*p
,
330 const struct brw_reg
*dst
,
332 const struct brw_reg
*arg0
,
333 const struct brw_reg
*arg1
,
334 const struct brw_reg
*arg2
)
338 /* Uses dst as a temporary:
340 for (i
= 0; i
< 4; i
++) {
342 /* Can I use the LINE instruction for this?
344 brw_ADD(p
, dst
[i
], negate(arg0
[i
]), brw_imm_f(1.0));
345 brw_MUL(p
, brw_null_reg(), dst
[i
], arg2
[i
]);
347 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
348 brw_MAC(p
, dst
[i
], arg0
[i
], arg1
[i
]);
349 brw_set_saturate(p
, 0);
354 static void emit_sop( struct brw_compile
*p
,
355 const struct brw_reg
*dst
,
358 const struct brw_reg
*arg0
,
359 const struct brw_reg
*arg1
)
363 for (i
= 0; i
< 4; i
++) {
365 brw_MOV(p
, dst
[i
], brw_imm_f(0));
366 brw_CMP(p
, brw_null_reg(), cond
, arg0
[i
], arg1
[i
]);
367 brw_MOV(p
, dst
[i
], brw_imm_f(1.0));
368 brw_set_predicate_control_flag_value(p
, 0xff);
373 static void emit_slt( struct brw_compile
*p
,
374 const struct brw_reg
*dst
,
376 const struct brw_reg
*arg0
,
377 const struct brw_reg
*arg1
)
379 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_L
, arg0
, arg1
);
382 static void emit_sle( struct brw_compile
*p
,
383 const struct brw_reg
*dst
,
385 const struct brw_reg
*arg0
,
386 const struct brw_reg
*arg1
)
388 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_LE
, arg0
, arg1
);
391 static void emit_sgt( struct brw_compile
*p
,
392 const struct brw_reg
*dst
,
394 const struct brw_reg
*arg0
,
395 const struct brw_reg
*arg1
)
397 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_G
, arg0
, arg1
);
400 static void emit_sge( struct brw_compile
*p
,
401 const struct brw_reg
*dst
,
403 const struct brw_reg
*arg0
,
404 const struct brw_reg
*arg1
)
406 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_GE
, arg0
, arg1
);
409 static void emit_seq( struct brw_compile
*p
,
410 const struct brw_reg
*dst
,
412 const struct brw_reg
*arg0
,
413 const struct brw_reg
*arg1
)
415 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_EQ
, arg0
, arg1
);
418 static void emit_sne( struct brw_compile
*p
,
419 const struct brw_reg
*dst
,
421 const struct brw_reg
*arg0
,
422 const struct brw_reg
*arg1
)
424 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_NEQ
, arg0
, arg1
);
427 static void emit_cmp( struct brw_compile
*p
,
428 const struct brw_reg
*dst
,
430 const struct brw_reg
*arg0
,
431 const struct brw_reg
*arg1
,
432 const struct brw_reg
*arg2
)
436 for (i
= 0; i
< 4; i
++) {
438 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
439 brw_MOV(p
, dst
[i
], arg2
[i
]);
440 brw_set_saturate(p
, 0);
442 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, arg0
[i
], brw_imm_f(0));
444 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
445 brw_MOV(p
, dst
[i
], arg1
[i
]);
446 brw_set_saturate(p
, 0);
447 brw_set_predicate_control_flag_value(p
, 0xff);
452 static void emit_max( struct brw_compile
*p
,
453 const struct brw_reg
*dst
,
455 const struct brw_reg
*arg0
,
456 const struct brw_reg
*arg1
)
460 for (i
= 0; i
< 4; i
++) {
462 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
463 brw_MOV(p
, dst
[i
], arg0
[i
]);
464 brw_set_saturate(p
, 0);
466 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, arg0
[i
], arg1
[i
]);
468 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
469 brw_MOV(p
, dst
[i
], arg1
[i
]);
470 brw_set_saturate(p
, 0);
471 brw_set_predicate_control_flag_value(p
, 0xff);
476 static void emit_min( struct brw_compile
*p
,
477 const struct brw_reg
*dst
,
479 const struct brw_reg
*arg0
,
480 const struct brw_reg
*arg1
)
484 for (i
= 0; i
< 4; i
++) {
486 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
487 brw_MOV(p
, dst
[i
], arg1
[i
]);
488 brw_set_saturate(p
, 0);
490 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, arg0
[i
], arg1
[i
]);
492 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
493 brw_MOV(p
, dst
[i
], arg0
[i
]);
494 brw_set_saturate(p
, 0);
495 brw_set_predicate_control_flag_value(p
, 0xff);
501 static void emit_dp3( struct brw_compile
*p
,
502 const struct brw_reg
*dst
,
504 const struct brw_reg
*arg0
,
505 const struct brw_reg
*arg1
)
507 if (!(mask
& WRITEMASK_XYZW
))
508 return; /* Do not emit dead code */
510 assert((mask
& WRITEMASK_XYZW
) == WRITEMASK_X
);
512 brw_MUL(p
, brw_null_reg(), arg0
[0], arg1
[0]);
513 brw_MAC(p
, brw_null_reg(), arg0
[1], arg1
[1]);
515 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
516 brw_MAC(p
, dst
[0], arg0
[2], arg1
[2]);
517 brw_set_saturate(p
, 0);
521 static void emit_dp4( struct brw_compile
*p
,
522 const struct brw_reg
*dst
,
524 const struct brw_reg
*arg0
,
525 const struct brw_reg
*arg1
)
527 if (!(mask
& WRITEMASK_XYZW
))
528 return; /* Do not emit dead code */
530 assert((mask
& WRITEMASK_XYZW
) == WRITEMASK_X
);
532 brw_MUL(p
, brw_null_reg(), arg0
[0], arg1
[0]);
533 brw_MAC(p
, brw_null_reg(), arg0
[1], arg1
[1]);
534 brw_MAC(p
, brw_null_reg(), arg0
[2], arg1
[2]);
536 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
537 brw_MAC(p
, dst
[0], arg0
[3], arg1
[3]);
538 brw_set_saturate(p
, 0);
542 static void emit_dph( struct brw_compile
*p
,
543 const struct brw_reg
*dst
,
545 const struct brw_reg
*arg0
,
546 const struct brw_reg
*arg1
)
548 if (!(mask
& WRITEMASK_XYZW
))
549 return; /* Do not emit dead code */
551 assert((mask
& WRITEMASK_XYZW
) == WRITEMASK_X
);
553 brw_MUL(p
, brw_null_reg(), arg0
[0], arg1
[0]);
554 brw_MAC(p
, brw_null_reg(), arg0
[1], arg1
[1]);
555 brw_MAC(p
, dst
[0], arg0
[2], arg1
[2]);
557 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
558 brw_ADD(p
, dst
[0], dst
[0], arg1
[3]);
559 brw_set_saturate(p
, 0);
563 static void emit_xpd( struct brw_compile
*p
,
564 const struct brw_reg
*dst
,
566 const struct brw_reg
*arg0
,
567 const struct brw_reg
*arg1
)
571 assert(!(mask
& WRITEMASK_W
) == WRITEMASK_X
);
573 for (i
= 0 ; i
< 3; i
++) {
578 brw_MUL(p
, brw_null_reg(), negate(arg0
[i2
]), arg1
[i1
]);
580 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
581 brw_MAC(p
, dst
[i
], arg0
[i1
], arg1
[i2
]);
582 brw_set_saturate(p
, 0);
588 static void emit_math1( struct brw_compile
*p
,
590 const struct brw_reg
*dst
,
592 const struct brw_reg
*arg0
)
594 if (!(mask
& WRITEMASK_XYZW
))
595 return; /* Do not emit dead code */
597 //assert((mask & WRITEMASK_XYZW) == WRITEMASK_X ||
598 // function == BRW_MATH_FUNCTION_SINCOS);
600 brw_MOV(p
, brw_message_reg(2), arg0
[0]);
602 /* Send two messages to perform all 16 operations:
607 (mask
& SATURATE
) ? BRW_MATH_SATURATE_SATURATE
: BRW_MATH_SATURATE_NONE
,
610 BRW_MATH_PRECISION_FULL
);
614 static void emit_math2( struct brw_compile
*p
,
616 const struct brw_reg
*dst
,
618 const struct brw_reg
*arg0
,
619 const struct brw_reg
*arg1
)
621 if (!(mask
& WRITEMASK_XYZW
))
622 return; /* Do not emit dead code */
624 assert((mask
& WRITEMASK_XYZW
) == WRITEMASK_X
);
626 brw_push_insn_state(p
);
628 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
629 brw_MOV(p
, brw_message_reg(2), arg0
[0]);
630 brw_set_compression_control(p
, BRW_COMPRESSION_2NDHALF
);
631 brw_MOV(p
, brw_message_reg(4), sechalf(arg0
[0]));
633 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
634 brw_MOV(p
, brw_message_reg(3), arg1
[0]);
635 brw_set_compression_control(p
, BRW_COMPRESSION_2NDHALF
);
636 brw_MOV(p
, brw_message_reg(5), sechalf(arg1
[0]));
639 /* Send two messages to perform all 16 operations:
641 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
645 (mask
& SATURATE
) ? BRW_MATH_SATURATE_SATURATE
: BRW_MATH_SATURATE_NONE
,
648 BRW_MATH_DATA_VECTOR
,
649 BRW_MATH_PRECISION_FULL
);
651 brw_set_compression_control(p
, BRW_COMPRESSION_2NDHALF
);
655 (mask
& SATURATE
) ? BRW_MATH_SATURATE_SATURATE
: BRW_MATH_SATURATE_NONE
,
658 BRW_MATH_DATA_VECTOR
,
659 BRW_MATH_PRECISION_FULL
);
661 brw_pop_insn_state(p
);
666 static void emit_tex( struct brw_wm_compile
*c
,
667 const struct brw_wm_instruction
*inst
,
670 struct brw_reg
*arg
)
672 struct brw_compile
*p
= &c
->func
;
673 GLuint msgLength
, responseLength
;
677 /* How many input regs are there?
679 switch (inst
->tex_idx
) {
680 case TEXTURE_1D_INDEX
:
684 case TEXTURE_2D_INDEX
:
685 case TEXTURE_RECT_INDEX
:
690 emit
= WRITEMASK_XYZ
;
695 if (inst
->tex_shadow
) {
702 for (i
= 0; i
< nr
; i
++) {
703 static const GLuint swz
[4] = {0,1,2,2};
705 brw_MOV(p
, brw_message_reg(msgLength
+1), arg
[swz
[i
]]);
707 brw_MOV(p
, brw_message_reg(msgLength
+1), brw_imm_f(0));
711 responseLength
= 8; /* always */
714 retype(vec16(dst
[0]), BRW_REGISTER_TYPE_UW
),
716 retype(c
->payload
.depth
[0].hw_reg
, BRW_REGISTER_TYPE_UW
),
717 inst
->tex_unit
+ MAX_DRAW_BUFFERS
, /* surface */
718 inst
->tex_unit
, /* sampler */
721 BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE
:
722 BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE
),
729 static void emit_txb( struct brw_wm_compile
*c
,
730 const struct brw_wm_instruction
*inst
,
733 struct brw_reg
*arg
)
735 struct brw_compile
*p
= &c
->func
;
738 /* Shadow ignored for txb.
740 switch (inst
->tex_idx
) {
741 case TEXTURE_1D_INDEX
:
742 brw_MOV(p
, brw_message_reg(2), arg
[0]);
743 brw_MOV(p
, brw_message_reg(4), brw_imm_f(0));
744 brw_MOV(p
, brw_message_reg(6), brw_imm_f(0));
746 case TEXTURE_2D_INDEX
:
747 case TEXTURE_RECT_INDEX
:
748 brw_MOV(p
, brw_message_reg(2), arg
[0]);
749 brw_MOV(p
, brw_message_reg(4), arg
[1]);
750 brw_MOV(p
, brw_message_reg(6), brw_imm_f(0));
753 brw_MOV(p
, brw_message_reg(2), arg
[0]);
754 brw_MOV(p
, brw_message_reg(4), arg
[1]);
755 brw_MOV(p
, brw_message_reg(6), arg
[2]);
759 brw_MOV(p
, brw_message_reg(8), arg
[3]);
763 retype(vec16(dst
[0]), BRW_REGISTER_TYPE_UW
),
765 retype(c
->payload
.depth
[0].hw_reg
, BRW_REGISTER_TYPE_UW
),
766 inst
->tex_unit
+ MAX_DRAW_BUFFERS
, /* surface */
767 inst
->tex_unit
, /* sampler */
769 BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS
,
770 8, /* responseLength */
776 static void emit_lit( struct brw_compile
*p
,
777 const struct brw_reg
*dst
,
779 const struct brw_reg
*arg0
)
781 assert((mask
& WRITEMASK_XW
) == 0);
783 if (mask
& WRITEMASK_Y
) {
784 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
785 brw_MOV(p
, dst
[1], arg0
[0]);
786 brw_set_saturate(p
, 0);
789 if (mask
& WRITEMASK_Z
) {
790 emit_math2(p
, BRW_MATH_FUNCTION_POW
,
792 WRITEMASK_X
| (mask
& SATURATE
),
797 /* Ordinarily you'd use an iff statement to skip or shortcircuit
798 * some of the POW calculations above, but 16-wide iff statements
799 * seem to lock c1 hardware, so this is a nasty workaround:
801 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_LE
, arg0
[0], brw_imm_f(0));
803 if (mask
& WRITEMASK_Y
)
804 brw_MOV(p
, dst
[1], brw_imm_f(0));
806 if (mask
& WRITEMASK_Z
)
807 brw_MOV(p
, dst
[2], brw_imm_f(0));
809 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
813 /* Kill pixel - set execution mask to zero for those pixels which
816 static void emit_kil( struct brw_wm_compile
*c
,
817 struct brw_reg
*arg0
)
819 struct brw_compile
*p
= &c
->func
;
820 struct brw_reg r0uw
= retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW
);
823 /* XXX - usually won't need 4 compares!
825 for (i
= 0; i
< 4; i
++) {
826 brw_push_insn_state(p
);
827 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_GE
, arg0
[i
], brw_imm_f(0));
828 brw_set_predicate_control_flag_value(p
, 0xff);
829 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
830 brw_AND(p
, r0uw
, brw_flag_reg(), r0uw
);
831 brw_pop_insn_state(p
);
836 static void fire_fb_write( struct brw_wm_compile
*c
,
842 struct brw_compile
*p
= &c
->func
;
844 /* Pass through control information:
846 /* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */
848 brw_push_insn_state(p
);
849 brw_set_mask_control(p
, BRW_MASK_DISABLE
); /* ? */
850 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
852 brw_message_reg(base_reg
+ 1),
854 brw_pop_insn_state(p
);
857 /* Send framebuffer write message: */
858 /* send (16) null.0<1>:uw m0 r0.0<8;8,1>:uw 0x85a04000:ud { Align1 EOT } */
860 retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW
),
862 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW
),
870 static void emit_aa( struct brw_wm_compile
*c
,
871 struct brw_reg
*arg1
,
874 struct brw_compile
*p
= &c
->func
;
875 GLuint comp
= c
->key
.aa_dest_stencil_reg
/ 2;
876 GLuint off
= c
->key
.aa_dest_stencil_reg
% 2;
877 struct brw_reg aa
= offset(arg1
[comp
], off
);
879 brw_push_insn_state(p
);
880 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
); /* ?? */
881 brw_MOV(p
, brw_message_reg(reg
), aa
);
882 brw_pop_insn_state(p
);
886 /* Post-fragment-program processing. Send the results to the
888 * \param arg0 the fragment color
889 * \param arg1 the pass-through depth value
890 * \param arg2 the shader-computed depth value
892 static void emit_fb_write( struct brw_wm_compile
*c
,
893 struct brw_reg
*arg0
,
894 struct brw_reg
*arg1
,
895 struct brw_reg
*arg2
,
899 struct brw_compile
*p
= &c
->func
;
903 /* Reserve a space for AA - may not be needed:
905 if (c
->key
.aa_dest_stencil_reg
)
908 /* I don't really understand how this achieves the color interleave
909 * (ie RGBARGBA) in the result: [Do the saturation here]
912 brw_push_insn_state(p
);
914 for (channel
= 0; channel
< 4; channel
++) {
915 /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */
916 /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */
918 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
920 brw_message_reg(nr
+ channel
),
923 brw_set_compression_control(p
, BRW_COMPRESSION_2NDHALF
);
925 brw_message_reg(nr
+ channel
+ 4),
926 sechalf(arg0
[channel
]));
929 /* skip over the regs populated above:
933 brw_pop_insn_state(p
);
936 if (c
->key
.source_depth_to_render_target
)
938 if (c
->key
.computes_depth
)
939 brw_MOV(p
, brw_message_reg(nr
), arg2
[2]);
941 brw_MOV(p
, brw_message_reg(nr
), arg1
[1]); /* ? */
946 if (c
->key
.dest_depth_reg
)
948 GLuint comp
= c
->key
.dest_depth_reg
/ 2;
949 GLuint off
= c
->key
.dest_depth_reg
% 2;
952 brw_push_insn_state(p
);
953 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
955 brw_MOV(p
, brw_message_reg(nr
), offset(arg1
[comp
],1));
957 brw_MOV(p
, brw_message_reg(nr
+1), arg1
[comp
+1]);
958 brw_pop_insn_state(p
);
961 brw_MOV(p
, brw_message_reg(nr
), arg1
[comp
]);
966 if (!c
->key
.runtime_check_aads_emit
) {
967 if (c
->key
.aa_dest_stencil_reg
)
970 fire_fb_write(c
, 0, nr
, target
, eot
);
973 struct brw_reg v1_null_ud
= vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD
));
974 struct brw_reg ip
= brw_ip_reg();
975 struct brw_instruction
*jmp
;
977 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
978 brw_set_conditionalmod(p
, BRW_CONDITIONAL_Z
);
981 get_element_ud(brw_vec8_grf(1,0), 6),
984 jmp
= brw_JMPI(p
, ip
, ip
, brw_imm_w(0));
987 fire_fb_write(c
, 0, nr
, target
, eot
);
988 /* note - thread killed in subroutine */
990 brw_land_fwd_jump(p
, jmp
);
992 /* ELSE: Shuffle up one register to fill in the hole left for AA:
994 fire_fb_write(c
, 1, nr
-1, target
, eot
);
999 /* Post-fragment-program processing. Send the results to the
1002 static void emit_spill( struct brw_wm_compile
*c
,
1006 struct brw_compile
*p
= &c
->func
;
1009 mov (16) m2.0<1>:ud r2.0<8;8,1>:ud { Align1 Compr }
1011 brw_MOV(p
, brw_message_reg(2), reg
);
1014 mov (1) r0.2<1>:d 0x00000080:d { Align1 NoMask }
1015 send (16) null.0<1>:uw m1 r0.0<8;8,1>:uw 0x053003ff:ud { Align1 }
1018 retype(vec16(brw_vec8_grf(0, 0)), BRW_REGISTER_TYPE_UW
),
1024 static void emit_unspill( struct brw_wm_compile
*c
,
1028 struct brw_compile
*p
= &c
->func
;
1030 /* Slot 0 is the undef value.
1033 brw_MOV(p
, reg
, brw_imm_f(0));
1038 mov (1) r0.2<1>:d 0x000000c0:d { Align1 NoMask }
1039 send (16) r110.0<1>:uw m1 r0.0<8;8,1>:uw 0x041243ff:ud { Align1 }
1043 retype(vec16(reg
), BRW_REGISTER_TYPE_UW
),
1050 * Retrieve up to 4 GEN4 register pairs for the given wm reg:
1052 static void get_argument_regs( struct brw_wm_compile
*c
,
1053 struct brw_wm_ref
*arg
[],
1054 struct brw_reg
*regs
)
1058 for (i
= 0; i
< 4; i
++) {
1061 if (arg
[i
]->unspill_reg
)
1063 brw_vec8_grf(arg
[i
]->unspill_reg
, 0),
1064 arg
[i
]->value
->spill_slot
);
1066 regs
[i
] = arg
[i
]->hw_reg
;
1069 regs
[i
] = brw_null_reg();
1075 static void spill_values( struct brw_wm_compile
*c
,
1076 struct brw_wm_value
*values
,
1081 for (i
= 0; i
< nr
; i
++)
1082 if (values
[i
].spill_slot
)
1083 emit_spill(c
, values
[i
].hw_reg
, values
[i
].spill_slot
);
1087 /* Emit the fragment program instructions here.
1089 void brw_wm_emit( struct brw_wm_compile
*c
)
1091 struct brw_compile
*p
= &c
->func
;
1094 brw_set_compression_control(p
, BRW_COMPRESSION_COMPRESSED
);
1096 /* Check if any of the payload regs need to be spilled:
1098 spill_values(c
, c
->payload
.depth
, 4);
1099 spill_values(c
, c
->creg
, c
->nr_creg
);
1100 spill_values(c
, c
->payload
.input_interp
, FRAG_ATTRIB_MAX
);
1103 for (insn
= 0; insn
< c
->nr_insns
; insn
++) {
1105 struct brw_wm_instruction
*inst
= &c
->instruction
[insn
];
1106 struct brw_reg args
[3][4], dst
[4];
1107 GLuint i
, dst_flags
;
1109 /* Get argument regs:
1111 for (i
= 0; i
< 3; i
++)
1112 get_argument_regs(c
, inst
->src
[i
], args
[i
]);
1116 for (i
= 0; i
< 4; i
++)
1118 dst
[i
] = inst
->dst
[i
]->hw_reg
;
1120 dst
[i
] = brw_null_reg();
1124 dst_flags
= inst
->writemask
;
1126 dst_flags
|= SATURATE
;
1128 switch (inst
->opcode
) {
1129 /* Generated instructions for calculating triangle interpolants:
1132 emit_pixel_xy(p
, dst
, dst_flags
, args
[0]);
1136 emit_delta_xy(p
, dst
, dst_flags
, args
[0], args
[1]);
1140 emit_wpos_xy(c
, dst
, dst_flags
, args
[0]);
1144 emit_pixel_w(p
, dst
, dst_flags
, args
[0], args
[1]);
1148 emit_linterp(p
, dst
, dst_flags
, args
[0], args
[1]);
1152 emit_pinterp(p
, dst
, dst_flags
, args
[0], args
[1], args
[2]);
1156 emit_cinterp(p
, dst
, dst_flags
, args
[0]);
1160 emit_fb_write(c
, args
[0], args
[1], args
[2], inst
->target
, inst
->eot
);
1163 /* Straightforward arithmetic:
1166 emit_alu2(p
, brw_ADD
, dst
, dst_flags
, args
[0], args
[1]);
1170 emit_alu1(p
, brw_FRC
, dst
, dst_flags
, args
[0]);
1174 emit_alu1(p
, brw_RNDD
, dst
, dst_flags
, args
[0]);
1178 emit_dp3(p
, dst
, dst_flags
, args
[0], args
[1]);
1182 emit_dp4(p
, dst
, dst_flags
, args
[0], args
[1]);
1186 emit_dph(p
, dst
, dst_flags
, args
[0], args
[1]);
1190 emit_lrp(p
, dst
, dst_flags
, args
[0], args
[1], args
[2]);
1194 emit_mad(p
, dst
, dst_flags
, args
[0], args
[1], args
[2]);
1199 emit_alu1(p
, brw_MOV
, dst
, dst_flags
, args
[0]);
1203 emit_alu2(p
, brw_MUL
, dst
, dst_flags
, args
[0], args
[1]);
1207 emit_xpd(p
, dst
, dst_flags
, args
[0], args
[1]);
1210 /* Higher math functions:
1213 emit_math1(p
, BRW_MATH_FUNCTION_INV
, dst
, dst_flags
, args
[0]);
1217 emit_math1(p
, BRW_MATH_FUNCTION_RSQ
, dst
, dst_flags
, args
[0]);
1221 emit_math1(p
, BRW_MATH_FUNCTION_SIN
, dst
, dst_flags
, args
[0]);
1225 emit_math1(p
, BRW_MATH_FUNCTION_COS
, dst
, dst_flags
, args
[0]);
1229 emit_math1(p
, BRW_MATH_FUNCTION_EXP
, dst
, dst_flags
, args
[0]);
1233 emit_math1(p
, BRW_MATH_FUNCTION_LOG
, dst
, dst_flags
, args
[0]);
1237 /* There is an scs math function, but it would need some
1238 * fixup for 16-element execution.
1240 if (dst_flags
& WRITEMASK_X
)
1241 emit_math1(p
, BRW_MATH_FUNCTION_COS
, dst
, (dst_flags
&SATURATE
)|WRITEMASK_X
, args
[0]);
1242 if (dst_flags
& WRITEMASK_Y
)
1243 emit_math1(p
, BRW_MATH_FUNCTION_SIN
, dst
+1, (dst_flags
&SATURATE
)|WRITEMASK_X
, args
[0]);
1247 emit_math2(p
, BRW_MATH_FUNCTION_POW
, dst
, dst_flags
, args
[0], args
[1]);
1253 emit_cmp(p
, dst
, dst_flags
, args
[0], args
[1], args
[2]);
1257 emit_max(p
, dst
, dst_flags
, args
[0], args
[1]);
1261 emit_min(p
, dst
, dst_flags
, args
[0], args
[1]);
1265 emit_slt(p
, dst
, dst_flags
, args
[0], args
[1]);
1269 emit_sle(p
, dst
, dst_flags
, args
[0], args
[1]);
1272 emit_sgt(p
, dst
, dst_flags
, args
[0], args
[1]);
1275 emit_sge(p
, dst
, dst_flags
, args
[0], args
[1]);
1278 emit_seq(p
, dst
, dst_flags
, args
[0], args
[1]);
1281 emit_sne(p
, dst
, dst_flags
, args
[0], args
[1]);
1285 emit_lit(p
, dst
, dst_flags
, args
[0]);
1288 /* Texturing operations:
1291 emit_tex(c
, inst
, dst
, dst_flags
, args
[0]);
1295 emit_txb(c
, inst
, dst
, dst_flags
, args
[0]);
1299 emit_kil(c
, args
[0]);
1303 _mesa_printf("Unsupported opcode %i (%s) in fragment shader\n",
1304 inst
->opcode
, inst
->opcode
< MAX_OPCODE
?
1305 _mesa_opcode_string(inst
->opcode
) :
1309 for (i
= 0; i
< 4; i
++)
1310 if (inst
->dst
[i
] && inst
->dst
[i
]->spill_slot
)
1312 inst
->dst
[i
]->hw_reg
,
1313 inst
->dst
[i
]->spill_slot
);