2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **********************************************************************/
29 * Keith Whitwell <keith@tungstengraphics.com>
33 #include "main/macros.h"
34 #include "brw_context.h"
37 #define SATURATE (1<<5)
39 /* Not quite sure how correct this is - need to understand horiz
40 * vs. vertical strides a little better.
42 static INLINE
struct brw_reg
sechalf( struct brw_reg reg
)
51 * R0.0 -- pixel mask, one bit for each of 4 pixels in 4 tiles,
52 * corresponding to each of the 16 execution channels.
54 * R1.0 -- triangle vertex 0.X
55 * R1.1 -- triangle vertex 0.Y
56 * R1.2 -- tile 0 x,y coords (2 packed uwords)
57 * R1.3 -- tile 1 x,y coords (2 packed uwords)
58 * R1.4 -- tile 2 x,y coords (2 packed uwords)
59 * R1.5 -- tile 3 x,y coords (2 packed uwords)
66 static void emit_pixel_xy(struct brw_compile
*p
,
67 const struct brw_reg
*dst
,
69 const struct brw_reg
*arg0
)
71 struct brw_reg r1
= brw_vec1_grf(1, 0);
72 struct brw_reg r1_uw
= retype(r1
, BRW_REGISTER_TYPE_UW
);
74 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
76 /* Calculate pixel centers by adding 1 or 0 to each of the
77 * micro-tile coordinates passed in r1.
79 if (mask
& WRITEMASK_X
) {
81 vec16(retype(dst
[0], BRW_REGISTER_TYPE_UW
)),
82 stride(suboffset(r1_uw
, 4), 2, 4, 0),
83 brw_imm_v(0x10101010));
86 if (mask
& WRITEMASK_Y
) {
88 vec16(retype(dst
[1], BRW_REGISTER_TYPE_UW
)),
89 stride(suboffset(r1_uw
,5), 2, 4, 0),
90 brw_imm_v(0x11001100));
93 brw_set_compression_control(p
, BRW_COMPRESSION_COMPRESSED
);
98 static void emit_delta_xy(struct brw_compile
*p
,
99 const struct brw_reg
*dst
,
101 const struct brw_reg
*arg0
,
102 const struct brw_reg
*arg1
)
104 struct brw_reg r1
= brw_vec1_grf(1, 0);
106 /* Calc delta X,Y by subtracting origin in r1 from the pixel
109 if (mask
& WRITEMASK_X
) {
112 retype(arg0
[0], BRW_REGISTER_TYPE_UW
),
116 if (mask
& WRITEMASK_Y
) {
119 retype(arg0
[1], BRW_REGISTER_TYPE_UW
),
120 negate(suboffset(r1
,1)));
125 static void emit_wpos_xy(struct brw_wm_compile
*c
,
126 const struct brw_reg
*dst
,
128 const struct brw_reg
*arg0
)
130 struct brw_compile
*p
= &c
->func
;
132 /* Calculate the pixel offset from window bottom left into destination
135 if (mask
& WRITEMASK_X
) {
136 /* X' = X - origin */
139 retype(arg0
[0], BRW_REGISTER_TYPE_W
),
140 brw_imm_d(0 - c
->key
.origin_x
));
143 if (mask
& WRITEMASK_Y
) {
144 /* Y' = height - (Y - origin_y) = height + origin_y - Y */
147 negate(retype(arg0
[1], BRW_REGISTER_TYPE_W
)),
148 brw_imm_d(c
->key
.origin_y
+ c
->key
.drawable_height
- 1));
153 static void emit_pixel_w( struct brw_compile
*p
,
154 const struct brw_reg
*dst
,
156 const struct brw_reg
*arg0
,
157 const struct brw_reg
*deltas
)
159 /* Don't need this if all you are doing is interpolating color, for
162 if (mask
& WRITEMASK_W
) {
163 struct brw_reg interp3
= brw_vec1_grf(arg0
[0].nr
+1, 4);
165 /* Calc 1/w - just linterp wpos[3] optimized by putting the
166 * result straight into a message reg.
168 brw_LINE(p
, brw_null_reg(), interp3
, deltas
[0]);
169 brw_MAC(p
, brw_message_reg(2), suboffset(interp3
, 1), deltas
[1]);
172 brw_math_16( p
, dst
[3],
173 BRW_MATH_FUNCTION_INV
,
174 BRW_MATH_SATURATE_NONE
,
176 BRW_MATH_PRECISION_FULL
);
182 static void emit_linterp( struct brw_compile
*p
,
183 const struct brw_reg
*dst
,
185 const struct brw_reg
*arg0
,
186 const struct brw_reg
*deltas
)
188 struct brw_reg interp
[4];
189 GLuint nr
= arg0
[0].nr
;
192 interp
[0] = brw_vec1_grf(nr
, 0);
193 interp
[1] = brw_vec1_grf(nr
, 4);
194 interp
[2] = brw_vec1_grf(nr
+1, 0);
195 interp
[3] = brw_vec1_grf(nr
+1, 4);
197 for (i
= 0; i
< 4; i
++) {
199 brw_LINE(p
, brw_null_reg(), interp
[i
], deltas
[0]);
200 brw_MAC(p
, dst
[i
], suboffset(interp
[i
],1), deltas
[1]);
206 static void emit_pinterp( struct brw_compile
*p
,
207 const struct brw_reg
*dst
,
209 const struct brw_reg
*arg0
,
210 const struct brw_reg
*deltas
,
211 const struct brw_reg
*w
)
213 struct brw_reg interp
[4];
214 GLuint nr
= arg0
[0].nr
;
217 interp
[0] = brw_vec1_grf(nr
, 0);
218 interp
[1] = brw_vec1_grf(nr
, 4);
219 interp
[2] = brw_vec1_grf(nr
+1, 0);
220 interp
[3] = brw_vec1_grf(nr
+1, 4);
222 for (i
= 0; i
< 4; i
++) {
224 brw_LINE(p
, brw_null_reg(), interp
[i
], deltas
[0]);
225 brw_MAC(p
, dst
[i
], suboffset(interp
[i
],1), deltas
[1]);
228 for (i
= 0; i
< 4; i
++) {
230 brw_MUL(p
, dst
[i
], dst
[i
], w
[3]);
236 static void emit_cinterp( struct brw_compile
*p
,
237 const struct brw_reg
*dst
,
239 const struct brw_reg
*arg0
)
241 struct brw_reg interp
[4];
242 GLuint nr
= arg0
[0].nr
;
245 interp
[0] = brw_vec1_grf(nr
, 0);
246 interp
[1] = brw_vec1_grf(nr
, 4);
247 interp
[2] = brw_vec1_grf(nr
+1, 0);
248 interp
[3] = brw_vec1_grf(nr
+1, 4);
250 for (i
= 0; i
< 4; i
++) {
252 brw_MOV(p
, dst
[i
], suboffset(interp
[i
],3)); /* TODO: optimize away like other moves */
257 /* Sets the destination channels to 1.0 or 0.0 according to glFrontFacing. */
258 static void emit_frontfacing( struct brw_compile
*p
,
259 const struct brw_reg
*dst
,
262 struct brw_reg r1_6ud
= retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD
);
265 if (!(mask
& WRITEMASK_XYZW
))
268 for (i
= 0; i
< 4; i
++) {
270 brw_MOV(p
, dst
[i
], brw_imm_f(0.0));
274 /* bit 31 is "primitive is back face", so checking < (1 << 31) gives
277 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, r1_6ud
, brw_imm_ud(1 << 31));
278 for (i
= 0; i
< 4; i
++) {
280 brw_MOV(p
, dst
[i
], brw_imm_f(1.0));
283 brw_set_predicate_control_flag_value(p
, 0xff);
286 static void emit_alu1( struct brw_compile
*p
,
287 struct brw_instruction
*(*func
)(struct brw_compile
*,
290 const struct brw_reg
*dst
,
292 const struct brw_reg
*arg0
)
297 brw_set_saturate(p
, 1);
299 for (i
= 0; i
< 4; i
++) {
301 func(p
, dst
[i
], arg0
[i
]);
306 brw_set_saturate(p
, 0);
310 static void emit_alu2( struct brw_compile
*p
,
311 struct brw_instruction
*(*func
)(struct brw_compile
*,
315 const struct brw_reg
*dst
,
317 const struct brw_reg
*arg0
,
318 const struct brw_reg
*arg1
)
323 brw_set_saturate(p
, 1);
325 for (i
= 0; i
< 4; i
++) {
327 func(p
, dst
[i
], arg0
[i
], arg1
[i
]);
332 brw_set_saturate(p
, 0);
336 static void emit_mad( struct brw_compile
*p
,
337 const struct brw_reg
*dst
,
339 const struct brw_reg
*arg0
,
340 const struct brw_reg
*arg1
,
341 const struct brw_reg
*arg2
)
345 for (i
= 0; i
< 4; i
++) {
347 brw_MUL(p
, dst
[i
], arg0
[i
], arg1
[i
]);
349 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
350 brw_ADD(p
, dst
[i
], dst
[i
], arg2
[i
]);
351 brw_set_saturate(p
, 0);
356 static void emit_trunc( struct brw_compile
*p
,
357 const struct brw_reg
*dst
,
359 const struct brw_reg
*arg0
)
363 for (i
= 0; i
< 4; i
++) {
365 brw_RNDZ(p
, dst
[i
], arg0
[i
]);
370 static void emit_lrp( struct brw_compile
*p
,
371 const struct brw_reg
*dst
,
373 const struct brw_reg
*arg0
,
374 const struct brw_reg
*arg1
,
375 const struct brw_reg
*arg2
)
379 /* Uses dst as a temporary:
381 for (i
= 0; i
< 4; i
++) {
383 /* Can I use the LINE instruction for this?
385 brw_ADD(p
, dst
[i
], negate(arg0
[i
]), brw_imm_f(1.0));
386 brw_MUL(p
, brw_null_reg(), dst
[i
], arg2
[i
]);
388 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
389 brw_MAC(p
, dst
[i
], arg0
[i
], arg1
[i
]);
390 brw_set_saturate(p
, 0);
395 static void emit_sop( struct brw_compile
*p
,
396 const struct brw_reg
*dst
,
399 const struct brw_reg
*arg0
,
400 const struct brw_reg
*arg1
)
404 for (i
= 0; i
< 4; i
++) {
406 brw_MOV(p
, dst
[i
], brw_imm_f(0));
407 brw_CMP(p
, brw_null_reg(), cond
, arg0
[i
], arg1
[i
]);
408 brw_MOV(p
, dst
[i
], brw_imm_f(1.0));
409 brw_set_predicate_control_flag_value(p
, 0xff);
414 static void emit_slt( struct brw_compile
*p
,
415 const struct brw_reg
*dst
,
417 const struct brw_reg
*arg0
,
418 const struct brw_reg
*arg1
)
420 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_L
, arg0
, arg1
);
423 static void emit_sle( struct brw_compile
*p
,
424 const struct brw_reg
*dst
,
426 const struct brw_reg
*arg0
,
427 const struct brw_reg
*arg1
)
429 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_LE
, arg0
, arg1
);
432 static void emit_sgt( struct brw_compile
*p
,
433 const struct brw_reg
*dst
,
435 const struct brw_reg
*arg0
,
436 const struct brw_reg
*arg1
)
438 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_G
, arg0
, arg1
);
441 static void emit_sge( struct brw_compile
*p
,
442 const struct brw_reg
*dst
,
444 const struct brw_reg
*arg0
,
445 const struct brw_reg
*arg1
)
447 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_GE
, arg0
, arg1
);
450 static void emit_seq( struct brw_compile
*p
,
451 const struct brw_reg
*dst
,
453 const struct brw_reg
*arg0
,
454 const struct brw_reg
*arg1
)
456 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_EQ
, arg0
, arg1
);
459 static void emit_sne( struct brw_compile
*p
,
460 const struct brw_reg
*dst
,
462 const struct brw_reg
*arg0
,
463 const struct brw_reg
*arg1
)
465 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_NEQ
, arg0
, arg1
);
468 static void emit_cmp( struct brw_compile
*p
,
469 const struct brw_reg
*dst
,
471 const struct brw_reg
*arg0
,
472 const struct brw_reg
*arg1
,
473 const struct brw_reg
*arg2
)
477 for (i
= 0; i
< 4; i
++) {
479 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
480 brw_MOV(p
, dst
[i
], arg2
[i
]);
481 brw_set_saturate(p
, 0);
483 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, arg0
[i
], brw_imm_f(0));
485 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
486 brw_MOV(p
, dst
[i
], arg1
[i
]);
487 brw_set_saturate(p
, 0);
488 brw_set_predicate_control_flag_value(p
, 0xff);
493 static void emit_max( struct brw_compile
*p
,
494 const struct brw_reg
*dst
,
496 const struct brw_reg
*arg0
,
497 const struct brw_reg
*arg1
)
501 for (i
= 0; i
< 4; i
++) {
503 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
504 brw_MOV(p
, dst
[i
], arg0
[i
]);
505 brw_set_saturate(p
, 0);
507 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, arg0
[i
], arg1
[i
]);
509 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
510 brw_MOV(p
, dst
[i
], arg1
[i
]);
511 brw_set_saturate(p
, 0);
512 brw_set_predicate_control_flag_value(p
, 0xff);
517 static void emit_min( struct brw_compile
*p
,
518 const struct brw_reg
*dst
,
520 const struct brw_reg
*arg0
,
521 const struct brw_reg
*arg1
)
525 for (i
= 0; i
< 4; i
++) {
527 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
528 brw_MOV(p
, dst
[i
], arg1
[i
]);
529 brw_set_saturate(p
, 0);
531 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, arg0
[i
], arg1
[i
]);
533 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
534 brw_MOV(p
, dst
[i
], arg0
[i
]);
535 brw_set_saturate(p
, 0);
536 brw_set_predicate_control_flag_value(p
, 0xff);
542 static void emit_dp3( struct brw_compile
*p
,
543 const struct brw_reg
*dst
,
545 const struct brw_reg
*arg0
,
546 const struct brw_reg
*arg1
)
548 if (!(mask
& WRITEMASK_XYZW
))
549 return; /* Do not emit dead code */
551 assert((mask
& WRITEMASK_XYZW
) == WRITEMASK_X
);
553 brw_MUL(p
, brw_null_reg(), arg0
[0], arg1
[0]);
554 brw_MAC(p
, brw_null_reg(), arg0
[1], arg1
[1]);
556 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
557 brw_MAC(p
, dst
[0], arg0
[2], arg1
[2]);
558 brw_set_saturate(p
, 0);
562 static void emit_dp4( struct brw_compile
*p
,
563 const struct brw_reg
*dst
,
565 const struct brw_reg
*arg0
,
566 const struct brw_reg
*arg1
)
568 if (!(mask
& WRITEMASK_XYZW
))
569 return; /* Do not emit dead code */
571 assert((mask
& WRITEMASK_XYZW
) == WRITEMASK_X
);
573 brw_MUL(p
, brw_null_reg(), arg0
[0], arg1
[0]);
574 brw_MAC(p
, brw_null_reg(), arg0
[1], arg1
[1]);
575 brw_MAC(p
, brw_null_reg(), arg0
[2], arg1
[2]);
577 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
578 brw_MAC(p
, dst
[0], arg0
[3], arg1
[3]);
579 brw_set_saturate(p
, 0);
583 static void emit_dph( struct brw_compile
*p
,
584 const struct brw_reg
*dst
,
586 const struct brw_reg
*arg0
,
587 const struct brw_reg
*arg1
)
589 if (!(mask
& WRITEMASK_XYZW
))
590 return; /* Do not emit dead code */
592 assert((mask
& WRITEMASK_XYZW
) == WRITEMASK_X
);
594 brw_MUL(p
, brw_null_reg(), arg0
[0], arg1
[0]);
595 brw_MAC(p
, brw_null_reg(), arg0
[1], arg1
[1]);
596 brw_MAC(p
, dst
[0], arg0
[2], arg1
[2]);
598 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
599 brw_ADD(p
, dst
[0], dst
[0], arg1
[3]);
600 brw_set_saturate(p
, 0);
604 static void emit_xpd( struct brw_compile
*p
,
605 const struct brw_reg
*dst
,
607 const struct brw_reg
*arg0
,
608 const struct brw_reg
*arg1
)
612 assert(!(mask
& WRITEMASK_W
) == WRITEMASK_X
);
614 for (i
= 0 ; i
< 3; i
++) {
619 brw_MUL(p
, brw_null_reg(), negate(arg0
[i2
]), arg1
[i1
]);
621 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
622 brw_MAC(p
, dst
[i
], arg0
[i1
], arg1
[i2
]);
623 brw_set_saturate(p
, 0);
629 static void emit_math1( struct brw_compile
*p
,
631 const struct brw_reg
*dst
,
633 const struct brw_reg
*arg0
)
635 if (!(mask
& WRITEMASK_XYZW
))
636 return; /* Do not emit dead code */
638 //assert((mask & WRITEMASK_XYZW) == WRITEMASK_X ||
639 // function == BRW_MATH_FUNCTION_SINCOS);
641 brw_MOV(p
, brw_message_reg(2), arg0
[0]);
643 /* Send two messages to perform all 16 operations:
648 (mask
& SATURATE
) ? BRW_MATH_SATURATE_SATURATE
: BRW_MATH_SATURATE_NONE
,
651 BRW_MATH_PRECISION_FULL
);
655 static void emit_math2( struct brw_compile
*p
,
657 const struct brw_reg
*dst
,
659 const struct brw_reg
*arg0
,
660 const struct brw_reg
*arg1
)
662 if (!(mask
& WRITEMASK_XYZW
))
663 return; /* Do not emit dead code */
665 assert((mask
& WRITEMASK_XYZW
) == WRITEMASK_X
);
667 brw_push_insn_state(p
);
669 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
670 brw_MOV(p
, brw_message_reg(2), arg0
[0]);
671 brw_set_compression_control(p
, BRW_COMPRESSION_2NDHALF
);
672 brw_MOV(p
, brw_message_reg(4), sechalf(arg0
[0]));
674 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
675 brw_MOV(p
, brw_message_reg(3), arg1
[0]);
676 brw_set_compression_control(p
, BRW_COMPRESSION_2NDHALF
);
677 brw_MOV(p
, brw_message_reg(5), sechalf(arg1
[0]));
680 /* Send two messages to perform all 16 operations:
682 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
686 (mask
& SATURATE
) ? BRW_MATH_SATURATE_SATURATE
: BRW_MATH_SATURATE_NONE
,
689 BRW_MATH_DATA_VECTOR
,
690 BRW_MATH_PRECISION_FULL
);
692 brw_set_compression_control(p
, BRW_COMPRESSION_2NDHALF
);
696 (mask
& SATURATE
) ? BRW_MATH_SATURATE_SATURATE
: BRW_MATH_SATURATE_NONE
,
699 BRW_MATH_DATA_VECTOR
,
700 BRW_MATH_PRECISION_FULL
);
702 brw_pop_insn_state(p
);
707 static void emit_tex( struct brw_wm_compile
*c
,
708 const struct brw_wm_instruction
*inst
,
711 struct brw_reg
*arg
)
713 struct brw_compile
*p
= &c
->func
;
714 GLuint msgLength
, responseLength
;
718 /* How many input regs are there?
720 switch (inst
->tex_idx
) {
721 case TEXTURE_1D_INDEX
:
725 case TEXTURE_2D_INDEX
:
726 case TEXTURE_RECT_INDEX
:
731 emit
= WRITEMASK_XYZ
;
736 if (inst
->tex_shadow
) {
743 for (i
= 0; i
< nr
; i
++) {
744 static const GLuint swz
[4] = {0,1,2,2};
746 brw_MOV(p
, brw_message_reg(msgLength
+1), arg
[swz
[i
]]);
748 brw_MOV(p
, brw_message_reg(msgLength
+1), brw_imm_f(0));
752 responseLength
= 8; /* always */
755 retype(vec16(dst
[0]), BRW_REGISTER_TYPE_UW
),
757 retype(c
->payload
.depth
[0].hw_reg
, BRW_REGISTER_TYPE_UW
),
758 SURF_INDEX_TEXTURE(inst
->tex_unit
),
759 inst
->tex_unit
, /* sampler */
762 BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE
:
763 BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE
),
770 static void emit_txb( struct brw_wm_compile
*c
,
771 const struct brw_wm_instruction
*inst
,
774 struct brw_reg
*arg
)
776 struct brw_compile
*p
= &c
->func
;
779 /* Shadow ignored for txb.
781 switch (inst
->tex_idx
) {
782 case TEXTURE_1D_INDEX
:
783 brw_MOV(p
, brw_message_reg(2), arg
[0]);
784 brw_MOV(p
, brw_message_reg(4), brw_imm_f(0));
785 brw_MOV(p
, brw_message_reg(6), brw_imm_f(0));
787 case TEXTURE_2D_INDEX
:
788 case TEXTURE_RECT_INDEX
:
789 brw_MOV(p
, brw_message_reg(2), arg
[0]);
790 brw_MOV(p
, brw_message_reg(4), arg
[1]);
791 brw_MOV(p
, brw_message_reg(6), brw_imm_f(0));
794 brw_MOV(p
, brw_message_reg(2), arg
[0]);
795 brw_MOV(p
, brw_message_reg(4), arg
[1]);
796 brw_MOV(p
, brw_message_reg(6), arg
[2]);
800 brw_MOV(p
, brw_message_reg(8), arg
[3]);
804 retype(vec16(dst
[0]), BRW_REGISTER_TYPE_UW
),
806 retype(c
->payload
.depth
[0].hw_reg
, BRW_REGISTER_TYPE_UW
),
807 SURF_INDEX_TEXTURE(inst
->tex_unit
),
808 inst
->tex_unit
, /* sampler */
810 BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS
,
811 8, /* responseLength */
817 static void emit_lit( struct brw_compile
*p
,
818 const struct brw_reg
*dst
,
820 const struct brw_reg
*arg0
)
822 assert((mask
& WRITEMASK_XW
) == 0);
824 if (mask
& WRITEMASK_Y
) {
825 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
826 brw_MOV(p
, dst
[1], arg0
[0]);
827 brw_set_saturate(p
, 0);
830 if (mask
& WRITEMASK_Z
) {
831 emit_math2(p
, BRW_MATH_FUNCTION_POW
,
833 WRITEMASK_X
| (mask
& SATURATE
),
838 /* Ordinarily you'd use an iff statement to skip or shortcircuit
839 * some of the POW calculations above, but 16-wide iff statements
840 * seem to lock c1 hardware, so this is a nasty workaround:
842 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_LE
, arg0
[0], brw_imm_f(0));
844 if (mask
& WRITEMASK_Y
)
845 brw_MOV(p
, dst
[1], brw_imm_f(0));
847 if (mask
& WRITEMASK_Z
)
848 brw_MOV(p
, dst
[2], brw_imm_f(0));
850 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
854 /* Kill pixel - set execution mask to zero for those pixels which
857 static void emit_kil( struct brw_wm_compile
*c
,
858 struct brw_reg
*arg0
)
860 struct brw_compile
*p
= &c
->func
;
861 struct brw_reg r0uw
= retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW
);
864 /* XXX - usually won't need 4 compares!
866 for (i
= 0; i
< 4; i
++) {
867 brw_push_insn_state(p
);
868 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_GE
, arg0
[i
], brw_imm_f(0));
869 brw_set_predicate_control_flag_value(p
, 0xff);
870 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
871 brw_AND(p
, r0uw
, brw_flag_reg(), r0uw
);
872 brw_pop_insn_state(p
);
877 static void fire_fb_write( struct brw_wm_compile
*c
,
883 struct brw_compile
*p
= &c
->func
;
885 /* Pass through control information:
887 /* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */
889 brw_push_insn_state(p
);
890 brw_set_mask_control(p
, BRW_MASK_DISABLE
); /* ? */
891 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
893 brw_message_reg(base_reg
+ 1),
895 brw_pop_insn_state(p
);
898 /* Send framebuffer write message: */
899 /* send (16) null.0<1>:uw m0 r0.0<8;8,1>:uw 0x85a04000:ud { Align1 EOT } */
901 retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW
),
903 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW
),
911 static void emit_aa( struct brw_wm_compile
*c
,
912 struct brw_reg
*arg1
,
915 struct brw_compile
*p
= &c
->func
;
916 GLuint comp
= c
->key
.aa_dest_stencil_reg
/ 2;
917 GLuint off
= c
->key
.aa_dest_stencil_reg
% 2;
918 struct brw_reg aa
= offset(arg1
[comp
], off
);
920 brw_push_insn_state(p
);
921 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
); /* ?? */
922 brw_MOV(p
, brw_message_reg(reg
), aa
);
923 brw_pop_insn_state(p
);
927 /* Post-fragment-program processing. Send the results to the
929 * \param arg0 the fragment color
930 * \param arg1 the pass-through depth value
931 * \param arg2 the shader-computed depth value
933 static void emit_fb_write( struct brw_wm_compile
*c
,
934 struct brw_reg
*arg0
,
935 struct brw_reg
*arg1
,
936 struct brw_reg
*arg2
,
940 struct brw_compile
*p
= &c
->func
;
944 /* Reserve a space for AA - may not be needed:
946 if (c
->key
.aa_dest_stencil_reg
)
949 /* I don't really understand how this achieves the color interleave
950 * (ie RGBARGBA) in the result: [Do the saturation here]
953 brw_push_insn_state(p
);
955 for (channel
= 0; channel
< 4; channel
++) {
956 /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */
957 /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */
959 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
961 brw_message_reg(nr
+ channel
),
964 brw_set_compression_control(p
, BRW_COMPRESSION_2NDHALF
);
966 brw_message_reg(nr
+ channel
+ 4),
967 sechalf(arg0
[channel
]));
970 /* skip over the regs populated above:
974 brw_pop_insn_state(p
);
977 if (c
->key
.source_depth_to_render_target
)
979 if (c
->key
.computes_depth
)
980 brw_MOV(p
, brw_message_reg(nr
), arg2
[2]);
982 brw_MOV(p
, brw_message_reg(nr
), arg1
[1]); /* ? */
987 if (c
->key
.dest_depth_reg
)
989 GLuint comp
= c
->key
.dest_depth_reg
/ 2;
990 GLuint off
= c
->key
.dest_depth_reg
% 2;
993 brw_push_insn_state(p
);
994 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
996 brw_MOV(p
, brw_message_reg(nr
), offset(arg1
[comp
],1));
998 brw_MOV(p
, brw_message_reg(nr
+1), arg1
[comp
+1]);
999 brw_pop_insn_state(p
);
1002 brw_MOV(p
, brw_message_reg(nr
), arg1
[comp
]);
1007 if (!c
->key
.runtime_check_aads_emit
) {
1008 if (c
->key
.aa_dest_stencil_reg
)
1009 emit_aa(c
, arg1
, 2);
1011 fire_fb_write(c
, 0, nr
, target
, eot
);
1014 struct brw_reg v1_null_ud
= vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD
));
1015 struct brw_reg ip
= brw_ip_reg();
1016 struct brw_instruction
*jmp
;
1018 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1019 brw_set_conditionalmod(p
, BRW_CONDITIONAL_Z
);
1022 get_element_ud(brw_vec8_grf(1,0), 6),
1025 jmp
= brw_JMPI(p
, ip
, ip
, brw_imm_d(0));
1027 emit_aa(c
, arg1
, 2);
1028 fire_fb_write(c
, 0, nr
, target
, eot
);
1029 /* note - thread killed in subroutine */
1031 brw_land_fwd_jump(p
, jmp
);
1033 /* ELSE: Shuffle up one register to fill in the hole left for AA:
1035 fire_fb_write(c
, 1, nr
-1, target
, eot
);
1041 * Move a GPR to scratch memory.
1043 static void emit_spill( struct brw_wm_compile
*c
,
1047 struct brw_compile
*p
= &c
->func
;
1050 mov (16) m2.0<1>:ud r2.0<8;8,1>:ud { Align1 Compr }
1052 brw_MOV(p
, brw_message_reg(2), reg
);
1055 mov (1) r0.2<1>:d 0x00000080:d { Align1 NoMask }
1056 send (16) null.0<1>:uw m1 r0.0<8;8,1>:uw 0x053003ff:ud { Align1 }
1059 retype(vec16(brw_vec8_grf(0, 0)), BRW_REGISTER_TYPE_UW
),
1065 * Load a GPR from scratch memory.
1067 static void emit_unspill( struct brw_wm_compile
*c
,
1071 struct brw_compile
*p
= &c
->func
;
1073 /* Slot 0 is the undef value.
1076 brw_MOV(p
, reg
, brw_imm_f(0));
1081 mov (1) r0.2<1>:d 0x000000c0:d { Align1 NoMask }
1082 send (16) r110.0<1>:uw m1 r0.0<8;8,1>:uw 0x041243ff:ud { Align1 }
1086 retype(vec16(reg
), BRW_REGISTER_TYPE_UW
),
1092 * Retrieve up to 4 GEN4 register pairs for the given wm reg:
1093 * Args with unspill_reg != 0 will be loaded from scratch memory.
1095 static void get_argument_regs( struct brw_wm_compile
*c
,
1096 struct brw_wm_ref
*arg
[],
1097 struct brw_reg
*regs
)
1101 for (i
= 0; i
< 4; i
++) {
1103 if (arg
[i
]->unspill_reg
)
1105 brw_vec8_grf(arg
[i
]->unspill_reg
, 0),
1106 arg
[i
]->value
->spill_slot
);
1108 regs
[i
] = arg
[i
]->hw_reg
;
1111 regs
[i
] = brw_null_reg();
1118 * For values that have a spill_slot!=0, write those regs to scratch memory.
1120 static void spill_values( struct brw_wm_compile
*c
,
1121 struct brw_wm_value
*values
,
1126 for (i
= 0; i
< nr
; i
++)
1127 if (values
[i
].spill_slot
)
1128 emit_spill(c
, values
[i
].hw_reg
, values
[i
].spill_slot
);
1132 /* Emit the fragment program instructions here.
1134 void brw_wm_emit( struct brw_wm_compile
*c
)
1136 struct brw_compile
*p
= &c
->func
;
1139 brw_set_compression_control(p
, BRW_COMPRESSION_COMPRESSED
);
1141 /* Check if any of the payload regs need to be spilled:
1143 spill_values(c
, c
->payload
.depth
, 4);
1144 spill_values(c
, c
->creg
, c
->nr_creg
);
1145 spill_values(c
, c
->payload
.input_interp
, FRAG_ATTRIB_MAX
);
1148 for (insn
= 0; insn
< c
->nr_insns
; insn
++) {
1150 struct brw_wm_instruction
*inst
= &c
->instruction
[insn
];
1151 struct brw_reg args
[3][4], dst
[4];
1152 GLuint i
, dst_flags
;
1154 /* Get argument regs:
1156 for (i
= 0; i
< 3; i
++)
1157 get_argument_regs(c
, inst
->src
[i
], args
[i
]);
1161 for (i
= 0; i
< 4; i
++)
1163 dst
[i
] = inst
->dst
[i
]->hw_reg
;
1165 dst
[i
] = brw_null_reg();
1169 dst_flags
= inst
->writemask
;
1171 dst_flags
|= SATURATE
;
1173 switch (inst
->opcode
) {
1174 /* Generated instructions for calculating triangle interpolants:
1177 emit_pixel_xy(p
, dst
, dst_flags
, args
[0]);
1181 emit_delta_xy(p
, dst
, dst_flags
, args
[0], args
[1]);
1185 emit_wpos_xy(c
, dst
, dst_flags
, args
[0]);
1189 emit_pixel_w(p
, dst
, dst_flags
, args
[0], args
[1]);
1193 emit_linterp(p
, dst
, dst_flags
, args
[0], args
[1]);
1197 emit_pinterp(p
, dst
, dst_flags
, args
[0], args
[1], args
[2]);
1201 emit_cinterp(p
, dst
, dst_flags
, args
[0]);
1205 emit_fb_write(c
, args
[0], args
[1], args
[2], inst
->target
, inst
->eot
);
1208 case WM_FRONTFACING
:
1209 emit_frontfacing(p
, dst
, dst_flags
);
1212 /* Straightforward arithmetic:
1215 emit_alu2(p
, brw_ADD
, dst
, dst_flags
, args
[0], args
[1]);
1219 emit_alu1(p
, brw_FRC
, dst
, dst_flags
, args
[0]);
1223 emit_alu1(p
, brw_RNDD
, dst
, dst_flags
, args
[0]);
1227 emit_dp3(p
, dst
, dst_flags
, args
[0], args
[1]);
1231 emit_dp4(p
, dst
, dst_flags
, args
[0], args
[1]);
1235 emit_dph(p
, dst
, dst_flags
, args
[0], args
[1]);
1239 emit_trunc(p
, dst
, dst_flags
, args
[0]);
1243 emit_lrp(p
, dst
, dst_flags
, args
[0], args
[1], args
[2]);
1247 emit_mad(p
, dst
, dst_flags
, args
[0], args
[1], args
[2]);
1252 emit_alu1(p
, brw_MOV
, dst
, dst_flags
, args
[0]);
1256 emit_alu2(p
, brw_MUL
, dst
, dst_flags
, args
[0], args
[1]);
1260 emit_xpd(p
, dst
, dst_flags
, args
[0], args
[1]);
1263 /* Higher math functions:
1266 emit_math1(p
, BRW_MATH_FUNCTION_INV
, dst
, dst_flags
, args
[0]);
1270 emit_math1(p
, BRW_MATH_FUNCTION_RSQ
, dst
, dst_flags
, args
[0]);
1274 emit_math1(p
, BRW_MATH_FUNCTION_SIN
, dst
, dst_flags
, args
[0]);
1278 emit_math1(p
, BRW_MATH_FUNCTION_COS
, dst
, dst_flags
, args
[0]);
1282 emit_math1(p
, BRW_MATH_FUNCTION_EXP
, dst
, dst_flags
, args
[0]);
1286 emit_math1(p
, BRW_MATH_FUNCTION_LOG
, dst
, dst_flags
, args
[0]);
1290 /* There is an scs math function, but it would need some
1291 * fixup for 16-element execution.
1293 if (dst_flags
& WRITEMASK_X
)
1294 emit_math1(p
, BRW_MATH_FUNCTION_COS
, dst
, (dst_flags
&SATURATE
)|WRITEMASK_X
, args
[0]);
1295 if (dst_flags
& WRITEMASK_Y
)
1296 emit_math1(p
, BRW_MATH_FUNCTION_SIN
, dst
+1, (dst_flags
&SATURATE
)|WRITEMASK_X
, args
[0]);
1300 emit_math2(p
, BRW_MATH_FUNCTION_POW
, dst
, dst_flags
, args
[0], args
[1]);
1306 emit_cmp(p
, dst
, dst_flags
, args
[0], args
[1], args
[2]);
1310 emit_max(p
, dst
, dst_flags
, args
[0], args
[1]);
1314 emit_min(p
, dst
, dst_flags
, args
[0], args
[1]);
1318 emit_slt(p
, dst
, dst_flags
, args
[0], args
[1]);
1322 emit_sle(p
, dst
, dst_flags
, args
[0], args
[1]);
1325 emit_sgt(p
, dst
, dst_flags
, args
[0], args
[1]);
1328 emit_sge(p
, dst
, dst_flags
, args
[0], args
[1]);
1331 emit_seq(p
, dst
, dst_flags
, args
[0], args
[1]);
1334 emit_sne(p
, dst
, dst_flags
, args
[0], args
[1]);
1338 emit_lit(p
, dst
, dst_flags
, args
[0]);
1341 /* Texturing operations:
1344 emit_tex(c
, inst
, dst
, dst_flags
, args
[0]);
1348 emit_txb(c
, inst
, dst
, dst_flags
, args
[0]);
1352 emit_kil(c
, args
[0]);
1356 _mesa_printf("Unsupported opcode %i (%s) in fragment shader\n",
1357 inst
->opcode
, inst
->opcode
< MAX_OPCODE
?
1358 _mesa_opcode_string(inst
->opcode
) :
1362 for (i
= 0; i
< 4; i
++)
1363 if (inst
->dst
[i
] && inst
->dst
[i
]->spill_slot
)
1365 inst
->dst
[i
]->hw_reg
,
1366 inst
->dst
[i
]->spill_slot
);