2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **********************************************************************/
29 * Keith Whitwell <keith@tungstengraphics.com>
33 #include "main/macros.h"
34 #include "brw_context.h"
37 #define SATURATE (1<<5)
39 /* Not quite sure how correct this is - need to understand horiz
40 * vs. vertical strides a little better.
42 static INLINE
struct brw_reg
sechalf( struct brw_reg reg
)
51 * R0.0 -- pixel mask, one bit for each of 4 pixels in 4 tiles,
52 * corresponding to each of the 16 execution channels.
54 * R1.0 -- triangle vertex 0.X
55 * R1.1 -- triangle vertex 0.Y
56 * R1.2 -- tile 0 x,y coords (2 packed uwords)
57 * R1.3 -- tile 1 x,y coords (2 packed uwords)
58 * R1.4 -- tile 2 x,y coords (2 packed uwords)
59 * R1.5 -- tile 3 x,y coords (2 packed uwords)
66 static void emit_pixel_xy(struct brw_compile
*p
,
67 const struct brw_reg
*dst
,
69 const struct brw_reg
*arg0
)
71 struct brw_reg r1
= brw_vec1_grf(1, 0);
72 struct brw_reg r1_uw
= retype(r1
, BRW_REGISTER_TYPE_UW
);
74 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
76 /* Calculate pixel centers by adding 1 or 0 to each of the
77 * micro-tile coordinates passed in r1.
79 if (mask
& WRITEMASK_X
) {
81 vec16(retype(dst
[0], BRW_REGISTER_TYPE_UW
)),
82 stride(suboffset(r1_uw
, 4), 2, 4, 0),
83 brw_imm_v(0x10101010));
86 if (mask
& WRITEMASK_Y
) {
88 vec16(retype(dst
[1], BRW_REGISTER_TYPE_UW
)),
89 stride(suboffset(r1_uw
,5), 2, 4, 0),
90 brw_imm_v(0x11001100));
93 brw_set_compression_control(p
, BRW_COMPRESSION_COMPRESSED
);
98 static void emit_delta_xy(struct brw_compile
*p
,
99 const struct brw_reg
*dst
,
101 const struct brw_reg
*arg0
,
102 const struct brw_reg
*arg1
)
104 struct brw_reg r1
= brw_vec1_grf(1, 0);
106 /* Calc delta X,Y by subtracting origin in r1 from the pixel
109 if (mask
& WRITEMASK_X
) {
112 retype(arg0
[0], BRW_REGISTER_TYPE_UW
),
116 if (mask
& WRITEMASK_Y
) {
119 retype(arg0
[1], BRW_REGISTER_TYPE_UW
),
120 negate(suboffset(r1
,1)));
125 static void emit_wpos_xy(struct brw_wm_compile
*c
,
126 const struct brw_reg
*dst
,
128 const struct brw_reg
*arg0
)
130 struct brw_compile
*p
= &c
->func
;
132 /* Calculate the pixel offset from window bottom left into destination
135 if (mask
& WRITEMASK_X
) {
136 /* X' = X - origin */
139 retype(arg0
[0], BRW_REGISTER_TYPE_W
),
140 brw_imm_d(0 - c
->key
.origin_x
));
143 if (mask
& WRITEMASK_Y
) {
144 /* Y' = height - (Y - origin_y) = height + origin_y - Y */
147 negate(retype(arg0
[1], BRW_REGISTER_TYPE_W
)),
148 brw_imm_d(c
->key
.origin_y
+ c
->key
.drawable_height
- 1));
153 static void emit_pixel_w( struct brw_compile
*p
,
154 const struct brw_reg
*dst
,
156 const struct brw_reg
*arg0
,
157 const struct brw_reg
*deltas
)
159 /* Don't need this if all you are doing is interpolating color, for
162 if (mask
& WRITEMASK_W
) {
163 struct brw_reg interp3
= brw_vec1_grf(arg0
[0].nr
+1, 4);
165 /* Calc 1/w - just linterp wpos[3] optimized by putting the
166 * result straight into a message reg.
168 brw_LINE(p
, brw_null_reg(), interp3
, deltas
[0]);
169 brw_MAC(p
, brw_message_reg(2), suboffset(interp3
, 1), deltas
[1]);
172 brw_math_16( p
, dst
[3],
173 BRW_MATH_FUNCTION_INV
,
174 BRW_MATH_SATURATE_NONE
,
176 BRW_MATH_PRECISION_FULL
);
182 static void emit_linterp( struct brw_compile
*p
,
183 const struct brw_reg
*dst
,
185 const struct brw_reg
*arg0
,
186 const struct brw_reg
*deltas
)
188 struct brw_reg interp
[4];
189 GLuint nr
= arg0
[0].nr
;
192 interp
[0] = brw_vec1_grf(nr
, 0);
193 interp
[1] = brw_vec1_grf(nr
, 4);
194 interp
[2] = brw_vec1_grf(nr
+1, 0);
195 interp
[3] = brw_vec1_grf(nr
+1, 4);
197 for (i
= 0; i
< 4; i
++) {
199 brw_LINE(p
, brw_null_reg(), interp
[i
], deltas
[0]);
200 brw_MAC(p
, dst
[i
], suboffset(interp
[i
],1), deltas
[1]);
206 static void emit_pinterp( struct brw_compile
*p
,
207 const struct brw_reg
*dst
,
209 const struct brw_reg
*arg0
,
210 const struct brw_reg
*deltas
,
211 const struct brw_reg
*w
)
213 struct brw_reg interp
[4];
214 GLuint nr
= arg0
[0].nr
;
217 interp
[0] = brw_vec1_grf(nr
, 0);
218 interp
[1] = brw_vec1_grf(nr
, 4);
219 interp
[2] = brw_vec1_grf(nr
+1, 0);
220 interp
[3] = brw_vec1_grf(nr
+1, 4);
222 for (i
= 0; i
< 4; i
++) {
224 brw_LINE(p
, brw_null_reg(), interp
[i
], deltas
[0]);
225 brw_MAC(p
, dst
[i
], suboffset(interp
[i
],1), deltas
[1]);
228 for (i
= 0; i
< 4; i
++) {
230 brw_MUL(p
, dst
[i
], dst
[i
], w
[3]);
236 static void emit_cinterp( struct brw_compile
*p
,
237 const struct brw_reg
*dst
,
239 const struct brw_reg
*arg0
)
241 struct brw_reg interp
[4];
242 GLuint nr
= arg0
[0].nr
;
245 interp
[0] = brw_vec1_grf(nr
, 0);
246 interp
[1] = brw_vec1_grf(nr
, 4);
247 interp
[2] = brw_vec1_grf(nr
+1, 0);
248 interp
[3] = brw_vec1_grf(nr
+1, 4);
250 for (i
= 0; i
< 4; i
++) {
252 brw_MOV(p
, dst
[i
], suboffset(interp
[i
],3)); /* TODO: optimize away like other moves */
257 /* Sets the destination channels to 1.0 or 0.0 according to glFrontFacing. */
258 static void emit_frontfacing( struct brw_compile
*p
,
259 const struct brw_reg
*dst
,
262 struct brw_reg r1_6ud
= retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD
);
265 if (!(mask
& WRITEMASK_XYZW
))
268 for (i
= 0; i
< 4; i
++) {
270 brw_MOV(p
, dst
[i
], brw_imm_f(0.0));
274 /* bit 31 is "primitive is back face", so checking < (1 << 31) gives
277 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, r1_6ud
, brw_imm_ud(1 << 31));
278 for (i
= 0; i
< 4; i
++) {
280 brw_MOV(p
, dst
[i
], brw_imm_f(1.0));
283 brw_set_predicate_control_flag_value(p
, 0xff);
286 static void emit_alu1( struct brw_compile
*p
,
287 struct brw_instruction
*(*func
)(struct brw_compile
*,
290 const struct brw_reg
*dst
,
292 const struct brw_reg
*arg0
)
297 brw_set_saturate(p
, 1);
299 for (i
= 0; i
< 4; i
++) {
301 func(p
, dst
[i
], arg0
[i
]);
306 brw_set_saturate(p
, 0);
310 static void emit_alu2( struct brw_compile
*p
,
311 struct brw_instruction
*(*func
)(struct brw_compile
*,
315 const struct brw_reg
*dst
,
317 const struct brw_reg
*arg0
,
318 const struct brw_reg
*arg1
)
323 brw_set_saturate(p
, 1);
325 for (i
= 0; i
< 4; i
++) {
327 func(p
, dst
[i
], arg0
[i
], arg1
[i
]);
332 brw_set_saturate(p
, 0);
336 static void emit_mad( struct brw_compile
*p
,
337 const struct brw_reg
*dst
,
339 const struct brw_reg
*arg0
,
340 const struct brw_reg
*arg1
,
341 const struct brw_reg
*arg2
)
345 for (i
= 0; i
< 4; i
++) {
347 brw_MUL(p
, dst
[i
], arg0
[i
], arg1
[i
]);
349 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
350 brw_ADD(p
, dst
[i
], dst
[i
], arg2
[i
]);
351 brw_set_saturate(p
, 0);
357 static void emit_lrp( struct brw_compile
*p
,
358 const struct brw_reg
*dst
,
360 const struct brw_reg
*arg0
,
361 const struct brw_reg
*arg1
,
362 const struct brw_reg
*arg2
)
366 /* Uses dst as a temporary:
368 for (i
= 0; i
< 4; i
++) {
370 /* Can I use the LINE instruction for this?
372 brw_ADD(p
, dst
[i
], negate(arg0
[i
]), brw_imm_f(1.0));
373 brw_MUL(p
, brw_null_reg(), dst
[i
], arg2
[i
]);
375 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
376 brw_MAC(p
, dst
[i
], arg0
[i
], arg1
[i
]);
377 brw_set_saturate(p
, 0);
382 static void emit_sop( struct brw_compile
*p
,
383 const struct brw_reg
*dst
,
386 const struct brw_reg
*arg0
,
387 const struct brw_reg
*arg1
)
391 for (i
= 0; i
< 4; i
++) {
393 brw_MOV(p
, dst
[i
], brw_imm_f(0));
394 brw_CMP(p
, brw_null_reg(), cond
, arg0
[i
], arg1
[i
]);
395 brw_MOV(p
, dst
[i
], brw_imm_f(1.0));
396 brw_set_predicate_control_flag_value(p
, 0xff);
401 static void emit_slt( struct brw_compile
*p
,
402 const struct brw_reg
*dst
,
404 const struct brw_reg
*arg0
,
405 const struct brw_reg
*arg1
)
407 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_L
, arg0
, arg1
);
410 static void emit_sle( struct brw_compile
*p
,
411 const struct brw_reg
*dst
,
413 const struct brw_reg
*arg0
,
414 const struct brw_reg
*arg1
)
416 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_LE
, arg0
, arg1
);
419 static void emit_sgt( struct brw_compile
*p
,
420 const struct brw_reg
*dst
,
422 const struct brw_reg
*arg0
,
423 const struct brw_reg
*arg1
)
425 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_G
, arg0
, arg1
);
428 static void emit_sge( struct brw_compile
*p
,
429 const struct brw_reg
*dst
,
431 const struct brw_reg
*arg0
,
432 const struct brw_reg
*arg1
)
434 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_GE
, arg0
, arg1
);
437 static void emit_seq( struct brw_compile
*p
,
438 const struct brw_reg
*dst
,
440 const struct brw_reg
*arg0
,
441 const struct brw_reg
*arg1
)
443 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_EQ
, arg0
, arg1
);
446 static void emit_sne( struct brw_compile
*p
,
447 const struct brw_reg
*dst
,
449 const struct brw_reg
*arg0
,
450 const struct brw_reg
*arg1
)
452 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_NEQ
, arg0
, arg1
);
455 static void emit_cmp( struct brw_compile
*p
,
456 const struct brw_reg
*dst
,
458 const struct brw_reg
*arg0
,
459 const struct brw_reg
*arg1
,
460 const struct brw_reg
*arg2
)
464 for (i
= 0; i
< 4; i
++) {
466 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
467 brw_MOV(p
, dst
[i
], arg2
[i
]);
468 brw_set_saturate(p
, 0);
470 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, arg0
[i
], brw_imm_f(0));
472 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
473 brw_MOV(p
, dst
[i
], arg1
[i
]);
474 brw_set_saturate(p
, 0);
475 brw_set_predicate_control_flag_value(p
, 0xff);
480 static void emit_max( struct brw_compile
*p
,
481 const struct brw_reg
*dst
,
483 const struct brw_reg
*arg0
,
484 const struct brw_reg
*arg1
)
488 for (i
= 0; i
< 4; i
++) {
490 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
491 brw_MOV(p
, dst
[i
], arg0
[i
]);
492 brw_set_saturate(p
, 0);
494 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, arg0
[i
], arg1
[i
]);
496 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
497 brw_MOV(p
, dst
[i
], arg1
[i
]);
498 brw_set_saturate(p
, 0);
499 brw_set_predicate_control_flag_value(p
, 0xff);
504 static void emit_min( struct brw_compile
*p
,
505 const struct brw_reg
*dst
,
507 const struct brw_reg
*arg0
,
508 const struct brw_reg
*arg1
)
512 for (i
= 0; i
< 4; i
++) {
514 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
515 brw_MOV(p
, dst
[i
], arg1
[i
]);
516 brw_set_saturate(p
, 0);
518 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, arg0
[i
], arg1
[i
]);
520 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
521 brw_MOV(p
, dst
[i
], arg0
[i
]);
522 brw_set_saturate(p
, 0);
523 brw_set_predicate_control_flag_value(p
, 0xff);
529 static void emit_dp3( struct brw_compile
*p
,
530 const struct brw_reg
*dst
,
532 const struct brw_reg
*arg0
,
533 const struct brw_reg
*arg1
)
535 if (!(mask
& WRITEMASK_XYZW
))
536 return; /* Do not emit dead code */
538 assert((mask
& WRITEMASK_XYZW
) == WRITEMASK_X
);
540 brw_MUL(p
, brw_null_reg(), arg0
[0], arg1
[0]);
541 brw_MAC(p
, brw_null_reg(), arg0
[1], arg1
[1]);
543 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
544 brw_MAC(p
, dst
[0], arg0
[2], arg1
[2]);
545 brw_set_saturate(p
, 0);
549 static void emit_dp4( struct brw_compile
*p
,
550 const struct brw_reg
*dst
,
552 const struct brw_reg
*arg0
,
553 const struct brw_reg
*arg1
)
555 if (!(mask
& WRITEMASK_XYZW
))
556 return; /* Do not emit dead code */
558 assert((mask
& WRITEMASK_XYZW
) == WRITEMASK_X
);
560 brw_MUL(p
, brw_null_reg(), arg0
[0], arg1
[0]);
561 brw_MAC(p
, brw_null_reg(), arg0
[1], arg1
[1]);
562 brw_MAC(p
, brw_null_reg(), arg0
[2], arg1
[2]);
564 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
565 brw_MAC(p
, dst
[0], arg0
[3], arg1
[3]);
566 brw_set_saturate(p
, 0);
570 static void emit_dph( struct brw_compile
*p
,
571 const struct brw_reg
*dst
,
573 const struct brw_reg
*arg0
,
574 const struct brw_reg
*arg1
)
576 if (!(mask
& WRITEMASK_XYZW
))
577 return; /* Do not emit dead code */
579 assert((mask
& WRITEMASK_XYZW
) == WRITEMASK_X
);
581 brw_MUL(p
, brw_null_reg(), arg0
[0], arg1
[0]);
582 brw_MAC(p
, brw_null_reg(), arg0
[1], arg1
[1]);
583 brw_MAC(p
, dst
[0], arg0
[2], arg1
[2]);
585 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
586 brw_ADD(p
, dst
[0], dst
[0], arg1
[3]);
587 brw_set_saturate(p
, 0);
591 static void emit_xpd( struct brw_compile
*p
,
592 const struct brw_reg
*dst
,
594 const struct brw_reg
*arg0
,
595 const struct brw_reg
*arg1
)
599 assert(!(mask
& WRITEMASK_W
) == WRITEMASK_X
);
601 for (i
= 0 ; i
< 3; i
++) {
606 brw_MUL(p
, brw_null_reg(), negate(arg0
[i2
]), arg1
[i1
]);
608 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
609 brw_MAC(p
, dst
[i
], arg0
[i1
], arg1
[i2
]);
610 brw_set_saturate(p
, 0);
616 static void emit_math1( struct brw_compile
*p
,
618 const struct brw_reg
*dst
,
620 const struct brw_reg
*arg0
)
622 if (!(mask
& WRITEMASK_XYZW
))
623 return; /* Do not emit dead code */
625 //assert((mask & WRITEMASK_XYZW) == WRITEMASK_X ||
626 // function == BRW_MATH_FUNCTION_SINCOS);
628 brw_MOV(p
, brw_message_reg(2), arg0
[0]);
630 /* Send two messages to perform all 16 operations:
635 (mask
& SATURATE
) ? BRW_MATH_SATURATE_SATURATE
: BRW_MATH_SATURATE_NONE
,
638 BRW_MATH_PRECISION_FULL
);
642 static void emit_math2( struct brw_compile
*p
,
644 const struct brw_reg
*dst
,
646 const struct brw_reg
*arg0
,
647 const struct brw_reg
*arg1
)
649 if (!(mask
& WRITEMASK_XYZW
))
650 return; /* Do not emit dead code */
652 assert((mask
& WRITEMASK_XYZW
) == WRITEMASK_X
);
654 brw_push_insn_state(p
);
656 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
657 brw_MOV(p
, brw_message_reg(2), arg0
[0]);
658 brw_set_compression_control(p
, BRW_COMPRESSION_2NDHALF
);
659 brw_MOV(p
, brw_message_reg(4), sechalf(arg0
[0]));
661 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
662 brw_MOV(p
, brw_message_reg(3), arg1
[0]);
663 brw_set_compression_control(p
, BRW_COMPRESSION_2NDHALF
);
664 brw_MOV(p
, brw_message_reg(5), sechalf(arg1
[0]));
667 /* Send two messages to perform all 16 operations:
669 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
673 (mask
& SATURATE
) ? BRW_MATH_SATURATE_SATURATE
: BRW_MATH_SATURATE_NONE
,
676 BRW_MATH_DATA_VECTOR
,
677 BRW_MATH_PRECISION_FULL
);
679 brw_set_compression_control(p
, BRW_COMPRESSION_2NDHALF
);
683 (mask
& SATURATE
) ? BRW_MATH_SATURATE_SATURATE
: BRW_MATH_SATURATE_NONE
,
686 BRW_MATH_DATA_VECTOR
,
687 BRW_MATH_PRECISION_FULL
);
689 brw_pop_insn_state(p
);
694 static void emit_tex( struct brw_wm_compile
*c
,
695 const struct brw_wm_instruction
*inst
,
698 struct brw_reg
*arg
)
700 struct brw_compile
*p
= &c
->func
;
701 GLuint msgLength
, responseLength
;
705 /* How many input regs are there?
707 switch (inst
->tex_idx
) {
708 case TEXTURE_1D_INDEX
:
712 case TEXTURE_2D_INDEX
:
713 case TEXTURE_RECT_INDEX
:
718 emit
= WRITEMASK_XYZ
;
723 if (inst
->tex_shadow
) {
730 for (i
= 0; i
< nr
; i
++) {
731 static const GLuint swz
[4] = {0,1,2,2};
733 brw_MOV(p
, brw_message_reg(msgLength
+1), arg
[swz
[i
]]);
735 brw_MOV(p
, brw_message_reg(msgLength
+1), brw_imm_f(0));
739 responseLength
= 8; /* always */
742 retype(vec16(dst
[0]), BRW_REGISTER_TYPE_UW
),
744 retype(c
->payload
.depth
[0].hw_reg
, BRW_REGISTER_TYPE_UW
),
745 SURF_INDEX_TEXTURE(inst
->tex_unit
),
746 inst
->tex_unit
, /* sampler */
749 BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE
:
750 BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE
),
757 static void emit_txb( struct brw_wm_compile
*c
,
758 const struct brw_wm_instruction
*inst
,
761 struct brw_reg
*arg
)
763 struct brw_compile
*p
= &c
->func
;
766 /* Shadow ignored for txb.
768 switch (inst
->tex_idx
) {
769 case TEXTURE_1D_INDEX
:
770 brw_MOV(p
, brw_message_reg(2), arg
[0]);
771 brw_MOV(p
, brw_message_reg(4), brw_imm_f(0));
772 brw_MOV(p
, brw_message_reg(6), brw_imm_f(0));
774 case TEXTURE_2D_INDEX
:
775 case TEXTURE_RECT_INDEX
:
776 brw_MOV(p
, brw_message_reg(2), arg
[0]);
777 brw_MOV(p
, brw_message_reg(4), arg
[1]);
778 brw_MOV(p
, brw_message_reg(6), brw_imm_f(0));
781 brw_MOV(p
, brw_message_reg(2), arg
[0]);
782 brw_MOV(p
, brw_message_reg(4), arg
[1]);
783 brw_MOV(p
, brw_message_reg(6), arg
[2]);
787 brw_MOV(p
, brw_message_reg(8), arg
[3]);
791 retype(vec16(dst
[0]), BRW_REGISTER_TYPE_UW
),
793 retype(c
->payload
.depth
[0].hw_reg
, BRW_REGISTER_TYPE_UW
),
794 SURF_INDEX_TEXTURE(inst
->tex_unit
),
795 inst
->tex_unit
, /* sampler */
797 BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS
,
798 8, /* responseLength */
804 static void emit_lit( struct brw_compile
*p
,
805 const struct brw_reg
*dst
,
807 const struct brw_reg
*arg0
)
809 assert((mask
& WRITEMASK_XW
) == 0);
811 if (mask
& WRITEMASK_Y
) {
812 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
813 brw_MOV(p
, dst
[1], arg0
[0]);
814 brw_set_saturate(p
, 0);
817 if (mask
& WRITEMASK_Z
) {
818 emit_math2(p
, BRW_MATH_FUNCTION_POW
,
820 WRITEMASK_X
| (mask
& SATURATE
),
825 /* Ordinarily you'd use an iff statement to skip or shortcircuit
826 * some of the POW calculations above, but 16-wide iff statements
827 * seem to lock c1 hardware, so this is a nasty workaround:
829 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_LE
, arg0
[0], brw_imm_f(0));
831 if (mask
& WRITEMASK_Y
)
832 brw_MOV(p
, dst
[1], brw_imm_f(0));
834 if (mask
& WRITEMASK_Z
)
835 brw_MOV(p
, dst
[2], brw_imm_f(0));
837 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
841 /* Kill pixel - set execution mask to zero for those pixels which
844 static void emit_kil( struct brw_wm_compile
*c
,
845 struct brw_reg
*arg0
)
847 struct brw_compile
*p
= &c
->func
;
848 struct brw_reg r0uw
= retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW
);
851 /* XXX - usually won't need 4 compares!
853 for (i
= 0; i
< 4; i
++) {
854 brw_push_insn_state(p
);
855 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_GE
, arg0
[i
], brw_imm_f(0));
856 brw_set_predicate_control_flag_value(p
, 0xff);
857 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
858 brw_AND(p
, r0uw
, brw_flag_reg(), r0uw
);
859 brw_pop_insn_state(p
);
864 static void fire_fb_write( struct brw_wm_compile
*c
,
870 struct brw_compile
*p
= &c
->func
;
872 /* Pass through control information:
874 /* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */
876 brw_push_insn_state(p
);
877 brw_set_mask_control(p
, BRW_MASK_DISABLE
); /* ? */
878 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
880 brw_message_reg(base_reg
+ 1),
882 brw_pop_insn_state(p
);
885 /* Send framebuffer write message: */
886 /* send (16) null.0<1>:uw m0 r0.0<8;8,1>:uw 0x85a04000:ud { Align1 EOT } */
888 retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW
),
890 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW
),
898 static void emit_aa( struct brw_wm_compile
*c
,
899 struct brw_reg
*arg1
,
902 struct brw_compile
*p
= &c
->func
;
903 GLuint comp
= c
->key
.aa_dest_stencil_reg
/ 2;
904 GLuint off
= c
->key
.aa_dest_stencil_reg
% 2;
905 struct brw_reg aa
= offset(arg1
[comp
], off
);
907 brw_push_insn_state(p
);
908 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
); /* ?? */
909 brw_MOV(p
, brw_message_reg(reg
), aa
);
910 brw_pop_insn_state(p
);
914 /* Post-fragment-program processing. Send the results to the
916 * \param arg0 the fragment color
917 * \param arg1 the pass-through depth value
918 * \param arg2 the shader-computed depth value
920 static void emit_fb_write( struct brw_wm_compile
*c
,
921 struct brw_reg
*arg0
,
922 struct brw_reg
*arg1
,
923 struct brw_reg
*arg2
,
927 struct brw_compile
*p
= &c
->func
;
931 /* Reserve a space for AA - may not be needed:
933 if (c
->key
.aa_dest_stencil_reg
)
936 /* I don't really understand how this achieves the color interleave
937 * (ie RGBARGBA) in the result: [Do the saturation here]
940 brw_push_insn_state(p
);
942 for (channel
= 0; channel
< 4; channel
++) {
943 /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */
944 /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */
946 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
948 brw_message_reg(nr
+ channel
),
951 brw_set_compression_control(p
, BRW_COMPRESSION_2NDHALF
);
953 brw_message_reg(nr
+ channel
+ 4),
954 sechalf(arg0
[channel
]));
957 /* skip over the regs populated above:
961 brw_pop_insn_state(p
);
964 if (c
->key
.source_depth_to_render_target
)
966 if (c
->key
.computes_depth
)
967 brw_MOV(p
, brw_message_reg(nr
), arg2
[2]);
969 brw_MOV(p
, brw_message_reg(nr
), arg1
[1]); /* ? */
974 if (c
->key
.dest_depth_reg
)
976 GLuint comp
= c
->key
.dest_depth_reg
/ 2;
977 GLuint off
= c
->key
.dest_depth_reg
% 2;
980 brw_push_insn_state(p
);
981 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
983 brw_MOV(p
, brw_message_reg(nr
), offset(arg1
[comp
],1));
985 brw_MOV(p
, brw_message_reg(nr
+1), arg1
[comp
+1]);
986 brw_pop_insn_state(p
);
989 brw_MOV(p
, brw_message_reg(nr
), arg1
[comp
]);
994 if (!c
->key
.runtime_check_aads_emit
) {
995 if (c
->key
.aa_dest_stencil_reg
)
998 fire_fb_write(c
, 0, nr
, target
, eot
);
1001 struct brw_reg v1_null_ud
= vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD
));
1002 struct brw_reg ip
= brw_ip_reg();
1003 struct brw_instruction
*jmp
;
1005 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1006 brw_set_conditionalmod(p
, BRW_CONDITIONAL_Z
);
1009 get_element_ud(brw_vec8_grf(1,0), 6),
1012 jmp
= brw_JMPI(p
, ip
, ip
, brw_imm_w(0));
1014 emit_aa(c
, arg1
, 2);
1015 fire_fb_write(c
, 0, nr
, target
, eot
);
1016 /* note - thread killed in subroutine */
1018 brw_land_fwd_jump(p
, jmp
);
1020 /* ELSE: Shuffle up one register to fill in the hole left for AA:
1022 fire_fb_write(c
, 1, nr
-1, target
, eot
);
1028 * Move a GPR to scratch memory.
1030 static void emit_spill( struct brw_wm_compile
*c
,
1034 struct brw_compile
*p
= &c
->func
;
1037 mov (16) m2.0<1>:ud r2.0<8;8,1>:ud { Align1 Compr }
1039 brw_MOV(p
, brw_message_reg(2), reg
);
1042 mov (1) r0.2<1>:d 0x00000080:d { Align1 NoMask }
1043 send (16) null.0<1>:uw m1 r0.0<8;8,1>:uw 0x053003ff:ud { Align1 }
1046 retype(vec16(brw_vec8_grf(0, 0)), BRW_REGISTER_TYPE_UW
),
1052 * Load a GPR from scratch memory.
1054 static void emit_unspill( struct brw_wm_compile
*c
,
1058 struct brw_compile
*p
= &c
->func
;
1060 /* Slot 0 is the undef value.
1063 brw_MOV(p
, reg
, brw_imm_f(0));
1068 mov (1) r0.2<1>:d 0x000000c0:d { Align1 NoMask }
1069 send (16) r110.0<1>:uw m1 r0.0<8;8,1>:uw 0x041243ff:ud { Align1 }
1073 retype(vec16(reg
), BRW_REGISTER_TYPE_UW
),
1079 * Retrieve up to 4 GEN4 register pairs for the given wm reg:
1080 * Args with unspill_reg != 0 will be loaded from scratch memory.
1082 static void get_argument_regs( struct brw_wm_compile
*c
,
1083 struct brw_wm_ref
*arg
[],
1084 struct brw_reg
*regs
)
1088 for (i
= 0; i
< 4; i
++) {
1090 if (arg
[i
]->unspill_reg
)
1092 brw_vec8_grf(arg
[i
]->unspill_reg
, 0),
1093 arg
[i
]->value
->spill_slot
);
1095 regs
[i
] = arg
[i
]->hw_reg
;
1098 regs
[i
] = brw_null_reg();
1105 * For values that have a spill_slot!=0, write those regs to scratch memory.
1107 static void spill_values( struct brw_wm_compile
*c
,
1108 struct brw_wm_value
*values
,
1113 for (i
= 0; i
< nr
; i
++)
1114 if (values
[i
].spill_slot
)
1115 emit_spill(c
, values
[i
].hw_reg
, values
[i
].spill_slot
);
1119 /* Emit the fragment program instructions here.
1121 void brw_wm_emit( struct brw_wm_compile
*c
)
1123 struct brw_compile
*p
= &c
->func
;
1126 brw_set_compression_control(p
, BRW_COMPRESSION_COMPRESSED
);
1128 /* Check if any of the payload regs need to be spilled:
1130 spill_values(c
, c
->payload
.depth
, 4);
1131 spill_values(c
, c
->creg
, c
->nr_creg
);
1132 spill_values(c
, c
->payload
.input_interp
, FRAG_ATTRIB_MAX
);
1135 for (insn
= 0; insn
< c
->nr_insns
; insn
++) {
1137 struct brw_wm_instruction
*inst
= &c
->instruction
[insn
];
1138 struct brw_reg args
[3][4], dst
[4];
1139 GLuint i
, dst_flags
;
1141 /* Get argument regs:
1143 for (i
= 0; i
< 3; i
++)
1144 get_argument_regs(c
, inst
->src
[i
], args
[i
]);
1148 for (i
= 0; i
< 4; i
++)
1150 dst
[i
] = inst
->dst
[i
]->hw_reg
;
1152 dst
[i
] = brw_null_reg();
1156 dst_flags
= inst
->writemask
;
1158 dst_flags
|= SATURATE
;
1160 switch (inst
->opcode
) {
1161 /* Generated instructions for calculating triangle interpolants:
1164 emit_pixel_xy(p
, dst
, dst_flags
, args
[0]);
1168 emit_delta_xy(p
, dst
, dst_flags
, args
[0], args
[1]);
1172 emit_wpos_xy(c
, dst
, dst_flags
, args
[0]);
1176 emit_pixel_w(p
, dst
, dst_flags
, args
[0], args
[1]);
1180 emit_linterp(p
, dst
, dst_flags
, args
[0], args
[1]);
1184 emit_pinterp(p
, dst
, dst_flags
, args
[0], args
[1], args
[2]);
1188 emit_cinterp(p
, dst
, dst_flags
, args
[0]);
1192 emit_fb_write(c
, args
[0], args
[1], args
[2], inst
->target
, inst
->eot
);
1195 case WM_FRONTFACING
:
1196 emit_frontfacing(p
, dst
, dst_flags
);
1199 /* Straightforward arithmetic:
1202 emit_alu2(p
, brw_ADD
, dst
, dst_flags
, args
[0], args
[1]);
1206 emit_alu1(p
, brw_FRC
, dst
, dst_flags
, args
[0]);
1210 emit_alu1(p
, brw_RNDD
, dst
, dst_flags
, args
[0]);
1214 emit_dp3(p
, dst
, dst_flags
, args
[0], args
[1]);
1218 emit_dp4(p
, dst
, dst_flags
, args
[0], args
[1]);
1222 emit_dph(p
, dst
, dst_flags
, args
[0], args
[1]);
1226 emit_lrp(p
, dst
, dst_flags
, args
[0], args
[1], args
[2]);
1230 emit_mad(p
, dst
, dst_flags
, args
[0], args
[1], args
[2]);
1235 emit_alu1(p
, brw_MOV
, dst
, dst_flags
, args
[0]);
1239 emit_alu2(p
, brw_MUL
, dst
, dst_flags
, args
[0], args
[1]);
1243 emit_xpd(p
, dst
, dst_flags
, args
[0], args
[1]);
1246 /* Higher math functions:
1249 emit_math1(p
, BRW_MATH_FUNCTION_INV
, dst
, dst_flags
, args
[0]);
1253 emit_math1(p
, BRW_MATH_FUNCTION_RSQ
, dst
, dst_flags
, args
[0]);
1257 emit_math1(p
, BRW_MATH_FUNCTION_SIN
, dst
, dst_flags
, args
[0]);
1261 emit_math1(p
, BRW_MATH_FUNCTION_COS
, dst
, dst_flags
, args
[0]);
1265 emit_math1(p
, BRW_MATH_FUNCTION_EXP
, dst
, dst_flags
, args
[0]);
1269 emit_math1(p
, BRW_MATH_FUNCTION_LOG
, dst
, dst_flags
, args
[0]);
1273 /* There is an scs math function, but it would need some
1274 * fixup for 16-element execution.
1276 if (dst_flags
& WRITEMASK_X
)
1277 emit_math1(p
, BRW_MATH_FUNCTION_COS
, dst
, (dst_flags
&SATURATE
)|WRITEMASK_X
, args
[0]);
1278 if (dst_flags
& WRITEMASK_Y
)
1279 emit_math1(p
, BRW_MATH_FUNCTION_SIN
, dst
+1, (dst_flags
&SATURATE
)|WRITEMASK_X
, args
[0]);
1283 emit_math2(p
, BRW_MATH_FUNCTION_POW
, dst
, dst_flags
, args
[0], args
[1]);
1289 emit_cmp(p
, dst
, dst_flags
, args
[0], args
[1], args
[2]);
1293 emit_max(p
, dst
, dst_flags
, args
[0], args
[1]);
1297 emit_min(p
, dst
, dst_flags
, args
[0], args
[1]);
1301 emit_slt(p
, dst
, dst_flags
, args
[0], args
[1]);
1305 emit_sle(p
, dst
, dst_flags
, args
[0], args
[1]);
1308 emit_sgt(p
, dst
, dst_flags
, args
[0], args
[1]);
1311 emit_sge(p
, dst
, dst_flags
, args
[0], args
[1]);
1314 emit_seq(p
, dst
, dst_flags
, args
[0], args
[1]);
1317 emit_sne(p
, dst
, dst_flags
, args
[0], args
[1]);
1321 emit_lit(p
, dst
, dst_flags
, args
[0]);
1324 /* Texturing operations:
1327 emit_tex(c
, inst
, dst
, dst_flags
, args
[0]);
1331 emit_txb(c
, inst
, dst
, dst_flags
, args
[0]);
1335 emit_kil(c
, args
[0]);
1339 _mesa_printf("Unsupported opcode %i (%s) in fragment shader\n",
1340 inst
->opcode
, inst
->opcode
< MAX_OPCODE
?
1341 _mesa_opcode_string(inst
->opcode
) :
1345 for (i
= 0; i
< 4; i
++)
1346 if (inst
->dst
[i
] && inst
->dst
[i
]->spill_slot
)
1348 inst
->dst
[i
]->hw_reg
,
1349 inst
->dst
[i
]->spill_slot
);