2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **********************************************************************/
29 * Keith Whitwell <keith@tungstengraphics.com>
33 #include "main/macros.h"
34 #include "brw_context.h"
37 #define SATURATE (1<<5)
39 /* Not quite sure how correct this is - need to understand horiz
40 * vs. vertical strides a little better.
42 static INLINE
struct brw_reg
sechalf( struct brw_reg reg
)
51 * R0.0 -- pixel mask, one bit for each of 4 pixels in 4 tiles,
52 * corresponding to each of the 16 execution channels.
54 * R1.0 -- triangle vertex 0.X
55 * R1.1 -- triangle vertex 0.Y
56 * R1.2 -- tile 0 x,y coords (2 packed uwords)
57 * R1.3 -- tile 1 x,y coords (2 packed uwords)
58 * R1.4 -- tile 2 x,y coords (2 packed uwords)
59 * R1.5 -- tile 3 x,y coords (2 packed uwords)
66 static void emit_pixel_xy(struct brw_compile
*p
,
67 const struct brw_reg
*dst
,
69 const struct brw_reg
*arg0
)
71 struct brw_reg r1
= brw_vec1_grf(1, 0);
72 struct brw_reg r1_uw
= retype(r1
, BRW_REGISTER_TYPE_UW
);
74 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
76 /* Calculate pixel centers by adding 1 or 0 to each of the
77 * micro-tile coordinates passed in r1.
79 if (mask
& WRITEMASK_X
) {
81 vec16(retype(dst
[0], BRW_REGISTER_TYPE_UW
)),
82 stride(suboffset(r1_uw
, 4), 2, 4, 0),
83 brw_imm_v(0x10101010));
86 if (mask
& WRITEMASK_Y
) {
88 vec16(retype(dst
[1], BRW_REGISTER_TYPE_UW
)),
89 stride(suboffset(r1_uw
,5), 2, 4, 0),
90 brw_imm_v(0x11001100));
93 brw_set_compression_control(p
, BRW_COMPRESSION_COMPRESSED
);
98 static void emit_delta_xy(struct brw_compile
*p
,
99 const struct brw_reg
*dst
,
101 const struct brw_reg
*arg0
,
102 const struct brw_reg
*arg1
)
104 struct brw_reg r1
= brw_vec1_grf(1, 0);
106 /* Calc delta X,Y by subtracting origin in r1 from the pixel
109 if (mask
& WRITEMASK_X
) {
112 retype(arg0
[0], BRW_REGISTER_TYPE_UW
),
116 if (mask
& WRITEMASK_Y
) {
119 retype(arg0
[1], BRW_REGISTER_TYPE_UW
),
120 negate(suboffset(r1
,1)));
125 static void emit_wpos_xy(struct brw_wm_compile
*c
,
126 const struct brw_reg
*dst
,
128 const struct brw_reg
*arg0
)
130 struct brw_compile
*p
= &c
->func
;
132 /* Calculate the pixel offset from window bottom left into destination
135 if (mask
& WRITEMASK_X
) {
136 /* X' = X - origin */
139 retype(arg0
[0], BRW_REGISTER_TYPE_W
),
140 brw_imm_d(0 - c
->key
.origin_x
));
143 if (mask
& WRITEMASK_Y
) {
144 /* Y' = height - (Y - origin_y) = height + origin_y - Y */
147 negate(retype(arg0
[1], BRW_REGISTER_TYPE_W
)),
148 brw_imm_d(c
->key
.origin_y
+ c
->key
.drawable_height
- 1));
153 static void emit_pixel_w( struct brw_compile
*p
,
154 const struct brw_reg
*dst
,
156 const struct brw_reg
*arg0
,
157 const struct brw_reg
*deltas
)
159 /* Don't need this if all you are doing is interpolating color, for
162 if (mask
& WRITEMASK_W
) {
163 struct brw_reg interp3
= brw_vec1_grf(arg0
[0].nr
+1, 4);
165 /* Calc 1/w - just linterp wpos[3] optimized by putting the
166 * result straight into a message reg.
168 brw_LINE(p
, brw_null_reg(), interp3
, deltas
[0]);
169 brw_MAC(p
, brw_message_reg(2), suboffset(interp3
, 1), deltas
[1]);
172 brw_math_16( p
, dst
[3],
173 BRW_MATH_FUNCTION_INV
,
174 BRW_MATH_SATURATE_NONE
,
176 BRW_MATH_PRECISION_FULL
);
182 static void emit_linterp( struct brw_compile
*p
,
183 const struct brw_reg
*dst
,
185 const struct brw_reg
*arg0
,
186 const struct brw_reg
*deltas
)
188 struct brw_reg interp
[4];
189 GLuint nr
= arg0
[0].nr
;
192 interp
[0] = brw_vec1_grf(nr
, 0);
193 interp
[1] = brw_vec1_grf(nr
, 4);
194 interp
[2] = brw_vec1_grf(nr
+1, 0);
195 interp
[3] = brw_vec1_grf(nr
+1, 4);
197 for (i
= 0; i
< 4; i
++) {
199 brw_LINE(p
, brw_null_reg(), interp
[i
], deltas
[0]);
200 brw_MAC(p
, dst
[i
], suboffset(interp
[i
],1), deltas
[1]);
206 static void emit_pinterp( struct brw_compile
*p
,
207 const struct brw_reg
*dst
,
209 const struct brw_reg
*arg0
,
210 const struct brw_reg
*deltas
,
211 const struct brw_reg
*w
)
213 struct brw_reg interp
[4];
214 GLuint nr
= arg0
[0].nr
;
217 interp
[0] = brw_vec1_grf(nr
, 0);
218 interp
[1] = brw_vec1_grf(nr
, 4);
219 interp
[2] = brw_vec1_grf(nr
+1, 0);
220 interp
[3] = brw_vec1_grf(nr
+1, 4);
222 for (i
= 0; i
< 4; i
++) {
224 brw_LINE(p
, brw_null_reg(), interp
[i
], deltas
[0]);
225 brw_MAC(p
, dst
[i
], suboffset(interp
[i
],1), deltas
[1]);
228 for (i
= 0; i
< 4; i
++) {
230 brw_MUL(p
, dst
[i
], dst
[i
], w
[3]);
236 static void emit_cinterp( struct brw_compile
*p
,
237 const struct brw_reg
*dst
,
239 const struct brw_reg
*arg0
)
241 struct brw_reg interp
[4];
242 GLuint nr
= arg0
[0].nr
;
245 interp
[0] = brw_vec1_grf(nr
, 0);
246 interp
[1] = brw_vec1_grf(nr
, 4);
247 interp
[2] = brw_vec1_grf(nr
+1, 0);
248 interp
[3] = brw_vec1_grf(nr
+1, 4);
250 for (i
= 0; i
< 4; i
++) {
252 brw_MOV(p
, dst
[i
], suboffset(interp
[i
],3)); /* TODO: optimize away like other moves */
257 /* Sets the destination channels to 1.0 or 0.0 according to glFrontFacing. */
258 static void emit_frontfacing( struct brw_compile
*p
,
259 const struct brw_reg
*dst
,
262 struct brw_reg r1_6ud
= retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD
);
265 if (!(mask
& WRITEMASK_XYZW
))
268 for (i
= 0; i
< 4; i
++) {
270 brw_MOV(p
, dst
[i
], brw_imm_f(0.0));
274 /* bit 31 is "primitive is back face", so checking < (1 << 31) gives
277 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, r1_6ud
, brw_imm_ud(1 << 31));
278 for (i
= 0; i
< 4; i
++) {
280 brw_MOV(p
, dst
[i
], brw_imm_f(1.0));
283 brw_set_predicate_control_flag_value(p
, 0xff);
286 static void emit_alu1( struct brw_compile
*p
,
287 struct brw_instruction
*(*func
)(struct brw_compile
*,
290 const struct brw_reg
*dst
,
292 const struct brw_reg
*arg0
)
297 brw_set_saturate(p
, 1);
299 for (i
= 0; i
< 4; i
++) {
301 func(p
, dst
[i
], arg0
[i
]);
306 brw_set_saturate(p
, 0);
310 static void emit_alu2( struct brw_compile
*p
,
311 struct brw_instruction
*(*func
)(struct brw_compile
*,
315 const struct brw_reg
*dst
,
317 const struct brw_reg
*arg0
,
318 const struct brw_reg
*arg1
)
323 brw_set_saturate(p
, 1);
325 for (i
= 0; i
< 4; i
++) {
327 func(p
, dst
[i
], arg0
[i
], arg1
[i
]);
332 brw_set_saturate(p
, 0);
336 static void emit_mad( struct brw_compile
*p
,
337 const struct brw_reg
*dst
,
339 const struct brw_reg
*arg0
,
340 const struct brw_reg
*arg1
,
341 const struct brw_reg
*arg2
)
345 for (i
= 0; i
< 4; i
++) {
347 brw_MUL(p
, dst
[i
], arg0
[i
], arg1
[i
]);
349 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
350 brw_ADD(p
, dst
[i
], dst
[i
], arg2
[i
]);
351 brw_set_saturate(p
, 0);
356 static void emit_trunc( struct brw_compile
*p
,
357 const struct brw_reg
*dst
,
359 const struct brw_reg
*arg0
)
363 for (i
= 0; i
< 4; i
++) {
365 brw_RNDZ(p
, dst
[i
], arg0
[i
]);
370 static void emit_lrp( struct brw_compile
*p
,
371 const struct brw_reg
*dst
,
373 const struct brw_reg
*arg0
,
374 const struct brw_reg
*arg1
,
375 const struct brw_reg
*arg2
)
379 /* Uses dst as a temporary:
381 for (i
= 0; i
< 4; i
++) {
383 /* Can I use the LINE instruction for this?
385 brw_ADD(p
, dst
[i
], negate(arg0
[i
]), brw_imm_f(1.0));
386 brw_MUL(p
, brw_null_reg(), dst
[i
], arg2
[i
]);
388 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
389 brw_MAC(p
, dst
[i
], arg0
[i
], arg1
[i
]);
390 brw_set_saturate(p
, 0);
395 static void emit_sop( struct brw_compile
*p
,
396 const struct brw_reg
*dst
,
399 const struct brw_reg
*arg0
,
400 const struct brw_reg
*arg1
)
404 for (i
= 0; i
< 4; i
++) {
406 brw_MOV(p
, dst
[i
], brw_imm_f(0));
407 brw_CMP(p
, brw_null_reg(), cond
, arg0
[i
], arg1
[i
]);
408 brw_MOV(p
, dst
[i
], brw_imm_f(1.0));
409 brw_set_predicate_control_flag_value(p
, 0xff);
414 static void emit_slt( struct brw_compile
*p
,
415 const struct brw_reg
*dst
,
417 const struct brw_reg
*arg0
,
418 const struct brw_reg
*arg1
)
420 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_L
, arg0
, arg1
);
423 static void emit_sle( struct brw_compile
*p
,
424 const struct brw_reg
*dst
,
426 const struct brw_reg
*arg0
,
427 const struct brw_reg
*arg1
)
429 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_LE
, arg0
, arg1
);
432 static void emit_sgt( struct brw_compile
*p
,
433 const struct brw_reg
*dst
,
435 const struct brw_reg
*arg0
,
436 const struct brw_reg
*arg1
)
438 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_G
, arg0
, arg1
);
441 static void emit_sge( struct brw_compile
*p
,
442 const struct brw_reg
*dst
,
444 const struct brw_reg
*arg0
,
445 const struct brw_reg
*arg1
)
447 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_GE
, arg0
, arg1
);
450 static void emit_seq( struct brw_compile
*p
,
451 const struct brw_reg
*dst
,
453 const struct brw_reg
*arg0
,
454 const struct brw_reg
*arg1
)
456 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_EQ
, arg0
, arg1
);
459 static void emit_sne( struct brw_compile
*p
,
460 const struct brw_reg
*dst
,
462 const struct brw_reg
*arg0
,
463 const struct brw_reg
*arg1
)
465 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_NEQ
, arg0
, arg1
);
468 static void emit_cmp( struct brw_compile
*p
,
469 const struct brw_reg
*dst
,
471 const struct brw_reg
*arg0
,
472 const struct brw_reg
*arg1
,
473 const struct brw_reg
*arg2
)
477 for (i
= 0; i
< 4; i
++) {
479 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
480 brw_MOV(p
, dst
[i
], arg2
[i
]);
481 brw_set_saturate(p
, 0);
483 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, arg0
[i
], brw_imm_f(0));
485 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
486 brw_MOV(p
, dst
[i
], arg1
[i
]);
487 brw_set_saturate(p
, 0);
488 brw_set_predicate_control_flag_value(p
, 0xff);
493 static void emit_max( struct brw_compile
*p
,
494 const struct brw_reg
*dst
,
496 const struct brw_reg
*arg0
,
497 const struct brw_reg
*arg1
)
501 for (i
= 0; i
< 4; i
++) {
503 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
504 brw_MOV(p
, dst
[i
], arg0
[i
]);
505 brw_set_saturate(p
, 0);
507 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, arg0
[i
], arg1
[i
]);
509 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
510 brw_MOV(p
, dst
[i
], arg1
[i
]);
511 brw_set_saturate(p
, 0);
512 brw_set_predicate_control_flag_value(p
, 0xff);
517 static void emit_min( struct brw_compile
*p
,
518 const struct brw_reg
*dst
,
520 const struct brw_reg
*arg0
,
521 const struct brw_reg
*arg1
)
525 for (i
= 0; i
< 4; i
++) {
527 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
528 brw_MOV(p
, dst
[i
], arg1
[i
]);
529 brw_set_saturate(p
, 0);
531 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, arg0
[i
], arg1
[i
]);
533 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
534 brw_MOV(p
, dst
[i
], arg0
[i
]);
535 brw_set_saturate(p
, 0);
536 brw_set_predicate_control_flag_value(p
, 0xff);
542 static void emit_dp3( struct brw_compile
*p
,
543 const struct brw_reg
*dst
,
545 const struct brw_reg
*arg0
,
546 const struct brw_reg
*arg1
)
548 if (!(mask
& WRITEMASK_XYZW
))
549 return; /* Do not emit dead code */
551 assert((mask
& WRITEMASK_XYZW
) == WRITEMASK_X
);
553 brw_MUL(p
, brw_null_reg(), arg0
[0], arg1
[0]);
554 brw_MAC(p
, brw_null_reg(), arg0
[1], arg1
[1]);
556 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
557 brw_MAC(p
, dst
[0], arg0
[2], arg1
[2]);
558 brw_set_saturate(p
, 0);
562 static void emit_dp4( struct brw_compile
*p
,
563 const struct brw_reg
*dst
,
565 const struct brw_reg
*arg0
,
566 const struct brw_reg
*arg1
)
568 if (!(mask
& WRITEMASK_XYZW
))
569 return; /* Do not emit dead code */
571 assert((mask
& WRITEMASK_XYZW
) == WRITEMASK_X
);
573 brw_MUL(p
, brw_null_reg(), arg0
[0], arg1
[0]);
574 brw_MAC(p
, brw_null_reg(), arg0
[1], arg1
[1]);
575 brw_MAC(p
, brw_null_reg(), arg0
[2], arg1
[2]);
577 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
578 brw_MAC(p
, dst
[0], arg0
[3], arg1
[3]);
579 brw_set_saturate(p
, 0);
583 static void emit_dph( struct brw_compile
*p
,
584 const struct brw_reg
*dst
,
586 const struct brw_reg
*arg0
,
587 const struct brw_reg
*arg1
)
589 if (!(mask
& WRITEMASK_XYZW
))
590 return; /* Do not emit dead code */
592 assert((mask
& WRITEMASK_XYZW
) == WRITEMASK_X
);
594 brw_MUL(p
, brw_null_reg(), arg0
[0], arg1
[0]);
595 brw_MAC(p
, brw_null_reg(), arg0
[1], arg1
[1]);
596 brw_MAC(p
, dst
[0], arg0
[2], arg1
[2]);
598 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
599 brw_ADD(p
, dst
[0], dst
[0], arg1
[3]);
600 brw_set_saturate(p
, 0);
604 static void emit_xpd( struct brw_compile
*p
,
605 const struct brw_reg
*dst
,
607 const struct brw_reg
*arg0
,
608 const struct brw_reg
*arg1
)
612 assert(!(mask
& WRITEMASK_W
) == WRITEMASK_X
);
614 for (i
= 0 ; i
< 3; i
++) {
619 brw_MUL(p
, brw_null_reg(), negate(arg0
[i2
]), arg1
[i1
]);
621 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
622 brw_MAC(p
, dst
[i
], arg0
[i1
], arg1
[i2
]);
623 brw_set_saturate(p
, 0);
629 static void emit_math1( struct brw_compile
*p
,
631 const struct brw_reg
*dst
,
633 const struct brw_reg
*arg0
)
635 if (!(mask
& WRITEMASK_XYZW
))
636 return; /* Do not emit dead code */
638 //assert((mask & WRITEMASK_XYZW) == WRITEMASK_X ||
639 // function == BRW_MATH_FUNCTION_SINCOS);
641 brw_MOV(p
, brw_message_reg(2), arg0
[0]);
643 /* Send two messages to perform all 16 operations:
648 (mask
& SATURATE
) ? BRW_MATH_SATURATE_SATURATE
: BRW_MATH_SATURATE_NONE
,
651 BRW_MATH_PRECISION_FULL
);
655 static void emit_math2( struct brw_compile
*p
,
657 const struct brw_reg
*dst
,
659 const struct brw_reg
*arg0
,
660 const struct brw_reg
*arg1
)
662 if (!(mask
& WRITEMASK_XYZW
))
663 return; /* Do not emit dead code */
665 assert((mask
& WRITEMASK_XYZW
) == WRITEMASK_X
);
667 brw_push_insn_state(p
);
669 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
670 brw_MOV(p
, brw_message_reg(2), arg0
[0]);
671 brw_set_compression_control(p
, BRW_COMPRESSION_2NDHALF
);
672 brw_MOV(p
, brw_message_reg(4), sechalf(arg0
[0]));
674 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
675 brw_MOV(p
, brw_message_reg(3), arg1
[0]);
676 brw_set_compression_control(p
, BRW_COMPRESSION_2NDHALF
);
677 brw_MOV(p
, brw_message_reg(5), sechalf(arg1
[0]));
680 /* Send two messages to perform all 16 operations:
682 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
686 (mask
& SATURATE
) ? BRW_MATH_SATURATE_SATURATE
: BRW_MATH_SATURATE_NONE
,
689 BRW_MATH_DATA_VECTOR
,
690 BRW_MATH_PRECISION_FULL
);
692 brw_set_compression_control(p
, BRW_COMPRESSION_2NDHALF
);
696 (mask
& SATURATE
) ? BRW_MATH_SATURATE_SATURATE
: BRW_MATH_SATURATE_NONE
,
699 BRW_MATH_DATA_VECTOR
,
700 BRW_MATH_PRECISION_FULL
);
702 brw_pop_insn_state(p
);
707 static void emit_tex( struct brw_wm_compile
*c
,
708 const struct brw_wm_instruction
*inst
,
711 struct brw_reg
*arg
)
713 struct brw_compile
*p
= &c
->func
;
714 GLuint msgLength
, responseLength
;
719 /* How many input regs are there?
721 switch (inst
->tex_idx
) {
722 case TEXTURE_1D_INDEX
:
726 case TEXTURE_2D_INDEX
:
727 case TEXTURE_RECT_INDEX
:
732 emit
= WRITEMASK_XYZ
;
737 if (inst
->tex_shadow
) {
744 for (i
= 0; i
< nr
; i
++) {
745 static const GLuint swz
[4] = {0,1,2,2};
747 brw_MOV(p
, brw_message_reg(msgLength
+1), arg
[swz
[i
]]);
749 brw_MOV(p
, brw_message_reg(msgLength
+1), brw_imm_f(0));
753 responseLength
= 8; /* always */
755 if (BRW_IS_IGDNG(p
->brw
)) {
756 if (inst
->tex_shadow
)
757 msg_type
= BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE_IGDNG
;
759 msg_type
= BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_IGDNG
;
761 if (inst
->tex_shadow
)
762 msg_type
= BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE
;
764 msg_type
= BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE
;
768 retype(vec16(dst
[0]), BRW_REGISTER_TYPE_UW
),
770 retype(c
->payload
.depth
[0].hw_reg
, BRW_REGISTER_TYPE_UW
),
771 SURF_INDEX_TEXTURE(inst
->tex_unit
),
772 inst
->tex_unit
, /* sampler */
779 BRW_SAMPLER_SIMD_MODE_SIMD16
);
783 static void emit_txb( struct brw_wm_compile
*c
,
784 const struct brw_wm_instruction
*inst
,
787 struct brw_reg
*arg
)
789 struct brw_compile
*p
= &c
->func
;
792 /* Shadow ignored for txb.
794 switch (inst
->tex_idx
) {
795 case TEXTURE_1D_INDEX
:
796 brw_MOV(p
, brw_message_reg(2), arg
[0]);
797 brw_MOV(p
, brw_message_reg(4), brw_imm_f(0));
798 brw_MOV(p
, brw_message_reg(6), brw_imm_f(0));
800 case TEXTURE_2D_INDEX
:
801 case TEXTURE_RECT_INDEX
:
802 brw_MOV(p
, brw_message_reg(2), arg
[0]);
803 brw_MOV(p
, brw_message_reg(4), arg
[1]);
804 brw_MOV(p
, brw_message_reg(6), brw_imm_f(0));
807 brw_MOV(p
, brw_message_reg(2), arg
[0]);
808 brw_MOV(p
, brw_message_reg(4), arg
[1]);
809 brw_MOV(p
, brw_message_reg(6), arg
[2]);
813 brw_MOV(p
, brw_message_reg(8), arg
[3]);
816 if (BRW_IS_IGDNG(p
->brw
))
817 msg_type
= BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS_IGDNG
;
819 msg_type
= BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS
;
822 retype(vec16(dst
[0]), BRW_REGISTER_TYPE_UW
),
824 retype(c
->payload
.depth
[0].hw_reg
, BRW_REGISTER_TYPE_UW
),
825 SURF_INDEX_TEXTURE(inst
->tex_unit
),
826 inst
->tex_unit
, /* sampler */
829 8, /* responseLength */
833 BRW_SAMPLER_SIMD_MODE_SIMD16
);
837 static void emit_lit( struct brw_compile
*p
,
838 const struct brw_reg
*dst
,
840 const struct brw_reg
*arg0
)
842 assert((mask
& WRITEMASK_XW
) == 0);
844 if (mask
& WRITEMASK_Y
) {
845 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
846 brw_MOV(p
, dst
[1], arg0
[0]);
847 brw_set_saturate(p
, 0);
850 if (mask
& WRITEMASK_Z
) {
851 emit_math2(p
, BRW_MATH_FUNCTION_POW
,
853 WRITEMASK_X
| (mask
& SATURATE
),
858 /* Ordinarily you'd use an iff statement to skip or shortcircuit
859 * some of the POW calculations above, but 16-wide iff statements
860 * seem to lock c1 hardware, so this is a nasty workaround:
862 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_LE
, arg0
[0], brw_imm_f(0));
864 if (mask
& WRITEMASK_Y
)
865 brw_MOV(p
, dst
[1], brw_imm_f(0));
867 if (mask
& WRITEMASK_Z
)
868 brw_MOV(p
, dst
[2], brw_imm_f(0));
870 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
874 /* Kill pixel - set execution mask to zero for those pixels which
877 static void emit_kil( struct brw_wm_compile
*c
,
878 struct brw_reg
*arg0
)
880 struct brw_compile
*p
= &c
->func
;
881 struct brw_reg r0uw
= retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW
);
884 /* XXX - usually won't need 4 compares!
886 for (i
= 0; i
< 4; i
++) {
887 brw_push_insn_state(p
);
888 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_GE
, arg0
[i
], brw_imm_f(0));
889 brw_set_predicate_control_flag_value(p
, 0xff);
890 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
891 brw_AND(p
, r0uw
, brw_flag_reg(), r0uw
);
892 brw_pop_insn_state(p
);
897 static void fire_fb_write( struct brw_wm_compile
*c
,
903 struct brw_compile
*p
= &c
->func
;
905 /* Pass through control information:
907 /* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */
909 brw_push_insn_state(p
);
910 brw_set_mask_control(p
, BRW_MASK_DISABLE
); /* ? */
911 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
913 brw_message_reg(base_reg
+ 1),
915 brw_pop_insn_state(p
);
918 /* Send framebuffer write message: */
919 /* send (16) null.0<1>:uw m0 r0.0<8;8,1>:uw 0x85a04000:ud { Align1 EOT } */
921 retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW
),
923 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW
),
931 static void emit_aa( struct brw_wm_compile
*c
,
932 struct brw_reg
*arg1
,
935 struct brw_compile
*p
= &c
->func
;
936 GLuint comp
= c
->key
.aa_dest_stencil_reg
/ 2;
937 GLuint off
= c
->key
.aa_dest_stencil_reg
% 2;
938 struct brw_reg aa
= offset(arg1
[comp
], off
);
940 brw_push_insn_state(p
);
941 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
); /* ?? */
942 brw_MOV(p
, brw_message_reg(reg
), aa
);
943 brw_pop_insn_state(p
);
947 /* Post-fragment-program processing. Send the results to the
949 * \param arg0 the fragment color
950 * \param arg1 the pass-through depth value
951 * \param arg2 the shader-computed depth value
953 static void emit_fb_write( struct brw_wm_compile
*c
,
954 struct brw_reg
*arg0
,
955 struct brw_reg
*arg1
,
956 struct brw_reg
*arg2
,
960 struct brw_compile
*p
= &c
->func
;
964 /* Reserve a space for AA - may not be needed:
966 if (c
->key
.aa_dest_stencil_reg
)
969 /* I don't really understand how this achieves the color interleave
970 * (ie RGBARGBA) in the result: [Do the saturation here]
973 brw_push_insn_state(p
);
975 for (channel
= 0; channel
< 4; channel
++) {
976 /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */
977 /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */
979 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
981 brw_message_reg(nr
+ channel
),
984 brw_set_compression_control(p
, BRW_COMPRESSION_2NDHALF
);
986 brw_message_reg(nr
+ channel
+ 4),
987 sechalf(arg0
[channel
]));
990 /* skip over the regs populated above:
994 brw_pop_insn_state(p
);
997 if (c
->key
.source_depth_to_render_target
)
999 if (c
->key
.computes_depth
)
1000 brw_MOV(p
, brw_message_reg(nr
), arg2
[2]);
1002 brw_MOV(p
, brw_message_reg(nr
), arg1
[1]); /* ? */
1007 if (c
->key
.dest_depth_reg
)
1009 GLuint comp
= c
->key
.dest_depth_reg
/ 2;
1010 GLuint off
= c
->key
.dest_depth_reg
% 2;
1013 brw_push_insn_state(p
);
1014 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1016 brw_MOV(p
, brw_message_reg(nr
), offset(arg1
[comp
],1));
1018 brw_MOV(p
, brw_message_reg(nr
+1), arg1
[comp
+1]);
1019 brw_pop_insn_state(p
);
1022 brw_MOV(p
, brw_message_reg(nr
), arg1
[comp
]);
1027 if (!c
->key
.runtime_check_aads_emit
) {
1028 if (c
->key
.aa_dest_stencil_reg
)
1029 emit_aa(c
, arg1
, 2);
1031 fire_fb_write(c
, 0, nr
, target
, eot
);
1034 struct brw_reg v1_null_ud
= vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD
));
1035 struct brw_reg ip
= brw_ip_reg();
1036 struct brw_instruction
*jmp
;
1038 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1039 brw_set_conditionalmod(p
, BRW_CONDITIONAL_Z
);
1042 get_element_ud(brw_vec8_grf(1,0), 6),
1045 jmp
= brw_JMPI(p
, ip
, ip
, brw_imm_d(0));
1047 emit_aa(c
, arg1
, 2);
1048 fire_fb_write(c
, 0, nr
, target
, eot
);
1049 /* note - thread killed in subroutine */
1051 brw_land_fwd_jump(p
, jmp
);
1053 /* ELSE: Shuffle up one register to fill in the hole left for AA:
1055 fire_fb_write(c
, 1, nr
-1, target
, eot
);
1061 * Move a GPR to scratch memory.
1063 static void emit_spill( struct brw_wm_compile
*c
,
1067 struct brw_compile
*p
= &c
->func
;
1070 mov (16) m2.0<1>:ud r2.0<8;8,1>:ud { Align1 Compr }
1072 brw_MOV(p
, brw_message_reg(2), reg
);
1075 mov (1) r0.2<1>:d 0x00000080:d { Align1 NoMask }
1076 send (16) null.0<1>:uw m1 r0.0<8;8,1>:uw 0x053003ff:ud { Align1 }
1079 retype(vec16(brw_vec8_grf(0, 0)), BRW_REGISTER_TYPE_UW
),
1085 * Load a GPR from scratch memory.
1087 static void emit_unspill( struct brw_wm_compile
*c
,
1091 struct brw_compile
*p
= &c
->func
;
1093 /* Slot 0 is the undef value.
1096 brw_MOV(p
, reg
, brw_imm_f(0));
1101 mov (1) r0.2<1>:d 0x000000c0:d { Align1 NoMask }
1102 send (16) r110.0<1>:uw m1 r0.0<8;8,1>:uw 0x041243ff:ud { Align1 }
1106 retype(vec16(reg
), BRW_REGISTER_TYPE_UW
),
1112 * Retrieve up to 4 GEN4 register pairs for the given wm reg:
1113 * Args with unspill_reg != 0 will be loaded from scratch memory.
1115 static void get_argument_regs( struct brw_wm_compile
*c
,
1116 struct brw_wm_ref
*arg
[],
1117 struct brw_reg
*regs
)
1121 for (i
= 0; i
< 4; i
++) {
1123 if (arg
[i
]->unspill_reg
)
1125 brw_vec8_grf(arg
[i
]->unspill_reg
, 0),
1126 arg
[i
]->value
->spill_slot
);
1128 regs
[i
] = arg
[i
]->hw_reg
;
1131 regs
[i
] = brw_null_reg();
1138 * For values that have a spill_slot!=0, write those regs to scratch memory.
1140 static void spill_values( struct brw_wm_compile
*c
,
1141 struct brw_wm_value
*values
,
1146 for (i
= 0; i
< nr
; i
++)
1147 if (values
[i
].spill_slot
)
1148 emit_spill(c
, values
[i
].hw_reg
, values
[i
].spill_slot
);
1152 /* Emit the fragment program instructions here.
1154 void brw_wm_emit( struct brw_wm_compile
*c
)
1156 struct brw_compile
*p
= &c
->func
;
1159 brw_set_compression_control(p
, BRW_COMPRESSION_COMPRESSED
);
1161 /* Check if any of the payload regs need to be spilled:
1163 spill_values(c
, c
->payload
.depth
, 4);
1164 spill_values(c
, c
->creg
, c
->nr_creg
);
1165 spill_values(c
, c
->payload
.input_interp
, FRAG_ATTRIB_MAX
);
1168 for (insn
= 0; insn
< c
->nr_insns
; insn
++) {
1170 struct brw_wm_instruction
*inst
= &c
->instruction
[insn
];
1171 struct brw_reg args
[3][4], dst
[4];
1172 GLuint i
, dst_flags
;
1174 /* Get argument regs:
1176 for (i
= 0; i
< 3; i
++)
1177 get_argument_regs(c
, inst
->src
[i
], args
[i
]);
1181 for (i
= 0; i
< 4; i
++)
1183 dst
[i
] = inst
->dst
[i
]->hw_reg
;
1185 dst
[i
] = brw_null_reg();
1189 dst_flags
= inst
->writemask
;
1191 dst_flags
|= SATURATE
;
1193 switch (inst
->opcode
) {
1194 /* Generated instructions for calculating triangle interpolants:
1197 emit_pixel_xy(p
, dst
, dst_flags
, args
[0]);
1201 emit_delta_xy(p
, dst
, dst_flags
, args
[0], args
[1]);
1205 emit_wpos_xy(c
, dst
, dst_flags
, args
[0]);
1209 emit_pixel_w(p
, dst
, dst_flags
, args
[0], args
[1]);
1213 emit_linterp(p
, dst
, dst_flags
, args
[0], args
[1]);
1217 emit_pinterp(p
, dst
, dst_flags
, args
[0], args
[1], args
[2]);
1221 emit_cinterp(p
, dst
, dst_flags
, args
[0]);
1225 emit_fb_write(c
, args
[0], args
[1], args
[2], inst
->target
, inst
->eot
);
1228 case WM_FRONTFACING
:
1229 emit_frontfacing(p
, dst
, dst_flags
);
1232 /* Straightforward arithmetic:
1235 emit_alu2(p
, brw_ADD
, dst
, dst_flags
, args
[0], args
[1]);
1239 emit_alu1(p
, brw_FRC
, dst
, dst_flags
, args
[0]);
1243 emit_alu1(p
, brw_RNDD
, dst
, dst_flags
, args
[0]);
1247 emit_dp3(p
, dst
, dst_flags
, args
[0], args
[1]);
1251 emit_dp4(p
, dst
, dst_flags
, args
[0], args
[1]);
1255 emit_dph(p
, dst
, dst_flags
, args
[0], args
[1]);
1259 emit_trunc(p
, dst
, dst_flags
, args
[0]);
1263 emit_lrp(p
, dst
, dst_flags
, args
[0], args
[1], args
[2]);
1267 emit_mad(p
, dst
, dst_flags
, args
[0], args
[1], args
[2]);
1272 emit_alu1(p
, brw_MOV
, dst
, dst_flags
, args
[0]);
1276 emit_alu2(p
, brw_MUL
, dst
, dst_flags
, args
[0], args
[1]);
1280 emit_xpd(p
, dst
, dst_flags
, args
[0], args
[1]);
1283 /* Higher math functions:
1286 emit_math1(p
, BRW_MATH_FUNCTION_INV
, dst
, dst_flags
, args
[0]);
1290 emit_math1(p
, BRW_MATH_FUNCTION_RSQ
, dst
, dst_flags
, args
[0]);
1294 emit_math1(p
, BRW_MATH_FUNCTION_SIN
, dst
, dst_flags
, args
[0]);
1298 emit_math1(p
, BRW_MATH_FUNCTION_COS
, dst
, dst_flags
, args
[0]);
1302 emit_math1(p
, BRW_MATH_FUNCTION_EXP
, dst
, dst_flags
, args
[0]);
1306 emit_math1(p
, BRW_MATH_FUNCTION_LOG
, dst
, dst_flags
, args
[0]);
1310 /* There is an scs math function, but it would need some
1311 * fixup for 16-element execution.
1313 if (dst_flags
& WRITEMASK_X
)
1314 emit_math1(p
, BRW_MATH_FUNCTION_COS
, dst
, (dst_flags
&SATURATE
)|WRITEMASK_X
, args
[0]);
1315 if (dst_flags
& WRITEMASK_Y
)
1316 emit_math1(p
, BRW_MATH_FUNCTION_SIN
, dst
+1, (dst_flags
&SATURATE
)|WRITEMASK_X
, args
[0]);
1320 emit_math2(p
, BRW_MATH_FUNCTION_POW
, dst
, dst_flags
, args
[0], args
[1]);
1326 emit_cmp(p
, dst
, dst_flags
, args
[0], args
[1], args
[2]);
1330 emit_max(p
, dst
, dst_flags
, args
[0], args
[1]);
1334 emit_min(p
, dst
, dst_flags
, args
[0], args
[1]);
1338 emit_slt(p
, dst
, dst_flags
, args
[0], args
[1]);
1342 emit_sle(p
, dst
, dst_flags
, args
[0], args
[1]);
1345 emit_sgt(p
, dst
, dst_flags
, args
[0], args
[1]);
1348 emit_sge(p
, dst
, dst_flags
, args
[0], args
[1]);
1351 emit_seq(p
, dst
, dst_flags
, args
[0], args
[1]);
1354 emit_sne(p
, dst
, dst_flags
, args
[0], args
[1]);
1358 emit_lit(p
, dst
, dst_flags
, args
[0]);
1361 /* Texturing operations:
1364 emit_tex(c
, inst
, dst
, dst_flags
, args
[0]);
1368 emit_txb(c
, inst
, dst
, dst_flags
, args
[0]);
1372 emit_kil(c
, args
[0]);
1376 _mesa_printf("Unsupported opcode %i (%s) in fragment shader\n",
1377 inst
->opcode
, inst
->opcode
< MAX_OPCODE
?
1378 _mesa_opcode_string(inst
->opcode
) :
1382 for (i
= 0; i
< 4; i
++)
1383 if (inst
->dst
[i
] && inst
->dst
[i
]->spill_slot
)
1385 inst
->dst
[i
]->hw_reg
,
1386 inst
->dst
[i
]->spill_slot
);
1389 if (INTEL_DEBUG
& DEBUG_WM
) {
1392 _mesa_printf("wm-native:\n");
1393 for (i
= 0; i
< p
->nr_insn
; i
++)
1394 brw_disasm(stderr
, &p
->store
[i
]);