2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **********************************************************************/
29 * Keith Whitwell <keith@tungstengraphics.com>
34 #include "brw_context.h"
37 #define SATURATE (1<<5)
39 /* Not quite sure how correct this is - need to understand horiz
40 * vs. vertical strides a little better.
42 static INLINE
struct brw_reg
sechalf( struct brw_reg reg
)
51 * R0.0 -- pixel mask, one bit for each of 4 pixels in 4 tiles,
52 * corresponding to each of the 16 execution channels.
54 * R1.0 -- triangle vertex 0.X
55 * R1.1 -- triangle vertex 0.Y
56 * R1.2 -- tile 0 x,y coords (2 packed uwords)
57 * R1.3 -- tile 1 x,y coords (2 packed uwords)
58 * R1.4 -- tile 2 x,y coords (2 packed uwords)
59 * R1.5 -- tile 3 x,y coords (2 packed uwords)
66 static void emit_pixel_xy(struct brw_compile
*p
,
67 const struct brw_reg
*dst
,
69 const struct brw_reg
*arg0
)
71 struct brw_reg r1
= brw_vec1_grf(1, 0);
72 struct brw_reg r1_uw
= retype(r1
, BRW_REGISTER_TYPE_UW
);
74 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
76 /* Calculate pixel centers by adding 1 or 0 to each of the
77 * micro-tile coordinates passed in r1.
79 if (mask
& WRITEMASK_X
) {
81 vec16(retype(dst
[0], BRW_REGISTER_TYPE_UW
)),
82 stride(suboffset(r1_uw
, 4), 2, 4, 0),
83 brw_imm_v(0x10101010));
86 if (mask
& WRITEMASK_Y
) {
88 vec16(retype(dst
[1], BRW_REGISTER_TYPE_UW
)),
89 stride(suboffset(r1_uw
,5), 2, 4, 0),
90 brw_imm_v(0x11001100));
93 brw_set_compression_control(p
, BRW_COMPRESSION_COMPRESSED
);
98 static void emit_delta_xy(struct brw_compile
*p
,
99 const struct brw_reg
*dst
,
101 const struct brw_reg
*arg0
,
102 const struct brw_reg
*arg1
)
104 struct brw_reg r1
= brw_vec1_grf(1, 0);
106 /* Calc delta X,Y by subtracting origin in r1 from the pixel
109 if (mask
& WRITEMASK_X
) {
112 retype(arg0
[0], BRW_REGISTER_TYPE_UW
),
116 if (mask
& WRITEMASK_Y
) {
119 retype(arg0
[1], BRW_REGISTER_TYPE_UW
),
120 negate(suboffset(r1
,1)));
125 static void emit_wpos_xy(struct brw_wm_compile
*c
,
126 const struct brw_reg
*dst
,
128 const struct brw_reg
*arg0
)
130 struct brw_compile
*p
= &c
->func
;
132 /* Calculate the pixel offset from window bottom left into destination
135 if (mask
& WRITEMASK_X
) {
136 /* X' = X - origin */
139 retype(arg0
[0], BRW_REGISTER_TYPE_W
),
140 brw_imm_d(0 - c
->key
.origin_x
));
143 if (mask
& WRITEMASK_Y
) {
144 /* Y' = height - (Y - origin_y) = height + origin_y - Y */
147 negate(retype(arg0
[1], BRW_REGISTER_TYPE_W
)),
148 brw_imm_d(c
->key
.origin_y
+ c
->key
.drawable_height
- 1));
153 static void emit_pixel_w( struct brw_compile
*p
,
154 const struct brw_reg
*dst
,
156 const struct brw_reg
*arg0
,
157 const struct brw_reg
*deltas
)
159 /* Don't need this if all you are doing is interpolating color, for
162 if (mask
& WRITEMASK_W
) {
163 struct brw_reg interp3
= brw_vec1_grf(arg0
[0].nr
+1, 4);
165 /* Calc 1/w - just linterp wpos[3] optimized by putting the
166 * result straight into a message reg.
168 brw_LINE(p
, brw_null_reg(), interp3
, deltas
[0]);
169 brw_MAC(p
, brw_message_reg(2), suboffset(interp3
, 1), deltas
[1]);
172 brw_math_16( p
, dst
[3],
173 BRW_MATH_FUNCTION_INV
,
174 BRW_MATH_SATURATE_NONE
,
176 BRW_MATH_PRECISION_FULL
);
182 static void emit_linterp( struct brw_compile
*p
,
183 const struct brw_reg
*dst
,
185 const struct brw_reg
*arg0
,
186 const struct brw_reg
*deltas
)
188 struct brw_reg interp
[4];
189 GLuint nr
= arg0
[0].nr
;
192 interp
[0] = brw_vec1_grf(nr
, 0);
193 interp
[1] = brw_vec1_grf(nr
, 4);
194 interp
[2] = brw_vec1_grf(nr
+1, 0);
195 interp
[3] = brw_vec1_grf(nr
+1, 4);
197 for(i
= 0; i
< 4; i
++ ) {
199 brw_LINE(p
, brw_null_reg(), interp
[i
], deltas
[0]);
200 brw_MAC(p
, dst
[i
], suboffset(interp
[i
],1), deltas
[1]);
206 static void emit_pinterp( struct brw_compile
*p
,
207 const struct brw_reg
*dst
,
209 const struct brw_reg
*arg0
,
210 const struct brw_reg
*deltas
,
211 const struct brw_reg
*w
)
213 struct brw_reg interp
[4];
214 GLuint nr
= arg0
[0].nr
;
217 interp
[0] = brw_vec1_grf(nr
, 0);
218 interp
[1] = brw_vec1_grf(nr
, 4);
219 interp
[2] = brw_vec1_grf(nr
+1, 0);
220 interp
[3] = brw_vec1_grf(nr
+1, 4);
222 for(i
= 0; i
< 4; i
++ ) {
224 brw_LINE(p
, brw_null_reg(), interp
[i
], deltas
[0]);
225 brw_MAC(p
, dst
[i
], suboffset(interp
[i
],1), deltas
[1]);
228 for(i
= 0; i
< 4; i
++ ) {
230 brw_MUL(p
, dst
[i
], dst
[i
], w
[3]);
235 static void emit_cinterp( struct brw_compile
*p
,
236 const struct brw_reg
*dst
,
238 const struct brw_reg
*arg0
)
240 struct brw_reg interp
[4];
241 GLuint nr
= arg0
[0].nr
;
244 interp
[0] = brw_vec1_grf(nr
, 0);
245 interp
[1] = brw_vec1_grf(nr
, 4);
246 interp
[2] = brw_vec1_grf(nr
+1, 0);
247 interp
[3] = brw_vec1_grf(nr
+1, 4);
249 for(i
= 0; i
< 4; i
++ ) {
251 brw_MOV(p
, dst
[i
], suboffset(interp
[i
],3)); /* TODO: optimize away like other moves */
260 static void emit_alu1( struct brw_compile
*p
,
261 struct brw_instruction
*(*func
)(struct brw_compile
*,
264 const struct brw_reg
*dst
,
266 const struct brw_reg
*arg0
)
271 brw_set_saturate(p
, 1);
273 for (i
= 0; i
< 4; i
++) {
275 func(p
, dst
[i
], arg0
[i
]);
280 brw_set_saturate(p
, 0);
283 static void emit_alu2( struct brw_compile
*p
,
284 struct brw_instruction
*(*func
)(struct brw_compile
*,
288 const struct brw_reg
*dst
,
290 const struct brw_reg
*arg0
,
291 const struct brw_reg
*arg1
)
296 brw_set_saturate(p
, 1);
298 for (i
= 0; i
< 4; i
++) {
300 func(p
, dst
[i
], arg0
[i
], arg1
[i
]);
305 brw_set_saturate(p
, 0);
309 static void emit_mad( struct brw_compile
*p
,
310 const struct brw_reg
*dst
,
312 const struct brw_reg
*arg0
,
313 const struct brw_reg
*arg1
,
314 const struct brw_reg
*arg2
)
318 for (i
= 0; i
< 4; i
++) {
320 brw_MUL(p
, dst
[i
], arg0
[i
], arg1
[i
]);
322 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
323 brw_ADD(p
, dst
[i
], dst
[i
], arg2
[i
]);
324 brw_set_saturate(p
, 0);
330 static void emit_lrp( struct brw_compile
*p
,
331 const struct brw_reg
*dst
,
333 const struct brw_reg
*arg0
,
334 const struct brw_reg
*arg1
,
335 const struct brw_reg
*arg2
)
339 /* Uses dst as a temporary:
341 for (i
= 0; i
< 4; i
++) {
343 /* Can I use the LINE instruction for this?
345 brw_ADD(p
, dst
[i
], negate(arg0
[i
]), brw_imm_f(1.0));
346 brw_MUL(p
, brw_null_reg(), dst
[i
], arg2
[i
]);
348 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
349 brw_MAC(p
, dst
[i
], arg0
[i
], arg1
[i
]);
350 brw_set_saturate(p
, 0);
354 static void emit_sop( struct brw_compile
*p
,
355 const struct brw_reg
*dst
,
358 const struct brw_reg
*arg0
,
359 const struct brw_reg
*arg1
)
363 for (i
= 0; i
< 4; i
++) {
365 brw_MOV(p
, dst
[i
], brw_imm_f(0));
366 brw_CMP(p
, brw_null_reg(), cond
, arg0
[i
], arg1
[i
]);
367 brw_MOV(p
, dst
[i
], brw_imm_f(1.0));
368 brw_set_predicate_control_flag_value(p
, 0xff);
373 static void emit_slt( struct brw_compile
*p
,
374 const struct brw_reg
*dst
,
376 const struct brw_reg
*arg0
,
377 const struct brw_reg
*arg1
)
379 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_L
, arg0
, arg1
);
382 static void emit_sle( struct brw_compile
*p
,
383 const struct brw_reg
*dst
,
385 const struct brw_reg
*arg0
,
386 const struct brw_reg
*arg1
)
388 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_LE
, arg0
, arg1
);
391 static void emit_sgt( struct brw_compile
*p
,
392 const struct brw_reg
*dst
,
394 const struct brw_reg
*arg0
,
395 const struct brw_reg
*arg1
)
397 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_G
, arg0
, arg1
);
400 static void emit_sge( struct brw_compile
*p
,
401 const struct brw_reg
*dst
,
403 const struct brw_reg
*arg0
,
404 const struct brw_reg
*arg1
)
406 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_GE
, arg0
, arg1
);
409 static void emit_seq( struct brw_compile
*p
,
410 const struct brw_reg
*dst
,
412 const struct brw_reg
*arg0
,
413 const struct brw_reg
*arg1
)
415 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_EQ
, arg0
, arg1
);
418 static void emit_sne( struct brw_compile
*p
,
419 const struct brw_reg
*dst
,
421 const struct brw_reg
*arg0
,
422 const struct brw_reg
*arg1
)
424 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_NEQ
, arg0
, arg1
);
427 static void emit_cmp( struct brw_compile
*p
,
428 const struct brw_reg
*dst
,
430 const struct brw_reg
*arg0
,
431 const struct brw_reg
*arg1
,
432 const struct brw_reg
*arg2
)
436 for (i
= 0; i
< 4; i
++) {
438 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
439 brw_MOV(p
, dst
[i
], arg2
[i
]);
440 brw_set_saturate(p
, 0);
442 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, arg0
[i
], brw_imm_f(0));
444 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
445 brw_MOV(p
, dst
[i
], arg1
[i
]);
446 brw_set_saturate(p
, 0);
447 brw_set_predicate_control_flag_value(p
, 0xff);
452 static void emit_max( struct brw_compile
*p
,
453 const struct brw_reg
*dst
,
455 const struct brw_reg
*arg0
,
456 const struct brw_reg
*arg1
)
460 for (i
= 0; i
< 4; i
++) {
462 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
463 brw_MOV(p
, dst
[i
], arg0
[i
]);
464 brw_set_saturate(p
, 0);
466 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, arg0
[i
], arg1
[i
]);
468 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
469 brw_MOV(p
, dst
[i
], arg1
[i
]);
470 brw_set_saturate(p
, 0);
471 brw_set_predicate_control_flag_value(p
, 0xff);
476 static void emit_min( struct brw_compile
*p
,
477 const struct brw_reg
*dst
,
479 const struct brw_reg
*arg0
,
480 const struct brw_reg
*arg1
)
484 for (i
= 0; i
< 4; i
++) {
486 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
487 brw_MOV(p
, dst
[i
], arg1
[i
]);
488 brw_set_saturate(p
, 0);
490 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, arg0
[i
], arg1
[i
]);
492 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
493 brw_MOV(p
, dst
[i
], arg0
[i
]);
494 brw_set_saturate(p
, 0);
495 brw_set_predicate_control_flag_value(p
, 0xff);
501 static void emit_dp3( struct brw_compile
*p
,
502 const struct brw_reg
*dst
,
504 const struct brw_reg
*arg0
,
505 const struct brw_reg
*arg1
)
507 if (!(mask
& WRITEMASK_XYZW
))
508 return; /* Do not emit dead code*/
510 assert((mask
& WRITEMASK_XYZW
) == WRITEMASK_X
);
512 brw_MUL(p
, brw_null_reg(), arg0
[0], arg1
[0]);
513 brw_MAC(p
, brw_null_reg(), arg0
[1], arg1
[1]);
515 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
516 brw_MAC(p
, dst
[0], arg0
[2], arg1
[2]);
517 brw_set_saturate(p
, 0);
521 static void emit_dp4( struct brw_compile
*p
,
522 const struct brw_reg
*dst
,
524 const struct brw_reg
*arg0
,
525 const struct brw_reg
*arg1
)
527 if (!(mask
& WRITEMASK_XYZW
))
528 return; /* Do not emit dead code*/
530 assert((mask
& WRITEMASK_XYZW
) == WRITEMASK_X
);
532 brw_MUL(p
, brw_null_reg(), arg0
[0], arg1
[0]);
533 brw_MAC(p
, brw_null_reg(), arg0
[1], arg1
[1]);
534 brw_MAC(p
, brw_null_reg(), arg0
[2], arg1
[2]);
536 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
537 brw_MAC(p
, dst
[0], arg0
[3], arg1
[3]);
538 brw_set_saturate(p
, 0);
542 static void emit_dph( struct brw_compile
*p
,
543 const struct brw_reg
*dst
,
545 const struct brw_reg
*arg0
,
546 const struct brw_reg
*arg1
)
548 if (!(mask
& WRITEMASK_XYZW
))
549 return; /* Do not emit dead code*/
551 assert((mask
& WRITEMASK_XYZW
) == WRITEMASK_X
);
553 brw_MUL(p
, brw_null_reg(), arg0
[0], arg1
[0]);
554 brw_MAC(p
, brw_null_reg(), arg0
[1], arg1
[1]);
555 brw_MAC(p
, dst
[0], arg0
[2], arg1
[2]);
557 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
558 brw_ADD(p
, dst
[0], dst
[0], arg1
[3]);
559 brw_set_saturate(p
, 0);
563 static void emit_xpd( struct brw_compile
*p
,
564 const struct brw_reg
*dst
,
566 const struct brw_reg
*arg0
,
567 const struct brw_reg
*arg1
)
571 assert(!(mask
& WRITEMASK_W
) == WRITEMASK_X
);
573 for (i
= 0 ; i
< 3; i
++) {
578 brw_MUL(p
, brw_null_reg(), negate(arg0
[i2
]), arg1
[i1
]);
580 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
581 brw_MAC(p
, dst
[i
], arg0
[i1
], arg1
[i2
]);
582 brw_set_saturate(p
, 0);
588 static void emit_math1( struct brw_compile
*p
,
590 const struct brw_reg
*dst
,
592 const struct brw_reg
*arg0
)
594 if (!(mask
& WRITEMASK_XYZW
))
595 return; /* Do not emit dead code*/
597 //assert((mask & WRITEMASK_XYZW) == WRITEMASK_X ||
598 // function == BRW_MATH_FUNCTION_SINCOS);
600 brw_MOV(p
, brw_message_reg(2), arg0
[0]);
602 /* Send two messages to perform all 16 operations:
607 (mask
& SATURATE
) ? BRW_MATH_SATURATE_SATURATE
: BRW_MATH_SATURATE_NONE
,
610 BRW_MATH_PRECISION_FULL
);
614 static void emit_math2( struct brw_compile
*p
,
616 const struct brw_reg
*dst
,
618 const struct brw_reg
*arg0
,
619 const struct brw_reg
*arg1
)
621 if (!(mask
& WRITEMASK_XYZW
))
622 return; /* Do not emit dead code*/
624 assert((mask
& WRITEMASK_XYZW
) == WRITEMASK_X
);
626 brw_push_insn_state(p
);
628 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
629 brw_MOV(p
, brw_message_reg(2), arg0
[0]);
630 brw_set_compression_control(p
, BRW_COMPRESSION_2NDHALF
);
631 brw_MOV(p
, brw_message_reg(4), sechalf(arg0
[0]));
633 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
634 brw_MOV(p
, brw_message_reg(3), arg1
[0]);
635 brw_set_compression_control(p
, BRW_COMPRESSION_2NDHALF
);
636 brw_MOV(p
, brw_message_reg(5), sechalf(arg1
[0]));
639 /* Send two messages to perform all 16 operations:
641 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
645 (mask
& SATURATE
) ? BRW_MATH_SATURATE_SATURATE
: BRW_MATH_SATURATE_NONE
,
648 BRW_MATH_DATA_VECTOR
,
649 BRW_MATH_PRECISION_FULL
);
651 brw_set_compression_control(p
, BRW_COMPRESSION_2NDHALF
);
655 (mask
& SATURATE
) ? BRW_MATH_SATURATE_SATURATE
: BRW_MATH_SATURATE_NONE
,
658 BRW_MATH_DATA_VECTOR
,
659 BRW_MATH_PRECISION_FULL
);
661 brw_pop_insn_state(p
);
666 static void emit_tex( struct brw_wm_compile
*c
,
667 const struct brw_wm_instruction
*inst
,
670 struct brw_reg
*arg
)
672 struct brw_compile
*p
= &c
->func
;
673 GLuint msgLength
, responseLength
;
674 GLboolean shadow
= (c
->key
.shadowtex_mask
& (1<<inst
->tex_unit
)) ? 1 : 0;
678 /* How many input regs are there?
680 switch (inst
->tex_idx
) {
681 case TEXTURE_1D_INDEX
:
685 case TEXTURE_2D_INDEX
:
686 case TEXTURE_RECT_INDEX
:
691 emit
= WRITEMASK_XYZ
;
703 for (i
= 0; i
< nr
; i
++) {
704 static const GLuint swz
[4] = {0,1,2,2};
706 brw_MOV(p
, brw_message_reg(msgLength
+1), arg
[swz
[i
]]);
708 brw_MOV(p
, brw_message_reg(msgLength
+1), brw_imm_f(0));
712 responseLength
= 8; /* always */
715 retype(vec16(dst
[0]), BRW_REGISTER_TYPE_UW
),
717 retype(c
->payload
.depth
[0].hw_reg
, BRW_REGISTER_TYPE_UW
),
718 inst
->tex_unit
+ MAX_DRAW_BUFFERS
, /* surface */
719 inst
->tex_unit
, /* sampler */
722 BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE
:
723 BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE
),
730 static void emit_txb( struct brw_wm_compile
*c
,
731 const struct brw_wm_instruction
*inst
,
734 struct brw_reg
*arg
)
736 struct brw_compile
*p
= &c
->func
;
739 /* Shadow ignored for txb.
741 switch (inst
->tex_idx
) {
742 case TEXTURE_1D_INDEX
:
743 brw_MOV(p
, brw_message_reg(2), arg
[0]);
744 brw_MOV(p
, brw_message_reg(4), brw_imm_f(0));
745 brw_MOV(p
, brw_message_reg(6), brw_imm_f(0));
747 case TEXTURE_2D_INDEX
:
748 case TEXTURE_RECT_INDEX
:
749 brw_MOV(p
, brw_message_reg(2), arg
[0]);
750 brw_MOV(p
, brw_message_reg(4), arg
[1]);
751 brw_MOV(p
, brw_message_reg(6), brw_imm_f(0));
754 brw_MOV(p
, brw_message_reg(2), arg
[0]);
755 brw_MOV(p
, brw_message_reg(4), arg
[1]);
756 brw_MOV(p
, brw_message_reg(6), arg
[2]);
760 brw_MOV(p
, brw_message_reg(8), arg
[3]);
765 retype(vec16(dst
[0]), BRW_REGISTER_TYPE_UW
),
767 retype(c
->payload
.depth
[0].hw_reg
, BRW_REGISTER_TYPE_UW
),
768 inst
->tex_unit
+ MAX_DRAW_BUFFERS
, /* surface */
769 inst
->tex_unit
, /* sampler */
771 BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS
,
772 8, /* responseLength */
779 static void emit_lit( struct brw_compile
*p
,
780 const struct brw_reg
*dst
,
782 const struct brw_reg
*arg0
)
784 assert((mask
& WRITEMASK_XW
) == 0);
786 if (mask
& WRITEMASK_Y
) {
787 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
788 brw_MOV(p
, dst
[1], arg0
[0]);
789 brw_set_saturate(p
, 0);
792 if (mask
& WRITEMASK_Z
) {
793 emit_math2(p
, BRW_MATH_FUNCTION_POW
,
795 WRITEMASK_X
| (mask
& SATURATE
),
800 /* Ordinarily you'd use an iff statement to skip or shortcircuit
801 * some of the POW calculations above, but 16-wide iff statements
802 * seem to lock c1 hardware, so this is a nasty workaround:
804 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_LE
, arg0
[0], brw_imm_f(0));
806 if (mask
& WRITEMASK_Y
)
807 brw_MOV(p
, dst
[1], brw_imm_f(0));
809 if (mask
& WRITEMASK_Z
)
810 brw_MOV(p
, dst
[2], brw_imm_f(0));
812 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
816 /* Kill pixel - set execution mask to zero for those pixels which
819 static void emit_kil( struct brw_wm_compile
*c
,
820 struct brw_reg
*arg0
)
822 struct brw_compile
*p
= &c
->func
;
823 struct brw_reg r0uw
= retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW
);
827 /* XXX - usually won't need 4 compares!
829 for (i
= 0; i
< 4; i
++) {
830 brw_push_insn_state(p
);
831 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_GE
, arg0
[i
], brw_imm_f(0));
832 brw_set_predicate_control_flag_value(p
, 0xff);
833 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
834 brw_AND(p
, r0uw
, brw_flag_reg(), r0uw
);
835 brw_pop_insn_state(p
);
839 static void fire_fb_write( struct brw_wm_compile
*c
,
845 struct brw_compile
*p
= &c
->func
;
847 /* Pass through control information:
849 /* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */
851 brw_push_insn_state(p
);
852 brw_set_mask_control(p
, BRW_MASK_DISABLE
); /* ? */
853 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
855 brw_message_reg(base_reg
+ 1),
857 brw_pop_insn_state(p
);
860 /* Send framebuffer write message: */
861 /* send (16) null.0<1>:uw m0 r0.0<8;8,1>:uw 0x85a04000:ud { Align1 EOT } */
863 retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW
),
865 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW
),
872 static void emit_aa( struct brw_wm_compile
*c
,
873 struct brw_reg
*arg1
,
876 struct brw_compile
*p
= &c
->func
;
877 GLuint comp
= c
->key
.aa_dest_stencil_reg
/ 2;
878 GLuint off
= c
->key
.aa_dest_stencil_reg
% 2;
879 struct brw_reg aa
= offset(arg1
[comp
], off
);
881 brw_push_insn_state(p
);
882 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
); /* ?? */
883 brw_MOV(p
, brw_message_reg(reg
), aa
);
884 brw_pop_insn_state(p
);
888 /* Post-fragment-program processing. Send the results to the
891 static void emit_fb_write( struct brw_wm_compile
*c
,
892 struct brw_reg
*arg0
,
893 struct brw_reg
*arg1
,
894 struct brw_reg
*arg2
,
898 struct brw_compile
*p
= &c
->func
;
902 /* Reserve a space for AA - may not be needed:
904 if (c
->key
.aa_dest_stencil_reg
)
907 /* I don't really understand how this achieves the color interleave
908 * (ie RGBARGBA) in the result: [Do the saturation here]
911 brw_push_insn_state(p
);
913 for (channel
= 0; channel
< 4; channel
++) {
914 /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */
915 /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */
917 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
919 brw_message_reg(nr
+ channel
),
922 brw_set_compression_control(p
, BRW_COMPRESSION_2NDHALF
);
924 brw_message_reg(nr
+ channel
+ 4),
925 sechalf(arg0
[channel
]));
928 /* skip over the regs populated above:
932 brw_pop_insn_state(p
);
935 if (c
->key
.source_depth_to_render_target
)
937 if (c
->key
.computes_depth
)
938 brw_MOV(p
, brw_message_reg(nr
), arg2
[2]);
940 brw_MOV(p
, brw_message_reg(nr
), arg1
[1]); /* ? */
945 if (c
->key
.dest_depth_reg
)
947 GLuint comp
= c
->key
.dest_depth_reg
/ 2;
948 GLuint off
= c
->key
.dest_depth_reg
% 2;
951 brw_push_insn_state(p
);
952 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
954 brw_MOV(p
, brw_message_reg(nr
), offset(arg1
[comp
],1));
956 brw_MOV(p
, brw_message_reg(nr
+1), arg1
[comp
+1]);
957 brw_pop_insn_state(p
);
960 brw_MOV(p
, brw_message_reg(nr
), arg1
[comp
]);
966 if (!c
->key
.runtime_check_aads_emit
) {
967 if (c
->key
.aa_dest_stencil_reg
)
970 fire_fb_write(c
, 0, nr
, target
, eot
);
973 struct brw_reg v1_null_ud
= vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD
));
974 struct brw_reg ip
= brw_ip_reg();
975 struct brw_instruction
*jmp
;
977 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
978 brw_set_conditionalmod(p
, BRW_CONDITIONAL_Z
);
981 get_element_ud(brw_vec8_grf(1,0), 6),
984 jmp
= brw_JMPI(p
, ip
, ip
, brw_imm_w(0));
987 fire_fb_write(c
, 0, nr
, target
, eot
);
988 /* note - thread killed in subroutine */
990 brw_land_fwd_jump(p
, jmp
);
992 /* ELSE: Shuffle up one register to fill in the hole left for AA:
994 fire_fb_write(c
, 1, nr
-1, target
, eot
);
1001 /* Post-fragment-program processing. Send the results to the
1004 static void emit_spill( struct brw_wm_compile
*c
,
1008 struct brw_compile
*p
= &c
->func
;
1011 mov (16) m2.0<1>:ud r2.0<8;8,1>:ud { Align1 Compr }
1013 brw_MOV(p
, brw_message_reg(2), reg
);
1016 mov (1) r0.2<1>:d 0x00000080:d { Align1 NoMask }
1017 send (16) null.0<1>:uw m1 r0.0<8;8,1>:uw 0x053003ff:ud { Align1 }
1020 retype(vec16(brw_vec8_grf(0, 0)), BRW_REGISTER_TYPE_UW
),
1025 static void emit_unspill( struct brw_wm_compile
*c
,
1029 struct brw_compile
*p
= &c
->func
;
1031 /* Slot 0 is the undef value.
1034 brw_MOV(p
, reg
, brw_imm_f(0));
1039 mov (1) r0.2<1>:d 0x000000c0:d { Align1 NoMask }
1040 send (16) r110.0<1>:uw m1 r0.0<8;8,1>:uw 0x041243ff:ud { Align1 }
1044 retype(vec16(reg
), BRW_REGISTER_TYPE_UW
),
1052 * Retrieve up to 4 GEN4 register pairs for the given wm reg:
1054 static void get_argument_regs( struct brw_wm_compile
*c
,
1055 struct brw_wm_ref
*arg
[],
1056 struct brw_reg
*regs
)
1060 for (i
= 0; i
< 4; i
++) {
1063 if (arg
[i
]->unspill_reg
)
1065 brw_vec8_grf(arg
[i
]->unspill_reg
, 0),
1066 arg
[i
]->value
->spill_slot
);
1068 regs
[i
] = arg
[i
]->hw_reg
;
1071 regs
[i
] = brw_null_reg();
1076 static void spill_values( struct brw_wm_compile
*c
,
1077 struct brw_wm_value
*values
,
1082 for (i
= 0; i
< nr
; i
++)
1083 if (values
[i
].spill_slot
)
1084 emit_spill(c
, values
[i
].hw_reg
, values
[i
].spill_slot
);
1089 /* Emit the fragment program instructions here.
1091 void brw_wm_emit( struct brw_wm_compile
*c
)
1093 struct brw_compile
*p
= &c
->func
;
1096 brw_set_compression_control(p
, BRW_COMPRESSION_COMPRESSED
);
1098 /* Check if any of the payload regs need to be spilled:
1100 spill_values(c
, c
->payload
.depth
, 4);
1101 spill_values(c
, c
->creg
, c
->nr_creg
);
1102 spill_values(c
, c
->payload
.input_interp
, FRAG_ATTRIB_MAX
);
1105 for (insn
= 0; insn
< c
->nr_insns
; insn
++) {
1107 struct brw_wm_instruction
*inst
= &c
->instruction
[insn
];
1108 struct brw_reg args
[3][4], dst
[4];
1109 GLuint i
, dst_flags
;
1111 /* Get argument regs:
1113 for (i
= 0; i
< 3; i
++)
1114 get_argument_regs(c
, inst
->src
[i
], args
[i
]);
1118 for (i
= 0; i
< 4; i
++)
1120 dst
[i
] = inst
->dst
[i
]->hw_reg
;
1122 dst
[i
] = brw_null_reg();
1126 dst_flags
= inst
->writemask
;
1128 dst_flags
|= SATURATE
;
1130 switch (inst
->opcode
) {
1131 /* Generated instructions for calculating triangle interpolants:
1134 emit_pixel_xy(p
, dst
, dst_flags
, args
[0]);
1138 emit_delta_xy(p
, dst
, dst_flags
, args
[0], args
[1]);
1142 emit_wpos_xy(c
, dst
, dst_flags
, args
[0]);
1146 emit_pixel_w(p
, dst
, dst_flags
, args
[0], args
[1]);
1150 emit_linterp(p
, dst
, dst_flags
, args
[0], args
[1]);
1154 emit_pinterp(p
, dst
, dst_flags
, args
[0], args
[1], args
[2]);
1158 emit_cinterp(p
, dst
, dst_flags
, args
[0]);
1162 emit_fb_write(c
, args
[0], args
[1], args
[2], inst
->target
, inst
->eot
);
1165 /* Straightforward arithmetic:
1168 emit_alu2(p
, brw_ADD
, dst
, dst_flags
, args
[0], args
[1]);
1172 emit_alu1(p
, brw_FRC
, dst
, dst_flags
, args
[0]);
1176 emit_alu1(p
, brw_RNDD
, dst
, dst_flags
, args
[0]);
1179 case OPCODE_DP3
: /* */
1180 emit_dp3(p
, dst
, dst_flags
, args
[0], args
[1]);
1184 emit_dp4(p
, dst
, dst_flags
, args
[0], args
[1]);
1188 emit_dph(p
, dst
, dst_flags
, args
[0], args
[1]);
1191 case OPCODE_LRP
: /* */
1192 emit_lrp(p
, dst
, dst_flags
, args
[0], args
[1], args
[2]);
1196 emit_mad(p
, dst
, dst_flags
, args
[0], args
[1], args
[2]);
1201 emit_alu1(p
, brw_MOV
, dst
, dst_flags
, args
[0]);
1205 emit_alu2(p
, brw_MUL
, dst
, dst_flags
, args
[0], args
[1]);
1209 emit_xpd(p
, dst
, dst_flags
, args
[0], args
[1]);
1212 /* Higher math functions:
1215 emit_math1(p
, BRW_MATH_FUNCTION_INV
, dst
, dst_flags
, args
[0]);
1219 emit_math1(p
, BRW_MATH_FUNCTION_RSQ
, dst
, dst_flags
, args
[0]);
1223 emit_math1(p
, BRW_MATH_FUNCTION_SIN
, dst
, dst_flags
, args
[0]);
1227 emit_math1(p
, BRW_MATH_FUNCTION_COS
, dst
, dst_flags
, args
[0]);
1231 emit_math1(p
, BRW_MATH_FUNCTION_EXP
, dst
, dst_flags
, args
[0]);
1235 emit_math1(p
, BRW_MATH_FUNCTION_LOG
, dst
, dst_flags
, args
[0]);
1239 /* There is an scs math function, but it would need some
1240 * fixup for 16-element execution.
1242 if (dst_flags
& WRITEMASK_X
)
1243 emit_math1(p
, BRW_MATH_FUNCTION_COS
, dst
, (dst_flags
&SATURATE
)|WRITEMASK_X
, args
[0]);
1244 if (dst_flags
& WRITEMASK_Y
)
1245 emit_math1(p
, BRW_MATH_FUNCTION_SIN
, dst
+1, (dst_flags
&SATURATE
)|WRITEMASK_X
, args
[0]);
1249 emit_math2(p
, BRW_MATH_FUNCTION_POW
, dst
, dst_flags
, args
[0], args
[1]);
1255 emit_cmp(p
, dst
, dst_flags
, args
[0], args
[1], args
[2]);
1259 emit_max(p
, dst
, dst_flags
, args
[0], args
[1]);
1263 emit_min(p
, dst
, dst_flags
, args
[0], args
[1]);
1267 emit_slt(p
, dst
, dst_flags
, args
[0], args
[1]);
1271 emit_sle(p
, dst
, dst_flags
, args
[0], args
[1]);
1274 emit_sgt(p
, dst
, dst_flags
, args
[0], args
[1]);
1277 emit_sge(p
, dst
, dst_flags
, args
[0], args
[1]);
1280 emit_seq(p
, dst
, dst_flags
, args
[0], args
[1]);
1283 emit_sne(p
, dst
, dst_flags
, args
[0], args
[1]);
1287 emit_lit(p
, dst
, dst_flags
, args
[0]);
1290 /* Texturing operations:
1293 emit_tex(c
, inst
, dst
, dst_flags
, args
[0]);
1297 emit_txb(c
, inst
, dst
, dst_flags
, args
[0]);
1301 emit_kil(c
, args
[0]);
1305 _mesa_printf("unsupport opcode %d in fragment program\n",
1309 for (i
= 0; i
< 4; i
++)
1310 if (inst
->dst
[i
] && inst
->dst
[i
]->spill_slot
)
1312 inst
->dst
[i
]->hw_reg
,
1313 inst
->dst
[i
]->spill_slot
);