2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **********************************************************************/
29 * Keith Whitwell <keith@tungstengraphics.com>
34 #include "brw_context.h"
37 #define SATURATE (1<<5)
39 /* Not quite sure how correct this is - need to understand horiz
40 * vs. vertical strides a little better.
42 static __inline
struct brw_reg
sechalf( struct brw_reg reg
)
51 * R0.0 -- pixel mask, one bit for each of 4 pixels in 4 tiles,
52 * corresponding to each of the 16 execution channels.
54 * R1.0 -- triangle vertex 0.X
55 * R1.1 -- triangle vertex 0.Y
56 * R1.2 -- tile 0 x,y coords (2 packed uwords)
57 * R1.3 -- tile 1 x,y coords (2 packed uwords)
58 * R1.4 -- tile 2 x,y coords (2 packed uwords)
59 * R1.5 -- tile 3 x,y coords (2 packed uwords)
66 static void emit_pixel_xy(struct brw_compile
*p
,
67 const struct brw_reg
*dst
,
69 const struct brw_reg
*arg0
)
71 struct brw_reg r1
= brw_vec1_grf(1, 0);
72 struct brw_reg r1_uw
= retype(r1
, BRW_REGISTER_TYPE_UW
);
74 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
76 /* Calculate pixel centers by adding 1 or 0 to each of the
77 * micro-tile coordinates passed in r1.
79 if (mask
& WRITEMASK_X
) {
81 vec16(retype(dst
[0], BRW_REGISTER_TYPE_UW
)),
82 stride(suboffset(r1_uw
, 4), 2, 4, 0),
83 brw_imm_v(0x10101010));
86 if (mask
& WRITEMASK_Y
) {
88 vec16(retype(dst
[1], BRW_REGISTER_TYPE_UW
)),
89 stride(suboffset(r1_uw
,5), 2, 4, 0),
90 brw_imm_v(0x11001100));
93 brw_set_compression_control(p
, BRW_COMPRESSION_COMPRESSED
);
98 static void emit_delta_xy(struct brw_compile
*p
,
99 const struct brw_reg
*dst
,
101 const struct brw_reg
*arg0
,
102 const struct brw_reg
*arg1
)
104 struct brw_reg r1
= brw_vec1_grf(1, 0);
106 /* Calc delta X,Y by subtracting origin in r1 from the pixel
109 if (mask
& WRITEMASK_X
) {
112 retype(arg0
[0], BRW_REGISTER_TYPE_UW
),
116 if (mask
& WRITEMASK_Y
) {
119 retype(arg0
[1], BRW_REGISTER_TYPE_UW
),
120 negate(suboffset(r1
,1)));
125 static void emit_wpos_xy(struct brw_compile
*p
,
126 const struct brw_reg
*dst
,
128 const struct brw_reg
*arg0
)
130 /* Calc delta X,Y by subtracting origin in r1 from the pixel
133 if (mask
& WRITEMASK_X
) {
136 retype(arg0
[0], BRW_REGISTER_TYPE_UW
));
139 if (mask
& WRITEMASK_Y
) {
140 /* TODO -- window_height - Y */
143 negate(retype(arg0
[1], BRW_REGISTER_TYPE_UW
)));
149 static void emit_pixel_w( struct brw_compile
*p
,
150 const struct brw_reg
*dst
,
152 const struct brw_reg
*arg0
,
153 const struct brw_reg
*deltas
)
155 /* Don't need this if all you are doing is interpolating color, for
158 if (mask
& WRITEMASK_W
) {
159 struct brw_reg interp3
= brw_vec1_grf(arg0
[0].nr
+1, 4);
161 /* Calc 1/w - just linterp wpos[3] optimized by putting the
162 * result straight into a message reg.
164 brw_LINE(p
, brw_null_reg(), interp3
, deltas
[0]);
165 brw_MAC(p
, brw_message_reg(2), suboffset(interp3
, 1), deltas
[1]);
168 brw_math_16( p
, dst
[3],
169 BRW_MATH_FUNCTION_INV
,
170 BRW_MATH_SATURATE_NONE
,
172 BRW_MATH_PRECISION_FULL
);
178 static void emit_linterp( struct brw_compile
*p
,
179 const struct brw_reg
*dst
,
181 const struct brw_reg
*arg0
,
182 const struct brw_reg
*deltas
)
184 struct brw_reg interp
[4];
185 GLuint nr
= arg0
[0].nr
;
188 interp
[0] = brw_vec1_grf(nr
, 0);
189 interp
[1] = brw_vec1_grf(nr
, 4);
190 interp
[2] = brw_vec1_grf(nr
+1, 0);
191 interp
[3] = brw_vec1_grf(nr
+1, 4);
193 for(i
= 0; i
< 4; i
++ ) {
195 brw_LINE(p
, brw_null_reg(), interp
[i
], deltas
[0]);
196 brw_MAC(p
, dst
[i
], suboffset(interp
[i
],1), deltas
[1]);
202 static void emit_pinterp( struct brw_compile
*p
,
203 const struct brw_reg
*dst
,
205 const struct brw_reg
*arg0
,
206 const struct brw_reg
*deltas
,
207 const struct brw_reg
*w
)
209 struct brw_reg interp
[4];
210 GLuint nr
= arg0
[0].nr
;
213 interp
[0] = brw_vec1_grf(nr
, 0);
214 interp
[1] = brw_vec1_grf(nr
, 4);
215 interp
[2] = brw_vec1_grf(nr
+1, 0);
216 interp
[3] = brw_vec1_grf(nr
+1, 4);
218 for(i
= 0; i
< 4; i
++ ) {
220 brw_LINE(p
, brw_null_reg(), interp
[i
], deltas
[0]);
221 brw_MAC(p
, dst
[i
], suboffset(interp
[i
],1), deltas
[1]);
222 brw_MUL(p
, dst
[i
], dst
[i
], w
[3]);
227 static void emit_cinterp( struct brw_compile
*p
,
228 const struct brw_reg
*dst
,
230 const struct brw_reg
*arg0
)
232 struct brw_reg interp
[4];
233 GLuint nr
= arg0
[0].nr
;
236 interp
[0] = brw_vec1_grf(nr
, 0);
237 interp
[1] = brw_vec1_grf(nr
, 4);
238 interp
[2] = brw_vec1_grf(nr
+1, 0);
239 interp
[3] = brw_vec1_grf(nr
+1, 4);
241 for(i
= 0; i
< 4; i
++ ) {
243 brw_MOV(p
, dst
[i
], suboffset(interp
[i
],3)); /* TODO: optimize away like other moves */
252 static void emit_alu1( struct brw_compile
*p
,
253 struct brw_instruction
*(*func
)(struct brw_compile
*,
256 const struct brw_reg
*dst
,
258 const struct brw_reg
*arg0
)
263 brw_set_saturate(p
, 1);
265 for (i
= 0; i
< 4; i
++) {
267 func(p
, dst
[i
], arg0
[i
]);
272 brw_set_saturate(p
, 0);
275 static void emit_alu2( struct brw_compile
*p
,
276 struct brw_instruction
*(*func
)(struct brw_compile
*,
280 const struct brw_reg
*dst
,
282 const struct brw_reg
*arg0
,
283 const struct brw_reg
*arg1
)
288 brw_set_saturate(p
, 1);
290 for (i
= 0; i
< 4; i
++) {
292 func(p
, dst
[i
], arg0
[i
], arg1
[i
]);
297 brw_set_saturate(p
, 0);
301 static void emit_mad( struct brw_compile
*p
,
302 const struct brw_reg
*dst
,
304 const struct brw_reg
*arg0
,
305 const struct brw_reg
*arg1
,
306 const struct brw_reg
*arg2
)
310 for (i
= 0; i
< 4; i
++) {
312 brw_MUL(p
, dst
[i
], arg0
[i
], arg1
[i
]);
314 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
315 brw_ADD(p
, dst
[i
], dst
[i
], arg2
[i
]);
316 brw_set_saturate(p
, 0);
322 static void emit_lrp( struct brw_compile
*p
,
323 const struct brw_reg
*dst
,
325 const struct brw_reg
*arg0
,
326 const struct brw_reg
*arg1
,
327 const struct brw_reg
*arg2
)
331 /* Uses dst as a temporary:
333 for (i
= 0; i
< 4; i
++) {
335 /* Can I use the LINE instruction for this?
337 brw_ADD(p
, dst
[i
], negate(arg0
[i
]), brw_imm_f(1.0));
338 brw_MUL(p
, brw_null_reg(), dst
[i
], arg2
[i
]);
340 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
341 brw_MAC(p
, dst
[i
], arg0
[i
], arg1
[i
]);
342 brw_set_saturate(p
, 0);
348 static void emit_slt( struct brw_compile
*p
,
349 const struct brw_reg
*dst
,
351 const struct brw_reg
*arg0
,
352 const struct brw_reg
*arg1
)
356 for (i
= 0; i
< 4; i
++) {
358 brw_MOV(p
, dst
[i
], brw_imm_f(0));
359 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, arg0
[i
], arg1
[i
]);
360 brw_MOV(p
, dst
[i
], brw_imm_f(1.0));
361 brw_set_predicate_control_flag_value(p
, 0xff);
366 /* Isn't this just the same as the above with the args swapped?
368 static void emit_sge( struct brw_compile
*p
,
369 const struct brw_reg
*dst
,
371 const struct brw_reg
*arg0
,
372 const struct brw_reg
*arg1
)
376 for (i
= 0; i
< 4; i
++) {
378 brw_MOV(p
, dst
[i
], brw_imm_f(0));
379 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_GE
, arg0
[i
], arg1
[i
]);
380 brw_MOV(p
, dst
[i
], brw_imm_f(1.0));
381 brw_set_predicate_control_flag_value(p
, 0xff);
388 static void emit_cmp( struct brw_compile
*p
,
389 const struct brw_reg
*dst
,
391 const struct brw_reg
*arg0
,
392 const struct brw_reg
*arg1
,
393 const struct brw_reg
*arg2
)
397 for (i
= 0; i
< 4; i
++) {
399 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
400 brw_MOV(p
, dst
[i
], arg2
[i
]);
401 brw_set_saturate(p
, 0);
403 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, arg0
[i
], brw_imm_f(0));
405 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
406 brw_MOV(p
, dst
[i
], arg1
[i
]);
407 brw_set_saturate(p
, 0);
408 brw_set_predicate_control_flag_value(p
, 0xff);
413 static void emit_max( struct brw_compile
*p
,
414 const struct brw_reg
*dst
,
416 const struct brw_reg
*arg0
,
417 const struct brw_reg
*arg1
)
421 for (i
= 0; i
< 4; i
++) {
423 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
424 brw_MOV(p
, dst
[i
], arg0
[i
]);
425 brw_set_saturate(p
, 0);
427 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, arg0
[i
], arg1
[i
]);
429 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
430 brw_MOV(p
, dst
[i
], arg1
[i
]);
431 brw_set_saturate(p
, 0);
432 brw_set_predicate_control_flag_value(p
, 0xff);
437 static void emit_min( struct brw_compile
*p
,
438 const struct brw_reg
*dst
,
440 const struct brw_reg
*arg0
,
441 const struct brw_reg
*arg1
)
445 for (i
= 0; i
< 4; i
++) {
447 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
448 brw_MOV(p
, dst
[i
], arg1
[i
]);
449 brw_set_saturate(p
, 0);
451 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, arg0
[i
], arg1
[i
]);
453 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
454 brw_MOV(p
, dst
[i
], arg0
[i
]);
455 brw_set_saturate(p
, 0);
456 brw_set_predicate_control_flag_value(p
, 0xff);
462 static void emit_dp3( struct brw_compile
*p
,
463 const struct brw_reg
*dst
,
465 const struct brw_reg
*arg0
,
466 const struct brw_reg
*arg1
)
468 assert((mask
& WRITEMASK_XYZW
) == WRITEMASK_X
);
470 brw_MUL(p
, brw_null_reg(), arg0
[0], arg1
[0]);
471 brw_MAC(p
, brw_null_reg(), arg0
[1], arg1
[1]);
473 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
474 brw_MAC(p
, dst
[0], arg0
[2], arg1
[2]);
475 brw_set_saturate(p
, 0);
479 static void emit_dp4( struct brw_compile
*p
,
480 const struct brw_reg
*dst
,
482 const struct brw_reg
*arg0
,
483 const struct brw_reg
*arg1
)
485 assert((mask
& WRITEMASK_XYZW
) == WRITEMASK_X
);
487 brw_MUL(p
, brw_null_reg(), arg0
[0], arg1
[0]);
488 brw_MAC(p
, brw_null_reg(), arg0
[1], arg1
[1]);
489 brw_MAC(p
, brw_null_reg(), arg0
[2], arg1
[2]);
491 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
492 brw_MAC(p
, dst
[0], arg0
[3], arg1
[3]);
493 brw_set_saturate(p
, 0);
497 static void emit_dph( struct brw_compile
*p
,
498 const struct brw_reg
*dst
,
500 const struct brw_reg
*arg0
,
501 const struct brw_reg
*arg1
)
503 assert((mask
& WRITEMASK_XYZW
) == WRITEMASK_X
);
505 brw_MUL(p
, brw_null_reg(), arg0
[0], arg1
[0]);
506 brw_MAC(p
, brw_null_reg(), arg0
[1], arg1
[1]);
507 brw_MAC(p
, dst
[0], arg0
[2], arg1
[2]);
509 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
510 brw_ADD(p
, dst
[0], dst
[0], arg1
[3]);
511 brw_set_saturate(p
, 0);
515 static void emit_xpd( struct brw_compile
*p
,
516 const struct brw_reg
*dst
,
518 const struct brw_reg
*arg0
,
519 const struct brw_reg
*arg1
)
523 assert(!(mask
& WRITEMASK_W
) == WRITEMASK_X
);
525 for (i
= 0 ; i
< 3; i
++) {
530 brw_MUL(p
, brw_null_reg(), negate(arg0
[i2
]), arg1
[i1
]);
532 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
533 brw_MAC(p
, dst
[i
], arg0
[i1
], arg1
[i2
]);
534 brw_set_saturate(p
, 0);
540 static void emit_math1( struct brw_compile
*p
,
542 const struct brw_reg
*dst
,
544 const struct brw_reg
*arg0
)
546 assert((mask
& WRITEMASK_XYZW
) == WRITEMASK_X
||
547 function
== BRW_MATH_FUNCTION_SINCOS
);
549 brw_MOV(p
, brw_message_reg(2), arg0
[0]);
551 /* Send two messages to perform all 16 operations:
556 (mask
& SATURATE
) ? BRW_MATH_SATURATE_SATURATE
: BRW_MATH_SATURATE_NONE
,
559 BRW_MATH_PRECISION_FULL
);
563 static void emit_math2( struct brw_compile
*p
,
565 const struct brw_reg
*dst
,
567 const struct brw_reg
*arg0
,
568 const struct brw_reg
*arg1
)
570 assert((mask
& WRITEMASK_XYZW
) == WRITEMASK_X
);
572 brw_push_insn_state(p
);
574 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
575 brw_MOV(p
, brw_message_reg(2), arg0
[0]);
576 brw_set_compression_control(p
, BRW_COMPRESSION_2NDHALF
);
577 brw_MOV(p
, brw_message_reg(4), sechalf(arg0
[0]));
579 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
580 brw_MOV(p
, brw_message_reg(3), arg1
[0]);
581 brw_set_compression_control(p
, BRW_COMPRESSION_2NDHALF
);
582 brw_MOV(p
, brw_message_reg(5), sechalf(arg1
[0]));
585 /* Send two messages to perform all 16 operations:
587 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
591 (mask
& SATURATE
) ? BRW_MATH_SATURATE_SATURATE
: BRW_MATH_SATURATE_NONE
,
594 BRW_MATH_DATA_VECTOR
,
595 BRW_MATH_PRECISION_FULL
);
597 brw_set_compression_control(p
, BRW_COMPRESSION_2NDHALF
);
601 (mask
& SATURATE
) ? BRW_MATH_SATURATE_SATURATE
: BRW_MATH_SATURATE_NONE
,
604 BRW_MATH_DATA_VECTOR
,
605 BRW_MATH_PRECISION_FULL
);
607 brw_pop_insn_state(p
);
612 static void emit_tex( struct brw_wm_compile
*c
,
613 const struct brw_wm_instruction
*inst
,
616 struct brw_reg
*arg
)
618 struct brw_compile
*p
= &c
->func
;
619 GLuint msgLength
, responseLength
;
620 GLboolean shadow
= (c
->key
.shadowtex_mask
& (1<<inst
->tex_unit
)) ? 1 : 0;
624 /* How many input regs are there?
626 switch (inst
->tex_idx
) {
627 case TEXTURE_1D_INDEX
:
631 case TEXTURE_2D_INDEX
:
632 case TEXTURE_RECT_INDEX
:
637 emit
= WRITEMASK_XYZ
;
649 for (i
= 0; i
< nr
; i
++) {
650 static const GLuint swz
[4] = {0,1,2,2};
652 brw_MOV(p
, brw_message_reg(msgLength
+1), arg
[swz
[i
]]);
654 brw_MOV(p
, brw_message_reg(msgLength
+1), brw_imm_f(0));
658 responseLength
= 8; /* always */
661 retype(vec16(dst
[0]), BRW_REGISTER_TYPE_UW
),
663 retype(c
->payload
.depth
[0].hw_reg
, BRW_REGISTER_TYPE_UW
),
664 inst
->tex_unit
+ 1, /* surface */
665 inst
->tex_unit
, /* sampler */
668 BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE
:
669 BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE
),
677 static void emit_txb( struct brw_wm_compile
*c
,
678 const struct brw_wm_instruction
*inst
,
681 struct brw_reg
*arg
)
683 struct brw_compile
*p
= &c
->func
;
686 /* Shadow ignored for txb.
688 switch (inst
->tex_idx
) {
689 case TEXTURE_1D_INDEX
:
690 brw_MOV(p
, brw_message_reg(2), arg
[0]);
691 brw_MOV(p
, brw_message_reg(4), brw_imm_f(0));
692 brw_MOV(p
, brw_message_reg(6), brw_imm_f(0));
694 case TEXTURE_2D_INDEX
:
695 case TEXTURE_RECT_INDEX
:
696 brw_MOV(p
, brw_message_reg(2), arg
[0]);
697 brw_MOV(p
, brw_message_reg(4), arg
[1]);
698 brw_MOV(p
, brw_message_reg(6), brw_imm_f(0));
701 brw_MOV(p
, brw_message_reg(2), arg
[0]);
702 brw_MOV(p
, brw_message_reg(4), arg
[1]);
703 brw_MOV(p
, brw_message_reg(6), arg
[2]);
707 brw_MOV(p
, brw_message_reg(8), arg
[3]);
712 retype(vec16(dst
[0]), BRW_REGISTER_TYPE_UW
),
714 retype(c
->payload
.depth
[0].hw_reg
, BRW_REGISTER_TYPE_UW
),
715 inst
->tex_unit
+ 1, /* surface */
716 inst
->tex_unit
, /* sampler */
718 BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS
,
719 8, /* responseLength */
726 static void emit_lit( struct brw_compile
*p
,
727 const struct brw_reg
*dst
,
729 const struct brw_reg
*arg0
)
731 assert((mask
& WRITEMASK_XW
) == 0);
733 if (mask
& WRITEMASK_Y
) {
734 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
735 brw_MOV(p
, dst
[1], arg0
[0]);
736 brw_set_saturate(p
, 0);
739 if (mask
& WRITEMASK_Z
) {
740 emit_math2(p
, BRW_MATH_FUNCTION_POW
,
742 WRITEMASK_X
| (mask
& SATURATE
),
747 /* Ordinarily you'd use an iff statement to skip or shortcircuit
748 * some of the POW calculations above, but 16-wide iff statements
749 * seem to lock c1 hardware, so this is a nasty workaround:
751 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_LE
, arg0
[0], brw_imm_f(0));
753 if (mask
& WRITEMASK_Y
)
754 brw_MOV(p
, dst
[1], brw_imm_f(0));
756 if (mask
& WRITEMASK_Z
)
757 brw_MOV(p
, dst
[2], brw_imm_f(0));
759 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
763 /* Kill pixel - set execution mask to zero for those pixels which
766 static void emit_kil( struct brw_wm_compile
*c
,
767 struct brw_reg
*arg0
)
769 struct brw_compile
*p
= &c
->func
;
770 struct brw_reg r0uw
= retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW
);
774 /* XXX - usually won't need 4 compares!
776 for (i
= 0; i
< 4; i
++) {
777 brw_push_insn_state(p
);
778 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_GE
, arg0
[i
], brw_imm_f(0));
779 brw_set_predicate_control_flag_value(p
, 0xff);
780 brw_AND(p
, r0uw
, brw_flag_reg(), r0uw
);
781 brw_pop_insn_state(p
);
785 static void fire_fb_write( struct brw_wm_compile
*c
,
789 struct brw_compile
*p
= &c
->func
;
791 /* Pass through control information:
793 /* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */
795 brw_push_insn_state(p
);
796 brw_set_mask_control(p
, BRW_MASK_DISABLE
); /* ? */
797 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
799 brw_message_reg(base_reg
+ 1),
801 brw_pop_insn_state(p
);
804 /* Send framebuffer write message: */
805 /* send (16) null.0<1>:uw m0 r0.0<8;8,1>:uw 0x85a04000:ud { Align1 EOT } */
807 retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW
),
809 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW
),
810 0, /* render surface always 0 */
816 static void emit_aa( struct brw_wm_compile
*c
,
817 struct brw_reg
*arg1
,
820 struct brw_compile
*p
= &c
->func
;
821 GLuint comp
= c
->key
.aa_dest_stencil_reg
/ 2;
822 GLuint off
= c
->key
.aa_dest_stencil_reg
% 2;
823 struct brw_reg aa
= offset(arg1
[comp
], off
);
825 brw_push_insn_state(p
);
826 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
); /* ?? */
827 brw_MOV(p
, brw_message_reg(reg
), aa
);
828 brw_pop_insn_state(p
);
832 /* Post-fragment-program processing. Send the results to the
835 static void emit_fb_write( struct brw_wm_compile
*c
,
836 struct brw_reg
*arg0
,
837 struct brw_reg
*arg1
,
838 struct brw_reg
*arg2
)
840 struct brw_compile
*p
= &c
->func
;
844 /* Reserve a space for AA - may not be needed:
846 if (c
->key
.aa_dest_stencil_reg
)
849 /* I don't really understand how this achieves the color interleave
850 * (ie RGBARGBA) in the result: [Do the saturation here]
853 brw_push_insn_state(p
);
855 for (channel
= 0; channel
< 4; channel
++) {
856 /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */
857 /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */
859 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
861 brw_message_reg(nr
+ channel
),
864 brw_set_compression_control(p
, BRW_COMPRESSION_2NDHALF
);
866 brw_message_reg(nr
+ channel
+ 4),
867 sechalf(arg0
[channel
]));
870 /* skip over the regs populated above:
874 brw_pop_insn_state(p
);
877 if (c
->key
.source_depth_to_render_target
)
879 if (c
->key
.computes_depth
)
880 brw_MOV(p
, brw_message_reg(nr
), arg2
[2]);
882 brw_MOV(p
, brw_message_reg(nr
), arg1
[1]); /* ? */
887 if (c
->key
.dest_depth_reg
)
889 GLuint comp
= c
->key
.dest_depth_reg
/ 2;
890 GLuint off
= c
->key
.dest_depth_reg
% 2;
893 brw_push_insn_state(p
);
894 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
895 brw_MOV(p
, brw_message_reg(nr
), arg1
[comp
]);
897 brw_MOV(p
, brw_message_reg(nr
+1), offset(arg1
[comp
],1));
898 brw_pop_insn_state(p
);
901 brw_MOV(p
, brw_message_reg(nr
), arg1
[comp
]);
907 if (!c
->key
.runtime_check_aads_emit
) {
908 if (c
->key
.aa_dest_stencil_reg
)
911 fire_fb_write(c
, 0, nr
);
914 struct brw_reg v1_null_ud
= vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD
));
915 struct brw_reg ip
= brw_ip_reg();
916 struct brw_instruction
*jmp
;
918 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
919 brw_set_conditionalmod(p
, BRW_CONDITIONAL_Z
);
922 get_element_ud(brw_vec8_grf(1,0), 6),
925 jmp
= brw_JMPI(p
, ip
, ip
, brw_imm_w(0));
928 fire_fb_write(c
, 0, nr
);
929 /* note - thread killed in subroutine */
931 brw_land_fwd_jump(p
, jmp
);
933 /* ELSE: Shuffle up one register to fill in the hole left for AA:
935 fire_fb_write(c
, 1, nr
-1);
942 /* Post-fragment-program processing. Send the results to the
945 static void emit_spill( struct brw_wm_compile
*c
,
949 struct brw_compile
*p
= &c
->func
;
952 mov (16) m2.0<1>:ud r2.0<8;8,1>:ud { Align1 Compr }
954 brw_MOV(p
, brw_message_reg(2), reg
);
957 mov (1) r0.2<1>:d 0x00000080:d { Align1 NoMask }
958 send (16) null.0<1>:uw m1 r0.0<8;8,1>:uw 0x053003ff:ud { Align1 }
961 retype(vec16(brw_vec8_grf(0, 0)), BRW_REGISTER_TYPE_UW
),
966 static void emit_unspill( struct brw_wm_compile
*c
,
970 struct brw_compile
*p
= &c
->func
;
972 /* Slot 0 is the undef value.
975 brw_MOV(p
, reg
, brw_imm_f(0));
980 mov (1) r0.2<1>:d 0x000000c0:d { Align1 NoMask }
981 send (16) r110.0<1>:uw m1 r0.0<8;8,1>:uw 0x041243ff:ud { Align1 }
985 retype(vec16(reg
), BRW_REGISTER_TYPE_UW
),
993 * Retrieve up to 4 GEN4 register pairs for the given wm reg:
995 static void get_argument_regs( struct brw_wm_compile
*c
,
996 struct brw_wm_ref
*arg
[],
997 struct brw_reg
*regs
)
1001 for (i
= 0; i
< 4; i
++) {
1004 if (arg
[i
]->unspill_reg
)
1006 brw_vec8_grf(arg
[i
]->unspill_reg
, 0),
1007 arg
[i
]->value
->spill_slot
);
1009 regs
[i
] = arg
[i
]->hw_reg
;
1012 regs
[i
] = brw_null_reg();
1017 static void spill_values( struct brw_wm_compile
*c
,
1018 struct brw_wm_value
*values
,
1023 for (i
= 0; i
< nr
; i
++)
1024 if (values
[i
].spill_slot
)
1025 emit_spill(c
, values
[i
].hw_reg
, values
[i
].spill_slot
);
1030 /* Emit the fragment program instructions here.
1032 void brw_wm_emit( struct brw_wm_compile
*c
)
1034 struct brw_compile
*p
= &c
->func
;
1037 brw_set_compression_control(p
, BRW_COMPRESSION_COMPRESSED
);
1039 /* Check if any of the payload regs need to be spilled:
1041 spill_values(c
, c
->payload
.depth
, 4);
1042 spill_values(c
, c
->creg
, c
->nr_creg
);
1043 spill_values(c
, c
->payload
.input_interp
, FRAG_ATTRIB_MAX
);
1046 for (insn
= 0; insn
< c
->nr_insns
; insn
++) {
1048 struct brw_wm_instruction
*inst
= &c
->instruction
[insn
];
1049 struct brw_reg args
[3][4], dst
[4];
1050 GLuint i
, dst_flags
;
1052 /* Get argument regs:
1054 for (i
= 0; i
< 3; i
++)
1055 get_argument_regs(c
, inst
->src
[i
], args
[i
]);
1059 for (i
= 0; i
< 4; i
++)
1061 dst
[i
] = inst
->dst
[i
]->hw_reg
;
1063 dst
[i
] = brw_null_reg();
1067 dst_flags
= inst
->writemask
;
1069 dst_flags
|= SATURATE
;
1071 switch (inst
->opcode
) {
1072 /* Generated instructions for calculating triangle interpolants:
1075 emit_pixel_xy(p
, dst
, dst_flags
, args
[0]);
1079 emit_delta_xy(p
, dst
, dst_flags
, args
[0], args
[1]);
1083 emit_wpos_xy(p
, dst
, dst_flags
, args
[0]);
1087 emit_pixel_w(p
, dst
, dst_flags
, args
[0], args
[1]);
1091 emit_linterp(p
, dst
, dst_flags
, args
[0], args
[1]);
1095 emit_pinterp(p
, dst
, dst_flags
, args
[0], args
[1], args
[2]);
1099 emit_cinterp(p
, dst
, dst_flags
, args
[0]);
1103 emit_fb_write(c
, args
[0], args
[1], args
[2]);
1106 /* Straightforward arithmetic:
1109 emit_alu2(p
, brw_ADD
, dst
, dst_flags
, args
[0], args
[1]);
1113 emit_alu1(p
, brw_FRC
, dst
, dst_flags
, args
[0]);
1117 emit_alu1(p
, brw_RNDD
, dst
, dst_flags
, args
[0]);
1120 case OPCODE_DP3
: /* */
1121 emit_dp3(p
, dst
, dst_flags
, args
[0], args
[1]);
1125 emit_dp4(p
, dst
, dst_flags
, args
[0], args
[1]);
1129 emit_dph(p
, dst
, dst_flags
, args
[0], args
[1]);
1132 case OPCODE_LRP
: /* */
1133 emit_lrp(p
, dst
, dst_flags
, args
[0], args
[1], args
[2]);
1137 emit_mad(p
, dst
, dst_flags
, args
[0], args
[1], args
[2]);
1142 emit_alu1(p
, brw_MOV
, dst
, dst_flags
, args
[0]);
1146 emit_alu2(p
, brw_MUL
, dst
, dst_flags
, args
[0], args
[1]);
1150 emit_xpd(p
, dst
, dst_flags
, args
[0], args
[1]);
1153 /* Higher math functions:
1156 emit_math1(p
, BRW_MATH_FUNCTION_INV
, dst
, dst_flags
, args
[0]);
1160 emit_math1(p
, BRW_MATH_FUNCTION_RSQ
, dst
, dst_flags
, args
[0]);
1164 emit_math1(p
, BRW_MATH_FUNCTION_SIN
, dst
, dst_flags
, args
[0]);
1168 emit_math1(p
, BRW_MATH_FUNCTION_COS
, dst
, dst_flags
, args
[0]);
1172 emit_math1(p
, BRW_MATH_FUNCTION_EXP
, dst
, dst_flags
, args
[0]);
1176 emit_math1(p
, BRW_MATH_FUNCTION_LOG
, dst
, dst_flags
, args
[0]);
1180 /* There is an scs math function, but it would need some
1181 * fixup for 16-element execution.
1183 if (dst_flags
& WRITEMASK_X
)
1184 emit_math1(p
, BRW_MATH_FUNCTION_COS
, dst
, (dst_flags
&SATURATE
)|WRITEMASK_X
, args
[0]);
1185 if (dst_flags
& WRITEMASK_Y
)
1186 emit_math1(p
, BRW_MATH_FUNCTION_SIN
, dst
+1, (dst_flags
&SATURATE
)|WRITEMASK_X
, args
[0]);
1190 emit_math2(p
, BRW_MATH_FUNCTION_POW
, dst
, dst_flags
, args
[0], args
[1]);
1196 emit_cmp(p
, dst
, dst_flags
, args
[0], args
[1], args
[2]);
1200 emit_max(p
, dst
, dst_flags
, args
[0], args
[1]);
1204 emit_min(p
, dst
, dst_flags
, args
[0], args
[1]);
1208 emit_slt(p
, dst
, dst_flags
, args
[0], args
[1]);
1212 emit_sge(p
, dst
, dst_flags
, args
[0], args
[1]);
1216 emit_lit(p
, dst
, dst_flags
, args
[0]);
1219 /* Texturing operations:
1222 emit_tex(c
, inst
, dst
, dst_flags
, args
[0]);
1226 emit_txb(c
, inst
, dst
, dst_flags
, args
[0]);
1230 emit_kil(c
, args
[0]);
1237 for (i
= 0; i
< 4; i
++)
1238 if (inst
->dst
[i
] && inst
->dst
[i
]->spill_slot
)
1240 inst
->dst
[i
]->hw_reg
,
1241 inst
->dst
[i
]->spill_slot
);