/*
 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
/*
 * Keith Whitwell <keith@tungstengraphics.com>
 */
33 #include "brw_context.h"
35 #include "program_instruction.h"
39 #define SATURATE (1<<5)
/* Not quite sure how correct this is - need to understand horizontal
 * vs. vertical strides a little better.
 */
44 static __inline
struct brw_reg
sechalf( struct brw_reg reg
)
/* Registers R0 and R1 as read by the emit functions in this file:
 *
 * R0.0 -- pixel mask, one bit for each of 4 pixels in 4 tiles,
 *         corresponding to each of the 16 execution channels.
 *
 * R1.0 -- triangle vertex 0.X
 * R1.1 -- triangle vertex 0.Y
 * R1.2 -- tile 0 x,y coords (2 packed uwords)
 * R1.3 -- tile 1 x,y coords (2 packed uwords)
 * R1.4 -- tile 2 x,y coords (2 packed uwords)
 * R1.5 -- tile 3 x,y coords (2 packed uwords)
 */
68 static void emit_pixel_xy(struct brw_compile
*p
,
69 const struct brw_reg
*dst
,
71 const struct brw_reg
*arg0
)
73 struct brw_reg r1
= brw_vec1_grf(1, 0);
74 struct brw_reg r1_uw
= retype(r1
, BRW_REGISTER_TYPE_UW
);
76 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
78 /* Calculate pixel centers by adding 1 or 0 to each of the
79 * micro-tile coordinates passed in r1.
81 if (mask
& WRITEMASK_X
) {
83 vec16(retype(dst
[0], BRW_REGISTER_TYPE_UW
)),
84 stride(suboffset(r1_uw
, 4), 2, 4, 0),
85 brw_imm_v(0x10101010));
88 if (mask
& WRITEMASK_Y
) {
90 vec16(retype(dst
[1], BRW_REGISTER_TYPE_UW
)),
91 stride(suboffset(r1_uw
,5), 2, 4, 0),
92 brw_imm_v(0x11001100));
95 brw_set_compression_control(p
, BRW_COMPRESSION_COMPRESSED
);
100 static void emit_delta_xy(struct brw_compile
*p
,
101 const struct brw_reg
*dst
,
103 const struct brw_reg
*arg0
,
104 const struct brw_reg
*arg1
)
106 struct brw_reg r1
= brw_vec1_grf(1, 0);
108 /* Calc delta X,Y by subtracting origin in r1 from the pixel
111 if (mask
& WRITEMASK_X
) {
114 retype(arg0
[0], BRW_REGISTER_TYPE_UW
),
118 if (mask
& WRITEMASK_Y
) {
121 retype(arg0
[1], BRW_REGISTER_TYPE_UW
),
122 negate(suboffset(r1
,1)));
127 static void emit_wpos_xy(struct brw_compile
*p
,
128 const struct brw_reg
*dst
,
130 const struct brw_reg
*arg0
)
132 /* Calc delta X,Y by subtracting origin in r1 from the pixel
135 if (mask
& WRITEMASK_X
) {
138 retype(arg0
[0], BRW_REGISTER_TYPE_UW
));
141 if (mask
& WRITEMASK_Y
) {
142 /* TODO -- window_height - Y */
145 negate(retype(arg0
[1], BRW_REGISTER_TYPE_UW
)));
151 static void emit_pixel_w( struct brw_compile
*p
,
152 const struct brw_reg
*dst
,
154 const struct brw_reg
*arg0
,
155 const struct brw_reg
*deltas
)
157 /* Don't need this if all you are doing is interpolating color, for
160 if (mask
& WRITEMASK_W
) {
161 struct brw_reg interp3
= brw_vec1_grf(arg0
[0].nr
+1, 4);
163 /* Calc 1/w - just linterp wpos[3] optimized by putting the
164 * result straight into a message reg.
166 brw_LINE(p
, brw_null_reg(), interp3
, deltas
[0]);
167 brw_MAC(p
, brw_message_reg(2), suboffset(interp3
, 1), deltas
[1]);
170 brw_math_16( p
, dst
[3],
171 BRW_MATH_FUNCTION_INV
,
172 BRW_MATH_SATURATE_NONE
,
174 BRW_MATH_PRECISION_FULL
);
180 static void emit_linterp( struct brw_compile
*p
,
181 const struct brw_reg
*dst
,
183 const struct brw_reg
*arg0
,
184 const struct brw_reg
*deltas
)
186 struct brw_reg interp
[4];
187 GLuint nr
= arg0
[0].nr
;
190 interp
[0] = brw_vec1_grf(nr
, 0);
191 interp
[1] = brw_vec1_grf(nr
, 4);
192 interp
[2] = brw_vec1_grf(nr
+1, 0);
193 interp
[3] = brw_vec1_grf(nr
+1, 4);
195 for(i
= 0; i
< 4; i
++ ) {
197 brw_LINE(p
, brw_null_reg(), interp
[i
], deltas
[0]);
198 brw_MAC(p
, dst
[i
], suboffset(interp
[i
],1), deltas
[1]);
204 static void emit_pinterp( struct brw_compile
*p
,
205 const struct brw_reg
*dst
,
207 const struct brw_reg
*arg0
,
208 const struct brw_reg
*deltas
,
209 const struct brw_reg
*w
)
211 struct brw_reg interp
[4];
212 GLuint nr
= arg0
[0].nr
;
215 interp
[0] = brw_vec1_grf(nr
, 0);
216 interp
[1] = brw_vec1_grf(nr
, 4);
217 interp
[2] = brw_vec1_grf(nr
+1, 0);
218 interp
[3] = brw_vec1_grf(nr
+1, 4);
220 for(i
= 0; i
< 4; i
++ ) {
222 brw_LINE(p
, brw_null_reg(), interp
[i
], deltas
[0]);
223 brw_MAC(p
, dst
[i
], suboffset(interp
[i
],1), deltas
[1]);
224 brw_MUL(p
, dst
[i
], dst
[i
], w
[3]);
229 static void emit_cinterp( struct brw_compile
*p
,
230 const struct brw_reg
*dst
,
232 const struct brw_reg
*arg0
)
234 struct brw_reg interp
[4];
235 GLuint nr
= arg0
[0].nr
;
238 interp
[0] = brw_vec1_grf(nr
, 0);
239 interp
[1] = brw_vec1_grf(nr
, 4);
240 interp
[2] = brw_vec1_grf(nr
+1, 0);
241 interp
[3] = brw_vec1_grf(nr
+1, 4);
243 for(i
= 0; i
< 4; i
++ ) {
245 brw_MOV(p
, dst
[i
], suboffset(interp
[i
],3)); /* TODO: optimize away like other moves */
254 static void emit_alu1( struct brw_compile
*p
,
255 struct brw_instruction
*(*func
)(struct brw_compile
*,
258 const struct brw_reg
*dst
,
260 const struct brw_reg
*arg0
)
265 brw_set_saturate(p
, 1);
267 for (i
= 0; i
< 4; i
++) {
269 func(p
, dst
[i
], arg0
[i
]);
274 brw_set_saturate(p
, 0);
277 static void emit_alu2( struct brw_compile
*p
,
278 struct brw_instruction
*(*func
)(struct brw_compile
*,
282 const struct brw_reg
*dst
,
284 const struct brw_reg
*arg0
,
285 const struct brw_reg
*arg1
)
290 brw_set_saturate(p
, 1);
292 for (i
= 0; i
< 4; i
++) {
294 func(p
, dst
[i
], arg0
[i
], arg1
[i
]);
299 brw_set_saturate(p
, 0);
303 static void emit_mad( struct brw_compile
*p
,
304 const struct brw_reg
*dst
,
306 const struct brw_reg
*arg0
,
307 const struct brw_reg
*arg1
,
308 const struct brw_reg
*arg2
)
312 for (i
= 0; i
< 4; i
++) {
314 brw_MUL(p
, dst
[i
], arg0
[i
], arg1
[i
]);
316 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
317 brw_ADD(p
, dst
[i
], dst
[i
], arg2
[i
]);
318 brw_set_saturate(p
, 0);
324 static void emit_lrp( struct brw_compile
*p
,
325 const struct brw_reg
*dst
,
327 const struct brw_reg
*arg0
,
328 const struct brw_reg
*arg1
,
329 const struct brw_reg
*arg2
)
333 /* Uses dst as a temporary:
335 for (i
= 0; i
< 4; i
++) {
337 /* Can I use the LINE instruction for this?
339 brw_ADD(p
, dst
[i
], negate(arg0
[i
]), brw_imm_f(1.0));
340 brw_MUL(p
, brw_null_reg(), dst
[i
], arg2
[i
]);
342 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
343 brw_MAC(p
, dst
[i
], arg0
[i
], arg1
[i
]);
344 brw_set_saturate(p
, 0);
350 static void emit_slt( struct brw_compile
*p
,
351 const struct brw_reg
*dst
,
353 const struct brw_reg
*arg0
,
354 const struct brw_reg
*arg1
)
358 for (i
= 0; i
< 4; i
++) {
360 brw_MOV(p
, dst
[i
], brw_imm_f(0));
361 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, arg0
[i
], arg1
[i
]);
362 brw_MOV(p
, dst
[i
], brw_imm_f(1.0));
363 brw_set_predicate_control_flag_value(p
, 0xff);
368 /* Isn't this just the same as the above with the args swapped?
370 static void emit_sge( struct brw_compile
*p
,
371 const struct brw_reg
*dst
,
373 const struct brw_reg
*arg0
,
374 const struct brw_reg
*arg1
)
378 for (i
= 0; i
< 4; i
++) {
380 brw_MOV(p
, dst
[i
], brw_imm_f(0));
381 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_GE
, arg0
[i
], arg1
[i
]);
382 brw_MOV(p
, dst
[i
], brw_imm_f(1.0));
383 brw_set_predicate_control_flag_value(p
, 0xff);
390 static void emit_cmp( struct brw_compile
*p
,
391 const struct brw_reg
*dst
,
393 const struct brw_reg
*arg0
,
394 const struct brw_reg
*arg1
,
395 const struct brw_reg
*arg2
)
399 for (i
= 0; i
< 4; i
++) {
401 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
402 brw_MOV(p
, dst
[i
], arg2
[i
]);
403 brw_set_saturate(p
, 0);
405 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, arg0
[i
], brw_imm_f(0));
407 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
408 brw_MOV(p
, dst
[i
], arg1
[i
]);
409 brw_set_saturate(p
, 0);
410 brw_set_predicate_control_flag_value(p
, 0xff);
415 static void emit_max( struct brw_compile
*p
,
416 const struct brw_reg
*dst
,
418 const struct brw_reg
*arg0
,
419 const struct brw_reg
*arg1
)
423 for (i
= 0; i
< 4; i
++) {
425 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
426 brw_MOV(p
, dst
[i
], arg0
[i
]);
427 brw_set_saturate(p
, 0);
429 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, arg0
[i
], arg1
[i
]);
431 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
432 brw_MOV(p
, dst
[i
], arg1
[i
]);
433 brw_set_saturate(p
, 0);
434 brw_set_predicate_control_flag_value(p
, 0xff);
439 static void emit_min( struct brw_compile
*p
,
440 const struct brw_reg
*dst
,
442 const struct brw_reg
*arg0
,
443 const struct brw_reg
*arg1
)
447 for (i
= 0; i
< 4; i
++) {
449 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
450 brw_MOV(p
, dst
[i
], arg1
[i
]);
451 brw_set_saturate(p
, 0);
453 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, arg0
[i
], arg1
[i
]);
455 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
456 brw_MOV(p
, dst
[i
], arg0
[i
]);
457 brw_set_saturate(p
, 0);
458 brw_set_predicate_control_flag_value(p
, 0xff);
464 static void emit_dp3( struct brw_compile
*p
,
465 const struct brw_reg
*dst
,
467 const struct brw_reg
*arg0
,
468 const struct brw_reg
*arg1
)
470 assert((mask
& WRITEMASK_XYZW
) == WRITEMASK_X
);
472 brw_MUL(p
, brw_null_reg(), arg0
[0], arg1
[0]);
473 brw_MAC(p
, brw_null_reg(), arg0
[1], arg1
[1]);
475 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
476 brw_MAC(p
, dst
[0], arg0
[2], arg1
[2]);
477 brw_set_saturate(p
, 0);
481 static void emit_dp4( struct brw_compile
*p
,
482 const struct brw_reg
*dst
,
484 const struct brw_reg
*arg0
,
485 const struct brw_reg
*arg1
)
487 assert((mask
& WRITEMASK_XYZW
) == WRITEMASK_X
);
489 brw_MUL(p
, brw_null_reg(), arg0
[0], arg1
[0]);
490 brw_MAC(p
, brw_null_reg(), arg0
[1], arg1
[1]);
491 brw_MAC(p
, brw_null_reg(), arg0
[2], arg1
[2]);
493 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
494 brw_MAC(p
, dst
[0], arg0
[3], arg1
[3]);
495 brw_set_saturate(p
, 0);
499 static void emit_dph( struct brw_compile
*p
,
500 const struct brw_reg
*dst
,
502 const struct brw_reg
*arg0
,
503 const struct brw_reg
*arg1
)
505 assert((mask
& WRITEMASK_XYZW
) == WRITEMASK_X
);
507 brw_MUL(p
, brw_null_reg(), arg0
[0], arg1
[0]);
508 brw_MAC(p
, brw_null_reg(), arg0
[1], arg1
[1]);
509 brw_MAC(p
, dst
[0], arg0
[2], arg1
[2]);
511 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
512 brw_ADD(p
, dst
[0], dst
[0], arg1
[3]);
513 brw_set_saturate(p
, 0);
517 static void emit_xpd( struct brw_compile
*p
,
518 const struct brw_reg
*dst
,
520 const struct brw_reg
*arg0
,
521 const struct brw_reg
*arg1
)
525 assert(!(mask
& WRITEMASK_W
) == WRITEMASK_X
);
527 for (i
= 0 ; i
< 3; i
++) {
532 brw_MUL(p
, brw_null_reg(), negate(arg0
[i2
]), arg1
[i1
]);
534 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
535 brw_MAC(p
, dst
[i
], arg0
[i1
], arg1
[i2
]);
536 brw_set_saturate(p
, 0);
542 static void emit_math1( struct brw_compile
*p
,
544 const struct brw_reg
*dst
,
546 const struct brw_reg
*arg0
)
548 assert((mask
& WRITEMASK_XYZW
) == WRITEMASK_X
||
549 function
== BRW_MATH_FUNCTION_SINCOS
);
551 brw_MOV(p
, brw_message_reg(2), arg0
[0]);
553 /* Send two messages to perform all 16 operations:
558 (mask
& SATURATE
) ? BRW_MATH_SATURATE_SATURATE
: BRW_MATH_SATURATE_NONE
,
561 BRW_MATH_PRECISION_FULL
);
565 static void emit_math2( struct brw_compile
*p
,
567 const struct brw_reg
*dst
,
569 const struct brw_reg
*arg0
,
570 const struct brw_reg
*arg1
)
572 assert((mask
& WRITEMASK_XYZW
) == WRITEMASK_X
);
574 brw_push_insn_state(p
);
576 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
577 brw_MOV(p
, brw_message_reg(2), arg0
[0]);
578 brw_set_compression_control(p
, BRW_COMPRESSION_2NDHALF
);
579 brw_MOV(p
, brw_message_reg(4), sechalf(arg0
[0]));
581 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
582 brw_MOV(p
, brw_message_reg(3), arg1
[0]);
583 brw_set_compression_control(p
, BRW_COMPRESSION_2NDHALF
);
584 brw_MOV(p
, brw_message_reg(5), sechalf(arg1
[0]));
587 /* Send two messages to perform all 16 operations:
589 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
593 (mask
& SATURATE
) ? BRW_MATH_SATURATE_SATURATE
: BRW_MATH_SATURATE_NONE
,
596 BRW_MATH_DATA_VECTOR
,
597 BRW_MATH_PRECISION_FULL
);
599 brw_set_compression_control(p
, BRW_COMPRESSION_2NDHALF
);
603 (mask
& SATURATE
) ? BRW_MATH_SATURATE_SATURATE
: BRW_MATH_SATURATE_NONE
,
606 BRW_MATH_DATA_VECTOR
,
607 BRW_MATH_PRECISION_FULL
);
609 brw_pop_insn_state(p
);
614 static void emit_tex( struct brw_wm_compile
*c
,
615 const struct brw_wm_instruction
*inst
,
618 struct brw_reg
*arg
)
620 struct brw_compile
*p
= &c
->func
;
621 GLuint msgLength
, responseLength
;
622 GLboolean shadow
= (c
->key
.shadowtex_mask
& (1<<inst
->tex_unit
)) ? 1 : 0;
626 /* How many input regs are there?
628 switch (inst
->tex_idx
) {
629 case TEXTURE_1D_INDEX
:
633 case TEXTURE_2D_INDEX
:
634 case TEXTURE_RECT_INDEX
:
639 emit
= WRITEMASK_XYZ
;
651 for (i
= 0; i
< nr
; i
++) {
652 static const GLuint swz
[4] = {0,1,2,2};
654 brw_MOV(p
, brw_message_reg(msgLength
+1), arg
[swz
[i
]]);
656 brw_MOV(p
, brw_message_reg(msgLength
+1), brw_imm_f(0));
660 responseLength
= 8; /* always */
663 retype(vec16(dst
[0]), BRW_REGISTER_TYPE_UW
),
665 retype(c
->payload
.depth
[0].hw_reg
, BRW_REGISTER_TYPE_UW
),
666 inst
->tex_unit
+ 1, /* surface */
667 inst
->tex_unit
, /* sampler */
670 BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE
:
671 BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE
),
679 static void emit_txb( struct brw_wm_compile
*c
,
680 const struct brw_wm_instruction
*inst
,
683 struct brw_reg
*arg
)
685 struct brw_compile
*p
= &c
->func
;
688 /* Shadow ignored for txb.
690 switch (inst
->tex_idx
) {
691 case TEXTURE_1D_INDEX
:
692 brw_MOV(p
, brw_message_reg(2), arg
[0]);
693 brw_MOV(p
, brw_message_reg(4), brw_imm_f(0));
694 brw_MOV(p
, brw_message_reg(6), brw_imm_f(0));
696 case TEXTURE_2D_INDEX
:
697 case TEXTURE_RECT_INDEX
:
698 brw_MOV(p
, brw_message_reg(2), arg
[0]);
699 brw_MOV(p
, brw_message_reg(4), arg
[1]);
700 brw_MOV(p
, brw_message_reg(6), brw_imm_f(0));
703 brw_MOV(p
, brw_message_reg(2), arg
[0]);
704 brw_MOV(p
, brw_message_reg(4), arg
[1]);
705 brw_MOV(p
, brw_message_reg(6), arg
[2]);
709 brw_MOV(p
, brw_message_reg(8), arg
[3]);
714 retype(vec16(dst
[0]), BRW_REGISTER_TYPE_UW
),
716 retype(c
->payload
.depth
[0].hw_reg
, BRW_REGISTER_TYPE_UW
),
717 inst
->tex_unit
+ 1, /* surface */
718 inst
->tex_unit
, /* sampler */
720 BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS
,
721 8, /* responseLength */
728 static void emit_lit( struct brw_compile
*p
,
729 const struct brw_reg
*dst
,
731 const struct brw_reg
*arg0
)
733 assert((mask
& WRITEMASK_XW
) == 0);
735 if (mask
& WRITEMASK_Y
) {
736 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
737 brw_MOV(p
, dst
[1], arg0
[0]);
738 brw_set_saturate(p
, 0);
741 if (mask
& WRITEMASK_Z
) {
742 emit_math2(p
, BRW_MATH_FUNCTION_POW
,
744 WRITEMASK_X
| (mask
& SATURATE
),
749 /* Ordinarily you'd use an iff statement to skip or shortcircuit
750 * some of the POW calculations above, but 16-wide iff statements
751 * seem to lock c1 hardware, so this is a nasty workaround:
753 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_LE
, arg0
[0], brw_imm_f(0));
755 if (mask
& WRITEMASK_Y
)
756 brw_MOV(p
, dst
[1], brw_imm_f(0));
758 if (mask
& WRITEMASK_Z
)
759 brw_MOV(p
, dst
[2], brw_imm_f(0));
761 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
765 /* Kill pixel - set execution mask to zero for those pixels which
768 static void emit_kil( struct brw_wm_compile
*c
,
769 struct brw_reg
*arg0
)
771 struct brw_compile
*p
= &c
->func
;
772 struct brw_reg r0uw
= retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW
);
776 /* XXX - usually won't need 4 compares!
778 for (i
= 0; i
< 4; i
++) {
779 brw_push_insn_state(p
);
780 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_GE
, arg0
[i
], brw_imm_f(0));
781 brw_set_predicate_control_flag_value(p
, 0xff);
782 brw_AND(p
, r0uw
, brw_flag_reg(), r0uw
);
783 brw_pop_insn_state(p
);
787 static void fire_fb_write( struct brw_wm_compile
*c
,
791 struct brw_compile
*p
= &c
->func
;
793 /* Pass through control information:
795 /* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */
797 brw_push_insn_state(p
);
798 brw_set_mask_control(p
, BRW_MASK_DISABLE
); /* ? */
799 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
801 brw_message_reg(base_reg
+ 1),
803 brw_pop_insn_state(p
);
806 /* Send framebuffer write message: */
807 /* send (16) null.0<1>:uw m0 r0.0<8;8,1>:uw 0x85a04000:ud { Align1 EOT } */
809 retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW
),
811 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW
),
812 0, /* render surface always 0 */
818 static void emit_aa( struct brw_wm_compile
*c
,
819 struct brw_reg
*arg1
,
822 struct brw_compile
*p
= &c
->func
;
823 GLuint comp
= c
->key
.aa_dest_stencil_reg
/ 2;
824 GLuint off
= c
->key
.aa_dest_stencil_reg
% 2;
825 struct brw_reg aa
= offset(arg1
[comp
], off
);
827 brw_push_insn_state(p
);
828 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
); /* ?? */
829 brw_MOV(p
, brw_message_reg(reg
), aa
);
830 brw_pop_insn_state(p
);
834 /* Post-fragment-program processing. Send the results to the
837 static void emit_fb_write( struct brw_wm_compile
*c
,
838 struct brw_reg
*arg0
,
839 struct brw_reg
*arg1
,
840 struct brw_reg
*arg2
)
842 struct brw_compile
*p
= &c
->func
;
846 /* Reserve a space for AA - may not be needed:
848 if (c
->key
.aa_dest_stencil_reg
)
851 /* I don't really understand how this achieves the color interleave
852 * (ie RGBARGBA) in the result: [Do the saturation here]
855 brw_push_insn_state(p
);
857 for (channel
= 0; channel
< 4; channel
++) {
858 /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */
859 /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */
861 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
863 brw_message_reg(nr
+ channel
),
866 brw_set_compression_control(p
, BRW_COMPRESSION_2NDHALF
);
868 brw_message_reg(nr
+ channel
+ 4),
869 sechalf(arg0
[channel
]));
872 /* skip over the regs populated above:
876 brw_pop_insn_state(p
);
879 if (c
->key
.source_depth_to_render_target
)
881 if (c
->key
.computes_depth
)
882 brw_MOV(p
, brw_message_reg(nr
), arg2
[2]);
884 brw_MOV(p
, brw_message_reg(nr
), arg1
[1]); /* ? */
889 if (c
->key
.dest_depth_reg
)
891 GLuint comp
= c
->key
.dest_depth_reg
/ 2;
892 GLuint off
= c
->key
.dest_depth_reg
% 2;
895 brw_push_insn_state(p
);
896 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
897 brw_MOV(p
, brw_message_reg(nr
), arg1
[comp
]);
899 brw_MOV(p
, brw_message_reg(nr
+1), offset(arg1
[comp
],1));
900 brw_pop_insn_state(p
);
903 brw_MOV(p
, brw_message_reg(nr
), arg1
[comp
]);
909 if (!c
->key
.runtime_check_aads_emit
) {
910 if (c
->key
.aa_dest_stencil_reg
)
913 fire_fb_write(c
, 0, nr
);
916 struct brw_reg v1_null_ud
= vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD
));
917 struct brw_reg ip
= brw_ip_reg();
918 struct brw_instruction
*jmp
;
920 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
921 brw_set_conditionalmod(p
, BRW_CONDITIONAL_Z
);
924 get_element_ud(brw_vec8_grf(1,0), 6),
927 jmp
= brw_JMPI(p
, ip
, ip
, brw_imm_w(0));
930 fire_fb_write(c
, 0, nr
);
931 /* note - thread killed in subroutine */
933 brw_land_fwd_jump(p
, jmp
);
935 /* ELSE: Shuffle up one register to fill in the hole left for AA:
937 fire_fb_write(c
, 1, nr
-1);
944 /* Post-fragment-program processing. Send the results to the
947 static void emit_spill( struct brw_wm_compile
*c
,
951 struct brw_compile
*p
= &c
->func
;
954 mov (16) m2.0<1>:ud r2.0<8;8,1>:ud { Align1 Compr }
956 brw_MOV(p
, brw_message_reg(2), reg
);
959 mov (1) r0.2<1>:d 0x00000080:d { Align1 NoMask }
960 send (16) null.0<1>:uw m1 r0.0<8;8,1>:uw 0x053003ff:ud { Align1 }
963 retype(vec16(brw_vec8_grf(0, 0)), BRW_REGISTER_TYPE_UW
),
968 static void emit_unspill( struct brw_wm_compile
*c
,
972 struct brw_compile
*p
= &c
->func
;
974 /* Slot 0 is the undef value.
977 brw_MOV(p
, reg
, brw_imm_f(0));
982 mov (1) r0.2<1>:d 0x000000c0:d { Align1 NoMask }
983 send (16) r110.0<1>:uw m1 r0.0<8;8,1>:uw 0x041243ff:ud { Align1 }
987 retype(vec16(reg
), BRW_REGISTER_TYPE_UW
),
995 * Retrieve upto 4 GEN4 register pairs for the given wm reg:
997 static void get_argument_regs( struct brw_wm_compile
*c
,
998 struct brw_wm_ref
*arg
[],
999 struct brw_reg
*regs
)
1003 for (i
= 0; i
< 4; i
++) {
1006 if (arg
[i
]->unspill_reg
)
1008 brw_vec8_grf(arg
[i
]->unspill_reg
, 0),
1009 arg
[i
]->value
->spill_slot
);
1011 regs
[i
] = arg
[i
]->hw_reg
;
1014 regs
[i
] = brw_null_reg();
1019 static void spill_values( struct brw_wm_compile
*c
,
1020 struct brw_wm_value
*values
,
1025 for (i
= 0; i
< nr
; i
++)
1026 if (values
[i
].spill_slot
)
1027 emit_spill(c
, values
[i
].hw_reg
, values
[i
].spill_slot
);
1032 /* Emit the fragment program instructions here.
1034 void brw_wm_emit( struct brw_wm_compile
*c
)
1036 struct brw_compile
*p
= &c
->func
;
1039 brw_set_compression_control(p
, BRW_COMPRESSION_COMPRESSED
);
1041 /* Check if any of the payload regs need to be spilled:
1043 spill_values(c
, c
->payload
.depth
, 4);
1044 spill_values(c
, c
->creg
, c
->nr_creg
);
1045 spill_values(c
, c
->payload
.input_interp
, FRAG_ATTRIB_MAX
);
1048 for (insn
= 0; insn
< c
->nr_insns
; insn
++) {
1050 struct brw_wm_instruction
*inst
= &c
->instruction
[insn
];
1051 struct brw_reg args
[3][4], dst
[4];
1052 GLuint i
, dst_flags
;
1054 /* Get argument regs:
1056 for (i
= 0; i
< 3; i
++)
1057 get_argument_regs(c
, inst
->src
[i
], args
[i
]);
1061 for (i
= 0; i
< 4; i
++)
1063 dst
[i
] = inst
->dst
[i
]->hw_reg
;
1065 dst
[i
] = brw_null_reg();
1069 dst_flags
= inst
->writemask
;
1071 dst_flags
|= SATURATE
;
1073 switch (inst
->opcode
) {
1074 /* Generated instructions for calculating triangle interpolants:
1077 emit_pixel_xy(p
, dst
, dst_flags
, args
[0]);
1081 emit_delta_xy(p
, dst
, dst_flags
, args
[0], args
[1]);
1085 emit_wpos_xy(p
, dst
, dst_flags
, args
[0]);
1089 emit_pixel_w(p
, dst
, dst_flags
, args
[0], args
[1]);
1093 emit_linterp(p
, dst
, dst_flags
, args
[0], args
[1]);
1097 emit_pinterp(p
, dst
, dst_flags
, args
[0], args
[1], args
[2]);
1101 emit_cinterp(p
, dst
, dst_flags
, args
[0]);
1105 emit_fb_write(c
, args
[0], args
[1], args
[2]);
1108 /* Straightforward arithmetic:
1111 emit_alu2(p
, brw_ADD
, dst
, dst_flags
, args
[0], args
[1]);
1115 emit_alu1(p
, brw_FRC
, dst
, dst_flags
, args
[0]);
1119 emit_alu1(p
, brw_RNDD
, dst
, dst_flags
, args
[0]);
1122 case OPCODE_DP3
: /* */
1123 emit_dp3(p
, dst
, dst_flags
, args
[0], args
[1]);
1127 emit_dp4(p
, dst
, dst_flags
, args
[0], args
[1]);
1131 emit_dph(p
, dst
, dst_flags
, args
[0], args
[1]);
1134 case OPCODE_LRP
: /* */
1135 emit_lrp(p
, dst
, dst_flags
, args
[0], args
[1], args
[2]);
1139 emit_mad(p
, dst
, dst_flags
, args
[0], args
[1], args
[2]);
1144 emit_alu1(p
, brw_MOV
, dst
, dst_flags
, args
[0]);
1148 emit_alu2(p
, brw_MUL
, dst
, dst_flags
, args
[0], args
[1]);
1152 emit_xpd(p
, dst
, dst_flags
, args
[0], args
[1]);
1155 /* Higher math functions:
1158 emit_math1(p
, BRW_MATH_FUNCTION_INV
, dst
, dst_flags
, args
[0]);
1162 emit_math1(p
, BRW_MATH_FUNCTION_RSQ
, dst
, dst_flags
, args
[0]);
1166 emit_math1(p
, BRW_MATH_FUNCTION_SIN
, dst
, dst_flags
, args
[0]);
1170 emit_math1(p
, BRW_MATH_FUNCTION_COS
, dst
, dst_flags
, args
[0]);
1174 emit_math1(p
, BRW_MATH_FUNCTION_EXP
, dst
, dst_flags
, args
[0]);
1178 emit_math1(p
, BRW_MATH_FUNCTION_LOG
, dst
, dst_flags
, args
[0]);
1182 /* There is an scs math function, but it would need some
1183 * fixup for 16-element execution.
1185 if (dst_flags
& WRITEMASK_X
)
1186 emit_math1(p
, BRW_MATH_FUNCTION_COS
, dst
, (dst_flags
&SATURATE
)|WRITEMASK_X
, args
[0]);
1187 if (dst_flags
& WRITEMASK_Y
)
1188 emit_math1(p
, BRW_MATH_FUNCTION_SIN
, dst
+1, (dst_flags
&SATURATE
)|WRITEMASK_X
, args
[0]);
1192 emit_math2(p
, BRW_MATH_FUNCTION_POW
, dst
, dst_flags
, args
[0], args
[1]);
1198 emit_cmp(p
, dst
, dst_flags
, args
[0], args
[1], args
[2]);
1202 emit_max(p
, dst
, dst_flags
, args
[0], args
[1]);
1206 emit_min(p
, dst
, dst_flags
, args
[0], args
[1]);
1210 emit_slt(p
, dst
, dst_flags
, args
[0], args
[1]);
1214 emit_sge(p
, dst
, dst_flags
, args
[0], args
[1]);
1218 emit_lit(p
, dst
, dst_flags
, args
[0]);
1221 /* Texturing operations:
1224 emit_tex(c
, inst
, dst
, dst_flags
, args
[0]);
1228 emit_txb(c
, inst
, dst
, dst_flags
, args
[0]);
1232 emit_kil(c
, args
[0]);
1239 for (i
= 0; i
< 4; i
++)
1240 if (inst
->dst
[i
] && inst
->dst
[i
]->spill_slot
)
1242 inst
->dst
[i
]->hw_reg
,
1243 inst
->dst
[i
]->spill_slot
);