2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **********************************************************************/
29 * Keith Whitwell <keith@tungstengraphics.com>
34 #include "brw_context.h"
/* Extra flag carried in the `mask` argument of the emit_*() helpers
 * next to the WRITEMASK_* bits (brw_wm_emit() ORs it into dst_flags).
 * When set, the helper enables saturation around the instruction(s)
 * that write the result.
 */
#define SATURATE (1<<5)
39 /* Not quite sure how correct this is - need to understand horiz
40 * vs. vertical strides a little better.
42 static __inline
struct brw_reg
sechalf( struct brw_reg reg
)
/*
 * R0.0 -- pixel mask, one bit for each of 4 pixels in 4 tiles,
 *         corresponding to each of the 16 execution channels.
 *
 * R1.0 -- triangle vertex 0.X
 * R1.1 -- triangle vertex 0.Y
 * R1.2 -- tile 0 x,y coords (2 packed uwords)
 * R1.3 -- tile 1 x,y coords (2 packed uwords)
 * R1.4 -- tile 2 x,y coords (2 packed uwords)
 * R1.5 -- tile 3 x,y coords (2 packed uwords)
 */
66 static void emit_pixel_xy(struct brw_compile
*p
,
67 const struct brw_reg
*dst
,
69 const struct brw_reg
*arg0
)
71 struct brw_reg r1
= brw_vec1_grf(1, 0);
72 struct brw_reg r1_uw
= retype(r1
, BRW_REGISTER_TYPE_UW
);
74 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
76 /* Calculate pixel centers by adding 1 or 0 to each of the
77 * micro-tile coordinates passed in r1.
79 if (mask
& WRITEMASK_X
) {
81 vec16(retype(dst
[0], BRW_REGISTER_TYPE_UW
)),
82 stride(suboffset(r1_uw
, 4), 2, 4, 0),
83 brw_imm_v(0x10101010));
86 if (mask
& WRITEMASK_Y
) {
88 vec16(retype(dst
[1], BRW_REGISTER_TYPE_UW
)),
89 stride(suboffset(r1_uw
,5), 2, 4, 0),
90 brw_imm_v(0x11001100));
93 brw_set_compression_control(p
, BRW_COMPRESSION_COMPRESSED
);
98 static void emit_delta_xy(struct brw_compile
*p
,
99 const struct brw_reg
*dst
,
101 const struct brw_reg
*arg0
,
102 const struct brw_reg
*arg1
)
104 struct brw_reg r1
= brw_vec1_grf(1, 0);
106 /* Calc delta X,Y by subtracting origin in r1 from the pixel
109 if (mask
& WRITEMASK_X
) {
112 retype(arg0
[0], BRW_REGISTER_TYPE_UW
),
116 if (mask
& WRITEMASK_Y
) {
119 retype(arg0
[1], BRW_REGISTER_TYPE_UW
),
120 negate(suboffset(r1
,1)));
125 static void emit_wpos_xy(struct brw_compile
*p
,
126 const struct brw_reg
*dst
,
128 const struct brw_reg
*arg0
)
130 /* Calc delta X,Y by subtracting origin in r1 from the pixel
133 if (mask
& WRITEMASK_X
) {
136 retype(arg0
[0], BRW_REGISTER_TYPE_UW
));
139 if (mask
& WRITEMASK_Y
) {
140 /* TODO -- window_height - Y */
143 negate(retype(arg0
[1], BRW_REGISTER_TYPE_UW
)));
149 static void emit_pixel_w( struct brw_compile
*p
,
150 const struct brw_reg
*dst
,
152 const struct brw_reg
*arg0
,
153 const struct brw_reg
*deltas
)
155 /* Don't need this if all you are doing is interpolating color, for
158 if (mask
& WRITEMASK_W
) {
159 struct brw_reg interp3
= brw_vec1_grf(arg0
[0].nr
+1, 4);
161 /* Calc 1/w - just linterp wpos[3] optimized by putting the
162 * result straight into a message reg.
164 brw_LINE(p
, brw_null_reg(), interp3
, deltas
[0]);
165 brw_MAC(p
, brw_message_reg(2), suboffset(interp3
, 1), deltas
[1]);
168 brw_math_16( p
, dst
[3],
169 BRW_MATH_FUNCTION_INV
,
170 BRW_MATH_SATURATE_NONE
,
172 BRW_MATH_PRECISION_FULL
);
178 static void emit_linterp( struct brw_compile
*p
,
179 const struct brw_reg
*dst
,
181 const struct brw_reg
*arg0
,
182 const struct brw_reg
*deltas
)
184 struct brw_reg interp
[4];
185 GLuint nr
= arg0
[0].nr
;
188 interp
[0] = brw_vec1_grf(nr
, 0);
189 interp
[1] = brw_vec1_grf(nr
, 4);
190 interp
[2] = brw_vec1_grf(nr
+1, 0);
191 interp
[3] = brw_vec1_grf(nr
+1, 4);
193 for(i
= 0; i
< 4; i
++ ) {
195 brw_LINE(p
, brw_null_reg(), interp
[i
], deltas
[0]);
196 brw_MAC(p
, dst
[i
], suboffset(interp
[i
],1), deltas
[1]);
202 static void emit_pinterp( struct brw_compile
*p
,
203 const struct brw_reg
*dst
,
205 const struct brw_reg
*arg0
,
206 const struct brw_reg
*deltas
,
207 const struct brw_reg
*w
)
209 struct brw_reg interp
[4];
210 GLuint nr
= arg0
[0].nr
;
213 interp
[0] = brw_vec1_grf(nr
, 0);
214 interp
[1] = brw_vec1_grf(nr
, 4);
215 interp
[2] = brw_vec1_grf(nr
+1, 0);
216 interp
[3] = brw_vec1_grf(nr
+1, 4);
218 for(i
= 0; i
< 4; i
++ ) {
220 brw_LINE(p
, brw_null_reg(), interp
[i
], deltas
[0]);
221 brw_MAC(p
, dst
[i
], suboffset(interp
[i
],1), deltas
[1]);
222 brw_MUL(p
, dst
[i
], dst
[i
], w
[3]);
227 static void emit_cinterp( struct brw_compile
*p
,
228 const struct brw_reg
*dst
,
230 const struct brw_reg
*arg0
)
232 struct brw_reg interp
[4];
233 GLuint nr
= arg0
[0].nr
;
236 interp
[0] = brw_vec1_grf(nr
, 0);
237 interp
[1] = brw_vec1_grf(nr
, 4);
238 interp
[2] = brw_vec1_grf(nr
+1, 0);
239 interp
[3] = brw_vec1_grf(nr
+1, 4);
241 for(i
= 0; i
< 4; i
++ ) {
243 brw_MOV(p
, dst
[i
], suboffset(interp
[i
],3)); /* TODO: optimize away like other moves */
252 static void emit_alu1( struct brw_compile
*p
,
253 struct brw_instruction
*(*func
)(struct brw_compile
*,
256 const struct brw_reg
*dst
,
258 const struct brw_reg
*arg0
)
263 brw_set_saturate(p
, 1);
265 for (i
= 0; i
< 4; i
++) {
267 func(p
, dst
[i
], arg0
[i
]);
272 brw_set_saturate(p
, 0);
275 static void emit_alu2( struct brw_compile
*p
,
276 struct brw_instruction
*(*func
)(struct brw_compile
*,
280 const struct brw_reg
*dst
,
282 const struct brw_reg
*arg0
,
283 const struct brw_reg
*arg1
)
288 brw_set_saturate(p
, 1);
290 for (i
= 0; i
< 4; i
++) {
292 func(p
, dst
[i
], arg0
[i
], arg1
[i
]);
297 brw_set_saturate(p
, 0);
301 static void emit_mad( struct brw_compile
*p
,
302 const struct brw_reg
*dst
,
304 const struct brw_reg
*arg0
,
305 const struct brw_reg
*arg1
,
306 const struct brw_reg
*arg2
)
310 for (i
= 0; i
< 4; i
++) {
312 brw_MUL(p
, dst
[i
], arg0
[i
], arg1
[i
]);
314 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
315 brw_ADD(p
, dst
[i
], dst
[i
], arg2
[i
]);
316 brw_set_saturate(p
, 0);
322 static void emit_lrp( struct brw_compile
*p
,
323 const struct brw_reg
*dst
,
325 const struct brw_reg
*arg0
,
326 const struct brw_reg
*arg1
,
327 const struct brw_reg
*arg2
)
331 /* Uses dst as a temporary:
333 for (i
= 0; i
< 4; i
++) {
335 /* Can I use the LINE instruction for this?
337 brw_ADD(p
, dst
[i
], negate(arg0
[i
]), brw_imm_f(1.0));
338 brw_MUL(p
, brw_null_reg(), dst
[i
], arg2
[i
]);
340 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
341 brw_MAC(p
, dst
[i
], arg0
[i
], arg1
[i
]);
342 brw_set_saturate(p
, 0);
346 static void emit_sop( struct brw_compile
*p
,
347 const struct brw_reg
*dst
,
350 const struct brw_reg
*arg0
,
351 const struct brw_reg
*arg1
)
355 for (i
= 0; i
< 4; i
++) {
357 brw_MOV(p
, dst
[i
], brw_imm_f(0));
358 brw_CMP(p
, brw_null_reg(), cond
, arg0
[i
], arg1
[i
]);
359 brw_MOV(p
, dst
[i
], brw_imm_f(1.0));
360 brw_set_predicate_control_flag_value(p
, 0xff);
365 static void emit_slt( struct brw_compile
*p
,
366 const struct brw_reg
*dst
,
368 const struct brw_reg
*arg0
,
369 const struct brw_reg
*arg1
)
371 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_L
, arg0
, arg1
);
374 static void emit_sle( struct brw_compile
*p
,
375 const struct brw_reg
*dst
,
377 const struct brw_reg
*arg0
,
378 const struct brw_reg
*arg1
)
380 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_LE
, arg0
, arg1
);
383 static void emit_sgt( struct brw_compile
*p
,
384 const struct brw_reg
*dst
,
386 const struct brw_reg
*arg0
,
387 const struct brw_reg
*arg1
)
389 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_G
, arg0
, arg1
);
392 static void emit_sge( struct brw_compile
*p
,
393 const struct brw_reg
*dst
,
395 const struct brw_reg
*arg0
,
396 const struct brw_reg
*arg1
)
398 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_GE
, arg0
, arg1
);
401 static void emit_seq( struct brw_compile
*p
,
402 const struct brw_reg
*dst
,
404 const struct brw_reg
*arg0
,
405 const struct brw_reg
*arg1
)
407 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_EQ
, arg0
, arg1
);
410 static void emit_sne( struct brw_compile
*p
,
411 const struct brw_reg
*dst
,
413 const struct brw_reg
*arg0
,
414 const struct brw_reg
*arg1
)
416 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_NEQ
, arg0
, arg1
);
419 static void emit_cmp( struct brw_compile
*p
,
420 const struct brw_reg
*dst
,
422 const struct brw_reg
*arg0
,
423 const struct brw_reg
*arg1
,
424 const struct brw_reg
*arg2
)
428 for (i
= 0; i
< 4; i
++) {
430 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
431 brw_MOV(p
, dst
[i
], arg2
[i
]);
432 brw_set_saturate(p
, 0);
434 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, arg0
[i
], brw_imm_f(0));
436 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
437 brw_MOV(p
, dst
[i
], arg1
[i
]);
438 brw_set_saturate(p
, 0);
439 brw_set_predicate_control_flag_value(p
, 0xff);
444 static void emit_max( struct brw_compile
*p
,
445 const struct brw_reg
*dst
,
447 const struct brw_reg
*arg0
,
448 const struct brw_reg
*arg1
)
452 for (i
= 0; i
< 4; i
++) {
454 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
455 brw_MOV(p
, dst
[i
], arg0
[i
]);
456 brw_set_saturate(p
, 0);
458 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, arg0
[i
], arg1
[i
]);
460 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
461 brw_MOV(p
, dst
[i
], arg1
[i
]);
462 brw_set_saturate(p
, 0);
463 brw_set_predicate_control_flag_value(p
, 0xff);
468 static void emit_min( struct brw_compile
*p
,
469 const struct brw_reg
*dst
,
471 const struct brw_reg
*arg0
,
472 const struct brw_reg
*arg1
)
476 for (i
= 0; i
< 4; i
++) {
478 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
479 brw_MOV(p
, dst
[i
], arg1
[i
]);
480 brw_set_saturate(p
, 0);
482 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, arg0
[i
], arg1
[i
]);
484 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
485 brw_MOV(p
, dst
[i
], arg0
[i
]);
486 brw_set_saturate(p
, 0);
487 brw_set_predicate_control_flag_value(p
, 0xff);
493 static void emit_dp3( struct brw_compile
*p
,
494 const struct brw_reg
*dst
,
496 const struct brw_reg
*arg0
,
497 const struct brw_reg
*arg1
)
499 assert((mask
& WRITEMASK_XYZW
) == WRITEMASK_X
);
501 brw_MUL(p
, brw_null_reg(), arg0
[0], arg1
[0]);
502 brw_MAC(p
, brw_null_reg(), arg0
[1], arg1
[1]);
504 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
505 brw_MAC(p
, dst
[0], arg0
[2], arg1
[2]);
506 brw_set_saturate(p
, 0);
510 static void emit_dp4( struct brw_compile
*p
,
511 const struct brw_reg
*dst
,
513 const struct brw_reg
*arg0
,
514 const struct brw_reg
*arg1
)
516 assert((mask
& WRITEMASK_XYZW
) == WRITEMASK_X
);
518 brw_MUL(p
, brw_null_reg(), arg0
[0], arg1
[0]);
519 brw_MAC(p
, brw_null_reg(), arg0
[1], arg1
[1]);
520 brw_MAC(p
, brw_null_reg(), arg0
[2], arg1
[2]);
522 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
523 brw_MAC(p
, dst
[0], arg0
[3], arg1
[3]);
524 brw_set_saturate(p
, 0);
528 static void emit_dph( struct brw_compile
*p
,
529 const struct brw_reg
*dst
,
531 const struct brw_reg
*arg0
,
532 const struct brw_reg
*arg1
)
534 assert((mask
& WRITEMASK_XYZW
) == WRITEMASK_X
);
536 brw_MUL(p
, brw_null_reg(), arg0
[0], arg1
[0]);
537 brw_MAC(p
, brw_null_reg(), arg0
[1], arg1
[1]);
538 brw_MAC(p
, dst
[0], arg0
[2], arg1
[2]);
540 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
541 brw_ADD(p
, dst
[0], dst
[0], arg1
[3]);
542 brw_set_saturate(p
, 0);
546 static void emit_xpd( struct brw_compile
*p
,
547 const struct brw_reg
*dst
,
549 const struct brw_reg
*arg0
,
550 const struct brw_reg
*arg1
)
554 assert(!(mask
& WRITEMASK_W
) == WRITEMASK_X
);
556 for (i
= 0 ; i
< 3; i
++) {
561 brw_MUL(p
, brw_null_reg(), negate(arg0
[i2
]), arg1
[i1
]);
563 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
564 brw_MAC(p
, dst
[i
], arg0
[i1
], arg1
[i2
]);
565 brw_set_saturate(p
, 0);
571 static void emit_math1( struct brw_compile
*p
,
573 const struct brw_reg
*dst
,
575 const struct brw_reg
*arg0
)
577 //assert((mask & WRITEMASK_XYZW) == WRITEMASK_X ||
578 // function == BRW_MATH_FUNCTION_SINCOS);
580 brw_MOV(p
, brw_message_reg(2), arg0
[0]);
582 /* Send two messages to perform all 16 operations:
587 (mask
& SATURATE
) ? BRW_MATH_SATURATE_SATURATE
: BRW_MATH_SATURATE_NONE
,
590 BRW_MATH_PRECISION_FULL
);
594 static void emit_math2( struct brw_compile
*p
,
596 const struct brw_reg
*dst
,
598 const struct brw_reg
*arg0
,
599 const struct brw_reg
*arg1
)
601 assert((mask
& WRITEMASK_XYZW
) == WRITEMASK_X
);
603 brw_push_insn_state(p
);
605 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
606 brw_MOV(p
, brw_message_reg(2), arg0
[0]);
607 brw_set_compression_control(p
, BRW_COMPRESSION_2NDHALF
);
608 brw_MOV(p
, brw_message_reg(4), sechalf(arg0
[0]));
610 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
611 brw_MOV(p
, brw_message_reg(3), arg1
[0]);
612 brw_set_compression_control(p
, BRW_COMPRESSION_2NDHALF
);
613 brw_MOV(p
, brw_message_reg(5), sechalf(arg1
[0]));
616 /* Send two messages to perform all 16 operations:
618 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
622 (mask
& SATURATE
) ? BRW_MATH_SATURATE_SATURATE
: BRW_MATH_SATURATE_NONE
,
625 BRW_MATH_DATA_VECTOR
,
626 BRW_MATH_PRECISION_FULL
);
628 brw_set_compression_control(p
, BRW_COMPRESSION_2NDHALF
);
632 (mask
& SATURATE
) ? BRW_MATH_SATURATE_SATURATE
: BRW_MATH_SATURATE_NONE
,
635 BRW_MATH_DATA_VECTOR
,
636 BRW_MATH_PRECISION_FULL
);
638 brw_pop_insn_state(p
);
643 static void emit_tex( struct brw_wm_compile
*c
,
644 const struct brw_wm_instruction
*inst
,
647 struct brw_reg
*arg
)
649 struct brw_compile
*p
= &c
->func
;
650 GLuint msgLength
, responseLength
;
651 GLboolean shadow
= (c
->key
.shadowtex_mask
& (1<<inst
->tex_unit
)) ? 1 : 0;
655 /* How many input regs are there?
657 switch (inst
->tex_idx
) {
658 case TEXTURE_1D_INDEX
:
662 case TEXTURE_2D_INDEX
:
663 case TEXTURE_RECT_INDEX
:
668 emit
= WRITEMASK_XYZ
;
680 for (i
= 0; i
< nr
; i
++) {
681 static const GLuint swz
[4] = {0,1,2,2};
683 brw_MOV(p
, brw_message_reg(msgLength
+1), arg
[swz
[i
]]);
685 brw_MOV(p
, brw_message_reg(msgLength
+1), brw_imm_f(0));
689 responseLength
= 8; /* always */
692 retype(vec16(dst
[0]), BRW_REGISTER_TYPE_UW
),
694 retype(c
->payload
.depth
[0].hw_reg
, BRW_REGISTER_TYPE_UW
),
695 inst
->tex_unit
+ 1, /* surface */
696 inst
->tex_unit
, /* sampler */
699 BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE
:
700 BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE
),
708 static void emit_txb( struct brw_wm_compile
*c
,
709 const struct brw_wm_instruction
*inst
,
712 struct brw_reg
*arg
)
714 struct brw_compile
*p
= &c
->func
;
717 /* Shadow ignored for txb.
719 switch (inst
->tex_idx
) {
720 case TEXTURE_1D_INDEX
:
721 brw_MOV(p
, brw_message_reg(2), arg
[0]);
722 brw_MOV(p
, brw_message_reg(4), brw_imm_f(0));
723 brw_MOV(p
, brw_message_reg(6), brw_imm_f(0));
725 case TEXTURE_2D_INDEX
:
726 case TEXTURE_RECT_INDEX
:
727 brw_MOV(p
, brw_message_reg(2), arg
[0]);
728 brw_MOV(p
, brw_message_reg(4), arg
[1]);
729 brw_MOV(p
, brw_message_reg(6), brw_imm_f(0));
732 brw_MOV(p
, brw_message_reg(2), arg
[0]);
733 brw_MOV(p
, brw_message_reg(4), arg
[1]);
734 brw_MOV(p
, brw_message_reg(6), arg
[2]);
738 brw_MOV(p
, brw_message_reg(8), arg
[3]);
743 retype(vec16(dst
[0]), BRW_REGISTER_TYPE_UW
),
745 retype(c
->payload
.depth
[0].hw_reg
, BRW_REGISTER_TYPE_UW
),
746 inst
->tex_unit
+ 1, /* surface */
747 inst
->tex_unit
, /* sampler */
749 BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS
,
750 8, /* responseLength */
757 static void emit_lit( struct brw_compile
*p
,
758 const struct brw_reg
*dst
,
760 const struct brw_reg
*arg0
)
762 assert((mask
& WRITEMASK_XW
) == 0);
764 if (mask
& WRITEMASK_Y
) {
765 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
766 brw_MOV(p
, dst
[1], arg0
[0]);
767 brw_set_saturate(p
, 0);
770 if (mask
& WRITEMASK_Z
) {
771 emit_math2(p
, BRW_MATH_FUNCTION_POW
,
773 WRITEMASK_X
| (mask
& SATURATE
),
778 /* Ordinarily you'd use an iff statement to skip or shortcircuit
779 * some of the POW calculations above, but 16-wide iff statements
780 * seem to lock c1 hardware, so this is a nasty workaround:
782 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_LE
, arg0
[0], brw_imm_f(0));
784 if (mask
& WRITEMASK_Y
)
785 brw_MOV(p
, dst
[1], brw_imm_f(0));
787 if (mask
& WRITEMASK_Z
)
788 brw_MOV(p
, dst
[2], brw_imm_f(0));
790 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
794 /* Kill pixel - set execution mask to zero for those pixels which
797 static void emit_kil( struct brw_wm_compile
*c
,
798 struct brw_reg
*arg0
)
800 struct brw_compile
*p
= &c
->func
;
801 struct brw_reg r0uw
= retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW
);
805 /* XXX - usually won't need 4 compares!
807 for (i
= 0; i
< 4; i
++) {
808 brw_push_insn_state(p
);
809 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_GE
, arg0
[i
], brw_imm_f(0));
810 brw_set_predicate_control_flag_value(p
, 0xff);
811 brw_AND(p
, r0uw
, brw_flag_reg(), r0uw
);
812 brw_pop_insn_state(p
);
816 static void fire_fb_write( struct brw_wm_compile
*c
,
820 struct brw_compile
*p
= &c
->func
;
822 /* Pass through control information:
824 /* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */
826 brw_push_insn_state(p
);
827 brw_set_mask_control(p
, BRW_MASK_DISABLE
); /* ? */
828 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
830 brw_message_reg(base_reg
+ 1),
832 brw_pop_insn_state(p
);
835 /* Send framebuffer write message: */
836 /* send (16) null.0<1>:uw m0 r0.0<8;8,1>:uw 0x85a04000:ud { Align1 EOT } */
838 retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW
),
840 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW
),
841 0, /* render surface always 0 */
847 static void emit_aa( struct brw_wm_compile
*c
,
848 struct brw_reg
*arg1
,
851 struct brw_compile
*p
= &c
->func
;
852 GLuint comp
= c
->key
.aa_dest_stencil_reg
/ 2;
853 GLuint off
= c
->key
.aa_dest_stencil_reg
% 2;
854 struct brw_reg aa
= offset(arg1
[comp
], off
);
856 brw_push_insn_state(p
);
857 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
); /* ?? */
858 brw_MOV(p
, brw_message_reg(reg
), aa
);
859 brw_pop_insn_state(p
);
863 /* Post-fragment-program processing. Send the results to the
866 static void emit_fb_write( struct brw_wm_compile
*c
,
867 struct brw_reg
*arg0
,
868 struct brw_reg
*arg1
,
869 struct brw_reg
*arg2
)
871 struct brw_compile
*p
= &c
->func
;
875 /* Reserve a space for AA - may not be needed:
877 if (c
->key
.aa_dest_stencil_reg
)
880 /* I don't really understand how this achieves the color interleave
881 * (ie RGBARGBA) in the result: [Do the saturation here]
884 brw_push_insn_state(p
);
886 for (channel
= 0; channel
< 4; channel
++) {
887 /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */
888 /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */
890 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
892 brw_message_reg(nr
+ channel
),
895 brw_set_compression_control(p
, BRW_COMPRESSION_2NDHALF
);
897 brw_message_reg(nr
+ channel
+ 4),
898 sechalf(arg0
[channel
]));
901 /* skip over the regs populated above:
905 brw_pop_insn_state(p
);
908 if (c
->key
.source_depth_to_render_target
)
910 if (c
->key
.computes_depth
)
911 brw_MOV(p
, brw_message_reg(nr
), arg2
[2]);
913 brw_MOV(p
, brw_message_reg(nr
), arg1
[1]); /* ? */
918 if (c
->key
.dest_depth_reg
)
920 GLuint comp
= c
->key
.dest_depth_reg
/ 2;
921 GLuint off
= c
->key
.dest_depth_reg
% 2;
924 brw_push_insn_state(p
);
925 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
926 brw_MOV(p
, brw_message_reg(nr
), arg1
[comp
]);
928 brw_MOV(p
, brw_message_reg(nr
+1), offset(arg1
[comp
],1));
929 brw_pop_insn_state(p
);
932 brw_MOV(p
, brw_message_reg(nr
), arg1
[comp
]);
938 if (!c
->key
.runtime_check_aads_emit
) {
939 if (c
->key
.aa_dest_stencil_reg
)
942 fire_fb_write(c
, 0, nr
);
945 struct brw_reg v1_null_ud
= vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD
));
946 struct brw_reg ip
= brw_ip_reg();
947 struct brw_instruction
*jmp
;
949 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
950 brw_set_conditionalmod(p
, BRW_CONDITIONAL_Z
);
953 get_element_ud(brw_vec8_grf(1,0), 6),
956 jmp
= brw_JMPI(p
, ip
, ip
, brw_imm_w(0));
959 fire_fb_write(c
, 0, nr
);
960 /* note - thread killed in subroutine */
962 brw_land_fwd_jump(p
, jmp
);
964 /* ELSE: Shuffle up one register to fill in the hole left for AA:
966 fire_fb_write(c
, 1, nr
-1);
973 /* Post-fragment-program processing. Send the results to the
976 static void emit_spill( struct brw_wm_compile
*c
,
980 struct brw_compile
*p
= &c
->func
;
983 mov (16) m2.0<1>:ud r2.0<8;8,1>:ud { Align1 Compr }
985 brw_MOV(p
, brw_message_reg(2), reg
);
988 mov (1) r0.2<1>:d 0x00000080:d { Align1 NoMask }
989 send (16) null.0<1>:uw m1 r0.0<8;8,1>:uw 0x053003ff:ud { Align1 }
992 retype(vec16(brw_vec8_grf(0, 0)), BRW_REGISTER_TYPE_UW
),
997 static void emit_unspill( struct brw_wm_compile
*c
,
1001 struct brw_compile
*p
= &c
->func
;
1003 /* Slot 0 is the undef value.
1006 brw_MOV(p
, reg
, brw_imm_f(0));
1011 mov (1) r0.2<1>:d 0x000000c0:d { Align1 NoMask }
1012 send (16) r110.0<1>:uw m1 r0.0<8;8,1>:uw 0x041243ff:ud { Align1 }
1016 retype(vec16(reg
), BRW_REGISTER_TYPE_UW
),
1024 * Retrieve upto 4 GEN4 register pairs for the given wm reg:
1026 static void get_argument_regs( struct brw_wm_compile
*c
,
1027 struct brw_wm_ref
*arg
[],
1028 struct brw_reg
*regs
)
1032 for (i
= 0; i
< 4; i
++) {
1035 if (arg
[i
]->unspill_reg
)
1037 brw_vec8_grf(arg
[i
]->unspill_reg
, 0),
1038 arg
[i
]->value
->spill_slot
);
1040 regs
[i
] = arg
[i
]->hw_reg
;
1043 regs
[i
] = brw_null_reg();
1048 static void spill_values( struct brw_wm_compile
*c
,
1049 struct brw_wm_value
*values
,
1054 for (i
= 0; i
< nr
; i
++)
1055 if (values
[i
].spill_slot
)
1056 emit_spill(c
, values
[i
].hw_reg
, values
[i
].spill_slot
);
1061 /* Emit the fragment program instructions here.
1063 void brw_wm_emit( struct brw_wm_compile
*c
)
1065 struct brw_compile
*p
= &c
->func
;
1068 brw_set_compression_control(p
, BRW_COMPRESSION_COMPRESSED
);
1070 /* Check if any of the payload regs need to be spilled:
1072 spill_values(c
, c
->payload
.depth
, 4);
1073 spill_values(c
, c
->creg
, c
->nr_creg
);
1074 spill_values(c
, c
->payload
.input_interp
, FRAG_ATTRIB_MAX
);
1077 for (insn
= 0; insn
< c
->nr_insns
; insn
++) {
1079 struct brw_wm_instruction
*inst
= &c
->instruction
[insn
];
1080 struct brw_reg args
[3][4], dst
[4];
1081 GLuint i
, dst_flags
;
1083 /* Get argument regs:
1085 for (i
= 0; i
< 3; i
++)
1086 get_argument_regs(c
, inst
->src
[i
], args
[i
]);
1090 for (i
= 0; i
< 4; i
++)
1092 dst
[i
] = inst
->dst
[i
]->hw_reg
;
1094 dst
[i
] = brw_null_reg();
1098 dst_flags
= inst
->writemask
;
1100 dst_flags
|= SATURATE
;
1102 switch (inst
->opcode
) {
1103 /* Generated instructions for calculating triangle interpolants:
1106 emit_pixel_xy(p
, dst
, dst_flags
, args
[0]);
1110 emit_delta_xy(p
, dst
, dst_flags
, args
[0], args
[1]);
1114 emit_wpos_xy(p
, dst
, dst_flags
, args
[0]);
1118 emit_pixel_w(p
, dst
, dst_flags
, args
[0], args
[1]);
1122 emit_linterp(p
, dst
, dst_flags
, args
[0], args
[1]);
1126 emit_pinterp(p
, dst
, dst_flags
, args
[0], args
[1], args
[2]);
1130 emit_cinterp(p
, dst
, dst_flags
, args
[0]);
1134 emit_fb_write(c
, args
[0], args
[1], args
[2]);
1137 /* Straightforward arithmetic:
1140 emit_alu2(p
, brw_ADD
, dst
, dst_flags
, args
[0], args
[1]);
1144 emit_alu1(p
, brw_FRC
, dst
, dst_flags
, args
[0]);
1148 emit_alu1(p
, brw_RNDD
, dst
, dst_flags
, args
[0]);
1151 case OPCODE_DP3
: /* */
1152 emit_dp3(p
, dst
, dst_flags
, args
[0], args
[1]);
1156 emit_dp4(p
, dst
, dst_flags
, args
[0], args
[1]);
1160 emit_dph(p
, dst
, dst_flags
, args
[0], args
[1]);
1163 case OPCODE_LRP
: /* */
1164 emit_lrp(p
, dst
, dst_flags
, args
[0], args
[1], args
[2]);
1168 emit_mad(p
, dst
, dst_flags
, args
[0], args
[1], args
[2]);
1173 emit_alu1(p
, brw_MOV
, dst
, dst_flags
, args
[0]);
1177 emit_alu2(p
, brw_MUL
, dst
, dst_flags
, args
[0], args
[1]);
1181 emit_xpd(p
, dst
, dst_flags
, args
[0], args
[1]);
1184 /* Higher math functions:
1187 emit_math1(p
, BRW_MATH_FUNCTION_INV
, dst
, dst_flags
, args
[0]);
1191 emit_math1(p
, BRW_MATH_FUNCTION_RSQ
, dst
, dst_flags
, args
[0]);
1195 emit_math1(p
, BRW_MATH_FUNCTION_SIN
, dst
, dst_flags
, args
[0]);
1199 emit_math1(p
, BRW_MATH_FUNCTION_COS
, dst
, dst_flags
, args
[0]);
1203 emit_math1(p
, BRW_MATH_FUNCTION_EXP
, dst
, dst_flags
, args
[0]);
1207 emit_math1(p
, BRW_MATH_FUNCTION_LOG
, dst
, dst_flags
, args
[0]);
1211 /* There is an scs math function, but it would need some
1212 * fixup for 16-element execution.
1214 if (dst_flags
& WRITEMASK_X
)
1215 emit_math1(p
, BRW_MATH_FUNCTION_COS
, dst
, (dst_flags
&SATURATE
)|WRITEMASK_X
, args
[0]);
1216 if (dst_flags
& WRITEMASK_Y
)
1217 emit_math1(p
, BRW_MATH_FUNCTION_SIN
, dst
+1, (dst_flags
&SATURATE
)|WRITEMASK_X
, args
[0]);
1221 emit_math2(p
, BRW_MATH_FUNCTION_POW
, dst
, dst_flags
, args
[0], args
[1]);
1227 emit_cmp(p
, dst
, dst_flags
, args
[0], args
[1], args
[2]);
1231 emit_max(p
, dst
, dst_flags
, args
[0], args
[1]);
1235 emit_min(p
, dst
, dst_flags
, args
[0], args
[1]);
1239 emit_slt(p
, dst
, dst_flags
, args
[0], args
[1]);
1243 emit_sle(p
, dst
, dst_flags
, args
[0], args
[1]);
1246 emit_sgt(p
, dst
, dst_flags
, args
[0], args
[1]);
1249 emit_sge(p
, dst
, dst_flags
, args
[0], args
[1]);
1252 emit_seq(p
, dst
, dst_flags
, args
[0], args
[1]);
1255 emit_sne(p
, dst
, dst_flags
, args
[0], args
[1]);
1259 emit_lit(p
, dst
, dst_flags
, args
[0]);
1262 /* Texturing operations:
1265 emit_tex(c
, inst
, dst
, dst_flags
, args
[0]);
1269 emit_txb(c
, inst
, dst
, dst_flags
, args
[0]);
1273 emit_kil(c
, args
[0]);
1277 _mesa_printf("unsupport opcode %d in fragment program\n",
1281 for (i
= 0; i
< 4; i
++)
1282 if (inst
->dst
[i
] && inst
->dst
[i
]->spill_slot
)
1284 inst
->dst
[i
]->hw_reg
,
1285 inst
->dst
[i
]->spill_slot
);