2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **********************************************************************/
29 * Keith Whitwell <keith@tungstengraphics.com>
34 #include "brw_context.h"
37 #define SATURATE (1<<5)
39 /* Not quite sure how correct this is - need to understand horiz
40 * vs. vertical strides a little better.
42 static __inline
struct brw_reg
sechalf( struct brw_reg reg
)
51 * R0.0 -- pixel mask, one bit for each of 4 pixels in 4 tiles,
52 * corresponding to each of the 16 execution channels.
54 * R1.0 -- triangle vertex 0.X
55 * R1.1 -- triangle vertex 0.Y
56 * R1.2 -- tile 0 x,y coords (2 packed uwords)
57 * R1.3 -- tile 1 x,y coords (2 packed uwords)
58 * R1.4 -- tile 2 x,y coords (2 packed uwords)
59 * R1.5 -- tile 3 x,y coords (2 packed uwords)
/* emit_pixel_xy: set up the operands for per-pixel X and/or Y coordinates.
 *
 * p    - instruction compile state; compression is set to NONE on entry
 *        and to COMPRESSED again at the end.
 * dst  - dst[0] receives X, dst[1] receives Y (both retyped to UW, vec16).
 * arg0 - not referenced in the lines visible here.
 *
 * X reads r1 (GRF 1, retyped to UW) at sub-offset 4 and pairs it with the
 * immediate vector 0x10101010; Y reads sub-offset 5 with 0x11001100.
 * Both use stride(..., 2, 4, 0).
 *
 * NOTE(review): `mask` is tested below but is not declared in the visible
 * parameter list, and the instruction that consumes these operands is not
 * visible in this listing - confirm against the full file.
 */
66 static void emit_pixel_xy(struct brw_compile
*p
,
67 const struct brw_reg
*dst
,
69 const struct brw_reg
*arg0
)
71 struct brw_reg r1
= brw_vec1_grf(1, 0);
72 struct brw_reg r1_uw
= retype(r1
, BRW_REGISTER_TYPE_UW
);
74 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
76 /* Calculate pixel centers by adding 1 or 0 to each of the
77 * micro-tile coordinates passed in r1.
79 if (mask
& WRITEMASK_X
) {
81 vec16(retype(dst
[0], BRW_REGISTER_TYPE_UW
)),
82 stride(suboffset(r1_uw
, 4), 2, 4, 0),
83 brw_imm_v(0x10101010));
86 if (mask
& WRITEMASK_Y
) {
88 vec16(retype(dst
[1], BRW_REGISTER_TYPE_UW
)),
89 stride(suboffset(r1_uw
,5), 2, 4, 0),
90 brw_imm_v(0x11001100));
93 brw_set_compression_control(p
, BRW_COMPRESSION_COMPRESSED
);
/* emit_delta_xy: per the comment in the body, computes delta X/Y by
 * subtracting the origin held in r1 (GRF 1) from the pixel coordinates.
 *
 * arg0 - arg0[0] (X) and arg0[1] (Y) are read retyped to UW.
 * arg1 - not referenced in the lines visible here.
 *
 * The X part is guarded by WRITEMASK_X and the Y part by WRITEMASK_Y.
 * For Y the second operand is negate(suboffset(r1, 1)).
 *
 * NOTE(review): `mask`, the destination operands, the X origin operand and
 * the emitting calls are not visible in this listing - confirm.
 */
98 static void emit_delta_xy(struct brw_compile
*p
,
99 const struct brw_reg
*dst
,
101 const struct brw_reg
*arg0
,
102 const struct brw_reg
*arg1
)
104 struct brw_reg r1
= brw_vec1_grf(1, 0);
106 /* Calc delta X,Y by subtracting origin in r1 from the pixel
109 if (mask
& WRITEMASK_X
) {
112 retype(arg0
[0], BRW_REGISTER_TYPE_UW
),
116 if (mask
& WRITEMASK_Y
) {
119 retype(arg0
[1], BRW_REGISTER_TYPE_UW
),
120 negate(suboffset(r1
,1)));
/* emit_wpos_xy: sets up window-position X/Y operands from arg0.
 *
 * X (WRITEMASK_X) uses arg0[0] retyped to UW; Y (WRITEMASK_Y) uses the
 * negation of arg0[1] retyped to UW.  The in-body TODO records that the
 * "window_height - Y" term is still to be done.
 *
 * NOTE(review): the "Calc delta X,Y by subtracting origin in r1" comment
 * in the body matches the one in emit_delta_xy, yet no r1 reference is
 * visible in this function - it looks like a stale copy; confirm.
 * `mask` and the emitting calls are not visible in this listing.
 */
125 static void emit_wpos_xy(struct brw_compile
*p
,
126 const struct brw_reg
*dst
,
128 const struct brw_reg
*arg0
)
130 /* Calc delta X,Y by subtracting origin in r1 from the pixel
133 if (mask
& WRITEMASK_X
) {
136 retype(arg0
[0], BRW_REGISTER_TYPE_UW
));
139 if (mask
& WRITEMASK_Y
) {
140 /* TODO -- window_height - Y */
143 negate(retype(arg0
[1], BRW_REGISTER_TYPE_UW
)));
/* emit_pixel_w: when WRITEMASK_W is set, interpolates 1/w and inverts it.
 *
 * arg0   - only arg0[0].nr is used: interp3 is the scalar GRF at
 *          (arg0[0].nr + 1, subreg 4).
 * deltas - deltas[0] feeds the LINE, deltas[1] feeds the MAC.
 * dst    - dst[3] receives the brw_math_16 result.
 *
 * Sequence: LINE into the null register, MAC into message register 2
 * (using interp3 at sub-offset 1), then brw_math_16 with
 * BRW_MATH_FUNCTION_INV, no saturation and full precision.
 *
 * NOTE(review): `mask` is not declared in the visible parameter list and
 * one brw_math_16 argument line is missing from this listing.
 */
149 static void emit_pixel_w( struct brw_compile
*p
,
150 const struct brw_reg
*dst
,
152 const struct brw_reg
*arg0
,
153 const struct brw_reg
*deltas
)
155 /* Don't need this if all you are doing is interpolating color, for
158 if (mask
& WRITEMASK_W
) {
159 struct brw_reg interp3
= brw_vec1_grf(arg0
[0].nr
+1, 4);
161 /* Calc 1/w - just linterp wpos[3] optimized by putting the
162 * result straight into a message reg.
164 brw_LINE(p
, brw_null_reg(), interp3
, deltas
[0]);
165 brw_MAC(p
, brw_message_reg(2), suboffset(interp3
, 1), deltas
[1]);
168 brw_math_16( p
, dst
[3],
169 BRW_MATH_FUNCTION_INV
,
170 BRW_MATH_SATURATE_NONE
,
172 BRW_MATH_PRECISION_FULL
);
/* emit_linterp: linear interpolation of up to four channels.
 *
 * arg0   - only arg0[0].nr is used; with nr = arg0[0].nr the four
 *          coefficient registers are the scalar GRFs (nr,0), (nr,4),
 *          (nr+1,0) and (nr+1,4).
 * deltas - deltas[0] is the LINE operand, deltas[1] the MAC operand.
 * dst    - dst[i] receives the MAC result for channel i.
 *
 * For each i in 0..3: LINE(null, interp[i], deltas[0]) followed by
 * MAC(dst[i], suboffset(interp[i], 1), deltas[1]).
 *
 * NOTE(review): the declaration of `i` and any per-channel writemask test
 * are not visible in this listing.
 */
178 static void emit_linterp( struct brw_compile
*p
,
179 const struct brw_reg
*dst
,
181 const struct brw_reg
*arg0
,
182 const struct brw_reg
*deltas
)
184 struct brw_reg interp
[4];
185 GLuint nr
= arg0
[0].nr
;
188 interp
[0] = brw_vec1_grf(nr
, 0);
189 interp
[1] = brw_vec1_grf(nr
, 4);
190 interp
[2] = brw_vec1_grf(nr
+1, 0);
191 interp
[3] = brw_vec1_grf(nr
+1, 4);
193 for(i
= 0; i
< 4; i
++ ) {
195 brw_LINE(p
, brw_null_reg(), interp
[i
], deltas
[0]);
196 brw_MAC(p
, dst
[i
], suboffset(interp
[i
],1), deltas
[1]);
/* emit_pinterp: same LINE/MAC interpolation as emit_linterp, followed by
 * a multiply of each result by w[3].
 *
 * arg0   - only arg0[0].nr is used to locate the four scalar coefficient
 *          registers (nr,0), (nr,4), (nr+1,0), (nr+1,4).
 * deltas - deltas[0] feeds LINE, deltas[1] feeds MAC.
 * w      - only w[3] is read.
 *
 * For each i in 0..3: LINE(null, interp[i], deltas[0]);
 * MAC(dst[i], suboffset(interp[i], 1), deltas[1]);
 * MUL(dst[i], dst[i], w[3]).
 *
 * NOTE(review): the declaration of `i` and any per-channel writemask test
 * are not visible in this listing.
 */
202 static void emit_pinterp( struct brw_compile
*p
,
203 const struct brw_reg
*dst
,
205 const struct brw_reg
*arg0
,
206 const struct brw_reg
*deltas
,
207 const struct brw_reg
*w
)
209 struct brw_reg interp
[4];
210 GLuint nr
= arg0
[0].nr
;
213 interp
[0] = brw_vec1_grf(nr
, 0);
214 interp
[1] = brw_vec1_grf(nr
, 4);
215 interp
[2] = brw_vec1_grf(nr
+1, 0);
216 interp
[3] = brw_vec1_grf(nr
+1, 4);
218 for(i
= 0; i
< 4; i
++ ) {
220 brw_LINE(p
, brw_null_reg(), interp
[i
], deltas
[0]);
221 brw_MAC(p
, dst
[i
], suboffset(interp
[i
],1), deltas
[1]);
222 brw_MUL(p
, dst
[i
], dst
[i
], w
[3]);
/* emit_cinterp: copies one element of each coefficient register into dst.
 *
 * arg0 - only arg0[0].nr is used to locate the four scalar registers
 *        (nr,0), (nr,4), (nr+1,0), (nr+1,4).
 * dst  - dst[i] receives suboffset(interp[i], 3) via a plain MOV.
 *
 * NOTE(review): the declaration of `i` and any per-channel writemask test
 * are not visible in this listing.
 */
227 static void emit_cinterp( struct brw_compile
*p
,
228 const struct brw_reg
*dst
,
230 const struct brw_reg
*arg0
)
232 struct brw_reg interp
[4];
233 GLuint nr
= arg0
[0].nr
;
236 interp
[0] = brw_vec1_grf(nr
, 0);
237 interp
[1] = brw_vec1_grf(nr
, 4);
238 interp
[2] = brw_vec1_grf(nr
+1, 0);
239 interp
[3] = brw_vec1_grf(nr
+1, 4);
241 for(i
= 0; i
< 4; i
++ ) {
243 brw_MOV(p
, dst
[i
], suboffset(interp
[i
],3)); /* TODO: optimize away like other moves */
/* emit_alu1: applies a one-source instruction emitter to four channels.
 *
 * func - emitter callback, called as func(p, dst[i], arg0[i]).
 * dst  - per-channel destinations.
 * arg0 - per-channel sources.
 *
 * brw_set_saturate(p, 1) appears before the loop and
 * brw_set_saturate(p, 0) after it.
 *
 * NOTE(review): the conditions guarding the two saturate calls, the
 * per-channel writemask test, and the tail of func's prototype are not
 * visible in this listing - presumably saturation depends on the
 * SATURATE flag; confirm.
 */
252 static void emit_alu1( struct brw_compile
*p
,
253 struct brw_instruction
*(*func
)(struct brw_compile
*,
256 const struct brw_reg
*dst
,
258 const struct brw_reg
*arg0
)
263 brw_set_saturate(p
, 1);
265 for (i
= 0; i
< 4; i
++) {
267 func(p
, dst
[i
], arg0
[i
]);
272 brw_set_saturate(p
, 0);
/* emit_alu2: applies a two-source instruction emitter to four channels.
 *
 * func - emitter callback, called as func(p, dst[i], arg0[i], arg1[i]).
 * dst  - per-channel destinations.
 * arg0 - per-channel first sources.
 * arg1 - per-channel second sources.
 *
 * brw_set_saturate(p, 1) appears before the loop and
 * brw_set_saturate(p, 0) after it.
 *
 * NOTE(review): the conditions guarding the two saturate calls, the
 * per-channel writemask test, and the tail of func's prototype are not
 * visible in this listing - confirm.
 */
275 static void emit_alu2( struct brw_compile
*p
,
276 struct brw_instruction
*(*func
)(struct brw_compile
*,
280 const struct brw_reg
*dst
,
282 const struct brw_reg
*arg0
,
283 const struct brw_reg
*arg1
)
288 brw_set_saturate(p
, 1);
290 for (i
= 0; i
< 4; i
++) {
292 func(p
, dst
[i
], arg0
[i
], arg1
[i
]);
297 brw_set_saturate(p
, 0);
/* emit_mad: multiply-add, dst = arg0 * arg1 + arg2, per channel.
 *
 * For each i in 0..3: MUL(dst[i], arg0[i], arg1[i]), then
 * ADD(dst[i], dst[i], arg2[i]).  Saturation is enabled only around the
 * ADD, and only when `mask` carries the SATURATE bit; it is cleared
 * again straight afterwards.
 *
 * dst[i] is used as the intermediate, so it is written twice.
 *
 * NOTE(review): `mask`, `i` and any per-channel writemask test are not
 * visible in this listing.
 */
301 static void emit_mad( struct brw_compile
*p
,
302 const struct brw_reg
*dst
,
304 const struct brw_reg
*arg0
,
305 const struct brw_reg
*arg1
,
306 const struct brw_reg
*arg2
)
310 for (i
= 0; i
< 4; i
++) {
312 brw_MUL(p
, dst
[i
], arg0
[i
], arg1
[i
]);
314 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
315 brw_ADD(p
, dst
[i
], dst
[i
], arg2
[i
]);
316 brw_set_saturate(p
, 0);
/* emit_lrp: linear blend of arg1 and arg2 weighted by arg0, per channel.
 *
 * For each i in 0..3:
 *   ADD(dst[i], -arg0[i], 1.0)        - dst holds (1 - arg0) temporarily
 *   MUL(null,   dst[i],  arg2[i])     - result goes to the null register
 *   MAC(dst[i], arg0[i], arg1[i])     - saturated when mask has SATURATE
 *
 * Presumably the MUL leaves (1 - arg0) * arg2 in the accumulator and the
 * MAC adds arg0 * arg1 to it - confirm against the EU ISA documentation.
 *
 * NOTE(review): `mask`, `i` and any per-channel writemask test are not
 * visible in this listing.
 */
322 static void emit_lrp( struct brw_compile
*p
,
323 const struct brw_reg
*dst
,
325 const struct brw_reg
*arg0
,
326 const struct brw_reg
*arg1
,
327 const struct brw_reg
*arg2
)
331 /* Uses dst as a temporary:
333 for (i
= 0; i
< 4; i
++) {
335 /* Can I use the LINE instruction for this?
337 brw_ADD(p
, dst
[i
], negate(arg0
[i
]), brw_imm_f(1.0));
338 brw_MUL(p
, brw_null_reg(), dst
[i
], arg2
[i
]);
340 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
341 brw_MAC(p
, dst
[i
], arg0
[i
], arg1
[i
]);
342 brw_set_saturate(p
, 0);
/* emit_sop: shared body of the "set on comparison" opcodes.
 *
 * For each i in 0..3:
 *   MOV(dst[i], 0.0)
 *   CMP(null, cond, arg0[i], arg1[i])
 *   MOV(dst[i], 1.0)
 *   brw_set_predicate_control_flag_value(p, 0xff)
 *
 * Presumably the second MOV is predicated on the CMP result, so dst ends
 * up 1.0 where the comparison holds and 0.0 elsewhere - confirm.
 *
 * NOTE(review): `cond` is used but not declared in the visible parameter
 * list; `mask`, `i` and any per-channel writemask test are not visible.
 */
346 static void emit_sop( struct brw_compile
*p
,
347 const struct brw_reg
*dst
,
350 const struct brw_reg
*arg0
,
351 const struct brw_reg
*arg1
)
355 for (i
= 0; i
< 4; i
++) {
357 brw_MOV(p
, dst
[i
], brw_imm_f(0));
358 brw_CMP(p
, brw_null_reg(), cond
, arg0
[i
], arg1
[i
]);
359 brw_MOV(p
, dst
[i
], brw_imm_f(1.0));
360 brw_set_predicate_control_flag_value(p
, 0xff);
/* emit_slt: "set on less than" - forwards to emit_sop with
 * BRW_CONDITIONAL_L, passing p, dst, mask, arg0 and arg1 through.
 * NOTE(review): `mask` is not declared in the visible parameter list.
 */
365 static void emit_slt( struct brw_compile
*p
,
366 const struct brw_reg
*dst
,
368 const struct brw_reg
*arg0
,
369 const struct brw_reg
*arg1
)
371 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_L
, arg0
, arg1
);
/* emit_sle: "set on less or equal" - forwards to emit_sop with
 * BRW_CONDITIONAL_LE, passing p, dst, mask, arg0 and arg1 through.
 * NOTE(review): `mask` is not declared in the visible parameter list.
 */
374 static void emit_sle( struct brw_compile
*p
,
375 const struct brw_reg
*dst
,
377 const struct brw_reg
*arg0
,
378 const struct brw_reg
*arg1
)
380 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_LE
, arg0
, arg1
);
/* emit_sgt: "set on greater than" - forwards to emit_sop with
 * BRW_CONDITIONAL_G, passing p, dst, mask, arg0 and arg1 through.
 * NOTE(review): `mask` is not declared in the visible parameter list.
 */
383 static void emit_sgt( struct brw_compile
*p
,
384 const struct brw_reg
*dst
,
386 const struct brw_reg
*arg0
,
387 const struct brw_reg
*arg1
)
389 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_G
, arg0
, arg1
);
/* emit_sge: "set on greater or equal" - forwards to emit_sop with
 * BRW_CONDITIONAL_GE, passing p, dst, mask, arg0 and arg1 through.
 * NOTE(review): `mask` is not declared in the visible parameter list.
 */
392 static void emit_sge( struct brw_compile
*p
,
393 const struct brw_reg
*dst
,
395 const struct brw_reg
*arg0
,
396 const struct brw_reg
*arg1
)
398 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_GE
, arg0
, arg1
);
/* emit_seq: "set on equal" - forwards to emit_sop with
 * BRW_CONDITIONAL_EQ, passing p, dst, mask, arg0 and arg1 through.
 * NOTE(review): `mask` is not declared in the visible parameter list.
 */
401 static void emit_seq( struct brw_compile
*p
,
402 const struct brw_reg
*dst
,
404 const struct brw_reg
*arg0
,
405 const struct brw_reg
*arg1
)
407 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_EQ
, arg0
, arg1
);
/* emit_sne: "set on not equal" - forwards to emit_sop with
 * BRW_CONDITIONAL_NEQ, passing p, dst, mask, arg0 and arg1 through.
 * NOTE(review): `mask` is not declared in the visible parameter list.
 */
410 static void emit_sne( struct brw_compile
*p
,
411 const struct brw_reg
*dst
,
413 const struct brw_reg
*arg0
,
414 const struct brw_reg
*arg1
)
416 emit_sop(p
, dst
, mask
, BRW_CONDITIONAL_NEQ
, arg0
, arg1
);
/* emit_cmp: per-channel select between arg1 and arg2 based on arg0 < 0.
 *
 * For each i in 0..3:
 *   MOV(dst[i], arg2[i])                      - default value
 *   CMP(null, BRW_CONDITIONAL_L, arg0[i], 0)  - tests arg0 < 0
 *   MOV(dst[i], arg1[i])                      - presumably predicated
 *   brw_set_predicate_control_flag_value(p, 0xff)
 *
 * Each MOV is bracketed by brw_set_saturate so that saturation applies
 * only when `mask` carries the SATURATE bit.
 *
 * NOTE(review): `mask`, `i` and any per-channel writemask test are not
 * visible in this listing; that the second MOV is predicated on the CMP
 * is an assumption to confirm.
 */
419 static void emit_cmp( struct brw_compile
*p
,
420 const struct brw_reg
*dst
,
422 const struct brw_reg
*arg0
,
423 const struct brw_reg
*arg1
,
424 const struct brw_reg
*arg2
)
428 for (i
= 0; i
< 4; i
++) {
430 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
431 brw_MOV(p
, dst
[i
], arg2
[i
]);
432 brw_set_saturate(p
, 0);
434 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, arg0
[i
], brw_imm_f(0));
436 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
437 brw_MOV(p
, dst
[i
], arg1
[i
]);
438 brw_set_saturate(p
, 0);
439 brw_set_predicate_control_flag_value(p
, 0xff);
/* emit_max: per-channel maximum of arg0 and arg1.
 *
 * For each i in 0..3:
 *   MOV(dst[i], arg0[i])                            - start with arg0
 *   CMP(null, BRW_CONDITIONAL_L, arg0[i], arg1[i])  - tests arg0 < arg1
 *   MOV(dst[i], arg1[i])                            - presumably predicated
 *   brw_set_predicate_control_flag_value(p, 0xff)
 *
 * Each MOV is saturated only when `mask` carries the SATURATE bit.
 *
 * NOTE(review): `mask`, `i` and any per-channel writemask test are not
 * visible in this listing; predication of the second MOV is assumed.
 */
444 static void emit_max( struct brw_compile
*p
,
445 const struct brw_reg
*dst
,
447 const struct brw_reg
*arg0
,
448 const struct brw_reg
*arg1
)
452 for (i
= 0; i
< 4; i
++) {
454 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
455 brw_MOV(p
, dst
[i
], arg0
[i
]);
456 brw_set_saturate(p
, 0);
458 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, arg0
[i
], arg1
[i
]);
460 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
461 brw_MOV(p
, dst
[i
], arg1
[i
]);
462 brw_set_saturate(p
, 0);
463 brw_set_predicate_control_flag_value(p
, 0xff);
/* emit_min: per-channel minimum of arg0 and arg1.
 *
 * Mirror image of emit_max.  For each i in 0..3:
 *   MOV(dst[i], arg1[i])                            - start with arg1
 *   CMP(null, BRW_CONDITIONAL_L, arg0[i], arg1[i])  - tests arg0 < arg1
 *   MOV(dst[i], arg0[i])                            - presumably predicated
 *   brw_set_predicate_control_flag_value(p, 0xff)
 *
 * Each MOV is saturated only when `mask` carries the SATURATE bit.
 *
 * NOTE(review): `mask`, `i` and any per-channel writemask test are not
 * visible in this listing; predication of the second MOV is assumed.
 */
468 static void emit_min( struct brw_compile
*p
,
469 const struct brw_reg
*dst
,
471 const struct brw_reg
*arg0
,
472 const struct brw_reg
*arg1
)
476 for (i
= 0; i
< 4; i
++) {
478 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
479 brw_MOV(p
, dst
[i
], arg1
[i
]);
480 brw_set_saturate(p
, 0);
482 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, arg0
[i
], arg1
[i
]);
484 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
485 brw_MOV(p
, dst
[i
], arg0
[i
]);
486 brw_set_saturate(p
, 0);
487 brw_set_predicate_control_flag_value(p
, 0xff);
/* emit_dp3: three-component dot product of arg0 and arg1 into dst[0].
 *
 * Asserts that, of the XYZW bits in `mask`, exactly WRITEMASK_X is set.
 *
 * Sequence: MUL(null, arg0[0], arg1[0]); MAC(null, arg0[1], arg1[1]);
 * MAC(dst[0], arg0[2], arg1[2]).  Only the final MAC is saturated, and
 * only when `mask` carries the SATURATE bit.
 *
 * NOTE(review): `mask` is not declared in the visible parameter list.
 */
493 static void emit_dp3( struct brw_compile
*p
,
494 const struct brw_reg
*dst
,
496 const struct brw_reg
*arg0
,
497 const struct brw_reg
*arg1
)
499 assert((mask
& WRITEMASK_XYZW
) == WRITEMASK_X
);
501 brw_MUL(p
, brw_null_reg(), arg0
[0], arg1
[0]);
502 brw_MAC(p
, brw_null_reg(), arg0
[1], arg1
[1]);
504 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
505 brw_MAC(p
, dst
[0], arg0
[2], arg1
[2]);
506 brw_set_saturate(p
, 0);
/* emit_dp4: four-component dot product of arg0 and arg1 into dst[0].
 *
 * Asserts that, of the XYZW bits in `mask`, exactly WRITEMASK_X is set.
 *
 * Sequence: MUL(null, [0]); MAC(null, [1]); MAC(null, [2]);
 * MAC(dst[0], arg0[3], arg1[3]).  Only the final MAC is saturated, and
 * only when `mask` carries the SATURATE bit.
 *
 * NOTE(review): `mask` is not declared in the visible parameter list.
 */
510 static void emit_dp4( struct brw_compile
*p
,
511 const struct brw_reg
*dst
,
513 const struct brw_reg
*arg0
,
514 const struct brw_reg
*arg1
)
516 assert((mask
& WRITEMASK_XYZW
) == WRITEMASK_X
);
518 brw_MUL(p
, brw_null_reg(), arg0
[0], arg1
[0]);
519 brw_MAC(p
, brw_null_reg(), arg0
[1], arg1
[1]);
520 brw_MAC(p
, brw_null_reg(), arg0
[2], arg1
[2]);
522 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
523 brw_MAC(p
, dst
[0], arg0
[3], arg1
[3]);
524 brw_set_saturate(p
, 0);
/* emit_dph: homogeneous dot product - the three-component dot product of
 * arg0 and arg1 plus arg1[3], written to dst[0].
 *
 * Asserts that, of the XYZW bits in `mask`, exactly WRITEMASK_X is set.
 *
 * Sequence: MUL(null, [0]); MAC(null, [1]); MAC(dst[0], [2]);
 * ADD(dst[0], dst[0], arg1[3]).  Only the final ADD is saturated, and
 * only when `mask` carries the SATURATE bit.
 *
 * NOTE(review): `mask` is not declared in the visible parameter list.
 */
528 static void emit_dph( struct brw_compile
*p
,
529 const struct brw_reg
*dst
,
531 const struct brw_reg
*arg0
,
532 const struct brw_reg
*arg1
)
534 assert((mask
& WRITEMASK_XYZW
) == WRITEMASK_X
);
536 brw_MUL(p
, brw_null_reg(), arg0
[0], arg1
[0]);
537 brw_MAC(p
, brw_null_reg(), arg0
[1], arg1
[1]);
538 brw_MAC(p
, dst
[0], arg0
[2], arg1
[2]);
540 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
541 brw_ADD(p
, dst
[0], dst
[0], arg1
[3]);
542 brw_set_saturate(p
, 0);
/* emit_xpd: cross product of arg0 and arg1 into dst[0..2].
 *
 * For each i in 0..2:
 *   MUL(null,   -arg0[i2], arg1[i1])
 *   MAC(dst[i],  arg0[i1], arg1[i2])   - saturated when mask has SATURATE
 *
 * NOTE(review): the assert reads `!(mask & WRITEMASK_W) == WRITEMASK_X`.
 * Unary `!` binds tighter than `==`, so this compares a 0/1 value with
 * WRITEMASK_X.  It checks "W is not written" only if WRITEMASK_X is 1
 * (its value is not visible here); `(mask & WRITEMASK_W) == 0` would say
 * the same thing without depending on that.
 *
 * NOTE(review): `mask`, `i`, `i1`, `i2` and any per-channel writemask
 * test are not declared in the lines visible here; presumably i1 and i2
 * are the other two component indices - confirm.
 */
546 static void emit_xpd( struct brw_compile
*p
,
547 const struct brw_reg
*dst
,
549 const struct brw_reg
*arg0
,
550 const struct brw_reg
*arg1
)
554 assert(!(mask
& WRITEMASK_W
) == WRITEMASK_X
);
556 for (i
= 0 ; i
< 3; i
++) {
561 brw_MUL(p
, brw_null_reg(), negate(arg0
[i2
]), arg1
[i1
]);
563 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
564 brw_MAC(p
, dst
[i
], arg0
[i1
], arg1
[i2
]);
565 brw_set_saturate(p
, 0);
/* emit_math1: one-operand math-unit operation.
 *
 * Copies arg0[0] into message register 2, then issues a math call whose
 * visible arguments are the saturate mode (BRW_MATH_SATURATE_SATURATE
 * when `mask` has SATURATE, otherwise BRW_MATH_SATURATE_NONE) and
 * BRW_MATH_PRECISION_FULL.
 *
 * The writemask assertion at the top is commented out.
 *
 * NOTE(review): the function-selector parameter, `mask`, and the name
 * and remaining arguments of the math call are not visible in this
 * listing.
 */
571 static void emit_math1( struct brw_compile
*p
,
573 const struct brw_reg
*dst
,
575 const struct brw_reg
*arg0
)
577 //assert((mask & WRITEMASK_XYZW) == WRITEMASK_X ||
578 // function == BRW_MATH_FUNCTION_SINCOS);
580 brw_MOV(p
, brw_message_reg(2), arg0
[0]);
582 /* Send two messages to perform all 16 operations:
587 (mask
& SATURATE
) ? BRW_MATH_SATURATE_SATURATE
: BRW_MATH_SATURATE_NONE
,
590 BRW_MATH_PRECISION_FULL
);
/* emit_math2: two-operand math-unit operation, issued as two halves.
 *
 * Asserts that, of the XYZW bits in `mask`, exactly WRITEMASK_X is set.
 *
 * Inside a brw_push_insn_state/brw_pop_insn_state pair:
 *   - arg0[0] goes to message reg 2 (compression NONE) and its second
 *     half, sechalf(arg0[0]), to message reg 4 (compression 2NDHALF);
 *   - arg1[0] goes to message reg 3 and sechalf(arg1[0]) to message
 *     reg 5 in the same way;
 *   - two math calls follow, one under compression NONE and one under
 *     2NDHALF, each with the saturate mode derived from `mask`,
 *     BRW_MATH_DATA_VECTOR and BRW_MATH_PRECISION_FULL.
 *
 * NOTE(review): the function-selector parameter, `mask`, and the name,
 * destination and message-register arguments of the two math calls are
 * not visible in this listing.
 */
594 static void emit_math2( struct brw_compile
*p
,
596 const struct brw_reg
*dst
,
598 const struct brw_reg
*arg0
,
599 const struct brw_reg
*arg1
)
601 assert((mask
& WRITEMASK_XYZW
) == WRITEMASK_X
);
603 brw_push_insn_state(p
);
605 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
606 brw_MOV(p
, brw_message_reg(2), arg0
[0]);
607 brw_set_compression_control(p
, BRW_COMPRESSION_2NDHALF
);
608 brw_MOV(p
, brw_message_reg(4), sechalf(arg0
[0]));
610 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
611 brw_MOV(p
, brw_message_reg(3), arg1
[0]);
612 brw_set_compression_control(p
, BRW_COMPRESSION_2NDHALF
);
613 brw_MOV(p
, brw_message_reg(5), sechalf(arg1
[0]));
616 /* Send two messages to perform all 16 operations:
618 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
622 (mask
& SATURATE
) ? BRW_MATH_SATURATE_SATURATE
: BRW_MATH_SATURATE_NONE
,
625 BRW_MATH_DATA_VECTOR
,
626 BRW_MATH_PRECISION_FULL
);
628 brw_set_compression_control(p
, BRW_COMPRESSION_2NDHALF
);
632 (mask
& SATURATE
) ? BRW_MATH_SATURATE_SATURATE
: BRW_MATH_SATURATE_NONE
,
635 BRW_MATH_DATA_VECTOR
,
636 BRW_MATH_PRECISION_FULL
);
638 brw_pop_insn_state(p
);
/* emit_tex: texture sample for one fragment-program TEX instruction.
 *
 * c    - WM compile state; instructions are emitted through c->func.
 * inst - supplies tex_idx (texture target) and tex_unit.
 * arg  - texture coordinate registers, indexed through swz[] = {0,1,2,2}.
 *
 * Visible behaviour:
 *   - `shadow` is 1 when bit inst->tex_unit of c->key.shadowtex_mask is
 *     set, else 0;
 *   - a switch on inst->tex_idx distinguishes 1D, 2D/RECT and other
 *     targets (one arm sets emit = WRITEMASK_XYZ);
 *   - coordinates, or the immediate 0.0, are MOVed to
 *     brw_message_reg(msgLength + 1);
 *   - responseLength is always 8;
 *   - the sample call uses surface inst->tex_unit + 1, sampler
 *     inst->tex_unit, and either SIMD16_SAMPLE_COMPARE or SIMD16_SAMPLE
 *     (presumably chosen by `shadow` - confirm);
 *   - dst[3] is set to 1.0 by a trailing MOV.
 *
 * NOTE(review): the dst/mask parameters, the declarations of `i`, `nr`
 * and `emit`, the case bodies, the msgLength updates and the conditions
 * around the MOVs are not visible in this listing.
 */
643 static void emit_tex( struct brw_wm_compile
*c
,
644 const struct brw_wm_instruction
*inst
,
647 struct brw_reg
*arg
)
649 struct brw_compile
*p
= &c
->func
;
650 GLuint msgLength
, responseLength
;
651 GLboolean shadow
= (c
->key
.shadowtex_mask
& (1<<inst
->tex_unit
)) ? 1 : 0;
655 /* How many input regs are there?
657 switch (inst
->tex_idx
) {
658 case TEXTURE_1D_INDEX
:
662 case TEXTURE_2D_INDEX
:
663 case TEXTURE_RECT_INDEX
:
668 emit
= WRITEMASK_XYZ
;
680 for (i
= 0; i
< nr
; i
++) {
681 static const GLuint swz
[4] = {0,1,2,2};
683 brw_MOV(p
, brw_message_reg(msgLength
+1), arg
[swz
[i
]]);
685 brw_MOV(p
, brw_message_reg(msgLength
+1), brw_imm_f(0));
689 responseLength
= 8; /* always */
692 retype(vec16(dst
[0]), BRW_REGISTER_TYPE_UW
),
694 retype(c
->payload
.depth
[0].hw_reg
, BRW_REGISTER_TYPE_UW
),
695 inst
->tex_unit
+ 1, /* surface */
696 inst
->tex_unit
, /* sampler */
699 BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE
:
700 BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE
),
706 brw_MOV(p
, dst
[3], brw_imm_f(1.0));
/* emit_txb: biased texture sample (TXB).
 *
 * c    - WM compile state; instructions are emitted through c->func.
 * inst - supplies tex_idx (texture target) and tex_unit.
 * arg  - arg[0..2] are coordinates; arg[3] is copied to message reg 8.
 *
 * Message registers 2, 4 and 6 are loaded according to inst->tex_idx:
 *   1D       : arg[0], 0.0,    0.0
 *   2D/RECT  : arg[0], arg[1], 0.0
 *   third arm: arg[0], arg[1], arg[2]  (its label is not visible here)
 * The sample call then uses surface inst->tex_unit + 1, sampler
 * inst->tex_unit, BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS and a response
 * length of 8.  Per the in-body comment, shadow comparison is ignored.
 *
 * NOTE(review): the dst/mask parameters, the `break`s, the third switch
 * label and the name of the sample call are not visible in this listing.
 */
710 static void emit_txb( struct brw_wm_compile
*c
,
711 const struct brw_wm_instruction
*inst
,
714 struct brw_reg
*arg
)
716 struct brw_compile
*p
= &c
->func
;
719 /* Shadow ignored for txb.
721 switch (inst
->tex_idx
) {
722 case TEXTURE_1D_INDEX
:
723 brw_MOV(p
, brw_message_reg(2), arg
[0]);
724 brw_MOV(p
, brw_message_reg(4), brw_imm_f(0));
725 brw_MOV(p
, brw_message_reg(6), brw_imm_f(0));
727 case TEXTURE_2D_INDEX
:
728 case TEXTURE_RECT_INDEX
:
729 brw_MOV(p
, brw_message_reg(2), arg
[0]);
730 brw_MOV(p
, brw_message_reg(4), arg
[1]);
731 brw_MOV(p
, brw_message_reg(6), brw_imm_f(0));
734 brw_MOV(p
, brw_message_reg(2), arg
[0]);
735 brw_MOV(p
, brw_message_reg(4), arg
[1]);
736 brw_MOV(p
, brw_message_reg(6), arg
[2]);
740 brw_MOV(p
, brw_message_reg(8), arg
[3]);
745 retype(vec16(dst
[0]), BRW_REGISTER_TYPE_UW
),
747 retype(c
->payload
.depth
[0].hw_reg
, BRW_REGISTER_TYPE_UW
),
748 inst
->tex_unit
+ 1, /* surface */
749 inst
->tex_unit
, /* sampler */
751 BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS
,
752 8, /* responseLength */
/* emit_lit: Y and Z components of the LIT lighting opcode.
 *
 * Asserts that `mask` has neither the X nor the W writemask bit set.
 *
 *   Y: MOV(dst[1], arg0[0]), saturated when mask has SATURATE.
 *   Z: emit_math2 with BRW_MATH_FUNCTION_POW, passing
 *      WRITEMASK_X | (mask & SATURATE) as its mask.
 *
 * A CMP(null, BRW_CONDITIONAL_LE, arg0[0], 0) then follows, after which
 * dst[1] and/or dst[2] are overwritten with 0.0 (presumably predicated
 * on that compare - confirm), and predicate control is reset to
 * BRW_PREDICATE_NONE.  The in-body comment explains this replaces a
 * 16-wide conditional that appeared to lock up the hardware.
 *
 * NOTE(review): `mask` and the remaining emit_math2 arguments are not
 * visible in this listing.
 */
759 static void emit_lit( struct brw_compile
*p
,
760 const struct brw_reg
*dst
,
762 const struct brw_reg
*arg0
)
764 assert((mask
& WRITEMASK_XW
) == 0);
766 if (mask
& WRITEMASK_Y
) {
767 brw_set_saturate(p
, (mask
& SATURATE
) ? 1 : 0);
768 brw_MOV(p
, dst
[1], arg0
[0]);
769 brw_set_saturate(p
, 0);
772 if (mask
& WRITEMASK_Z
) {
773 emit_math2(p
, BRW_MATH_FUNCTION_POW
,
775 WRITEMASK_X
| (mask
& SATURATE
),
780 /* Ordinarily you'd use an iff statement to skip or shortcircuit
781 * some of the POW calculations above, but 16-wide iff statements
782 * seem to lock c1 hardware, so this is a nasty workaround:
784 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_LE
, arg0
[0], brw_imm_f(0));
786 if (mask
& WRITEMASK_Y
)
787 brw_MOV(p
, dst
[1], brw_imm_f(0));
789 if (mask
& WRITEMASK_Z
)
790 brw_MOV(p
, dst
[2], brw_imm_f(0));
792 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
796 /* Kill pixel - set execution mask to zero for those pixels which
/* emit_kil: pixel kill based on the sign of arg0[0..3].
 *
 * r0uw is element 0 of GRF 0 viewed as UW.  For each i in 0..3, inside a
 * push/pop of the instruction state:
 *   CMP(null, BRW_CONDITIONAL_GE, arg0[i], 0)
 *   brw_set_predicate_control_flag_value(p, 0xff)
 *   compression set to NONE
 *   AND(r0uw, flag register, r0uw)
 * so r0uw is ANDed with the flag register once per component.
 *
 * NOTE(review): the declaration of `i` is not visible in this listing.
 */
799 static void emit_kil( struct brw_wm_compile
*c
,
800 struct brw_reg
*arg0
)
802 struct brw_compile
*p
= &c
->func
;
803 struct brw_reg r0uw
= retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW
);
807 /* XXX - usually won't need 4 compares!
809 for (i
= 0; i
< 4; i
++) {
810 brw_push_insn_state(p
);
811 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_GE
, arg0
[i
], brw_imm_f(0));
812 brw_set_predicate_control_flag_value(p
, 0xff);
813 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
814 brw_AND(p
, r0uw
, brw_flag_reg(), r0uw
);
815 brw_pop_insn_state(p
);
/* fire_fb_write: issues the framebuffer write message.
 *
 * Step 1 (inside push/pop of the instruction state, with mask control
 * disabled and compression NONE): an instruction targeting
 * brw_message_reg(base_reg + 1); per the in-body assembly comment this
 * passes r1 through as control information.
 * Step 2: the write itself, with a null vec16 UW destination, GRF 0
 * (vec8, UW) as source and render surface 0.
 *
 * NOTE(review): the `base_reg` parameter and any further parameters,
 * plus the names and remaining arguments of both emitted calls, are not
 * visible in this listing.
 */
819 static void fire_fb_write( struct brw_wm_compile
*c
,
823 struct brw_compile
*p
= &c
->func
;
825 /* Pass through control information:
827 /* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */
829 brw_push_insn_state(p
);
830 brw_set_mask_control(p
, BRW_MASK_DISABLE
); /* ? */
831 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
833 brw_message_reg(base_reg
+ 1),
835 brw_pop_insn_state(p
);
838 /* Send framebuffer write message: */
839 /* send (16) null.0<1>:uw m0 r0.0<8;8,1>:uw 0x85a04000:ud { Align1 EOT } */
841 retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW
),
843 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW
),
844 0, /* render surface always 0 */
/* emit_aa: copies the antialias/dest-stencil payload register into a
 * message register.
 *
 * The source is selected from c->key.aa_dest_stencil_reg:
 *   comp = aa_dest_stencil_reg / 2 picks the arg1[] entry,
 *   off  = aa_dest_stencil_reg % 2 is applied with offset().
 * The MOV into brw_message_reg(reg) runs with compression NONE inside a
 * push/pop of the instruction state.
 *
 * NOTE(review): the `reg` parameter is not declared in the lines visible
 * in this listing.
 */
850 static void emit_aa( struct brw_wm_compile
*c
,
851 struct brw_reg
*arg1
,
854 struct brw_compile
*p
= &c
->func
;
855 GLuint comp
= c
->key
.aa_dest_stencil_reg
/ 2;
856 GLuint off
= c
->key
.aa_dest_stencil_reg
% 2;
857 struct brw_reg aa
= offset(arg1
[comp
], off
);
859 brw_push_insn_state(p
);
860 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
); /* ?? */
861 brw_MOV(p
, brw_message_reg(reg
), aa
);
862 brw_pop_insn_state(p
);
866 /* Post-fragment-program processing. Send the results to the
/* emit_fb_write: assembles the framebuffer-write payload in message
 * registers and fires the write.
 *
 * arg0 - the four colour channels.
 * arg1 - payload registers (depth, dest depth, AA data).
 * arg2 - arg2[2] is used when the program computes depth.
 *
 * Visible steps, with `nr` as the running message-register index:
 *  1. If c->key.aa_dest_stencil_reg is set, space is reserved for AA.
 *  2. For channel 0..3: the first half goes to message reg nr + channel
 *     (compression NONE) and sechalf(arg0[channel]) to
 *     nr + channel + 4 (compression 2NDHALF).
 *  3. If c->key.source_depth_to_render_target: MOV arg2[2] when
 *     c->key.computes_depth, and a MOV of arg1[1] (presumably the else
 *     arm), both into message reg nr.
 *  4. If c->key.dest_depth_reg: comp/off are derived from it as /2 and
 *     %2, and arg1[comp] (plus offset(arg1[comp], 1) in one arm) is
 *     copied to message reg nr / nr + 1.
 *  5. If !c->key.runtime_check_aads_emit: optional AA handling, then
 *     fire_fb_write(c, 0, nr).  The remaining lines (presumably the else
 *     arm) set conditional mod Z on an instruction reading element 6 of
 *     GRF 1, emit a forward JMPI, call fire_fb_write(c, 0, nr), land the
 *     jump, then call fire_fb_write(c, 1, nr - 1).
 *
 * NOTE(review): the declarations of `nr` and `channel`, the updates to
 * `nr`, several emitting calls and all `else` keywords are not visible in
 * this listing; the branch structure above is partly inferred.
 */
869 static void emit_fb_write( struct brw_wm_compile
*c
,
870 struct brw_reg
*arg0
,
871 struct brw_reg
*arg1
,
872 struct brw_reg
*arg2
)
874 struct brw_compile
*p
= &c
->func
;
878 /* Reserve a space for AA - may not be needed:
880 if (c
->key
.aa_dest_stencil_reg
)
883 /* I don't really understand how this achieves the color interleave
884 * (ie RGBARGBA) in the result: [Do the saturation here]
887 brw_push_insn_state(p
);
889 for (channel
= 0; channel
< 4; channel
++) {
890 /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */
891 /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */
893 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
895 brw_message_reg(nr
+ channel
),
898 brw_set_compression_control(p
, BRW_COMPRESSION_2NDHALF
);
900 brw_message_reg(nr
+ channel
+ 4),
901 sechalf(arg0
[channel
]));
904 /* skip over the regs populated above:
908 brw_pop_insn_state(p
);
911 if (c
->key
.source_depth_to_render_target
)
913 if (c
->key
.computes_depth
)
914 brw_MOV(p
, brw_message_reg(nr
), arg2
[2]);
916 brw_MOV(p
, brw_message_reg(nr
), arg1
[1]); /* ? */
921 if (c
->key
.dest_depth_reg
)
923 GLuint comp
= c
->key
.dest_depth_reg
/ 2;
924 GLuint off
= c
->key
.dest_depth_reg
% 2;
927 brw_push_insn_state(p
);
928 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
929 brw_MOV(p
, brw_message_reg(nr
), arg1
[comp
]);
931 brw_MOV(p
, brw_message_reg(nr
+1), offset(arg1
[comp
],1));
932 brw_pop_insn_state(p
);
935 brw_MOV(p
, brw_message_reg(nr
), arg1
[comp
]);
941 if (!c
->key
.runtime_check_aads_emit
) {
942 if (c
->key
.aa_dest_stencil_reg
)
945 fire_fb_write(c
, 0, nr
);
948 struct brw_reg v1_null_ud
= vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD
));
949 struct brw_reg ip
= brw_ip_reg();
950 struct brw_instruction
*jmp
;
952 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
953 brw_set_conditionalmod(p
, BRW_CONDITIONAL_Z
);
956 get_element_ud(brw_vec8_grf(1,0), 6),
959 jmp
= brw_JMPI(p
, ip
, ip
, brw_imm_w(0));
962 fire_fb_write(c
, 0, nr
);
963 /* note - thread killed in subroutine */
965 brw_land_fwd_jump(p
, jmp
);
967 /* ELSE: Shuffle up one register to fill in the hole left for AA:
969 fire_fb_write(c
, 1, nr
-1);
976 /* Post-fragment-program processing. Send the results to the
/* emit_spill: writes one register out to a spill slot.
 *
 * Visible steps: MOV of `reg` into message register 2, followed by a
 * write whose visible operand is GRF 0 (vec16, retyped to UW).  The
 * residual assembly lines in the body show the intended instruction
 * sequence.
 *
 * NOTE(review): the `reg` and slot parameters, and the name and other
 * arguments of the write call, are not visible in this listing.
 */
979 static void emit_spill( struct brw_wm_compile
*c
,
983 struct brw_compile
*p
= &c
->func
;
986 mov (16) m2.0<1>:ud r2.0<8;8,1>:ud { Align1 Compr }
988 brw_MOV(p
, brw_message_reg(2), reg
);
991 mov (1) r0.2<1>:d 0x00000080:d { Align1 NoMask }
992 send (16) null.0<1>:uw m1 r0.0<8;8,1>:uw 0x053003ff:ud { Align1 }
995 retype(vec16(brw_vec8_grf(0, 0)), BRW_REGISTER_TYPE_UW
),
/* emit_unspill: reloads a register from a spill slot.
 *
 * Per the in-body comment, slot 0 is the "undef" value; the visible
 * MOV(reg, 0.0) is presumably the handling for that case - confirm.
 * Otherwise a read is issued whose visible destination is `reg` as
 * vec16 retyped to UW.
 *
 * NOTE(review): the `reg` and slot parameters, the slot-0 test, and the
 * name and other arguments of the read call are not visible in this
 * listing.
 */
1000 static void emit_unspill( struct brw_wm_compile
*c
,
1004 struct brw_compile
*p
= &c
->func
;
1006 /* Slot 0 is the undef value.
1009 brw_MOV(p
, reg
, brw_imm_f(0));
1014 mov (1) r0.2<1>:d 0x000000c0:d { Align1 NoMask }
1015 send (16) r110.0<1>:uw m1 r0.0<8;8,1>:uw 0x041243ff:ud { Align1 }
1019 retype(vec16(reg
), BRW_REGISTER_TYPE_UW
),
1027 * Retrieve up to 4 GEN4 register pairs for the given wm reg:
/* get_argument_regs: resolves four source references to hardware regs.
 *
 * arg  - array of four brw_wm_ref pointers.
 * regs - output; regs[i] is written for each i in 0..3.
 *
 * For a reference with a non-zero unspill_reg, a call is made with
 * brw_vec8_grf(arg[i]->unspill_reg, 0) and arg[i]->value->spill_slot
 * (presumably emit_unspill - confirm).  regs[i] is then set to
 * arg[i]->hw_reg, or to brw_null_reg() in the alternative arm.
 *
 * NOTE(review): the declaration of `i`, the test that selects between
 * hw_reg and the null register (presumably a NULL check on arg[i]) and
 * the name of the unspill call are not visible in this listing.
 */
1029 static void get_argument_regs( struct brw_wm_compile
*c
,
1030 struct brw_wm_ref
*arg
[],
1031 struct brw_reg
*regs
)
1035 for (i
= 0; i
< 4; i
++) {
1038 if (arg
[i
]->unspill_reg
)
1040 brw_vec8_grf(arg
[i
]->unspill_reg
, 0),
1041 arg
[i
]->value
->spill_slot
);
1043 regs
[i
] = arg
[i
]->hw_reg
;
1046 regs
[i
] = brw_null_reg();
/* spill_values: spills every value in an array that has a spill slot.
 *
 * values - array of brw_wm_value; entries with a non-zero spill_slot are
 *          passed to emit_spill together with their hw_reg.
 *
 * Entries whose spill_slot is 0 are skipped.
 *
 * NOTE(review): the `nr` (element count) parameter and the declaration
 * of `i` are not visible in this listing.
 */
1051 static void spill_values( struct brw_wm_compile
*c
,
1052 struct brw_wm_value
*values
,
1057 for (i
= 0; i
< nr
; i
++)
1058 if (values
[i
].spill_slot
)
1059 emit_spill(c
, values
[i
].hw_reg
, values
[i
].spill_slot
);
1064 /* Emit the fragment program instructions here.
1066 void brw_wm_emit( struct brw_wm_compile
*c
)
1068 struct brw_compile
*p
= &c
->func
;
1071 brw_set_compression_control(p
, BRW_COMPRESSION_COMPRESSED
);
1073 /* Check if any of the payload regs need to be spilled:
1075 spill_values(c
, c
->payload
.depth
, 4);
1076 spill_values(c
, c
->creg
, c
->nr_creg
);
1077 spill_values(c
, c
->payload
.input_interp
, FRAG_ATTRIB_MAX
);
1080 for (insn
= 0; insn
< c
->nr_insns
; insn
++) {
1082 struct brw_wm_instruction
*inst
= &c
->instruction
[insn
];
1083 struct brw_reg args
[3][4], dst
[4];
1084 GLuint i
, dst_flags
;
1086 /* Get argument regs:
1088 for (i
= 0; i
< 3; i
++)
1089 get_argument_regs(c
, inst
->src
[i
], args
[i
]);
1093 for (i
= 0; i
< 4; i
++)
1095 dst
[i
] = inst
->dst
[i
]->hw_reg
;
1097 dst
[i
] = brw_null_reg();
1101 dst_flags
= inst
->writemask
;
1103 dst_flags
|= SATURATE
;
1105 switch (inst
->opcode
) {
1106 /* Generated instructions for calculating triangle interpolants:
1109 emit_pixel_xy(p
, dst
, dst_flags
, args
[0]);
1113 emit_delta_xy(p
, dst
, dst_flags
, args
[0], args
[1]);
1117 emit_wpos_xy(p
, dst
, dst_flags
, args
[0]);
1121 emit_pixel_w(p
, dst
, dst_flags
, args
[0], args
[1]);
1125 emit_linterp(p
, dst
, dst_flags
, args
[0], args
[1]);
1129 emit_pinterp(p
, dst
, dst_flags
, args
[0], args
[1], args
[2]);
1133 emit_cinterp(p
, dst
, dst_flags
, args
[0]);
1137 emit_fb_write(c
, args
[0], args
[1], args
[2]);
1140 /* Straightforward arithmetic:
1143 emit_alu2(p
, brw_ADD
, dst
, dst_flags
, args
[0], args
[1]);
1147 emit_alu1(p
, brw_FRC
, dst
, dst_flags
, args
[0]);
1151 emit_alu1(p
, brw_RNDD
, dst
, dst_flags
, args
[0]);
1154 case OPCODE_DP3
: /* */
1155 emit_dp3(p
, dst
, dst_flags
, args
[0], args
[1]);
1159 emit_dp4(p
, dst
, dst_flags
, args
[0], args
[1]);
1163 emit_dph(p
, dst
, dst_flags
, args
[0], args
[1]);
1166 case OPCODE_LRP
: /* */
1167 emit_lrp(p
, dst
, dst_flags
, args
[0], args
[1], args
[2]);
1171 emit_mad(p
, dst
, dst_flags
, args
[0], args
[1], args
[2]);
1176 emit_alu1(p
, brw_MOV
, dst
, dst_flags
, args
[0]);
1180 emit_alu2(p
, brw_MUL
, dst
, dst_flags
, args
[0], args
[1]);
1184 emit_xpd(p
, dst
, dst_flags
, args
[0], args
[1]);
1187 /* Higher math functions:
1190 emit_math1(p
, BRW_MATH_FUNCTION_INV
, dst
, dst_flags
, args
[0]);
1194 emit_math1(p
, BRW_MATH_FUNCTION_RSQ
, dst
, dst_flags
, args
[0]);
1198 emit_math1(p
, BRW_MATH_FUNCTION_SIN
, dst
, dst_flags
, args
[0]);
1202 emit_math1(p
, BRW_MATH_FUNCTION_COS
, dst
, dst_flags
, args
[0]);
1206 emit_math1(p
, BRW_MATH_FUNCTION_EXP
, dst
, dst_flags
, args
[0]);
1210 emit_math1(p
, BRW_MATH_FUNCTION_LOG
, dst
, dst_flags
, args
[0]);
1214 /* There is an scs math function, but it would need some
1215 * fixup for 16-element execution.
1217 if (dst_flags
& WRITEMASK_X
)
1218 emit_math1(p
, BRW_MATH_FUNCTION_COS
, dst
, (dst_flags
&SATURATE
)|WRITEMASK_X
, args
[0]);
1219 if (dst_flags
& WRITEMASK_Y
)
1220 emit_math1(p
, BRW_MATH_FUNCTION_SIN
, dst
+1, (dst_flags
&SATURATE
)|WRITEMASK_X
, args
[0]);
1224 emit_math2(p
, BRW_MATH_FUNCTION_POW
, dst
, dst_flags
, args
[0], args
[1]);
1230 emit_cmp(p
, dst
, dst_flags
, args
[0], args
[1], args
[2]);
1234 emit_max(p
, dst
, dst_flags
, args
[0], args
[1]);
1238 emit_min(p
, dst
, dst_flags
, args
[0], args
[1]);
1242 emit_slt(p
, dst
, dst_flags
, args
[0], args
[1]);
1246 emit_sle(p
, dst
, dst_flags
, args
[0], args
[1]);
1249 emit_sgt(p
, dst
, dst_flags
, args
[0], args
[1]);
1252 emit_sge(p
, dst
, dst_flags
, args
[0], args
[1]);
1255 emit_seq(p
, dst
, dst_flags
, args
[0], args
[1]);
1258 emit_sne(p
, dst
, dst_flags
, args
[0], args
[1]);
1262 emit_lit(p
, dst
, dst_flags
, args
[0]);
1265 /* Texturing operations:
1268 emit_tex(c
, inst
, dst
, dst_flags
, args
[0]);
1272 emit_txb(c
, inst
, dst
, dst_flags
, args
[0]);
1276 emit_kil(c
, args
[0]);
1280 _mesa_printf("unsupport opcode %d in fragment program\n",
1284 for (i
= 0; i
< 4; i
++)
1285 if (inst
->dst
[i
] && inst
->dst
[i
]->spill_slot
)
1287 inst
->dst
[i
]->hw_reg
,
1288 inst
->dst
[i
]->spill_slot
);