i965: Fix glFrontFacing in twoside GLSL demo.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_wm_emit.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "main/macros.h"
34 #include "brw_context.h"
35 #include "brw_wm.h"
36
/* Flag bit carried alongside the per-channel writemask bits (1<<0 .. 1<<3)
 * in the "mask" argument of the emit_*() helpers below.  brw_wm_emit() ORs
 * it into the writemask when the instruction's saturate field is set, and
 * the helpers then enable saturation on the instruction that produces the
 * result.
 */
#define SATURATE (1<<5)
38
39 /* Not quite sure how correct this is - need to understand horiz
40 * vs. vertical strides a little better.
41 */
42 static INLINE struct brw_reg sechalf( struct brw_reg reg )
43 {
44 if (reg.vstride)
45 reg.nr++;
46 return reg;
47 }
48
49 /* Payload R0:
50 *
51 * R0.0 -- pixel mask, one bit for each of 4 pixels in 4 tiles,
52 * corresponding to each of the 16 execution channels.
53 * R0.1..8 -- ?
54 * R1.0 -- triangle vertex 0.X
55 * R1.1 -- triangle vertex 0.Y
56 * R1.2 -- tile 0 x,y coords (2 packed uwords)
57 * R1.3 -- tile 1 x,y coords (2 packed uwords)
58 * R1.4 -- tile 2 x,y coords (2 packed uwords)
59 * R1.5 -- tile 3 x,y coords (2 packed uwords)
60 * R1.6 -- ?
61 * R1.7 -- ?
62 * R1.8 -- ?
63 */
64
65
66 static void emit_pixel_xy(struct brw_compile *p,
67 const struct brw_reg *dst,
68 GLuint mask,
69 const struct brw_reg *arg0)
70 {
71 struct brw_reg r1 = brw_vec1_grf(1, 0);
72 struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW);
73
74 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
75
76 /* Calculate pixel centers by adding 1 or 0 to each of the
77 * micro-tile coordinates passed in r1.
78 */
79 if (mask & WRITEMASK_X) {
80 brw_ADD(p,
81 vec16(retype(dst[0], BRW_REGISTER_TYPE_UW)),
82 stride(suboffset(r1_uw, 4), 2, 4, 0),
83 brw_imm_v(0x10101010));
84 }
85
86 if (mask & WRITEMASK_Y) {
87 brw_ADD(p,
88 vec16(retype(dst[1], BRW_REGISTER_TYPE_UW)),
89 stride(suboffset(r1_uw,5), 2, 4, 0),
90 brw_imm_v(0x11001100));
91 }
92
93 brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
94 }
95
96
97
98 static void emit_delta_xy(struct brw_compile *p,
99 const struct brw_reg *dst,
100 GLuint mask,
101 const struct brw_reg *arg0,
102 const struct brw_reg *arg1)
103 {
104 struct brw_reg r1 = brw_vec1_grf(1, 0);
105
106 /* Calc delta X,Y by subtracting origin in r1 from the pixel
107 * centers.
108 */
109 if (mask & WRITEMASK_X) {
110 brw_ADD(p,
111 dst[0],
112 retype(arg0[0], BRW_REGISTER_TYPE_UW),
113 negate(r1));
114 }
115
116 if (mask & WRITEMASK_Y) {
117 brw_ADD(p,
118 dst[1],
119 retype(arg0[1], BRW_REGISTER_TYPE_UW),
120 negate(suboffset(r1,1)));
121
122 }
123 }
124
125 static void emit_wpos_xy(struct brw_wm_compile *c,
126 const struct brw_reg *dst,
127 GLuint mask,
128 const struct brw_reg *arg0)
129 {
130 struct brw_compile *p = &c->func;
131
132 /* Calculate the pixel offset from window bottom left into destination
133 * X and Y channels.
134 */
135 if (mask & WRITEMASK_X) {
136 /* X' = X - origin */
137 brw_ADD(p,
138 dst[0],
139 retype(arg0[0], BRW_REGISTER_TYPE_W),
140 brw_imm_d(0 - c->key.origin_x));
141 }
142
143 if (mask & WRITEMASK_Y) {
144 /* Y' = height - (Y - origin_y) = height + origin_y - Y */
145 brw_ADD(p,
146 dst[1],
147 negate(retype(arg0[1], BRW_REGISTER_TYPE_W)),
148 brw_imm_d(c->key.origin_y + c->key.drawable_height - 1));
149 }
150 }
151
152
153 static void emit_pixel_w( struct brw_compile *p,
154 const struct brw_reg *dst,
155 GLuint mask,
156 const struct brw_reg *arg0,
157 const struct brw_reg *deltas)
158 {
159 /* Don't need this if all you are doing is interpolating color, for
160 * instance.
161 */
162 if (mask & WRITEMASK_W) {
163 struct brw_reg interp3 = brw_vec1_grf(arg0[0].nr+1, 4);
164
165 /* Calc 1/w - just linterp wpos[3] optimized by putting the
166 * result straight into a message reg.
167 */
168 brw_LINE(p, brw_null_reg(), interp3, deltas[0]);
169 brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), deltas[1]);
170
171 /* Calc w */
172 brw_math_16( p, dst[3],
173 BRW_MATH_FUNCTION_INV,
174 BRW_MATH_SATURATE_NONE,
175 2, brw_null_reg(),
176 BRW_MATH_PRECISION_FULL);
177 }
178 }
179
180
181
182 static void emit_linterp( struct brw_compile *p,
183 const struct brw_reg *dst,
184 GLuint mask,
185 const struct brw_reg *arg0,
186 const struct brw_reg *deltas )
187 {
188 struct brw_reg interp[4];
189 GLuint nr = arg0[0].nr;
190 GLuint i;
191
192 interp[0] = brw_vec1_grf(nr, 0);
193 interp[1] = brw_vec1_grf(nr, 4);
194 interp[2] = brw_vec1_grf(nr+1, 0);
195 interp[3] = brw_vec1_grf(nr+1, 4);
196
197 for (i = 0; i < 4; i++) {
198 if (mask & (1<<i)) {
199 brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
200 brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
201 }
202 }
203 }
204
205
206 static void emit_pinterp( struct brw_compile *p,
207 const struct brw_reg *dst,
208 GLuint mask,
209 const struct brw_reg *arg0,
210 const struct brw_reg *deltas,
211 const struct brw_reg *w)
212 {
213 struct brw_reg interp[4];
214 GLuint nr = arg0[0].nr;
215 GLuint i;
216
217 interp[0] = brw_vec1_grf(nr, 0);
218 interp[1] = brw_vec1_grf(nr, 4);
219 interp[2] = brw_vec1_grf(nr+1, 0);
220 interp[3] = brw_vec1_grf(nr+1, 4);
221
222 for (i = 0; i < 4; i++) {
223 if (mask & (1<<i)) {
224 brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
225 brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
226 }
227 }
228 for (i = 0; i < 4; i++) {
229 if (mask & (1<<i)) {
230 brw_MUL(p, dst[i], dst[i], w[3]);
231 }
232 }
233 }
234
235
236 static void emit_cinterp( struct brw_compile *p,
237 const struct brw_reg *dst,
238 GLuint mask,
239 const struct brw_reg *arg0 )
240 {
241 struct brw_reg interp[4];
242 GLuint nr = arg0[0].nr;
243 GLuint i;
244
245 interp[0] = brw_vec1_grf(nr, 0);
246 interp[1] = brw_vec1_grf(nr, 4);
247 interp[2] = brw_vec1_grf(nr+1, 0);
248 interp[3] = brw_vec1_grf(nr+1, 4);
249
250 for (i = 0; i < 4; i++) {
251 if (mask & (1<<i)) {
252 brw_MOV(p, dst[i], suboffset(interp[i],3)); /* TODO: optimize away like other moves */
253 }
254 }
255 }
256
257 /* Sets the destination channels to 1.0 or 0.0 according to glFrontFacing. */
258 static void emit_frontfacing( struct brw_compile *p,
259 const struct brw_reg *dst,
260 GLuint mask )
261 {
262 struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD);
263 GLuint i;
264
265 if (!(mask & WRITEMASK_XYZW))
266 return;
267
268 for (i = 0; i < 4; i++) {
269 if (mask & (1<<i)) {
270 brw_MOV(p, dst[i], brw_imm_f(0.0));
271 }
272 }
273
274 /* bit 31 is "primitive is back face", so checking < (1 << 31) gives
275 * us front face
276 */
277 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, r1_6ud, brw_imm_ud(1 << 31));
278 for (i = 0; i < 4; i++) {
279 if (mask & (1<<i)) {
280 brw_MOV(p, dst[i], brw_imm_f(1.0));
281 }
282 }
283 brw_set_predicate_control_flag_value(p, 0xff);
284 }
285
286 static void emit_alu1( struct brw_compile *p,
287 struct brw_instruction *(*func)(struct brw_compile *,
288 struct brw_reg,
289 struct brw_reg),
290 const struct brw_reg *dst,
291 GLuint mask,
292 const struct brw_reg *arg0 )
293 {
294 GLuint i;
295
296 if (mask & SATURATE)
297 brw_set_saturate(p, 1);
298
299 for (i = 0; i < 4; i++) {
300 if (mask & (1<<i)) {
301 func(p, dst[i], arg0[i]);
302 }
303 }
304
305 if (mask & SATURATE)
306 brw_set_saturate(p, 0);
307 }
308
309
310 static void emit_alu2( struct brw_compile *p,
311 struct brw_instruction *(*func)(struct brw_compile *,
312 struct brw_reg,
313 struct brw_reg,
314 struct brw_reg),
315 const struct brw_reg *dst,
316 GLuint mask,
317 const struct brw_reg *arg0,
318 const struct brw_reg *arg1 )
319 {
320 GLuint i;
321
322 if (mask & SATURATE)
323 brw_set_saturate(p, 1);
324
325 for (i = 0; i < 4; i++) {
326 if (mask & (1<<i)) {
327 func(p, dst[i], arg0[i], arg1[i]);
328 }
329 }
330
331 if (mask & SATURATE)
332 brw_set_saturate(p, 0);
333 }
334
335
336 static void emit_mad( struct brw_compile *p,
337 const struct brw_reg *dst,
338 GLuint mask,
339 const struct brw_reg *arg0,
340 const struct brw_reg *arg1,
341 const struct brw_reg *arg2 )
342 {
343 GLuint i;
344
345 for (i = 0; i < 4; i++) {
346 if (mask & (1<<i)) {
347 brw_MUL(p, dst[i], arg0[i], arg1[i]);
348
349 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
350 brw_ADD(p, dst[i], dst[i], arg2[i]);
351 brw_set_saturate(p, 0);
352 }
353 }
354 }
355
356
357 static void emit_lrp( struct brw_compile *p,
358 const struct brw_reg *dst,
359 GLuint mask,
360 const struct brw_reg *arg0,
361 const struct brw_reg *arg1,
362 const struct brw_reg *arg2 )
363 {
364 GLuint i;
365
366 /* Uses dst as a temporary:
367 */
368 for (i = 0; i < 4; i++) {
369 if (mask & (1<<i)) {
370 /* Can I use the LINE instruction for this?
371 */
372 brw_ADD(p, dst[i], negate(arg0[i]), brw_imm_f(1.0));
373 brw_MUL(p, brw_null_reg(), dst[i], arg2[i]);
374
375 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
376 brw_MAC(p, dst[i], arg0[i], arg1[i]);
377 brw_set_saturate(p, 0);
378 }
379 }
380 }
381
382 static void emit_sop( struct brw_compile *p,
383 const struct brw_reg *dst,
384 GLuint mask,
385 GLuint cond,
386 const struct brw_reg *arg0,
387 const struct brw_reg *arg1 )
388 {
389 GLuint i;
390
391 for (i = 0; i < 4; i++) {
392 if (mask & (1<<i)) {
393 brw_MOV(p, dst[i], brw_imm_f(0));
394 brw_CMP(p, brw_null_reg(), cond, arg0[i], arg1[i]);
395 brw_MOV(p, dst[i], brw_imm_f(1.0));
396 brw_set_predicate_control_flag_value(p, 0xff);
397 }
398 }
399 }
400
401 static void emit_slt( struct brw_compile *p,
402 const struct brw_reg *dst,
403 GLuint mask,
404 const struct brw_reg *arg0,
405 const struct brw_reg *arg1 )
406 {
407 emit_sop(p, dst, mask, BRW_CONDITIONAL_L, arg0, arg1);
408 }
409
410 static void emit_sle( struct brw_compile *p,
411 const struct brw_reg *dst,
412 GLuint mask,
413 const struct brw_reg *arg0,
414 const struct brw_reg *arg1 )
415 {
416 emit_sop(p, dst, mask, BRW_CONDITIONAL_LE, arg0, arg1);
417 }
418
419 static void emit_sgt( struct brw_compile *p,
420 const struct brw_reg *dst,
421 GLuint mask,
422 const struct brw_reg *arg0,
423 const struct brw_reg *arg1 )
424 {
425 emit_sop(p, dst, mask, BRW_CONDITIONAL_G, arg0, arg1);
426 }
427
428 static void emit_sge( struct brw_compile *p,
429 const struct brw_reg *dst,
430 GLuint mask,
431 const struct brw_reg *arg0,
432 const struct brw_reg *arg1 )
433 {
434 emit_sop(p, dst, mask, BRW_CONDITIONAL_GE, arg0, arg1);
435 }
436
437 static void emit_seq( struct brw_compile *p,
438 const struct brw_reg *dst,
439 GLuint mask,
440 const struct brw_reg *arg0,
441 const struct brw_reg *arg1 )
442 {
443 emit_sop(p, dst, mask, BRW_CONDITIONAL_EQ, arg0, arg1);
444 }
445
446 static void emit_sne( struct brw_compile *p,
447 const struct brw_reg *dst,
448 GLuint mask,
449 const struct brw_reg *arg0,
450 const struct brw_reg *arg1 )
451 {
452 emit_sop(p, dst, mask, BRW_CONDITIONAL_NEQ, arg0, arg1);
453 }
454
455 static void emit_cmp( struct brw_compile *p,
456 const struct brw_reg *dst,
457 GLuint mask,
458 const struct brw_reg *arg0,
459 const struct brw_reg *arg1,
460 const struct brw_reg *arg2 )
461 {
462 GLuint i;
463
464 for (i = 0; i < 4; i++) {
465 if (mask & (1<<i)) {
466 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
467 brw_MOV(p, dst[i], arg2[i]);
468 brw_set_saturate(p, 0);
469
470 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], brw_imm_f(0));
471
472 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
473 brw_MOV(p, dst[i], arg1[i]);
474 brw_set_saturate(p, 0);
475 brw_set_predicate_control_flag_value(p, 0xff);
476 }
477 }
478 }
479
480 static void emit_max( struct brw_compile *p,
481 const struct brw_reg *dst,
482 GLuint mask,
483 const struct brw_reg *arg0,
484 const struct brw_reg *arg1 )
485 {
486 GLuint i;
487
488 for (i = 0; i < 4; i++) {
489 if (mask & (1<<i)) {
490 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
491 brw_MOV(p, dst[i], arg0[i]);
492 brw_set_saturate(p, 0);
493
494 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]);
495
496 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
497 brw_MOV(p, dst[i], arg1[i]);
498 brw_set_saturate(p, 0);
499 brw_set_predicate_control_flag_value(p, 0xff);
500 }
501 }
502 }
503
504 static void emit_min( struct brw_compile *p,
505 const struct brw_reg *dst,
506 GLuint mask,
507 const struct brw_reg *arg0,
508 const struct brw_reg *arg1 )
509 {
510 GLuint i;
511
512 for (i = 0; i < 4; i++) {
513 if (mask & (1<<i)) {
514 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
515 brw_MOV(p, dst[i], arg1[i]);
516 brw_set_saturate(p, 0);
517
518 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]);
519
520 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
521 brw_MOV(p, dst[i], arg0[i]);
522 brw_set_saturate(p, 0);
523 brw_set_predicate_control_flag_value(p, 0xff);
524 }
525 }
526 }
527
528
529 static void emit_dp3( struct brw_compile *p,
530 const struct brw_reg *dst,
531 GLuint mask,
532 const struct brw_reg *arg0,
533 const struct brw_reg *arg1 )
534 {
535 if (!(mask & WRITEMASK_XYZW))
536 return; /* Do not emit dead code */
537
538 assert((mask & WRITEMASK_XYZW) == WRITEMASK_X);
539
540 brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
541 brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
542
543 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
544 brw_MAC(p, dst[0], arg0[2], arg1[2]);
545 brw_set_saturate(p, 0);
546 }
547
548
549 static void emit_dp4( struct brw_compile *p,
550 const struct brw_reg *dst,
551 GLuint mask,
552 const struct brw_reg *arg0,
553 const struct brw_reg *arg1 )
554 {
555 if (!(mask & WRITEMASK_XYZW))
556 return; /* Do not emit dead code */
557
558 assert((mask & WRITEMASK_XYZW) == WRITEMASK_X);
559
560 brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
561 brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
562 brw_MAC(p, brw_null_reg(), arg0[2], arg1[2]);
563
564 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
565 brw_MAC(p, dst[0], arg0[3], arg1[3]);
566 brw_set_saturate(p, 0);
567 }
568
569
570 static void emit_dph( struct brw_compile *p,
571 const struct brw_reg *dst,
572 GLuint mask,
573 const struct brw_reg *arg0,
574 const struct brw_reg *arg1 )
575 {
576 if (!(mask & WRITEMASK_XYZW))
577 return; /* Do not emit dead code */
578
579 assert((mask & WRITEMASK_XYZW) == WRITEMASK_X);
580
581 brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
582 brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
583 brw_MAC(p, dst[0], arg0[2], arg1[2]);
584
585 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
586 brw_ADD(p, dst[0], dst[0], arg1[3]);
587 brw_set_saturate(p, 0);
588 }
589
590
591 static void emit_xpd( struct brw_compile *p,
592 const struct brw_reg *dst,
593 GLuint mask,
594 const struct brw_reg *arg0,
595 const struct brw_reg *arg1 )
596 {
597 GLuint i;
598
599 assert(!(mask & WRITEMASK_W) == WRITEMASK_X);
600
601 for (i = 0 ; i < 3; i++) {
602 if (mask & (1<<i)) {
603 GLuint i2 = (i+2)%3;
604 GLuint i1 = (i+1)%3;
605
606 brw_MUL(p, brw_null_reg(), negate(arg0[i2]), arg1[i1]);
607
608 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
609 brw_MAC(p, dst[i], arg0[i1], arg1[i2]);
610 brw_set_saturate(p, 0);
611 }
612 }
613 }
614
615
616 static void emit_math1( struct brw_compile *p,
617 GLuint function,
618 const struct brw_reg *dst,
619 GLuint mask,
620 const struct brw_reg *arg0 )
621 {
622 if (!(mask & WRITEMASK_XYZW))
623 return; /* Do not emit dead code */
624
625 //assert((mask & WRITEMASK_XYZW) == WRITEMASK_X ||
626 // function == BRW_MATH_FUNCTION_SINCOS);
627
628 brw_MOV(p, brw_message_reg(2), arg0[0]);
629
630 /* Send two messages to perform all 16 operations:
631 */
632 brw_math_16(p,
633 dst[0],
634 function,
635 (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
636 2,
637 brw_null_reg(),
638 BRW_MATH_PRECISION_FULL);
639 }
640
641
642 static void emit_math2( struct brw_compile *p,
643 GLuint function,
644 const struct brw_reg *dst,
645 GLuint mask,
646 const struct brw_reg *arg0,
647 const struct brw_reg *arg1)
648 {
649 if (!(mask & WRITEMASK_XYZW))
650 return; /* Do not emit dead code */
651
652 assert((mask & WRITEMASK_XYZW) == WRITEMASK_X);
653
654 brw_push_insn_state(p);
655
656 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
657 brw_MOV(p, brw_message_reg(2), arg0[0]);
658 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
659 brw_MOV(p, brw_message_reg(4), sechalf(arg0[0]));
660
661 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
662 brw_MOV(p, brw_message_reg(3), arg1[0]);
663 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
664 brw_MOV(p, brw_message_reg(5), sechalf(arg1[0]));
665
666
667 /* Send two messages to perform all 16 operations:
668 */
669 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
670 brw_math(p,
671 dst[0],
672 function,
673 (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
674 2,
675 brw_null_reg(),
676 BRW_MATH_DATA_VECTOR,
677 BRW_MATH_PRECISION_FULL);
678
679 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
680 brw_math(p,
681 offset(dst[0],1),
682 function,
683 (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
684 4,
685 brw_null_reg(),
686 BRW_MATH_DATA_VECTOR,
687 BRW_MATH_PRECISION_FULL);
688
689 brw_pop_insn_state(p);
690 }
691
692
693
694 static void emit_tex( struct brw_wm_compile *c,
695 const struct brw_wm_instruction *inst,
696 struct brw_reg *dst,
697 GLuint dst_flags,
698 struct brw_reg *arg )
699 {
700 struct brw_compile *p = &c->func;
701 GLuint msgLength, responseLength;
702 GLuint i, nr;
703 GLuint emit;
704
705 /* How many input regs are there?
706 */
707 switch (inst->tex_idx) {
708 case TEXTURE_1D_INDEX:
709 emit = WRITEMASK_X;
710 nr = 1;
711 break;
712 case TEXTURE_2D_INDEX:
713 case TEXTURE_RECT_INDEX:
714 emit = WRITEMASK_XY;
715 nr = 2;
716 break;
717 default:
718 emit = WRITEMASK_XYZ;
719 nr = 3;
720 break;
721 }
722
723 if (inst->tex_shadow) {
724 nr = 4;
725 emit |= WRITEMASK_W;
726 }
727
728 msgLength = 1;
729
730 for (i = 0; i < nr; i++) {
731 static const GLuint swz[4] = {0,1,2,2};
732 if (emit & (1<<i))
733 brw_MOV(p, brw_message_reg(msgLength+1), arg[swz[i]]);
734 else
735 brw_MOV(p, brw_message_reg(msgLength+1), brw_imm_f(0));
736 msgLength += 2;
737 }
738
739 responseLength = 8; /* always */
740
741 brw_SAMPLE(p,
742 retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW),
743 1,
744 retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW),
745 inst->tex_unit + MAX_DRAW_BUFFERS, /* surface */
746 inst->tex_unit, /* sampler */
747 inst->writemask,
748 (inst->tex_shadow ?
749 BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE :
750 BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE),
751 responseLength,
752 msgLength,
753 0);
754 }
755
756
757 static void emit_txb( struct brw_wm_compile *c,
758 const struct brw_wm_instruction *inst,
759 struct brw_reg *dst,
760 GLuint dst_flags,
761 struct brw_reg *arg )
762 {
763 struct brw_compile *p = &c->func;
764 GLuint msgLength;
765
766 /* Shadow ignored for txb.
767 */
768 switch (inst->tex_idx) {
769 case TEXTURE_1D_INDEX:
770 brw_MOV(p, brw_message_reg(2), arg[0]);
771 brw_MOV(p, brw_message_reg(4), brw_imm_f(0));
772 brw_MOV(p, brw_message_reg(6), brw_imm_f(0));
773 break;
774 case TEXTURE_2D_INDEX:
775 case TEXTURE_RECT_INDEX:
776 brw_MOV(p, brw_message_reg(2), arg[0]);
777 brw_MOV(p, brw_message_reg(4), arg[1]);
778 brw_MOV(p, brw_message_reg(6), brw_imm_f(0));
779 break;
780 default:
781 brw_MOV(p, brw_message_reg(2), arg[0]);
782 brw_MOV(p, brw_message_reg(4), arg[1]);
783 brw_MOV(p, brw_message_reg(6), arg[2]);
784 break;
785 }
786
787 brw_MOV(p, brw_message_reg(8), arg[3]);
788 msgLength = 9;
789
790 brw_SAMPLE(p,
791 retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW),
792 1,
793 retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW),
794 inst->tex_unit + MAX_DRAW_BUFFERS, /* surface */
795 inst->tex_unit, /* sampler */
796 inst->writemask,
797 BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS,
798 8, /* responseLength */
799 msgLength,
800 0);
801 }
802
803
804 static void emit_lit( struct brw_compile *p,
805 const struct brw_reg *dst,
806 GLuint mask,
807 const struct brw_reg *arg0 )
808 {
809 assert((mask & WRITEMASK_XW) == 0);
810
811 if (mask & WRITEMASK_Y) {
812 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
813 brw_MOV(p, dst[1], arg0[0]);
814 brw_set_saturate(p, 0);
815 }
816
817 if (mask & WRITEMASK_Z) {
818 emit_math2(p, BRW_MATH_FUNCTION_POW,
819 &dst[2],
820 WRITEMASK_X | (mask & SATURATE),
821 &arg0[1],
822 &arg0[3]);
823 }
824
825 /* Ordinarily you'd use an iff statement to skip or shortcircuit
826 * some of the POW calculations above, but 16-wide iff statements
827 * seem to lock c1 hardware, so this is a nasty workaround:
828 */
829 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_LE, arg0[0], brw_imm_f(0));
830 {
831 if (mask & WRITEMASK_Y)
832 brw_MOV(p, dst[1], brw_imm_f(0));
833
834 if (mask & WRITEMASK_Z)
835 brw_MOV(p, dst[2], brw_imm_f(0));
836 }
837 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
838 }
839
840
841 /* Kill pixel - set execution mask to zero for those pixels which
842 * fail.
843 */
844 static void emit_kil( struct brw_wm_compile *c,
845 struct brw_reg *arg0)
846 {
847 struct brw_compile *p = &c->func;
848 struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
849 GLuint i;
850
851 /* XXX - usually won't need 4 compares!
852 */
853 for (i = 0; i < 4; i++) {
854 brw_push_insn_state(p);
855 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0[i], brw_imm_f(0));
856 brw_set_predicate_control_flag_value(p, 0xff);
857 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
858 brw_AND(p, r0uw, brw_flag_reg(), r0uw);
859 brw_pop_insn_state(p);
860 }
861 }
862
863
864 static void fire_fb_write( struct brw_wm_compile *c,
865 GLuint base_reg,
866 GLuint nr,
867 GLuint target,
868 GLuint eot )
869 {
870 struct brw_compile *p = &c->func;
871
872 /* Pass through control information:
873 */
874 /* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */
875 {
876 brw_push_insn_state(p);
877 brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */
878 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
879 brw_MOV(p,
880 brw_message_reg(base_reg + 1),
881 brw_vec8_grf(1, 0));
882 brw_pop_insn_state(p);
883 }
884
885 /* Send framebuffer write message: */
886 /* send (16) null.0<1>:uw m0 r0.0<8;8,1>:uw 0x85a04000:ud { Align1 EOT } */
887 brw_fb_WRITE(p,
888 retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW),
889 base_reg,
890 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
891 target,
892 nr,
893 0,
894 eot);
895 }
896
897
898 static void emit_aa( struct brw_wm_compile *c,
899 struct brw_reg *arg1,
900 GLuint reg )
901 {
902 struct brw_compile *p = &c->func;
903 GLuint comp = c->key.aa_dest_stencil_reg / 2;
904 GLuint off = c->key.aa_dest_stencil_reg % 2;
905 struct brw_reg aa = offset(arg1[comp], off);
906
907 brw_push_insn_state(p);
908 brw_set_compression_control(p, BRW_COMPRESSION_NONE); /* ?? */
909 brw_MOV(p, brw_message_reg(reg), aa);
910 brw_pop_insn_state(p);
911 }
912
913
/* Post-fragment-program processing. Send the results to the
 * framebuffer.
 *
 * Builds the framebuffer-write message in the message registers and then
 * hands over to fire_fb_write().  "nr" tracks the next free message
 * register while the message is assembled and ends up as the message
 * length passed to fire_fb_write().
 *
 * Layout, in order:
 *   - message registers 0 and 1 are skipped here (nr starts at 2;
 *     fire_fb_write() fills base_reg + 1);
 *   - one register is reserved for AA data if c->key.aa_dest_stencil_reg
 *     is set;
 *   - eight registers of colour (four channels, two halves each);
 *   - two registers of source depth, if
 *     c->key.source_depth_to_render_target is set;
 *   - two registers of destination depth, if c->key.dest_depth_reg is set.
 *
 * \param arg0 the fragment color
 * \param arg1 the pass-through depth value
 * \param arg2 the shader-computed depth value
 * \param target the render target, forwarded to fire_fb_write()
 * \param eot the end-of-thread flag, forwarded to fire_fb_write()
 */
static void emit_fb_write( struct brw_wm_compile *c,
                           struct brw_reg *arg0,
                           struct brw_reg *arg1,
                           struct brw_reg *arg2,
                           GLuint target,
                           GLuint eot)
{
   struct brw_compile *p = &c->func;
   GLuint nr = 2;
   GLuint channel;

   /* Reserve a space for AA - may not be needed:
    */
   if (c->key.aa_dest_stencil_reg)
      nr += 1;

   /* I don't really understand how this achieves the color interleave
    * (ie RGBARGBA) in the result: [Do the saturation here]
    */
   {
      brw_push_insn_state(p);

      /* Each channel is written as two uncompressed moves: the first
       * half to m(nr + channel), the second half four registers later.
       */
      for (channel = 0; channel < 4; channel++) {
         /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */
         /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */

         brw_set_compression_control(p, BRW_COMPRESSION_NONE);
         brw_MOV(p,
                 brw_message_reg(nr + channel),
                 arg0[channel]);

         brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
         brw_MOV(p,
                 brw_message_reg(nr + channel + 4),
                 sechalf(arg0[channel]));
      }

      /* skip over the regs populated above:
       */
      nr += 8;

      brw_pop_insn_state(p);
   }

   /* Source depth: the shader-computed value (arg2[2]) when the program
    * computes depth, otherwise the pass-through value (arg1[1]).
    */
   if (c->key.source_depth_to_render_target)
   {
      if (c->key.computes_depth)
         brw_MOV(p, brw_message_reg(nr), arg2[2]);
      else
         brw_MOV(p, brw_message_reg(nr), arg1[1]); /* ? */

      nr += 2;
   }

   /* Destination depth: dest_depth_reg / 2 indexes arg1[], and an odd
    * value means the data starts in the second register of that entry,
    * so the two halves are copied with separate uncompressed moves.
    */
   if (c->key.dest_depth_reg)
   {
      GLuint comp = c->key.dest_depth_reg / 2;
      GLuint off = c->key.dest_depth_reg % 2;

      if (off != 0) {
         brw_push_insn_state(p);
         brw_set_compression_control(p, BRW_COMPRESSION_NONE);

         brw_MOV(p, brw_message_reg(nr), offset(arg1[comp],1));
         /* 2nd half? */
         brw_MOV(p, brw_message_reg(nr+1), arg1[comp+1]);
         brw_pop_insn_state(p);
      }
      else {
         brw_MOV(p, brw_message_reg(nr), arg1[comp]);
      }
      nr += 2;
   }

   if (!c->key.runtime_check_aads_emit) {
      /* Whether AA data is sent is known at compile time: fill the
       * reserved slot (m2) if needed and send from m0.
       */
      if (c->key.aa_dest_stencil_reg)
         emit_aa(c, arg1, 2);

      fire_fb_write(c, 0, nr, target, eot);
   }
   else {
      /* Whether AA data is sent is decided at run time by testing bit 26
       * of r1.6; both variants of the write are emitted and a forward
       * jump selects between them.
       * NOTE(review): the nr-1 below presumably relies on
       * aa_dest_stencil_reg being non-zero whenever
       * runtime_check_aads_emit is set - confirm.
       */
      struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
      struct brw_reg ip = brw_ip_reg();
      struct brw_instruction *jmp;

      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
      brw_AND(p,
              v1_null_ud,
              get_element_ud(brw_vec8_grf(1,0), 6),
              brw_imm_ud(1<<26));

      /* The jump distance is a placeholder (0) until
       * brw_land_fwd_jump() is called on it below.
       */
      jmp = brw_JMPI(p, ip, ip, brw_imm_w(0));
      {
         emit_aa(c, arg1, 2);
         fire_fb_write(c, 0, nr, target, eot);
         /* note - thread killed in subroutine */
      }
      brw_land_fwd_jump(p, jmp);

      /* ELSE: Shuffle up one register to fill in the hole left for AA:
       */
      fire_fb_write(c, 1, nr-1, target, eot);
   }
}
1025
1026
1027 /* Post-fragment-program processing. Send the results to the
1028 * framebuffer.
1029 */
1030 static void emit_spill( struct brw_wm_compile *c,
1031 struct brw_reg reg,
1032 GLuint slot )
1033 {
1034 struct brw_compile *p = &c->func;
1035
1036 /*
1037 mov (16) m2.0<1>:ud r2.0<8;8,1>:ud { Align1 Compr }
1038 */
1039 brw_MOV(p, brw_message_reg(2), reg);
1040
1041 /*
1042 mov (1) r0.2<1>:d 0x00000080:d { Align1 NoMask }
1043 send (16) null.0<1>:uw m1 r0.0<8;8,1>:uw 0x053003ff:ud { Align1 }
1044 */
1045 brw_dp_WRITE_16(p,
1046 retype(vec16(brw_vec8_grf(0, 0)), BRW_REGISTER_TYPE_UW),
1047 1,
1048 slot);
1049 }
1050
1051
1052 static void emit_unspill( struct brw_wm_compile *c,
1053 struct brw_reg reg,
1054 GLuint slot )
1055 {
1056 struct brw_compile *p = &c->func;
1057
1058 /* Slot 0 is the undef value.
1059 */
1060 if (slot == 0) {
1061 brw_MOV(p, reg, brw_imm_f(0));
1062 return;
1063 }
1064
1065 /*
1066 mov (1) r0.2<1>:d 0x000000c0:d { Align1 NoMask }
1067 send (16) r110.0<1>:uw m1 r0.0<8;8,1>:uw 0x041243ff:ud { Align1 }
1068 */
1069
1070 brw_dp_READ_16(p,
1071 retype(vec16(reg), BRW_REGISTER_TYPE_UW),
1072 1,
1073 slot);
1074 }
1075
1076
1077 /**
1078 * Retrieve upto 4 GEN4 register pairs for the given wm reg:
1079 */
1080 static void get_argument_regs( struct brw_wm_compile *c,
1081 struct brw_wm_ref *arg[],
1082 struct brw_reg *regs )
1083 {
1084 GLuint i;
1085
1086 for (i = 0; i < 4; i++) {
1087 if (arg[i]) {
1088
1089 if (arg[i]->unspill_reg)
1090 emit_unspill(c,
1091 brw_vec8_grf(arg[i]->unspill_reg, 0),
1092 arg[i]->value->spill_slot);
1093
1094 regs[i] = arg[i]->hw_reg;
1095 }
1096 else {
1097 regs[i] = brw_null_reg();
1098 }
1099 }
1100 }
1101
1102
1103 static void spill_values( struct brw_wm_compile *c,
1104 struct brw_wm_value *values,
1105 GLuint nr )
1106 {
1107 GLuint i;
1108
1109 for (i = 0; i < nr; i++)
1110 if (values[i].spill_slot)
1111 emit_spill(c, values[i].hw_reg, values[i].spill_slot);
1112 }
1113
1114
/* Emit the fragment program instructions here.
 *
 * Walks c->instruction[0 .. c->nr_insns-1] and, for each one, resolves
 * its source and destination registers, dispatches on the opcode to the
 * matching emit_*() helper, and finally spills any destination that has
 * a spill slot.  Payload, constant and input-interpolation values that
 * were assigned spill slots are spilled once up front.
 *
 * Opcodes with no case below are reported with _mesa_printf() and produce
 * no instructions.
 */
void brw_wm_emit( struct brw_wm_compile *c )
{
   struct brw_compile *p = &c->func;
   GLuint insn;

   /* Compressed mode is the default for everything emitted below; helpers
    * that need something else change it locally.
    */
   brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);

   /* Check if any of the payload regs need to be spilled:
    */
   spill_values(c, c->payload.depth, 4);
   spill_values(c, c->creg, c->nr_creg);
   spill_values(c, c->payload.input_interp, FRAG_ATTRIB_MAX);


   for (insn = 0; insn < c->nr_insns; insn++) {

      struct brw_wm_instruction *inst = &c->instruction[insn];
      struct brw_reg args[3][4], dst[4];
      GLuint i, dst_flags;

      /* Get argument regs:
       * (this may also emit unspill instructions, see get_argument_regs)
       */
      for (i = 0; i < 3; i++)
         get_argument_regs(c, inst->src[i], args[i]);

      /* Get dest regs:
       * (components without a destination get the null register)
       */
      for (i = 0; i < 4; i++)
         if (inst->dst[i])
            dst[i] = inst->dst[i]->hw_reg;
         else
            dst[i] = brw_null_reg();

      /* Flags
       * (the writemask, plus the SATURATE bit when saturation is wanted)
       */
      dst_flags = inst->writemask;
      if (inst->saturate)
         dst_flags |= SATURATE;

      switch (inst->opcode) {
         /* Generated instructions for calculating triangle interpolants:
          */
      case WM_PIXELXY:
         emit_pixel_xy(p, dst, dst_flags, args[0]);
         break;

      case WM_DELTAXY:
         emit_delta_xy(p, dst, dst_flags, args[0], args[1]);
         break;

      case WM_WPOSXY:
         emit_wpos_xy(c, dst, dst_flags, args[0]);
         break;

      case WM_PIXELW:
         emit_pixel_w(p, dst, dst_flags, args[0], args[1]);
         break;

      case WM_LINTERP:
         emit_linterp(p, dst, dst_flags, args[0], args[1]);
         break;

      case WM_PINTERP:
         emit_pinterp(p, dst, dst_flags, args[0], args[1], args[2]);
         break;

      case WM_CINTERP:
         emit_cinterp(p, dst, dst_flags, args[0]);
         break;

      case WM_FB_WRITE:
         emit_fb_write(c, args[0], args[1], args[2], inst->target, inst->eot);
         break;

      case WM_FRONTFACING:
         emit_frontfacing(p, dst, dst_flags);
         break;

         /* Straightforward arithmetic:
          */
      case OPCODE_ADD:
         emit_alu2(p, brw_ADD, dst, dst_flags, args[0], args[1]);
         break;

      case OPCODE_FRC:
         emit_alu1(p, brw_FRC, dst, dst_flags, args[0]);
         break;

      case OPCODE_FLR:
         emit_alu1(p, brw_RNDD, dst, dst_flags, args[0]);
         break;

      case OPCODE_DP3:
         emit_dp3(p, dst, dst_flags, args[0], args[1]);
         break;

      case OPCODE_DP4:
         emit_dp4(p, dst, dst_flags, args[0], args[1]);
         break;

      case OPCODE_DPH:
         emit_dph(p, dst, dst_flags, args[0], args[1]);
         break;

      case OPCODE_LRP:
         emit_lrp(p, dst, dst_flags, args[0], args[1], args[2]);
         break;

      case OPCODE_MAD:
         emit_mad(p, dst, dst_flags, args[0], args[1], args[2]);
         break;

      case OPCODE_MOV:
      case OPCODE_SWZ:
         emit_alu1(p, brw_MOV, dst, dst_flags, args[0]);
         break;

      case OPCODE_MUL:
         emit_alu2(p, brw_MUL, dst, dst_flags, args[0], args[1]);
         break;

      case OPCODE_XPD:
         emit_xpd(p, dst, dst_flags, args[0], args[1]);
         break;

         /* Higher math functions:
          */
      case OPCODE_RCP:
         emit_math1(p, BRW_MATH_FUNCTION_INV, dst, dst_flags, args[0]);
         break;

      case OPCODE_RSQ:
         emit_math1(p, BRW_MATH_FUNCTION_RSQ, dst, dst_flags, args[0]);
         break;

      case OPCODE_SIN:
         emit_math1(p, BRW_MATH_FUNCTION_SIN, dst, dst_flags, args[0]);
         break;

      case OPCODE_COS:
         emit_math1(p, BRW_MATH_FUNCTION_COS, dst, dst_flags, args[0]);
         break;

      case OPCODE_EX2:
         emit_math1(p, BRW_MATH_FUNCTION_EXP, dst, dst_flags, args[0]);
         break;

      case OPCODE_LG2:
         emit_math1(p, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]);
         break;

      case OPCODE_SCS:
         /* There is an scs math function, but it would need some
          * fixup for 16-element execution.
          *
          * Instead, cosine goes to dst[0] and sine to dst[1] as two
          * separate single-channel math operations.
          */
         if (dst_flags & WRITEMASK_X)
            emit_math1(p, BRW_MATH_FUNCTION_COS, dst, (dst_flags&SATURATE)|WRITEMASK_X, args[0]);
         if (dst_flags & WRITEMASK_Y)
            emit_math1(p, BRW_MATH_FUNCTION_SIN, dst+1, (dst_flags&SATURATE)|WRITEMASK_X, args[0]);
         break;

      case OPCODE_POW:
         emit_math2(p, BRW_MATH_FUNCTION_POW, dst, dst_flags, args[0], args[1]);
         break;

         /* Comparisons:
          */
      case OPCODE_CMP:
         emit_cmp(p, dst, dst_flags, args[0], args[1], args[2]);
         break;

      case OPCODE_MAX:
         emit_max(p, dst, dst_flags, args[0], args[1]);
         break;

      case OPCODE_MIN:
         emit_min(p, dst, dst_flags, args[0], args[1]);
         break;

      case OPCODE_SLT:
         emit_slt(p, dst, dst_flags, args[0], args[1]);
         break;

      case OPCODE_SLE:
         emit_sle(p, dst, dst_flags, args[0], args[1]);
         break;
      case OPCODE_SGT:
         emit_sgt(p, dst, dst_flags, args[0], args[1]);
         break;
      case OPCODE_SGE:
         emit_sge(p, dst, dst_flags, args[0], args[1]);
         break;
      case OPCODE_SEQ:
         emit_seq(p, dst, dst_flags, args[0], args[1]);
         break;
      case OPCODE_SNE:
         emit_sne(p, dst, dst_flags, args[0], args[1]);
         break;

      case OPCODE_LIT:
         emit_lit(p, dst, dst_flags, args[0]);
         break;

         /* Texturing operations:
          */
      case OPCODE_TEX:
         emit_tex(c, inst, dst, dst_flags, args[0]);
         break;

      case OPCODE_TXB:
         emit_txb(c, inst, dst, dst_flags, args[0]);
         break;

      case OPCODE_KIL:
         emit_kil(c, args[0]);
         break;

      default:
         /* Nothing is emitted for an unknown opcode; it is only reported. */
         _mesa_printf("Unsupported opcode %i (%s) in fragment shader\n",
                      inst->opcode, inst->opcode < MAX_OPCODE ?
                      _mesa_opcode_string(inst->opcode) :
                      "unknown");
      }

      /* Write back any destination component that lives in a spill slot. */
      for (i = 0; i < 4; i++)
         if (inst->dst[i] && inst->dst[i]->spill_slot)
            emit_spill(c,
                       inst->dst[i]->hw_reg,
                       inst->dst[i]->spill_slot);
   }
}