i965: fix bugs in projective texture coordinates
[mesa.git] / src / mesa / drivers / dri / i965 / brw_wm_emit.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "main/macros.h"
34 #include "brw_context.h"
35 #include "brw_wm.h"
36
/* Extra flag bit OR'ed into an instruction's writemask flags (the channel
 * bits are tested as 1<<0 .. 1<<3) to request a saturated result.
 */
#define SATURATE (1 << 5)
38
39 /* Not quite sure how correct this is - need to understand horiz
40 * vs. vertical strides a little better.
41 */
42 static INLINE struct brw_reg sechalf( struct brw_reg reg )
43 {
44 if (reg.vstride)
45 reg.nr++;
46 return reg;
47 }
48
49 /* Payload R0:
50 *
51 * R0.0 -- pixel mask, one bit for each of 4 pixels in 4 tiles,
52 * corresponding to each of the 16 execution channels.
53 * R0.1..8 -- ?
54 * R1.0 -- triangle vertex 0.X
55 * R1.1 -- triangle vertex 0.Y
56 * R1.2 -- tile 0 x,y coords (2 packed uwords)
57 * R1.3 -- tile 1 x,y coords (2 packed uwords)
58 * R1.4 -- tile 2 x,y coords (2 packed uwords)
59 * R1.5 -- tile 3 x,y coords (2 packed uwords)
60 * R1.6 -- ?
61 * R1.7 -- ?
62 * R1.8 -- ?
63 */
64
65
66 static void emit_pixel_xy(struct brw_compile *p,
67 const struct brw_reg *dst,
68 GLuint mask,
69 const struct brw_reg *arg0)
70 {
71 struct brw_reg r1 = brw_vec1_grf(1, 0);
72 struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW);
73
74 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
75
76 /* Calculate pixel centers by adding 1 or 0 to each of the
77 * micro-tile coordinates passed in r1.
78 */
79 if (mask & WRITEMASK_X) {
80 brw_ADD(p,
81 vec16(retype(dst[0], BRW_REGISTER_TYPE_UW)),
82 stride(suboffset(r1_uw, 4), 2, 4, 0),
83 brw_imm_v(0x10101010));
84 }
85
86 if (mask & WRITEMASK_Y) {
87 brw_ADD(p,
88 vec16(retype(dst[1], BRW_REGISTER_TYPE_UW)),
89 stride(suboffset(r1_uw,5), 2, 4, 0),
90 brw_imm_v(0x11001100));
91 }
92
93 brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
94 }
95
96
97
98 static void emit_delta_xy(struct brw_compile *p,
99 const struct brw_reg *dst,
100 GLuint mask,
101 const struct brw_reg *arg0,
102 const struct brw_reg *arg1)
103 {
104 struct brw_reg r1 = brw_vec1_grf(1, 0);
105
106 /* Calc delta X,Y by subtracting origin in r1 from the pixel
107 * centers.
108 */
109 if (mask & WRITEMASK_X) {
110 brw_ADD(p,
111 dst[0],
112 retype(arg0[0], BRW_REGISTER_TYPE_UW),
113 negate(r1));
114 }
115
116 if (mask & WRITEMASK_Y) {
117 brw_ADD(p,
118 dst[1],
119 retype(arg0[1], BRW_REGISTER_TYPE_UW),
120 negate(suboffset(r1,1)));
121
122 }
123 }
124
125 static void emit_wpos_xy(struct brw_wm_compile *c,
126 const struct brw_reg *dst,
127 GLuint mask,
128 const struct brw_reg *arg0)
129 {
130 struct brw_compile *p = &c->func;
131
132 /* Calculate the pixel offset from window bottom left into destination
133 * X and Y channels.
134 */
135 if (mask & WRITEMASK_X) {
136 /* X' = X - origin */
137 brw_ADD(p,
138 dst[0],
139 retype(arg0[0], BRW_REGISTER_TYPE_W),
140 brw_imm_d(0 - c->key.origin_x));
141 }
142
143 if (mask & WRITEMASK_Y) {
144 /* Y' = height - (Y - origin_y) = height + origin_y - Y */
145 brw_ADD(p,
146 dst[1],
147 negate(retype(arg0[1], BRW_REGISTER_TYPE_W)),
148 brw_imm_d(c->key.origin_y + c->key.drawable_height - 1));
149 }
150 }
151
152
153 static void emit_pixel_w( struct brw_compile *p,
154 const struct brw_reg *dst,
155 GLuint mask,
156 const struct brw_reg *arg0,
157 const struct brw_reg *deltas)
158 {
159 /* Don't need this if all you are doing is interpolating color, for
160 * instance.
161 */
162 if (mask & WRITEMASK_W) {
163 struct brw_reg interp3 = brw_vec1_grf(arg0[0].nr+1, 4);
164
165 /* Calc 1/w - just linterp wpos[3] optimized by putting the
166 * result straight into a message reg.
167 */
168 brw_LINE(p, brw_null_reg(), interp3, deltas[0]);
169 brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), deltas[1]);
170
171 /* Calc w */
172 brw_math_16( p, dst[3],
173 BRW_MATH_FUNCTION_INV,
174 BRW_MATH_SATURATE_NONE,
175 2, brw_null_reg(),
176 BRW_MATH_PRECISION_FULL);
177 }
178 }
179
180
181
182 static void emit_linterp( struct brw_compile *p,
183 const struct brw_reg *dst,
184 GLuint mask,
185 const struct brw_reg *arg0,
186 const struct brw_reg *deltas )
187 {
188 struct brw_reg interp[4];
189 GLuint nr = arg0[0].nr;
190 GLuint i;
191
192 interp[0] = brw_vec1_grf(nr, 0);
193 interp[1] = brw_vec1_grf(nr, 4);
194 interp[2] = brw_vec1_grf(nr+1, 0);
195 interp[3] = brw_vec1_grf(nr+1, 4);
196
197 for (i = 0; i < 4; i++) {
198 if (mask & (1<<i)) {
199 brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
200 brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
201 }
202 }
203 }
204
205
206 static void emit_pinterp( struct brw_compile *p,
207 const struct brw_reg *dst,
208 GLuint mask,
209 const struct brw_reg *arg0,
210 const struct brw_reg *deltas,
211 const struct brw_reg *w)
212 {
213 struct brw_reg interp[4];
214 GLuint nr = arg0[0].nr;
215 GLuint i;
216
217 interp[0] = brw_vec1_grf(nr, 0);
218 interp[1] = brw_vec1_grf(nr, 4);
219 interp[2] = brw_vec1_grf(nr+1, 0);
220 interp[3] = brw_vec1_grf(nr+1, 4);
221
222 for (i = 0; i < 4; i++) {
223 if (mask & (1<<i)) {
224 brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
225 brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
226 }
227 }
228 for (i = 0; i < 4; i++) {
229 if (mask & (1<<i)) {
230 brw_MUL(p, dst[i], dst[i], w[3]);
231 }
232 }
233 }
234
235
236 static void emit_cinterp( struct brw_compile *p,
237 const struct brw_reg *dst,
238 GLuint mask,
239 const struct brw_reg *arg0 )
240 {
241 struct brw_reg interp[4];
242 GLuint nr = arg0[0].nr;
243 GLuint i;
244
245 interp[0] = brw_vec1_grf(nr, 0);
246 interp[1] = brw_vec1_grf(nr, 4);
247 interp[2] = brw_vec1_grf(nr+1, 0);
248 interp[3] = brw_vec1_grf(nr+1, 4);
249
250 for (i = 0; i < 4; i++) {
251 if (mask & (1<<i)) {
252 brw_MOV(p, dst[i], suboffset(interp[i],3)); /* TODO: optimize away like other moves */
253 }
254 }
255 }
256
257 /* Sets the destination channels to 1.0 or 0.0 according to glFrontFacing. */
258 static void emit_frontfacing( struct brw_compile *p,
259 const struct brw_reg *dst,
260 GLuint mask )
261 {
262 struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD);
263 GLuint i;
264
265 if (!(mask & WRITEMASK_XYZW))
266 return;
267
268 for (i = 0; i < 4; i++) {
269 if (mask & (1<<i)) {
270 brw_MOV(p, dst[i], brw_imm_f(0.0));
271 }
272 }
273
274 /* bit 31 is "primitive is back face", so checking < (1 << 31) gives
275 * us front face
276 */
277 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, r1_6ud, brw_imm_ud(1 << 31));
278 for (i = 0; i < 4; i++) {
279 if (mask & (1<<i)) {
280 brw_MOV(p, dst[i], brw_imm_f(1.0));
281 }
282 }
283 brw_set_predicate_control_flag_value(p, 0xff);
284 }
285
286 static void emit_alu1( struct brw_compile *p,
287 struct brw_instruction *(*func)(struct brw_compile *,
288 struct brw_reg,
289 struct brw_reg),
290 const struct brw_reg *dst,
291 GLuint mask,
292 const struct brw_reg *arg0 )
293 {
294 GLuint i;
295
296 if (mask & SATURATE)
297 brw_set_saturate(p, 1);
298
299 for (i = 0; i < 4; i++) {
300 if (mask & (1<<i)) {
301 func(p, dst[i], arg0[i]);
302 }
303 }
304
305 if (mask & SATURATE)
306 brw_set_saturate(p, 0);
307 }
308
309
310 static void emit_alu2( struct brw_compile *p,
311 struct brw_instruction *(*func)(struct brw_compile *,
312 struct brw_reg,
313 struct brw_reg,
314 struct brw_reg),
315 const struct brw_reg *dst,
316 GLuint mask,
317 const struct brw_reg *arg0,
318 const struct brw_reg *arg1 )
319 {
320 GLuint i;
321
322 if (mask & SATURATE)
323 brw_set_saturate(p, 1);
324
325 for (i = 0; i < 4; i++) {
326 if (mask & (1<<i)) {
327 func(p, dst[i], arg0[i], arg1[i]);
328 }
329 }
330
331 if (mask & SATURATE)
332 brw_set_saturate(p, 0);
333 }
334
335
336 static void emit_mad( struct brw_compile *p,
337 const struct brw_reg *dst,
338 GLuint mask,
339 const struct brw_reg *arg0,
340 const struct brw_reg *arg1,
341 const struct brw_reg *arg2 )
342 {
343 GLuint i;
344
345 for (i = 0; i < 4; i++) {
346 if (mask & (1<<i)) {
347 brw_MUL(p, dst[i], arg0[i], arg1[i]);
348
349 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
350 brw_ADD(p, dst[i], dst[i], arg2[i]);
351 brw_set_saturate(p, 0);
352 }
353 }
354 }
355
356
357 static void emit_lrp( struct brw_compile *p,
358 const struct brw_reg *dst,
359 GLuint mask,
360 const struct brw_reg *arg0,
361 const struct brw_reg *arg1,
362 const struct brw_reg *arg2 )
363 {
364 GLuint i;
365
366 /* Uses dst as a temporary:
367 */
368 for (i = 0; i < 4; i++) {
369 if (mask & (1<<i)) {
370 /* Can I use the LINE instruction for this?
371 */
372 brw_ADD(p, dst[i], negate(arg0[i]), brw_imm_f(1.0));
373 brw_MUL(p, brw_null_reg(), dst[i], arg2[i]);
374
375 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
376 brw_MAC(p, dst[i], arg0[i], arg1[i]);
377 brw_set_saturate(p, 0);
378 }
379 }
380 }
381
382 static void emit_sop( struct brw_compile *p,
383 const struct brw_reg *dst,
384 GLuint mask,
385 GLuint cond,
386 const struct brw_reg *arg0,
387 const struct brw_reg *arg1 )
388 {
389 GLuint i;
390
391 for (i = 0; i < 4; i++) {
392 if (mask & (1<<i)) {
393 brw_MOV(p, dst[i], brw_imm_f(0));
394 brw_CMP(p, brw_null_reg(), cond, arg0[i], arg1[i]);
395 brw_MOV(p, dst[i], brw_imm_f(1.0));
396 brw_set_predicate_control_flag_value(p, 0xff);
397 }
398 }
399 }
400
401 static void emit_slt( struct brw_compile *p,
402 const struct brw_reg *dst,
403 GLuint mask,
404 const struct brw_reg *arg0,
405 const struct brw_reg *arg1 )
406 {
407 emit_sop(p, dst, mask, BRW_CONDITIONAL_L, arg0, arg1);
408 }
409
410 static void emit_sle( struct brw_compile *p,
411 const struct brw_reg *dst,
412 GLuint mask,
413 const struct brw_reg *arg0,
414 const struct brw_reg *arg1 )
415 {
416 emit_sop(p, dst, mask, BRW_CONDITIONAL_LE, arg0, arg1);
417 }
418
419 static void emit_sgt( struct brw_compile *p,
420 const struct brw_reg *dst,
421 GLuint mask,
422 const struct brw_reg *arg0,
423 const struct brw_reg *arg1 )
424 {
425 emit_sop(p, dst, mask, BRW_CONDITIONAL_G, arg0, arg1);
426 }
427
428 static void emit_sge( struct brw_compile *p,
429 const struct brw_reg *dst,
430 GLuint mask,
431 const struct brw_reg *arg0,
432 const struct brw_reg *arg1 )
433 {
434 emit_sop(p, dst, mask, BRW_CONDITIONAL_GE, arg0, arg1);
435 }
436
437 static void emit_seq( struct brw_compile *p,
438 const struct brw_reg *dst,
439 GLuint mask,
440 const struct brw_reg *arg0,
441 const struct brw_reg *arg1 )
442 {
443 emit_sop(p, dst, mask, BRW_CONDITIONAL_EQ, arg0, arg1);
444 }
445
446 static void emit_sne( struct brw_compile *p,
447 const struct brw_reg *dst,
448 GLuint mask,
449 const struct brw_reg *arg0,
450 const struct brw_reg *arg1 )
451 {
452 emit_sop(p, dst, mask, BRW_CONDITIONAL_NEQ, arg0, arg1);
453 }
454
455 static void emit_cmp( struct brw_compile *p,
456 const struct brw_reg *dst,
457 GLuint mask,
458 const struct brw_reg *arg0,
459 const struct brw_reg *arg1,
460 const struct brw_reg *arg2 )
461 {
462 GLuint i;
463
464 for (i = 0; i < 4; i++) {
465 if (mask & (1<<i)) {
466 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
467 brw_MOV(p, dst[i], arg2[i]);
468 brw_set_saturate(p, 0);
469
470 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], brw_imm_f(0));
471
472 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
473 brw_MOV(p, dst[i], arg1[i]);
474 brw_set_saturate(p, 0);
475 brw_set_predicate_control_flag_value(p, 0xff);
476 }
477 }
478 }
479
480 static void emit_max( struct brw_compile *p,
481 const struct brw_reg *dst,
482 GLuint mask,
483 const struct brw_reg *arg0,
484 const struct brw_reg *arg1 )
485 {
486 GLuint i;
487
488 for (i = 0; i < 4; i++) {
489 if (mask & (1<<i)) {
490 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
491 brw_MOV(p, dst[i], arg0[i]);
492 brw_set_saturate(p, 0);
493
494 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]);
495
496 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
497 brw_MOV(p, dst[i], arg1[i]);
498 brw_set_saturate(p, 0);
499 brw_set_predicate_control_flag_value(p, 0xff);
500 }
501 }
502 }
503
504 static void emit_min( struct brw_compile *p,
505 const struct brw_reg *dst,
506 GLuint mask,
507 const struct brw_reg *arg0,
508 const struct brw_reg *arg1 )
509 {
510 GLuint i;
511
512 for (i = 0; i < 4; i++) {
513 if (mask & (1<<i)) {
514 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
515 brw_MOV(p, dst[i], arg1[i]);
516 brw_set_saturate(p, 0);
517
518 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]);
519
520 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
521 brw_MOV(p, dst[i], arg0[i]);
522 brw_set_saturate(p, 0);
523 brw_set_predicate_control_flag_value(p, 0xff);
524 }
525 }
526 }
527
528
529 static void emit_dp3( struct brw_compile *p,
530 const struct brw_reg *dst,
531 GLuint mask,
532 const struct brw_reg *arg0,
533 const struct brw_reg *arg1 )
534 {
535 if (!(mask & WRITEMASK_XYZW))
536 return; /* Do not emit dead code */
537
538 assert((mask & WRITEMASK_XYZW) == WRITEMASK_X);
539
540 brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
541 brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
542
543 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
544 brw_MAC(p, dst[0], arg0[2], arg1[2]);
545 brw_set_saturate(p, 0);
546 }
547
548
549 static void emit_dp4( struct brw_compile *p,
550 const struct brw_reg *dst,
551 GLuint mask,
552 const struct brw_reg *arg0,
553 const struct brw_reg *arg1 )
554 {
555 if (!(mask & WRITEMASK_XYZW))
556 return; /* Do not emit dead code */
557
558 assert((mask & WRITEMASK_XYZW) == WRITEMASK_X);
559
560 brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
561 brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
562 brw_MAC(p, brw_null_reg(), arg0[2], arg1[2]);
563
564 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
565 brw_MAC(p, dst[0], arg0[3], arg1[3]);
566 brw_set_saturate(p, 0);
567 }
568
569
570 static void emit_dph( struct brw_compile *p,
571 const struct brw_reg *dst,
572 GLuint mask,
573 const struct brw_reg *arg0,
574 const struct brw_reg *arg1 )
575 {
576 if (!(mask & WRITEMASK_XYZW))
577 return; /* Do not emit dead code */
578
579 assert((mask & WRITEMASK_XYZW) == WRITEMASK_X);
580
581 brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
582 brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
583 brw_MAC(p, dst[0], arg0[2], arg1[2]);
584
585 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
586 brw_ADD(p, dst[0], dst[0], arg1[3]);
587 brw_set_saturate(p, 0);
588 }
589
590
591 static void emit_xpd( struct brw_compile *p,
592 const struct brw_reg *dst,
593 GLuint mask,
594 const struct brw_reg *arg0,
595 const struct brw_reg *arg1 )
596 {
597 GLuint i;
598
599 assert(!(mask & WRITEMASK_W) == WRITEMASK_X);
600
601 for (i = 0 ; i < 3; i++) {
602 if (mask & (1<<i)) {
603 GLuint i2 = (i+2)%3;
604 GLuint i1 = (i+1)%3;
605
606 brw_MUL(p, brw_null_reg(), negate(arg0[i2]), arg1[i1]);
607
608 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
609 brw_MAC(p, dst[i], arg0[i1], arg1[i2]);
610 brw_set_saturate(p, 0);
611 }
612 }
613 }
614
615
616 static void emit_math1( struct brw_compile *p,
617 GLuint function,
618 const struct brw_reg *dst,
619 GLuint mask,
620 const struct brw_reg *arg0 )
621 {
622 if (!(mask & WRITEMASK_XYZW))
623 return; /* Do not emit dead code */
624
625 //assert((mask & WRITEMASK_XYZW) == WRITEMASK_X ||
626 // function == BRW_MATH_FUNCTION_SINCOS);
627
628 brw_MOV(p, brw_message_reg(2), arg0[0]);
629
630 /* Send two messages to perform all 16 operations:
631 */
632 brw_math_16(p,
633 dst[0],
634 function,
635 (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
636 2,
637 brw_null_reg(),
638 BRW_MATH_PRECISION_FULL);
639 }
640
641
642 static void emit_math2( struct brw_compile *p,
643 GLuint function,
644 const struct brw_reg *dst,
645 GLuint mask,
646 const struct brw_reg *arg0,
647 const struct brw_reg *arg1)
648 {
649 if (!(mask & WRITEMASK_XYZW))
650 return; /* Do not emit dead code */
651
652 assert((mask & WRITEMASK_XYZW) == WRITEMASK_X);
653
654 brw_push_insn_state(p);
655
656 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
657 brw_MOV(p, brw_message_reg(2), arg0[0]);
658 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
659 brw_MOV(p, brw_message_reg(4), sechalf(arg0[0]));
660
661 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
662 brw_MOV(p, brw_message_reg(3), arg1[0]);
663 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
664 brw_MOV(p, brw_message_reg(5), sechalf(arg1[0]));
665
666
667 /* Send two messages to perform all 16 operations:
668 */
669 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
670 brw_math(p,
671 dst[0],
672 function,
673 (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
674 2,
675 brw_null_reg(),
676 BRW_MATH_DATA_VECTOR,
677 BRW_MATH_PRECISION_FULL);
678
679 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
680 brw_math(p,
681 offset(dst[0],1),
682 function,
683 (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
684 4,
685 brw_null_reg(),
686 BRW_MATH_DATA_VECTOR,
687 BRW_MATH_PRECISION_FULL);
688
689 brw_pop_insn_state(p);
690 }
691
692
693
694 static void emit_tex( struct brw_wm_compile *c,
695 const struct brw_wm_instruction *inst,
696 struct brw_reg *dst,
697 GLuint dst_flags,
698 struct brw_reg *arg )
699 {
700 struct brw_compile *p = &c->func;
701 GLuint msgLength, responseLength;
702 GLuint i, nr;
703 GLuint emit;
704
705 /* How many input regs are there?
706 */
707 switch (inst->tex_idx) {
708 case TEXTURE_1D_INDEX:
709 emit = WRITEMASK_X;
710 nr = 1;
711 break;
712 case TEXTURE_2D_INDEX:
713 case TEXTURE_RECT_INDEX:
714 emit = WRITEMASK_XY;
715 nr = 2;
716 break;
717 default:
718 emit = WRITEMASK_XYZ;
719 nr = 3;
720 break;
721 }
722
723 if (inst->tex_shadow) {
724 nr = 4;
725 emit |= WRITEMASK_W;
726 }
727
728 msgLength = 1;
729
730 for (i = 0; i < nr; i++) {
731 static const GLuint swz[4] = {0,1,2,2};
732 if (emit & (1<<i))
733 brw_MOV(p, brw_message_reg(msgLength+1), arg[swz[i]]);
734 else
735 brw_MOV(p, brw_message_reg(msgLength+1), brw_imm_f(0));
736 msgLength += 2;
737 }
738
739 responseLength = 8; /* always */
740
741 brw_SAMPLE(p,
742 retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW),
743 1,
744 retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW),
745 SURF_INDEX_TEXTURE(inst->tex_unit),
746 inst->tex_unit, /* sampler */
747 inst->writemask,
748 (inst->tex_shadow ?
749 BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE :
750 BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE),
751 responseLength,
752 msgLength,
753 0);
754 }
755
756
757 static void emit_txb( struct brw_wm_compile *c,
758 const struct brw_wm_instruction *inst,
759 struct brw_reg *dst,
760 GLuint dst_flags,
761 struct brw_reg *arg )
762 {
763 struct brw_compile *p = &c->func;
764 GLuint msgLength;
765
766 /* Shadow ignored for txb.
767 */
768 switch (inst->tex_idx) {
769 case TEXTURE_1D_INDEX:
770 brw_MOV(p, brw_message_reg(2), arg[0]);
771 brw_MOV(p, brw_message_reg(4), brw_imm_f(0));
772 brw_MOV(p, brw_message_reg(6), brw_imm_f(0));
773 break;
774 case TEXTURE_2D_INDEX:
775 case TEXTURE_RECT_INDEX:
776 brw_MOV(p, brw_message_reg(2), arg[0]);
777 brw_MOV(p, brw_message_reg(4), arg[1]);
778 brw_MOV(p, brw_message_reg(6), brw_imm_f(0));
779 break;
780 default:
781 brw_MOV(p, brw_message_reg(2), arg[0]);
782 brw_MOV(p, brw_message_reg(4), arg[1]);
783 brw_MOV(p, brw_message_reg(6), arg[2]);
784 break;
785 }
786
787 brw_MOV(p, brw_message_reg(8), arg[3]);
788 msgLength = 9;
789
790 brw_SAMPLE(p,
791 retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW),
792 1,
793 retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW),
794 SURF_INDEX_TEXTURE(inst->tex_unit),
795 inst->tex_unit, /* sampler */
796 inst->writemask,
797 BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS,
798 8, /* responseLength */
799 msgLength,
800 0);
801 }
802
803
804 static void emit_lit( struct brw_compile *p,
805 const struct brw_reg *dst,
806 GLuint mask,
807 const struct brw_reg *arg0 )
808 {
809 assert((mask & WRITEMASK_XW) == 0);
810
811 if (mask & WRITEMASK_Y) {
812 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
813 brw_MOV(p, dst[1], arg0[0]);
814 brw_set_saturate(p, 0);
815 }
816
817 if (mask & WRITEMASK_Z) {
818 emit_math2(p, BRW_MATH_FUNCTION_POW,
819 &dst[2],
820 WRITEMASK_X | (mask & SATURATE),
821 &arg0[1],
822 &arg0[3]);
823 }
824
825 /* Ordinarily you'd use an iff statement to skip or shortcircuit
826 * some of the POW calculations above, but 16-wide iff statements
827 * seem to lock c1 hardware, so this is a nasty workaround:
828 */
829 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_LE, arg0[0], brw_imm_f(0));
830 {
831 if (mask & WRITEMASK_Y)
832 brw_MOV(p, dst[1], brw_imm_f(0));
833
834 if (mask & WRITEMASK_Z)
835 brw_MOV(p, dst[2], brw_imm_f(0));
836 }
837 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
838 }
839
840
841 /* Kill pixel - set execution mask to zero for those pixels which
842 * fail.
843 */
844 static void emit_kil( struct brw_wm_compile *c,
845 struct brw_reg *arg0)
846 {
847 struct brw_compile *p = &c->func;
848 struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
849 GLuint i;
850
851 /* XXX - usually won't need 4 compares!
852 */
853 for (i = 0; i < 4; i++) {
854 brw_push_insn_state(p);
855 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0[i], brw_imm_f(0));
856 brw_set_predicate_control_flag_value(p, 0xff);
857 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
858 brw_AND(p, r0uw, brw_flag_reg(), r0uw);
859 brw_pop_insn_state(p);
860 }
861 }
862
863
864 static void fire_fb_write( struct brw_wm_compile *c,
865 GLuint base_reg,
866 GLuint nr,
867 GLuint target,
868 GLuint eot )
869 {
870 struct brw_compile *p = &c->func;
871
872 /* Pass through control information:
873 */
874 /* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */
875 {
876 brw_push_insn_state(p);
877 brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */
878 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
879 brw_MOV(p,
880 brw_message_reg(base_reg + 1),
881 brw_vec8_grf(1, 0));
882 brw_pop_insn_state(p);
883 }
884
885 /* Send framebuffer write message: */
886 /* send (16) null.0<1>:uw m0 r0.0<8;8,1>:uw 0x85a04000:ud { Align1 EOT } */
887 brw_fb_WRITE(p,
888 retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW),
889 base_reg,
890 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
891 target,
892 nr,
893 0,
894 eot);
895 }
896
897
898 static void emit_aa( struct brw_wm_compile *c,
899 struct brw_reg *arg1,
900 GLuint reg )
901 {
902 struct brw_compile *p = &c->func;
903 GLuint comp = c->key.aa_dest_stencil_reg / 2;
904 GLuint off = c->key.aa_dest_stencil_reg % 2;
905 struct brw_reg aa = offset(arg1[comp], off);
906
907 brw_push_insn_state(p);
908 brw_set_compression_control(p, BRW_COMPRESSION_NONE); /* ?? */
909 brw_MOV(p, brw_message_reg(reg), aa);
910 brw_pop_insn_state(p);
911 }
912
913
914 /* Post-fragment-program processing. Send the results to the
915 * framebuffer.
916 * \param arg0 the fragment color
917 * \param arg1 the pass-through depth value
918 * \param arg2 the shader-computed depth value
919 */
920 static void emit_fb_write( struct brw_wm_compile *c,
921 struct brw_reg *arg0,
922 struct brw_reg *arg1,
923 struct brw_reg *arg2,
924 GLuint target,
925 GLuint eot)
926 {
927 struct brw_compile *p = &c->func;
928 GLuint nr = 2;
929 GLuint channel;
930
931 /* Reserve a space for AA - may not be needed:
932 */
933 if (c->key.aa_dest_stencil_reg)
934 nr += 1;
935
936 /* I don't really understand how this achieves the color interleave
937 * (ie RGBARGBA) in the result: [Do the saturation here]
938 */
939 {
940 brw_push_insn_state(p);
941
942 for (channel = 0; channel < 4; channel++) {
943 /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */
944 /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */
945
946 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
947 brw_MOV(p,
948 brw_message_reg(nr + channel),
949 arg0[channel]);
950
951 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
952 brw_MOV(p,
953 brw_message_reg(nr + channel + 4),
954 sechalf(arg0[channel]));
955 }
956
957 /* skip over the regs populated above:
958 */
959 nr += 8;
960
961 brw_pop_insn_state(p);
962 }
963
964 if (c->key.source_depth_to_render_target)
965 {
966 if (c->key.computes_depth)
967 brw_MOV(p, brw_message_reg(nr), arg2[2]);
968 else
969 brw_MOV(p, brw_message_reg(nr), arg1[1]); /* ? */
970
971 nr += 2;
972 }
973
974 if (c->key.dest_depth_reg)
975 {
976 GLuint comp = c->key.dest_depth_reg / 2;
977 GLuint off = c->key.dest_depth_reg % 2;
978
979 if (off != 0) {
980 brw_push_insn_state(p);
981 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
982
983 brw_MOV(p, brw_message_reg(nr), offset(arg1[comp],1));
984 /* 2nd half? */
985 brw_MOV(p, brw_message_reg(nr+1), arg1[comp+1]);
986 brw_pop_insn_state(p);
987 }
988 else {
989 brw_MOV(p, brw_message_reg(nr), arg1[comp]);
990 }
991 nr += 2;
992 }
993
994 if (!c->key.runtime_check_aads_emit) {
995 if (c->key.aa_dest_stencil_reg)
996 emit_aa(c, arg1, 2);
997
998 fire_fb_write(c, 0, nr, target, eot);
999 }
1000 else {
1001 struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
1002 struct brw_reg ip = brw_ip_reg();
1003 struct brw_instruction *jmp;
1004
1005 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1006 brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
1007 brw_AND(p,
1008 v1_null_ud,
1009 get_element_ud(brw_vec8_grf(1,0), 6),
1010 brw_imm_ud(1<<26));
1011
1012 jmp = brw_JMPI(p, ip, ip, brw_imm_w(0));
1013 {
1014 emit_aa(c, arg1, 2);
1015 fire_fb_write(c, 0, nr, target, eot);
1016 /* note - thread killed in subroutine */
1017 }
1018 brw_land_fwd_jump(p, jmp);
1019
1020 /* ELSE: Shuffle up one register to fill in the hole left for AA:
1021 */
1022 fire_fb_write(c, 1, nr-1, target, eot);
1023 }
1024 }
1025
1026
1027 /**
1028 * Move a GPR to scratch memory.
1029 */
1030 static void emit_spill( struct brw_wm_compile *c,
1031 struct brw_reg reg,
1032 GLuint slot )
1033 {
1034 struct brw_compile *p = &c->func;
1035
1036 /*
1037 mov (16) m2.0<1>:ud r2.0<8;8,1>:ud { Align1 Compr }
1038 */
1039 brw_MOV(p, brw_message_reg(2), reg);
1040
1041 /*
1042 mov (1) r0.2<1>:d 0x00000080:d { Align1 NoMask }
1043 send (16) null.0<1>:uw m1 r0.0<8;8,1>:uw 0x053003ff:ud { Align1 }
1044 */
1045 brw_dp_WRITE_16(p,
1046 retype(vec16(brw_vec8_grf(0, 0)), BRW_REGISTER_TYPE_UW),
1047 1,
1048 slot);
1049 }
1050
1051
1052 /**
1053 * Load a GPR from scratch memory.
1054 */
1055 static void emit_unspill( struct brw_wm_compile *c,
1056 struct brw_reg reg,
1057 GLuint slot )
1058 {
1059 struct brw_compile *p = &c->func;
1060
1061 /* Slot 0 is the undef value.
1062 */
1063 if (slot == 0) {
1064 brw_MOV(p, reg, brw_imm_f(0));
1065 return;
1066 }
1067
1068 /*
1069 mov (1) r0.2<1>:d 0x000000c0:d { Align1 NoMask }
1070 send (16) r110.0<1>:uw m1 r0.0<8;8,1>:uw 0x041243ff:ud { Align1 }
1071 */
1072
1073 brw_dp_READ_16(p,
1074 retype(vec16(reg), BRW_REGISTER_TYPE_UW),
1075 1,
1076 slot);
1077 }
1078
1079
1080 /**
1081 * Retrieve up to 4 GEN4 register pairs for the given wm reg:
1082 * Args with unspill_reg != 0 will be loaded from scratch memory.
1083 */
1084 static void get_argument_regs( struct brw_wm_compile *c,
1085 struct brw_wm_ref *arg[],
1086 struct brw_reg *regs )
1087 {
1088 GLuint i;
1089
1090 for (i = 0; i < 4; i++) {
1091 if (arg[i]) {
1092 if (arg[i]->unspill_reg)
1093 emit_unspill(c,
1094 brw_vec8_grf(arg[i]->unspill_reg, 0),
1095 arg[i]->value->spill_slot);
1096
1097 regs[i] = arg[i]->hw_reg;
1098 }
1099 else {
1100 regs[i] = brw_null_reg();
1101 }
1102 }
1103 }
1104
1105
1106 /**
1107 * For values that have a spill_slot!=0, write those regs to scratch memory.
1108 */
1109 static void spill_values( struct brw_wm_compile *c,
1110 struct brw_wm_value *values,
1111 GLuint nr )
1112 {
1113 GLuint i;
1114
1115 for (i = 0; i < nr; i++)
1116 if (values[i].spill_slot)
1117 emit_spill(c, values[i].hw_reg, values[i].spill_slot);
1118 }
1119
1120
1121 /* Emit the fragment program instructions here.
1122 */
1123 void brw_wm_emit( struct brw_wm_compile *c )
1124 {
1125 struct brw_compile *p = &c->func;
1126 GLuint insn;
1127
1128 brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
1129
1130 /* Check if any of the payload regs need to be spilled:
1131 */
1132 spill_values(c, c->payload.depth, 4);
1133 spill_values(c, c->creg, c->nr_creg);
1134 spill_values(c, c->payload.input_interp, FRAG_ATTRIB_MAX);
1135
1136
1137 for (insn = 0; insn < c->nr_insns; insn++) {
1138
1139 struct brw_wm_instruction *inst = &c->instruction[insn];
1140 struct brw_reg args[3][4], dst[4];
1141 GLuint i, dst_flags;
1142
1143 /* Get argument regs:
1144 */
1145 for (i = 0; i < 3; i++)
1146 get_argument_regs(c, inst->src[i], args[i]);
1147
1148 /* Get dest regs:
1149 */
1150 for (i = 0; i < 4; i++)
1151 if (inst->dst[i])
1152 dst[i] = inst->dst[i]->hw_reg;
1153 else
1154 dst[i] = brw_null_reg();
1155
1156 /* Flags
1157 */
1158 dst_flags = inst->writemask;
1159 if (inst->saturate)
1160 dst_flags |= SATURATE;
1161
1162 switch (inst->opcode) {
1163 /* Generated instructions for calculating triangle interpolants:
1164 */
1165 case WM_PIXELXY:
1166 emit_pixel_xy(p, dst, dst_flags, args[0]);
1167 break;
1168
1169 case WM_DELTAXY:
1170 emit_delta_xy(p, dst, dst_flags, args[0], args[1]);
1171 break;
1172
1173 case WM_WPOSXY:
1174 emit_wpos_xy(c, dst, dst_flags, args[0]);
1175 break;
1176
1177 case WM_PIXELW:
1178 emit_pixel_w(p, dst, dst_flags, args[0], args[1]);
1179 break;
1180
1181 case WM_LINTERP:
1182 emit_linterp(p, dst, dst_flags, args[0], args[1]);
1183 break;
1184
1185 case WM_PINTERP:
1186 emit_pinterp(p, dst, dst_flags, args[0], args[1], args[2]);
1187 break;
1188
1189 case WM_CINTERP:
1190 emit_cinterp(p, dst, dst_flags, args[0]);
1191 break;
1192
1193 case WM_FB_WRITE:
1194 emit_fb_write(c, args[0], args[1], args[2], inst->target, inst->eot);
1195 break;
1196
1197 case WM_FRONTFACING:
1198 emit_frontfacing(p, dst, dst_flags);
1199 break;
1200
1201 /* Straightforward arithmetic:
1202 */
1203 case OPCODE_ADD:
1204 emit_alu2(p, brw_ADD, dst, dst_flags, args[0], args[1]);
1205 break;
1206
1207 case OPCODE_FRC:
1208 emit_alu1(p, brw_FRC, dst, dst_flags, args[0]);
1209 break;
1210
1211 case OPCODE_FLR:
1212 emit_alu1(p, brw_RNDD, dst, dst_flags, args[0]);
1213 break;
1214
1215 case OPCODE_DP3:
1216 emit_dp3(p, dst, dst_flags, args[0], args[1]);
1217 break;
1218
1219 case OPCODE_DP4:
1220 emit_dp4(p, dst, dst_flags, args[0], args[1]);
1221 break;
1222
1223 case OPCODE_DPH:
1224 emit_dph(p, dst, dst_flags, args[0], args[1]);
1225 break;
1226
1227 case OPCODE_LRP:
1228 emit_lrp(p, dst, dst_flags, args[0], args[1], args[2]);
1229 break;
1230
1231 case OPCODE_MAD:
1232 emit_mad(p, dst, dst_flags, args[0], args[1], args[2]);
1233 break;
1234
1235 case OPCODE_MOV:
1236 case OPCODE_SWZ:
1237 emit_alu1(p, brw_MOV, dst, dst_flags, args[0]);
1238 break;
1239
1240 case OPCODE_MUL:
1241 emit_alu2(p, brw_MUL, dst, dst_flags, args[0], args[1]);
1242 break;
1243
1244 case OPCODE_XPD:
1245 emit_xpd(p, dst, dst_flags, args[0], args[1]);
1246 break;
1247
1248 /* Higher math functions:
1249 */
1250 case OPCODE_RCP:
1251 emit_math1(p, BRW_MATH_FUNCTION_INV, dst, dst_flags, args[0]);
1252 break;
1253
1254 case OPCODE_RSQ:
1255 emit_math1(p, BRW_MATH_FUNCTION_RSQ, dst, dst_flags, args[0]);
1256 break;
1257
1258 case OPCODE_SIN:
1259 emit_math1(p, BRW_MATH_FUNCTION_SIN, dst, dst_flags, args[0]);
1260 break;
1261
1262 case OPCODE_COS:
1263 emit_math1(p, BRW_MATH_FUNCTION_COS, dst, dst_flags, args[0]);
1264 break;
1265
1266 case OPCODE_EX2:
1267 emit_math1(p, BRW_MATH_FUNCTION_EXP, dst, dst_flags, args[0]);
1268 break;
1269
1270 case OPCODE_LG2:
1271 emit_math1(p, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]);
1272 break;
1273
1274 case OPCODE_SCS:
1275 /* There is an scs math function, but it would need some
1276 * fixup for 16-element execution.
1277 */
1278 if (dst_flags & WRITEMASK_X)
1279 emit_math1(p, BRW_MATH_FUNCTION_COS, dst, (dst_flags&SATURATE)|WRITEMASK_X, args[0]);
1280 if (dst_flags & WRITEMASK_Y)
1281 emit_math1(p, BRW_MATH_FUNCTION_SIN, dst+1, (dst_flags&SATURATE)|WRITEMASK_X, args[0]);
1282 break;
1283
1284 case OPCODE_POW:
1285 emit_math2(p, BRW_MATH_FUNCTION_POW, dst, dst_flags, args[0], args[1]);
1286 break;
1287
1288 /* Comparisons:
1289 */
1290 case OPCODE_CMP:
1291 emit_cmp(p, dst, dst_flags, args[0], args[1], args[2]);
1292 break;
1293
1294 case OPCODE_MAX:
1295 emit_max(p, dst, dst_flags, args[0], args[1]);
1296 break;
1297
1298 case OPCODE_MIN:
1299 emit_min(p, dst, dst_flags, args[0], args[1]);
1300 break;
1301
1302 case OPCODE_SLT:
1303 emit_slt(p, dst, dst_flags, args[0], args[1]);
1304 break;
1305
1306 case OPCODE_SLE:
1307 emit_sle(p, dst, dst_flags, args[0], args[1]);
1308 break;
1309 case OPCODE_SGT:
1310 emit_sgt(p, dst, dst_flags, args[0], args[1]);
1311 break;
1312 case OPCODE_SGE:
1313 emit_sge(p, dst, dst_flags, args[0], args[1]);
1314 break;
1315 case OPCODE_SEQ:
1316 emit_seq(p, dst, dst_flags, args[0], args[1]);
1317 break;
1318 case OPCODE_SNE:
1319 emit_sne(p, dst, dst_flags, args[0], args[1]);
1320 break;
1321
1322 case OPCODE_LIT:
1323 emit_lit(p, dst, dst_flags, args[0]);
1324 break;
1325
1326 /* Texturing operations:
1327 */
1328 case OPCODE_TEX:
1329 emit_tex(c, inst, dst, dst_flags, args[0]);
1330 break;
1331
1332 case OPCODE_TXB:
1333 emit_txb(c, inst, dst, dst_flags, args[0]);
1334 break;
1335
1336 case OPCODE_KIL:
1337 emit_kil(c, args[0]);
1338 break;
1339
1340 default:
1341 _mesa_printf("Unsupported opcode %i (%s) in fragment shader\n",
1342 inst->opcode, inst->opcode < MAX_OPCODE ?
1343 _mesa_opcode_string(inst->opcode) :
1344 "unknown");
1345 }
1346
1347 for (i = 0; i < 4; i++)
1348 if (inst->dst[i] && inst->dst[i]->spill_slot)
1349 emit_spill(c,
1350 inst->dst[i]->hw_reg,
1351 inst->dst[i]->spill_slot);
1352 }
1353 }