Merge commit 'origin/gallium-0.1' into gallium-0.2
[mesa.git] / src / mesa / drivers / dri / i965 / brw_wm_emit.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "main/macros.h"
34 #include "brw_context.h"
35 #include "brw_wm.h"
36
/* Extra flag bit OR'ed into the per-instruction mask (next to the
 * WRITEMASK_* channel bits) to request a saturated result.
 */
#define SATURATE (1 << 5)
38
39 /* Not quite sure how correct this is - need to understand horiz
40 * vs. vertical strides a little better.
41 */
42 static INLINE struct brw_reg sechalf( struct brw_reg reg )
43 {
44 if (reg.vstride)
45 reg.nr++;
46 return reg;
47 }
48
49 /* Payload R0:
50 *
51 * R0.0 -- pixel mask, one bit for each of 4 pixels in 4 tiles,
52 * corresponding to each of the 16 execution channels.
53 * R0.1..8 -- ?
54 * R1.0 -- triangle vertex 0.X
55 * R1.1 -- triangle vertex 0.Y
56 * R1.2 -- tile 0 x,y coords (2 packed uwords)
57 * R1.3 -- tile 1 x,y coords (2 packed uwords)
58 * R1.4 -- tile 2 x,y coords (2 packed uwords)
59 * R1.5 -- tile 3 x,y coords (2 packed uwords)
60 * R1.6 -- ?
61 * R1.7 -- ?
62 * R1.8 -- ?
63 */
64
65
66 static void emit_pixel_xy(struct brw_compile *p,
67 const struct brw_reg *dst,
68 GLuint mask,
69 const struct brw_reg *arg0)
70 {
71 struct brw_reg r1 = brw_vec1_grf(1, 0);
72 struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW);
73
74 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
75
76 /* Calculate pixel centers by adding 1 or 0 to each of the
77 * micro-tile coordinates passed in r1.
78 */
79 if (mask & WRITEMASK_X) {
80 brw_ADD(p,
81 vec16(retype(dst[0], BRW_REGISTER_TYPE_UW)),
82 stride(suboffset(r1_uw, 4), 2, 4, 0),
83 brw_imm_v(0x10101010));
84 }
85
86 if (mask & WRITEMASK_Y) {
87 brw_ADD(p,
88 vec16(retype(dst[1], BRW_REGISTER_TYPE_UW)),
89 stride(suboffset(r1_uw,5), 2, 4, 0),
90 brw_imm_v(0x11001100));
91 }
92
93 brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
94 }
95
96
97
98 static void emit_delta_xy(struct brw_compile *p,
99 const struct brw_reg *dst,
100 GLuint mask,
101 const struct brw_reg *arg0,
102 const struct brw_reg *arg1)
103 {
104 struct brw_reg r1 = brw_vec1_grf(1, 0);
105
106 /* Calc delta X,Y by subtracting origin in r1 from the pixel
107 * centers.
108 */
109 if (mask & WRITEMASK_X) {
110 brw_ADD(p,
111 dst[0],
112 retype(arg0[0], BRW_REGISTER_TYPE_UW),
113 negate(r1));
114 }
115
116 if (mask & WRITEMASK_Y) {
117 brw_ADD(p,
118 dst[1],
119 retype(arg0[1], BRW_REGISTER_TYPE_UW),
120 negate(suboffset(r1,1)));
121
122 }
123 }
124
125 static void emit_wpos_xy(struct brw_wm_compile *c,
126 const struct brw_reg *dst,
127 GLuint mask,
128 const struct brw_reg *arg0)
129 {
130 struct brw_compile *p = &c->func;
131
132 /* Calculate the pixel offset from window bottom left into destination
133 * X and Y channels.
134 */
135 if (mask & WRITEMASK_X) {
136 /* X' = X - origin */
137 brw_ADD(p,
138 dst[0],
139 retype(arg0[0], BRW_REGISTER_TYPE_W),
140 brw_imm_d(0 - c->key.origin_x));
141 }
142
143 if (mask & WRITEMASK_Y) {
144 /* Y' = height - (Y - origin_y) = height + origin_y - Y */
145 brw_ADD(p,
146 dst[1],
147 negate(retype(arg0[1], BRW_REGISTER_TYPE_W)),
148 brw_imm_d(c->key.origin_y + c->key.drawable_height - 1));
149 }
150 }
151
152
153 static void emit_pixel_w( struct brw_compile *p,
154 const struct brw_reg *dst,
155 GLuint mask,
156 const struct brw_reg *arg0,
157 const struct brw_reg *deltas)
158 {
159 /* Don't need this if all you are doing is interpolating color, for
160 * instance.
161 */
162 if (mask & WRITEMASK_W) {
163 struct brw_reg interp3 = brw_vec1_grf(arg0[0].nr+1, 4);
164
165 /* Calc 1/w - just linterp wpos[3] optimized by putting the
166 * result straight into a message reg.
167 */
168 brw_LINE(p, brw_null_reg(), interp3, deltas[0]);
169 brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), deltas[1]);
170
171 /* Calc w */
172 brw_math_16( p, dst[3],
173 BRW_MATH_FUNCTION_INV,
174 BRW_MATH_SATURATE_NONE,
175 2, brw_null_reg(),
176 BRW_MATH_PRECISION_FULL);
177 }
178 }
179
180
181
182 static void emit_linterp( struct brw_compile *p,
183 const struct brw_reg *dst,
184 GLuint mask,
185 const struct brw_reg *arg0,
186 const struct brw_reg *deltas )
187 {
188 struct brw_reg interp[4];
189 GLuint nr = arg0[0].nr;
190 GLuint i;
191
192 interp[0] = brw_vec1_grf(nr, 0);
193 interp[1] = brw_vec1_grf(nr, 4);
194 interp[2] = brw_vec1_grf(nr+1, 0);
195 interp[3] = brw_vec1_grf(nr+1, 4);
196
197 for(i = 0; i < 4; i++ ) {
198 if (mask & (1<<i)) {
199 brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
200 brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
201 }
202 }
203 }
204
205
206 static void emit_pinterp( struct brw_compile *p,
207 const struct brw_reg *dst,
208 GLuint mask,
209 const struct brw_reg *arg0,
210 const struct brw_reg *deltas,
211 const struct brw_reg *w)
212 {
213 struct brw_reg interp[4];
214 GLuint nr = arg0[0].nr;
215 GLuint i;
216
217 interp[0] = brw_vec1_grf(nr, 0);
218 interp[1] = brw_vec1_grf(nr, 4);
219 interp[2] = brw_vec1_grf(nr+1, 0);
220 interp[3] = brw_vec1_grf(nr+1, 4);
221
222 for(i = 0; i < 4; i++ ) {
223 if (mask & (1<<i)) {
224 brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
225 brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
226 }
227 }
228 for(i = 0; i < 4; i++ ) {
229 if (mask & (1<<i)) {
230 brw_MUL(p, dst[i], dst[i], w[3]);
231 }
232 }
233 }
234
235 static void emit_cinterp( struct brw_compile *p,
236 const struct brw_reg *dst,
237 GLuint mask,
238 const struct brw_reg *arg0 )
239 {
240 struct brw_reg interp[4];
241 GLuint nr = arg0[0].nr;
242 GLuint i;
243
244 interp[0] = brw_vec1_grf(nr, 0);
245 interp[1] = brw_vec1_grf(nr, 4);
246 interp[2] = brw_vec1_grf(nr+1, 0);
247 interp[3] = brw_vec1_grf(nr+1, 4);
248
249 for(i = 0; i < 4; i++ ) {
250 if (mask & (1<<i)) {
251 brw_MOV(p, dst[i], suboffset(interp[i],3)); /* TODO: optimize away like other moves */
252 }
253 }
254 }
255
256
257
258
259
260 static void emit_alu1( struct brw_compile *p,
261 struct brw_instruction *(*func)(struct brw_compile *,
262 struct brw_reg,
263 struct brw_reg),
264 const struct brw_reg *dst,
265 GLuint mask,
266 const struct brw_reg *arg0 )
267 {
268 GLuint i;
269
270 if (mask & SATURATE)
271 brw_set_saturate(p, 1);
272
273 for (i = 0; i < 4; i++) {
274 if (mask & (1<<i)) {
275 func(p, dst[i], arg0[i]);
276 }
277 }
278
279 if (mask & SATURATE)
280 brw_set_saturate(p, 0);
281 }
282
283 static void emit_alu2( struct brw_compile *p,
284 struct brw_instruction *(*func)(struct brw_compile *,
285 struct brw_reg,
286 struct brw_reg,
287 struct brw_reg),
288 const struct brw_reg *dst,
289 GLuint mask,
290 const struct brw_reg *arg0,
291 const struct brw_reg *arg1 )
292 {
293 GLuint i;
294
295 if (mask & SATURATE)
296 brw_set_saturate(p, 1);
297
298 for (i = 0; i < 4; i++) {
299 if (mask & (1<<i)) {
300 func(p, dst[i], arg0[i], arg1[i]);
301 }
302 }
303
304 if (mask & SATURATE)
305 brw_set_saturate(p, 0);
306 }
307
308
309 static void emit_mad( struct brw_compile *p,
310 const struct brw_reg *dst,
311 GLuint mask,
312 const struct brw_reg *arg0,
313 const struct brw_reg *arg1,
314 const struct brw_reg *arg2 )
315 {
316 GLuint i;
317
318 for (i = 0; i < 4; i++) {
319 if (mask & (1<<i)) {
320 brw_MUL(p, dst[i], arg0[i], arg1[i]);
321
322 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
323 brw_ADD(p, dst[i], dst[i], arg2[i]);
324 brw_set_saturate(p, 0);
325 }
326 }
327 }
328
329
330 static void emit_lrp( struct brw_compile *p,
331 const struct brw_reg *dst,
332 GLuint mask,
333 const struct brw_reg *arg0,
334 const struct brw_reg *arg1,
335 const struct brw_reg *arg2 )
336 {
337 GLuint i;
338
339 /* Uses dst as a temporary:
340 */
341 for (i = 0; i < 4; i++) {
342 if (mask & (1<<i)) {
343 /* Can I use the LINE instruction for this?
344 */
345 brw_ADD(p, dst[i], negate(arg0[i]), brw_imm_f(1.0));
346 brw_MUL(p, brw_null_reg(), dst[i], arg2[i]);
347
348 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
349 brw_MAC(p, dst[i], arg0[i], arg1[i]);
350 brw_set_saturate(p, 0);
351 }
352 }
353 }
354 static void emit_sop( struct brw_compile *p,
355 const struct brw_reg *dst,
356 GLuint mask,
357 GLuint cond,
358 const struct brw_reg *arg0,
359 const struct brw_reg *arg1 )
360 {
361 GLuint i;
362
363 for (i = 0; i < 4; i++) {
364 if (mask & (1<<i)) {
365 brw_MOV(p, dst[i], brw_imm_f(0));
366 brw_CMP(p, brw_null_reg(), cond, arg0[i], arg1[i]);
367 brw_MOV(p, dst[i], brw_imm_f(1.0));
368 brw_set_predicate_control_flag_value(p, 0xff);
369 }
370 }
371 }
372
373 static void emit_slt( struct brw_compile *p,
374 const struct brw_reg *dst,
375 GLuint mask,
376 const struct brw_reg *arg0,
377 const struct brw_reg *arg1 )
378 {
379 emit_sop(p, dst, mask, BRW_CONDITIONAL_L, arg0, arg1);
380 }
381
382 static void emit_sle( struct brw_compile *p,
383 const struct brw_reg *dst,
384 GLuint mask,
385 const struct brw_reg *arg0,
386 const struct brw_reg *arg1 )
387 {
388 emit_sop(p, dst, mask, BRW_CONDITIONAL_LE, arg0, arg1);
389 }
390
391 static void emit_sgt( struct brw_compile *p,
392 const struct brw_reg *dst,
393 GLuint mask,
394 const struct brw_reg *arg0,
395 const struct brw_reg *arg1 )
396 {
397 emit_sop(p, dst, mask, BRW_CONDITIONAL_G, arg0, arg1);
398 }
399
400 static void emit_sge( struct brw_compile *p,
401 const struct brw_reg *dst,
402 GLuint mask,
403 const struct brw_reg *arg0,
404 const struct brw_reg *arg1 )
405 {
406 emit_sop(p, dst, mask, BRW_CONDITIONAL_GE, arg0, arg1);
407 }
408
409 static void emit_seq( struct brw_compile *p,
410 const struct brw_reg *dst,
411 GLuint mask,
412 const struct brw_reg *arg0,
413 const struct brw_reg *arg1 )
414 {
415 emit_sop(p, dst, mask, BRW_CONDITIONAL_EQ, arg0, arg1);
416 }
417
418 static void emit_sne( struct brw_compile *p,
419 const struct brw_reg *dst,
420 GLuint mask,
421 const struct brw_reg *arg0,
422 const struct brw_reg *arg1 )
423 {
424 emit_sop(p, dst, mask, BRW_CONDITIONAL_NEQ, arg0, arg1);
425 }
426
427 static void emit_cmp( struct brw_compile *p,
428 const struct brw_reg *dst,
429 GLuint mask,
430 const struct brw_reg *arg0,
431 const struct brw_reg *arg1,
432 const struct brw_reg *arg2 )
433 {
434 GLuint i;
435
436 for (i = 0; i < 4; i++) {
437 if (mask & (1<<i)) {
438 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
439 brw_MOV(p, dst[i], arg2[i]);
440 brw_set_saturate(p, 0);
441
442 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], brw_imm_f(0));
443
444 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
445 brw_MOV(p, dst[i], arg1[i]);
446 brw_set_saturate(p, 0);
447 brw_set_predicate_control_flag_value(p, 0xff);
448 }
449 }
450 }
451
452 static void emit_max( struct brw_compile *p,
453 const struct brw_reg *dst,
454 GLuint mask,
455 const struct brw_reg *arg0,
456 const struct brw_reg *arg1 )
457 {
458 GLuint i;
459
460 for (i = 0; i < 4; i++) {
461 if (mask & (1<<i)) {
462 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
463 brw_MOV(p, dst[i], arg0[i]);
464 brw_set_saturate(p, 0);
465
466 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]);
467
468 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
469 brw_MOV(p, dst[i], arg1[i]);
470 brw_set_saturate(p, 0);
471 brw_set_predicate_control_flag_value(p, 0xff);
472 }
473 }
474 }
475
476 static void emit_min( struct brw_compile *p,
477 const struct brw_reg *dst,
478 GLuint mask,
479 const struct brw_reg *arg0,
480 const struct brw_reg *arg1 )
481 {
482 GLuint i;
483
484 for (i = 0; i < 4; i++) {
485 if (mask & (1<<i)) {
486 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
487 brw_MOV(p, dst[i], arg1[i]);
488 brw_set_saturate(p, 0);
489
490 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]);
491
492 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
493 brw_MOV(p, dst[i], arg0[i]);
494 brw_set_saturate(p, 0);
495 brw_set_predicate_control_flag_value(p, 0xff);
496 }
497 }
498 }
499
500
501 static void emit_dp3( struct brw_compile *p,
502 const struct brw_reg *dst,
503 GLuint mask,
504 const struct brw_reg *arg0,
505 const struct brw_reg *arg1 )
506 {
507 if (!(mask & WRITEMASK_XYZW))
508 return; /* Do not emit dead code*/
509
510 assert((mask & WRITEMASK_XYZW) == WRITEMASK_X);
511
512 brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
513 brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
514
515 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
516 brw_MAC(p, dst[0], arg0[2], arg1[2]);
517 brw_set_saturate(p, 0);
518 }
519
520
521 static void emit_dp4( struct brw_compile *p,
522 const struct brw_reg *dst,
523 GLuint mask,
524 const struct brw_reg *arg0,
525 const struct brw_reg *arg1 )
526 {
527 if (!(mask & WRITEMASK_XYZW))
528 return; /* Do not emit dead code*/
529
530 assert((mask & WRITEMASK_XYZW) == WRITEMASK_X);
531
532 brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
533 brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
534 brw_MAC(p, brw_null_reg(), arg0[2], arg1[2]);
535
536 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
537 brw_MAC(p, dst[0], arg0[3], arg1[3]);
538 brw_set_saturate(p, 0);
539 }
540
541
542 static void emit_dph( struct brw_compile *p,
543 const struct brw_reg *dst,
544 GLuint mask,
545 const struct brw_reg *arg0,
546 const struct brw_reg *arg1 )
547 {
548 if (!(mask & WRITEMASK_XYZW))
549 return; /* Do not emit dead code*/
550
551 assert((mask & WRITEMASK_XYZW) == WRITEMASK_X);
552
553 brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
554 brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
555 brw_MAC(p, dst[0], arg0[2], arg1[2]);
556
557 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
558 brw_ADD(p, dst[0], dst[0], arg1[3]);
559 brw_set_saturate(p, 0);
560 }
561
562
563 static void emit_xpd( struct brw_compile *p,
564 const struct brw_reg *dst,
565 GLuint mask,
566 const struct brw_reg *arg0,
567 const struct brw_reg *arg1 )
568 {
569 GLuint i;
570
571 assert(!(mask & WRITEMASK_W) == WRITEMASK_X);
572
573 for (i = 0 ; i < 3; i++) {
574 if (mask & (1<<i)) {
575 GLuint i2 = (i+2)%3;
576 GLuint i1 = (i+1)%3;
577
578 brw_MUL(p, brw_null_reg(), negate(arg0[i2]), arg1[i1]);
579
580 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
581 brw_MAC(p, dst[i], arg0[i1], arg1[i2]);
582 brw_set_saturate(p, 0);
583 }
584 }
585 }
586
587
588 static void emit_math1( struct brw_compile *p,
589 GLuint function,
590 const struct brw_reg *dst,
591 GLuint mask,
592 const struct brw_reg *arg0 )
593 {
594 if (!(mask & WRITEMASK_XYZW))
595 return; /* Do not emit dead code*/
596
597 //assert((mask & WRITEMASK_XYZW) == WRITEMASK_X ||
598 // function == BRW_MATH_FUNCTION_SINCOS);
599
600 brw_MOV(p, brw_message_reg(2), arg0[0]);
601
602 /* Send two messages to perform all 16 operations:
603 */
604 brw_math_16(p,
605 dst[0],
606 function,
607 (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
608 2,
609 brw_null_reg(),
610 BRW_MATH_PRECISION_FULL);
611 }
612
613
614 static void emit_math2( struct brw_compile *p,
615 GLuint function,
616 const struct brw_reg *dst,
617 GLuint mask,
618 const struct brw_reg *arg0,
619 const struct brw_reg *arg1)
620 {
621 if (!(mask & WRITEMASK_XYZW))
622 return; /* Do not emit dead code*/
623
624 assert((mask & WRITEMASK_XYZW) == WRITEMASK_X);
625
626 brw_push_insn_state(p);
627
628 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
629 brw_MOV(p, brw_message_reg(2), arg0[0]);
630 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
631 brw_MOV(p, brw_message_reg(4), sechalf(arg0[0]));
632
633 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
634 brw_MOV(p, brw_message_reg(3), arg1[0]);
635 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
636 brw_MOV(p, brw_message_reg(5), sechalf(arg1[0]));
637
638
639 /* Send two messages to perform all 16 operations:
640 */
641 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
642 brw_math(p,
643 dst[0],
644 function,
645 (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
646 2,
647 brw_null_reg(),
648 BRW_MATH_DATA_VECTOR,
649 BRW_MATH_PRECISION_FULL);
650
651 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
652 brw_math(p,
653 offset(dst[0],1),
654 function,
655 (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
656 4,
657 brw_null_reg(),
658 BRW_MATH_DATA_VECTOR,
659 BRW_MATH_PRECISION_FULL);
660
661 brw_pop_insn_state(p);
662 }
663
664
665
666 static void emit_tex( struct brw_wm_compile *c,
667 const struct brw_wm_instruction *inst,
668 struct brw_reg *dst,
669 GLuint dst_flags,
670 struct brw_reg *arg )
671 {
672 struct brw_compile *p = &c->func;
673 GLuint msgLength, responseLength;
674 GLboolean shadow = (c->key.shadowtex_mask & (1<<inst->tex_unit)) ? 1 : 0;
675 GLuint i, nr;
676 GLuint emit;
677
678 /* How many input regs are there?
679 */
680 switch (inst->tex_idx) {
681 case TEXTURE_1D_INDEX:
682 emit = WRITEMASK_X;
683 nr = 1;
684 break;
685 case TEXTURE_2D_INDEX:
686 case TEXTURE_RECT_INDEX:
687 emit = WRITEMASK_XY;
688 nr = 2;
689 break;
690 default:
691 emit = WRITEMASK_XYZ;
692 nr = 3;
693 break;
694 }
695
696 if (shadow) {
697 nr = 4;
698 emit |= WRITEMASK_W;
699 }
700
701 msgLength = 1;
702
703 for (i = 0; i < nr; i++) {
704 static const GLuint swz[4] = {0,1,2,2};
705 if (emit & (1<<i))
706 brw_MOV(p, brw_message_reg(msgLength+1), arg[swz[i]]);
707 else
708 brw_MOV(p, brw_message_reg(msgLength+1), brw_imm_f(0));
709 msgLength += 2;
710 }
711
712 responseLength = 8; /* always */
713
714 brw_SAMPLE(p,
715 retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW),
716 1,
717 retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW),
718 inst->tex_unit + MAX_DRAW_BUFFERS, /* surface */
719 inst->tex_unit, /* sampler */
720 inst->writemask,
721 (shadow ?
722 BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE :
723 BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE),
724 responseLength,
725 msgLength,
726 0);
727 }
728
729
730 static void emit_txb( struct brw_wm_compile *c,
731 const struct brw_wm_instruction *inst,
732 struct brw_reg *dst,
733 GLuint dst_flags,
734 struct brw_reg *arg )
735 {
736 struct brw_compile *p = &c->func;
737 GLuint msgLength;
738
739 /* Shadow ignored for txb.
740 */
741 switch (inst->tex_idx) {
742 case TEXTURE_1D_INDEX:
743 brw_MOV(p, brw_message_reg(2), arg[0]);
744 brw_MOV(p, brw_message_reg(4), brw_imm_f(0));
745 brw_MOV(p, brw_message_reg(6), brw_imm_f(0));
746 break;
747 case TEXTURE_2D_INDEX:
748 case TEXTURE_RECT_INDEX:
749 brw_MOV(p, brw_message_reg(2), arg[0]);
750 brw_MOV(p, brw_message_reg(4), arg[1]);
751 brw_MOV(p, brw_message_reg(6), brw_imm_f(0));
752 break;
753 default:
754 brw_MOV(p, brw_message_reg(2), arg[0]);
755 brw_MOV(p, brw_message_reg(4), arg[1]);
756 brw_MOV(p, brw_message_reg(6), arg[2]);
757 break;
758 }
759
760 brw_MOV(p, brw_message_reg(8), arg[3]);
761 msgLength = 9;
762
763
764 brw_SAMPLE(p,
765 retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW),
766 1,
767 retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW),
768 inst->tex_unit + MAX_DRAW_BUFFERS, /* surface */
769 inst->tex_unit, /* sampler */
770 inst->writemask,
771 BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS,
772 8, /* responseLength */
773 msgLength,
774 0);
775
776 }
777
778
779 static void emit_lit( struct brw_compile *p,
780 const struct brw_reg *dst,
781 GLuint mask,
782 const struct brw_reg *arg0 )
783 {
784 assert((mask & WRITEMASK_XW) == 0);
785
786 if (mask & WRITEMASK_Y) {
787 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
788 brw_MOV(p, dst[1], arg0[0]);
789 brw_set_saturate(p, 0);
790 }
791
792 if (mask & WRITEMASK_Z) {
793 emit_math2(p, BRW_MATH_FUNCTION_POW,
794 &dst[2],
795 WRITEMASK_X | (mask & SATURATE),
796 &arg0[1],
797 &arg0[3]);
798 }
799
800 /* Ordinarily you'd use an iff statement to skip or shortcircuit
801 * some of the POW calculations above, but 16-wide iff statements
802 * seem to lock c1 hardware, so this is a nasty workaround:
803 */
804 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_LE, arg0[0], brw_imm_f(0));
805 {
806 if (mask & WRITEMASK_Y)
807 brw_MOV(p, dst[1], brw_imm_f(0));
808
809 if (mask & WRITEMASK_Z)
810 brw_MOV(p, dst[2], brw_imm_f(0));
811 }
812 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
813 }
814
815
816 /* Kill pixel - set execution mask to zero for those pixels which
817 * fail.
818 */
819 static void emit_kil( struct brw_wm_compile *c,
820 struct brw_reg *arg0)
821 {
822 struct brw_compile *p = &c->func;
823 struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
824 GLuint i;
825
826
827 /* XXX - usually won't need 4 compares!
828 */
829 for (i = 0; i < 4; i++) {
830 brw_push_insn_state(p);
831 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0[i], brw_imm_f(0));
832 brw_set_predicate_control_flag_value(p, 0xff);
833 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
834 brw_AND(p, r0uw, brw_flag_reg(), r0uw);
835 brw_pop_insn_state(p);
836 }
837 }
838
839 static void fire_fb_write( struct brw_wm_compile *c,
840 GLuint base_reg,
841 GLuint nr,
842 GLuint target,
843 GLuint eot )
844 {
845 struct brw_compile *p = &c->func;
846
847 /* Pass through control information:
848 */
849 /* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */
850 {
851 brw_push_insn_state(p);
852 brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */
853 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
854 brw_MOV(p,
855 brw_message_reg(base_reg + 1),
856 brw_vec8_grf(1, 0));
857 brw_pop_insn_state(p);
858 }
859
860 /* Send framebuffer write message: */
861 /* send (16) null.0<1>:uw m0 r0.0<8;8,1>:uw 0x85a04000:ud { Align1 EOT } */
862 brw_fb_WRITE(p,
863 retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW),
864 base_reg,
865 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
866 target,
867 nr,
868 0,
869 eot);
870 }
871
872 static void emit_aa( struct brw_wm_compile *c,
873 struct brw_reg *arg1,
874 GLuint reg )
875 {
876 struct brw_compile *p = &c->func;
877 GLuint comp = c->key.aa_dest_stencil_reg / 2;
878 GLuint off = c->key.aa_dest_stencil_reg % 2;
879 struct brw_reg aa = offset(arg1[comp], off);
880
881 brw_push_insn_state(p);
882 brw_set_compression_control(p, BRW_COMPRESSION_NONE); /* ?? */
883 brw_MOV(p, brw_message_reg(reg), aa);
884 brw_pop_insn_state(p);
885 }
886
887
888 /* Post-fragment-program processing. Send the results to the
889 * framebuffer.
890 */
891 static void emit_fb_write( struct brw_wm_compile *c,
892 struct brw_reg *arg0,
893 struct brw_reg *arg1,
894 struct brw_reg *arg2,
895 GLuint target,
896 GLuint eot)
897 {
898 struct brw_compile *p = &c->func;
899 GLuint nr = 2;
900 GLuint channel;
901
902 /* Reserve a space for AA - may not be needed:
903 */
904 if (c->key.aa_dest_stencil_reg)
905 nr += 1;
906
907 /* I don't really understand how this achieves the color interleave
908 * (ie RGBARGBA) in the result: [Do the saturation here]
909 */
910 {
911 brw_push_insn_state(p);
912
913 for (channel = 0; channel < 4; channel++) {
914 /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */
915 /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */
916
917 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
918 brw_MOV(p,
919 brw_message_reg(nr + channel),
920 arg0[channel]);
921
922 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
923 brw_MOV(p,
924 brw_message_reg(nr + channel + 4),
925 sechalf(arg0[channel]));
926 }
927
928 /* skip over the regs populated above:
929 */
930 nr += 8;
931
932 brw_pop_insn_state(p);
933 }
934
935 if (c->key.source_depth_to_render_target)
936 {
937 if (c->key.computes_depth)
938 brw_MOV(p, brw_message_reg(nr), arg2[2]);
939 else
940 brw_MOV(p, brw_message_reg(nr), arg1[1]); /* ? */
941
942 nr += 2;
943 }
944
945 if (c->key.dest_depth_reg)
946 {
947 GLuint comp = c->key.dest_depth_reg / 2;
948 GLuint off = c->key.dest_depth_reg % 2;
949
950 if (off != 0) {
951 brw_push_insn_state(p);
952 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
953
954 brw_MOV(p, brw_message_reg(nr), offset(arg1[comp],1));
955 /* 2nd half? */
956 brw_MOV(p, brw_message_reg(nr+1), arg1[comp+1]);
957 brw_pop_insn_state(p);
958 }
959 else {
960 brw_MOV(p, brw_message_reg(nr), arg1[comp]);
961 }
962 nr += 2;
963 }
964
965
966 if (!c->key.runtime_check_aads_emit) {
967 if (c->key.aa_dest_stencil_reg)
968 emit_aa(c, arg1, 2);
969
970 fire_fb_write(c, 0, nr, target, eot);
971 }
972 else {
973 struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
974 struct brw_reg ip = brw_ip_reg();
975 struct brw_instruction *jmp;
976
977 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
978 brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
979 brw_AND(p,
980 v1_null_ud,
981 get_element_ud(brw_vec8_grf(1,0), 6),
982 brw_imm_ud(1<<26));
983
984 jmp = brw_JMPI(p, ip, ip, brw_imm_w(0));
985 {
986 emit_aa(c, arg1, 2);
987 fire_fb_write(c, 0, nr, target, eot);
988 /* note - thread killed in subroutine */
989 }
990 brw_land_fwd_jump(p, jmp);
991
992 /* ELSE: Shuffle up one register to fill in the hole left for AA:
993 */
994 fire_fb_write(c, 1, nr-1, target, eot);
995 }
996 }
997
998
999
1000
1001 /* Post-fragment-program processing. Send the results to the
1002 * framebuffer.
1003 */
1004 static void emit_spill( struct brw_wm_compile *c,
1005 struct brw_reg reg,
1006 GLuint slot )
1007 {
1008 struct brw_compile *p = &c->func;
1009
1010 /*
1011 mov (16) m2.0<1>:ud r2.0<8;8,1>:ud { Align1 Compr }
1012 */
1013 brw_MOV(p, brw_message_reg(2), reg);
1014
1015 /*
1016 mov (1) r0.2<1>:d 0x00000080:d { Align1 NoMask }
1017 send (16) null.0<1>:uw m1 r0.0<8;8,1>:uw 0x053003ff:ud { Align1 }
1018 */
1019 brw_dp_WRITE_16(p,
1020 retype(vec16(brw_vec8_grf(0, 0)), BRW_REGISTER_TYPE_UW),
1021 1,
1022 slot);
1023 }
1024
1025 static void emit_unspill( struct brw_wm_compile *c,
1026 struct brw_reg reg,
1027 GLuint slot )
1028 {
1029 struct brw_compile *p = &c->func;
1030
1031 /* Slot 0 is the undef value.
1032 */
1033 if (slot == 0) {
1034 brw_MOV(p, reg, brw_imm_f(0));
1035 return;
1036 }
1037
1038 /*
1039 mov (1) r0.2<1>:d 0x000000c0:d { Align1 NoMask }
1040 send (16) r110.0<1>:uw m1 r0.0<8;8,1>:uw 0x041243ff:ud { Align1 }
1041 */
1042
1043 brw_dp_READ_16(p,
1044 retype(vec16(reg), BRW_REGISTER_TYPE_UW),
1045 1,
1046 slot);
1047 }
1048
1049
1050
1051 /**
1052 * Retrieve upto 4 GEN4 register pairs for the given wm reg:
1053 */
1054 static void get_argument_regs( struct brw_wm_compile *c,
1055 struct brw_wm_ref *arg[],
1056 struct brw_reg *regs )
1057 {
1058 GLuint i;
1059
1060 for (i = 0; i < 4; i++) {
1061 if (arg[i]) {
1062
1063 if (arg[i]->unspill_reg)
1064 emit_unspill(c,
1065 brw_vec8_grf(arg[i]->unspill_reg, 0),
1066 arg[i]->value->spill_slot);
1067
1068 regs[i] = arg[i]->hw_reg;
1069 }
1070 else {
1071 regs[i] = brw_null_reg();
1072 }
1073 }
1074 }
1075
1076 static void spill_values( struct brw_wm_compile *c,
1077 struct brw_wm_value *values,
1078 GLuint nr )
1079 {
1080 GLuint i;
1081
1082 for (i = 0; i < nr; i++)
1083 if (values[i].spill_slot)
1084 emit_spill(c, values[i].hw_reg, values[i].spill_slot);
1085 }
1086
1087
1088
1089 /* Emit the fragment program instructions here.
1090 */
1091 void brw_wm_emit( struct brw_wm_compile *c )
1092 {
1093 struct brw_compile *p = &c->func;
1094 GLuint insn;
1095
1096 brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
1097
1098 /* Check if any of the payload regs need to be spilled:
1099 */
1100 spill_values(c, c->payload.depth, 4);
1101 spill_values(c, c->creg, c->nr_creg);
1102 spill_values(c, c->payload.input_interp, FRAG_ATTRIB_MAX);
1103
1104
1105 for (insn = 0; insn < c->nr_insns; insn++) {
1106
1107 struct brw_wm_instruction *inst = &c->instruction[insn];
1108 struct brw_reg args[3][4], dst[4];
1109 GLuint i, dst_flags;
1110
1111 /* Get argument regs:
1112 */
1113 for (i = 0; i < 3; i++)
1114 get_argument_regs(c, inst->src[i], args[i]);
1115
1116 /* Get dest regs:
1117 */
1118 for (i = 0; i < 4; i++)
1119 if (inst->dst[i])
1120 dst[i] = inst->dst[i]->hw_reg;
1121 else
1122 dst[i] = brw_null_reg();
1123
1124 /* Flags
1125 */
1126 dst_flags = inst->writemask;
1127 if (inst->saturate)
1128 dst_flags |= SATURATE;
1129
1130 switch (inst->opcode) {
1131 /* Generated instructions for calculating triangle interpolants:
1132 */
1133 case WM_PIXELXY:
1134 emit_pixel_xy(p, dst, dst_flags, args[0]);
1135 break;
1136
1137 case WM_DELTAXY:
1138 emit_delta_xy(p, dst, dst_flags, args[0], args[1]);
1139 break;
1140
1141 case WM_WPOSXY:
1142 emit_wpos_xy(c, dst, dst_flags, args[0]);
1143 break;
1144
1145 case WM_PIXELW:
1146 emit_pixel_w(p, dst, dst_flags, args[0], args[1]);
1147 break;
1148
1149 case WM_LINTERP:
1150 emit_linterp(p, dst, dst_flags, args[0], args[1]);
1151 break;
1152
1153 case WM_PINTERP:
1154 emit_pinterp(p, dst, dst_flags, args[0], args[1], args[2]);
1155 break;
1156
1157 case WM_CINTERP:
1158 emit_cinterp(p, dst, dst_flags, args[0]);
1159 break;
1160
1161 case WM_FB_WRITE:
1162 emit_fb_write(c, args[0], args[1], args[2], inst->target, inst->eot);
1163 break;
1164
1165 /* Straightforward arithmetic:
1166 */
1167 case OPCODE_ADD:
1168 emit_alu2(p, brw_ADD, dst, dst_flags, args[0], args[1]);
1169 break;
1170
1171 case OPCODE_FRC:
1172 emit_alu1(p, brw_FRC, dst, dst_flags, args[0]);
1173 break;
1174
1175 case OPCODE_FLR:
1176 emit_alu1(p, brw_RNDD, dst, dst_flags, args[0]);
1177 break;
1178
1179 case OPCODE_DP3: /* */
1180 emit_dp3(p, dst, dst_flags, args[0], args[1]);
1181 break;
1182
1183 case OPCODE_DP4:
1184 emit_dp4(p, dst, dst_flags, args[0], args[1]);
1185 break;
1186
1187 case OPCODE_DPH:
1188 emit_dph(p, dst, dst_flags, args[0], args[1]);
1189 break;
1190
1191 case OPCODE_LRP: /* */
1192 emit_lrp(p, dst, dst_flags, args[0], args[1], args[2]);
1193 break;
1194
1195 case OPCODE_MAD:
1196 emit_mad(p, dst, dst_flags, args[0], args[1], args[2]);
1197 break;
1198
1199 case OPCODE_MOV:
1200 case OPCODE_SWZ:
1201 emit_alu1(p, brw_MOV, dst, dst_flags, args[0]);
1202 break;
1203
1204 case OPCODE_MUL:
1205 emit_alu2(p, brw_MUL, dst, dst_flags, args[0], args[1]);
1206 break;
1207
1208 case OPCODE_XPD:
1209 emit_xpd(p, dst, dst_flags, args[0], args[1]);
1210 break;
1211
1212 /* Higher math functions:
1213 */
1214 case OPCODE_RCP:
1215 emit_math1(p, BRW_MATH_FUNCTION_INV, dst, dst_flags, args[0]);
1216 break;
1217
1218 case OPCODE_RSQ:
1219 emit_math1(p, BRW_MATH_FUNCTION_RSQ, dst, dst_flags, args[0]);
1220 break;
1221
1222 case OPCODE_SIN:
1223 emit_math1(p, BRW_MATH_FUNCTION_SIN, dst, dst_flags, args[0]);
1224 break;
1225
1226 case OPCODE_COS:
1227 emit_math1(p, BRW_MATH_FUNCTION_COS, dst, dst_flags, args[0]);
1228 break;
1229
1230 case OPCODE_EX2:
1231 emit_math1(p, BRW_MATH_FUNCTION_EXP, dst, dst_flags, args[0]);
1232 break;
1233
1234 case OPCODE_LG2:
1235 emit_math1(p, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]);
1236 break;
1237
1238 case OPCODE_SCS:
1239 /* There is an scs math function, but it would need some
1240 * fixup for 16-element execution.
1241 */
1242 if (dst_flags & WRITEMASK_X)
1243 emit_math1(p, BRW_MATH_FUNCTION_COS, dst, (dst_flags&SATURATE)|WRITEMASK_X, args[0]);
1244 if (dst_flags & WRITEMASK_Y)
1245 emit_math1(p, BRW_MATH_FUNCTION_SIN, dst+1, (dst_flags&SATURATE)|WRITEMASK_X, args[0]);
1246 break;
1247
1248 case OPCODE_POW:
1249 emit_math2(p, BRW_MATH_FUNCTION_POW, dst, dst_flags, args[0], args[1]);
1250 break;
1251
1252 /* Comparisons:
1253 */
1254 case OPCODE_CMP:
1255 emit_cmp(p, dst, dst_flags, args[0], args[1], args[2]);
1256 break;
1257
1258 case OPCODE_MAX:
1259 emit_max(p, dst, dst_flags, args[0], args[1]);
1260 break;
1261
1262 case OPCODE_MIN:
1263 emit_min(p, dst, dst_flags, args[0], args[1]);
1264 break;
1265
1266 case OPCODE_SLT:
1267 emit_slt(p, dst, dst_flags, args[0], args[1]);
1268 break;
1269
1270 case OPCODE_SLE:
1271 emit_sle(p, dst, dst_flags, args[0], args[1]);
1272 break;
1273 case OPCODE_SGT:
1274 emit_sgt(p, dst, dst_flags, args[0], args[1]);
1275 break;
1276 case OPCODE_SGE:
1277 emit_sge(p, dst, dst_flags, args[0], args[1]);
1278 break;
1279 case OPCODE_SEQ:
1280 emit_seq(p, dst, dst_flags, args[0], args[1]);
1281 break;
1282 case OPCODE_SNE:
1283 emit_sne(p, dst, dst_flags, args[0], args[1]);
1284 break;
1285
1286 case OPCODE_LIT:
1287 emit_lit(p, dst, dst_flags, args[0]);
1288 break;
1289
1290 /* Texturing operations:
1291 */
1292 case OPCODE_TEX:
1293 emit_tex(c, inst, dst, dst_flags, args[0]);
1294 break;
1295
1296 case OPCODE_TXB:
1297 emit_txb(c, inst, dst, dst_flags, args[0]);
1298 break;
1299
1300 case OPCODE_KIL:
1301 emit_kil(c, args[0]);
1302 break;
1303
1304 default:
1305 _mesa_printf("Unsupported opcode %i (%s) in fragment shader\n",
1306 inst->opcode, inst->opcode < MAX_OPCODE ?
1307 _mesa_opcode_string(inst->opcode) :
1308 "unknown");
1309 }
1310
1311 for (i = 0; i < 4; i++)
1312 if (inst->dst[i] && inst->dst[i]->spill_slot)
1313 emit_spill(c,
1314 inst->dst[i]->hw_reg,
1315 inst->dst[i]->spill_slot);
1316 }
1317 }
1318
1319
1320
1321
1322