f2dca9caa6c725cff343f14214a922a86de2e138
[mesa.git] / src / mesa / drivers / dri / i965 / brw_wm_emit.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "main/macros.h"
34 #include "brw_context.h"
35 #include "brw_wm.h"
36
37 #define SATURATE (1<<5)
38
39 /* Not quite sure how correct this is - need to understand horiz
40 * vs. vertical strides a little better.
41 */
42 static INLINE struct brw_reg sechalf( struct brw_reg reg )
43 {
44 if (reg.vstride)
45 reg.nr++;
46 return reg;
47 }
48
49 /* Payload R0:
50 *
51 * R0.0 -- pixel mask, one bit for each of 4 pixels in 4 tiles,
52 * corresponding to each of the 16 execution channels.
53 * R0.1..8 -- ?
54 * R1.0 -- triangle vertex 0.X
55 * R1.1 -- triangle vertex 0.Y
56 * R1.2 -- tile 0 x,y coords (2 packed uwords)
57 * R1.3 -- tile 1 x,y coords (2 packed uwords)
58 * R1.4 -- tile 2 x,y coords (2 packed uwords)
59 * R1.5 -- tile 3 x,y coords (2 packed uwords)
60 * R1.6 -- ?
61 * R1.7 -- ?
62 * R1.8 -- ?
63 */
64
65
66 static void emit_pixel_xy(struct brw_compile *p,
67 const struct brw_reg *dst,
68 GLuint mask,
69 const struct brw_reg *arg0)
70 {
71 struct brw_reg r1 = brw_vec1_grf(1, 0);
72 struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW);
73
74 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
75
76 /* Calculate pixel centers by adding 1 or 0 to each of the
77 * micro-tile coordinates passed in r1.
78 */
79 if (mask & WRITEMASK_X) {
80 brw_ADD(p,
81 vec16(retype(dst[0], BRW_REGISTER_TYPE_UW)),
82 stride(suboffset(r1_uw, 4), 2, 4, 0),
83 brw_imm_v(0x10101010));
84 }
85
86 if (mask & WRITEMASK_Y) {
87 brw_ADD(p,
88 vec16(retype(dst[1], BRW_REGISTER_TYPE_UW)),
89 stride(suboffset(r1_uw,5), 2, 4, 0),
90 brw_imm_v(0x11001100));
91 }
92
93 brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
94 }
95
96
97
98 static void emit_delta_xy(struct brw_compile *p,
99 const struct brw_reg *dst,
100 GLuint mask,
101 const struct brw_reg *arg0,
102 const struct brw_reg *arg1)
103 {
104 struct brw_reg r1 = brw_vec1_grf(1, 0);
105
106 /* Calc delta X,Y by subtracting origin in r1 from the pixel
107 * centers.
108 */
109 if (mask & WRITEMASK_X) {
110 brw_ADD(p,
111 dst[0],
112 retype(arg0[0], BRW_REGISTER_TYPE_UW),
113 negate(r1));
114 }
115
116 if (mask & WRITEMASK_Y) {
117 brw_ADD(p,
118 dst[1],
119 retype(arg0[1], BRW_REGISTER_TYPE_UW),
120 negate(suboffset(r1,1)));
121
122 }
123 }
124
125 static void emit_wpos_xy(struct brw_wm_compile *c,
126 const struct brw_reg *dst,
127 GLuint mask,
128 const struct brw_reg *arg0)
129 {
130 struct brw_compile *p = &c->func;
131
132 /* Calculate the pixel offset from window bottom left into destination
133 * X and Y channels.
134 */
135 if (mask & WRITEMASK_X) {
136 /* X' = X - origin */
137 brw_ADD(p,
138 dst[0],
139 retype(arg0[0], BRW_REGISTER_TYPE_W),
140 brw_imm_d(0 - c->key.origin_x));
141 }
142
143 if (mask & WRITEMASK_Y) {
144 /* Y' = height - (Y - origin_y) = height + origin_y - Y */
145 brw_ADD(p,
146 dst[1],
147 negate(retype(arg0[1], BRW_REGISTER_TYPE_W)),
148 brw_imm_d(c->key.origin_y + c->key.drawable_height - 1));
149 }
150 }
151
152
153 static void emit_pixel_w( struct brw_compile *p,
154 const struct brw_reg *dst,
155 GLuint mask,
156 const struct brw_reg *arg0,
157 const struct brw_reg *deltas)
158 {
159 /* Don't need this if all you are doing is interpolating color, for
160 * instance.
161 */
162 if (mask & WRITEMASK_W) {
163 struct brw_reg interp3 = brw_vec1_grf(arg0[0].nr+1, 4);
164
165 /* Calc 1/w - just linterp wpos[3] optimized by putting the
166 * result straight into a message reg.
167 */
168 brw_LINE(p, brw_null_reg(), interp3, deltas[0]);
169 brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), deltas[1]);
170
171 /* Calc w */
172 brw_math_16( p, dst[3],
173 BRW_MATH_FUNCTION_INV,
174 BRW_MATH_SATURATE_NONE,
175 2, brw_null_reg(),
176 BRW_MATH_PRECISION_FULL);
177 }
178 }
179
180
181
182 static void emit_linterp( struct brw_compile *p,
183 const struct brw_reg *dst,
184 GLuint mask,
185 const struct brw_reg *arg0,
186 const struct brw_reg *deltas )
187 {
188 struct brw_reg interp[4];
189 GLuint nr = arg0[0].nr;
190 GLuint i;
191
192 interp[0] = brw_vec1_grf(nr, 0);
193 interp[1] = brw_vec1_grf(nr, 4);
194 interp[2] = brw_vec1_grf(nr+1, 0);
195 interp[3] = brw_vec1_grf(nr+1, 4);
196
197 for (i = 0; i < 4; i++) {
198 if (mask & (1<<i)) {
199 brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
200 brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
201 }
202 }
203 }
204
205
206 static void emit_pinterp( struct brw_compile *p,
207 const struct brw_reg *dst,
208 GLuint mask,
209 const struct brw_reg *arg0,
210 const struct brw_reg *deltas,
211 const struct brw_reg *w)
212 {
213 struct brw_reg interp[4];
214 GLuint nr = arg0[0].nr;
215 GLuint i;
216
217 interp[0] = brw_vec1_grf(nr, 0);
218 interp[1] = brw_vec1_grf(nr, 4);
219 interp[2] = brw_vec1_grf(nr+1, 0);
220 interp[3] = brw_vec1_grf(nr+1, 4);
221
222 for (i = 0; i < 4; i++) {
223 if (mask & (1<<i)) {
224 brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
225 brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
226 }
227 }
228 for (i = 0; i < 4; i++) {
229 if (mask & (1<<i)) {
230 brw_MUL(p, dst[i], dst[i], w[3]);
231 }
232 }
233 }
234
235
236 static void emit_cinterp( struct brw_compile *p,
237 const struct brw_reg *dst,
238 GLuint mask,
239 const struct brw_reg *arg0 )
240 {
241 struct brw_reg interp[4];
242 GLuint nr = arg0[0].nr;
243 GLuint i;
244
245 interp[0] = brw_vec1_grf(nr, 0);
246 interp[1] = brw_vec1_grf(nr, 4);
247 interp[2] = brw_vec1_grf(nr+1, 0);
248 interp[3] = brw_vec1_grf(nr+1, 4);
249
250 for (i = 0; i < 4; i++) {
251 if (mask & (1<<i)) {
252 brw_MOV(p, dst[i], suboffset(interp[i],3)); /* TODO: optimize away like other moves */
253 }
254 }
255 }
256
257
258 static void emit_alu1( struct brw_compile *p,
259 struct brw_instruction *(*func)(struct brw_compile *,
260 struct brw_reg,
261 struct brw_reg),
262 const struct brw_reg *dst,
263 GLuint mask,
264 const struct brw_reg *arg0 )
265 {
266 GLuint i;
267
268 if (mask & SATURATE)
269 brw_set_saturate(p, 1);
270
271 for (i = 0; i < 4; i++) {
272 if (mask & (1<<i)) {
273 func(p, dst[i], arg0[i]);
274 }
275 }
276
277 if (mask & SATURATE)
278 brw_set_saturate(p, 0);
279 }
280
281
282 static void emit_alu2( struct brw_compile *p,
283 struct brw_instruction *(*func)(struct brw_compile *,
284 struct brw_reg,
285 struct brw_reg,
286 struct brw_reg),
287 const struct brw_reg *dst,
288 GLuint mask,
289 const struct brw_reg *arg0,
290 const struct brw_reg *arg1 )
291 {
292 GLuint i;
293
294 if (mask & SATURATE)
295 brw_set_saturate(p, 1);
296
297 for (i = 0; i < 4; i++) {
298 if (mask & (1<<i)) {
299 func(p, dst[i], arg0[i], arg1[i]);
300 }
301 }
302
303 if (mask & SATURATE)
304 brw_set_saturate(p, 0);
305 }
306
307
308 static void emit_mad( struct brw_compile *p,
309 const struct brw_reg *dst,
310 GLuint mask,
311 const struct brw_reg *arg0,
312 const struct brw_reg *arg1,
313 const struct brw_reg *arg2 )
314 {
315 GLuint i;
316
317 for (i = 0; i < 4; i++) {
318 if (mask & (1<<i)) {
319 brw_MUL(p, dst[i], arg0[i], arg1[i]);
320
321 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
322 brw_ADD(p, dst[i], dst[i], arg2[i]);
323 brw_set_saturate(p, 0);
324 }
325 }
326 }
327
328
329 static void emit_lrp( struct brw_compile *p,
330 const struct brw_reg *dst,
331 GLuint mask,
332 const struct brw_reg *arg0,
333 const struct brw_reg *arg1,
334 const struct brw_reg *arg2 )
335 {
336 GLuint i;
337
338 /* Uses dst as a temporary:
339 */
340 for (i = 0; i < 4; i++) {
341 if (mask & (1<<i)) {
342 /* Can I use the LINE instruction for this?
343 */
344 brw_ADD(p, dst[i], negate(arg0[i]), brw_imm_f(1.0));
345 brw_MUL(p, brw_null_reg(), dst[i], arg2[i]);
346
347 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
348 brw_MAC(p, dst[i], arg0[i], arg1[i]);
349 brw_set_saturate(p, 0);
350 }
351 }
352 }
353
354 static void emit_sop( struct brw_compile *p,
355 const struct brw_reg *dst,
356 GLuint mask,
357 GLuint cond,
358 const struct brw_reg *arg0,
359 const struct brw_reg *arg1 )
360 {
361 GLuint i;
362
363 for (i = 0; i < 4; i++) {
364 if (mask & (1<<i)) {
365 brw_MOV(p, dst[i], brw_imm_f(0));
366 brw_CMP(p, brw_null_reg(), cond, arg0[i], arg1[i]);
367 brw_MOV(p, dst[i], brw_imm_f(1.0));
368 brw_set_predicate_control_flag_value(p, 0xff);
369 }
370 }
371 }
372
373 static void emit_slt( struct brw_compile *p,
374 const struct brw_reg *dst,
375 GLuint mask,
376 const struct brw_reg *arg0,
377 const struct brw_reg *arg1 )
378 {
379 emit_sop(p, dst, mask, BRW_CONDITIONAL_L, arg0, arg1);
380 }
381
382 static void emit_sle( struct brw_compile *p,
383 const struct brw_reg *dst,
384 GLuint mask,
385 const struct brw_reg *arg0,
386 const struct brw_reg *arg1 )
387 {
388 emit_sop(p, dst, mask, BRW_CONDITIONAL_LE, arg0, arg1);
389 }
390
391 static void emit_sgt( struct brw_compile *p,
392 const struct brw_reg *dst,
393 GLuint mask,
394 const struct brw_reg *arg0,
395 const struct brw_reg *arg1 )
396 {
397 emit_sop(p, dst, mask, BRW_CONDITIONAL_G, arg0, arg1);
398 }
399
400 static void emit_sge( struct brw_compile *p,
401 const struct brw_reg *dst,
402 GLuint mask,
403 const struct brw_reg *arg0,
404 const struct brw_reg *arg1 )
405 {
406 emit_sop(p, dst, mask, BRW_CONDITIONAL_GE, arg0, arg1);
407 }
408
409 static void emit_seq( struct brw_compile *p,
410 const struct brw_reg *dst,
411 GLuint mask,
412 const struct brw_reg *arg0,
413 const struct brw_reg *arg1 )
414 {
415 emit_sop(p, dst, mask, BRW_CONDITIONAL_EQ, arg0, arg1);
416 }
417
418 static void emit_sne( struct brw_compile *p,
419 const struct brw_reg *dst,
420 GLuint mask,
421 const struct brw_reg *arg0,
422 const struct brw_reg *arg1 )
423 {
424 emit_sop(p, dst, mask, BRW_CONDITIONAL_NEQ, arg0, arg1);
425 }
426
427 static void emit_cmp( struct brw_compile *p,
428 const struct brw_reg *dst,
429 GLuint mask,
430 const struct brw_reg *arg0,
431 const struct brw_reg *arg1,
432 const struct brw_reg *arg2 )
433 {
434 GLuint i;
435
436 for (i = 0; i < 4; i++) {
437 if (mask & (1<<i)) {
438 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
439 brw_MOV(p, dst[i], arg2[i]);
440 brw_set_saturate(p, 0);
441
442 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], brw_imm_f(0));
443
444 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
445 brw_MOV(p, dst[i], arg1[i]);
446 brw_set_saturate(p, 0);
447 brw_set_predicate_control_flag_value(p, 0xff);
448 }
449 }
450 }
451
452 static void emit_max( struct brw_compile *p,
453 const struct brw_reg *dst,
454 GLuint mask,
455 const struct brw_reg *arg0,
456 const struct brw_reg *arg1 )
457 {
458 GLuint i;
459
460 for (i = 0; i < 4; i++) {
461 if (mask & (1<<i)) {
462 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
463 brw_MOV(p, dst[i], arg0[i]);
464 brw_set_saturate(p, 0);
465
466 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]);
467
468 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
469 brw_MOV(p, dst[i], arg1[i]);
470 brw_set_saturate(p, 0);
471 brw_set_predicate_control_flag_value(p, 0xff);
472 }
473 }
474 }
475
476 static void emit_min( struct brw_compile *p,
477 const struct brw_reg *dst,
478 GLuint mask,
479 const struct brw_reg *arg0,
480 const struct brw_reg *arg1 )
481 {
482 GLuint i;
483
484 for (i = 0; i < 4; i++) {
485 if (mask & (1<<i)) {
486 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
487 brw_MOV(p, dst[i], arg1[i]);
488 brw_set_saturate(p, 0);
489
490 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]);
491
492 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
493 brw_MOV(p, dst[i], arg0[i]);
494 brw_set_saturate(p, 0);
495 brw_set_predicate_control_flag_value(p, 0xff);
496 }
497 }
498 }
499
500
501 static void emit_dp3( struct brw_compile *p,
502 const struct brw_reg *dst,
503 GLuint mask,
504 const struct brw_reg *arg0,
505 const struct brw_reg *arg1 )
506 {
507 if (!(mask & WRITEMASK_XYZW))
508 return; /* Do not emit dead code */
509
510 assert((mask & WRITEMASK_XYZW) == WRITEMASK_X);
511
512 brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
513 brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
514
515 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
516 brw_MAC(p, dst[0], arg0[2], arg1[2]);
517 brw_set_saturate(p, 0);
518 }
519
520
521 static void emit_dp4( struct brw_compile *p,
522 const struct brw_reg *dst,
523 GLuint mask,
524 const struct brw_reg *arg0,
525 const struct brw_reg *arg1 )
526 {
527 if (!(mask & WRITEMASK_XYZW))
528 return; /* Do not emit dead code */
529
530 assert((mask & WRITEMASK_XYZW) == WRITEMASK_X);
531
532 brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
533 brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
534 brw_MAC(p, brw_null_reg(), arg0[2], arg1[2]);
535
536 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
537 brw_MAC(p, dst[0], arg0[3], arg1[3]);
538 brw_set_saturate(p, 0);
539 }
540
541
542 static void emit_dph( struct brw_compile *p,
543 const struct brw_reg *dst,
544 GLuint mask,
545 const struct brw_reg *arg0,
546 const struct brw_reg *arg1 )
547 {
548 if (!(mask & WRITEMASK_XYZW))
549 return; /* Do not emit dead code */
550
551 assert((mask & WRITEMASK_XYZW) == WRITEMASK_X);
552
553 brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
554 brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
555 brw_MAC(p, dst[0], arg0[2], arg1[2]);
556
557 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
558 brw_ADD(p, dst[0], dst[0], arg1[3]);
559 brw_set_saturate(p, 0);
560 }
561
562
563 static void emit_xpd( struct brw_compile *p,
564 const struct brw_reg *dst,
565 GLuint mask,
566 const struct brw_reg *arg0,
567 const struct brw_reg *arg1 )
568 {
569 GLuint i;
570
571 assert(!(mask & WRITEMASK_W) == WRITEMASK_X);
572
573 for (i = 0 ; i < 3; i++) {
574 if (mask & (1<<i)) {
575 GLuint i2 = (i+2)%3;
576 GLuint i1 = (i+1)%3;
577
578 brw_MUL(p, brw_null_reg(), negate(arg0[i2]), arg1[i1]);
579
580 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
581 brw_MAC(p, dst[i], arg0[i1], arg1[i2]);
582 brw_set_saturate(p, 0);
583 }
584 }
585 }
586
587
588 static void emit_math1( struct brw_compile *p,
589 GLuint function,
590 const struct brw_reg *dst,
591 GLuint mask,
592 const struct brw_reg *arg0 )
593 {
594 if (!(mask & WRITEMASK_XYZW))
595 return; /* Do not emit dead code */
596
597 //assert((mask & WRITEMASK_XYZW) == WRITEMASK_X ||
598 // function == BRW_MATH_FUNCTION_SINCOS);
599
600 brw_MOV(p, brw_message_reg(2), arg0[0]);
601
602 /* Send two messages to perform all 16 operations:
603 */
604 brw_math_16(p,
605 dst[0],
606 function,
607 (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
608 2,
609 brw_null_reg(),
610 BRW_MATH_PRECISION_FULL);
611 }
612
613
614 static void emit_math2( struct brw_compile *p,
615 GLuint function,
616 const struct brw_reg *dst,
617 GLuint mask,
618 const struct brw_reg *arg0,
619 const struct brw_reg *arg1)
620 {
621 if (!(mask & WRITEMASK_XYZW))
622 return; /* Do not emit dead code */
623
624 assert((mask & WRITEMASK_XYZW) == WRITEMASK_X);
625
626 brw_push_insn_state(p);
627
628 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
629 brw_MOV(p, brw_message_reg(2), arg0[0]);
630 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
631 brw_MOV(p, brw_message_reg(4), sechalf(arg0[0]));
632
633 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
634 brw_MOV(p, brw_message_reg(3), arg1[0]);
635 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
636 brw_MOV(p, brw_message_reg(5), sechalf(arg1[0]));
637
638
639 /* Send two messages to perform all 16 operations:
640 */
641 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
642 brw_math(p,
643 dst[0],
644 function,
645 (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
646 2,
647 brw_null_reg(),
648 BRW_MATH_DATA_VECTOR,
649 BRW_MATH_PRECISION_FULL);
650
651 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
652 brw_math(p,
653 offset(dst[0],1),
654 function,
655 (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
656 4,
657 brw_null_reg(),
658 BRW_MATH_DATA_VECTOR,
659 BRW_MATH_PRECISION_FULL);
660
661 brw_pop_insn_state(p);
662 }
663
664
665
666 static void emit_tex( struct brw_wm_compile *c,
667 const struct brw_wm_instruction *inst,
668 struct brw_reg *dst,
669 GLuint dst_flags,
670 struct brw_reg *arg )
671 {
672 struct brw_compile *p = &c->func;
673 GLuint msgLength, responseLength;
674 GLuint i, nr;
675 GLuint emit;
676
677 /* How many input regs are there?
678 */
679 switch (inst->tex_idx) {
680 case TEXTURE_1D_INDEX:
681 emit = WRITEMASK_X;
682 nr = 1;
683 break;
684 case TEXTURE_2D_INDEX:
685 case TEXTURE_RECT_INDEX:
686 emit = WRITEMASK_XY;
687 nr = 2;
688 break;
689 default:
690 emit = WRITEMASK_XYZ;
691 nr = 3;
692 break;
693 }
694
695 if (inst->tex_shadow) {
696 nr = 4;
697 emit |= WRITEMASK_W;
698 }
699
700 msgLength = 1;
701
702 for (i = 0; i < nr; i++) {
703 static const GLuint swz[4] = {0,1,2,2};
704 if (emit & (1<<i))
705 brw_MOV(p, brw_message_reg(msgLength+1), arg[swz[i]]);
706 else
707 brw_MOV(p, brw_message_reg(msgLength+1), brw_imm_f(0));
708 msgLength += 2;
709 }
710
711 responseLength = 8; /* always */
712
713 brw_SAMPLE(p,
714 retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW),
715 1,
716 retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW),
717 inst->tex_unit + MAX_DRAW_BUFFERS, /* surface */
718 inst->tex_unit, /* sampler */
719 inst->writemask,
720 (inst->tex_shadow ?
721 BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE :
722 BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE),
723 responseLength,
724 msgLength,
725 0);
726 }
727
728
729 static void emit_txb( struct brw_wm_compile *c,
730 const struct brw_wm_instruction *inst,
731 struct brw_reg *dst,
732 GLuint dst_flags,
733 struct brw_reg *arg )
734 {
735 struct brw_compile *p = &c->func;
736 GLuint msgLength;
737
738 /* Shadow ignored for txb.
739 */
740 switch (inst->tex_idx) {
741 case TEXTURE_1D_INDEX:
742 brw_MOV(p, brw_message_reg(2), arg[0]);
743 brw_MOV(p, brw_message_reg(4), brw_imm_f(0));
744 brw_MOV(p, brw_message_reg(6), brw_imm_f(0));
745 break;
746 case TEXTURE_2D_INDEX:
747 case TEXTURE_RECT_INDEX:
748 brw_MOV(p, brw_message_reg(2), arg[0]);
749 brw_MOV(p, brw_message_reg(4), arg[1]);
750 brw_MOV(p, brw_message_reg(6), brw_imm_f(0));
751 break;
752 default:
753 brw_MOV(p, brw_message_reg(2), arg[0]);
754 brw_MOV(p, brw_message_reg(4), arg[1]);
755 brw_MOV(p, brw_message_reg(6), arg[2]);
756 break;
757 }
758
759 brw_MOV(p, brw_message_reg(8), arg[3]);
760 msgLength = 9;
761
762 brw_SAMPLE(p,
763 retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW),
764 1,
765 retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW),
766 inst->tex_unit + MAX_DRAW_BUFFERS, /* surface */
767 inst->tex_unit, /* sampler */
768 inst->writemask,
769 BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS,
770 8, /* responseLength */
771 msgLength,
772 0);
773 }
774
775
776 static void emit_lit( struct brw_compile *p,
777 const struct brw_reg *dst,
778 GLuint mask,
779 const struct brw_reg *arg0 )
780 {
781 assert((mask & WRITEMASK_XW) == 0);
782
783 if (mask & WRITEMASK_Y) {
784 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
785 brw_MOV(p, dst[1], arg0[0]);
786 brw_set_saturate(p, 0);
787 }
788
789 if (mask & WRITEMASK_Z) {
790 emit_math2(p, BRW_MATH_FUNCTION_POW,
791 &dst[2],
792 WRITEMASK_X | (mask & SATURATE),
793 &arg0[1],
794 &arg0[3]);
795 }
796
797 /* Ordinarily you'd use an iff statement to skip or shortcircuit
798 * some of the POW calculations above, but 16-wide iff statements
799 * seem to lock c1 hardware, so this is a nasty workaround:
800 */
801 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_LE, arg0[0], brw_imm_f(0));
802 {
803 if (mask & WRITEMASK_Y)
804 brw_MOV(p, dst[1], brw_imm_f(0));
805
806 if (mask & WRITEMASK_Z)
807 brw_MOV(p, dst[2], brw_imm_f(0));
808 }
809 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
810 }
811
812
813 /* Kill pixel - set execution mask to zero for those pixels which
814 * fail.
815 */
816 static void emit_kil( struct brw_wm_compile *c,
817 struct brw_reg *arg0)
818 {
819 struct brw_compile *p = &c->func;
820 struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
821 GLuint i;
822
823 /* XXX - usually won't need 4 compares!
824 */
825 for (i = 0; i < 4; i++) {
826 brw_push_insn_state(p);
827 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0[i], brw_imm_f(0));
828 brw_set_predicate_control_flag_value(p, 0xff);
829 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
830 brw_AND(p, r0uw, brw_flag_reg(), r0uw);
831 brw_pop_insn_state(p);
832 }
833 }
834
835
836 static void fire_fb_write( struct brw_wm_compile *c,
837 GLuint base_reg,
838 GLuint nr,
839 GLuint target,
840 GLuint eot )
841 {
842 struct brw_compile *p = &c->func;
843
844 /* Pass through control information:
845 */
846 /* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */
847 {
848 brw_push_insn_state(p);
849 brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */
850 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
851 brw_MOV(p,
852 brw_message_reg(base_reg + 1),
853 brw_vec8_grf(1, 0));
854 brw_pop_insn_state(p);
855 }
856
857 /* Send framebuffer write message: */
858 /* send (16) null.0<1>:uw m0 r0.0<8;8,1>:uw 0x85a04000:ud { Align1 EOT } */
859 brw_fb_WRITE(p,
860 retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW),
861 base_reg,
862 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
863 target,
864 nr,
865 0,
866 eot);
867 }
868
869
870 static void emit_aa( struct brw_wm_compile *c,
871 struct brw_reg *arg1,
872 GLuint reg )
873 {
874 struct brw_compile *p = &c->func;
875 GLuint comp = c->key.aa_dest_stencil_reg / 2;
876 GLuint off = c->key.aa_dest_stencil_reg % 2;
877 struct brw_reg aa = offset(arg1[comp], off);
878
879 brw_push_insn_state(p);
880 brw_set_compression_control(p, BRW_COMPRESSION_NONE); /* ?? */
881 brw_MOV(p, brw_message_reg(reg), aa);
882 brw_pop_insn_state(p);
883 }
884
885
886 /* Post-fragment-program processing. Send the results to the
887 * framebuffer.
888 * \param arg0 the fragment color
889 * \param arg1 the pass-through depth value
890 * \param arg2 the shader-computed depth value
891 */
892 static void emit_fb_write( struct brw_wm_compile *c,
893 struct brw_reg *arg0,
894 struct brw_reg *arg1,
895 struct brw_reg *arg2,
896 GLuint target,
897 GLuint eot)
898 {
899 struct brw_compile *p = &c->func;
900 GLuint nr = 2;
901 GLuint channel;
902
903 /* Reserve a space for AA - may not be needed:
904 */
905 if (c->key.aa_dest_stencil_reg)
906 nr += 1;
907
908 /* I don't really understand how this achieves the color interleave
909 * (ie RGBARGBA) in the result: [Do the saturation here]
910 */
911 {
912 brw_push_insn_state(p);
913
914 for (channel = 0; channel < 4; channel++) {
915 /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */
916 /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */
917
918 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
919 brw_MOV(p,
920 brw_message_reg(nr + channel),
921 arg0[channel]);
922
923 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
924 brw_MOV(p,
925 brw_message_reg(nr + channel + 4),
926 sechalf(arg0[channel]));
927 }
928
929 /* skip over the regs populated above:
930 */
931 nr += 8;
932
933 brw_pop_insn_state(p);
934 }
935
936 if (c->key.source_depth_to_render_target)
937 {
938 if (c->key.computes_depth)
939 brw_MOV(p, brw_message_reg(nr), arg2[2]);
940 else
941 brw_MOV(p, brw_message_reg(nr), arg1[1]); /* ? */
942
943 nr += 2;
944 }
945
946 if (c->key.dest_depth_reg)
947 {
948 GLuint comp = c->key.dest_depth_reg / 2;
949 GLuint off = c->key.dest_depth_reg % 2;
950
951 if (off != 0) {
952 brw_push_insn_state(p);
953 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
954
955 brw_MOV(p, brw_message_reg(nr), offset(arg1[comp],1));
956 /* 2nd half? */
957 brw_MOV(p, brw_message_reg(nr+1), arg1[comp+1]);
958 brw_pop_insn_state(p);
959 }
960 else {
961 brw_MOV(p, brw_message_reg(nr), arg1[comp]);
962 }
963 nr += 2;
964 }
965
966 if (!c->key.runtime_check_aads_emit) {
967 if (c->key.aa_dest_stencil_reg)
968 emit_aa(c, arg1, 2);
969
970 fire_fb_write(c, 0, nr, target, eot);
971 }
972 else {
973 struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
974 struct brw_reg ip = brw_ip_reg();
975 struct brw_instruction *jmp;
976
977 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
978 brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
979 brw_AND(p,
980 v1_null_ud,
981 get_element_ud(brw_vec8_grf(1,0), 6),
982 brw_imm_ud(1<<26));
983
984 jmp = brw_JMPI(p, ip, ip, brw_imm_w(0));
985 {
986 emit_aa(c, arg1, 2);
987 fire_fb_write(c, 0, nr, target, eot);
988 /* note - thread killed in subroutine */
989 }
990 brw_land_fwd_jump(p, jmp);
991
992 /* ELSE: Shuffle up one register to fill in the hole left for AA:
993 */
994 fire_fb_write(c, 1, nr-1, target, eot);
995 }
996 }
997
998
999 /* Post-fragment-program processing. Send the results to the
1000 * framebuffer.
1001 */
1002 static void emit_spill( struct brw_wm_compile *c,
1003 struct brw_reg reg,
1004 GLuint slot )
1005 {
1006 struct brw_compile *p = &c->func;
1007
1008 /*
1009 mov (16) m2.0<1>:ud r2.0<8;8,1>:ud { Align1 Compr }
1010 */
1011 brw_MOV(p, brw_message_reg(2), reg);
1012
1013 /*
1014 mov (1) r0.2<1>:d 0x00000080:d { Align1 NoMask }
1015 send (16) null.0<1>:uw m1 r0.0<8;8,1>:uw 0x053003ff:ud { Align1 }
1016 */
1017 brw_dp_WRITE_16(p,
1018 retype(vec16(brw_vec8_grf(0, 0)), BRW_REGISTER_TYPE_UW),
1019 1,
1020 slot);
1021 }
1022
1023
1024 static void emit_unspill( struct brw_wm_compile *c,
1025 struct brw_reg reg,
1026 GLuint slot )
1027 {
1028 struct brw_compile *p = &c->func;
1029
1030 /* Slot 0 is the undef value.
1031 */
1032 if (slot == 0) {
1033 brw_MOV(p, reg, brw_imm_f(0));
1034 return;
1035 }
1036
1037 /*
1038 mov (1) r0.2<1>:d 0x000000c0:d { Align1 NoMask }
1039 send (16) r110.0<1>:uw m1 r0.0<8;8,1>:uw 0x041243ff:ud { Align1 }
1040 */
1041
1042 brw_dp_READ_16(p,
1043 retype(vec16(reg), BRW_REGISTER_TYPE_UW),
1044 1,
1045 slot);
1046 }
1047
1048
1049 /**
1050 * Retrieve upto 4 GEN4 register pairs for the given wm reg:
1051 */
1052 static void get_argument_regs( struct brw_wm_compile *c,
1053 struct brw_wm_ref *arg[],
1054 struct brw_reg *regs )
1055 {
1056 GLuint i;
1057
1058 for (i = 0; i < 4; i++) {
1059 if (arg[i]) {
1060
1061 if (arg[i]->unspill_reg)
1062 emit_unspill(c,
1063 brw_vec8_grf(arg[i]->unspill_reg, 0),
1064 arg[i]->value->spill_slot);
1065
1066 regs[i] = arg[i]->hw_reg;
1067 }
1068 else {
1069 regs[i] = brw_null_reg();
1070 }
1071 }
1072 }
1073
1074
1075 static void spill_values( struct brw_wm_compile *c,
1076 struct brw_wm_value *values,
1077 GLuint nr )
1078 {
1079 GLuint i;
1080
1081 for (i = 0; i < nr; i++)
1082 if (values[i].spill_slot)
1083 emit_spill(c, values[i].hw_reg, values[i].spill_slot);
1084 }
1085
1086
1087 /* Emit the fragment program instructions here.
1088 */
1089 void brw_wm_emit( struct brw_wm_compile *c )
1090 {
1091 struct brw_compile *p = &c->func;
1092 GLuint insn;
1093
1094 brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
1095
1096 /* Check if any of the payload regs need to be spilled:
1097 */
1098 spill_values(c, c->payload.depth, 4);
1099 spill_values(c, c->creg, c->nr_creg);
1100 spill_values(c, c->payload.input_interp, FRAG_ATTRIB_MAX);
1101
1102
1103 for (insn = 0; insn < c->nr_insns; insn++) {
1104
1105 struct brw_wm_instruction *inst = &c->instruction[insn];
1106 struct brw_reg args[3][4], dst[4];
1107 GLuint i, dst_flags;
1108
1109 /* Get argument regs:
1110 */
1111 for (i = 0; i < 3; i++)
1112 get_argument_regs(c, inst->src[i], args[i]);
1113
1114 /* Get dest regs:
1115 */
1116 for (i = 0; i < 4; i++)
1117 if (inst->dst[i])
1118 dst[i] = inst->dst[i]->hw_reg;
1119 else
1120 dst[i] = brw_null_reg();
1121
1122 /* Flags
1123 */
1124 dst_flags = inst->writemask;
1125 if (inst->saturate)
1126 dst_flags |= SATURATE;
1127
1128 switch (inst->opcode) {
1129 /* Generated instructions for calculating triangle interpolants:
1130 */
1131 case WM_PIXELXY:
1132 emit_pixel_xy(p, dst, dst_flags, args[0]);
1133 break;
1134
1135 case WM_DELTAXY:
1136 emit_delta_xy(p, dst, dst_flags, args[0], args[1]);
1137 break;
1138
1139 case WM_WPOSXY:
1140 emit_wpos_xy(c, dst, dst_flags, args[0]);
1141 break;
1142
1143 case WM_PIXELW:
1144 emit_pixel_w(p, dst, dst_flags, args[0], args[1]);
1145 break;
1146
1147 case WM_LINTERP:
1148 emit_linterp(p, dst, dst_flags, args[0], args[1]);
1149 break;
1150
1151 case WM_PINTERP:
1152 emit_pinterp(p, dst, dst_flags, args[0], args[1], args[2]);
1153 break;
1154
1155 case WM_CINTERP:
1156 emit_cinterp(p, dst, dst_flags, args[0]);
1157 break;
1158
1159 case WM_FB_WRITE:
1160 emit_fb_write(c, args[0], args[1], args[2], inst->target, inst->eot);
1161 break;
1162
1163 /* Straightforward arithmetic:
1164 */
1165 case OPCODE_ADD:
1166 emit_alu2(p, brw_ADD, dst, dst_flags, args[0], args[1]);
1167 break;
1168
1169 case OPCODE_FRC:
1170 emit_alu1(p, brw_FRC, dst, dst_flags, args[0]);
1171 break;
1172
1173 case OPCODE_FLR:
1174 emit_alu1(p, brw_RNDD, dst, dst_flags, args[0]);
1175 break;
1176
1177 case OPCODE_DP3:
1178 emit_dp3(p, dst, dst_flags, args[0], args[1]);
1179 break;
1180
1181 case OPCODE_DP4:
1182 emit_dp4(p, dst, dst_flags, args[0], args[1]);
1183 break;
1184
1185 case OPCODE_DPH:
1186 emit_dph(p, dst, dst_flags, args[0], args[1]);
1187 break;
1188
1189 case OPCODE_LRP:
1190 emit_lrp(p, dst, dst_flags, args[0], args[1], args[2]);
1191 break;
1192
1193 case OPCODE_MAD:
1194 emit_mad(p, dst, dst_flags, args[0], args[1], args[2]);
1195 break;
1196
1197 case OPCODE_MOV:
1198 case OPCODE_SWZ:
1199 emit_alu1(p, brw_MOV, dst, dst_flags, args[0]);
1200 break;
1201
1202 case OPCODE_MUL:
1203 emit_alu2(p, brw_MUL, dst, dst_flags, args[0], args[1]);
1204 break;
1205
1206 case OPCODE_XPD:
1207 emit_xpd(p, dst, dst_flags, args[0], args[1]);
1208 break;
1209
1210 /* Higher math functions:
1211 */
1212 case OPCODE_RCP:
1213 emit_math1(p, BRW_MATH_FUNCTION_INV, dst, dst_flags, args[0]);
1214 break;
1215
1216 case OPCODE_RSQ:
1217 emit_math1(p, BRW_MATH_FUNCTION_RSQ, dst, dst_flags, args[0]);
1218 break;
1219
1220 case OPCODE_SIN:
1221 emit_math1(p, BRW_MATH_FUNCTION_SIN, dst, dst_flags, args[0]);
1222 break;
1223
1224 case OPCODE_COS:
1225 emit_math1(p, BRW_MATH_FUNCTION_COS, dst, dst_flags, args[0]);
1226 break;
1227
1228 case OPCODE_EX2:
1229 emit_math1(p, BRW_MATH_FUNCTION_EXP, dst, dst_flags, args[0]);
1230 break;
1231
1232 case OPCODE_LG2:
1233 emit_math1(p, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]);
1234 break;
1235
1236 case OPCODE_SCS:
1237 /* There is an scs math function, but it would need some
1238 * fixup for 16-element execution.
1239 */
1240 if (dst_flags & WRITEMASK_X)
1241 emit_math1(p, BRW_MATH_FUNCTION_COS, dst, (dst_flags&SATURATE)|WRITEMASK_X, args[0]);
1242 if (dst_flags & WRITEMASK_Y)
1243 emit_math1(p, BRW_MATH_FUNCTION_SIN, dst+1, (dst_flags&SATURATE)|WRITEMASK_X, args[0]);
1244 break;
1245
1246 case OPCODE_POW:
1247 emit_math2(p, BRW_MATH_FUNCTION_POW, dst, dst_flags, args[0], args[1]);
1248 break;
1249
1250 /* Comparisons:
1251 */
1252 case OPCODE_CMP:
1253 emit_cmp(p, dst, dst_flags, args[0], args[1], args[2]);
1254 break;
1255
1256 case OPCODE_MAX:
1257 emit_max(p, dst, dst_flags, args[0], args[1]);
1258 break;
1259
1260 case OPCODE_MIN:
1261 emit_min(p, dst, dst_flags, args[0], args[1]);
1262 break;
1263
1264 case OPCODE_SLT:
1265 emit_slt(p, dst, dst_flags, args[0], args[1]);
1266 break;
1267
1268 case OPCODE_SLE:
1269 emit_sle(p, dst, dst_flags, args[0], args[1]);
1270 break;
1271 case OPCODE_SGT:
1272 emit_sgt(p, dst, dst_flags, args[0], args[1]);
1273 break;
1274 case OPCODE_SGE:
1275 emit_sge(p, dst, dst_flags, args[0], args[1]);
1276 break;
1277 case OPCODE_SEQ:
1278 emit_seq(p, dst, dst_flags, args[0], args[1]);
1279 break;
1280 case OPCODE_SNE:
1281 emit_sne(p, dst, dst_flags, args[0], args[1]);
1282 break;
1283
1284 case OPCODE_LIT:
1285 emit_lit(p, dst, dst_flags, args[0]);
1286 break;
1287
1288 /* Texturing operations:
1289 */
1290 case OPCODE_TEX:
1291 emit_tex(c, inst, dst, dst_flags, args[0]);
1292 break;
1293
1294 case OPCODE_TXB:
1295 emit_txb(c, inst, dst, dst_flags, args[0]);
1296 break;
1297
1298 case OPCODE_KIL:
1299 emit_kil(c, args[0]);
1300 break;
1301
1302 default:
1303 _mesa_printf("Unsupported opcode %i (%s) in fragment shader\n",
1304 inst->opcode, inst->opcode < MAX_OPCODE ?
1305 _mesa_opcode_string(inst->opcode) :
1306 "unknown");
1307 }
1308
1309 for (i = 0; i < 4; i++)
1310 if (inst->dst[i] && inst->dst[i]->spill_slot)
1311 emit_spill(c,
1312 inst->dst[i]->hw_reg,
1313 inst->dst[i]->spill_slot);
1314 }
1315 }