Merge branch 'master' of git+ssh://znh@git.freedesktop.org/git/mesa/mesa into 965...
[mesa.git] / src / mesa / drivers / dri / i965 / brw_wm_emit.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "macros.h"
34 #include "brw_context.h"
35 #include "brw_wm.h"
36
37 #define SATURATE (1<<5)
38
39 /* Not quite sure how correct this is - need to understand horiz
40 * vs. vertical strides a little better.
41 */
42 static __inline struct brw_reg sechalf( struct brw_reg reg )
43 {
44 if (reg.vstride)
45 reg.nr++;
46 return reg;
47 }
48
49 /* Payload R0:
50 *
51 * R0.0 -- pixel mask, one bit for each of 4 pixels in 4 tiles,
52 * corresponding to each of the 16 execution channels.
53 * R0.1..8 -- ?
54 * R1.0 -- triangle vertex 0.X
55 * R1.1 -- triangle vertex 0.Y
56 * R1.2 -- tile 0 x,y coords (2 packed uwords)
57 * R1.3 -- tile 1 x,y coords (2 packed uwords)
58 * R1.4 -- tile 2 x,y coords (2 packed uwords)
59 * R1.5 -- tile 3 x,y coords (2 packed uwords)
60 * R1.6 -- ?
61 * R1.7 -- ?
62 * R1.8 -- ?
63 */
64
65
66 static void emit_pixel_xy(struct brw_compile *p,
67 const struct brw_reg *dst,
68 GLuint mask,
69 const struct brw_reg *arg0)
70 {
71 struct brw_reg r1 = brw_vec1_grf(1, 0);
72 struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW);
73
74 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
75
76 /* Calculate pixel centers by adding 1 or 0 to each of the
77 * micro-tile coordinates passed in r1.
78 */
79 if (mask & WRITEMASK_X) {
80 brw_ADD(p,
81 vec16(retype(dst[0], BRW_REGISTER_TYPE_UW)),
82 stride(suboffset(r1_uw, 4), 2, 4, 0),
83 brw_imm_v(0x10101010));
84 }
85
86 if (mask & WRITEMASK_Y) {
87 brw_ADD(p,
88 vec16(retype(dst[1], BRW_REGISTER_TYPE_UW)),
89 stride(suboffset(r1_uw,5), 2, 4, 0),
90 brw_imm_v(0x11001100));
91 }
92
93 brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
94 }
95
96
97
98 static void emit_delta_xy(struct brw_compile *p,
99 const struct brw_reg *dst,
100 GLuint mask,
101 const struct brw_reg *arg0,
102 const struct brw_reg *arg1)
103 {
104 struct brw_reg r1 = brw_vec1_grf(1, 0);
105
106 /* Calc delta X,Y by subtracting origin in r1 from the pixel
107 * centers.
108 */
109 if (mask & WRITEMASK_X) {
110 brw_ADD(p,
111 dst[0],
112 retype(arg0[0], BRW_REGISTER_TYPE_UW),
113 negate(r1));
114 }
115
116 if (mask & WRITEMASK_Y) {
117 brw_ADD(p,
118 dst[1],
119 retype(arg0[1], BRW_REGISTER_TYPE_UW),
120 negate(suboffset(r1,1)));
121
122 }
123 }
124
125 static void emit_wpos_xy(struct brw_compile *p,
126 const struct brw_reg *dst,
127 GLuint mask,
128 const struct brw_reg *arg0)
129 {
130 /* Calc delta X,Y by subtracting origin in r1 from the pixel
131 * centers.
132 */
133 if (mask & WRITEMASK_X) {
134 brw_MOV(p,
135 dst[0],
136 retype(arg0[0], BRW_REGISTER_TYPE_UW));
137 }
138
139 if (mask & WRITEMASK_Y) {
140 /* TODO -- window_height - Y */
141 brw_MOV(p,
142 dst[1],
143 negate(retype(arg0[1], BRW_REGISTER_TYPE_UW)));
144
145 }
146 }
147
148
149 static void emit_pixel_w( struct brw_compile *p,
150 const struct brw_reg *dst,
151 GLuint mask,
152 const struct brw_reg *arg0,
153 const struct brw_reg *deltas)
154 {
155 /* Don't need this if all you are doing is interpolating color, for
156 * instance.
157 */
158 if (mask & WRITEMASK_W) {
159 struct brw_reg interp3 = brw_vec1_grf(arg0[0].nr+1, 4);
160
161 /* Calc 1/w - just linterp wpos[3] optimized by putting the
162 * result straight into a message reg.
163 */
164 brw_LINE(p, brw_null_reg(), interp3, deltas[0]);
165 brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), deltas[1]);
166
167 /* Calc w */
168 brw_math_16( p, dst[3],
169 BRW_MATH_FUNCTION_INV,
170 BRW_MATH_SATURATE_NONE,
171 2, brw_null_reg(),
172 BRW_MATH_PRECISION_FULL);
173 }
174 }
175
176
177
178 static void emit_linterp( struct brw_compile *p,
179 const struct brw_reg *dst,
180 GLuint mask,
181 const struct brw_reg *arg0,
182 const struct brw_reg *deltas )
183 {
184 struct brw_reg interp[4];
185 GLuint nr = arg0[0].nr;
186 GLuint i;
187
188 interp[0] = brw_vec1_grf(nr, 0);
189 interp[1] = brw_vec1_grf(nr, 4);
190 interp[2] = brw_vec1_grf(nr+1, 0);
191 interp[3] = brw_vec1_grf(nr+1, 4);
192
193 for(i = 0; i < 4; i++ ) {
194 if (mask & (1<<i)) {
195 brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
196 brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
197 }
198 }
199 }
200
201
202 static void emit_pinterp( struct brw_compile *p,
203 const struct brw_reg *dst,
204 GLuint mask,
205 const struct brw_reg *arg0,
206 const struct brw_reg *deltas,
207 const struct brw_reg *w)
208 {
209 struct brw_reg interp[4];
210 GLuint nr = arg0[0].nr;
211 GLuint i;
212
213 interp[0] = brw_vec1_grf(nr, 0);
214 interp[1] = brw_vec1_grf(nr, 4);
215 interp[2] = brw_vec1_grf(nr+1, 0);
216 interp[3] = brw_vec1_grf(nr+1, 4);
217
218 for(i = 0; i < 4; i++ ) {
219 if (mask & (1<<i)) {
220 brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
221 brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
222 brw_MUL(p, dst[i], dst[i], w[3]);
223 }
224 }
225 }
226
227 static void emit_cinterp( struct brw_compile *p,
228 const struct brw_reg *dst,
229 GLuint mask,
230 const struct brw_reg *arg0 )
231 {
232 struct brw_reg interp[4];
233 GLuint nr = arg0[0].nr;
234 GLuint i;
235
236 interp[0] = brw_vec1_grf(nr, 0);
237 interp[1] = brw_vec1_grf(nr, 4);
238 interp[2] = brw_vec1_grf(nr+1, 0);
239 interp[3] = brw_vec1_grf(nr+1, 4);
240
241 for(i = 0; i < 4; i++ ) {
242 if (mask & (1<<i)) {
243 brw_MOV(p, dst[i], suboffset(interp[i],3)); /* TODO: optimize away like other moves */
244 }
245 }
246 }
247
248
249
250
251
252 static void emit_alu1( struct brw_compile *p,
253 struct brw_instruction *(*func)(struct brw_compile *,
254 struct brw_reg,
255 struct brw_reg),
256 const struct brw_reg *dst,
257 GLuint mask,
258 const struct brw_reg *arg0 )
259 {
260 GLuint i;
261
262 if (mask & SATURATE)
263 brw_set_saturate(p, 1);
264
265 for (i = 0; i < 4; i++) {
266 if (mask & (1<<i)) {
267 func(p, dst[i], arg0[i]);
268 }
269 }
270
271 if (mask & SATURATE)
272 brw_set_saturate(p, 0);
273 }
274
275 static void emit_alu2( struct brw_compile *p,
276 struct brw_instruction *(*func)(struct brw_compile *,
277 struct brw_reg,
278 struct brw_reg,
279 struct brw_reg),
280 const struct brw_reg *dst,
281 GLuint mask,
282 const struct brw_reg *arg0,
283 const struct brw_reg *arg1 )
284 {
285 GLuint i;
286
287 if (mask & SATURATE)
288 brw_set_saturate(p, 1);
289
290 for (i = 0; i < 4; i++) {
291 if (mask & (1<<i)) {
292 func(p, dst[i], arg0[i], arg1[i]);
293 }
294 }
295
296 if (mask & SATURATE)
297 brw_set_saturate(p, 0);
298 }
299
300
301 static void emit_mad( struct brw_compile *p,
302 const struct brw_reg *dst,
303 GLuint mask,
304 const struct brw_reg *arg0,
305 const struct brw_reg *arg1,
306 const struct brw_reg *arg2 )
307 {
308 GLuint i;
309
310 for (i = 0; i < 4; i++) {
311 if (mask & (1<<i)) {
312 brw_MUL(p, dst[i], arg0[i], arg1[i]);
313
314 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
315 brw_ADD(p, dst[i], dst[i], arg2[i]);
316 brw_set_saturate(p, 0);
317 }
318 }
319 }
320
321
322 static void emit_lrp( struct brw_compile *p,
323 const struct brw_reg *dst,
324 GLuint mask,
325 const struct brw_reg *arg0,
326 const struct brw_reg *arg1,
327 const struct brw_reg *arg2 )
328 {
329 GLuint i;
330
331 /* Uses dst as a temporary:
332 */
333 for (i = 0; i < 4; i++) {
334 if (mask & (1<<i)) {
335 /* Can I use the LINE instruction for this?
336 */
337 brw_ADD(p, dst[i], negate(arg0[i]), brw_imm_f(1.0));
338 brw_MUL(p, brw_null_reg(), dst[i], arg2[i]);
339
340 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
341 brw_MAC(p, dst[i], arg0[i], arg1[i]);
342 brw_set_saturate(p, 0);
343 }
344 }
345 }
346 static void emit_sop( struct brw_compile *p,
347 const struct brw_reg *dst,
348 GLuint mask,
349 GLuint cond,
350 const struct brw_reg *arg0,
351 const struct brw_reg *arg1 )
352 {
353 GLuint i;
354
355 for (i = 0; i < 4; i++) {
356 if (mask & (1<<i)) {
357 brw_MOV(p, dst[i], brw_imm_f(0));
358 brw_CMP(p, brw_null_reg(), cond, arg0[i], arg1[i]);
359 brw_MOV(p, dst[i], brw_imm_f(1.0));
360 brw_set_predicate_control_flag_value(p, 0xff);
361 }
362 }
363 }
364
365 static void emit_slt( struct brw_compile *p,
366 const struct brw_reg *dst,
367 GLuint mask,
368 const struct brw_reg *arg0,
369 const struct brw_reg *arg1 )
370 {
371 emit_sop(p, dst, mask, BRW_CONDITIONAL_L, arg0, arg1);
372 }
373
374 static void emit_sle( struct brw_compile *p,
375 const struct brw_reg *dst,
376 GLuint mask,
377 const struct brw_reg *arg0,
378 const struct brw_reg *arg1 )
379 {
380 emit_sop(p, dst, mask, BRW_CONDITIONAL_LE, arg0, arg1);
381 }
382
383 static void emit_sgt( struct brw_compile *p,
384 const struct brw_reg *dst,
385 GLuint mask,
386 const struct brw_reg *arg0,
387 const struct brw_reg *arg1 )
388 {
389 emit_sop(p, dst, mask, BRW_CONDITIONAL_G, arg0, arg1);
390 }
391
392 static void emit_sge( struct brw_compile *p,
393 const struct brw_reg *dst,
394 GLuint mask,
395 const struct brw_reg *arg0,
396 const struct brw_reg *arg1 )
397 {
398 emit_sop(p, dst, mask, BRW_CONDITIONAL_GE, arg0, arg1);
399 }
400
401 static void emit_seq( struct brw_compile *p,
402 const struct brw_reg *dst,
403 GLuint mask,
404 const struct brw_reg *arg0,
405 const struct brw_reg *arg1 )
406 {
407 emit_sop(p, dst, mask, BRW_CONDITIONAL_EQ, arg0, arg1);
408 }
409
410 static void emit_sne( struct brw_compile *p,
411 const struct brw_reg *dst,
412 GLuint mask,
413 const struct brw_reg *arg0,
414 const struct brw_reg *arg1 )
415 {
416 emit_sop(p, dst, mask, BRW_CONDITIONAL_NEQ, arg0, arg1);
417 }
418
419 static void emit_cmp( struct brw_compile *p,
420 const struct brw_reg *dst,
421 GLuint mask,
422 const struct brw_reg *arg0,
423 const struct brw_reg *arg1,
424 const struct brw_reg *arg2 )
425 {
426 GLuint i;
427
428 for (i = 0; i < 4; i++) {
429 if (mask & (1<<i)) {
430 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
431 brw_MOV(p, dst[i], arg2[i]);
432 brw_set_saturate(p, 0);
433
434 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], brw_imm_f(0));
435
436 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
437 brw_MOV(p, dst[i], arg1[i]);
438 brw_set_saturate(p, 0);
439 brw_set_predicate_control_flag_value(p, 0xff);
440 }
441 }
442 }
443
444 static void emit_max( struct brw_compile *p,
445 const struct brw_reg *dst,
446 GLuint mask,
447 const struct brw_reg *arg0,
448 const struct brw_reg *arg1 )
449 {
450 GLuint i;
451
452 for (i = 0; i < 4; i++) {
453 if (mask & (1<<i)) {
454 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
455 brw_MOV(p, dst[i], arg0[i]);
456 brw_set_saturate(p, 0);
457
458 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]);
459
460 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
461 brw_MOV(p, dst[i], arg1[i]);
462 brw_set_saturate(p, 0);
463 brw_set_predicate_control_flag_value(p, 0xff);
464 }
465 }
466 }
467
468 static void emit_min( struct brw_compile *p,
469 const struct brw_reg *dst,
470 GLuint mask,
471 const struct brw_reg *arg0,
472 const struct brw_reg *arg1 )
473 {
474 GLuint i;
475
476 for (i = 0; i < 4; i++) {
477 if (mask & (1<<i)) {
478 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
479 brw_MOV(p, dst[i], arg1[i]);
480 brw_set_saturate(p, 0);
481
482 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]);
483
484 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
485 brw_MOV(p, dst[i], arg0[i]);
486 brw_set_saturate(p, 0);
487 brw_set_predicate_control_flag_value(p, 0xff);
488 }
489 }
490 }
491
492
493 static void emit_dp3( struct brw_compile *p,
494 const struct brw_reg *dst,
495 GLuint mask,
496 const struct brw_reg *arg0,
497 const struct brw_reg *arg1 )
498 {
499 assert((mask & WRITEMASK_XYZW) == WRITEMASK_X);
500
501 brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
502 brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
503
504 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
505 brw_MAC(p, dst[0], arg0[2], arg1[2]);
506 brw_set_saturate(p, 0);
507 }
508
509
510 static void emit_dp4( struct brw_compile *p,
511 const struct brw_reg *dst,
512 GLuint mask,
513 const struct brw_reg *arg0,
514 const struct brw_reg *arg1 )
515 {
516 assert((mask & WRITEMASK_XYZW) == WRITEMASK_X);
517
518 brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
519 brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
520 brw_MAC(p, brw_null_reg(), arg0[2], arg1[2]);
521
522 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
523 brw_MAC(p, dst[0], arg0[3], arg1[3]);
524 brw_set_saturate(p, 0);
525 }
526
527
528 static void emit_dph( struct brw_compile *p,
529 const struct brw_reg *dst,
530 GLuint mask,
531 const struct brw_reg *arg0,
532 const struct brw_reg *arg1 )
533 {
534 assert((mask & WRITEMASK_XYZW) == WRITEMASK_X);
535
536 brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
537 brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
538 brw_MAC(p, dst[0], arg0[2], arg1[2]);
539
540 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
541 brw_ADD(p, dst[0], dst[0], arg1[3]);
542 brw_set_saturate(p, 0);
543 }
544
545
546 static void emit_xpd( struct brw_compile *p,
547 const struct brw_reg *dst,
548 GLuint mask,
549 const struct brw_reg *arg0,
550 const struct brw_reg *arg1 )
551 {
552 GLuint i;
553
554 assert(!(mask & WRITEMASK_W) == WRITEMASK_X);
555
556 for (i = 0 ; i < 3; i++) {
557 if (mask & (1<<i)) {
558 GLuint i2 = (i+2)%3;
559 GLuint i1 = (i+1)%3;
560
561 brw_MUL(p, brw_null_reg(), negate(arg0[i2]), arg1[i1]);
562
563 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
564 brw_MAC(p, dst[i], arg0[i1], arg1[i2]);
565 brw_set_saturate(p, 0);
566 }
567 }
568 }
569
570
571 static void emit_math1( struct brw_compile *p,
572 GLuint function,
573 const struct brw_reg *dst,
574 GLuint mask,
575 const struct brw_reg *arg0 )
576 {
577 //assert((mask & WRITEMASK_XYZW) == WRITEMASK_X ||
578 // function == BRW_MATH_FUNCTION_SINCOS);
579
580 brw_MOV(p, brw_message_reg(2), arg0[0]);
581
582 /* Send two messages to perform all 16 operations:
583 */
584 brw_math_16(p,
585 dst[0],
586 function,
587 (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
588 2,
589 brw_null_reg(),
590 BRW_MATH_PRECISION_FULL);
591 }
592
593
594 static void emit_math2( struct brw_compile *p,
595 GLuint function,
596 const struct brw_reg *dst,
597 GLuint mask,
598 const struct brw_reg *arg0,
599 const struct brw_reg *arg1)
600 {
601 assert((mask & WRITEMASK_XYZW) == WRITEMASK_X);
602
603 brw_push_insn_state(p);
604
605 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
606 brw_MOV(p, brw_message_reg(2), arg0[0]);
607 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
608 brw_MOV(p, brw_message_reg(4), sechalf(arg0[0]));
609
610 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
611 brw_MOV(p, brw_message_reg(3), arg1[0]);
612 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
613 brw_MOV(p, brw_message_reg(5), sechalf(arg1[0]));
614
615
616 /* Send two messages to perform all 16 operations:
617 */
618 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
619 brw_math(p,
620 dst[0],
621 function,
622 (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
623 2,
624 brw_null_reg(),
625 BRW_MATH_DATA_VECTOR,
626 BRW_MATH_PRECISION_FULL);
627
628 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
629 brw_math(p,
630 offset(dst[0],1),
631 function,
632 (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
633 4,
634 brw_null_reg(),
635 BRW_MATH_DATA_VECTOR,
636 BRW_MATH_PRECISION_FULL);
637
638 brw_pop_insn_state(p);
639 }
640
641
642
643 static void emit_tex( struct brw_wm_compile *c,
644 const struct brw_wm_instruction *inst,
645 struct brw_reg *dst,
646 GLuint dst_flags,
647 struct brw_reg *arg )
648 {
649 struct brw_compile *p = &c->func;
650 GLuint msgLength, responseLength;
651 GLboolean shadow = (c->key.shadowtex_mask & (1<<inst->tex_unit)) ? 1 : 0;
652 GLuint i, nr;
653 GLuint emit;
654
655 /* How many input regs are there?
656 */
657 switch (inst->tex_idx) {
658 case TEXTURE_1D_INDEX:
659 emit = WRITEMASK_X;
660 nr = 1;
661 break;
662 case TEXTURE_2D_INDEX:
663 case TEXTURE_RECT_INDEX:
664 emit = WRITEMASK_XY;
665 nr = 2;
666 break;
667 default:
668 emit = WRITEMASK_XYZ;
669 nr = 3;
670 break;
671 }
672
673 if (shadow) {
674 nr = 4;
675 emit |= WRITEMASK_W;
676 }
677
678 msgLength = 1;
679
680 for (i = 0; i < nr; i++) {
681 static const GLuint swz[4] = {0,1,2,2};
682 if (emit & (1<<i))
683 brw_MOV(p, brw_message_reg(msgLength+1), arg[swz[i]]);
684 else
685 brw_MOV(p, brw_message_reg(msgLength+1), brw_imm_f(0));
686 msgLength += 2;
687 }
688
689 responseLength = 8; /* always */
690
691 brw_SAMPLE(p,
692 retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW),
693 1,
694 retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW),
695 inst->tex_unit + 1, /* surface */
696 inst->tex_unit, /* sampler */
697 inst->writemask,
698 (shadow ?
699 BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE :
700 BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE),
701 responseLength,
702 msgLength,
703 0);
704
705 }
706
707
708 static void emit_txb( struct brw_wm_compile *c,
709 const struct brw_wm_instruction *inst,
710 struct brw_reg *dst,
711 GLuint dst_flags,
712 struct brw_reg *arg )
713 {
714 struct brw_compile *p = &c->func;
715 GLuint msgLength;
716
717 /* Shadow ignored for txb.
718 */
719 switch (inst->tex_idx) {
720 case TEXTURE_1D_INDEX:
721 brw_MOV(p, brw_message_reg(2), arg[0]);
722 brw_MOV(p, brw_message_reg(4), brw_imm_f(0));
723 brw_MOV(p, brw_message_reg(6), brw_imm_f(0));
724 break;
725 case TEXTURE_2D_INDEX:
726 case TEXTURE_RECT_INDEX:
727 brw_MOV(p, brw_message_reg(2), arg[0]);
728 brw_MOV(p, brw_message_reg(4), arg[1]);
729 brw_MOV(p, brw_message_reg(6), brw_imm_f(0));
730 break;
731 default:
732 brw_MOV(p, brw_message_reg(2), arg[0]);
733 brw_MOV(p, brw_message_reg(4), arg[1]);
734 brw_MOV(p, brw_message_reg(6), arg[2]);
735 break;
736 }
737
738 brw_MOV(p, brw_message_reg(8), arg[3]);
739 msgLength = 9;
740
741
742 brw_SAMPLE(p,
743 retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW),
744 1,
745 retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW),
746 inst->tex_unit + 1, /* surface */
747 inst->tex_unit, /* sampler */
748 inst->writemask,
749 BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS,
750 8, /* responseLength */
751 msgLength,
752 0);
753
754 }
755
756
757 static void emit_lit( struct brw_compile *p,
758 const struct brw_reg *dst,
759 GLuint mask,
760 const struct brw_reg *arg0 )
761 {
762 assert((mask & WRITEMASK_XW) == 0);
763
764 if (mask & WRITEMASK_Y) {
765 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
766 brw_MOV(p, dst[1], arg0[0]);
767 brw_set_saturate(p, 0);
768 }
769
770 if (mask & WRITEMASK_Z) {
771 emit_math2(p, BRW_MATH_FUNCTION_POW,
772 &dst[2],
773 WRITEMASK_X | (mask & SATURATE),
774 &arg0[1],
775 &arg0[3]);
776 }
777
778 /* Ordinarily you'd use an iff statement to skip or shortcircuit
779 * some of the POW calculations above, but 16-wide iff statements
780 * seem to lock c1 hardware, so this is a nasty workaround:
781 */
782 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_LE, arg0[0], brw_imm_f(0));
783 {
784 if (mask & WRITEMASK_Y)
785 brw_MOV(p, dst[1], brw_imm_f(0));
786
787 if (mask & WRITEMASK_Z)
788 brw_MOV(p, dst[2], brw_imm_f(0));
789 }
790 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
791 }
792
793
794 /* Kill pixel - set execution mask to zero for those pixels which
795 * fail.
796 */
797 static void emit_kil( struct brw_wm_compile *c,
798 struct brw_reg *arg0)
799 {
800 struct brw_compile *p = &c->func;
801 struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
802 GLuint i;
803
804
805 /* XXX - usually won't need 4 compares!
806 */
807 for (i = 0; i < 4; i++) {
808 brw_push_insn_state(p);
809 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0[i], brw_imm_f(0));
810 brw_set_predicate_control_flag_value(p, 0xff);
811 brw_AND(p, r0uw, brw_flag_reg(), r0uw);
812 brw_pop_insn_state(p);
813 }
814 }
815
816 static void fire_fb_write( struct brw_wm_compile *c,
817 GLuint base_reg,
818 GLuint nr )
819 {
820 struct brw_compile *p = &c->func;
821
822 /* Pass through control information:
823 */
824 /* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */
825 {
826 brw_push_insn_state(p);
827 brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */
828 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
829 brw_MOV(p,
830 brw_message_reg(base_reg + 1),
831 brw_vec8_grf(1, 0));
832 brw_pop_insn_state(p);
833 }
834
835 /* Send framebuffer write message: */
836 /* send (16) null.0<1>:uw m0 r0.0<8;8,1>:uw 0x85a04000:ud { Align1 EOT } */
837 brw_fb_WRITE(p,
838 retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW),
839 base_reg,
840 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
841 0, /* render surface always 0 */
842 nr,
843 0,
844 1);
845 }
846
847 static void emit_aa( struct brw_wm_compile *c,
848 struct brw_reg *arg1,
849 GLuint reg )
850 {
851 struct brw_compile *p = &c->func;
852 GLuint comp = c->key.aa_dest_stencil_reg / 2;
853 GLuint off = c->key.aa_dest_stencil_reg % 2;
854 struct brw_reg aa = offset(arg1[comp], off);
855
856 brw_push_insn_state(p);
857 brw_set_compression_control(p, BRW_COMPRESSION_NONE); /* ?? */
858 brw_MOV(p, brw_message_reg(reg), aa);
859 brw_pop_insn_state(p);
860 }
861
862
863 /* Post-fragment-program processing. Send the results to the
864 * framebuffer.
865 */
866 static void emit_fb_write( struct brw_wm_compile *c,
867 struct brw_reg *arg0,
868 struct brw_reg *arg1,
869 struct brw_reg *arg2)
870 {
871 struct brw_compile *p = &c->func;
872 GLuint nr = 2;
873 GLuint channel;
874
875 /* Reserve a space for AA - may not be needed:
876 */
877 if (c->key.aa_dest_stencil_reg)
878 nr += 1;
879
880 /* I don't really understand how this achieves the color interleave
881 * (ie RGBARGBA) in the result: [Do the saturation here]
882 */
883 {
884 brw_push_insn_state(p);
885
886 for (channel = 0; channel < 4; channel++) {
887 /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */
888 /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */
889
890 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
891 brw_MOV(p,
892 brw_message_reg(nr + channel),
893 arg0[channel]);
894
895 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
896 brw_MOV(p,
897 brw_message_reg(nr + channel + 4),
898 sechalf(arg0[channel]));
899 }
900
901 /* skip over the regs populated above:
902 */
903 nr += 8;
904
905 brw_pop_insn_state(p);
906 }
907
908 if (c->key.source_depth_to_render_target)
909 {
910 if (c->key.computes_depth)
911 brw_MOV(p, brw_message_reg(nr), arg2[2]);
912 else
913 brw_MOV(p, brw_message_reg(nr), arg1[1]); /* ? */
914
915 nr += 2;
916 }
917
918 if (c->key.dest_depth_reg)
919 {
920 GLuint comp = c->key.dest_depth_reg / 2;
921 GLuint off = c->key.dest_depth_reg % 2;
922
923 if (off != 0) {
924 brw_push_insn_state(p);
925 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
926 brw_MOV(p, brw_message_reg(nr), arg1[comp]);
927 /* 2nd half? */
928 brw_MOV(p, brw_message_reg(nr+1), offset(arg1[comp],1));
929 brw_pop_insn_state(p);
930 }
931 else {
932 brw_MOV(p, brw_message_reg(nr), arg1[comp]);
933 }
934 nr += 2;
935 }
936
937
938 if (!c->key.runtime_check_aads_emit) {
939 if (c->key.aa_dest_stencil_reg)
940 emit_aa(c, arg1, 2);
941
942 fire_fb_write(c, 0, nr);
943 }
944 else {
945 struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
946 struct brw_reg ip = brw_ip_reg();
947 struct brw_instruction *jmp;
948
949 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
950 brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
951 brw_AND(p,
952 v1_null_ud,
953 get_element_ud(brw_vec8_grf(1,0), 6),
954 brw_imm_ud(1<<26));
955
956 jmp = brw_JMPI(p, ip, ip, brw_imm_w(0));
957 {
958 emit_aa(c, arg1, 2);
959 fire_fb_write(c, 0, nr);
960 /* note - thread killed in subroutine */
961 }
962 brw_land_fwd_jump(p, jmp);
963
964 /* ELSE: Shuffle up one register to fill in the hole left for AA:
965 */
966 fire_fb_write(c, 1, nr-1);
967 }
968 }
969
970
971
972
973 /* Post-fragment-program processing. Send the results to the
974 * framebuffer.
975 */
976 static void emit_spill( struct brw_wm_compile *c,
977 struct brw_reg reg,
978 GLuint slot )
979 {
980 struct brw_compile *p = &c->func;
981
982 /*
983 mov (16) m2.0<1>:ud r2.0<8;8,1>:ud { Align1 Compr }
984 */
985 brw_MOV(p, brw_message_reg(2), reg);
986
987 /*
988 mov (1) r0.2<1>:d 0x00000080:d { Align1 NoMask }
989 send (16) null.0<1>:uw m1 r0.0<8;8,1>:uw 0x053003ff:ud { Align1 }
990 */
991 brw_dp_WRITE_16(p,
992 retype(vec16(brw_vec8_grf(0, 0)), BRW_REGISTER_TYPE_UW),
993 1,
994 slot);
995 }
996
997 static void emit_unspill( struct brw_wm_compile *c,
998 struct brw_reg reg,
999 GLuint slot )
1000 {
1001 struct brw_compile *p = &c->func;
1002
1003 /* Slot 0 is the undef value.
1004 */
1005 if (slot == 0) {
1006 brw_MOV(p, reg, brw_imm_f(0));
1007 return;
1008 }
1009
1010 /*
1011 mov (1) r0.2<1>:d 0x000000c0:d { Align1 NoMask }
1012 send (16) r110.0<1>:uw m1 r0.0<8;8,1>:uw 0x041243ff:ud { Align1 }
1013 */
1014
1015 brw_dp_READ_16(p,
1016 retype(vec16(reg), BRW_REGISTER_TYPE_UW),
1017 1,
1018 slot);
1019 }
1020
1021
1022
1023 /**
1024 * Retrieve upto 4 GEN4 register pairs for the given wm reg:
1025 */
1026 static void get_argument_regs( struct brw_wm_compile *c,
1027 struct brw_wm_ref *arg[],
1028 struct brw_reg *regs )
1029 {
1030 GLuint i;
1031
1032 for (i = 0; i < 4; i++) {
1033 if (arg[i]) {
1034
1035 if (arg[i]->unspill_reg)
1036 emit_unspill(c,
1037 brw_vec8_grf(arg[i]->unspill_reg, 0),
1038 arg[i]->value->spill_slot);
1039
1040 regs[i] = arg[i]->hw_reg;
1041 }
1042 else {
1043 regs[i] = brw_null_reg();
1044 }
1045 }
1046 }
1047
1048 static void spill_values( struct brw_wm_compile *c,
1049 struct brw_wm_value *values,
1050 GLuint nr )
1051 {
1052 GLuint i;
1053
1054 for (i = 0; i < nr; i++)
1055 if (values[i].spill_slot)
1056 emit_spill(c, values[i].hw_reg, values[i].spill_slot);
1057 }
1058
1059
1060
1061 /* Emit the fragment program instructions here.
1062 */
1063 void brw_wm_emit( struct brw_wm_compile *c )
1064 {
1065 struct brw_compile *p = &c->func;
1066 GLuint insn;
1067
1068 brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
1069
1070 /* Check if any of the payload regs need to be spilled:
1071 */
1072 spill_values(c, c->payload.depth, 4);
1073 spill_values(c, c->creg, c->nr_creg);
1074 spill_values(c, c->payload.input_interp, FRAG_ATTRIB_MAX);
1075
1076
1077 for (insn = 0; insn < c->nr_insns; insn++) {
1078
1079 struct brw_wm_instruction *inst = &c->instruction[insn];
1080 struct brw_reg args[3][4], dst[4];
1081 GLuint i, dst_flags;
1082
1083 /* Get argument regs:
1084 */
1085 for (i = 0; i < 3; i++)
1086 get_argument_regs(c, inst->src[i], args[i]);
1087
1088 /* Get dest regs:
1089 */
1090 for (i = 0; i < 4; i++)
1091 if (inst->dst[i])
1092 dst[i] = inst->dst[i]->hw_reg;
1093 else
1094 dst[i] = brw_null_reg();
1095
1096 /* Flags
1097 */
1098 dst_flags = inst->writemask;
1099 if (inst->saturate)
1100 dst_flags |= SATURATE;
1101
1102 switch (inst->opcode) {
1103 /* Generated instructions for calculating triangle interpolants:
1104 */
1105 case WM_PIXELXY:
1106 emit_pixel_xy(p, dst, dst_flags, args[0]);
1107 break;
1108
1109 case WM_DELTAXY:
1110 emit_delta_xy(p, dst, dst_flags, args[0], args[1]);
1111 break;
1112
1113 case WM_WPOSXY:
1114 emit_wpos_xy(p, dst, dst_flags, args[0]);
1115 break;
1116
1117 case WM_PIXELW:
1118 emit_pixel_w(p, dst, dst_flags, args[0], args[1]);
1119 break;
1120
1121 case WM_LINTERP:
1122 emit_linterp(p, dst, dst_flags, args[0], args[1]);
1123 break;
1124
1125 case WM_PINTERP:
1126 emit_pinterp(p, dst, dst_flags, args[0], args[1], args[2]);
1127 break;
1128
1129 case WM_CINTERP:
1130 emit_cinterp(p, dst, dst_flags, args[0]);
1131 break;
1132
1133 case WM_FB_WRITE:
1134 emit_fb_write(c, args[0], args[1], args[2]);
1135 break;
1136
1137 /* Straightforward arithmetic:
1138 */
1139 case OPCODE_ADD:
1140 emit_alu2(p, brw_ADD, dst, dst_flags, args[0], args[1]);
1141 break;
1142
1143 case OPCODE_FRC:
1144 emit_alu1(p, brw_FRC, dst, dst_flags, args[0]);
1145 break;
1146
1147 case OPCODE_FLR:
1148 emit_alu1(p, brw_RNDD, dst, dst_flags, args[0]);
1149 break;
1150
1151 case OPCODE_DP3: /* */
1152 emit_dp3(p, dst, dst_flags, args[0], args[1]);
1153 break;
1154
1155 case OPCODE_DP4:
1156 emit_dp4(p, dst, dst_flags, args[0], args[1]);
1157 break;
1158
1159 case OPCODE_DPH:
1160 emit_dph(p, dst, dst_flags, args[0], args[1]);
1161 break;
1162
1163 case OPCODE_LRP: /* */
1164 emit_lrp(p, dst, dst_flags, args[0], args[1], args[2]);
1165 break;
1166
1167 case OPCODE_MAD:
1168 emit_mad(p, dst, dst_flags, args[0], args[1], args[2]);
1169 break;
1170
1171 case OPCODE_MOV:
1172 case OPCODE_SWZ:
1173 emit_alu1(p, brw_MOV, dst, dst_flags, args[0]);
1174 break;
1175
1176 case OPCODE_MUL:
1177 emit_alu2(p, brw_MUL, dst, dst_flags, args[0], args[1]);
1178 break;
1179
1180 case OPCODE_XPD:
1181 emit_xpd(p, dst, dst_flags, args[0], args[1]);
1182 break;
1183
1184 /* Higher math functions:
1185 */
1186 case OPCODE_RCP:
1187 emit_math1(p, BRW_MATH_FUNCTION_INV, dst, dst_flags, args[0]);
1188 break;
1189
1190 case OPCODE_RSQ:
1191 emit_math1(p, BRW_MATH_FUNCTION_RSQ, dst, dst_flags, args[0]);
1192 break;
1193
1194 case OPCODE_SIN:
1195 emit_math1(p, BRW_MATH_FUNCTION_SIN, dst, dst_flags, args[0]);
1196 break;
1197
1198 case OPCODE_COS:
1199 emit_math1(p, BRW_MATH_FUNCTION_COS, dst, dst_flags, args[0]);
1200 break;
1201
1202 case OPCODE_EX2:
1203 emit_math1(p, BRW_MATH_FUNCTION_EXP, dst, dst_flags, args[0]);
1204 break;
1205
1206 case OPCODE_LG2:
1207 emit_math1(p, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]);
1208 break;
1209
1210 case OPCODE_SCS:
1211 /* There is an scs math function, but it would need some
1212 * fixup for 16-element execution.
1213 */
1214 if (dst_flags & WRITEMASK_X)
1215 emit_math1(p, BRW_MATH_FUNCTION_COS, dst, (dst_flags&SATURATE)|WRITEMASK_X, args[0]);
1216 if (dst_flags & WRITEMASK_Y)
1217 emit_math1(p, BRW_MATH_FUNCTION_SIN, dst+1, (dst_flags&SATURATE)|WRITEMASK_X, args[0]);
1218 break;
1219
1220 case OPCODE_POW:
1221 emit_math2(p, BRW_MATH_FUNCTION_POW, dst, dst_flags, args[0], args[1]);
1222 break;
1223
1224 /* Comparisons:
1225 */
1226 case OPCODE_CMP:
1227 emit_cmp(p, dst, dst_flags, args[0], args[1], args[2]);
1228 break;
1229
1230 case OPCODE_MAX:
1231 emit_max(p, dst, dst_flags, args[0], args[1]);
1232 break;
1233
1234 case OPCODE_MIN:
1235 emit_min(p, dst, dst_flags, args[0], args[1]);
1236 break;
1237
1238 case OPCODE_SLT:
1239 emit_slt(p, dst, dst_flags, args[0], args[1]);
1240 break;
1241
1242 case OPCODE_SLE:
1243 emit_sle(p, dst, dst_flags, args[0], args[1]);
1244 break;
1245 case OPCODE_SGT:
1246 emit_sgt(p, dst, dst_flags, args[0], args[1]);
1247 break;
1248 case OPCODE_SGE:
1249 emit_sge(p, dst, dst_flags, args[0], args[1]);
1250 break;
1251 case OPCODE_SEQ:
1252 emit_seq(p, dst, dst_flags, args[0], args[1]);
1253 break;
1254 case OPCODE_SNE:
1255 emit_sne(p, dst, dst_flags, args[0], args[1]);
1256 break;
1257
1258 case OPCODE_LIT:
1259 emit_lit(p, dst, dst_flags, args[0]);
1260 break;
1261
1262 /* Texturing operations:
1263 */
1264 case OPCODE_TEX:
1265 emit_tex(c, inst, dst, dst_flags, args[0]);
1266 break;
1267
1268 case OPCODE_TXB:
1269 emit_txb(c, inst, dst, dst_flags, args[0]);
1270 break;
1271
1272 case OPCODE_KIL:
1273 emit_kil(c, args[0]);
1274 break;
1275
1276 default:
1277 _mesa_printf("unsupport opcode %d in fragment program\n",
1278 inst->opcode);
1279 }
1280
1281 for (i = 0; i < 4; i++)
1282 if (inst->dst[i] && inst->dst[i]->spill_slot)
1283 emit_spill(c,
1284 inst->dst[i]->hw_reg,
1285 inst->dst[i]->spill_slot);
1286 }
1287 }
1288
1289
1290
1291
1292