Merge branch 'i915-unification' of git+ssh://people.freedesktop.org/~anholt/mesa...
[mesa.git] / src / mesa / drivers / dri / i965 / brw_wm_emit.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "macros.h"
34 #include "brw_context.h"
35 #include "brw_wm.h"
36
/* Extra flag bit OR'd into the writemask value handed to the emit_*
 * helpers when the instruction requests saturation (see brw_wm_emit()).
 */
#define SATURATE (1 << 5)
38
39 /* Not quite sure how correct this is - need to understand horiz
40 * vs. vertical strides a little better.
41 */
42 static __inline struct brw_reg sechalf( struct brw_reg reg )
43 {
44 if (reg.vstride)
45 reg.nr++;
46 return reg;
47 }
48
49 /* Payload R0:
50 *
51 * R0.0 -- pixel mask, one bit for each of 4 pixels in 4 tiles,
52 * corresponding to each of the 16 execution channels.
53 * R0.1..8 -- ?
54 * R1.0 -- triangle vertex 0.X
55 * R1.1 -- triangle vertex 0.Y
56 * R1.2 -- tile 0 x,y coords (2 packed uwords)
57 * R1.3 -- tile 1 x,y coords (2 packed uwords)
58 * R1.4 -- tile 2 x,y coords (2 packed uwords)
59 * R1.5 -- tile 3 x,y coords (2 packed uwords)
60 * R1.6 -- ?
61 * R1.7 -- ?
62 * R1.8 -- ?
63 */
64
65
66 static void emit_pixel_xy(struct brw_compile *p,
67 const struct brw_reg *dst,
68 GLuint mask,
69 const struct brw_reg *arg0)
70 {
71 struct brw_reg r1 = brw_vec1_grf(1, 0);
72 struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW);
73
74 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
75
76 /* Calculate pixel centers by adding 1 or 0 to each of the
77 * micro-tile coordinates passed in r1.
78 */
79 if (mask & WRITEMASK_X) {
80 brw_ADD(p,
81 vec16(retype(dst[0], BRW_REGISTER_TYPE_UW)),
82 stride(suboffset(r1_uw, 4), 2, 4, 0),
83 brw_imm_v(0x10101010));
84 }
85
86 if (mask & WRITEMASK_Y) {
87 brw_ADD(p,
88 vec16(retype(dst[1], BRW_REGISTER_TYPE_UW)),
89 stride(suboffset(r1_uw,5), 2, 4, 0),
90 brw_imm_v(0x11001100));
91 }
92
93 brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
94 }
95
96
97
98 static void emit_delta_xy(struct brw_compile *p,
99 const struct brw_reg *dst,
100 GLuint mask,
101 const struct brw_reg *arg0,
102 const struct brw_reg *arg1)
103 {
104 struct brw_reg r1 = brw_vec1_grf(1, 0);
105
106 /* Calc delta X,Y by subtracting origin in r1 from the pixel
107 * centers.
108 */
109 if (mask & WRITEMASK_X) {
110 brw_ADD(p,
111 dst[0],
112 retype(arg0[0], BRW_REGISTER_TYPE_UW),
113 negate(r1));
114 }
115
116 if (mask & WRITEMASK_Y) {
117 brw_ADD(p,
118 dst[1],
119 retype(arg0[1], BRW_REGISTER_TYPE_UW),
120 negate(suboffset(r1,1)));
121
122 }
123 }
124
125 static void emit_wpos_xy(struct brw_compile *p,
126 const struct brw_reg *dst,
127 GLuint mask,
128 const struct brw_reg *arg0)
129 {
130 /* Calc delta X,Y by subtracting origin in r1 from the pixel
131 * centers.
132 */
133 if (mask & WRITEMASK_X) {
134 brw_MOV(p,
135 dst[0],
136 retype(arg0[0], BRW_REGISTER_TYPE_UW));
137 }
138
139 if (mask & WRITEMASK_Y) {
140 /* TODO -- window_height - Y */
141 brw_MOV(p,
142 dst[1],
143 negate(retype(arg0[1], BRW_REGISTER_TYPE_UW)));
144
145 }
146 }
147
148
149 static void emit_pixel_w( struct brw_compile *p,
150 const struct brw_reg *dst,
151 GLuint mask,
152 const struct brw_reg *arg0,
153 const struct brw_reg *deltas)
154 {
155 /* Don't need this if all you are doing is interpolating color, for
156 * instance.
157 */
158 if (mask & WRITEMASK_W) {
159 struct brw_reg interp3 = brw_vec1_grf(arg0[0].nr+1, 4);
160
161 /* Calc 1/w - just linterp wpos[3] optimized by putting the
162 * result straight into a message reg.
163 */
164 brw_LINE(p, brw_null_reg(), interp3, deltas[0]);
165 brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), deltas[1]);
166
167 /* Calc w */
168 brw_math_16( p, dst[3],
169 BRW_MATH_FUNCTION_INV,
170 BRW_MATH_SATURATE_NONE,
171 2, brw_null_reg(),
172 BRW_MATH_PRECISION_FULL);
173 }
174 }
175
176
177
178 static void emit_linterp( struct brw_compile *p,
179 const struct brw_reg *dst,
180 GLuint mask,
181 const struct brw_reg *arg0,
182 const struct brw_reg *deltas )
183 {
184 struct brw_reg interp[4];
185 GLuint nr = arg0[0].nr;
186 GLuint i;
187
188 interp[0] = brw_vec1_grf(nr, 0);
189 interp[1] = brw_vec1_grf(nr, 4);
190 interp[2] = brw_vec1_grf(nr+1, 0);
191 interp[3] = brw_vec1_grf(nr+1, 4);
192
193 for(i = 0; i < 4; i++ ) {
194 if (mask & (1<<i)) {
195 brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
196 brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
197 }
198 }
199 }
200
201
202 static void emit_pinterp( struct brw_compile *p,
203 const struct brw_reg *dst,
204 GLuint mask,
205 const struct brw_reg *arg0,
206 const struct brw_reg *deltas,
207 const struct brw_reg *w)
208 {
209 struct brw_reg interp[4];
210 GLuint nr = arg0[0].nr;
211 GLuint i;
212
213 interp[0] = brw_vec1_grf(nr, 0);
214 interp[1] = brw_vec1_grf(nr, 4);
215 interp[2] = brw_vec1_grf(nr+1, 0);
216 interp[3] = brw_vec1_grf(nr+1, 4);
217
218 for(i = 0; i < 4; i++ ) {
219 if (mask & (1<<i)) {
220 brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
221 brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
222 brw_MUL(p, dst[i], dst[i], w[3]);
223 }
224 }
225 }
226
227 static void emit_cinterp( struct brw_compile *p,
228 const struct brw_reg *dst,
229 GLuint mask,
230 const struct brw_reg *arg0 )
231 {
232 struct brw_reg interp[4];
233 GLuint nr = arg0[0].nr;
234 GLuint i;
235
236 interp[0] = brw_vec1_grf(nr, 0);
237 interp[1] = brw_vec1_grf(nr, 4);
238 interp[2] = brw_vec1_grf(nr+1, 0);
239 interp[3] = brw_vec1_grf(nr+1, 4);
240
241 for(i = 0; i < 4; i++ ) {
242 if (mask & (1<<i)) {
243 brw_MOV(p, dst[i], suboffset(interp[i],3)); /* TODO: optimize away like other moves */
244 }
245 }
246 }
247
248
249
250
251
252 static void emit_alu1( struct brw_compile *p,
253 struct brw_instruction *(*func)(struct brw_compile *,
254 struct brw_reg,
255 struct brw_reg),
256 const struct brw_reg *dst,
257 GLuint mask,
258 const struct brw_reg *arg0 )
259 {
260 GLuint i;
261
262 if (mask & SATURATE)
263 brw_set_saturate(p, 1);
264
265 for (i = 0; i < 4; i++) {
266 if (mask & (1<<i)) {
267 func(p, dst[i], arg0[i]);
268 }
269 }
270
271 if (mask & SATURATE)
272 brw_set_saturate(p, 0);
273 }
274
275 static void emit_alu2( struct brw_compile *p,
276 struct brw_instruction *(*func)(struct brw_compile *,
277 struct brw_reg,
278 struct brw_reg,
279 struct brw_reg),
280 const struct brw_reg *dst,
281 GLuint mask,
282 const struct brw_reg *arg0,
283 const struct brw_reg *arg1 )
284 {
285 GLuint i;
286
287 if (mask & SATURATE)
288 brw_set_saturate(p, 1);
289
290 for (i = 0; i < 4; i++) {
291 if (mask & (1<<i)) {
292 func(p, dst[i], arg0[i], arg1[i]);
293 }
294 }
295
296 if (mask & SATURATE)
297 brw_set_saturate(p, 0);
298 }
299
300
301 static void emit_mad( struct brw_compile *p,
302 const struct brw_reg *dst,
303 GLuint mask,
304 const struct brw_reg *arg0,
305 const struct brw_reg *arg1,
306 const struct brw_reg *arg2 )
307 {
308 GLuint i;
309
310 for (i = 0; i < 4; i++) {
311 if (mask & (1<<i)) {
312 brw_MUL(p, dst[i], arg0[i], arg1[i]);
313
314 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
315 brw_ADD(p, dst[i], dst[i], arg2[i]);
316 brw_set_saturate(p, 0);
317 }
318 }
319 }
320
321
322 static void emit_lrp( struct brw_compile *p,
323 const struct brw_reg *dst,
324 GLuint mask,
325 const struct brw_reg *arg0,
326 const struct brw_reg *arg1,
327 const struct brw_reg *arg2 )
328 {
329 GLuint i;
330
331 /* Uses dst as a temporary:
332 */
333 for (i = 0; i < 4; i++) {
334 if (mask & (1<<i)) {
335 /* Can I use the LINE instruction for this?
336 */
337 brw_ADD(p, dst[i], negate(arg0[i]), brw_imm_f(1.0));
338 brw_MUL(p, brw_null_reg(), dst[i], arg2[i]);
339
340 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
341 brw_MAC(p, dst[i], arg0[i], arg1[i]);
342 brw_set_saturate(p, 0);
343 }
344 }
345 }
346
347
348 static void emit_slt( struct brw_compile *p,
349 const struct brw_reg *dst,
350 GLuint mask,
351 const struct brw_reg *arg0,
352 const struct brw_reg *arg1 )
353 {
354 GLuint i;
355
356 for (i = 0; i < 4; i++) {
357 if (mask & (1<<i)) {
358 brw_MOV(p, dst[i], brw_imm_f(0));
359 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]);
360 brw_MOV(p, dst[i], brw_imm_f(1.0));
361 brw_set_predicate_control_flag_value(p, 0xff);
362 }
363 }
364 }
365
366 /* Isn't this just the same as the above with the args swapped?
367 */
368 static void emit_sge( struct brw_compile *p,
369 const struct brw_reg *dst,
370 GLuint mask,
371 const struct brw_reg *arg0,
372 const struct brw_reg *arg1 )
373 {
374 GLuint i;
375
376 for (i = 0; i < 4; i++) {
377 if (mask & (1<<i)) {
378 brw_MOV(p, dst[i], brw_imm_f(0));
379 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0[i], arg1[i]);
380 brw_MOV(p, dst[i], brw_imm_f(1.0));
381 brw_set_predicate_control_flag_value(p, 0xff);
382 }
383 }
384 }
385
386
387
388 static void emit_cmp( struct brw_compile *p,
389 const struct brw_reg *dst,
390 GLuint mask,
391 const struct brw_reg *arg0,
392 const struct brw_reg *arg1,
393 const struct brw_reg *arg2 )
394 {
395 GLuint i;
396
397 for (i = 0; i < 4; i++) {
398 if (mask & (1<<i)) {
399 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
400 brw_MOV(p, dst[i], arg2[i]);
401 brw_set_saturate(p, 0);
402
403 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], brw_imm_f(0));
404
405 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
406 brw_MOV(p, dst[i], arg1[i]);
407 brw_set_saturate(p, 0);
408 brw_set_predicate_control_flag_value(p, 0xff);
409 }
410 }
411 }
412
413 static void emit_max( struct brw_compile *p,
414 const struct brw_reg *dst,
415 GLuint mask,
416 const struct brw_reg *arg0,
417 const struct brw_reg *arg1 )
418 {
419 GLuint i;
420
421 for (i = 0; i < 4; i++) {
422 if (mask & (1<<i)) {
423 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
424 brw_MOV(p, dst[i], arg0[i]);
425 brw_set_saturate(p, 0);
426
427 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]);
428
429 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
430 brw_MOV(p, dst[i], arg1[i]);
431 brw_set_saturate(p, 0);
432 brw_set_predicate_control_flag_value(p, 0xff);
433 }
434 }
435 }
436
437 static void emit_min( struct brw_compile *p,
438 const struct brw_reg *dst,
439 GLuint mask,
440 const struct brw_reg *arg0,
441 const struct brw_reg *arg1 )
442 {
443 GLuint i;
444
445 for (i = 0; i < 4; i++) {
446 if (mask & (1<<i)) {
447 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
448 brw_MOV(p, dst[i], arg1[i]);
449 brw_set_saturate(p, 0);
450
451 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]);
452
453 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
454 brw_MOV(p, dst[i], arg0[i]);
455 brw_set_saturate(p, 0);
456 brw_set_predicate_control_flag_value(p, 0xff);
457 }
458 }
459 }
460
461
462 static void emit_dp3( struct brw_compile *p,
463 const struct brw_reg *dst,
464 GLuint mask,
465 const struct brw_reg *arg0,
466 const struct brw_reg *arg1 )
467 {
468 assert((mask & WRITEMASK_XYZW) == WRITEMASK_X);
469
470 brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
471 brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
472
473 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
474 brw_MAC(p, dst[0], arg0[2], arg1[2]);
475 brw_set_saturate(p, 0);
476 }
477
478
479 static void emit_dp4( struct brw_compile *p,
480 const struct brw_reg *dst,
481 GLuint mask,
482 const struct brw_reg *arg0,
483 const struct brw_reg *arg1 )
484 {
485 assert((mask & WRITEMASK_XYZW) == WRITEMASK_X);
486
487 brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
488 brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
489 brw_MAC(p, brw_null_reg(), arg0[2], arg1[2]);
490
491 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
492 brw_MAC(p, dst[0], arg0[3], arg1[3]);
493 brw_set_saturate(p, 0);
494 }
495
496
497 static void emit_dph( struct brw_compile *p,
498 const struct brw_reg *dst,
499 GLuint mask,
500 const struct brw_reg *arg0,
501 const struct brw_reg *arg1 )
502 {
503 assert((mask & WRITEMASK_XYZW) == WRITEMASK_X);
504
505 brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
506 brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
507 brw_MAC(p, dst[0], arg0[2], arg1[2]);
508
509 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
510 brw_ADD(p, dst[0], dst[0], arg1[3]);
511 brw_set_saturate(p, 0);
512 }
513
514
515 static void emit_xpd( struct brw_compile *p,
516 const struct brw_reg *dst,
517 GLuint mask,
518 const struct brw_reg *arg0,
519 const struct brw_reg *arg1 )
520 {
521 GLuint i;
522
523 assert(!(mask & WRITEMASK_W) == WRITEMASK_X);
524
525 for (i = 0 ; i < 3; i++) {
526 if (mask & (1<<i)) {
527 GLuint i2 = (i+2)%3;
528 GLuint i1 = (i+1)%3;
529
530 brw_MUL(p, brw_null_reg(), negate(arg0[i2]), arg1[i1]);
531
532 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
533 brw_MAC(p, dst[i], arg0[i1], arg1[i2]);
534 brw_set_saturate(p, 0);
535 }
536 }
537 }
538
539
540 static void emit_math1( struct brw_compile *p,
541 GLuint function,
542 const struct brw_reg *dst,
543 GLuint mask,
544 const struct brw_reg *arg0 )
545 {
546 assert((mask & WRITEMASK_XYZW) == WRITEMASK_X ||
547 function == BRW_MATH_FUNCTION_SINCOS);
548
549 brw_MOV(p, brw_message_reg(2), arg0[0]);
550
551 /* Send two messages to perform all 16 operations:
552 */
553 brw_math_16(p,
554 dst[0],
555 function,
556 (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
557 2,
558 brw_null_reg(),
559 BRW_MATH_PRECISION_FULL);
560 }
561
562
563 static void emit_math2( struct brw_compile *p,
564 GLuint function,
565 const struct brw_reg *dst,
566 GLuint mask,
567 const struct brw_reg *arg0,
568 const struct brw_reg *arg1)
569 {
570 assert((mask & WRITEMASK_XYZW) == WRITEMASK_X);
571
572 brw_push_insn_state(p);
573
574 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
575 brw_MOV(p, brw_message_reg(2), arg0[0]);
576 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
577 brw_MOV(p, brw_message_reg(4), sechalf(arg0[0]));
578
579 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
580 brw_MOV(p, brw_message_reg(3), arg1[0]);
581 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
582 brw_MOV(p, brw_message_reg(5), sechalf(arg1[0]));
583
584
585 /* Send two messages to perform all 16 operations:
586 */
587 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
588 brw_math(p,
589 dst[0],
590 function,
591 (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
592 2,
593 brw_null_reg(),
594 BRW_MATH_DATA_VECTOR,
595 BRW_MATH_PRECISION_FULL);
596
597 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
598 brw_math(p,
599 offset(dst[0],1),
600 function,
601 (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
602 4,
603 brw_null_reg(),
604 BRW_MATH_DATA_VECTOR,
605 BRW_MATH_PRECISION_FULL);
606
607 brw_pop_insn_state(p);
608 }
609
610
611
612 static void emit_tex( struct brw_wm_compile *c,
613 const struct brw_wm_instruction *inst,
614 struct brw_reg *dst,
615 GLuint dst_flags,
616 struct brw_reg *arg )
617 {
618 struct brw_compile *p = &c->func;
619 GLuint msgLength, responseLength;
620 GLboolean shadow = (c->key.shadowtex_mask & (1<<inst->tex_unit)) ? 1 : 0;
621 GLuint i, nr;
622 GLuint emit;
623
624 /* How many input regs are there?
625 */
626 switch (inst->tex_idx) {
627 case TEXTURE_1D_INDEX:
628 emit = WRITEMASK_X;
629 nr = 1;
630 break;
631 case TEXTURE_2D_INDEX:
632 case TEXTURE_RECT_INDEX:
633 emit = WRITEMASK_XY;
634 nr = 2;
635 break;
636 default:
637 emit = WRITEMASK_XYZ;
638 nr = 3;
639 break;
640 }
641
642 if (shadow) {
643 nr = 4;
644 emit |= WRITEMASK_W;
645 }
646
647 msgLength = 1;
648
649 for (i = 0; i < nr; i++) {
650 static const GLuint swz[4] = {0,1,2,2};
651 if (emit & (1<<i))
652 brw_MOV(p, brw_message_reg(msgLength+1), arg[swz[i]]);
653 else
654 brw_MOV(p, brw_message_reg(msgLength+1), brw_imm_f(0));
655 msgLength += 2;
656 }
657
658 responseLength = 8; /* always */
659
660 brw_SAMPLE(p,
661 retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW),
662 1,
663 retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW),
664 inst->tex_unit + 1, /* surface */
665 inst->tex_unit, /* sampler */
666 inst->writemask,
667 (shadow ?
668 BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE :
669 BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE),
670 responseLength,
671 msgLength,
672 0);
673
674 }
675
676
677 static void emit_txb( struct brw_wm_compile *c,
678 const struct brw_wm_instruction *inst,
679 struct brw_reg *dst,
680 GLuint dst_flags,
681 struct brw_reg *arg )
682 {
683 struct brw_compile *p = &c->func;
684 GLuint msgLength;
685
686 /* Shadow ignored for txb.
687 */
688 switch (inst->tex_idx) {
689 case TEXTURE_1D_INDEX:
690 brw_MOV(p, brw_message_reg(2), arg[0]);
691 brw_MOV(p, brw_message_reg(4), brw_imm_f(0));
692 brw_MOV(p, brw_message_reg(6), brw_imm_f(0));
693 break;
694 case TEXTURE_2D_INDEX:
695 case TEXTURE_RECT_INDEX:
696 brw_MOV(p, brw_message_reg(2), arg[0]);
697 brw_MOV(p, brw_message_reg(4), arg[1]);
698 brw_MOV(p, brw_message_reg(6), brw_imm_f(0));
699 break;
700 default:
701 brw_MOV(p, brw_message_reg(2), arg[0]);
702 brw_MOV(p, brw_message_reg(4), arg[1]);
703 brw_MOV(p, brw_message_reg(6), arg[2]);
704 break;
705 }
706
707 brw_MOV(p, brw_message_reg(8), arg[3]);
708 msgLength = 9;
709
710
711 brw_SAMPLE(p,
712 retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW),
713 1,
714 retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW),
715 inst->tex_unit + 1, /* surface */
716 inst->tex_unit, /* sampler */
717 inst->writemask,
718 BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS,
719 8, /* responseLength */
720 msgLength,
721 0);
722
723 }
724
725
726 static void emit_lit( struct brw_compile *p,
727 const struct brw_reg *dst,
728 GLuint mask,
729 const struct brw_reg *arg0 )
730 {
731 assert((mask & WRITEMASK_XW) == 0);
732
733 if (mask & WRITEMASK_Y) {
734 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
735 brw_MOV(p, dst[1], arg0[0]);
736 brw_set_saturate(p, 0);
737 }
738
739 if (mask & WRITEMASK_Z) {
740 emit_math2(p, BRW_MATH_FUNCTION_POW,
741 &dst[2],
742 WRITEMASK_X | (mask & SATURATE),
743 &arg0[1],
744 &arg0[3]);
745 }
746
747 /* Ordinarily you'd use an iff statement to skip or shortcircuit
748 * some of the POW calculations above, but 16-wide iff statements
749 * seem to lock c1 hardware, so this is a nasty workaround:
750 */
751 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_LE, arg0[0], brw_imm_f(0));
752 {
753 if (mask & WRITEMASK_Y)
754 brw_MOV(p, dst[1], brw_imm_f(0));
755
756 if (mask & WRITEMASK_Z)
757 brw_MOV(p, dst[2], brw_imm_f(0));
758 }
759 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
760 }
761
762
763 /* Kill pixel - set execution mask to zero for those pixels which
764 * fail.
765 */
766 static void emit_kil( struct brw_wm_compile *c,
767 struct brw_reg *arg0)
768 {
769 struct brw_compile *p = &c->func;
770 struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
771 GLuint i;
772
773
774 /* XXX - usually won't need 4 compares!
775 */
776 for (i = 0; i < 4; i++) {
777 brw_push_insn_state(p);
778 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0[i], brw_imm_f(0));
779 brw_set_predicate_control_flag_value(p, 0xff);
780 brw_AND(p, r0uw, brw_flag_reg(), r0uw);
781 brw_pop_insn_state(p);
782 }
783 }
784
785 static void fire_fb_write( struct brw_wm_compile *c,
786 GLuint base_reg,
787 GLuint nr )
788 {
789 struct brw_compile *p = &c->func;
790
791 /* Pass through control information:
792 */
793 /* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */
794 {
795 brw_push_insn_state(p);
796 brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */
797 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
798 brw_MOV(p,
799 brw_message_reg(base_reg + 1),
800 brw_vec8_grf(1, 0));
801 brw_pop_insn_state(p);
802 }
803
804 /* Send framebuffer write message: */
805 /* send (16) null.0<1>:uw m0 r0.0<8;8,1>:uw 0x85a04000:ud { Align1 EOT } */
806 brw_fb_WRITE(p,
807 retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW),
808 base_reg,
809 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
810 0, /* render surface always 0 */
811 nr,
812 0,
813 1);
814 }
815
816 static void emit_aa( struct brw_wm_compile *c,
817 struct brw_reg *arg1,
818 GLuint reg )
819 {
820 struct brw_compile *p = &c->func;
821 GLuint comp = c->key.aa_dest_stencil_reg / 2;
822 GLuint off = c->key.aa_dest_stencil_reg % 2;
823 struct brw_reg aa = offset(arg1[comp], off);
824
825 brw_push_insn_state(p);
826 brw_set_compression_control(p, BRW_COMPRESSION_NONE); /* ?? */
827 brw_MOV(p, brw_message_reg(reg), aa);
828 brw_pop_insn_state(p);
829 }
830
831
832 /* Post-fragment-program processing. Send the results to the
833 * framebuffer.
834 */
835 static void emit_fb_write( struct brw_wm_compile *c,
836 struct brw_reg *arg0,
837 struct brw_reg *arg1,
838 struct brw_reg *arg2)
839 {
840 struct brw_compile *p = &c->func;
841 GLuint nr = 2;
842 GLuint channel;
843
844 /* Reserve a space for AA - may not be needed:
845 */
846 if (c->key.aa_dest_stencil_reg)
847 nr += 1;
848
849 /* I don't really understand how this achieves the color interleave
850 * (ie RGBARGBA) in the result: [Do the saturation here]
851 */
852 {
853 brw_push_insn_state(p);
854
855 for (channel = 0; channel < 4; channel++) {
856 /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */
857 /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */
858
859 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
860 brw_MOV(p,
861 brw_message_reg(nr + channel),
862 arg0[channel]);
863
864 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
865 brw_MOV(p,
866 brw_message_reg(nr + channel + 4),
867 sechalf(arg0[channel]));
868 }
869
870 /* skip over the regs populated above:
871 */
872 nr += 8;
873
874 brw_pop_insn_state(p);
875 }
876
877 if (c->key.source_depth_to_render_target)
878 {
879 if (c->key.computes_depth)
880 brw_MOV(p, brw_message_reg(nr), arg2[2]);
881 else
882 brw_MOV(p, brw_message_reg(nr), arg1[1]); /* ? */
883
884 nr += 2;
885 }
886
887 if (c->key.dest_depth_reg)
888 {
889 GLuint comp = c->key.dest_depth_reg / 2;
890 GLuint off = c->key.dest_depth_reg % 2;
891
892 if (off != 0) {
893 brw_push_insn_state(p);
894 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
895 brw_MOV(p, brw_message_reg(nr), arg1[comp]);
896 /* 2nd half? */
897 brw_MOV(p, brw_message_reg(nr+1), offset(arg1[comp],1));
898 brw_pop_insn_state(p);
899 }
900 else {
901 brw_MOV(p, brw_message_reg(nr), arg1[comp]);
902 }
903 nr += 2;
904 }
905
906
907 if (!c->key.runtime_check_aads_emit) {
908 if (c->key.aa_dest_stencil_reg)
909 emit_aa(c, arg1, 2);
910
911 fire_fb_write(c, 0, nr);
912 }
913 else {
914 struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
915 struct brw_reg ip = brw_ip_reg();
916 struct brw_instruction *jmp;
917
918 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
919 brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
920 brw_AND(p,
921 v1_null_ud,
922 get_element_ud(brw_vec8_grf(1,0), 6),
923 brw_imm_ud(1<<26));
924
925 jmp = brw_JMPI(p, ip, ip, brw_imm_w(0));
926 {
927 emit_aa(c, arg1, 2);
928 fire_fb_write(c, 0, nr);
929 /* note - thread killed in subroutine */
930 }
931 brw_land_fwd_jump(p, jmp);
932
933 /* ELSE: Shuffle up one register to fill in the hole left for AA:
934 */
935 fire_fb_write(c, 1, nr-1);
936 }
937 }
938
939
940
941
942 /* Post-fragment-program processing. Send the results to the
943 * framebuffer.
944 */
945 static void emit_spill( struct brw_wm_compile *c,
946 struct brw_reg reg,
947 GLuint slot )
948 {
949 struct brw_compile *p = &c->func;
950
951 /*
952 mov (16) m2.0<1>:ud r2.0<8;8,1>:ud { Align1 Compr }
953 */
954 brw_MOV(p, brw_message_reg(2), reg);
955
956 /*
957 mov (1) r0.2<1>:d 0x00000080:d { Align1 NoMask }
958 send (16) null.0<1>:uw m1 r0.0<8;8,1>:uw 0x053003ff:ud { Align1 }
959 */
960 brw_dp_WRITE_16(p,
961 retype(vec16(brw_vec8_grf(0, 0)), BRW_REGISTER_TYPE_UW),
962 1,
963 slot);
964 }
965
966 static void emit_unspill( struct brw_wm_compile *c,
967 struct brw_reg reg,
968 GLuint slot )
969 {
970 struct brw_compile *p = &c->func;
971
972 /* Slot 0 is the undef value.
973 */
974 if (slot == 0) {
975 brw_MOV(p, reg, brw_imm_f(0));
976 return;
977 }
978
979 /*
980 mov (1) r0.2<1>:d 0x000000c0:d { Align1 NoMask }
981 send (16) r110.0<1>:uw m1 r0.0<8;8,1>:uw 0x041243ff:ud { Align1 }
982 */
983
984 brw_dp_READ_16(p,
985 retype(vec16(reg), BRW_REGISTER_TYPE_UW),
986 1,
987 slot);
988 }
989
990
991
992 /**
993 * Retrieve upto 4 GEN4 register pairs for the given wm reg:
994 */
995 static void get_argument_regs( struct brw_wm_compile *c,
996 struct brw_wm_ref *arg[],
997 struct brw_reg *regs )
998 {
999 GLuint i;
1000
1001 for (i = 0; i < 4; i++) {
1002 if (arg[i]) {
1003
1004 if (arg[i]->unspill_reg)
1005 emit_unspill(c,
1006 brw_vec8_grf(arg[i]->unspill_reg, 0),
1007 arg[i]->value->spill_slot);
1008
1009 regs[i] = arg[i]->hw_reg;
1010 }
1011 else {
1012 regs[i] = brw_null_reg();
1013 }
1014 }
1015 }
1016
1017 static void spill_values( struct brw_wm_compile *c,
1018 struct brw_wm_value *values,
1019 GLuint nr )
1020 {
1021 GLuint i;
1022
1023 for (i = 0; i < nr; i++)
1024 if (values[i].spill_slot)
1025 emit_spill(c, values[i].hw_reg, values[i].spill_slot);
1026 }
1027
1028
1029
1030 /* Emit the fragment program instructions here.
1031 */
1032 void brw_wm_emit( struct brw_wm_compile *c )
1033 {
1034 struct brw_compile *p = &c->func;
1035 GLuint insn;
1036
1037 brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
1038
1039 /* Check if any of the payload regs need to be spilled:
1040 */
1041 spill_values(c, c->payload.depth, 4);
1042 spill_values(c, c->creg, c->nr_creg);
1043 spill_values(c, c->payload.input_interp, FRAG_ATTRIB_MAX);
1044
1045
1046 for (insn = 0; insn < c->nr_insns; insn++) {
1047
1048 struct brw_wm_instruction *inst = &c->instruction[insn];
1049 struct brw_reg args[3][4], dst[4];
1050 GLuint i, dst_flags;
1051
1052 /* Get argument regs:
1053 */
1054 for (i = 0; i < 3; i++)
1055 get_argument_regs(c, inst->src[i], args[i]);
1056
1057 /* Get dest regs:
1058 */
1059 for (i = 0; i < 4; i++)
1060 if (inst->dst[i])
1061 dst[i] = inst->dst[i]->hw_reg;
1062 else
1063 dst[i] = brw_null_reg();
1064
1065 /* Flags
1066 */
1067 dst_flags = inst->writemask;
1068 if (inst->saturate)
1069 dst_flags |= SATURATE;
1070
1071 switch (inst->opcode) {
1072 /* Generated instructions for calculating triangle interpolants:
1073 */
1074 case WM_PIXELXY:
1075 emit_pixel_xy(p, dst, dst_flags, args[0]);
1076 break;
1077
1078 case WM_DELTAXY:
1079 emit_delta_xy(p, dst, dst_flags, args[0], args[1]);
1080 break;
1081
1082 case WM_WPOSXY:
1083 emit_wpos_xy(p, dst, dst_flags, args[0]);
1084 break;
1085
1086 case WM_PIXELW:
1087 emit_pixel_w(p, dst, dst_flags, args[0], args[1]);
1088 break;
1089
1090 case WM_LINTERP:
1091 emit_linterp(p, dst, dst_flags, args[0], args[1]);
1092 break;
1093
1094 case WM_PINTERP:
1095 emit_pinterp(p, dst, dst_flags, args[0], args[1], args[2]);
1096 break;
1097
1098 case WM_CINTERP:
1099 emit_cinterp(p, dst, dst_flags, args[0]);
1100 break;
1101
1102 case WM_FB_WRITE:
1103 emit_fb_write(c, args[0], args[1], args[2]);
1104 break;
1105
1106 /* Straightforward arithmetic:
1107 */
1108 case OPCODE_ADD:
1109 emit_alu2(p, brw_ADD, dst, dst_flags, args[0], args[1]);
1110 break;
1111
1112 case OPCODE_FRC:
1113 emit_alu1(p, brw_FRC, dst, dst_flags, args[0]);
1114 break;
1115
1116 case OPCODE_FLR:
1117 emit_alu1(p, brw_RNDD, dst, dst_flags, args[0]);
1118 break;
1119
1120 case OPCODE_DP3: /* */
1121 emit_dp3(p, dst, dst_flags, args[0], args[1]);
1122 break;
1123
1124 case OPCODE_DP4:
1125 emit_dp4(p, dst, dst_flags, args[0], args[1]);
1126 break;
1127
1128 case OPCODE_DPH:
1129 emit_dph(p, dst, dst_flags, args[0], args[1]);
1130 break;
1131
1132 case OPCODE_LRP: /* */
1133 emit_lrp(p, dst, dst_flags, args[0], args[1], args[2]);
1134 break;
1135
1136 case OPCODE_MAD:
1137 emit_mad(p, dst, dst_flags, args[0], args[1], args[2]);
1138 break;
1139
1140 case OPCODE_MOV:
1141 case OPCODE_SWZ:
1142 emit_alu1(p, brw_MOV, dst, dst_flags, args[0]);
1143 break;
1144
1145 case OPCODE_MUL:
1146 emit_alu2(p, brw_MUL, dst, dst_flags, args[0], args[1]);
1147 break;
1148
1149 case OPCODE_XPD:
1150 emit_xpd(p, dst, dst_flags, args[0], args[1]);
1151 break;
1152
1153 /* Higher math functions:
1154 */
1155 case OPCODE_RCP:
1156 emit_math1(p, BRW_MATH_FUNCTION_INV, dst, dst_flags, args[0]);
1157 break;
1158
1159 case OPCODE_RSQ:
1160 emit_math1(p, BRW_MATH_FUNCTION_RSQ, dst, dst_flags, args[0]);
1161 break;
1162
1163 case OPCODE_SIN:
1164 emit_math1(p, BRW_MATH_FUNCTION_SIN, dst, dst_flags, args[0]);
1165 break;
1166
1167 case OPCODE_COS:
1168 emit_math1(p, BRW_MATH_FUNCTION_COS, dst, dst_flags, args[0]);
1169 break;
1170
1171 case OPCODE_EX2:
1172 emit_math1(p, BRW_MATH_FUNCTION_EXP, dst, dst_flags, args[0]);
1173 break;
1174
1175 case OPCODE_LG2:
1176 emit_math1(p, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]);
1177 break;
1178
1179 case OPCODE_SCS:
1180 /* There is an scs math function, but it would need some
1181 * fixup for 16-element execution.
1182 */
1183 if (dst_flags & WRITEMASK_X)
1184 emit_math1(p, BRW_MATH_FUNCTION_COS, dst, (dst_flags&SATURATE)|WRITEMASK_X, args[0]);
1185 if (dst_flags & WRITEMASK_Y)
1186 emit_math1(p, BRW_MATH_FUNCTION_SIN, dst+1, (dst_flags&SATURATE)|WRITEMASK_X, args[0]);
1187 break;
1188
1189 case OPCODE_POW:
1190 emit_math2(p, BRW_MATH_FUNCTION_POW, dst, dst_flags, args[0], args[1]);
1191 break;
1192
1193 /* Comparisons:
1194 */
1195 case OPCODE_CMP:
1196 emit_cmp(p, dst, dst_flags, args[0], args[1], args[2]);
1197 break;
1198
1199 case OPCODE_MAX:
1200 emit_max(p, dst, dst_flags, args[0], args[1]);
1201 break;
1202
1203 case OPCODE_MIN:
1204 emit_min(p, dst, dst_flags, args[0], args[1]);
1205 break;
1206
1207 case OPCODE_SLT:
1208 emit_slt(p, dst, dst_flags, args[0], args[1]);
1209 break;
1210
1211 case OPCODE_SGE:
1212 emit_sge(p, dst, dst_flags, args[0], args[1]);
1213 break;
1214
1215 case OPCODE_LIT:
1216 emit_lit(p, dst, dst_flags, args[0]);
1217 break;
1218
1219 /* Texturing operations:
1220 */
1221 case OPCODE_TEX:
1222 emit_tex(c, inst, dst, dst_flags, args[0]);
1223 break;
1224
1225 case OPCODE_TXB:
1226 emit_txb(c, inst, dst, dst_flags, args[0]);
1227 break;
1228
1229 case OPCODE_KIL:
1230 emit_kil(c, args[0]);
1231 break;
1232
1233 default:
1234 assert(0);
1235 }
1236
1237 for (i = 0; i < 4; i++)
1238 if (inst->dst[i] && inst->dst[i]->spill_slot)
1239 emit_spill(c,
1240 inst->dst[i]->hw_reg,
1241 inst->dst[i]->spill_slot);
1242 }
1243 }
1244
1245
1246
1247
1248