6bafa44b82bc8d50d0a77bb3bfef4e7b725570f3
[mesa.git] / src / mesa / drivers / dri / i965 / brw_wm_emit.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "macros.h"
34 #include "brw_context.h"
35 #include "brw_wm.h"
36
/* Extra flag bit OR'ed into the per-instruction mask word (dst_flags)
 * next to the four writemask channel bits that the emitters test with
 * (1<<i).  When set, the emitted result is saturated.
 */
#define SATURATE (1 << 5)
38
39 /* Not quite sure how correct this is - need to understand horiz
40 * vs. vertical strides a little better.
41 */
42 static __inline struct brw_reg sechalf( struct brw_reg reg )
43 {
44 if (reg.vstride)
45 reg.nr++;
46 return reg;
47 }
48
49 /* Payload R0:
50 *
51 * R0.0 -- pixel mask, one bit for each of 4 pixels in 4 tiles,
52 * corresponding to each of the 16 execution channels.
53 * R0.1..8 -- ?
54 * R1.0 -- triangle vertex 0.X
55 * R1.1 -- triangle vertex 0.Y
56 * R1.2 -- tile 0 x,y coords (2 packed uwords)
57 * R1.3 -- tile 1 x,y coords (2 packed uwords)
58 * R1.4 -- tile 2 x,y coords (2 packed uwords)
59 * R1.5 -- tile 3 x,y coords (2 packed uwords)
60 * R1.6 -- ?
61 * R1.7 -- ?
62 * R1.8 -- ?
63 */
64
65
66 static void emit_pixel_xy(struct brw_compile *p,
67 const struct brw_reg *dst,
68 GLuint mask,
69 const struct brw_reg *arg0)
70 {
71 struct brw_reg r1 = brw_vec1_grf(1, 0);
72 struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW);
73
74 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
75
76 /* Calculate pixel centers by adding 1 or 0 to each of the
77 * micro-tile coordinates passed in r1.
78 */
79 if (mask & WRITEMASK_X) {
80 brw_ADD(p,
81 vec16(retype(dst[0], BRW_REGISTER_TYPE_UW)),
82 stride(suboffset(r1_uw, 4), 2, 4, 0),
83 brw_imm_v(0x10101010));
84 }
85
86 if (mask & WRITEMASK_Y) {
87 brw_ADD(p,
88 vec16(retype(dst[1], BRW_REGISTER_TYPE_UW)),
89 stride(suboffset(r1_uw,5), 2, 4, 0),
90 brw_imm_v(0x11001100));
91 }
92
93 brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
94 }
95
96
97
98 static void emit_delta_xy(struct brw_compile *p,
99 const struct brw_reg *dst,
100 GLuint mask,
101 const struct brw_reg *arg0,
102 const struct brw_reg *arg1)
103 {
104 struct brw_reg r1 = brw_vec1_grf(1, 0);
105
106 /* Calc delta X,Y by subtracting origin in r1 from the pixel
107 * centers.
108 */
109 if (mask & WRITEMASK_X) {
110 brw_ADD(p,
111 dst[0],
112 retype(arg0[0], BRW_REGISTER_TYPE_UW),
113 negate(r1));
114 }
115
116 if (mask & WRITEMASK_Y) {
117 brw_ADD(p,
118 dst[1],
119 retype(arg0[1], BRW_REGISTER_TYPE_UW),
120 negate(suboffset(r1,1)));
121
122 }
123 }
124
125 static void emit_wpos_xy(struct brw_compile *p,
126 const struct brw_reg *dst,
127 GLuint mask,
128 const struct brw_reg *arg0)
129 {
130 /* Calc delta X,Y by subtracting origin in r1 from the pixel
131 * centers.
132 */
133 if (mask & WRITEMASK_X) {
134 brw_MOV(p,
135 dst[0],
136 retype(arg0[0], BRW_REGISTER_TYPE_UW));
137 }
138
139 if (mask & WRITEMASK_Y) {
140 /* TODO -- window_height - Y */
141 brw_MOV(p,
142 dst[1],
143 negate(retype(arg0[1], BRW_REGISTER_TYPE_UW)));
144
145 }
146 }
147
148
149 static void emit_pixel_w( struct brw_compile *p,
150 const struct brw_reg *dst,
151 GLuint mask,
152 const struct brw_reg *arg0,
153 const struct brw_reg *deltas)
154 {
155 /* Don't need this if all you are doing is interpolating color, for
156 * instance.
157 */
158 if (mask & WRITEMASK_W) {
159 struct brw_reg interp3 = brw_vec1_grf(arg0[0].nr+1, 4);
160
161 /* Calc 1/w - just linterp wpos[3] optimized by putting the
162 * result straight into a message reg.
163 */
164 brw_LINE(p, brw_null_reg(), interp3, deltas[0]);
165 brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), deltas[1]);
166
167 /* Calc w */
168 brw_math_16( p, dst[3],
169 BRW_MATH_FUNCTION_INV,
170 BRW_MATH_SATURATE_NONE,
171 2, brw_null_reg(),
172 BRW_MATH_PRECISION_FULL);
173 }
174 }
175
176
177
178 static void emit_linterp( struct brw_compile *p,
179 const struct brw_reg *dst,
180 GLuint mask,
181 const struct brw_reg *arg0,
182 const struct brw_reg *deltas )
183 {
184 struct brw_reg interp[4];
185 GLuint nr = arg0[0].nr;
186 GLuint i;
187
188 interp[0] = brw_vec1_grf(nr, 0);
189 interp[1] = brw_vec1_grf(nr, 4);
190 interp[2] = brw_vec1_grf(nr+1, 0);
191 interp[3] = brw_vec1_grf(nr+1, 4);
192
193 for(i = 0; i < 4; i++ ) {
194 if (mask & (1<<i)) {
195 brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
196 brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
197 }
198 }
199 }
200
201
202 static void emit_pinterp( struct brw_compile *p,
203 const struct brw_reg *dst,
204 GLuint mask,
205 const struct brw_reg *arg0,
206 const struct brw_reg *deltas,
207 const struct brw_reg *w)
208 {
209 struct brw_reg interp[4];
210 GLuint nr = arg0[0].nr;
211 GLuint i;
212
213 interp[0] = brw_vec1_grf(nr, 0);
214 interp[1] = brw_vec1_grf(nr, 4);
215 interp[2] = brw_vec1_grf(nr+1, 0);
216 interp[3] = brw_vec1_grf(nr+1, 4);
217
218 for(i = 0; i < 4; i++ ) {
219 if (mask & (1<<i)) {
220 brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
221 brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
222 brw_MUL(p, dst[i], dst[i], w[3]);
223 }
224 }
225 }
226
227 static void emit_cinterp( struct brw_compile *p,
228 const struct brw_reg *dst,
229 GLuint mask,
230 const struct brw_reg *arg0 )
231 {
232 struct brw_reg interp[4];
233 GLuint nr = arg0[0].nr;
234 GLuint i;
235
236 interp[0] = brw_vec1_grf(nr, 0);
237 interp[1] = brw_vec1_grf(nr, 4);
238 interp[2] = brw_vec1_grf(nr+1, 0);
239 interp[3] = brw_vec1_grf(nr+1, 4);
240
241 for(i = 0; i < 4; i++ ) {
242 if (mask & (1<<i)) {
243 brw_MOV(p, dst[i], suboffset(interp[i],3)); /* TODO: optimize away like other moves */
244 }
245 }
246 }
247
248
249
250
251
252 static void emit_alu1( struct brw_compile *p,
253 struct brw_instruction *(*func)(struct brw_compile *,
254 struct brw_reg,
255 struct brw_reg),
256 const struct brw_reg *dst,
257 GLuint mask,
258 const struct brw_reg *arg0 )
259 {
260 GLuint i;
261
262 if (mask & SATURATE)
263 brw_set_saturate(p, 1);
264
265 for (i = 0; i < 4; i++) {
266 if (mask & (1<<i)) {
267 func(p, dst[i], arg0[i]);
268 }
269 }
270
271 if (mask & SATURATE)
272 brw_set_saturate(p, 0);
273 }
274
275 static void emit_alu2( struct brw_compile *p,
276 struct brw_instruction *(*func)(struct brw_compile *,
277 struct brw_reg,
278 struct brw_reg,
279 struct brw_reg),
280 const struct brw_reg *dst,
281 GLuint mask,
282 const struct brw_reg *arg0,
283 const struct brw_reg *arg1 )
284 {
285 GLuint i;
286
287 if (mask & SATURATE)
288 brw_set_saturate(p, 1);
289
290 for (i = 0; i < 4; i++) {
291 if (mask & (1<<i)) {
292 func(p, dst[i], arg0[i], arg1[i]);
293 }
294 }
295
296 if (mask & SATURATE)
297 brw_set_saturate(p, 0);
298 }
299
300
301 static void emit_mad( struct brw_compile *p,
302 const struct brw_reg *dst,
303 GLuint mask,
304 const struct brw_reg *arg0,
305 const struct brw_reg *arg1,
306 const struct brw_reg *arg2 )
307 {
308 GLuint i;
309
310 for (i = 0; i < 4; i++) {
311 if (mask & (1<<i)) {
312 brw_MUL(p, dst[i], arg0[i], arg1[i]);
313
314 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
315 brw_ADD(p, dst[i], dst[i], arg2[i]);
316 brw_set_saturate(p, 0);
317 }
318 }
319 }
320
321
322 static void emit_lrp( struct brw_compile *p,
323 const struct brw_reg *dst,
324 GLuint mask,
325 const struct brw_reg *arg0,
326 const struct brw_reg *arg1,
327 const struct brw_reg *arg2 )
328 {
329 GLuint i;
330
331 /* Uses dst as a temporary:
332 */
333 for (i = 0; i < 4; i++) {
334 if (mask & (1<<i)) {
335 /* Can I use the LINE instruction for this?
336 */
337 brw_ADD(p, dst[i], negate(arg0[i]), brw_imm_f(1.0));
338 brw_MUL(p, brw_null_reg(), dst[i], arg2[i]);
339
340 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
341 brw_MAC(p, dst[i], arg0[i], arg1[i]);
342 brw_set_saturate(p, 0);
343 }
344 }
345 }
346 static void emit_sop( struct brw_compile *p,
347 const struct brw_reg *dst,
348 GLuint mask,
349 GLuint cond,
350 const struct brw_reg *arg0,
351 const struct brw_reg *arg1 )
352 {
353 GLuint i;
354
355 for (i = 0; i < 4; i++) {
356 if (mask & (1<<i)) {
357 brw_MOV(p, dst[i], brw_imm_f(0));
358 brw_CMP(p, brw_null_reg(), cond, arg0[i], arg1[i]);
359 brw_MOV(p, dst[i], brw_imm_f(1.0));
360 brw_set_predicate_control_flag_value(p, 0xff);
361 }
362 }
363 }
364
365 static void emit_slt( struct brw_compile *p,
366 const struct brw_reg *dst,
367 GLuint mask,
368 const struct brw_reg *arg0,
369 const struct brw_reg *arg1 )
370 {
371 emit_sop(p, dst, mask, BRW_CONDITIONAL_L, arg0, arg1);
372 }
373
374 static void emit_sle( struct brw_compile *p,
375 const struct brw_reg *dst,
376 GLuint mask,
377 const struct brw_reg *arg0,
378 const struct brw_reg *arg1 )
379 {
380 emit_sop(p, dst, mask, BRW_CONDITIONAL_LE, arg0, arg1);
381 }
382
383 static void emit_sgt( struct brw_compile *p,
384 const struct brw_reg *dst,
385 GLuint mask,
386 const struct brw_reg *arg0,
387 const struct brw_reg *arg1 )
388 {
389 emit_sop(p, dst, mask, BRW_CONDITIONAL_G, arg0, arg1);
390 }
391
392 static void emit_sge( struct brw_compile *p,
393 const struct brw_reg *dst,
394 GLuint mask,
395 const struct brw_reg *arg0,
396 const struct brw_reg *arg1 )
397 {
398 emit_sop(p, dst, mask, BRW_CONDITIONAL_GE, arg0, arg1);
399 }
400
401 static void emit_seq( struct brw_compile *p,
402 const struct brw_reg *dst,
403 GLuint mask,
404 const struct brw_reg *arg0,
405 const struct brw_reg *arg1 )
406 {
407 emit_sop(p, dst, mask, BRW_CONDITIONAL_EQ, arg0, arg1);
408 }
409
410 static void emit_sne( struct brw_compile *p,
411 const struct brw_reg *dst,
412 GLuint mask,
413 const struct brw_reg *arg0,
414 const struct brw_reg *arg1 )
415 {
416 emit_sop(p, dst, mask, BRW_CONDITIONAL_NEQ, arg0, arg1);
417 }
418
419 static void emit_cmp( struct brw_compile *p,
420 const struct brw_reg *dst,
421 GLuint mask,
422 const struct brw_reg *arg0,
423 const struct brw_reg *arg1,
424 const struct brw_reg *arg2 )
425 {
426 GLuint i;
427
428 for (i = 0; i < 4; i++) {
429 if (mask & (1<<i)) {
430 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
431 brw_MOV(p, dst[i], arg2[i]);
432 brw_set_saturate(p, 0);
433
434 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], brw_imm_f(0));
435
436 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
437 brw_MOV(p, dst[i], arg1[i]);
438 brw_set_saturate(p, 0);
439 brw_set_predicate_control_flag_value(p, 0xff);
440 }
441 }
442 }
443
444 static void emit_max( struct brw_compile *p,
445 const struct brw_reg *dst,
446 GLuint mask,
447 const struct brw_reg *arg0,
448 const struct brw_reg *arg1 )
449 {
450 GLuint i;
451
452 for (i = 0; i < 4; i++) {
453 if (mask & (1<<i)) {
454 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
455 brw_MOV(p, dst[i], arg0[i]);
456 brw_set_saturate(p, 0);
457
458 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]);
459
460 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
461 brw_MOV(p, dst[i], arg1[i]);
462 brw_set_saturate(p, 0);
463 brw_set_predicate_control_flag_value(p, 0xff);
464 }
465 }
466 }
467
468 static void emit_min( struct brw_compile *p,
469 const struct brw_reg *dst,
470 GLuint mask,
471 const struct brw_reg *arg0,
472 const struct brw_reg *arg1 )
473 {
474 GLuint i;
475
476 for (i = 0; i < 4; i++) {
477 if (mask & (1<<i)) {
478 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
479 brw_MOV(p, dst[i], arg1[i]);
480 brw_set_saturate(p, 0);
481
482 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]);
483
484 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
485 brw_MOV(p, dst[i], arg0[i]);
486 brw_set_saturate(p, 0);
487 brw_set_predicate_control_flag_value(p, 0xff);
488 }
489 }
490 }
491
492
493 static void emit_dp3( struct brw_compile *p,
494 const struct brw_reg *dst,
495 GLuint mask,
496 const struct brw_reg *arg0,
497 const struct brw_reg *arg1 )
498 {
499 assert((mask & WRITEMASK_XYZW) == WRITEMASK_X);
500
501 brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
502 brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
503
504 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
505 brw_MAC(p, dst[0], arg0[2], arg1[2]);
506 brw_set_saturate(p, 0);
507 }
508
509
510 static void emit_dp4( struct brw_compile *p,
511 const struct brw_reg *dst,
512 GLuint mask,
513 const struct brw_reg *arg0,
514 const struct brw_reg *arg1 )
515 {
516 assert((mask & WRITEMASK_XYZW) == WRITEMASK_X);
517
518 brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
519 brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
520 brw_MAC(p, brw_null_reg(), arg0[2], arg1[2]);
521
522 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
523 brw_MAC(p, dst[0], arg0[3], arg1[3]);
524 brw_set_saturate(p, 0);
525 }
526
527
528 static void emit_dph( struct brw_compile *p,
529 const struct brw_reg *dst,
530 GLuint mask,
531 const struct brw_reg *arg0,
532 const struct brw_reg *arg1 )
533 {
534 assert((mask & WRITEMASK_XYZW) == WRITEMASK_X);
535
536 brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
537 brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
538 brw_MAC(p, dst[0], arg0[2], arg1[2]);
539
540 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
541 brw_ADD(p, dst[0], dst[0], arg1[3]);
542 brw_set_saturate(p, 0);
543 }
544
545
546 static void emit_xpd( struct brw_compile *p,
547 const struct brw_reg *dst,
548 GLuint mask,
549 const struct brw_reg *arg0,
550 const struct brw_reg *arg1 )
551 {
552 GLuint i;
553
554 assert(!(mask & WRITEMASK_W) == WRITEMASK_X);
555
556 for (i = 0 ; i < 3; i++) {
557 if (mask & (1<<i)) {
558 GLuint i2 = (i+2)%3;
559 GLuint i1 = (i+1)%3;
560
561 brw_MUL(p, brw_null_reg(), negate(arg0[i2]), arg1[i1]);
562
563 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
564 brw_MAC(p, dst[i], arg0[i1], arg1[i2]);
565 brw_set_saturate(p, 0);
566 }
567 }
568 }
569
570
571 static void emit_math1( struct brw_compile *p,
572 GLuint function,
573 const struct brw_reg *dst,
574 GLuint mask,
575 const struct brw_reg *arg0 )
576 {
577 //assert((mask & WRITEMASK_XYZW) == WRITEMASK_X ||
578 // function == BRW_MATH_FUNCTION_SINCOS);
579
580 brw_MOV(p, brw_message_reg(2), arg0[0]);
581
582 /* Send two messages to perform all 16 operations:
583 */
584 brw_math_16(p,
585 dst[0],
586 function,
587 (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
588 2,
589 brw_null_reg(),
590 BRW_MATH_PRECISION_FULL);
591 }
592
593
594 static void emit_math2( struct brw_compile *p,
595 GLuint function,
596 const struct brw_reg *dst,
597 GLuint mask,
598 const struct brw_reg *arg0,
599 const struct brw_reg *arg1)
600 {
601 assert((mask & WRITEMASK_XYZW) == WRITEMASK_X);
602
603 brw_push_insn_state(p);
604
605 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
606 brw_MOV(p, brw_message_reg(2), arg0[0]);
607 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
608 brw_MOV(p, brw_message_reg(4), sechalf(arg0[0]));
609
610 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
611 brw_MOV(p, brw_message_reg(3), arg1[0]);
612 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
613 brw_MOV(p, brw_message_reg(5), sechalf(arg1[0]));
614
615
616 /* Send two messages to perform all 16 operations:
617 */
618 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
619 brw_math(p,
620 dst[0],
621 function,
622 (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
623 2,
624 brw_null_reg(),
625 BRW_MATH_DATA_VECTOR,
626 BRW_MATH_PRECISION_FULL);
627
628 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
629 brw_math(p,
630 offset(dst[0],1),
631 function,
632 (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
633 4,
634 brw_null_reg(),
635 BRW_MATH_DATA_VECTOR,
636 BRW_MATH_PRECISION_FULL);
637
638 brw_pop_insn_state(p);
639 }
640
641
642
643 static void emit_tex( struct brw_wm_compile *c,
644 const struct brw_wm_instruction *inst,
645 struct brw_reg *dst,
646 GLuint dst_flags,
647 struct brw_reg *arg )
648 {
649 struct brw_compile *p = &c->func;
650 GLuint msgLength, responseLength;
651 GLboolean shadow = (c->key.shadowtex_mask & (1<<inst->tex_unit)) ? 1 : 0;
652 GLuint i, nr;
653 GLuint emit;
654
655 /* How many input regs are there?
656 */
657 switch (inst->tex_idx) {
658 case TEXTURE_1D_INDEX:
659 emit = WRITEMASK_X;
660 nr = 1;
661 break;
662 case TEXTURE_2D_INDEX:
663 case TEXTURE_RECT_INDEX:
664 emit = WRITEMASK_XY;
665 nr = 2;
666 break;
667 default:
668 emit = WRITEMASK_XYZ;
669 nr = 3;
670 break;
671 }
672
673 if (shadow) {
674 nr = 4;
675 emit |= WRITEMASK_W;
676 }
677
678 msgLength = 1;
679
680 for (i = 0; i < nr; i++) {
681 static const GLuint swz[4] = {0,1,2,2};
682 if (emit & (1<<i))
683 brw_MOV(p, brw_message_reg(msgLength+1), arg[swz[i]]);
684 else
685 brw_MOV(p, brw_message_reg(msgLength+1), brw_imm_f(0));
686 msgLength += 2;
687 }
688
689 responseLength = 8; /* always */
690
691 brw_SAMPLE(p,
692 retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW),
693 1,
694 retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW),
695 inst->tex_unit + 1, /* surface */
696 inst->tex_unit, /* sampler */
697 inst->writemask,
698 (shadow ?
699 BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE :
700 BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE),
701 responseLength,
702 msgLength,
703 0);
704
705 if (shadow)
706 brw_MOV(p, dst[3], brw_imm_f(1.0));
707 }
708
709
710 static void emit_txb( struct brw_wm_compile *c,
711 const struct brw_wm_instruction *inst,
712 struct brw_reg *dst,
713 GLuint dst_flags,
714 struct brw_reg *arg )
715 {
716 struct brw_compile *p = &c->func;
717 GLuint msgLength;
718
719 /* Shadow ignored for txb.
720 */
721 switch (inst->tex_idx) {
722 case TEXTURE_1D_INDEX:
723 brw_MOV(p, brw_message_reg(2), arg[0]);
724 brw_MOV(p, brw_message_reg(4), brw_imm_f(0));
725 brw_MOV(p, brw_message_reg(6), brw_imm_f(0));
726 break;
727 case TEXTURE_2D_INDEX:
728 case TEXTURE_RECT_INDEX:
729 brw_MOV(p, brw_message_reg(2), arg[0]);
730 brw_MOV(p, brw_message_reg(4), arg[1]);
731 brw_MOV(p, brw_message_reg(6), brw_imm_f(0));
732 break;
733 default:
734 brw_MOV(p, brw_message_reg(2), arg[0]);
735 brw_MOV(p, brw_message_reg(4), arg[1]);
736 brw_MOV(p, brw_message_reg(6), arg[2]);
737 break;
738 }
739
740 brw_MOV(p, brw_message_reg(8), arg[3]);
741 msgLength = 9;
742
743
744 brw_SAMPLE(p,
745 retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW),
746 1,
747 retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW),
748 inst->tex_unit + 1, /* surface */
749 inst->tex_unit, /* sampler */
750 inst->writemask,
751 BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS,
752 8, /* responseLength */
753 msgLength,
754 0);
755
756 }
757
758
759 static void emit_lit( struct brw_compile *p,
760 const struct brw_reg *dst,
761 GLuint mask,
762 const struct brw_reg *arg0 )
763 {
764 assert((mask & WRITEMASK_XW) == 0);
765
766 if (mask & WRITEMASK_Y) {
767 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
768 brw_MOV(p, dst[1], arg0[0]);
769 brw_set_saturate(p, 0);
770 }
771
772 if (mask & WRITEMASK_Z) {
773 emit_math2(p, BRW_MATH_FUNCTION_POW,
774 &dst[2],
775 WRITEMASK_X | (mask & SATURATE),
776 &arg0[1],
777 &arg0[3]);
778 }
779
780 /* Ordinarily you'd use an iff statement to skip or shortcircuit
781 * some of the POW calculations above, but 16-wide iff statements
782 * seem to lock c1 hardware, so this is a nasty workaround:
783 */
784 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_LE, arg0[0], brw_imm_f(0));
785 {
786 if (mask & WRITEMASK_Y)
787 brw_MOV(p, dst[1], brw_imm_f(0));
788
789 if (mask & WRITEMASK_Z)
790 brw_MOV(p, dst[2], brw_imm_f(0));
791 }
792 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
793 }
794
795
796 /* Kill pixel - set execution mask to zero for those pixels which
797 * fail.
798 */
799 static void emit_kil( struct brw_wm_compile *c,
800 struct brw_reg *arg0)
801 {
802 struct brw_compile *p = &c->func;
803 struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
804 GLuint i;
805
806
807 /* XXX - usually won't need 4 compares!
808 */
809 for (i = 0; i < 4; i++) {
810 brw_push_insn_state(p);
811 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0[i], brw_imm_f(0));
812 brw_set_predicate_control_flag_value(p, 0xff);
813 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
814 brw_AND(p, r0uw, brw_flag_reg(), r0uw);
815 brw_pop_insn_state(p);
816 }
817 }
818
819 static void fire_fb_write( struct brw_wm_compile *c,
820 GLuint base_reg,
821 GLuint nr )
822 {
823 struct brw_compile *p = &c->func;
824
825 /* Pass through control information:
826 */
827 /* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */
828 {
829 brw_push_insn_state(p);
830 brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */
831 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
832 brw_MOV(p,
833 brw_message_reg(base_reg + 1),
834 brw_vec8_grf(1, 0));
835 brw_pop_insn_state(p);
836 }
837
838 /* Send framebuffer write message: */
839 /* send (16) null.0<1>:uw m0 r0.0<8;8,1>:uw 0x85a04000:ud { Align1 EOT } */
840 brw_fb_WRITE(p,
841 retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW),
842 base_reg,
843 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
844 0, /* render surface always 0 */
845 nr,
846 0,
847 1);
848 }
849
850 static void emit_aa( struct brw_wm_compile *c,
851 struct brw_reg *arg1,
852 GLuint reg )
853 {
854 struct brw_compile *p = &c->func;
855 GLuint comp = c->key.aa_dest_stencil_reg / 2;
856 GLuint off = c->key.aa_dest_stencil_reg % 2;
857 struct brw_reg aa = offset(arg1[comp], off);
858
859 brw_push_insn_state(p);
860 brw_set_compression_control(p, BRW_COMPRESSION_NONE); /* ?? */
861 brw_MOV(p, brw_message_reg(reg), aa);
862 brw_pop_insn_state(p);
863 }
864
865
866 /* Post-fragment-program processing. Send the results to the
867 * framebuffer.
868 */
869 static void emit_fb_write( struct brw_wm_compile *c,
870 struct brw_reg *arg0,
871 struct brw_reg *arg1,
872 struct brw_reg *arg2)
873 {
874 struct brw_compile *p = &c->func;
875 GLuint nr = 2;
876 GLuint channel;
877
878 /* Reserve a space for AA - may not be needed:
879 */
880 if (c->key.aa_dest_stencil_reg)
881 nr += 1;
882
883 /* I don't really understand how this achieves the color interleave
884 * (ie RGBARGBA) in the result: [Do the saturation here]
885 */
886 {
887 brw_push_insn_state(p);
888
889 for (channel = 0; channel < 4; channel++) {
890 /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */
891 /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */
892
893 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
894 brw_MOV(p,
895 brw_message_reg(nr + channel),
896 arg0[channel]);
897
898 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
899 brw_MOV(p,
900 brw_message_reg(nr + channel + 4),
901 sechalf(arg0[channel]));
902 }
903
904 /* skip over the regs populated above:
905 */
906 nr += 8;
907
908 brw_pop_insn_state(p);
909 }
910
911 if (c->key.source_depth_to_render_target)
912 {
913 if (c->key.computes_depth)
914 brw_MOV(p, brw_message_reg(nr), arg2[2]);
915 else
916 brw_MOV(p, brw_message_reg(nr), arg1[1]); /* ? */
917
918 nr += 2;
919 }
920
921 if (c->key.dest_depth_reg)
922 {
923 GLuint comp = c->key.dest_depth_reg / 2;
924 GLuint off = c->key.dest_depth_reg % 2;
925
926 if (off != 0) {
927 brw_push_insn_state(p);
928 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
929 brw_MOV(p, brw_message_reg(nr), arg1[comp]);
930 /* 2nd half? */
931 brw_MOV(p, brw_message_reg(nr+1), offset(arg1[comp],1));
932 brw_pop_insn_state(p);
933 }
934 else {
935 brw_MOV(p, brw_message_reg(nr), arg1[comp]);
936 }
937 nr += 2;
938 }
939
940
941 if (!c->key.runtime_check_aads_emit) {
942 if (c->key.aa_dest_stencil_reg)
943 emit_aa(c, arg1, 2);
944
945 fire_fb_write(c, 0, nr);
946 }
947 else {
948 struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
949 struct brw_reg ip = brw_ip_reg();
950 struct brw_instruction *jmp;
951
952 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
953 brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
954 brw_AND(p,
955 v1_null_ud,
956 get_element_ud(brw_vec8_grf(1,0), 6),
957 brw_imm_ud(1<<26));
958
959 jmp = brw_JMPI(p, ip, ip, brw_imm_w(0));
960 {
961 emit_aa(c, arg1, 2);
962 fire_fb_write(c, 0, nr);
963 /* note - thread killed in subroutine */
964 }
965 brw_land_fwd_jump(p, jmp);
966
967 /* ELSE: Shuffle up one register to fill in the hole left for AA:
968 */
969 fire_fb_write(c, 1, nr-1);
970 }
971 }
972
973
974
975
976 /* Post-fragment-program processing. Send the results to the
977 * framebuffer.
978 */
979 static void emit_spill( struct brw_wm_compile *c,
980 struct brw_reg reg,
981 GLuint slot )
982 {
983 struct brw_compile *p = &c->func;
984
985 /*
986 mov (16) m2.0<1>:ud r2.0<8;8,1>:ud { Align1 Compr }
987 */
988 brw_MOV(p, brw_message_reg(2), reg);
989
990 /*
991 mov (1) r0.2<1>:d 0x00000080:d { Align1 NoMask }
992 send (16) null.0<1>:uw m1 r0.0<8;8,1>:uw 0x053003ff:ud { Align1 }
993 */
994 brw_dp_WRITE_16(p,
995 retype(vec16(brw_vec8_grf(0, 0)), BRW_REGISTER_TYPE_UW),
996 1,
997 slot);
998 }
999
1000 static void emit_unspill( struct brw_wm_compile *c,
1001 struct brw_reg reg,
1002 GLuint slot )
1003 {
1004 struct brw_compile *p = &c->func;
1005
1006 /* Slot 0 is the undef value.
1007 */
1008 if (slot == 0) {
1009 brw_MOV(p, reg, brw_imm_f(0));
1010 return;
1011 }
1012
1013 /*
1014 mov (1) r0.2<1>:d 0x000000c0:d { Align1 NoMask }
1015 send (16) r110.0<1>:uw m1 r0.0<8;8,1>:uw 0x041243ff:ud { Align1 }
1016 */
1017
1018 brw_dp_READ_16(p,
1019 retype(vec16(reg), BRW_REGISTER_TYPE_UW),
1020 1,
1021 slot);
1022 }
1023
1024
1025
1026 /**
1027 * Retrieve upto 4 GEN4 register pairs for the given wm reg:
1028 */
1029 static void get_argument_regs( struct brw_wm_compile *c,
1030 struct brw_wm_ref *arg[],
1031 struct brw_reg *regs )
1032 {
1033 GLuint i;
1034
1035 for (i = 0; i < 4; i++) {
1036 if (arg[i]) {
1037
1038 if (arg[i]->unspill_reg)
1039 emit_unspill(c,
1040 brw_vec8_grf(arg[i]->unspill_reg, 0),
1041 arg[i]->value->spill_slot);
1042
1043 regs[i] = arg[i]->hw_reg;
1044 }
1045 else {
1046 regs[i] = brw_null_reg();
1047 }
1048 }
1049 }
1050
1051 static void spill_values( struct brw_wm_compile *c,
1052 struct brw_wm_value *values,
1053 GLuint nr )
1054 {
1055 GLuint i;
1056
1057 for (i = 0; i < nr; i++)
1058 if (values[i].spill_slot)
1059 emit_spill(c, values[i].hw_reg, values[i].spill_slot);
1060 }
1061
1062
1063
1064 /* Emit the fragment program instructions here.
1065 */
1066 void brw_wm_emit( struct brw_wm_compile *c )
1067 {
1068 struct brw_compile *p = &c->func;
1069 GLuint insn;
1070
1071 brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
1072
1073 /* Check if any of the payload regs need to be spilled:
1074 */
1075 spill_values(c, c->payload.depth, 4);
1076 spill_values(c, c->creg, c->nr_creg);
1077 spill_values(c, c->payload.input_interp, FRAG_ATTRIB_MAX);
1078
1079
1080 for (insn = 0; insn < c->nr_insns; insn++) {
1081
1082 struct brw_wm_instruction *inst = &c->instruction[insn];
1083 struct brw_reg args[3][4], dst[4];
1084 GLuint i, dst_flags;
1085
1086 /* Get argument regs:
1087 */
1088 for (i = 0; i < 3; i++)
1089 get_argument_regs(c, inst->src[i], args[i]);
1090
1091 /* Get dest regs:
1092 */
1093 for (i = 0; i < 4; i++)
1094 if (inst->dst[i])
1095 dst[i] = inst->dst[i]->hw_reg;
1096 else
1097 dst[i] = brw_null_reg();
1098
1099 /* Flags
1100 */
1101 dst_flags = inst->writemask;
1102 if (inst->saturate)
1103 dst_flags |= SATURATE;
1104
1105 switch (inst->opcode) {
1106 /* Generated instructions for calculating triangle interpolants:
1107 */
1108 case WM_PIXELXY:
1109 emit_pixel_xy(p, dst, dst_flags, args[0]);
1110 break;
1111
1112 case WM_DELTAXY:
1113 emit_delta_xy(p, dst, dst_flags, args[0], args[1]);
1114 break;
1115
1116 case WM_WPOSXY:
1117 emit_wpos_xy(p, dst, dst_flags, args[0]);
1118 break;
1119
1120 case WM_PIXELW:
1121 emit_pixel_w(p, dst, dst_flags, args[0], args[1]);
1122 break;
1123
1124 case WM_LINTERP:
1125 emit_linterp(p, dst, dst_flags, args[0], args[1]);
1126 break;
1127
1128 case WM_PINTERP:
1129 emit_pinterp(p, dst, dst_flags, args[0], args[1], args[2]);
1130 break;
1131
1132 case WM_CINTERP:
1133 emit_cinterp(p, dst, dst_flags, args[0]);
1134 break;
1135
1136 case WM_FB_WRITE:
1137 emit_fb_write(c, args[0], args[1], args[2]);
1138 break;
1139
1140 /* Straightforward arithmetic:
1141 */
1142 case OPCODE_ADD:
1143 emit_alu2(p, brw_ADD, dst, dst_flags, args[0], args[1]);
1144 break;
1145
1146 case OPCODE_FRC:
1147 emit_alu1(p, brw_FRC, dst, dst_flags, args[0]);
1148 break;
1149
1150 case OPCODE_FLR:
1151 emit_alu1(p, brw_RNDD, dst, dst_flags, args[0]);
1152 break;
1153
1154 case OPCODE_DP3: /* */
1155 emit_dp3(p, dst, dst_flags, args[0], args[1]);
1156 break;
1157
1158 case OPCODE_DP4:
1159 emit_dp4(p, dst, dst_flags, args[0], args[1]);
1160 break;
1161
1162 case OPCODE_DPH:
1163 emit_dph(p, dst, dst_flags, args[0], args[1]);
1164 break;
1165
1166 case OPCODE_LRP: /* */
1167 emit_lrp(p, dst, dst_flags, args[0], args[1], args[2]);
1168 break;
1169
1170 case OPCODE_MAD:
1171 emit_mad(p, dst, dst_flags, args[0], args[1], args[2]);
1172 break;
1173
1174 case OPCODE_MOV:
1175 case OPCODE_SWZ:
1176 emit_alu1(p, brw_MOV, dst, dst_flags, args[0]);
1177 break;
1178
1179 case OPCODE_MUL:
1180 emit_alu2(p, brw_MUL, dst, dst_flags, args[0], args[1]);
1181 break;
1182
1183 case OPCODE_XPD:
1184 emit_xpd(p, dst, dst_flags, args[0], args[1]);
1185 break;
1186
1187 /* Higher math functions:
1188 */
1189 case OPCODE_RCP:
1190 emit_math1(p, BRW_MATH_FUNCTION_INV, dst, dst_flags, args[0]);
1191 break;
1192
1193 case OPCODE_RSQ:
1194 emit_math1(p, BRW_MATH_FUNCTION_RSQ, dst, dst_flags, args[0]);
1195 break;
1196
1197 case OPCODE_SIN:
1198 emit_math1(p, BRW_MATH_FUNCTION_SIN, dst, dst_flags, args[0]);
1199 break;
1200
1201 case OPCODE_COS:
1202 emit_math1(p, BRW_MATH_FUNCTION_COS, dst, dst_flags, args[0]);
1203 break;
1204
1205 case OPCODE_EX2:
1206 emit_math1(p, BRW_MATH_FUNCTION_EXP, dst, dst_flags, args[0]);
1207 break;
1208
1209 case OPCODE_LG2:
1210 emit_math1(p, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]);
1211 break;
1212
1213 case OPCODE_SCS:
1214 /* There is an scs math function, but it would need some
1215 * fixup for 16-element execution.
1216 */
1217 if (dst_flags & WRITEMASK_X)
1218 emit_math1(p, BRW_MATH_FUNCTION_COS, dst, (dst_flags&SATURATE)|WRITEMASK_X, args[0]);
1219 if (dst_flags & WRITEMASK_Y)
1220 emit_math1(p, BRW_MATH_FUNCTION_SIN, dst+1, (dst_flags&SATURATE)|WRITEMASK_X, args[0]);
1221 break;
1222
1223 case OPCODE_POW:
1224 emit_math2(p, BRW_MATH_FUNCTION_POW, dst, dst_flags, args[0], args[1]);
1225 break;
1226
1227 /* Comparisons:
1228 */
1229 case OPCODE_CMP:
1230 emit_cmp(p, dst, dst_flags, args[0], args[1], args[2]);
1231 break;
1232
1233 case OPCODE_MAX:
1234 emit_max(p, dst, dst_flags, args[0], args[1]);
1235 break;
1236
1237 case OPCODE_MIN:
1238 emit_min(p, dst, dst_flags, args[0], args[1]);
1239 break;
1240
1241 case OPCODE_SLT:
1242 emit_slt(p, dst, dst_flags, args[0], args[1]);
1243 break;
1244
1245 case OPCODE_SLE:
1246 emit_sle(p, dst, dst_flags, args[0], args[1]);
1247 break;
1248 case OPCODE_SGT:
1249 emit_sgt(p, dst, dst_flags, args[0], args[1]);
1250 break;
1251 case OPCODE_SGE:
1252 emit_sge(p, dst, dst_flags, args[0], args[1]);
1253 break;
1254 case OPCODE_SEQ:
1255 emit_seq(p, dst, dst_flags, args[0], args[1]);
1256 break;
1257 case OPCODE_SNE:
1258 emit_sne(p, dst, dst_flags, args[0], args[1]);
1259 break;
1260
1261 case OPCODE_LIT:
1262 emit_lit(p, dst, dst_flags, args[0]);
1263 break;
1264
1265 /* Texturing operations:
1266 */
1267 case OPCODE_TEX:
1268 emit_tex(c, inst, dst, dst_flags, args[0]);
1269 break;
1270
1271 case OPCODE_TXB:
1272 emit_txb(c, inst, dst, dst_flags, args[0]);
1273 break;
1274
1275 case OPCODE_KIL:
1276 emit_kil(c, args[0]);
1277 break;
1278
1279 default:
1280 _mesa_printf("unsupport opcode %d in fragment program\n",
1281 inst->opcode);
1282 }
1283
1284 for (i = 0; i < 4; i++)
1285 if (inst->dst[i] && inst->dst[i]->spill_slot)
1286 emit_spill(c,
1287 inst->dst[i]->hw_reg,
1288 inst->dst[i]->spill_slot);
1289 }
1290 }
1291
1292
1293
1294
1295