Minor r200 vertex program cleanups. Remove disabled leftovers from r300 vertex progra...
[mesa.git] / src / mesa / drivers / dri / i965 / brw_wm_emit.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "brw_context.h"
34 #include "program.h"
35 #include "program_instruction.h"
36 #include "macros.h"
37 #include "brw_wm.h"
38
39 #define SATURATE (1<<5)
40
41 /* Not quite sure how correct this is - need to understand horiz
42 * vs. vertical strides a little better.
43 */
44 static __inline struct brw_reg sechalf( struct brw_reg reg )
45 {
46 if (reg.vstride)
47 reg.nr++;
48 return reg;
49 }
50
51 /* Payload R0:
52 *
53 * R0.0 -- pixel mask, one bit for each of 4 pixels in 4 tiles,
54 * corresponding to each of the 16 execution channels.
55 * R0.1..8 -- ?
56 * R1.0 -- triangle vertex 0.X
57 * R1.1 -- triangle vertex 0.Y
58 * R1.2 -- tile 0 x,y coords (2 packed uwords)
59 * R1.3 -- tile 1 x,y coords (2 packed uwords)
60 * R1.4 -- tile 2 x,y coords (2 packed uwords)
61 * R1.5 -- tile 3 x,y coords (2 packed uwords)
62 * R1.6 -- ?
63 * R1.7 -- ?
64 * R1.8 -- ?
65 */
66
67
68 static void emit_pixel_xy(struct brw_compile *p,
69 const struct brw_reg *dst,
70 GLuint mask,
71 const struct brw_reg *arg0)
72 {
73 struct brw_reg r1 = brw_vec1_grf(1, 0);
74 struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW);
75
76 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
77
78 /* Calculate pixel centers by adding 1 or 0 to each of the
79 * micro-tile coordinates passed in r1.
80 */
81 if (mask & WRITEMASK_X) {
82 brw_ADD(p,
83 vec16(retype(dst[0], BRW_REGISTER_TYPE_UW)),
84 stride(suboffset(r1_uw, 4), 2, 4, 0),
85 brw_imm_v(0x10101010));
86 }
87
88 if (mask & WRITEMASK_Y) {
89 brw_ADD(p,
90 vec16(retype(dst[1], BRW_REGISTER_TYPE_UW)),
91 stride(suboffset(r1_uw,5), 2, 4, 0),
92 brw_imm_v(0x11001100));
93 }
94
95 brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
96 }
97
98
99
100 static void emit_delta_xy(struct brw_compile *p,
101 const struct brw_reg *dst,
102 GLuint mask,
103 const struct brw_reg *arg0,
104 const struct brw_reg *arg1)
105 {
106 struct brw_reg r1 = brw_vec1_grf(1, 0);
107
108 /* Calc delta X,Y by subtracting origin in r1 from the pixel
109 * centers.
110 */
111 if (mask & WRITEMASK_X) {
112 brw_ADD(p,
113 dst[0],
114 retype(arg0[0], BRW_REGISTER_TYPE_UW),
115 negate(r1));
116 }
117
118 if (mask & WRITEMASK_Y) {
119 brw_ADD(p,
120 dst[1],
121 retype(arg0[1], BRW_REGISTER_TYPE_UW),
122 negate(suboffset(r1,1)));
123
124 }
125 }
126
127 static void emit_wpos_xy(struct brw_compile *p,
128 const struct brw_reg *dst,
129 GLuint mask,
130 const struct brw_reg *arg0)
131 {
132 /* Calc delta X,Y by subtracting origin in r1 from the pixel
133 * centers.
134 */
135 if (mask & WRITEMASK_X) {
136 brw_MOV(p,
137 dst[0],
138 retype(arg0[0], BRW_REGISTER_TYPE_UW));
139 }
140
141 if (mask & WRITEMASK_Y) {
142 /* TODO -- window_height - Y */
143 brw_MOV(p,
144 dst[1],
145 negate(retype(arg0[1], BRW_REGISTER_TYPE_UW)));
146
147 }
148 }
149
150
151 static void emit_pixel_w( struct brw_compile *p,
152 const struct brw_reg *dst,
153 GLuint mask,
154 const struct brw_reg *arg0,
155 const struct brw_reg *deltas)
156 {
157 /* Don't need this if all you are doing is interpolating color, for
158 * instance.
159 */
160 if (mask & WRITEMASK_W) {
161 struct brw_reg interp3 = brw_vec1_grf(arg0[0].nr+1, 4);
162
163 /* Calc 1/w - just linterp wpos[3] optimized by putting the
164 * result straight into a message reg.
165 */
166 brw_LINE(p, brw_null_reg(), interp3, deltas[0]);
167 brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), deltas[1]);
168
169 /* Calc w */
170 brw_math_16( p, dst[3],
171 BRW_MATH_FUNCTION_INV,
172 BRW_MATH_SATURATE_NONE,
173 2, brw_null_reg(),
174 BRW_MATH_PRECISION_FULL);
175 }
176 }
177
178
179
180 static void emit_linterp( struct brw_compile *p,
181 const struct brw_reg *dst,
182 GLuint mask,
183 const struct brw_reg *arg0,
184 const struct brw_reg *deltas )
185 {
186 struct brw_reg interp[4];
187 GLuint nr = arg0[0].nr;
188 GLuint i;
189
190 interp[0] = brw_vec1_grf(nr, 0);
191 interp[1] = brw_vec1_grf(nr, 4);
192 interp[2] = brw_vec1_grf(nr+1, 0);
193 interp[3] = brw_vec1_grf(nr+1, 4);
194
195 for(i = 0; i < 4; i++ ) {
196 if (mask & (1<<i)) {
197 brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
198 brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
199 }
200 }
201 }
202
203
204 static void emit_pinterp( struct brw_compile *p,
205 const struct brw_reg *dst,
206 GLuint mask,
207 const struct brw_reg *arg0,
208 const struct brw_reg *deltas,
209 const struct brw_reg *w)
210 {
211 struct brw_reg interp[4];
212 GLuint nr = arg0[0].nr;
213 GLuint i;
214
215 interp[0] = brw_vec1_grf(nr, 0);
216 interp[1] = brw_vec1_grf(nr, 4);
217 interp[2] = brw_vec1_grf(nr+1, 0);
218 interp[3] = brw_vec1_grf(nr+1, 4);
219
220 for(i = 0; i < 4; i++ ) {
221 if (mask & (1<<i)) {
222 brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
223 brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
224 brw_MUL(p, dst[i], dst[i], w[3]);
225 }
226 }
227 }
228
229 static void emit_cinterp( struct brw_compile *p,
230 const struct brw_reg *dst,
231 GLuint mask,
232 const struct brw_reg *arg0 )
233 {
234 struct brw_reg interp[4];
235 GLuint nr = arg0[0].nr;
236 GLuint i;
237
238 interp[0] = brw_vec1_grf(nr, 0);
239 interp[1] = brw_vec1_grf(nr, 4);
240 interp[2] = brw_vec1_grf(nr+1, 0);
241 interp[3] = brw_vec1_grf(nr+1, 4);
242
243 for(i = 0; i < 4; i++ ) {
244 if (mask & (1<<i)) {
245 brw_MOV(p, dst[i], suboffset(interp[i],3)); /* TODO: optimize away like other moves */
246 }
247 }
248 }
249
250
251
252
253
254 static void emit_alu1( struct brw_compile *p,
255 struct brw_instruction *(*func)(struct brw_compile *,
256 struct brw_reg,
257 struct brw_reg),
258 const struct brw_reg *dst,
259 GLuint mask,
260 const struct brw_reg *arg0 )
261 {
262 GLuint i;
263
264 if (mask & SATURATE)
265 brw_set_saturate(p, 1);
266
267 for (i = 0; i < 4; i++) {
268 if (mask & (1<<i)) {
269 func(p, dst[i], arg0[i]);
270 }
271 }
272
273 if (mask & SATURATE)
274 brw_set_saturate(p, 0);
275 }
276
277 static void emit_alu2( struct brw_compile *p,
278 struct brw_instruction *(*func)(struct brw_compile *,
279 struct brw_reg,
280 struct brw_reg,
281 struct brw_reg),
282 const struct brw_reg *dst,
283 GLuint mask,
284 const struct brw_reg *arg0,
285 const struct brw_reg *arg1 )
286 {
287 GLuint i;
288
289 if (mask & SATURATE)
290 brw_set_saturate(p, 1);
291
292 for (i = 0; i < 4; i++) {
293 if (mask & (1<<i)) {
294 func(p, dst[i], arg0[i], arg1[i]);
295 }
296 }
297
298 if (mask & SATURATE)
299 brw_set_saturate(p, 0);
300 }
301
302
303 static void emit_mad( struct brw_compile *p,
304 const struct brw_reg *dst,
305 GLuint mask,
306 const struct brw_reg *arg0,
307 const struct brw_reg *arg1,
308 const struct brw_reg *arg2 )
309 {
310 GLuint i;
311
312 for (i = 0; i < 4; i++) {
313 if (mask & (1<<i)) {
314 brw_MUL(p, dst[i], arg0[i], arg1[i]);
315
316 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
317 brw_ADD(p, dst[i], dst[i], arg2[i]);
318 brw_set_saturate(p, 0);
319 }
320 }
321 }
322
323
324 static void emit_lrp( struct brw_compile *p,
325 const struct brw_reg *dst,
326 GLuint mask,
327 const struct brw_reg *arg0,
328 const struct brw_reg *arg1,
329 const struct brw_reg *arg2 )
330 {
331 GLuint i;
332
333 /* Uses dst as a temporary:
334 */
335 for (i = 0; i < 4; i++) {
336 if (mask & (1<<i)) {
337 /* Can I use the LINE instruction for this?
338 */
339 brw_ADD(p, dst[i], negate(arg0[i]), brw_imm_f(1.0));
340 brw_MUL(p, brw_null_reg(), dst[i], arg2[i]);
341
342 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
343 brw_MAC(p, dst[i], arg0[i], arg1[i]);
344 brw_set_saturate(p, 0);
345 }
346 }
347 }
348
349
350 static void emit_slt( struct brw_compile *p,
351 const struct brw_reg *dst,
352 GLuint mask,
353 const struct brw_reg *arg0,
354 const struct brw_reg *arg1 )
355 {
356 GLuint i;
357
358 for (i = 0; i < 4; i++) {
359 if (mask & (1<<i)) {
360 brw_MOV(p, dst[i], brw_imm_f(0));
361 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]);
362 brw_MOV(p, dst[i], brw_imm_f(1.0));
363 brw_set_predicate_control_flag_value(p, 0xff);
364 }
365 }
366 }
367
368 /* Isn't this just the same as the above with the args swapped?
369 */
370 static void emit_sge( struct brw_compile *p,
371 const struct brw_reg *dst,
372 GLuint mask,
373 const struct brw_reg *arg0,
374 const struct brw_reg *arg1 )
375 {
376 GLuint i;
377
378 for (i = 0; i < 4; i++) {
379 if (mask & (1<<i)) {
380 brw_MOV(p, dst[i], brw_imm_f(0));
381 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0[i], arg1[i]);
382 brw_MOV(p, dst[i], brw_imm_f(1.0));
383 brw_set_predicate_control_flag_value(p, 0xff);
384 }
385 }
386 }
387
388
389
390 static void emit_cmp( struct brw_compile *p,
391 const struct brw_reg *dst,
392 GLuint mask,
393 const struct brw_reg *arg0,
394 const struct brw_reg *arg1,
395 const struct brw_reg *arg2 )
396 {
397 GLuint i;
398
399 for (i = 0; i < 4; i++) {
400 if (mask & (1<<i)) {
401 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
402 brw_MOV(p, dst[i], arg2[i]);
403 brw_set_saturate(p, 0);
404
405 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], brw_imm_f(0));
406
407 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
408 brw_MOV(p, dst[i], arg1[i]);
409 brw_set_saturate(p, 0);
410 brw_set_predicate_control_flag_value(p, 0xff);
411 }
412 }
413 }
414
415 static void emit_max( struct brw_compile *p,
416 const struct brw_reg *dst,
417 GLuint mask,
418 const struct brw_reg *arg0,
419 const struct brw_reg *arg1 )
420 {
421 GLuint i;
422
423 for (i = 0; i < 4; i++) {
424 if (mask & (1<<i)) {
425 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
426 brw_MOV(p, dst[i], arg0[i]);
427 brw_set_saturate(p, 0);
428
429 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]);
430
431 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
432 brw_MOV(p, dst[i], arg1[i]);
433 brw_set_saturate(p, 0);
434 brw_set_predicate_control_flag_value(p, 0xff);
435 }
436 }
437 }
438
439 static void emit_min( struct brw_compile *p,
440 const struct brw_reg *dst,
441 GLuint mask,
442 const struct brw_reg *arg0,
443 const struct brw_reg *arg1 )
444 {
445 GLuint i;
446
447 for (i = 0; i < 4; i++) {
448 if (mask & (1<<i)) {
449 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
450 brw_MOV(p, dst[i], arg1[i]);
451 brw_set_saturate(p, 0);
452
453 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]);
454
455 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
456 brw_MOV(p, dst[i], arg0[i]);
457 brw_set_saturate(p, 0);
458 brw_set_predicate_control_flag_value(p, 0xff);
459 }
460 }
461 }
462
463
464 static void emit_dp3( struct brw_compile *p,
465 const struct brw_reg *dst,
466 GLuint mask,
467 const struct brw_reg *arg0,
468 const struct brw_reg *arg1 )
469 {
470 assert((mask & WRITEMASK_XYZW) == WRITEMASK_X);
471
472 brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
473 brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
474
475 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
476 brw_MAC(p, dst[0], arg0[2], arg1[2]);
477 brw_set_saturate(p, 0);
478 }
479
480
481 static void emit_dp4( struct brw_compile *p,
482 const struct brw_reg *dst,
483 GLuint mask,
484 const struct brw_reg *arg0,
485 const struct brw_reg *arg1 )
486 {
487 assert((mask & WRITEMASK_XYZW) == WRITEMASK_X);
488
489 brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
490 brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
491 brw_MAC(p, brw_null_reg(), arg0[2], arg1[2]);
492
493 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
494 brw_MAC(p, dst[0], arg0[3], arg1[3]);
495 brw_set_saturate(p, 0);
496 }
497
498
499 static void emit_dph( struct brw_compile *p,
500 const struct brw_reg *dst,
501 GLuint mask,
502 const struct brw_reg *arg0,
503 const struct brw_reg *arg1 )
504 {
505 assert((mask & WRITEMASK_XYZW) == WRITEMASK_X);
506
507 brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
508 brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
509 brw_MAC(p, dst[0], arg0[2], arg1[2]);
510
511 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
512 brw_ADD(p, dst[0], dst[0], arg1[3]);
513 brw_set_saturate(p, 0);
514 }
515
516
517 static void emit_xpd( struct brw_compile *p,
518 const struct brw_reg *dst,
519 GLuint mask,
520 const struct brw_reg *arg0,
521 const struct brw_reg *arg1 )
522 {
523 GLuint i;
524
525 assert(!(mask & WRITEMASK_W) == WRITEMASK_X);
526
527 for (i = 0 ; i < 3; i++) {
528 if (mask & (1<<i)) {
529 GLuint i2 = (i+2)%3;
530 GLuint i1 = (i+1)%3;
531
532 brw_MUL(p, brw_null_reg(), negate(arg0[i2]), arg1[i1]);
533
534 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
535 brw_MAC(p, dst[i], arg0[i1], arg1[i2]);
536 brw_set_saturate(p, 0);
537 }
538 }
539 }
540
541
542 static void emit_math1( struct brw_compile *p,
543 GLuint function,
544 const struct brw_reg *dst,
545 GLuint mask,
546 const struct brw_reg *arg0 )
547 {
548 assert((mask & WRITEMASK_XYZW) == WRITEMASK_X ||
549 function == BRW_MATH_FUNCTION_SINCOS);
550
551 brw_MOV(p, brw_message_reg(2), arg0[0]);
552
553 /* Send two messages to perform all 16 operations:
554 */
555 brw_math_16(p,
556 dst[0],
557 function,
558 (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
559 2,
560 brw_null_reg(),
561 BRW_MATH_PRECISION_FULL);
562 }
563
564
565 static void emit_math2( struct brw_compile *p,
566 GLuint function,
567 const struct brw_reg *dst,
568 GLuint mask,
569 const struct brw_reg *arg0,
570 const struct brw_reg *arg1)
571 {
572 assert((mask & WRITEMASK_XYZW) == WRITEMASK_X);
573
574 brw_push_insn_state(p);
575
576 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
577 brw_MOV(p, brw_message_reg(2), arg0[0]);
578 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
579 brw_MOV(p, brw_message_reg(4), sechalf(arg0[0]));
580
581 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
582 brw_MOV(p, brw_message_reg(3), arg1[0]);
583 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
584 brw_MOV(p, brw_message_reg(5), sechalf(arg1[0]));
585
586
587 /* Send two messages to perform all 16 operations:
588 */
589 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
590 brw_math(p,
591 dst[0],
592 function,
593 (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
594 2,
595 brw_null_reg(),
596 BRW_MATH_DATA_VECTOR,
597 BRW_MATH_PRECISION_FULL);
598
599 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
600 brw_math(p,
601 offset(dst[0],1),
602 function,
603 (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
604 4,
605 brw_null_reg(),
606 BRW_MATH_DATA_VECTOR,
607 BRW_MATH_PRECISION_FULL);
608
609 brw_pop_insn_state(p);
610 }
611
612
613
614 static void emit_tex( struct brw_wm_compile *c,
615 const struct brw_wm_instruction *inst,
616 struct brw_reg *dst,
617 GLuint dst_flags,
618 struct brw_reg *arg )
619 {
620 struct brw_compile *p = &c->func;
621 GLuint msgLength, responseLength;
622 GLboolean shadow = (c->key.shadowtex_mask & (1<<inst->tex_unit)) ? 1 : 0;
623 GLuint i, nr;
624 GLuint emit;
625
626 /* How many input regs are there?
627 */
628 switch (inst->tex_idx) {
629 case TEXTURE_1D_INDEX:
630 emit = WRITEMASK_X;
631 nr = 1;
632 break;
633 case TEXTURE_2D_INDEX:
634 case TEXTURE_RECT_INDEX:
635 emit = WRITEMASK_XY;
636 nr = 2;
637 break;
638 default:
639 emit = WRITEMASK_XYZ;
640 nr = 3;
641 break;
642 }
643
644 if (shadow) {
645 nr = 4;
646 emit |= WRITEMASK_W;
647 }
648
649 msgLength = 1;
650
651 for (i = 0; i < nr; i++) {
652 static const GLuint swz[4] = {0,1,2,2};
653 if (emit & (1<<i))
654 brw_MOV(p, brw_message_reg(msgLength+1), arg[swz[i]]);
655 else
656 brw_MOV(p, brw_message_reg(msgLength+1), brw_imm_f(0));
657 msgLength += 2;
658 }
659
660 responseLength = 8; /* always */
661
662 brw_SAMPLE(p,
663 retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW),
664 1,
665 retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW),
666 inst->tex_unit + 1, /* surface */
667 inst->tex_unit, /* sampler */
668 inst->writemask,
669 (shadow ?
670 BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE :
671 BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE),
672 responseLength,
673 msgLength,
674 0);
675
676 }
677
678
679 static void emit_txb( struct brw_wm_compile *c,
680 const struct brw_wm_instruction *inst,
681 struct brw_reg *dst,
682 GLuint dst_flags,
683 struct brw_reg *arg )
684 {
685 struct brw_compile *p = &c->func;
686 GLuint msgLength;
687
688 /* Shadow ignored for txb.
689 */
690 switch (inst->tex_idx) {
691 case TEXTURE_1D_INDEX:
692 brw_MOV(p, brw_message_reg(2), arg[0]);
693 brw_MOV(p, brw_message_reg(4), brw_imm_f(0));
694 brw_MOV(p, brw_message_reg(6), brw_imm_f(0));
695 break;
696 case TEXTURE_2D_INDEX:
697 case TEXTURE_RECT_INDEX:
698 brw_MOV(p, brw_message_reg(2), arg[0]);
699 brw_MOV(p, brw_message_reg(4), arg[1]);
700 brw_MOV(p, brw_message_reg(6), brw_imm_f(0));
701 break;
702 default:
703 brw_MOV(p, brw_message_reg(2), arg[0]);
704 brw_MOV(p, brw_message_reg(4), arg[1]);
705 brw_MOV(p, brw_message_reg(6), arg[2]);
706 break;
707 }
708
709 brw_MOV(p, brw_message_reg(8), arg[3]);
710 msgLength = 9;
711
712
713 brw_SAMPLE(p,
714 retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW),
715 1,
716 retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW),
717 inst->tex_unit + 1, /* surface */
718 inst->tex_unit, /* sampler */
719 inst->writemask,
720 BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS,
721 8, /* responseLength */
722 msgLength,
723 0);
724
725 }
726
727
728 static void emit_lit( struct brw_compile *p,
729 const struct brw_reg *dst,
730 GLuint mask,
731 const struct brw_reg *arg0 )
732 {
733 assert((mask & WRITEMASK_XW) == 0);
734
735 if (mask & WRITEMASK_Y) {
736 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
737 brw_MOV(p, dst[1], arg0[0]);
738 brw_set_saturate(p, 0);
739 }
740
741 if (mask & WRITEMASK_Z) {
742 emit_math2(p, BRW_MATH_FUNCTION_POW,
743 &dst[2],
744 WRITEMASK_X | (mask & SATURATE),
745 &arg0[1],
746 &arg0[3]);
747 }
748
749 /* Ordinarily you'd use an iff statement to skip or shortcircuit
750 * some of the POW calculations above, but 16-wide iff statements
751 * seem to lock c1 hardware, so this is a nasty workaround:
752 */
753 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_LE, arg0[0], brw_imm_f(0));
754 {
755 if (mask & WRITEMASK_Y)
756 brw_MOV(p, dst[1], brw_imm_f(0));
757
758 if (mask & WRITEMASK_Z)
759 brw_MOV(p, dst[2], brw_imm_f(0));
760 }
761 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
762 }
763
764
765 /* Kill pixel - set execution mask to zero for those pixels which
766 * fail.
767 */
768 static void emit_kil( struct brw_wm_compile *c,
769 struct brw_reg *arg0)
770 {
771 struct brw_compile *p = &c->func;
772 struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
773 GLuint i;
774
775
776 /* XXX - usually won't need 4 compares!
777 */
778 for (i = 0; i < 4; i++) {
779 brw_push_insn_state(p);
780 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0[i], brw_imm_f(0));
781 brw_set_predicate_control_flag_value(p, 0xff);
782 brw_AND(p, r0uw, brw_flag_reg(), r0uw);
783 brw_pop_insn_state(p);
784 }
785 }
786
787 static void fire_fb_write( struct brw_wm_compile *c,
788 GLuint base_reg,
789 GLuint nr )
790 {
791 struct brw_compile *p = &c->func;
792
793 /* Pass through control information:
794 */
795 /* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */
796 {
797 brw_push_insn_state(p);
798 brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */
799 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
800 brw_MOV(p,
801 brw_message_reg(base_reg + 1),
802 brw_vec8_grf(1, 0));
803 brw_pop_insn_state(p);
804 }
805
806 /* Send framebuffer write message: */
807 /* send (16) null.0<1>:uw m0 r0.0<8;8,1>:uw 0x85a04000:ud { Align1 EOT } */
808 brw_fb_WRITE(p,
809 retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW),
810 base_reg,
811 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
812 0, /* render surface always 0 */
813 nr,
814 0,
815 1);
816 }
817
818 static void emit_aa( struct brw_wm_compile *c,
819 struct brw_reg *arg1,
820 GLuint reg )
821 {
822 struct brw_compile *p = &c->func;
823 GLuint comp = c->key.aa_dest_stencil_reg / 2;
824 GLuint off = c->key.aa_dest_stencil_reg % 2;
825 struct brw_reg aa = offset(arg1[comp], off);
826
827 brw_push_insn_state(p);
828 brw_set_compression_control(p, BRW_COMPRESSION_NONE); /* ?? */
829 brw_MOV(p, brw_message_reg(reg), aa);
830 brw_pop_insn_state(p);
831 }
832
833
834 /* Post-fragment-program processing. Send the results to the
835 * framebuffer.
836 */
837 static void emit_fb_write( struct brw_wm_compile *c,
838 struct brw_reg *arg0,
839 struct brw_reg *arg1,
840 struct brw_reg *arg2)
841 {
842 struct brw_compile *p = &c->func;
843 GLuint nr = 2;
844 GLuint channel;
845
846 /* Reserve a space for AA - may not be needed:
847 */
848 if (c->key.aa_dest_stencil_reg)
849 nr += 1;
850
851 /* I don't really understand how this achieves the color interleave
852 * (ie RGBARGBA) in the result: [Do the saturation here]
853 */
854 {
855 brw_push_insn_state(p);
856
857 for (channel = 0; channel < 4; channel++) {
858 /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */
859 /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */
860
861 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
862 brw_MOV(p,
863 brw_message_reg(nr + channel),
864 arg0[channel]);
865
866 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
867 brw_MOV(p,
868 brw_message_reg(nr + channel + 4),
869 sechalf(arg0[channel]));
870 }
871
872 /* skip over the regs populated above:
873 */
874 nr += 8;
875
876 brw_pop_insn_state(p);
877 }
878
879 if (c->key.source_depth_to_render_target)
880 {
881 if (c->key.computes_depth)
882 brw_MOV(p, brw_message_reg(nr), arg2[2]);
883 else
884 brw_MOV(p, brw_message_reg(nr), arg1[1]); /* ? */
885
886 nr += 2;
887 }
888
889 if (c->key.dest_depth_reg)
890 {
891 GLuint comp = c->key.dest_depth_reg / 2;
892 GLuint off = c->key.dest_depth_reg % 2;
893
894 if (off != 0) {
895 brw_push_insn_state(p);
896 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
897 brw_MOV(p, brw_message_reg(nr), arg1[comp]);
898 /* 2nd half? */
899 brw_MOV(p, brw_message_reg(nr+1), offset(arg1[comp],1));
900 brw_pop_insn_state(p);
901 }
902 else {
903 brw_MOV(p, brw_message_reg(nr), arg1[comp]);
904 }
905 nr += 2;
906 }
907
908
909 if (!c->key.runtime_check_aads_emit) {
910 if (c->key.aa_dest_stencil_reg)
911 emit_aa(c, arg1, 2);
912
913 fire_fb_write(c, 0, nr);
914 }
915 else {
916 struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
917 struct brw_reg ip = brw_ip_reg();
918 struct brw_instruction *jmp;
919
920 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
921 brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
922 brw_AND(p,
923 v1_null_ud,
924 get_element_ud(brw_vec8_grf(1,0), 6),
925 brw_imm_ud(1<<26));
926
927 jmp = brw_JMPI(p, ip, ip, brw_imm_w(0));
928 {
929 emit_aa(c, arg1, 2);
930 fire_fb_write(c, 0, nr);
931 /* note - thread killed in subroutine */
932 }
933 brw_land_fwd_jump(p, jmp);
934
935 /* ELSE: Shuffle up one register to fill in the hole left for AA:
936 */
937 fire_fb_write(c, 1, nr-1);
938 }
939 }
940
941
942
943
944 /* Post-fragment-program processing. Send the results to the
945 * framebuffer.
946 */
947 static void emit_spill( struct brw_wm_compile *c,
948 struct brw_reg reg,
949 GLuint slot )
950 {
951 struct brw_compile *p = &c->func;
952
953 /*
954 mov (16) m2.0<1>:ud r2.0<8;8,1>:ud { Align1 Compr }
955 */
956 brw_MOV(p, brw_message_reg(2), reg);
957
958 /*
959 mov (1) r0.2<1>:d 0x00000080:d { Align1 NoMask }
960 send (16) null.0<1>:uw m1 r0.0<8;8,1>:uw 0x053003ff:ud { Align1 }
961 */
962 brw_dp_WRITE_16(p,
963 retype(vec16(brw_vec8_grf(0, 0)), BRW_REGISTER_TYPE_UW),
964 1,
965 slot);
966 }
967
968 static void emit_unspill( struct brw_wm_compile *c,
969 struct brw_reg reg,
970 GLuint slot )
971 {
972 struct brw_compile *p = &c->func;
973
974 /* Slot 0 is the undef value.
975 */
976 if (slot == 0) {
977 brw_MOV(p, reg, brw_imm_f(0));
978 return;
979 }
980
981 /*
982 mov (1) r0.2<1>:d 0x000000c0:d { Align1 NoMask }
983 send (16) r110.0<1>:uw m1 r0.0<8;8,1>:uw 0x041243ff:ud { Align1 }
984 */
985
986 brw_dp_READ_16(p,
987 retype(vec16(reg), BRW_REGISTER_TYPE_UW),
988 1,
989 slot);
990 }
991
992
993
994 /**
995 * Retrieve upto 4 GEN4 register pairs for the given wm reg:
996 */
997 static void get_argument_regs( struct brw_wm_compile *c,
998 struct brw_wm_ref *arg[],
999 struct brw_reg *regs )
1000 {
1001 GLuint i;
1002
1003 for (i = 0; i < 4; i++) {
1004 if (arg[i]) {
1005
1006 if (arg[i]->unspill_reg)
1007 emit_unspill(c,
1008 brw_vec8_grf(arg[i]->unspill_reg, 0),
1009 arg[i]->value->spill_slot);
1010
1011 regs[i] = arg[i]->hw_reg;
1012 }
1013 else {
1014 regs[i] = brw_null_reg();
1015 }
1016 }
1017 }
1018
1019 static void spill_values( struct brw_wm_compile *c,
1020 struct brw_wm_value *values,
1021 GLuint nr )
1022 {
1023 GLuint i;
1024
1025 for (i = 0; i < nr; i++)
1026 if (values[i].spill_slot)
1027 emit_spill(c, values[i].hw_reg, values[i].spill_slot);
1028 }
1029
1030
1031
1032 /* Emit the fragment program instructions here.
1033 */
1034 void brw_wm_emit( struct brw_wm_compile *c )
1035 {
1036 struct brw_compile *p = &c->func;
1037 GLuint insn;
1038
1039 brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
1040
1041 /* Check if any of the payload regs need to be spilled:
1042 */
1043 spill_values(c, c->payload.depth, 4);
1044 spill_values(c, c->creg, c->nr_creg);
1045 spill_values(c, c->payload.input_interp, FRAG_ATTRIB_MAX);
1046
1047
1048 for (insn = 0; insn < c->nr_insns; insn++) {
1049
1050 struct brw_wm_instruction *inst = &c->instruction[insn];
1051 struct brw_reg args[3][4], dst[4];
1052 GLuint i, dst_flags;
1053
1054 /* Get argument regs:
1055 */
1056 for (i = 0; i < 3; i++)
1057 get_argument_regs(c, inst->src[i], args[i]);
1058
1059 /* Get dest regs:
1060 */
1061 for (i = 0; i < 4; i++)
1062 if (inst->dst[i])
1063 dst[i] = inst->dst[i]->hw_reg;
1064 else
1065 dst[i] = brw_null_reg();
1066
1067 /* Flags
1068 */
1069 dst_flags = inst->writemask;
1070 if (inst->saturate)
1071 dst_flags |= SATURATE;
1072
1073 switch (inst->opcode) {
1074 /* Generated instructions for calculating triangle interpolants:
1075 */
1076 case WM_PIXELXY:
1077 emit_pixel_xy(p, dst, dst_flags, args[0]);
1078 break;
1079
1080 case WM_DELTAXY:
1081 emit_delta_xy(p, dst, dst_flags, args[0], args[1]);
1082 break;
1083
1084 case WM_WPOSXY:
1085 emit_wpos_xy(p, dst, dst_flags, args[0]);
1086 break;
1087
1088 case WM_PIXELW:
1089 emit_pixel_w(p, dst, dst_flags, args[0], args[1]);
1090 break;
1091
1092 case WM_LINTERP:
1093 emit_linterp(p, dst, dst_flags, args[0], args[1]);
1094 break;
1095
1096 case WM_PINTERP:
1097 emit_pinterp(p, dst, dst_flags, args[0], args[1], args[2]);
1098 break;
1099
1100 case WM_CINTERP:
1101 emit_cinterp(p, dst, dst_flags, args[0]);
1102 break;
1103
1104 case WM_FB_WRITE:
1105 emit_fb_write(c, args[0], args[1], args[2]);
1106 break;
1107
1108 /* Straightforward arithmetic:
1109 */
1110 case OPCODE_ADD:
1111 emit_alu2(p, brw_ADD, dst, dst_flags, args[0], args[1]);
1112 break;
1113
1114 case OPCODE_FRC:
1115 emit_alu1(p, brw_FRC, dst, dst_flags, args[0]);
1116 break;
1117
1118 case OPCODE_FLR:
1119 emit_alu1(p, brw_RNDD, dst, dst_flags, args[0]);
1120 break;
1121
1122 case OPCODE_DP3: /* */
1123 emit_dp3(p, dst, dst_flags, args[0], args[1]);
1124 break;
1125
1126 case OPCODE_DP4:
1127 emit_dp4(p, dst, dst_flags, args[0], args[1]);
1128 break;
1129
1130 case OPCODE_DPH:
1131 emit_dph(p, dst, dst_flags, args[0], args[1]);
1132 break;
1133
1134 case OPCODE_LRP: /* */
1135 emit_lrp(p, dst, dst_flags, args[0], args[1], args[2]);
1136 break;
1137
1138 case OPCODE_MAD:
1139 emit_mad(p, dst, dst_flags, args[0], args[1], args[2]);
1140 break;
1141
1142 case OPCODE_MOV:
1143 case OPCODE_SWZ:
1144 emit_alu1(p, brw_MOV, dst, dst_flags, args[0]);
1145 break;
1146
1147 case OPCODE_MUL:
1148 emit_alu2(p, brw_MUL, dst, dst_flags, args[0], args[1]);
1149 break;
1150
1151 case OPCODE_XPD:
1152 emit_xpd(p, dst, dst_flags, args[0], args[1]);
1153 break;
1154
1155 /* Higher math functions:
1156 */
1157 case OPCODE_RCP:
1158 emit_math1(p, BRW_MATH_FUNCTION_INV, dst, dst_flags, args[0]);
1159 break;
1160
1161 case OPCODE_RSQ:
1162 emit_math1(p, BRW_MATH_FUNCTION_RSQ, dst, dst_flags, args[0]);
1163 break;
1164
1165 case OPCODE_SIN:
1166 emit_math1(p, BRW_MATH_FUNCTION_SIN, dst, dst_flags, args[0]);
1167 break;
1168
1169 case OPCODE_COS:
1170 emit_math1(p, BRW_MATH_FUNCTION_COS, dst, dst_flags, args[0]);
1171 break;
1172
1173 case OPCODE_EX2:
1174 emit_math1(p, BRW_MATH_FUNCTION_EXP, dst, dst_flags, args[0]);
1175 break;
1176
1177 case OPCODE_LG2:
1178 emit_math1(p, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]);
1179 break;
1180
1181 case OPCODE_SCS:
1182 /* There is an scs math function, but it would need some
1183 * fixup for 16-element execution.
1184 */
1185 if (dst_flags & WRITEMASK_X)
1186 emit_math1(p, BRW_MATH_FUNCTION_COS, dst, (dst_flags&SATURATE)|WRITEMASK_X, args[0]);
1187 if (dst_flags & WRITEMASK_Y)
1188 emit_math1(p, BRW_MATH_FUNCTION_SIN, dst+1, (dst_flags&SATURATE)|WRITEMASK_X, args[0]);
1189 break;
1190
1191 case OPCODE_POW:
1192 emit_math2(p, BRW_MATH_FUNCTION_POW, dst, dst_flags, args[0], args[1]);
1193 break;
1194
1195 /* Comparisons:
1196 */
1197 case OPCODE_CMP:
1198 emit_cmp(p, dst, dst_flags, args[0], args[1], args[2]);
1199 break;
1200
1201 case OPCODE_MAX:
1202 emit_max(p, dst, dst_flags, args[0], args[1]);
1203 break;
1204
1205 case OPCODE_MIN:
1206 emit_min(p, dst, dst_flags, args[0], args[1]);
1207 break;
1208
1209 case OPCODE_SLT:
1210 emit_slt(p, dst, dst_flags, args[0], args[1]);
1211 break;
1212
1213 case OPCODE_SGE:
1214 emit_sge(p, dst, dst_flags, args[0], args[1]);
1215 break;
1216
1217 case OPCODE_LIT:
1218 emit_lit(p, dst, dst_flags, args[0]);
1219 break;
1220
1221 /* Texturing operations:
1222 */
1223 case OPCODE_TEX:
1224 emit_tex(c, inst, dst, dst_flags, args[0]);
1225 break;
1226
1227 case OPCODE_TXB:
1228 emit_txb(c, inst, dst, dst_flags, args[0]);
1229 break;
1230
1231 case OPCODE_KIL:
1232 emit_kil(c, args[0]);
1233 break;
1234
1235 default:
1236 assert(0);
1237 }
1238
1239 for (i = 0; i < 4; i++)
1240 if (inst->dst[i] && inst->dst[i]->spill_slot)
1241 emit_spill(c,
1242 inst->dst[i]->hw_reg,
1243 inst->dst[i]->spill_slot);
1244 }
1245 }
1246
1247
1248
1249
1250