05e464d4b61726693af09a639dba25487234af26
[mesa.git] / src / mesa / drivers / dri / i965 / brw_wm_emit.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "main/macros.h"
34 #include "brw_context.h"
35 #include "brw_wm.h"
36
37 static GLboolean can_do_pln(struct intel_context *intel,
38 const struct brw_reg *deltas)
39 {
40 struct brw_context *brw = brw_context(&intel->ctx);
41
42 if (!brw->has_pln)
43 return GL_FALSE;
44
45 if (deltas[1].nr != deltas[0].nr + 1)
46 return GL_FALSE;
47
48 if (intel->gen < 6 && ((deltas[0].nr & 1) != 0))
49 return GL_FALSE;
50
51 return GL_TRUE;
52 }
53
54 /* Not quite sure how correct this is - need to understand horiz
55 * vs. vertical strides a little better.
56 */
57 static INLINE struct brw_reg sechalf( struct brw_reg reg )
58 {
59 if (reg.vstride)
60 reg.nr++;
61 return reg;
62 }
63
64
65 /**
66 * Computes the screen-space x,y position of the pixels.
67 *
68 * This will be used by emit_delta_xy() or emit_wpos_xy() for
69 * interpolation of attributes..
70 *
71 * Payload R0:
72 *
73 * R0.0 -- pixel mask, one bit for each of 4 pixels in 4 tiles,
74 * corresponding to each of the 16 execution channels.
75 * R0.1..8 -- ?
76 * R1.0 -- triangle vertex 0.X
77 * R1.1 -- triangle vertex 0.Y
78 * R1.2 -- tile 0 x,y coords (2 packed uwords)
79 * R1.3 -- tile 1 x,y coords (2 packed uwords)
80 * R1.4 -- tile 2 x,y coords (2 packed uwords)
81 * R1.5 -- tile 3 x,y coords (2 packed uwords)
82 * R1.6 -- ?
83 * R1.7 -- ?
84 * R1.8 -- ?
85 */
86 void emit_pixel_xy(struct brw_wm_compile *c,
87 const struct brw_reg *dst,
88 GLuint mask)
89 {
90 struct brw_compile *p = &c->func;
91 struct brw_reg r1 = brw_vec1_grf(1, 0);
92 struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW);
93 struct brw_reg dst0_uw, dst1_uw;
94
95 brw_push_insn_state(p);
96 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
97
98 if (c->dispatch_width == 16) {
99 dst0_uw = vec16(retype(dst[0], BRW_REGISTER_TYPE_UW));
100 dst1_uw = vec16(retype(dst[1], BRW_REGISTER_TYPE_UW));
101 } else {
102 dst0_uw = vec8(retype(dst[0], BRW_REGISTER_TYPE_UW));
103 dst1_uw = vec8(retype(dst[1], BRW_REGISTER_TYPE_UW));
104 }
105
106 /* Calculate pixel centers by adding 1 or 0 to each of the
107 * micro-tile coordinates passed in r1.
108 */
109 if (mask & WRITEMASK_X) {
110 brw_ADD(p,
111 dst0_uw,
112 stride(suboffset(r1_uw, 4), 2, 4, 0),
113 brw_imm_v(0x10101010));
114 }
115
116 if (mask & WRITEMASK_Y) {
117 brw_ADD(p,
118 dst1_uw,
119 stride(suboffset(r1_uw,5), 2, 4, 0),
120 brw_imm_v(0x11001100));
121 }
122 brw_pop_insn_state(p);
123 }
124
125 /**
126 * Computes the screen-space x,y distance of the pixels from the start
127 * vertex.
128 *
129 * This will be used in linterp or pinterp with the start vertex value
130 * and the Cx, Cy, and C0 coefficients passed in from the setup engine
131 * to produce interpolated attribute values.
132 */
133 void emit_delta_xy(struct brw_compile *p,
134 const struct brw_reg *dst,
135 GLuint mask,
136 const struct brw_reg *arg0)
137 {
138 struct brw_reg r1 = brw_vec1_grf(1, 0);
139
140 if (mask == 0)
141 return;
142
143 assert(mask == WRITEMASK_XY);
144
145 /* Calc delta X,Y by subtracting origin in r1 from the pixel
146 * centers produced by emit_pixel_xy().
147 */
148 brw_ADD(p,
149 dst[0],
150 retype(arg0[0], BRW_REGISTER_TYPE_UW),
151 negate(r1));
152 brw_ADD(p,
153 dst[1],
154 retype(arg0[1], BRW_REGISTER_TYPE_UW),
155 negate(suboffset(r1,1)));
156 }
157
158 /**
159 * Computes the pixel offset from the window origin for gl_FragCoord().
160 */
161 void emit_wpos_xy(struct brw_wm_compile *c,
162 const struct brw_reg *dst,
163 GLuint mask,
164 const struct brw_reg *arg0)
165 {
166 struct brw_compile *p = &c->func;
167
168 if (mask & WRITEMASK_X) {
169 if (c->fp->program.PixelCenterInteger) {
170 /* X' = X */
171 brw_MOV(p,
172 dst[0],
173 retype(arg0[0], BRW_REGISTER_TYPE_W));
174 } else {
175 /* X' = X + 0.5 */
176 brw_ADD(p,
177 dst[0],
178 retype(arg0[0], BRW_REGISTER_TYPE_W),
179 brw_imm_f(0.5));
180 }
181 }
182
183 if (mask & WRITEMASK_Y) {
184 if (c->fp->program.OriginUpperLeft) {
185 if (c->fp->program.PixelCenterInteger) {
186 /* Y' = Y */
187 brw_MOV(p,
188 dst[1],
189 retype(arg0[1], BRW_REGISTER_TYPE_W));
190 } else {
191 /* Y' = Y + 0.5 */
192 brw_ADD(p,
193 dst[1],
194 retype(arg0[1], BRW_REGISTER_TYPE_W),
195 brw_imm_f(0.5));
196 }
197 } else {
198 float center_offset = c->fp->program.PixelCenterInteger ? 0.0 : 0.5;
199
200 /* Y' = (height - 1) - Y + center */
201 brw_ADD(p,
202 dst[1],
203 negate(retype(arg0[1], BRW_REGISTER_TYPE_W)),
204 brw_imm_f(c->key.drawable_height - 1 + center_offset));
205 }
206 }
207 }
208
209
210 void emit_pixel_w(struct brw_wm_compile *c,
211 const struct brw_reg *dst,
212 GLuint mask,
213 const struct brw_reg *arg0,
214 const struct brw_reg *deltas)
215 {
216 struct brw_compile *p = &c->func;
217 struct intel_context *intel = &p->brw->intel;
218
219 /* Don't need this if all you are doing is interpolating color, for
220 * instance.
221 */
222 if (mask & WRITEMASK_W) {
223 struct brw_reg interp3 = brw_vec1_grf(arg0[0].nr+1, 4);
224
225 /* Calc 1/w - just linterp wpos[3] optimized by putting the
226 * result straight into a message reg.
227 */
228 if (can_do_pln(intel, deltas)) {
229 brw_PLN(p, brw_message_reg(2), interp3, deltas[0]);
230 } else {
231 brw_LINE(p, brw_null_reg(), interp3, deltas[0]);
232 brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), deltas[1]);
233 }
234
235 /* Calc w */
236 if (c->dispatch_width == 16) {
237 brw_math_16(p, dst[3],
238 BRW_MATH_FUNCTION_INV,
239 BRW_MATH_SATURATE_NONE,
240 2, brw_null_reg(),
241 BRW_MATH_PRECISION_FULL);
242 } else {
243 brw_math(p, dst[3],
244 BRW_MATH_FUNCTION_INV,
245 BRW_MATH_SATURATE_NONE,
246 2, brw_null_reg(),
247 BRW_MATH_DATA_VECTOR,
248 BRW_MATH_PRECISION_FULL);
249 }
250 }
251 }
252
253
254 void emit_linterp(struct brw_compile *p,
255 const struct brw_reg *dst,
256 GLuint mask,
257 const struct brw_reg *arg0,
258 const struct brw_reg *deltas)
259 {
260 struct intel_context *intel = &p->brw->intel;
261 struct brw_reg interp[4];
262 GLuint nr = arg0[0].nr;
263 GLuint i;
264
265 interp[0] = brw_vec1_grf(nr, 0);
266 interp[1] = brw_vec1_grf(nr, 4);
267 interp[2] = brw_vec1_grf(nr+1, 0);
268 interp[3] = brw_vec1_grf(nr+1, 4);
269
270 for (i = 0; i < 4; i++) {
271 if (mask & (1<<i)) {
272 if (can_do_pln(intel, deltas)) {
273 brw_PLN(p, dst[i], interp[i], deltas[0]);
274 } else {
275 brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
276 brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
277 }
278 }
279 }
280 }
281
282
283 void emit_pinterp(struct brw_compile *p,
284 const struct brw_reg *dst,
285 GLuint mask,
286 const struct brw_reg *arg0,
287 const struct brw_reg *deltas,
288 const struct brw_reg *w)
289 {
290 struct intel_context *intel = &p->brw->intel;
291 struct brw_reg interp[4];
292 GLuint nr = arg0[0].nr;
293 GLuint i;
294
295 interp[0] = brw_vec1_grf(nr, 0);
296 interp[1] = brw_vec1_grf(nr, 4);
297 interp[2] = brw_vec1_grf(nr+1, 0);
298 interp[3] = brw_vec1_grf(nr+1, 4);
299
300 for (i = 0; i < 4; i++) {
301 if (mask & (1<<i)) {
302 if (can_do_pln(intel, deltas)) {
303 brw_PLN(p, dst[i], interp[i], deltas[0]);
304 } else {
305 brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
306 brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
307 }
308 }
309 }
310 for (i = 0; i < 4; i++) {
311 if (mask & (1<<i)) {
312 brw_MUL(p, dst[i], dst[i], w[3]);
313 }
314 }
315 }
316
317
318 void emit_cinterp(struct brw_compile *p,
319 const struct brw_reg *dst,
320 GLuint mask,
321 const struct brw_reg *arg0)
322 {
323 struct brw_reg interp[4];
324 GLuint nr = arg0[0].nr;
325 GLuint i;
326
327 interp[0] = brw_vec1_grf(nr, 0);
328 interp[1] = brw_vec1_grf(nr, 4);
329 interp[2] = brw_vec1_grf(nr+1, 0);
330 interp[3] = brw_vec1_grf(nr+1, 4);
331
332 for (i = 0; i < 4; i++) {
333 if (mask & (1<<i)) {
334 brw_MOV(p, dst[i], suboffset(interp[i],3)); /* TODO: optimize away like other moves */
335 }
336 }
337 }
338
339 /* Sets the destination channels to 1.0 or 0.0 according to glFrontFacing. */
340 void emit_frontfacing(struct brw_compile *p,
341 const struct brw_reg *dst,
342 GLuint mask)
343 {
344 struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD);
345 GLuint i;
346
347 if (!(mask & WRITEMASK_XYZW))
348 return;
349
350 for (i = 0; i < 4; i++) {
351 if (mask & (1<<i)) {
352 brw_MOV(p, dst[i], brw_imm_f(0.0));
353 }
354 }
355
356 /* bit 31 is "primitive is back face", so checking < (1 << 31) gives
357 * us front face
358 */
359 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, r1_6ud, brw_imm_ud(1 << 31));
360 for (i = 0; i < 4; i++) {
361 if (mask & (1<<i)) {
362 brw_MOV(p, dst[i], brw_imm_f(1.0));
363 }
364 }
365 brw_set_predicate_control_flag_value(p, 0xff);
366 }
367
368 /* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input
369 * looking like:
370 *
371 * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br
372 *
373 * and we're trying to produce:
374 *
375 * DDX DDY
376 * dst: (ss0.tr - ss0.tl) (ss0.tl - ss0.bl)
377 * (ss0.tr - ss0.tl) (ss0.tr - ss0.br)
378 * (ss0.br - ss0.bl) (ss0.tl - ss0.bl)
379 * (ss0.br - ss0.bl) (ss0.tr - ss0.br)
380 * (ss1.tr - ss1.tl) (ss1.tl - ss1.bl)
381 * (ss1.tr - ss1.tl) (ss1.tr - ss1.br)
382 * (ss1.br - ss1.bl) (ss1.tl - ss1.bl)
383 * (ss1.br - ss1.bl) (ss1.tr - ss1.br)
384 *
385 * and add another set of two more subspans if in 16-pixel dispatch mode.
386 *
387 * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result
388 * for each pair, and vertstride = 2 jumps us 2 elements after processing a
389 * pair. But for DDY, it's harder, as we want to produce the pairs swizzled
390 * between each other. We could probably do it like ddx and swizzle the right
391 * order later, but bail for now and just produce
392 * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4)
393 */
394 void emit_ddxy(struct brw_compile *p,
395 const struct brw_reg *dst,
396 GLuint mask,
397 GLboolean is_ddx,
398 const struct brw_reg *arg0)
399 {
400 int i;
401 struct brw_reg src0, src1;
402
403 if (mask & SATURATE)
404 brw_set_saturate(p, 1);
405 for (i = 0; i < 4; i++ ) {
406 if (mask & (1<<i)) {
407 if (is_ddx) {
408 src0 = brw_reg(arg0[i].file, arg0[i].nr, 1,
409 BRW_REGISTER_TYPE_F,
410 BRW_VERTICAL_STRIDE_2,
411 BRW_WIDTH_2,
412 BRW_HORIZONTAL_STRIDE_0,
413 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
414 src1 = brw_reg(arg0[i].file, arg0[i].nr, 0,
415 BRW_REGISTER_TYPE_F,
416 BRW_VERTICAL_STRIDE_2,
417 BRW_WIDTH_2,
418 BRW_HORIZONTAL_STRIDE_0,
419 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
420 } else {
421 src0 = brw_reg(arg0[i].file, arg0[i].nr, 0,
422 BRW_REGISTER_TYPE_F,
423 BRW_VERTICAL_STRIDE_4,
424 BRW_WIDTH_4,
425 BRW_HORIZONTAL_STRIDE_0,
426 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
427 src1 = brw_reg(arg0[i].file, arg0[i].nr, 2,
428 BRW_REGISTER_TYPE_F,
429 BRW_VERTICAL_STRIDE_4,
430 BRW_WIDTH_4,
431 BRW_HORIZONTAL_STRIDE_0,
432 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
433 }
434 brw_ADD(p, dst[i], src0, negate(src1));
435 }
436 }
437 if (mask & SATURATE)
438 brw_set_saturate(p, 0);
439 }
440
441 void emit_alu1(struct brw_compile *p,
442 struct brw_instruction *(*func)(struct brw_compile *,
443 struct brw_reg,
444 struct brw_reg),
445 const struct brw_reg *dst,
446 GLuint mask,
447 const struct brw_reg *arg0)
448 {
449 GLuint i;
450
451 if (mask & SATURATE)
452 brw_set_saturate(p, 1);
453
454 for (i = 0; i < 4; i++) {
455 if (mask & (1<<i)) {
456 func(p, dst[i], arg0[i]);
457 }
458 }
459
460 if (mask & SATURATE)
461 brw_set_saturate(p, 0);
462 }
463
464
465 void emit_alu2(struct brw_compile *p,
466 struct brw_instruction *(*func)(struct brw_compile *,
467 struct brw_reg,
468 struct brw_reg,
469 struct brw_reg),
470 const struct brw_reg *dst,
471 GLuint mask,
472 const struct brw_reg *arg0,
473 const struct brw_reg *arg1)
474 {
475 GLuint i;
476
477 if (mask & SATURATE)
478 brw_set_saturate(p, 1);
479
480 for (i = 0; i < 4; i++) {
481 if (mask & (1<<i)) {
482 func(p, dst[i], arg0[i], arg1[i]);
483 }
484 }
485
486 if (mask & SATURATE)
487 brw_set_saturate(p, 0);
488 }
489
490
491 void emit_mad(struct brw_compile *p,
492 const struct brw_reg *dst,
493 GLuint mask,
494 const struct brw_reg *arg0,
495 const struct brw_reg *arg1,
496 const struct brw_reg *arg2)
497 {
498 GLuint i;
499
500 for (i = 0; i < 4; i++) {
501 if (mask & (1<<i)) {
502 brw_MUL(p, dst[i], arg0[i], arg1[i]);
503
504 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
505 brw_ADD(p, dst[i], dst[i], arg2[i]);
506 brw_set_saturate(p, 0);
507 }
508 }
509 }
510
511 void emit_lrp(struct brw_compile *p,
512 const struct brw_reg *dst,
513 GLuint mask,
514 const struct brw_reg *arg0,
515 const struct brw_reg *arg1,
516 const struct brw_reg *arg2)
517 {
518 GLuint i;
519
520 /* Uses dst as a temporary:
521 */
522 for (i = 0; i < 4; i++) {
523 if (mask & (1<<i)) {
524 /* Can I use the LINE instruction for this?
525 */
526 brw_ADD(p, dst[i], negate(arg0[i]), brw_imm_f(1.0));
527 brw_MUL(p, brw_null_reg(), dst[i], arg2[i]);
528
529 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
530 brw_MAC(p, dst[i], arg0[i], arg1[i]);
531 brw_set_saturate(p, 0);
532 }
533 }
534 }
535
536 void emit_sop(struct brw_compile *p,
537 const struct brw_reg *dst,
538 GLuint mask,
539 GLuint cond,
540 const struct brw_reg *arg0,
541 const struct brw_reg *arg1)
542 {
543 GLuint i;
544
545 for (i = 0; i < 4; i++) {
546 if (mask & (1<<i)) {
547 brw_push_insn_state(p);
548 brw_CMP(p, brw_null_reg(), cond, arg1[i], arg0[i]);
549 brw_SEL(p, dst[i], brw_null_reg(), brw_imm_f(1.0));
550 brw_pop_insn_state(p);
551 }
552 }
553 }
554
555 static void emit_slt( struct brw_compile *p,
556 const struct brw_reg *dst,
557 GLuint mask,
558 const struct brw_reg *arg0,
559 const struct brw_reg *arg1 )
560 {
561 emit_sop(p, dst, mask, BRW_CONDITIONAL_L, arg0, arg1);
562 }
563
564 static void emit_sle( struct brw_compile *p,
565 const struct brw_reg *dst,
566 GLuint mask,
567 const struct brw_reg *arg0,
568 const struct brw_reg *arg1 )
569 {
570 emit_sop(p, dst, mask, BRW_CONDITIONAL_LE, arg0, arg1);
571 }
572
573 static void emit_sgt( struct brw_compile *p,
574 const struct brw_reg *dst,
575 GLuint mask,
576 const struct brw_reg *arg0,
577 const struct brw_reg *arg1 )
578 {
579 emit_sop(p, dst, mask, BRW_CONDITIONAL_G, arg0, arg1);
580 }
581
582 static void emit_sge( struct brw_compile *p,
583 const struct brw_reg *dst,
584 GLuint mask,
585 const struct brw_reg *arg0,
586 const struct brw_reg *arg1 )
587 {
588 emit_sop(p, dst, mask, BRW_CONDITIONAL_GE, arg0, arg1);
589 }
590
591 static void emit_seq( struct brw_compile *p,
592 const struct brw_reg *dst,
593 GLuint mask,
594 const struct brw_reg *arg0,
595 const struct brw_reg *arg1 )
596 {
597 emit_sop(p, dst, mask, BRW_CONDITIONAL_EQ, arg0, arg1);
598 }
599
600 static void emit_sne( struct brw_compile *p,
601 const struct brw_reg *dst,
602 GLuint mask,
603 const struct brw_reg *arg0,
604 const struct brw_reg *arg1 )
605 {
606 emit_sop(p, dst, mask, BRW_CONDITIONAL_NEQ, arg0, arg1);
607 }
608
609 void emit_cmp(struct brw_compile *p,
610 const struct brw_reg *dst,
611 GLuint mask,
612 const struct brw_reg *arg0,
613 const struct brw_reg *arg1,
614 const struct brw_reg *arg2)
615 {
616 GLuint i;
617
618 for (i = 0; i < 4; i++) {
619 if (mask & (1<<i)) {
620 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
621 brw_MOV(p, dst[i], arg2[i]);
622 brw_set_saturate(p, 0);
623
624 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], brw_imm_f(0));
625
626 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
627 brw_MOV(p, dst[i], arg1[i]);
628 brw_set_saturate(p, 0);
629 brw_set_predicate_control_flag_value(p, 0xff);
630 }
631 }
632 }
633
634 void emit_max(struct brw_compile *p,
635 const struct brw_reg *dst,
636 GLuint mask,
637 const struct brw_reg *arg0,
638 const struct brw_reg *arg1)
639 {
640 GLuint i;
641
642 for (i = 0; i < 4; i++) {
643 if (mask & (1<<i)) {
644 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0[i], arg1[i]);
645
646 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
647 brw_SEL(p, dst[i], arg0[i], arg1[i]);
648 brw_set_saturate(p, 0);
649 brw_set_predicate_control_flag_value(p, 0xff);
650 }
651 }
652 }
653
654 void emit_min(struct brw_compile *p,
655 const struct brw_reg *dst,
656 GLuint mask,
657 const struct brw_reg *arg0,
658 const struct brw_reg *arg1)
659 {
660 GLuint i;
661
662 for (i = 0; i < 4; i++) {
663 if (mask & (1<<i)) {
664 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]);
665
666 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
667 brw_SEL(p, dst[i], arg0[i], arg1[i]);
668 brw_set_saturate(p, 0);
669 brw_set_predicate_control_flag_value(p, 0xff);
670 }
671 }
672 }
673
674
675 void emit_dp3(struct brw_compile *p,
676 const struct brw_reg *dst,
677 GLuint mask,
678 const struct brw_reg *arg0,
679 const struct brw_reg *arg1)
680 {
681 int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
682
683 if (!(mask & WRITEMASK_XYZW))
684 return; /* Do not emit dead code */
685
686 assert(is_power_of_two(mask & WRITEMASK_XYZW));
687
688 brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
689 brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
690
691 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
692 brw_MAC(p, dst[dst_chan], arg0[2], arg1[2]);
693 brw_set_saturate(p, 0);
694 }
695
696
697 void emit_dp4(struct brw_compile *p,
698 const struct brw_reg *dst,
699 GLuint mask,
700 const struct brw_reg *arg0,
701 const struct brw_reg *arg1)
702 {
703 int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
704
705 if (!(mask & WRITEMASK_XYZW))
706 return; /* Do not emit dead code */
707
708 assert(is_power_of_two(mask & WRITEMASK_XYZW));
709
710 brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
711 brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
712 brw_MAC(p, brw_null_reg(), arg0[2], arg1[2]);
713
714 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
715 brw_MAC(p, dst[dst_chan], arg0[3], arg1[3]);
716 brw_set_saturate(p, 0);
717 }
718
719
720 void emit_dph(struct brw_compile *p,
721 const struct brw_reg *dst,
722 GLuint mask,
723 const struct brw_reg *arg0,
724 const struct brw_reg *arg1)
725 {
726 const int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
727
728 if (!(mask & WRITEMASK_XYZW))
729 return; /* Do not emit dead code */
730
731 assert(is_power_of_two(mask & WRITEMASK_XYZW));
732
733 brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
734 brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
735 brw_MAC(p, dst[dst_chan], arg0[2], arg1[2]);
736
737 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
738 brw_ADD(p, dst[dst_chan], dst[dst_chan], arg1[3]);
739 brw_set_saturate(p, 0);
740 }
741
742
743 void emit_xpd(struct brw_compile *p,
744 const struct brw_reg *dst,
745 GLuint mask,
746 const struct brw_reg *arg0,
747 const struct brw_reg *arg1)
748 {
749 GLuint i;
750
751 assert((mask & WRITEMASK_W) != WRITEMASK_W);
752
753 for (i = 0 ; i < 3; i++) {
754 if (mask & (1<<i)) {
755 GLuint i2 = (i+2)%3;
756 GLuint i1 = (i+1)%3;
757
758 brw_MUL(p, brw_null_reg(), negate(arg0[i2]), arg1[i1]);
759
760 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
761 brw_MAC(p, dst[i], arg0[i1], arg1[i2]);
762 brw_set_saturate(p, 0);
763 }
764 }
765 }
766
767
768 void emit_math1(struct brw_wm_compile *c,
769 GLuint function,
770 const struct brw_reg *dst,
771 GLuint mask,
772 const struct brw_reg *arg0)
773 {
774 struct brw_compile *p = &c->func;
775 int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
776 GLuint saturate = ((mask & SATURATE) ?
777 BRW_MATH_SATURATE_SATURATE :
778 BRW_MATH_SATURATE_NONE);
779
780 if (!(mask & WRITEMASK_XYZW))
781 return; /* Do not emit dead code */
782
783 assert(is_power_of_two(mask & WRITEMASK_XYZW));
784
785 /* If compressed, this will write message reg 2,3 from arg0.x's 16
786 * channels.
787 */
788 brw_MOV(p, brw_message_reg(2), arg0[0]);
789
790 /* Send two messages to perform all 16 operations:
791 */
792 brw_push_insn_state(p);
793 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
794 brw_math(p,
795 dst[dst_chan],
796 function,
797 saturate,
798 2,
799 brw_null_reg(),
800 BRW_MATH_DATA_VECTOR,
801 BRW_MATH_PRECISION_FULL);
802
803 if (c->dispatch_width == 16) {
804 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
805 brw_math(p,
806 offset(dst[dst_chan],1),
807 function,
808 saturate,
809 3,
810 brw_null_reg(),
811 BRW_MATH_DATA_VECTOR,
812 BRW_MATH_PRECISION_FULL);
813 }
814 brw_pop_insn_state(p);
815 }
816
817
818 void emit_math2(struct brw_wm_compile *c,
819 GLuint function,
820 const struct brw_reg *dst,
821 GLuint mask,
822 const struct brw_reg *arg0,
823 const struct brw_reg *arg1)
824 {
825 struct brw_compile *p = &c->func;
826 int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
827 GLuint saturate = ((mask & SATURATE) ?
828 BRW_MATH_SATURATE_SATURATE :
829 BRW_MATH_SATURATE_NONE);
830
831 if (!(mask & WRITEMASK_XYZW))
832 return; /* Do not emit dead code */
833
834 assert(is_power_of_two(mask & WRITEMASK_XYZW));
835
836 brw_push_insn_state(p);
837
838 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
839 brw_MOV(p, brw_message_reg(2), arg0[0]);
840 if (c->dispatch_width == 16) {
841 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
842 brw_MOV(p, brw_message_reg(4), sechalf(arg0[0]));
843 }
844
845 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
846 brw_MOV(p, brw_message_reg(3), arg1[0]);
847 if (c->dispatch_width == 16) {
848 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
849 brw_MOV(p, brw_message_reg(5), sechalf(arg1[0]));
850 }
851
852 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
853 brw_math(p,
854 dst[dst_chan],
855 function,
856 saturate,
857 2,
858 brw_null_reg(),
859 BRW_MATH_DATA_VECTOR,
860 BRW_MATH_PRECISION_FULL);
861
862 /* Send two messages to perform all 16 operations:
863 */
864 if (c->dispatch_width == 16) {
865 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
866 brw_math(p,
867 offset(dst[dst_chan],1),
868 function,
869 saturate,
870 4,
871 brw_null_reg(),
872 BRW_MATH_DATA_VECTOR,
873 BRW_MATH_PRECISION_FULL);
874 }
875 brw_pop_insn_state(p);
876 }
877
878
879 void emit_tex(struct brw_wm_compile *c,
880 struct brw_reg *dst,
881 GLuint dst_flags,
882 struct brw_reg *arg,
883 struct brw_reg depth_payload,
884 GLuint tex_idx,
885 GLuint sampler,
886 GLboolean shadow)
887 {
888 struct brw_compile *p = &c->func;
889 struct intel_context *intel = &p->brw->intel;
890 struct brw_reg dst_retyped;
891 GLuint cur_mrf = 2, response_length;
892 GLuint i, nr_texcoords;
893 GLuint emit;
894 GLuint msg_type;
895 GLuint mrf_per_channel;
896 GLuint simd_mode;
897
898 if (c->dispatch_width == 16) {
899 mrf_per_channel = 2;
900 response_length = 8;
901 dst_retyped = retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW);
902 simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
903 } else {
904 mrf_per_channel = 1;
905 response_length = 4;
906 dst_retyped = retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW);
907 simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8;
908 }
909
910 /* How many input regs are there?
911 */
912 switch (tex_idx) {
913 case TEXTURE_1D_INDEX:
914 emit = WRITEMASK_X;
915 nr_texcoords = 1;
916 break;
917 case TEXTURE_2D_INDEX:
918 case TEXTURE_RECT_INDEX:
919 emit = WRITEMASK_XY;
920 nr_texcoords = 2;
921 break;
922 case TEXTURE_3D_INDEX:
923 case TEXTURE_CUBE_INDEX:
924 emit = WRITEMASK_XYZ;
925 nr_texcoords = 3;
926 break;
927 default:
928 /* unexpected target */
929 abort();
930 }
931
932 /* Pre-Ironlake, the 8-wide sampler always took u,v,r. */
933 if (!intel->is_ironlake && c->dispatch_width == 8)
934 nr_texcoords = 3;
935
936 /* For shadow comparisons, we have to supply u,v,r. */
937 if (shadow)
938 nr_texcoords = 3;
939
940 /* Emit the texcoords. */
941 for (i = 0; i < nr_texcoords; i++) {
942 if (emit & (1<<i))
943 brw_MOV(p, brw_message_reg(cur_mrf), arg[i]);
944 else
945 brw_MOV(p, brw_message_reg(cur_mrf), brw_imm_f(0));
946 cur_mrf += mrf_per_channel;
947 }
948
949 /* Fill in the shadow comparison reference value. */
950 if (shadow) {
951 if (intel->is_ironlake) {
952 /* Fill in the cube map array index value. */
953 brw_MOV(p, brw_message_reg(cur_mrf), brw_imm_f(0));
954 cur_mrf += mrf_per_channel;
955 } else if (c->dispatch_width == 8) {
956 /* Fill in the LOD bias value. */
957 brw_MOV(p, brw_message_reg(cur_mrf), brw_imm_f(0));
958 cur_mrf += mrf_per_channel;
959 }
960 brw_MOV(p, brw_message_reg(cur_mrf), arg[2]);
961 cur_mrf += mrf_per_channel;
962 }
963
964 if (intel->is_ironlake) {
965 if (shadow)
966 msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_COMPARE_IGDNG;
967 else
968 msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_IGDNG;
969 } else {
970 /* Note that G45 and older determines shadow compare and dispatch width
971 * from message length for most messages.
972 */
973 if (c->dispatch_width == 16 && shadow)
974 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE;
975 else
976 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE;
977 }
978
979 brw_SAMPLE(p,
980 dst_retyped,
981 1,
982 retype(depth_payload, BRW_REGISTER_TYPE_UW),
983 SURF_INDEX_TEXTURE(sampler),
984 sampler,
985 dst_flags & WRITEMASK_XYZW,
986 msg_type,
987 response_length,
988 cur_mrf - 1,
989 0,
990 1,
991 simd_mode);
992 }
993
994
995 void emit_txb(struct brw_wm_compile *c,
996 struct brw_reg *dst,
997 GLuint dst_flags,
998 struct brw_reg *arg,
999 struct brw_reg depth_payload,
1000 GLuint tex_idx,
1001 GLuint sampler)
1002 {
1003 struct brw_compile *p = &c->func;
1004 struct intel_context *intel = &p->brw->intel;
1005 GLuint msgLength;
1006 GLuint msg_type;
1007 GLuint mrf_per_channel;
1008 GLuint response_length;
1009 struct brw_reg dst_retyped;
1010
1011 /* The G45 and older chipsets don't support 8-wide dispatch for LOD biased
1012 * samples, so we'll use the 16-wide instruction, leave the second halves
1013 * undefined, and trust the execution mask to keep the undefined pixels
1014 * from mattering.
1015 */
1016 if (c->dispatch_width == 16 || !intel->is_ironlake) {
1017 if (intel->is_ironlake)
1018 msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_IGDNG;
1019 else
1020 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
1021 mrf_per_channel = 2;
1022 dst_retyped = retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW);
1023 response_length = 8;
1024 } else {
1025 msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_IGDNG;
1026 mrf_per_channel = 1;
1027 dst_retyped = retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW);
1028 response_length = 4;
1029 }
1030
1031 /* Shadow ignored for txb. */
1032 switch (tex_idx) {
1033 case TEXTURE_1D_INDEX:
1034 brw_MOV(p, brw_message_reg(2 + 0 * mrf_per_channel), arg[0]);
1035 brw_MOV(p, brw_message_reg(2 + 1 * mrf_per_channel), brw_imm_f(0));
1036 brw_MOV(p, brw_message_reg(2 + 2 * mrf_per_channel), brw_imm_f(0));
1037 break;
1038 case TEXTURE_2D_INDEX:
1039 case TEXTURE_RECT_INDEX:
1040 brw_MOV(p, brw_message_reg(2 + 0 * mrf_per_channel), arg[0]);
1041 brw_MOV(p, brw_message_reg(2 + 1 * mrf_per_channel), arg[1]);
1042 brw_MOV(p, brw_message_reg(2 + 2 * mrf_per_channel), brw_imm_f(0));
1043 break;
1044 case TEXTURE_3D_INDEX:
1045 case TEXTURE_CUBE_INDEX:
1046 brw_MOV(p, brw_message_reg(2 + 0 * mrf_per_channel), arg[0]);
1047 brw_MOV(p, brw_message_reg(2 + 1 * mrf_per_channel), arg[1]);
1048 brw_MOV(p, brw_message_reg(2 + 2 * mrf_per_channel), arg[2]);
1049 break;
1050 default:
1051 /* unexpected target */
1052 abort();
1053 }
1054
1055 brw_MOV(p, brw_message_reg(2 + 3 * mrf_per_channel), arg[3]);
1056 msgLength = 2 + 4 * mrf_per_channel - 1;
1057
1058 brw_SAMPLE(p,
1059 dst_retyped,
1060 1,
1061 retype(depth_payload, BRW_REGISTER_TYPE_UW),
1062 SURF_INDEX_TEXTURE(sampler),
1063 sampler,
1064 dst_flags & WRITEMASK_XYZW,
1065 msg_type,
1066 response_length,
1067 msgLength,
1068 0,
1069 1,
1070 BRW_SAMPLER_SIMD_MODE_SIMD16);
1071 }
1072
1073
1074 static void emit_lit(struct brw_wm_compile *c,
1075 const struct brw_reg *dst,
1076 GLuint mask,
1077 const struct brw_reg *arg0)
1078 {
1079 struct brw_compile *p = &c->func;
1080
1081 assert((mask & WRITEMASK_XW) == 0);
1082
1083 if (mask & WRITEMASK_Y) {
1084 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
1085 brw_MOV(p, dst[1], arg0[0]);
1086 brw_set_saturate(p, 0);
1087 }
1088
1089 if (mask & WRITEMASK_Z) {
1090 emit_math2(c, BRW_MATH_FUNCTION_POW,
1091 &dst[2],
1092 WRITEMASK_X | (mask & SATURATE),
1093 &arg0[1],
1094 &arg0[3]);
1095 }
1096
1097 /* Ordinarily you'd use an iff statement to skip or shortcircuit
1098 * some of the POW calculations above, but 16-wide iff statements
1099 * seem to lock c1 hardware, so this is a nasty workaround:
1100 */
1101 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_LE, arg0[0], brw_imm_f(0));
1102 {
1103 if (mask & WRITEMASK_Y)
1104 brw_MOV(p, dst[1], brw_imm_f(0));
1105
1106 if (mask & WRITEMASK_Z)
1107 brw_MOV(p, dst[2], brw_imm_f(0));
1108 }
1109 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1110 }
1111
1112
1113 /* Kill pixel - set execution mask to zero for those pixels which
1114 * fail.
1115 */
1116 static void emit_kil( struct brw_wm_compile *c,
1117 struct brw_reg *arg0)
1118 {
1119 struct brw_compile *p = &c->func;
1120 struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
1121 GLuint i, j;
1122
1123 for (i = 0; i < 4; i++) {
1124 /* Check if we've already done the comparison for this reg
1125 * -- common when someone does KIL TEMP.wwww.
1126 */
1127 for (j = 0; j < i; j++) {
1128 if (memcmp(&arg0[j], &arg0[i], sizeof(arg0[0])) == 0)
1129 break;
1130 }
1131 if (j != i)
1132 continue;
1133
1134 brw_push_insn_state(p);
1135 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0[i], brw_imm_f(0));
1136 brw_set_predicate_control_flag_value(p, 0xff);
1137 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1138 brw_AND(p, r0uw, brw_flag_reg(), r0uw);
1139 brw_pop_insn_state(p);
1140 }
1141 }
1142
1143 /* KIL_NV kills the pixels that are currently executing, not based on a test
1144 * of the arguments.
1145 */
1146 static void emit_kil_nv( struct brw_wm_compile *c )
1147 {
1148 struct brw_compile *p = &c->func;
1149 struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
1150
1151 brw_push_insn_state(p);
1152 brw_set_mask_control(p, BRW_MASK_DISABLE);
1153 brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); /* IMASK */
1154 brw_AND(p, r0uw, c->emit_mask_reg, r0uw);
1155 brw_pop_insn_state(p);
1156 }
1157
1158 static void fire_fb_write( struct brw_wm_compile *c,
1159 GLuint base_reg,
1160 GLuint nr,
1161 GLuint target,
1162 GLuint eot )
1163 {
1164 struct brw_compile *p = &c->func;
1165 struct brw_reg dst;
1166
1167 if (c->dispatch_width == 16)
1168 dst = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
1169 else
1170 dst = retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW);
1171
1172 /* Pass through control information:
1173 */
1174 /* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */
1175 {
1176 brw_push_insn_state(p);
1177 brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */
1178 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1179 brw_MOV(p,
1180 brw_message_reg(base_reg + 1),
1181 brw_vec8_grf(1, 0));
1182 brw_pop_insn_state(p);
1183 }
1184
1185 /* Send framebuffer write message: */
1186 /* send (16) null.0<1>:uw m0 r0.0<8;8,1>:uw 0x85a04000:ud { Align1 EOT } */
1187 brw_fb_WRITE(p,
1188 dst,
1189 base_reg,
1190 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
1191 target,
1192 nr,
1193 0,
1194 eot);
1195 }
1196
1197
1198 static void emit_aa( struct brw_wm_compile *c,
1199 struct brw_reg *arg1,
1200 GLuint reg )
1201 {
1202 struct brw_compile *p = &c->func;
1203 GLuint comp = c->key.aa_dest_stencil_reg / 2;
1204 GLuint off = c->key.aa_dest_stencil_reg % 2;
1205 struct brw_reg aa = offset(arg1[comp], off);
1206
1207 brw_push_insn_state(p);
1208 brw_set_compression_control(p, BRW_COMPRESSION_NONE); /* ?? */
1209 brw_MOV(p, brw_message_reg(reg), aa);
1210 brw_pop_insn_state(p);
1211 }
1212
1213
1214 /* Post-fragment-program processing. Send the results to the
1215 * framebuffer.
1216 * \param arg0 the fragment color
1217 * \param arg1 the pass-through depth value
1218 * \param arg2 the shader-computed depth value
1219 */
1220 void emit_fb_write(struct brw_wm_compile *c,
1221 struct brw_reg *arg0,
1222 struct brw_reg *arg1,
1223 struct brw_reg *arg2,
1224 GLuint target,
1225 GLuint eot)
1226 {
1227 struct brw_compile *p = &c->func;
1228 struct brw_context *brw = p->brw;
1229 GLuint nr = 2;
1230 GLuint channel;
1231
1232 /* Reserve a space for AA - may not be needed:
1233 */
1234 if (c->key.aa_dest_stencil_reg)
1235 nr += 1;
1236
1237 /* I don't really understand how this achieves the color interleave
1238 * (ie RGBARGBA) in the result: [Do the saturation here]
1239 */
1240 brw_push_insn_state(p);
1241
1242 for (channel = 0; channel < 4; channel++) {
1243 if (c->dispatch_width == 16 && brw->has_compr4) {
1244 /* By setting the high bit of the MRF register number, we indicate
1245 * that we want COMPR4 mode - instead of doing the usual destination
1246 * + 1 for the second half we get destination + 4.
1247 */
1248 brw_MOV(p,
1249 brw_message_reg(nr + channel + (1 << 7)),
1250 arg0[channel]);
1251 } else {
1252 /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */
1253 /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */
1254 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1255 brw_MOV(p,
1256 brw_message_reg(nr + channel),
1257 arg0[channel]);
1258
1259 if (c->dispatch_width == 16) {
1260 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
1261 brw_MOV(p,
1262 brw_message_reg(nr + channel + 4),
1263 sechalf(arg0[channel]));
1264 }
1265 }
1266 }
1267 /* skip over the regs populated above:
1268 */
1269 nr += 8;
1270 brw_pop_insn_state(p);
1271
1272 if (c->key.source_depth_to_render_target)
1273 {
1274 if (c->key.computes_depth)
1275 brw_MOV(p, brw_message_reg(nr), arg2[2]);
1276 else
1277 brw_MOV(p, brw_message_reg(nr), arg1[1]); /* ? */
1278
1279 nr += 2;
1280 }
1281
1282 if (c->key.dest_depth_reg)
1283 {
1284 GLuint comp = c->key.dest_depth_reg / 2;
1285 GLuint off = c->key.dest_depth_reg % 2;
1286
1287 if (off != 0) {
1288 brw_push_insn_state(p);
1289 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1290
1291 brw_MOV(p, brw_message_reg(nr), offset(arg1[comp],1));
1292 /* 2nd half? */
1293 brw_MOV(p, brw_message_reg(nr+1), arg1[comp+1]);
1294 brw_pop_insn_state(p);
1295 }
1296 else {
1297 brw_MOV(p, brw_message_reg(nr), arg1[comp]);
1298 }
1299 nr += 2;
1300 }
1301
1302 if (!c->key.runtime_check_aads_emit) {
1303 if (c->key.aa_dest_stencil_reg)
1304 emit_aa(c, arg1, 2);
1305
1306 fire_fb_write(c, 0, nr, target, eot);
1307 }
1308 else {
1309 struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
1310 struct brw_reg ip = brw_ip_reg();
1311 struct brw_instruction *jmp;
1312
1313 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1314 brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
1315 brw_AND(p,
1316 v1_null_ud,
1317 get_element_ud(brw_vec8_grf(1,0), 6),
1318 brw_imm_ud(1<<26));
1319
1320 jmp = brw_JMPI(p, ip, ip, brw_imm_w(0));
1321 {
1322 emit_aa(c, arg1, 2);
1323 fire_fb_write(c, 0, nr, target, eot);
1324 /* note - thread killed in subroutine */
1325 }
1326 brw_land_fwd_jump(p, jmp);
1327
1328 /* ELSE: Shuffle up one register to fill in the hole left for AA:
1329 */
1330 fire_fb_write(c, 1, nr-1, target, eot);
1331 }
1332 }
1333
1334 /**
1335 * Move a GPR to scratch memory.
1336 */
1337 static void emit_spill( struct brw_wm_compile *c,
1338 struct brw_reg reg,
1339 GLuint slot )
1340 {
1341 struct brw_compile *p = &c->func;
1342
1343 /*
1344 mov (16) m2.0<1>:ud r2.0<8;8,1>:ud { Align1 Compr }
1345 */
1346 brw_MOV(p, brw_message_reg(2), reg);
1347
1348 /*
1349 mov (1) r0.2<1>:d 0x00000080:d { Align1 NoMask }
1350 send (16) null.0<1>:uw m1 r0.0<8;8,1>:uw 0x053003ff:ud { Align1 }
1351 */
1352 brw_dp_WRITE_16(p,
1353 retype(vec16(brw_vec8_grf(0, 0)), BRW_REGISTER_TYPE_UW),
1354 slot);
1355 }
1356
1357
1358 /**
1359 * Load a GPR from scratch memory.
1360 */
1361 static void emit_unspill( struct brw_wm_compile *c,
1362 struct brw_reg reg,
1363 GLuint slot )
1364 {
1365 struct brw_compile *p = &c->func;
1366
1367 /* Slot 0 is the undef value.
1368 */
1369 if (slot == 0) {
1370 brw_MOV(p, reg, brw_imm_f(0));
1371 return;
1372 }
1373
1374 /*
1375 mov (1) r0.2<1>:d 0x000000c0:d { Align1 NoMask }
1376 send (16) r110.0<1>:uw m1 r0.0<8;8,1>:uw 0x041243ff:ud { Align1 }
1377 */
1378
1379 brw_dp_READ_16(p,
1380 retype(vec16(reg), BRW_REGISTER_TYPE_UW),
1381 slot);
1382 }
1383
1384
1385 /**
1386 * Retrieve up to 4 GEN4 register pairs for the given wm reg:
1387 * Args with unspill_reg != 0 will be loaded from scratch memory.
1388 */
1389 static void get_argument_regs( struct brw_wm_compile *c,
1390 struct brw_wm_ref *arg[],
1391 struct brw_reg *regs )
1392 {
1393 GLuint i;
1394
1395 for (i = 0; i < 4; i++) {
1396 if (arg[i]) {
1397 if (arg[i]->unspill_reg)
1398 emit_unspill(c,
1399 brw_vec8_grf(arg[i]->unspill_reg, 0),
1400 arg[i]->value->spill_slot);
1401
1402 regs[i] = arg[i]->hw_reg;
1403 }
1404 else {
1405 regs[i] = brw_null_reg();
1406 }
1407 }
1408 }
1409
1410
1411 /**
1412 * For values that have a spill_slot!=0, write those regs to scratch memory.
1413 */
1414 static void spill_values( struct brw_wm_compile *c,
1415 struct brw_wm_value *values,
1416 GLuint nr )
1417 {
1418 GLuint i;
1419
1420 for (i = 0; i < nr; i++)
1421 if (values[i].spill_slot)
1422 emit_spill(c, values[i].hw_reg, values[i].spill_slot);
1423 }
1424
1425
1426 /* Emit the fragment program instructions here.
1427 */
1428 void brw_wm_emit( struct brw_wm_compile *c )
1429 {
1430 struct brw_compile *p = &c->func;
1431 GLuint insn;
1432
1433 brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
1434
1435 /* Check if any of the payload regs need to be spilled:
1436 */
1437 spill_values(c, c->payload.depth, 4);
1438 spill_values(c, c->creg, c->nr_creg);
1439 spill_values(c, c->payload.input_interp, FRAG_ATTRIB_MAX);
1440
1441
1442 for (insn = 0; insn < c->nr_insns; insn++) {
1443
1444 struct brw_wm_instruction *inst = &c->instruction[insn];
1445 struct brw_reg args[3][4], dst[4];
1446 GLuint i, dst_flags;
1447
1448 /* Get argument regs:
1449 */
1450 for (i = 0; i < 3; i++)
1451 get_argument_regs(c, inst->src[i], args[i]);
1452
1453 /* Get dest regs:
1454 */
1455 for (i = 0; i < 4; i++)
1456 if (inst->dst[i])
1457 dst[i] = inst->dst[i]->hw_reg;
1458 else
1459 dst[i] = brw_null_reg();
1460
1461 /* Flags
1462 */
1463 dst_flags = inst->writemask;
1464 if (inst->saturate)
1465 dst_flags |= SATURATE;
1466
1467 switch (inst->opcode) {
1468 /* Generated instructions for calculating triangle interpolants:
1469 */
1470 case WM_PIXELXY:
1471 emit_pixel_xy(c, dst, dst_flags);
1472 break;
1473
1474 case WM_DELTAXY:
1475 emit_delta_xy(p, dst, dst_flags, args[0]);
1476 break;
1477
1478 case WM_WPOSXY:
1479 emit_wpos_xy(c, dst, dst_flags, args[0]);
1480 break;
1481
1482 case WM_PIXELW:
1483 emit_pixel_w(c, dst, dst_flags, args[0], args[1]);
1484 break;
1485
1486 case WM_LINTERP:
1487 emit_linterp(p, dst, dst_flags, args[0], args[1]);
1488 break;
1489
1490 case WM_PINTERP:
1491 emit_pinterp(p, dst, dst_flags, args[0], args[1], args[2]);
1492 break;
1493
1494 case WM_CINTERP:
1495 emit_cinterp(p, dst, dst_flags, args[0]);
1496 break;
1497
1498 case WM_FB_WRITE:
1499 emit_fb_write(c, args[0], args[1], args[2], inst->target, inst->eot);
1500 break;
1501
1502 case WM_FRONTFACING:
1503 emit_frontfacing(p, dst, dst_flags);
1504 break;
1505
1506 /* Straightforward arithmetic:
1507 */
1508 case OPCODE_ADD:
1509 emit_alu2(p, brw_ADD, dst, dst_flags, args[0], args[1]);
1510 break;
1511
1512 case OPCODE_FRC:
1513 emit_alu1(p, brw_FRC, dst, dst_flags, args[0]);
1514 break;
1515
1516 case OPCODE_FLR:
1517 emit_alu1(p, brw_RNDD, dst, dst_flags, args[0]);
1518 break;
1519
1520 case OPCODE_DDX:
1521 emit_ddxy(p, dst, dst_flags, GL_TRUE, args[0]);
1522 break;
1523
1524 case OPCODE_DDY:
1525 emit_ddxy(p, dst, dst_flags, GL_FALSE, args[0]);
1526 break;
1527
1528 case OPCODE_DP3:
1529 emit_dp3(p, dst, dst_flags, args[0], args[1]);
1530 break;
1531
1532 case OPCODE_DP4:
1533 emit_dp4(p, dst, dst_flags, args[0], args[1]);
1534 break;
1535
1536 case OPCODE_DPH:
1537 emit_dph(p, dst, dst_flags, args[0], args[1]);
1538 break;
1539
1540 case OPCODE_TRUNC:
1541 emit_alu1(p, brw_RNDZ, dst, dst_flags, args[0]);
1542 break;
1543
1544 case OPCODE_LRP:
1545 emit_lrp(p, dst, dst_flags, args[0], args[1], args[2]);
1546 break;
1547
1548 case OPCODE_MAD:
1549 emit_mad(p, dst, dst_flags, args[0], args[1], args[2]);
1550 break;
1551
1552 case OPCODE_MOV:
1553 case OPCODE_SWZ:
1554 emit_alu1(p, brw_MOV, dst, dst_flags, args[0]);
1555 break;
1556
1557 case OPCODE_MUL:
1558 emit_alu2(p, brw_MUL, dst, dst_flags, args[0], args[1]);
1559 break;
1560
1561 case OPCODE_XPD:
1562 emit_xpd(p, dst, dst_flags, args[0], args[1]);
1563 break;
1564
1565 /* Higher math functions:
1566 */
1567 case OPCODE_RCP:
1568 emit_math1(c, BRW_MATH_FUNCTION_INV, dst, dst_flags, args[0]);
1569 break;
1570
1571 case OPCODE_RSQ:
1572 emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst, dst_flags, args[0]);
1573 break;
1574
1575 case OPCODE_SIN:
1576 emit_math1(c, BRW_MATH_FUNCTION_SIN, dst, dst_flags, args[0]);
1577 break;
1578
1579 case OPCODE_COS:
1580 emit_math1(c, BRW_MATH_FUNCTION_COS, dst, dst_flags, args[0]);
1581 break;
1582
1583 case OPCODE_EX2:
1584 emit_math1(c, BRW_MATH_FUNCTION_EXP, dst, dst_flags, args[0]);
1585 break;
1586
1587 case OPCODE_LG2:
1588 emit_math1(c, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]);
1589 break;
1590
1591 case OPCODE_SCS:
1592 /* There is an scs math function, but it would need some
1593 * fixup for 16-element execution.
1594 */
1595 if (dst_flags & WRITEMASK_X)
1596 emit_math1(c, BRW_MATH_FUNCTION_COS, dst, (dst_flags&SATURATE)|WRITEMASK_X, args[0]);
1597 if (dst_flags & WRITEMASK_Y)
1598 emit_math1(c, BRW_MATH_FUNCTION_SIN, dst+1, (dst_flags&SATURATE)|WRITEMASK_X, args[0]);
1599 break;
1600
1601 case OPCODE_POW:
1602 emit_math2(c, BRW_MATH_FUNCTION_POW, dst, dst_flags, args[0], args[1]);
1603 break;
1604
1605 /* Comparisons:
1606 */
1607 case OPCODE_CMP:
1608 emit_cmp(p, dst, dst_flags, args[0], args[1], args[2]);
1609 break;
1610
1611 case OPCODE_MAX:
1612 emit_max(p, dst, dst_flags, args[0], args[1]);
1613 break;
1614
1615 case OPCODE_MIN:
1616 emit_min(p, dst, dst_flags, args[0], args[1]);
1617 break;
1618
1619 case OPCODE_SLT:
1620 emit_slt(p, dst, dst_flags, args[0], args[1]);
1621 break;
1622
1623 case OPCODE_SLE:
1624 emit_sle(p, dst, dst_flags, args[0], args[1]);
1625 break;
1626 case OPCODE_SGT:
1627 emit_sgt(p, dst, dst_flags, args[0], args[1]);
1628 break;
1629 case OPCODE_SGE:
1630 emit_sge(p, dst, dst_flags, args[0], args[1]);
1631 break;
1632 case OPCODE_SEQ:
1633 emit_seq(p, dst, dst_flags, args[0], args[1]);
1634 break;
1635 case OPCODE_SNE:
1636 emit_sne(p, dst, dst_flags, args[0], args[1]);
1637 break;
1638
1639 case OPCODE_LIT:
1640 emit_lit(c, dst, dst_flags, args[0]);
1641 break;
1642
1643 /* Texturing operations:
1644 */
1645 case OPCODE_TEX:
1646 emit_tex(c, dst, dst_flags, args[0], c->payload.depth[0].hw_reg,
1647 inst->tex_idx, inst->tex_unit,
1648 inst->tex_shadow);
1649 break;
1650
1651 case OPCODE_TXB:
1652 emit_txb(c, dst, dst_flags, args[0], c->payload.depth[0].hw_reg,
1653 inst->tex_idx, inst->tex_unit);
1654 break;
1655
1656 case OPCODE_KIL:
1657 emit_kil(c, args[0]);
1658 break;
1659
1660 case OPCODE_KIL_NV:
1661 emit_kil_nv(c);
1662 break;
1663
1664 default:
1665 printf("Unsupported opcode %i (%s) in fragment shader\n",
1666 inst->opcode, inst->opcode < MAX_OPCODE ?
1667 _mesa_opcode_string(inst->opcode) :
1668 "unknown");
1669 }
1670
1671 for (i = 0; i < 4; i++)
1672 if (inst->dst[i] && inst->dst[i]->spill_slot)
1673 emit_spill(c,
1674 inst->dst[i]->hw_reg,
1675 inst->dst[i]->spill_slot);
1676 }
1677
1678 if (INTEL_DEBUG & DEBUG_WM) {
1679 int i;
1680
1681 printf("wm-native:\n");
1682 for (i = 0; i < p->nr_insn; i++)
1683 brw_disasm(stderr, &p->store[i]);
1684 printf("\n");
1685 }
1686 }