Merge branch '7.8' into master
[mesa.git] / src / mesa / drivers / dri / i965 / brw_wm_emit.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "main/macros.h"
34 #include "brw_context.h"
35 #include "brw_wm.h"
36
37 static GLboolean can_do_pln(struct intel_context *intel,
38 const struct brw_reg *deltas)
39 {
40 struct brw_context *brw = brw_context(&intel->ctx);
41
42 if (!brw->has_pln)
43 return GL_FALSE;
44
45 if (deltas[1].nr != deltas[0].nr + 1)
46 return GL_FALSE;
47
48 if (intel->gen < 6 && ((deltas[0].nr & 1) != 0))
49 return GL_FALSE;
50
51 return GL_TRUE;
52 }
53
54 /* Not quite sure how correct this is - need to understand horiz
55 * vs. vertical strides a little better.
56 */
57 static INLINE struct brw_reg sechalf( struct brw_reg reg )
58 {
59 if (reg.vstride)
60 reg.nr++;
61 return reg;
62 }
63
64
65 /**
66 * Computes the screen-space x,y position of the pixels.
67 *
68 * This will be used by emit_delta_xy() or emit_wpos_xy() for
69 * interpolation of attributes..
70 *
71 * Payload R0:
72 *
73 * R0.0 -- pixel mask, one bit for each of 4 pixels in 4 tiles,
74 * corresponding to each of the 16 execution channels.
75 * R0.1..8 -- ?
76 * R1.0 -- triangle vertex 0.X
77 * R1.1 -- triangle vertex 0.Y
78 * R1.2 -- tile 0 x,y coords (2 packed uwords)
79 * R1.3 -- tile 1 x,y coords (2 packed uwords)
80 * R1.4 -- tile 2 x,y coords (2 packed uwords)
81 * R1.5 -- tile 3 x,y coords (2 packed uwords)
82 * R1.6 -- ?
83 * R1.7 -- ?
84 * R1.8 -- ?
85 */
86 void emit_pixel_xy(struct brw_wm_compile *c,
87 const struct brw_reg *dst,
88 GLuint mask)
89 {
90 struct brw_compile *p = &c->func;
91 struct brw_reg r1 = brw_vec1_grf(1, 0);
92 struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW);
93 struct brw_reg dst0_uw, dst1_uw;
94
95 brw_push_insn_state(p);
96 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
97
98 if (c->dispatch_width == 16) {
99 dst0_uw = vec16(retype(dst[0], BRW_REGISTER_TYPE_UW));
100 dst1_uw = vec16(retype(dst[1], BRW_REGISTER_TYPE_UW));
101 } else {
102 dst0_uw = vec8(retype(dst[0], BRW_REGISTER_TYPE_UW));
103 dst1_uw = vec8(retype(dst[1], BRW_REGISTER_TYPE_UW));
104 }
105
106 /* Calculate pixel centers by adding 1 or 0 to each of the
107 * micro-tile coordinates passed in r1.
108 */
109 if (mask & WRITEMASK_X) {
110 brw_ADD(p,
111 dst0_uw,
112 stride(suboffset(r1_uw, 4), 2, 4, 0),
113 brw_imm_v(0x10101010));
114 }
115
116 if (mask & WRITEMASK_Y) {
117 brw_ADD(p,
118 dst1_uw,
119 stride(suboffset(r1_uw,5), 2, 4, 0),
120 brw_imm_v(0x11001100));
121 }
122 brw_pop_insn_state(p);
123 }
124
125 /**
126 * Computes the screen-space x,y distance of the pixels from the start
127 * vertex.
128 *
129 * This will be used in linterp or pinterp with the start vertex value
130 * and the Cx, Cy, and C0 coefficients passed in from the setup engine
131 * to produce interpolated attribute values.
132 */
133 void emit_delta_xy(struct brw_compile *p,
134 const struct brw_reg *dst,
135 GLuint mask,
136 const struct brw_reg *arg0)
137 {
138 struct brw_reg r1 = brw_vec1_grf(1, 0);
139
140 if (mask == 0)
141 return;
142
143 assert(mask == WRITEMASK_XY);
144
145 /* Calc delta X,Y by subtracting origin in r1 from the pixel
146 * centers produced by emit_pixel_xy().
147 */
148 brw_ADD(p,
149 dst[0],
150 retype(arg0[0], BRW_REGISTER_TYPE_UW),
151 negate(r1));
152 brw_ADD(p,
153 dst[1],
154 retype(arg0[1], BRW_REGISTER_TYPE_UW),
155 negate(suboffset(r1,1)));
156 }
157
158 /**
159 * Computes the pixel offset from the window origin for gl_FragCoord().
160 */
161 void emit_wpos_xy(struct brw_wm_compile *c,
162 const struct brw_reg *dst,
163 GLuint mask,
164 const struct brw_reg *arg0)
165 {
166 struct brw_compile *p = &c->func;
167
168 if (mask & WRITEMASK_X) {
169 if (c->fp->program.PixelCenterInteger) {
170 /* X' = X */
171 brw_MOV(p,
172 dst[0],
173 retype(arg0[0], BRW_REGISTER_TYPE_W));
174 } else {
175 /* X' = X + 0.5 */
176 brw_ADD(p,
177 dst[0],
178 retype(arg0[0], BRW_REGISTER_TYPE_W),
179 brw_imm_f(0.5));
180 }
181 }
182
183 if (mask & WRITEMASK_Y) {
184 if (c->fp->program.OriginUpperLeft) {
185 if (c->fp->program.PixelCenterInteger) {
186 /* Y' = Y */
187 brw_MOV(p,
188 dst[1],
189 retype(arg0[1], BRW_REGISTER_TYPE_W));
190 } else {
191 /* Y' = Y + 0.5 */
192 brw_ADD(p,
193 dst[1],
194 retype(arg0[1], BRW_REGISTER_TYPE_W),
195 brw_imm_f(0.5));
196 }
197 } else {
198 float center_offset = c->fp->program.PixelCenterInteger ? 0.0 : 0.5;
199
200 /* Y' = (height - 1) - Y + center */
201 brw_ADD(p,
202 dst[1],
203 negate(retype(arg0[1], BRW_REGISTER_TYPE_W)),
204 brw_imm_f(c->key.drawable_height - 1 + center_offset));
205 }
206 }
207 }
208
209
210 void emit_pixel_w(struct brw_wm_compile *c,
211 const struct brw_reg *dst,
212 GLuint mask,
213 const struct brw_reg *arg0,
214 const struct brw_reg *deltas)
215 {
216 struct brw_compile *p = &c->func;
217 struct intel_context *intel = &p->brw->intel;
218
219 /* Don't need this if all you are doing is interpolating color, for
220 * instance.
221 */
222 if (mask & WRITEMASK_W) {
223 struct brw_reg interp3 = brw_vec1_grf(arg0[0].nr+1, 4);
224
225 /* Calc 1/w - just linterp wpos[3] optimized by putting the
226 * result straight into a message reg.
227 */
228 if (can_do_pln(intel, deltas)) {
229 brw_PLN(p, brw_message_reg(2), interp3, deltas[0]);
230 } else {
231 brw_LINE(p, brw_null_reg(), interp3, deltas[0]);
232 brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), deltas[1]);
233 }
234
235 /* Calc w */
236 if (c->dispatch_width == 16) {
237 brw_math_16(p, dst[3],
238 BRW_MATH_FUNCTION_INV,
239 BRW_MATH_SATURATE_NONE,
240 2, brw_null_reg(),
241 BRW_MATH_PRECISION_FULL);
242 } else {
243 brw_math(p, dst[3],
244 BRW_MATH_FUNCTION_INV,
245 BRW_MATH_SATURATE_NONE,
246 2, brw_null_reg(),
247 BRW_MATH_DATA_VECTOR,
248 BRW_MATH_PRECISION_FULL);
249 }
250 }
251 }
252
253
254 void emit_linterp(struct brw_compile *p,
255 const struct brw_reg *dst,
256 GLuint mask,
257 const struct brw_reg *arg0,
258 const struct brw_reg *deltas)
259 {
260 struct intel_context *intel = &p->brw->intel;
261 struct brw_reg interp[4];
262 GLuint nr = arg0[0].nr;
263 GLuint i;
264
265 interp[0] = brw_vec1_grf(nr, 0);
266 interp[1] = brw_vec1_grf(nr, 4);
267 interp[2] = brw_vec1_grf(nr+1, 0);
268 interp[3] = brw_vec1_grf(nr+1, 4);
269
270 for (i = 0; i < 4; i++) {
271 if (mask & (1<<i)) {
272 if (can_do_pln(intel, deltas)) {
273 brw_PLN(p, dst[i], interp[i], deltas[0]);
274 } else {
275 brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
276 brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
277 }
278 }
279 }
280 }
281
282
283 void emit_pinterp(struct brw_compile *p,
284 const struct brw_reg *dst,
285 GLuint mask,
286 const struct brw_reg *arg0,
287 const struct brw_reg *deltas,
288 const struct brw_reg *w)
289 {
290 struct intel_context *intel = &p->brw->intel;
291 struct brw_reg interp[4];
292 GLuint nr = arg0[0].nr;
293 GLuint i;
294
295 interp[0] = brw_vec1_grf(nr, 0);
296 interp[1] = brw_vec1_grf(nr, 4);
297 interp[2] = brw_vec1_grf(nr+1, 0);
298 interp[3] = brw_vec1_grf(nr+1, 4);
299
300 for (i = 0; i < 4; i++) {
301 if (mask & (1<<i)) {
302 if (can_do_pln(intel, deltas)) {
303 brw_PLN(p, dst[i], interp[i], deltas[0]);
304 } else {
305 brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
306 brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
307 }
308 }
309 }
310 for (i = 0; i < 4; i++) {
311 if (mask & (1<<i)) {
312 brw_MUL(p, dst[i], dst[i], w[3]);
313 }
314 }
315 }
316
317
318 void emit_cinterp(struct brw_compile *p,
319 const struct brw_reg *dst,
320 GLuint mask,
321 const struct brw_reg *arg0)
322 {
323 struct brw_reg interp[4];
324 GLuint nr = arg0[0].nr;
325 GLuint i;
326
327 interp[0] = brw_vec1_grf(nr, 0);
328 interp[1] = brw_vec1_grf(nr, 4);
329 interp[2] = brw_vec1_grf(nr+1, 0);
330 interp[3] = brw_vec1_grf(nr+1, 4);
331
332 for (i = 0; i < 4; i++) {
333 if (mask & (1<<i)) {
334 brw_MOV(p, dst[i], suboffset(interp[i],3)); /* TODO: optimize away like other moves */
335 }
336 }
337 }
338
339 /* Sets the destination channels to 1.0 or 0.0 according to glFrontFacing. */
340 void emit_frontfacing(struct brw_compile *p,
341 const struct brw_reg *dst,
342 GLuint mask)
343 {
344 struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD);
345 GLuint i;
346
347 if (!(mask & WRITEMASK_XYZW))
348 return;
349
350 for (i = 0; i < 4; i++) {
351 if (mask & (1<<i)) {
352 brw_MOV(p, dst[i], brw_imm_f(0.0));
353 }
354 }
355
356 /* bit 31 is "primitive is back face", so checking < (1 << 31) gives
357 * us front face
358 */
359 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, r1_6ud, brw_imm_ud(1 << 31));
360 for (i = 0; i < 4; i++) {
361 if (mask & (1<<i)) {
362 brw_MOV(p, dst[i], brw_imm_f(1.0));
363 }
364 }
365 brw_set_predicate_control_flag_value(p, 0xff);
366 }
367
368 /* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input
369 * looking like:
370 *
371 * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br
372 *
373 * and we're trying to produce:
374 *
375 * DDX DDY
376 * dst: (ss0.tr - ss0.tl) (ss0.tl - ss0.bl)
377 * (ss0.tr - ss0.tl) (ss0.tr - ss0.br)
378 * (ss0.br - ss0.bl) (ss0.tl - ss0.bl)
379 * (ss0.br - ss0.bl) (ss0.tr - ss0.br)
380 * (ss1.tr - ss1.tl) (ss1.tl - ss1.bl)
381 * (ss1.tr - ss1.tl) (ss1.tr - ss1.br)
382 * (ss1.br - ss1.bl) (ss1.tl - ss1.bl)
383 * (ss1.br - ss1.bl) (ss1.tr - ss1.br)
384 *
385 * and add another set of two more subspans if in 16-pixel dispatch mode.
386 *
387 * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result
388 * for each pair, and vertstride = 2 jumps us 2 elements after processing a
389 * pair. But for DDY, it's harder, as we want to produce the pairs swizzled
390 * between each other. We could probably do it like ddx and swizzle the right
391 * order later, but bail for now and just produce
392 * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4)
393 */
394 void emit_ddxy(struct brw_compile *p,
395 const struct brw_reg *dst,
396 GLuint mask,
397 GLboolean is_ddx,
398 const struct brw_reg *arg0)
399 {
400 int i;
401 struct brw_reg src0, src1;
402
403 if (mask & SATURATE)
404 brw_set_saturate(p, 1);
405 for (i = 0; i < 4; i++ ) {
406 if (mask & (1<<i)) {
407 if (is_ddx) {
408 src0 = brw_reg(arg0[i].file, arg0[i].nr, 1,
409 BRW_REGISTER_TYPE_F,
410 BRW_VERTICAL_STRIDE_2,
411 BRW_WIDTH_2,
412 BRW_HORIZONTAL_STRIDE_0,
413 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
414 src1 = brw_reg(arg0[i].file, arg0[i].nr, 0,
415 BRW_REGISTER_TYPE_F,
416 BRW_VERTICAL_STRIDE_2,
417 BRW_WIDTH_2,
418 BRW_HORIZONTAL_STRIDE_0,
419 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
420 } else {
421 src0 = brw_reg(arg0[i].file, arg0[i].nr, 0,
422 BRW_REGISTER_TYPE_F,
423 BRW_VERTICAL_STRIDE_4,
424 BRW_WIDTH_4,
425 BRW_HORIZONTAL_STRIDE_0,
426 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
427 src1 = brw_reg(arg0[i].file, arg0[i].nr, 2,
428 BRW_REGISTER_TYPE_F,
429 BRW_VERTICAL_STRIDE_4,
430 BRW_WIDTH_4,
431 BRW_HORIZONTAL_STRIDE_0,
432 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
433 }
434 brw_ADD(p, dst[i], src0, negate(src1));
435 }
436 }
437 if (mask & SATURATE)
438 brw_set_saturate(p, 0);
439 }
440
441 void emit_alu1(struct brw_compile *p,
442 struct brw_instruction *(*func)(struct brw_compile *,
443 struct brw_reg,
444 struct brw_reg),
445 const struct brw_reg *dst,
446 GLuint mask,
447 const struct brw_reg *arg0)
448 {
449 GLuint i;
450
451 if (mask & SATURATE)
452 brw_set_saturate(p, 1);
453
454 for (i = 0; i < 4; i++) {
455 if (mask & (1<<i)) {
456 func(p, dst[i], arg0[i]);
457 }
458 }
459
460 if (mask & SATURATE)
461 brw_set_saturate(p, 0);
462 }
463
464
465 void emit_alu2(struct brw_compile *p,
466 struct brw_instruction *(*func)(struct brw_compile *,
467 struct brw_reg,
468 struct brw_reg,
469 struct brw_reg),
470 const struct brw_reg *dst,
471 GLuint mask,
472 const struct brw_reg *arg0,
473 const struct brw_reg *arg1)
474 {
475 GLuint i;
476
477 if (mask & SATURATE)
478 brw_set_saturate(p, 1);
479
480 for (i = 0; i < 4; i++) {
481 if (mask & (1<<i)) {
482 func(p, dst[i], arg0[i], arg1[i]);
483 }
484 }
485
486 if (mask & SATURATE)
487 brw_set_saturate(p, 0);
488 }
489
490
491 void emit_mad(struct brw_compile *p,
492 const struct brw_reg *dst,
493 GLuint mask,
494 const struct brw_reg *arg0,
495 const struct brw_reg *arg1,
496 const struct brw_reg *arg2)
497 {
498 GLuint i;
499
500 for (i = 0; i < 4; i++) {
501 if (mask & (1<<i)) {
502 brw_MUL(p, dst[i], arg0[i], arg1[i]);
503
504 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
505 brw_ADD(p, dst[i], dst[i], arg2[i]);
506 brw_set_saturate(p, 0);
507 }
508 }
509 }
510
511 void emit_lrp(struct brw_compile *p,
512 const struct brw_reg *dst,
513 GLuint mask,
514 const struct brw_reg *arg0,
515 const struct brw_reg *arg1,
516 const struct brw_reg *arg2)
517 {
518 GLuint i;
519
520 /* Uses dst as a temporary:
521 */
522 for (i = 0; i < 4; i++) {
523 if (mask & (1<<i)) {
524 /* Can I use the LINE instruction for this?
525 */
526 brw_ADD(p, dst[i], negate(arg0[i]), brw_imm_f(1.0));
527 brw_MUL(p, brw_null_reg(), dst[i], arg2[i]);
528
529 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
530 brw_MAC(p, dst[i], arg0[i], arg1[i]);
531 brw_set_saturate(p, 0);
532 }
533 }
534 }
535
536 void emit_sop(struct brw_compile *p,
537 const struct brw_reg *dst,
538 GLuint mask,
539 GLuint cond,
540 const struct brw_reg *arg0,
541 const struct brw_reg *arg1)
542 {
543 GLuint i;
544
545 for (i = 0; i < 4; i++) {
546 if (mask & (1<<i)) {
547 brw_push_insn_state(p);
548 brw_CMP(p, brw_null_reg(), cond, arg0[i], arg1[i]);
549 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
550 brw_MOV(p, dst[i], brw_imm_f(0));
551 brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
552 brw_MOV(p, dst[i], brw_imm_f(1.0));
553 brw_pop_insn_state(p);
554 }
555 }
556 }
557
558 static void emit_slt( struct brw_compile *p,
559 const struct brw_reg *dst,
560 GLuint mask,
561 const struct brw_reg *arg0,
562 const struct brw_reg *arg1 )
563 {
564 emit_sop(p, dst, mask, BRW_CONDITIONAL_L, arg0, arg1);
565 }
566
567 static void emit_sle( struct brw_compile *p,
568 const struct brw_reg *dst,
569 GLuint mask,
570 const struct brw_reg *arg0,
571 const struct brw_reg *arg1 )
572 {
573 emit_sop(p, dst, mask, BRW_CONDITIONAL_LE, arg0, arg1);
574 }
575
576 static void emit_sgt( struct brw_compile *p,
577 const struct brw_reg *dst,
578 GLuint mask,
579 const struct brw_reg *arg0,
580 const struct brw_reg *arg1 )
581 {
582 emit_sop(p, dst, mask, BRW_CONDITIONAL_G, arg0, arg1);
583 }
584
585 static void emit_sge( struct brw_compile *p,
586 const struct brw_reg *dst,
587 GLuint mask,
588 const struct brw_reg *arg0,
589 const struct brw_reg *arg1 )
590 {
591 emit_sop(p, dst, mask, BRW_CONDITIONAL_GE, arg0, arg1);
592 }
593
594 static void emit_seq( struct brw_compile *p,
595 const struct brw_reg *dst,
596 GLuint mask,
597 const struct brw_reg *arg0,
598 const struct brw_reg *arg1 )
599 {
600 emit_sop(p, dst, mask, BRW_CONDITIONAL_EQ, arg0, arg1);
601 }
602
603 static void emit_sne( struct brw_compile *p,
604 const struct brw_reg *dst,
605 GLuint mask,
606 const struct brw_reg *arg0,
607 const struct brw_reg *arg1 )
608 {
609 emit_sop(p, dst, mask, BRW_CONDITIONAL_NEQ, arg0, arg1);
610 }
611
612 void emit_cmp(struct brw_compile *p,
613 const struct brw_reg *dst,
614 GLuint mask,
615 const struct brw_reg *arg0,
616 const struct brw_reg *arg1,
617 const struct brw_reg *arg2)
618 {
619 GLuint i;
620
621 for (i = 0; i < 4; i++) {
622 if (mask & (1<<i)) {
623 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
624 brw_MOV(p, dst[i], arg2[i]);
625 brw_set_saturate(p, 0);
626
627 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], brw_imm_f(0));
628
629 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
630 brw_MOV(p, dst[i], arg1[i]);
631 brw_set_saturate(p, 0);
632 brw_set_predicate_control_flag_value(p, 0xff);
633 }
634 }
635 }
636
637 void emit_max(struct brw_compile *p,
638 const struct brw_reg *dst,
639 GLuint mask,
640 const struct brw_reg *arg0,
641 const struct brw_reg *arg1)
642 {
643 GLuint i;
644
645 for (i = 0; i < 4; i++) {
646 if (mask & (1<<i)) {
647 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0[i], arg1[i]);
648
649 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
650 brw_SEL(p, dst[i], arg0[i], arg1[i]);
651 brw_set_saturate(p, 0);
652 brw_set_predicate_control_flag_value(p, 0xff);
653 }
654 }
655 }
656
657 void emit_min(struct brw_compile *p,
658 const struct brw_reg *dst,
659 GLuint mask,
660 const struct brw_reg *arg0,
661 const struct brw_reg *arg1)
662 {
663 GLuint i;
664
665 for (i = 0; i < 4; i++) {
666 if (mask & (1<<i)) {
667 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]);
668
669 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
670 brw_SEL(p, dst[i], arg0[i], arg1[i]);
671 brw_set_saturate(p, 0);
672 brw_set_predicate_control_flag_value(p, 0xff);
673 }
674 }
675 }
676
677
678 void emit_dp3(struct brw_compile *p,
679 const struct brw_reg *dst,
680 GLuint mask,
681 const struct brw_reg *arg0,
682 const struct brw_reg *arg1)
683 {
684 int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
685
686 if (!(mask & WRITEMASK_XYZW))
687 return; /* Do not emit dead code */
688
689 assert(is_power_of_two(mask & WRITEMASK_XYZW));
690
691 brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
692 brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
693
694 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
695 brw_MAC(p, dst[dst_chan], arg0[2], arg1[2]);
696 brw_set_saturate(p, 0);
697 }
698
699
700 void emit_dp4(struct brw_compile *p,
701 const struct brw_reg *dst,
702 GLuint mask,
703 const struct brw_reg *arg0,
704 const struct brw_reg *arg1)
705 {
706 int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
707
708 if (!(mask & WRITEMASK_XYZW))
709 return; /* Do not emit dead code */
710
711 assert(is_power_of_two(mask & WRITEMASK_XYZW));
712
713 brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
714 brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
715 brw_MAC(p, brw_null_reg(), arg0[2], arg1[2]);
716
717 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
718 brw_MAC(p, dst[dst_chan], arg0[3], arg1[3]);
719 brw_set_saturate(p, 0);
720 }
721
722
723 void emit_dph(struct brw_compile *p,
724 const struct brw_reg *dst,
725 GLuint mask,
726 const struct brw_reg *arg0,
727 const struct brw_reg *arg1)
728 {
729 const int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
730
731 if (!(mask & WRITEMASK_XYZW))
732 return; /* Do not emit dead code */
733
734 assert(is_power_of_two(mask & WRITEMASK_XYZW));
735
736 brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
737 brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
738 brw_MAC(p, dst[dst_chan], arg0[2], arg1[2]);
739
740 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
741 brw_ADD(p, dst[dst_chan], dst[dst_chan], arg1[3]);
742 brw_set_saturate(p, 0);
743 }
744
745
746 void emit_xpd(struct brw_compile *p,
747 const struct brw_reg *dst,
748 GLuint mask,
749 const struct brw_reg *arg0,
750 const struct brw_reg *arg1)
751 {
752 GLuint i;
753
754 assert((mask & WRITEMASK_W) != WRITEMASK_W);
755
756 for (i = 0 ; i < 3; i++) {
757 if (mask & (1<<i)) {
758 GLuint i2 = (i+2)%3;
759 GLuint i1 = (i+1)%3;
760
761 brw_MUL(p, brw_null_reg(), negate(arg0[i2]), arg1[i1]);
762
763 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
764 brw_MAC(p, dst[i], arg0[i1], arg1[i2]);
765 brw_set_saturate(p, 0);
766 }
767 }
768 }
769
770
771 void emit_math1(struct brw_wm_compile *c,
772 GLuint function,
773 const struct brw_reg *dst,
774 GLuint mask,
775 const struct brw_reg *arg0)
776 {
777 struct brw_compile *p = &c->func;
778 int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
779 GLuint saturate = ((mask & SATURATE) ?
780 BRW_MATH_SATURATE_SATURATE :
781 BRW_MATH_SATURATE_NONE);
782
783 if (!(mask & WRITEMASK_XYZW))
784 return; /* Do not emit dead code */
785
786 assert(is_power_of_two(mask & WRITEMASK_XYZW));
787
788 /* If compressed, this will write message reg 2,3 from arg0.x's 16
789 * channels.
790 */
791 brw_MOV(p, brw_message_reg(2), arg0[0]);
792
793 /* Send two messages to perform all 16 operations:
794 */
795 brw_push_insn_state(p);
796 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
797 brw_math(p,
798 dst[dst_chan],
799 function,
800 saturate,
801 2,
802 brw_null_reg(),
803 BRW_MATH_DATA_VECTOR,
804 BRW_MATH_PRECISION_FULL);
805
806 if (c->dispatch_width == 16) {
807 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
808 brw_math(p,
809 offset(dst[dst_chan],1),
810 function,
811 saturate,
812 3,
813 brw_null_reg(),
814 BRW_MATH_DATA_VECTOR,
815 BRW_MATH_PRECISION_FULL);
816 }
817 brw_pop_insn_state(p);
818 }
819
820
821 void emit_math2(struct brw_wm_compile *c,
822 GLuint function,
823 const struct brw_reg *dst,
824 GLuint mask,
825 const struct brw_reg *arg0,
826 const struct brw_reg *arg1)
827 {
828 struct brw_compile *p = &c->func;
829 int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
830 GLuint saturate = ((mask & SATURATE) ?
831 BRW_MATH_SATURATE_SATURATE :
832 BRW_MATH_SATURATE_NONE);
833
834 if (!(mask & WRITEMASK_XYZW))
835 return; /* Do not emit dead code */
836
837 assert(is_power_of_two(mask & WRITEMASK_XYZW));
838
839 brw_push_insn_state(p);
840
841 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
842 brw_MOV(p, brw_message_reg(2), arg0[0]);
843 if (c->dispatch_width == 16) {
844 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
845 brw_MOV(p, brw_message_reg(4), sechalf(arg0[0]));
846 }
847
848 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
849 brw_MOV(p, brw_message_reg(3), arg1[0]);
850 if (c->dispatch_width == 16) {
851 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
852 brw_MOV(p, brw_message_reg(5), sechalf(arg1[0]));
853 }
854
855 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
856 brw_math(p,
857 dst[dst_chan],
858 function,
859 saturate,
860 2,
861 brw_null_reg(),
862 BRW_MATH_DATA_VECTOR,
863 BRW_MATH_PRECISION_FULL);
864
865 /* Send two messages to perform all 16 operations:
866 */
867 if (c->dispatch_width == 16) {
868 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
869 brw_math(p,
870 offset(dst[dst_chan],1),
871 function,
872 saturate,
873 4,
874 brw_null_reg(),
875 BRW_MATH_DATA_VECTOR,
876 BRW_MATH_PRECISION_FULL);
877 }
878 brw_pop_insn_state(p);
879 }
880
881
882 void emit_tex(struct brw_wm_compile *c,
883 struct brw_reg *dst,
884 GLuint dst_flags,
885 struct brw_reg *arg,
886 struct brw_reg depth_payload,
887 GLuint tex_idx,
888 GLuint sampler,
889 GLboolean shadow)
890 {
891 struct brw_compile *p = &c->func;
892 struct intel_context *intel = &p->brw->intel;
893 struct brw_reg dst_retyped;
894 GLuint cur_mrf = 2, response_length;
895 GLuint i, nr_texcoords;
896 GLuint emit;
897 GLuint msg_type;
898 GLuint mrf_per_channel;
899 GLuint simd_mode;
900
901 if (c->dispatch_width == 16) {
902 mrf_per_channel = 2;
903 response_length = 8;
904 dst_retyped = retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW);
905 simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
906 } else {
907 mrf_per_channel = 1;
908 response_length = 4;
909 dst_retyped = retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW);
910 simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8;
911 }
912
913 /* How many input regs are there?
914 */
915 switch (tex_idx) {
916 case TEXTURE_1D_INDEX:
917 emit = WRITEMASK_X;
918 nr_texcoords = 1;
919 break;
920 case TEXTURE_2D_INDEX:
921 case TEXTURE_RECT_INDEX:
922 emit = WRITEMASK_XY;
923 nr_texcoords = 2;
924 break;
925 case TEXTURE_3D_INDEX:
926 case TEXTURE_CUBE_INDEX:
927 emit = WRITEMASK_XYZ;
928 nr_texcoords = 3;
929 break;
930 default:
931 /* unexpected target */
932 abort();
933 }
934
935 /* Pre-Ironlake, the 8-wide sampler always took u,v,r. */
936 if (!intel->is_ironlake && c->dispatch_width == 8)
937 nr_texcoords = 3;
938
939 /* For shadow comparisons, we have to supply u,v,r. */
940 if (shadow)
941 nr_texcoords = 3;
942
943 /* Emit the texcoords. */
944 for (i = 0; i < nr_texcoords; i++) {
945 if (emit & (1<<i))
946 brw_MOV(p, brw_message_reg(cur_mrf), arg[i]);
947 else
948 brw_MOV(p, brw_message_reg(cur_mrf), brw_imm_f(0));
949 cur_mrf += mrf_per_channel;
950 }
951
952 /* Fill in the shadow comparison reference value. */
953 if (shadow) {
954 if (intel->is_ironlake) {
955 /* Fill in the cube map array index value. */
956 brw_MOV(p, brw_message_reg(cur_mrf), brw_imm_f(0));
957 cur_mrf += mrf_per_channel;
958 } else if (c->dispatch_width == 8) {
959 /* Fill in the LOD bias value. */
960 brw_MOV(p, brw_message_reg(cur_mrf), brw_imm_f(0));
961 cur_mrf += mrf_per_channel;
962 }
963 brw_MOV(p, brw_message_reg(cur_mrf), arg[2]);
964 cur_mrf += mrf_per_channel;
965 }
966
967 if (intel->is_ironlake) {
968 if (shadow)
969 msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_COMPARE_IGDNG;
970 else
971 msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_IGDNG;
972 } else {
973 /* Note that G45 and older determines shadow compare and dispatch width
974 * from message length for most messages.
975 */
976 if (c->dispatch_width == 16 && shadow)
977 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE;
978 else
979 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE;
980 }
981
982 brw_SAMPLE(p,
983 dst_retyped,
984 1,
985 retype(depth_payload, BRW_REGISTER_TYPE_UW),
986 SURF_INDEX_TEXTURE(sampler),
987 sampler,
988 dst_flags & WRITEMASK_XYZW,
989 msg_type,
990 response_length,
991 cur_mrf - 1,
992 0,
993 1,
994 simd_mode);
995 }
996
997
998 void emit_txb(struct brw_wm_compile *c,
999 struct brw_reg *dst,
1000 GLuint dst_flags,
1001 struct brw_reg *arg,
1002 struct brw_reg depth_payload,
1003 GLuint tex_idx,
1004 GLuint sampler)
1005 {
1006 struct brw_compile *p = &c->func;
1007 struct intel_context *intel = &p->brw->intel;
1008 GLuint msgLength;
1009 GLuint msg_type;
1010 GLuint mrf_per_channel;
1011 GLuint response_length;
1012 struct brw_reg dst_retyped;
1013
1014 /* The G45 and older chipsets don't support 8-wide dispatch for LOD biased
1015 * samples, so we'll use the 16-wide instruction, leave the second halves
1016 * undefined, and trust the execution mask to keep the undefined pixels
1017 * from mattering.
1018 */
1019 if (c->dispatch_width == 16 || !intel->is_ironlake) {
1020 if (intel->is_ironlake)
1021 msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_IGDNG;
1022 else
1023 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
1024 mrf_per_channel = 2;
1025 dst_retyped = retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW);
1026 response_length = 8;
1027 } else {
1028 msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_IGDNG;
1029 mrf_per_channel = 1;
1030 dst_retyped = retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW);
1031 response_length = 4;
1032 }
1033
1034 /* Shadow ignored for txb. */
1035 switch (tex_idx) {
1036 case TEXTURE_1D_INDEX:
1037 brw_MOV(p, brw_message_reg(2 + 0 * mrf_per_channel), arg[0]);
1038 brw_MOV(p, brw_message_reg(2 + 1 * mrf_per_channel), brw_imm_f(0));
1039 brw_MOV(p, brw_message_reg(2 + 2 * mrf_per_channel), brw_imm_f(0));
1040 break;
1041 case TEXTURE_2D_INDEX:
1042 case TEXTURE_RECT_INDEX:
1043 brw_MOV(p, brw_message_reg(2 + 0 * mrf_per_channel), arg[0]);
1044 brw_MOV(p, brw_message_reg(2 + 1 * mrf_per_channel), arg[1]);
1045 brw_MOV(p, brw_message_reg(2 + 2 * mrf_per_channel), brw_imm_f(0));
1046 break;
1047 case TEXTURE_3D_INDEX:
1048 case TEXTURE_CUBE_INDEX:
1049 brw_MOV(p, brw_message_reg(2 + 0 * mrf_per_channel), arg[0]);
1050 brw_MOV(p, brw_message_reg(2 + 1 * mrf_per_channel), arg[1]);
1051 brw_MOV(p, brw_message_reg(2 + 2 * mrf_per_channel), arg[2]);
1052 break;
1053 default:
1054 /* unexpected target */
1055 abort();
1056 }
1057
1058 brw_MOV(p, brw_message_reg(2 + 3 * mrf_per_channel), arg[3]);
1059 msgLength = 2 + 4 * mrf_per_channel - 1;
1060
1061 brw_SAMPLE(p,
1062 dst_retyped,
1063 1,
1064 retype(depth_payload, BRW_REGISTER_TYPE_UW),
1065 SURF_INDEX_TEXTURE(sampler),
1066 sampler,
1067 dst_flags & WRITEMASK_XYZW,
1068 msg_type,
1069 response_length,
1070 msgLength,
1071 0,
1072 1,
1073 BRW_SAMPLER_SIMD_MODE_SIMD16);
1074 }
1075
1076
1077 static void emit_lit(struct brw_wm_compile *c,
1078 const struct brw_reg *dst,
1079 GLuint mask,
1080 const struct brw_reg *arg0)
1081 {
1082 struct brw_compile *p = &c->func;
1083
1084 assert((mask & WRITEMASK_XW) == 0);
1085
1086 if (mask & WRITEMASK_Y) {
1087 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
1088 brw_MOV(p, dst[1], arg0[0]);
1089 brw_set_saturate(p, 0);
1090 }
1091
1092 if (mask & WRITEMASK_Z) {
1093 emit_math2(c, BRW_MATH_FUNCTION_POW,
1094 &dst[2],
1095 WRITEMASK_X | (mask & SATURATE),
1096 &arg0[1],
1097 &arg0[3]);
1098 }
1099
1100 /* Ordinarily you'd use an iff statement to skip or shortcircuit
1101 * some of the POW calculations above, but 16-wide iff statements
1102 * seem to lock c1 hardware, so this is a nasty workaround:
1103 */
1104 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_LE, arg0[0], brw_imm_f(0));
1105 {
1106 if (mask & WRITEMASK_Y)
1107 brw_MOV(p, dst[1], brw_imm_f(0));
1108
1109 if (mask & WRITEMASK_Z)
1110 brw_MOV(p, dst[2], brw_imm_f(0));
1111 }
1112 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1113 }
1114
1115
1116 /* Kill pixel - set execution mask to zero for those pixels which
1117 * fail.
1118 */
1119 static void emit_kil( struct brw_wm_compile *c,
1120 struct brw_reg *arg0)
1121 {
1122 struct brw_compile *p = &c->func;
1123 struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
1124 GLuint i, j;
1125
1126 for (i = 0; i < 4; i++) {
1127 /* Check if we've already done the comparison for this reg
1128 * -- common when someone does KIL TEMP.wwww.
1129 */
1130 for (j = 0; j < i; j++) {
1131 if (memcmp(&arg0[j], &arg0[i], sizeof(arg0[0])) == 0)
1132 break;
1133 }
1134 if (j != i)
1135 continue;
1136
1137 brw_push_insn_state(p);
1138 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0[i], brw_imm_f(0));
1139 brw_set_predicate_control_flag_value(p, 0xff);
1140 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1141 brw_AND(p, r0uw, brw_flag_reg(), r0uw);
1142 brw_pop_insn_state(p);
1143 }
1144 }
1145
1146 /* KIL_NV kills the pixels that are currently executing, not based on a test
1147 * of the arguments.
1148 */
1149 static void emit_kil_nv( struct brw_wm_compile *c )
1150 {
1151 struct brw_compile *p = &c->func;
1152 struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
1153
1154 brw_push_insn_state(p);
1155 brw_set_mask_control(p, BRW_MASK_DISABLE);
1156 brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); /* IMASK */
1157 brw_AND(p, r0uw, c->emit_mask_reg, r0uw);
1158 brw_pop_insn_state(p);
1159 }
1160
1161 static void fire_fb_write( struct brw_wm_compile *c,
1162 GLuint base_reg,
1163 GLuint nr,
1164 GLuint target,
1165 GLuint eot )
1166 {
1167 struct brw_compile *p = &c->func;
1168 struct brw_reg dst;
1169
1170 if (c->dispatch_width == 16)
1171 dst = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
1172 else
1173 dst = retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW);
1174
1175 /* Pass through control information:
1176 */
1177 /* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */
1178 {
1179 brw_push_insn_state(p);
1180 brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */
1181 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1182 brw_MOV(p,
1183 brw_message_reg(base_reg + 1),
1184 brw_vec8_grf(1, 0));
1185 brw_pop_insn_state(p);
1186 }
1187
1188 /* Send framebuffer write message: */
1189 /* send (16) null.0<1>:uw m0 r0.0<8;8,1>:uw 0x85a04000:ud { Align1 EOT } */
1190 brw_fb_WRITE(p,
1191 dst,
1192 base_reg,
1193 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
1194 target,
1195 nr,
1196 0,
1197 eot);
1198 }
1199
1200
1201 static void emit_aa( struct brw_wm_compile *c,
1202 struct brw_reg *arg1,
1203 GLuint reg )
1204 {
1205 struct brw_compile *p = &c->func;
1206 GLuint comp = c->key.aa_dest_stencil_reg / 2;
1207 GLuint off = c->key.aa_dest_stencil_reg % 2;
1208 struct brw_reg aa = offset(arg1[comp], off);
1209
1210 brw_push_insn_state(p);
1211 brw_set_compression_control(p, BRW_COMPRESSION_NONE); /* ?? */
1212 brw_MOV(p, brw_message_reg(reg), aa);
1213 brw_pop_insn_state(p);
1214 }
1215
1216
1217 /* Post-fragment-program processing. Send the results to the
1218 * framebuffer.
1219 * \param arg0 the fragment color
1220 * \param arg1 the pass-through depth value
1221 * \param arg2 the shader-computed depth value
1222 */
1223 void emit_fb_write(struct brw_wm_compile *c,
1224 struct brw_reg *arg0,
1225 struct brw_reg *arg1,
1226 struct brw_reg *arg2,
1227 GLuint target,
1228 GLuint eot)
1229 {
1230 struct brw_compile *p = &c->func;
1231 struct brw_context *brw = p->brw;
1232 GLuint nr = 2;
1233 GLuint channel;
1234
1235 /* Reserve a space for AA - may not be needed:
1236 */
1237 if (c->key.aa_dest_stencil_reg)
1238 nr += 1;
1239
1240 /* I don't really understand how this achieves the color interleave
1241 * (ie RGBARGBA) in the result: [Do the saturation here]
1242 */
1243 brw_push_insn_state(p);
1244
1245 for (channel = 0; channel < 4; channel++) {
1246 if (c->dispatch_width == 16 && brw->has_compr4) {
1247 /* By setting the high bit of the MRF register number, we indicate
1248 * that we want COMPR4 mode - instead of doing the usual destination
1249 * + 1 for the second half we get destination + 4.
1250 */
1251 brw_MOV(p,
1252 brw_message_reg(nr + channel + (1 << 7)),
1253 arg0[channel]);
1254 } else {
1255 /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */
1256 /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */
1257 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1258 brw_MOV(p,
1259 brw_message_reg(nr + channel),
1260 arg0[channel]);
1261
1262 if (c->dispatch_width == 16) {
1263 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
1264 brw_MOV(p,
1265 brw_message_reg(nr + channel + 4),
1266 sechalf(arg0[channel]));
1267 }
1268 }
1269 }
1270 /* skip over the regs populated above:
1271 */
1272 nr += 8;
1273 brw_pop_insn_state(p);
1274
1275 if (c->key.source_depth_to_render_target)
1276 {
1277 if (c->key.computes_depth)
1278 brw_MOV(p, brw_message_reg(nr), arg2[2]);
1279 else
1280 brw_MOV(p, brw_message_reg(nr), arg1[1]); /* ? */
1281
1282 nr += 2;
1283 }
1284
1285 if (c->key.dest_depth_reg)
1286 {
1287 GLuint comp = c->key.dest_depth_reg / 2;
1288 GLuint off = c->key.dest_depth_reg % 2;
1289
1290 if (off != 0) {
1291 brw_push_insn_state(p);
1292 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1293
1294 brw_MOV(p, brw_message_reg(nr), offset(arg1[comp],1));
1295 /* 2nd half? */
1296 brw_MOV(p, brw_message_reg(nr+1), arg1[comp+1]);
1297 brw_pop_insn_state(p);
1298 }
1299 else {
1300 brw_MOV(p, brw_message_reg(nr), arg1[comp]);
1301 }
1302 nr += 2;
1303 }
1304
1305 if (!c->key.runtime_check_aads_emit) {
1306 if (c->key.aa_dest_stencil_reg)
1307 emit_aa(c, arg1, 2);
1308
1309 fire_fb_write(c, 0, nr, target, eot);
1310 }
1311 else {
1312 struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
1313 struct brw_reg ip = brw_ip_reg();
1314 struct brw_instruction *jmp;
1315
1316 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1317 brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
1318 brw_AND(p,
1319 v1_null_ud,
1320 get_element_ud(brw_vec8_grf(1,0), 6),
1321 brw_imm_ud(1<<26));
1322
1323 jmp = brw_JMPI(p, ip, ip, brw_imm_w(0));
1324 {
1325 emit_aa(c, arg1, 2);
1326 fire_fb_write(c, 0, nr, target, eot);
1327 /* note - thread killed in subroutine */
1328 }
1329 brw_land_fwd_jump(p, jmp);
1330
1331 /* ELSE: Shuffle up one register to fill in the hole left for AA:
1332 */
1333 fire_fb_write(c, 1, nr-1, target, eot);
1334 }
1335 }
1336
1337 /**
1338 * Move a GPR to scratch memory.
1339 */
1340 static void emit_spill( struct brw_wm_compile *c,
1341 struct brw_reg reg,
1342 GLuint slot )
1343 {
1344 struct brw_compile *p = &c->func;
1345
1346 /*
1347 mov (16) m2.0<1>:ud r2.0<8;8,1>:ud { Align1 Compr }
1348 */
1349 brw_MOV(p, brw_message_reg(2), reg);
1350
1351 /*
1352 mov (1) r0.2<1>:d 0x00000080:d { Align1 NoMask }
1353 send (16) null.0<1>:uw m1 r0.0<8;8,1>:uw 0x053003ff:ud { Align1 }
1354 */
1355 brw_dp_WRITE_16(p,
1356 retype(vec16(brw_vec8_grf(0, 0)), BRW_REGISTER_TYPE_UW),
1357 slot);
1358 }
1359
1360
1361 /**
1362 * Load a GPR from scratch memory.
1363 */
1364 static void emit_unspill( struct brw_wm_compile *c,
1365 struct brw_reg reg,
1366 GLuint slot )
1367 {
1368 struct brw_compile *p = &c->func;
1369
1370 /* Slot 0 is the undef value.
1371 */
1372 if (slot == 0) {
1373 brw_MOV(p, reg, brw_imm_f(0));
1374 return;
1375 }
1376
1377 /*
1378 mov (1) r0.2<1>:d 0x000000c0:d { Align1 NoMask }
1379 send (16) r110.0<1>:uw m1 r0.0<8;8,1>:uw 0x041243ff:ud { Align1 }
1380 */
1381
1382 brw_dp_READ_16(p,
1383 retype(vec16(reg), BRW_REGISTER_TYPE_UW),
1384 slot);
1385 }
1386
1387
1388 /**
1389 * Retrieve up to 4 GEN4 register pairs for the given wm reg:
1390 * Args with unspill_reg != 0 will be loaded from scratch memory.
1391 */
1392 static void get_argument_regs( struct brw_wm_compile *c,
1393 struct brw_wm_ref *arg[],
1394 struct brw_reg *regs )
1395 {
1396 GLuint i;
1397
1398 for (i = 0; i < 4; i++) {
1399 if (arg[i]) {
1400 if (arg[i]->unspill_reg)
1401 emit_unspill(c,
1402 brw_vec8_grf(arg[i]->unspill_reg, 0),
1403 arg[i]->value->spill_slot);
1404
1405 regs[i] = arg[i]->hw_reg;
1406 }
1407 else {
1408 regs[i] = brw_null_reg();
1409 }
1410 }
1411 }
1412
1413
1414 /**
1415 * For values that have a spill_slot!=0, write those regs to scratch memory.
1416 */
1417 static void spill_values( struct brw_wm_compile *c,
1418 struct brw_wm_value *values,
1419 GLuint nr )
1420 {
1421 GLuint i;
1422
1423 for (i = 0; i < nr; i++)
1424 if (values[i].spill_slot)
1425 emit_spill(c, values[i].hw_reg, values[i].spill_slot);
1426 }
1427
1428
1429 /* Emit the fragment program instructions here.
1430 */
1431 void brw_wm_emit( struct brw_wm_compile *c )
1432 {
1433 struct brw_compile *p = &c->func;
1434 GLuint insn;
1435
1436 brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
1437
1438 /* Check if any of the payload regs need to be spilled:
1439 */
1440 spill_values(c, c->payload.depth, 4);
1441 spill_values(c, c->creg, c->nr_creg);
1442 spill_values(c, c->payload.input_interp, FRAG_ATTRIB_MAX);
1443
1444
1445 for (insn = 0; insn < c->nr_insns; insn++) {
1446
1447 struct brw_wm_instruction *inst = &c->instruction[insn];
1448 struct brw_reg args[3][4], dst[4];
1449 GLuint i, dst_flags;
1450
1451 /* Get argument regs:
1452 */
1453 for (i = 0; i < 3; i++)
1454 get_argument_regs(c, inst->src[i], args[i]);
1455
1456 /* Get dest regs:
1457 */
1458 for (i = 0; i < 4; i++)
1459 if (inst->dst[i])
1460 dst[i] = inst->dst[i]->hw_reg;
1461 else
1462 dst[i] = brw_null_reg();
1463
1464 /* Flags
1465 */
1466 dst_flags = inst->writemask;
1467 if (inst->saturate)
1468 dst_flags |= SATURATE;
1469
1470 switch (inst->opcode) {
1471 /* Generated instructions for calculating triangle interpolants:
1472 */
1473 case WM_PIXELXY:
1474 emit_pixel_xy(c, dst, dst_flags);
1475 break;
1476
1477 case WM_DELTAXY:
1478 emit_delta_xy(p, dst, dst_flags, args[0]);
1479 break;
1480
1481 case WM_WPOSXY:
1482 emit_wpos_xy(c, dst, dst_flags, args[0]);
1483 break;
1484
1485 case WM_PIXELW:
1486 emit_pixel_w(c, dst, dst_flags, args[0], args[1]);
1487 break;
1488
1489 case WM_LINTERP:
1490 emit_linterp(p, dst, dst_flags, args[0], args[1]);
1491 break;
1492
1493 case WM_PINTERP:
1494 emit_pinterp(p, dst, dst_flags, args[0], args[1], args[2]);
1495 break;
1496
1497 case WM_CINTERP:
1498 emit_cinterp(p, dst, dst_flags, args[0]);
1499 break;
1500
1501 case WM_FB_WRITE:
1502 emit_fb_write(c, args[0], args[1], args[2], inst->target, inst->eot);
1503 break;
1504
1505 case WM_FRONTFACING:
1506 emit_frontfacing(p, dst, dst_flags);
1507 break;
1508
1509 /* Straightforward arithmetic:
1510 */
1511 case OPCODE_ADD:
1512 emit_alu2(p, brw_ADD, dst, dst_flags, args[0], args[1]);
1513 break;
1514
1515 case OPCODE_FRC:
1516 emit_alu1(p, brw_FRC, dst, dst_flags, args[0]);
1517 break;
1518
1519 case OPCODE_FLR:
1520 emit_alu1(p, brw_RNDD, dst, dst_flags, args[0]);
1521 break;
1522
1523 case OPCODE_DDX:
1524 emit_ddxy(p, dst, dst_flags, GL_TRUE, args[0]);
1525 break;
1526
1527 case OPCODE_DDY:
1528 emit_ddxy(p, dst, dst_flags, GL_FALSE, args[0]);
1529 break;
1530
1531 case OPCODE_DP3:
1532 emit_dp3(p, dst, dst_flags, args[0], args[1]);
1533 break;
1534
1535 case OPCODE_DP4:
1536 emit_dp4(p, dst, dst_flags, args[0], args[1]);
1537 break;
1538
1539 case OPCODE_DPH:
1540 emit_dph(p, dst, dst_flags, args[0], args[1]);
1541 break;
1542
1543 case OPCODE_TRUNC:
1544 emit_alu1(p, brw_RNDZ, dst, dst_flags, args[0]);
1545 break;
1546
1547 case OPCODE_LRP:
1548 emit_lrp(p, dst, dst_flags, args[0], args[1], args[2]);
1549 break;
1550
1551 case OPCODE_MAD:
1552 emit_mad(p, dst, dst_flags, args[0], args[1], args[2]);
1553 break;
1554
1555 case OPCODE_MOV:
1556 case OPCODE_SWZ:
1557 emit_alu1(p, brw_MOV, dst, dst_flags, args[0]);
1558 break;
1559
1560 case OPCODE_MUL:
1561 emit_alu2(p, brw_MUL, dst, dst_flags, args[0], args[1]);
1562 break;
1563
1564 case OPCODE_XPD:
1565 emit_xpd(p, dst, dst_flags, args[0], args[1]);
1566 break;
1567
1568 /* Higher math functions:
1569 */
1570 case OPCODE_RCP:
1571 emit_math1(c, BRW_MATH_FUNCTION_INV, dst, dst_flags, args[0]);
1572 break;
1573
1574 case OPCODE_RSQ:
1575 emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst, dst_flags, args[0]);
1576 break;
1577
1578 case OPCODE_SIN:
1579 emit_math1(c, BRW_MATH_FUNCTION_SIN, dst, dst_flags, args[0]);
1580 break;
1581
1582 case OPCODE_COS:
1583 emit_math1(c, BRW_MATH_FUNCTION_COS, dst, dst_flags, args[0]);
1584 break;
1585
1586 case OPCODE_EX2:
1587 emit_math1(c, BRW_MATH_FUNCTION_EXP, dst, dst_flags, args[0]);
1588 break;
1589
1590 case OPCODE_LG2:
1591 emit_math1(c, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]);
1592 break;
1593
1594 case OPCODE_SCS:
1595 /* There is an scs math function, but it would need some
1596 * fixup for 16-element execution.
1597 */
1598 if (dst_flags & WRITEMASK_X)
1599 emit_math1(c, BRW_MATH_FUNCTION_COS, dst, (dst_flags&SATURATE)|WRITEMASK_X, args[0]);
1600 if (dst_flags & WRITEMASK_Y)
1601 emit_math1(c, BRW_MATH_FUNCTION_SIN, dst+1, (dst_flags&SATURATE)|WRITEMASK_X, args[0]);
1602 break;
1603
1604 case OPCODE_POW:
1605 emit_math2(c, BRW_MATH_FUNCTION_POW, dst, dst_flags, args[0], args[1]);
1606 break;
1607
1608 /* Comparisons:
1609 */
1610 case OPCODE_CMP:
1611 emit_cmp(p, dst, dst_flags, args[0], args[1], args[2]);
1612 break;
1613
1614 case OPCODE_MAX:
1615 emit_max(p, dst, dst_flags, args[0], args[1]);
1616 break;
1617
1618 case OPCODE_MIN:
1619 emit_min(p, dst, dst_flags, args[0], args[1]);
1620 break;
1621
1622 case OPCODE_SLT:
1623 emit_slt(p, dst, dst_flags, args[0], args[1]);
1624 break;
1625
1626 case OPCODE_SLE:
1627 emit_sle(p, dst, dst_flags, args[0], args[1]);
1628 break;
1629 case OPCODE_SGT:
1630 emit_sgt(p, dst, dst_flags, args[0], args[1]);
1631 break;
1632 case OPCODE_SGE:
1633 emit_sge(p, dst, dst_flags, args[0], args[1]);
1634 break;
1635 case OPCODE_SEQ:
1636 emit_seq(p, dst, dst_flags, args[0], args[1]);
1637 break;
1638 case OPCODE_SNE:
1639 emit_sne(p, dst, dst_flags, args[0], args[1]);
1640 break;
1641
1642 case OPCODE_LIT:
1643 emit_lit(c, dst, dst_flags, args[0]);
1644 break;
1645
1646 /* Texturing operations:
1647 */
1648 case OPCODE_TEX:
1649 emit_tex(c, dst, dst_flags, args[0], c->payload.depth[0].hw_reg,
1650 inst->tex_idx, inst->tex_unit,
1651 inst->tex_shadow);
1652 break;
1653
1654 case OPCODE_TXB:
1655 emit_txb(c, dst, dst_flags, args[0], c->payload.depth[0].hw_reg,
1656 inst->tex_idx, inst->tex_unit);
1657 break;
1658
1659 case OPCODE_KIL:
1660 emit_kil(c, args[0]);
1661 break;
1662
1663 case OPCODE_KIL_NV:
1664 emit_kil_nv(c);
1665 break;
1666
1667 default:
1668 printf("Unsupported opcode %i (%s) in fragment shader\n",
1669 inst->opcode, inst->opcode < MAX_OPCODE ?
1670 _mesa_opcode_string(inst->opcode) :
1671 "unknown");
1672 }
1673
1674 for (i = 0; i < 4; i++)
1675 if (inst->dst[i] && inst->dst[i]->spill_slot)
1676 emit_spill(c,
1677 inst->dst[i]->hw_reg,
1678 inst->dst[i]->spill_slot);
1679 }
1680
1681 if (INTEL_DEBUG & DEBUG_WM) {
1682 int i;
1683
1684 printf("wm-native:\n");
1685 for (i = 0; i < p->nr_insn; i++)
1686 brw_disasm(stderr, &p->store[i]);
1687 printf("\n");
1688 }
1689 }