Merge branch '7.8'
[mesa.git] / src / mesa / drivers / dri / i965 / brw_wm_emit.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "main/macros.h"
34 #include "brw_context.h"
35 #include "brw_wm.h"
36
37 static GLboolean can_do_pln(struct intel_context *intel,
38 const struct brw_reg *deltas)
39 {
40 struct brw_context *brw = brw_context(&intel->ctx);
41
42 if (!brw->has_pln)
43 return GL_FALSE;
44
45 if (deltas[1].nr != deltas[0].nr + 1)
46 return GL_FALSE;
47
48 if (intel->gen < 6 && ((deltas[0].nr & 1) != 0))
49 return GL_FALSE;
50
51 return GL_TRUE;
52 }
53
54 /* Not quite sure how correct this is - need to understand horiz
55 * vs. vertical strides a little better.
56 */
57 static INLINE struct brw_reg sechalf( struct brw_reg reg )
58 {
59 if (reg.vstride)
60 reg.nr++;
61 return reg;
62 }
63
64
65 /* Payload R0:
66 *
67 * R0.0 -- pixel mask, one bit for each of 4 pixels in 4 tiles,
68 * corresponding to each of the 16 execution channels.
69 * R0.1..8 -- ?
70 * R1.0 -- triangle vertex 0.X
71 * R1.1 -- triangle vertex 0.Y
72 * R1.2 -- tile 0 x,y coords (2 packed uwords)
73 * R1.3 -- tile 1 x,y coords (2 packed uwords)
74 * R1.4 -- tile 2 x,y coords (2 packed uwords)
75 * R1.5 -- tile 3 x,y coords (2 packed uwords)
76 * R1.6 -- ?
77 * R1.7 -- ?
78 * R1.8 -- ?
79 */
80
81 void emit_pixel_xy(struct brw_wm_compile *c,
82 const struct brw_reg *dst,
83 GLuint mask)
84 {
85 struct brw_compile *p = &c->func;
86 struct brw_reg r1 = brw_vec1_grf(1, 0);
87 struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW);
88 struct brw_reg dst0_uw, dst1_uw;
89
90 brw_push_insn_state(p);
91 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
92
93 if (c->dispatch_width == 16) {
94 dst0_uw = vec16(retype(dst[0], BRW_REGISTER_TYPE_UW));
95 dst1_uw = vec16(retype(dst[1], BRW_REGISTER_TYPE_UW));
96 } else {
97 dst0_uw = vec8(retype(dst[0], BRW_REGISTER_TYPE_UW));
98 dst1_uw = vec8(retype(dst[1], BRW_REGISTER_TYPE_UW));
99 }
100
101 /* Calculate pixel centers by adding 1 or 0 to each of the
102 * micro-tile coordinates passed in r1.
103 */
104 if (mask & WRITEMASK_X) {
105 brw_ADD(p,
106 dst0_uw,
107 stride(suboffset(r1_uw, 4), 2, 4, 0),
108 brw_imm_v(0x10101010));
109 }
110
111 if (mask & WRITEMASK_Y) {
112 brw_ADD(p,
113 dst1_uw,
114 stride(suboffset(r1_uw,5), 2, 4, 0),
115 brw_imm_v(0x11001100));
116 }
117 brw_pop_insn_state(p);
118 }
119
120
121 void emit_delta_xy(struct brw_compile *p,
122 const struct brw_reg *dst,
123 GLuint mask,
124 const struct brw_reg *arg0)
125 {
126 struct brw_reg r1 = brw_vec1_grf(1, 0);
127
128 /* Calc delta X,Y by subtracting origin in r1 from the pixel
129 * centers.
130 */
131 if (mask & WRITEMASK_X) {
132 brw_ADD(p,
133 dst[0],
134 retype(arg0[0], BRW_REGISTER_TYPE_UW),
135 negate(r1));
136 }
137
138 if (mask & WRITEMASK_Y) {
139 brw_ADD(p,
140 dst[1],
141 retype(arg0[1], BRW_REGISTER_TYPE_UW),
142 negate(suboffset(r1,1)));
143
144 }
145 }
146
147 void emit_wpos_xy(struct brw_wm_compile *c,
148 const struct brw_reg *dst,
149 GLuint mask,
150 const struct brw_reg *arg0)
151 {
152 struct brw_compile *p = &c->func;
153
154 /* Calculate the pixel offset from window bottom left into destination
155 * X and Y channels.
156 */
157 if (mask & WRITEMASK_X) {
158 if (c->fp->program.PixelCenterInteger) {
159 /* X' = X */
160 brw_MOV(p,
161 dst[0],
162 retype(arg0[0], BRW_REGISTER_TYPE_W));
163 } else {
164 /* X' = X + 0.5 */
165 brw_ADD(p,
166 dst[0],
167 retype(arg0[0], BRW_REGISTER_TYPE_W),
168 brw_imm_f(0.5));
169 }
170 }
171
172 if (mask & WRITEMASK_Y) {
173 if (c->fp->program.OriginUpperLeft) {
174 if (c->fp->program.PixelCenterInteger) {
175 /* Y' = Y */
176 brw_MOV(p,
177 dst[1],
178 retype(arg0[1], BRW_REGISTER_TYPE_W));
179 } else {
180 /* Y' = Y + 0.5 */
181 brw_ADD(p,
182 dst[1],
183 retype(arg0[1], BRW_REGISTER_TYPE_W),
184 brw_imm_f(0.5));
185 }
186 } else {
187 float center_offset = c->fp->program.PixelCenterInteger ? 0.0 : 0.5;
188
189 /* Y' = (height - 1) - Y + center */
190 brw_ADD(p,
191 dst[1],
192 negate(retype(arg0[1], BRW_REGISTER_TYPE_W)),
193 brw_imm_f(c->key.drawable_height - 1 + center_offset));
194 }
195 }
196 }
197
198
199 void emit_pixel_w(struct brw_wm_compile *c,
200 const struct brw_reg *dst,
201 GLuint mask,
202 const struct brw_reg *arg0,
203 const struct brw_reg *deltas)
204 {
205 struct brw_compile *p = &c->func;
206 struct intel_context *intel = &p->brw->intel;
207
208 /* Don't need this if all you are doing is interpolating color, for
209 * instance.
210 */
211 if (mask & WRITEMASK_W) {
212 struct brw_reg interp3 = brw_vec1_grf(arg0[0].nr+1, 4);
213
214 /* Calc 1/w - just linterp wpos[3] optimized by putting the
215 * result straight into a message reg.
216 */
217 if (can_do_pln(intel, deltas)) {
218 brw_PLN(p, brw_message_reg(2), interp3, deltas[0]);
219 } else {
220 brw_LINE(p, brw_null_reg(), interp3, deltas[0]);
221 brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), deltas[1]);
222 }
223
224 /* Calc w */
225 if (c->dispatch_width == 16) {
226 brw_math_16(p, dst[3],
227 BRW_MATH_FUNCTION_INV,
228 BRW_MATH_SATURATE_NONE,
229 2, brw_null_reg(),
230 BRW_MATH_PRECISION_FULL);
231 } else {
232 brw_math(p, dst[3],
233 BRW_MATH_FUNCTION_INV,
234 BRW_MATH_SATURATE_NONE,
235 2, brw_null_reg(),
236 BRW_MATH_DATA_VECTOR,
237 BRW_MATH_PRECISION_FULL);
238 }
239 }
240 }
241
242
243 void emit_linterp(struct brw_compile *p,
244 const struct brw_reg *dst,
245 GLuint mask,
246 const struct brw_reg *arg0,
247 const struct brw_reg *deltas)
248 {
249 struct intel_context *intel = &p->brw->intel;
250 struct brw_reg interp[4];
251 GLuint nr = arg0[0].nr;
252 GLuint i;
253
254 interp[0] = brw_vec1_grf(nr, 0);
255 interp[1] = brw_vec1_grf(nr, 4);
256 interp[2] = brw_vec1_grf(nr+1, 0);
257 interp[3] = brw_vec1_grf(nr+1, 4);
258
259 for (i = 0; i < 4; i++) {
260 if (mask & (1<<i)) {
261 if (can_do_pln(intel, deltas)) {
262 brw_PLN(p, dst[i], interp[i], deltas[0]);
263 } else {
264 brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
265 brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
266 }
267 }
268 }
269 }
270
271
272 void emit_pinterp(struct brw_compile *p,
273 const struct brw_reg *dst,
274 GLuint mask,
275 const struct brw_reg *arg0,
276 const struct brw_reg *deltas,
277 const struct brw_reg *w)
278 {
279 struct intel_context *intel = &p->brw->intel;
280 struct brw_reg interp[4];
281 GLuint nr = arg0[0].nr;
282 GLuint i;
283
284 interp[0] = brw_vec1_grf(nr, 0);
285 interp[1] = brw_vec1_grf(nr, 4);
286 interp[2] = brw_vec1_grf(nr+1, 0);
287 interp[3] = brw_vec1_grf(nr+1, 4);
288
289 for (i = 0; i < 4; i++) {
290 if (mask & (1<<i)) {
291 if (can_do_pln(intel, deltas)) {
292 brw_PLN(p, dst[i], interp[i], deltas[0]);
293 } else {
294 brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
295 brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
296 }
297 }
298 }
299 for (i = 0; i < 4; i++) {
300 if (mask & (1<<i)) {
301 brw_MUL(p, dst[i], dst[i], w[3]);
302 }
303 }
304 }
305
306
307 void emit_cinterp(struct brw_compile *p,
308 const struct brw_reg *dst,
309 GLuint mask,
310 const struct brw_reg *arg0)
311 {
312 struct brw_reg interp[4];
313 GLuint nr = arg0[0].nr;
314 GLuint i;
315
316 interp[0] = brw_vec1_grf(nr, 0);
317 interp[1] = brw_vec1_grf(nr, 4);
318 interp[2] = brw_vec1_grf(nr+1, 0);
319 interp[3] = brw_vec1_grf(nr+1, 4);
320
321 for (i = 0; i < 4; i++) {
322 if (mask & (1<<i)) {
323 brw_MOV(p, dst[i], suboffset(interp[i],3)); /* TODO: optimize away like other moves */
324 }
325 }
326 }
327
328 /* Sets the destination channels to 1.0 or 0.0 according to glFrontFacing. */
329 void emit_frontfacing(struct brw_compile *p,
330 const struct brw_reg *dst,
331 GLuint mask)
332 {
333 struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD);
334 GLuint i;
335
336 if (!(mask & WRITEMASK_XYZW))
337 return;
338
339 for (i = 0; i < 4; i++) {
340 if (mask & (1<<i)) {
341 brw_MOV(p, dst[i], brw_imm_f(0.0));
342 }
343 }
344
345 /* bit 31 is "primitive is back face", so checking < (1 << 31) gives
346 * us front face
347 */
348 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, r1_6ud, brw_imm_ud(1 << 31));
349 for (i = 0; i < 4; i++) {
350 if (mask & (1<<i)) {
351 brw_MOV(p, dst[i], brw_imm_f(1.0));
352 }
353 }
354 brw_set_predicate_control_flag_value(p, 0xff);
355 }
356
357 /* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input
358 * looking like:
359 *
360 * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br
361 *
362 * and we're trying to produce:
363 *
364 * DDX DDY
365 * dst: (ss0.tr - ss0.tl) (ss0.tl - ss0.bl)
366 * (ss0.tr - ss0.tl) (ss0.tr - ss0.br)
367 * (ss0.br - ss0.bl) (ss0.tl - ss0.bl)
368 * (ss0.br - ss0.bl) (ss0.tr - ss0.br)
369 * (ss1.tr - ss1.tl) (ss1.tl - ss1.bl)
370 * (ss1.tr - ss1.tl) (ss1.tr - ss1.br)
371 * (ss1.br - ss1.bl) (ss1.tl - ss1.bl)
372 * (ss1.br - ss1.bl) (ss1.tr - ss1.br)
373 *
374 * and add another set of two more subspans if in 16-pixel dispatch mode.
375 *
376 * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result
377 * for each pair, and vertstride = 2 jumps us 2 elements after processing a
378 * pair. But for DDY, it's harder, as we want to produce the pairs swizzled
379 * between each other. We could probably do it like ddx and swizzle the right
380 * order later, but bail for now and just produce
381 * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4)
382 */
383 void emit_ddxy(struct brw_compile *p,
384 const struct brw_reg *dst,
385 GLuint mask,
386 GLboolean is_ddx,
387 const struct brw_reg *arg0)
388 {
389 int i;
390 struct brw_reg src0, src1;
391
392 if (mask & SATURATE)
393 brw_set_saturate(p, 1);
394 for (i = 0; i < 4; i++ ) {
395 if (mask & (1<<i)) {
396 if (is_ddx) {
397 src0 = brw_reg(arg0[i].file, arg0[i].nr, 1,
398 BRW_REGISTER_TYPE_F,
399 BRW_VERTICAL_STRIDE_2,
400 BRW_WIDTH_2,
401 BRW_HORIZONTAL_STRIDE_0,
402 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
403 src1 = brw_reg(arg0[i].file, arg0[i].nr, 0,
404 BRW_REGISTER_TYPE_F,
405 BRW_VERTICAL_STRIDE_2,
406 BRW_WIDTH_2,
407 BRW_HORIZONTAL_STRIDE_0,
408 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
409 } else {
410 src0 = brw_reg(arg0[i].file, arg0[i].nr, 0,
411 BRW_REGISTER_TYPE_F,
412 BRW_VERTICAL_STRIDE_4,
413 BRW_WIDTH_4,
414 BRW_HORIZONTAL_STRIDE_0,
415 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
416 src1 = brw_reg(arg0[i].file, arg0[i].nr, 2,
417 BRW_REGISTER_TYPE_F,
418 BRW_VERTICAL_STRIDE_4,
419 BRW_WIDTH_4,
420 BRW_HORIZONTAL_STRIDE_0,
421 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
422 }
423 brw_ADD(p, dst[i], src0, negate(src1));
424 }
425 }
426 if (mask & SATURATE)
427 brw_set_saturate(p, 0);
428 }
429
430 void emit_alu1(struct brw_compile *p,
431 struct brw_instruction *(*func)(struct brw_compile *,
432 struct brw_reg,
433 struct brw_reg),
434 const struct brw_reg *dst,
435 GLuint mask,
436 const struct brw_reg *arg0)
437 {
438 GLuint i;
439
440 if (mask & SATURATE)
441 brw_set_saturate(p, 1);
442
443 for (i = 0; i < 4; i++) {
444 if (mask & (1<<i)) {
445 func(p, dst[i], arg0[i]);
446 }
447 }
448
449 if (mask & SATURATE)
450 brw_set_saturate(p, 0);
451 }
452
453
454 void emit_alu2(struct brw_compile *p,
455 struct brw_instruction *(*func)(struct brw_compile *,
456 struct brw_reg,
457 struct brw_reg,
458 struct brw_reg),
459 const struct brw_reg *dst,
460 GLuint mask,
461 const struct brw_reg *arg0,
462 const struct brw_reg *arg1)
463 {
464 GLuint i;
465
466 if (mask & SATURATE)
467 brw_set_saturate(p, 1);
468
469 for (i = 0; i < 4; i++) {
470 if (mask & (1<<i)) {
471 func(p, dst[i], arg0[i], arg1[i]);
472 }
473 }
474
475 if (mask & SATURATE)
476 brw_set_saturate(p, 0);
477 }
478
479
480 void emit_mad(struct brw_compile *p,
481 const struct brw_reg *dst,
482 GLuint mask,
483 const struct brw_reg *arg0,
484 const struct brw_reg *arg1,
485 const struct brw_reg *arg2)
486 {
487 GLuint i;
488
489 for (i = 0; i < 4; i++) {
490 if (mask & (1<<i)) {
491 brw_MUL(p, dst[i], arg0[i], arg1[i]);
492
493 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
494 brw_ADD(p, dst[i], dst[i], arg2[i]);
495 brw_set_saturate(p, 0);
496 }
497 }
498 }
499
500 void emit_lrp(struct brw_compile *p,
501 const struct brw_reg *dst,
502 GLuint mask,
503 const struct brw_reg *arg0,
504 const struct brw_reg *arg1,
505 const struct brw_reg *arg2)
506 {
507 GLuint i;
508
509 /* Uses dst as a temporary:
510 */
511 for (i = 0; i < 4; i++) {
512 if (mask & (1<<i)) {
513 /* Can I use the LINE instruction for this?
514 */
515 brw_ADD(p, dst[i], negate(arg0[i]), brw_imm_f(1.0));
516 brw_MUL(p, brw_null_reg(), dst[i], arg2[i]);
517
518 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
519 brw_MAC(p, dst[i], arg0[i], arg1[i]);
520 brw_set_saturate(p, 0);
521 }
522 }
523 }
524
525 void emit_sop(struct brw_compile *p,
526 const struct brw_reg *dst,
527 GLuint mask,
528 GLuint cond,
529 const struct brw_reg *arg0,
530 const struct brw_reg *arg1)
531 {
532 GLuint i;
533
534 for (i = 0; i < 4; i++) {
535 if (mask & (1<<i)) {
536 brw_push_insn_state(p);
537 brw_CMP(p, brw_null_reg(), cond, arg0[i], arg1[i]);
538 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
539 brw_MOV(p, dst[i], brw_imm_f(0));
540 brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
541 brw_MOV(p, dst[i], brw_imm_f(1.0));
542 brw_pop_insn_state(p);
543 }
544 }
545 }
546
547 static void emit_slt( struct brw_compile *p,
548 const struct brw_reg *dst,
549 GLuint mask,
550 const struct brw_reg *arg0,
551 const struct brw_reg *arg1 )
552 {
553 emit_sop(p, dst, mask, BRW_CONDITIONAL_L, arg0, arg1);
554 }
555
556 static void emit_sle( struct brw_compile *p,
557 const struct brw_reg *dst,
558 GLuint mask,
559 const struct brw_reg *arg0,
560 const struct brw_reg *arg1 )
561 {
562 emit_sop(p, dst, mask, BRW_CONDITIONAL_LE, arg0, arg1);
563 }
564
565 static void emit_sgt( struct brw_compile *p,
566 const struct brw_reg *dst,
567 GLuint mask,
568 const struct brw_reg *arg0,
569 const struct brw_reg *arg1 )
570 {
571 emit_sop(p, dst, mask, BRW_CONDITIONAL_G, arg0, arg1);
572 }
573
574 static void emit_sge( struct brw_compile *p,
575 const struct brw_reg *dst,
576 GLuint mask,
577 const struct brw_reg *arg0,
578 const struct brw_reg *arg1 )
579 {
580 emit_sop(p, dst, mask, BRW_CONDITIONAL_GE, arg0, arg1);
581 }
582
583 static void emit_seq( struct brw_compile *p,
584 const struct brw_reg *dst,
585 GLuint mask,
586 const struct brw_reg *arg0,
587 const struct brw_reg *arg1 )
588 {
589 emit_sop(p, dst, mask, BRW_CONDITIONAL_EQ, arg0, arg1);
590 }
591
592 static void emit_sne( struct brw_compile *p,
593 const struct brw_reg *dst,
594 GLuint mask,
595 const struct brw_reg *arg0,
596 const struct brw_reg *arg1 )
597 {
598 emit_sop(p, dst, mask, BRW_CONDITIONAL_NEQ, arg0, arg1);
599 }
600
601 void emit_cmp(struct brw_compile *p,
602 const struct brw_reg *dst,
603 GLuint mask,
604 const struct brw_reg *arg0,
605 const struct brw_reg *arg1,
606 const struct brw_reg *arg2)
607 {
608 GLuint i;
609
610 for (i = 0; i < 4; i++) {
611 if (mask & (1<<i)) {
612 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
613 brw_MOV(p, dst[i], arg2[i]);
614 brw_set_saturate(p, 0);
615
616 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], brw_imm_f(0));
617
618 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
619 brw_MOV(p, dst[i], arg1[i]);
620 brw_set_saturate(p, 0);
621 brw_set_predicate_control_flag_value(p, 0xff);
622 }
623 }
624 }
625
626 void emit_max(struct brw_compile *p,
627 const struct brw_reg *dst,
628 GLuint mask,
629 const struct brw_reg *arg0,
630 const struct brw_reg *arg1)
631 {
632 GLuint i;
633
634 for (i = 0; i < 4; i++) {
635 if (mask & (1<<i)) {
636 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
637 brw_MOV(p, dst[i], arg0[i]);
638 brw_set_saturate(p, 0);
639
640 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]);
641
642 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
643 brw_MOV(p, dst[i], arg1[i]);
644 brw_set_saturate(p, 0);
645 brw_set_predicate_control_flag_value(p, 0xff);
646 }
647 }
648 }
649
650 void emit_min(struct brw_compile *p,
651 const struct brw_reg *dst,
652 GLuint mask,
653 const struct brw_reg *arg0,
654 const struct brw_reg *arg1)
655 {
656 GLuint i;
657
658 for (i = 0; i < 4; i++) {
659 if (mask & (1<<i)) {
660 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
661 brw_MOV(p, dst[i], arg1[i]);
662 brw_set_saturate(p, 0);
663
664 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]);
665
666 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
667 brw_MOV(p, dst[i], arg0[i]);
668 brw_set_saturate(p, 0);
669 brw_set_predicate_control_flag_value(p, 0xff);
670 }
671 }
672 }
673
674
675 void emit_dp3(struct brw_compile *p,
676 const struct brw_reg *dst,
677 GLuint mask,
678 const struct brw_reg *arg0,
679 const struct brw_reg *arg1)
680 {
681 int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
682
683 if (!(mask & WRITEMASK_XYZW))
684 return; /* Do not emit dead code */
685
686 assert(is_power_of_two(mask & WRITEMASK_XYZW));
687
688 brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
689 brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
690
691 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
692 brw_MAC(p, dst[dst_chan], arg0[2], arg1[2]);
693 brw_set_saturate(p, 0);
694 }
695
696
697 void emit_dp4(struct brw_compile *p,
698 const struct brw_reg *dst,
699 GLuint mask,
700 const struct brw_reg *arg0,
701 const struct brw_reg *arg1)
702 {
703 int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
704
705 if (!(mask & WRITEMASK_XYZW))
706 return; /* Do not emit dead code */
707
708 assert(is_power_of_two(mask & WRITEMASK_XYZW));
709
710 brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
711 brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
712 brw_MAC(p, brw_null_reg(), arg0[2], arg1[2]);
713
714 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
715 brw_MAC(p, dst[dst_chan], arg0[3], arg1[3]);
716 brw_set_saturate(p, 0);
717 }
718
719
720 void emit_dph(struct brw_compile *p,
721 const struct brw_reg *dst,
722 GLuint mask,
723 const struct brw_reg *arg0,
724 const struct brw_reg *arg1)
725 {
726 const int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
727
728 if (!(mask & WRITEMASK_XYZW))
729 return; /* Do not emit dead code */
730
731 assert(is_power_of_two(mask & WRITEMASK_XYZW));
732
733 brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
734 brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
735 brw_MAC(p, dst[dst_chan], arg0[2], arg1[2]);
736
737 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
738 brw_ADD(p, dst[dst_chan], dst[dst_chan], arg1[3]);
739 brw_set_saturate(p, 0);
740 }
741
742
743 void emit_xpd(struct brw_compile *p,
744 const struct brw_reg *dst,
745 GLuint mask,
746 const struct brw_reg *arg0,
747 const struct brw_reg *arg1)
748 {
749 GLuint i;
750
751 assert((mask & WRITEMASK_W) != WRITEMASK_W);
752
753 for (i = 0 ; i < 3; i++) {
754 if (mask & (1<<i)) {
755 GLuint i2 = (i+2)%3;
756 GLuint i1 = (i+1)%3;
757
758 brw_MUL(p, brw_null_reg(), negate(arg0[i2]), arg1[i1]);
759
760 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
761 brw_MAC(p, dst[i], arg0[i1], arg1[i2]);
762 brw_set_saturate(p, 0);
763 }
764 }
765 }
766
767
768 void emit_math1(struct brw_wm_compile *c,
769 GLuint function,
770 const struct brw_reg *dst,
771 GLuint mask,
772 const struct brw_reg *arg0)
773 {
774 struct brw_compile *p = &c->func;
775 int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
776 GLuint saturate = ((mask & SATURATE) ?
777 BRW_MATH_SATURATE_SATURATE :
778 BRW_MATH_SATURATE_NONE);
779
780 if (!(mask & WRITEMASK_XYZW))
781 return; /* Do not emit dead code */
782
783 assert(is_power_of_two(mask & WRITEMASK_XYZW));
784
785 /* If compressed, this will write message reg 2,3 from arg0.x's 16
786 * channels.
787 */
788 brw_MOV(p, brw_message_reg(2), arg0[0]);
789
790 /* Send two messages to perform all 16 operations:
791 */
792 brw_push_insn_state(p);
793 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
794 brw_math(p,
795 dst[dst_chan],
796 function,
797 saturate,
798 2,
799 brw_null_reg(),
800 BRW_MATH_DATA_VECTOR,
801 BRW_MATH_PRECISION_FULL);
802
803 if (c->dispatch_width == 16) {
804 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
805 brw_math(p,
806 offset(dst[dst_chan],1),
807 function,
808 saturate,
809 3,
810 brw_null_reg(),
811 BRW_MATH_DATA_VECTOR,
812 BRW_MATH_PRECISION_FULL);
813 }
814 brw_pop_insn_state(p);
815 }
816
817
818 void emit_math2(struct brw_wm_compile *c,
819 GLuint function,
820 const struct brw_reg *dst,
821 GLuint mask,
822 const struct brw_reg *arg0,
823 const struct brw_reg *arg1)
824 {
825 struct brw_compile *p = &c->func;
826 int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
827 GLuint saturate = ((mask & SATURATE) ?
828 BRW_MATH_SATURATE_SATURATE :
829 BRW_MATH_SATURATE_NONE);
830
831 if (!(mask & WRITEMASK_XYZW))
832 return; /* Do not emit dead code */
833
834 assert(is_power_of_two(mask & WRITEMASK_XYZW));
835
836 brw_push_insn_state(p);
837
838 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
839 brw_MOV(p, brw_message_reg(2), arg0[0]);
840 if (c->dispatch_width == 16) {
841 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
842 brw_MOV(p, brw_message_reg(4), sechalf(arg0[0]));
843 }
844
845 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
846 brw_MOV(p, brw_message_reg(3), arg1[0]);
847 if (c->dispatch_width == 16) {
848 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
849 brw_MOV(p, brw_message_reg(5), sechalf(arg1[0]));
850 }
851
852 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
853 brw_math(p,
854 dst[dst_chan],
855 function,
856 saturate,
857 2,
858 brw_null_reg(),
859 BRW_MATH_DATA_VECTOR,
860 BRW_MATH_PRECISION_FULL);
861
862 /* Send two messages to perform all 16 operations:
863 */
864 if (c->dispatch_width == 16) {
865 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
866 brw_math(p,
867 offset(dst[dst_chan],1),
868 function,
869 saturate,
870 4,
871 brw_null_reg(),
872 BRW_MATH_DATA_VECTOR,
873 BRW_MATH_PRECISION_FULL);
874 }
875 brw_pop_insn_state(p);
876 }
877
878
879 void emit_tex(struct brw_wm_compile *c,
880 struct brw_reg *dst,
881 GLuint dst_flags,
882 struct brw_reg *arg,
883 struct brw_reg depth_payload,
884 GLuint tex_idx,
885 GLuint sampler,
886 GLboolean shadow)
887 {
888 struct brw_compile *p = &c->func;
889 struct intel_context *intel = &p->brw->intel;
890 struct brw_reg dst_retyped;
891 GLuint cur_mrf = 2, response_length;
892 GLuint i, nr_texcoords;
893 GLuint emit;
894 GLuint msg_type;
895 GLuint mrf_per_channel;
896 GLuint simd_mode;
897
898 if (c->dispatch_width == 16) {
899 mrf_per_channel = 2;
900 response_length = 8;
901 dst_retyped = retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW);
902 simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
903 } else {
904 mrf_per_channel = 1;
905 response_length = 4;
906 dst_retyped = retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW);
907 simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8;
908 }
909
910 /* How many input regs are there?
911 */
912 switch (tex_idx) {
913 case TEXTURE_1D_INDEX:
914 emit = WRITEMASK_X;
915 nr_texcoords = 1;
916 break;
917 case TEXTURE_2D_INDEX:
918 case TEXTURE_RECT_INDEX:
919 emit = WRITEMASK_XY;
920 nr_texcoords = 2;
921 break;
922 case TEXTURE_3D_INDEX:
923 case TEXTURE_CUBE_INDEX:
924 emit = WRITEMASK_XYZ;
925 nr_texcoords = 3;
926 break;
927 default:
928 /* unexpected target */
929 abort();
930 }
931
932 /* Pre-Ironlake, the 8-wide sampler always took u,v,r. */
933 if (!intel->is_ironlake && c->dispatch_width == 8)
934 nr_texcoords = 3;
935
936 /* For shadow comparisons, we have to supply u,v,r. */
937 if (shadow)
938 nr_texcoords = 3;
939
940 /* Emit the texcoords. */
941 for (i = 0; i < nr_texcoords; i++) {
942 if (emit & (1<<i))
943 brw_MOV(p, brw_message_reg(cur_mrf), arg[i]);
944 else
945 brw_MOV(p, brw_message_reg(cur_mrf), brw_imm_f(0));
946 cur_mrf += mrf_per_channel;
947 }
948
949 /* Fill in the shadow comparison reference value. */
950 if (shadow) {
951 if (intel->is_ironlake) {
952 /* Fill in the cube map array index value. */
953 brw_MOV(p, brw_message_reg(cur_mrf), brw_imm_f(0));
954 cur_mrf += mrf_per_channel;
955 } else if (c->dispatch_width == 8) {
956 /* Fill in the LOD bias value. */
957 brw_MOV(p, brw_message_reg(cur_mrf), brw_imm_f(0));
958 cur_mrf += mrf_per_channel;
959 }
960 brw_MOV(p, brw_message_reg(cur_mrf), arg[2]);
961 cur_mrf += mrf_per_channel;
962 }
963
964 if (intel->is_ironlake) {
965 if (shadow)
966 msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_COMPARE_IGDNG;
967 else
968 msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_IGDNG;
969 } else {
970 /* Note that G45 and older determines shadow compare and dispatch width
971 * from message length for most messages.
972 */
973 if (c->dispatch_width == 16 && shadow)
974 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE;
975 else
976 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE;
977 }
978
979 brw_SAMPLE(p,
980 dst_retyped,
981 1,
982 retype(depth_payload, BRW_REGISTER_TYPE_UW),
983 SURF_INDEX_TEXTURE(sampler),
984 sampler,
985 dst_flags & WRITEMASK_XYZW,
986 msg_type,
987 response_length,
988 cur_mrf - 1,
989 0,
990 1,
991 simd_mode);
992 }
993
994
995 void emit_txb(struct brw_wm_compile *c,
996 struct brw_reg *dst,
997 GLuint dst_flags,
998 struct brw_reg *arg,
999 struct brw_reg depth_payload,
1000 GLuint tex_idx,
1001 GLuint sampler)
1002 {
1003 struct brw_compile *p = &c->func;
1004 struct intel_context *intel = &p->brw->intel;
1005 GLuint msgLength;
1006 GLuint msg_type;
1007 GLuint mrf_per_channel;
1008 GLuint response_length;
1009 struct brw_reg dst_retyped;
1010
1011 /* The G45 and older chipsets don't support 8-wide dispatch for LOD biased
1012 * samples, so we'll use the 16-wide instruction, leave the second halves
1013 * undefined, and trust the execution mask to keep the undefined pixels
1014 * from mattering.
1015 */
1016 if (c->dispatch_width == 16 || !intel->is_ironlake) {
1017 if (intel->is_ironlake)
1018 msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_IGDNG;
1019 else
1020 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
1021 mrf_per_channel = 2;
1022 dst_retyped = retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW);
1023 response_length = 8;
1024 } else {
1025 msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_IGDNG;
1026 mrf_per_channel = 1;
1027 dst_retyped = retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW);
1028 response_length = 4;
1029 }
1030
1031 /* Shadow ignored for txb. */
1032 switch (tex_idx) {
1033 case TEXTURE_1D_INDEX:
1034 brw_MOV(p, brw_message_reg(2 + 0 * mrf_per_channel), arg[0]);
1035 brw_MOV(p, brw_message_reg(2 + 1 * mrf_per_channel), brw_imm_f(0));
1036 brw_MOV(p, brw_message_reg(2 + 2 * mrf_per_channel), brw_imm_f(0));
1037 break;
1038 case TEXTURE_2D_INDEX:
1039 case TEXTURE_RECT_INDEX:
1040 brw_MOV(p, brw_message_reg(2 + 0 * mrf_per_channel), arg[0]);
1041 brw_MOV(p, brw_message_reg(2 + 1 * mrf_per_channel), arg[1]);
1042 brw_MOV(p, brw_message_reg(2 + 2 * mrf_per_channel), brw_imm_f(0));
1043 break;
1044 case TEXTURE_3D_INDEX:
1045 case TEXTURE_CUBE_INDEX:
1046 brw_MOV(p, brw_message_reg(2 + 0 * mrf_per_channel), arg[0]);
1047 brw_MOV(p, brw_message_reg(2 + 1 * mrf_per_channel), arg[1]);
1048 brw_MOV(p, brw_message_reg(2 + 2 * mrf_per_channel), arg[2]);
1049 break;
1050 default:
1051 /* unexpected target */
1052 abort();
1053 }
1054
1055 brw_MOV(p, brw_message_reg(2 + 3 * mrf_per_channel), arg[3]);
1056 msgLength = 2 + 4 * mrf_per_channel - 1;
1057
1058 brw_SAMPLE(p,
1059 dst_retyped,
1060 1,
1061 retype(depth_payload, BRW_REGISTER_TYPE_UW),
1062 SURF_INDEX_TEXTURE(sampler),
1063 sampler,
1064 dst_flags & WRITEMASK_XYZW,
1065 msg_type,
1066 response_length,
1067 msgLength,
1068 0,
1069 1,
1070 BRW_SAMPLER_SIMD_MODE_SIMD16);
1071 }
1072
1073
1074 static void emit_lit(struct brw_wm_compile *c,
1075 const struct brw_reg *dst,
1076 GLuint mask,
1077 const struct brw_reg *arg0)
1078 {
1079 struct brw_compile *p = &c->func;
1080
1081 assert((mask & WRITEMASK_XW) == 0);
1082
1083 if (mask & WRITEMASK_Y) {
1084 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
1085 brw_MOV(p, dst[1], arg0[0]);
1086 brw_set_saturate(p, 0);
1087 }
1088
1089 if (mask & WRITEMASK_Z) {
1090 emit_math2(c, BRW_MATH_FUNCTION_POW,
1091 &dst[2],
1092 WRITEMASK_X | (mask & SATURATE),
1093 &arg0[1],
1094 &arg0[3]);
1095 }
1096
1097 /* Ordinarily you'd use an iff statement to skip or shortcircuit
1098 * some of the POW calculations above, but 16-wide iff statements
1099 * seem to lock c1 hardware, so this is a nasty workaround:
1100 */
1101 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_LE, arg0[0], brw_imm_f(0));
1102 {
1103 if (mask & WRITEMASK_Y)
1104 brw_MOV(p, dst[1], brw_imm_f(0));
1105
1106 if (mask & WRITEMASK_Z)
1107 brw_MOV(p, dst[2], brw_imm_f(0));
1108 }
1109 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1110 }
1111
1112
1113 /* Kill pixel - set execution mask to zero for those pixels which
1114 * fail.
1115 */
1116 static void emit_kil( struct brw_wm_compile *c,
1117 struct brw_reg *arg0)
1118 {
1119 struct brw_compile *p = &c->func;
1120 struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
1121 GLuint i;
1122
1123 /* XXX - usually won't need 4 compares!
1124 */
1125 for (i = 0; i < 4; i++) {
1126 brw_push_insn_state(p);
1127 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0[i], brw_imm_f(0));
1128 brw_set_predicate_control_flag_value(p, 0xff);
1129 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1130 brw_AND(p, r0uw, brw_flag_reg(), r0uw);
1131 brw_pop_insn_state(p);
1132 }
1133 }
1134
1135 /* KIL_NV kills the pixels that are currently executing, not based on a test
1136 * of the arguments.
1137 */
1138 static void emit_kil_nv( struct brw_wm_compile *c )
1139 {
1140 struct brw_compile *p = &c->func;
1141 struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
1142
1143 brw_push_insn_state(p);
1144 brw_set_mask_control(p, BRW_MASK_DISABLE);
1145 brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); /* IMASK */
1146 brw_AND(p, r0uw, c->emit_mask_reg, r0uw);
1147 brw_pop_insn_state(p);
1148 }
1149
1150 static void fire_fb_write( struct brw_wm_compile *c,
1151 GLuint base_reg,
1152 GLuint nr,
1153 GLuint target,
1154 GLuint eot )
1155 {
1156 struct brw_compile *p = &c->func;
1157 struct brw_reg dst;
1158
1159 if (c->dispatch_width == 16)
1160 dst = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
1161 else
1162 dst = retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW);
1163
1164 /* Pass through control information:
1165 */
1166 /* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */
1167 {
1168 brw_push_insn_state(p);
1169 brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */
1170 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1171 brw_MOV(p,
1172 brw_message_reg(base_reg + 1),
1173 brw_vec8_grf(1, 0));
1174 brw_pop_insn_state(p);
1175 }
1176
1177 /* Send framebuffer write message: */
1178 /* send (16) null.0<1>:uw m0 r0.0<8;8,1>:uw 0x85a04000:ud { Align1 EOT } */
1179 brw_fb_WRITE(p,
1180 dst,
1181 base_reg,
1182 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
1183 target,
1184 nr,
1185 0,
1186 eot);
1187 }
1188
1189
1190 static void emit_aa( struct brw_wm_compile *c,
1191 struct brw_reg *arg1,
1192 GLuint reg )
1193 {
1194 struct brw_compile *p = &c->func;
1195 GLuint comp = c->key.aa_dest_stencil_reg / 2;
1196 GLuint off = c->key.aa_dest_stencil_reg % 2;
1197 struct brw_reg aa = offset(arg1[comp], off);
1198
1199 brw_push_insn_state(p);
1200 brw_set_compression_control(p, BRW_COMPRESSION_NONE); /* ?? */
1201 brw_MOV(p, brw_message_reg(reg), aa);
1202 brw_pop_insn_state(p);
1203 }
1204
1205
1206 /* Post-fragment-program processing. Send the results to the
1207 * framebuffer.
1208 * \param arg0 the fragment color
1209 * \param arg1 the pass-through depth value
1210 * \param arg2 the shader-computed depth value
1211 */
1212 void emit_fb_write(struct brw_wm_compile *c,
1213 struct brw_reg *arg0,
1214 struct brw_reg *arg1,
1215 struct brw_reg *arg2,
1216 GLuint target,
1217 GLuint eot)
1218 {
1219 struct brw_compile *p = &c->func;
1220 struct brw_context *brw = p->brw;
1221 GLuint nr = 2;
1222 GLuint channel;
1223
1224 /* Reserve a space for AA - may not be needed:
1225 */
1226 if (c->key.aa_dest_stencil_reg)
1227 nr += 1;
1228
1229 /* I don't really understand how this achieves the color interleave
1230 * (ie RGBARGBA) in the result: [Do the saturation here]
1231 */
1232 brw_push_insn_state(p);
1233
1234 for (channel = 0; channel < 4; channel++) {
1235 if (c->dispatch_width == 16 && brw->has_compr4) {
1236 /* By setting the high bit of the MRF register number, we indicate
1237 * that we want COMPR4 mode - instead of doing the usual destination
1238 * + 1 for the second half we get destination + 4.
1239 */
1240 brw_MOV(p,
1241 brw_message_reg(nr + channel + (1 << 7)),
1242 arg0[channel]);
1243 } else {
1244 /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */
1245 /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */
1246 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1247 brw_MOV(p,
1248 brw_message_reg(nr + channel),
1249 arg0[channel]);
1250
1251 if (c->dispatch_width == 16) {
1252 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
1253 brw_MOV(p,
1254 brw_message_reg(nr + channel + 4),
1255 sechalf(arg0[channel]));
1256 }
1257 }
1258 }
1259 /* skip over the regs populated above:
1260 */
1261 nr += 8;
1262 brw_pop_insn_state(p);
1263
1264 if (c->key.source_depth_to_render_target)
1265 {
1266 if (c->key.computes_depth)
1267 brw_MOV(p, brw_message_reg(nr), arg2[2]);
1268 else
1269 brw_MOV(p, brw_message_reg(nr), arg1[1]); /* ? */
1270
1271 nr += 2;
1272 }
1273
1274 if (c->key.dest_depth_reg)
1275 {
1276 GLuint comp = c->key.dest_depth_reg / 2;
1277 GLuint off = c->key.dest_depth_reg % 2;
1278
1279 if (off != 0) {
1280 brw_push_insn_state(p);
1281 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1282
1283 brw_MOV(p, brw_message_reg(nr), offset(arg1[comp],1));
1284 /* 2nd half? */
1285 brw_MOV(p, brw_message_reg(nr+1), arg1[comp+1]);
1286 brw_pop_insn_state(p);
1287 }
1288 else {
1289 brw_MOV(p, brw_message_reg(nr), arg1[comp]);
1290 }
1291 nr += 2;
1292 }
1293
1294 if (!c->key.runtime_check_aads_emit) {
1295 if (c->key.aa_dest_stencil_reg)
1296 emit_aa(c, arg1, 2);
1297
1298 fire_fb_write(c, 0, nr, target, eot);
1299 }
1300 else {
1301 struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
1302 struct brw_reg ip = brw_ip_reg();
1303 struct brw_instruction *jmp;
1304
1305 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1306 brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
1307 brw_AND(p,
1308 v1_null_ud,
1309 get_element_ud(brw_vec8_grf(1,0), 6),
1310 brw_imm_ud(1<<26));
1311
1312 jmp = brw_JMPI(p, ip, ip, brw_imm_w(0));
1313 {
1314 emit_aa(c, arg1, 2);
1315 fire_fb_write(c, 0, nr, target, eot);
1316 /* note - thread killed in subroutine */
1317 }
1318 brw_land_fwd_jump(p, jmp);
1319
1320 /* ELSE: Shuffle up one register to fill in the hole left for AA:
1321 */
1322 fire_fb_write(c, 1, nr-1, target, eot);
1323 }
1324 }
1325
1326 /**
1327 * Move a GPR to scratch memory.
1328 */
1329 static void emit_spill( struct brw_wm_compile *c,
1330 struct brw_reg reg,
1331 GLuint slot )
1332 {
1333 struct brw_compile *p = &c->func;
1334
1335 /*
1336 mov (16) m2.0<1>:ud r2.0<8;8,1>:ud { Align1 Compr }
1337 */
1338 brw_MOV(p, brw_message_reg(2), reg);
1339
1340 /*
1341 mov (1) r0.2<1>:d 0x00000080:d { Align1 NoMask }
1342 send (16) null.0<1>:uw m1 r0.0<8;8,1>:uw 0x053003ff:ud { Align1 }
1343 */
1344 brw_dp_WRITE_16(p,
1345 retype(vec16(brw_vec8_grf(0, 0)), BRW_REGISTER_TYPE_UW),
1346 slot);
1347 }
1348
1349
1350 /**
1351 * Load a GPR from scratch memory.
1352 */
1353 static void emit_unspill( struct brw_wm_compile *c,
1354 struct brw_reg reg,
1355 GLuint slot )
1356 {
1357 struct brw_compile *p = &c->func;
1358
1359 /* Slot 0 is the undef value.
1360 */
1361 if (slot == 0) {
1362 brw_MOV(p, reg, brw_imm_f(0));
1363 return;
1364 }
1365
1366 /*
1367 mov (1) r0.2<1>:d 0x000000c0:d { Align1 NoMask }
1368 send (16) r110.0<1>:uw m1 r0.0<8;8,1>:uw 0x041243ff:ud { Align1 }
1369 */
1370
1371 brw_dp_READ_16(p,
1372 retype(vec16(reg), BRW_REGISTER_TYPE_UW),
1373 slot);
1374 }
1375
1376
1377 /**
1378 * Retrieve up to 4 GEN4 register pairs for the given wm reg:
1379 * Args with unspill_reg != 0 will be loaded from scratch memory.
1380 */
1381 static void get_argument_regs( struct brw_wm_compile *c,
1382 struct brw_wm_ref *arg[],
1383 struct brw_reg *regs )
1384 {
1385 GLuint i;
1386
1387 for (i = 0; i < 4; i++) {
1388 if (arg[i]) {
1389 if (arg[i]->unspill_reg)
1390 emit_unspill(c,
1391 brw_vec8_grf(arg[i]->unspill_reg, 0),
1392 arg[i]->value->spill_slot);
1393
1394 regs[i] = arg[i]->hw_reg;
1395 }
1396 else {
1397 regs[i] = brw_null_reg();
1398 }
1399 }
1400 }
1401
1402
1403 /**
1404 * For values that have a spill_slot!=0, write those regs to scratch memory.
1405 */
1406 static void spill_values( struct brw_wm_compile *c,
1407 struct brw_wm_value *values,
1408 GLuint nr )
1409 {
1410 GLuint i;
1411
1412 for (i = 0; i < nr; i++)
1413 if (values[i].spill_slot)
1414 emit_spill(c, values[i].hw_reg, values[i].spill_slot);
1415 }
1416
1417
1418 /* Emit the fragment program instructions here.
1419 */
1420 void brw_wm_emit( struct brw_wm_compile *c )
1421 {
1422 struct brw_compile *p = &c->func;
1423 GLuint insn;
1424
1425 brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
1426
1427 /* Check if any of the payload regs need to be spilled:
1428 */
1429 spill_values(c, c->payload.depth, 4);
1430 spill_values(c, c->creg, c->nr_creg);
1431 spill_values(c, c->payload.input_interp, FRAG_ATTRIB_MAX);
1432
1433
1434 for (insn = 0; insn < c->nr_insns; insn++) {
1435
1436 struct brw_wm_instruction *inst = &c->instruction[insn];
1437 struct brw_reg args[3][4], dst[4];
1438 GLuint i, dst_flags;
1439
1440 /* Get argument regs:
1441 */
1442 for (i = 0; i < 3; i++)
1443 get_argument_regs(c, inst->src[i], args[i]);
1444
1445 /* Get dest regs:
1446 */
1447 for (i = 0; i < 4; i++)
1448 if (inst->dst[i])
1449 dst[i] = inst->dst[i]->hw_reg;
1450 else
1451 dst[i] = brw_null_reg();
1452
1453 /* Flags
1454 */
1455 dst_flags = inst->writemask;
1456 if (inst->saturate)
1457 dst_flags |= SATURATE;
1458
1459 switch (inst->opcode) {
1460 /* Generated instructions for calculating triangle interpolants:
1461 */
1462 case WM_PIXELXY:
1463 emit_pixel_xy(c, dst, dst_flags);
1464 break;
1465
1466 case WM_DELTAXY:
1467 emit_delta_xy(p, dst, dst_flags, args[0]);
1468 break;
1469
1470 case WM_WPOSXY:
1471 emit_wpos_xy(c, dst, dst_flags, args[0]);
1472 break;
1473
1474 case WM_PIXELW:
1475 emit_pixel_w(c, dst, dst_flags, args[0], args[1]);
1476 break;
1477
1478 case WM_LINTERP:
1479 emit_linterp(p, dst, dst_flags, args[0], args[1]);
1480 break;
1481
1482 case WM_PINTERP:
1483 emit_pinterp(p, dst, dst_flags, args[0], args[1], args[2]);
1484 break;
1485
1486 case WM_CINTERP:
1487 emit_cinterp(p, dst, dst_flags, args[0]);
1488 break;
1489
1490 case WM_FB_WRITE:
1491 emit_fb_write(c, args[0], args[1], args[2], inst->target, inst->eot);
1492 break;
1493
1494 case WM_FRONTFACING:
1495 emit_frontfacing(p, dst, dst_flags);
1496 break;
1497
1498 /* Straightforward arithmetic:
1499 */
1500 case OPCODE_ADD:
1501 emit_alu2(p, brw_ADD, dst, dst_flags, args[0], args[1]);
1502 break;
1503
1504 case OPCODE_FRC:
1505 emit_alu1(p, brw_FRC, dst, dst_flags, args[0]);
1506 break;
1507
1508 case OPCODE_FLR:
1509 emit_alu1(p, brw_RNDD, dst, dst_flags, args[0]);
1510 break;
1511
1512 case OPCODE_DDX:
1513 emit_ddxy(p, dst, dst_flags, GL_TRUE, args[0]);
1514 break;
1515
1516 case OPCODE_DDY:
1517 emit_ddxy(p, dst, dst_flags, GL_FALSE, args[0]);
1518 break;
1519
1520 case OPCODE_DP3:
1521 emit_dp3(p, dst, dst_flags, args[0], args[1]);
1522 break;
1523
1524 case OPCODE_DP4:
1525 emit_dp4(p, dst, dst_flags, args[0], args[1]);
1526 break;
1527
1528 case OPCODE_DPH:
1529 emit_dph(p, dst, dst_flags, args[0], args[1]);
1530 break;
1531
1532 case OPCODE_TRUNC:
1533 emit_alu1(p, brw_RNDZ, dst, dst_flags, args[0]);
1534 break;
1535
1536 case OPCODE_LRP:
1537 emit_lrp(p, dst, dst_flags, args[0], args[1], args[2]);
1538 break;
1539
1540 case OPCODE_MAD:
1541 emit_mad(p, dst, dst_flags, args[0], args[1], args[2]);
1542 break;
1543
1544 case OPCODE_MOV:
1545 case OPCODE_SWZ:
1546 emit_alu1(p, brw_MOV, dst, dst_flags, args[0]);
1547 break;
1548
1549 case OPCODE_MUL:
1550 emit_alu2(p, brw_MUL, dst, dst_flags, args[0], args[1]);
1551 break;
1552
1553 case OPCODE_XPD:
1554 emit_xpd(p, dst, dst_flags, args[0], args[1]);
1555 break;
1556
1557 /* Higher math functions:
1558 */
1559 case OPCODE_RCP:
1560 emit_math1(c, BRW_MATH_FUNCTION_INV, dst, dst_flags, args[0]);
1561 break;
1562
1563 case OPCODE_RSQ:
1564 emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst, dst_flags, args[0]);
1565 break;
1566
1567 case OPCODE_SIN:
1568 emit_math1(c, BRW_MATH_FUNCTION_SIN, dst, dst_flags, args[0]);
1569 break;
1570
1571 case OPCODE_COS:
1572 emit_math1(c, BRW_MATH_FUNCTION_COS, dst, dst_flags, args[0]);
1573 break;
1574
1575 case OPCODE_EX2:
1576 emit_math1(c, BRW_MATH_FUNCTION_EXP, dst, dst_flags, args[0]);
1577 break;
1578
1579 case OPCODE_LG2:
1580 emit_math1(c, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]);
1581 break;
1582
1583 case OPCODE_SCS:
1584 /* There is an scs math function, but it would need some
1585 * fixup for 16-element execution.
1586 */
1587 if (dst_flags & WRITEMASK_X)
1588 emit_math1(c, BRW_MATH_FUNCTION_COS, dst, (dst_flags&SATURATE)|WRITEMASK_X, args[0]);
1589 if (dst_flags & WRITEMASK_Y)
1590 emit_math1(c, BRW_MATH_FUNCTION_SIN, dst+1, (dst_flags&SATURATE)|WRITEMASK_X, args[0]);
1591 break;
1592
1593 case OPCODE_POW:
1594 emit_math2(c, BRW_MATH_FUNCTION_POW, dst, dst_flags, args[0], args[1]);
1595 break;
1596
1597 /* Comparisons:
1598 */
1599 case OPCODE_CMP:
1600 emit_cmp(p, dst, dst_flags, args[0], args[1], args[2]);
1601 break;
1602
1603 case OPCODE_MAX:
1604 emit_max(p, dst, dst_flags, args[0], args[1]);
1605 break;
1606
1607 case OPCODE_MIN:
1608 emit_min(p, dst, dst_flags, args[0], args[1]);
1609 break;
1610
1611 case OPCODE_SLT:
1612 emit_slt(p, dst, dst_flags, args[0], args[1]);
1613 break;
1614
1615 case OPCODE_SLE:
1616 emit_sle(p, dst, dst_flags, args[0], args[1]);
1617 break;
1618 case OPCODE_SGT:
1619 emit_sgt(p, dst, dst_flags, args[0], args[1]);
1620 break;
1621 case OPCODE_SGE:
1622 emit_sge(p, dst, dst_flags, args[0], args[1]);
1623 break;
1624 case OPCODE_SEQ:
1625 emit_seq(p, dst, dst_flags, args[0], args[1]);
1626 break;
1627 case OPCODE_SNE:
1628 emit_sne(p, dst, dst_flags, args[0], args[1]);
1629 break;
1630
1631 case OPCODE_LIT:
1632 emit_lit(c, dst, dst_flags, args[0]);
1633 break;
1634
1635 /* Texturing operations:
1636 */
1637 case OPCODE_TEX:
1638 emit_tex(c, dst, dst_flags, args[0], c->payload.depth[0].hw_reg,
1639 inst->tex_idx, inst->tex_unit,
1640 inst->tex_shadow);
1641 break;
1642
1643 case OPCODE_TXB:
1644 emit_txb(c, dst, dst_flags, args[0], c->payload.depth[0].hw_reg,
1645 inst->tex_idx, inst->tex_unit);
1646 break;
1647
1648 case OPCODE_KIL:
1649 emit_kil(c, args[0]);
1650 break;
1651
1652 case OPCODE_KIL_NV:
1653 emit_kil_nv(c);
1654 break;
1655
1656 default:
1657 printf("Unsupported opcode %i (%s) in fragment shader\n",
1658 inst->opcode, inst->opcode < MAX_OPCODE ?
1659 _mesa_opcode_string(inst->opcode) :
1660 "unknown");
1661 }
1662
1663 for (i = 0; i < 4; i++)
1664 if (inst->dst[i] && inst->dst[i]->spill_slot)
1665 emit_spill(c,
1666 inst->dst[i]->hw_reg,
1667 inst->dst[i]->spill_slot);
1668 }
1669
1670 if (INTEL_DEBUG & DEBUG_WM) {
1671 int i;
1672
1673 printf("wm-native:\n");
1674 for (i = 0; i < p->nr_insn; i++)
1675 brw_disasm(stderr, &p->store[i]);
1676 printf("\n");
1677 }
1678 }