i965: Add support for the CMP opcode in the GLSL path.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_wm_emit.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "main/macros.h"
34 #include "brw_context.h"
35 #include "brw_wm.h"
36
37 /* Not quite sure how correct this is - need to understand horiz
38 * vs. vertical strides a little better.
39 */
40 static INLINE struct brw_reg sechalf( struct brw_reg reg )
41 {
42 if (reg.vstride)
43 reg.nr++;
44 return reg;
45 }
46
47
48 /* Payload R0:
49 *
50 * R0.0 -- pixel mask, one bit for each of 4 pixels in 4 tiles,
51 * corresponding to each of the 16 execution channels.
52 * R0.1..8 -- ?
53 * R1.0 -- triangle vertex 0.X
54 * R1.1 -- triangle vertex 0.Y
55 * R1.2 -- tile 0 x,y coords (2 packed uwords)
56 * R1.3 -- tile 1 x,y coords (2 packed uwords)
57 * R1.4 -- tile 2 x,y coords (2 packed uwords)
58 * R1.5 -- tile 3 x,y coords (2 packed uwords)
59 * R1.6 -- ?
60 * R1.7 -- ?
61 * R1.8 -- ?
62 */
63
64 void emit_pixel_xy(struct brw_wm_compile *c,
65 const struct brw_reg *dst,
66 GLuint mask)
67 {
68 struct brw_compile *p = &c->func;
69 struct brw_reg r1 = brw_vec1_grf(1, 0);
70 struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW);
71 struct brw_reg dst0_uw, dst1_uw;
72
73 brw_push_insn_state(p);
74 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
75
76 if (c->dispatch_width == 16) {
77 dst0_uw = vec16(retype(dst[0], BRW_REGISTER_TYPE_UW));
78 dst1_uw = vec16(retype(dst[1], BRW_REGISTER_TYPE_UW));
79 } else {
80 dst0_uw = vec8(retype(dst[0], BRW_REGISTER_TYPE_UW));
81 dst1_uw = vec8(retype(dst[1], BRW_REGISTER_TYPE_UW));
82 }
83
84 /* Calculate pixel centers by adding 1 or 0 to each of the
85 * micro-tile coordinates passed in r1.
86 */
87 if (mask & WRITEMASK_X) {
88 brw_ADD(p,
89 dst0_uw,
90 stride(suboffset(r1_uw, 4), 2, 4, 0),
91 brw_imm_v(0x10101010));
92 }
93
94 if (mask & WRITEMASK_Y) {
95 brw_ADD(p,
96 dst1_uw,
97 stride(suboffset(r1_uw,5), 2, 4, 0),
98 brw_imm_v(0x11001100));
99 }
100 brw_pop_insn_state(p);
101 }
102
103
104 void emit_delta_xy(struct brw_compile *p,
105 const struct brw_reg *dst,
106 GLuint mask,
107 const struct brw_reg *arg0)
108 {
109 struct brw_reg r1 = brw_vec1_grf(1, 0);
110
111 /* Calc delta X,Y by subtracting origin in r1 from the pixel
112 * centers.
113 */
114 if (mask & WRITEMASK_X) {
115 brw_ADD(p,
116 dst[0],
117 retype(arg0[0], BRW_REGISTER_TYPE_UW),
118 negate(r1));
119 }
120
121 if (mask & WRITEMASK_Y) {
122 brw_ADD(p,
123 dst[1],
124 retype(arg0[1], BRW_REGISTER_TYPE_UW),
125 negate(suboffset(r1,1)));
126
127 }
128 }
129
130 void emit_wpos_xy(struct brw_wm_compile *c,
131 const struct brw_reg *dst,
132 GLuint mask,
133 const struct brw_reg *arg0)
134 {
135 struct brw_compile *p = &c->func;
136
137 /* Calculate the pixel offset from window bottom left into destination
138 * X and Y channels.
139 */
140 if (mask & WRITEMASK_X) {
141 if (c->fp->program.PixelCenterInteger) {
142 /* X' = X */
143 brw_MOV(p,
144 dst[0],
145 retype(arg0[0], BRW_REGISTER_TYPE_W));
146 } else {
147 /* X' = X + 0.5 */
148 brw_ADD(p,
149 dst[0],
150 retype(arg0[0], BRW_REGISTER_TYPE_W),
151 brw_imm_f(0.5));
152 }
153 }
154
155 if (mask & WRITEMASK_Y) {
156 if (c->fp->program.OriginUpperLeft) {
157 if (c->fp->program.PixelCenterInteger) {
158 /* Y' = Y */
159 brw_MOV(p,
160 dst[1],
161 retype(arg0[1], BRW_REGISTER_TYPE_W));
162 } else {
163 /* Y' = Y + 0.5 */
164 brw_ADD(p,
165 dst[1],
166 retype(arg0[1], BRW_REGISTER_TYPE_W),
167 brw_imm_f(0.5));
168 }
169 } else {
170 float center_offset = c->fp->program.PixelCenterInteger ? 0.0 : 0.5;
171
172 /* Y' = (height - 1) - Y + center */
173 brw_ADD(p,
174 dst[1],
175 negate(retype(arg0[1], BRW_REGISTER_TYPE_W)),
176 brw_imm_f(c->key.drawable_height - 1 + center_offset));
177 }
178 }
179 }
180
181
182 void emit_pixel_w(struct brw_wm_compile *c,
183 const struct brw_reg *dst,
184 GLuint mask,
185 const struct brw_reg *arg0,
186 const struct brw_reg *deltas)
187 {
188 struct brw_compile *p = &c->func;
189
190 /* Don't need this if all you are doing is interpolating color, for
191 * instance.
192 */
193 if (mask & WRITEMASK_W) {
194 struct brw_reg interp3 = brw_vec1_grf(arg0[0].nr+1, 4);
195
196 /* Calc 1/w - just linterp wpos[3] optimized by putting the
197 * result straight into a message reg.
198 */
199 brw_LINE(p, brw_null_reg(), interp3, deltas[0]);
200 brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), deltas[1]);
201
202 /* Calc w */
203 if (c->dispatch_width == 16) {
204 brw_math_16(p, dst[3],
205 BRW_MATH_FUNCTION_INV,
206 BRW_MATH_SATURATE_NONE,
207 2, brw_null_reg(),
208 BRW_MATH_PRECISION_FULL);
209 } else {
210 brw_math(p, dst[3],
211 BRW_MATH_FUNCTION_INV,
212 BRW_MATH_SATURATE_NONE,
213 2, brw_null_reg(),
214 BRW_MATH_DATA_VECTOR,
215 BRW_MATH_PRECISION_FULL);
216 }
217 }
218 }
219
220
221 void emit_linterp(struct brw_compile *p,
222 const struct brw_reg *dst,
223 GLuint mask,
224 const struct brw_reg *arg0,
225 const struct brw_reg *deltas)
226 {
227 struct brw_reg interp[4];
228 GLuint nr = arg0[0].nr;
229 GLuint i;
230
231 interp[0] = brw_vec1_grf(nr, 0);
232 interp[1] = brw_vec1_grf(nr, 4);
233 interp[2] = brw_vec1_grf(nr+1, 0);
234 interp[3] = brw_vec1_grf(nr+1, 4);
235
236 for (i = 0; i < 4; i++) {
237 if (mask & (1<<i)) {
238 brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
239 brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
240 }
241 }
242 }
243
244
245 void emit_pinterp(struct brw_compile *p,
246 const struct brw_reg *dst,
247 GLuint mask,
248 const struct brw_reg *arg0,
249 const struct brw_reg *deltas,
250 const struct brw_reg *w)
251 {
252 struct brw_reg interp[4];
253 GLuint nr = arg0[0].nr;
254 GLuint i;
255
256 interp[0] = brw_vec1_grf(nr, 0);
257 interp[1] = brw_vec1_grf(nr, 4);
258 interp[2] = brw_vec1_grf(nr+1, 0);
259 interp[3] = brw_vec1_grf(nr+1, 4);
260
261 for (i = 0; i < 4; i++) {
262 if (mask & (1<<i)) {
263 brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
264 brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
265 }
266 }
267 for (i = 0; i < 4; i++) {
268 if (mask & (1<<i)) {
269 brw_MUL(p, dst[i], dst[i], w[3]);
270 }
271 }
272 }
273
274
275 void emit_cinterp(struct brw_compile *p,
276 const struct brw_reg *dst,
277 GLuint mask,
278 const struct brw_reg *arg0)
279 {
280 struct brw_reg interp[4];
281 GLuint nr = arg0[0].nr;
282 GLuint i;
283
284 interp[0] = brw_vec1_grf(nr, 0);
285 interp[1] = brw_vec1_grf(nr, 4);
286 interp[2] = brw_vec1_grf(nr+1, 0);
287 interp[3] = brw_vec1_grf(nr+1, 4);
288
289 for (i = 0; i < 4; i++) {
290 if (mask & (1<<i)) {
291 brw_MOV(p, dst[i], suboffset(interp[i],3)); /* TODO: optimize away like other moves */
292 }
293 }
294 }
295
296 /* Sets the destination channels to 1.0 or 0.0 according to glFrontFacing. */
297 void emit_frontfacing(struct brw_compile *p,
298 const struct brw_reg *dst,
299 GLuint mask)
300 {
301 struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD);
302 GLuint i;
303
304 if (!(mask & WRITEMASK_XYZW))
305 return;
306
307 for (i = 0; i < 4; i++) {
308 if (mask & (1<<i)) {
309 brw_MOV(p, dst[i], brw_imm_f(0.0));
310 }
311 }
312
313 /* bit 31 is "primitive is back face", so checking < (1 << 31) gives
314 * us front face
315 */
316 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, r1_6ud, brw_imm_ud(1 << 31));
317 for (i = 0; i < 4; i++) {
318 if (mask & (1<<i)) {
319 brw_MOV(p, dst[i], brw_imm_f(1.0));
320 }
321 }
322 brw_set_predicate_control_flag_value(p, 0xff);
323 }
324
325 /* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input
326 * looking like:
327 *
328 * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br
329 *
330 * and we're trying to produce:
331 *
332 * DDX DDY
333 * dst: (ss0.tr - ss0.tl) (ss0.tl - ss0.bl)
334 * (ss0.tr - ss0.tl) (ss0.tr - ss0.br)
335 * (ss0.br - ss0.bl) (ss0.tl - ss0.bl)
336 * (ss0.br - ss0.bl) (ss0.tr - ss0.br)
337 * (ss1.tr - ss1.tl) (ss1.tl - ss1.bl)
338 * (ss1.tr - ss1.tl) (ss1.tr - ss1.br)
339 * (ss1.br - ss1.bl) (ss1.tl - ss1.bl)
340 * (ss1.br - ss1.bl) (ss1.tr - ss1.br)
341 *
342 * and add another set of two more subspans if in 16-pixel dispatch mode.
343 *
344 * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result
345 * for each pair, and vertstride = 2 jumps us 2 elements after processing a
346 * pair. But for DDY, it's harder, as we want to produce the pairs swizzled
347 * between each other. We could probably do it like ddx and swizzle the right
348 * order later, but bail for now and just produce
349 * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4)
350 */
351 void emit_ddxy(struct brw_compile *p,
352 const struct brw_reg *dst,
353 GLuint mask,
354 GLboolean is_ddx,
355 const struct brw_reg *arg0)
356 {
357 int i;
358 struct brw_reg src0, src1;
359
360 if (mask & SATURATE)
361 brw_set_saturate(p, 1);
362 for (i = 0; i < 4; i++ ) {
363 if (mask & (1<<i)) {
364 if (is_ddx) {
365 src0 = brw_reg(arg0[i].file, arg0[i].nr, 1,
366 BRW_REGISTER_TYPE_F,
367 BRW_VERTICAL_STRIDE_2,
368 BRW_WIDTH_2,
369 BRW_HORIZONTAL_STRIDE_0,
370 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
371 src1 = brw_reg(arg0[i].file, arg0[i].nr, 0,
372 BRW_REGISTER_TYPE_F,
373 BRW_VERTICAL_STRIDE_2,
374 BRW_WIDTH_2,
375 BRW_HORIZONTAL_STRIDE_0,
376 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
377 } else {
378 src0 = brw_reg(arg0[i].file, arg0[i].nr, 0,
379 BRW_REGISTER_TYPE_F,
380 BRW_VERTICAL_STRIDE_4,
381 BRW_WIDTH_4,
382 BRW_HORIZONTAL_STRIDE_0,
383 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
384 src1 = brw_reg(arg0[i].file, arg0[i].nr, 2,
385 BRW_REGISTER_TYPE_F,
386 BRW_VERTICAL_STRIDE_4,
387 BRW_WIDTH_4,
388 BRW_HORIZONTAL_STRIDE_0,
389 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
390 }
391 brw_ADD(p, dst[i], src0, negate(src1));
392 }
393 }
394 if (mask & SATURATE)
395 brw_set_saturate(p, 0);
396 }
397
398 void emit_alu1(struct brw_compile *p,
399 struct brw_instruction *(*func)(struct brw_compile *,
400 struct brw_reg,
401 struct brw_reg),
402 const struct brw_reg *dst,
403 GLuint mask,
404 const struct brw_reg *arg0)
405 {
406 GLuint i;
407
408 if (mask & SATURATE)
409 brw_set_saturate(p, 1);
410
411 for (i = 0; i < 4; i++) {
412 if (mask & (1<<i)) {
413 func(p, dst[i], arg0[i]);
414 }
415 }
416
417 if (mask & SATURATE)
418 brw_set_saturate(p, 0);
419 }
420
421
422 void emit_alu2(struct brw_compile *p,
423 struct brw_instruction *(*func)(struct brw_compile *,
424 struct brw_reg,
425 struct brw_reg,
426 struct brw_reg),
427 const struct brw_reg *dst,
428 GLuint mask,
429 const struct brw_reg *arg0,
430 const struct brw_reg *arg1)
431 {
432 GLuint i;
433
434 if (mask & SATURATE)
435 brw_set_saturate(p, 1);
436
437 for (i = 0; i < 4; i++) {
438 if (mask & (1<<i)) {
439 func(p, dst[i], arg0[i], arg1[i]);
440 }
441 }
442
443 if (mask & SATURATE)
444 brw_set_saturate(p, 0);
445 }
446
447
448 void emit_mad(struct brw_compile *p,
449 const struct brw_reg *dst,
450 GLuint mask,
451 const struct brw_reg *arg0,
452 const struct brw_reg *arg1,
453 const struct brw_reg *arg2)
454 {
455 GLuint i;
456
457 for (i = 0; i < 4; i++) {
458 if (mask & (1<<i)) {
459 brw_MUL(p, dst[i], arg0[i], arg1[i]);
460
461 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
462 brw_ADD(p, dst[i], dst[i], arg2[i]);
463 brw_set_saturate(p, 0);
464 }
465 }
466 }
467
468 void emit_lrp(struct brw_compile *p,
469 const struct brw_reg *dst,
470 GLuint mask,
471 const struct brw_reg *arg0,
472 const struct brw_reg *arg1,
473 const struct brw_reg *arg2)
474 {
475 GLuint i;
476
477 /* Uses dst as a temporary:
478 */
479 for (i = 0; i < 4; i++) {
480 if (mask & (1<<i)) {
481 /* Can I use the LINE instruction for this?
482 */
483 brw_ADD(p, dst[i], negate(arg0[i]), brw_imm_f(1.0));
484 brw_MUL(p, brw_null_reg(), dst[i], arg2[i]);
485
486 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
487 brw_MAC(p, dst[i], arg0[i], arg1[i]);
488 brw_set_saturate(p, 0);
489 }
490 }
491 }
492
493 void emit_sop(struct brw_compile *p,
494 const struct brw_reg *dst,
495 GLuint mask,
496 GLuint cond,
497 const struct brw_reg *arg0,
498 const struct brw_reg *arg1)
499 {
500 GLuint i;
501
502 for (i = 0; i < 4; i++) {
503 if (mask & (1<<i)) {
504 brw_push_insn_state(p);
505 brw_CMP(p, brw_null_reg(), cond, arg0[i], arg1[i]);
506 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
507 brw_MOV(p, dst[i], brw_imm_f(0));
508 brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
509 brw_MOV(p, dst[i], brw_imm_f(1.0));
510 brw_pop_insn_state(p);
511 }
512 }
513 }
514
515 static void emit_slt( struct brw_compile *p,
516 const struct brw_reg *dst,
517 GLuint mask,
518 const struct brw_reg *arg0,
519 const struct brw_reg *arg1 )
520 {
521 emit_sop(p, dst, mask, BRW_CONDITIONAL_L, arg0, arg1);
522 }
523
524 static void emit_sle( struct brw_compile *p,
525 const struct brw_reg *dst,
526 GLuint mask,
527 const struct brw_reg *arg0,
528 const struct brw_reg *arg1 )
529 {
530 emit_sop(p, dst, mask, BRW_CONDITIONAL_LE, arg0, arg1);
531 }
532
533 static void emit_sgt( struct brw_compile *p,
534 const struct brw_reg *dst,
535 GLuint mask,
536 const struct brw_reg *arg0,
537 const struct brw_reg *arg1 )
538 {
539 emit_sop(p, dst, mask, BRW_CONDITIONAL_G, arg0, arg1);
540 }
541
542 static void emit_sge( struct brw_compile *p,
543 const struct brw_reg *dst,
544 GLuint mask,
545 const struct brw_reg *arg0,
546 const struct brw_reg *arg1 )
547 {
548 emit_sop(p, dst, mask, BRW_CONDITIONAL_GE, arg0, arg1);
549 }
550
551 static void emit_seq( struct brw_compile *p,
552 const struct brw_reg *dst,
553 GLuint mask,
554 const struct brw_reg *arg0,
555 const struct brw_reg *arg1 )
556 {
557 emit_sop(p, dst, mask, BRW_CONDITIONAL_EQ, arg0, arg1);
558 }
559
560 static void emit_sne( struct brw_compile *p,
561 const struct brw_reg *dst,
562 GLuint mask,
563 const struct brw_reg *arg0,
564 const struct brw_reg *arg1 )
565 {
566 emit_sop(p, dst, mask, BRW_CONDITIONAL_NEQ, arg0, arg1);
567 }
568
569 void emit_cmp(struct brw_compile *p,
570 const struct brw_reg *dst,
571 GLuint mask,
572 const struct brw_reg *arg0,
573 const struct brw_reg *arg1,
574 const struct brw_reg *arg2)
575 {
576 GLuint i;
577
578 for (i = 0; i < 4; i++) {
579 if (mask & (1<<i)) {
580 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
581 brw_MOV(p, dst[i], arg2[i]);
582 brw_set_saturate(p, 0);
583
584 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], brw_imm_f(0));
585
586 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
587 brw_MOV(p, dst[i], arg1[i]);
588 brw_set_saturate(p, 0);
589 brw_set_predicate_control_flag_value(p, 0xff);
590 }
591 }
592 }
593
594 void emit_max(struct brw_compile *p,
595 const struct brw_reg *dst,
596 GLuint mask,
597 const struct brw_reg *arg0,
598 const struct brw_reg *arg1)
599 {
600 GLuint i;
601
602 for (i = 0; i < 4; i++) {
603 if (mask & (1<<i)) {
604 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
605 brw_MOV(p, dst[i], arg0[i]);
606 brw_set_saturate(p, 0);
607
608 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]);
609
610 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
611 brw_MOV(p, dst[i], arg1[i]);
612 brw_set_saturate(p, 0);
613 brw_set_predicate_control_flag_value(p, 0xff);
614 }
615 }
616 }
617
618 void emit_min(struct brw_compile *p,
619 const struct brw_reg *dst,
620 GLuint mask,
621 const struct brw_reg *arg0,
622 const struct brw_reg *arg1)
623 {
624 GLuint i;
625
626 for (i = 0; i < 4; i++) {
627 if (mask & (1<<i)) {
628 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
629 brw_MOV(p, dst[i], arg1[i]);
630 brw_set_saturate(p, 0);
631
632 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]);
633
634 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
635 brw_MOV(p, dst[i], arg0[i]);
636 brw_set_saturate(p, 0);
637 brw_set_predicate_control_flag_value(p, 0xff);
638 }
639 }
640 }
641
642
643 void emit_dp3(struct brw_compile *p,
644 const struct brw_reg *dst,
645 GLuint mask,
646 const struct brw_reg *arg0,
647 const struct brw_reg *arg1)
648 {
649 int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
650
651 if (!(mask & WRITEMASK_XYZW))
652 return; /* Do not emit dead code */
653
654 assert(is_power_of_two(mask & WRITEMASK_XYZW));
655
656 brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
657 brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
658
659 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
660 brw_MAC(p, dst[dst_chan], arg0[2], arg1[2]);
661 brw_set_saturate(p, 0);
662 }
663
664
665 void emit_dp4(struct brw_compile *p,
666 const struct brw_reg *dst,
667 GLuint mask,
668 const struct brw_reg *arg0,
669 const struct brw_reg *arg1)
670 {
671 int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
672
673 if (!(mask & WRITEMASK_XYZW))
674 return; /* Do not emit dead code */
675
676 assert(is_power_of_two(mask & WRITEMASK_XYZW));
677
678 brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
679 brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
680 brw_MAC(p, brw_null_reg(), arg0[2], arg1[2]);
681
682 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
683 brw_MAC(p, dst[dst_chan], arg0[3], arg1[3]);
684 brw_set_saturate(p, 0);
685 }
686
687
688 void emit_dph(struct brw_compile *p,
689 const struct brw_reg *dst,
690 GLuint mask,
691 const struct brw_reg *arg0,
692 const struct brw_reg *arg1)
693 {
694 const int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
695
696 if (!(mask & WRITEMASK_XYZW))
697 return; /* Do not emit dead code */
698
699 assert(is_power_of_two(mask & WRITEMASK_XYZW));
700
701 brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
702 brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
703 brw_MAC(p, dst[dst_chan], arg0[2], arg1[2]);
704
705 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
706 brw_ADD(p, dst[dst_chan], dst[dst_chan], arg1[3]);
707 brw_set_saturate(p, 0);
708 }
709
710
711 void emit_xpd(struct brw_compile *p,
712 const struct brw_reg *dst,
713 GLuint mask,
714 const struct brw_reg *arg0,
715 const struct brw_reg *arg1)
716 {
717 GLuint i;
718
719 assert((mask & WRITEMASK_W) != WRITEMASK_W);
720
721 for (i = 0 ; i < 3; i++) {
722 if (mask & (1<<i)) {
723 GLuint i2 = (i+2)%3;
724 GLuint i1 = (i+1)%3;
725
726 brw_MUL(p, brw_null_reg(), negate(arg0[i2]), arg1[i1]);
727
728 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
729 brw_MAC(p, dst[i], arg0[i1], arg1[i2]);
730 brw_set_saturate(p, 0);
731 }
732 }
733 }
734
735
736 void emit_math1(struct brw_wm_compile *c,
737 GLuint function,
738 const struct brw_reg *dst,
739 GLuint mask,
740 const struct brw_reg *arg0)
741 {
742 struct brw_compile *p = &c->func;
743 int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
744 GLuint saturate = ((mask & SATURATE) ?
745 BRW_MATH_SATURATE_SATURATE :
746 BRW_MATH_SATURATE_NONE);
747
748 if (!(mask & WRITEMASK_XYZW))
749 return; /* Do not emit dead code */
750
751 assert(is_power_of_two(mask & WRITEMASK_XYZW));
752
753 /* If compressed, this will write message reg 2,3 from arg0.x's 16
754 * channels.
755 */
756 brw_MOV(p, brw_message_reg(2), arg0[0]);
757
758 /* Send two messages to perform all 16 operations:
759 */
760 brw_push_insn_state(p);
761 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
762 brw_math(p,
763 dst[dst_chan],
764 function,
765 saturate,
766 2,
767 brw_null_reg(),
768 BRW_MATH_DATA_VECTOR,
769 BRW_MATH_PRECISION_FULL);
770
771 if (c->dispatch_width == 16) {
772 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
773 brw_math(p,
774 offset(dst[dst_chan],1),
775 function,
776 saturate,
777 3,
778 brw_null_reg(),
779 BRW_MATH_DATA_VECTOR,
780 BRW_MATH_PRECISION_FULL);
781 }
782 brw_pop_insn_state(p);
783 }
784
785
786 void emit_math2(struct brw_wm_compile *c,
787 GLuint function,
788 const struct brw_reg *dst,
789 GLuint mask,
790 const struct brw_reg *arg0,
791 const struct brw_reg *arg1)
792 {
793 struct brw_compile *p = &c->func;
794 int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
795 GLuint saturate = ((mask & SATURATE) ?
796 BRW_MATH_SATURATE_SATURATE :
797 BRW_MATH_SATURATE_NONE);
798
799 if (!(mask & WRITEMASK_XYZW))
800 return; /* Do not emit dead code */
801
802 assert(is_power_of_two(mask & WRITEMASK_XYZW));
803
804 brw_push_insn_state(p);
805
806 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
807 brw_MOV(p, brw_message_reg(2), arg0[0]);
808 if (c->dispatch_width == 16) {
809 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
810 brw_MOV(p, brw_message_reg(4), sechalf(arg0[0]));
811 }
812
813 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
814 brw_MOV(p, brw_message_reg(3), arg1[0]);
815 if (c->dispatch_width == 16) {
816 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
817 brw_MOV(p, brw_message_reg(5), sechalf(arg1[0]));
818 }
819
820 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
821 brw_math(p,
822 dst[dst_chan],
823 function,
824 saturate,
825 2,
826 brw_null_reg(),
827 BRW_MATH_DATA_VECTOR,
828 BRW_MATH_PRECISION_FULL);
829
830 /* Send two messages to perform all 16 operations:
831 */
832 if (c->dispatch_width == 16) {
833 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
834 brw_math(p,
835 offset(dst[dst_chan],1),
836 function,
837 saturate,
838 4,
839 brw_null_reg(),
840 BRW_MATH_DATA_VECTOR,
841 BRW_MATH_PRECISION_FULL);
842 }
843 brw_pop_insn_state(p);
844 }
845
846
847 void emit_tex(struct brw_wm_compile *c,
848 struct brw_reg *dst,
849 GLuint dst_flags,
850 struct brw_reg *arg,
851 struct brw_reg depth_payload,
852 GLuint tex_idx,
853 GLuint sampler,
854 GLboolean shadow)
855 {
856 struct brw_compile *p = &c->func;
857 struct intel_context *intel = &p->brw->intel;
858 struct brw_reg dst_retyped;
859 GLuint cur_mrf = 2, response_length;
860 GLuint i, nr_texcoords;
861 GLuint emit;
862 GLuint msg_type;
863 GLuint mrf_per_channel;
864 GLuint simd_mode;
865
866 if (c->dispatch_width == 16) {
867 mrf_per_channel = 2;
868 response_length = 8;
869 dst_retyped = retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW);
870 simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
871 } else {
872 mrf_per_channel = 1;
873 response_length = 4;
874 dst_retyped = retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW);
875 simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8;
876 }
877
878 /* How many input regs are there?
879 */
880 switch (tex_idx) {
881 case TEXTURE_1D_INDEX:
882 emit = WRITEMASK_X;
883 nr_texcoords = 1;
884 break;
885 case TEXTURE_2D_INDEX:
886 case TEXTURE_RECT_INDEX:
887 emit = WRITEMASK_XY;
888 nr_texcoords = 2;
889 break;
890 case TEXTURE_3D_INDEX:
891 case TEXTURE_CUBE_INDEX:
892 emit = WRITEMASK_XYZ;
893 nr_texcoords = 3;
894 break;
895 default:
896 /* unexpected target */
897 abort();
898 }
899
900 /* Pre-Ironlake, the 8-wide sampler always took u,v,r. */
901 if (!intel->is_ironlake && c->dispatch_width == 8)
902 nr_texcoords = 3;
903
904 /* For shadow comparisons, we have to supply u,v,r. */
905 if (shadow)
906 nr_texcoords = 3;
907
908 /* Emit the texcoords. */
909 for (i = 0; i < nr_texcoords; i++) {
910 if (emit & (1<<i))
911 brw_MOV(p, brw_message_reg(cur_mrf), arg[i]);
912 else
913 brw_MOV(p, brw_message_reg(cur_mrf), brw_imm_f(0));
914 cur_mrf += mrf_per_channel;
915 }
916
917 /* Fill in the shadow comparison reference value. */
918 if (shadow) {
919 if (intel->is_ironlake) {
920 /* Fill in the cube map array index value. */
921 brw_MOV(p, brw_message_reg(cur_mrf), brw_imm_f(0));
922 cur_mrf += mrf_per_channel;
923 } else if (c->dispatch_width == 8) {
924 /* Fill in the LOD bias value. */
925 brw_MOV(p, brw_message_reg(cur_mrf), brw_imm_f(0));
926 cur_mrf += mrf_per_channel;
927 }
928 brw_MOV(p, brw_message_reg(cur_mrf), arg[2]);
929 cur_mrf += mrf_per_channel;
930 }
931
932 if (intel->is_ironlake) {
933 if (shadow)
934 msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_COMPARE_IGDNG;
935 else
936 msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_IGDNG;
937 } else {
938 /* Note that G45 and older determines shadow compare and dispatch width
939 * from message length for most messages.
940 */
941 if (c->dispatch_width == 16 && shadow)
942 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE;
943 else
944 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE;
945 }
946
947 brw_SAMPLE(p,
948 dst_retyped,
949 1,
950 retype(depth_payload, BRW_REGISTER_TYPE_UW),
951 SURF_INDEX_TEXTURE(sampler),
952 sampler,
953 dst_flags & WRITEMASK_XYZW,
954 msg_type,
955 response_length,
956 cur_mrf - 1,
957 0,
958 1,
959 simd_mode);
960 }
961
962
963 void emit_txb(struct brw_wm_compile *c,
964 struct brw_reg *dst,
965 GLuint dst_flags,
966 struct brw_reg *arg,
967 struct brw_reg depth_payload,
968 GLuint tex_idx,
969 GLuint sampler)
970 {
971 struct brw_compile *p = &c->func;
972 struct intel_context *intel = &p->brw->intel;
973 GLuint msgLength;
974 GLuint msg_type;
975 GLuint mrf_per_channel;
976 GLuint response_length;
977 struct brw_reg dst_retyped;
978
979 /* The G45 and older chipsets don't support 8-wide dispatch for LOD biased
980 * samples, so we'll use the 16-wide instruction, leave the second halves
981 * undefined, and trust the execution mask to keep the undefined pixels
982 * from mattering.
983 */
984 if (c->dispatch_width == 16 || !intel->is_ironlake) {
985 if (intel->is_ironlake)
986 msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_IGDNG;
987 else
988 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
989 mrf_per_channel = 2;
990 dst_retyped = retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW);
991 response_length = 8;
992 } else {
993 msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_IGDNG;
994 mrf_per_channel = 1;
995 dst_retyped = retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW);
996 response_length = 4;
997 }
998
999 /* Shadow ignored for txb. */
1000 switch (tex_idx) {
1001 case TEXTURE_1D_INDEX:
1002 brw_MOV(p, brw_message_reg(2 + 0 * mrf_per_channel), arg[0]);
1003 brw_MOV(p, brw_message_reg(2 + 1 * mrf_per_channel), brw_imm_f(0));
1004 brw_MOV(p, brw_message_reg(2 + 2 * mrf_per_channel), brw_imm_f(0));
1005 break;
1006 case TEXTURE_2D_INDEX:
1007 case TEXTURE_RECT_INDEX:
1008 brw_MOV(p, brw_message_reg(2 + 0 * mrf_per_channel), arg[0]);
1009 brw_MOV(p, brw_message_reg(2 + 1 * mrf_per_channel), arg[1]);
1010 brw_MOV(p, brw_message_reg(2 + 2 * mrf_per_channel), brw_imm_f(0));
1011 break;
1012 case TEXTURE_3D_INDEX:
1013 case TEXTURE_CUBE_INDEX:
1014 brw_MOV(p, brw_message_reg(2 + 0 * mrf_per_channel), arg[0]);
1015 brw_MOV(p, brw_message_reg(2 + 1 * mrf_per_channel), arg[1]);
1016 brw_MOV(p, brw_message_reg(2 + 2 * mrf_per_channel), arg[2]);
1017 break;
1018 default:
1019 /* unexpected target */
1020 abort();
1021 }
1022
1023 brw_MOV(p, brw_message_reg(2 + 3 * mrf_per_channel), arg[3]);
1024 msgLength = 2 + 4 * mrf_per_channel - 1;
1025
1026 brw_SAMPLE(p,
1027 dst_retyped,
1028 1,
1029 retype(depth_payload, BRW_REGISTER_TYPE_UW),
1030 SURF_INDEX_TEXTURE(sampler),
1031 sampler,
1032 dst_flags & WRITEMASK_XYZW,
1033 msg_type,
1034 response_length,
1035 msgLength,
1036 0,
1037 1,
1038 BRW_SAMPLER_SIMD_MODE_SIMD16);
1039 }
1040
1041
1042 static void emit_lit(struct brw_wm_compile *c,
1043 const struct brw_reg *dst,
1044 GLuint mask,
1045 const struct brw_reg *arg0)
1046 {
1047 struct brw_compile *p = &c->func;
1048
1049 assert((mask & WRITEMASK_XW) == 0);
1050
1051 if (mask & WRITEMASK_Y) {
1052 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
1053 brw_MOV(p, dst[1], arg0[0]);
1054 brw_set_saturate(p, 0);
1055 }
1056
1057 if (mask & WRITEMASK_Z) {
1058 emit_math2(c, BRW_MATH_FUNCTION_POW,
1059 &dst[2],
1060 WRITEMASK_X | (mask & SATURATE),
1061 &arg0[1],
1062 &arg0[3]);
1063 }
1064
1065 /* Ordinarily you'd use an iff statement to skip or shortcircuit
1066 * some of the POW calculations above, but 16-wide iff statements
1067 * seem to lock c1 hardware, so this is a nasty workaround:
1068 */
1069 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_LE, arg0[0], brw_imm_f(0));
1070 {
1071 if (mask & WRITEMASK_Y)
1072 brw_MOV(p, dst[1], brw_imm_f(0));
1073
1074 if (mask & WRITEMASK_Z)
1075 brw_MOV(p, dst[2], brw_imm_f(0));
1076 }
1077 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1078 }
1079
1080
1081 /* Kill pixel - set execution mask to zero for those pixels which
1082 * fail.
1083 */
1084 static void emit_kil( struct brw_wm_compile *c,
1085 struct brw_reg *arg0)
1086 {
1087 struct brw_compile *p = &c->func;
1088 struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
1089 GLuint i;
1090
1091 /* XXX - usually won't need 4 compares!
1092 */
1093 for (i = 0; i < 4; i++) {
1094 brw_push_insn_state(p);
1095 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0[i], brw_imm_f(0));
1096 brw_set_predicate_control_flag_value(p, 0xff);
1097 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1098 brw_AND(p, r0uw, brw_flag_reg(), r0uw);
1099 brw_pop_insn_state(p);
1100 }
1101 }
1102
1103 /* KIL_NV kills the pixels that are currently executing, not based on a test
1104 * of the arguments.
1105 */
1106 static void emit_kil_nv( struct brw_wm_compile *c )
1107 {
1108 struct brw_compile *p = &c->func;
1109 struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
1110
1111 brw_push_insn_state(p);
1112 brw_set_mask_control(p, BRW_MASK_DISABLE);
1113 brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); /* IMASK */
1114 brw_AND(p, r0uw, c->emit_mask_reg, r0uw);
1115 brw_pop_insn_state(p);
1116 }
1117
1118 static void fire_fb_write( struct brw_wm_compile *c,
1119 GLuint base_reg,
1120 GLuint nr,
1121 GLuint target,
1122 GLuint eot )
1123 {
1124 struct brw_compile *p = &c->func;
1125 struct brw_reg dst;
1126
1127 if (c->dispatch_width == 16)
1128 dst = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
1129 else
1130 dst = retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW);
1131
1132 /* Pass through control information:
1133 */
1134 /* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */
1135 {
1136 brw_push_insn_state(p);
1137 brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */
1138 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1139 brw_MOV(p,
1140 brw_message_reg(base_reg + 1),
1141 brw_vec8_grf(1, 0));
1142 brw_pop_insn_state(p);
1143 }
1144
1145 /* Send framebuffer write message: */
1146 /* send (16) null.0<1>:uw m0 r0.0<8;8,1>:uw 0x85a04000:ud { Align1 EOT } */
1147 brw_fb_WRITE(p,
1148 dst,
1149 base_reg,
1150 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
1151 target,
1152 nr,
1153 0,
1154 eot);
1155 }
1156
1157
1158 static void emit_aa( struct brw_wm_compile *c,
1159 struct brw_reg *arg1,
1160 GLuint reg )
1161 {
1162 struct brw_compile *p = &c->func;
1163 GLuint comp = c->key.aa_dest_stencil_reg / 2;
1164 GLuint off = c->key.aa_dest_stencil_reg % 2;
1165 struct brw_reg aa = offset(arg1[comp], off);
1166
1167 brw_push_insn_state(p);
1168 brw_set_compression_control(p, BRW_COMPRESSION_NONE); /* ?? */
1169 brw_MOV(p, brw_message_reg(reg), aa);
1170 brw_pop_insn_state(p);
1171 }
1172
1173
1174 /* Post-fragment-program processing. Send the results to the
1175 * framebuffer.
1176 * \param arg0 the fragment color
1177 * \param arg1 the pass-through depth value
1178 * \param arg2 the shader-computed depth value
1179 */
1180 void emit_fb_write(struct brw_wm_compile *c,
1181 struct brw_reg *arg0,
1182 struct brw_reg *arg1,
1183 struct brw_reg *arg2,
1184 GLuint target,
1185 GLuint eot)
1186 {
1187 struct brw_compile *p = &c->func;
1188 struct brw_context *brw = p->brw;
1189 GLuint nr = 2;
1190 GLuint channel;
1191
1192 /* Reserve a space for AA - may not be needed:
1193 */
1194 if (c->key.aa_dest_stencil_reg)
1195 nr += 1;
1196
1197 /* I don't really understand how this achieves the color interleave
1198 * (ie RGBARGBA) in the result: [Do the saturation here]
1199 */
1200 brw_push_insn_state(p);
1201
1202 for (channel = 0; channel < 4; channel++) {
1203 if (c->dispatch_width == 16 && brw->has_compr4) {
1204 /* By setting the high bit of the MRF register number, we indicate
1205 * that we want COMPR4 mode - instead of doing the usual destination
1206 * + 1 for the second half we get destination + 4.
1207 */
1208 brw_MOV(p,
1209 brw_message_reg(nr + channel + (1 << 7)),
1210 arg0[channel]);
1211 } else {
1212 /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */
1213 /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */
1214 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1215 brw_MOV(p,
1216 brw_message_reg(nr + channel),
1217 arg0[channel]);
1218
1219 if (c->dispatch_width == 16) {
1220 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
1221 brw_MOV(p,
1222 brw_message_reg(nr + channel + 4),
1223 sechalf(arg0[channel]));
1224 }
1225 }
1226 }
1227 /* skip over the regs populated above:
1228 */
1229 nr += 8;
1230 brw_pop_insn_state(p);
1231
1232 if (c->key.source_depth_to_render_target)
1233 {
1234 if (c->key.computes_depth)
1235 brw_MOV(p, brw_message_reg(nr), arg2[2]);
1236 else
1237 brw_MOV(p, brw_message_reg(nr), arg1[1]); /* ? */
1238
1239 nr += 2;
1240 }
1241
1242 if (c->key.dest_depth_reg)
1243 {
1244 GLuint comp = c->key.dest_depth_reg / 2;
1245 GLuint off = c->key.dest_depth_reg % 2;
1246
1247 if (off != 0) {
1248 brw_push_insn_state(p);
1249 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1250
1251 brw_MOV(p, brw_message_reg(nr), offset(arg1[comp],1));
1252 /* 2nd half? */
1253 brw_MOV(p, brw_message_reg(nr+1), arg1[comp+1]);
1254 brw_pop_insn_state(p);
1255 }
1256 else {
1257 brw_MOV(p, brw_message_reg(nr), arg1[comp]);
1258 }
1259 nr += 2;
1260 }
1261
1262 if (!c->key.runtime_check_aads_emit) {
1263 if (c->key.aa_dest_stencil_reg)
1264 emit_aa(c, arg1, 2);
1265
1266 fire_fb_write(c, 0, nr, target, eot);
1267 }
1268 else {
1269 struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
1270 struct brw_reg ip = brw_ip_reg();
1271 struct brw_instruction *jmp;
1272
1273 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1274 brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
1275 brw_AND(p,
1276 v1_null_ud,
1277 get_element_ud(brw_vec8_grf(1,0), 6),
1278 brw_imm_ud(1<<26));
1279
1280 jmp = brw_JMPI(p, ip, ip, brw_imm_w(0));
1281 {
1282 emit_aa(c, arg1, 2);
1283 fire_fb_write(c, 0, nr, target, eot);
1284 /* note - thread killed in subroutine */
1285 }
1286 brw_land_fwd_jump(p, jmp);
1287
1288 /* ELSE: Shuffle up one register to fill in the hole left for AA:
1289 */
1290 fire_fb_write(c, 1, nr-1, target, eot);
1291 }
1292 }
1293
1294 /**
1295 * Move a GPR to scratch memory.
1296 */
1297 static void emit_spill( struct brw_wm_compile *c,
1298 struct brw_reg reg,
1299 GLuint slot )
1300 {
1301 struct brw_compile *p = &c->func;
1302
1303 /*
1304 mov (16) m2.0<1>:ud r2.0<8;8,1>:ud { Align1 Compr }
1305 */
1306 brw_MOV(p, brw_message_reg(2), reg);
1307
1308 /*
1309 mov (1) r0.2<1>:d 0x00000080:d { Align1 NoMask }
1310 send (16) null.0<1>:uw m1 r0.0<8;8,1>:uw 0x053003ff:ud { Align1 }
1311 */
1312 brw_dp_WRITE_16(p,
1313 retype(vec16(brw_vec8_grf(0, 0)), BRW_REGISTER_TYPE_UW),
1314 slot);
1315 }
1316
1317
1318 /**
1319 * Load a GPR from scratch memory.
1320 */
1321 static void emit_unspill( struct brw_wm_compile *c,
1322 struct brw_reg reg,
1323 GLuint slot )
1324 {
1325 struct brw_compile *p = &c->func;
1326
1327 /* Slot 0 is the undef value.
1328 */
1329 if (slot == 0) {
1330 brw_MOV(p, reg, brw_imm_f(0));
1331 return;
1332 }
1333
1334 /*
1335 mov (1) r0.2<1>:d 0x000000c0:d { Align1 NoMask }
1336 send (16) r110.0<1>:uw m1 r0.0<8;8,1>:uw 0x041243ff:ud { Align1 }
1337 */
1338
1339 brw_dp_READ_16(p,
1340 retype(vec16(reg), BRW_REGISTER_TYPE_UW),
1341 slot);
1342 }
1343
1344
1345 /**
1346 * Retrieve up to 4 GEN4 register pairs for the given wm reg:
1347 * Args with unspill_reg != 0 will be loaded from scratch memory.
1348 */
1349 static void get_argument_regs( struct brw_wm_compile *c,
1350 struct brw_wm_ref *arg[],
1351 struct brw_reg *regs )
1352 {
1353 GLuint i;
1354
1355 for (i = 0; i < 4; i++) {
1356 if (arg[i]) {
1357 if (arg[i]->unspill_reg)
1358 emit_unspill(c,
1359 brw_vec8_grf(arg[i]->unspill_reg, 0),
1360 arg[i]->value->spill_slot);
1361
1362 regs[i] = arg[i]->hw_reg;
1363 }
1364 else {
1365 regs[i] = brw_null_reg();
1366 }
1367 }
1368 }
1369
1370
1371 /**
1372 * For values that have a spill_slot!=0, write those regs to scratch memory.
1373 */
1374 static void spill_values( struct brw_wm_compile *c,
1375 struct brw_wm_value *values,
1376 GLuint nr )
1377 {
1378 GLuint i;
1379
1380 for (i = 0; i < nr; i++)
1381 if (values[i].spill_slot)
1382 emit_spill(c, values[i].hw_reg, values[i].spill_slot);
1383 }
1384
1385
1386 /* Emit the fragment program instructions here.
1387 */
1388 void brw_wm_emit( struct brw_wm_compile *c )
1389 {
1390 struct brw_compile *p = &c->func;
1391 GLuint insn;
1392
1393 brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
1394
1395 /* Check if any of the payload regs need to be spilled:
1396 */
1397 spill_values(c, c->payload.depth, 4);
1398 spill_values(c, c->creg, c->nr_creg);
1399 spill_values(c, c->payload.input_interp, FRAG_ATTRIB_MAX);
1400
1401
1402 for (insn = 0; insn < c->nr_insns; insn++) {
1403
1404 struct brw_wm_instruction *inst = &c->instruction[insn];
1405 struct brw_reg args[3][4], dst[4];
1406 GLuint i, dst_flags;
1407
1408 /* Get argument regs:
1409 */
1410 for (i = 0; i < 3; i++)
1411 get_argument_regs(c, inst->src[i], args[i]);
1412
1413 /* Get dest regs:
1414 */
1415 for (i = 0; i < 4; i++)
1416 if (inst->dst[i])
1417 dst[i] = inst->dst[i]->hw_reg;
1418 else
1419 dst[i] = brw_null_reg();
1420
1421 /* Flags
1422 */
1423 dst_flags = inst->writemask;
1424 if (inst->saturate)
1425 dst_flags |= SATURATE;
1426
1427 switch (inst->opcode) {
1428 /* Generated instructions for calculating triangle interpolants:
1429 */
1430 case WM_PIXELXY:
1431 emit_pixel_xy(c, dst, dst_flags);
1432 break;
1433
1434 case WM_DELTAXY:
1435 emit_delta_xy(p, dst, dst_flags, args[0]);
1436 break;
1437
1438 case WM_WPOSXY:
1439 emit_wpos_xy(c, dst, dst_flags, args[0]);
1440 break;
1441
1442 case WM_PIXELW:
1443 emit_pixel_w(c, dst, dst_flags, args[0], args[1]);
1444 break;
1445
1446 case WM_LINTERP:
1447 emit_linterp(p, dst, dst_flags, args[0], args[1]);
1448 break;
1449
1450 case WM_PINTERP:
1451 emit_pinterp(p, dst, dst_flags, args[0], args[1], args[2]);
1452 break;
1453
1454 case WM_CINTERP:
1455 emit_cinterp(p, dst, dst_flags, args[0]);
1456 break;
1457
1458 case WM_FB_WRITE:
1459 emit_fb_write(c, args[0], args[1], args[2], inst->target, inst->eot);
1460 break;
1461
1462 case WM_FRONTFACING:
1463 emit_frontfacing(p, dst, dst_flags);
1464 break;
1465
1466 /* Straightforward arithmetic:
1467 */
1468 case OPCODE_ADD:
1469 emit_alu2(p, brw_ADD, dst, dst_flags, args[0], args[1]);
1470 break;
1471
1472 case OPCODE_FRC:
1473 emit_alu1(p, brw_FRC, dst, dst_flags, args[0]);
1474 break;
1475
1476 case OPCODE_FLR:
1477 emit_alu1(p, brw_RNDD, dst, dst_flags, args[0]);
1478 break;
1479
1480 case OPCODE_DDX:
1481 emit_ddxy(p, dst, dst_flags, GL_TRUE, args[0]);
1482 break;
1483
1484 case OPCODE_DDY:
1485 emit_ddxy(p, dst, dst_flags, GL_FALSE, args[0]);
1486 break;
1487
1488 case OPCODE_DP3:
1489 emit_dp3(p, dst, dst_flags, args[0], args[1]);
1490 break;
1491
1492 case OPCODE_DP4:
1493 emit_dp4(p, dst, dst_flags, args[0], args[1]);
1494 break;
1495
1496 case OPCODE_DPH:
1497 emit_dph(p, dst, dst_flags, args[0], args[1]);
1498 break;
1499
1500 case OPCODE_TRUNC:
1501 emit_alu1(p, brw_RNDZ, dst, dst_flags, args[0]);
1502 break;
1503
1504 case OPCODE_LRP:
1505 emit_lrp(p, dst, dst_flags, args[0], args[1], args[2]);
1506 break;
1507
1508 case OPCODE_MAD:
1509 emit_mad(p, dst, dst_flags, args[0], args[1], args[2]);
1510 break;
1511
1512 case OPCODE_MOV:
1513 case OPCODE_SWZ:
1514 emit_alu1(p, brw_MOV, dst, dst_flags, args[0]);
1515 break;
1516
1517 case OPCODE_MUL:
1518 emit_alu2(p, brw_MUL, dst, dst_flags, args[0], args[1]);
1519 break;
1520
1521 case OPCODE_XPD:
1522 emit_xpd(p, dst, dst_flags, args[0], args[1]);
1523 break;
1524
1525 /* Higher math functions:
1526 */
1527 case OPCODE_RCP:
1528 emit_math1(c, BRW_MATH_FUNCTION_INV, dst, dst_flags, args[0]);
1529 break;
1530
1531 case OPCODE_RSQ:
1532 emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst, dst_flags, args[0]);
1533 break;
1534
1535 case OPCODE_SIN:
1536 emit_math1(c, BRW_MATH_FUNCTION_SIN, dst, dst_flags, args[0]);
1537 break;
1538
1539 case OPCODE_COS:
1540 emit_math1(c, BRW_MATH_FUNCTION_COS, dst, dst_flags, args[0]);
1541 break;
1542
1543 case OPCODE_EX2:
1544 emit_math1(c, BRW_MATH_FUNCTION_EXP, dst, dst_flags, args[0]);
1545 break;
1546
1547 case OPCODE_LG2:
1548 emit_math1(c, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]);
1549 break;
1550
1551 case OPCODE_SCS:
1552 /* There is an scs math function, but it would need some
1553 * fixup for 16-element execution.
1554 */
1555 if (dst_flags & WRITEMASK_X)
1556 emit_math1(c, BRW_MATH_FUNCTION_COS, dst, (dst_flags&SATURATE)|WRITEMASK_X, args[0]);
1557 if (dst_flags & WRITEMASK_Y)
1558 emit_math1(c, BRW_MATH_FUNCTION_SIN, dst+1, (dst_flags&SATURATE)|WRITEMASK_X, args[0]);
1559 break;
1560
1561 case OPCODE_POW:
1562 emit_math2(c, BRW_MATH_FUNCTION_POW, dst, dst_flags, args[0], args[1]);
1563 break;
1564
1565 /* Comparisons:
1566 */
1567 case OPCODE_CMP:
1568 emit_cmp(p, dst, dst_flags, args[0], args[1], args[2]);
1569 break;
1570
1571 case OPCODE_MAX:
1572 emit_max(p, dst, dst_flags, args[0], args[1]);
1573 break;
1574
1575 case OPCODE_MIN:
1576 emit_min(p, dst, dst_flags, args[0], args[1]);
1577 break;
1578
1579 case OPCODE_SLT:
1580 emit_slt(p, dst, dst_flags, args[0], args[1]);
1581 break;
1582
1583 case OPCODE_SLE:
1584 emit_sle(p, dst, dst_flags, args[0], args[1]);
1585 break;
1586 case OPCODE_SGT:
1587 emit_sgt(p, dst, dst_flags, args[0], args[1]);
1588 break;
1589 case OPCODE_SGE:
1590 emit_sge(p, dst, dst_flags, args[0], args[1]);
1591 break;
1592 case OPCODE_SEQ:
1593 emit_seq(p, dst, dst_flags, args[0], args[1]);
1594 break;
1595 case OPCODE_SNE:
1596 emit_sne(p, dst, dst_flags, args[0], args[1]);
1597 break;
1598
1599 case OPCODE_LIT:
1600 emit_lit(c, dst, dst_flags, args[0]);
1601 break;
1602
1603 /* Texturing operations:
1604 */
1605 case OPCODE_TEX:
1606 emit_tex(c, dst, dst_flags, args[0], c->payload.depth[0].hw_reg,
1607 inst->tex_idx, inst->tex_unit,
1608 inst->tex_shadow);
1609 break;
1610
1611 case OPCODE_TXB:
1612 emit_txb(c, dst, dst_flags, args[0], c->payload.depth[0].hw_reg,
1613 inst->tex_idx, inst->tex_unit);
1614 break;
1615
1616 case OPCODE_KIL:
1617 emit_kil(c, args[0]);
1618 break;
1619
1620 case OPCODE_KIL_NV:
1621 emit_kil_nv(c);
1622 break;
1623
1624 default:
1625 printf("Unsupported opcode %i (%s) in fragment shader\n",
1626 inst->opcode, inst->opcode < MAX_OPCODE ?
1627 _mesa_opcode_string(inst->opcode) :
1628 "unknown");
1629 }
1630
1631 for (i = 0; i < 4; i++)
1632 if (inst->dst[i] && inst->dst[i]->spill_slot)
1633 emit_spill(c,
1634 inst->dst[i]->hw_reg,
1635 inst->dst[i]->spill_slot);
1636 }
1637
1638 if (INTEL_DEBUG & DEBUG_WM) {
1639 int i;
1640
1641 printf("wm-native:\n");
1642 for (i = 0; i < p->nr_insn; i++)
1643 brw_disasm(stderr, &p->store[i]);
1644 printf("\n");
1645 }
1646 }