i965g: disassemble more than one instruction at a time
[mesa.git] / src / gallium / drivers / i965 / brw_wm_emit.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32 #include "util/u_math.h"
33 #include "tgsi/tgsi_info.h"
34
35 #include "brw_context.h"
36 #include "brw_wm.h"
37 #include "brw_debug.h"
38
39 /* Not quite sure how correct this is - need to understand horiz
40 * vs. vertical strides a little better.
41 */
42 static INLINE struct brw_reg sechalf( struct brw_reg reg )
43 {
44 if (reg.vstride)
45 reg.nr++;
46 return reg;
47 }
48
49 /* Payload R0:
50 *
51 * R0.0 -- pixel mask, one bit for each of 4 pixels in 4 quads,
52 * corresponding to each of the 16 execution channels.
53 * R0.1..8 -- ?
54 * R1.0 -- triangle vertex 0.X
55 * R1.1 -- triangle vertex 0.Y
56 * R1.2 -- quad 0 x,y coords (2 packed uwords)
57 * R1.3 -- quad 1 x,y coords (2 packed uwords)
58 * R1.4 -- quad 2 x,y coords (2 packed uwords)
59 * R1.5 -- quad 3 x,y coords (2 packed uwords)
60 * R1.6 -- ?
61 * R1.7 -- ?
62 * R1.8 -- ?
63 */
64
65
66 static void emit_pixel_xy(struct brw_compile *p,
67 const struct brw_reg *dst,
68 GLuint mask)
69 {
70 struct brw_reg r1 = brw_vec1_grf(1, 0);
71 struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW);
72
73 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
74
75 /* Calculate pixel centers by adding 1 or 0 to each of the
76 * micro-tile coordinates passed in r1.
77 */
78 if (mask & BRW_WRITEMASK_X) {
79 brw_ADD(p,
80 vec16(retype(dst[0], BRW_REGISTER_TYPE_UW)),
81 stride(suboffset(r1_uw, 4), 2, 4, 0),
82 brw_imm_v(0x10101010));
83 }
84
85 if (mask & BRW_WRITEMASK_Y) {
86 brw_ADD(p,
87 vec16(retype(dst[1], BRW_REGISTER_TYPE_UW)),
88 stride(suboffset(r1_uw,5), 2, 4, 0),
89 brw_imm_v(0x11001100));
90 }
91
92 brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
93 }
94
95
96
97 static void emit_delta_xy(struct brw_compile *p,
98 const struct brw_reg *dst,
99 GLuint mask,
100 const struct brw_reg *arg0)
101 {
102 struct brw_reg r1 = brw_vec1_grf(1, 0);
103
104 /* Calc delta X,Y by subtracting origin in r1 from the pixel
105 * centers.
106 */
107 if (mask & BRW_WRITEMASK_X) {
108 brw_ADD(p,
109 dst[0],
110 retype(arg0[0], BRW_REGISTER_TYPE_UW),
111 negate(r1));
112 }
113
114 if (mask & BRW_WRITEMASK_Y) {
115 brw_ADD(p,
116 dst[1],
117 retype(arg0[1], BRW_REGISTER_TYPE_UW),
118 negate(suboffset(r1,1)));
119
120 }
121 }
122
123 static void emit_wpos_xy(struct brw_wm_compile *c,
124 const struct brw_reg *dst,
125 GLuint mask,
126 const struct brw_reg *arg0)
127 {
128 struct brw_compile *p = &c->func;
129
130 if (mask & BRW_WRITEMASK_X) {
131 /* X' = X */
132 brw_MOV(p,
133 dst[0],
134 retype(arg0[0], BRW_REGISTER_TYPE_W));
135 }
136
137 /* XXX: is this needed any more, or is this a NOOP?
138 */
139 if (mask & BRW_WRITEMASK_Y) {
140 #if 0
141 /* Y' = height - 1 - Y */
142 brw_ADD(p,
143 dst[1],
144 negate(retype(arg0[1], BRW_REGISTER_TYPE_W)),
145 brw_imm_d(c->key.drawable_height - 1));
146 #else
147 brw_MOV(p,
148 dst[0],
149 retype(arg0[0], BRW_REGISTER_TYPE_W));
150 #endif
151 }
152 }
153
154
155 static void emit_pixel_w( struct brw_compile *p,
156 const struct brw_reg *dst,
157 GLuint mask,
158 const struct brw_reg *arg0,
159 const struct brw_reg *deltas)
160 {
161 /* Don't need this if all you are doing is interpolating color, for
162 * instance.
163 */
164 if (mask & BRW_WRITEMASK_W) {
165 struct brw_reg interp3 = brw_vec1_grf(arg0[0].nr+1, 4);
166
167 /* Calc 1/w - just linterp wpos[3] optimized by putting the
168 * result straight into a message reg.
169 */
170 brw_LINE(p, brw_null_reg(), interp3, deltas[0]);
171 brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), deltas[1]);
172
173 /* Calc w */
174 brw_math_16( p, dst[3],
175 BRW_MATH_FUNCTION_INV,
176 BRW_MATH_SATURATE_NONE,
177 2, brw_null_reg(),
178 BRW_MATH_PRECISION_FULL);
179 }
180 }
181
182
183
184 static void emit_linterp( struct brw_compile *p,
185 const struct brw_reg *dst,
186 GLuint mask,
187 const struct brw_reg *arg0,
188 const struct brw_reg *deltas )
189 {
190 struct brw_reg interp[4];
191 GLuint nr = arg0[0].nr;
192 GLuint i;
193
194 interp[0] = brw_vec1_grf(nr, 0);
195 interp[1] = brw_vec1_grf(nr, 4);
196 interp[2] = brw_vec1_grf(nr+1, 0);
197 interp[3] = brw_vec1_grf(nr+1, 4);
198
199 for (i = 0; i < 4; i++) {
200 if (mask & (1<<i)) {
201 brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
202 brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
203 }
204 }
205 }
206
207
208 static void emit_pinterp( struct brw_compile *p,
209 const struct brw_reg *dst,
210 GLuint mask,
211 const struct brw_reg *arg0,
212 const struct brw_reg *deltas,
213 const struct brw_reg *w)
214 {
215 struct brw_reg interp[4];
216 GLuint nr = arg0[0].nr;
217 GLuint i;
218
219 interp[0] = brw_vec1_grf(nr, 0);
220 interp[1] = brw_vec1_grf(nr, 4);
221 interp[2] = brw_vec1_grf(nr+1, 0);
222 interp[3] = brw_vec1_grf(nr+1, 4);
223
224 for (i = 0; i < 4; i++) {
225 if (mask & (1<<i)) {
226 brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
227 brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
228 }
229 }
230 for (i = 0; i < 4; i++) {
231 if (mask & (1<<i)) {
232 brw_MUL(p, dst[i], dst[i], w[3]);
233 }
234 }
235 }
236
237
238 static void emit_cinterp( struct brw_compile *p,
239 const struct brw_reg *dst,
240 GLuint mask,
241 const struct brw_reg *arg0 )
242 {
243 struct brw_reg interp[4];
244 GLuint nr = arg0[0].nr;
245 GLuint i;
246
247 interp[0] = brw_vec1_grf(nr, 0);
248 interp[1] = brw_vec1_grf(nr, 4);
249 interp[2] = brw_vec1_grf(nr+1, 0);
250 interp[3] = brw_vec1_grf(nr+1, 4);
251
252 for (i = 0; i < 4; i++) {
253 if (mask & (1<<i)) {
254 brw_MOV(p, dst[i], suboffset(interp[i],3)); /* TODO: optimize away like other moves */
255 }
256 }
257 }
258
259 /* Sets the destination channels to 1.0 or 0.0 according to glFrontFacing. */
260 static void emit_frontfacing( struct brw_compile *p,
261 const struct brw_reg *dst,
262 GLuint mask )
263 {
264 struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD);
265 GLuint i;
266
267 if (!(mask & BRW_WRITEMASK_XYZW))
268 return;
269
270 for (i = 0; i < 4; i++) {
271 if (mask & (1<<i)) {
272 brw_MOV(p, dst[i], brw_imm_f(0.0));
273 }
274 }
275
276 /* bit 31 is "primitive is back face", so checking < (1 << 31) gives
277 * us front face
278 */
279 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, r1_6ud, brw_imm_ud(1 << 31));
280 for (i = 0; i < 4; i++) {
281 if (mask & (1<<i)) {
282 brw_MOV(p, dst[i], brw_imm_f(1.0));
283 }
284 }
285 brw_set_predicate_control_flag_value(p, 0xff);
286 }
287
288 /* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input
289 * looking like:
290 *
291 * arg0: q0.tl q0.tr q0.bl q0.br q1.tl q1.tr q1.bl q1.br
292 *
293 * and we're trying to produce:
294 *
295 * DDX DDY
296 * dst: (q0.tr - q0.tl) (q0.tl - q0.bl)
297 * (q0.tr - q0.tl) (q0.tr - q0.br)
298 * (q0.br - q0.bl) (q0.tl - q0.bl)
299 * (q0.br - q0.bl) (q0.tr - q0.br)
300 * (q1.tr - q1.tl) (q1.tl - q1.bl)
301 * (q1.tr - q1.tl) (q1.tr - q1.br)
302 * (q1.br - q1.bl) (q1.tl - q1.bl)
303 * (q1.br - q1.bl) (q1.tr - q1.br)
304 *
305 * and add two more quads if in 16-pixel dispatch mode.
306 *
307 * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result
308 * for each pair, and vertstride = 2 jumps us 2 elements after processing a
309 * pair. But for DDY, it's harder, as we want to produce the pairs swizzled
310 * between each other. We could probably do it like ddx and swizzle the right
311 * order later, but bail for now and just produce
312 * ((q0.tl - q0.bl)x4 (q1.tl - q1.bl)x4)
313 */
314 void emit_ddxy(struct brw_compile *p,
315 const struct brw_reg *dst,
316 GLuint mask,
317 GLboolean is_ddx,
318 const struct brw_reg *arg0)
319 {
320 int i;
321 struct brw_reg src0, src1;
322
323 if (mask & SATURATE)
324 brw_set_saturate(p, 1);
325 for (i = 0; i < 4; i++ ) {
326 if (mask & (1<<i)) {
327 if (is_ddx) {
328 src0 = brw_reg(arg0[i].file, arg0[i].nr, 1,
329 BRW_REGISTER_TYPE_F,
330 BRW_VERTICAL_STRIDE_2,
331 BRW_WIDTH_2,
332 BRW_HORIZONTAL_STRIDE_0,
333 BRW_SWIZZLE_XYZW, BRW_WRITEMASK_XYZW);
334 src1 = brw_reg(arg0[i].file, arg0[i].nr, 0,
335 BRW_REGISTER_TYPE_F,
336 BRW_VERTICAL_STRIDE_2,
337 BRW_WIDTH_2,
338 BRW_HORIZONTAL_STRIDE_0,
339 BRW_SWIZZLE_XYZW, BRW_WRITEMASK_XYZW);
340 } else {
341 src0 = brw_reg(arg0[i].file, arg0[i].nr, 0,
342 BRW_REGISTER_TYPE_F,
343 BRW_VERTICAL_STRIDE_4,
344 BRW_WIDTH_4,
345 BRW_HORIZONTAL_STRIDE_0,
346 BRW_SWIZZLE_XYZW, BRW_WRITEMASK_XYZW);
347 src1 = brw_reg(arg0[i].file, arg0[i].nr, 2,
348 BRW_REGISTER_TYPE_F,
349 BRW_VERTICAL_STRIDE_4,
350 BRW_WIDTH_4,
351 BRW_HORIZONTAL_STRIDE_0,
352 BRW_SWIZZLE_XYZW, BRW_WRITEMASK_XYZW);
353 }
354 brw_ADD(p, dst[i], src0, negate(src1));
355 }
356 }
357 if (mask & SATURATE)
358 brw_set_saturate(p, 0);
359 }
360
361 static void emit_alu1( struct brw_compile *p,
362 struct brw_instruction *(*func)(struct brw_compile *,
363 struct brw_reg,
364 struct brw_reg),
365 const struct brw_reg *dst,
366 GLuint mask,
367 const struct brw_reg *arg0 )
368 {
369 GLuint i;
370
371 if (mask & SATURATE)
372 brw_set_saturate(p, 1);
373
374 for (i = 0; i < 4; i++) {
375 if (mask & (1<<i)) {
376 func(p, dst[i], arg0[i]);
377 }
378 }
379
380 if (mask & SATURATE)
381 brw_set_saturate(p, 0);
382 }
383
384
385 static void emit_alu2( struct brw_compile *p,
386 struct brw_instruction *(*func)(struct brw_compile *,
387 struct brw_reg,
388 struct brw_reg,
389 struct brw_reg),
390 const struct brw_reg *dst,
391 GLuint mask,
392 const struct brw_reg *arg0,
393 const struct brw_reg *arg1 )
394 {
395 GLuint i;
396
397 if (mask & SATURATE)
398 brw_set_saturate(p, 1);
399
400 for (i = 0; i < 4; i++) {
401 if (mask & (1<<i)) {
402 func(p, dst[i], arg0[i], arg1[i]);
403 }
404 }
405
406 if (mask & SATURATE)
407 brw_set_saturate(p, 0);
408 }
409
410
411 static void emit_mad( struct brw_compile *p,
412 const struct brw_reg *dst,
413 GLuint mask,
414 const struct brw_reg *arg0,
415 const struct brw_reg *arg1,
416 const struct brw_reg *arg2 )
417 {
418 GLuint i;
419
420 for (i = 0; i < 4; i++) {
421 if (mask & (1<<i)) {
422 brw_MUL(p, dst[i], arg0[i], arg1[i]);
423
424 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
425 brw_ADD(p, dst[i], dst[i], arg2[i]);
426 brw_set_saturate(p, 0);
427 }
428 }
429 }
430
431 static void emit_trunc( struct brw_compile *p,
432 const struct brw_reg *dst,
433 GLuint mask,
434 const struct brw_reg *arg0)
435 {
436 GLuint i;
437
438 for (i = 0; i < 4; i++) {
439 if (mask & (1<<i)) {
440 brw_RNDZ(p, dst[i], arg0[i]);
441 }
442 }
443 }
444
445 static void emit_lrp( struct brw_compile *p,
446 const struct brw_reg *dst,
447 GLuint mask,
448 const struct brw_reg *arg0,
449 const struct brw_reg *arg1,
450 const struct brw_reg *arg2 )
451 {
452 GLuint i;
453
454 /* Uses dst as a temporary:
455 */
456 for (i = 0; i < 4; i++) {
457 if (mask & (1<<i)) {
458 /* Can I use the LINE instruction for this?
459 */
460 brw_ADD(p, dst[i], negate(arg0[i]), brw_imm_f(1.0));
461 brw_MUL(p, brw_null_reg(), dst[i], arg2[i]);
462
463 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
464 brw_MAC(p, dst[i], arg0[i], arg1[i]);
465 brw_set_saturate(p, 0);
466 }
467 }
468 }
469
470 static void emit_sop( struct brw_compile *p,
471 const struct brw_reg *dst,
472 GLuint mask,
473 GLuint cond,
474 const struct brw_reg *arg0,
475 const struct brw_reg *arg1 )
476 {
477 GLuint i;
478
479 for (i = 0; i < 4; i++) {
480 if (mask & (1<<i)) {
481 brw_MOV(p, dst[i], brw_imm_f(0));
482 brw_CMP(p, brw_null_reg(), cond, arg0[i], arg1[i]);
483 brw_MOV(p, dst[i], brw_imm_f(1.0));
484 brw_set_predicate_control_flag_value(p, 0xff);
485 }
486 }
487 }
488
489 static void emit_slt( struct brw_compile *p,
490 const struct brw_reg *dst,
491 GLuint mask,
492 const struct brw_reg *arg0,
493 const struct brw_reg *arg1 )
494 {
495 emit_sop(p, dst, mask, BRW_CONDITIONAL_L, arg0, arg1);
496 }
497
498 static void emit_sle( struct brw_compile *p,
499 const struct brw_reg *dst,
500 GLuint mask,
501 const struct brw_reg *arg0,
502 const struct brw_reg *arg1 )
503 {
504 emit_sop(p, dst, mask, BRW_CONDITIONAL_LE, arg0, arg1);
505 }
506
507 static void emit_sgt( struct brw_compile *p,
508 const struct brw_reg *dst,
509 GLuint mask,
510 const struct brw_reg *arg0,
511 const struct brw_reg *arg1 )
512 {
513 emit_sop(p, dst, mask, BRW_CONDITIONAL_G, arg0, arg1);
514 }
515
516 static void emit_sge( struct brw_compile *p,
517 const struct brw_reg *dst,
518 GLuint mask,
519 const struct brw_reg *arg0,
520 const struct brw_reg *arg1 )
521 {
522 emit_sop(p, dst, mask, BRW_CONDITIONAL_GE, arg0, arg1);
523 }
524
525 static void emit_seq( struct brw_compile *p,
526 const struct brw_reg *dst,
527 GLuint mask,
528 const struct brw_reg *arg0,
529 const struct brw_reg *arg1 )
530 {
531 emit_sop(p, dst, mask, BRW_CONDITIONAL_EQ, arg0, arg1);
532 }
533
534 static void emit_sne( struct brw_compile *p,
535 const struct brw_reg *dst,
536 GLuint mask,
537 const struct brw_reg *arg0,
538 const struct brw_reg *arg1 )
539 {
540 emit_sop(p, dst, mask, BRW_CONDITIONAL_NEQ, arg0, arg1);
541 }
542
543 static void emit_cmp( struct brw_compile *p,
544 const struct brw_reg *dst,
545 GLuint mask,
546 const struct brw_reg *arg0,
547 const struct brw_reg *arg1,
548 const struct brw_reg *arg2 )
549 {
550 GLuint i;
551
552 for (i = 0; i < 4; i++) {
553 if (mask & (1<<i)) {
554 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
555 brw_MOV(p, dst[i], arg2[i]);
556 brw_set_saturate(p, 0);
557
558 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], brw_imm_f(0));
559
560 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
561 brw_MOV(p, dst[i], arg1[i]);
562 brw_set_saturate(p, 0);
563 brw_set_predicate_control_flag_value(p, 0xff);
564 }
565 }
566 }
567
568 static void emit_max( struct brw_compile *p,
569 const struct brw_reg *dst,
570 GLuint mask,
571 const struct brw_reg *arg0,
572 const struct brw_reg *arg1 )
573 {
574 GLuint i;
575
576 for (i = 0; i < 4; i++) {
577 if (mask & (1<<i)) {
578 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
579 brw_MOV(p, dst[i], arg0[i]);
580 brw_set_saturate(p, 0);
581
582 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]);
583
584 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
585 brw_MOV(p, dst[i], arg1[i]);
586 brw_set_saturate(p, 0);
587 brw_set_predicate_control_flag_value(p, 0xff);
588 }
589 }
590 }
591
592 static void emit_min( struct brw_compile *p,
593 const struct brw_reg *dst,
594 GLuint mask,
595 const struct brw_reg *arg0,
596 const struct brw_reg *arg1 )
597 {
598 GLuint i;
599
600 for (i = 0; i < 4; i++) {
601 if (mask & (1<<i)) {
602 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
603 brw_MOV(p, dst[i], arg1[i]);
604 brw_set_saturate(p, 0);
605
606 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]);
607
608 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
609 brw_MOV(p, dst[i], arg0[i]);
610 brw_set_saturate(p, 0);
611 brw_set_predicate_control_flag_value(p, 0xff);
612 }
613 }
614 }
615
616
617 static void emit_dp3( struct brw_compile *p,
618 const struct brw_reg *dst,
619 GLuint mask,
620 const struct brw_reg *arg0,
621 const struct brw_reg *arg1 )
622 {
623 int dst_chan = ffs(mask & BRW_WRITEMASK_XYZW) - 1;
624
625 if (!(mask & BRW_WRITEMASK_XYZW))
626 return; /* Do not emit dead code */
627
628 assert(util_is_power_of_two(mask & BRW_WRITEMASK_XYZW));
629
630 brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
631 brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
632
633 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
634 brw_MAC(p, dst[dst_chan], arg0[2], arg1[2]);
635 brw_set_saturate(p, 0);
636 }
637
638
639 static void emit_dp4( struct brw_compile *p,
640 const struct brw_reg *dst,
641 GLuint mask,
642 const struct brw_reg *arg0,
643 const struct brw_reg *arg1 )
644 {
645 int dst_chan = ffs(mask & BRW_WRITEMASK_XYZW) - 1;
646
647 if (!(mask & BRW_WRITEMASK_XYZW))
648 return; /* Do not emit dead code */
649
650 assert(util_is_power_of_two(mask & BRW_WRITEMASK_XYZW));
651
652 brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
653 brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
654 brw_MAC(p, brw_null_reg(), arg0[2], arg1[2]);
655
656 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
657 brw_MAC(p, dst[dst_chan], arg0[3], arg1[3]);
658 brw_set_saturate(p, 0);
659 }
660
661
662 static void emit_dph( struct brw_compile *p,
663 const struct brw_reg *dst,
664 GLuint mask,
665 const struct brw_reg *arg0,
666 const struct brw_reg *arg1 )
667 {
668 const int dst_chan = ffs(mask & BRW_WRITEMASK_XYZW) - 1;
669
670 if (!(mask & BRW_WRITEMASK_XYZW))
671 return; /* Do not emit dead code */
672
673 assert(util_is_power_of_two(mask & BRW_WRITEMASK_XYZW));
674
675 brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
676 brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
677 brw_MAC(p, dst[dst_chan], arg0[2], arg1[2]);
678
679 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
680 brw_ADD(p, dst[dst_chan], dst[dst_chan], arg1[3]);
681 brw_set_saturate(p, 0);
682 }
683
684
685 static void emit_xpd( struct brw_compile *p,
686 const struct brw_reg *dst,
687 GLuint mask,
688 const struct brw_reg *arg0,
689 const struct brw_reg *arg1 )
690 {
691 GLuint i;
692
693 assert(!(mask & BRW_WRITEMASK_W) == BRW_WRITEMASK_X);
694
695 for (i = 0 ; i < 3; i++) {
696 if (mask & (1<<i)) {
697 GLuint i2 = (i+2)%3;
698 GLuint i1 = (i+1)%3;
699
700 brw_MUL(p, brw_null_reg(), negate(arg0[i2]), arg1[i1]);
701
702 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
703 brw_MAC(p, dst[i], arg0[i1], arg1[i2]);
704 brw_set_saturate(p, 0);
705 }
706 }
707 }
708
709
710 static void emit_math1( struct brw_compile *p,
711 GLuint function,
712 const struct brw_reg *dst,
713 GLuint mask,
714 const struct brw_reg *arg0 )
715 {
716 int dst_chan = ffs(mask & BRW_WRITEMASK_XYZW) - 1;
717
718 if (!(mask & BRW_WRITEMASK_XYZW))
719 return; /* Do not emit dead code */
720
721 assert(util_is_power_of_two(mask & BRW_WRITEMASK_XYZW));
722
723 brw_MOV(p, brw_message_reg(2), arg0[0]);
724
725 /* Send two messages to perform all 16 operations:
726 */
727 brw_math_16(p,
728 dst[dst_chan],
729 function,
730 (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
731 2,
732 brw_null_reg(),
733 BRW_MATH_PRECISION_FULL);
734 }
735
736
737 static void emit_math2( struct brw_compile *p,
738 GLuint function,
739 const struct brw_reg *dst,
740 GLuint mask,
741 const struct brw_reg *arg0,
742 const struct brw_reg *arg1)
743 {
744 int dst_chan = ffs(mask & BRW_WRITEMASK_XYZW) - 1;
745
746 if (!(mask & BRW_WRITEMASK_XYZW))
747 return; /* Do not emit dead code */
748
749 assert(util_is_power_of_two(mask & BRW_WRITEMASK_XYZW));
750
751 brw_push_insn_state(p);
752
753 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
754 brw_MOV(p, brw_message_reg(2), arg0[0]);
755 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
756 brw_MOV(p, brw_message_reg(4), sechalf(arg0[0]));
757
758 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
759 brw_MOV(p, brw_message_reg(3), arg1[0]);
760 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
761 brw_MOV(p, brw_message_reg(5), sechalf(arg1[0]));
762
763
764 /* Send two messages to perform all 16 operations:
765 */
766 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
767 brw_math(p,
768 dst[dst_chan],
769 function,
770 (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
771 2,
772 brw_null_reg(),
773 BRW_MATH_DATA_VECTOR,
774 BRW_MATH_PRECISION_FULL);
775
776 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
777 brw_math(p,
778 offset(dst[dst_chan],1),
779 function,
780 (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
781 4,
782 brw_null_reg(),
783 BRW_MATH_DATA_VECTOR,
784 BRW_MATH_PRECISION_FULL);
785
786 brw_pop_insn_state(p);
787 }
788
789
790
791 static void emit_tex( struct brw_wm_compile *c,
792 const struct brw_wm_instruction *inst,
793 struct brw_reg *dst,
794 GLuint dst_flags,
795 struct brw_reg *arg )
796 {
797 struct brw_compile *p = &c->func;
798 GLuint msgLength, responseLength;
799 GLuint i, nr;
800 GLuint emit;
801 GLuint msg_type;
802 GLboolean shadow = FALSE;
803
804 /* How many input regs are there?
805 */
806 switch (inst->tex_target) {
807 case TGSI_TEXTURE_1D:
808 emit = BRW_WRITEMASK_X;
809 nr = 1;
810 break;
811 case TGSI_TEXTURE_SHADOW1D:
812 emit = BRW_WRITEMASK_XW;
813 nr = 4;
814 shadow = TRUE;
815 break;
816 case TGSI_TEXTURE_2D:
817 emit = BRW_WRITEMASK_XY;
818 nr = 2;
819 break;
820 case TGSI_TEXTURE_SHADOW2D:
821 case TGSI_TEXTURE_SHADOWRECT:
822 emit = BRW_WRITEMASK_XYW;
823 nr = 4;
824 shadow = TRUE;
825 break;
826 case TGSI_TEXTURE_3D:
827 case TGSI_TEXTURE_CUBE:
828 emit = BRW_WRITEMASK_XYZ;
829 nr = 3;
830 break;
831 default:
832 /* unexpected target */
833 abort();
834 }
835
836 msgLength = 1;
837
838 for (i = 0; i < nr; i++) {
839 static const GLuint swz[4] = {0,1,2,2};
840 if (emit & (1<<i))
841 brw_MOV(p, brw_message_reg(msgLength+1), arg[swz[i]]);
842 else
843 brw_MOV(p, brw_message_reg(msgLength+1), brw_imm_f(0));
844 msgLength += 2;
845 }
846
847 responseLength = 8; /* always */
848
849 if (BRW_IS_IGDNG(p->brw)) {
850 if (shadow)
851 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE_IGDNG;
852 else
853 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_IGDNG;
854 } else {
855 if (shadow)
856 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE;
857 else
858 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE;
859 }
860
861 brw_SAMPLE(p,
862 retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW),
863 1,
864 retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW),
865 SURF_INDEX_TEXTURE(inst->tex_unit),
866 inst->tex_unit, /* sampler */
867 inst->writemask,
868 msg_type,
869 responseLength,
870 msgLength,
871 0,
872 1,
873 BRW_SAMPLER_SIMD_MODE_SIMD16);
874 }
875
876
877 static void emit_txb( struct brw_wm_compile *c,
878 const struct brw_wm_instruction *inst,
879 struct brw_reg *dst,
880 GLuint dst_flags,
881 struct brw_reg *arg )
882 {
883 struct brw_compile *p = &c->func;
884 GLuint msgLength;
885 GLuint msg_type;
886 /* Shadow ignored for txb.
887 */
888 switch (inst->tex_target) {
889 case TGSI_TEXTURE_1D:
890 case TGSI_TEXTURE_SHADOW1D:
891 brw_MOV(p, brw_message_reg(2), arg[0]);
892 brw_MOV(p, brw_message_reg(4), brw_imm_f(0));
893 brw_MOV(p, brw_message_reg(6), brw_imm_f(0));
894 break;
895 case TGSI_TEXTURE_2D:
896 case TGSI_TEXTURE_RECT:
897 case TGSI_TEXTURE_SHADOW2D:
898 case TGSI_TEXTURE_SHADOWRECT:
899 brw_MOV(p, brw_message_reg(2), arg[0]);
900 brw_MOV(p, brw_message_reg(4), arg[1]);
901 brw_MOV(p, brw_message_reg(6), brw_imm_f(0));
902 break;
903 case TGSI_TEXTURE_3D:
904 case TGSI_TEXTURE_CUBE:
905 brw_MOV(p, brw_message_reg(2), arg[0]);
906 brw_MOV(p, brw_message_reg(4), arg[1]);
907 brw_MOV(p, brw_message_reg(6), arg[2]);
908 break;
909 default:
910 /* unexpected target */
911 abort();
912 }
913
914 brw_MOV(p, brw_message_reg(8), arg[3]);
915 msgLength = 9;
916
917 if (BRW_IS_IGDNG(p->brw))
918 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS_IGDNG;
919 else
920 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
921
922 brw_SAMPLE(p,
923 retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW),
924 1,
925 retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW),
926 SURF_INDEX_TEXTURE(inst->tex_unit),
927 inst->tex_unit, /* sampler */
928 inst->writemask,
929 msg_type,
930 8, /* responseLength */
931 msgLength,
932 0,
933 1,
934 BRW_SAMPLER_SIMD_MODE_SIMD16);
935 }
936
937
938 static void emit_lit( struct brw_compile *p,
939 const struct brw_reg *dst,
940 GLuint mask,
941 const struct brw_reg *arg0 )
942 {
943 assert((mask & BRW_WRITEMASK_XW) == 0);
944
945 if (mask & BRW_WRITEMASK_Y) {
946 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
947 brw_MOV(p, dst[1], arg0[0]);
948 brw_set_saturate(p, 0);
949 }
950
951 if (mask & BRW_WRITEMASK_Z) {
952 emit_math2(p, BRW_MATH_FUNCTION_POW,
953 &dst[2],
954 BRW_WRITEMASK_X | (mask & SATURATE),
955 &arg0[1],
956 &arg0[3]);
957 }
958
959 /* Ordinarily you'd use an iff statement to skip or shortcircuit
960 * some of the POW calculations above, but 16-wide iff statements
961 * seem to lock c1 hardware, so this is a nasty workaround:
962 */
963 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_LE, arg0[0], brw_imm_f(0));
964 {
965 if (mask & BRW_WRITEMASK_Y)
966 brw_MOV(p, dst[1], brw_imm_f(0));
967
968 if (mask & BRW_WRITEMASK_Z)
969 brw_MOV(p, dst[2], brw_imm_f(0));
970 }
971 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
972 }
973
974
975 /* Kill pixel - set execution mask to zero for those pixels which
976 * fail.
977 */
978 static void emit_kil( struct brw_wm_compile *c,
979 struct brw_reg *arg0)
980 {
981 struct brw_compile *p = &c->func;
982 struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
983 GLuint i;
984
985 /* XXX - usually won't need 4 compares!
986 */
987 for (i = 0; i < 4; i++) {
988 brw_push_insn_state(p);
989 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0[i], brw_imm_f(0));
990 brw_set_predicate_control_flag_value(p, 0xff);
991 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
992 brw_AND(p, r0uw, brw_flag_reg(), r0uw);
993 brw_pop_insn_state(p);
994 }
995 }
996
997 /* KILLP kills the pixels that are currently executing, not based on a test
998 * of the arguments.
999 */
1000 static void emit_killp( struct brw_wm_compile *c )
1001 {
1002 struct brw_compile *p = &c->func;
1003 struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
1004
1005 brw_push_insn_state(p);
1006 brw_set_mask_control(p, BRW_MASK_DISABLE);
1007 brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); //IMASK
1008 brw_AND(p, r0uw, c->emit_mask_reg, r0uw);
1009 brw_pop_insn_state(p);
1010 }
1011
1012 static void fire_fb_write( struct brw_wm_compile *c,
1013 GLuint base_reg,
1014 GLuint nr,
1015 GLuint target,
1016 GLuint eot )
1017 {
1018 struct brw_compile *p = &c->func;
1019
1020 /* Pass through control information:
1021 */
1022 /* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */
1023 {
1024 brw_push_insn_state(p);
1025 brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */
1026 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1027 brw_MOV(p,
1028 brw_message_reg(base_reg + 1),
1029 brw_vec8_grf(1, 0));
1030 brw_pop_insn_state(p);
1031 }
1032
1033 /* Send framebuffer write message: */
1034 /* send (16) null.0<1>:uw m0 r0.0<8;8,1>:uw 0x85a04000:ud { Align1 EOT } */
1035 brw_fb_WRITE(p,
1036 retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW),
1037 base_reg,
1038 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
1039 target,
1040 nr,
1041 0,
1042 eot);
1043 }
1044
1045
1046 static void emit_aa( struct brw_wm_compile *c,
1047 struct brw_reg *arg1,
1048 GLuint reg )
1049 {
1050 struct brw_compile *p = &c->func;
1051 GLuint comp = c->key.aa_dest_stencil_reg / 2;
1052 GLuint off = c->key.aa_dest_stencil_reg % 2;
1053 struct brw_reg aa = offset(arg1[comp], off);
1054
1055 brw_push_insn_state(p);
1056 brw_set_compression_control(p, BRW_COMPRESSION_NONE); /* ?? */
1057 brw_MOV(p, brw_message_reg(reg), aa);
1058 brw_pop_insn_state(p);
1059 }
1060
1061
1062 /* Post-fragment-program processing. Send the results to the
1063 * framebuffer.
1064 * \param arg0 the fragment color
1065 * \param arg1 the pass-through depth value
1066 * \param arg2 the shader-computed depth value
1067 */
1068 static void emit_fb_write( struct brw_wm_compile *c,
1069 struct brw_reg *arg0,
1070 struct brw_reg *arg1,
1071 struct brw_reg *arg2,
1072 GLuint target,
1073 GLuint eot)
1074 {
1075 struct brw_compile *p = &c->func;
1076 GLuint nr = 2;
1077 GLuint channel;
1078
1079 /* Reserve a space for AA - may not be needed:
1080 */
1081 if (c->key.aa_dest_stencil_reg)
1082 nr += 1;
1083
1084 /* I don't really understand how this achieves the color interleave
1085 * (ie RGBARGBA) in the result: [Do the saturation here]
1086 */
1087 {
1088 brw_push_insn_state(p);
1089
1090 for (channel = 0; channel < 4; channel++) {
1091 /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */
1092 /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */
1093
1094 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1095 brw_MOV(p,
1096 brw_message_reg(nr + channel),
1097 arg0[channel]);
1098
1099 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
1100 brw_MOV(p,
1101 brw_message_reg(nr + channel + 4),
1102 sechalf(arg0[channel]));
1103 }
1104
1105 /* skip over the regs populated above:
1106 */
1107 nr += 8;
1108
1109 brw_pop_insn_state(p);
1110 }
1111
1112 if (c->key.source_depth_to_render_target)
1113 {
1114 if (c->key.computes_depth)
1115 brw_MOV(p, brw_message_reg(nr), arg2[2]);
1116 else
1117 brw_MOV(p, brw_message_reg(nr), arg1[1]); /* ? */
1118
1119 nr += 2;
1120 }
1121
1122 if (c->key.dest_depth_reg)
1123 {
1124 GLuint comp = c->key.dest_depth_reg / 2;
1125 GLuint off = c->key.dest_depth_reg % 2;
1126
1127 if (off != 0) {
1128 brw_push_insn_state(p);
1129 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1130
1131 brw_MOV(p, brw_message_reg(nr), offset(arg1[comp],1));
1132 /* 2nd half? */
1133 brw_MOV(p, brw_message_reg(nr+1), arg1[comp+1]);
1134 brw_pop_insn_state(p);
1135 }
1136 else {
1137 brw_MOV(p, brw_message_reg(nr), arg1[comp]);
1138 }
1139 nr += 2;
1140 }
1141
1142 if (!c->key.runtime_check_aads_emit) {
1143 if (c->key.aa_dest_stencil_reg)
1144 emit_aa(c, arg1, 2);
1145
1146 fire_fb_write(c, 0, nr, target, eot);
1147 }
1148 else {
1149 struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
1150 struct brw_reg ip = brw_ip_reg();
1151 struct brw_instruction *jmp;
1152
1153 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1154 brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
1155 brw_AND(p,
1156 v1_null_ud,
1157 get_element_ud(brw_vec8_grf(1,0), 6),
1158 brw_imm_ud(1<<26));
1159
1160 jmp = brw_JMPI(p, ip, ip, brw_imm_d(0));
1161 {
1162 emit_aa(c, arg1, 2);
1163 fire_fb_write(c, 0, nr, target, eot);
1164 /* note - thread killed in subroutine */
1165 }
1166 brw_land_fwd_jump(p, jmp);
1167
1168 /* ELSE: Shuffle up one register to fill in the hole left for AA:
1169 */
1170 fire_fb_write(c, 1, nr-1, target, eot);
1171 }
1172 }
1173
1174
1175 /**
1176 * Move a GPR to scratch memory.
1177 */
1178 static void emit_spill( struct brw_wm_compile *c,
1179 struct brw_reg reg,
1180 GLuint slot )
1181 {
1182 struct brw_compile *p = &c->func;
1183
1184 /*
1185 mov (16) m2.0<1>:ud r2.0<8;8,1>:ud { Align1 Compr }
1186 */
1187 brw_MOV(p, brw_message_reg(2), reg);
1188
1189 /*
1190 mov (1) r0.2<1>:d 0x00000080:d { Align1 NoMask }
1191 send (16) null.0<1>:uw m1 r0.0<8;8,1>:uw 0x053003ff:ud { Align1 }
1192 */
1193 brw_dp_WRITE_16(p,
1194 retype(vec16(brw_vec8_grf(0, 0)), BRW_REGISTER_TYPE_UW),
1195 slot);
1196 }
1197
1198
1199 /**
1200 * Load a GPR from scratch memory.
1201 */
1202 static void emit_unspill( struct brw_wm_compile *c,
1203 struct brw_reg reg,
1204 GLuint slot )
1205 {
1206 struct brw_compile *p = &c->func;
1207
1208 /* Slot 0 is the undef value.
1209 */
1210 if (slot == 0) {
1211 brw_MOV(p, reg, brw_imm_f(0));
1212 return;
1213 }
1214
1215 /*
1216 mov (1) r0.2<1>:d 0x000000c0:d { Align1 NoMask }
1217 send (16) r110.0<1>:uw m1 r0.0<8;8,1>:uw 0x041243ff:ud { Align1 }
1218 */
1219
1220 brw_dp_READ_16(p,
1221 retype(vec16(reg), BRW_REGISTER_TYPE_UW),
1222 slot);
1223 }
1224
1225
1226 /**
1227 * Retrieve up to 4 GEN4 register pairs for the given wm reg:
1228 * Args with unspill_reg != 0 will be loaded from scratch memory.
1229 */
1230 static void get_argument_regs( struct brw_wm_compile *c,
1231 struct brw_wm_ref *arg[],
1232 struct brw_reg *regs )
1233 {
1234 GLuint i;
1235
1236 for (i = 0; i < 4; i++) {
1237 if (arg[i]) {
1238 if (arg[i]->unspill_reg)
1239 emit_unspill(c,
1240 brw_vec8_grf(arg[i]->unspill_reg, 0),
1241 arg[i]->value->spill_slot);
1242
1243 regs[i] = arg[i]->hw_reg;
1244 }
1245 else {
1246 regs[i] = brw_null_reg();
1247 }
1248 }
1249 }
1250
1251
1252 /**
1253 * For values that have a spill_slot!=0, write those regs to scratch memory.
1254 */
1255 static void spill_values( struct brw_wm_compile *c,
1256 struct brw_wm_value *values,
1257 GLuint nr )
1258 {
1259 GLuint i;
1260
1261 for (i = 0; i < nr; i++)
1262 if (values[i].spill_slot)
1263 emit_spill(c, values[i].hw_reg, values[i].spill_slot);
1264 }
1265
1266
1267 /* Emit the fragment program instructions here.
1268 */
1269 void brw_wm_emit( struct brw_wm_compile *c )
1270 {
1271 struct brw_compile *p = &c->func;
1272 GLuint insn;
1273
1274 brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
1275
1276 /* Check if any of the payload regs need to be spilled:
1277 */
1278 spill_values(c, c->payload.depth, 4);
1279 spill_values(c, c->creg, c->nr_creg);
1280 spill_values(c, c->payload.input_interp, PIPE_MAX_SHADER_INPUTS);
1281
1282
1283 for (insn = 0; insn < c->nr_insns; insn++) {
1284
1285 struct brw_wm_instruction *inst = &c->instruction[insn];
1286 struct brw_reg args[3][4], dst[4];
1287 GLuint i, dst_flags;
1288
1289 /* Get argument regs:
1290 */
1291 for (i = 0; i < 3; i++)
1292 get_argument_regs(c, inst->src[i], args[i]);
1293
1294 /* Get dest regs:
1295 */
1296 for (i = 0; i < 4; i++)
1297 if (inst->dst[i])
1298 dst[i] = inst->dst[i]->hw_reg;
1299 else
1300 dst[i] = brw_null_reg();
1301
1302 /* Flags
1303 */
1304 dst_flags = inst->writemask;
1305 if (inst->saturate)
1306 dst_flags |= SATURATE;
1307
1308 switch (inst->opcode) {
1309 /* Generated instructions for calculating triangle interpolants:
1310 */
1311 case WM_PIXELXY:
1312 emit_pixel_xy(p, dst, dst_flags);
1313 break;
1314
1315 case WM_DELTAXY:
1316 emit_delta_xy(p, dst, dst_flags, args[0]);
1317 break;
1318
1319 case WM_WPOSXY:
1320 emit_wpos_xy(c, dst, dst_flags, args[0]);
1321 break;
1322
1323 case WM_PIXELW:
1324 emit_pixel_w(p, dst, dst_flags, args[0], args[1]);
1325 break;
1326
1327 case WM_LINTERP:
1328 emit_linterp(p, dst, dst_flags, args[0], args[1]);
1329 break;
1330
1331 case WM_PINTERP:
1332 emit_pinterp(p, dst, dst_flags, args[0], args[1], args[2]);
1333 break;
1334
1335 case WM_CINTERP:
1336 emit_cinterp(p, dst, dst_flags, args[0]);
1337 break;
1338
1339 case WM_FB_WRITE:
1340 emit_fb_write(c, args[0], args[1], args[2], inst->target, inst->eot);
1341 break;
1342
1343 case WM_FRONTFACING:
1344 emit_frontfacing(p, dst, dst_flags);
1345 break;
1346
1347 /* Straightforward arithmetic:
1348 */
1349 case TGSI_OPCODE_ADD:
1350 emit_alu2(p, brw_ADD, dst, dst_flags, args[0], args[1]);
1351 break;
1352
1353 case TGSI_OPCODE_FRC:
1354 emit_alu1(p, brw_FRC, dst, dst_flags, args[0]);
1355 break;
1356
1357 case TGSI_OPCODE_FLR:
1358 emit_alu1(p, brw_RNDD, dst, dst_flags, args[0]);
1359 break;
1360
1361 case TGSI_OPCODE_DDX:
1362 emit_ddxy(p, dst, dst_flags, GL_TRUE, args[0]);
1363 break;
1364
1365 case TGSI_OPCODE_DDY:
1366 emit_ddxy(p, dst, dst_flags, GL_FALSE, args[0]);
1367 break;
1368
1369 case TGSI_OPCODE_DP3:
1370 emit_dp3(p, dst, dst_flags, args[0], args[1]);
1371 break;
1372
1373 case TGSI_OPCODE_DP4:
1374 emit_dp4(p, dst, dst_flags, args[0], args[1]);
1375 break;
1376
1377 case TGSI_OPCODE_DPH:
1378 emit_dph(p, dst, dst_flags, args[0], args[1]);
1379 break;
1380
1381 case TGSI_OPCODE_TRUNC:
1382 emit_trunc(p, dst, dst_flags, args[0]);
1383 break;
1384
1385 case TGSI_OPCODE_LRP:
1386 emit_lrp(p, dst, dst_flags, args[0], args[1], args[2]);
1387 break;
1388
1389 case TGSI_OPCODE_MAD:
1390 emit_mad(p, dst, dst_flags, args[0], args[1], args[2]);
1391 break;
1392
1393 case TGSI_OPCODE_MOV:
1394 emit_alu1(p, brw_MOV, dst, dst_flags, args[0]);
1395 break;
1396
1397 case TGSI_OPCODE_MUL:
1398 emit_alu2(p, brw_MUL, dst, dst_flags, args[0], args[1]);
1399 break;
1400
1401 case TGSI_OPCODE_XPD:
1402 emit_xpd(p, dst, dst_flags, args[0], args[1]);
1403 break;
1404
1405 /* Higher math functions:
1406 */
1407 case TGSI_OPCODE_RCP:
1408 emit_math1(p, BRW_MATH_FUNCTION_INV, dst, dst_flags, args[0]);
1409 break;
1410
1411 case TGSI_OPCODE_RSQ:
1412 emit_math1(p, BRW_MATH_FUNCTION_RSQ, dst, dst_flags, args[0]);
1413 break;
1414
1415 case TGSI_OPCODE_SIN:
1416 emit_math1(p, BRW_MATH_FUNCTION_SIN, dst, dst_flags, args[0]);
1417 break;
1418
1419 case TGSI_OPCODE_COS:
1420 emit_math1(p, BRW_MATH_FUNCTION_COS, dst, dst_flags, args[0]);
1421 break;
1422
1423 case TGSI_OPCODE_EX2:
1424 emit_math1(p, BRW_MATH_FUNCTION_EXP, dst, dst_flags, args[0]);
1425 break;
1426
1427 case TGSI_OPCODE_LG2:
1428 emit_math1(p, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]);
1429 break;
1430
1431 case TGSI_OPCODE_SCS:
1432 /* There is an scs math function, but it would need some
1433 * fixup for 16-element execution.
1434 */
1435 if (dst_flags & BRW_WRITEMASK_X)
1436 emit_math1(p, BRW_MATH_FUNCTION_COS, dst, (dst_flags&SATURATE)|BRW_WRITEMASK_X, args[0]);
1437 if (dst_flags & BRW_WRITEMASK_Y)
1438 emit_math1(p, BRW_MATH_FUNCTION_SIN, dst+1, (dst_flags&SATURATE)|BRW_WRITEMASK_X, args[0]);
1439 break;
1440
1441 case TGSI_OPCODE_POW:
1442 emit_math2(p, BRW_MATH_FUNCTION_POW, dst, dst_flags, args[0], args[1]);
1443 break;
1444
1445 /* Comparisons:
1446 */
1447 case TGSI_OPCODE_CMP:
1448 emit_cmp(p, dst, dst_flags, args[0], args[1], args[2]);
1449 break;
1450
1451 case TGSI_OPCODE_MAX:
1452 emit_max(p, dst, dst_flags, args[0], args[1]);
1453 break;
1454
1455 case TGSI_OPCODE_MIN:
1456 emit_min(p, dst, dst_flags, args[0], args[1]);
1457 break;
1458
1459 case TGSI_OPCODE_SLT:
1460 emit_slt(p, dst, dst_flags, args[0], args[1]);
1461 break;
1462
1463 case TGSI_OPCODE_SLE:
1464 emit_sle(p, dst, dst_flags, args[0], args[1]);
1465 break;
1466 case TGSI_OPCODE_SGT:
1467 emit_sgt(p, dst, dst_flags, args[0], args[1]);
1468 break;
1469 case TGSI_OPCODE_SGE:
1470 emit_sge(p, dst, dst_flags, args[0], args[1]);
1471 break;
1472 case TGSI_OPCODE_SEQ:
1473 emit_seq(p, dst, dst_flags, args[0], args[1]);
1474 break;
1475 case TGSI_OPCODE_SNE:
1476 emit_sne(p, dst, dst_flags, args[0], args[1]);
1477 break;
1478
1479 case TGSI_OPCODE_LIT:
1480 emit_lit(p, dst, dst_flags, args[0]);
1481 break;
1482
1483 /* Texturing operations:
1484 */
1485 case TGSI_OPCODE_TEX:
1486 emit_tex(c, inst, dst, dst_flags, args[0]);
1487 break;
1488
1489 case TGSI_OPCODE_TXB:
1490 emit_txb(c, inst, dst, dst_flags, args[0]);
1491 break;
1492
1493 case TGSI_OPCODE_KIL:
1494 emit_kil(c, args[0]);
1495 break;
1496
1497 case TGSI_OPCODE_KILP:
1498 emit_killp(c);
1499 break;
1500
1501 default:
1502 debug_printf("Unsupported opcode %i (%s) in fragment shader\n",
1503 inst->opcode,
1504 tgsi_get_opcode_info(inst->opcode)->mnemonic);
1505 }
1506
1507 for (i = 0; i < 4; i++)
1508 if (inst->dst[i] && inst->dst[i]->spill_slot)
1509 emit_spill(c,
1510 inst->dst[i]->hw_reg,
1511 inst->dst[i]->spill_slot);
1512 }
1513
1514 if (BRW_DEBUG & DEBUG_WM) {
1515 debug_printf("wm-native:\n");
1516 brw_disasm(stderr, p->store, p->nr_insn);
1517 }
1518 }