i965: Use a normal alu1 emit for OPCODE_TRUNC.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_wm_emit.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "main/macros.h"
34 #include "brw_context.h"
35 #include "brw_wm.h"
36
37 /* Not quite sure how correct this is - need to understand horiz
38 * vs. vertical strides a little better.
39 */
40 static INLINE struct brw_reg sechalf( struct brw_reg reg )
41 {
42 if (reg.vstride)
43 reg.nr++;
44 return reg;
45 }
46
47 /* Payload R0:
48 *
49 * R0.0 -- pixel mask, one bit for each of 4 pixels in 4 tiles,
50 * corresponding to each of the 16 execution channels.
51 * R0.1..8 -- ?
52 * R1.0 -- triangle vertex 0.X
53 * R1.1 -- triangle vertex 0.Y
54 * R1.2 -- tile 0 x,y coords (2 packed uwords)
55 * R1.3 -- tile 1 x,y coords (2 packed uwords)
56 * R1.4 -- tile 2 x,y coords (2 packed uwords)
57 * R1.5 -- tile 3 x,y coords (2 packed uwords)
58 * R1.6 -- ?
59 * R1.7 -- ?
60 * R1.8 -- ?
61 */
62
63
64 static void emit_pixel_xy(struct brw_compile *p,
65 const struct brw_reg *dst,
66 GLuint mask)
67 {
68 struct brw_reg r1 = brw_vec1_grf(1, 0);
69 struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW);
70
71 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
72
73 /* Calculate pixel centers by adding 1 or 0 to each of the
74 * micro-tile coordinates passed in r1.
75 */
76 if (mask & WRITEMASK_X) {
77 brw_ADD(p,
78 vec16(retype(dst[0], BRW_REGISTER_TYPE_UW)),
79 stride(suboffset(r1_uw, 4), 2, 4, 0),
80 brw_imm_v(0x10101010));
81 }
82
83 if (mask & WRITEMASK_Y) {
84 brw_ADD(p,
85 vec16(retype(dst[1], BRW_REGISTER_TYPE_UW)),
86 stride(suboffset(r1_uw,5), 2, 4, 0),
87 brw_imm_v(0x11001100));
88 }
89
90 brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
91 }
92
93
94
95 static void emit_delta_xy(struct brw_compile *p,
96 const struct brw_reg *dst,
97 GLuint mask,
98 const struct brw_reg *arg0)
99 {
100 struct brw_reg r1 = brw_vec1_grf(1, 0);
101
102 /* Calc delta X,Y by subtracting origin in r1 from the pixel
103 * centers.
104 */
105 if (mask & WRITEMASK_X) {
106 brw_ADD(p,
107 dst[0],
108 retype(arg0[0], BRW_REGISTER_TYPE_UW),
109 negate(r1));
110 }
111
112 if (mask & WRITEMASK_Y) {
113 brw_ADD(p,
114 dst[1],
115 retype(arg0[1], BRW_REGISTER_TYPE_UW),
116 negate(suboffset(r1,1)));
117
118 }
119 }
120
121 static void emit_wpos_xy(struct brw_wm_compile *c,
122 const struct brw_reg *dst,
123 GLuint mask,
124 const struct brw_reg *arg0)
125 {
126 struct brw_compile *p = &c->func;
127
128 /* Calculate the pixel offset from window bottom left into destination
129 * X and Y channels.
130 */
131 if (mask & WRITEMASK_X) {
132 /* X' = X - origin */
133 brw_ADD(p,
134 dst[0],
135 retype(arg0[0], BRW_REGISTER_TYPE_W),
136 brw_imm_d(0 - c->key.origin_x));
137 }
138
139 if (mask & WRITEMASK_Y) {
140 /* Y' = height - (Y - origin_y) = height + origin_y - Y */
141 brw_ADD(p,
142 dst[1],
143 negate(retype(arg0[1], BRW_REGISTER_TYPE_W)),
144 brw_imm_d(c->key.origin_y + c->key.drawable_height - 1));
145 }
146 }
147
148
149 static void emit_pixel_w( struct brw_compile *p,
150 const struct brw_reg *dst,
151 GLuint mask,
152 const struct brw_reg *arg0,
153 const struct brw_reg *deltas)
154 {
155 /* Don't need this if all you are doing is interpolating color, for
156 * instance.
157 */
158 if (mask & WRITEMASK_W) {
159 struct brw_reg interp3 = brw_vec1_grf(arg0[0].nr+1, 4);
160
161 /* Calc 1/w - just linterp wpos[3] optimized by putting the
162 * result straight into a message reg.
163 */
164 brw_LINE(p, brw_null_reg(), interp3, deltas[0]);
165 brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), deltas[1]);
166
167 /* Calc w */
168 brw_math_16( p, dst[3],
169 BRW_MATH_FUNCTION_INV,
170 BRW_MATH_SATURATE_NONE,
171 2, brw_null_reg(),
172 BRW_MATH_PRECISION_FULL);
173 }
174 }
175
176
177
178 static void emit_linterp( struct brw_compile *p,
179 const struct brw_reg *dst,
180 GLuint mask,
181 const struct brw_reg *arg0,
182 const struct brw_reg *deltas )
183 {
184 struct brw_reg interp[4];
185 GLuint nr = arg0[0].nr;
186 GLuint i;
187
188 interp[0] = brw_vec1_grf(nr, 0);
189 interp[1] = brw_vec1_grf(nr, 4);
190 interp[2] = brw_vec1_grf(nr+1, 0);
191 interp[3] = brw_vec1_grf(nr+1, 4);
192
193 for (i = 0; i < 4; i++) {
194 if (mask & (1<<i)) {
195 brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
196 brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
197 }
198 }
199 }
200
201
202 static void emit_pinterp( struct brw_compile *p,
203 const struct brw_reg *dst,
204 GLuint mask,
205 const struct brw_reg *arg0,
206 const struct brw_reg *deltas,
207 const struct brw_reg *w)
208 {
209 struct brw_reg interp[4];
210 GLuint nr = arg0[0].nr;
211 GLuint i;
212
213 interp[0] = brw_vec1_grf(nr, 0);
214 interp[1] = brw_vec1_grf(nr, 4);
215 interp[2] = brw_vec1_grf(nr+1, 0);
216 interp[3] = brw_vec1_grf(nr+1, 4);
217
218 for (i = 0; i < 4; i++) {
219 if (mask & (1<<i)) {
220 brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
221 brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
222 }
223 }
224 for (i = 0; i < 4; i++) {
225 if (mask & (1<<i)) {
226 brw_MUL(p, dst[i], dst[i], w[3]);
227 }
228 }
229 }
230
231
232 static void emit_cinterp( struct brw_compile *p,
233 const struct brw_reg *dst,
234 GLuint mask,
235 const struct brw_reg *arg0 )
236 {
237 struct brw_reg interp[4];
238 GLuint nr = arg0[0].nr;
239 GLuint i;
240
241 interp[0] = brw_vec1_grf(nr, 0);
242 interp[1] = brw_vec1_grf(nr, 4);
243 interp[2] = brw_vec1_grf(nr+1, 0);
244 interp[3] = brw_vec1_grf(nr+1, 4);
245
246 for (i = 0; i < 4; i++) {
247 if (mask & (1<<i)) {
248 brw_MOV(p, dst[i], suboffset(interp[i],3)); /* TODO: optimize away like other moves */
249 }
250 }
251 }
252
253 /* Sets the destination channels to 1.0 or 0.0 according to glFrontFacing. */
254 static void emit_frontfacing( struct brw_compile *p,
255 const struct brw_reg *dst,
256 GLuint mask )
257 {
258 struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD);
259 GLuint i;
260
261 if (!(mask & WRITEMASK_XYZW))
262 return;
263
264 for (i = 0; i < 4; i++) {
265 if (mask & (1<<i)) {
266 brw_MOV(p, dst[i], brw_imm_f(0.0));
267 }
268 }
269
270 /* bit 31 is "primitive is back face", so checking < (1 << 31) gives
271 * us front face
272 */
273 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, r1_6ud, brw_imm_ud(1 << 31));
274 for (i = 0; i < 4; i++) {
275 if (mask & (1<<i)) {
276 brw_MOV(p, dst[i], brw_imm_f(1.0));
277 }
278 }
279 brw_set_predicate_control_flag_value(p, 0xff);
280 }
281
282 /* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input
283 * looking like:
284 *
285 * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br
286 *
287 * and we're trying to produce:
288 *
289 * DDX DDY
290 * dst: (ss0.tr - ss0.tl) (ss0.tl - ss0.bl)
291 * (ss0.tr - ss0.tl) (ss0.tr - ss0.br)
292 * (ss0.br - ss0.bl) (ss0.tl - ss0.bl)
293 * (ss0.br - ss0.bl) (ss0.tr - ss0.br)
294 * (ss1.tr - ss1.tl) (ss1.tl - ss1.bl)
295 * (ss1.tr - ss1.tl) (ss1.tr - ss1.br)
296 * (ss1.br - ss1.bl) (ss1.tl - ss1.bl)
297 * (ss1.br - ss1.bl) (ss1.tr - ss1.br)
298 *
299 * and add another set of two more subspans if in 16-pixel dispatch mode.
300 *
301 * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result
302 * for each pair, and vertstride = 2 jumps us 2 elements after processing a
303 * pair. But for DDY, it's harder, as we want to produce the pairs swizzled
304 * between each other. We could probably do it like ddx and swizzle the right
305 * order later, but bail for now and just produce
306 * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4)
307 */
308 void emit_ddxy(struct brw_compile *p,
309 const struct brw_reg *dst,
310 GLuint mask,
311 GLboolean is_ddx,
312 const struct brw_reg *arg0)
313 {
314 int i;
315 struct brw_reg src0, src1;
316
317 if (mask & SATURATE)
318 brw_set_saturate(p, 1);
319 for (i = 0; i < 4; i++ ) {
320 if (mask & (1<<i)) {
321 if (is_ddx) {
322 src0 = brw_reg(arg0[i].file, arg0[i].nr, 1,
323 BRW_REGISTER_TYPE_F,
324 BRW_VERTICAL_STRIDE_2,
325 BRW_WIDTH_2,
326 BRW_HORIZONTAL_STRIDE_0,
327 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
328 src1 = brw_reg(arg0[i].file, arg0[i].nr, 0,
329 BRW_REGISTER_TYPE_F,
330 BRW_VERTICAL_STRIDE_2,
331 BRW_WIDTH_2,
332 BRW_HORIZONTAL_STRIDE_0,
333 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
334 } else {
335 src0 = brw_reg(arg0[i].file, arg0[i].nr, 0,
336 BRW_REGISTER_TYPE_F,
337 BRW_VERTICAL_STRIDE_4,
338 BRW_WIDTH_4,
339 BRW_HORIZONTAL_STRIDE_0,
340 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
341 src1 = brw_reg(arg0[i].file, arg0[i].nr, 2,
342 BRW_REGISTER_TYPE_F,
343 BRW_VERTICAL_STRIDE_4,
344 BRW_WIDTH_4,
345 BRW_HORIZONTAL_STRIDE_0,
346 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
347 }
348 brw_ADD(p, dst[i], src0, negate(src1));
349 }
350 }
351 if (mask & SATURATE)
352 brw_set_saturate(p, 0);
353 }
354
355 void emit_alu1(struct brw_compile *p,
356 struct brw_instruction *(*func)(struct brw_compile *,
357 struct brw_reg,
358 struct brw_reg),
359 const struct brw_reg *dst,
360 GLuint mask,
361 const struct brw_reg *arg0)
362 {
363 GLuint i;
364
365 if (mask & SATURATE)
366 brw_set_saturate(p, 1);
367
368 for (i = 0; i < 4; i++) {
369 if (mask & (1<<i)) {
370 func(p, dst[i], arg0[i]);
371 }
372 }
373
374 if (mask & SATURATE)
375 brw_set_saturate(p, 0);
376 }
377
378
379 void emit_alu2(struct brw_compile *p,
380 struct brw_instruction *(*func)(struct brw_compile *,
381 struct brw_reg,
382 struct brw_reg,
383 struct brw_reg),
384 const struct brw_reg *dst,
385 GLuint mask,
386 const struct brw_reg *arg0,
387 const struct brw_reg *arg1)
388 {
389 GLuint i;
390
391 if (mask & SATURATE)
392 brw_set_saturate(p, 1);
393
394 for (i = 0; i < 4; i++) {
395 if (mask & (1<<i)) {
396 func(p, dst[i], arg0[i], arg1[i]);
397 }
398 }
399
400 if (mask & SATURATE)
401 brw_set_saturate(p, 0);
402 }
403
404
405 static void emit_mad( struct brw_compile *p,
406 const struct brw_reg *dst,
407 GLuint mask,
408 const struct brw_reg *arg0,
409 const struct brw_reg *arg1,
410 const struct brw_reg *arg2 )
411 {
412 GLuint i;
413
414 for (i = 0; i < 4; i++) {
415 if (mask & (1<<i)) {
416 brw_MUL(p, dst[i], arg0[i], arg1[i]);
417
418 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
419 brw_ADD(p, dst[i], dst[i], arg2[i]);
420 brw_set_saturate(p, 0);
421 }
422 }
423 }
424
425 static void emit_lrp( struct brw_compile *p,
426 const struct brw_reg *dst,
427 GLuint mask,
428 const struct brw_reg *arg0,
429 const struct brw_reg *arg1,
430 const struct brw_reg *arg2 )
431 {
432 GLuint i;
433
434 /* Uses dst as a temporary:
435 */
436 for (i = 0; i < 4; i++) {
437 if (mask & (1<<i)) {
438 /* Can I use the LINE instruction for this?
439 */
440 brw_ADD(p, dst[i], negate(arg0[i]), brw_imm_f(1.0));
441 brw_MUL(p, brw_null_reg(), dst[i], arg2[i]);
442
443 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
444 brw_MAC(p, dst[i], arg0[i], arg1[i]);
445 brw_set_saturate(p, 0);
446 }
447 }
448 }
449
450 static void emit_sop( struct brw_compile *p,
451 const struct brw_reg *dst,
452 GLuint mask,
453 GLuint cond,
454 const struct brw_reg *arg0,
455 const struct brw_reg *arg1 )
456 {
457 GLuint i;
458
459 for (i = 0; i < 4; i++) {
460 if (mask & (1<<i)) {
461 brw_MOV(p, dst[i], brw_imm_f(0));
462 brw_CMP(p, brw_null_reg(), cond, arg0[i], arg1[i]);
463 brw_MOV(p, dst[i], brw_imm_f(1.0));
464 brw_set_predicate_control_flag_value(p, 0xff);
465 }
466 }
467 }
468
469 static void emit_slt( struct brw_compile *p,
470 const struct brw_reg *dst,
471 GLuint mask,
472 const struct brw_reg *arg0,
473 const struct brw_reg *arg1 )
474 {
475 emit_sop(p, dst, mask, BRW_CONDITIONAL_L, arg0, arg1);
476 }
477
478 static void emit_sle( struct brw_compile *p,
479 const struct brw_reg *dst,
480 GLuint mask,
481 const struct brw_reg *arg0,
482 const struct brw_reg *arg1 )
483 {
484 emit_sop(p, dst, mask, BRW_CONDITIONAL_LE, arg0, arg1);
485 }
486
487 static void emit_sgt( struct brw_compile *p,
488 const struct brw_reg *dst,
489 GLuint mask,
490 const struct brw_reg *arg0,
491 const struct brw_reg *arg1 )
492 {
493 emit_sop(p, dst, mask, BRW_CONDITIONAL_G, arg0, arg1);
494 }
495
496 static void emit_sge( struct brw_compile *p,
497 const struct brw_reg *dst,
498 GLuint mask,
499 const struct brw_reg *arg0,
500 const struct brw_reg *arg1 )
501 {
502 emit_sop(p, dst, mask, BRW_CONDITIONAL_GE, arg0, arg1);
503 }
504
505 static void emit_seq( struct brw_compile *p,
506 const struct brw_reg *dst,
507 GLuint mask,
508 const struct brw_reg *arg0,
509 const struct brw_reg *arg1 )
510 {
511 emit_sop(p, dst, mask, BRW_CONDITIONAL_EQ, arg0, arg1);
512 }
513
514 static void emit_sne( struct brw_compile *p,
515 const struct brw_reg *dst,
516 GLuint mask,
517 const struct brw_reg *arg0,
518 const struct brw_reg *arg1 )
519 {
520 emit_sop(p, dst, mask, BRW_CONDITIONAL_NEQ, arg0, arg1);
521 }
522
523 static void emit_cmp( struct brw_compile *p,
524 const struct brw_reg *dst,
525 GLuint mask,
526 const struct brw_reg *arg0,
527 const struct brw_reg *arg1,
528 const struct brw_reg *arg2 )
529 {
530 GLuint i;
531
532 for (i = 0; i < 4; i++) {
533 if (mask & (1<<i)) {
534 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
535 brw_MOV(p, dst[i], arg2[i]);
536 brw_set_saturate(p, 0);
537
538 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], brw_imm_f(0));
539
540 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
541 brw_MOV(p, dst[i], arg1[i]);
542 brw_set_saturate(p, 0);
543 brw_set_predicate_control_flag_value(p, 0xff);
544 }
545 }
546 }
547
548 static void emit_max( struct brw_compile *p,
549 const struct brw_reg *dst,
550 GLuint mask,
551 const struct brw_reg *arg0,
552 const struct brw_reg *arg1 )
553 {
554 GLuint i;
555
556 for (i = 0; i < 4; i++) {
557 if (mask & (1<<i)) {
558 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
559 brw_MOV(p, dst[i], arg0[i]);
560 brw_set_saturate(p, 0);
561
562 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]);
563
564 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
565 brw_MOV(p, dst[i], arg1[i]);
566 brw_set_saturate(p, 0);
567 brw_set_predicate_control_flag_value(p, 0xff);
568 }
569 }
570 }
571
572 static void emit_min( struct brw_compile *p,
573 const struct brw_reg *dst,
574 GLuint mask,
575 const struct brw_reg *arg0,
576 const struct brw_reg *arg1 )
577 {
578 GLuint i;
579
580 for (i = 0; i < 4; i++) {
581 if (mask & (1<<i)) {
582 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
583 brw_MOV(p, dst[i], arg1[i]);
584 brw_set_saturate(p, 0);
585
586 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]);
587
588 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
589 brw_MOV(p, dst[i], arg0[i]);
590 brw_set_saturate(p, 0);
591 brw_set_predicate_control_flag_value(p, 0xff);
592 }
593 }
594 }
595
596
597 static void emit_dp3( struct brw_compile *p,
598 const struct brw_reg *dst,
599 GLuint mask,
600 const struct brw_reg *arg0,
601 const struct brw_reg *arg1 )
602 {
603 int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
604
605 if (!(mask & WRITEMASK_XYZW))
606 return; /* Do not emit dead code */
607
608 assert(is_power_of_two(mask & WRITEMASK_XYZW));
609
610 brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
611 brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
612
613 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
614 brw_MAC(p, dst[dst_chan], arg0[2], arg1[2]);
615 brw_set_saturate(p, 0);
616 }
617
618
619 static void emit_dp4( struct brw_compile *p,
620 const struct brw_reg *dst,
621 GLuint mask,
622 const struct brw_reg *arg0,
623 const struct brw_reg *arg1 )
624 {
625 int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
626
627 if (!(mask & WRITEMASK_XYZW))
628 return; /* Do not emit dead code */
629
630 assert(is_power_of_two(mask & WRITEMASK_XYZW));
631
632 brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
633 brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
634 brw_MAC(p, brw_null_reg(), arg0[2], arg1[2]);
635
636 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
637 brw_MAC(p, dst[dst_chan], arg0[3], arg1[3]);
638 brw_set_saturate(p, 0);
639 }
640
641
642 static void emit_dph( struct brw_compile *p,
643 const struct brw_reg *dst,
644 GLuint mask,
645 const struct brw_reg *arg0,
646 const struct brw_reg *arg1 )
647 {
648 const int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
649
650 if (!(mask & WRITEMASK_XYZW))
651 return; /* Do not emit dead code */
652
653 assert(is_power_of_two(mask & WRITEMASK_XYZW));
654
655 brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
656 brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
657 brw_MAC(p, dst[dst_chan], arg0[2], arg1[2]);
658
659 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
660 brw_ADD(p, dst[dst_chan], dst[dst_chan], arg1[3]);
661 brw_set_saturate(p, 0);
662 }
663
664
665 static void emit_xpd( struct brw_compile *p,
666 const struct brw_reg *dst,
667 GLuint mask,
668 const struct brw_reg *arg0,
669 const struct brw_reg *arg1 )
670 {
671 GLuint i;
672
673 assert(!(mask & WRITEMASK_W) == WRITEMASK_X);
674
675 for (i = 0 ; i < 3; i++) {
676 if (mask & (1<<i)) {
677 GLuint i2 = (i+2)%3;
678 GLuint i1 = (i+1)%3;
679
680 brw_MUL(p, brw_null_reg(), negate(arg0[i2]), arg1[i1]);
681
682 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
683 brw_MAC(p, dst[i], arg0[i1], arg1[i2]);
684 brw_set_saturate(p, 0);
685 }
686 }
687 }
688
689
690 static void emit_math1( struct brw_compile *p,
691 GLuint function,
692 const struct brw_reg *dst,
693 GLuint mask,
694 const struct brw_reg *arg0 )
695 {
696 int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
697
698 if (!(mask & WRITEMASK_XYZW))
699 return; /* Do not emit dead code */
700
701 assert(is_power_of_two(mask & WRITEMASK_XYZW));
702
703 brw_MOV(p, brw_message_reg(2), arg0[0]);
704
705 /* Send two messages to perform all 16 operations:
706 */
707 brw_math_16(p,
708 dst[dst_chan],
709 function,
710 (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
711 2,
712 brw_null_reg(),
713 BRW_MATH_PRECISION_FULL);
714 }
715
716
717 static void emit_math2( struct brw_compile *p,
718 GLuint function,
719 const struct brw_reg *dst,
720 GLuint mask,
721 const struct brw_reg *arg0,
722 const struct brw_reg *arg1)
723 {
724 int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
725
726 if (!(mask & WRITEMASK_XYZW))
727 return; /* Do not emit dead code */
728
729 assert(is_power_of_two(mask & WRITEMASK_XYZW));
730
731 brw_push_insn_state(p);
732
733 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
734 brw_MOV(p, brw_message_reg(2), arg0[0]);
735 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
736 brw_MOV(p, brw_message_reg(4), sechalf(arg0[0]));
737
738 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
739 brw_MOV(p, brw_message_reg(3), arg1[0]);
740 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
741 brw_MOV(p, brw_message_reg(5), sechalf(arg1[0]));
742
743
744 /* Send two messages to perform all 16 operations:
745 */
746 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
747 brw_math(p,
748 dst[dst_chan],
749 function,
750 (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
751 2,
752 brw_null_reg(),
753 BRW_MATH_DATA_VECTOR,
754 BRW_MATH_PRECISION_FULL);
755
756 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
757 brw_math(p,
758 offset(dst[dst_chan],1),
759 function,
760 (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
761 4,
762 brw_null_reg(),
763 BRW_MATH_DATA_VECTOR,
764 BRW_MATH_PRECISION_FULL);
765
766 brw_pop_insn_state(p);
767 }
768
769
770
771 static void emit_tex( struct brw_wm_compile *c,
772 const struct brw_wm_instruction *inst,
773 struct brw_reg *dst,
774 GLuint dst_flags,
775 struct brw_reg *arg )
776 {
777 struct brw_compile *p = &c->func;
778 GLuint msgLength, responseLength;
779 GLuint i, nr;
780 GLuint emit;
781 GLuint msg_type;
782
783 /* How many input regs are there?
784 */
785 switch (inst->tex_idx) {
786 case TEXTURE_1D_INDEX:
787 emit = WRITEMASK_X;
788 nr = 1;
789 break;
790 case TEXTURE_2D_INDEX:
791 case TEXTURE_RECT_INDEX:
792 emit = WRITEMASK_XY;
793 nr = 2;
794 break;
795 case TEXTURE_3D_INDEX:
796 case TEXTURE_CUBE_INDEX:
797 emit = WRITEMASK_XYZ;
798 nr = 3;
799 break;
800 default:
801 /* unexpected target */
802 abort();
803 }
804
805 if (inst->tex_shadow) {
806 nr = 4;
807 emit |= WRITEMASK_W;
808 }
809
810 msgLength = 1;
811
812 for (i = 0; i < nr; i++) {
813 static const GLuint swz[4] = {0,1,2,2};
814 if (emit & (1<<i))
815 brw_MOV(p, brw_message_reg(msgLength+1), arg[swz[i]]);
816 else
817 brw_MOV(p, brw_message_reg(msgLength+1), brw_imm_f(0));
818 msgLength += 2;
819 }
820
821 responseLength = 8; /* always */
822
823 if (BRW_IS_IGDNG(p->brw)) {
824 if (inst->tex_shadow)
825 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE_IGDNG;
826 else
827 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_IGDNG;
828 } else {
829 if (inst->tex_shadow)
830 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE;
831 else
832 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE;
833 }
834
835 brw_SAMPLE(p,
836 retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW),
837 1,
838 retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW),
839 SURF_INDEX_TEXTURE(inst->tex_unit),
840 inst->tex_unit, /* sampler */
841 inst->writemask,
842 msg_type,
843 responseLength,
844 msgLength,
845 0,
846 1,
847 BRW_SAMPLER_SIMD_MODE_SIMD16);
848 }
849
850
851 static void emit_txb( struct brw_wm_compile *c,
852 const struct brw_wm_instruction *inst,
853 struct brw_reg *dst,
854 GLuint dst_flags,
855 struct brw_reg *arg )
856 {
857 struct brw_compile *p = &c->func;
858 GLuint msgLength;
859 GLuint msg_type;
860 /* Shadow ignored for txb.
861 */
862 switch (inst->tex_idx) {
863 case TEXTURE_1D_INDEX:
864 brw_MOV(p, brw_message_reg(2), arg[0]);
865 brw_MOV(p, brw_message_reg(4), brw_imm_f(0));
866 brw_MOV(p, brw_message_reg(6), brw_imm_f(0));
867 break;
868 case TEXTURE_2D_INDEX:
869 case TEXTURE_RECT_INDEX:
870 brw_MOV(p, brw_message_reg(2), arg[0]);
871 brw_MOV(p, brw_message_reg(4), arg[1]);
872 brw_MOV(p, brw_message_reg(6), brw_imm_f(0));
873 break;
874 case TEXTURE_3D_INDEX:
875 case TEXTURE_CUBE_INDEX:
876 brw_MOV(p, brw_message_reg(2), arg[0]);
877 brw_MOV(p, brw_message_reg(4), arg[1]);
878 brw_MOV(p, brw_message_reg(6), arg[2]);
879 break;
880 default:
881 /* unexpected target */
882 abort();
883 }
884
885 brw_MOV(p, brw_message_reg(8), arg[3]);
886 msgLength = 9;
887
888 if (BRW_IS_IGDNG(p->brw))
889 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS_IGDNG;
890 else
891 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
892
893 brw_SAMPLE(p,
894 retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW),
895 1,
896 retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW),
897 SURF_INDEX_TEXTURE(inst->tex_unit),
898 inst->tex_unit, /* sampler */
899 inst->writemask,
900 msg_type,
901 8, /* responseLength */
902 msgLength,
903 0,
904 1,
905 BRW_SAMPLER_SIMD_MODE_SIMD16);
906 }
907
908
909 static void emit_lit( struct brw_compile *p,
910 const struct brw_reg *dst,
911 GLuint mask,
912 const struct brw_reg *arg0 )
913 {
914 assert((mask & WRITEMASK_XW) == 0);
915
916 if (mask & WRITEMASK_Y) {
917 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
918 brw_MOV(p, dst[1], arg0[0]);
919 brw_set_saturate(p, 0);
920 }
921
922 if (mask & WRITEMASK_Z) {
923 emit_math2(p, BRW_MATH_FUNCTION_POW,
924 &dst[2],
925 WRITEMASK_X | (mask & SATURATE),
926 &arg0[1],
927 &arg0[3]);
928 }
929
930 /* Ordinarily you'd use an iff statement to skip or shortcircuit
931 * some of the POW calculations above, but 16-wide iff statements
932 * seem to lock c1 hardware, so this is a nasty workaround:
933 */
934 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_LE, arg0[0], brw_imm_f(0));
935 {
936 if (mask & WRITEMASK_Y)
937 brw_MOV(p, dst[1], brw_imm_f(0));
938
939 if (mask & WRITEMASK_Z)
940 brw_MOV(p, dst[2], brw_imm_f(0));
941 }
942 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
943 }
944
945
946 /* Kill pixel - set execution mask to zero for those pixels which
947 * fail.
948 */
949 static void emit_kil( struct brw_wm_compile *c,
950 struct brw_reg *arg0)
951 {
952 struct brw_compile *p = &c->func;
953 struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
954 GLuint i;
955
956 /* XXX - usually won't need 4 compares!
957 */
958 for (i = 0; i < 4; i++) {
959 brw_push_insn_state(p);
960 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0[i], brw_imm_f(0));
961 brw_set_predicate_control_flag_value(p, 0xff);
962 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
963 brw_AND(p, r0uw, brw_flag_reg(), r0uw);
964 brw_pop_insn_state(p);
965 }
966 }
967
968 /* KIL_NV kills the pixels that are currently executing, not based on a test
969 * of the arguments.
970 */
971 static void emit_kil_nv( struct brw_wm_compile *c )
972 {
973 struct brw_compile *p = &c->func;
974 struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
975
976 brw_push_insn_state(p);
977 brw_set_mask_control(p, BRW_MASK_DISABLE);
978 brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); //IMASK
979 brw_AND(p, r0uw, c->emit_mask_reg, r0uw);
980 brw_pop_insn_state(p);
981 }
982
983 static void fire_fb_write( struct brw_wm_compile *c,
984 GLuint base_reg,
985 GLuint nr,
986 GLuint target,
987 GLuint eot )
988 {
989 struct brw_compile *p = &c->func;
990
991 /* Pass through control information:
992 */
993 /* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */
994 {
995 brw_push_insn_state(p);
996 brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */
997 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
998 brw_MOV(p,
999 brw_message_reg(base_reg + 1),
1000 brw_vec8_grf(1, 0));
1001 brw_pop_insn_state(p);
1002 }
1003
1004 /* Send framebuffer write message: */
1005 /* send (16) null.0<1>:uw m0 r0.0<8;8,1>:uw 0x85a04000:ud { Align1 EOT } */
1006 brw_fb_WRITE(p,
1007 retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW),
1008 base_reg,
1009 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
1010 target,
1011 nr,
1012 0,
1013 eot);
1014 }
1015
1016
1017 static void emit_aa( struct brw_wm_compile *c,
1018 struct brw_reg *arg1,
1019 GLuint reg )
1020 {
1021 struct brw_compile *p = &c->func;
1022 GLuint comp = c->key.aa_dest_stencil_reg / 2;
1023 GLuint off = c->key.aa_dest_stencil_reg % 2;
1024 struct brw_reg aa = offset(arg1[comp], off);
1025
1026 brw_push_insn_state(p);
1027 brw_set_compression_control(p, BRW_COMPRESSION_NONE); /* ?? */
1028 brw_MOV(p, brw_message_reg(reg), aa);
1029 brw_pop_insn_state(p);
1030 }
1031
1032
1033 /* Post-fragment-program processing. Send the results to the
1034 * framebuffer.
1035 * \param arg0 the fragment color
1036 * \param arg1 the pass-through depth value
1037 * \param arg2 the shader-computed depth value
1038 */
1039 static void emit_fb_write( struct brw_wm_compile *c,
1040 struct brw_reg *arg0,
1041 struct brw_reg *arg1,
1042 struct brw_reg *arg2,
1043 GLuint target,
1044 GLuint eot)
1045 {
1046 struct brw_compile *p = &c->func;
1047 GLuint nr = 2;
1048 GLuint channel;
1049
1050 /* Reserve a space for AA - may not be needed:
1051 */
1052 if (c->key.aa_dest_stencil_reg)
1053 nr += 1;
1054
1055 /* I don't really understand how this achieves the color interleave
1056 * (ie RGBARGBA) in the result: [Do the saturation here]
1057 */
1058 {
1059 brw_push_insn_state(p);
1060
1061 for (channel = 0; channel < 4; channel++) {
1062 /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */
1063 /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */
1064
1065 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1066 brw_MOV(p,
1067 brw_message_reg(nr + channel),
1068 arg0[channel]);
1069
1070 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
1071 brw_MOV(p,
1072 brw_message_reg(nr + channel + 4),
1073 sechalf(arg0[channel]));
1074 }
1075
1076 /* skip over the regs populated above:
1077 */
1078 nr += 8;
1079
1080 brw_pop_insn_state(p);
1081 }
1082
1083 if (c->key.source_depth_to_render_target)
1084 {
1085 if (c->key.computes_depth)
1086 brw_MOV(p, brw_message_reg(nr), arg2[2]);
1087 else
1088 brw_MOV(p, brw_message_reg(nr), arg1[1]); /* ? */
1089
1090 nr += 2;
1091 }
1092
1093 if (c->key.dest_depth_reg)
1094 {
1095 GLuint comp = c->key.dest_depth_reg / 2;
1096 GLuint off = c->key.dest_depth_reg % 2;
1097
1098 if (off != 0) {
1099 brw_push_insn_state(p);
1100 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1101
1102 brw_MOV(p, brw_message_reg(nr), offset(arg1[comp],1));
1103 /* 2nd half? */
1104 brw_MOV(p, brw_message_reg(nr+1), arg1[comp+1]);
1105 brw_pop_insn_state(p);
1106 }
1107 else {
1108 brw_MOV(p, brw_message_reg(nr), arg1[comp]);
1109 }
1110 nr += 2;
1111 }
1112
1113 if (!c->key.runtime_check_aads_emit) {
1114 if (c->key.aa_dest_stencil_reg)
1115 emit_aa(c, arg1, 2);
1116
1117 fire_fb_write(c, 0, nr, target, eot);
1118 }
1119 else {
1120 struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
1121 struct brw_reg ip = brw_ip_reg();
1122 struct brw_instruction *jmp;
1123
1124 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1125 brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
1126 brw_AND(p,
1127 v1_null_ud,
1128 get_element_ud(brw_vec8_grf(1,0), 6),
1129 brw_imm_ud(1<<26));
1130
1131 jmp = brw_JMPI(p, ip, ip, brw_imm_d(0));
1132 {
1133 emit_aa(c, arg1, 2);
1134 fire_fb_write(c, 0, nr, target, eot);
1135 /* note - thread killed in subroutine */
1136 }
1137 brw_land_fwd_jump(p, jmp);
1138
1139 /* ELSE: Shuffle up one register to fill in the hole left for AA:
1140 */
1141 fire_fb_write(c, 1, nr-1, target, eot);
1142 }
1143 }
1144
1145
1146 /**
1147 * Move a GPR to scratch memory.
1148 */
1149 static void emit_spill( struct brw_wm_compile *c,
1150 struct brw_reg reg,
1151 GLuint slot )
1152 {
1153 struct brw_compile *p = &c->func;
1154
1155 /*
1156 mov (16) m2.0<1>:ud r2.0<8;8,1>:ud { Align1 Compr }
1157 */
1158 brw_MOV(p, brw_message_reg(2), reg);
1159
1160 /*
1161 mov (1) r0.2<1>:d 0x00000080:d { Align1 NoMask }
1162 send (16) null.0<1>:uw m1 r0.0<8;8,1>:uw 0x053003ff:ud { Align1 }
1163 */
1164 brw_dp_WRITE_16(p,
1165 retype(vec16(brw_vec8_grf(0, 0)), BRW_REGISTER_TYPE_UW),
1166 slot);
1167 }
1168
1169
1170 /**
1171 * Load a GPR from scratch memory.
1172 */
1173 static void emit_unspill( struct brw_wm_compile *c,
1174 struct brw_reg reg,
1175 GLuint slot )
1176 {
1177 struct brw_compile *p = &c->func;
1178
1179 /* Slot 0 is the undef value.
1180 */
1181 if (slot == 0) {
1182 brw_MOV(p, reg, brw_imm_f(0));
1183 return;
1184 }
1185
1186 /*
1187 mov (1) r0.2<1>:d 0x000000c0:d { Align1 NoMask }
1188 send (16) r110.0<1>:uw m1 r0.0<8;8,1>:uw 0x041243ff:ud { Align1 }
1189 */
1190
1191 brw_dp_READ_16(p,
1192 retype(vec16(reg), BRW_REGISTER_TYPE_UW),
1193 slot);
1194 }
1195
1196
1197 /**
1198 * Retrieve up to 4 GEN4 register pairs for the given wm reg:
1199 * Args with unspill_reg != 0 will be loaded from scratch memory.
1200 */
1201 static void get_argument_regs( struct brw_wm_compile *c,
1202 struct brw_wm_ref *arg[],
1203 struct brw_reg *regs )
1204 {
1205 GLuint i;
1206
1207 for (i = 0; i < 4; i++) {
1208 if (arg[i]) {
1209 if (arg[i]->unspill_reg)
1210 emit_unspill(c,
1211 brw_vec8_grf(arg[i]->unspill_reg, 0),
1212 arg[i]->value->spill_slot);
1213
1214 regs[i] = arg[i]->hw_reg;
1215 }
1216 else {
1217 regs[i] = brw_null_reg();
1218 }
1219 }
1220 }
1221
1222
1223 /**
1224 * For values that have a spill_slot!=0, write those regs to scratch memory.
1225 */
1226 static void spill_values( struct brw_wm_compile *c,
1227 struct brw_wm_value *values,
1228 GLuint nr )
1229 {
1230 GLuint i;
1231
1232 for (i = 0; i < nr; i++)
1233 if (values[i].spill_slot)
1234 emit_spill(c, values[i].hw_reg, values[i].spill_slot);
1235 }
1236
1237
1238 /* Emit the fragment program instructions here.
1239 */
1240 void brw_wm_emit( struct brw_wm_compile *c )
1241 {
1242 struct brw_compile *p = &c->func;
1243 GLuint insn;
1244
1245 brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
1246
1247 /* Check if any of the payload regs need to be spilled:
1248 */
1249 spill_values(c, c->payload.depth, 4);
1250 spill_values(c, c->creg, c->nr_creg);
1251 spill_values(c, c->payload.input_interp, FRAG_ATTRIB_MAX);
1252
1253
1254 for (insn = 0; insn < c->nr_insns; insn++) {
1255
1256 struct brw_wm_instruction *inst = &c->instruction[insn];
1257 struct brw_reg args[3][4], dst[4];
1258 GLuint i, dst_flags;
1259
1260 /* Get argument regs:
1261 */
1262 for (i = 0; i < 3; i++)
1263 get_argument_regs(c, inst->src[i], args[i]);
1264
1265 /* Get dest regs:
1266 */
1267 for (i = 0; i < 4; i++)
1268 if (inst->dst[i])
1269 dst[i] = inst->dst[i]->hw_reg;
1270 else
1271 dst[i] = brw_null_reg();
1272
1273 /* Flags
1274 */
1275 dst_flags = inst->writemask;
1276 if (inst->saturate)
1277 dst_flags |= SATURATE;
1278
1279 switch (inst->opcode) {
1280 /* Generated instructions for calculating triangle interpolants:
1281 */
1282 case WM_PIXELXY:
1283 emit_pixel_xy(p, dst, dst_flags);
1284 break;
1285
1286 case WM_DELTAXY:
1287 emit_delta_xy(p, dst, dst_flags, args[0]);
1288 break;
1289
1290 case WM_WPOSXY:
1291 emit_wpos_xy(c, dst, dst_flags, args[0]);
1292 break;
1293
1294 case WM_PIXELW:
1295 emit_pixel_w(p, dst, dst_flags, args[0], args[1]);
1296 break;
1297
1298 case WM_LINTERP:
1299 emit_linterp(p, dst, dst_flags, args[0], args[1]);
1300 break;
1301
1302 case WM_PINTERP:
1303 emit_pinterp(p, dst, dst_flags, args[0], args[1], args[2]);
1304 break;
1305
1306 case WM_CINTERP:
1307 emit_cinterp(p, dst, dst_flags, args[0]);
1308 break;
1309
1310 case WM_FB_WRITE:
1311 emit_fb_write(c, args[0], args[1], args[2], inst->target, inst->eot);
1312 break;
1313
1314 case WM_FRONTFACING:
1315 emit_frontfacing(p, dst, dst_flags);
1316 break;
1317
1318 /* Straightforward arithmetic:
1319 */
1320 case OPCODE_ADD:
1321 emit_alu2(p, brw_ADD, dst, dst_flags, args[0], args[1]);
1322 break;
1323
1324 case OPCODE_FRC:
1325 emit_alu1(p, brw_FRC, dst, dst_flags, args[0]);
1326 break;
1327
1328 case OPCODE_FLR:
1329 emit_alu1(p, brw_RNDD, dst, dst_flags, args[0]);
1330 break;
1331
1332 case OPCODE_DDX:
1333 emit_ddxy(p, dst, dst_flags, GL_TRUE, args[0]);
1334 break;
1335
1336 case OPCODE_DDY:
1337 emit_ddxy(p, dst, dst_flags, GL_FALSE, args[0]);
1338 break;
1339
1340 case OPCODE_DP3:
1341 emit_dp3(p, dst, dst_flags, args[0], args[1]);
1342 break;
1343
1344 case OPCODE_DP4:
1345 emit_dp4(p, dst, dst_flags, args[0], args[1]);
1346 break;
1347
1348 case OPCODE_DPH:
1349 emit_dph(p, dst, dst_flags, args[0], args[1]);
1350 break;
1351
1352 case OPCODE_TRUNC:
1353 emit_alu1(p, brw_RNDZ, dst, dst_flags, args[0]);
1354 break;
1355
1356 case OPCODE_LRP:
1357 emit_lrp(p, dst, dst_flags, args[0], args[1], args[2]);
1358 break;
1359
1360 case OPCODE_MAD:
1361 emit_mad(p, dst, dst_flags, args[0], args[1], args[2]);
1362 break;
1363
1364 case OPCODE_MOV:
1365 case OPCODE_SWZ:
1366 emit_alu1(p, brw_MOV, dst, dst_flags, args[0]);
1367 break;
1368
1369 case OPCODE_MUL:
1370 emit_alu2(p, brw_MUL, dst, dst_flags, args[0], args[1]);
1371 break;
1372
1373 case OPCODE_XPD:
1374 emit_xpd(p, dst, dst_flags, args[0], args[1]);
1375 break;
1376
1377 /* Higher math functions:
1378 */
1379 case OPCODE_RCP:
1380 emit_math1(p, BRW_MATH_FUNCTION_INV, dst, dst_flags, args[0]);
1381 break;
1382
1383 case OPCODE_RSQ:
1384 emit_math1(p, BRW_MATH_FUNCTION_RSQ, dst, dst_flags, args[0]);
1385 break;
1386
1387 case OPCODE_SIN:
1388 emit_math1(p, BRW_MATH_FUNCTION_SIN, dst, dst_flags, args[0]);
1389 break;
1390
1391 case OPCODE_COS:
1392 emit_math1(p, BRW_MATH_FUNCTION_COS, dst, dst_flags, args[0]);
1393 break;
1394
1395 case OPCODE_EX2:
1396 emit_math1(p, BRW_MATH_FUNCTION_EXP, dst, dst_flags, args[0]);
1397 break;
1398
1399 case OPCODE_LG2:
1400 emit_math1(p, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]);
1401 break;
1402
1403 case OPCODE_SCS:
1404 /* There is an scs math function, but it would need some
1405 * fixup for 16-element execution.
1406 */
1407 if (dst_flags & WRITEMASK_X)
1408 emit_math1(p, BRW_MATH_FUNCTION_COS, dst, (dst_flags&SATURATE)|WRITEMASK_X, args[0]);
1409 if (dst_flags & WRITEMASK_Y)
1410 emit_math1(p, BRW_MATH_FUNCTION_SIN, dst+1, (dst_flags&SATURATE)|WRITEMASK_X, args[0]);
1411 break;
1412
1413 case OPCODE_POW:
1414 emit_math2(p, BRW_MATH_FUNCTION_POW, dst, dst_flags, args[0], args[1]);
1415 break;
1416
1417 /* Comparisons:
1418 */
1419 case OPCODE_CMP:
1420 emit_cmp(p, dst, dst_flags, args[0], args[1], args[2]);
1421 break;
1422
1423 case OPCODE_MAX:
1424 emit_max(p, dst, dst_flags, args[0], args[1]);
1425 break;
1426
1427 case OPCODE_MIN:
1428 emit_min(p, dst, dst_flags, args[0], args[1]);
1429 break;
1430
1431 case OPCODE_SLT:
1432 emit_slt(p, dst, dst_flags, args[0], args[1]);
1433 break;
1434
1435 case OPCODE_SLE:
1436 emit_sle(p, dst, dst_flags, args[0], args[1]);
1437 break;
1438 case OPCODE_SGT:
1439 emit_sgt(p, dst, dst_flags, args[0], args[1]);
1440 break;
1441 case OPCODE_SGE:
1442 emit_sge(p, dst, dst_flags, args[0], args[1]);
1443 break;
1444 case OPCODE_SEQ:
1445 emit_seq(p, dst, dst_flags, args[0], args[1]);
1446 break;
1447 case OPCODE_SNE:
1448 emit_sne(p, dst, dst_flags, args[0], args[1]);
1449 break;
1450
1451 case OPCODE_LIT:
1452 emit_lit(p, dst, dst_flags, args[0]);
1453 break;
1454
1455 /* Texturing operations:
1456 */
1457 case OPCODE_TEX:
1458 emit_tex(c, inst, dst, dst_flags, args[0]);
1459 break;
1460
1461 case OPCODE_TXB:
1462 emit_txb(c, inst, dst, dst_flags, args[0]);
1463 break;
1464
1465 case OPCODE_KIL:
1466 emit_kil(c, args[0]);
1467 break;
1468
1469 case OPCODE_KIL_NV:
1470 emit_kil_nv(c);
1471 break;
1472
1473 default:
1474 _mesa_printf("Unsupported opcode %i (%s) in fragment shader\n",
1475 inst->opcode, inst->opcode < MAX_OPCODE ?
1476 _mesa_opcode_string(inst->opcode) :
1477 "unknown");
1478 }
1479
1480 for (i = 0; i < 4; i++)
1481 if (inst->dst[i] && inst->dst[i]->spill_slot)
1482 emit_spill(c,
1483 inst->dst[i]->hw_reg,
1484 inst->dst[i]->spill_slot);
1485 }
1486
1487 if (INTEL_DEBUG & DEBUG_WM) {
1488 int i;
1489
1490 _mesa_printf("wm-native:\n");
1491 for (i = 0; i < p->nr_insn; i++)
1492 brw_disasm(stderr, &p->store[i]);
1493 _mesa_printf("\n");
1494 }
1495 }