i965: Implement frag prog DPH like DP4
[mesa.git] / src / mesa / drivers / dri / i965 / brw_wm_emit.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "main/macros.h"
34 #include "brw_context.h"
35 #include "brw_wm.h"
36
37 #define SATURATE (1<<5)
38
39 /* Not quite sure how correct this is - need to understand horiz
40 * vs. vertical strides a little better.
41 */
42 static INLINE struct brw_reg sechalf( struct brw_reg reg )
43 {
44 if (reg.vstride)
45 reg.nr++;
46 return reg;
47 }
48
49 /* Payload R0:
50 *
51 * R0.0 -- pixel mask, one bit for each of 4 pixels in 4 tiles,
52 * corresponding to each of the 16 execution channels.
53 * R0.1..8 -- ?
54 * R1.0 -- triangle vertex 0.X
55 * R1.1 -- triangle vertex 0.Y
56 * R1.2 -- tile 0 x,y coords (2 packed uwords)
57 * R1.3 -- tile 1 x,y coords (2 packed uwords)
58 * R1.4 -- tile 2 x,y coords (2 packed uwords)
59 * R1.5 -- tile 3 x,y coords (2 packed uwords)
60 * R1.6 -- ?
61 * R1.7 -- ?
62 * R1.8 -- ?
63 */
64
65
66 static void emit_pixel_xy(struct brw_compile *p,
67 const struct brw_reg *dst,
68 GLuint mask)
69 {
70 struct brw_reg r1 = brw_vec1_grf(1, 0);
71 struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW);
72
73 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
74
75 /* Calculate pixel centers by adding 1 or 0 to each of the
76 * micro-tile coordinates passed in r1.
77 */
78 if (mask & WRITEMASK_X) {
79 brw_ADD(p,
80 vec16(retype(dst[0], BRW_REGISTER_TYPE_UW)),
81 stride(suboffset(r1_uw, 4), 2, 4, 0),
82 brw_imm_v(0x10101010));
83 }
84
85 if (mask & WRITEMASK_Y) {
86 brw_ADD(p,
87 vec16(retype(dst[1], BRW_REGISTER_TYPE_UW)),
88 stride(suboffset(r1_uw,5), 2, 4, 0),
89 brw_imm_v(0x11001100));
90 }
91
92 brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
93 }
94
95
96
97 static void emit_delta_xy(struct brw_compile *p,
98 const struct brw_reg *dst,
99 GLuint mask,
100 const struct brw_reg *arg0)
101 {
102 struct brw_reg r1 = brw_vec1_grf(1, 0);
103
104 /* Calc delta X,Y by subtracting origin in r1 from the pixel
105 * centers.
106 */
107 if (mask & WRITEMASK_X) {
108 brw_ADD(p,
109 dst[0],
110 retype(arg0[0], BRW_REGISTER_TYPE_UW),
111 negate(r1));
112 }
113
114 if (mask & WRITEMASK_Y) {
115 brw_ADD(p,
116 dst[1],
117 retype(arg0[1], BRW_REGISTER_TYPE_UW),
118 negate(suboffset(r1,1)));
119
120 }
121 }
122
123 static void emit_wpos_xy(struct brw_wm_compile *c,
124 const struct brw_reg *dst,
125 GLuint mask,
126 const struct brw_reg *arg0)
127 {
128 struct brw_compile *p = &c->func;
129
130 /* Calculate the pixel offset from window bottom left into destination
131 * X and Y channels.
132 */
133 if (mask & WRITEMASK_X) {
134 /* X' = X - origin */
135 brw_ADD(p,
136 dst[0],
137 retype(arg0[0], BRW_REGISTER_TYPE_W),
138 brw_imm_d(0 - c->key.origin_x));
139 }
140
141 if (mask & WRITEMASK_Y) {
142 /* Y' = height - (Y - origin_y) = height + origin_y - Y */
143 brw_ADD(p,
144 dst[1],
145 negate(retype(arg0[1], BRW_REGISTER_TYPE_W)),
146 brw_imm_d(c->key.origin_y + c->key.drawable_height - 1));
147 }
148 }
149
150
151 static void emit_pixel_w( struct brw_compile *p,
152 const struct brw_reg *dst,
153 GLuint mask,
154 const struct brw_reg *arg0,
155 const struct brw_reg *deltas)
156 {
157 /* Don't need this if all you are doing is interpolating color, for
158 * instance.
159 */
160 if (mask & WRITEMASK_W) {
161 struct brw_reg interp3 = brw_vec1_grf(arg0[0].nr+1, 4);
162
163 /* Calc 1/w - just linterp wpos[3] optimized by putting the
164 * result straight into a message reg.
165 */
166 brw_LINE(p, brw_null_reg(), interp3, deltas[0]);
167 brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), deltas[1]);
168
169 /* Calc w */
170 brw_math_16( p, dst[3],
171 BRW_MATH_FUNCTION_INV,
172 BRW_MATH_SATURATE_NONE,
173 2, brw_null_reg(),
174 BRW_MATH_PRECISION_FULL);
175 }
176 }
177
178
179
180 static void emit_linterp( struct brw_compile *p,
181 const struct brw_reg *dst,
182 GLuint mask,
183 const struct brw_reg *arg0,
184 const struct brw_reg *deltas )
185 {
186 struct brw_reg interp[4];
187 GLuint nr = arg0[0].nr;
188 GLuint i;
189
190 interp[0] = brw_vec1_grf(nr, 0);
191 interp[1] = brw_vec1_grf(nr, 4);
192 interp[2] = brw_vec1_grf(nr+1, 0);
193 interp[3] = brw_vec1_grf(nr+1, 4);
194
195 for (i = 0; i < 4; i++) {
196 if (mask & (1<<i)) {
197 brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
198 brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
199 }
200 }
201 }
202
203
204 static void emit_pinterp( struct brw_compile *p,
205 const struct brw_reg *dst,
206 GLuint mask,
207 const struct brw_reg *arg0,
208 const struct brw_reg *deltas,
209 const struct brw_reg *w)
210 {
211 struct brw_reg interp[4];
212 GLuint nr = arg0[0].nr;
213 GLuint i;
214
215 interp[0] = brw_vec1_grf(nr, 0);
216 interp[1] = brw_vec1_grf(nr, 4);
217 interp[2] = brw_vec1_grf(nr+1, 0);
218 interp[3] = brw_vec1_grf(nr+1, 4);
219
220 for (i = 0; i < 4; i++) {
221 if (mask & (1<<i)) {
222 brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
223 brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
224 }
225 }
226 for (i = 0; i < 4; i++) {
227 if (mask & (1<<i)) {
228 brw_MUL(p, dst[i], dst[i], w[3]);
229 }
230 }
231 }
232
233
234 static void emit_cinterp( struct brw_compile *p,
235 const struct brw_reg *dst,
236 GLuint mask,
237 const struct brw_reg *arg0 )
238 {
239 struct brw_reg interp[4];
240 GLuint nr = arg0[0].nr;
241 GLuint i;
242
243 interp[0] = brw_vec1_grf(nr, 0);
244 interp[1] = brw_vec1_grf(nr, 4);
245 interp[2] = brw_vec1_grf(nr+1, 0);
246 interp[3] = brw_vec1_grf(nr+1, 4);
247
248 for (i = 0; i < 4; i++) {
249 if (mask & (1<<i)) {
250 brw_MOV(p, dst[i], suboffset(interp[i],3)); /* TODO: optimize away like other moves */
251 }
252 }
253 }
254
255 /* Sets the destination channels to 1.0 or 0.0 according to glFrontFacing. */
256 static void emit_frontfacing( struct brw_compile *p,
257 const struct brw_reg *dst,
258 GLuint mask )
259 {
260 struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD);
261 GLuint i;
262
263 if (!(mask & WRITEMASK_XYZW))
264 return;
265
266 for (i = 0; i < 4; i++) {
267 if (mask & (1<<i)) {
268 brw_MOV(p, dst[i], brw_imm_f(0.0));
269 }
270 }
271
272 /* bit 31 is "primitive is back face", so checking < (1 << 31) gives
273 * us front face
274 */
275 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, r1_6ud, brw_imm_ud(1 << 31));
276 for (i = 0; i < 4; i++) {
277 if (mask & (1<<i)) {
278 brw_MOV(p, dst[i], brw_imm_f(1.0));
279 }
280 }
281 brw_set_predicate_control_flag_value(p, 0xff);
282 }
283
284 static void emit_alu1( struct brw_compile *p,
285 struct brw_instruction *(*func)(struct brw_compile *,
286 struct brw_reg,
287 struct brw_reg),
288 const struct brw_reg *dst,
289 GLuint mask,
290 const struct brw_reg *arg0 )
291 {
292 GLuint i;
293
294 if (mask & SATURATE)
295 brw_set_saturate(p, 1);
296
297 for (i = 0; i < 4; i++) {
298 if (mask & (1<<i)) {
299 func(p, dst[i], arg0[i]);
300 }
301 }
302
303 if (mask & SATURATE)
304 brw_set_saturate(p, 0);
305 }
306
307
308 static void emit_alu2( struct brw_compile *p,
309 struct brw_instruction *(*func)(struct brw_compile *,
310 struct brw_reg,
311 struct brw_reg,
312 struct brw_reg),
313 const struct brw_reg *dst,
314 GLuint mask,
315 const struct brw_reg *arg0,
316 const struct brw_reg *arg1 )
317 {
318 GLuint i;
319
320 if (mask & SATURATE)
321 brw_set_saturate(p, 1);
322
323 for (i = 0; i < 4; i++) {
324 if (mask & (1<<i)) {
325 func(p, dst[i], arg0[i], arg1[i]);
326 }
327 }
328
329 if (mask & SATURATE)
330 brw_set_saturate(p, 0);
331 }
332
333
334 static void emit_mad( struct brw_compile *p,
335 const struct brw_reg *dst,
336 GLuint mask,
337 const struct brw_reg *arg0,
338 const struct brw_reg *arg1,
339 const struct brw_reg *arg2 )
340 {
341 GLuint i;
342
343 for (i = 0; i < 4; i++) {
344 if (mask & (1<<i)) {
345 brw_MUL(p, dst[i], arg0[i], arg1[i]);
346
347 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
348 brw_ADD(p, dst[i], dst[i], arg2[i]);
349 brw_set_saturate(p, 0);
350 }
351 }
352 }
353
354 static void emit_trunc( struct brw_compile *p,
355 const struct brw_reg *dst,
356 GLuint mask,
357 const struct brw_reg *arg0)
358 {
359 GLuint i;
360
361 for (i = 0; i < 4; i++) {
362 if (mask & (1<<i)) {
363 brw_RNDZ(p, dst[i], arg0[i]);
364 }
365 }
366 }
367
368 static void emit_lrp( struct brw_compile *p,
369 const struct brw_reg *dst,
370 GLuint mask,
371 const struct brw_reg *arg0,
372 const struct brw_reg *arg1,
373 const struct brw_reg *arg2 )
374 {
375 GLuint i;
376
377 /* Uses dst as a temporary:
378 */
379 for (i = 0; i < 4; i++) {
380 if (mask & (1<<i)) {
381 /* Can I use the LINE instruction for this?
382 */
383 brw_ADD(p, dst[i], negate(arg0[i]), brw_imm_f(1.0));
384 brw_MUL(p, brw_null_reg(), dst[i], arg2[i]);
385
386 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
387 brw_MAC(p, dst[i], arg0[i], arg1[i]);
388 brw_set_saturate(p, 0);
389 }
390 }
391 }
392
393 static void emit_sop( struct brw_compile *p,
394 const struct brw_reg *dst,
395 GLuint mask,
396 GLuint cond,
397 const struct brw_reg *arg0,
398 const struct brw_reg *arg1 )
399 {
400 GLuint i;
401
402 for (i = 0; i < 4; i++) {
403 if (mask & (1<<i)) {
404 brw_MOV(p, dst[i], brw_imm_f(0));
405 brw_CMP(p, brw_null_reg(), cond, arg0[i], arg1[i]);
406 brw_MOV(p, dst[i], brw_imm_f(1.0));
407 brw_set_predicate_control_flag_value(p, 0xff);
408 }
409 }
410 }
411
412 static void emit_slt( struct brw_compile *p,
413 const struct brw_reg *dst,
414 GLuint mask,
415 const struct brw_reg *arg0,
416 const struct brw_reg *arg1 )
417 {
418 emit_sop(p, dst, mask, BRW_CONDITIONAL_L, arg0, arg1);
419 }
420
421 static void emit_sle( struct brw_compile *p,
422 const struct brw_reg *dst,
423 GLuint mask,
424 const struct brw_reg *arg0,
425 const struct brw_reg *arg1 )
426 {
427 emit_sop(p, dst, mask, BRW_CONDITIONAL_LE, arg0, arg1);
428 }
429
430 static void emit_sgt( struct brw_compile *p,
431 const struct brw_reg *dst,
432 GLuint mask,
433 const struct brw_reg *arg0,
434 const struct brw_reg *arg1 )
435 {
436 emit_sop(p, dst, mask, BRW_CONDITIONAL_G, arg0, arg1);
437 }
438
439 static void emit_sge( struct brw_compile *p,
440 const struct brw_reg *dst,
441 GLuint mask,
442 const struct brw_reg *arg0,
443 const struct brw_reg *arg1 )
444 {
445 emit_sop(p, dst, mask, BRW_CONDITIONAL_GE, arg0, arg1);
446 }
447
448 static void emit_seq( struct brw_compile *p,
449 const struct brw_reg *dst,
450 GLuint mask,
451 const struct brw_reg *arg0,
452 const struct brw_reg *arg1 )
453 {
454 emit_sop(p, dst, mask, BRW_CONDITIONAL_EQ, arg0, arg1);
455 }
456
457 static void emit_sne( struct brw_compile *p,
458 const struct brw_reg *dst,
459 GLuint mask,
460 const struct brw_reg *arg0,
461 const struct brw_reg *arg1 )
462 {
463 emit_sop(p, dst, mask, BRW_CONDITIONAL_NEQ, arg0, arg1);
464 }
465
466 static void emit_cmp( struct brw_compile *p,
467 const struct brw_reg *dst,
468 GLuint mask,
469 const struct brw_reg *arg0,
470 const struct brw_reg *arg1,
471 const struct brw_reg *arg2 )
472 {
473 GLuint i;
474
475 for (i = 0; i < 4; i++) {
476 if (mask & (1<<i)) {
477 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
478 brw_MOV(p, dst[i], arg2[i]);
479 brw_set_saturate(p, 0);
480
481 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], brw_imm_f(0));
482
483 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
484 brw_MOV(p, dst[i], arg1[i]);
485 brw_set_saturate(p, 0);
486 brw_set_predicate_control_flag_value(p, 0xff);
487 }
488 }
489 }
490
491 static void emit_max( struct brw_compile *p,
492 const struct brw_reg *dst,
493 GLuint mask,
494 const struct brw_reg *arg0,
495 const struct brw_reg *arg1 )
496 {
497 GLuint i;
498
499 for (i = 0; i < 4; i++) {
500 if (mask & (1<<i)) {
501 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
502 brw_MOV(p, dst[i], arg0[i]);
503 brw_set_saturate(p, 0);
504
505 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]);
506
507 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
508 brw_MOV(p, dst[i], arg1[i]);
509 brw_set_saturate(p, 0);
510 brw_set_predicate_control_flag_value(p, 0xff);
511 }
512 }
513 }
514
515 static void emit_min( struct brw_compile *p,
516 const struct brw_reg *dst,
517 GLuint mask,
518 const struct brw_reg *arg0,
519 const struct brw_reg *arg1 )
520 {
521 GLuint i;
522
523 for (i = 0; i < 4; i++) {
524 if (mask & (1<<i)) {
525 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
526 brw_MOV(p, dst[i], arg1[i]);
527 brw_set_saturate(p, 0);
528
529 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]);
530
531 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
532 brw_MOV(p, dst[i], arg0[i]);
533 brw_set_saturate(p, 0);
534 brw_set_predicate_control_flag_value(p, 0xff);
535 }
536 }
537 }
538
539
540 static void emit_dp3( struct brw_compile *p,
541 const struct brw_reg *dst,
542 GLuint mask,
543 const struct brw_reg *arg0,
544 const struct brw_reg *arg1 )
545 {
546 int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
547
548 if (!(mask & WRITEMASK_XYZW))
549 return; /* Do not emit dead code */
550
551 assert(is_power_of_two(mask & WRITEMASK_XYZW));
552
553 brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
554 brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
555
556 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
557 brw_MAC(p, dst[dst_chan], arg0[2], arg1[2]);
558 brw_set_saturate(p, 0);
559 }
560
561
562 static void emit_dp4( struct brw_compile *p,
563 const struct brw_reg *dst,
564 GLuint mask,
565 const struct brw_reg *arg0,
566 const struct brw_reg *arg1 )
567 {
568 int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
569
570 if (!(mask & WRITEMASK_XYZW))
571 return; /* Do not emit dead code */
572
573 assert(is_power_of_two(mask & WRITEMASK_XYZW));
574
575 brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
576 brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
577 brw_MAC(p, brw_null_reg(), arg0[2], arg1[2]);
578
579 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
580 brw_MAC(p, dst[dst_chan], arg0[3], arg1[3]);
581 brw_set_saturate(p, 0);
582 }
583
584
585 static void emit_dph( struct brw_compile *p,
586 const struct brw_reg *dst,
587 GLuint mask,
588 const struct brw_reg *arg0,
589 const struct brw_reg *arg1 )
590 {
591 const int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
592
593 if (!(mask & WRITEMASK_XYZW))
594 return; /* Do not emit dead code */
595
596 assert(is_power_of_two(mask & WRITEMASK_XYZW));
597
598 brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
599 brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
600 brw_MAC(p, dst[dst_chan], arg0[2], arg1[2]);
601
602 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
603 brw_ADD(p, dst[dst_chan], dst[dst_chan], arg1[3]);
604 brw_set_saturate(p, 0);
605 }
606
607
608 static void emit_xpd( struct brw_compile *p,
609 const struct brw_reg *dst,
610 GLuint mask,
611 const struct brw_reg *arg0,
612 const struct brw_reg *arg1 )
613 {
614 GLuint i;
615
616 assert(!(mask & WRITEMASK_W) == WRITEMASK_X);
617
618 for (i = 0 ; i < 3; i++) {
619 if (mask & (1<<i)) {
620 GLuint i2 = (i+2)%3;
621 GLuint i1 = (i+1)%3;
622
623 brw_MUL(p, brw_null_reg(), negate(arg0[i2]), arg1[i1]);
624
625 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
626 brw_MAC(p, dst[i], arg0[i1], arg1[i2]);
627 brw_set_saturate(p, 0);
628 }
629 }
630 }
631
632
633 static void emit_math1( struct brw_compile *p,
634 GLuint function,
635 const struct brw_reg *dst,
636 GLuint mask,
637 const struct brw_reg *arg0 )
638 {
639 int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
640
641 if (!(mask & WRITEMASK_XYZW))
642 return; /* Do not emit dead code */
643
644 assert(is_power_of_two(mask & WRITEMASK_XYZW));
645
646 brw_MOV(p, brw_message_reg(2), arg0[0]);
647
648 /* Send two messages to perform all 16 operations:
649 */
650 brw_math_16(p,
651 dst[dst_chan],
652 function,
653 (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
654 2,
655 brw_null_reg(),
656 BRW_MATH_PRECISION_FULL);
657 }
658
659
660 static void emit_math2( struct brw_compile *p,
661 GLuint function,
662 const struct brw_reg *dst,
663 GLuint mask,
664 const struct brw_reg *arg0,
665 const struct brw_reg *arg1)
666 {
667 int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
668
669 if (!(mask & WRITEMASK_XYZW))
670 return; /* Do not emit dead code */
671
672 assert(is_power_of_two(mask & WRITEMASK_XYZW));
673
674 brw_push_insn_state(p);
675
676 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
677 brw_MOV(p, brw_message_reg(2), arg0[0]);
678 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
679 brw_MOV(p, brw_message_reg(4), sechalf(arg0[0]));
680
681 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
682 brw_MOV(p, brw_message_reg(3), arg1[0]);
683 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
684 brw_MOV(p, brw_message_reg(5), sechalf(arg1[0]));
685
686
687 /* Send two messages to perform all 16 operations:
688 */
689 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
690 brw_math(p,
691 dst[dst_chan],
692 function,
693 (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
694 2,
695 brw_null_reg(),
696 BRW_MATH_DATA_VECTOR,
697 BRW_MATH_PRECISION_FULL);
698
699 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
700 brw_math(p,
701 offset(dst[dst_chan],1),
702 function,
703 (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
704 4,
705 brw_null_reg(),
706 BRW_MATH_DATA_VECTOR,
707 BRW_MATH_PRECISION_FULL);
708
709 brw_pop_insn_state(p);
710 }
711
712
713
714 static void emit_tex( struct brw_wm_compile *c,
715 const struct brw_wm_instruction *inst,
716 struct brw_reg *dst,
717 GLuint dst_flags,
718 struct brw_reg *arg )
719 {
720 struct brw_compile *p = &c->func;
721 GLuint msgLength, responseLength;
722 GLuint i, nr;
723 GLuint emit;
724 GLuint msg_type;
725
726 /* How many input regs are there?
727 */
728 switch (inst->tex_idx) {
729 case TEXTURE_1D_INDEX:
730 emit = WRITEMASK_X;
731 nr = 1;
732 break;
733 case TEXTURE_2D_INDEX:
734 case TEXTURE_RECT_INDEX:
735 emit = WRITEMASK_XY;
736 nr = 2;
737 break;
738 default:
739 emit = WRITEMASK_XYZ;
740 nr = 3;
741 break;
742 }
743
744 if (inst->tex_shadow) {
745 nr = 4;
746 emit |= WRITEMASK_W;
747 }
748
749 msgLength = 1;
750
751 for (i = 0; i < nr; i++) {
752 static const GLuint swz[4] = {0,1,2,2};
753 if (emit & (1<<i))
754 brw_MOV(p, brw_message_reg(msgLength+1), arg[swz[i]]);
755 else
756 brw_MOV(p, brw_message_reg(msgLength+1), brw_imm_f(0));
757 msgLength += 2;
758 }
759
760 responseLength = 8; /* always */
761
762 if (BRW_IS_IGDNG(p->brw)) {
763 if (inst->tex_shadow)
764 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE_IGDNG;
765 else
766 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_IGDNG;
767 } else {
768 if (inst->tex_shadow)
769 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE;
770 else
771 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE;
772 }
773
774 brw_SAMPLE(p,
775 retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW),
776 1,
777 retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW),
778 SURF_INDEX_TEXTURE(inst->tex_unit),
779 inst->tex_unit, /* sampler */
780 inst->writemask,
781 msg_type,
782 responseLength,
783 msgLength,
784 0,
785 1,
786 BRW_SAMPLER_SIMD_MODE_SIMD16);
787 }
788
789
790 static void emit_txb( struct brw_wm_compile *c,
791 const struct brw_wm_instruction *inst,
792 struct brw_reg *dst,
793 GLuint dst_flags,
794 struct brw_reg *arg )
795 {
796 struct brw_compile *p = &c->func;
797 GLuint msgLength;
798 GLuint msg_type;
799 /* Shadow ignored for txb.
800 */
801 switch (inst->tex_idx) {
802 case TEXTURE_1D_INDEX:
803 brw_MOV(p, brw_message_reg(2), arg[0]);
804 brw_MOV(p, brw_message_reg(4), brw_imm_f(0));
805 brw_MOV(p, brw_message_reg(6), brw_imm_f(0));
806 break;
807 case TEXTURE_2D_INDEX:
808 case TEXTURE_RECT_INDEX:
809 brw_MOV(p, brw_message_reg(2), arg[0]);
810 brw_MOV(p, brw_message_reg(4), arg[1]);
811 brw_MOV(p, brw_message_reg(6), brw_imm_f(0));
812 break;
813 default:
814 brw_MOV(p, brw_message_reg(2), arg[0]);
815 brw_MOV(p, brw_message_reg(4), arg[1]);
816 brw_MOV(p, brw_message_reg(6), arg[2]);
817 break;
818 }
819
820 brw_MOV(p, brw_message_reg(8), arg[3]);
821 msgLength = 9;
822
823 if (BRW_IS_IGDNG(p->brw))
824 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS_IGDNG;
825 else
826 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
827
828 brw_SAMPLE(p,
829 retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW),
830 1,
831 retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW),
832 SURF_INDEX_TEXTURE(inst->tex_unit),
833 inst->tex_unit, /* sampler */
834 inst->writemask,
835 msg_type,
836 8, /* responseLength */
837 msgLength,
838 0,
839 1,
840 BRW_SAMPLER_SIMD_MODE_SIMD16);
841 }
842
843
844 static void emit_lit( struct brw_compile *p,
845 const struct brw_reg *dst,
846 GLuint mask,
847 const struct brw_reg *arg0 )
848 {
849 assert((mask & WRITEMASK_XW) == 0);
850
851 if (mask & WRITEMASK_Y) {
852 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
853 brw_MOV(p, dst[1], arg0[0]);
854 brw_set_saturate(p, 0);
855 }
856
857 if (mask & WRITEMASK_Z) {
858 emit_math2(p, BRW_MATH_FUNCTION_POW,
859 &dst[2],
860 WRITEMASK_X | (mask & SATURATE),
861 &arg0[1],
862 &arg0[3]);
863 }
864
865 /* Ordinarily you'd use an iff statement to skip or shortcircuit
866 * some of the POW calculations above, but 16-wide iff statements
867 * seem to lock c1 hardware, so this is a nasty workaround:
868 */
869 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_LE, arg0[0], brw_imm_f(0));
870 {
871 if (mask & WRITEMASK_Y)
872 brw_MOV(p, dst[1], brw_imm_f(0));
873
874 if (mask & WRITEMASK_Z)
875 brw_MOV(p, dst[2], brw_imm_f(0));
876 }
877 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
878 }
879
880
881 /* Kill pixel - set execution mask to zero for those pixels which
882 * fail.
883 */
884 static void emit_kil( struct brw_wm_compile *c,
885 struct brw_reg *arg0)
886 {
887 struct brw_compile *p = &c->func;
888 struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
889 GLuint i;
890
891 /* XXX - usually won't need 4 compares!
892 */
893 for (i = 0; i < 4; i++) {
894 brw_push_insn_state(p);
895 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0[i], brw_imm_f(0));
896 brw_set_predicate_control_flag_value(p, 0xff);
897 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
898 brw_AND(p, r0uw, brw_flag_reg(), r0uw);
899 brw_pop_insn_state(p);
900 }
901 }
902
903
904 static void fire_fb_write( struct brw_wm_compile *c,
905 GLuint base_reg,
906 GLuint nr,
907 GLuint target,
908 GLuint eot )
909 {
910 struct brw_compile *p = &c->func;
911
912 /* Pass through control information:
913 */
914 /* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */
915 {
916 brw_push_insn_state(p);
917 brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */
918 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
919 brw_MOV(p,
920 brw_message_reg(base_reg + 1),
921 brw_vec8_grf(1, 0));
922 brw_pop_insn_state(p);
923 }
924
925 /* Send framebuffer write message: */
926 /* send (16) null.0<1>:uw m0 r0.0<8;8,1>:uw 0x85a04000:ud { Align1 EOT } */
927 brw_fb_WRITE(p,
928 retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW),
929 base_reg,
930 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
931 target,
932 nr,
933 0,
934 eot);
935 }
936
937
938 static void emit_aa( struct brw_wm_compile *c,
939 struct brw_reg *arg1,
940 GLuint reg )
941 {
942 struct brw_compile *p = &c->func;
943 GLuint comp = c->key.aa_dest_stencil_reg / 2;
944 GLuint off = c->key.aa_dest_stencil_reg % 2;
945 struct brw_reg aa = offset(arg1[comp], off);
946
947 brw_push_insn_state(p);
948 brw_set_compression_control(p, BRW_COMPRESSION_NONE); /* ?? */
949 brw_MOV(p, brw_message_reg(reg), aa);
950 brw_pop_insn_state(p);
951 }
952
953
954 /* Post-fragment-program processing. Send the results to the
955 * framebuffer.
956 * \param arg0 the fragment color
957 * \param arg1 the pass-through depth value
958 * \param arg2 the shader-computed depth value
959 */
960 static void emit_fb_write( struct brw_wm_compile *c,
961 struct brw_reg *arg0,
962 struct brw_reg *arg1,
963 struct brw_reg *arg2,
964 GLuint target,
965 GLuint eot)
966 {
967 struct brw_compile *p = &c->func;
968 GLuint nr = 2;
969 GLuint channel;
970
971 /* Reserve a space for AA - may not be needed:
972 */
973 if (c->key.aa_dest_stencil_reg)
974 nr += 1;
975
976 /* I don't really understand how this achieves the color interleave
977 * (ie RGBARGBA) in the result: [Do the saturation here]
978 */
979 {
980 brw_push_insn_state(p);
981
982 for (channel = 0; channel < 4; channel++) {
983 /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */
984 /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */
985
986 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
987 brw_MOV(p,
988 brw_message_reg(nr + channel),
989 arg0[channel]);
990
991 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
992 brw_MOV(p,
993 brw_message_reg(nr + channel + 4),
994 sechalf(arg0[channel]));
995 }
996
997 /* skip over the regs populated above:
998 */
999 nr += 8;
1000
1001 brw_pop_insn_state(p);
1002 }
1003
1004 if (c->key.source_depth_to_render_target)
1005 {
1006 if (c->key.computes_depth)
1007 brw_MOV(p, brw_message_reg(nr), arg2[2]);
1008 else
1009 brw_MOV(p, brw_message_reg(nr), arg1[1]); /* ? */
1010
1011 nr += 2;
1012 }
1013
1014 if (c->key.dest_depth_reg)
1015 {
1016 GLuint comp = c->key.dest_depth_reg / 2;
1017 GLuint off = c->key.dest_depth_reg % 2;
1018
1019 if (off != 0) {
1020 brw_push_insn_state(p);
1021 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1022
1023 brw_MOV(p, brw_message_reg(nr), offset(arg1[comp],1));
1024 /* 2nd half? */
1025 brw_MOV(p, brw_message_reg(nr+1), arg1[comp+1]);
1026 brw_pop_insn_state(p);
1027 }
1028 else {
1029 brw_MOV(p, brw_message_reg(nr), arg1[comp]);
1030 }
1031 nr += 2;
1032 }
1033
1034 if (!c->key.runtime_check_aads_emit) {
1035 if (c->key.aa_dest_stencil_reg)
1036 emit_aa(c, arg1, 2);
1037
1038 fire_fb_write(c, 0, nr, target, eot);
1039 }
1040 else {
1041 struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
1042 struct brw_reg ip = brw_ip_reg();
1043 struct brw_instruction *jmp;
1044
1045 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1046 brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
1047 brw_AND(p,
1048 v1_null_ud,
1049 get_element_ud(brw_vec8_grf(1,0), 6),
1050 brw_imm_ud(1<<26));
1051
1052 jmp = brw_JMPI(p, ip, ip, brw_imm_d(0));
1053 {
1054 emit_aa(c, arg1, 2);
1055 fire_fb_write(c, 0, nr, target, eot);
1056 /* note - thread killed in subroutine */
1057 }
1058 brw_land_fwd_jump(p, jmp);
1059
1060 /* ELSE: Shuffle up one register to fill in the hole left for AA:
1061 */
1062 fire_fb_write(c, 1, nr-1, target, eot);
1063 }
1064 }
1065
1066
1067 /**
1068 * Move a GPR to scratch memory.
1069 */
1070 static void emit_spill( struct brw_wm_compile *c,
1071 struct brw_reg reg,
1072 GLuint slot )
1073 {
1074 struct brw_compile *p = &c->func;
1075
1076 /*
1077 mov (16) m2.0<1>:ud r2.0<8;8,1>:ud { Align1 Compr }
1078 */
1079 brw_MOV(p, brw_message_reg(2), reg);
1080
1081 /*
1082 mov (1) r0.2<1>:d 0x00000080:d { Align1 NoMask }
1083 send (16) null.0<1>:uw m1 r0.0<8;8,1>:uw 0x053003ff:ud { Align1 }
1084 */
1085 brw_dp_WRITE_16(p,
1086 retype(vec16(brw_vec8_grf(0, 0)), BRW_REGISTER_TYPE_UW),
1087 slot);
1088 }
1089
1090
1091 /**
1092 * Load a GPR from scratch memory.
1093 */
1094 static void emit_unspill( struct brw_wm_compile *c,
1095 struct brw_reg reg,
1096 GLuint slot )
1097 {
1098 struct brw_compile *p = &c->func;
1099
1100 /* Slot 0 is the undef value.
1101 */
1102 if (slot == 0) {
1103 brw_MOV(p, reg, brw_imm_f(0));
1104 return;
1105 }
1106
1107 /*
1108 mov (1) r0.2<1>:d 0x000000c0:d { Align1 NoMask }
1109 send (16) r110.0<1>:uw m1 r0.0<8;8,1>:uw 0x041243ff:ud { Align1 }
1110 */
1111
1112 brw_dp_READ_16(p,
1113 retype(vec16(reg), BRW_REGISTER_TYPE_UW),
1114 slot);
1115 }
1116
1117
1118 /**
1119 * Retrieve up to 4 GEN4 register pairs for the given wm reg:
1120 * Args with unspill_reg != 0 will be loaded from scratch memory.
1121 */
1122 static void get_argument_regs( struct brw_wm_compile *c,
1123 struct brw_wm_ref *arg[],
1124 struct brw_reg *regs )
1125 {
1126 GLuint i;
1127
1128 for (i = 0; i < 4; i++) {
1129 if (arg[i]) {
1130 if (arg[i]->unspill_reg)
1131 emit_unspill(c,
1132 brw_vec8_grf(arg[i]->unspill_reg, 0),
1133 arg[i]->value->spill_slot);
1134
1135 regs[i] = arg[i]->hw_reg;
1136 }
1137 else {
1138 regs[i] = brw_null_reg();
1139 }
1140 }
1141 }
1142
1143
1144 /**
1145 * For values that have a spill_slot!=0, write those regs to scratch memory.
1146 */
1147 static void spill_values( struct brw_wm_compile *c,
1148 struct brw_wm_value *values,
1149 GLuint nr )
1150 {
1151 GLuint i;
1152
1153 for (i = 0; i < nr; i++)
1154 if (values[i].spill_slot)
1155 emit_spill(c, values[i].hw_reg, values[i].spill_slot);
1156 }
1157
1158
1159 /* Emit the fragment program instructions here.
1160 */
1161 void brw_wm_emit( struct brw_wm_compile *c )
1162 {
1163 struct brw_compile *p = &c->func;
1164 GLuint insn;
1165
1166 brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
1167
1168 /* Check if any of the payload regs need to be spilled:
1169 */
1170 spill_values(c, c->payload.depth, 4);
1171 spill_values(c, c->creg, c->nr_creg);
1172 spill_values(c, c->payload.input_interp, FRAG_ATTRIB_MAX);
1173
1174
1175 for (insn = 0; insn < c->nr_insns; insn++) {
1176
1177 struct brw_wm_instruction *inst = &c->instruction[insn];
1178 struct brw_reg args[3][4], dst[4];
1179 GLuint i, dst_flags;
1180
1181 /* Get argument regs:
1182 */
1183 for (i = 0; i < 3; i++)
1184 get_argument_regs(c, inst->src[i], args[i]);
1185
1186 /* Get dest regs:
1187 */
1188 for (i = 0; i < 4; i++)
1189 if (inst->dst[i])
1190 dst[i] = inst->dst[i]->hw_reg;
1191 else
1192 dst[i] = brw_null_reg();
1193
1194 /* Flags
1195 */
1196 dst_flags = inst->writemask;
1197 if (inst->saturate)
1198 dst_flags |= SATURATE;
1199
1200 switch (inst->opcode) {
1201 /* Generated instructions for calculating triangle interpolants:
1202 */
1203 case WM_PIXELXY:
1204 emit_pixel_xy(p, dst, dst_flags);
1205 break;
1206
1207 case WM_DELTAXY:
1208 emit_delta_xy(p, dst, dst_flags, args[0]);
1209 break;
1210
1211 case WM_WPOSXY:
1212 emit_wpos_xy(c, dst, dst_flags, args[0]);
1213 break;
1214
1215 case WM_PIXELW:
1216 emit_pixel_w(p, dst, dst_flags, args[0], args[1]);
1217 break;
1218
1219 case WM_LINTERP:
1220 emit_linterp(p, dst, dst_flags, args[0], args[1]);
1221 break;
1222
1223 case WM_PINTERP:
1224 emit_pinterp(p, dst, dst_flags, args[0], args[1], args[2]);
1225 break;
1226
1227 case WM_CINTERP:
1228 emit_cinterp(p, dst, dst_flags, args[0]);
1229 break;
1230
1231 case WM_FB_WRITE:
1232 emit_fb_write(c, args[0], args[1], args[2], inst->target, inst->eot);
1233 break;
1234
1235 case WM_FRONTFACING:
1236 emit_frontfacing(p, dst, dst_flags);
1237 break;
1238
1239 /* Straightforward arithmetic:
1240 */
1241 case OPCODE_ADD:
1242 emit_alu2(p, brw_ADD, dst, dst_flags, args[0], args[1]);
1243 break;
1244
1245 case OPCODE_FRC:
1246 emit_alu1(p, brw_FRC, dst, dst_flags, args[0]);
1247 break;
1248
1249 case OPCODE_FLR:
1250 emit_alu1(p, brw_RNDD, dst, dst_flags, args[0]);
1251 break;
1252
1253 case OPCODE_DP3:
1254 emit_dp3(p, dst, dst_flags, args[0], args[1]);
1255 break;
1256
1257 case OPCODE_DP4:
1258 emit_dp4(p, dst, dst_flags, args[0], args[1]);
1259 break;
1260
1261 case OPCODE_DPH:
1262 emit_dph(p, dst, dst_flags, args[0], args[1]);
1263 break;
1264
1265 case OPCODE_TRUNC:
1266 emit_trunc(p, dst, dst_flags, args[0]);
1267 break;
1268
1269 case OPCODE_LRP:
1270 emit_lrp(p, dst, dst_flags, args[0], args[1], args[2]);
1271 break;
1272
1273 case OPCODE_MAD:
1274 emit_mad(p, dst, dst_flags, args[0], args[1], args[2]);
1275 break;
1276
1277 case OPCODE_MOV:
1278 case OPCODE_SWZ:
1279 emit_alu1(p, brw_MOV, dst, dst_flags, args[0]);
1280 break;
1281
1282 case OPCODE_MUL:
1283 emit_alu2(p, brw_MUL, dst, dst_flags, args[0], args[1]);
1284 break;
1285
1286 case OPCODE_XPD:
1287 emit_xpd(p, dst, dst_flags, args[0], args[1]);
1288 break;
1289
1290 /* Higher math functions:
1291 */
1292 case OPCODE_RCP:
1293 emit_math1(p, BRW_MATH_FUNCTION_INV, dst, dst_flags, args[0]);
1294 break;
1295
1296 case OPCODE_RSQ:
1297 emit_math1(p, BRW_MATH_FUNCTION_RSQ, dst, dst_flags, args[0]);
1298 break;
1299
1300 case OPCODE_SIN:
1301 emit_math1(p, BRW_MATH_FUNCTION_SIN, dst, dst_flags, args[0]);
1302 break;
1303
1304 case OPCODE_COS:
1305 emit_math1(p, BRW_MATH_FUNCTION_COS, dst, dst_flags, args[0]);
1306 break;
1307
1308 case OPCODE_EX2:
1309 emit_math1(p, BRW_MATH_FUNCTION_EXP, dst, dst_flags, args[0]);
1310 break;
1311
1312 case OPCODE_LG2:
1313 emit_math1(p, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]);
1314 break;
1315
1316 case OPCODE_SCS:
1317 /* There is an scs math function, but it would need some
1318 * fixup for 16-element execution.
1319 */
1320 if (dst_flags & WRITEMASK_X)
1321 emit_math1(p, BRW_MATH_FUNCTION_COS, dst, (dst_flags&SATURATE)|WRITEMASK_X, args[0]);
1322 if (dst_flags & WRITEMASK_Y)
1323 emit_math1(p, BRW_MATH_FUNCTION_SIN, dst+1, (dst_flags&SATURATE)|WRITEMASK_X, args[0]);
1324 break;
1325
1326 case OPCODE_POW:
1327 emit_math2(p, BRW_MATH_FUNCTION_POW, dst, dst_flags, args[0], args[1]);
1328 break;
1329
1330 /* Comparisons:
1331 */
1332 case OPCODE_CMP:
1333 emit_cmp(p, dst, dst_flags, args[0], args[1], args[2]);
1334 break;
1335
1336 case OPCODE_MAX:
1337 emit_max(p, dst, dst_flags, args[0], args[1]);
1338 break;
1339
1340 case OPCODE_MIN:
1341 emit_min(p, dst, dst_flags, args[0], args[1]);
1342 break;
1343
1344 case OPCODE_SLT:
1345 emit_slt(p, dst, dst_flags, args[0], args[1]);
1346 break;
1347
1348 case OPCODE_SLE:
1349 emit_sle(p, dst, dst_flags, args[0], args[1]);
1350 break;
1351 case OPCODE_SGT:
1352 emit_sgt(p, dst, dst_flags, args[0], args[1]);
1353 break;
1354 case OPCODE_SGE:
1355 emit_sge(p, dst, dst_flags, args[0], args[1]);
1356 break;
1357 case OPCODE_SEQ:
1358 emit_seq(p, dst, dst_flags, args[0], args[1]);
1359 break;
1360 case OPCODE_SNE:
1361 emit_sne(p, dst, dst_flags, args[0], args[1]);
1362 break;
1363
1364 case OPCODE_LIT:
1365 emit_lit(p, dst, dst_flags, args[0]);
1366 break;
1367
1368 /* Texturing operations:
1369 */
1370 case OPCODE_TEX:
1371 emit_tex(c, inst, dst, dst_flags, args[0]);
1372 break;
1373
1374 case OPCODE_TXB:
1375 emit_txb(c, inst, dst, dst_flags, args[0]);
1376 break;
1377
1378 case OPCODE_KIL:
1379 emit_kil(c, args[0]);
1380 break;
1381
1382 default:
1383 _mesa_printf("Unsupported opcode %i (%s) in fragment shader\n",
1384 inst->opcode, inst->opcode < MAX_OPCODE ?
1385 _mesa_opcode_string(inst->opcode) :
1386 "unknown");
1387 }
1388
1389 for (i = 0; i < 4; i++)
1390 if (inst->dst[i] && inst->dst[i]->spill_slot)
1391 emit_spill(c,
1392 inst->dst[i]->hw_reg,
1393 inst->dst[i]->spill_slot);
1394 }
1395
1396 if (INTEL_DEBUG & DEBUG_WM) {
1397 int i;
1398
1399 _mesa_printf("wm-native:\n");
1400 for (i = 0; i < p->nr_insn; i++)
1401 brw_disasm(stderr, &p->store[i]);
1402 _mesa_printf("\n");
1403 }
1404 }