st/dri: Don't check for null when user ensures non-null
[mesa.git] / src / gallium / drivers / i965 / brw_wm_emit.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32 #include "util/u_math.h"
33 #include "tgsi/tgsi_info.h"
34
35 #include "brw_context.h"
36 #include "brw_wm.h"
37 #include "brw_debug.h"
38 #include "brw_disasm.h"
39
40 /* Not quite sure how correct this is - need to understand horiz
41 * vs. vertical strides a little better.
42 */
43 static INLINE struct brw_reg sechalf( struct brw_reg reg )
44 {
45 if (reg.vstride)
46 reg.nr++;
47 return reg;
48 }
49
50 /* Payload R0:
51 *
52 * R0.0 -- pixel mask, one bit for each of 4 pixels in 4 quads,
53 * corresponding to each of the 16 execution channels.
54 * R0.1..8 -- ?
55 * R1.0 -- triangle vertex 0.X
56 * R1.1 -- triangle vertex 0.Y
57 * R1.2 -- quad 0 x,y coords (2 packed uwords)
58 * R1.3 -- quad 1 x,y coords (2 packed uwords)
59 * R1.4 -- quad 2 x,y coords (2 packed uwords)
60 * R1.5 -- quad 3 x,y coords (2 packed uwords)
61 * R1.6 -- ?
62 * R1.7 -- ?
63 * R1.8 -- ?
64 */
65
66
67 static void emit_pixel_xy(struct brw_compile *p,
68 const struct brw_reg *dst,
69 GLuint mask)
70 {
71 struct brw_reg r1 = brw_vec1_grf(1, 0);
72 struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW);
73
74 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
75
76 /* Calculate pixel centers by adding 1 or 0 to each of the
77 * micro-tile coordinates passed in r1.
78 */
79 if (mask & BRW_WRITEMASK_X) {
80 brw_ADD(p,
81 vec16(retype(dst[0], BRW_REGISTER_TYPE_UW)),
82 stride(suboffset(r1_uw, 4), 2, 4, 0),
83 brw_imm_v(0x10101010));
84 }
85
86 if (mask & BRW_WRITEMASK_Y) {
87 brw_ADD(p,
88 vec16(retype(dst[1], BRW_REGISTER_TYPE_UW)),
89 stride(suboffset(r1_uw,5), 2, 4, 0),
90 brw_imm_v(0x11001100));
91 }
92
93 brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
94 }
95
96
97
98 static void emit_delta_xy(struct brw_compile *p,
99 const struct brw_reg *dst,
100 GLuint mask,
101 const struct brw_reg *arg0)
102 {
103 struct brw_reg r1 = brw_vec1_grf(1, 0);
104
105 /* Calc delta X,Y by subtracting origin in r1 from the pixel
106 * centers.
107 */
108 if (mask & BRW_WRITEMASK_X) {
109 brw_ADD(p,
110 dst[0],
111 retype(arg0[0], BRW_REGISTER_TYPE_UW),
112 negate(r1));
113 }
114
115 if (mask & BRW_WRITEMASK_Y) {
116 brw_ADD(p,
117 dst[1],
118 retype(arg0[1], BRW_REGISTER_TYPE_UW),
119 negate(suboffset(r1,1)));
120
121 }
122 }
123
124 static void emit_wpos_xy(struct brw_wm_compile *c,
125 const struct brw_reg *dst,
126 GLuint mask,
127 const struct brw_reg *arg0)
128 {
129 struct brw_compile *p = &c->func;
130
131 if (mask & BRW_WRITEMASK_X) {
132 /* X' = X */
133 brw_MOV(p,
134 dst[0],
135 retype(arg0[0], BRW_REGISTER_TYPE_W));
136 }
137
138 /* XXX: is this needed any more, or is this a NOOP?
139 */
140 if (mask & BRW_WRITEMASK_Y) {
141 #if 0
142 /* Y' = height - 1 - Y */
143 brw_ADD(p,
144 dst[1],
145 negate(retype(arg0[1], BRW_REGISTER_TYPE_W)),
146 brw_imm_d(c->key.drawable_height - 1));
147 #else
148 brw_MOV(p,
149 dst[0],
150 retype(arg0[0], BRW_REGISTER_TYPE_W));
151 #endif
152 }
153 }
154
155
156 static void emit_pixel_w( struct brw_compile *p,
157 const struct brw_reg *dst,
158 GLuint mask,
159 const struct brw_reg *arg0,
160 const struct brw_reg *deltas)
161 {
162 /* Don't need this if all you are doing is interpolating color, for
163 * instance.
164 */
165 if (mask & BRW_WRITEMASK_W) {
166 struct brw_reg interp3 = brw_vec1_grf(arg0[0].nr+1, 4);
167
168 /* Calc 1/w - just linterp wpos[3] optimized by putting the
169 * result straight into a message reg.
170 */
171 brw_LINE(p, brw_null_reg(), interp3, deltas[0]);
172 brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), deltas[1]);
173
174 /* Calc w */
175 brw_math_16( p, dst[3],
176 BRW_MATH_FUNCTION_INV,
177 BRW_MATH_SATURATE_NONE,
178 2, brw_null_reg(),
179 BRW_MATH_PRECISION_FULL);
180 }
181 }
182
183
184
185 static void emit_linterp( struct brw_compile *p,
186 const struct brw_reg *dst,
187 GLuint mask,
188 const struct brw_reg *arg0,
189 const struct brw_reg *deltas )
190 {
191 struct brw_reg interp[4];
192 GLuint nr = arg0[0].nr;
193 GLuint i;
194
195 interp[0] = brw_vec1_grf(nr, 0);
196 interp[1] = brw_vec1_grf(nr, 4);
197 interp[2] = brw_vec1_grf(nr+1, 0);
198 interp[3] = brw_vec1_grf(nr+1, 4);
199
200 for (i = 0; i < 4; i++) {
201 if (mask & (1<<i)) {
202 brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
203 brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
204 }
205 }
206 }
207
208
209 static void emit_pinterp( struct brw_compile *p,
210 const struct brw_reg *dst,
211 GLuint mask,
212 const struct brw_reg *arg0,
213 const struct brw_reg *deltas,
214 const struct brw_reg *w)
215 {
216 struct brw_reg interp[4];
217 GLuint nr = arg0[0].nr;
218 GLuint i;
219
220 interp[0] = brw_vec1_grf(nr, 0);
221 interp[1] = brw_vec1_grf(nr, 4);
222 interp[2] = brw_vec1_grf(nr+1, 0);
223 interp[3] = brw_vec1_grf(nr+1, 4);
224
225 for (i = 0; i < 4; i++) {
226 if (mask & (1<<i)) {
227 brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
228 brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
229 }
230 }
231 for (i = 0; i < 4; i++) {
232 if (mask & (1<<i)) {
233 brw_MUL(p, dst[i], dst[i], w[3]);
234 }
235 }
236 }
237
238
239 static void emit_cinterp( struct brw_compile *p,
240 const struct brw_reg *dst,
241 GLuint mask,
242 const struct brw_reg *arg0 )
243 {
244 struct brw_reg interp[4];
245 GLuint nr = arg0[0].nr;
246 GLuint i;
247
248 interp[0] = brw_vec1_grf(nr, 0);
249 interp[1] = brw_vec1_grf(nr, 4);
250 interp[2] = brw_vec1_grf(nr+1, 0);
251 interp[3] = brw_vec1_grf(nr+1, 4);
252
253 for (i = 0; i < 4; i++) {
254 if (mask & (1<<i)) {
255 brw_MOV(p, dst[i], suboffset(interp[i],3)); /* TODO: optimize away like other moves */
256 }
257 }
258 }
259
260 /* Sets the destination channels to 1.0 or 0.0 according to glFrontFacing. */
261 static void emit_frontfacing( struct brw_compile *p,
262 const struct brw_reg *dst,
263 GLuint mask )
264 {
265 struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD);
266 GLuint i;
267
268 if (!(mask & BRW_WRITEMASK_XYZW))
269 return;
270
271 for (i = 0; i < 4; i++) {
272 if (mask & (1<<i)) {
273 brw_MOV(p, dst[i], brw_imm_f(0.0));
274 }
275 }
276
277 /* bit 31 is "primitive is back face", so checking < (1 << 31) gives
278 * us front face
279 */
280 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, r1_6ud, brw_imm_ud(1 << 31));
281 for (i = 0; i < 4; i++) {
282 if (mask & (1<<i)) {
283 brw_MOV(p, dst[i], brw_imm_f(1.0));
284 }
285 }
286 brw_set_predicate_control_flag_value(p, 0xff);
287 }
288
289 /* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input
290 * looking like:
291 *
292 * arg0: q0.tl q0.tr q0.bl q0.br q1.tl q1.tr q1.bl q1.br
293 *
294 * and we're trying to produce:
295 *
296 * DDX DDY
297 * dst: (q0.tr - q0.tl) (q0.tl - q0.bl)
298 * (q0.tr - q0.tl) (q0.tr - q0.br)
299 * (q0.br - q0.bl) (q0.tl - q0.bl)
300 * (q0.br - q0.bl) (q0.tr - q0.br)
301 * (q1.tr - q1.tl) (q1.tl - q1.bl)
302 * (q1.tr - q1.tl) (q1.tr - q1.br)
303 * (q1.br - q1.bl) (q1.tl - q1.bl)
304 * (q1.br - q1.bl) (q1.tr - q1.br)
305 *
306 * and add two more quads if in 16-pixel dispatch mode.
307 *
308 * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result
309 * for each pair, and vertstride = 2 jumps us 2 elements after processing a
310 * pair. But for DDY, it's harder, as we want to produce the pairs swizzled
311 * between each other. We could probably do it like ddx and swizzle the right
312 * order later, but bail for now and just produce
313 * ((q0.tl - q0.bl)x4 (q1.tl - q1.bl)x4)
314 */
315 void emit_ddxy(struct brw_compile *p,
316 const struct brw_reg *dst,
317 GLuint mask,
318 GLboolean is_ddx,
319 const struct brw_reg *arg0)
320 {
321 int i;
322 struct brw_reg src0, src1;
323
324 if (mask & SATURATE)
325 brw_set_saturate(p, 1);
326 for (i = 0; i < 4; i++ ) {
327 if (mask & (1<<i)) {
328 if (is_ddx) {
329 src0 = brw_reg(arg0[i].file, arg0[i].nr, 1,
330 BRW_REGISTER_TYPE_F,
331 BRW_VERTICAL_STRIDE_2,
332 BRW_WIDTH_2,
333 BRW_HORIZONTAL_STRIDE_0,
334 BRW_SWIZZLE_XYZW, BRW_WRITEMASK_XYZW);
335 src1 = brw_reg(arg0[i].file, arg0[i].nr, 0,
336 BRW_REGISTER_TYPE_F,
337 BRW_VERTICAL_STRIDE_2,
338 BRW_WIDTH_2,
339 BRW_HORIZONTAL_STRIDE_0,
340 BRW_SWIZZLE_XYZW, BRW_WRITEMASK_XYZW);
341 } else {
342 src0 = brw_reg(arg0[i].file, arg0[i].nr, 0,
343 BRW_REGISTER_TYPE_F,
344 BRW_VERTICAL_STRIDE_4,
345 BRW_WIDTH_4,
346 BRW_HORIZONTAL_STRIDE_0,
347 BRW_SWIZZLE_XYZW, BRW_WRITEMASK_XYZW);
348 src1 = brw_reg(arg0[i].file, arg0[i].nr, 2,
349 BRW_REGISTER_TYPE_F,
350 BRW_VERTICAL_STRIDE_4,
351 BRW_WIDTH_4,
352 BRW_HORIZONTAL_STRIDE_0,
353 BRW_SWIZZLE_XYZW, BRW_WRITEMASK_XYZW);
354 }
355 brw_ADD(p, dst[i], src0, negate(src1));
356 }
357 }
358 if (mask & SATURATE)
359 brw_set_saturate(p, 0);
360 }
361
362 static void emit_alu1( struct brw_compile *p,
363 struct brw_instruction *(*func)(struct brw_compile *,
364 struct brw_reg,
365 struct brw_reg),
366 const struct brw_reg *dst,
367 GLuint mask,
368 const struct brw_reg *arg0 )
369 {
370 GLuint i;
371
372 if (mask & SATURATE)
373 brw_set_saturate(p, 1);
374
375 for (i = 0; i < 4; i++) {
376 if (mask & (1<<i)) {
377 func(p, dst[i], arg0[i]);
378 }
379 }
380
381 if (mask & SATURATE)
382 brw_set_saturate(p, 0);
383 }
384
385
386 static void emit_alu2( struct brw_compile *p,
387 struct brw_instruction *(*func)(struct brw_compile *,
388 struct brw_reg,
389 struct brw_reg,
390 struct brw_reg),
391 const struct brw_reg *dst,
392 GLuint mask,
393 const struct brw_reg *arg0,
394 const struct brw_reg *arg1 )
395 {
396 GLuint i;
397
398 if (mask & SATURATE)
399 brw_set_saturate(p, 1);
400
401 for (i = 0; i < 4; i++) {
402 if (mask & (1<<i)) {
403 func(p, dst[i], arg0[i], arg1[i]);
404 }
405 }
406
407 if (mask & SATURATE)
408 brw_set_saturate(p, 0);
409 }
410
411
412 static void emit_mad( struct brw_compile *p,
413 const struct brw_reg *dst,
414 GLuint mask,
415 const struct brw_reg *arg0,
416 const struct brw_reg *arg1,
417 const struct brw_reg *arg2 )
418 {
419 GLuint i;
420
421 for (i = 0; i < 4; i++) {
422 if (mask & (1<<i)) {
423 brw_MUL(p, dst[i], arg0[i], arg1[i]);
424
425 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
426 brw_ADD(p, dst[i], dst[i], arg2[i]);
427 brw_set_saturate(p, 0);
428 }
429 }
430 }
431
432 static void emit_trunc( struct brw_compile *p,
433 const struct brw_reg *dst,
434 GLuint mask,
435 const struct brw_reg *arg0)
436 {
437 GLuint i;
438
439 for (i = 0; i < 4; i++) {
440 if (mask & (1<<i)) {
441 brw_RNDZ(p, dst[i], arg0[i]);
442 }
443 }
444 }
445
446 static void emit_lrp( struct brw_compile *p,
447 const struct brw_reg *dst,
448 GLuint mask,
449 const struct brw_reg *arg0,
450 const struct brw_reg *arg1,
451 const struct brw_reg *arg2 )
452 {
453 GLuint i;
454
455 /* Uses dst as a temporary:
456 */
457 for (i = 0; i < 4; i++) {
458 if (mask & (1<<i)) {
459 /* Can I use the LINE instruction for this?
460 */
461 brw_ADD(p, dst[i], negate(arg0[i]), brw_imm_f(1.0));
462 brw_MUL(p, brw_null_reg(), dst[i], arg2[i]);
463
464 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
465 brw_MAC(p, dst[i], arg0[i], arg1[i]);
466 brw_set_saturate(p, 0);
467 }
468 }
469 }
470
471 static void emit_sop( struct brw_compile *p,
472 const struct brw_reg *dst,
473 GLuint mask,
474 GLuint cond,
475 const struct brw_reg *arg0,
476 const struct brw_reg *arg1 )
477 {
478 GLuint i;
479
480 for (i = 0; i < 4; i++) {
481 if (mask & (1<<i)) {
482 brw_MOV(p, dst[i], brw_imm_f(0));
483 brw_CMP(p, brw_null_reg(), cond, arg0[i], arg1[i]);
484 brw_MOV(p, dst[i], brw_imm_f(1.0));
485 brw_set_predicate_control_flag_value(p, 0xff);
486 }
487 }
488 }
489
490 static void emit_slt( struct brw_compile *p,
491 const struct brw_reg *dst,
492 GLuint mask,
493 const struct brw_reg *arg0,
494 const struct brw_reg *arg1 )
495 {
496 emit_sop(p, dst, mask, BRW_CONDITIONAL_L, arg0, arg1);
497 }
498
499 static void emit_sle( struct brw_compile *p,
500 const struct brw_reg *dst,
501 GLuint mask,
502 const struct brw_reg *arg0,
503 const struct brw_reg *arg1 )
504 {
505 emit_sop(p, dst, mask, BRW_CONDITIONAL_LE, arg0, arg1);
506 }
507
508 static void emit_sgt( struct brw_compile *p,
509 const struct brw_reg *dst,
510 GLuint mask,
511 const struct brw_reg *arg0,
512 const struct brw_reg *arg1 )
513 {
514 emit_sop(p, dst, mask, BRW_CONDITIONAL_G, arg0, arg1);
515 }
516
517 static void emit_sge( struct brw_compile *p,
518 const struct brw_reg *dst,
519 GLuint mask,
520 const struct brw_reg *arg0,
521 const struct brw_reg *arg1 )
522 {
523 emit_sop(p, dst, mask, BRW_CONDITIONAL_GE, arg0, arg1);
524 }
525
526 static void emit_seq( struct brw_compile *p,
527 const struct brw_reg *dst,
528 GLuint mask,
529 const struct brw_reg *arg0,
530 const struct brw_reg *arg1 )
531 {
532 emit_sop(p, dst, mask, BRW_CONDITIONAL_EQ, arg0, arg1);
533 }
534
535 static void emit_sne( struct brw_compile *p,
536 const struct brw_reg *dst,
537 GLuint mask,
538 const struct brw_reg *arg0,
539 const struct brw_reg *arg1 )
540 {
541 emit_sop(p, dst, mask, BRW_CONDITIONAL_NEQ, arg0, arg1);
542 }
543
544 static void emit_cmp( struct brw_compile *p,
545 const struct brw_reg *dst,
546 GLuint mask,
547 const struct brw_reg *arg0,
548 const struct brw_reg *arg1,
549 const struct brw_reg *arg2 )
550 {
551 GLuint i;
552
553 for (i = 0; i < 4; i++) {
554 if (mask & (1<<i)) {
555 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
556 brw_MOV(p, dst[i], arg2[i]);
557 brw_set_saturate(p, 0);
558
559 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], brw_imm_f(0));
560
561 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
562 brw_MOV(p, dst[i], arg1[i]);
563 brw_set_saturate(p, 0);
564 brw_set_predicate_control_flag_value(p, 0xff);
565 }
566 }
567 }
568
569 static void emit_max( struct brw_compile *p,
570 const struct brw_reg *dst,
571 GLuint mask,
572 const struct brw_reg *arg0,
573 const struct brw_reg *arg1 )
574 {
575 GLuint i;
576
577 for (i = 0; i < 4; i++) {
578 if (mask & (1<<i)) {
579 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
580 brw_MOV(p, dst[i], arg0[i]);
581 brw_set_saturate(p, 0);
582
583 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]);
584
585 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
586 brw_MOV(p, dst[i], arg1[i]);
587 brw_set_saturate(p, 0);
588 brw_set_predicate_control_flag_value(p, 0xff);
589 }
590 }
591 }
592
593 static void emit_min( struct brw_compile *p,
594 const struct brw_reg *dst,
595 GLuint mask,
596 const struct brw_reg *arg0,
597 const struct brw_reg *arg1 )
598 {
599 GLuint i;
600
601 for (i = 0; i < 4; i++) {
602 if (mask & (1<<i)) {
603 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
604 brw_MOV(p, dst[i], arg1[i]);
605 brw_set_saturate(p, 0);
606
607 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]);
608
609 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
610 brw_MOV(p, dst[i], arg0[i]);
611 brw_set_saturate(p, 0);
612 brw_set_predicate_control_flag_value(p, 0xff);
613 }
614 }
615 }
616
617
618 static void emit_dp3( struct brw_compile *p,
619 const struct brw_reg *dst,
620 GLuint mask,
621 const struct brw_reg *arg0,
622 const struct brw_reg *arg1 )
623 {
624 int dst_chan = ffs(mask & BRW_WRITEMASK_XYZW) - 1;
625
626 if (!(mask & BRW_WRITEMASK_XYZW))
627 return; /* Do not emit dead code */
628
629 assert(util_is_power_of_two(mask & BRW_WRITEMASK_XYZW));
630
631 brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
632 brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
633
634 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
635 brw_MAC(p, dst[dst_chan], arg0[2], arg1[2]);
636 brw_set_saturate(p, 0);
637 }
638
639
640 static void emit_dp4( struct brw_compile *p,
641 const struct brw_reg *dst,
642 GLuint mask,
643 const struct brw_reg *arg0,
644 const struct brw_reg *arg1 )
645 {
646 int dst_chan = ffs(mask & BRW_WRITEMASK_XYZW) - 1;
647
648 if (!(mask & BRW_WRITEMASK_XYZW))
649 return; /* Do not emit dead code */
650
651 assert(util_is_power_of_two(mask & BRW_WRITEMASK_XYZW));
652
653 brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
654 brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
655 brw_MAC(p, brw_null_reg(), arg0[2], arg1[2]);
656
657 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
658 brw_MAC(p, dst[dst_chan], arg0[3], arg1[3]);
659 brw_set_saturate(p, 0);
660 }
661
662
663 static void emit_dph( struct brw_compile *p,
664 const struct brw_reg *dst,
665 GLuint mask,
666 const struct brw_reg *arg0,
667 const struct brw_reg *arg1 )
668 {
669 const int dst_chan = ffs(mask & BRW_WRITEMASK_XYZW) - 1;
670
671 if (!(mask & BRW_WRITEMASK_XYZW))
672 return; /* Do not emit dead code */
673
674 assert(util_is_power_of_two(mask & BRW_WRITEMASK_XYZW));
675
676 brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
677 brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
678 brw_MAC(p, dst[dst_chan], arg0[2], arg1[2]);
679
680 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
681 brw_ADD(p, dst[dst_chan], dst[dst_chan], arg1[3]);
682 brw_set_saturate(p, 0);
683 }
684
685
686 static void emit_xpd( struct brw_compile *p,
687 const struct brw_reg *dst,
688 GLuint mask,
689 const struct brw_reg *arg0,
690 const struct brw_reg *arg1 )
691 {
692 GLuint i;
693
694 assert((mask & BRW_WRITEMASK_W) != BRW_WRITEMASK_W);
695
696 for (i = 0 ; i < 3; i++) {
697 if (mask & (1<<i)) {
698 GLuint i2 = (i+2)%3;
699 GLuint i1 = (i+1)%3;
700
701 brw_MUL(p, brw_null_reg(), negate(arg0[i2]), arg1[i1]);
702
703 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
704 brw_MAC(p, dst[i], arg0[i1], arg1[i2]);
705 brw_set_saturate(p, 0);
706 }
707 }
708 }
709
710
711 static void emit_math1( struct brw_compile *p,
712 GLuint function,
713 const struct brw_reg *dst,
714 GLuint mask,
715 const struct brw_reg *arg0 )
716 {
717 int dst_chan = ffs(mask & BRW_WRITEMASK_XYZW) - 1;
718
719 if (!(mask & BRW_WRITEMASK_XYZW))
720 return; /* Do not emit dead code */
721
722 assert(util_is_power_of_two(mask & BRW_WRITEMASK_XYZW));
723
724 brw_MOV(p, brw_message_reg(2), arg0[0]);
725
726 /* Send two messages to perform all 16 operations:
727 */
728 brw_math_16(p,
729 dst[dst_chan],
730 function,
731 (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
732 2,
733 brw_null_reg(),
734 BRW_MATH_PRECISION_FULL);
735 }
736
737
738 static void emit_math2( struct brw_compile *p,
739 GLuint function,
740 const struct brw_reg *dst,
741 GLuint mask,
742 const struct brw_reg *arg0,
743 const struct brw_reg *arg1)
744 {
745 int dst_chan = ffs(mask & BRW_WRITEMASK_XYZW) - 1;
746
747 if (!(mask & BRW_WRITEMASK_XYZW))
748 return; /* Do not emit dead code */
749
750 assert(util_is_power_of_two(mask & BRW_WRITEMASK_XYZW));
751
752 brw_push_insn_state(p);
753
754 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
755 brw_MOV(p, brw_message_reg(2), arg0[0]);
756 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
757 brw_MOV(p, brw_message_reg(4), sechalf(arg0[0]));
758
759 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
760 brw_MOV(p, brw_message_reg(3), arg1[0]);
761 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
762 brw_MOV(p, brw_message_reg(5), sechalf(arg1[0]));
763
764
765 /* Send two messages to perform all 16 operations:
766 */
767 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
768 brw_math(p,
769 dst[dst_chan],
770 function,
771 (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
772 2,
773 brw_null_reg(),
774 BRW_MATH_DATA_VECTOR,
775 BRW_MATH_PRECISION_FULL);
776
777 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
778 brw_math(p,
779 offset(dst[dst_chan],1),
780 function,
781 (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
782 4,
783 brw_null_reg(),
784 BRW_MATH_DATA_VECTOR,
785 BRW_MATH_PRECISION_FULL);
786
787 brw_pop_insn_state(p);
788 }
789
790
791
792 static void emit_tex( struct brw_wm_compile *c,
793 const struct brw_wm_instruction *inst,
794 struct brw_reg *dst,
795 GLuint dst_flags,
796 struct brw_reg *coord,
797 GLuint sampler)
798 {
799 struct brw_compile *p = &c->func;
800 GLuint msgLength, responseLength;
801 GLuint i, nr;
802 GLuint emit;
803 GLuint msg_type;
804 GLboolean shadow = FALSE;
805
806 /* How many input regs are there?
807 */
808 switch (inst->target) {
809 case TGSI_TEXTURE_1D:
810 emit = BRW_WRITEMASK_X;
811 nr = 1;
812 break;
813 case TGSI_TEXTURE_SHADOW1D:
814 emit = BRW_WRITEMASK_XW;
815 nr = 4;
816 shadow = TRUE;
817 break;
818 case TGSI_TEXTURE_2D:
819 emit = BRW_WRITEMASK_XY;
820 nr = 2;
821 break;
822 case TGSI_TEXTURE_SHADOW2D:
823 case TGSI_TEXTURE_SHADOWRECT:
824 emit = BRW_WRITEMASK_XYW;
825 nr = 4;
826 shadow = TRUE;
827 break;
828 case TGSI_TEXTURE_3D:
829 case TGSI_TEXTURE_CUBE:
830 emit = BRW_WRITEMASK_XYZ;
831 nr = 3;
832 break;
833 default:
834 /* unexpected target */
835 abort();
836 }
837
838 msgLength = 1;
839
840 for (i = 0; i < nr; i++) {
841 static const GLuint swz[4] = {0,1,2,2};
842 if (emit & (1<<i))
843 brw_MOV(p, brw_message_reg(msgLength+1), coord[swz[i]]);
844 else
845 brw_MOV(p, brw_message_reg(msgLength+1), brw_imm_f(0));
846 msgLength += 2;
847 }
848
849 responseLength = 8; /* always */
850
851 if (BRW_IS_IGDNG(p->brw)) {
852 if (shadow)
853 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE_IGDNG;
854 else
855 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_IGDNG;
856 } else {
857 if (shadow)
858 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE;
859 else
860 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE;
861 }
862
863 brw_SAMPLE(p,
864 retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW),
865 1,
866 retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW),
867 BTI_TEXTURE(inst->tex_unit),
868 sampler, /* sampler index */
869 inst->writemask,
870 msg_type,
871 responseLength,
872 msgLength,
873 0,
874 1,
875 BRW_SAMPLER_SIMD_MODE_SIMD16);
876 }
877
878
879 static void emit_txb( struct brw_wm_compile *c,
880 const struct brw_wm_instruction *inst,
881 struct brw_reg *dst,
882 GLuint dst_flags,
883 struct brw_reg *coord,
884 GLuint sampler )
885 {
886 struct brw_compile *p = &c->func;
887 GLuint msgLength;
888 GLuint msg_type;
889 /* Shadow ignored for txb.
890 */
891 switch (inst->target) {
892 case TGSI_TEXTURE_1D:
893 case TGSI_TEXTURE_SHADOW1D:
894 brw_MOV(p, brw_message_reg(2), coord[0]);
895 brw_MOV(p, brw_message_reg(4), brw_imm_f(0));
896 brw_MOV(p, brw_message_reg(6), brw_imm_f(0));
897 break;
898 case TGSI_TEXTURE_2D:
899 case TGSI_TEXTURE_RECT:
900 case TGSI_TEXTURE_SHADOW2D:
901 case TGSI_TEXTURE_SHADOWRECT:
902 brw_MOV(p, brw_message_reg(2), coord[0]);
903 brw_MOV(p, brw_message_reg(4), coord[1]);
904 brw_MOV(p, brw_message_reg(6), brw_imm_f(0));
905 break;
906 case TGSI_TEXTURE_3D:
907 case TGSI_TEXTURE_CUBE:
908 brw_MOV(p, brw_message_reg(2), coord[0]);
909 brw_MOV(p, brw_message_reg(4), coord[1]);
910 brw_MOV(p, brw_message_reg(6), coord[2]);
911 break;
912 default:
913 /* unexpected target */
914 abort();
915 }
916
917 brw_MOV(p, brw_message_reg(8), coord[3]);
918 msgLength = 9;
919
920 if (BRW_IS_IGDNG(p->brw))
921 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS_IGDNG;
922 else
923 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
924
925 brw_SAMPLE(p,
926 retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW),
927 1,
928 retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW),
929 BTI_TEXTURE(inst->tex_unit),
930 sampler, /* sampler index */
931 inst->writemask,
932 msg_type,
933 8, /* responseLength */
934 msgLength,
935 0,
936 1,
937 BRW_SAMPLER_SIMD_MODE_SIMD16);
938 }
939
940
941 static void emit_lit( struct brw_compile *p,
942 const struct brw_reg *dst,
943 GLuint mask,
944 const struct brw_reg *arg0 )
945 {
946 assert((mask & BRW_WRITEMASK_XW) == 0);
947
948 if (mask & BRW_WRITEMASK_Y) {
949 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
950 brw_MOV(p, dst[1], arg0[0]);
951 brw_set_saturate(p, 0);
952 }
953
954 if (mask & BRW_WRITEMASK_Z) {
955 emit_math2(p, BRW_MATH_FUNCTION_POW,
956 &dst[2],
957 BRW_WRITEMASK_X | (mask & SATURATE),
958 &arg0[1],
959 &arg0[3]);
960 }
961
962 /* Ordinarily you'd use an iff statement to skip or shortcircuit
963 * some of the POW calculations above, but 16-wide iff statements
964 * seem to lock c1 hardware, so this is a nasty workaround:
965 */
966 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_LE, arg0[0], brw_imm_f(0));
967 {
968 if (mask & BRW_WRITEMASK_Y)
969 brw_MOV(p, dst[1], brw_imm_f(0));
970
971 if (mask & BRW_WRITEMASK_Z)
972 brw_MOV(p, dst[2], brw_imm_f(0));
973 }
974 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
975 }
976
977
978 /* Kill pixel - set execution mask to zero for those pixels which
979 * fail.
980 */
981 static void emit_kil( struct brw_wm_compile *c,
982 struct brw_reg *arg0)
983 {
984 struct brw_compile *p = &c->func;
985 struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
986 GLuint i;
987
988 /* XXX - usually won't need 4 compares!
989 */
990 for (i = 0; i < 4; i++) {
991 brw_push_insn_state(p);
992 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0[i], brw_imm_f(0));
993 brw_set_predicate_control_flag_value(p, 0xff);
994 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
995 brw_AND(p, r0uw, brw_flag_reg(), r0uw);
996 brw_pop_insn_state(p);
997 }
998 }
999
1000 /* KILLP kills the pixels that are currently executing, not based on a test
1001 * of the arguments.
1002 */
1003 static void emit_killp( struct brw_wm_compile *c )
1004 {
1005 struct brw_compile *p = &c->func;
1006 struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
1007
1008 brw_push_insn_state(p);
1009 brw_set_mask_control(p, BRW_MASK_DISABLE);
1010 brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); /* IMASK */
1011 brw_AND(p, r0uw, c->emit_mask_reg, r0uw);
1012 brw_pop_insn_state(p);
1013 }
1014
1015 static void fire_fb_write( struct brw_wm_compile *c,
1016 GLuint base_reg,
1017 GLuint nr,
1018 GLuint target,
1019 GLuint eot )
1020 {
1021 struct brw_compile *p = &c->func;
1022
1023 /* Pass through control information:
1024 */
1025 /* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */
1026 {
1027 brw_push_insn_state(p);
1028 brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */
1029 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1030 brw_MOV(p,
1031 brw_message_reg(base_reg + 1),
1032 brw_vec8_grf(1, 0));
1033 brw_pop_insn_state(p);
1034 }
1035
1036 /* Send framebuffer write message: */
1037 /* send (16) null.0<1>:uw m0 r0.0<8;8,1>:uw 0x85a04000:ud { Align1 EOT } */
1038 brw_fb_WRITE(p,
1039 retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW),
1040 base_reg,
1041 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
1042 target,
1043 nr,
1044 0,
1045 eot);
1046 }
1047
1048
1049 static void emit_aa( struct brw_wm_compile *c,
1050 struct brw_reg *arg1,
1051 GLuint reg )
1052 {
1053 struct brw_compile *p = &c->func;
1054 GLuint comp = c->key.aa_dest_stencil_reg / 2;
1055 GLuint off = c->key.aa_dest_stencil_reg % 2;
1056 struct brw_reg aa = offset(arg1[comp], off);
1057
1058 brw_push_insn_state(p);
1059 brw_set_compression_control(p, BRW_COMPRESSION_NONE); /* ?? */
1060 brw_MOV(p, brw_message_reg(reg), aa);
1061 brw_pop_insn_state(p);
1062 }
1063
1064
1065 /* Post-fragment-program processing. Send the results to the
1066 * framebuffer.
1067 * \param arg0 the fragment color
1068 * \param arg1 the pass-through depth value
1069 * \param arg2 the shader-computed depth value
1070 */
1071 static void emit_fb_write( struct brw_wm_compile *c,
1072 struct brw_reg *arg0,
1073 struct brw_reg *arg1,
1074 struct brw_reg *arg2,
1075 GLuint target,
1076 GLuint eot)
1077 {
1078 struct brw_compile *p = &c->func;
1079 GLuint nr = 2;
1080 GLuint channel;
1081
1082 /* Reserve a space for AA - may not be needed:
1083 */
1084 if (c->key.aa_dest_stencil_reg)
1085 nr += 1;
1086
1087 /* I don't really understand how this achieves the color interleave
1088 * (ie RGBARGBA) in the result: [Do the saturation here]
1089 */
1090 {
1091 brw_push_insn_state(p);
1092
1093 for (channel = 0; channel < 4; channel++) {
1094 /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */
1095 /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */
1096
1097 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1098 brw_MOV(p,
1099 brw_message_reg(nr + channel),
1100 arg0[channel]);
1101
1102 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
1103 brw_MOV(p,
1104 brw_message_reg(nr + channel + 4),
1105 sechalf(arg0[channel]));
1106 }
1107
1108 /* skip over the regs populated above:
1109 */
1110 nr += 8;
1111
1112 brw_pop_insn_state(p);
1113 }
1114
1115 if (c->key.source_depth_to_render_target)
1116 {
1117 if (c->key.computes_depth)
1118 brw_MOV(p, brw_message_reg(nr), arg2[2]);
1119 else
1120 brw_MOV(p, brw_message_reg(nr), arg1[1]); /* ? */
1121
1122 nr += 2;
1123 }
1124
1125 if (c->key.dest_depth_reg)
1126 {
1127 GLuint comp = c->key.dest_depth_reg / 2;
1128 GLuint off = c->key.dest_depth_reg % 2;
1129
1130 if (off != 0) {
1131 brw_push_insn_state(p);
1132 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1133
1134 brw_MOV(p, brw_message_reg(nr), offset(arg1[comp],1));
1135 /* 2nd half? */
1136 brw_MOV(p, brw_message_reg(nr+1), arg1[comp+1]);
1137 brw_pop_insn_state(p);
1138 }
1139 else {
1140 brw_MOV(p, brw_message_reg(nr), arg1[comp]);
1141 }
1142 nr += 2;
1143 }
1144
1145 if (!c->key.runtime_check_aads_emit) {
1146 if (c->key.aa_dest_stencil_reg)
1147 emit_aa(c, arg1, 2);
1148
1149 fire_fb_write(c, 0, nr, target, eot);
1150 }
1151 else {
1152 struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
1153 struct brw_reg ip = brw_ip_reg();
1154 struct brw_instruction *jmp;
1155
1156 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1157 brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
1158 brw_AND(p,
1159 v1_null_ud,
1160 get_element_ud(brw_vec8_grf(1,0), 6),
1161 brw_imm_ud(1<<26));
1162
1163 jmp = brw_JMPI(p, ip, ip, brw_imm_d(0));
1164 {
1165 emit_aa(c, arg1, 2);
1166 fire_fb_write(c, 0, nr, target, eot);
1167 /* note - thread killed in subroutine */
1168 }
1169 brw_land_fwd_jump(p, jmp);
1170
1171 /* ELSE: Shuffle up one register to fill in the hole left for AA:
1172 */
1173 fire_fb_write(c, 1, nr-1, target, eot);
1174 }
1175 }
1176
1177
1178 /**
1179 * Move a GPR to scratch memory.
1180 */
1181 static void emit_spill( struct brw_wm_compile *c,
1182 struct brw_reg reg,
1183 GLuint slot )
1184 {
1185 struct brw_compile *p = &c->func;
1186
1187 /*
1188 mov (16) m2.0<1>:ud r2.0<8;8,1>:ud { Align1 Compr }
1189 */
1190 brw_MOV(p, brw_message_reg(2), reg);
1191
1192 /*
1193 mov (1) r0.2<1>:d 0x00000080:d { Align1 NoMask }
1194 send (16) null.0<1>:uw m1 r0.0<8;8,1>:uw 0x053003ff:ud { Align1 }
1195 */
1196 brw_dp_WRITE_16(p,
1197 retype(vec16(brw_vec8_grf(0, 0)), BRW_REGISTER_TYPE_UW),
1198 slot);
1199 }
1200
1201
1202 /**
1203 * Load a GPR from scratch memory.
1204 */
1205 static void emit_unspill( struct brw_wm_compile *c,
1206 struct brw_reg reg,
1207 GLuint slot )
1208 {
1209 struct brw_compile *p = &c->func;
1210
1211 /* Slot 0 is the undef value.
1212 */
1213 if (slot == 0) {
1214 brw_MOV(p, reg, brw_imm_f(0));
1215 return;
1216 }
1217
1218 /*
1219 mov (1) r0.2<1>:d 0x000000c0:d { Align1 NoMask }
1220 send (16) r110.0<1>:uw m1 r0.0<8;8,1>:uw 0x041243ff:ud { Align1 }
1221 */
1222
1223 brw_dp_READ_16(p,
1224 retype(vec16(reg), BRW_REGISTER_TYPE_UW),
1225 slot);
1226 }
1227
1228
1229 /**
1230 * Retrieve up to 4 GEN4 register pairs for the given wm reg:
1231 * Args with unspill_reg != 0 will be loaded from scratch memory.
1232 */
1233 static void get_argument_regs( struct brw_wm_compile *c,
1234 struct brw_wm_ref *arg[],
1235 struct brw_reg *regs )
1236 {
1237 GLuint i;
1238
1239 for (i = 0; i < 4; i++) {
1240 if (arg[i]) {
1241 if (arg[i]->unspill_reg)
1242 emit_unspill(c,
1243 brw_vec8_grf(arg[i]->unspill_reg, 0),
1244 arg[i]->value->spill_slot);
1245
1246 regs[i] = arg[i]->hw_reg;
1247 }
1248 else {
1249 regs[i] = brw_null_reg();
1250 }
1251 }
1252 }
1253
1254
1255 /**
1256 * For values that have a spill_slot!=0, write those regs to scratch memory.
1257 */
1258 static void spill_values( struct brw_wm_compile *c,
1259 struct brw_wm_value *values,
1260 GLuint nr )
1261 {
1262 GLuint i;
1263
1264 for (i = 0; i < nr; i++)
1265 if (values[i].spill_slot)
1266 emit_spill(c, values[i].hw_reg, values[i].spill_slot);
1267 }
1268
1269
1270 /* Emit the fragment program instructions here.
1271 */
1272 void brw_wm_emit( struct brw_wm_compile *c )
1273 {
1274 struct brw_compile *p = &c->func;
1275 GLuint insn;
1276
1277 brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
1278
1279 /* Check if any of the payload regs need to be spilled:
1280 */
1281 spill_values(c, c->payload.depth, 4);
1282 spill_values(c, c->creg, c->nr_creg);
1283 spill_values(c, c->payload.input_interp, PIPE_MAX_SHADER_INPUTS);
1284
1285
1286 for (insn = 0; insn < c->nr_insns; insn++) {
1287
1288 struct brw_wm_instruction *inst = &c->instruction[insn];
1289 struct brw_reg args[3][4], dst[4];
1290 GLuint i, dst_flags;
1291
1292 /* Get argument regs:
1293 */
1294 for (i = 0; i < 3; i++)
1295 get_argument_regs(c, inst->src[i], args[i]);
1296
1297 /* Get dest regs:
1298 */
1299 for (i = 0; i < 4; i++)
1300 if (inst->dst[i])
1301 dst[i] = inst->dst[i]->hw_reg;
1302 else
1303 dst[i] = brw_null_reg();
1304
1305 /* Flags
1306 */
1307 dst_flags = inst->writemask;
1308 if (inst->saturate)
1309 dst_flags |= SATURATE;
1310
1311 switch (inst->opcode) {
1312 /* Generated instructions for calculating triangle interpolants:
1313 */
1314 case WM_PIXELXY:
1315 emit_pixel_xy(p, dst, dst_flags);
1316 break;
1317
1318 case WM_DELTAXY:
1319 emit_delta_xy(p, dst, dst_flags, args[0]);
1320 break;
1321
1322 case WM_WPOSXY:
1323 emit_wpos_xy(c, dst, dst_flags, args[0]);
1324 break;
1325
1326 case WM_PIXELW:
1327 emit_pixel_w(p, dst, dst_flags, args[0], args[1]);
1328 break;
1329
1330 case WM_LINTERP:
1331 emit_linterp(p, dst, dst_flags, args[0], args[1]);
1332 break;
1333
1334 case WM_PINTERP:
1335 emit_pinterp(p, dst, dst_flags, args[0], args[1], args[2]);
1336 break;
1337
1338 case WM_CINTERP:
1339 emit_cinterp(p, dst, dst_flags, args[0]);
1340 break;
1341
1342 case WM_FB_WRITE:
1343 emit_fb_write(c, args[0], args[1], args[2], inst->target, inst->eot);
1344 break;
1345
1346 case WM_FRONTFACING:
1347 emit_frontfacing(p, dst, dst_flags);
1348 break;
1349
1350 /* Straightforward arithmetic:
1351 */
1352 case TGSI_OPCODE_ADD:
1353 emit_alu2(p, brw_ADD, dst, dst_flags, args[0], args[1]);
1354 break;
1355
1356 case TGSI_OPCODE_FRC:
1357 emit_alu1(p, brw_FRC, dst, dst_flags, args[0]);
1358 break;
1359
1360 case TGSI_OPCODE_FLR:
1361 emit_alu1(p, brw_RNDD, dst, dst_flags, args[0]);
1362 break;
1363
1364 case TGSI_OPCODE_DDX:
1365 emit_ddxy(p, dst, dst_flags, GL_TRUE, args[0]);
1366 break;
1367
1368 case TGSI_OPCODE_DDY:
1369 emit_ddxy(p, dst, dst_flags, GL_FALSE, args[0]);
1370 break;
1371
1372 case TGSI_OPCODE_DP3:
1373 emit_dp3(p, dst, dst_flags, args[0], args[1]);
1374 break;
1375
1376 case TGSI_OPCODE_DP4:
1377 emit_dp4(p, dst, dst_flags, args[0], args[1]);
1378 break;
1379
1380 case TGSI_OPCODE_DPH:
1381 emit_dph(p, dst, dst_flags, args[0], args[1]);
1382 break;
1383
1384 case TGSI_OPCODE_TRUNC:
1385 emit_trunc(p, dst, dst_flags, args[0]);
1386 break;
1387
1388 case TGSI_OPCODE_LRP:
1389 emit_lrp(p, dst, dst_flags, args[0], args[1], args[2]);
1390 break;
1391
1392 case TGSI_OPCODE_MAD:
1393 emit_mad(p, dst, dst_flags, args[0], args[1], args[2]);
1394 break;
1395
1396 case TGSI_OPCODE_MOV:
1397 emit_alu1(p, brw_MOV, dst, dst_flags, args[0]);
1398 break;
1399
1400 case TGSI_OPCODE_MUL:
1401 emit_alu2(p, brw_MUL, dst, dst_flags, args[0], args[1]);
1402 break;
1403
1404 case TGSI_OPCODE_XPD:
1405 emit_xpd(p, dst, dst_flags, args[0], args[1]);
1406 break;
1407
1408 /* Higher math functions:
1409 */
1410 case TGSI_OPCODE_RCP:
1411 emit_math1(p, BRW_MATH_FUNCTION_INV, dst, dst_flags, args[0]);
1412 break;
1413
1414 case TGSI_OPCODE_RSQ:
1415 emit_math1(p, BRW_MATH_FUNCTION_RSQ, dst, dst_flags, args[0]);
1416 break;
1417
1418 case TGSI_OPCODE_SIN:
1419 emit_math1(p, BRW_MATH_FUNCTION_SIN, dst, dst_flags, args[0]);
1420 break;
1421
1422 case TGSI_OPCODE_COS:
1423 emit_math1(p, BRW_MATH_FUNCTION_COS, dst, dst_flags, args[0]);
1424 break;
1425
1426 case TGSI_OPCODE_EX2:
1427 emit_math1(p, BRW_MATH_FUNCTION_EXP, dst, dst_flags, args[0]);
1428 break;
1429
1430 case TGSI_OPCODE_LG2:
1431 emit_math1(p, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]);
1432 break;
1433
1434 case TGSI_OPCODE_SCS:
1435 /* There is an scs math function, but it would need some
1436 * fixup for 16-element execution.
1437 */
1438 if (dst_flags & BRW_WRITEMASK_X)
1439 emit_math1(p, BRW_MATH_FUNCTION_COS, dst, (dst_flags&SATURATE)|BRW_WRITEMASK_X, args[0]);
1440 if (dst_flags & BRW_WRITEMASK_Y)
1441 emit_math1(p, BRW_MATH_FUNCTION_SIN, dst+1, (dst_flags&SATURATE)|BRW_WRITEMASK_X, args[0]);
1442 break;
1443
1444 case TGSI_OPCODE_POW:
1445 emit_math2(p, BRW_MATH_FUNCTION_POW, dst, dst_flags, args[0], args[1]);
1446 break;
1447
1448 /* Comparisons:
1449 */
1450 case TGSI_OPCODE_CMP:
1451 emit_cmp(p, dst, dst_flags, args[0], args[1], args[2]);
1452 break;
1453
1454 case TGSI_OPCODE_MAX:
1455 emit_max(p, dst, dst_flags, args[0], args[1]);
1456 break;
1457
1458 case TGSI_OPCODE_MIN:
1459 emit_min(p, dst, dst_flags, args[0], args[1]);
1460 break;
1461
1462 case TGSI_OPCODE_SLT:
1463 emit_slt(p, dst, dst_flags, args[0], args[1]);
1464 break;
1465
1466 case TGSI_OPCODE_SLE:
1467 emit_sle(p, dst, dst_flags, args[0], args[1]);
1468 break;
1469 case TGSI_OPCODE_SGT:
1470 emit_sgt(p, dst, dst_flags, args[0], args[1]);
1471 break;
1472 case TGSI_OPCODE_SGE:
1473 emit_sge(p, dst, dst_flags, args[0], args[1]);
1474 break;
1475 case TGSI_OPCODE_SEQ:
1476 emit_seq(p, dst, dst_flags, args[0], args[1]);
1477 break;
1478 case TGSI_OPCODE_SNE:
1479 emit_sne(p, dst, dst_flags, args[0], args[1]);
1480 break;
1481
1482 case TGSI_OPCODE_LIT:
1483 emit_lit(p, dst, dst_flags, args[0]);
1484 break;
1485
1486 /* Texturing operations:
1487 */
1488 case TGSI_OPCODE_TEX:
1489 emit_tex(c, inst, dst, dst_flags, args[0], inst->sampler);
1490 break;
1491
1492 case TGSI_OPCODE_TXB:
1493 emit_txb(c, inst, dst, dst_flags, args[0], inst->sampler);
1494 break;
1495
1496 case TGSI_OPCODE_KIL:
1497 emit_kil(c, args[0]);
1498 break;
1499
1500 case TGSI_OPCODE_KILP:
1501 emit_killp(c);
1502 break;
1503
1504 default:
1505 debug_printf("Unsupported opcode %i (%s) in fragment shader\n",
1506 inst->opcode,
1507 tgsi_get_opcode_info(inst->opcode)->mnemonic);
1508 }
1509
1510 for (i = 0; i < 4; i++)
1511 if (inst->dst[i] && inst->dst[i]->spill_slot)
1512 emit_spill(c,
1513 inst->dst[i]->hw_reg,
1514 inst->dst[i]->spill_slot);
1515 }
1516
1517 if (BRW_DEBUG & DEBUG_WM) {
1518 debug_printf("wm-native:\n");
1519 brw_disasm(stderr, p->store, p->nr_insn);
1520 }
1521 }