Merge branch 'mesa_7_6_branch'
[mesa.git] / src / mesa / drivers / dri / i965 / brw_wm_emit.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "main/macros.h"
34 #include "brw_context.h"
35 #include "brw_wm.h"
36
37 /* Not quite sure how correct this is - need to understand horiz
38 * vs. vertical strides a little better.
39 */
40 static INLINE struct brw_reg sechalf( struct brw_reg reg )
41 {
42 if (reg.vstride)
43 reg.nr++;
44 return reg;
45 }
46
47 /* Payload R0:
48 *
49 * R0.0 -- pixel mask, one bit for each of 4 pixels in 4 tiles,
50 * corresponding to each of the 16 execution channels.
51 * R0.1..8 -- ?
52 * R1.0 -- triangle vertex 0.X
53 * R1.1 -- triangle vertex 0.Y
54 * R1.2 -- tile 0 x,y coords (2 packed uwords)
55 * R1.3 -- tile 1 x,y coords (2 packed uwords)
56 * R1.4 -- tile 2 x,y coords (2 packed uwords)
57 * R1.5 -- tile 3 x,y coords (2 packed uwords)
58 * R1.6 -- ?
59 * R1.7 -- ?
60 * R1.8 -- ?
61 */
62
63
64 static void emit_pixel_xy(struct brw_compile *p,
65 const struct brw_reg *dst,
66 GLuint mask)
67 {
68 struct brw_reg r1 = brw_vec1_grf(1, 0);
69 struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW);
70
71 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
72
73 /* Calculate pixel centers by adding 1 or 0 to each of the
74 * micro-tile coordinates passed in r1.
75 */
76 if (mask & WRITEMASK_X) {
77 brw_ADD(p,
78 vec16(retype(dst[0], BRW_REGISTER_TYPE_UW)),
79 stride(suboffset(r1_uw, 4), 2, 4, 0),
80 brw_imm_v(0x10101010));
81 }
82
83 if (mask & WRITEMASK_Y) {
84 brw_ADD(p,
85 vec16(retype(dst[1], BRW_REGISTER_TYPE_UW)),
86 stride(suboffset(r1_uw,5), 2, 4, 0),
87 brw_imm_v(0x11001100));
88 }
89
90 brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
91 }
92
93
94
95 static void emit_delta_xy(struct brw_compile *p,
96 const struct brw_reg *dst,
97 GLuint mask,
98 const struct brw_reg *arg0)
99 {
100 struct brw_reg r1 = brw_vec1_grf(1, 0);
101
102 /* Calc delta X,Y by subtracting origin in r1 from the pixel
103 * centers.
104 */
105 if (mask & WRITEMASK_X) {
106 brw_ADD(p,
107 dst[0],
108 retype(arg0[0], BRW_REGISTER_TYPE_UW),
109 negate(r1));
110 }
111
112 if (mask & WRITEMASK_Y) {
113 brw_ADD(p,
114 dst[1],
115 retype(arg0[1], BRW_REGISTER_TYPE_UW),
116 negate(suboffset(r1,1)));
117
118 }
119 }
120
121 static void emit_wpos_xy(struct brw_wm_compile *c,
122 const struct brw_reg *dst,
123 GLuint mask,
124 const struct brw_reg *arg0)
125 {
126 struct brw_compile *p = &c->func;
127
128 /* Calculate the pixel offset from window bottom left into destination
129 * X and Y channels.
130 */
131 if (mask & WRITEMASK_X) {
132 /* X' = X - origin */
133 brw_ADD(p,
134 dst[0],
135 retype(arg0[0], BRW_REGISTER_TYPE_W),
136 brw_imm_d(0 - c->key.origin_x));
137 }
138
139 if (mask & WRITEMASK_Y) {
140 /* Y' = height - (Y - origin_y) = height + origin_y - Y */
141 brw_ADD(p,
142 dst[1],
143 negate(retype(arg0[1], BRW_REGISTER_TYPE_W)),
144 brw_imm_d(c->key.origin_y + c->key.drawable_height - 1));
145 }
146 }
147
148
149 static void emit_pixel_w( struct brw_compile *p,
150 const struct brw_reg *dst,
151 GLuint mask,
152 const struct brw_reg *arg0,
153 const struct brw_reg *deltas)
154 {
155 /* Don't need this if all you are doing is interpolating color, for
156 * instance.
157 */
158 if (mask & WRITEMASK_W) {
159 struct brw_reg interp3 = brw_vec1_grf(arg0[0].nr+1, 4);
160
161 /* Calc 1/w - just linterp wpos[3] optimized by putting the
162 * result straight into a message reg.
163 */
164 brw_LINE(p, brw_null_reg(), interp3, deltas[0]);
165 brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), deltas[1]);
166
167 /* Calc w */
168 brw_math_16( p, dst[3],
169 BRW_MATH_FUNCTION_INV,
170 BRW_MATH_SATURATE_NONE,
171 2, brw_null_reg(),
172 BRW_MATH_PRECISION_FULL);
173 }
174 }
175
176
177
178 static void emit_linterp( struct brw_compile *p,
179 const struct brw_reg *dst,
180 GLuint mask,
181 const struct brw_reg *arg0,
182 const struct brw_reg *deltas )
183 {
184 struct brw_reg interp[4];
185 GLuint nr = arg0[0].nr;
186 GLuint i;
187
188 interp[0] = brw_vec1_grf(nr, 0);
189 interp[1] = brw_vec1_grf(nr, 4);
190 interp[2] = brw_vec1_grf(nr+1, 0);
191 interp[3] = brw_vec1_grf(nr+1, 4);
192
193 for (i = 0; i < 4; i++) {
194 if (mask & (1<<i)) {
195 brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
196 brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
197 }
198 }
199 }
200
201
202 static void emit_pinterp( struct brw_compile *p,
203 const struct brw_reg *dst,
204 GLuint mask,
205 const struct brw_reg *arg0,
206 const struct brw_reg *deltas,
207 const struct brw_reg *w)
208 {
209 struct brw_reg interp[4];
210 GLuint nr = arg0[0].nr;
211 GLuint i;
212
213 interp[0] = brw_vec1_grf(nr, 0);
214 interp[1] = brw_vec1_grf(nr, 4);
215 interp[2] = brw_vec1_grf(nr+1, 0);
216 interp[3] = brw_vec1_grf(nr+1, 4);
217
218 for (i = 0; i < 4; i++) {
219 if (mask & (1<<i)) {
220 brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
221 brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
222 }
223 }
224 for (i = 0; i < 4; i++) {
225 if (mask & (1<<i)) {
226 brw_MUL(p, dst[i], dst[i], w[3]);
227 }
228 }
229 }
230
231
232 static void emit_cinterp( struct brw_compile *p,
233 const struct brw_reg *dst,
234 GLuint mask,
235 const struct brw_reg *arg0 )
236 {
237 struct brw_reg interp[4];
238 GLuint nr = arg0[0].nr;
239 GLuint i;
240
241 interp[0] = brw_vec1_grf(nr, 0);
242 interp[1] = brw_vec1_grf(nr, 4);
243 interp[2] = brw_vec1_grf(nr+1, 0);
244 interp[3] = brw_vec1_grf(nr+1, 4);
245
246 for (i = 0; i < 4; i++) {
247 if (mask & (1<<i)) {
248 brw_MOV(p, dst[i], suboffset(interp[i],3)); /* TODO: optimize away like other moves */
249 }
250 }
251 }
252
253 /* Sets the destination channels to 1.0 or 0.0 according to glFrontFacing. */
254 static void emit_frontfacing( struct brw_compile *p,
255 const struct brw_reg *dst,
256 GLuint mask )
257 {
258 struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD);
259 GLuint i;
260
261 if (!(mask & WRITEMASK_XYZW))
262 return;
263
264 for (i = 0; i < 4; i++) {
265 if (mask & (1<<i)) {
266 brw_MOV(p, dst[i], brw_imm_f(0.0));
267 }
268 }
269
270 /* bit 31 is "primitive is back face", so checking < (1 << 31) gives
271 * us front face
272 */
273 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, r1_6ud, brw_imm_ud(1 << 31));
274 for (i = 0; i < 4; i++) {
275 if (mask & (1<<i)) {
276 brw_MOV(p, dst[i], brw_imm_f(1.0));
277 }
278 }
279 brw_set_predicate_control_flag_value(p, 0xff);
280 }
281
282 /* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input
283 * looking like:
284 *
285 * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br
286 *
287 * and we're trying to produce:
288 *
289 * DDX DDY
290 * dst: (ss0.tr - ss0.tl) (ss0.tl - ss0.bl)
291 * (ss0.tr - ss0.tl) (ss0.tr - ss0.br)
292 * (ss0.br - ss0.bl) (ss0.tl - ss0.bl)
293 * (ss0.br - ss0.bl) (ss0.tr - ss0.br)
294 * (ss1.tr - ss1.tl) (ss1.tl - ss1.bl)
295 * (ss1.tr - ss1.tl) (ss1.tr - ss1.br)
296 * (ss1.br - ss1.bl) (ss1.tl - ss1.bl)
297 * (ss1.br - ss1.bl) (ss1.tr - ss1.br)
298 *
299 * and add another set of two more subspans if in 16-pixel dispatch mode.
300 *
301 * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result
302 * for each pair, and vertstride = 2 jumps us 2 elements after processing a
303 * pair. But for DDY, it's harder, as we want to produce the pairs swizzled
304 * between each other. We could probably do it like ddx and swizzle the right
305 * order later, but bail for now and just produce
306 * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4)
307 */
308 void emit_ddxy(struct brw_compile *p,
309 const struct brw_reg *dst,
310 GLuint mask,
311 GLboolean is_ddx,
312 const struct brw_reg *arg0)
313 {
314 int i;
315 struct brw_reg src0, src1;
316
317 if (mask & SATURATE)
318 brw_set_saturate(p, 1);
319 for (i = 0; i < 4; i++ ) {
320 if (mask & (1<<i)) {
321 if (is_ddx) {
322 src0 = brw_reg(arg0[i].file, arg0[i].nr, 1,
323 BRW_REGISTER_TYPE_F,
324 BRW_VERTICAL_STRIDE_2,
325 BRW_WIDTH_2,
326 BRW_HORIZONTAL_STRIDE_0,
327 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
328 src1 = brw_reg(arg0[i].file, arg0[i].nr, 0,
329 BRW_REGISTER_TYPE_F,
330 BRW_VERTICAL_STRIDE_2,
331 BRW_WIDTH_2,
332 BRW_HORIZONTAL_STRIDE_0,
333 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
334 } else {
335 src0 = brw_reg(arg0[i].file, arg0[i].nr, 0,
336 BRW_REGISTER_TYPE_F,
337 BRW_VERTICAL_STRIDE_4,
338 BRW_WIDTH_4,
339 BRW_HORIZONTAL_STRIDE_0,
340 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
341 src1 = brw_reg(arg0[i].file, arg0[i].nr, 2,
342 BRW_REGISTER_TYPE_F,
343 BRW_VERTICAL_STRIDE_4,
344 BRW_WIDTH_4,
345 BRW_HORIZONTAL_STRIDE_0,
346 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
347 }
348 brw_ADD(p, dst[i], src0, negate(src1));
349 }
350 }
351 if (mask & SATURATE)
352 brw_set_saturate(p, 0);
353 }
354
355 static void emit_alu1( struct brw_compile *p,
356 struct brw_instruction *(*func)(struct brw_compile *,
357 struct brw_reg,
358 struct brw_reg),
359 const struct brw_reg *dst,
360 GLuint mask,
361 const struct brw_reg *arg0 )
362 {
363 GLuint i;
364
365 if (mask & SATURATE)
366 brw_set_saturate(p, 1);
367
368 for (i = 0; i < 4; i++) {
369 if (mask & (1<<i)) {
370 func(p, dst[i], arg0[i]);
371 }
372 }
373
374 if (mask & SATURATE)
375 brw_set_saturate(p, 0);
376 }
377
378
379 static void emit_alu2( struct brw_compile *p,
380 struct brw_instruction *(*func)(struct brw_compile *,
381 struct brw_reg,
382 struct brw_reg,
383 struct brw_reg),
384 const struct brw_reg *dst,
385 GLuint mask,
386 const struct brw_reg *arg0,
387 const struct brw_reg *arg1 )
388 {
389 GLuint i;
390
391 if (mask & SATURATE)
392 brw_set_saturate(p, 1);
393
394 for (i = 0; i < 4; i++) {
395 if (mask & (1<<i)) {
396 func(p, dst[i], arg0[i], arg1[i]);
397 }
398 }
399
400 if (mask & SATURATE)
401 brw_set_saturate(p, 0);
402 }
403
404
405 static void emit_mad( struct brw_compile *p,
406 const struct brw_reg *dst,
407 GLuint mask,
408 const struct brw_reg *arg0,
409 const struct brw_reg *arg1,
410 const struct brw_reg *arg2 )
411 {
412 GLuint i;
413
414 for (i = 0; i < 4; i++) {
415 if (mask & (1<<i)) {
416 brw_MUL(p, dst[i], arg0[i], arg1[i]);
417
418 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
419 brw_ADD(p, dst[i], dst[i], arg2[i]);
420 brw_set_saturate(p, 0);
421 }
422 }
423 }
424
425 static void emit_trunc( struct brw_compile *p,
426 const struct brw_reg *dst,
427 GLuint mask,
428 const struct brw_reg *arg0)
429 {
430 GLuint i;
431
432 for (i = 0; i < 4; i++) {
433 if (mask & (1<<i)) {
434 brw_RNDZ(p, dst[i], arg0[i]);
435 }
436 }
437 }
438
439 static void emit_lrp( struct brw_compile *p,
440 const struct brw_reg *dst,
441 GLuint mask,
442 const struct brw_reg *arg0,
443 const struct brw_reg *arg1,
444 const struct brw_reg *arg2 )
445 {
446 GLuint i;
447
448 /* Uses dst as a temporary:
449 */
450 for (i = 0; i < 4; i++) {
451 if (mask & (1<<i)) {
452 /* Can I use the LINE instruction for this?
453 */
454 brw_ADD(p, dst[i], negate(arg0[i]), brw_imm_f(1.0));
455 brw_MUL(p, brw_null_reg(), dst[i], arg2[i]);
456
457 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
458 brw_MAC(p, dst[i], arg0[i], arg1[i]);
459 brw_set_saturate(p, 0);
460 }
461 }
462 }
463
464 static void emit_sop( struct brw_compile *p,
465 const struct brw_reg *dst,
466 GLuint mask,
467 GLuint cond,
468 const struct brw_reg *arg0,
469 const struct brw_reg *arg1 )
470 {
471 GLuint i;
472
473 for (i = 0; i < 4; i++) {
474 if (mask & (1<<i)) {
475 brw_MOV(p, dst[i], brw_imm_f(0));
476 brw_CMP(p, brw_null_reg(), cond, arg0[i], arg1[i]);
477 brw_MOV(p, dst[i], brw_imm_f(1.0));
478 brw_set_predicate_control_flag_value(p, 0xff);
479 }
480 }
481 }
482
483 static void emit_slt( struct brw_compile *p,
484 const struct brw_reg *dst,
485 GLuint mask,
486 const struct brw_reg *arg0,
487 const struct brw_reg *arg1 )
488 {
489 emit_sop(p, dst, mask, BRW_CONDITIONAL_L, arg0, arg1);
490 }
491
492 static void emit_sle( struct brw_compile *p,
493 const struct brw_reg *dst,
494 GLuint mask,
495 const struct brw_reg *arg0,
496 const struct brw_reg *arg1 )
497 {
498 emit_sop(p, dst, mask, BRW_CONDITIONAL_LE, arg0, arg1);
499 }
500
501 static void emit_sgt( struct brw_compile *p,
502 const struct brw_reg *dst,
503 GLuint mask,
504 const struct brw_reg *arg0,
505 const struct brw_reg *arg1 )
506 {
507 emit_sop(p, dst, mask, BRW_CONDITIONAL_G, arg0, arg1);
508 }
509
510 static void emit_sge( struct brw_compile *p,
511 const struct brw_reg *dst,
512 GLuint mask,
513 const struct brw_reg *arg0,
514 const struct brw_reg *arg1 )
515 {
516 emit_sop(p, dst, mask, BRW_CONDITIONAL_GE, arg0, arg1);
517 }
518
519 static void emit_seq( struct brw_compile *p,
520 const struct brw_reg *dst,
521 GLuint mask,
522 const struct brw_reg *arg0,
523 const struct brw_reg *arg1 )
524 {
525 emit_sop(p, dst, mask, BRW_CONDITIONAL_EQ, arg0, arg1);
526 }
527
528 static void emit_sne( struct brw_compile *p,
529 const struct brw_reg *dst,
530 GLuint mask,
531 const struct brw_reg *arg0,
532 const struct brw_reg *arg1 )
533 {
534 emit_sop(p, dst, mask, BRW_CONDITIONAL_NEQ, arg0, arg1);
535 }
536
537 static void emit_cmp( struct brw_compile *p,
538 const struct brw_reg *dst,
539 GLuint mask,
540 const struct brw_reg *arg0,
541 const struct brw_reg *arg1,
542 const struct brw_reg *arg2 )
543 {
544 GLuint i;
545
546 for (i = 0; i < 4; i++) {
547 if (mask & (1<<i)) {
548 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
549 brw_MOV(p, dst[i], arg2[i]);
550 brw_set_saturate(p, 0);
551
552 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], brw_imm_f(0));
553
554 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
555 brw_MOV(p, dst[i], arg1[i]);
556 brw_set_saturate(p, 0);
557 brw_set_predicate_control_flag_value(p, 0xff);
558 }
559 }
560 }
561
562 static void emit_max( struct brw_compile *p,
563 const struct brw_reg *dst,
564 GLuint mask,
565 const struct brw_reg *arg0,
566 const struct brw_reg *arg1 )
567 {
568 GLuint i;
569
570 for (i = 0; i < 4; i++) {
571 if (mask & (1<<i)) {
572 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
573 brw_MOV(p, dst[i], arg0[i]);
574 brw_set_saturate(p, 0);
575
576 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]);
577
578 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
579 brw_MOV(p, dst[i], arg1[i]);
580 brw_set_saturate(p, 0);
581 brw_set_predicate_control_flag_value(p, 0xff);
582 }
583 }
584 }
585
586 static void emit_min( struct brw_compile *p,
587 const struct brw_reg *dst,
588 GLuint mask,
589 const struct brw_reg *arg0,
590 const struct brw_reg *arg1 )
591 {
592 GLuint i;
593
594 for (i = 0; i < 4; i++) {
595 if (mask & (1<<i)) {
596 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
597 brw_MOV(p, dst[i], arg1[i]);
598 brw_set_saturate(p, 0);
599
600 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]);
601
602 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
603 brw_MOV(p, dst[i], arg0[i]);
604 brw_set_saturate(p, 0);
605 brw_set_predicate_control_flag_value(p, 0xff);
606 }
607 }
608 }
609
610
611 static void emit_dp3( struct brw_compile *p,
612 const struct brw_reg *dst,
613 GLuint mask,
614 const struct brw_reg *arg0,
615 const struct brw_reg *arg1 )
616 {
617 int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
618
619 if (!(mask & WRITEMASK_XYZW))
620 return; /* Do not emit dead code */
621
622 assert(is_power_of_two(mask & WRITEMASK_XYZW));
623
624 brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
625 brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
626
627 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
628 brw_MAC(p, dst[dst_chan], arg0[2], arg1[2]);
629 brw_set_saturate(p, 0);
630 }
631
632
633 static void emit_dp4( struct brw_compile *p,
634 const struct brw_reg *dst,
635 GLuint mask,
636 const struct brw_reg *arg0,
637 const struct brw_reg *arg1 )
638 {
639 int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
640
641 if (!(mask & WRITEMASK_XYZW))
642 return; /* Do not emit dead code */
643
644 assert(is_power_of_two(mask & WRITEMASK_XYZW));
645
646 brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
647 brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
648 brw_MAC(p, brw_null_reg(), arg0[2], arg1[2]);
649
650 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
651 brw_MAC(p, dst[dst_chan], arg0[3], arg1[3]);
652 brw_set_saturate(p, 0);
653 }
654
655
656 static void emit_dph( struct brw_compile *p,
657 const struct brw_reg *dst,
658 GLuint mask,
659 const struct brw_reg *arg0,
660 const struct brw_reg *arg1 )
661 {
662 const int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
663
664 if (!(mask & WRITEMASK_XYZW))
665 return; /* Do not emit dead code */
666
667 assert(is_power_of_two(mask & WRITEMASK_XYZW));
668
669 brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
670 brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
671 brw_MAC(p, dst[dst_chan], arg0[2], arg1[2]);
672
673 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
674 brw_ADD(p, dst[dst_chan], dst[dst_chan], arg1[3]);
675 brw_set_saturate(p, 0);
676 }
677
678
679 static void emit_xpd( struct brw_compile *p,
680 const struct brw_reg *dst,
681 GLuint mask,
682 const struct brw_reg *arg0,
683 const struct brw_reg *arg1 )
684 {
685 GLuint i;
686
687 assert(!(mask & WRITEMASK_W) == WRITEMASK_X);
688
689 for (i = 0 ; i < 3; i++) {
690 if (mask & (1<<i)) {
691 GLuint i2 = (i+2)%3;
692 GLuint i1 = (i+1)%3;
693
694 brw_MUL(p, brw_null_reg(), negate(arg0[i2]), arg1[i1]);
695
696 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
697 brw_MAC(p, dst[i], arg0[i1], arg1[i2]);
698 brw_set_saturate(p, 0);
699 }
700 }
701 }
702
703
704 static void emit_math1( struct brw_compile *p,
705 GLuint function,
706 const struct brw_reg *dst,
707 GLuint mask,
708 const struct brw_reg *arg0 )
709 {
710 int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
711
712 if (!(mask & WRITEMASK_XYZW))
713 return; /* Do not emit dead code */
714
715 assert(is_power_of_two(mask & WRITEMASK_XYZW));
716
717 brw_MOV(p, brw_message_reg(2), arg0[0]);
718
719 /* Send two messages to perform all 16 operations:
720 */
721 brw_math_16(p,
722 dst[dst_chan],
723 function,
724 (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
725 2,
726 brw_null_reg(),
727 BRW_MATH_PRECISION_FULL);
728 }
729
730
731 static void emit_math2( struct brw_compile *p,
732 GLuint function,
733 const struct brw_reg *dst,
734 GLuint mask,
735 const struct brw_reg *arg0,
736 const struct brw_reg *arg1)
737 {
738 int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
739
740 if (!(mask & WRITEMASK_XYZW))
741 return; /* Do not emit dead code */
742
743 assert(is_power_of_two(mask & WRITEMASK_XYZW));
744
745 brw_push_insn_state(p);
746
747 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
748 brw_MOV(p, brw_message_reg(2), arg0[0]);
749 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
750 brw_MOV(p, brw_message_reg(4), sechalf(arg0[0]));
751
752 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
753 brw_MOV(p, brw_message_reg(3), arg1[0]);
754 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
755 brw_MOV(p, brw_message_reg(5), sechalf(arg1[0]));
756
757
758 /* Send two messages to perform all 16 operations:
759 */
760 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
761 brw_math(p,
762 dst[dst_chan],
763 function,
764 (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
765 2,
766 brw_null_reg(),
767 BRW_MATH_DATA_VECTOR,
768 BRW_MATH_PRECISION_FULL);
769
770 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
771 brw_math(p,
772 offset(dst[dst_chan],1),
773 function,
774 (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
775 4,
776 brw_null_reg(),
777 BRW_MATH_DATA_VECTOR,
778 BRW_MATH_PRECISION_FULL);
779
780 brw_pop_insn_state(p);
781 }
782
783
784
785 static void emit_tex( struct brw_wm_compile *c,
786 const struct brw_wm_instruction *inst,
787 struct brw_reg *dst,
788 GLuint dst_flags,
789 struct brw_reg *arg )
790 {
791 struct brw_compile *p = &c->func;
792 GLuint msgLength, responseLength;
793 GLuint i, nr;
794 GLuint emit;
795 GLuint msg_type;
796
797 /* How many input regs are there?
798 */
799 switch (inst->tex_idx) {
800 case TEXTURE_1D_INDEX:
801 emit = WRITEMASK_X;
802 nr = 1;
803 break;
804 case TEXTURE_2D_INDEX:
805 case TEXTURE_RECT_INDEX:
806 emit = WRITEMASK_XY;
807 nr = 2;
808 break;
809 case TEXTURE_3D_INDEX:
810 case TEXTURE_CUBE_INDEX:
811 emit = WRITEMASK_XYZ;
812 nr = 3;
813 break;
814 default:
815 /* unexpected target */
816 abort();
817 }
818
819 if (inst->tex_shadow) {
820 nr = 4;
821 emit |= WRITEMASK_W;
822 }
823
824 msgLength = 1;
825
826 for (i = 0; i < nr; i++) {
827 static const GLuint swz[4] = {0,1,2,2};
828 if (emit & (1<<i))
829 brw_MOV(p, brw_message_reg(msgLength+1), arg[swz[i]]);
830 else
831 brw_MOV(p, brw_message_reg(msgLength+1), brw_imm_f(0));
832 msgLength += 2;
833 }
834
835 responseLength = 8; /* always */
836
837 if (BRW_IS_IGDNG(p->brw)) {
838 if (inst->tex_shadow)
839 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE_IGDNG;
840 else
841 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_IGDNG;
842 } else {
843 if (inst->tex_shadow)
844 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE;
845 else
846 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE;
847 }
848
849 brw_SAMPLE(p,
850 retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW),
851 1,
852 retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW),
853 SURF_INDEX_TEXTURE(inst->tex_unit),
854 inst->tex_unit, /* sampler */
855 inst->writemask,
856 msg_type,
857 responseLength,
858 msgLength,
859 0,
860 1,
861 BRW_SAMPLER_SIMD_MODE_SIMD16);
862 }
863
864
865 static void emit_txb( struct brw_wm_compile *c,
866 const struct brw_wm_instruction *inst,
867 struct brw_reg *dst,
868 GLuint dst_flags,
869 struct brw_reg *arg )
870 {
871 struct brw_compile *p = &c->func;
872 GLuint msgLength;
873 GLuint msg_type;
874 /* Shadow ignored for txb.
875 */
876 switch (inst->tex_idx) {
877 case TEXTURE_1D_INDEX:
878 brw_MOV(p, brw_message_reg(2), arg[0]);
879 brw_MOV(p, brw_message_reg(4), brw_imm_f(0));
880 brw_MOV(p, brw_message_reg(6), brw_imm_f(0));
881 break;
882 case TEXTURE_2D_INDEX:
883 case TEXTURE_RECT_INDEX:
884 brw_MOV(p, brw_message_reg(2), arg[0]);
885 brw_MOV(p, brw_message_reg(4), arg[1]);
886 brw_MOV(p, brw_message_reg(6), brw_imm_f(0));
887 break;
888 case TEXTURE_3D_INDEX:
889 case TEXTURE_CUBE_INDEX:
890 brw_MOV(p, brw_message_reg(2), arg[0]);
891 brw_MOV(p, brw_message_reg(4), arg[1]);
892 brw_MOV(p, brw_message_reg(6), arg[2]);
893 break;
894 default:
895 /* unexpected target */
896 abort();
897 }
898
899 brw_MOV(p, brw_message_reg(8), arg[3]);
900 msgLength = 9;
901
902 if (BRW_IS_IGDNG(p->brw))
903 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS_IGDNG;
904 else
905 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
906
907 brw_SAMPLE(p,
908 retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW),
909 1,
910 retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW),
911 SURF_INDEX_TEXTURE(inst->tex_unit),
912 inst->tex_unit, /* sampler */
913 inst->writemask,
914 msg_type,
915 8, /* responseLength */
916 msgLength,
917 0,
918 1,
919 BRW_SAMPLER_SIMD_MODE_SIMD16);
920 }
921
922
923 static void emit_lit( struct brw_compile *p,
924 const struct brw_reg *dst,
925 GLuint mask,
926 const struct brw_reg *arg0 )
927 {
928 assert((mask & WRITEMASK_XW) == 0);
929
930 if (mask & WRITEMASK_Y) {
931 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
932 brw_MOV(p, dst[1], arg0[0]);
933 brw_set_saturate(p, 0);
934 }
935
936 if (mask & WRITEMASK_Z) {
937 emit_math2(p, BRW_MATH_FUNCTION_POW,
938 &dst[2],
939 WRITEMASK_X | (mask & SATURATE),
940 &arg0[1],
941 &arg0[3]);
942 }
943
944 /* Ordinarily you'd use an iff statement to skip or shortcircuit
945 * some of the POW calculations above, but 16-wide iff statements
946 * seem to lock c1 hardware, so this is a nasty workaround:
947 */
948 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_LE, arg0[0], brw_imm_f(0));
949 {
950 if (mask & WRITEMASK_Y)
951 brw_MOV(p, dst[1], brw_imm_f(0));
952
953 if (mask & WRITEMASK_Z)
954 brw_MOV(p, dst[2], brw_imm_f(0));
955 }
956 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
957 }
958
959
960 /* Kill pixel - set execution mask to zero for those pixels which
961 * fail.
962 */
963 static void emit_kil( struct brw_wm_compile *c,
964 struct brw_reg *arg0)
965 {
966 struct brw_compile *p = &c->func;
967 struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
968 GLuint i;
969
970 /* XXX - usually won't need 4 compares!
971 */
972 for (i = 0; i < 4; i++) {
973 brw_push_insn_state(p);
974 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0[i], brw_imm_f(0));
975 brw_set_predicate_control_flag_value(p, 0xff);
976 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
977 brw_AND(p, r0uw, brw_flag_reg(), r0uw);
978 brw_pop_insn_state(p);
979 }
980 }
981
982 /* KIL_NV kills the pixels that are currently executing, not based on a test
983 * of the arguments.
984 */
985 static void emit_kil_nv( struct brw_wm_compile *c )
986 {
987 struct brw_compile *p = &c->func;
988 struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
989
990 brw_push_insn_state(p);
991 brw_set_mask_control(p, BRW_MASK_DISABLE);
992 brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); //IMASK
993 brw_AND(p, r0uw, c->emit_mask_reg, r0uw);
994 brw_pop_insn_state(p);
995 }
996
997 static void fire_fb_write( struct brw_wm_compile *c,
998 GLuint base_reg,
999 GLuint nr,
1000 GLuint target,
1001 GLuint eot )
1002 {
1003 struct brw_compile *p = &c->func;
1004
1005 /* Pass through control information:
1006 */
1007 /* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */
1008 {
1009 brw_push_insn_state(p);
1010 brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */
1011 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1012 brw_MOV(p,
1013 brw_message_reg(base_reg + 1),
1014 brw_vec8_grf(1, 0));
1015 brw_pop_insn_state(p);
1016 }
1017
1018 /* Send framebuffer write message: */
1019 /* send (16) null.0<1>:uw m0 r0.0<8;8,1>:uw 0x85a04000:ud { Align1 EOT } */
1020 brw_fb_WRITE(p,
1021 retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW),
1022 base_reg,
1023 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
1024 target,
1025 nr,
1026 0,
1027 eot);
1028 }
1029
1030
1031 static void emit_aa( struct brw_wm_compile *c,
1032 struct brw_reg *arg1,
1033 GLuint reg )
1034 {
1035 struct brw_compile *p = &c->func;
1036 GLuint comp = c->key.aa_dest_stencil_reg / 2;
1037 GLuint off = c->key.aa_dest_stencil_reg % 2;
1038 struct brw_reg aa = offset(arg1[comp], off);
1039
1040 brw_push_insn_state(p);
1041 brw_set_compression_control(p, BRW_COMPRESSION_NONE); /* ?? */
1042 brw_MOV(p, brw_message_reg(reg), aa);
1043 brw_pop_insn_state(p);
1044 }
1045
1046
1047 /* Post-fragment-program processing. Send the results to the
1048 * framebuffer.
1049 * \param arg0 the fragment color
1050 * \param arg1 the pass-through depth value
1051 * \param arg2 the shader-computed depth value
1052 */
1053 static void emit_fb_write( struct brw_wm_compile *c,
1054 struct brw_reg *arg0,
1055 struct brw_reg *arg1,
1056 struct brw_reg *arg2,
1057 GLuint target,
1058 GLuint eot)
1059 {
1060 struct brw_compile *p = &c->func;
1061 GLuint nr = 2;
1062 GLuint channel;
1063
1064 /* Reserve a space for AA - may not be needed:
1065 */
1066 if (c->key.aa_dest_stencil_reg)
1067 nr += 1;
1068
1069 /* I don't really understand how this achieves the color interleave
1070 * (ie RGBARGBA) in the result: [Do the saturation here]
1071 */
1072 {
1073 brw_push_insn_state(p);
1074
1075 for (channel = 0; channel < 4; channel++) {
1076 /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */
1077 /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */
1078
1079 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1080 brw_MOV(p,
1081 brw_message_reg(nr + channel),
1082 arg0[channel]);
1083
1084 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
1085 brw_MOV(p,
1086 brw_message_reg(nr + channel + 4),
1087 sechalf(arg0[channel]));
1088 }
1089
1090 /* skip over the regs populated above:
1091 */
1092 nr += 8;
1093
1094 brw_pop_insn_state(p);
1095 }
1096
1097 if (c->key.source_depth_to_render_target)
1098 {
1099 if (c->key.computes_depth)
1100 brw_MOV(p, brw_message_reg(nr), arg2[2]);
1101 else
1102 brw_MOV(p, brw_message_reg(nr), arg1[1]); /* ? */
1103
1104 nr += 2;
1105 }
1106
1107 if (c->key.dest_depth_reg)
1108 {
1109 GLuint comp = c->key.dest_depth_reg / 2;
1110 GLuint off = c->key.dest_depth_reg % 2;
1111
1112 if (off != 0) {
1113 brw_push_insn_state(p);
1114 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1115
1116 brw_MOV(p, brw_message_reg(nr), offset(arg1[comp],1));
1117 /* 2nd half? */
1118 brw_MOV(p, brw_message_reg(nr+1), arg1[comp+1]);
1119 brw_pop_insn_state(p);
1120 }
1121 else {
1122 brw_MOV(p, brw_message_reg(nr), arg1[comp]);
1123 }
1124 nr += 2;
1125 }
1126
1127 if (!c->key.runtime_check_aads_emit) {
1128 if (c->key.aa_dest_stencil_reg)
1129 emit_aa(c, arg1, 2);
1130
1131 fire_fb_write(c, 0, nr, target, eot);
1132 }
1133 else {
1134 struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
1135 struct brw_reg ip = brw_ip_reg();
1136 struct brw_instruction *jmp;
1137
1138 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1139 brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
1140 brw_AND(p,
1141 v1_null_ud,
1142 get_element_ud(brw_vec8_grf(1,0), 6),
1143 brw_imm_ud(1<<26));
1144
1145 jmp = brw_JMPI(p, ip, ip, brw_imm_d(0));
1146 {
1147 emit_aa(c, arg1, 2);
1148 fire_fb_write(c, 0, nr, target, eot);
1149 /* note - thread killed in subroutine */
1150 }
1151 brw_land_fwd_jump(p, jmp);
1152
1153 /* ELSE: Shuffle up one register to fill in the hole left for AA:
1154 */
1155 fire_fb_write(c, 1, nr-1, target, eot);
1156 }
1157 }
1158
1159
1160 /**
1161 * Move a GPR to scratch memory.
1162 */
1163 static void emit_spill( struct brw_wm_compile *c,
1164 struct brw_reg reg,
1165 GLuint slot )
1166 {
1167 struct brw_compile *p = &c->func;
1168
1169 /*
1170 mov (16) m2.0<1>:ud r2.0<8;8,1>:ud { Align1 Compr }
1171 */
1172 brw_MOV(p, brw_message_reg(2), reg);
1173
1174 /*
1175 mov (1) r0.2<1>:d 0x00000080:d { Align1 NoMask }
1176 send (16) null.0<1>:uw m1 r0.0<8;8,1>:uw 0x053003ff:ud { Align1 }
1177 */
1178 brw_dp_WRITE_16(p,
1179 retype(vec16(brw_vec8_grf(0, 0)), BRW_REGISTER_TYPE_UW),
1180 slot);
1181 }
1182
1183
1184 /**
1185 * Load a GPR from scratch memory.
1186 */
1187 static void emit_unspill( struct brw_wm_compile *c,
1188 struct brw_reg reg,
1189 GLuint slot )
1190 {
1191 struct brw_compile *p = &c->func;
1192
1193 /* Slot 0 is the undef value.
1194 */
1195 if (slot == 0) {
1196 brw_MOV(p, reg, brw_imm_f(0));
1197 return;
1198 }
1199
1200 /*
1201 mov (1) r0.2<1>:d 0x000000c0:d { Align1 NoMask }
1202 send (16) r110.0<1>:uw m1 r0.0<8;8,1>:uw 0x041243ff:ud { Align1 }
1203 */
1204
1205 brw_dp_READ_16(p,
1206 retype(vec16(reg), BRW_REGISTER_TYPE_UW),
1207 slot);
1208 }
1209
1210
1211 /**
1212 * Retrieve up to 4 GEN4 register pairs for the given wm reg:
1213 * Args with unspill_reg != 0 will be loaded from scratch memory.
1214 */
1215 static void get_argument_regs( struct brw_wm_compile *c,
1216 struct brw_wm_ref *arg[],
1217 struct brw_reg *regs )
1218 {
1219 GLuint i;
1220
1221 for (i = 0; i < 4; i++) {
1222 if (arg[i]) {
1223 if (arg[i]->unspill_reg)
1224 emit_unspill(c,
1225 brw_vec8_grf(arg[i]->unspill_reg, 0),
1226 arg[i]->value->spill_slot);
1227
1228 regs[i] = arg[i]->hw_reg;
1229 }
1230 else {
1231 regs[i] = brw_null_reg();
1232 }
1233 }
1234 }
1235
1236
1237 /**
1238 * For values that have a spill_slot!=0, write those regs to scratch memory.
1239 */
1240 static void spill_values( struct brw_wm_compile *c,
1241 struct brw_wm_value *values,
1242 GLuint nr )
1243 {
1244 GLuint i;
1245
1246 for (i = 0; i < nr; i++)
1247 if (values[i].spill_slot)
1248 emit_spill(c, values[i].hw_reg, values[i].spill_slot);
1249 }
1250
1251
1252 /* Emit the fragment program instructions here.
1253 */
1254 void brw_wm_emit( struct brw_wm_compile *c )
1255 {
1256 struct brw_compile *p = &c->func;
1257 GLuint insn;
1258
1259 brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
1260
1261 /* Check if any of the payload regs need to be spilled:
1262 */
1263 spill_values(c, c->payload.depth, 4);
1264 spill_values(c, c->creg, c->nr_creg);
1265 spill_values(c, c->payload.input_interp, FRAG_ATTRIB_MAX);
1266
1267
1268 for (insn = 0; insn < c->nr_insns; insn++) {
1269
1270 struct brw_wm_instruction *inst = &c->instruction[insn];
1271 struct brw_reg args[3][4], dst[4];
1272 GLuint i, dst_flags;
1273
1274 /* Get argument regs:
1275 */
1276 for (i = 0; i < 3; i++)
1277 get_argument_regs(c, inst->src[i], args[i]);
1278
1279 /* Get dest regs:
1280 */
1281 for (i = 0; i < 4; i++)
1282 if (inst->dst[i])
1283 dst[i] = inst->dst[i]->hw_reg;
1284 else
1285 dst[i] = brw_null_reg();
1286
1287 /* Flags
1288 */
1289 dst_flags = inst->writemask;
1290 if (inst->saturate)
1291 dst_flags |= SATURATE;
1292
1293 switch (inst->opcode) {
1294 /* Generated instructions for calculating triangle interpolants:
1295 */
1296 case WM_PIXELXY:
1297 emit_pixel_xy(p, dst, dst_flags);
1298 break;
1299
1300 case WM_DELTAXY:
1301 emit_delta_xy(p, dst, dst_flags, args[0]);
1302 break;
1303
1304 case WM_WPOSXY:
1305 emit_wpos_xy(c, dst, dst_flags, args[0]);
1306 break;
1307
1308 case WM_PIXELW:
1309 emit_pixel_w(p, dst, dst_flags, args[0], args[1]);
1310 break;
1311
1312 case WM_LINTERP:
1313 emit_linterp(p, dst, dst_flags, args[0], args[1]);
1314 break;
1315
1316 case WM_PINTERP:
1317 emit_pinterp(p, dst, dst_flags, args[0], args[1], args[2]);
1318 break;
1319
1320 case WM_CINTERP:
1321 emit_cinterp(p, dst, dst_flags, args[0]);
1322 break;
1323
1324 case WM_FB_WRITE:
1325 emit_fb_write(c, args[0], args[1], args[2], inst->target, inst->eot);
1326 break;
1327
1328 case WM_FRONTFACING:
1329 emit_frontfacing(p, dst, dst_flags);
1330 break;
1331
1332 /* Straightforward arithmetic:
1333 */
1334 case OPCODE_ADD:
1335 emit_alu2(p, brw_ADD, dst, dst_flags, args[0], args[1]);
1336 break;
1337
1338 case OPCODE_FRC:
1339 emit_alu1(p, brw_FRC, dst, dst_flags, args[0]);
1340 break;
1341
1342 case OPCODE_FLR:
1343 emit_alu1(p, brw_RNDD, dst, dst_flags, args[0]);
1344 break;
1345
1346 case OPCODE_DDX:
1347 emit_ddxy(p, dst, dst_flags, GL_TRUE, args[0]);
1348 break;
1349
1350 case OPCODE_DDY:
1351 emit_ddxy(p, dst, dst_flags, GL_FALSE, args[0]);
1352 break;
1353
1354 case OPCODE_DP3:
1355 emit_dp3(p, dst, dst_flags, args[0], args[1]);
1356 break;
1357
1358 case OPCODE_DP4:
1359 emit_dp4(p, dst, dst_flags, args[0], args[1]);
1360 break;
1361
1362 case OPCODE_DPH:
1363 emit_dph(p, dst, dst_flags, args[0], args[1]);
1364 break;
1365
1366 case OPCODE_TRUNC:
1367 emit_trunc(p, dst, dst_flags, args[0]);
1368 break;
1369
1370 case OPCODE_LRP:
1371 emit_lrp(p, dst, dst_flags, args[0], args[1], args[2]);
1372 break;
1373
1374 case OPCODE_MAD:
1375 emit_mad(p, dst, dst_flags, args[0], args[1], args[2]);
1376 break;
1377
1378 case OPCODE_MOV:
1379 case OPCODE_SWZ:
1380 emit_alu1(p, brw_MOV, dst, dst_flags, args[0]);
1381 break;
1382
1383 case OPCODE_MUL:
1384 emit_alu2(p, brw_MUL, dst, dst_flags, args[0], args[1]);
1385 break;
1386
1387 case OPCODE_XPD:
1388 emit_xpd(p, dst, dst_flags, args[0], args[1]);
1389 break;
1390
1391 /* Higher math functions:
1392 */
1393 case OPCODE_RCP:
1394 emit_math1(p, BRW_MATH_FUNCTION_INV, dst, dst_flags, args[0]);
1395 break;
1396
1397 case OPCODE_RSQ:
1398 emit_math1(p, BRW_MATH_FUNCTION_RSQ, dst, dst_flags, args[0]);
1399 break;
1400
1401 case OPCODE_SIN:
1402 emit_math1(p, BRW_MATH_FUNCTION_SIN, dst, dst_flags, args[0]);
1403 break;
1404
1405 case OPCODE_COS:
1406 emit_math1(p, BRW_MATH_FUNCTION_COS, dst, dst_flags, args[0]);
1407 break;
1408
1409 case OPCODE_EX2:
1410 emit_math1(p, BRW_MATH_FUNCTION_EXP, dst, dst_flags, args[0]);
1411 break;
1412
1413 case OPCODE_LG2:
1414 emit_math1(p, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]);
1415 break;
1416
1417 case OPCODE_SCS:
1418 /* There is an scs math function, but it would need some
1419 * fixup for 16-element execution.
1420 */
1421 if (dst_flags & WRITEMASK_X)
1422 emit_math1(p, BRW_MATH_FUNCTION_COS, dst, (dst_flags&SATURATE)|WRITEMASK_X, args[0]);
1423 if (dst_flags & WRITEMASK_Y)
1424 emit_math1(p, BRW_MATH_FUNCTION_SIN, dst+1, (dst_flags&SATURATE)|WRITEMASK_X, args[0]);
1425 break;
1426
1427 case OPCODE_POW:
1428 emit_math2(p, BRW_MATH_FUNCTION_POW, dst, dst_flags, args[0], args[1]);
1429 break;
1430
1431 /* Comparisons:
1432 */
1433 case OPCODE_CMP:
1434 emit_cmp(p, dst, dst_flags, args[0], args[1], args[2]);
1435 break;
1436
1437 case OPCODE_MAX:
1438 emit_max(p, dst, dst_flags, args[0], args[1]);
1439 break;
1440
1441 case OPCODE_MIN:
1442 emit_min(p, dst, dst_flags, args[0], args[1]);
1443 break;
1444
1445 case OPCODE_SLT:
1446 emit_slt(p, dst, dst_flags, args[0], args[1]);
1447 break;
1448
1449 case OPCODE_SLE:
1450 emit_sle(p, dst, dst_flags, args[0], args[1]);
1451 break;
1452 case OPCODE_SGT:
1453 emit_sgt(p, dst, dst_flags, args[0], args[1]);
1454 break;
1455 case OPCODE_SGE:
1456 emit_sge(p, dst, dst_flags, args[0], args[1]);
1457 break;
1458 case OPCODE_SEQ:
1459 emit_seq(p, dst, dst_flags, args[0], args[1]);
1460 break;
1461 case OPCODE_SNE:
1462 emit_sne(p, dst, dst_flags, args[0], args[1]);
1463 break;
1464
1465 case OPCODE_LIT:
1466 emit_lit(p, dst, dst_flags, args[0]);
1467 break;
1468
1469 /* Texturing operations:
1470 */
1471 case OPCODE_TEX:
1472 emit_tex(c, inst, dst, dst_flags, args[0]);
1473 break;
1474
1475 case OPCODE_TXB:
1476 emit_txb(c, inst, dst, dst_flags, args[0]);
1477 break;
1478
1479 case OPCODE_KIL:
1480 emit_kil(c, args[0]);
1481 break;
1482
1483 case OPCODE_KIL_NV:
1484 emit_kil_nv(c);
1485 break;
1486
1487 default:
1488 _mesa_printf("Unsupported opcode %i (%s) in fragment shader\n",
1489 inst->opcode, inst->opcode < MAX_OPCODE ?
1490 _mesa_opcode_string(inst->opcode) :
1491 "unknown");
1492 }
1493
1494 for (i = 0; i < 4; i++)
1495 if (inst->dst[i] && inst->dst[i]->spill_slot)
1496 emit_spill(c,
1497 inst->dst[i]->hw_reg,
1498 inst->dst[i]->spill_slot);
1499 }
1500
1501 if (INTEL_DEBUG & DEBUG_WM) {
1502 int i;
1503
1504 _mesa_printf("wm-native:\n");
1505 for (i = 0; i < p->nr_insn; i++)
1506 brw_disasm(stderr, &p->store[i]);
1507 _mesa_printf("\n");
1508 }
1509 }