i965g: fixes to build after merge of master
[mesa.git] / src / gallium / drivers / i965 / brw_wm_fp.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "pipe/p_shader_tokens.h"
34
35 #include "util/u_math.h"
36 #include "util/u_memory.h"
37
38 #include "tgsi/tgsi_parse.h"
39 #include "tgsi/tgsi_dump.h"
40 #include "tgsi/tgsi_info.h"
41 #include "tgsi/tgsi_util.h"
42
43 #include "brw_wm.h"
44 #include "brw_util.h"
45 #include "brw_debug.h"
46
47
48 /***********************************************************************
49 * Source regs
50 */
51
52 static struct brw_fp_src src_reg(GLuint file, GLuint idx)
53 {
54 struct brw_fp_src reg;
55 reg.file = file;
56 reg.index = idx;
57 reg.swizzle = BRW_SWIZZLE_XYZW;
58 reg.indirect = 0;
59 reg.negate = 0;
60 reg.abs = 0;
61 return reg;
62 }
63
64 static struct brw_fp_src src_reg_from_dst(struct brw_fp_dst dst)
65 {
66 return src_reg(dst.file, dst.index);
67 }
68
69 static struct brw_fp_src src_undef( void )
70 {
71 return src_reg(TGSI_FILE_NULL, 0);
72 }
73
74 static GLboolean src_is_undef(struct brw_fp_src src)
75 {
76 return src.file == TGSI_FILE_NULL;
77 }
78
79 static struct brw_fp_src src_swizzle( struct brw_fp_src reg, int x, int y, int z, int w )
80 {
81 unsigned swz = reg.swizzle;
82
83 reg.swizzle = ( BRW_GET_SWZ(swz, x) << 0 |
84 BRW_GET_SWZ(swz, y) << 2 |
85 BRW_GET_SWZ(swz, z) << 4 |
86 BRW_GET_SWZ(swz, w) << 6 );
87
88 return reg;
89 }
90
91 static struct brw_fp_src src_scalar( struct brw_fp_src reg, int x )
92 {
93 return src_swizzle(reg, x, x, x, x);
94 }
95
96 static struct brw_fp_src src_abs( struct brw_fp_src src )
97 {
98 src.negate = 0;
99 src.abs = 1;
100 return src;
101 }
102
103 static struct brw_fp_src src_negate( struct brw_fp_src src )
104 {
105 src.negate = 1;
106 src.abs = 0;
107 return src;
108 }
109
110
/**
 * Try to express the \p nr floats in \p v using the immediate vector
 * \p v2, which currently holds \p *nr2 values and has room for four.
 *
 * Values already present are reused; missing ones are appended.  On
 * success \p v2 / \p *nr2 are updated, \p *swizzle receives one two-bit
 * channel selector per input value (value i at bit offset i*2) and the
 * function returns non-zero.
 *
 * On failure (not enough free channels) zero is returned and \p v2 and
 * \p *nr2 are left exactly as they were.  New values are staged locally
 * and only committed once every input has been placed, so a failed
 * attempt no longer burns channels of an immediate that ends up unused
 * by the caller.
 */
static int match_or_expand_immediate( const float *v,
                                      unsigned nr,
                                      float *v2,
                                      unsigned *nr2,
                                      unsigned *swizzle )
{
   float staged[4];
   unsigned nr_staged = 0;
   unsigned swz = 0;
   unsigned i, j;

   *swizzle = 0;

   for (i = 0; i < nr; i++) {
      unsigned slot = 0;
      int found = 0;

      /* Values already committed to the immediate. */
      for (j = 0; j < *nr2 && !found; j++) {
         if (v[i] == v2[j]) {
            slot = j;
            found = 1;
         }
      }

      /* Values staged earlier in this same call. */
      for (j = 0; j < nr_staged && !found; j++) {
         if (v[i] == staged[j]) {
            slot = *nr2 + j;
            found = 1;
         }
      }

      if (!found) {
         if (*nr2 + nr_staged >= 4)
            return 0;

         staged[nr_staged] = v[i];
         slot = *nr2 + nr_staged;
         nr_staged++;
      }

      swz |= slot << (i * 2);
   }

   /* Everything fits: commit the staged values. */
   for (j = 0; j < nr_staged; j++)
      v2[*nr2 + j] = staged[j];

   *nr2 += nr_staged;
   *swizzle = swz;

   return 1;
}
143
144
145
146 /* Internally generated immediates: overkill...
147 */
148 static struct brw_fp_src src_imm( struct brw_wm_compile *c,
149 const GLfloat *v,
150 unsigned nr)
151 {
152 unsigned i, j;
153 unsigned swizzle;
154
155 /* Could do a first pass where we examine all existing immediates
156 * without expanding.
157 */
158
159 for (i = 0; i < c->nr_immediates; i++) {
160 if (match_or_expand_immediate( v,
161 nr,
162 c->immediate[i].v,
163 &c->immediate[i].nr,
164 &swizzle ))
165 goto out;
166 }
167
168 if (c->nr_immediates < Elements(c->immediate)) {
169 i = c->nr_immediates++;
170 if (match_or_expand_immediate( v,
171 nr,
172 c->immediate[i].v,
173 &c->immediate[i].nr,
174 &swizzle ))
175 goto out;
176 }
177
178 c->error = 1;
179 return src_undef();
180
181 out:
182 /* Make sure that all referenced elements are from this immediate.
183 * Has the effect of making size-one immediates into scalars.
184 */
185 for (j = nr; j < 4; j++)
186 swizzle |= (swizzle & 0x3) << (j * 2);
187
188 return src_swizzle( src_reg( TGSI_FILE_IMMEDIATE, i ),
189 BRW_GET_SWZ(swizzle, X),
190 BRW_GET_SWZ(swizzle, Y),
191 BRW_GET_SWZ(swizzle, Z),
192 BRW_GET_SWZ(swizzle, W) );
193 }
194
195
196
197 static struct brw_fp_src src_imm1f( struct brw_wm_compile *c,
198 GLfloat f )
199 {
200 return src_imm(c, &f, 1);
201 }
202
203 static struct brw_fp_src src_imm4f( struct brw_wm_compile *c,
204 GLfloat x,
205 GLfloat y,
206 GLfloat z,
207 GLfloat w)
208 {
209 GLfloat f[4] = {x,y,z,w};
210 return src_imm(c, f, 4);
211 }
212
213
214
215 /***********************************************************************
216 * Dest regs
217 */
218
219 static struct brw_fp_dst dst_reg(GLuint file, GLuint idx)
220 {
221 struct brw_fp_dst reg;
222 reg.file = file;
223 reg.index = idx;
224 reg.writemask = BRW_WRITEMASK_XYZW;
225 reg.indirect = 0;
226 reg.saturate = 0;
227 return reg;
228 }
229
230 static struct brw_fp_dst dst_mask( struct brw_fp_dst reg, int mask )
231 {
232 reg.writemask &= mask;
233 return reg;
234 }
235
236 static struct brw_fp_dst dst_undef( void )
237 {
238 return dst_reg(TGSI_FILE_NULL, 0);
239 }
240
241 static boolean dst_is_undef( struct brw_fp_dst dst )
242 {
243 return dst.file == TGSI_FILE_NULL;
244 }
245
246 static struct brw_fp_dst dst_saturate( struct brw_fp_dst reg, boolean flag )
247 {
248 reg.saturate = flag;
249 return reg;
250 }
251
252 static struct brw_fp_dst get_temp( struct brw_wm_compile *c )
253 {
254 int bit = ffs( ~c->fp_temp );
255
256 if (!bit) {
257 debug_printf("%s: out of temporaries\n", __FILE__);
258 }
259
260 c->fp_temp |= 1<<(bit-1);
261 return dst_reg(TGSI_FILE_TEMPORARY, c->fp_first_internal_temp+(bit-1));
262 }
263
264
265 static void release_temp( struct brw_wm_compile *c, struct brw_fp_dst temp )
266 {
267 c->fp_temp &= ~(1 << (temp.index - c->fp_first_internal_temp));
268 }
269
270
271 /***********************************************************************
272 * Instructions
273 */
274
275 static struct brw_fp_instruction *get_fp_inst(struct brw_wm_compile *c)
276 {
277 return &c->fp_instructions[c->nr_fp_insns++];
278 }
279
280 static struct brw_fp_instruction * emit_tex_op(struct brw_wm_compile *c,
281 GLuint op,
282 struct brw_fp_dst dest,
283 GLuint tex_unit,
284 GLuint target,
285 GLuint sampler,
286 struct brw_fp_src src0,
287 struct brw_fp_src src1,
288 struct brw_fp_src src2 )
289 {
290 struct brw_fp_instruction *inst = get_fp_inst(c);
291
292 if (tex_unit || target)
293 assert(op == TGSI_OPCODE_TXP ||
294 op == TGSI_OPCODE_TXB ||
295 op == TGSI_OPCODE_TEX ||
296 op == WM_FB_WRITE);
297
298 inst->opcode = op;
299 inst->dst = dest;
300 inst->tex_unit = tex_unit;
301 inst->target = target;
302 inst->sampler = sampler;
303 inst->src[0] = src0;
304 inst->src[1] = src1;
305 inst->src[2] = src2;
306
307 return inst;
308 }
309
310
311 static INLINE void emit_op3(struct brw_wm_compile *c,
312 GLuint op,
313 struct brw_fp_dst dest,
314 struct brw_fp_src src0,
315 struct brw_fp_src src1,
316 struct brw_fp_src src2 )
317 {
318 emit_tex_op(c, op, dest, 0, 0, 0, src0, src1, src2);
319 }
320
321
322 static INLINE void emit_op2(struct brw_wm_compile *c,
323 GLuint op,
324 struct brw_fp_dst dest,
325 struct brw_fp_src src0,
326 struct brw_fp_src src1)
327 {
328 emit_tex_op(c, op, dest, 0, 0, 0, src0, src1, src_undef());
329 }
330
331 static INLINE void emit_op1(struct brw_wm_compile *c,
332 GLuint op,
333 struct brw_fp_dst dest,
334 struct brw_fp_src src0)
335 {
336 emit_tex_op(c, op, dest, 0, 0, 0, src0, src_undef(), src_undef());
337 }
338
339 static INLINE void emit_op0(struct brw_wm_compile *c,
340 GLuint op,
341 struct brw_fp_dst dest)
342 {
343 emit_tex_op(c, op, dest, 0, 0, 0, src_undef(), src_undef(), src_undef());
344 }
345
346
347
348 /* Many opcodes produce the same value across all the result channels.
349 * We'd rather not have to support that splatting in the opcode implementations,
350 * and brw_wm_pass*.c wants to optimize them out by shuffling references around
351 * anyway. We can easily get both by emitting the opcode to one channel, and
352 * then MOVing it to the others, which brw_wm_pass*.c already understands.
353 */
354 static void emit_scalar_insn(struct brw_wm_compile *c,
355 unsigned opcode,
356 struct brw_fp_dst dst,
357 struct brw_fp_src src0,
358 struct brw_fp_src src1,
359 struct brw_fp_src src2 )
360 {
361 unsigned first_chan = ffs(dst.writemask) - 1;
362 unsigned first_mask = 1 << first_chan;
363
364 if (dst.writemask == 0)
365 return;
366
367 emit_op3( c, opcode,
368 dst_mask(dst, first_mask),
369 src0, src1, src2 );
370
371 if (dst.writemask != first_mask) {
372 emit_op1(c, TGSI_OPCODE_MOV,
373 dst_mask(dst, ~first_mask),
374 src_scalar(src_reg_from_dst(dst), first_chan));
375 }
376 }
377
378
379 /***********************************************************************
380 * Special instructions for interpolation and other tasks
381 */
382
383 static struct brw_fp_src get_pixel_xy( struct brw_wm_compile *c )
384 {
385 if (src_is_undef(c->fp_pixel_xy)) {
386 struct brw_fp_dst pixel_xy = get_temp(c);
387 struct brw_fp_src payload_r0_depth = src_reg(BRW_FILE_PAYLOAD, PAYLOAD_DEPTH);
388
389
390 /* Emit the out calculations, and hold onto the results. Use
391 * two instructions as a temporary is required.
392 */
393 /* pixel_xy.xy = PIXELXY payload[0];
394 */
395 emit_op1(c,
396 WM_PIXELXY,
397 dst_mask(pixel_xy, BRW_WRITEMASK_XY),
398 payload_r0_depth);
399
400 c->fp_pixel_xy = src_reg_from_dst(pixel_xy);
401 }
402
403 return c->fp_pixel_xy;
404 }
405
406 static struct brw_fp_src get_delta_xy( struct brw_wm_compile *c )
407 {
408 if (src_is_undef(c->fp_delta_xy)) {
409 struct brw_fp_dst delta_xy = get_temp(c);
410 struct brw_fp_src pixel_xy = get_pixel_xy(c);
411 struct brw_fp_src payload_r0_depth = src_reg(BRW_FILE_PAYLOAD, PAYLOAD_DEPTH);
412
413 /* deltas.xy = DELTAXY pixel_xy, payload[0]
414 */
415 emit_op3(c,
416 WM_DELTAXY,
417 dst_mask(delta_xy, BRW_WRITEMASK_XY),
418 pixel_xy,
419 payload_r0_depth,
420 src_undef());
421
422 c->fp_delta_xy = src_reg_from_dst(delta_xy);
423 }
424
425 return c->fp_delta_xy;
426 }
427
428 static struct brw_fp_src get_pixel_w( struct brw_wm_compile *c )
429 {
430 if (src_is_undef(c->fp_pixel_w)) {
431 struct brw_fp_dst pixel_w = get_temp(c);
432 struct brw_fp_src deltas = get_delta_xy(c);
433
434 /* XXX: assuming position is always first -- valid?
435 */
436 struct brw_fp_src interp_wpos = src_reg(BRW_FILE_PAYLOAD, 0);
437
438 /* deltas.xyw = DELTAS2 deltas.xy, payload.interp_wpos.x
439 */
440 emit_op3(c,
441 WM_PIXELW,
442 dst_mask(pixel_w, BRW_WRITEMASK_W),
443 interp_wpos,
444 deltas,
445 src_undef());
446
447
448 c->fp_pixel_w = src_reg_from_dst(pixel_w);
449 }
450
451 return c->fp_pixel_w;
452 }
453
454
455 /***********************************************************************
456 * Emit INTERP instructions ahead of first use of each attrib.
457 */
458
459 static void emit_interp( struct brw_wm_compile *c,
460 GLuint idx,
461 GLuint semantic,
462 GLuint interp_mode )
463 {
464 struct brw_fp_dst dst = dst_reg(TGSI_FILE_INPUT, idx);
465 struct brw_fp_src interp = src_reg(BRW_FILE_PAYLOAD, idx);
466 struct brw_fp_src deltas = get_delta_xy(c);
467
468 /* Need to use PINTERP on attributes which have been
469 * multiplied by 1/W in the SF program, and LINTERP on those
470 * which have not:
471 */
472 switch (semantic) {
473 case TGSI_SEMANTIC_POSITION:
474 /* Have to treat wpos.xy specially:
475 */
476 emit_op1(c,
477 WM_WPOSXY,
478 dst_mask(dst, BRW_WRITEMASK_XY),
479 get_pixel_xy(c));
480
481 /* TGSI_FILE_INPUT.attr.xyzw = INTERP payload.interp[attr].x, deltas.xyw
482 */
483 emit_op2(c,
484 WM_LINTERP,
485 dst_mask(dst, BRW_WRITEMASK_ZW),
486 interp,
487 deltas);
488 break;
489
490 case TGSI_SEMANTIC_COLOR:
491 if (c->key.flat_shade) {
492 emit_op1(c,
493 WM_CINTERP,
494 dst,
495 interp);
496 }
497 else if (interp_mode == TGSI_INTERPOLATE_LINEAR) {
498 emit_op2(c,
499 WM_LINTERP,
500 dst,
501 interp,
502 deltas);
503 }
504 else {
505 emit_op3(c,
506 WM_PINTERP,
507 dst,
508 interp,
509 deltas,
510 get_pixel_w(c));
511 }
512
513 break;
514
515 case TGSI_SEMANTIC_FOG:
516 /* Interpolate the fog coordinate */
517 emit_op3(c,
518 WM_PINTERP,
519 dst_mask(dst, BRW_WRITEMASK_X),
520 interp,
521 deltas,
522 get_pixel_w(c));
523
524 emit_op1(c,
525 TGSI_OPCODE_MOV,
526 dst_mask(dst, BRW_WRITEMASK_YZ),
527 src_imm1f(c, 0.0));
528
529 emit_op1(c,
530 TGSI_OPCODE_MOV,
531 dst_mask(dst, BRW_WRITEMASK_W),
532 src_imm1f(c, 1.0));
533 break;
534
535 case TGSI_SEMANTIC_FACE:
536 /* XXX review/test this case */
537 emit_op0(c,
538 WM_FRONTFACING,
539 dst_mask(dst, BRW_WRITEMASK_X));
540
541 emit_op1(c,
542 TGSI_OPCODE_MOV,
543 dst_mask(dst, BRW_WRITEMASK_YZ),
544 src_imm1f(c, 0.0));
545
546 emit_op1(c,
547 TGSI_OPCODE_MOV,
548 dst_mask(dst, BRW_WRITEMASK_W),
549 src_imm1f(c, 1.0));
550 break;
551
552 case TGSI_SEMANTIC_PSIZE:
553 /* XXX review/test this case */
554 emit_op3(c,
555 WM_PINTERP,
556 dst_mask(dst, BRW_WRITEMASK_XY),
557 interp,
558 deltas,
559 get_pixel_w(c));
560
561 emit_op1(c,
562 TGSI_OPCODE_MOV,
563 dst_mask(dst, BRW_WRITEMASK_Z),
564 src_imm1f(c, 0.0f));
565
566 emit_op1(c,
567 TGSI_OPCODE_MOV,
568 dst_mask(dst, BRW_WRITEMASK_W),
569 src_imm1f(c, 1.0f));
570 break;
571
572 default:
573 switch (interp_mode) {
574 case TGSI_INTERPOLATE_CONSTANT:
575 emit_op1(c,
576 WM_CINTERP,
577 dst,
578 interp);
579 break;
580
581 case TGSI_INTERPOLATE_LINEAR:
582 emit_op2(c,
583 WM_LINTERP,
584 dst,
585 interp,
586 deltas);
587 break;
588
589 case TGSI_INTERPOLATE_PERSPECTIVE:
590 emit_op3(c,
591 WM_PINTERP,
592 dst,
593 interp,
594 deltas,
595 get_pixel_w(c));
596 break;
597 }
598 break;
599 }
600 }
601
602
603 /***********************************************************************
604 * Expand various instructions here to simpler forms.
605 */
606 static void precalc_dst( struct brw_wm_compile *c,
607 struct brw_fp_dst dst,
608 struct brw_fp_src src0,
609 struct brw_fp_src src1 )
610 {
611 if (dst.writemask & BRW_WRITEMASK_Y) {
612 /* dst.y = mul src0.y, src1.y
613 */
614 emit_op2(c,
615 TGSI_OPCODE_MUL,
616 dst_mask(dst, BRW_WRITEMASK_Y),
617 src0,
618 src1);
619 }
620
621 if (dst.writemask & BRW_WRITEMASK_XZ) {
622 /* dst.z = mov src0.zzzz
623 */
624 emit_op1(c,
625 TGSI_OPCODE_MOV,
626 dst_mask(dst, BRW_WRITEMASK_Z),
627 src_scalar(src0, Z));
628
629 /* dst.x = imm1f(1.0)
630 */
631 emit_op1(c,
632 TGSI_OPCODE_MOV,
633 dst_saturate(dst_mask(dst, BRW_WRITEMASK_X), 0),
634 src_imm1f(c, 1.0));
635 }
636 if (dst.writemask & BRW_WRITEMASK_W) {
637 /* dst.w = mov src1.w
638 */
639 emit_op1(c,
640 TGSI_OPCODE_MOV,
641 dst_mask(dst, BRW_WRITEMASK_W),
642 src1);
643 }
644 }
645
646
647 static void precalc_lit( struct brw_wm_compile *c,
648 struct brw_fp_dst dst,
649 struct brw_fp_src src0 )
650 {
651 if (dst.writemask & BRW_WRITEMASK_XW) {
652 /* dst.xw = imm(1.0f)
653 */
654 emit_op1(c,
655 TGSI_OPCODE_MOV,
656 dst_saturate(dst_mask(dst, BRW_WRITEMASK_XW), 0),
657 src_imm1f(c, 1.0f));
658 }
659
660 if (dst.writemask & BRW_WRITEMASK_YZ) {
661 emit_op1(c,
662 TGSI_OPCODE_LIT,
663 dst_mask(dst, BRW_WRITEMASK_YZ),
664 src0);
665 }
666 }
667
668
669 /**
670 * Some TEX instructions require extra code, cube map coordinate
671 * normalization, or coordinate scaling for RECT textures, etc.
672 * This function emits those extra instructions and the TEX
673 * instruction itself.
674 */
675 static void precalc_tex( struct brw_wm_compile *c,
676 struct brw_fp_dst dst,
677 unsigned target,
678 unsigned unit,
679 struct brw_fp_src src0,
680 struct brw_fp_src sampler )
681 {
682 struct brw_fp_src coord = src_undef();
683 struct brw_fp_dst tmp = dst_undef();
684
685 assert(unit < BRW_MAX_TEX_UNIT);
686
687 /* Cubemap: find longest component of coord vector and normalize
688 * it.
689 */
690 if (target == TGSI_TEXTURE_CUBE) {
691 struct brw_fp_src tmpsrc;
692
693 tmp = get_temp(c);
694 tmpsrc = src_reg_from_dst(tmp);
695
696 /* tmp = abs(src0) */
697 emit_op1(c,
698 TGSI_OPCODE_MOV,
699 tmp,
700 src_abs(src0));
701
702 /* tmp.X = MAX(tmp.X, tmp.Y) */
703 emit_op2(c, TGSI_OPCODE_MAX,
704 dst_mask(tmp, BRW_WRITEMASK_X),
705 src_scalar(tmpsrc, X),
706 src_scalar(tmpsrc, Y));
707
708 /* tmp.X = MAX(tmp.X, tmp.Z) */
709 emit_op2(c, TGSI_OPCODE_MAX,
710 dst_mask(tmp, BRW_WRITEMASK_X),
711 tmpsrc,
712 src_scalar(tmpsrc, Z));
713
714 /* tmp.X = 1 / tmp.X */
715 emit_op1(c, TGSI_OPCODE_RCP,
716 dst_mask(tmp, BRW_WRITEMASK_X),
717 tmpsrc);
718
719 /* tmp = src0 * tmp.xxxx */
720 emit_op2(c, TGSI_OPCODE_MUL,
721 tmp,
722 src0,
723 src_scalar(tmpsrc, X));
724
725 coord = tmpsrc;
726 }
727 else if (target == TGSI_TEXTURE_RECT ||
728 target == TGSI_TEXTURE_SHADOWRECT) {
729 /* XXX: need a mechanism for internally generated constants.
730 */
731 coord = src0;
732 }
733 else {
734 coord = src0;
735 }
736
737 /* Need to emit YUV texture conversions by hand. Probably need to
738 * do this here - the alternative is in brw_wm_emit.c, but the
739 * conversion requires allocating a temporary variable which we
740 * don't have the facility to do that late in the compilation.
741 */
742 if (c->key.yuvtex_mask & (1 << unit)) {
743 /* convert ycbcr to RGBA */
744 GLboolean swap_uv = c->key.yuvtex_swap_mask & (1<<unit);
745 struct brw_fp_dst tmp = get_temp(c);
746 struct brw_fp_src tmpsrc = src_reg_from_dst(tmp);
747 struct brw_fp_src C0 = src_imm4f( c, -.5, -.0625, -.5, 1.164 );
748 struct brw_fp_src C1 = src_imm4f( c, 1.596, -0.813, 2.018, -.391 );
749
750 /* tmp = TEX ...
751 */
752 emit_tex_op(c,
753 TGSI_OPCODE_TEX,
754 dst_saturate(tmp, dst.saturate),
755 unit,
756 target,
757 sampler.index,
758 coord,
759 src_undef(),
760 src_undef());
761
762 /* tmp.xyz = ADD TMP, C0
763 */
764 emit_op2(c, TGSI_OPCODE_ADD,
765 dst_mask(tmp, BRW_WRITEMASK_XYZ),
766 tmpsrc,
767 C0);
768
769 /* YUV.y = MUL YUV.y, C0.w
770 */
771 emit_op2(c, TGSI_OPCODE_MUL,
772 dst_mask(tmp, BRW_WRITEMASK_Y),
773 tmpsrc,
774 src_scalar(C0, W));
775
776 /*
777 * if (UV swaped)
778 * RGB.xyz = MAD YUV.zzx, C1, YUV.y
779 * else
780 * RGB.xyz = MAD YUV.xxz, C1, YUV.y
781 */
782
783 emit_op3(c, TGSI_OPCODE_MAD,
784 dst_mask(dst, BRW_WRITEMASK_XYZ),
785 ( swap_uv ?
786 src_swizzle(tmpsrc, Z,Z,X,X) :
787 src_swizzle(tmpsrc, X,X,Z,Z)),
788 C1,
789 src_scalar(tmpsrc, Y));
790
791 /* RGB.y = MAD YUV.z, C1.w, RGB.y
792 */
793 emit_op3(c,
794 TGSI_OPCODE_MAD,
795 dst_mask(dst, BRW_WRITEMASK_Y),
796 src_scalar(tmpsrc, Z),
797 src_scalar(C1, W),
798 src_scalar(src_reg_from_dst(dst), Y));
799
800 release_temp(c, tmp);
801 }
802 else {
803 /* ordinary RGBA tex instruction */
804 emit_tex_op(c,
805 TGSI_OPCODE_TEX,
806 dst,
807 unit,
808 target,
809 sampler.index,
810 coord,
811 src_undef(),
812 src_undef());
813 }
814
815 /* XXX: add GL_EXT_texture_swizzle support to gallium -- by
816 * generating shader varients in mesa state tracker.
817 */
818
819 /* Release this temp if we ended up allocating it:
820 */
821 if (!dst_is_undef(tmp))
822 release_temp(c, tmp);
823 }
824
825
826 /**
827 * Check if the given TXP instruction really needs the divide-by-W step.
828 */
829 static GLboolean projtex( struct brw_wm_compile *c,
830 unsigned target,
831 struct brw_fp_src src )
832 {
833 /* Only try to detect the simplest cases. Could detect (later)
834 * cases where we are trying to emit code like RCP {1.0}, MUL x,
835 * {1.0}, and so on.
836 *
837 * More complex cases than this typically only arise from
838 * user-provided fragment programs anyway:
839 */
840 if (target == TGSI_TEXTURE_CUBE)
841 return GL_FALSE; /* ut2004 gun rendering !?! */
842
843 if (src.file == TGSI_FILE_INPUT &&
844 BRW_GET_SWZ(src.swizzle, W) == W &&
845 c->fp->info.input_interpolate[src.index] != TGSI_INTERPOLATE_PERSPECTIVE)
846 return GL_FALSE;
847
848 return GL_TRUE;
849 }
850
851
852 /**
853 * Emit code for TXP.
854 */
855 static void precalc_txp( struct brw_wm_compile *c,
856 struct brw_fp_dst dst,
857 unsigned target,
858 unsigned unit,
859 struct brw_fp_src src0,
860 struct brw_fp_src sampler )
861 {
862 if (projtex(c, target, src0)) {
863 struct brw_fp_dst tmp = get_temp(c);
864
865 /* tmp0.w = RCP inst.arg[0][3]
866 */
867 emit_op1(c,
868 TGSI_OPCODE_RCP,
869 dst_mask(tmp, BRW_WRITEMASK_W),
870 src_scalar(src0, W));
871
872 /* tmp0.xyz = MUL inst.arg[0], tmp0.wwww
873 */
874 emit_op2(c,
875 TGSI_OPCODE_MUL,
876 dst_mask(tmp, BRW_WRITEMASK_XYZ),
877 src0,
878 src_scalar(src_reg_from_dst(tmp), W));
879
880 /* dst = TEX tmp0
881 */
882 precalc_tex(c,
883 dst,
884 target,
885 unit,
886 src_reg_from_dst(tmp),
887 sampler );
888
889 release_temp(c, tmp);
890 }
891 else
892 {
893 /* dst = TEX src0
894 */
895 precalc_tex(c, dst, target, unit, src0, sampler);
896 }
897 }
898
899
900 /* XXX: note this returns a src_reg.
901 */
902 static struct brw_fp_src
903 find_output_by_semantic( struct brw_wm_compile *c,
904 unsigned semantic,
905 unsigned index )
906 {
907 const struct tgsi_shader_info *info = &c->fp->info;
908 unsigned i;
909
910 for (i = 0; i < info->num_outputs; i++)
911 if (info->output_semantic_name[i] == semantic &&
912 info->output_semantic_index[i] == index)
913 return src_reg( TGSI_FILE_OUTPUT, i );
914
915 /* If not found, return some arbitrary immediate value:
916 *
917 * XXX: this is a good idea but immediates are up generating extra
918 * curbe entries atm, as they would have in the original driver.
919 */
920 return src_reg( TGSI_FILE_OUTPUT, 0 ); /* src_imm1f(c, 1.0); */
921 }
922
923
924 static void emit_fb_write( struct brw_wm_compile *c )
925 {
926 struct brw_fp_src payload_r0_depth = src_reg(BRW_FILE_PAYLOAD, PAYLOAD_DEPTH);
927 struct brw_fp_src outdepth = find_output_by_semantic(c, TGSI_SEMANTIC_POSITION, 0);
928 GLuint i;
929
930
931 outdepth = src_scalar(outdepth, Z);
932
933 for (i = 0 ; i < c->key.nr_cbufs; i++) {
934 struct brw_fp_src outcolor;
935
936 outcolor = find_output_by_semantic(c, TGSI_SEMANTIC_COLOR, i);
937
938 /* Use emit_tex_op so that we can specify the inst->target
939 * field, which is abused to contain the FB write target and the
940 * EOT marker
941 */
942 emit_tex_op(c, WM_FB_WRITE,
943 dst_undef(),
944 (i == c->key.nr_cbufs - 1), /* EOT */
945 i,
946 0, /* no sampler */
947 outcolor,
948 payload_r0_depth,
949 outdepth);
950 }
951 }
952
953
954 static struct brw_fp_dst translate_dst( struct brw_wm_compile *c,
955 const struct tgsi_full_dst_register *dst,
956 unsigned saturate )
957 {
958 struct brw_fp_dst out;
959
960 out.file = dst->Register.File;
961 out.index = dst->Register.Index;
962 out.writemask = dst->Register.WriteMask;
963 out.indirect = dst->Register.Indirect;
964 out.saturate = (saturate == TGSI_SAT_ZERO_ONE);
965
966 if (out.indirect) {
967 assert(dst->Indirect.File == TGSI_FILE_ADDRESS);
968 assert(dst->Indirect.Index == 0);
969 }
970
971 return out;
972 }
973
974
975 static struct brw_fp_src translate_src( struct brw_wm_compile *c,
976 const struct tgsi_full_src_register *src )
977 {
978 struct brw_fp_src out;
979
980 out.file = src->Register.File;
981 out.index = src->Register.Index;
982 out.indirect = src->Register.Indirect;
983
984 out.swizzle = ((src->Register.SwizzleX << 0) |
985 (src->Register.SwizzleY << 2) |
986 (src->Register.SwizzleZ << 4) |
987 (src->Register.SwizzleW << 6));
988
989 switch (tgsi_util_get_full_src_register_sign_mode( src, 0 )) {
990 case TGSI_UTIL_SIGN_CLEAR:
991 out.abs = 1;
992 out.negate = 0;
993 break;
994
995 case TGSI_UTIL_SIGN_SET:
996 out.abs = 1;
997 out.negate = 1;
998 break;
999
1000 case TGSI_UTIL_SIGN_TOGGLE:
1001 out.abs = 0;
1002 out.negate = 1;
1003 break;
1004
1005 case TGSI_UTIL_SIGN_KEEP:
1006 default:
1007 out.abs = 0;
1008 out.negate = 0;
1009 break;
1010 }
1011
1012 if (out.indirect) {
1013 assert(src->Indirect.File == TGSI_FILE_ADDRESS);
1014 assert(src->Indirect.Index == 0);
1015 }
1016
1017 return out;
1018 }
1019
1020
1021
1022 static void emit_insn( struct brw_wm_compile *c,
1023 const struct tgsi_full_instruction *inst )
1024 {
1025 unsigned opcode = inst->Instruction.Opcode;
1026 struct brw_fp_dst dst;
1027 struct brw_fp_src src[3];
1028 int i;
1029
1030 dst = translate_dst( c, &inst->Dst[0],
1031 inst->Instruction.Saturate );
1032
1033 for (i = 0; i < inst->Instruction.NumSrcRegs; i++)
1034 src[i] = translate_src( c, &inst->Src[i] );
1035
1036 switch (opcode) {
1037 case TGSI_OPCODE_ABS:
1038 emit_op1(c, TGSI_OPCODE_MOV,
1039 dst,
1040 src_abs(src[0]));
1041 break;
1042
1043 case TGSI_OPCODE_SUB:
1044 emit_op2(c, TGSI_OPCODE_ADD,
1045 dst,
1046 src[0],
1047 src_negate(src[1]));
1048 break;
1049
1050 case TGSI_OPCODE_SCS:
1051 emit_op1(c, TGSI_OPCODE_SCS,
1052 dst_mask(dst, BRW_WRITEMASK_XY),
1053 src[0]);
1054 break;
1055
1056 case TGSI_OPCODE_DST:
1057 precalc_dst(c, dst, src[0], src[1]);
1058 break;
1059
1060 case TGSI_OPCODE_LIT:
1061 precalc_lit(c, dst, src[0]);
1062 break;
1063
1064 case TGSI_OPCODE_TEX:
1065 precalc_tex(c, dst,
1066 inst->Texture.Texture,
1067 src[1].index, /* use sampler unit for tex idx */
1068 src[0], /* coord */
1069 src[1]); /* sampler */
1070 break;
1071
1072 case TGSI_OPCODE_TXP:
1073 precalc_txp(c, dst,
1074 inst->Texture.Texture,
1075 src[1].index, /* use sampler unit for tex idx */
1076 src[0], /* coord */
1077 src[1]); /* sampler */
1078 break;
1079
1080 case TGSI_OPCODE_TXB:
1081 /* XXX: TXB not done
1082 */
1083 precalc_tex(c, dst,
1084 inst->Texture.Texture,
1085 src[1].index, /* use sampler unit for tex idx*/
1086 src[0],
1087 src[1]);
1088 break;
1089
1090 case TGSI_OPCODE_XPD:
1091 emit_op2(c, TGSI_OPCODE_XPD,
1092 dst_mask(dst, BRW_WRITEMASK_XYZ),
1093 src[0],
1094 src[1]);
1095 break;
1096
1097 case TGSI_OPCODE_KIL:
1098 emit_op1(c, TGSI_OPCODE_KIL,
1099 dst_mask(dst_undef(), 0),
1100 src[0]);
1101 break;
1102
1103 case TGSI_OPCODE_END:
1104 emit_fb_write(c);
1105 break;
1106 default:
1107 if (!c->key.has_flow_control &&
1108 brw_wm_is_scalar_result(opcode))
1109 emit_scalar_insn(c, opcode, dst, src[0], src[1], src[2]);
1110 else
1111 emit_op3(c, opcode, dst, src[0], src[1], src[2]);
1112 break;
1113 }
1114 }
1115
1116 /**
1117 * Initial pass for fragment program code generation.
1118 * This function is used by both the GLSL and non-GLSL paths.
1119 */
1120 int brw_wm_pass_fp( struct brw_wm_compile *c )
1121 {
1122 struct brw_fragment_shader *fs = c->fp;
1123 struct tgsi_parse_context parse;
1124 struct tgsi_full_instruction *inst;
1125 struct tgsi_full_declaration *decl;
1126 const float *imm;
1127 GLuint size;
1128 GLuint i;
1129
1130 if (BRW_DEBUG & DEBUG_WM) {
1131 debug_printf("pre-fp:\n");
1132 tgsi_dump(fs->tokens, 0);
1133 }
1134
1135 c->fp_pixel_xy = src_undef();
1136 c->fp_delta_xy = src_undef();
1137 c->fp_pixel_w = src_undef();
1138 c->nr_fp_insns = 0;
1139 c->nr_immediates = 0;
1140
1141
1142 /* Loop over all instructions doing assorted simplifications and
1143 * transformations.
1144 */
1145 tgsi_parse_init( &parse, fs->tokens );
1146 while( !tgsi_parse_end_of_tokens( &parse ) ) {
1147 tgsi_parse_token( &parse );
1148
1149 switch( parse.FullToken.Token.Type ) {
1150 case TGSI_TOKEN_TYPE_DECLARATION:
1151 /* Turn intput declarations into special WM_* instructions.
1152 *
1153 * XXX: For non-branching shaders, consider deferring variable
1154 * initialization as late as possible to minimize register
1155 * usage. This is how the original BRW driver worked.
1156 *
1157 * In a branching shader, must preamble instructions at decl
1158 * time, as instruction order in the shader does not
1159 * correspond to the order instructions are executed in the
1160 * wild.
1161 *
1162 * This is where special instructions such as WM_CINTERP,
1163 * WM_LINTERP, WM_PINTERP and WM_WPOSXY are emitted to
1164 * compute shader inputs from the payload registers and pixel
1165 * position.
1166 */
1167 decl = &parse.FullToken.FullDeclaration;
1168 if( decl->Declaration.File == TGSI_FILE_INPUT ) {
1169 unsigned first, last, mask;
1170 unsigned attrib;
1171
1172 first = decl->Range.First;
1173 last = decl->Range.Last;
1174 mask = decl->Declaration.UsageMask;
1175
1176 for (attrib = first; attrib <= last; attrib++) {
1177 emit_interp(c,
1178 attrib,
1179 decl->Semantic.Name,
1180 decl->Declaration.Interpolate );
1181 }
1182 }
1183
1184 break;
1185
1186 case TGSI_TOKEN_TYPE_IMMEDIATE:
1187 /* Unlike VS programs we can probably manage fine encoding
1188 * immediate values directly into the emitted EU
1189 * instructions, as we probably only need to reference one
1190 * float value per instruction. Just save the data for now
1191 * and use directly later.
1192 */
1193 i = c->nr_immediates++;
1194 imm = &parse.FullToken.FullImmediate.u[i].Float;
1195 size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
1196
1197 if (c->nr_immediates >= BRW_WM_MAX_CONST)
1198 return PIPE_ERROR_OUT_OF_MEMORY;
1199
1200 for (i = 0; i < size; i++)
1201 c->immediate[c->nr_immediates].v[i] = imm[i];
1202
1203 for (; i < 4; i++)
1204 c->immediate[c->nr_immediates].v[i] = 0.0;
1205
1206 c->immediate[c->nr_immediates].nr = size;
1207 c->nr_immediates++;
1208 break;
1209
1210 case TGSI_TOKEN_TYPE_INSTRUCTION:
1211 inst = &parse.FullToken.FullInstruction;
1212 emit_insn(c, inst);
1213 break;
1214 }
1215 }
1216
1217 if (BRW_DEBUG & DEBUG_WM) {
1218 brw_wm_print_fp_program( c, "pass_fp" );
1219 debug_printf("\n");
1220 }
1221
1222 return c->error;
1223 }
1224