i965g: add dumping for our new pass_fp output
[mesa.git] / src / gallium / drivers / i965 / brw_wm_fp.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "pipe/p_shader_tokens.h"
34
35 #include "util/u_math.h"
36 #include "util/u_memory.h"
37
38 #include "tgsi/tgsi_parse.h"
39 #include "tgsi/tgsi_dump.h"
40 #include "tgsi/tgsi_info.h"
41 #include "tgsi/tgsi_util.h"
42
43 #include "brw_wm.h"
44 #include "brw_util.h"
45 #include "brw_debug.h"
46
47
48 /***********************************************************************
49 * Source regs
50 */
51
52 static struct brw_fp_src src_reg(GLuint file, GLuint idx)
53 {
54 struct brw_fp_src reg;
55 reg.file = file;
56 reg.index = idx;
57 reg.swizzle = BRW_SWIZZLE_XYZW;
58 reg.indirect = 0;
59 reg.negate = 0;
60 reg.abs = 0;
61 return reg;
62 }
63
64 static struct brw_fp_src src_reg_from_dst(struct brw_fp_dst dst)
65 {
66 return src_reg(dst.file, dst.index);
67 }
68
69 static struct brw_fp_src src_undef( void )
70 {
71 return src_reg(TGSI_FILE_NULL, 0);
72 }
73
74 static GLboolean src_is_undef(struct brw_fp_src src)
75 {
76 return src.file == TGSI_FILE_NULL;
77 }
78
79 static struct brw_fp_src src_swizzle( struct brw_fp_src reg, int x, int y, int z, int w )
80 {
81 unsigned swz = reg.swizzle;
82
83 reg.swizzle = ( BRW_GET_SWZ(swz, x) << 0 |
84 BRW_GET_SWZ(swz, y) << 2 |
85 BRW_GET_SWZ(swz, z) << 4 |
86 BRW_GET_SWZ(swz, w) << 6 );
87
88 return reg;
89 }
90
91 static struct brw_fp_src src_scalar( struct brw_fp_src reg, int x )
92 {
93 return src_swizzle(reg, x, x, x, x);
94 }
95
96 static struct brw_fp_src src_abs( struct brw_fp_src src )
97 {
98 src.negate = 0;
99 src.abs = 1;
100 return src;
101 }
102
103 static struct brw_fp_src src_negate( struct brw_fp_src src )
104 {
105 src.negate = 1;
106 src.abs = 0;
107 return src;
108 }
109
110
/**
 * Try to express the 'nr' floats in 'v' as a swizzle of the immediate
 * vector 'v2', appending values that 'v2' does not hold yet.
 *
 * \param v        values wanted by the caller
 * \param nr       number of values in 'v'
 * \param v2       storage of an existing immediate (room for 4 floats)
 * \param nr2      in: number of valid floats in 'v2'; out: updated count
 * \param swizzle  out: two bits per requested value, value i in bits
 *                 [2i+1:2i], giving its index in 'v2'
 *
 * \return non-zero (TRUE) on success, zero (FALSE) if the values do
 *         not fit into the four available components.
 *
 * The update is all-or-nothing: on failure 'v2' and '*nr2' are left
 * exactly as they were, so a failed attempt no longer burns free
 * components of an immediate that ends up not being used.
 */
static int match_or_expand_immediate( const float *v,
                                      unsigned nr,
                                      float *v2,
                                      unsigned *nr2,
                                      unsigned *swizzle )
{
   float merged[4];
   unsigned nr_merged = *nr2;
   unsigned swz = 0;
   unsigned i, j;

   *swizzle = 0;

   /* A corrupt component count would make us read past the vector. */
   if (nr_merged > 4)
      return 0;

   /* Work on a scratch copy; commit only if everything fits. */
   for (j = 0; j < nr_merged; j++)
      merged[j] = v2[j];

   for (i = 0; i < nr; i++) {
      /* First component holding this value, if any. */
      for (j = 0; j < nr_merged; j++) {
         if (v[i] == merged[j])
            break;
      }

      if (j == nr_merged) {
         /* Not present: append, if there is still room.  'j' already
          * equals the index the new value will occupy.
          */
         if (nr_merged >= 4)
            return 0;

         merged[nr_merged++] = v[i];
      }

      swz |= j << (i * 2);
   }

   /* Success: publish the newly appended components. */
   for (j = *nr2; j < nr_merged; j++)
      v2[j] = merged[j];

   *nr2 = nr_merged;
   *swizzle = swz;
   return 1;
}
143
144
145
146 /* Internally generated immediates: overkill...
147 */
148 static struct brw_fp_src src_imm( struct brw_wm_compile *c,
149 const GLfloat *v,
150 unsigned nr)
151 {
152 unsigned i, j;
153 unsigned swizzle;
154
155 /* Could do a first pass where we examine all existing immediates
156 * without expanding.
157 */
158
159 for (i = 0; i < c->nr_immediates; i++) {
160 if (match_or_expand_immediate( v,
161 nr,
162 c->immediate[i].v,
163 &c->immediate[i].nr,
164 &swizzle ))
165 goto out;
166 }
167
168 if (c->nr_immediates < Elements(c->immediate)) {
169 i = c->nr_immediates++;
170 if (match_or_expand_immediate( v,
171 nr,
172 c->immediate[i].v,
173 &c->immediate[i].nr,
174 &swizzle ))
175 goto out;
176 }
177
178 c->error = 1;
179 return src_undef();
180
181 out:
182 /* Make sure that all referenced elements are from this immediate.
183 * Has the effect of making size-one immediates into scalars.
184 */
185 for (j = nr; j < 4; j++)
186 swizzle |= (swizzle & 0x3) << (j * 2);
187
188 return src_swizzle( src_reg( TGSI_FILE_IMMEDIATE, i ),
189 BRW_GET_SWZ(swizzle, X),
190 BRW_GET_SWZ(swizzle, Y),
191 BRW_GET_SWZ(swizzle, Z),
192 BRW_GET_SWZ(swizzle, W) );
193 }
194
195
196
197 static struct brw_fp_src src_imm1f( struct brw_wm_compile *c,
198 GLfloat f )
199 {
200 return src_imm(c, &f, 1);
201 }
202
203 static struct brw_fp_src src_imm4f( struct brw_wm_compile *c,
204 GLfloat x,
205 GLfloat y,
206 GLfloat z,
207 GLfloat w)
208 {
209 GLfloat f[4] = {x,y,z,w};
210 return src_imm(c, f, 4);
211 }
212
213
214
215 /***********************************************************************
216 * Dest regs
217 */
218
219 static struct brw_fp_dst dst_reg(GLuint file, GLuint idx)
220 {
221 struct brw_fp_dst reg;
222 reg.file = file;
223 reg.index = idx;
224 reg.writemask = BRW_WRITEMASK_XYZW;
225 reg.indirect = 0;
226 return reg;
227 }
228
229 static struct brw_fp_dst dst_mask( struct brw_fp_dst reg, int mask )
230 {
231 reg.writemask &= mask;
232 return reg;
233 }
234
235 static struct brw_fp_dst dst_undef( void )
236 {
237 return dst_reg(TGSI_FILE_NULL, 0);
238 }
239
240 static boolean dst_is_undef( struct brw_fp_dst dst )
241 {
242 return dst.file == TGSI_FILE_NULL;
243 }
244
245 static struct brw_fp_dst dst_saturate( struct brw_fp_dst reg, boolean flag )
246 {
247 reg.saturate = flag;
248 return reg;
249 }
250
251 static struct brw_fp_dst get_temp( struct brw_wm_compile *c )
252 {
253 int bit = ffs( ~c->fp_temp );
254
255 if (!bit) {
256 debug_printf("%s: out of temporaries\n", __FILE__);
257 }
258
259 c->fp_temp |= 1<<(bit-1);
260 return dst_reg(TGSI_FILE_TEMPORARY, c->fp_first_internal_temp+(bit-1));
261 }
262
263
264 static void release_temp( struct brw_wm_compile *c, struct brw_fp_dst temp )
265 {
266 c->fp_temp &= ~(1 << (temp.index - c->fp_first_internal_temp));
267 }
268
269
270 /***********************************************************************
271 * Instructions
272 */
273
274 static struct brw_fp_instruction *get_fp_inst(struct brw_wm_compile *c)
275 {
276 return &c->fp_instructions[c->nr_fp_insns++];
277 }
278
279 static struct brw_fp_instruction * emit_tex_op(struct brw_wm_compile *c,
280 GLuint op,
281 struct brw_fp_dst dest,
282 GLuint tex_src_unit,
283 GLuint tex_src_target,
284 struct brw_fp_src src0,
285 struct brw_fp_src src1,
286 struct brw_fp_src src2 )
287 {
288 struct brw_fp_instruction *inst = get_fp_inst(c);
289
290 inst->opcode = op;
291 inst->dst = dest;
292 inst->tex_unit = tex_src_unit;
293 inst->tex_target = tex_src_target;
294 inst->src[0] = src0;
295 inst->src[1] = src1;
296 inst->src[2] = src2;
297
298 return inst;
299 }
300
301
302 static INLINE void emit_op3(struct brw_wm_compile *c,
303 GLuint op,
304 struct brw_fp_dst dest,
305 struct brw_fp_src src0,
306 struct brw_fp_src src1,
307 struct brw_fp_src src2 )
308 {
309 emit_tex_op(c, op, dest, 0, 0, src0, src1, src2);
310 }
311
312
313 static INLINE void emit_op2(struct brw_wm_compile *c,
314 GLuint op,
315 struct brw_fp_dst dest,
316 struct brw_fp_src src0,
317 struct brw_fp_src src1)
318 {
319 emit_tex_op(c, op, dest, 0, 0, src0, src1, src_undef());
320 }
321
322 static INLINE void emit_op1(struct brw_wm_compile *c,
323 GLuint op,
324 struct brw_fp_dst dest,
325 struct brw_fp_src src0)
326 {
327 emit_tex_op(c, op, dest, 0, 0, src0, src_undef(), src_undef());
328 }
329
330 static INLINE void emit_op0(struct brw_wm_compile *c,
331 GLuint op,
332 struct brw_fp_dst dest)
333 {
334 emit_tex_op(c, op, dest, 0, 0, src_undef(), src_undef(), src_undef());
335 }
336
337
338
339 /* Many opcodes produce the same value across all the result channels.
340 * We'd rather not have to support that splatting in the opcode implementations,
341 * and brw_wm_pass*.c wants to optimize them out by shuffling references around
342 * anyway. We can easily get both by emitting the opcode to one channel, and
343 * then MOVing it to the others, which brw_wm_pass*.c already understands.
344 */
345 static void emit_scalar_insn(struct brw_wm_compile *c,
346 unsigned opcode,
347 struct brw_fp_dst dst,
348 struct brw_fp_src src0,
349 struct brw_fp_src src1,
350 struct brw_fp_src src2 )
351 {
352 unsigned first_chan = ffs(dst.writemask) - 1;
353 unsigned first_mask = 1 << first_chan;
354
355 if (dst.writemask == 0)
356 return;
357
358 emit_op3( c, opcode,
359 dst_mask(dst, first_mask),
360 src0, src1, src2 );
361
362 if (dst.writemask != first_mask) {
363 emit_op1(c, TGSI_OPCODE_MOV,
364 dst_mask(dst, ~first_mask),
365 src_scalar(src_reg_from_dst(dst), first_chan));
366 }
367 }
368
369
370 /***********************************************************************
371 * Special instructions for interpolation and other tasks
372 */
373
374 static struct brw_fp_src get_pixel_xy( struct brw_wm_compile *c )
375 {
376 if (src_is_undef(c->fp_pixel_xy)) {
377 struct brw_fp_dst pixel_xy = get_temp(c);
378 struct brw_fp_src payload_r0_depth = src_reg(BRW_FILE_PAYLOAD, PAYLOAD_DEPTH);
379
380
381 /* Emit the out calculations, and hold onto the results. Use
382 * two instructions as a temporary is required.
383 */
384 /* pixel_xy.xy = PIXELXY payload[0];
385 */
386 emit_op1(c,
387 WM_PIXELXY,
388 dst_mask(pixel_xy, BRW_WRITEMASK_XY),
389 payload_r0_depth);
390
391 c->fp_pixel_xy = src_reg_from_dst(pixel_xy);
392 }
393
394 return c->fp_pixel_xy;
395 }
396
397 static struct brw_fp_src get_delta_xy( struct brw_wm_compile *c )
398 {
399 if (src_is_undef(c->fp_delta_xy)) {
400 struct brw_fp_dst delta_xy = get_temp(c);
401 struct brw_fp_src pixel_xy = get_pixel_xy(c);
402 struct brw_fp_src payload_r0_depth = src_reg(BRW_FILE_PAYLOAD, PAYLOAD_DEPTH);
403
404 /* deltas.xy = DELTAXY pixel_xy, payload[0]
405 */
406 emit_op3(c,
407 WM_DELTAXY,
408 dst_mask(delta_xy, BRW_WRITEMASK_XY),
409 pixel_xy,
410 payload_r0_depth,
411 src_undef());
412
413 c->fp_delta_xy = src_reg_from_dst(delta_xy);
414 }
415
416 return c->fp_delta_xy;
417 }
418
419 static struct brw_fp_src get_pixel_w( struct brw_wm_compile *c )
420 {
421 if (src_is_undef(c->fp_pixel_w)) {
422 struct brw_fp_dst pixel_w = get_temp(c);
423 struct brw_fp_src deltas = get_delta_xy(c);
424
425 /* XXX: assuming position is always first -- valid?
426 */
427 struct brw_fp_src interp_wpos = src_reg(BRW_FILE_PAYLOAD, 0);
428
429 /* deltas.xyw = DELTAS2 deltas.xy, payload.interp_wpos.x
430 */
431 emit_op3(c,
432 WM_PIXELW,
433 dst_mask(pixel_w, BRW_WRITEMASK_W),
434 interp_wpos,
435 deltas,
436 src_undef());
437
438
439 c->fp_pixel_w = src_reg_from_dst(pixel_w);
440 }
441
442 return c->fp_pixel_w;
443 }
444
445
446 /***********************************************************************
447 * Emit INTERP instructions ahead of first use of each attrib.
448 */
449
450 static void emit_interp( struct brw_wm_compile *c,
451 GLuint idx,
452 GLuint semantic,
453 GLuint interp_mode )
454 {
455 struct brw_fp_dst dst = dst_reg(TGSI_FILE_INPUT, idx);
456 struct brw_fp_src interp = src_reg(BRW_FILE_PAYLOAD, idx);
457 struct brw_fp_src deltas = get_delta_xy(c);
458
459 /* Need to use PINTERP on attributes which have been
460 * multiplied by 1/W in the SF program, and LINTERP on those
461 * which have not:
462 */
463 switch (semantic) {
464 case TGSI_SEMANTIC_POSITION:
465 /* Have to treat wpos.xy specially:
466 */
467 emit_op1(c,
468 WM_WPOSXY,
469 dst_mask(dst, BRW_WRITEMASK_XY),
470 get_pixel_xy(c));
471
472 /* TGSI_FILE_INPUT.attr.xyzw = INTERP payload.interp[attr].x, deltas.xyw
473 */
474 emit_op2(c,
475 WM_LINTERP,
476 dst_mask(dst, BRW_WRITEMASK_ZW),
477 interp,
478 deltas);
479 break;
480
481 case TGSI_SEMANTIC_COLOR:
482 if (c->key.flat_shade) {
483 emit_op1(c,
484 WM_CINTERP,
485 dst,
486 interp);
487 }
488 else if (interp_mode == TGSI_INTERPOLATE_LINEAR) {
489 emit_op2(c,
490 WM_LINTERP,
491 dst,
492 interp,
493 deltas);
494 }
495 else {
496 emit_op3(c,
497 WM_PINTERP,
498 dst,
499 interp,
500 deltas,
501 get_pixel_w(c));
502 }
503
504 break;
505
506 case TGSI_SEMANTIC_FOG:
507 /* Interpolate the fog coordinate */
508 emit_op3(c,
509 WM_PINTERP,
510 dst_mask(dst, BRW_WRITEMASK_X),
511 interp,
512 deltas,
513 get_pixel_w(c));
514
515 emit_op1(c,
516 TGSI_OPCODE_MOV,
517 dst_mask(dst, BRW_WRITEMASK_YZ),
518 src_imm1f(c, 0.0));
519
520 emit_op1(c,
521 TGSI_OPCODE_MOV,
522 dst_mask(dst, BRW_WRITEMASK_W),
523 src_imm1f(c, 1.0));
524 break;
525
526 case TGSI_SEMANTIC_FACE:
527 /* XXX review/test this case */
528 emit_op0(c,
529 WM_FRONTFACING,
530 dst_mask(dst, BRW_WRITEMASK_X));
531
532 emit_op1(c,
533 TGSI_OPCODE_MOV,
534 dst_mask(dst, BRW_WRITEMASK_YZ),
535 src_imm1f(c, 0.0));
536
537 emit_op1(c,
538 TGSI_OPCODE_MOV,
539 dst_mask(dst, BRW_WRITEMASK_W),
540 src_imm1f(c, 1.0));
541 break;
542
543 case TGSI_SEMANTIC_PSIZE:
544 /* XXX review/test this case */
545 emit_op3(c,
546 WM_PINTERP,
547 dst_mask(dst, BRW_WRITEMASK_XY),
548 interp,
549 deltas,
550 get_pixel_w(c));
551
552 emit_op1(c,
553 TGSI_OPCODE_MOV,
554 dst_mask(dst, BRW_WRITEMASK_Z),
555 src_imm1f(c, 0.0f));
556
557 emit_op1(c,
558 TGSI_OPCODE_MOV,
559 dst_mask(dst, BRW_WRITEMASK_W),
560 src_imm1f(c, 1.0f));
561 break;
562
563 default:
564 switch (interp_mode) {
565 case TGSI_INTERPOLATE_CONSTANT:
566 emit_op1(c,
567 WM_CINTERP,
568 dst,
569 interp);
570 break;
571
572 case TGSI_INTERPOLATE_LINEAR:
573 emit_op2(c,
574 WM_LINTERP,
575 dst,
576 interp,
577 deltas);
578 break;
579
580 case TGSI_INTERPOLATE_PERSPECTIVE:
581 emit_op3(c,
582 WM_PINTERP,
583 dst,
584 interp,
585 deltas,
586 get_pixel_w(c));
587 break;
588 }
589 break;
590 }
591 }
592
593
594 /***********************************************************************
595 * Expand various instructions here to simpler forms.
596 */
597 static void precalc_dst( struct brw_wm_compile *c,
598 struct brw_fp_dst dst,
599 struct brw_fp_src src0,
600 struct brw_fp_src src1 )
601 {
602 if (dst.writemask & BRW_WRITEMASK_Y) {
603 /* dst.y = mul src0.y, src1.y
604 */
605 emit_op2(c,
606 TGSI_OPCODE_MUL,
607 dst_mask(dst, BRW_WRITEMASK_Y),
608 src0,
609 src1);
610 }
611
612 if (dst.writemask & BRW_WRITEMASK_XZ) {
613 /* dst.z = mov src0.zzzz
614 */
615 emit_op1(c,
616 TGSI_OPCODE_MOV,
617 dst_mask(dst, BRW_WRITEMASK_Z),
618 src_scalar(src0, Z));
619
620 /* dst.x = imm1f(1.0)
621 */
622 emit_op1(c,
623 TGSI_OPCODE_MOV,
624 dst_saturate(dst_mask(dst, BRW_WRITEMASK_X), 0),
625 src_imm1f(c, 1.0));
626 }
627 if (dst.writemask & BRW_WRITEMASK_W) {
628 /* dst.w = mov src1.w
629 */
630 emit_op1(c,
631 TGSI_OPCODE_MOV,
632 dst_mask(dst, BRW_WRITEMASK_W),
633 src1);
634 }
635 }
636
637
638 static void precalc_lit( struct brw_wm_compile *c,
639 struct brw_fp_dst dst,
640 struct brw_fp_src src0 )
641 {
642 if (dst.writemask & BRW_WRITEMASK_XW) {
643 /* dst.xw = imm(1.0f)
644 */
645 emit_op1(c,
646 TGSI_OPCODE_MOV,
647 dst_saturate(dst_mask(dst, BRW_WRITEMASK_XW), 0),
648 src_imm1f(c, 1.0f));
649 }
650
651 if (dst.writemask & BRW_WRITEMASK_YZ) {
652 emit_op1(c,
653 TGSI_OPCODE_LIT,
654 dst_mask(dst, BRW_WRITEMASK_YZ),
655 src0);
656 }
657 }
658
659
660 /**
661 * Some TEX instructions require extra code, cube map coordinate
662 * normalization, or coordinate scaling for RECT textures, etc.
663 * This function emits those extra instructions and the TEX
664 * instruction itself.
665 */
666 static void precalc_tex( struct brw_wm_compile *c,
667 struct brw_fp_dst dst,
668 unsigned target,
669 unsigned unit,
670 struct brw_fp_src src0 )
671 {
672 struct brw_fp_src coord = src_undef();
673 struct brw_fp_dst tmp = dst_undef();
674
675 assert(unit < BRW_MAX_TEX_UNIT);
676
677 /* Cubemap: find longest component of coord vector and normalize
678 * it.
679 */
680 if (target == TGSI_TEXTURE_CUBE) {
681 struct brw_fp_src tmpsrc;
682
683 tmp = get_temp(c);
684 tmpsrc = src_reg_from_dst(tmp);
685
686 /* tmp = abs(src0) */
687 emit_op1(c,
688 TGSI_OPCODE_MOV,
689 tmp,
690 src_abs(src0));
691
692 /* tmp.X = MAX(tmp.X, tmp.Y) */
693 emit_op2(c, TGSI_OPCODE_MAX,
694 dst_mask(tmp, BRW_WRITEMASK_X),
695 src_scalar(tmpsrc, X),
696 src_scalar(tmpsrc, Y));
697
698 /* tmp.X = MAX(tmp.X, tmp.Z) */
699 emit_op2(c, TGSI_OPCODE_MAX,
700 dst_mask(tmp, BRW_WRITEMASK_X),
701 tmpsrc,
702 src_scalar(tmpsrc, Z));
703
704 /* tmp.X = 1 / tmp.X */
705 emit_op1(c, TGSI_OPCODE_RCP,
706 dst_mask(tmp, BRW_WRITEMASK_X),
707 tmpsrc);
708
709 /* tmp = src0 * tmp.xxxx */
710 emit_op2(c, TGSI_OPCODE_MUL,
711 tmp,
712 src0,
713 src_scalar(tmpsrc, X));
714
715 coord = tmpsrc;
716 }
717 else if (target == TGSI_TEXTURE_RECT ||
718 target == TGSI_TEXTURE_SHADOWRECT) {
719 /* XXX: need a mechanism for internally generated constants.
720 */
721 coord = src0;
722 }
723 else {
724 coord = src0;
725 }
726
727 /* Need to emit YUV texture conversions by hand. Probably need to
728 * do this here - the alternative is in brw_wm_emit.c, but the
729 * conversion requires allocating a temporary variable which we
730 * don't have the facility to do that late in the compilation.
731 */
732 if (c->key.yuvtex_mask & (1 << unit)) {
733 /* convert ycbcr to RGBA */
734 GLboolean swap_uv = c->key.yuvtex_swap_mask & (1<<unit);
735 struct brw_fp_dst tmp = get_temp(c);
736 struct brw_fp_src tmpsrc = src_reg_from_dst(tmp);
737 struct brw_fp_src C0 = src_imm4f( c, -.5, -.0625, -.5, 1.164 );
738 struct brw_fp_src C1 = src_imm4f( c, 1.596, -0.813, 2.018, -.391 );
739
740 /* tmp = TEX ...
741 */
742 emit_tex_op(c,
743 TGSI_OPCODE_TEX,
744 dst_saturate(tmp, dst.saturate),
745 unit,
746 target,
747 coord,
748 src_undef(),
749 src_undef());
750
751 /* tmp.xyz = ADD TMP, C0
752 */
753 emit_op2(c, TGSI_OPCODE_ADD,
754 dst_mask(tmp, BRW_WRITEMASK_XYZ),
755 tmpsrc,
756 C0);
757
758 /* YUV.y = MUL YUV.y, C0.w
759 */
760 emit_op2(c, TGSI_OPCODE_MUL,
761 dst_mask(tmp, BRW_WRITEMASK_Y),
762 tmpsrc,
763 src_scalar(C0, W));
764
765 /*
766 * if (UV swaped)
767 * RGB.xyz = MAD YUV.zzx, C1, YUV.y
768 * else
769 * RGB.xyz = MAD YUV.xxz, C1, YUV.y
770 */
771
772 emit_op3(c, TGSI_OPCODE_MAD,
773 dst_mask(dst, BRW_WRITEMASK_XYZ),
774 ( swap_uv ?
775 src_swizzle(tmpsrc, Z,Z,X,X) :
776 src_swizzle(tmpsrc, X,X,Z,Z)),
777 C1,
778 src_scalar(tmpsrc, Y));
779
780 /* RGB.y = MAD YUV.z, C1.w, RGB.y
781 */
782 emit_op3(c,
783 TGSI_OPCODE_MAD,
784 dst_mask(dst, BRW_WRITEMASK_Y),
785 src_scalar(tmpsrc, Z),
786 src_scalar(C1, W),
787 src_scalar(src_reg_from_dst(dst), Y));
788
789 release_temp(c, tmp);
790 }
791 else {
792 /* ordinary RGBA tex instruction */
793 emit_tex_op(c,
794 TGSI_OPCODE_TEX,
795 dst,
796 unit,
797 target,
798 coord,
799 src_undef(),
800 src_undef());
801 }
802
803 /* XXX: add GL_EXT_texture_swizzle support to gallium -- by
804 * generating shader varients in mesa state tracker.
805 */
806
807 /* Release this temp if we ended up allocating it:
808 */
809 if (!dst_is_undef(tmp))
810 release_temp(c, tmp);
811 }
812
813
814 /**
815 * Check if the given TXP instruction really needs the divide-by-W step.
816 */
817 static GLboolean projtex( struct brw_wm_compile *c,
818 unsigned target,
819 struct brw_fp_src src )
820 {
821 /* Only try to detect the simplest cases. Could detect (later)
822 * cases where we are trying to emit code like RCP {1.0}, MUL x,
823 * {1.0}, and so on.
824 *
825 * More complex cases than this typically only arise from
826 * user-provided fragment programs anyway:
827 */
828 if (target == TGSI_TEXTURE_CUBE)
829 return GL_FALSE; /* ut2004 gun rendering !?! */
830
831 if (src.file == TGSI_FILE_INPUT &&
832 BRW_GET_SWZ(src.swizzle, W) == W &&
833 c->fp->info.input_interpolate[src.index] != TGSI_INTERPOLATE_PERSPECTIVE)
834 return GL_FALSE;
835
836 return GL_TRUE;
837 }
838
839
840 /**
841 * Emit code for TXP.
842 */
843 static void precalc_txp( struct brw_wm_compile *c,
844 struct brw_fp_dst dst,
845 unsigned target,
846 unsigned unit,
847 struct brw_fp_src src0 )
848 {
849 if (projtex(c, target, src0)) {
850 struct brw_fp_dst tmp = get_temp(c);
851
852 /* tmp0.w = RCP inst.arg[0][3]
853 */
854 emit_op1(c,
855 TGSI_OPCODE_RCP,
856 dst_mask(tmp, BRW_WRITEMASK_W),
857 src_scalar(src0, W));
858
859 /* tmp0.xyz = MUL inst.arg[0], tmp0.wwww
860 */
861 emit_op2(c,
862 TGSI_OPCODE_MUL,
863 dst_mask(tmp, BRW_WRITEMASK_XYZ),
864 src0,
865 src_scalar(src_reg_from_dst(tmp), W));
866
867 /* dst = TEX tmp0
868 */
869 precalc_tex(c,
870 dst,
871 target,
872 unit,
873 src_reg_from_dst(tmp));
874
875 release_temp(c, tmp);
876 }
877 else
878 {
879 /* dst = TEX src0
880 */
881 precalc_tex(c, dst, target, unit, src0);
882 }
883 }
884
885
886 /* XXX: note this returns a src_reg.
887 */
888 static struct brw_fp_src
889 find_output_by_semantic( struct brw_wm_compile *c,
890 unsigned semantic,
891 unsigned index )
892 {
893 const struct tgsi_shader_info *info = &c->fp->info;
894 unsigned i;
895
896 for (i = 0; i < info->num_outputs; i++)
897 if (info->output_semantic_name[i] == semantic &&
898 info->output_semantic_index[i] == index)
899 return src_reg( TGSI_FILE_OUTPUT, i );
900
901 /* If not found, return some arbitrary immediate value:
902 */
903 return src_imm1f(c, 1.0);
904 }
905
906
907 static void emit_fb_write( struct brw_wm_compile *c )
908 {
909 struct brw_fp_src payload_r0_depth = src_reg(BRW_FILE_PAYLOAD, PAYLOAD_DEPTH);
910 struct brw_fp_src outdepth = find_output_by_semantic(c, TGSI_SEMANTIC_POSITION, 0);
911 GLuint i;
912
913
914 outdepth = src_scalar(outdepth, Z);
915
916 for (i = 0 ; i < c->key.nr_cbufs; i++) {
917 struct brw_fp_src outcolor;
918 unsigned target = 1<<i;
919
920 /* Set EOT flag on last inst:
921 */
922 if (i == c->key.nr_cbufs - 1)
923 target |= 1;
924
925 outcolor = find_output_by_semantic(c, TGSI_SEMANTIC_COLOR, i);
926
927 /* Use emit_tex_op so that we can specify the inst->tex_target
928 * field, which is abused to contain the FB write target and the
929 * EOT marker
930 */
931 emit_tex_op(c, WM_FB_WRITE,
932 dst_undef(),
933 target,
934 0,
935 outcolor,
936 payload_r0_depth,
937 outdepth);
938 }
939 }
940
941
942 static struct brw_fp_dst translate_dst( struct brw_wm_compile *c,
943 const struct tgsi_full_dst_register *dst,
944 unsigned saturate )
945 {
946 struct brw_fp_dst out;
947
948 out.file = dst->DstRegister.File;
949 out.index = dst->DstRegister.Index;
950 out.writemask = dst->DstRegister.WriteMask;
951 out.indirect = dst->DstRegister.Indirect;
952 out.saturate = (saturate == TGSI_SAT_ZERO_ONE);
953
954 if (out.indirect) {
955 assert(dst->DstRegisterInd.File == TGSI_FILE_ADDRESS);
956 assert(dst->DstRegisterInd.Index == 0);
957 }
958
959 return out;
960 }
961
962
963 static struct brw_fp_src translate_src( struct brw_wm_compile *c,
964 const struct tgsi_full_src_register *src )
965 {
966 struct brw_fp_src out;
967
968 out.file = src->SrcRegister.File;
969 out.index = src->SrcRegister.Index;
970 out.indirect = src->SrcRegister.Indirect;
971
972 out.swizzle = ((src->SrcRegister.SwizzleX << 0) |
973 (src->SrcRegister.SwizzleY << 2) |
974 (src->SrcRegister.SwizzleZ << 4) |
975 (src->SrcRegister.SwizzleW << 6));
976
977 switch (tgsi_util_get_full_src_register_sign_mode( src, 0 )) {
978 case TGSI_UTIL_SIGN_CLEAR:
979 out.abs = 1;
980 out.negate = 0;
981 break;
982
983 case TGSI_UTIL_SIGN_SET:
984 out.abs = 1;
985 out.negate = 1;
986 break;
987
988 case TGSI_UTIL_SIGN_TOGGLE:
989 out.abs = 0;
990 out.negate = 1;
991 break;
992
993 case TGSI_UTIL_SIGN_KEEP:
994 default:
995 out.abs = 0;
996 out.negate = 0;
997 break;
998 }
999
1000 if (out.indirect) {
1001 assert(src->SrcRegisterInd.File == TGSI_FILE_ADDRESS);
1002 assert(src->SrcRegisterInd.Index == 0);
1003 }
1004
1005 return out;
1006 }
1007
1008
1009
1010 static void emit_insn( struct brw_wm_compile *c,
1011 const struct tgsi_full_instruction *inst )
1012 {
1013 unsigned opcode = inst->Instruction.Opcode;
1014 struct brw_fp_dst dst;
1015 struct brw_fp_src src[3];
1016 int i;
1017
1018 dst = translate_dst( c, &inst->FullDstRegisters[0],
1019 inst->Instruction.Saturate );
1020
1021 for (i = 0; i < inst->Instruction.NumSrcRegs; i++)
1022 src[i] = translate_src( c, &inst->FullSrcRegisters[0] );
1023
1024 switch (opcode) {
1025 case TGSI_OPCODE_ABS:
1026 emit_op1(c, TGSI_OPCODE_MOV,
1027 dst,
1028 src_abs(src[0]));
1029 break;
1030
1031 case TGSI_OPCODE_SUB:
1032 emit_op2(c, TGSI_OPCODE_ADD,
1033 dst,
1034 src[0],
1035 src_negate(src[1]));
1036 break;
1037
1038 case TGSI_OPCODE_SCS:
1039 emit_op1(c, TGSI_OPCODE_SCS,
1040 dst_mask(dst, BRW_WRITEMASK_XY),
1041 src[0]);
1042 break;
1043
1044 case TGSI_OPCODE_DST:
1045 precalc_dst(c, dst, src[0], src[1]);
1046 break;
1047
1048 case TGSI_OPCODE_LIT:
1049 precalc_lit(c, dst, src[0]);
1050 break;
1051
1052 case TGSI_OPCODE_TEX:
1053 precalc_tex(c, dst,
1054 inst->InstructionExtTexture.Texture,
1055 src[0].file, /* sampler unit */
1056 src[1] );
1057 break;
1058
1059 case TGSI_OPCODE_TXP:
1060 precalc_txp(c, dst,
1061 inst->InstructionExtTexture.Texture,
1062 src[0].file, /* sampler unit */
1063 src[1] );
1064 break;
1065
1066 case TGSI_OPCODE_TXB:
1067 /* XXX: TXB not done
1068 */
1069 precalc_tex(c, dst,
1070 inst->InstructionExtTexture.Texture,
1071 src[0].file, /* sampler unit */
1072 src[1] );
1073 break;
1074
1075 case TGSI_OPCODE_XPD:
1076 emit_op2(c, TGSI_OPCODE_XPD,
1077 dst_mask(dst, BRW_WRITEMASK_XYZ),
1078 src[0],
1079 src[1]);
1080 break;
1081
1082 case TGSI_OPCODE_KIL:
1083 emit_op1(c, TGSI_OPCODE_KIL,
1084 dst_mask(dst_undef(), 0),
1085 src[0]);
1086 break;
1087
1088 case TGSI_OPCODE_END:
1089 emit_fb_write(c);
1090 break;
1091 default:
1092 if (!c->key.has_flow_control &&
1093 brw_wm_is_scalar_result(opcode))
1094 emit_scalar_insn(c, opcode, dst, src[0], src[1], src[2]);
1095 else
1096 emit_op3(c, opcode, dst, src[0], src[1], src[2]);
1097 break;
1098 }
1099 }
1100
1101 /**
1102 * Initial pass for fragment program code generation.
1103 * This function is used by both the GLSL and non-GLSL paths.
1104 */
1105 int brw_wm_pass_fp( struct brw_wm_compile *c )
1106 {
1107 struct brw_fragment_shader *fs = c->fp;
1108 struct tgsi_parse_context parse;
1109 struct tgsi_full_instruction *inst;
1110 struct tgsi_full_declaration *decl;
1111 const float *imm;
1112 GLuint size;
1113 GLuint i;
1114
1115 if (BRW_DEBUG & DEBUG_WM) {
1116 debug_printf("pre-fp:\n");
1117 tgsi_dump(fs->tokens, 0);
1118 }
1119
1120 c->fp_pixel_xy = src_undef();
1121 c->fp_delta_xy = src_undef();
1122 c->fp_pixel_w = src_undef();
1123 c->nr_fp_insns = 0;
1124 c->nr_immediates = 0;
1125
1126
1127 /* Loop over all instructions doing assorted simplifications and
1128 * transformations.
1129 */
1130 tgsi_parse_init( &parse, fs->tokens );
1131 while( !tgsi_parse_end_of_tokens( &parse ) ) {
1132 tgsi_parse_token( &parse );
1133
1134 switch( parse.FullToken.Token.Type ) {
1135 case TGSI_TOKEN_TYPE_DECLARATION:
1136 /* Turn intput declarations into special WM_* instructions.
1137 *
1138 * XXX: For non-branching shaders, consider deferring variable
1139 * initialization as late as possible to minimize register
1140 * usage. This is how the original BRW driver worked.
1141 *
1142 * In a branching shader, must preamble instructions at decl
1143 * time, as instruction order in the shader does not
1144 * correspond to the order instructions are executed in the
1145 * wild.
1146 *
1147 * This is where special instructions such as WM_CINTERP,
1148 * WM_LINTERP, WM_PINTERP and WM_WPOSXY are emitted to
1149 * compute shader inputs from the payload registers and pixel
1150 * position.
1151 */
1152 decl = &parse.FullToken.FullDeclaration;
1153 if( decl->Declaration.File == TGSI_FILE_INPUT ) {
1154 unsigned first, last, mask;
1155 unsigned attrib;
1156
1157 first = decl->DeclarationRange.First;
1158 last = decl->DeclarationRange.Last;
1159 mask = decl->Declaration.UsageMask;
1160
1161 for (attrib = first; attrib <= last; attrib++) {
1162 emit_interp(c,
1163 attrib,
1164 decl->Semantic.SemanticName,
1165 decl->Declaration.Interpolate );
1166 }
1167 }
1168
1169 break;
1170
1171 case TGSI_TOKEN_TYPE_IMMEDIATE:
1172 /* Unlike VS programs we can probably manage fine encoding
1173 * immediate values directly into the emitted EU
1174 * instructions, as we probably only need to reference one
1175 * float value per instruction. Just save the data for now
1176 * and use directly later.
1177 */
1178 i = c->nr_immediates++;
1179 imm = &parse.FullToken.FullImmediate.u[i].Float;
1180 size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
1181
1182 if (c->nr_immediates >= BRW_WM_MAX_CONST)
1183 return PIPE_ERROR_OUT_OF_MEMORY;
1184
1185 for (i = 0; i < size; i++)
1186 c->immediate[c->nr_immediates].v[i] = imm[i];
1187
1188 for (; i < 4; i++)
1189 c->immediate[c->nr_immediates].v[i] = 0.0;
1190
1191 c->immediate[c->nr_immediates].nr = size;
1192 c->nr_immediates++;
1193 break;
1194
1195 case TGSI_TOKEN_TYPE_INSTRUCTION:
1196 inst = &parse.FullToken.FullInstruction;
1197 emit_insn(c, inst);
1198 break;
1199 }
1200 }
1201
1202 if (BRW_DEBUG & DEBUG_WM) {
1203 brw_wm_print_fp_program( c, "pass_fp" );
1204 debug_printf("\n");
1205 }
1206
1207 return c->error;
1208 }
1209