i965g: fix typo converting wm src regs
[mesa.git] / src / gallium / drivers / i965 / brw_wm_fp.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "pipe/p_shader_tokens.h"
34
35 #include "util/u_math.h"
36 #include "util/u_memory.h"
37
38 #include "tgsi/tgsi_parse.h"
39 #include "tgsi/tgsi_dump.h"
40 #include "tgsi/tgsi_info.h"
41 #include "tgsi/tgsi_util.h"
42
43 #include "brw_wm.h"
44 #include "brw_util.h"
45 #include "brw_debug.h"
46
47
48 /***********************************************************************
49 * Source regs
50 */
51
52 static struct brw_fp_src src_reg(GLuint file, GLuint idx)
53 {
54 struct brw_fp_src reg;
55 reg.file = file;
56 reg.index = idx;
57 reg.swizzle = BRW_SWIZZLE_XYZW;
58 reg.indirect = 0;
59 reg.negate = 0;
60 reg.abs = 0;
61 return reg;
62 }
63
64 static struct brw_fp_src src_reg_from_dst(struct brw_fp_dst dst)
65 {
66 return src_reg(dst.file, dst.index);
67 }
68
69 static struct brw_fp_src src_undef( void )
70 {
71 return src_reg(TGSI_FILE_NULL, 0);
72 }
73
74 static GLboolean src_is_undef(struct brw_fp_src src)
75 {
76 return src.file == TGSI_FILE_NULL;
77 }
78
79 static struct brw_fp_src src_swizzle( struct brw_fp_src reg, int x, int y, int z, int w )
80 {
81 unsigned swz = reg.swizzle;
82
83 reg.swizzle = ( BRW_GET_SWZ(swz, x) << 0 |
84 BRW_GET_SWZ(swz, y) << 2 |
85 BRW_GET_SWZ(swz, z) << 4 |
86 BRW_GET_SWZ(swz, w) << 6 );
87
88 return reg;
89 }
90
91 static struct brw_fp_src src_scalar( struct brw_fp_src reg, int x )
92 {
93 return src_swizzle(reg, x, x, x, x);
94 }
95
96 static struct brw_fp_src src_abs( struct brw_fp_src src )
97 {
98 src.negate = 0;
99 src.abs = 1;
100 return src;
101 }
102
103 static struct brw_fp_src src_negate( struct brw_fp_src src )
104 {
105 src.negate = 1;
106 src.abs = 0;
107 return src;
108 }
109
110
/* Raw bit pattern of a float, used to compare immediates exactly.
 * Comparing with == would treat -0.0 and +0.0 as the same value (and
 * would never match a NaN against itself).
 */
static unsigned imm_float_bits( float f )
{
   union { float f; unsigned u; } fu;

   fu.f = f;
   return fu.u;
}

/**
 * Try to express the 'nr' values in 'v' as a swizzle of the immediate
 * vector 'v2', which currently holds '*nr2' values (at most 4).
 *
 * For each requested value the first slot of v2 with an identical bit
 * pattern is reused; if there is none the value is appended to v2 and
 * *nr2 is incremented.  On success *swizzle holds, two bits per
 * requested channel starting at bit 0, the slot chosen for that channel.
 *
 * \return non-zero on success, zero if v2 would need more than four
 *         slots.  On failure *swizzle is meaningless and v2 / *nr2 may
 *         already contain values appended before the failure.
 */
static int match_or_expand_immediate( const float *v,
                                      unsigned nr,
                                      float *v2,
                                      unsigned *nr2,
                                      unsigned *swizzle )
{
   unsigned i;

   *swizzle = 0;

   for (i = 0; i < nr; i++) {
      const unsigned want = imm_float_bits(v[i]);
      unsigned slot;

      /* Look for an existing slot holding exactly this value. */
      for (slot = 0; slot < *nr2; slot++) {
         if (imm_float_bits(v2[slot]) == want)
            break;
      }

      /* Not present: append it, if there is still room. */
      if (slot == *nr2) {
         if (*nr2 >= 4)
            return 0;

         v2[slot] = v[i];
         (*nr2)++;
      }

      *swizzle |= slot << (i * 2);
   }

   return 1;
}
143
144
145
146 /* Internally generated immediates: overkill...
147 */
148 static struct brw_fp_src src_imm( struct brw_wm_compile *c,
149 const GLfloat *v,
150 unsigned nr)
151 {
152 unsigned i, j;
153 unsigned swizzle;
154
155 /* Could do a first pass where we examine all existing immediates
156 * without expanding.
157 */
158
159 for (i = 0; i < c->nr_immediates; i++) {
160 if (match_or_expand_immediate( v,
161 nr,
162 c->immediate[i].v,
163 &c->immediate[i].nr,
164 &swizzle ))
165 goto out;
166 }
167
168 if (c->nr_immediates < Elements(c->immediate)) {
169 i = c->nr_immediates++;
170 if (match_or_expand_immediate( v,
171 nr,
172 c->immediate[i].v,
173 &c->immediate[i].nr,
174 &swizzle ))
175 goto out;
176 }
177
178 c->error = 1;
179 return src_undef();
180
181 out:
182 /* Make sure that all referenced elements are from this immediate.
183 * Has the effect of making size-one immediates into scalars.
184 */
185 for (j = nr; j < 4; j++)
186 swizzle |= (swizzle & 0x3) << (j * 2);
187
188 return src_swizzle( src_reg( TGSI_FILE_IMMEDIATE, i ),
189 BRW_GET_SWZ(swizzle, X),
190 BRW_GET_SWZ(swizzle, Y),
191 BRW_GET_SWZ(swizzle, Z),
192 BRW_GET_SWZ(swizzle, W) );
193 }
194
195
196
197 static struct brw_fp_src src_imm1f( struct brw_wm_compile *c,
198 GLfloat f )
199 {
200 return src_imm(c, &f, 1);
201 }
202
203 static struct brw_fp_src src_imm4f( struct brw_wm_compile *c,
204 GLfloat x,
205 GLfloat y,
206 GLfloat z,
207 GLfloat w)
208 {
209 GLfloat f[4] = {x,y,z,w};
210 return src_imm(c, f, 4);
211 }
212
213
214
215 /***********************************************************************
216 * Dest regs
217 */
218
219 static struct brw_fp_dst dst_reg(GLuint file, GLuint idx)
220 {
221 struct brw_fp_dst reg;
222 reg.file = file;
223 reg.index = idx;
224 reg.writemask = BRW_WRITEMASK_XYZW;
225 reg.indirect = 0;
226 reg.saturate = 0;
227 return reg;
228 }
229
230 static struct brw_fp_dst dst_mask( struct brw_fp_dst reg, int mask )
231 {
232 reg.writemask &= mask;
233 return reg;
234 }
235
236 static struct brw_fp_dst dst_undef( void )
237 {
238 return dst_reg(TGSI_FILE_NULL, 0);
239 }
240
241 static boolean dst_is_undef( struct brw_fp_dst dst )
242 {
243 return dst.file == TGSI_FILE_NULL;
244 }
245
246 static struct brw_fp_dst dst_saturate( struct brw_fp_dst reg, boolean flag )
247 {
248 reg.saturate = flag;
249 return reg;
250 }
251
252 static struct brw_fp_dst get_temp( struct brw_wm_compile *c )
253 {
254 int bit = ffs( ~c->fp_temp );
255
256 if (!bit) {
257 debug_printf("%s: out of temporaries\n", __FILE__);
258 }
259
260 c->fp_temp |= 1<<(bit-1);
261 return dst_reg(TGSI_FILE_TEMPORARY, c->fp_first_internal_temp+(bit-1));
262 }
263
264
265 static void release_temp( struct brw_wm_compile *c, struct brw_fp_dst temp )
266 {
267 c->fp_temp &= ~(1 << (temp.index - c->fp_first_internal_temp));
268 }
269
270
271 /***********************************************************************
272 * Instructions
273 */
274
275 static struct brw_fp_instruction *get_fp_inst(struct brw_wm_compile *c)
276 {
277 return &c->fp_instructions[c->nr_fp_insns++];
278 }
279
280 static struct brw_fp_instruction * emit_tex_op(struct brw_wm_compile *c,
281 GLuint op,
282 struct brw_fp_dst dest,
283 GLuint tex_unit,
284 GLuint target,
285 struct brw_fp_src src0,
286 struct brw_fp_src src1,
287 struct brw_fp_src src2 )
288 {
289 struct brw_fp_instruction *inst = get_fp_inst(c);
290
291 if (tex_unit || target)
292 assert(op == TGSI_OPCODE_TXP ||
293 op == TGSI_OPCODE_TXB ||
294 op == TGSI_OPCODE_TEX ||
295 op == WM_FB_WRITE);
296
297 inst->opcode = op;
298 inst->dst = dest;
299 inst->tex_unit = tex_unit;
300 inst->target = target;
301 inst->src[0] = src0;
302 inst->src[1] = src1;
303 inst->src[2] = src2;
304
305 return inst;
306 }
307
308
309 static INLINE void emit_op3(struct brw_wm_compile *c,
310 GLuint op,
311 struct brw_fp_dst dest,
312 struct brw_fp_src src0,
313 struct brw_fp_src src1,
314 struct brw_fp_src src2 )
315 {
316 emit_tex_op(c, op, dest, 0, 0, src0, src1, src2);
317 }
318
319
320 static INLINE void emit_op2(struct brw_wm_compile *c,
321 GLuint op,
322 struct brw_fp_dst dest,
323 struct brw_fp_src src0,
324 struct brw_fp_src src1)
325 {
326 emit_tex_op(c, op, dest, 0, 0, src0, src1, src_undef());
327 }
328
329 static INLINE void emit_op1(struct brw_wm_compile *c,
330 GLuint op,
331 struct brw_fp_dst dest,
332 struct brw_fp_src src0)
333 {
334 emit_tex_op(c, op, dest, 0, 0, src0, src_undef(), src_undef());
335 }
336
337 static INLINE void emit_op0(struct brw_wm_compile *c,
338 GLuint op,
339 struct brw_fp_dst dest)
340 {
341 emit_tex_op(c, op, dest, 0, 0, src_undef(), src_undef(), src_undef());
342 }
343
344
345
346 /* Many opcodes produce the same value across all the result channels.
347 * We'd rather not have to support that splatting in the opcode implementations,
348 * and brw_wm_pass*.c wants to optimize them out by shuffling references around
349 * anyway. We can easily get both by emitting the opcode to one channel, and
350 * then MOVing it to the others, which brw_wm_pass*.c already understands.
351 */
352 static void emit_scalar_insn(struct brw_wm_compile *c,
353 unsigned opcode,
354 struct brw_fp_dst dst,
355 struct brw_fp_src src0,
356 struct brw_fp_src src1,
357 struct brw_fp_src src2 )
358 {
359 unsigned first_chan = ffs(dst.writemask) - 1;
360 unsigned first_mask = 1 << first_chan;
361
362 if (dst.writemask == 0)
363 return;
364
365 emit_op3( c, opcode,
366 dst_mask(dst, first_mask),
367 src0, src1, src2 );
368
369 if (dst.writemask != first_mask) {
370 emit_op1(c, TGSI_OPCODE_MOV,
371 dst_mask(dst, ~first_mask),
372 src_scalar(src_reg_from_dst(dst), first_chan));
373 }
374 }
375
376
377 /***********************************************************************
378 * Special instructions for interpolation and other tasks
379 */
380
381 static struct brw_fp_src get_pixel_xy( struct brw_wm_compile *c )
382 {
383 if (src_is_undef(c->fp_pixel_xy)) {
384 struct brw_fp_dst pixel_xy = get_temp(c);
385 struct brw_fp_src payload_r0_depth = src_reg(BRW_FILE_PAYLOAD, PAYLOAD_DEPTH);
386
387
388 /* Emit the out calculations, and hold onto the results. Use
389 * two instructions as a temporary is required.
390 */
391 /* pixel_xy.xy = PIXELXY payload[0];
392 */
393 emit_op1(c,
394 WM_PIXELXY,
395 dst_mask(pixel_xy, BRW_WRITEMASK_XY),
396 payload_r0_depth);
397
398 c->fp_pixel_xy = src_reg_from_dst(pixel_xy);
399 }
400
401 return c->fp_pixel_xy;
402 }
403
404 static struct brw_fp_src get_delta_xy( struct brw_wm_compile *c )
405 {
406 if (src_is_undef(c->fp_delta_xy)) {
407 struct brw_fp_dst delta_xy = get_temp(c);
408 struct brw_fp_src pixel_xy = get_pixel_xy(c);
409 struct brw_fp_src payload_r0_depth = src_reg(BRW_FILE_PAYLOAD, PAYLOAD_DEPTH);
410
411 /* deltas.xy = DELTAXY pixel_xy, payload[0]
412 */
413 emit_op3(c,
414 WM_DELTAXY,
415 dst_mask(delta_xy, BRW_WRITEMASK_XY),
416 pixel_xy,
417 payload_r0_depth,
418 src_undef());
419
420 c->fp_delta_xy = src_reg_from_dst(delta_xy);
421 }
422
423 return c->fp_delta_xy;
424 }
425
426 static struct brw_fp_src get_pixel_w( struct brw_wm_compile *c )
427 {
428 if (src_is_undef(c->fp_pixel_w)) {
429 struct brw_fp_dst pixel_w = get_temp(c);
430 struct brw_fp_src deltas = get_delta_xy(c);
431
432 /* XXX: assuming position is always first -- valid?
433 */
434 struct brw_fp_src interp_wpos = src_reg(BRW_FILE_PAYLOAD, 0);
435
436 /* deltas.xyw = DELTAS2 deltas.xy, payload.interp_wpos.x
437 */
438 emit_op3(c,
439 WM_PIXELW,
440 dst_mask(pixel_w, BRW_WRITEMASK_W),
441 interp_wpos,
442 deltas,
443 src_undef());
444
445
446 c->fp_pixel_w = src_reg_from_dst(pixel_w);
447 }
448
449 return c->fp_pixel_w;
450 }
451
452
453 /***********************************************************************
454 * Emit INTERP instructions ahead of first use of each attrib.
455 */
456
457 static void emit_interp( struct brw_wm_compile *c,
458 GLuint idx,
459 GLuint semantic,
460 GLuint interp_mode )
461 {
462 struct brw_fp_dst dst = dst_reg(TGSI_FILE_INPUT, idx);
463 struct brw_fp_src interp = src_reg(BRW_FILE_PAYLOAD, idx);
464 struct brw_fp_src deltas = get_delta_xy(c);
465
466 /* Need to use PINTERP on attributes which have been
467 * multiplied by 1/W in the SF program, and LINTERP on those
468 * which have not:
469 */
470 switch (semantic) {
471 case TGSI_SEMANTIC_POSITION:
472 /* Have to treat wpos.xy specially:
473 */
474 emit_op1(c,
475 WM_WPOSXY,
476 dst_mask(dst, BRW_WRITEMASK_XY),
477 get_pixel_xy(c));
478
479 /* TGSI_FILE_INPUT.attr.xyzw = INTERP payload.interp[attr].x, deltas.xyw
480 */
481 emit_op2(c,
482 WM_LINTERP,
483 dst_mask(dst, BRW_WRITEMASK_ZW),
484 interp,
485 deltas);
486 break;
487
488 case TGSI_SEMANTIC_COLOR:
489 if (c->key.flat_shade) {
490 emit_op1(c,
491 WM_CINTERP,
492 dst,
493 interp);
494 }
495 else if (interp_mode == TGSI_INTERPOLATE_LINEAR) {
496 emit_op2(c,
497 WM_LINTERP,
498 dst,
499 interp,
500 deltas);
501 }
502 else {
503 emit_op3(c,
504 WM_PINTERP,
505 dst,
506 interp,
507 deltas,
508 get_pixel_w(c));
509 }
510
511 break;
512
513 case TGSI_SEMANTIC_FOG:
514 /* Interpolate the fog coordinate */
515 emit_op3(c,
516 WM_PINTERP,
517 dst_mask(dst, BRW_WRITEMASK_X),
518 interp,
519 deltas,
520 get_pixel_w(c));
521
522 emit_op1(c,
523 TGSI_OPCODE_MOV,
524 dst_mask(dst, BRW_WRITEMASK_YZ),
525 src_imm1f(c, 0.0));
526
527 emit_op1(c,
528 TGSI_OPCODE_MOV,
529 dst_mask(dst, BRW_WRITEMASK_W),
530 src_imm1f(c, 1.0));
531 break;
532
533 case TGSI_SEMANTIC_FACE:
534 /* XXX review/test this case */
535 emit_op0(c,
536 WM_FRONTFACING,
537 dst_mask(dst, BRW_WRITEMASK_X));
538
539 emit_op1(c,
540 TGSI_OPCODE_MOV,
541 dst_mask(dst, BRW_WRITEMASK_YZ),
542 src_imm1f(c, 0.0));
543
544 emit_op1(c,
545 TGSI_OPCODE_MOV,
546 dst_mask(dst, BRW_WRITEMASK_W),
547 src_imm1f(c, 1.0));
548 break;
549
550 case TGSI_SEMANTIC_PSIZE:
551 /* XXX review/test this case */
552 emit_op3(c,
553 WM_PINTERP,
554 dst_mask(dst, BRW_WRITEMASK_XY),
555 interp,
556 deltas,
557 get_pixel_w(c));
558
559 emit_op1(c,
560 TGSI_OPCODE_MOV,
561 dst_mask(dst, BRW_WRITEMASK_Z),
562 src_imm1f(c, 0.0f));
563
564 emit_op1(c,
565 TGSI_OPCODE_MOV,
566 dst_mask(dst, BRW_WRITEMASK_W),
567 src_imm1f(c, 1.0f));
568 break;
569
570 default:
571 switch (interp_mode) {
572 case TGSI_INTERPOLATE_CONSTANT:
573 emit_op1(c,
574 WM_CINTERP,
575 dst,
576 interp);
577 break;
578
579 case TGSI_INTERPOLATE_LINEAR:
580 emit_op2(c,
581 WM_LINTERP,
582 dst,
583 interp,
584 deltas);
585 break;
586
587 case TGSI_INTERPOLATE_PERSPECTIVE:
588 emit_op3(c,
589 WM_PINTERP,
590 dst,
591 interp,
592 deltas,
593 get_pixel_w(c));
594 break;
595 }
596 break;
597 }
598 }
599
600
601 /***********************************************************************
602 * Expand various instructions here to simpler forms.
603 */
604 static void precalc_dst( struct brw_wm_compile *c,
605 struct brw_fp_dst dst,
606 struct brw_fp_src src0,
607 struct brw_fp_src src1 )
608 {
609 if (dst.writemask & BRW_WRITEMASK_Y) {
610 /* dst.y = mul src0.y, src1.y
611 */
612 emit_op2(c,
613 TGSI_OPCODE_MUL,
614 dst_mask(dst, BRW_WRITEMASK_Y),
615 src0,
616 src1);
617 }
618
619 if (dst.writemask & BRW_WRITEMASK_XZ) {
620 /* dst.z = mov src0.zzzz
621 */
622 emit_op1(c,
623 TGSI_OPCODE_MOV,
624 dst_mask(dst, BRW_WRITEMASK_Z),
625 src_scalar(src0, Z));
626
627 /* dst.x = imm1f(1.0)
628 */
629 emit_op1(c,
630 TGSI_OPCODE_MOV,
631 dst_saturate(dst_mask(dst, BRW_WRITEMASK_X), 0),
632 src_imm1f(c, 1.0));
633 }
634 if (dst.writemask & BRW_WRITEMASK_W) {
635 /* dst.w = mov src1.w
636 */
637 emit_op1(c,
638 TGSI_OPCODE_MOV,
639 dst_mask(dst, BRW_WRITEMASK_W),
640 src1);
641 }
642 }
643
644
645 static void precalc_lit( struct brw_wm_compile *c,
646 struct brw_fp_dst dst,
647 struct brw_fp_src src0 )
648 {
649 if (dst.writemask & BRW_WRITEMASK_XW) {
650 /* dst.xw = imm(1.0f)
651 */
652 emit_op1(c,
653 TGSI_OPCODE_MOV,
654 dst_saturate(dst_mask(dst, BRW_WRITEMASK_XW), 0),
655 src_imm1f(c, 1.0f));
656 }
657
658 if (dst.writemask & BRW_WRITEMASK_YZ) {
659 emit_op1(c,
660 TGSI_OPCODE_LIT,
661 dst_mask(dst, BRW_WRITEMASK_YZ),
662 src0);
663 }
664 }
665
666
667 /**
668 * Some TEX instructions require extra code, cube map coordinate
669 * normalization, or coordinate scaling for RECT textures, etc.
670 * This function emits those extra instructions and the TEX
671 * instruction itself.
672 */
673 static void precalc_tex( struct brw_wm_compile *c,
674 struct brw_fp_dst dst,
675 unsigned target,
676 unsigned unit,
677 struct brw_fp_src src0 )
678 {
679 struct brw_fp_src coord = src_undef();
680 struct brw_fp_dst tmp = dst_undef();
681
682 assert(unit < BRW_MAX_TEX_UNIT);
683
684 /* Cubemap: find longest component of coord vector and normalize
685 * it.
686 */
687 if (target == TGSI_TEXTURE_CUBE) {
688 struct brw_fp_src tmpsrc;
689
690 tmp = get_temp(c);
691 tmpsrc = src_reg_from_dst(tmp);
692
693 /* tmp = abs(src0) */
694 emit_op1(c,
695 TGSI_OPCODE_MOV,
696 tmp,
697 src_abs(src0));
698
699 /* tmp.X = MAX(tmp.X, tmp.Y) */
700 emit_op2(c, TGSI_OPCODE_MAX,
701 dst_mask(tmp, BRW_WRITEMASK_X),
702 src_scalar(tmpsrc, X),
703 src_scalar(tmpsrc, Y));
704
705 /* tmp.X = MAX(tmp.X, tmp.Z) */
706 emit_op2(c, TGSI_OPCODE_MAX,
707 dst_mask(tmp, BRW_WRITEMASK_X),
708 tmpsrc,
709 src_scalar(tmpsrc, Z));
710
711 /* tmp.X = 1 / tmp.X */
712 emit_op1(c, TGSI_OPCODE_RCP,
713 dst_mask(tmp, BRW_WRITEMASK_X),
714 tmpsrc);
715
716 /* tmp = src0 * tmp.xxxx */
717 emit_op2(c, TGSI_OPCODE_MUL,
718 tmp,
719 src0,
720 src_scalar(tmpsrc, X));
721
722 coord = tmpsrc;
723 }
724 else if (target == TGSI_TEXTURE_RECT ||
725 target == TGSI_TEXTURE_SHADOWRECT) {
726 /* XXX: need a mechanism for internally generated constants.
727 */
728 coord = src0;
729 }
730 else {
731 coord = src0;
732 }
733
734 /* Need to emit YUV texture conversions by hand. Probably need to
735 * do this here - the alternative is in brw_wm_emit.c, but the
736 * conversion requires allocating a temporary variable which we
737 * don't have the facility to do that late in the compilation.
738 */
739 if (c->key.yuvtex_mask & (1 << unit)) {
740 /* convert ycbcr to RGBA */
741 GLboolean swap_uv = c->key.yuvtex_swap_mask & (1<<unit);
742 struct brw_fp_dst tmp = get_temp(c);
743 struct brw_fp_src tmpsrc = src_reg_from_dst(tmp);
744 struct brw_fp_src C0 = src_imm4f( c, -.5, -.0625, -.5, 1.164 );
745 struct brw_fp_src C1 = src_imm4f( c, 1.596, -0.813, 2.018, -.391 );
746
747 /* tmp = TEX ...
748 */
749 emit_tex_op(c,
750 TGSI_OPCODE_TEX,
751 dst_saturate(tmp, dst.saturate),
752 unit,
753 target,
754 coord,
755 src_undef(),
756 src_undef());
757
758 /* tmp.xyz = ADD TMP, C0
759 */
760 emit_op2(c, TGSI_OPCODE_ADD,
761 dst_mask(tmp, BRW_WRITEMASK_XYZ),
762 tmpsrc,
763 C0);
764
765 /* YUV.y = MUL YUV.y, C0.w
766 */
767 emit_op2(c, TGSI_OPCODE_MUL,
768 dst_mask(tmp, BRW_WRITEMASK_Y),
769 tmpsrc,
770 src_scalar(C0, W));
771
772 /*
773 * if (UV swaped)
774 * RGB.xyz = MAD YUV.zzx, C1, YUV.y
775 * else
776 * RGB.xyz = MAD YUV.xxz, C1, YUV.y
777 */
778
779 emit_op3(c, TGSI_OPCODE_MAD,
780 dst_mask(dst, BRW_WRITEMASK_XYZ),
781 ( swap_uv ?
782 src_swizzle(tmpsrc, Z,Z,X,X) :
783 src_swizzle(tmpsrc, X,X,Z,Z)),
784 C1,
785 src_scalar(tmpsrc, Y));
786
787 /* RGB.y = MAD YUV.z, C1.w, RGB.y
788 */
789 emit_op3(c,
790 TGSI_OPCODE_MAD,
791 dst_mask(dst, BRW_WRITEMASK_Y),
792 src_scalar(tmpsrc, Z),
793 src_scalar(C1, W),
794 src_scalar(src_reg_from_dst(dst), Y));
795
796 release_temp(c, tmp);
797 }
798 else {
799 /* ordinary RGBA tex instruction */
800 emit_tex_op(c,
801 TGSI_OPCODE_TEX,
802 dst,
803 unit,
804 target,
805 coord,
806 src_undef(),
807 src_undef());
808 }
809
810 /* XXX: add GL_EXT_texture_swizzle support to gallium -- by
811 * generating shader varients in mesa state tracker.
812 */
813
814 /* Release this temp if we ended up allocating it:
815 */
816 if (!dst_is_undef(tmp))
817 release_temp(c, tmp);
818 }
819
820
821 /**
822 * Check if the given TXP instruction really needs the divide-by-W step.
823 */
824 static GLboolean projtex( struct brw_wm_compile *c,
825 unsigned target,
826 struct brw_fp_src src )
827 {
828 /* Only try to detect the simplest cases. Could detect (later)
829 * cases where we are trying to emit code like RCP {1.0}, MUL x,
830 * {1.0}, and so on.
831 *
832 * More complex cases than this typically only arise from
833 * user-provided fragment programs anyway:
834 */
835 if (target == TGSI_TEXTURE_CUBE)
836 return GL_FALSE; /* ut2004 gun rendering !?! */
837
838 if (src.file == TGSI_FILE_INPUT &&
839 BRW_GET_SWZ(src.swizzle, W) == W &&
840 c->fp->info.input_interpolate[src.index] != TGSI_INTERPOLATE_PERSPECTIVE)
841 return GL_FALSE;
842
843 return GL_TRUE;
844 }
845
846
847 /**
848 * Emit code for TXP.
849 */
850 static void precalc_txp( struct brw_wm_compile *c,
851 struct brw_fp_dst dst,
852 unsigned target,
853 unsigned unit,
854 struct brw_fp_src src0 )
855 {
856 if (projtex(c, target, src0)) {
857 struct brw_fp_dst tmp = get_temp(c);
858
859 /* tmp0.w = RCP inst.arg[0][3]
860 */
861 emit_op1(c,
862 TGSI_OPCODE_RCP,
863 dst_mask(tmp, BRW_WRITEMASK_W),
864 src_scalar(src0, W));
865
866 /* tmp0.xyz = MUL inst.arg[0], tmp0.wwww
867 */
868 emit_op2(c,
869 TGSI_OPCODE_MUL,
870 dst_mask(tmp, BRW_WRITEMASK_XYZ),
871 src0,
872 src_scalar(src_reg_from_dst(tmp), W));
873
874 /* dst = TEX tmp0
875 */
876 precalc_tex(c,
877 dst,
878 target,
879 unit,
880 src_reg_from_dst(tmp));
881
882 release_temp(c, tmp);
883 }
884 else
885 {
886 /* dst = TEX src0
887 */
888 precalc_tex(c, dst, target, unit, src0);
889 }
890 }
891
892
893 /* XXX: note this returns a src_reg.
894 */
895 static struct brw_fp_src
896 find_output_by_semantic( struct brw_wm_compile *c,
897 unsigned semantic,
898 unsigned index )
899 {
900 const struct tgsi_shader_info *info = &c->fp->info;
901 unsigned i;
902
903 for (i = 0; i < info->num_outputs; i++)
904 if (info->output_semantic_name[i] == semantic &&
905 info->output_semantic_index[i] == index)
906 return src_reg( TGSI_FILE_OUTPUT, i );
907
908 /* If not found, return some arbitrary immediate value:
909 *
910 * XXX: this is a good idea but immediates are up generating extra
911 * curbe entries atm, as they would have in the original driver.
912 */
913 return src_reg( TGSI_FILE_OUTPUT, 0 ); /* src_imm1f(c, 1.0); */
914 }
915
916
917 static void emit_fb_write( struct brw_wm_compile *c )
918 {
919 struct brw_fp_src payload_r0_depth = src_reg(BRW_FILE_PAYLOAD, PAYLOAD_DEPTH);
920 struct brw_fp_src outdepth = find_output_by_semantic(c, TGSI_SEMANTIC_POSITION, 0);
921 GLuint i;
922
923
924 outdepth = src_scalar(outdepth, Z);
925
926 for (i = 0 ; i < c->key.nr_cbufs; i++) {
927 struct brw_fp_src outcolor;
928
929 outcolor = find_output_by_semantic(c, TGSI_SEMANTIC_COLOR, i);
930
931 /* Use emit_tex_op so that we can specify the inst->target
932 * field, which is abused to contain the FB write target and the
933 * EOT marker
934 */
935 emit_tex_op(c, WM_FB_WRITE,
936 dst_undef(),
937 (i == c->key.nr_cbufs - 1), /* EOT */
938 i,
939 outcolor,
940 payload_r0_depth,
941 outdepth);
942 }
943 }
944
945
946 static struct brw_fp_dst translate_dst( struct brw_wm_compile *c,
947 const struct tgsi_full_dst_register *dst,
948 unsigned saturate )
949 {
950 struct brw_fp_dst out;
951
952 out.file = dst->DstRegister.File;
953 out.index = dst->DstRegister.Index;
954 out.writemask = dst->DstRegister.WriteMask;
955 out.indirect = dst->DstRegister.Indirect;
956 out.saturate = (saturate == TGSI_SAT_ZERO_ONE);
957
958 if (out.indirect) {
959 assert(dst->DstRegisterInd.File == TGSI_FILE_ADDRESS);
960 assert(dst->DstRegisterInd.Index == 0);
961 }
962
963 return out;
964 }
965
966
967 static struct brw_fp_src translate_src( struct brw_wm_compile *c,
968 const struct tgsi_full_src_register *src )
969 {
970 struct brw_fp_src out;
971
972 out.file = src->SrcRegister.File;
973 out.index = src->SrcRegister.Index;
974 out.indirect = src->SrcRegister.Indirect;
975
976 out.swizzle = ((src->SrcRegister.SwizzleX << 0) |
977 (src->SrcRegister.SwizzleY << 2) |
978 (src->SrcRegister.SwizzleZ << 4) |
979 (src->SrcRegister.SwizzleW << 6));
980
981 switch (tgsi_util_get_full_src_register_sign_mode( src, 0 )) {
982 case TGSI_UTIL_SIGN_CLEAR:
983 out.abs = 1;
984 out.negate = 0;
985 break;
986
987 case TGSI_UTIL_SIGN_SET:
988 out.abs = 1;
989 out.negate = 1;
990 break;
991
992 case TGSI_UTIL_SIGN_TOGGLE:
993 out.abs = 0;
994 out.negate = 1;
995 break;
996
997 case TGSI_UTIL_SIGN_KEEP:
998 default:
999 out.abs = 0;
1000 out.negate = 0;
1001 break;
1002 }
1003
1004 if (out.indirect) {
1005 assert(src->SrcRegisterInd.File == TGSI_FILE_ADDRESS);
1006 assert(src->SrcRegisterInd.Index == 0);
1007 }
1008
1009 return out;
1010 }
1011
1012
1013
1014 static void emit_insn( struct brw_wm_compile *c,
1015 const struct tgsi_full_instruction *inst )
1016 {
1017 unsigned opcode = inst->Instruction.Opcode;
1018 struct brw_fp_dst dst;
1019 struct brw_fp_src src[3];
1020 int i;
1021
1022 dst = translate_dst( c, &inst->FullDstRegisters[0],
1023 inst->Instruction.Saturate );
1024
1025 for (i = 0; i < inst->Instruction.NumSrcRegs; i++)
1026 src[i] = translate_src( c, &inst->FullSrcRegisters[i] );
1027
1028 switch (opcode) {
1029 case TGSI_OPCODE_ABS:
1030 emit_op1(c, TGSI_OPCODE_MOV,
1031 dst,
1032 src_abs(src[0]));
1033 break;
1034
1035 case TGSI_OPCODE_SUB:
1036 emit_op2(c, TGSI_OPCODE_ADD,
1037 dst,
1038 src[0],
1039 src_negate(src[1]));
1040 break;
1041
1042 case TGSI_OPCODE_SCS:
1043 emit_op1(c, TGSI_OPCODE_SCS,
1044 dst_mask(dst, BRW_WRITEMASK_XY),
1045 src[0]);
1046 break;
1047
1048 case TGSI_OPCODE_DST:
1049 precalc_dst(c, dst, src[0], src[1]);
1050 break;
1051
1052 case TGSI_OPCODE_LIT:
1053 precalc_lit(c, dst, src[0]);
1054 break;
1055
1056 case TGSI_OPCODE_TEX:
1057 precalc_tex(c, dst,
1058 inst->InstructionExtTexture.Texture,
1059 src[0].file, /* sampler unit */
1060 src[1] );
1061 break;
1062
1063 case TGSI_OPCODE_TXP:
1064 precalc_txp(c, dst,
1065 inst->InstructionExtTexture.Texture,
1066 src[0].file, /* sampler unit */
1067 src[1] );
1068 break;
1069
1070 case TGSI_OPCODE_TXB:
1071 /* XXX: TXB not done
1072 */
1073 precalc_tex(c, dst,
1074 inst->InstructionExtTexture.Texture,
1075 src[0].file, /* sampler unit */
1076 src[1] );
1077 break;
1078
1079 case TGSI_OPCODE_XPD:
1080 emit_op2(c, TGSI_OPCODE_XPD,
1081 dst_mask(dst, BRW_WRITEMASK_XYZ),
1082 src[0],
1083 src[1]);
1084 break;
1085
1086 case TGSI_OPCODE_KIL:
1087 emit_op1(c, TGSI_OPCODE_KIL,
1088 dst_mask(dst_undef(), 0),
1089 src[0]);
1090 break;
1091
1092 case TGSI_OPCODE_END:
1093 emit_fb_write(c);
1094 break;
1095 default:
1096 if (!c->key.has_flow_control &&
1097 brw_wm_is_scalar_result(opcode))
1098 emit_scalar_insn(c, opcode, dst, src[0], src[1], src[2]);
1099 else
1100 emit_op3(c, opcode, dst, src[0], src[1], src[2]);
1101 break;
1102 }
1103 }
1104
1105 /**
1106 * Initial pass for fragment program code generation.
1107 * This function is used by both the GLSL and non-GLSL paths.
1108 */
1109 int brw_wm_pass_fp( struct brw_wm_compile *c )
1110 {
1111 struct brw_fragment_shader *fs = c->fp;
1112 struct tgsi_parse_context parse;
1113 struct tgsi_full_instruction *inst;
1114 struct tgsi_full_declaration *decl;
1115 const float *imm;
1116 GLuint size;
1117 GLuint i;
1118
1119 if (BRW_DEBUG & DEBUG_WM) {
1120 debug_printf("pre-fp:\n");
1121 tgsi_dump(fs->tokens, 0);
1122 }
1123
1124 c->fp_pixel_xy = src_undef();
1125 c->fp_delta_xy = src_undef();
1126 c->fp_pixel_w = src_undef();
1127 c->nr_fp_insns = 0;
1128 c->nr_immediates = 0;
1129
1130
1131 /* Loop over all instructions doing assorted simplifications and
1132 * transformations.
1133 */
1134 tgsi_parse_init( &parse, fs->tokens );
1135 while( !tgsi_parse_end_of_tokens( &parse ) ) {
1136 tgsi_parse_token( &parse );
1137
1138 switch( parse.FullToken.Token.Type ) {
1139 case TGSI_TOKEN_TYPE_DECLARATION:
1140 /* Turn intput declarations into special WM_* instructions.
1141 *
1142 * XXX: For non-branching shaders, consider deferring variable
1143 * initialization as late as possible to minimize register
1144 * usage. This is how the original BRW driver worked.
1145 *
1146 * In a branching shader, must preamble instructions at decl
1147 * time, as instruction order in the shader does not
1148 * correspond to the order instructions are executed in the
1149 * wild.
1150 *
1151 * This is where special instructions such as WM_CINTERP,
1152 * WM_LINTERP, WM_PINTERP and WM_WPOSXY are emitted to
1153 * compute shader inputs from the payload registers and pixel
1154 * position.
1155 */
1156 decl = &parse.FullToken.FullDeclaration;
1157 if( decl->Declaration.File == TGSI_FILE_INPUT ) {
1158 unsigned first, last, mask;
1159 unsigned attrib;
1160
1161 first = decl->DeclarationRange.First;
1162 last = decl->DeclarationRange.Last;
1163 mask = decl->Declaration.UsageMask;
1164
1165 for (attrib = first; attrib <= last; attrib++) {
1166 emit_interp(c,
1167 attrib,
1168 decl->Semantic.SemanticName,
1169 decl->Declaration.Interpolate );
1170 }
1171 }
1172
1173 break;
1174
1175 case TGSI_TOKEN_TYPE_IMMEDIATE:
1176 /* Unlike VS programs we can probably manage fine encoding
1177 * immediate values directly into the emitted EU
1178 * instructions, as we probably only need to reference one
1179 * float value per instruction. Just save the data for now
1180 * and use directly later.
1181 */
1182 i = c->nr_immediates++;
1183 imm = &parse.FullToken.FullImmediate.u[i].Float;
1184 size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
1185
1186 if (c->nr_immediates >= BRW_WM_MAX_CONST)
1187 return PIPE_ERROR_OUT_OF_MEMORY;
1188
1189 for (i = 0; i < size; i++)
1190 c->immediate[c->nr_immediates].v[i] = imm[i];
1191
1192 for (; i < 4; i++)
1193 c->immediate[c->nr_immediates].v[i] = 0.0;
1194
1195 c->immediate[c->nr_immediates].nr = size;
1196 c->nr_immediates++;
1197 break;
1198
1199 case TGSI_TOKEN_TYPE_INSTRUCTION:
1200 inst = &parse.FullToken.FullInstruction;
1201 emit_insn(c, inst);
1202 break;
1203 }
1204 }
1205
1206 if (BRW_DEBUG & DEBUG_WM) {
1207 brw_wm_print_fp_program( c, "pass_fp" );
1208 debug_printf("\n");
1209 }
1210
1211 return c->error;
1212 }
1213