st/dri: Don't check for null when user ensures non-null
[mesa.git] / src / gallium / drivers / i965 / brw_wm_fp.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "pipe/p_shader_tokens.h"
34
35 #include "util/u_math.h"
36 #include "util/u_memory.h"
37
38 #include "tgsi/tgsi_parse.h"
39 #include "tgsi/tgsi_dump.h"
40 #include "tgsi/tgsi_info.h"
41 #include "tgsi/tgsi_util.h"
42
43 #include "brw_wm.h"
44 #include "brw_debug.h"
45
46
47 /***********************************************************************
48 * Source regs
49 */
50
51 static struct brw_fp_src src_reg(GLuint file, GLuint idx)
52 {
53 struct brw_fp_src reg;
54 reg.file = file;
55 reg.index = idx;
56 reg.swizzle = BRW_SWIZZLE_XYZW;
57 reg.indirect = 0;
58 reg.negate = 0;
59 reg.abs = 0;
60 return reg;
61 }
62
63 static struct brw_fp_src src_reg_from_dst(struct brw_fp_dst dst)
64 {
65 return src_reg(dst.file, dst.index);
66 }
67
68 static struct brw_fp_src src_undef( void )
69 {
70 return src_reg(TGSI_FILE_NULL, 0);
71 }
72
73 static GLboolean src_is_undef(struct brw_fp_src src)
74 {
75 return src.file == TGSI_FILE_NULL;
76 }
77
78 static struct brw_fp_src src_swizzle( struct brw_fp_src reg, int x, int y, int z, int w )
79 {
80 unsigned swz = reg.swizzle;
81
82 reg.swizzle = ( BRW_GET_SWZ(swz, x) << 0 |
83 BRW_GET_SWZ(swz, y) << 2 |
84 BRW_GET_SWZ(swz, z) << 4 |
85 BRW_GET_SWZ(swz, w) << 6 );
86
87 return reg;
88 }
89
90 static struct brw_fp_src src_scalar( struct brw_fp_src reg, int x )
91 {
92 return src_swizzle(reg, x, x, x, x);
93 }
94
95 static struct brw_fp_src src_abs( struct brw_fp_src src )
96 {
97 src.negate = 0;
98 src.abs = 1;
99 return src;
100 }
101
102 static struct brw_fp_src src_negate( struct brw_fp_src src )
103 {
104 src.negate = 1;
105 src.abs = 0;
106 return src;
107 }
108
109
/**
 * Try to express the \p nr floats in \p v as a swizzle of the immediate
 * vector \p v2, appending values to \p v2 where necessary.
 *
 * \param v        values wanted
 * \param nr       number of values in \p v
 * \param v2       immediate vector being matched against (room for 4)
 * \param nr2      in/out: number of valid entries in \p v2
 * \param swizzle  out: two bits per wanted value, value i in bits
 *                 [2i, 2i+1], giving its index in \p v2
 *
 * \return non-zero on success, zero when \p v2 cannot hold all values.
 *
 * The operation is all-or-nothing: on failure \p v2 and \p *nr2 are left
 * exactly as they were (and \p *swizzle is zero), so a failed attempt
 * does not use up free slots of an immediate that the caller then
 * abandons.
 */
static int match_or_expand_immediate( const float *v,
                                      unsigned nr,
                                      float *v2,
                                      unsigned *nr2,
                                      unsigned *swizzle )
{
   const unsigned none = ~0u;
   float pending[4];          /* values to append if the match succeeds */
   unsigned nr_pending = 0;
   unsigned swz = 0;
   unsigned i, j;

   *swizzle = 0;

   for (i = 0; i < nr; i++) {
      unsigned slot = none;

      /* Already present in the immediate? */
      for (j = 0; j < *nr2 && slot == none; j++) {
         if (v[i] == v2[j])
            slot = j;
      }

      /* Or queued for appending by an earlier value of this request? */
      for (j = 0; j < nr_pending && slot == none; j++) {
         if (v[i] == pending[j])
            slot = *nr2 + j;
      }

      if (slot == none) {
         if (*nr2 + nr_pending >= 4)
            return 0;

         slot = *nr2 + nr_pending;
         pending[nr_pending++] = v[i];
      }

      swz |= slot << (i * 2);
   }

   /* Success: commit the appended values. */
   for (j = 0; j < nr_pending; j++)
      v2[*nr2 + j] = pending[j];

   *nr2 += nr_pending;
   *swizzle = swz;

   return 1;
}
142
143
144
145 /* Internally generated immediates: overkill...
146 */
147 static struct brw_fp_src src_imm( struct brw_wm_compile *c,
148 const GLfloat *v,
149 unsigned nr)
150 {
151 unsigned i, j;
152 unsigned swizzle;
153
154 /* Could do a first pass where we examine all existing immediates
155 * without expanding.
156 */
157
158 for (i = 0; i < c->nr_immediates; i++) {
159 if (match_or_expand_immediate( v,
160 nr,
161 c->immediate[i].v,
162 &c->immediate[i].nr,
163 &swizzle ))
164 goto out;
165 }
166
167 if (c->nr_immediates < Elements(c->immediate)) {
168 i = c->nr_immediates++;
169 if (match_or_expand_immediate( v,
170 nr,
171 c->immediate[i].v,
172 &c->immediate[i].nr,
173 &swizzle ))
174 goto out;
175 }
176
177 c->error = 1;
178 return src_undef();
179
180 out:
181 /* Make sure that all referenced elements are from this immediate.
182 * Has the effect of making size-one immediates into scalars.
183 */
184 for (j = nr; j < 4; j++)
185 swizzle |= (swizzle & 0x3) << (j * 2);
186
187 return src_swizzle( src_reg( TGSI_FILE_IMMEDIATE, i ),
188 BRW_GET_SWZ(swizzle, X),
189 BRW_GET_SWZ(swizzle, Y),
190 BRW_GET_SWZ(swizzle, Z),
191 BRW_GET_SWZ(swizzle, W) );
192 }
193
194
195
196 static struct brw_fp_src src_imm1f( struct brw_wm_compile *c,
197 GLfloat f )
198 {
199 return src_imm(c, &f, 1);
200 }
201
202 static struct brw_fp_src src_imm4f( struct brw_wm_compile *c,
203 GLfloat x,
204 GLfloat y,
205 GLfloat z,
206 GLfloat w)
207 {
208 GLfloat f[4] = {x,y,z,w};
209 return src_imm(c, f, 4);
210 }
211
212
213
214 /***********************************************************************
215 * Dest regs
216 */
217
218 static struct brw_fp_dst dst_reg(GLuint file, GLuint idx)
219 {
220 struct brw_fp_dst reg;
221 reg.file = file;
222 reg.index = idx;
223 reg.writemask = BRW_WRITEMASK_XYZW;
224 reg.indirect = 0;
225 reg.saturate = 0;
226 return reg;
227 }
228
229 static struct brw_fp_dst dst_mask( struct brw_fp_dst reg, int mask )
230 {
231 reg.writemask &= mask;
232 return reg;
233 }
234
235 static struct brw_fp_dst dst_undef( void )
236 {
237 return dst_reg(TGSI_FILE_NULL, 0);
238 }
239
240 static boolean dst_is_undef( struct brw_fp_dst dst )
241 {
242 return dst.file == TGSI_FILE_NULL;
243 }
244
245 static struct brw_fp_dst dst_saturate( struct brw_fp_dst reg, boolean flag )
246 {
247 reg.saturate = flag;
248 return reg;
249 }
250
251 static struct brw_fp_dst get_temp( struct brw_wm_compile *c )
252 {
253 int bit = ffs( ~c->fp_temp );
254
255 if (!bit) {
256 debug_printf("%s: out of temporaries\n", __FILE__);
257 }
258
259 c->fp_temp |= 1<<(bit-1);
260 return dst_reg(TGSI_FILE_TEMPORARY, c->fp_first_internal_temp+(bit-1));
261 }
262
263
264 static void release_temp( struct brw_wm_compile *c, struct brw_fp_dst temp )
265 {
266 c->fp_temp &= ~(1 << (temp.index - c->fp_first_internal_temp));
267 }
268
269
270 /***********************************************************************
271 * Instructions
272 */
273
274 static struct brw_fp_instruction *get_fp_inst(struct brw_wm_compile *c)
275 {
276 return &c->fp_instructions[c->nr_fp_insns++];
277 }
278
279 static struct brw_fp_instruction * emit_tex_op(struct brw_wm_compile *c,
280 GLuint op,
281 struct brw_fp_dst dest,
282 GLuint tex_unit,
283 GLuint target,
284 GLuint sampler,
285 struct brw_fp_src src0,
286 struct brw_fp_src src1,
287 struct brw_fp_src src2 )
288 {
289 struct brw_fp_instruction *inst = get_fp_inst(c);
290
291 if (tex_unit || target)
292 assert(op == TGSI_OPCODE_TXP ||
293 op == TGSI_OPCODE_TXB ||
294 op == TGSI_OPCODE_TEX ||
295 op == WM_FB_WRITE);
296
297 inst->opcode = op;
298 inst->dst = dest;
299 inst->tex_unit = tex_unit;
300 inst->target = target;
301 inst->sampler = sampler;
302 inst->src[0] = src0;
303 inst->src[1] = src1;
304 inst->src[2] = src2;
305
306 return inst;
307 }
308
309
310 static INLINE void emit_op3(struct brw_wm_compile *c,
311 GLuint op,
312 struct brw_fp_dst dest,
313 struct brw_fp_src src0,
314 struct brw_fp_src src1,
315 struct brw_fp_src src2 )
316 {
317 emit_tex_op(c, op, dest, 0, 0, 0, src0, src1, src2);
318 }
319
320
321 static INLINE void emit_op2(struct brw_wm_compile *c,
322 GLuint op,
323 struct brw_fp_dst dest,
324 struct brw_fp_src src0,
325 struct brw_fp_src src1)
326 {
327 emit_tex_op(c, op, dest, 0, 0, 0, src0, src1, src_undef());
328 }
329
330 static INLINE void emit_op1(struct brw_wm_compile *c,
331 GLuint op,
332 struct brw_fp_dst dest,
333 struct brw_fp_src src0)
334 {
335 emit_tex_op(c, op, dest, 0, 0, 0, src0, src_undef(), src_undef());
336 }
337
338 static INLINE void emit_op0(struct brw_wm_compile *c,
339 GLuint op,
340 struct brw_fp_dst dest)
341 {
342 emit_tex_op(c, op, dest, 0, 0, 0, src_undef(), src_undef(), src_undef());
343 }
344
345
346
347 /* Many opcodes produce the same value across all the result channels.
348 * We'd rather not have to support that splatting in the opcode implementations,
349 * and brw_wm_pass*.c wants to optimize them out by shuffling references around
350 * anyway. We can easily get both by emitting the opcode to one channel, and
351 * then MOVing it to the others, which brw_wm_pass*.c already understands.
352 */
353 static void emit_scalar_insn(struct brw_wm_compile *c,
354 unsigned opcode,
355 struct brw_fp_dst dst,
356 struct brw_fp_src src0,
357 struct brw_fp_src src1,
358 struct brw_fp_src src2 )
359 {
360 unsigned first_chan = ffs(dst.writemask) - 1;
361 unsigned first_mask = 1 << first_chan;
362
363 if (dst.writemask == 0)
364 return;
365
366 emit_op3( c, opcode,
367 dst_mask(dst, first_mask),
368 src0, src1, src2 );
369
370 if (dst.writemask != first_mask) {
371 emit_op1(c, TGSI_OPCODE_MOV,
372 dst_mask(dst, ~first_mask),
373 src_scalar(src_reg_from_dst(dst), first_chan));
374 }
375 }
376
377
378 /***********************************************************************
379 * Special instructions for interpolation and other tasks
380 */
381
382 static struct brw_fp_src get_pixel_xy( struct brw_wm_compile *c )
383 {
384 if (src_is_undef(c->fp_pixel_xy)) {
385 struct brw_fp_dst pixel_xy = get_temp(c);
386 struct brw_fp_src payload_r0_depth = src_reg(BRW_FILE_PAYLOAD, PAYLOAD_DEPTH);
387
388
389 /* Emit the out calculations, and hold onto the results. Use
390 * two instructions as a temporary is required.
391 */
392 /* pixel_xy.xy = PIXELXY payload[0];
393 */
394 emit_op1(c,
395 WM_PIXELXY,
396 dst_mask(pixel_xy, BRW_WRITEMASK_XY),
397 payload_r0_depth);
398
399 c->fp_pixel_xy = src_reg_from_dst(pixel_xy);
400 }
401
402 return c->fp_pixel_xy;
403 }
404
405 static struct brw_fp_src get_delta_xy( struct brw_wm_compile *c )
406 {
407 if (src_is_undef(c->fp_delta_xy)) {
408 struct brw_fp_dst delta_xy = get_temp(c);
409 struct brw_fp_src pixel_xy = get_pixel_xy(c);
410 struct brw_fp_src payload_r0_depth = src_reg(BRW_FILE_PAYLOAD, PAYLOAD_DEPTH);
411
412 /* deltas.xy = DELTAXY pixel_xy, payload[0]
413 */
414 emit_op3(c,
415 WM_DELTAXY,
416 dst_mask(delta_xy, BRW_WRITEMASK_XY),
417 pixel_xy,
418 payload_r0_depth,
419 src_undef());
420
421 c->fp_delta_xy = src_reg_from_dst(delta_xy);
422 }
423
424 return c->fp_delta_xy;
425 }
426
427 static struct brw_fp_src get_pixel_w( struct brw_wm_compile *c )
428 {
429 if (src_is_undef(c->fp_pixel_w)) {
430 struct brw_fp_dst pixel_w = get_temp(c);
431 struct brw_fp_src deltas = get_delta_xy(c);
432
433 /* XXX: assuming position is always first -- valid?
434 */
435 struct brw_fp_src interp_wpos = src_reg(BRW_FILE_PAYLOAD, 0);
436
437 /* deltas.xyw = DELTAS2 deltas.xy, payload.interp_wpos.x
438 */
439 emit_op3(c,
440 WM_PIXELW,
441 dst_mask(pixel_w, BRW_WRITEMASK_W),
442 interp_wpos,
443 deltas,
444 src_undef());
445
446
447 c->fp_pixel_w = src_reg_from_dst(pixel_w);
448 }
449
450 return c->fp_pixel_w;
451 }
452
453
454 /***********************************************************************
455 * Emit INTERP instructions ahead of first use of each attrib.
456 */
457
458 static void emit_interp( struct brw_wm_compile *c,
459 GLuint idx,
460 GLuint semantic,
461 GLuint interp_mode )
462 {
463 struct brw_fp_dst dst = dst_reg(TGSI_FILE_INPUT, idx);
464 struct brw_fp_src interp = src_reg(BRW_FILE_PAYLOAD, idx);
465 struct brw_fp_src deltas = get_delta_xy(c);
466
467 /* Need to use PINTERP on attributes which have been
468 * multiplied by 1/W in the SF program, and LINTERP on those
469 * which have not:
470 */
471 switch (semantic) {
472 case TGSI_SEMANTIC_POSITION:
473 /* Have to treat wpos.xy specially:
474 */
475 emit_op1(c,
476 WM_WPOSXY,
477 dst_mask(dst, BRW_WRITEMASK_XY),
478 get_pixel_xy(c));
479
480 /* TGSI_FILE_INPUT.attr.xyzw = INTERP payload.interp[attr].x, deltas.xyw
481 */
482 emit_op2(c,
483 WM_LINTERP,
484 dst_mask(dst, BRW_WRITEMASK_ZW),
485 interp,
486 deltas);
487 break;
488
489 case TGSI_SEMANTIC_COLOR:
490 if (c->key.flat_shade) {
491 emit_op1(c,
492 WM_CINTERP,
493 dst,
494 interp);
495 }
496 else if (interp_mode == TGSI_INTERPOLATE_LINEAR) {
497 emit_op2(c,
498 WM_LINTERP,
499 dst,
500 interp,
501 deltas);
502 }
503 else {
504 emit_op3(c,
505 WM_PINTERP,
506 dst,
507 interp,
508 deltas,
509 get_pixel_w(c));
510 }
511
512 break;
513
514 case TGSI_SEMANTIC_FOG:
515 /* Interpolate the fog coordinate */
516 emit_op3(c,
517 WM_PINTERP,
518 dst_mask(dst, BRW_WRITEMASK_X),
519 interp,
520 deltas,
521 get_pixel_w(c));
522
523 emit_op1(c,
524 TGSI_OPCODE_MOV,
525 dst_mask(dst, BRW_WRITEMASK_YZ),
526 src_imm1f(c, 0.0));
527
528 emit_op1(c,
529 TGSI_OPCODE_MOV,
530 dst_mask(dst, BRW_WRITEMASK_W),
531 src_imm1f(c, 1.0));
532 break;
533
534 case TGSI_SEMANTIC_FACE:
535 /* XXX review/test this case */
536 emit_op0(c,
537 WM_FRONTFACING,
538 dst_mask(dst, BRW_WRITEMASK_X));
539
540 emit_op1(c,
541 TGSI_OPCODE_MOV,
542 dst_mask(dst, BRW_WRITEMASK_YZ),
543 src_imm1f(c, 0.0));
544
545 emit_op1(c,
546 TGSI_OPCODE_MOV,
547 dst_mask(dst, BRW_WRITEMASK_W),
548 src_imm1f(c, 1.0));
549 break;
550
551 case TGSI_SEMANTIC_PSIZE:
552 /* XXX review/test this case */
553 emit_op3(c,
554 WM_PINTERP,
555 dst_mask(dst, BRW_WRITEMASK_XY),
556 interp,
557 deltas,
558 get_pixel_w(c));
559
560 emit_op1(c,
561 TGSI_OPCODE_MOV,
562 dst_mask(dst, BRW_WRITEMASK_Z),
563 src_imm1f(c, 0.0f));
564
565 emit_op1(c,
566 TGSI_OPCODE_MOV,
567 dst_mask(dst, BRW_WRITEMASK_W),
568 src_imm1f(c, 1.0f));
569 break;
570
571 default:
572 switch (interp_mode) {
573 case TGSI_INTERPOLATE_CONSTANT:
574 emit_op1(c,
575 WM_CINTERP,
576 dst,
577 interp);
578 break;
579
580 case TGSI_INTERPOLATE_LINEAR:
581 emit_op2(c,
582 WM_LINTERP,
583 dst,
584 interp,
585 deltas);
586 break;
587
588 case TGSI_INTERPOLATE_PERSPECTIVE:
589 emit_op3(c,
590 WM_PINTERP,
591 dst,
592 interp,
593 deltas,
594 get_pixel_w(c));
595 break;
596 }
597 break;
598 }
599 }
600
601
602 /***********************************************************************
603 * Expand various instructions here to simpler forms.
604 */
605 static void precalc_dst( struct brw_wm_compile *c,
606 struct brw_fp_dst dst,
607 struct brw_fp_src src0,
608 struct brw_fp_src src1 )
609 {
610 if (dst.writemask & BRW_WRITEMASK_Y) {
611 /* dst.y = mul src0.y, src1.y
612 */
613 emit_op2(c,
614 TGSI_OPCODE_MUL,
615 dst_mask(dst, BRW_WRITEMASK_Y),
616 src0,
617 src1);
618 }
619
620 if (dst.writemask & BRW_WRITEMASK_XZ) {
621 /* dst.z = mov src0.zzzz
622 */
623 emit_op1(c,
624 TGSI_OPCODE_MOV,
625 dst_mask(dst, BRW_WRITEMASK_Z),
626 src_scalar(src0, Z));
627
628 /* dst.x = imm1f(1.0)
629 */
630 emit_op1(c,
631 TGSI_OPCODE_MOV,
632 dst_saturate(dst_mask(dst, BRW_WRITEMASK_X), 0),
633 src_imm1f(c, 1.0));
634 }
635 if (dst.writemask & BRW_WRITEMASK_W) {
636 /* dst.w = mov src1.w
637 */
638 emit_op1(c,
639 TGSI_OPCODE_MOV,
640 dst_mask(dst, BRW_WRITEMASK_W),
641 src1);
642 }
643 }
644
645
646 static void precalc_lit( struct brw_wm_compile *c,
647 struct brw_fp_dst dst,
648 struct brw_fp_src src0 )
649 {
650 if (dst.writemask & BRW_WRITEMASK_XW) {
651 /* dst.xw = imm(1.0f)
652 */
653 emit_op1(c,
654 TGSI_OPCODE_MOV,
655 dst_saturate(dst_mask(dst, BRW_WRITEMASK_XW), 0),
656 src_imm1f(c, 1.0f));
657 }
658
659 if (dst.writemask & BRW_WRITEMASK_YZ) {
660 emit_op1(c,
661 TGSI_OPCODE_LIT,
662 dst_mask(dst, BRW_WRITEMASK_YZ),
663 src0);
664 }
665 }
666
667
668 /**
669 * Some TEX instructions require extra code, cube map coordinate
670 * normalization, or coordinate scaling for RECT textures, etc.
671 * This function emits those extra instructions and the TEX
672 * instruction itself.
673 */
674 static void precalc_tex( struct brw_wm_compile *c,
675 struct brw_fp_dst dst,
676 unsigned target,
677 unsigned unit,
678 struct brw_fp_src src0,
679 struct brw_fp_src sampler )
680 {
681 struct brw_fp_src coord = src_undef();
682 struct brw_fp_dst tmp = dst_undef();
683
684 assert(unit < BRW_MAX_TEX_UNIT);
685
686 /* Cubemap: find longest component of coord vector and normalize
687 * it.
688 */
689 if (target == TGSI_TEXTURE_CUBE) {
690 struct brw_fp_src tmpsrc;
691
692 tmp = get_temp(c);
693 tmpsrc = src_reg_from_dst(tmp);
694
695 /* tmp = abs(src0) */
696 emit_op1(c,
697 TGSI_OPCODE_MOV,
698 tmp,
699 src_abs(src0));
700
701 /* tmp.X = MAX(tmp.X, tmp.Y) */
702 emit_op2(c, TGSI_OPCODE_MAX,
703 dst_mask(tmp, BRW_WRITEMASK_X),
704 src_scalar(tmpsrc, X),
705 src_scalar(tmpsrc, Y));
706
707 /* tmp.X = MAX(tmp.X, tmp.Z) */
708 emit_op2(c, TGSI_OPCODE_MAX,
709 dst_mask(tmp, BRW_WRITEMASK_X),
710 tmpsrc,
711 src_scalar(tmpsrc, Z));
712
713 /* tmp.X = 1 / tmp.X */
714 emit_op1(c, TGSI_OPCODE_RCP,
715 dst_mask(tmp, BRW_WRITEMASK_X),
716 tmpsrc);
717
718 /* tmp = src0 * tmp.xxxx */
719 emit_op2(c, TGSI_OPCODE_MUL,
720 tmp,
721 src0,
722 src_scalar(tmpsrc, X));
723
724 coord = tmpsrc;
725 }
726 else if (target == TGSI_TEXTURE_RECT ||
727 target == TGSI_TEXTURE_SHADOWRECT) {
728 /* XXX: need a mechanism for internally generated constants.
729 */
730 coord = src0;
731 }
732 else {
733 coord = src0;
734 }
735
736 /* Need to emit YUV texture conversions by hand. Probably need to
737 * do this here - the alternative is in brw_wm_emit.c, but the
738 * conversion requires allocating a temporary variable which we
739 * don't have the facility to do that late in the compilation.
740 */
741 if (c->key.yuvtex_mask & (1 << unit)) {
742 /* convert ycbcr to RGBA */
743 GLboolean swap_uv = c->key.yuvtex_swap_mask & (1<<unit);
744 struct brw_fp_dst tmp = get_temp(c);
745 struct brw_fp_src tmpsrc = src_reg_from_dst(tmp);
746 struct brw_fp_src C0 = src_imm4f( c, -.5, -.0625, -.5, 1.164 );
747 struct brw_fp_src C1 = src_imm4f( c, 1.596, -0.813, 2.018, -.391 );
748
749 /* tmp = TEX ...
750 */
751 emit_tex_op(c,
752 TGSI_OPCODE_TEX,
753 dst_saturate(tmp, dst.saturate),
754 unit,
755 target,
756 sampler.index,
757 coord,
758 src_undef(),
759 src_undef());
760
761 /* tmp.xyz = ADD TMP, C0
762 */
763 emit_op2(c, TGSI_OPCODE_ADD,
764 dst_mask(tmp, BRW_WRITEMASK_XYZ),
765 tmpsrc,
766 C0);
767
768 /* YUV.y = MUL YUV.y, C0.w
769 */
770 emit_op2(c, TGSI_OPCODE_MUL,
771 dst_mask(tmp, BRW_WRITEMASK_Y),
772 tmpsrc,
773 src_scalar(C0, W));
774
775 /*
776 * if (UV swaped)
777 * RGB.xyz = MAD YUV.zzx, C1, YUV.y
778 * else
779 * RGB.xyz = MAD YUV.xxz, C1, YUV.y
780 */
781
782 emit_op3(c, TGSI_OPCODE_MAD,
783 dst_mask(dst, BRW_WRITEMASK_XYZ),
784 ( swap_uv ?
785 src_swizzle(tmpsrc, Z,Z,X,X) :
786 src_swizzle(tmpsrc, X,X,Z,Z)),
787 C1,
788 src_scalar(tmpsrc, Y));
789
790 /* RGB.y = MAD YUV.z, C1.w, RGB.y
791 */
792 emit_op3(c,
793 TGSI_OPCODE_MAD,
794 dst_mask(dst, BRW_WRITEMASK_Y),
795 src_scalar(tmpsrc, Z),
796 src_scalar(C1, W),
797 src_scalar(src_reg_from_dst(dst), Y));
798
799 release_temp(c, tmp);
800 }
801 else {
802 /* ordinary RGBA tex instruction */
803 emit_tex_op(c,
804 TGSI_OPCODE_TEX,
805 dst,
806 unit,
807 target,
808 sampler.index,
809 coord,
810 src_undef(),
811 src_undef());
812 }
813
814 /* XXX: add GL_EXT_texture_swizzle support to gallium -- by
815 * generating shader varients in mesa state tracker.
816 */
817
818 /* Release this temp if we ended up allocating it:
819 */
820 if (!dst_is_undef(tmp))
821 release_temp(c, tmp);
822 }
823
824
825 /**
826 * Check if the given TXP instruction really needs the divide-by-W step.
827 */
828 static GLboolean projtex( struct brw_wm_compile *c,
829 unsigned target,
830 struct brw_fp_src src )
831 {
832 /* Only try to detect the simplest cases. Could detect (later)
833 * cases where we are trying to emit code like RCP {1.0}, MUL x,
834 * {1.0}, and so on.
835 *
836 * More complex cases than this typically only arise from
837 * user-provided fragment programs anyway:
838 */
839 if (target == TGSI_TEXTURE_CUBE)
840 return GL_FALSE; /* ut2004 gun rendering !?! */
841
842 if (src.file == TGSI_FILE_INPUT &&
843 BRW_GET_SWZ(src.swizzle, W) == W &&
844 c->fp->info.input_interpolate[src.index] != TGSI_INTERPOLATE_PERSPECTIVE)
845 return GL_FALSE;
846
847 return GL_TRUE;
848 }
849
850
851 /**
852 * Emit code for TXP.
853 */
854 static void precalc_txp( struct brw_wm_compile *c,
855 struct brw_fp_dst dst,
856 unsigned target,
857 unsigned unit,
858 struct brw_fp_src src0,
859 struct brw_fp_src sampler )
860 {
861 if (projtex(c, target, src0)) {
862 struct brw_fp_dst tmp = get_temp(c);
863
864 /* tmp0.w = RCP inst.arg[0][3]
865 */
866 emit_op1(c,
867 TGSI_OPCODE_RCP,
868 dst_mask(tmp, BRW_WRITEMASK_W),
869 src_scalar(src0, W));
870
871 /* tmp0.xyz = MUL inst.arg[0], tmp0.wwww
872 */
873 emit_op2(c,
874 TGSI_OPCODE_MUL,
875 dst_mask(tmp, BRW_WRITEMASK_XYZ),
876 src0,
877 src_scalar(src_reg_from_dst(tmp), W));
878
879 /* dst = TEX tmp0
880 */
881 precalc_tex(c,
882 dst,
883 target,
884 unit,
885 src_reg_from_dst(tmp),
886 sampler );
887
888 release_temp(c, tmp);
889 }
890 else
891 {
892 /* dst = TEX src0
893 */
894 precalc_tex(c, dst, target, unit, src0, sampler);
895 }
896 }
897
898
899 /* XXX: note this returns a src_reg.
900 */
901 static struct brw_fp_src
902 find_output_by_semantic( struct brw_wm_compile *c,
903 unsigned semantic,
904 unsigned index )
905 {
906 const struct tgsi_shader_info *info = &c->fp->info;
907 unsigned i;
908
909 for (i = 0; i < info->num_outputs; i++)
910 if (info->output_semantic_name[i] == semantic &&
911 info->output_semantic_index[i] == index)
912 return src_reg( TGSI_FILE_OUTPUT, i );
913
914 /* If not found, return some arbitrary immediate value:
915 *
916 * XXX: this is a good idea but immediates are up generating extra
917 * curbe entries atm, as they would have in the original driver.
918 */
919 return src_reg( TGSI_FILE_OUTPUT, 0 ); /* src_imm1f(c, 1.0); */
920 }
921
922
923 static void emit_fb_write( struct brw_wm_compile *c )
924 {
925 struct brw_fp_src payload_r0_depth = src_reg(BRW_FILE_PAYLOAD, PAYLOAD_DEPTH);
926 struct brw_fp_src outdepth = find_output_by_semantic(c, TGSI_SEMANTIC_POSITION, 0);
927 GLuint i;
928
929
930 outdepth = src_scalar(outdepth, Z);
931
932 for (i = 0 ; i < c->key.nr_cbufs; i++) {
933 struct brw_fp_src outcolor;
934
935 outcolor = find_output_by_semantic(c, TGSI_SEMANTIC_COLOR, i);
936
937 /* Use emit_tex_op so that we can specify the inst->target
938 * field, which is abused to contain the FB write target and the
939 * EOT marker
940 */
941 emit_tex_op(c, WM_FB_WRITE,
942 dst_undef(),
943 (i == c->key.nr_cbufs - 1), /* EOT */
944 i,
945 0, /* no sampler */
946 outcolor,
947 payload_r0_depth,
948 outdepth);
949 }
950 }
951
952
953 static struct brw_fp_dst translate_dst( struct brw_wm_compile *c,
954 const struct tgsi_full_dst_register *dst,
955 unsigned saturate )
956 {
957 struct brw_fp_dst out;
958
959 out.file = dst->Register.File;
960 out.index = dst->Register.Index;
961 out.writemask = dst->Register.WriteMask;
962 out.indirect = dst->Register.Indirect;
963 out.saturate = (saturate == TGSI_SAT_ZERO_ONE);
964
965 if (out.indirect) {
966 assert(dst->Indirect.File == TGSI_FILE_ADDRESS);
967 assert(dst->Indirect.Index == 0);
968 }
969
970 return out;
971 }
972
973
974 static struct brw_fp_src translate_src( struct brw_wm_compile *c,
975 const struct tgsi_full_src_register *src )
976 {
977 struct brw_fp_src out;
978
979 out.file = src->Register.File;
980 out.index = src->Register.Index;
981 out.indirect = src->Register.Indirect;
982
983 out.swizzle = ((src->Register.SwizzleX << 0) |
984 (src->Register.SwizzleY << 2) |
985 (src->Register.SwizzleZ << 4) |
986 (src->Register.SwizzleW << 6));
987
988 switch (tgsi_util_get_full_src_register_sign_mode( src, 0 )) {
989 case TGSI_UTIL_SIGN_CLEAR:
990 out.abs = 1;
991 out.negate = 0;
992 break;
993
994 case TGSI_UTIL_SIGN_SET:
995 out.abs = 1;
996 out.negate = 1;
997 break;
998
999 case TGSI_UTIL_SIGN_TOGGLE:
1000 out.abs = 0;
1001 out.negate = 1;
1002 break;
1003
1004 case TGSI_UTIL_SIGN_KEEP:
1005 default:
1006 out.abs = 0;
1007 out.negate = 0;
1008 break;
1009 }
1010
1011 if (out.indirect) {
1012 assert(src->Indirect.File == TGSI_FILE_ADDRESS);
1013 assert(src->Indirect.Index == 0);
1014 }
1015
1016 return out;
1017 }
1018
1019
1020
1021 static void emit_insn( struct brw_wm_compile *c,
1022 const struct tgsi_full_instruction *inst )
1023 {
1024 unsigned opcode = inst->Instruction.Opcode;
1025 struct brw_fp_dst dst;
1026 struct brw_fp_src src[3];
1027 int i;
1028
1029 dst = translate_dst( c, &inst->Dst[0],
1030 inst->Instruction.Saturate );
1031
1032 for (i = 0; i < inst->Instruction.NumSrcRegs; i++)
1033 src[i] = translate_src( c, &inst->Src[i] );
1034
1035 switch (opcode) {
1036 case TGSI_OPCODE_ABS:
1037 emit_op1(c, TGSI_OPCODE_MOV,
1038 dst,
1039 src_abs(src[0]));
1040 break;
1041
1042 case TGSI_OPCODE_SUB:
1043 emit_op2(c, TGSI_OPCODE_ADD,
1044 dst,
1045 src[0],
1046 src_negate(src[1]));
1047 break;
1048
1049 case TGSI_OPCODE_SCS:
1050 emit_op1(c, TGSI_OPCODE_SCS,
1051 dst_mask(dst, BRW_WRITEMASK_XY),
1052 src[0]);
1053 break;
1054
1055 case TGSI_OPCODE_DST:
1056 precalc_dst(c, dst, src[0], src[1]);
1057 break;
1058
1059 case TGSI_OPCODE_LIT:
1060 precalc_lit(c, dst, src[0]);
1061 break;
1062
1063 case TGSI_OPCODE_TEX:
1064 precalc_tex(c, dst,
1065 inst->Texture.Texture,
1066 src[1].index, /* use sampler unit for tex idx */
1067 src[0], /* coord */
1068 src[1]); /* sampler */
1069 break;
1070
1071 case TGSI_OPCODE_TXP:
1072 precalc_txp(c, dst,
1073 inst->Texture.Texture,
1074 src[1].index, /* use sampler unit for tex idx */
1075 src[0], /* coord */
1076 src[1]); /* sampler */
1077 break;
1078
1079 case TGSI_OPCODE_TXB:
1080 /* XXX: TXB not done
1081 */
1082 precalc_tex(c, dst,
1083 inst->Texture.Texture,
1084 src[1].index, /* use sampler unit for tex idx*/
1085 src[0],
1086 src[1]);
1087 break;
1088
1089 case TGSI_OPCODE_XPD:
1090 emit_op2(c, TGSI_OPCODE_XPD,
1091 dst_mask(dst, BRW_WRITEMASK_XYZ),
1092 src[0],
1093 src[1]);
1094 break;
1095
1096 case TGSI_OPCODE_KIL:
1097 emit_op1(c, TGSI_OPCODE_KIL,
1098 dst_mask(dst_undef(), 0),
1099 src[0]);
1100 break;
1101
1102 case TGSI_OPCODE_END:
1103 emit_fb_write(c);
1104 break;
1105 default:
1106 if (!c->key.has_flow_control &&
1107 brw_wm_is_scalar_result(opcode))
1108 emit_scalar_insn(c, opcode, dst, src[0], src[1], src[2]);
1109 else
1110 emit_op3(c, opcode, dst, src[0], src[1], src[2]);
1111 break;
1112 }
1113 }
1114
1115 /**
1116 * Initial pass for fragment program code generation.
1117 * This function is used by both the GLSL and non-GLSL paths.
1118 */
1119 int brw_wm_pass_fp( struct brw_wm_compile *c )
1120 {
1121 struct brw_fragment_shader *fs = c->fp;
1122 struct tgsi_parse_context parse;
1123 struct tgsi_full_instruction *inst;
1124 struct tgsi_full_declaration *decl;
1125 const float *imm;
1126 GLuint size;
1127 GLuint i;
1128
1129 if (BRW_DEBUG & DEBUG_WM) {
1130 debug_printf("pre-fp:\n");
1131 tgsi_dump(fs->tokens, 0);
1132 }
1133
1134 c->fp_pixel_xy = src_undef();
1135 c->fp_delta_xy = src_undef();
1136 c->fp_pixel_w = src_undef();
1137 c->nr_fp_insns = 0;
1138 c->nr_immediates = 0;
1139
1140
1141 /* Loop over all instructions doing assorted simplifications and
1142 * transformations.
1143 */
1144 tgsi_parse_init( &parse, fs->tokens );
1145 while( !tgsi_parse_end_of_tokens( &parse ) ) {
1146 tgsi_parse_token( &parse );
1147
1148 switch( parse.FullToken.Token.Type ) {
1149 case TGSI_TOKEN_TYPE_DECLARATION:
1150 /* Turn intput declarations into special WM_* instructions.
1151 *
1152 * XXX: For non-branching shaders, consider deferring variable
1153 * initialization as late as possible to minimize register
1154 * usage. This is how the original BRW driver worked.
1155 *
1156 * In a branching shader, must preamble instructions at decl
1157 * time, as instruction order in the shader does not
1158 * correspond to the order instructions are executed in the
1159 * wild.
1160 *
1161 * This is where special instructions such as WM_CINTERP,
1162 * WM_LINTERP, WM_PINTERP and WM_WPOSXY are emitted to
1163 * compute shader inputs from the payload registers and pixel
1164 * position.
1165 */
1166 decl = &parse.FullToken.FullDeclaration;
1167 if( decl->Declaration.File == TGSI_FILE_INPUT ) {
1168 unsigned first, last, mask;
1169 unsigned attrib;
1170
1171 first = decl->Range.First;
1172 last = decl->Range.Last;
1173 mask = decl->Declaration.UsageMask;
1174
1175 for (attrib = first; attrib <= last; attrib++) {
1176 emit_interp(c,
1177 attrib,
1178 decl->Semantic.Name,
1179 decl->Declaration.Interpolate );
1180 }
1181 }
1182
1183 break;
1184
1185 case TGSI_TOKEN_TYPE_IMMEDIATE:
1186 /* Unlike VS programs we can probably manage fine encoding
1187 * immediate values directly into the emitted EU
1188 * instructions, as we probably only need to reference one
1189 * float value per instruction. Just save the data for now
1190 * and use directly later.
1191 */
1192 i = c->nr_immediates++;
1193 imm = &parse.FullToken.FullImmediate.u[i].Float;
1194 size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
1195
1196 if (c->nr_immediates >= BRW_WM_MAX_CONST)
1197 return PIPE_ERROR_OUT_OF_MEMORY;
1198
1199 for (i = 0; i < size; i++)
1200 c->immediate[c->nr_immediates].v[i] = imm[i];
1201
1202 for (; i < 4; i++)
1203 c->immediate[c->nr_immediates].v[i] = 0.0;
1204
1205 c->immediate[c->nr_immediates].nr = size;
1206 c->nr_immediates++;
1207 break;
1208
1209 case TGSI_TOKEN_TYPE_INSTRUCTION:
1210 inst = &parse.FullToken.FullInstruction;
1211 emit_insn(c, inst);
1212 break;
1213 }
1214 }
1215
1216 if (BRW_DEBUG & DEBUG_WM) {
1217 brw_wm_print_fp_program( c, "pass_fp" );
1218 debug_printf("\n");
1219 }
1220
1221 return c->error;
1222 }
1223