i965: Handle scalar result swizzling in shared GLSL/non-GLSL code.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_wm_fp.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "main/glheader.h"
34 #include "main/macros.h"
35 #include "main/enums.h"
36 #include "brw_context.h"
37 #include "brw_wm.h"
38 #include "brw_util.h"
39
40 #include "shader/prog_parameter.h"
41 #include "shader/prog_print.h"
42 #include "shader/prog_statevars.h"
43
44
/* Index of the first PROGRAM_TEMPORARY register handed out by get_temp().
 * Temporaries allocated by this pass are numbered upwards from here.
 */
#define FIRST_INTERNAL_TEMP MAX_NV_FRAGMENT_PROGRAM_TEMPS

/* Channel indices, used below as swizzle selectors and with GET_SWZ(). */
#define X 0
#define Y 1
#define Z 2
#define W 3
51
52
/* Printable names for the driver-private WM opcodes.  print_insns() indexes
 * this table with (Opcode - MAX_OPCODE), so the order of the entries matters.
 */
static const char *wm_opcode_strings[] = {
   "PIXELXY", "DELTAXY", "PIXELW",
   "LINTERP", "PINTERP", "CINTERP",
   "WPOSXY", "FB_WRITE", "FRONTFACING",
};
64
/* Compiled out: name table for the driver-private register file(s).
 * Nothing in this file refers to it.
 */
#if 0
static const char *wm_file_strings[] = {
"PAYLOAD"
};
#endif
70
71
72 /***********************************************************************
73 * Source regs
74 */
75
76 static struct prog_src_register src_reg(GLuint file, GLuint idx)
77 {
78 struct prog_src_register reg;
79 reg.File = file;
80 reg.Index = idx;
81 reg.Swizzle = SWIZZLE_NOOP;
82 reg.RelAddr = 0;
83 reg.Negate = NEGATE_NONE;
84 reg.Abs = 0;
85 return reg;
86 }
87
88 static struct prog_src_register src_reg_from_dst(struct prog_dst_register dst)
89 {
90 return src_reg(dst.File, dst.Index);
91 }
92
93 static struct prog_src_register src_undef( void )
94 {
95 return src_reg(PROGRAM_UNDEFINED, 0);
96 }
97
98 static GLboolean src_is_undef(struct prog_src_register src)
99 {
100 return src.File == PROGRAM_UNDEFINED;
101 }
102
103 static struct prog_src_register src_swizzle( struct prog_src_register reg, int x, int y, int z, int w )
104 {
105 reg.Swizzle = MAKE_SWIZZLE4(x,y,z,w);
106 return reg;
107 }
108
109 static struct prog_src_register src_swizzle1( struct prog_src_register reg, int x )
110 {
111 return src_swizzle(reg, x, x, x, x);
112 }
113
114 static struct prog_src_register src_swizzle4( struct prog_src_register reg, uint swizzle )
115 {
116 reg.Swizzle = swizzle;
117 return reg;
118 }
119
120
121 /***********************************************************************
122 * Dest regs
123 */
124
125 static struct prog_dst_register dst_reg(GLuint file, GLuint idx)
126 {
127 struct prog_dst_register reg;
128 reg.File = file;
129 reg.Index = idx;
130 reg.WriteMask = WRITEMASK_XYZW;
131 reg.RelAddr = 0;
132 reg.CondMask = COND_TR;
133 reg.CondSwizzle = 0;
134 reg.CondSrc = 0;
135 reg.pad = 0;
136 return reg;
137 }
138
139 static struct prog_dst_register dst_mask( struct prog_dst_register reg, int mask )
140 {
141 reg.WriteMask &= mask;
142 return reg;
143 }
144
145 static struct prog_dst_register dst_undef( void )
146 {
147 return dst_reg(PROGRAM_UNDEFINED, 0);
148 }
149
150
151
152 static struct prog_dst_register get_temp( struct brw_wm_compile *c )
153 {
154 int bit = _mesa_ffs( ~c->fp_temp );
155
156 if (!bit) {
157 _mesa_printf("%s: out of temporaries\n", __FILE__);
158 exit(1);
159 }
160
161 c->fp_temp |= 1<<(bit-1);
162 return dst_reg(PROGRAM_TEMPORARY, FIRST_INTERNAL_TEMP+(bit-1));
163 }
164
165
166 static void release_temp( struct brw_wm_compile *c, struct prog_dst_register temp )
167 {
168 c->fp_temp &= ~(1 << (temp.Index - FIRST_INTERNAL_TEMP));
169 }
170
171
172 /***********************************************************************
173 * Instructions
174 */
175
176 static struct prog_instruction *get_fp_inst(struct brw_wm_compile *c)
177 {
178 return &c->prog_instructions[c->nr_fp_insns++];
179 }
180
181 static struct prog_instruction *emit_insn(struct brw_wm_compile *c,
182 const struct prog_instruction *inst0)
183 {
184 struct prog_instruction *inst = get_fp_inst(c);
185 *inst = *inst0;
186 return inst;
187 }
188
189 static struct prog_instruction * emit_tex_op(struct brw_wm_compile *c,
190 GLuint op,
191 struct prog_dst_register dest,
192 GLuint saturate,
193 GLuint tex_src_unit,
194 GLuint tex_src_target,
195 GLuint tex_shadow,
196 struct prog_src_register src0,
197 struct prog_src_register src1,
198 struct prog_src_register src2 )
199 {
200 struct prog_instruction *inst = get_fp_inst(c);
201
202 memset(inst, 0, sizeof(*inst));
203
204 inst->Opcode = op;
205 inst->DstReg = dest;
206 inst->SaturateMode = saturate;
207 inst->TexSrcUnit = tex_src_unit;
208 inst->TexSrcTarget = tex_src_target;
209 inst->TexShadow = tex_shadow;
210 inst->SrcReg[0] = src0;
211 inst->SrcReg[1] = src1;
212 inst->SrcReg[2] = src2;
213 return inst;
214 }
215
216
217 static struct prog_instruction * emit_op(struct brw_wm_compile *c,
218 GLuint op,
219 struct prog_dst_register dest,
220 GLuint saturate,
221 struct prog_src_register src0,
222 struct prog_src_register src1,
223 struct prog_src_register src2 )
224 {
225 return emit_tex_op(c, op, dest, saturate,
226 0, 0, 0, /* tex unit, target, shadow */
227 src0, src1, src2);
228 }
229
230
231 /* Many Mesa opcodes produce the same value across all the result channels.
232 * We'd rather not have to support that splatting in the opcode implementations,
233 * and brw_wm_pass*.c wants to optimize them out by shuffling references around
234 * anyway. We can easily get both by emitting the opcode to one channel, and
235 * then MOVing it to the others, which brw_wm_pass*.c already understands.
236 */
237 static struct prog_instruction *emit_scalar_insn(struct brw_wm_compile *c,
238 const struct prog_instruction *inst0)
239 {
240 struct prog_instruction *inst;
241 unsigned int dst_chan;
242 unsigned int other_channel_mask;
243
244 if (inst0->DstReg.WriteMask == 0)
245 return NULL;
246
247 dst_chan = _mesa_ffs(inst0->DstReg.WriteMask) - 1;
248 inst = get_fp_inst(c);
249 *inst = *inst0;
250 inst->DstReg.WriteMask = 1 << dst_chan;
251
252 other_channel_mask = inst0->DstReg.WriteMask & ~(1 << dst_chan);
253 if (other_channel_mask != 0) {
254 inst = emit_op(c,
255 OPCODE_MOV,
256 dst_mask(inst0->DstReg, other_channel_mask),
257 0,
258 src_swizzle1(src_reg_from_dst(inst0->DstReg), dst_chan),
259 src_undef(),
260 src_undef());
261 }
262 return inst;
263 }
264
265
266 /***********************************************************************
267 * Special instructions for interpolation and other tasks
268 */
269
270 static struct prog_src_register get_pixel_xy( struct brw_wm_compile *c )
271 {
272 if (src_is_undef(c->pixel_xy)) {
273 struct prog_dst_register pixel_xy = get_temp(c);
274 struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
275
276
277 /* Emit the out calculations, and hold onto the results. Use
278 * two instructions as a temporary is required.
279 */
280 /* pixel_xy.xy = PIXELXY payload[0];
281 */
282 emit_op(c,
283 WM_PIXELXY,
284 dst_mask(pixel_xy, WRITEMASK_XY),
285 0,
286 payload_r0_depth,
287 src_undef(),
288 src_undef());
289
290 c->pixel_xy = src_reg_from_dst(pixel_xy);
291 }
292
293 return c->pixel_xy;
294 }
295
296 static struct prog_src_register get_delta_xy( struct brw_wm_compile *c )
297 {
298 if (src_is_undef(c->delta_xy)) {
299 struct prog_dst_register delta_xy = get_temp(c);
300 struct prog_src_register pixel_xy = get_pixel_xy(c);
301 struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
302
303 /* deltas.xy = DELTAXY pixel_xy, payload[0]
304 */
305 emit_op(c,
306 WM_DELTAXY,
307 dst_mask(delta_xy, WRITEMASK_XY),
308 0,
309 pixel_xy,
310 payload_r0_depth,
311 src_undef());
312
313 c->delta_xy = src_reg_from_dst(delta_xy);
314 }
315
316 return c->delta_xy;
317 }
318
319 static struct prog_src_register get_pixel_w( struct brw_wm_compile *c )
320 {
321 if (src_is_undef(c->pixel_w)) {
322 struct prog_dst_register pixel_w = get_temp(c);
323 struct prog_src_register deltas = get_delta_xy(c);
324 struct prog_src_register interp_wpos = src_reg(PROGRAM_PAYLOAD, FRAG_ATTRIB_WPOS);
325
326 /* deltas.xyw = DELTAS2 deltas.xy, payload.interp_wpos.x
327 */
328 emit_op(c,
329 WM_PIXELW,
330 dst_mask(pixel_w, WRITEMASK_W),
331 0,
332 interp_wpos,
333 deltas,
334 src_undef());
335
336
337 c->pixel_w = src_reg_from_dst(pixel_w);
338 }
339
340 return c->pixel_w;
341 }
342
343 static void emit_interp( struct brw_wm_compile *c,
344 GLuint idx )
345 {
346 struct prog_dst_register dst = dst_reg(PROGRAM_INPUT, idx);
347 struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx);
348 struct prog_src_register deltas = get_delta_xy(c);
349
350 /* Need to use PINTERP on attributes which have been
351 * multiplied by 1/W in the SF program, and LINTERP on those
352 * which have not:
353 */
354 switch (idx) {
355 case FRAG_ATTRIB_WPOS:
356 /* Have to treat wpos.xy specially:
357 */
358 emit_op(c,
359 WM_WPOSXY,
360 dst_mask(dst, WRITEMASK_XY),
361 0,
362 get_pixel_xy(c),
363 src_undef(),
364 src_undef());
365
366 dst = dst_mask(dst, WRITEMASK_ZW);
367
368 /* PROGRAM_INPUT.attr.xyzw = INTERP payload.interp[attr].x, deltas.xyw
369 */
370 emit_op(c,
371 WM_LINTERP,
372 dst,
373 0,
374 interp,
375 deltas,
376 src_undef());
377 break;
378 case FRAG_ATTRIB_COL0:
379 case FRAG_ATTRIB_COL1:
380 if (c->key.flat_shade) {
381 emit_op(c,
382 WM_CINTERP,
383 dst,
384 0,
385 interp,
386 src_undef(),
387 src_undef());
388 }
389 else {
390 if (c->key.linear_color) {
391 emit_op(c,
392 WM_LINTERP,
393 dst,
394 0,
395 interp,
396 deltas,
397 src_undef());
398 }
399 else {
400 /* perspective-corrected color interpolation */
401 emit_op(c,
402 WM_PINTERP,
403 dst,
404 0,
405 interp,
406 deltas,
407 get_pixel_w(c));
408 }
409 }
410 break;
411 case FRAG_ATTRIB_FOGC:
412 /* The FOGC input is really special. When a program uses glFogFragCoord,
413 * the results returned are supposed to be (f,0,0,1). But for Mesa GLSL,
414 * the glFrontFacing and glPointCoord values are also stashed in FOGC.
415 * So, write the interpolated fog value to X, then either 0, 1, or the
416 * stashed values to Y, Z, W. Note that this means that
417 * glFogFragCoord.yzw can be wrong in those cases!
418 */
419
420 /* Interpolate the fog coordinate */
421 emit_op(c,
422 WM_PINTERP,
423 dst_mask(dst, WRITEMASK_X),
424 0,
425 interp,
426 deltas,
427 get_pixel_w(c));
428
429 /* Move the front facing value into FOGC.y if it's needed. */
430 if (c->fp->program.UsesFrontFacing) {
431 emit_op(c,
432 WM_FRONTFACING,
433 dst_mask(dst, WRITEMASK_Y),
434 0,
435 src_undef(),
436 src_undef(),
437 src_undef());
438 } else {
439 emit_op(c,
440 OPCODE_MOV,
441 dst_mask(dst, WRITEMASK_Y),
442 0,
443 src_swizzle1(interp, SWIZZLE_ZERO),
444 src_undef(),
445 src_undef());
446 }
447
448 /* Should do the PointCoord thing here. */
449 emit_op(c,
450 OPCODE_MOV,
451 dst_mask(dst, WRITEMASK_ZW),
452 0,
453 src_swizzle(interp,
454 SWIZZLE_ZERO,
455 SWIZZLE_ZERO,
456 SWIZZLE_ZERO,
457 SWIZZLE_ONE),
458 src_undef(),
459 src_undef());
460 break;
461 default:
462 emit_op(c,
463 WM_PINTERP,
464 dst,
465 0,
466 interp,
467 deltas,
468 get_pixel_w(c));
469 break;
470 }
471
472 c->fp_interp_emitted |= 1<<idx;
473 }
474
475 static void emit_ddx( struct brw_wm_compile *c,
476 const struct prog_instruction *inst )
477 {
478 GLuint idx = inst->SrcReg[0].Index;
479 struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx);
480
481 c->fp_deriv_emitted |= 1<<idx;
482 emit_op(c,
483 OPCODE_DDX,
484 inst->DstReg,
485 0,
486 interp,
487 get_pixel_w(c),
488 src_undef());
489 }
490
491 static void emit_ddy( struct brw_wm_compile *c,
492 const struct prog_instruction *inst )
493 {
494 GLuint idx = inst->SrcReg[0].Index;
495 struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx);
496
497 c->fp_deriv_emitted |= 1<<idx;
498 emit_op(c,
499 OPCODE_DDY,
500 inst->DstReg,
501 0,
502 interp,
503 get_pixel_w(c),
504 src_undef());
505 }
506
507 /***********************************************************************
508 * Hacks to extend the program parameter and constant lists.
509 */
510
511 /* Add the fog parameters to the parameter list of the original
512 * program, rather than creating a new list. Doesn't really do any
513 * harm and it's not as if the parameter handling isn't a big hack
514 * anyway.
515 */
516 static struct prog_src_register search_or_add_param5(struct brw_wm_compile *c,
517 GLint s0,
518 GLint s1,
519 GLint s2,
520 GLint s3,
521 GLint s4)
522 {
523 struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters;
524 gl_state_index tokens[STATE_LENGTH];
525 GLuint idx;
526 tokens[0] = s0;
527 tokens[1] = s1;
528 tokens[2] = s2;
529 tokens[3] = s3;
530 tokens[4] = s4;
531
532 for (idx = 0; idx < paramList->NumParameters; idx++) {
533 if (paramList->Parameters[idx].Type == PROGRAM_STATE_VAR &&
534 memcmp(paramList->Parameters[idx].StateIndexes, tokens, sizeof(tokens)) == 0)
535 return src_reg(PROGRAM_STATE_VAR, idx);
536 }
537
538 idx = _mesa_add_state_reference( paramList, tokens );
539
540 return src_reg(PROGRAM_STATE_VAR, idx);
541 }
542
543
544 static struct prog_src_register search_or_add_const4f( struct brw_wm_compile *c,
545 GLfloat s0,
546 GLfloat s1,
547 GLfloat s2,
548 GLfloat s3)
549 {
550 struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters;
551 GLfloat values[4];
552 GLuint idx;
553 GLuint swizzle;
554
555 values[0] = s0;
556 values[1] = s1;
557 values[2] = s2;
558 values[3] = s3;
559
560 /* Have to search, otherwise multiple compilations will each grow
561 * the parameter list.
562 */
563 for (idx = 0; idx < paramList->NumParameters; idx++) {
564 if (paramList->Parameters[idx].Type == PROGRAM_CONSTANT &&
565 memcmp(paramList->ParameterValues[idx], values, sizeof(values)) == 0)
566
567 /* XXX: this mimics the mesa bug which puts all constants and
568 * parameters into the "PROGRAM_STATE_VAR" category:
569 */
570 return src_reg(PROGRAM_STATE_VAR, idx);
571 }
572
573 idx = _mesa_add_unnamed_constant( paramList, values, 4, &swizzle );
574 assert(swizzle == SWIZZLE_NOOP); /* Need to handle swizzle in reg setup */
575 return src_reg(PROGRAM_STATE_VAR, idx);
576 }
577
578
579
580 /***********************************************************************
581 * Expand various instructions here to simpler forms.
582 */
583 static void precalc_dst( struct brw_wm_compile *c,
584 const struct prog_instruction *inst )
585 {
586 struct prog_src_register src0 = inst->SrcReg[0];
587 struct prog_src_register src1 = inst->SrcReg[1];
588 struct prog_dst_register dst = inst->DstReg;
589
590 if (dst.WriteMask & WRITEMASK_Y) {
591 /* dst.y = mul src0.y, src1.y
592 */
593 emit_op(c,
594 OPCODE_MUL,
595 dst_mask(dst, WRITEMASK_Y),
596 inst->SaturateMode,
597 src0,
598 src1,
599 src_undef());
600 }
601
602 if (dst.WriteMask & WRITEMASK_XZ) {
603 struct prog_instruction *swz;
604 GLuint z = GET_SWZ(src0.Swizzle, Z);
605
606 /* dst.xz = swz src0.1zzz
607 */
608 swz = emit_op(c,
609 OPCODE_SWZ,
610 dst_mask(dst, WRITEMASK_XZ),
611 inst->SaturateMode,
612 src_swizzle(src0, SWIZZLE_ONE, z, z, z),
613 src_undef(),
614 src_undef());
615 /* Avoid letting negation flag of src0 affect our 1 constant. */
616 swz->SrcReg[0].Negate &= ~NEGATE_X;
617 }
618 if (dst.WriteMask & WRITEMASK_W) {
619 /* dst.w = mov src1.w
620 */
621 emit_op(c,
622 OPCODE_MOV,
623 dst_mask(dst, WRITEMASK_W),
624 inst->SaturateMode,
625 src1,
626 src_undef(),
627 src_undef());
628 }
629 }
630
631
632 static void precalc_lit( struct brw_wm_compile *c,
633 const struct prog_instruction *inst )
634 {
635 struct prog_src_register src0 = inst->SrcReg[0];
636 struct prog_dst_register dst = inst->DstReg;
637
638 if (dst.WriteMask & WRITEMASK_XW) {
639 struct prog_instruction *swz;
640
641 /* dst.xw = swz src0.1111
642 */
643 swz = emit_op(c,
644 OPCODE_SWZ,
645 dst_mask(dst, WRITEMASK_XW),
646 0,
647 src_swizzle1(src0, SWIZZLE_ONE),
648 src_undef(),
649 src_undef());
650 /* Avoid letting the negation flag of src0 affect our 1 constant. */
651 swz->SrcReg[0].Negate = NEGATE_NONE;
652 }
653
654 if (dst.WriteMask & WRITEMASK_YZ) {
655 emit_op(c,
656 OPCODE_LIT,
657 dst_mask(dst, WRITEMASK_YZ),
658 inst->SaturateMode,
659 src0,
660 src_undef(),
661 src_undef());
662 }
663 }
664
665
666 /**
667 * Some TEX instructions require extra code, cube map coordinate
668 * normalization, or coordinate scaling for RECT textures, etc.
669 * This function emits those extra instructions and the TEX
670 * instruction itself.
671 */
672 static void precalc_tex( struct brw_wm_compile *c,
673 const struct prog_instruction *inst )
674 {
675 struct prog_src_register coord;
676 struct prog_dst_register tmpcoord;
677 const GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit];
678
679 if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX) {
680 struct prog_instruction *out;
681 struct prog_dst_register tmp0 = get_temp(c);
682 struct prog_src_register tmp0src = src_reg_from_dst(tmp0);
683 struct prog_dst_register tmp1 = get_temp(c);
684 struct prog_src_register tmp1src = src_reg_from_dst(tmp1);
685 struct prog_src_register src0 = inst->SrcReg[0];
686
687 /* find longest component of coord vector and normalize it */
688 tmpcoord = get_temp(c);
689 coord = src_reg_from_dst(tmpcoord);
690
691 /* tmpcoord = src0 (i.e.: coord = src0) */
692 out = emit_op(c, OPCODE_MOV,
693 tmpcoord,
694 0,
695 src0,
696 src_undef(),
697 src_undef());
698 out->SrcReg[0].Negate = NEGATE_NONE;
699 out->SrcReg[0].Abs = 1;
700
701 /* tmp0 = MAX(coord.X, coord.Y) */
702 emit_op(c, OPCODE_MAX,
703 tmp0,
704 0,
705 src_swizzle1(coord, X),
706 src_swizzle1(coord, Y),
707 src_undef());
708
709 /* tmp1 = MAX(tmp0, coord.Z) */
710 emit_op(c, OPCODE_MAX,
711 tmp1,
712 0,
713 tmp0src,
714 src_swizzle1(coord, Z),
715 src_undef());
716
717 /* tmp0 = 1 / tmp1 */
718 emit_op(c, OPCODE_RCP,
719 tmp0,
720 0,
721 tmp1src,
722 src_undef(),
723 src_undef());
724
725 /* tmpCoord = src0 * tmp0 */
726 emit_op(c, OPCODE_MUL,
727 tmpcoord,
728 0,
729 src0,
730 tmp0src,
731 src_undef());
732
733 release_temp(c, tmp0);
734 release_temp(c, tmp1);
735 }
736 else if (inst->TexSrcTarget == TEXTURE_RECT_INDEX) {
737 struct prog_src_register scale =
738 search_or_add_param5( c,
739 STATE_INTERNAL,
740 STATE_TEXRECT_SCALE,
741 unit,
742 0,0 );
743
744 tmpcoord = get_temp(c);
745
746 /* coord.xy = MUL inst->SrcReg[0], { 1/width, 1/height }
747 */
748 emit_op(c,
749 OPCODE_MUL,
750 tmpcoord,
751 0,
752 inst->SrcReg[0],
753 src_swizzle(scale,
754 SWIZZLE_X,
755 SWIZZLE_Y,
756 SWIZZLE_ONE,
757 SWIZZLE_ONE),
758 src_undef());
759
760 coord = src_reg_from_dst(tmpcoord);
761 }
762 else {
763 coord = inst->SrcReg[0];
764 }
765
766 /* Need to emit YUV texture conversions by hand. Probably need to
767 * do this here - the alternative is in brw_wm_emit.c, but the
768 * conversion requires allocating a temporary variable which we
769 * don't have the facility to do that late in the compilation.
770 */
771 if (c->key.yuvtex_mask & (1 << unit)) {
772 /* convert ycbcr to RGBA */
773 GLboolean swap_uv = c->key.yuvtex_swap_mask & (1<<unit);
774
775 /*
776 CONST C0 = { -.5, -.0625, -.5, 1.164 }
777 CONST C1 = { 1.596, -0.813, 2.018, -.391 }
778 UYV = TEX ...
779 UYV.xyz = ADD UYV, C0
780 UYV.y = MUL UYV.y, C0.w
781 if (UV swaped)
782 RGB.xyz = MAD UYV.zzx, C1, UYV.y
783 else
784 RGB.xyz = MAD UYV.xxz, C1, UYV.y
785 RGB.y = MAD UYV.z, C1.w, RGB.y
786 */
787 struct prog_dst_register dst = inst->DstReg;
788 struct prog_dst_register tmp = get_temp(c);
789 struct prog_src_register tmpsrc = src_reg_from_dst(tmp);
790 struct prog_src_register C0 = search_or_add_const4f( c, -.5, -.0625, -.5, 1.164 );
791 struct prog_src_register C1 = search_or_add_const4f( c, 1.596, -0.813, 2.018, -.391 );
792
793 /* tmp = TEX ...
794 */
795 emit_tex_op(c,
796 OPCODE_TEX,
797 tmp,
798 inst->SaturateMode,
799 unit,
800 inst->TexSrcTarget,
801 inst->TexShadow,
802 coord,
803 src_undef(),
804 src_undef());
805
806 /* tmp.xyz = ADD TMP, C0
807 */
808 emit_op(c,
809 OPCODE_ADD,
810 dst_mask(tmp, WRITEMASK_XYZ),
811 0,
812 tmpsrc,
813 C0,
814 src_undef());
815
816 /* YUV.y = MUL YUV.y, C0.w
817 */
818
819 emit_op(c,
820 OPCODE_MUL,
821 dst_mask(tmp, WRITEMASK_Y),
822 0,
823 tmpsrc,
824 src_swizzle1(C0, W),
825 src_undef());
826
827 /*
828 * if (UV swaped)
829 * RGB.xyz = MAD YUV.zzx, C1, YUV.y
830 * else
831 * RGB.xyz = MAD YUV.xxz, C1, YUV.y
832 */
833
834 emit_op(c,
835 OPCODE_MAD,
836 dst_mask(dst, WRITEMASK_XYZ),
837 0,
838 swap_uv?src_swizzle(tmpsrc, Z,Z,X,X):src_swizzle(tmpsrc, X,X,Z,Z),
839 C1,
840 src_swizzle1(tmpsrc, Y));
841
842 /* RGB.y = MAD YUV.z, C1.w, RGB.y
843 */
844 emit_op(c,
845 OPCODE_MAD,
846 dst_mask(dst, WRITEMASK_Y),
847 0,
848 src_swizzle1(tmpsrc, Z),
849 src_swizzle1(C1, W),
850 src_swizzle1(src_reg_from_dst(dst), Y));
851
852 release_temp(c, tmp);
853 }
854 else {
855 /* ordinary RGBA tex instruction */
856 emit_tex_op(c,
857 OPCODE_TEX,
858 inst->DstReg,
859 inst->SaturateMode,
860 unit,
861 inst->TexSrcTarget,
862 inst->TexShadow,
863 coord,
864 src_undef(),
865 src_undef());
866 }
867
868 /* For GL_EXT_texture_swizzle: */
869 if (c->key.tex_swizzles[unit] != SWIZZLE_NOOP) {
870 /* swizzle the result of the TEX instruction */
871 struct prog_src_register tmpsrc = src_reg_from_dst(inst->DstReg);
872 emit_op(c, OPCODE_SWZ,
873 inst->DstReg,
874 SATURATE_OFF, /* saturate already done above */
875 src_swizzle4(tmpsrc, c->key.tex_swizzles[unit]),
876 src_undef(),
877 src_undef());
878 }
879
880 if ((inst->TexSrcTarget == TEXTURE_RECT_INDEX) ||
881 (inst->TexSrcTarget == TEXTURE_CUBE_INDEX))
882 release_temp(c, tmpcoord);
883 }
884
885
886 /**
887 * Check if the given TXP instruction really needs the divide-by-W step.
888 */
889 static GLboolean projtex( struct brw_wm_compile *c,
890 const struct prog_instruction *inst )
891 {
892 const struct prog_src_register src = inst->SrcReg[0];
893 GLboolean retVal;
894
895 assert(inst->Opcode == OPCODE_TXP);
896
897 /* Only try to detect the simplest cases. Could detect (later)
898 * cases where we are trying to emit code like RCP {1.0}, MUL x,
899 * {1.0}, and so on.
900 *
901 * More complex cases than this typically only arise from
902 * user-provided fragment programs anyway:
903 */
904 if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX)
905 retVal = GL_FALSE; /* ut2004 gun rendering !?! */
906 else if (src.File == PROGRAM_INPUT &&
907 GET_SWZ(src.Swizzle, W) == W &&
908 (c->key.proj_attrib_mask & (1 << src.Index)) == 0)
909 retVal = GL_FALSE;
910 else
911 retVal = GL_TRUE;
912
913 return retVal;
914 }
915
916
917 /**
918 * Emit code for TXP.
919 */
920 static void precalc_txp( struct brw_wm_compile *c,
921 const struct prog_instruction *inst )
922 {
923 struct prog_src_register src0 = inst->SrcReg[0];
924
925 if (projtex(c, inst)) {
926 struct prog_dst_register tmp = get_temp(c);
927 struct prog_instruction tmp_inst;
928
929 /* tmp0.w = RCP inst.arg[0][3]
930 */
931 emit_op(c,
932 OPCODE_RCP,
933 dst_mask(tmp, WRITEMASK_W),
934 0,
935 src_swizzle1(src0, GET_SWZ(src0.Swizzle, W)),
936 src_undef(),
937 src_undef());
938
939 /* tmp0.xyz = MUL inst.arg[0], tmp0.wwww
940 */
941 emit_op(c,
942 OPCODE_MUL,
943 dst_mask(tmp, WRITEMASK_XYZ),
944 0,
945 src0,
946 src_swizzle1(src_reg_from_dst(tmp), W),
947 src_undef());
948
949 /* dst = precalc(TEX tmp0)
950 */
951 tmp_inst = *inst;
952 tmp_inst.SrcReg[0] = src_reg_from_dst(tmp);
953 precalc_tex(c, &tmp_inst);
954
955 release_temp(c, tmp);
956 }
957 else
958 {
959 /* dst = precalc(TEX src0)
960 */
961 precalc_tex(c, inst);
962 }
963 }
964
965
966
967 static void emit_fb_write( struct brw_wm_compile *c )
968 {
969 struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
970 struct prog_src_register outdepth = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DEPTH);
971 struct prog_src_register outcolor;
972 GLuint i;
973
974 struct prog_instruction *inst, *last_inst;
975 struct brw_context *brw = c->func.brw;
976
977 /* The inst->Aux field is used for FB write target and the EOT marker */
978
979 if (brw->state.nr_color_regions > 1) {
980 for (i = 0 ; i < brw->state.nr_color_regions; i++) {
981 outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0 + i);
982 last_inst = inst = emit_op(c,
983 WM_FB_WRITE, dst_mask(dst_undef(),0), 0,
984 outcolor, payload_r0_depth, outdepth);
985 inst->Aux = (i<<1);
986 if (c->fp_fragcolor_emitted) {
987 outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR);
988 last_inst = inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0),
989 0, outcolor, payload_r0_depth, outdepth);
990 inst->Aux = (i<<1);
991 }
992 }
993 last_inst->Aux |= 1; //eot
994 }
995 else {
996 /* if gl_FragData[0] is written, use it, else use gl_FragColor */
997 if (c->fp->program.Base.OutputsWritten & (1 << FRAG_RESULT_DATA0))
998 outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0);
999 else
1000 outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR);
1001
1002 inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0),
1003 0, outcolor, payload_r0_depth, outdepth);
1004 inst->Aux = 1|(0<<1);
1005 }
1006 }
1007
1008
1009
1010
1011 /***********************************************************************
1012 * Emit INTERP instructions ahead of first use of each attrib.
1013 */
1014
1015 static void validate_src_regs( struct brw_wm_compile *c,
1016 const struct prog_instruction *inst )
1017 {
1018 GLuint nr_args = brw_wm_nr_args( inst->Opcode );
1019 GLuint i;
1020
1021 for (i = 0; i < nr_args; i++) {
1022 if (inst->SrcReg[i].File == PROGRAM_INPUT) {
1023 GLuint idx = inst->SrcReg[i].Index;
1024 if (!(c->fp_interp_emitted & (1<<idx))) {
1025 emit_interp(c, idx);
1026 }
1027 }
1028 }
1029 }
1030
1031 static void validate_dst_regs( struct brw_wm_compile *c,
1032 const struct prog_instruction *inst )
1033 {
1034 if (inst->DstReg.File == PROGRAM_OUTPUT) {
1035 GLuint idx = inst->DstReg.Index;
1036 if (idx == FRAG_RESULT_COLOR)
1037 c->fp_fragcolor_emitted = 1;
1038 }
1039 }
1040
1041 static void print_insns( const struct prog_instruction *insn,
1042 GLuint nr )
1043 {
1044 GLuint i;
1045 for (i = 0; i < nr; i++, insn++) {
1046 _mesa_printf("%3d: ", i);
1047 if (insn->Opcode < MAX_OPCODE)
1048 _mesa_print_instruction(insn);
1049 else if (insn->Opcode < MAX_WM_OPCODE) {
1050 GLuint idx = insn->Opcode - MAX_OPCODE;
1051
1052 _mesa_print_alu_instruction(insn,
1053 wm_opcode_strings[idx],
1054 3);
1055 }
1056 else
1057 _mesa_printf("965 Opcode %d\n", insn->Opcode);
1058 }
1059 }
1060
1061
1062 /**
1063 * Initial pass for fragment program code generation.
1064 * This function is used by both the GLSL and non-GLSL paths.
1065 */
1066 void brw_wm_pass_fp( struct brw_wm_compile *c )
1067 {
1068 struct brw_fragment_program *fp = c->fp;
1069 GLuint insn;
1070
1071 if (INTEL_DEBUG & DEBUG_WM) {
1072 _mesa_printf("pre-fp:\n");
1073 _mesa_print_program(&fp->program.Base);
1074 _mesa_printf("\n");
1075 }
1076
1077 c->pixel_xy = src_undef();
1078 c->delta_xy = src_undef();
1079 c->pixel_w = src_undef();
1080 c->nr_fp_insns = 0;
1081
1082 /* Emit preamble instructions. This is where special instructions such as
1083 * WM_CINTERP, WM_LINTERP, WM_PINTERP and WM_WPOSXY are emitted to
1084 * compute shader inputs from varying vars.
1085 */
1086 for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) {
1087 const struct prog_instruction *inst = &fp->program.Base.Instructions[insn];
1088 validate_src_regs(c, inst);
1089 validate_dst_regs(c, inst);
1090 }
1091
1092 /* Loop over all instructions doing assorted simplifications and
1093 * transformations.
1094 */
1095 for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) {
1096 const struct prog_instruction *inst = &fp->program.Base.Instructions[insn];
1097 struct prog_instruction *out;
1098
1099 /* Check for INPUT values, emit INTERP instructions where
1100 * necessary:
1101 */
1102
1103 switch (inst->Opcode) {
1104 case OPCODE_SWZ:
1105 out = emit_insn(c, inst);
1106 out->Opcode = OPCODE_MOV;
1107 break;
1108
1109 case OPCODE_ABS:
1110 out = emit_insn(c, inst);
1111 out->Opcode = OPCODE_MOV;
1112 out->SrcReg[0].Negate = NEGATE_NONE;
1113 out->SrcReg[0].Abs = 1;
1114 break;
1115
1116 case OPCODE_SUB:
1117 out = emit_insn(c, inst);
1118 out->Opcode = OPCODE_ADD;
1119 out->SrcReg[1].Negate ^= NEGATE_XYZW;
1120 break;
1121
1122 case OPCODE_SCS:
1123 out = emit_insn(c, inst);
1124 /* This should probably be done in the parser.
1125 */
1126 out->DstReg.WriteMask &= WRITEMASK_XY;
1127 break;
1128
1129 case OPCODE_DST:
1130 precalc_dst(c, inst);
1131 break;
1132
1133 case OPCODE_LIT:
1134 precalc_lit(c, inst);
1135 break;
1136
1137 case OPCODE_TEX:
1138 precalc_tex(c, inst);
1139 break;
1140
1141 case OPCODE_TXP:
1142 precalc_txp(c, inst);
1143 break;
1144
1145 case OPCODE_TXB:
1146 out = emit_insn(c, inst);
1147 out->TexSrcUnit = fp->program.Base.SamplerUnits[inst->TexSrcUnit];
1148 break;
1149
1150 case OPCODE_XPD:
1151 out = emit_insn(c, inst);
1152 /* This should probably be done in the parser.
1153 */
1154 out->DstReg.WriteMask &= WRITEMASK_XYZ;
1155 break;
1156
1157 case OPCODE_KIL:
1158 out = emit_insn(c, inst);
1159 /* This should probably be done in the parser.
1160 */
1161 out->DstReg.WriteMask = 0;
1162 break;
1163 case OPCODE_DDX:
1164 emit_ddx(c, inst);
1165 break;
1166 case OPCODE_DDY:
1167 emit_ddy(c, inst);
1168 break;
1169 case OPCODE_END:
1170 emit_fb_write(c);
1171 break;
1172 case OPCODE_PRINT:
1173 break;
1174 default:
1175 if (brw_wm_is_scalar_result(inst->Opcode))
1176 emit_scalar_insn(c, inst);
1177 else
1178 emit_insn(c, inst);
1179 break;
1180 }
1181 }
1182
1183 if (INTEL_DEBUG & DEBUG_WM) {
1184 _mesa_printf("pass_fp:\n");
1185 print_insns( c->prog_instructions, c->nr_fp_insns );
1186 _mesa_printf("\n");
1187 }
1188 }
1189