Merge branch 'mesa_7_5_branch' into mesa_7_6_branch
[mesa.git] / src / mesa / drivers / dri / i965 / brw_wm_fp.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "main/glheader.h"
34 #include "main/macros.h"
35 #include "main/enums.h"
36 #include "brw_context.h"
37 #include "brw_wm.h"
38 #include "brw_util.h"
39
40 #include "shader/prog_parameter.h"
41 #include "shader/prog_print.h"
42 #include "shader/prog_statevars.h"
43
44
45 /** An invalid texture target */
46 #define TEX_TARGET_NONE NUM_TEXTURE_TARGETS
47
48 /** An invalid texture unit */
49 #define TEX_UNIT_NONE BRW_MAX_TEX_UNIT
50
51 #define FIRST_INTERNAL_TEMP MAX_NV_FRAGMENT_PROGRAM_TEMPS
52
53 #define X 0
54 #define Y 1
55 #define Z 2
56 #define W 3
57
58
/**
 * Printable names of the driver-private WM opcodes.
 *
 * print_insns() indexes this table with (Opcode - MAX_OPCODE), so the
 * entries must stay in the same order as the private opcode values.
 */
static const char *const wm_opcode_strings[] = {
   "PIXELXY",
   "DELTAXY",
   "PIXELW",
   "LINTERP",
   "PINTERP",
   "CINTERP",
   "WPOSXY",
   "FB_WRITE",
   "FRONTFACING",
};
70
71 #if 0
72 static const char *wm_file_strings[] = {
73 "PAYLOAD"
74 };
75 #endif
76
77
78 /***********************************************************************
79 * Source regs
80 */
81
82 static struct prog_src_register src_reg(GLuint file, GLuint idx)
83 {
84 struct prog_src_register reg;
85 reg.File = file;
86 reg.Index = idx;
87 reg.Swizzle = SWIZZLE_NOOP;
88 reg.RelAddr = 0;
89 reg.Negate = NEGATE_NONE;
90 reg.Abs = 0;
91 return reg;
92 }
93
94 static struct prog_src_register src_reg_from_dst(struct prog_dst_register dst)
95 {
96 return src_reg(dst.File, dst.Index);
97 }
98
99 static struct prog_src_register src_undef( void )
100 {
101 return src_reg(PROGRAM_UNDEFINED, 0);
102 }
103
104 static GLboolean src_is_undef(struct prog_src_register src)
105 {
106 return src.File == PROGRAM_UNDEFINED;
107 }
108
109 static struct prog_src_register src_swizzle( struct prog_src_register reg, int x, int y, int z, int w )
110 {
111 reg.Swizzle = MAKE_SWIZZLE4(x,y,z,w);
112 return reg;
113 }
114
115 static struct prog_src_register src_swizzle1( struct prog_src_register reg, int x )
116 {
117 return src_swizzle(reg, x, x, x, x);
118 }
119
120 static struct prog_src_register src_swizzle4( struct prog_src_register reg, uint swizzle )
121 {
122 reg.Swizzle = swizzle;
123 return reg;
124 }
125
126
127 /***********************************************************************
128 * Dest regs
129 */
130
131 static struct prog_dst_register dst_reg(GLuint file, GLuint idx)
132 {
133 struct prog_dst_register reg;
134 reg.File = file;
135 reg.Index = idx;
136 reg.WriteMask = WRITEMASK_XYZW;
137 reg.RelAddr = 0;
138 reg.CondMask = COND_TR;
139 reg.CondSwizzle = 0;
140 reg.CondSrc = 0;
141 reg.pad = 0;
142 return reg;
143 }
144
145 static struct prog_dst_register dst_mask( struct prog_dst_register reg, int mask )
146 {
147 reg.WriteMask &= mask;
148 return reg;
149 }
150
151 static struct prog_dst_register dst_undef( void )
152 {
153 return dst_reg(PROGRAM_UNDEFINED, 0);
154 }
155
156
157
158 static struct prog_dst_register get_temp( struct brw_wm_compile *c )
159 {
160 int bit = _mesa_ffs( ~c->fp_temp );
161
162 if (!bit) {
163 _mesa_printf("%s: out of temporaries\n", __FILE__);
164 exit(1);
165 }
166
167 c->fp_temp |= 1<<(bit-1);
168 return dst_reg(PROGRAM_TEMPORARY, FIRST_INTERNAL_TEMP+(bit-1));
169 }
170
171
172 static void release_temp( struct brw_wm_compile *c, struct prog_dst_register temp )
173 {
174 c->fp_temp &= ~(1 << (temp.Index - FIRST_INTERNAL_TEMP));
175 }
176
177
178 /***********************************************************************
179 * Instructions
180 */
181
182 static struct prog_instruction *get_fp_inst(struct brw_wm_compile *c)
183 {
184 return &c->prog_instructions[c->nr_fp_insns++];
185 }
186
187 static struct prog_instruction *emit_insn(struct brw_wm_compile *c,
188 const struct prog_instruction *inst0)
189 {
190 struct prog_instruction *inst = get_fp_inst(c);
191 *inst = *inst0;
192 return inst;
193 }
194
195 static struct prog_instruction * emit_tex_op(struct brw_wm_compile *c,
196 GLuint op,
197 struct prog_dst_register dest,
198 GLuint saturate,
199 GLuint tex_src_unit,
200 GLuint tex_src_target,
201 GLuint tex_shadow,
202 struct prog_src_register src0,
203 struct prog_src_register src1,
204 struct prog_src_register src2 )
205 {
206 struct prog_instruction *inst = get_fp_inst(c);
207
208 assert(tex_src_unit < BRW_MAX_TEX_UNIT ||
209 tex_src_unit == TEX_UNIT_NONE);
210 assert(tex_src_target < NUM_TEXTURE_TARGETS ||
211 tex_src_target == TEX_TARGET_NONE);
212
213 /* update mask of which texture units are referenced by this program */
214 if (tex_src_unit != TEX_UNIT_NONE)
215 c->fp->tex_units_used |= (1 << tex_src_unit);
216
217 memset(inst, 0, sizeof(*inst));
218
219 inst->Opcode = op;
220 inst->DstReg = dest;
221 inst->SaturateMode = saturate;
222 inst->TexSrcUnit = tex_src_unit;
223 inst->TexSrcTarget = tex_src_target;
224 inst->TexShadow = tex_shadow;
225 inst->SrcReg[0] = src0;
226 inst->SrcReg[1] = src1;
227 inst->SrcReg[2] = src2;
228 return inst;
229 }
230
231
232 static struct prog_instruction * emit_op(struct brw_wm_compile *c,
233 GLuint op,
234 struct prog_dst_register dest,
235 GLuint saturate,
236 struct prog_src_register src0,
237 struct prog_src_register src1,
238 struct prog_src_register src2 )
239 {
240 return emit_tex_op(c, op, dest, saturate,
241 TEX_UNIT_NONE, TEX_TARGET_NONE, 0, /* unit, tgt, shadow */
242 src0, src1, src2);
243 }
244
245
246 /* Many Mesa opcodes produce the same value across all the result channels.
247 * We'd rather not have to support that splatting in the opcode implementations,
248 * and brw_wm_pass*.c wants to optimize them out by shuffling references around
249 * anyway. We can easily get both by emitting the opcode to one channel, and
250 * then MOVing it to the others, which brw_wm_pass*.c already understands.
251 */
252 static struct prog_instruction *emit_scalar_insn(struct brw_wm_compile *c,
253 const struct prog_instruction *inst0)
254 {
255 struct prog_instruction *inst;
256 unsigned int dst_chan;
257 unsigned int other_channel_mask;
258
259 if (inst0->DstReg.WriteMask == 0)
260 return NULL;
261
262 dst_chan = _mesa_ffs(inst0->DstReg.WriteMask) - 1;
263 inst = get_fp_inst(c);
264 *inst = *inst0;
265 inst->DstReg.WriteMask = 1 << dst_chan;
266
267 other_channel_mask = inst0->DstReg.WriteMask & ~(1 << dst_chan);
268 if (other_channel_mask != 0) {
269 inst = emit_op(c,
270 OPCODE_MOV,
271 dst_mask(inst0->DstReg, other_channel_mask),
272 0,
273 src_swizzle1(src_reg_from_dst(inst0->DstReg), dst_chan),
274 src_undef(),
275 src_undef());
276 }
277 return inst;
278 }
279
280
281 /***********************************************************************
282 * Special instructions for interpolation and other tasks
283 */
284
285 static struct prog_src_register get_pixel_xy( struct brw_wm_compile *c )
286 {
287 if (src_is_undef(c->pixel_xy)) {
288 struct prog_dst_register pixel_xy = get_temp(c);
289 struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
290
291
292 /* Emit the out calculations, and hold onto the results. Use
293 * two instructions as a temporary is required.
294 */
295 /* pixel_xy.xy = PIXELXY payload[0];
296 */
297 emit_op(c,
298 WM_PIXELXY,
299 dst_mask(pixel_xy, WRITEMASK_XY),
300 0,
301 payload_r0_depth,
302 src_undef(),
303 src_undef());
304
305 c->pixel_xy = src_reg_from_dst(pixel_xy);
306 }
307
308 return c->pixel_xy;
309 }
310
311 static struct prog_src_register get_delta_xy( struct brw_wm_compile *c )
312 {
313 if (src_is_undef(c->delta_xy)) {
314 struct prog_dst_register delta_xy = get_temp(c);
315 struct prog_src_register pixel_xy = get_pixel_xy(c);
316 struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
317
318 /* deltas.xy = DELTAXY pixel_xy, payload[0]
319 */
320 emit_op(c,
321 WM_DELTAXY,
322 dst_mask(delta_xy, WRITEMASK_XY),
323 0,
324 pixel_xy,
325 payload_r0_depth,
326 src_undef());
327
328 c->delta_xy = src_reg_from_dst(delta_xy);
329 }
330
331 return c->delta_xy;
332 }
333
334 static struct prog_src_register get_pixel_w( struct brw_wm_compile *c )
335 {
336 if (src_is_undef(c->pixel_w)) {
337 struct prog_dst_register pixel_w = get_temp(c);
338 struct prog_src_register deltas = get_delta_xy(c);
339 struct prog_src_register interp_wpos = src_reg(PROGRAM_PAYLOAD, FRAG_ATTRIB_WPOS);
340
341 /* deltas.xyw = DELTAS2 deltas.xy, payload.interp_wpos.x
342 */
343 emit_op(c,
344 WM_PIXELW,
345 dst_mask(pixel_w, WRITEMASK_W),
346 0,
347 interp_wpos,
348 deltas,
349 src_undef());
350
351
352 c->pixel_w = src_reg_from_dst(pixel_w);
353 }
354
355 return c->pixel_w;
356 }
357
358 static void emit_interp( struct brw_wm_compile *c,
359 GLuint idx )
360 {
361 struct prog_dst_register dst = dst_reg(PROGRAM_INPUT, idx);
362 struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx);
363 struct prog_src_register deltas = get_delta_xy(c);
364
365 /* Need to use PINTERP on attributes which have been
366 * multiplied by 1/W in the SF program, and LINTERP on those
367 * which have not:
368 */
369 switch (idx) {
370 case FRAG_ATTRIB_WPOS:
371 /* Have to treat wpos.xy specially:
372 */
373 emit_op(c,
374 WM_WPOSXY,
375 dst_mask(dst, WRITEMASK_XY),
376 0,
377 get_pixel_xy(c),
378 src_undef(),
379 src_undef());
380
381 dst = dst_mask(dst, WRITEMASK_ZW);
382
383 /* PROGRAM_INPUT.attr.xyzw = INTERP payload.interp[attr].x, deltas.xyw
384 */
385 emit_op(c,
386 WM_LINTERP,
387 dst,
388 0,
389 interp,
390 deltas,
391 src_undef());
392 break;
393 case FRAG_ATTRIB_COL0:
394 case FRAG_ATTRIB_COL1:
395 if (c->key.flat_shade) {
396 emit_op(c,
397 WM_CINTERP,
398 dst,
399 0,
400 interp,
401 src_undef(),
402 src_undef());
403 }
404 else {
405 if (c->key.linear_color) {
406 emit_op(c,
407 WM_LINTERP,
408 dst,
409 0,
410 interp,
411 deltas,
412 src_undef());
413 }
414 else {
415 /* perspective-corrected color interpolation */
416 emit_op(c,
417 WM_PINTERP,
418 dst,
419 0,
420 interp,
421 deltas,
422 get_pixel_w(c));
423 }
424 }
425 break;
426 case FRAG_ATTRIB_FOGC:
427 /* Interpolate the fog coordinate */
428 emit_op(c,
429 WM_PINTERP,
430 dst_mask(dst, WRITEMASK_X),
431 0,
432 interp,
433 deltas,
434 get_pixel_w(c));
435
436 emit_op(c,
437 OPCODE_MOV,
438 dst_mask(dst, WRITEMASK_YZW),
439 0,
440 src_swizzle(interp,
441 SWIZZLE_ZERO,
442 SWIZZLE_ZERO,
443 SWIZZLE_ZERO,
444 SWIZZLE_ONE),
445 src_undef(),
446 src_undef());
447 break;
448
449 case FRAG_ATTRIB_FACE:
450 /* XXX review/test this case */
451 emit_op(c,
452 WM_FRONTFACING,
453 dst_mask(dst, WRITEMASK_X),
454 0,
455 src_undef(),
456 src_undef(),
457 src_undef());
458 break;
459
460 case FRAG_ATTRIB_PNTC:
461 /* XXX review/test this case */
462 emit_op(c,
463 WM_PINTERP,
464 dst_mask(dst, WRITEMASK_XY),
465 0,
466 interp,
467 deltas,
468 get_pixel_w(c));
469
470 emit_op(c,
471 OPCODE_MOV,
472 dst_mask(dst, WRITEMASK_ZW),
473 0,
474 src_swizzle(interp,
475 SWIZZLE_ZERO,
476 SWIZZLE_ZERO,
477 SWIZZLE_ZERO,
478 SWIZZLE_ONE),
479 src_undef(),
480 src_undef());
481 break;
482
483 default:
484 emit_op(c,
485 WM_PINTERP,
486 dst,
487 0,
488 interp,
489 deltas,
490 get_pixel_w(c));
491 break;
492 }
493
494 c->fp_interp_emitted |= 1<<idx;
495 }
496
497 static void emit_ddx( struct brw_wm_compile *c,
498 const struct prog_instruction *inst )
499 {
500 GLuint idx = inst->SrcReg[0].Index;
501 struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx);
502
503 c->fp_deriv_emitted |= 1<<idx;
504 emit_op(c,
505 OPCODE_DDX,
506 inst->DstReg,
507 0,
508 interp,
509 get_pixel_w(c),
510 src_undef());
511 }
512
513 static void emit_ddy( struct brw_wm_compile *c,
514 const struct prog_instruction *inst )
515 {
516 GLuint idx = inst->SrcReg[0].Index;
517 struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx);
518
519 c->fp_deriv_emitted |= 1<<idx;
520 emit_op(c,
521 OPCODE_DDY,
522 inst->DstReg,
523 0,
524 interp,
525 get_pixel_w(c),
526 src_undef());
527 }
528
529 /***********************************************************************
530 * Hacks to extend the program parameter and constant lists.
531 */
532
533 /* Add the fog parameters to the parameter list of the original
534 * program, rather than creating a new list. Doesn't really do any
535 * harm and it's not as if the parameter handling isn't a big hack
536 * anyway.
537 */
538 static struct prog_src_register search_or_add_param5(struct brw_wm_compile *c,
539 GLint s0,
540 GLint s1,
541 GLint s2,
542 GLint s3,
543 GLint s4)
544 {
545 struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters;
546 gl_state_index tokens[STATE_LENGTH];
547 GLuint idx;
548 tokens[0] = s0;
549 tokens[1] = s1;
550 tokens[2] = s2;
551 tokens[3] = s3;
552 tokens[4] = s4;
553
554 for (idx = 0; idx < paramList->NumParameters; idx++) {
555 if (paramList->Parameters[idx].Type == PROGRAM_STATE_VAR &&
556 memcmp(paramList->Parameters[idx].StateIndexes, tokens, sizeof(tokens)) == 0)
557 return src_reg(PROGRAM_STATE_VAR, idx);
558 }
559
560 idx = _mesa_add_state_reference( paramList, tokens );
561
562 return src_reg(PROGRAM_STATE_VAR, idx);
563 }
564
565
566 static struct prog_src_register search_or_add_const4f( struct brw_wm_compile *c,
567 GLfloat s0,
568 GLfloat s1,
569 GLfloat s2,
570 GLfloat s3)
571 {
572 struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters;
573 GLfloat values[4];
574 GLuint idx;
575 GLuint swizzle;
576
577 values[0] = s0;
578 values[1] = s1;
579 values[2] = s2;
580 values[3] = s3;
581
582 /* Have to search, otherwise multiple compilations will each grow
583 * the parameter list.
584 */
585 for (idx = 0; idx < paramList->NumParameters; idx++) {
586 if (paramList->Parameters[idx].Type == PROGRAM_CONSTANT &&
587 memcmp(paramList->ParameterValues[idx], values, sizeof(values)) == 0)
588
589 /* XXX: this mimics the mesa bug which puts all constants and
590 * parameters into the "PROGRAM_STATE_VAR" category:
591 */
592 return src_reg(PROGRAM_STATE_VAR, idx);
593 }
594
595 idx = _mesa_add_unnamed_constant( paramList, values, 4, &swizzle );
596 assert(swizzle == SWIZZLE_NOOP); /* Need to handle swizzle in reg setup */
597 return src_reg(PROGRAM_STATE_VAR, idx);
598 }
599
600
601
602 /***********************************************************************
603 * Expand various instructions here to simpler forms.
604 */
605 static void precalc_dst( struct brw_wm_compile *c,
606 const struct prog_instruction *inst )
607 {
608 struct prog_src_register src0 = inst->SrcReg[0];
609 struct prog_src_register src1 = inst->SrcReg[1];
610 struct prog_dst_register dst = inst->DstReg;
611
612 if (dst.WriteMask & WRITEMASK_Y) {
613 /* dst.y = mul src0.y, src1.y
614 */
615 emit_op(c,
616 OPCODE_MUL,
617 dst_mask(dst, WRITEMASK_Y),
618 inst->SaturateMode,
619 src0,
620 src1,
621 src_undef());
622 }
623
624 if (dst.WriteMask & WRITEMASK_XZ) {
625 struct prog_instruction *swz;
626 GLuint z = GET_SWZ(src0.Swizzle, Z);
627
628 /* dst.xz = swz src0.1zzz
629 */
630 swz = emit_op(c,
631 OPCODE_SWZ,
632 dst_mask(dst, WRITEMASK_XZ),
633 inst->SaturateMode,
634 src_swizzle(src0, SWIZZLE_ONE, z, z, z),
635 src_undef(),
636 src_undef());
637 /* Avoid letting negation flag of src0 affect our 1 constant. */
638 swz->SrcReg[0].Negate &= ~NEGATE_X;
639 }
640 if (dst.WriteMask & WRITEMASK_W) {
641 /* dst.w = mov src1.w
642 */
643 emit_op(c,
644 OPCODE_MOV,
645 dst_mask(dst, WRITEMASK_W),
646 inst->SaturateMode,
647 src1,
648 src_undef(),
649 src_undef());
650 }
651 }
652
653
654 static void precalc_lit( struct brw_wm_compile *c,
655 const struct prog_instruction *inst )
656 {
657 struct prog_src_register src0 = inst->SrcReg[0];
658 struct prog_dst_register dst = inst->DstReg;
659
660 if (dst.WriteMask & WRITEMASK_XW) {
661 struct prog_instruction *swz;
662
663 /* dst.xw = swz src0.1111
664 */
665 swz = emit_op(c,
666 OPCODE_SWZ,
667 dst_mask(dst, WRITEMASK_XW),
668 0,
669 src_swizzle1(src0, SWIZZLE_ONE),
670 src_undef(),
671 src_undef());
672 /* Avoid letting the negation flag of src0 affect our 1 constant. */
673 swz->SrcReg[0].Negate = NEGATE_NONE;
674 }
675
676 if (dst.WriteMask & WRITEMASK_YZ) {
677 emit_op(c,
678 OPCODE_LIT,
679 dst_mask(dst, WRITEMASK_YZ),
680 inst->SaturateMode,
681 src0,
682 src_undef(),
683 src_undef());
684 }
685 }
686
687
688 /**
689 * Some TEX instructions require extra code, cube map coordinate
690 * normalization, or coordinate scaling for RECT textures, etc.
691 * This function emits those extra instructions and the TEX
692 * instruction itself.
693 */
694 static void precalc_tex( struct brw_wm_compile *c,
695 const struct prog_instruction *inst )
696 {
697 struct prog_src_register coord;
698 struct prog_dst_register tmpcoord;
699 const GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit];
700
701 assert(unit < BRW_MAX_TEX_UNIT);
702
703 if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX) {
704 struct prog_instruction *out;
705 struct prog_dst_register tmp0 = get_temp(c);
706 struct prog_src_register tmp0src = src_reg_from_dst(tmp0);
707 struct prog_dst_register tmp1 = get_temp(c);
708 struct prog_src_register tmp1src = src_reg_from_dst(tmp1);
709 struct prog_src_register src0 = inst->SrcReg[0];
710
711 /* find longest component of coord vector and normalize it */
712 tmpcoord = get_temp(c);
713 coord = src_reg_from_dst(tmpcoord);
714
715 /* tmpcoord = src0 (i.e.: coord = src0) */
716 out = emit_op(c, OPCODE_MOV,
717 tmpcoord,
718 0,
719 src0,
720 src_undef(),
721 src_undef());
722 out->SrcReg[0].Negate = NEGATE_NONE;
723 out->SrcReg[0].Abs = 1;
724
725 /* tmp0 = MAX(coord.X, coord.Y) */
726 emit_op(c, OPCODE_MAX,
727 tmp0,
728 0,
729 src_swizzle1(coord, X),
730 src_swizzle1(coord, Y),
731 src_undef());
732
733 /* tmp1 = MAX(tmp0, coord.Z) */
734 emit_op(c, OPCODE_MAX,
735 tmp1,
736 0,
737 tmp0src,
738 src_swizzle1(coord, Z),
739 src_undef());
740
741 /* tmp0 = 1 / tmp1 */
742 emit_op(c, OPCODE_RCP,
743 dst_mask(tmp0, WRITEMASK_X),
744 0,
745 tmp1src,
746 src_undef(),
747 src_undef());
748
749 /* tmpCoord = src0 * tmp0 */
750 emit_op(c, OPCODE_MUL,
751 tmpcoord,
752 0,
753 src0,
754 src_swizzle1(tmp0src, SWIZZLE_X),
755 src_undef());
756
757 release_temp(c, tmp0);
758 release_temp(c, tmp1);
759 }
760 else if (inst->TexSrcTarget == TEXTURE_RECT_INDEX) {
761 struct prog_src_register scale =
762 search_or_add_param5( c,
763 STATE_INTERNAL,
764 STATE_TEXRECT_SCALE,
765 unit,
766 0,0 );
767
768 tmpcoord = get_temp(c);
769
770 /* coord.xy = MUL inst->SrcReg[0], { 1/width, 1/height }
771 */
772 emit_op(c,
773 OPCODE_MUL,
774 tmpcoord,
775 0,
776 inst->SrcReg[0],
777 src_swizzle(scale,
778 SWIZZLE_X,
779 SWIZZLE_Y,
780 SWIZZLE_ONE,
781 SWIZZLE_ONE),
782 src_undef());
783
784 coord = src_reg_from_dst(tmpcoord);
785 }
786 else {
787 coord = inst->SrcReg[0];
788 }
789
790 /* Need to emit YUV texture conversions by hand. Probably need to
791 * do this here - the alternative is in brw_wm_emit.c, but the
792 * conversion requires allocating a temporary variable which we
793 * don't have the facility to do that late in the compilation.
794 */
795 if (c->key.yuvtex_mask & (1 << unit)) {
796 /* convert ycbcr to RGBA */
797 GLboolean swap_uv = c->key.yuvtex_swap_mask & (1<<unit);
798
799 /*
800 CONST C0 = { -.5, -.0625, -.5, 1.164 }
801 CONST C1 = { 1.596, -0.813, 2.018, -.391 }
802 UYV = TEX ...
803 UYV.xyz = ADD UYV, C0
804 UYV.y = MUL UYV.y, C0.w
805 if (UV swaped)
806 RGB.xyz = MAD UYV.zzx, C1, UYV.y
807 else
808 RGB.xyz = MAD UYV.xxz, C1, UYV.y
809 RGB.y = MAD UYV.z, C1.w, RGB.y
810 */
811 struct prog_dst_register dst = inst->DstReg;
812 struct prog_dst_register tmp = get_temp(c);
813 struct prog_src_register tmpsrc = src_reg_from_dst(tmp);
814 struct prog_src_register C0 = search_or_add_const4f( c, -.5, -.0625, -.5, 1.164 );
815 struct prog_src_register C1 = search_or_add_const4f( c, 1.596, -0.813, 2.018, -.391 );
816
817 /* tmp = TEX ...
818 */
819 emit_tex_op(c,
820 OPCODE_TEX,
821 tmp,
822 inst->SaturateMode,
823 unit,
824 inst->TexSrcTarget,
825 inst->TexShadow,
826 coord,
827 src_undef(),
828 src_undef());
829
830 /* tmp.xyz = ADD TMP, C0
831 */
832 emit_op(c,
833 OPCODE_ADD,
834 dst_mask(tmp, WRITEMASK_XYZ),
835 0,
836 tmpsrc,
837 C0,
838 src_undef());
839
840 /* YUV.y = MUL YUV.y, C0.w
841 */
842
843 emit_op(c,
844 OPCODE_MUL,
845 dst_mask(tmp, WRITEMASK_Y),
846 0,
847 tmpsrc,
848 src_swizzle1(C0, W),
849 src_undef());
850
851 /*
852 * if (UV swaped)
853 * RGB.xyz = MAD YUV.zzx, C1, YUV.y
854 * else
855 * RGB.xyz = MAD YUV.xxz, C1, YUV.y
856 */
857
858 emit_op(c,
859 OPCODE_MAD,
860 dst_mask(dst, WRITEMASK_XYZ),
861 0,
862 swap_uv?src_swizzle(tmpsrc, Z,Z,X,X):src_swizzle(tmpsrc, X,X,Z,Z),
863 C1,
864 src_swizzle1(tmpsrc, Y));
865
866 /* RGB.y = MAD YUV.z, C1.w, RGB.y
867 */
868 emit_op(c,
869 OPCODE_MAD,
870 dst_mask(dst, WRITEMASK_Y),
871 0,
872 src_swizzle1(tmpsrc, Z),
873 src_swizzle1(C1, W),
874 src_swizzle1(src_reg_from_dst(dst), Y));
875
876 release_temp(c, tmp);
877 }
878 else {
879 /* ordinary RGBA tex instruction */
880 emit_tex_op(c,
881 OPCODE_TEX,
882 inst->DstReg,
883 inst->SaturateMode,
884 unit,
885 inst->TexSrcTarget,
886 inst->TexShadow,
887 coord,
888 src_undef(),
889 src_undef());
890 }
891
892 /* For GL_EXT_texture_swizzle: */
893 if (c->key.tex_swizzles[unit] != SWIZZLE_NOOP) {
894 /* swizzle the result of the TEX instruction */
895 struct prog_src_register tmpsrc = src_reg_from_dst(inst->DstReg);
896 emit_op(c, OPCODE_SWZ,
897 inst->DstReg,
898 SATURATE_OFF, /* saturate already done above */
899 src_swizzle4(tmpsrc, c->key.tex_swizzles[unit]),
900 src_undef(),
901 src_undef());
902 }
903
904 if ((inst->TexSrcTarget == TEXTURE_RECT_INDEX) ||
905 (inst->TexSrcTarget == TEXTURE_CUBE_INDEX))
906 release_temp(c, tmpcoord);
907 }
908
909
910 /**
911 * Check if the given TXP instruction really needs the divide-by-W step.
912 */
913 static GLboolean projtex( struct brw_wm_compile *c,
914 const struct prog_instruction *inst )
915 {
916 const struct prog_src_register src = inst->SrcReg[0];
917 GLboolean retVal;
918
919 assert(inst->Opcode == OPCODE_TXP);
920
921 /* Only try to detect the simplest cases. Could detect (later)
922 * cases where we are trying to emit code like RCP {1.0}, MUL x,
923 * {1.0}, and so on.
924 *
925 * More complex cases than this typically only arise from
926 * user-provided fragment programs anyway:
927 */
928 if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX)
929 retVal = GL_FALSE; /* ut2004 gun rendering !?! */
930 else if (src.File == PROGRAM_INPUT &&
931 GET_SWZ(src.Swizzle, W) == W &&
932 (c->key.proj_attrib_mask & (1 << src.Index)) == 0)
933 retVal = GL_FALSE;
934 else
935 retVal = GL_TRUE;
936
937 return retVal;
938 }
939
940
941 /**
942 * Emit code for TXP.
943 */
944 static void precalc_txp( struct brw_wm_compile *c,
945 const struct prog_instruction *inst )
946 {
947 struct prog_src_register src0 = inst->SrcReg[0];
948
949 if (projtex(c, inst)) {
950 struct prog_dst_register tmp = get_temp(c);
951 struct prog_instruction tmp_inst;
952
953 /* tmp0.w = RCP inst.arg[0][3]
954 */
955 emit_op(c,
956 OPCODE_RCP,
957 dst_mask(tmp, WRITEMASK_W),
958 0,
959 src_swizzle1(src0, GET_SWZ(src0.Swizzle, W)),
960 src_undef(),
961 src_undef());
962
963 /* tmp0.xyz = MUL inst.arg[0], tmp0.wwww
964 */
965 emit_op(c,
966 OPCODE_MUL,
967 dst_mask(tmp, WRITEMASK_XYZ),
968 0,
969 src0,
970 src_swizzle1(src_reg_from_dst(tmp), W),
971 src_undef());
972
973 /* dst = precalc(TEX tmp0)
974 */
975 tmp_inst = *inst;
976 tmp_inst.SrcReg[0] = src_reg_from_dst(tmp);
977 precalc_tex(c, &tmp_inst);
978
979 release_temp(c, tmp);
980 }
981 else
982 {
983 /* dst = precalc(TEX src0)
984 */
985 precalc_tex(c, inst);
986 }
987 }
988
989
990
991 static void emit_fb_write( struct brw_wm_compile *c )
992 {
993 struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
994 struct prog_src_register outdepth = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DEPTH);
995 struct prog_src_register outcolor;
996 GLuint i;
997
998 struct prog_instruction *inst, *last_inst;
999 struct brw_context *brw = c->func.brw;
1000
1001 /* The inst->Aux field is used for FB write target and the EOT marker */
1002
1003 if (brw->state.nr_color_regions > 1) {
1004 for (i = 0 ; i < brw->state.nr_color_regions; i++) {
1005 outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0 + i);
1006 last_inst = inst = emit_op(c,
1007 WM_FB_WRITE, dst_mask(dst_undef(),0), 0,
1008 outcolor, payload_r0_depth, outdepth);
1009 inst->Aux = (i<<1);
1010 if (c->fp_fragcolor_emitted) {
1011 outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR);
1012 last_inst = inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0),
1013 0, outcolor, payload_r0_depth, outdepth);
1014 inst->Aux = (i<<1);
1015 }
1016 }
1017 last_inst->Aux |= 1; //eot
1018 }
1019 else {
1020 /* if gl_FragData[0] is written, use it, else use gl_FragColor */
1021 if (c->fp->program.Base.OutputsWritten & (1 << FRAG_RESULT_DATA0))
1022 outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0);
1023 else
1024 outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR);
1025
1026 inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0),
1027 0, outcolor, payload_r0_depth, outdepth);
1028 inst->Aux = 1|(0<<1);
1029 }
1030 }
1031
1032
1033
1034
1035 /***********************************************************************
1036 * Emit INTERP instructions ahead of first use of each attrib.
1037 */
1038
1039 static void validate_src_regs( struct brw_wm_compile *c,
1040 const struct prog_instruction *inst )
1041 {
1042 GLuint nr_args = brw_wm_nr_args( inst->Opcode );
1043 GLuint i;
1044
1045 for (i = 0; i < nr_args; i++) {
1046 if (inst->SrcReg[i].File == PROGRAM_INPUT) {
1047 GLuint idx = inst->SrcReg[i].Index;
1048 if (!(c->fp_interp_emitted & (1<<idx))) {
1049 emit_interp(c, idx);
1050 }
1051 }
1052 }
1053 }
1054
1055 static void validate_dst_regs( struct brw_wm_compile *c,
1056 const struct prog_instruction *inst )
1057 {
1058 if (inst->DstReg.File == PROGRAM_OUTPUT) {
1059 GLuint idx = inst->DstReg.Index;
1060 if (idx == FRAG_RESULT_COLOR)
1061 c->fp_fragcolor_emitted = 1;
1062 }
1063 }
1064
1065 static void print_insns( const struct prog_instruction *insn,
1066 GLuint nr )
1067 {
1068 GLuint i;
1069 for (i = 0; i < nr; i++, insn++) {
1070 _mesa_printf("%3d: ", i);
1071 if (insn->Opcode < MAX_OPCODE)
1072 _mesa_print_instruction(insn);
1073 else if (insn->Opcode < MAX_WM_OPCODE) {
1074 GLuint idx = insn->Opcode - MAX_OPCODE;
1075
1076 _mesa_print_alu_instruction(insn,
1077 wm_opcode_strings[idx],
1078 3);
1079 }
1080 else
1081 _mesa_printf("965 Opcode %d\n", insn->Opcode);
1082 }
1083 }
1084
1085
1086 /**
1087 * Initial pass for fragment program code generation.
1088 * This function is used by both the GLSL and non-GLSL paths.
1089 */
1090 void brw_wm_pass_fp( struct brw_wm_compile *c )
1091 {
1092 struct brw_fragment_program *fp = c->fp;
1093 GLuint insn;
1094
1095 if (INTEL_DEBUG & DEBUG_WM) {
1096 _mesa_printf("pre-fp:\n");
1097 _mesa_print_program(&fp->program.Base);
1098 _mesa_printf("\n");
1099 }
1100
1101 c->pixel_xy = src_undef();
1102 c->delta_xy = src_undef();
1103 c->pixel_w = src_undef();
1104 c->nr_fp_insns = 0;
1105 c->fp->tex_units_used = 0x0;
1106
1107 /* Emit preamble instructions. This is where special instructions such as
1108 * WM_CINTERP, WM_LINTERP, WM_PINTERP and WM_WPOSXY are emitted to
1109 * compute shader inputs from varying vars.
1110 */
1111 for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) {
1112 const struct prog_instruction *inst = &fp->program.Base.Instructions[insn];
1113 validate_src_regs(c, inst);
1114 validate_dst_regs(c, inst);
1115 }
1116
1117 /* Loop over all instructions doing assorted simplifications and
1118 * transformations.
1119 */
1120 for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) {
1121 const struct prog_instruction *inst = &fp->program.Base.Instructions[insn];
1122 struct prog_instruction *out;
1123
1124 /* Check for INPUT values, emit INTERP instructions where
1125 * necessary:
1126 */
1127
1128 switch (inst->Opcode) {
1129 case OPCODE_SWZ:
1130 out = emit_insn(c, inst);
1131 out->Opcode = OPCODE_MOV;
1132 break;
1133
1134 case OPCODE_ABS:
1135 out = emit_insn(c, inst);
1136 out->Opcode = OPCODE_MOV;
1137 out->SrcReg[0].Negate = NEGATE_NONE;
1138 out->SrcReg[0].Abs = 1;
1139 break;
1140
1141 case OPCODE_SUB:
1142 out = emit_insn(c, inst);
1143 out->Opcode = OPCODE_ADD;
1144 out->SrcReg[1].Negate ^= NEGATE_XYZW;
1145 break;
1146
1147 case OPCODE_SCS:
1148 out = emit_insn(c, inst);
1149 /* This should probably be done in the parser.
1150 */
1151 out->DstReg.WriteMask &= WRITEMASK_XY;
1152 break;
1153
1154 case OPCODE_DST:
1155 precalc_dst(c, inst);
1156 break;
1157
1158 case OPCODE_LIT:
1159 precalc_lit(c, inst);
1160 break;
1161
1162 case OPCODE_TEX:
1163 precalc_tex(c, inst);
1164 break;
1165
1166 case OPCODE_TXP:
1167 precalc_txp(c, inst);
1168 break;
1169
1170 case OPCODE_TXB:
1171 out = emit_insn(c, inst);
1172 out->TexSrcUnit = fp->program.Base.SamplerUnits[inst->TexSrcUnit];
1173 assert(out->TexSrcUnit < BRW_MAX_TEX_UNIT);
1174 break;
1175
1176 case OPCODE_XPD:
1177 out = emit_insn(c, inst);
1178 /* This should probably be done in the parser.
1179 */
1180 out->DstReg.WriteMask &= WRITEMASK_XYZ;
1181 break;
1182
1183 case OPCODE_KIL:
1184 out = emit_insn(c, inst);
1185 /* This should probably be done in the parser.
1186 */
1187 out->DstReg.WriteMask = 0;
1188 break;
1189 case OPCODE_DDX:
1190 emit_ddx(c, inst);
1191 break;
1192 case OPCODE_DDY:
1193 emit_ddy(c, inst);
1194 break;
1195 case OPCODE_END:
1196 emit_fb_write(c);
1197 break;
1198 case OPCODE_PRINT:
1199 break;
1200 default:
1201 if (brw_wm_is_scalar_result(inst->Opcode))
1202 emit_scalar_insn(c, inst);
1203 else
1204 emit_insn(c, inst);
1205 break;
1206 }
1207 }
1208
1209 if (INTEL_DEBUG & DEBUG_WM) {
1210 _mesa_printf("pass_fp:\n");
1211 print_insns( c->prog_instructions, c->nr_fp_insns );
1212 _mesa_printf("\n");
1213 }
1214 }
1215