Merge commit 'origin/gallium-0.1'
[mesa.git] / src / mesa / drivers / dri / i965 / brw_wm_fp.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "main/glheader.h"
34 #include "main/macros.h"
35 #include "main/enums.h"
36 #include "brw_context.h"
37 #include "brw_wm.h"
38 #include "brw_util.h"
39
40 #include "shader/prog_parameter.h"
41 #include "shader/prog_print.h"
42 #include "shader/prog_statevars.h"
43
44
/* Index of the first temporary register this pass allocates for itself. */
#define FIRST_INTERNAL_TEMP MAX_NV_FRAGMENT_PROGRAM_TEMPS

/* Vector component indices, used when building and reading swizzles. */
enum {
   X = 0,
   Y = 1,
   Z = 2,
   W = 3
};
51
52
/* Printable names of the WM-only opcodes.  print_insns() indexes this
 * table with (Opcode - MAX_OPCODE).
 */
static const char *wm_opcode_strings[] = {
   "PIXELXY", "DELTAXY", "PIXELW",
   "LINTERP", "PINTERP", "CINTERP",
   "WPOSXY",
   "FB_WRITE"
};
63
#if 0
/* Compiled out: presumably printable names for the WM-only register files. */
static const char *wm_file_strings[] = { "PAYLOAD" };
#endif
69
70
71 /***********************************************************************
72 * Source regs
73 */
74
75 static struct prog_src_register src_reg(GLuint file, GLuint idx)
76 {
77 struct prog_src_register reg;
78 reg.File = file;
79 reg.Index = idx;
80 reg.Swizzle = SWIZZLE_NOOP;
81 reg.RelAddr = 0;
82 reg.NegateBase = 0;
83 reg.Abs = 0;
84 reg.NegateAbs = 0;
85 return reg;
86 }
87
88 static struct prog_src_register src_reg_from_dst(struct prog_dst_register dst)
89 {
90 return src_reg(dst.File, dst.Index);
91 }
92
93 static struct prog_src_register src_undef( void )
94 {
95 return src_reg(PROGRAM_UNDEFINED, 0);
96 }
97
98 static GLboolean src_is_undef(struct prog_src_register src)
99 {
100 return src.File == PROGRAM_UNDEFINED;
101 }
102
103 static struct prog_src_register src_swizzle( struct prog_src_register reg, int x, int y, int z, int w )
104 {
105 reg.Swizzle = MAKE_SWIZZLE4(x,y,z,w);
106 return reg;
107 }
108
109 static struct prog_src_register src_swizzle1( struct prog_src_register reg, int x )
110 {
111 return src_swizzle(reg, x, x, x, x);
112 }
113
114 static struct prog_src_register src_swizzle4( struct prog_src_register reg, uint swizzle )
115 {
116 reg.Swizzle = swizzle;
117 return reg;
118 }
119
120
121 /***********************************************************************
122 * Dest regs
123 */
124
125 static struct prog_dst_register dst_reg(GLuint file, GLuint idx)
126 {
127 struct prog_dst_register reg;
128 reg.File = file;
129 reg.Index = idx;
130 reg.WriteMask = WRITEMASK_XYZW;
131 reg.RelAddr = 0;
132 reg.CondMask = 0;
133 reg.CondSwizzle = 0;
134 reg.CondSrc = 0;
135 reg.pad = 0;
136 return reg;
137 }
138
139 static struct prog_dst_register dst_mask( struct prog_dst_register reg, int mask )
140 {
141 reg.WriteMask &= mask;
142 return reg;
143 }
144
145 static struct prog_dst_register dst_undef( void )
146 {
147 return dst_reg(PROGRAM_UNDEFINED, 0);
148 }
149
150
151
152 static struct prog_dst_register get_temp( struct brw_wm_compile *c )
153 {
154 int bit = _mesa_ffs( ~c->fp_temp );
155
156 if (!bit) {
157 _mesa_printf("%s: out of temporaries\n", __FILE__);
158 exit(1);
159 }
160
161 c->fp_temp |= 1<<(bit-1);
162 return dst_reg(PROGRAM_TEMPORARY, FIRST_INTERNAL_TEMP+(bit-1));
163 }
164
165
166 static void release_temp( struct brw_wm_compile *c, struct prog_dst_register temp )
167 {
168 c->fp_temp &= ~(1 << (temp.Index - FIRST_INTERNAL_TEMP));
169 }
170
171
172 /***********************************************************************
173 * Instructions
174 */
175
176 static struct prog_instruction *get_fp_inst(struct brw_wm_compile *c)
177 {
178 return &c->prog_instructions[c->nr_fp_insns++];
179 }
180
181 static struct prog_instruction *emit_insn(struct brw_wm_compile *c,
182 const struct prog_instruction *inst0)
183 {
184 struct prog_instruction *inst = get_fp_inst(c);
185 *inst = *inst0;
186 return inst;
187 }
188
189 static struct prog_instruction * emit_tex_op(struct brw_wm_compile *c,
190 GLuint op,
191 struct prog_dst_register dest,
192 GLuint saturate,
193 GLuint tex_src_unit,
194 GLuint tex_src_target,
195 GLuint tex_shadow,
196 struct prog_src_register src0,
197 struct prog_src_register src1,
198 struct prog_src_register src2 )
199 {
200 struct prog_instruction *inst = get_fp_inst(c);
201
202 memset(inst, 0, sizeof(*inst));
203
204 inst->Opcode = op;
205 inst->DstReg = dest;
206 inst->SaturateMode = saturate;
207 inst->TexSrcUnit = tex_src_unit;
208 inst->TexSrcTarget = tex_src_target;
209 inst->TexShadow = tex_shadow;
210 inst->SrcReg[0] = src0;
211 inst->SrcReg[1] = src1;
212 inst->SrcReg[2] = src2;
213 return inst;
214 }
215
216
217 static struct prog_instruction * emit_op(struct brw_wm_compile *c,
218 GLuint op,
219 struct prog_dst_register dest,
220 GLuint saturate,
221 struct prog_src_register src0,
222 struct prog_src_register src1,
223 struct prog_src_register src2 )
224 {
225 return emit_tex_op(c, op, dest, saturate,
226 0, 0, 0, /* tex unit, target, shadow */
227 src0, src1, src2);
228 }
229
230
231
232
233 /***********************************************************************
234 * Special instructions for interpolation and other tasks
235 */
236
237 static struct prog_src_register get_pixel_xy( struct brw_wm_compile *c )
238 {
239 if (src_is_undef(c->pixel_xy)) {
240 struct prog_dst_register pixel_xy = get_temp(c);
241 struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
242
243
244 /* Emit the out calculations, and hold onto the results. Use
245 * two instructions as a temporary is required.
246 */
247 /* pixel_xy.xy = PIXELXY payload[0];
248 */
249 emit_op(c,
250 WM_PIXELXY,
251 dst_mask(pixel_xy, WRITEMASK_XY),
252 0,
253 payload_r0_depth,
254 src_undef(),
255 src_undef());
256
257 c->pixel_xy = src_reg_from_dst(pixel_xy);
258 }
259
260 return c->pixel_xy;
261 }
262
263 static struct prog_src_register get_delta_xy( struct brw_wm_compile *c )
264 {
265 if (src_is_undef(c->delta_xy)) {
266 struct prog_dst_register delta_xy = get_temp(c);
267 struct prog_src_register pixel_xy = get_pixel_xy(c);
268 struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
269
270 /* deltas.xy = DELTAXY pixel_xy, payload[0]
271 */
272 emit_op(c,
273 WM_DELTAXY,
274 dst_mask(delta_xy, WRITEMASK_XY),
275 0,
276 pixel_xy,
277 payload_r0_depth,
278 src_undef());
279
280 c->delta_xy = src_reg_from_dst(delta_xy);
281 }
282
283 return c->delta_xy;
284 }
285
286 static struct prog_src_register get_pixel_w( struct brw_wm_compile *c )
287 {
288 if (src_is_undef(c->pixel_w)) {
289 struct prog_dst_register pixel_w = get_temp(c);
290 struct prog_src_register deltas = get_delta_xy(c);
291 struct prog_src_register interp_wpos = src_reg(PROGRAM_PAYLOAD, FRAG_ATTRIB_WPOS);
292
293 /* deltas.xyw = DELTAS2 deltas.xy, payload.interp_wpos.x
294 */
295 emit_op(c,
296 WM_PIXELW,
297 dst_mask(pixel_w, WRITEMASK_W),
298 0,
299 interp_wpos,
300 deltas,
301 src_undef());
302
303
304 c->pixel_w = src_reg_from_dst(pixel_w);
305 }
306
307 return c->pixel_w;
308 }
309
310 static void emit_interp( struct brw_wm_compile *c,
311 GLuint idx )
312 {
313 struct prog_dst_register dst = dst_reg(PROGRAM_INPUT, idx);
314 struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx);
315 struct prog_src_register deltas = get_delta_xy(c);
316 struct prog_src_register arg2;
317 GLuint opcode;
318
319 /* Need to use PINTERP on attributes which have been
320 * multiplied by 1/W in the SF program, and LINTERP on those
321 * which have not:
322 */
323 switch (idx) {
324 case FRAG_ATTRIB_WPOS:
325 opcode = WM_LINTERP;
326 arg2 = src_undef();
327
328 /* Have to treat wpos.xy specially:
329 */
330 emit_op(c,
331 WM_WPOSXY,
332 dst_mask(dst, WRITEMASK_XY),
333 0,
334 get_pixel_xy(c),
335 src_undef(),
336 src_undef());
337
338 dst = dst_mask(dst, WRITEMASK_ZW);
339
340 /* PROGRAM_INPUT.attr.xyzw = INTERP payload.interp[attr].x, deltas.xyw
341 */
342 emit_op(c,
343 WM_LINTERP,
344 dst,
345 0,
346 interp,
347 deltas,
348 arg2);
349 break;
350 case FRAG_ATTRIB_COL0:
351 case FRAG_ATTRIB_COL1:
352 if (c->key.flat_shade) {
353 emit_op(c,
354 WM_CINTERP,
355 dst,
356 0,
357 interp,
358 src_undef(),
359 src_undef());
360 }
361 else {
362 emit_op(c,
363 WM_LINTERP,
364 dst,
365 0,
366 interp,
367 deltas,
368 src_undef());
369 }
370 break;
371 default:
372 emit_op(c,
373 WM_PINTERP,
374 dst,
375 0,
376 interp,
377 deltas,
378 get_pixel_w(c));
379 break;
380 }
381
382 c->fp_interp_emitted |= 1<<idx;
383 }
384
385 static void emit_ddx( struct brw_wm_compile *c,
386 const struct prog_instruction *inst )
387 {
388 GLuint idx = inst->SrcReg[0].Index;
389 struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx);
390
391 c->fp_deriv_emitted |= 1<<idx;
392 emit_op(c,
393 OPCODE_DDX,
394 inst->DstReg,
395 0,
396 interp,
397 get_pixel_w(c),
398 src_undef());
399 }
400
401 static void emit_ddy( struct brw_wm_compile *c,
402 const struct prog_instruction *inst )
403 {
404 GLuint idx = inst->SrcReg[0].Index;
405 struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx);
406
407 c->fp_deriv_emitted |= 1<<idx;
408 emit_op(c,
409 OPCODE_DDY,
410 inst->DstReg,
411 0,
412 interp,
413 get_pixel_w(c),
414 src_undef());
415 }
416
417 /***********************************************************************
418 * Hacks to extend the program parameter and constant lists.
419 */
420
421 /* Add the fog parameters to the parameter list of the original
422 * program, rather than creating a new list. Doesn't really do any
423 * harm and it's not as if the parameter handling isn't a big hack
424 * anyway.
425 */
426 static struct prog_src_register search_or_add_param5(struct brw_wm_compile *c,
427 GLint s0,
428 GLint s1,
429 GLint s2,
430 GLint s3,
431 GLint s4)
432 {
433 struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters;
434 gl_state_index tokens[STATE_LENGTH];
435 GLuint idx;
436 tokens[0] = s0;
437 tokens[1] = s1;
438 tokens[2] = s2;
439 tokens[3] = s3;
440 tokens[4] = s4;
441
442 for (idx = 0; idx < paramList->NumParameters; idx++) {
443 if (paramList->Parameters[idx].Type == PROGRAM_STATE_VAR &&
444 memcmp(paramList->Parameters[idx].StateIndexes, tokens, sizeof(tokens)) == 0)
445 return src_reg(PROGRAM_STATE_VAR, idx);
446 }
447
448 idx = _mesa_add_state_reference( paramList, tokens );
449
450 return src_reg(PROGRAM_STATE_VAR, idx);
451 }
452
453
454 static struct prog_src_register search_or_add_const4f( struct brw_wm_compile *c,
455 GLfloat s0,
456 GLfloat s1,
457 GLfloat s2,
458 GLfloat s3)
459 {
460 struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters;
461 GLfloat values[4];
462 GLuint idx;
463 GLuint swizzle;
464
465 values[0] = s0;
466 values[1] = s1;
467 values[2] = s2;
468 values[3] = s3;
469
470 /* Have to search, otherwise multiple compilations will each grow
471 * the parameter list.
472 */
473 for (idx = 0; idx < paramList->NumParameters; idx++) {
474 if (paramList->Parameters[idx].Type == PROGRAM_CONSTANT &&
475 memcmp(paramList->ParameterValues[idx], values, sizeof(values)) == 0)
476
477 /* XXX: this mimics the mesa bug which puts all constants and
478 * parameters into the "PROGRAM_STATE_VAR" category:
479 */
480 return src_reg(PROGRAM_STATE_VAR, idx);
481 }
482
483 idx = _mesa_add_unnamed_constant( paramList, values, 4, &swizzle );
484 assert(swizzle == SWIZZLE_NOOP); /* Need to handle swizzle in reg setup */
485 return src_reg(PROGRAM_STATE_VAR, idx);
486 }
487
488
489
490 /***********************************************************************
491 * Expand various instructions here to simpler forms.
492 */
493 static void precalc_dst( struct brw_wm_compile *c,
494 const struct prog_instruction *inst )
495 {
496 struct prog_src_register src0 = inst->SrcReg[0];
497 struct prog_src_register src1 = inst->SrcReg[1];
498 struct prog_dst_register dst = inst->DstReg;
499
500 if (dst.WriteMask & WRITEMASK_Y) {
501 /* dst.y = mul src0.y, src1.y
502 */
503 emit_op(c,
504 OPCODE_MUL,
505 dst_mask(dst, WRITEMASK_Y),
506 inst->SaturateMode,
507 src0,
508 src1,
509 src_undef());
510 }
511
512 if (dst.WriteMask & WRITEMASK_XZ) {
513 struct prog_instruction *swz;
514 GLuint z = GET_SWZ(src0.Swizzle, Z);
515
516 /* dst.xz = swz src0.1zzz
517 */
518 swz = emit_op(c,
519 OPCODE_SWZ,
520 dst_mask(dst, WRITEMASK_XZ),
521 inst->SaturateMode,
522 src_swizzle(src0, SWIZZLE_ONE, z, z, z),
523 src_undef(),
524 src_undef());
525 /* Avoid letting negation flag of src0 affect our 1 constant. */
526 swz->SrcReg[0].NegateBase &= ~NEGATE_X;
527 }
528 if (dst.WriteMask & WRITEMASK_W) {
529 /* dst.w = mov src1.w
530 */
531 emit_op(c,
532 OPCODE_MOV,
533 dst_mask(dst, WRITEMASK_W),
534 inst->SaturateMode,
535 src1,
536 src_undef(),
537 src_undef());
538 }
539 }
540
541
542 static void precalc_lit( struct brw_wm_compile *c,
543 const struct prog_instruction *inst )
544 {
545 struct prog_src_register src0 = inst->SrcReg[0];
546 struct prog_dst_register dst = inst->DstReg;
547
548 if (dst.WriteMask & WRITEMASK_XW) {
549 struct prog_instruction *swz;
550
551 /* dst.xw = swz src0.1111
552 */
553 swz = emit_op(c,
554 OPCODE_SWZ,
555 dst_mask(dst, WRITEMASK_XW),
556 0,
557 src_swizzle1(src0, SWIZZLE_ONE),
558 src_undef(),
559 src_undef());
560 /* Avoid letting the negation flag of src0 affect our 1 constant. */
561 swz->SrcReg[0].NegateBase = 0;
562 }
563
564 if (dst.WriteMask & WRITEMASK_YZ) {
565 emit_op(c,
566 OPCODE_LIT,
567 dst_mask(dst, WRITEMASK_YZ),
568 inst->SaturateMode,
569 src0,
570 src_undef(),
571 src_undef());
572 }
573 }
574
575
576 /**
577 * Some TEX instructions require extra code, cube map coordinate
578 * normalization, or coordinate scaling for RECT textures, etc.
579 * This function emits those extra instructions and the TEX
580 * instruction itself.
581 */
582 static void precalc_tex( struct brw_wm_compile *c,
583 const struct prog_instruction *inst )
584 {
585 struct prog_src_register coord;
586 struct prog_dst_register tmpcoord;
587 const GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit];
588
589 if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX) {
590 struct prog_instruction *out;
591 struct prog_dst_register tmp0 = get_temp(c);
592 struct prog_src_register tmp0src = src_reg_from_dst(tmp0);
593 struct prog_dst_register tmp1 = get_temp(c);
594 struct prog_src_register tmp1src = src_reg_from_dst(tmp1);
595 struct prog_src_register src0 = inst->SrcReg[0];
596
597 /* find longest component of coord vector and normalize it */
598 tmpcoord = get_temp(c);
599 coord = src_reg_from_dst(tmpcoord);
600
601 /* tmpcoord = src0 (i.e.: coord = src0) */
602 out = emit_op(c, OPCODE_MOV,
603 tmpcoord,
604 0,
605 src0,
606 src_undef(),
607 src_undef());
608 out->SrcReg[0].NegateBase = 0;
609 out->SrcReg[0].Abs = 1;
610
611 /* tmp0 = MAX(coord.X, coord.Y) */
612 emit_op(c, OPCODE_MAX,
613 tmp0,
614 0,
615 src_swizzle1(coord, X),
616 src_swizzle1(coord, Y),
617 src_undef());
618
619 /* tmp1 = MAX(tmp0, coord.Z) */
620 emit_op(c, OPCODE_MAX,
621 tmp1,
622 0,
623 tmp0src,
624 src_swizzle1(coord, Z),
625 src_undef());
626
627 /* tmp0 = 1 / tmp1 */
628 emit_op(c, OPCODE_RCP,
629 tmp0,
630 0,
631 tmp1src,
632 src_undef(),
633 src_undef());
634
635 /* tmpCoord = src0 * tmp0 */
636 emit_op(c, OPCODE_MUL,
637 tmpcoord,
638 0,
639 src0,
640 tmp0src,
641 src_undef());
642
643 release_temp(c, tmp0);
644 release_temp(c, tmp1);
645 }
646 else if (inst->TexSrcTarget == TEXTURE_RECT_INDEX) {
647 struct prog_src_register scale =
648 search_or_add_param5( c,
649 STATE_INTERNAL,
650 STATE_TEXRECT_SCALE,
651 unit,
652 0,0 );
653
654 tmpcoord = get_temp(c);
655
656 /* coord.xy = MUL inst->SrcReg[0], { 1/width, 1/height }
657 */
658 emit_op(c,
659 OPCODE_MUL,
660 tmpcoord,
661 0,
662 inst->SrcReg[0],
663 scale,
664 src_undef());
665
666 coord = src_reg_from_dst(tmpcoord);
667 }
668 else {
669 coord = inst->SrcReg[0];
670 }
671
672 /* Need to emit YUV texture conversions by hand. Probably need to
673 * do this here - the alternative is in brw_wm_emit.c, but the
674 * conversion requires allocating a temporary variable which we
675 * don't have the facility to do that late in the compilation.
676 */
677 if (c->key.yuvtex_mask & (1 << unit)) {
678 /* convert ycbcr to RGBA */
679 GLboolean swap_uv = c->key.yuvtex_swap_mask & (1<<unit);
680
681 /*
682 CONST C0 = { -.5, -.0625, -.5, 1.164 }
683 CONST C1 = { 1.596, -0.813, 2.018, -.391 }
684 UYV = TEX ...
685 UYV.xyz = ADD UYV, C0
686 UYV.y = MUL UYV.y, C0.w
687 if (UV swaped)
688 RGB.xyz = MAD UYV.zzx, C1, UYV.y
689 else
690 RGB.xyz = MAD UYV.xxz, C1, UYV.y
691 RGB.y = MAD UYV.z, C1.w, RGB.y
692 */
693 struct prog_dst_register dst = inst->DstReg;
694 struct prog_dst_register tmp = get_temp(c);
695 struct prog_src_register tmpsrc = src_reg_from_dst(tmp);
696 struct prog_src_register C0 = search_or_add_const4f( c, -.5, -.0625, -.5, 1.164 );
697 struct prog_src_register C1 = search_or_add_const4f( c, 1.596, -0.813, 2.018, -.391 );
698
699 /* tmp = TEX ...
700 */
701 emit_tex_op(c,
702 OPCODE_TEX,
703 tmp,
704 inst->SaturateMode,
705 unit,
706 inst->TexSrcTarget,
707 inst->TexShadow,
708 coord,
709 src_undef(),
710 src_undef());
711
712 /* tmp.xyz = ADD TMP, C0
713 */
714 emit_op(c,
715 OPCODE_ADD,
716 dst_mask(tmp, WRITEMASK_XYZ),
717 0,
718 tmpsrc,
719 C0,
720 src_undef());
721
722 /* YUV.y = MUL YUV.y, C0.w
723 */
724
725 emit_op(c,
726 OPCODE_MUL,
727 dst_mask(tmp, WRITEMASK_Y),
728 0,
729 tmpsrc,
730 src_swizzle1(C0, W),
731 src_undef());
732
733 /*
734 * if (UV swaped)
735 * RGB.xyz = MAD YUV.zzx, C1, YUV.y
736 * else
737 * RGB.xyz = MAD YUV.xxz, C1, YUV.y
738 */
739
740 emit_op(c,
741 OPCODE_MAD,
742 dst_mask(dst, WRITEMASK_XYZ),
743 0,
744 swap_uv?src_swizzle(tmpsrc, Z,Z,X,X):src_swizzle(tmpsrc, X,X,Z,Z),
745 C1,
746 src_swizzle1(tmpsrc, Y));
747
748 /* RGB.y = MAD YUV.z, C1.w, RGB.y
749 */
750 emit_op(c,
751 OPCODE_MAD,
752 dst_mask(dst, WRITEMASK_Y),
753 0,
754 src_swizzle1(tmpsrc, Z),
755 src_swizzle1(C1, W),
756 src_swizzle1(src_reg_from_dst(dst), Y));
757
758 release_temp(c, tmp);
759 }
760 else {
761 /* ordinary RGBA tex instruction */
762 emit_tex_op(c,
763 OPCODE_TEX,
764 inst->DstReg,
765 inst->SaturateMode,
766 unit,
767 inst->TexSrcTarget,
768 inst->TexShadow,
769 coord,
770 src_undef(),
771 src_undef());
772 }
773
774 /* For GL_EXT_texture_swizzle: */
775 if (c->key.tex_swizzles[unit] != SWIZZLE_NOOP) {
776 /* swizzle the result of the TEX instruction */
777 struct prog_src_register tmpsrc = src_reg_from_dst(inst->DstReg);
778 emit_op(c, OPCODE_SWZ,
779 inst->DstReg,
780 SATURATE_OFF, /* saturate already done above */
781 src_swizzle4(tmpsrc, c->key.tex_swizzles[unit]),
782 src_undef(),
783 src_undef());
784 }
785
786 if ((inst->TexSrcTarget == TEXTURE_RECT_INDEX) ||
787 (inst->TexSrcTarget == TEXTURE_CUBE_INDEX))
788 release_temp(c, tmpcoord);
789 }
790
791
792 static GLboolean projtex( struct brw_wm_compile *c,
793 const struct prog_instruction *inst )
794 {
795 struct prog_src_register src = inst->SrcReg[0];
796
797 /* Only try to detect the simplest cases. Could detect (later)
798 * cases where we are trying to emit code like RCP {1.0}, MUL x,
799 * {1.0}, and so on.
800 *
801 * More complex cases than this typically only arise from
802 * user-provided fragment programs anyway:
803 */
804 if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX)
805 return 0; /* ut2004 gun rendering !?! */
806 else if (src.File == PROGRAM_INPUT &&
807 GET_SWZ(src.Swizzle, W) == W &&
808 (c->key.projtex_mask & (1<<(src.Index + FRAG_ATTRIB_WPOS - FRAG_ATTRIB_TEX0))) == 0)
809 return 0;
810 else
811 return 1;
812 }
813
814
815 static void precalc_txp( struct brw_wm_compile *c,
816 const struct prog_instruction *inst )
817 {
818 struct prog_src_register src0 = inst->SrcReg[0];
819
820 if (projtex(c, inst)) {
821 struct prog_dst_register tmp = get_temp(c);
822 struct prog_instruction tmp_inst;
823
824 /* tmp0.w = RCP inst.arg[0][3]
825 */
826 emit_op(c,
827 OPCODE_RCP,
828 dst_mask(tmp, WRITEMASK_W),
829 0,
830 src_swizzle1(src0, GET_SWZ(src0.Swizzle, W)),
831 src_undef(),
832 src_undef());
833
834 /* tmp0.xyz = MUL inst.arg[0], tmp0.wwww
835 */
836 emit_op(c,
837 OPCODE_MUL,
838 dst_mask(tmp, WRITEMASK_XYZ),
839 0,
840 src0,
841 src_swizzle1(src_reg_from_dst(tmp), W),
842 src_undef());
843
844 /* dst = precalc(TEX tmp0)
845 */
846 tmp_inst = *inst;
847 tmp_inst.SrcReg[0] = src_reg_from_dst(tmp);
848 precalc_tex(c, &tmp_inst);
849
850 release_temp(c, tmp);
851 }
852 else
853 {
854 /* dst = precalc(TEX src0)
855 */
856 precalc_tex(c, inst);
857 }
858 }
859
860
861
862 static void emit_fb_write( struct brw_wm_compile *c )
863 {
864 struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
865 struct prog_src_register outdepth = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DEPTH);
866 struct prog_src_register outcolor;
867 GLuint i;
868
869 struct prog_instruction *inst, *last_inst;
870 struct brw_context *brw = c->func.brw;
871
872 /* The inst->Aux field is used for FB write target and the EOT marker */
873
874 if (brw->state.nr_color_regions > 1) {
875 for (i = 0 ; i < brw->state.nr_color_regions; i++) {
876 outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0 + i);
877 last_inst = inst = emit_op(c,
878 WM_FB_WRITE, dst_mask(dst_undef(),0), 0,
879 outcolor, payload_r0_depth, outdepth);
880 inst->Aux = (i<<1);
881 if (c->fp_fragcolor_emitted) {
882 outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR);
883 last_inst = inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0),
884 0, outcolor, payload_r0_depth, outdepth);
885 inst->Aux = (i<<1);
886 }
887 }
888 last_inst->Aux |= 1; //eot
889 }
890 else {
891 /* if gl_FragData[0] is written, use it, else use gl_FragColor */
892 if (c->fp->program.Base.OutputsWritten & (1 << FRAG_RESULT_DATA0))
893 outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0);
894 else
895 outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR);
896
897 inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0),
898 0, outcolor, payload_r0_depth, outdepth);
899 inst->Aux = 1|(0<<1);
900 }
901 }
902
903
904
905
906 /***********************************************************************
907 * Emit INTERP instructions ahead of first use of each attrib.
908 */
909
910 static void validate_src_regs( struct brw_wm_compile *c,
911 const struct prog_instruction *inst )
912 {
913 GLuint nr_args = brw_wm_nr_args( inst->Opcode );
914 GLuint i;
915
916 for (i = 0; i < nr_args; i++) {
917 if (inst->SrcReg[i].File == PROGRAM_INPUT) {
918 GLuint idx = inst->SrcReg[i].Index;
919 if (!(c->fp_interp_emitted & (1<<idx))) {
920 emit_interp(c, idx);
921 }
922 }
923 }
924 }
925
926 static void validate_dst_regs( struct brw_wm_compile *c,
927 const struct prog_instruction *inst )
928 {
929 if (inst->DstReg.File == PROGRAM_OUTPUT) {
930 GLuint idx = inst->DstReg.Index;
931 if (idx == FRAG_RESULT_COLOR)
932 c->fp_fragcolor_emitted = 1;
933 }
934 }
935
936 static void print_insns( const struct prog_instruction *insn,
937 GLuint nr )
938 {
939 GLuint i;
940 for (i = 0; i < nr; i++, insn++) {
941 _mesa_printf("%3d: ", i);
942 if (insn->Opcode < MAX_OPCODE)
943 _mesa_print_instruction(insn);
944 else if (insn->Opcode < MAX_WM_OPCODE) {
945 GLuint idx = insn->Opcode - MAX_OPCODE;
946
947 _mesa_print_alu_instruction(insn,
948 wm_opcode_strings[idx],
949 3);
950 }
951 else
952 _mesa_printf("UNKNOWN\n");
953 }
954 }
955
956
957 /**
958 * Initial pass for fragment program code generation.
959 * This function is used by both the GLSL and non-GLSL paths.
960 */
961 void brw_wm_pass_fp( struct brw_wm_compile *c )
962 {
963 struct brw_fragment_program *fp = c->fp;
964 GLuint insn;
965
966 if (INTEL_DEBUG & DEBUG_WM) {
967 _mesa_printf("pre-fp:\n");
968 _mesa_print_program(&fp->program.Base);
969 _mesa_printf("\n");
970 }
971
972 c->pixel_xy = src_undef();
973 c->delta_xy = src_undef();
974 c->pixel_w = src_undef();
975 c->nr_fp_insns = 0;
976
977 /* Emit preamble instructions. This is where special instructions such as
978 * WM_CINTERP, WM_LINTERP, WM_PINTERP and WM_WPOSXY are emitted to
979 * compute shader inputs from varying vars.
980 */
981 for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) {
982 const struct prog_instruction *inst = &fp->program.Base.Instructions[insn];
983 validate_src_regs(c, inst);
984 validate_dst_regs(c, inst);
985 }
986
987 /* Loop over all instructions doing assorted simplifications and
988 * transformations.
989 */
990 for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) {
991 const struct prog_instruction *inst = &fp->program.Base.Instructions[insn];
992 struct prog_instruction *out;
993
994 /* Check for INPUT values, emit INTERP instructions where
995 * necessary:
996 */
997
998 switch (inst->Opcode) {
999 case OPCODE_SWZ:
1000 out = emit_insn(c, inst);
1001 out->Opcode = OPCODE_MOV;
1002 break;
1003
1004 case OPCODE_ABS:
1005 out = emit_insn(c, inst);
1006 out->Opcode = OPCODE_MOV;
1007 out->SrcReg[0].NegateBase = 0;
1008 out->SrcReg[0].Abs = 1;
1009 break;
1010
1011 case OPCODE_SUB:
1012 out = emit_insn(c, inst);
1013 out->Opcode = OPCODE_ADD;
1014 out->SrcReg[1].NegateBase ^= 0xf;
1015 break;
1016
1017 case OPCODE_SCS:
1018 out = emit_insn(c, inst);
1019 /* This should probably be done in the parser.
1020 */
1021 out->DstReg.WriteMask &= WRITEMASK_XY;
1022 break;
1023
1024 case OPCODE_DST:
1025 precalc_dst(c, inst);
1026 break;
1027
1028 case OPCODE_LIT:
1029 precalc_lit(c, inst);
1030 break;
1031
1032 case OPCODE_TEX:
1033 precalc_tex(c, inst);
1034 break;
1035
1036 case OPCODE_TXP:
1037 precalc_txp(c, inst);
1038 break;
1039
1040 case OPCODE_TXB:
1041 out = emit_insn(c, inst);
1042 out->TexSrcUnit = fp->program.Base.SamplerUnits[inst->TexSrcUnit];
1043 break;
1044
1045 case OPCODE_XPD:
1046 out = emit_insn(c, inst);
1047 /* This should probably be done in the parser.
1048 */
1049 out->DstReg.WriteMask &= WRITEMASK_XYZ;
1050 break;
1051
1052 case OPCODE_KIL:
1053 out = emit_insn(c, inst);
1054 /* This should probably be done in the parser.
1055 */
1056 out->DstReg.WriteMask = 0;
1057 break;
1058 case OPCODE_DDX:
1059 emit_ddx(c, inst);
1060 break;
1061 case OPCODE_DDY:
1062 emit_ddy(c, inst);
1063 break;
1064 case OPCODE_END:
1065 emit_fb_write(c);
1066 break;
1067 case OPCODE_PRINT:
1068 break;
1069
1070 default:
1071 emit_insn(c, inst);
1072 break;
1073 }
1074 }
1075
1076 if (INTEL_DEBUG & DEBUG_WM) {
1077 _mesa_printf("pass_fp:\n");
1078 print_insns( c->prog_instructions, c->nr_fp_insns );
1079 _mesa_printf("\n");
1080 }
1081 }
1082