Merge branch 'mesa_7_5_branch'
[mesa.git] / src / mesa / drivers / dri / i965 / brw_wm_fp.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "main/glheader.h"
34 #include "main/macros.h"
35 #include "main/enums.h"
36 #include "brw_context.h"
37 #include "brw_wm.h"
38 #include "brw_util.h"
39
40 #include "shader/prog_parameter.h"
41 #include "shader/prog_print.h"
42 #include "shader/prog_statevars.h"
43
44
/* Index of the first temporary register handed out by get_temp(). */
#define FIRST_INTERNAL_TEMP   MAX_NV_FRAGMENT_PROGRAM_TEMPS

/* Component indices, used when building swizzles in this file. */
#define X   0
#define Y   1
#define Z   2
#define W   3
51
52
/* Printable names of the WM-specific opcodes.  print_insns() indexes this
 * table with (Opcode - MAX_OPCODE), so the order of the entries matters.
 */
static const char *wm_opcode_strings[] = {
   "PIXELXY", "DELTAXY", "PIXELW",
   "LINTERP", "PINTERP", "CINTERP",
   "WPOSXY", "FB_WRITE", "FRONTFACING",
};

/* Currently disabled: name table for the extra register file(s). */
#if 0
static const char *wm_file_strings[] = { "PAYLOAD" };
#endif
70
71
72 /***********************************************************************
73 * Source regs
74 */
75
76 static struct prog_src_register src_reg(GLuint file, GLuint idx)
77 {
78 struct prog_src_register reg;
79 reg.File = file;
80 reg.Index = idx;
81 reg.Swizzle = SWIZZLE_NOOP;
82 reg.RelAddr = 0;
83 reg.Negate = NEGATE_NONE;
84 reg.Abs = 0;
85 return reg;
86 }
87
88 static struct prog_src_register src_reg_from_dst(struct prog_dst_register dst)
89 {
90 return src_reg(dst.File, dst.Index);
91 }
92
93 static struct prog_src_register src_undef( void )
94 {
95 return src_reg(PROGRAM_UNDEFINED, 0);
96 }
97
98 static GLboolean src_is_undef(struct prog_src_register src)
99 {
100 return src.File == PROGRAM_UNDEFINED;
101 }
102
103 static struct prog_src_register src_swizzle( struct prog_src_register reg, int x, int y, int z, int w )
104 {
105 reg.Swizzle = MAKE_SWIZZLE4(x,y,z,w);
106 return reg;
107 }
108
109 static struct prog_src_register src_swizzle1( struct prog_src_register reg, int x )
110 {
111 return src_swizzle(reg, x, x, x, x);
112 }
113
114 static struct prog_src_register src_swizzle4( struct prog_src_register reg, uint swizzle )
115 {
116 reg.Swizzle = swizzle;
117 return reg;
118 }
119
120
121 /***********************************************************************
122 * Dest regs
123 */
124
125 static struct prog_dst_register dst_reg(GLuint file, GLuint idx)
126 {
127 struct prog_dst_register reg;
128 reg.File = file;
129 reg.Index = idx;
130 reg.WriteMask = WRITEMASK_XYZW;
131 reg.RelAddr = 0;
132 reg.CondMask = COND_TR;
133 reg.CondSwizzle = 0;
134 reg.CondSrc = 0;
135 reg.pad = 0;
136 return reg;
137 }
138
139 static struct prog_dst_register dst_mask( struct prog_dst_register reg, int mask )
140 {
141 reg.WriteMask &= mask;
142 return reg;
143 }
144
145 static struct prog_dst_register dst_undef( void )
146 {
147 return dst_reg(PROGRAM_UNDEFINED, 0);
148 }
149
150
151
152 static struct prog_dst_register get_temp( struct brw_wm_compile *c )
153 {
154 int bit = _mesa_ffs( ~c->fp_temp );
155
156 if (!bit) {
157 _mesa_printf("%s: out of temporaries\n", __FILE__);
158 exit(1);
159 }
160
161 c->fp_temp |= 1<<(bit-1);
162 return dst_reg(PROGRAM_TEMPORARY, FIRST_INTERNAL_TEMP+(bit-1));
163 }
164
165
166 static void release_temp( struct brw_wm_compile *c, struct prog_dst_register temp )
167 {
168 c->fp_temp &= ~(1 << (temp.Index - FIRST_INTERNAL_TEMP));
169 }
170
171
172 /***********************************************************************
173 * Instructions
174 */
175
176 static struct prog_instruction *get_fp_inst(struct brw_wm_compile *c)
177 {
178 return &c->prog_instructions[c->nr_fp_insns++];
179 }
180
181 static struct prog_instruction *emit_insn(struct brw_wm_compile *c,
182 const struct prog_instruction *inst0)
183 {
184 struct prog_instruction *inst = get_fp_inst(c);
185 *inst = *inst0;
186 return inst;
187 }
188
189 static struct prog_instruction * emit_tex_op(struct brw_wm_compile *c,
190 GLuint op,
191 struct prog_dst_register dest,
192 GLuint saturate,
193 GLuint tex_src_unit,
194 GLuint tex_src_target,
195 GLuint tex_shadow,
196 struct prog_src_register src0,
197 struct prog_src_register src1,
198 struct prog_src_register src2 )
199 {
200 struct prog_instruction *inst = get_fp_inst(c);
201
202 memset(inst, 0, sizeof(*inst));
203
204 inst->Opcode = op;
205 inst->DstReg = dest;
206 inst->SaturateMode = saturate;
207 inst->TexSrcUnit = tex_src_unit;
208 inst->TexSrcTarget = tex_src_target;
209 inst->TexShadow = tex_shadow;
210 inst->SrcReg[0] = src0;
211 inst->SrcReg[1] = src1;
212 inst->SrcReg[2] = src2;
213 return inst;
214 }
215
216
217 static struct prog_instruction * emit_op(struct brw_wm_compile *c,
218 GLuint op,
219 struct prog_dst_register dest,
220 GLuint saturate,
221 struct prog_src_register src0,
222 struct prog_src_register src1,
223 struct prog_src_register src2 )
224 {
225 return emit_tex_op(c, op, dest, saturate,
226 0, 0, 0, /* tex unit, target, shadow */
227 src0, src1, src2);
228 }
229
230
231
232
233 /***********************************************************************
234 * Special instructions for interpolation and other tasks
235 */
236
237 static struct prog_src_register get_pixel_xy( struct brw_wm_compile *c )
238 {
239 if (src_is_undef(c->pixel_xy)) {
240 struct prog_dst_register pixel_xy = get_temp(c);
241 struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
242
243
244 /* Emit the out calculations, and hold onto the results. Use
245 * two instructions as a temporary is required.
246 */
247 /* pixel_xy.xy = PIXELXY payload[0];
248 */
249 emit_op(c,
250 WM_PIXELXY,
251 dst_mask(pixel_xy, WRITEMASK_XY),
252 0,
253 payload_r0_depth,
254 src_undef(),
255 src_undef());
256
257 c->pixel_xy = src_reg_from_dst(pixel_xy);
258 }
259
260 return c->pixel_xy;
261 }
262
263 static struct prog_src_register get_delta_xy( struct brw_wm_compile *c )
264 {
265 if (src_is_undef(c->delta_xy)) {
266 struct prog_dst_register delta_xy = get_temp(c);
267 struct prog_src_register pixel_xy = get_pixel_xy(c);
268 struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
269
270 /* deltas.xy = DELTAXY pixel_xy, payload[0]
271 */
272 emit_op(c,
273 WM_DELTAXY,
274 dst_mask(delta_xy, WRITEMASK_XY),
275 0,
276 pixel_xy,
277 payload_r0_depth,
278 src_undef());
279
280 c->delta_xy = src_reg_from_dst(delta_xy);
281 }
282
283 return c->delta_xy;
284 }
285
286 static struct prog_src_register get_pixel_w( struct brw_wm_compile *c )
287 {
288 if (src_is_undef(c->pixel_w)) {
289 struct prog_dst_register pixel_w = get_temp(c);
290 struct prog_src_register deltas = get_delta_xy(c);
291 struct prog_src_register interp_wpos = src_reg(PROGRAM_PAYLOAD, FRAG_ATTRIB_WPOS);
292
293 /* deltas.xyw = DELTAS2 deltas.xy, payload.interp_wpos.x
294 */
295 emit_op(c,
296 WM_PIXELW,
297 dst_mask(pixel_w, WRITEMASK_W),
298 0,
299 interp_wpos,
300 deltas,
301 src_undef());
302
303
304 c->pixel_w = src_reg_from_dst(pixel_w);
305 }
306
307 return c->pixel_w;
308 }
309
310 static void emit_interp( struct brw_wm_compile *c,
311 GLuint idx )
312 {
313 struct prog_dst_register dst = dst_reg(PROGRAM_INPUT, idx);
314 struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx);
315 struct prog_src_register deltas = get_delta_xy(c);
316
317 /* Need to use PINTERP on attributes which have been
318 * multiplied by 1/W in the SF program, and LINTERP on those
319 * which have not:
320 */
321 switch (idx) {
322 case FRAG_ATTRIB_WPOS:
323 /* Have to treat wpos.xy specially:
324 */
325 emit_op(c,
326 WM_WPOSXY,
327 dst_mask(dst, WRITEMASK_XY),
328 0,
329 get_pixel_xy(c),
330 src_undef(),
331 src_undef());
332
333 dst = dst_mask(dst, WRITEMASK_ZW);
334
335 /* PROGRAM_INPUT.attr.xyzw = INTERP payload.interp[attr].x, deltas.xyw
336 */
337 emit_op(c,
338 WM_LINTERP,
339 dst,
340 0,
341 interp,
342 deltas,
343 src_undef());
344 break;
345 case FRAG_ATTRIB_COL0:
346 case FRAG_ATTRIB_COL1:
347 if (c->key.flat_shade) {
348 emit_op(c,
349 WM_CINTERP,
350 dst,
351 0,
352 interp,
353 src_undef(),
354 src_undef());
355 }
356 else {
357 if (c->key.linear_color) {
358 emit_op(c,
359 WM_LINTERP,
360 dst,
361 0,
362 interp,
363 deltas,
364 src_undef());
365 }
366 else {
367 /* perspective-corrected color interpolation */
368 emit_op(c,
369 WM_PINTERP,
370 dst,
371 0,
372 interp,
373 deltas,
374 get_pixel_w(c));
375 }
376 }
377 break;
378 case FRAG_ATTRIB_FOGC:
379 /* The FOGC input is really special. When a program uses glFogFragCoord,
380 * the results returned are supposed to be (f,0,0,1). But for Mesa GLSL,
381 * the glFrontFacing and glPointCoord values are also stashed in FOGC.
382 * So, write the interpolated fog value to X, then either 0, 1, or the
383 * stashed values to Y, Z, W. Note that this means that
384 * glFogFragCoord.yzw can be wrong in those cases!
385 */
386
387 /* Interpolate the fog coordinate */
388 emit_op(c,
389 WM_PINTERP,
390 dst_mask(dst, WRITEMASK_X),
391 0,
392 interp,
393 deltas,
394 get_pixel_w(c));
395
396 /* Move the front facing value into FOGC.y if it's needed. */
397 if (c->fp->program.UsesFrontFacing) {
398 emit_op(c,
399 WM_FRONTFACING,
400 dst_mask(dst, WRITEMASK_Y),
401 0,
402 src_undef(),
403 src_undef(),
404 src_undef());
405 } else {
406 emit_op(c,
407 OPCODE_MOV,
408 dst_mask(dst, WRITEMASK_Y),
409 0,
410 src_swizzle1(interp, SWIZZLE_ZERO),
411 src_undef(),
412 src_undef());
413 }
414
415 /* Should do the PointCoord thing here. */
416 emit_op(c,
417 OPCODE_MOV,
418 dst_mask(dst, WRITEMASK_ZW),
419 0,
420 src_swizzle(interp,
421 SWIZZLE_ZERO,
422 SWIZZLE_ZERO,
423 SWIZZLE_ZERO,
424 SWIZZLE_ONE),
425 src_undef(),
426 src_undef());
427 break;
428 default:
429 emit_op(c,
430 WM_PINTERP,
431 dst,
432 0,
433 interp,
434 deltas,
435 get_pixel_w(c));
436 break;
437 }
438
439 c->fp_interp_emitted |= 1<<idx;
440 }
441
442 static void emit_ddx( struct brw_wm_compile *c,
443 const struct prog_instruction *inst )
444 {
445 GLuint idx = inst->SrcReg[0].Index;
446 struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx);
447
448 c->fp_deriv_emitted |= 1<<idx;
449 emit_op(c,
450 OPCODE_DDX,
451 inst->DstReg,
452 0,
453 interp,
454 get_pixel_w(c),
455 src_undef());
456 }
457
458 static void emit_ddy( struct brw_wm_compile *c,
459 const struct prog_instruction *inst )
460 {
461 GLuint idx = inst->SrcReg[0].Index;
462 struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx);
463
464 c->fp_deriv_emitted |= 1<<idx;
465 emit_op(c,
466 OPCODE_DDY,
467 inst->DstReg,
468 0,
469 interp,
470 get_pixel_w(c),
471 src_undef());
472 }
473
474 /***********************************************************************
475 * Hacks to extend the program parameter and constant lists.
476 */
477
478 /* Add the fog parameters to the parameter list of the original
479 * program, rather than creating a new list. Doesn't really do any
480 * harm and it's not as if the parameter handling isn't a big hack
481 * anyway.
482 */
483 static struct prog_src_register search_or_add_param5(struct brw_wm_compile *c,
484 GLint s0,
485 GLint s1,
486 GLint s2,
487 GLint s3,
488 GLint s4)
489 {
490 struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters;
491 gl_state_index tokens[STATE_LENGTH];
492 GLuint idx;
493 tokens[0] = s0;
494 tokens[1] = s1;
495 tokens[2] = s2;
496 tokens[3] = s3;
497 tokens[4] = s4;
498
499 for (idx = 0; idx < paramList->NumParameters; idx++) {
500 if (paramList->Parameters[idx].Type == PROGRAM_STATE_VAR &&
501 memcmp(paramList->Parameters[idx].StateIndexes, tokens, sizeof(tokens)) == 0)
502 return src_reg(PROGRAM_STATE_VAR, idx);
503 }
504
505 idx = _mesa_add_state_reference( paramList, tokens );
506
507 return src_reg(PROGRAM_STATE_VAR, idx);
508 }
509
510
511 static struct prog_src_register search_or_add_const4f( struct brw_wm_compile *c,
512 GLfloat s0,
513 GLfloat s1,
514 GLfloat s2,
515 GLfloat s3)
516 {
517 struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters;
518 GLfloat values[4];
519 GLuint idx;
520 GLuint swizzle;
521
522 values[0] = s0;
523 values[1] = s1;
524 values[2] = s2;
525 values[3] = s3;
526
527 /* Have to search, otherwise multiple compilations will each grow
528 * the parameter list.
529 */
530 for (idx = 0; idx < paramList->NumParameters; idx++) {
531 if (paramList->Parameters[idx].Type == PROGRAM_CONSTANT &&
532 memcmp(paramList->ParameterValues[idx], values, sizeof(values)) == 0)
533
534 /* XXX: this mimics the mesa bug which puts all constants and
535 * parameters into the "PROGRAM_STATE_VAR" category:
536 */
537 return src_reg(PROGRAM_STATE_VAR, idx);
538 }
539
540 idx = _mesa_add_unnamed_constant( paramList, values, 4, &swizzle );
541 assert(swizzle == SWIZZLE_NOOP); /* Need to handle swizzle in reg setup */
542 return src_reg(PROGRAM_STATE_VAR, idx);
543 }
544
545
546
547 /***********************************************************************
548 * Expand various instructions here to simpler forms.
549 */
550 static void precalc_dst( struct brw_wm_compile *c,
551 const struct prog_instruction *inst )
552 {
553 struct prog_src_register src0 = inst->SrcReg[0];
554 struct prog_src_register src1 = inst->SrcReg[1];
555 struct prog_dst_register dst = inst->DstReg;
556
557 if (dst.WriteMask & WRITEMASK_Y) {
558 /* dst.y = mul src0.y, src1.y
559 */
560 emit_op(c,
561 OPCODE_MUL,
562 dst_mask(dst, WRITEMASK_Y),
563 inst->SaturateMode,
564 src0,
565 src1,
566 src_undef());
567 }
568
569 if (dst.WriteMask & WRITEMASK_XZ) {
570 struct prog_instruction *swz;
571 GLuint z = GET_SWZ(src0.Swizzle, Z);
572
573 /* dst.xz = swz src0.1zzz
574 */
575 swz = emit_op(c,
576 OPCODE_SWZ,
577 dst_mask(dst, WRITEMASK_XZ),
578 inst->SaturateMode,
579 src_swizzle(src0, SWIZZLE_ONE, z, z, z),
580 src_undef(),
581 src_undef());
582 /* Avoid letting negation flag of src0 affect our 1 constant. */
583 swz->SrcReg[0].Negate &= ~NEGATE_X;
584 }
585 if (dst.WriteMask & WRITEMASK_W) {
586 /* dst.w = mov src1.w
587 */
588 emit_op(c,
589 OPCODE_MOV,
590 dst_mask(dst, WRITEMASK_W),
591 inst->SaturateMode,
592 src1,
593 src_undef(),
594 src_undef());
595 }
596 }
597
598
599 static void precalc_lit( struct brw_wm_compile *c,
600 const struct prog_instruction *inst )
601 {
602 struct prog_src_register src0 = inst->SrcReg[0];
603 struct prog_dst_register dst = inst->DstReg;
604
605 if (dst.WriteMask & WRITEMASK_XW) {
606 struct prog_instruction *swz;
607
608 /* dst.xw = swz src0.1111
609 */
610 swz = emit_op(c,
611 OPCODE_SWZ,
612 dst_mask(dst, WRITEMASK_XW),
613 0,
614 src_swizzle1(src0, SWIZZLE_ONE),
615 src_undef(),
616 src_undef());
617 /* Avoid letting the negation flag of src0 affect our 1 constant. */
618 swz->SrcReg[0].Negate = NEGATE_NONE;
619 }
620
621 if (dst.WriteMask & WRITEMASK_YZ) {
622 emit_op(c,
623 OPCODE_LIT,
624 dst_mask(dst, WRITEMASK_YZ),
625 inst->SaturateMode,
626 src0,
627 src_undef(),
628 src_undef());
629 }
630 }
631
632
633 /**
634 * Some TEX instructions require extra code, cube map coordinate
635 * normalization, or coordinate scaling for RECT textures, etc.
636 * This function emits those extra instructions and the TEX
637 * instruction itself.
638 */
639 static void precalc_tex( struct brw_wm_compile *c,
640 const struct prog_instruction *inst )
641 {
642 struct prog_src_register coord;
643 struct prog_dst_register tmpcoord;
644 const GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit];
645
646 if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX) {
647 struct prog_instruction *out;
648 struct prog_dst_register tmp0 = get_temp(c);
649 struct prog_src_register tmp0src = src_reg_from_dst(tmp0);
650 struct prog_dst_register tmp1 = get_temp(c);
651 struct prog_src_register tmp1src = src_reg_from_dst(tmp1);
652 struct prog_src_register src0 = inst->SrcReg[0];
653
654 /* find longest component of coord vector and normalize it */
655 tmpcoord = get_temp(c);
656 coord = src_reg_from_dst(tmpcoord);
657
658 /* tmpcoord = src0 (i.e.: coord = src0) */
659 out = emit_op(c, OPCODE_MOV,
660 tmpcoord,
661 0,
662 src0,
663 src_undef(),
664 src_undef());
665 out->SrcReg[0].Negate = NEGATE_NONE;
666 out->SrcReg[0].Abs = 1;
667
668 /* tmp0 = MAX(coord.X, coord.Y) */
669 emit_op(c, OPCODE_MAX,
670 tmp0,
671 0,
672 src_swizzle1(coord, X),
673 src_swizzle1(coord, Y),
674 src_undef());
675
676 /* tmp1 = MAX(tmp0, coord.Z) */
677 emit_op(c, OPCODE_MAX,
678 tmp1,
679 0,
680 tmp0src,
681 src_swizzle1(coord, Z),
682 src_undef());
683
684 /* tmp0 = 1 / tmp1 */
685 emit_op(c, OPCODE_RCP,
686 tmp0,
687 0,
688 tmp1src,
689 src_undef(),
690 src_undef());
691
692 /* tmpCoord = src0 * tmp0 */
693 emit_op(c, OPCODE_MUL,
694 tmpcoord,
695 0,
696 src0,
697 tmp0src,
698 src_undef());
699
700 release_temp(c, tmp0);
701 release_temp(c, tmp1);
702 }
703 else if (inst->TexSrcTarget == TEXTURE_RECT_INDEX) {
704 struct prog_src_register scale =
705 search_or_add_param5( c,
706 STATE_INTERNAL,
707 STATE_TEXRECT_SCALE,
708 unit,
709 0,0 );
710
711 tmpcoord = get_temp(c);
712
713 /* coord.xy = MUL inst->SrcReg[0], { 1/width, 1/height }
714 */
715 emit_op(c,
716 OPCODE_MUL,
717 tmpcoord,
718 0,
719 inst->SrcReg[0],
720 scale,
721 src_undef());
722
723 coord = src_reg_from_dst(tmpcoord);
724 }
725 else {
726 coord = inst->SrcReg[0];
727 }
728
729 /* Need to emit YUV texture conversions by hand. Probably need to
730 * do this here - the alternative is in brw_wm_emit.c, but the
731 * conversion requires allocating a temporary variable which we
732 * don't have the facility to do that late in the compilation.
733 */
734 if (c->key.yuvtex_mask & (1 << unit)) {
735 /* convert ycbcr to RGBA */
736 GLboolean swap_uv = c->key.yuvtex_swap_mask & (1<<unit);
737
738 /*
739 CONST C0 = { -.5, -.0625, -.5, 1.164 }
740 CONST C1 = { 1.596, -0.813, 2.018, -.391 }
741 UYV = TEX ...
742 UYV.xyz = ADD UYV, C0
743 UYV.y = MUL UYV.y, C0.w
744 if (UV swaped)
745 RGB.xyz = MAD UYV.zzx, C1, UYV.y
746 else
747 RGB.xyz = MAD UYV.xxz, C1, UYV.y
748 RGB.y = MAD UYV.z, C1.w, RGB.y
749 */
750 struct prog_dst_register dst = inst->DstReg;
751 struct prog_dst_register tmp = get_temp(c);
752 struct prog_src_register tmpsrc = src_reg_from_dst(tmp);
753 struct prog_src_register C0 = search_or_add_const4f( c, -.5, -.0625, -.5, 1.164 );
754 struct prog_src_register C1 = search_or_add_const4f( c, 1.596, -0.813, 2.018, -.391 );
755
756 /* tmp = TEX ...
757 */
758 emit_tex_op(c,
759 OPCODE_TEX,
760 tmp,
761 inst->SaturateMode,
762 unit,
763 inst->TexSrcTarget,
764 inst->TexShadow,
765 coord,
766 src_undef(),
767 src_undef());
768
769 /* tmp.xyz = ADD TMP, C0
770 */
771 emit_op(c,
772 OPCODE_ADD,
773 dst_mask(tmp, WRITEMASK_XYZ),
774 0,
775 tmpsrc,
776 C0,
777 src_undef());
778
779 /* YUV.y = MUL YUV.y, C0.w
780 */
781
782 emit_op(c,
783 OPCODE_MUL,
784 dst_mask(tmp, WRITEMASK_Y),
785 0,
786 tmpsrc,
787 src_swizzle1(C0, W),
788 src_undef());
789
790 /*
791 * if (UV swaped)
792 * RGB.xyz = MAD YUV.zzx, C1, YUV.y
793 * else
794 * RGB.xyz = MAD YUV.xxz, C1, YUV.y
795 */
796
797 emit_op(c,
798 OPCODE_MAD,
799 dst_mask(dst, WRITEMASK_XYZ),
800 0,
801 swap_uv?src_swizzle(tmpsrc, Z,Z,X,X):src_swizzle(tmpsrc, X,X,Z,Z),
802 C1,
803 src_swizzle1(tmpsrc, Y));
804
805 /* RGB.y = MAD YUV.z, C1.w, RGB.y
806 */
807 emit_op(c,
808 OPCODE_MAD,
809 dst_mask(dst, WRITEMASK_Y),
810 0,
811 src_swizzle1(tmpsrc, Z),
812 src_swizzle1(C1, W),
813 src_swizzle1(src_reg_from_dst(dst), Y));
814
815 release_temp(c, tmp);
816 }
817 else {
818 /* ordinary RGBA tex instruction */
819 emit_tex_op(c,
820 OPCODE_TEX,
821 inst->DstReg,
822 inst->SaturateMode,
823 unit,
824 inst->TexSrcTarget,
825 inst->TexShadow,
826 coord,
827 src_undef(),
828 src_undef());
829 }
830
831 /* For GL_EXT_texture_swizzle: */
832 if (c->key.tex_swizzles[unit] != SWIZZLE_NOOP) {
833 /* swizzle the result of the TEX instruction */
834 struct prog_src_register tmpsrc = src_reg_from_dst(inst->DstReg);
835 emit_op(c, OPCODE_SWZ,
836 inst->DstReg,
837 SATURATE_OFF, /* saturate already done above */
838 src_swizzle4(tmpsrc, c->key.tex_swizzles[unit]),
839 src_undef(),
840 src_undef());
841 }
842
843 if ((inst->TexSrcTarget == TEXTURE_RECT_INDEX) ||
844 (inst->TexSrcTarget == TEXTURE_CUBE_INDEX))
845 release_temp(c, tmpcoord);
846 }
847
848
849 /**
850 * Check if the given TXP instruction really needs the divide-by-W step.
851 */
852 static GLboolean projtex( struct brw_wm_compile *c,
853 const struct prog_instruction *inst )
854 {
855 const struct prog_src_register src = inst->SrcReg[0];
856 GLboolean retVal;
857
858 assert(inst->Opcode == OPCODE_TXP);
859
860 /* Only try to detect the simplest cases. Could detect (later)
861 * cases where we are trying to emit code like RCP {1.0}, MUL x,
862 * {1.0}, and so on.
863 *
864 * More complex cases than this typically only arise from
865 * user-provided fragment programs anyway:
866 */
867 if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX)
868 retVal = GL_FALSE; /* ut2004 gun rendering !?! */
869 else if (src.File == PROGRAM_INPUT &&
870 GET_SWZ(src.Swizzle, W) == W &&
871 (c->key.proj_attrib_mask & (1 << src.Index)) == 0)
872 retVal = GL_FALSE;
873 else
874 retVal = GL_TRUE;
875
876 return retVal;
877 }
878
879
880 /**
881 * Emit code for TXP.
882 */
883 static void precalc_txp( struct brw_wm_compile *c,
884 const struct prog_instruction *inst )
885 {
886 struct prog_src_register src0 = inst->SrcReg[0];
887
888 if (projtex(c, inst)) {
889 struct prog_dst_register tmp = get_temp(c);
890 struct prog_instruction tmp_inst;
891
892 /* tmp0.w = RCP inst.arg[0][3]
893 */
894 emit_op(c,
895 OPCODE_RCP,
896 dst_mask(tmp, WRITEMASK_W),
897 0,
898 src_swizzle1(src0, GET_SWZ(src0.Swizzle, W)),
899 src_undef(),
900 src_undef());
901
902 /* tmp0.xyz = MUL inst.arg[0], tmp0.wwww
903 */
904 emit_op(c,
905 OPCODE_MUL,
906 dst_mask(tmp, WRITEMASK_XYZ),
907 0,
908 src0,
909 src_swizzle1(src_reg_from_dst(tmp), W),
910 src_undef());
911
912 /* dst = precalc(TEX tmp0)
913 */
914 tmp_inst = *inst;
915 tmp_inst.SrcReg[0] = src_reg_from_dst(tmp);
916 precalc_tex(c, &tmp_inst);
917
918 release_temp(c, tmp);
919 }
920 else
921 {
922 /* dst = precalc(TEX src0)
923 */
924 precalc_tex(c, inst);
925 }
926 }
927
928
929
930 static void emit_fb_write( struct brw_wm_compile *c )
931 {
932 struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
933 struct prog_src_register outdepth = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DEPTH);
934 struct prog_src_register outcolor;
935 GLuint i;
936
937 struct prog_instruction *inst, *last_inst;
938 struct brw_context *brw = c->func.brw;
939
940 /* The inst->Aux field is used for FB write target and the EOT marker */
941
942 if (brw->state.nr_color_regions > 1) {
943 for (i = 0 ; i < brw->state.nr_color_regions; i++) {
944 outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0 + i);
945 last_inst = inst = emit_op(c,
946 WM_FB_WRITE, dst_mask(dst_undef(),0), 0,
947 outcolor, payload_r0_depth, outdepth);
948 inst->Aux = (i<<1);
949 if (c->fp_fragcolor_emitted) {
950 outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR);
951 last_inst = inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0),
952 0, outcolor, payload_r0_depth, outdepth);
953 inst->Aux = (i<<1);
954 }
955 }
956 last_inst->Aux |= 1; //eot
957 }
958 else {
959 /* if gl_FragData[0] is written, use it, else use gl_FragColor */
960 if (c->fp->program.Base.OutputsWritten & (1 << FRAG_RESULT_DATA0))
961 outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0);
962 else
963 outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR);
964
965 inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0),
966 0, outcolor, payload_r0_depth, outdepth);
967 inst->Aux = 1|(0<<1);
968 }
969 }
970
971
972
973
974 /***********************************************************************
975 * Emit INTERP instructions ahead of first use of each attrib.
976 */
977
978 static void validate_src_regs( struct brw_wm_compile *c,
979 const struct prog_instruction *inst )
980 {
981 GLuint nr_args = brw_wm_nr_args( inst->Opcode );
982 GLuint i;
983
984 for (i = 0; i < nr_args; i++) {
985 if (inst->SrcReg[i].File == PROGRAM_INPUT) {
986 GLuint idx = inst->SrcReg[i].Index;
987 if (!(c->fp_interp_emitted & (1<<idx))) {
988 emit_interp(c, idx);
989 }
990 }
991 }
992 }
993
994 static void validate_dst_regs( struct brw_wm_compile *c,
995 const struct prog_instruction *inst )
996 {
997 if (inst->DstReg.File == PROGRAM_OUTPUT) {
998 GLuint idx = inst->DstReg.Index;
999 if (idx == FRAG_RESULT_COLOR)
1000 c->fp_fragcolor_emitted = 1;
1001 }
1002 }
1003
1004 static void print_insns( const struct prog_instruction *insn,
1005 GLuint nr )
1006 {
1007 GLuint i;
1008 for (i = 0; i < nr; i++, insn++) {
1009 _mesa_printf("%3d: ", i);
1010 if (insn->Opcode < MAX_OPCODE)
1011 _mesa_print_instruction(insn);
1012 else if (insn->Opcode < MAX_WM_OPCODE) {
1013 GLuint idx = insn->Opcode - MAX_OPCODE;
1014
1015 _mesa_print_alu_instruction(insn,
1016 wm_opcode_strings[idx],
1017 3);
1018 }
1019 else
1020 _mesa_printf("965 Opcode %d\n", insn->Opcode);
1021 }
1022 }
1023
1024
1025 /**
1026 * Initial pass for fragment program code generation.
1027 * This function is used by both the GLSL and non-GLSL paths.
1028 */
1029 void brw_wm_pass_fp( struct brw_wm_compile *c )
1030 {
1031 struct brw_fragment_program *fp = c->fp;
1032 GLuint insn;
1033
1034 if (INTEL_DEBUG & DEBUG_WM) {
1035 _mesa_printf("pre-fp:\n");
1036 _mesa_print_program(&fp->program.Base);
1037 _mesa_printf("\n");
1038 }
1039
1040 c->pixel_xy = src_undef();
1041 c->delta_xy = src_undef();
1042 c->pixel_w = src_undef();
1043 c->nr_fp_insns = 0;
1044
1045 /* Emit preamble instructions. This is where special instructions such as
1046 * WM_CINTERP, WM_LINTERP, WM_PINTERP and WM_WPOSXY are emitted to
1047 * compute shader inputs from varying vars.
1048 */
1049 for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) {
1050 const struct prog_instruction *inst = &fp->program.Base.Instructions[insn];
1051 validate_src_regs(c, inst);
1052 validate_dst_regs(c, inst);
1053 }
1054
1055 /* Loop over all instructions doing assorted simplifications and
1056 * transformations.
1057 */
1058 for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) {
1059 const struct prog_instruction *inst = &fp->program.Base.Instructions[insn];
1060 struct prog_instruction *out;
1061
1062 /* Check for INPUT values, emit INTERP instructions where
1063 * necessary:
1064 */
1065
1066 switch (inst->Opcode) {
1067 case OPCODE_SWZ:
1068 out = emit_insn(c, inst);
1069 out->Opcode = OPCODE_MOV;
1070 break;
1071
1072 case OPCODE_ABS:
1073 out = emit_insn(c, inst);
1074 out->Opcode = OPCODE_MOV;
1075 out->SrcReg[0].Negate = NEGATE_NONE;
1076 out->SrcReg[0].Abs = 1;
1077 break;
1078
1079 case OPCODE_SUB:
1080 out = emit_insn(c, inst);
1081 out->Opcode = OPCODE_ADD;
1082 out->SrcReg[1].Negate ^= NEGATE_XYZW;
1083 break;
1084
1085 case OPCODE_SCS:
1086 out = emit_insn(c, inst);
1087 /* This should probably be done in the parser.
1088 */
1089 out->DstReg.WriteMask &= WRITEMASK_XY;
1090 break;
1091
1092 case OPCODE_DST:
1093 precalc_dst(c, inst);
1094 break;
1095
1096 case OPCODE_LIT:
1097 precalc_lit(c, inst);
1098 break;
1099
1100 case OPCODE_TEX:
1101 precalc_tex(c, inst);
1102 break;
1103
1104 case OPCODE_TXP:
1105 precalc_txp(c, inst);
1106 break;
1107
1108 case OPCODE_TXB:
1109 out = emit_insn(c, inst);
1110 out->TexSrcUnit = fp->program.Base.SamplerUnits[inst->TexSrcUnit];
1111 break;
1112
1113 case OPCODE_XPD:
1114 out = emit_insn(c, inst);
1115 /* This should probably be done in the parser.
1116 */
1117 out->DstReg.WriteMask &= WRITEMASK_XYZ;
1118 break;
1119
1120 case OPCODE_KIL:
1121 out = emit_insn(c, inst);
1122 /* This should probably be done in the parser.
1123 */
1124 out->DstReg.WriteMask = 0;
1125 break;
1126 case OPCODE_DDX:
1127 emit_ddx(c, inst);
1128 break;
1129 case OPCODE_DDY:
1130 emit_ddy(c, inst);
1131 break;
1132 case OPCODE_END:
1133 emit_fb_write(c);
1134 break;
1135 case OPCODE_PRINT:
1136 break;
1137
1138 default:
1139 emit_insn(c, inst);
1140 break;
1141 }
1142 }
1143
1144 if (INTEL_DEBUG & DEBUG_WM) {
1145 _mesa_printf("pass_fp:\n");
1146 print_insns( c->prog_instructions, c->nr_fp_insns );
1147 _mesa_printf("\n");
1148 }
1149 }
1150