i965: fix fetching constants from constant buffer in glsl path
[mesa.git] / src / mesa / drivers / dri / i965 / brw_wm_fp.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "main/glheader.h"
34 #include "main/macros.h"
35 #include "main/enums.h"
36 #include "brw_context.h"
37 #include "brw_wm.h"
38 #include "brw_util.h"
39
40 #include "shader/prog_parameter.h"
41 #include "shader/prog_print.h"
42 #include "shader/prog_statevars.h"
43
44
/* Register index given to the first temporary handed out by get_temp(). */
#define FIRST_INTERNAL_TEMP MAX_NV_FRAGMENT_PROGRAM_TEMPS

/* Shorthand component indices used when building and reading swizzles. */
enum {
   X = 0,
   Y = 1,
   Z = 2,
   W = 3
};
51
52
/* Printable names for the WM-specific opcodes.  print_insns() indexes
 * this table with (Opcode - MAX_OPCODE), so the order of the entries
 * is significant.
 */
static const char *wm_opcode_strings[] = {
   "PIXELXY", "DELTAXY", "PIXELW",
   "LINTERP", "PINTERP", "CINTERP",
   "WPOSXY",
   "FB_WRITE",
   "FRONTFACING",
};

/* Disabled: register-file names, currently unused. */
#if 0
static const char *wm_file_strings[] = {
   "PAYLOAD"
};
#endif
70
71
72 /***********************************************************************
73 * Source regs
74 */
75
76 static struct prog_src_register src_reg(GLuint file, GLuint idx)
77 {
78 struct prog_src_register reg;
79 reg.File = file;
80 reg.Index = idx;
81 reg.Swizzle = SWIZZLE_NOOP;
82 reg.RelAddr = 0;
83 reg.Negate = NEGATE_NONE;
84 reg.Abs = 0;
85 return reg;
86 }
87
88 static struct prog_src_register src_reg_from_dst(struct prog_dst_register dst)
89 {
90 return src_reg(dst.File, dst.Index);
91 }
92
93 static struct prog_src_register src_undef( void )
94 {
95 return src_reg(PROGRAM_UNDEFINED, 0);
96 }
97
98 static GLboolean src_is_undef(struct prog_src_register src)
99 {
100 return src.File == PROGRAM_UNDEFINED;
101 }
102
103 static struct prog_src_register src_swizzle( struct prog_src_register reg, int x, int y, int z, int w )
104 {
105 reg.Swizzle = MAKE_SWIZZLE4(x,y,z,w);
106 return reg;
107 }
108
109 static struct prog_src_register src_swizzle1( struct prog_src_register reg, int x )
110 {
111 return src_swizzle(reg, x, x, x, x);
112 }
113
114 static struct prog_src_register src_swizzle4( struct prog_src_register reg, uint swizzle )
115 {
116 reg.Swizzle = swizzle;
117 return reg;
118 }
119
120
121 /***********************************************************************
122 * Dest regs
123 */
124
125 static struct prog_dst_register dst_reg(GLuint file, GLuint idx)
126 {
127 struct prog_dst_register reg;
128 reg.File = file;
129 reg.Index = idx;
130 reg.WriteMask = WRITEMASK_XYZW;
131 reg.RelAddr = 0;
132 reg.CondMask = COND_TR;
133 reg.CondSwizzle = 0;
134 reg.CondSrc = 0;
135 reg.pad = 0;
136 return reg;
137 }
138
139 static struct prog_dst_register dst_mask( struct prog_dst_register reg, int mask )
140 {
141 reg.WriteMask &= mask;
142 return reg;
143 }
144
145 static struct prog_dst_register dst_undef( void )
146 {
147 return dst_reg(PROGRAM_UNDEFINED, 0);
148 }
149
150
151
152 static struct prog_dst_register get_temp( struct brw_wm_compile *c )
153 {
154 int bit = _mesa_ffs( ~c->fp_temp );
155
156 if (!bit) {
157 _mesa_printf("%s: out of temporaries\n", __FILE__);
158 exit(1);
159 }
160
161 c->fp_temp |= 1<<(bit-1);
162 return dst_reg(PROGRAM_TEMPORARY, FIRST_INTERNAL_TEMP+(bit-1));
163 }
164
165
166 static void release_temp( struct brw_wm_compile *c, struct prog_dst_register temp )
167 {
168 c->fp_temp &= ~(1 << (temp.Index - FIRST_INTERNAL_TEMP));
169 }
170
171
172 /***********************************************************************
173 * Instructions
174 */
175
176 static struct prog_instruction *get_fp_inst(struct brw_wm_compile *c)
177 {
178 return &c->prog_instructions[c->nr_fp_insns++];
179 }
180
181 static struct prog_instruction *emit_insn(struct brw_wm_compile *c,
182 const struct prog_instruction *inst0)
183 {
184 struct prog_instruction *inst = get_fp_inst(c);
185 *inst = *inst0;
186 return inst;
187 }
188
189 static struct prog_instruction * emit_tex_op(struct brw_wm_compile *c,
190 GLuint op,
191 struct prog_dst_register dest,
192 GLuint saturate,
193 GLuint tex_src_unit,
194 GLuint tex_src_target,
195 GLuint tex_shadow,
196 struct prog_src_register src0,
197 struct prog_src_register src1,
198 struct prog_src_register src2 )
199 {
200 struct prog_instruction *inst = get_fp_inst(c);
201
202 memset(inst, 0, sizeof(*inst));
203
204 inst->Opcode = op;
205 inst->DstReg = dest;
206 inst->SaturateMode = saturate;
207 inst->TexSrcUnit = tex_src_unit;
208 inst->TexSrcTarget = tex_src_target;
209 inst->TexShadow = tex_shadow;
210 inst->SrcReg[0] = src0;
211 inst->SrcReg[1] = src1;
212 inst->SrcReg[2] = src2;
213 return inst;
214 }
215
216
217 static struct prog_instruction * emit_op(struct brw_wm_compile *c,
218 GLuint op,
219 struct prog_dst_register dest,
220 GLuint saturate,
221 struct prog_src_register src0,
222 struct prog_src_register src1,
223 struct prog_src_register src2 )
224 {
225 return emit_tex_op(c, op, dest, saturate,
226 0, 0, 0, /* tex unit, target, shadow */
227 src0, src1, src2);
228 }
229
230
231
232
233 /***********************************************************************
234 * Special instructions for interpolation and other tasks
235 */
236
237 static struct prog_src_register get_pixel_xy( struct brw_wm_compile *c )
238 {
239 if (src_is_undef(c->pixel_xy)) {
240 struct prog_dst_register pixel_xy = get_temp(c);
241 struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
242
243
244 /* Emit the out calculations, and hold onto the results. Use
245 * two instructions as a temporary is required.
246 */
247 /* pixel_xy.xy = PIXELXY payload[0];
248 */
249 emit_op(c,
250 WM_PIXELXY,
251 dst_mask(pixel_xy, WRITEMASK_XY),
252 0,
253 payload_r0_depth,
254 src_undef(),
255 src_undef());
256
257 c->pixel_xy = src_reg_from_dst(pixel_xy);
258 }
259
260 return c->pixel_xy;
261 }
262
263 static struct prog_src_register get_delta_xy( struct brw_wm_compile *c )
264 {
265 if (src_is_undef(c->delta_xy)) {
266 struct prog_dst_register delta_xy = get_temp(c);
267 struct prog_src_register pixel_xy = get_pixel_xy(c);
268 struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
269
270 /* deltas.xy = DELTAXY pixel_xy, payload[0]
271 */
272 emit_op(c,
273 WM_DELTAXY,
274 dst_mask(delta_xy, WRITEMASK_XY),
275 0,
276 pixel_xy,
277 payload_r0_depth,
278 src_undef());
279
280 c->delta_xy = src_reg_from_dst(delta_xy);
281 }
282
283 return c->delta_xy;
284 }
285
286 static struct prog_src_register get_pixel_w( struct brw_wm_compile *c )
287 {
288 if (src_is_undef(c->pixel_w)) {
289 struct prog_dst_register pixel_w = get_temp(c);
290 struct prog_src_register deltas = get_delta_xy(c);
291 struct prog_src_register interp_wpos = src_reg(PROGRAM_PAYLOAD, FRAG_ATTRIB_WPOS);
292
293 /* deltas.xyw = DELTAS2 deltas.xy, payload.interp_wpos.x
294 */
295 emit_op(c,
296 WM_PIXELW,
297 dst_mask(pixel_w, WRITEMASK_W),
298 0,
299 interp_wpos,
300 deltas,
301 src_undef());
302
303
304 c->pixel_w = src_reg_from_dst(pixel_w);
305 }
306
307 return c->pixel_w;
308 }
309
310 static void emit_interp( struct brw_wm_compile *c,
311 GLuint idx )
312 {
313 struct prog_dst_register dst = dst_reg(PROGRAM_INPUT, idx);
314 struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx);
315 struct prog_src_register deltas = get_delta_xy(c);
316
317 /* Need to use PINTERP on attributes which have been
318 * multiplied by 1/W in the SF program, and LINTERP on those
319 * which have not:
320 */
321 switch (idx) {
322 case FRAG_ATTRIB_WPOS:
323 /* Have to treat wpos.xy specially:
324 */
325 emit_op(c,
326 WM_WPOSXY,
327 dst_mask(dst, WRITEMASK_XY),
328 0,
329 get_pixel_xy(c),
330 src_undef(),
331 src_undef());
332
333 dst = dst_mask(dst, WRITEMASK_ZW);
334
335 /* PROGRAM_INPUT.attr.xyzw = INTERP payload.interp[attr].x, deltas.xyw
336 */
337 emit_op(c,
338 WM_LINTERP,
339 dst,
340 0,
341 interp,
342 deltas,
343 src_undef());
344 break;
345 case FRAG_ATTRIB_COL0:
346 case FRAG_ATTRIB_COL1:
347 if (c->key.flat_shade) {
348 emit_op(c,
349 WM_CINTERP,
350 dst,
351 0,
352 interp,
353 src_undef(),
354 src_undef());
355 }
356 else {
357 emit_op(c,
358 WM_LINTERP,
359 dst,
360 0,
361 interp,
362 deltas,
363 src_undef());
364 }
365 break;
366 case FRAG_ATTRIB_FOGC:
367 /* The FOGC input is really special. When a program uses glFogFragCoord,
368 * the results returned are supposed to be (f,0,0,1). But for Mesa GLSL,
369 * the glFrontFacing and glPointCoord values are also stashed in FOGC.
370 * So, write the interpolated fog value to X, then either 0, 1, or the
371 * stashed values to Y, Z, W. Note that this means that
372 * glFogFragCoord.yzw can be wrong in those cases!
373 */
374
375 /* Interpolate the fog coordinate */
376 emit_op(c,
377 WM_PINTERP,
378 dst_mask(dst, WRITEMASK_X),
379 0,
380 interp,
381 deltas,
382 get_pixel_w(c));
383
384 /* Move the front facing value into FOGC.y if it's needed. */
385 if (c->fp->program.UsesFrontFacing) {
386 emit_op(c,
387 WM_FRONTFACING,
388 dst_mask(dst, WRITEMASK_Y),
389 0,
390 src_undef(),
391 src_undef(),
392 src_undef());
393 } else {
394 emit_op(c,
395 OPCODE_MOV,
396 dst_mask(dst, WRITEMASK_Y),
397 0,
398 src_swizzle1(interp, SWIZZLE_ZERO),
399 src_undef(),
400 src_undef());
401 }
402
403 /* Should do the PointCoord thing here. */
404 emit_op(c,
405 OPCODE_MOV,
406 dst_mask(dst, WRITEMASK_ZW),
407 0,
408 src_swizzle(interp,
409 SWIZZLE_ZERO,
410 SWIZZLE_ZERO,
411 SWIZZLE_ZERO,
412 SWIZZLE_ONE),
413 src_undef(),
414 src_undef());
415 break;
416 default:
417 emit_op(c,
418 WM_PINTERP,
419 dst,
420 0,
421 interp,
422 deltas,
423 get_pixel_w(c));
424 break;
425 }
426
427 c->fp_interp_emitted |= 1<<idx;
428 }
429
430 static void emit_ddx( struct brw_wm_compile *c,
431 const struct prog_instruction *inst )
432 {
433 GLuint idx = inst->SrcReg[0].Index;
434 struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx);
435
436 c->fp_deriv_emitted |= 1<<idx;
437 emit_op(c,
438 OPCODE_DDX,
439 inst->DstReg,
440 0,
441 interp,
442 get_pixel_w(c),
443 src_undef());
444 }
445
446 static void emit_ddy( struct brw_wm_compile *c,
447 const struct prog_instruction *inst )
448 {
449 GLuint idx = inst->SrcReg[0].Index;
450 struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx);
451
452 c->fp_deriv_emitted |= 1<<idx;
453 emit_op(c,
454 OPCODE_DDY,
455 inst->DstReg,
456 0,
457 interp,
458 get_pixel_w(c),
459 src_undef());
460 }
461
462 /***********************************************************************
463 * Hacks to extend the program parameter and constant lists.
464 */
465
466 /* Add the fog parameters to the parameter list of the original
467 * program, rather than creating a new list. Doesn't really do any
468 * harm and it's not as if the parameter handling isn't a big hack
469 * anyway.
470 */
471 static struct prog_src_register search_or_add_param5(struct brw_wm_compile *c,
472 GLint s0,
473 GLint s1,
474 GLint s2,
475 GLint s3,
476 GLint s4)
477 {
478 struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters;
479 gl_state_index tokens[STATE_LENGTH];
480 GLuint idx;
481 tokens[0] = s0;
482 tokens[1] = s1;
483 tokens[2] = s2;
484 tokens[3] = s3;
485 tokens[4] = s4;
486
487 for (idx = 0; idx < paramList->NumParameters; idx++) {
488 if (paramList->Parameters[idx].Type == PROGRAM_STATE_VAR &&
489 memcmp(paramList->Parameters[idx].StateIndexes, tokens, sizeof(tokens)) == 0)
490 return src_reg(PROGRAM_STATE_VAR, idx);
491 }
492
493 idx = _mesa_add_state_reference( paramList, tokens );
494
495 return src_reg(PROGRAM_STATE_VAR, idx);
496 }
497
498
499 static struct prog_src_register search_or_add_const4f( struct brw_wm_compile *c,
500 GLfloat s0,
501 GLfloat s1,
502 GLfloat s2,
503 GLfloat s3)
504 {
505 struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters;
506 GLfloat values[4];
507 GLuint idx;
508 GLuint swizzle;
509
510 values[0] = s0;
511 values[1] = s1;
512 values[2] = s2;
513 values[3] = s3;
514
515 /* Have to search, otherwise multiple compilations will each grow
516 * the parameter list.
517 */
518 for (idx = 0; idx < paramList->NumParameters; idx++) {
519 if (paramList->Parameters[idx].Type == PROGRAM_CONSTANT &&
520 memcmp(paramList->ParameterValues[idx], values, sizeof(values)) == 0)
521
522 /* XXX: this mimics the mesa bug which puts all constants and
523 * parameters into the "PROGRAM_STATE_VAR" category:
524 */
525 return src_reg(PROGRAM_STATE_VAR, idx);
526 }
527
528 idx = _mesa_add_unnamed_constant( paramList, values, 4, &swizzle );
529 assert(swizzle == SWIZZLE_NOOP); /* Need to handle swizzle in reg setup */
530 return src_reg(PROGRAM_STATE_VAR, idx);
531 }
532
533
534
535 /***********************************************************************
536 * Expand various instructions here to simpler forms.
537 */
538 static void precalc_dst( struct brw_wm_compile *c,
539 const struct prog_instruction *inst )
540 {
541 struct prog_src_register src0 = inst->SrcReg[0];
542 struct prog_src_register src1 = inst->SrcReg[1];
543 struct prog_dst_register dst = inst->DstReg;
544
545 if (dst.WriteMask & WRITEMASK_Y) {
546 /* dst.y = mul src0.y, src1.y
547 */
548 emit_op(c,
549 OPCODE_MUL,
550 dst_mask(dst, WRITEMASK_Y),
551 inst->SaturateMode,
552 src0,
553 src1,
554 src_undef());
555 }
556
557 if (dst.WriteMask & WRITEMASK_XZ) {
558 struct prog_instruction *swz;
559 GLuint z = GET_SWZ(src0.Swizzle, Z);
560
561 /* dst.xz = swz src0.1zzz
562 */
563 swz = emit_op(c,
564 OPCODE_SWZ,
565 dst_mask(dst, WRITEMASK_XZ),
566 inst->SaturateMode,
567 src_swizzle(src0, SWIZZLE_ONE, z, z, z),
568 src_undef(),
569 src_undef());
570 /* Avoid letting negation flag of src0 affect our 1 constant. */
571 swz->SrcReg[0].Negate &= ~NEGATE_X;
572 }
573 if (dst.WriteMask & WRITEMASK_W) {
574 /* dst.w = mov src1.w
575 */
576 emit_op(c,
577 OPCODE_MOV,
578 dst_mask(dst, WRITEMASK_W),
579 inst->SaturateMode,
580 src1,
581 src_undef(),
582 src_undef());
583 }
584 }
585
586
587 static void precalc_lit( struct brw_wm_compile *c,
588 const struct prog_instruction *inst )
589 {
590 struct prog_src_register src0 = inst->SrcReg[0];
591 struct prog_dst_register dst = inst->DstReg;
592
593 if (dst.WriteMask & WRITEMASK_XW) {
594 struct prog_instruction *swz;
595
596 /* dst.xw = swz src0.1111
597 */
598 swz = emit_op(c,
599 OPCODE_SWZ,
600 dst_mask(dst, WRITEMASK_XW),
601 0,
602 src_swizzle1(src0, SWIZZLE_ONE),
603 src_undef(),
604 src_undef());
605 /* Avoid letting the negation flag of src0 affect our 1 constant. */
606 swz->SrcReg[0].Negate = NEGATE_NONE;
607 }
608
609 if (dst.WriteMask & WRITEMASK_YZ) {
610 emit_op(c,
611 OPCODE_LIT,
612 dst_mask(dst, WRITEMASK_YZ),
613 inst->SaturateMode,
614 src0,
615 src_undef(),
616 src_undef());
617 }
618 }
619
620
621 /**
622 * Some TEX instructions require extra code, cube map coordinate
623 * normalization, or coordinate scaling for RECT textures, etc.
624 * This function emits those extra instructions and the TEX
625 * instruction itself.
626 */
627 static void precalc_tex( struct brw_wm_compile *c,
628 const struct prog_instruction *inst )
629 {
630 struct prog_src_register coord;
631 struct prog_dst_register tmpcoord;
632 const GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit];
633
634 if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX) {
635 struct prog_instruction *out;
636 struct prog_dst_register tmp0 = get_temp(c);
637 struct prog_src_register tmp0src = src_reg_from_dst(tmp0);
638 struct prog_dst_register tmp1 = get_temp(c);
639 struct prog_src_register tmp1src = src_reg_from_dst(tmp1);
640 struct prog_src_register src0 = inst->SrcReg[0];
641
642 /* find longest component of coord vector and normalize it */
643 tmpcoord = get_temp(c);
644 coord = src_reg_from_dst(tmpcoord);
645
646 /* tmpcoord = src0 (i.e.: coord = src0) */
647 out = emit_op(c, OPCODE_MOV,
648 tmpcoord,
649 0,
650 src0,
651 src_undef(),
652 src_undef());
653 out->SrcReg[0].Negate = NEGATE_NONE;
654 out->SrcReg[0].Abs = 1;
655
656 /* tmp0 = MAX(coord.X, coord.Y) */
657 emit_op(c, OPCODE_MAX,
658 tmp0,
659 0,
660 src_swizzle1(coord, X),
661 src_swizzle1(coord, Y),
662 src_undef());
663
664 /* tmp1 = MAX(tmp0, coord.Z) */
665 emit_op(c, OPCODE_MAX,
666 tmp1,
667 0,
668 tmp0src,
669 src_swizzle1(coord, Z),
670 src_undef());
671
672 /* tmp0 = 1 / tmp1 */
673 emit_op(c, OPCODE_RCP,
674 tmp0,
675 0,
676 tmp1src,
677 src_undef(),
678 src_undef());
679
680 /* tmpCoord = src0 * tmp0 */
681 emit_op(c, OPCODE_MUL,
682 tmpcoord,
683 0,
684 src0,
685 tmp0src,
686 src_undef());
687
688 release_temp(c, tmp0);
689 release_temp(c, tmp1);
690 }
691 else if (inst->TexSrcTarget == TEXTURE_RECT_INDEX) {
692 struct prog_src_register scale =
693 search_or_add_param5( c,
694 STATE_INTERNAL,
695 STATE_TEXRECT_SCALE,
696 unit,
697 0,0 );
698
699 tmpcoord = get_temp(c);
700
701 /* coord.xy = MUL inst->SrcReg[0], { 1/width, 1/height }
702 */
703 emit_op(c,
704 OPCODE_MUL,
705 tmpcoord,
706 0,
707 inst->SrcReg[0],
708 scale,
709 src_undef());
710
711 coord = src_reg_from_dst(tmpcoord);
712 }
713 else {
714 coord = inst->SrcReg[0];
715 }
716
717 /* Need to emit YUV texture conversions by hand. Probably need to
718 * do this here - the alternative is in brw_wm_emit.c, but the
719 * conversion requires allocating a temporary variable which we
720 * don't have the facility to do that late in the compilation.
721 */
722 if (c->key.yuvtex_mask & (1 << unit)) {
723 /* convert ycbcr to RGBA */
724 GLboolean swap_uv = c->key.yuvtex_swap_mask & (1<<unit);
725
726 /*
727 CONST C0 = { -.5, -.0625, -.5, 1.164 }
728 CONST C1 = { 1.596, -0.813, 2.018, -.391 }
729 UYV = TEX ...
730 UYV.xyz = ADD UYV, C0
731 UYV.y = MUL UYV.y, C0.w
732 if (UV swaped)
733 RGB.xyz = MAD UYV.zzx, C1, UYV.y
734 else
735 RGB.xyz = MAD UYV.xxz, C1, UYV.y
736 RGB.y = MAD UYV.z, C1.w, RGB.y
737 */
738 struct prog_dst_register dst = inst->DstReg;
739 struct prog_dst_register tmp = get_temp(c);
740 struct prog_src_register tmpsrc = src_reg_from_dst(tmp);
741 struct prog_src_register C0 = search_or_add_const4f( c, -.5, -.0625, -.5, 1.164 );
742 struct prog_src_register C1 = search_or_add_const4f( c, 1.596, -0.813, 2.018, -.391 );
743
744 /* tmp = TEX ...
745 */
746 emit_tex_op(c,
747 OPCODE_TEX,
748 tmp,
749 inst->SaturateMode,
750 unit,
751 inst->TexSrcTarget,
752 inst->TexShadow,
753 coord,
754 src_undef(),
755 src_undef());
756
757 /* tmp.xyz = ADD TMP, C0
758 */
759 emit_op(c,
760 OPCODE_ADD,
761 dst_mask(tmp, WRITEMASK_XYZ),
762 0,
763 tmpsrc,
764 C0,
765 src_undef());
766
767 /* YUV.y = MUL YUV.y, C0.w
768 */
769
770 emit_op(c,
771 OPCODE_MUL,
772 dst_mask(tmp, WRITEMASK_Y),
773 0,
774 tmpsrc,
775 src_swizzle1(C0, W),
776 src_undef());
777
778 /*
779 * if (UV swaped)
780 * RGB.xyz = MAD YUV.zzx, C1, YUV.y
781 * else
782 * RGB.xyz = MAD YUV.xxz, C1, YUV.y
783 */
784
785 emit_op(c,
786 OPCODE_MAD,
787 dst_mask(dst, WRITEMASK_XYZ),
788 0,
789 swap_uv?src_swizzle(tmpsrc, Z,Z,X,X):src_swizzle(tmpsrc, X,X,Z,Z),
790 C1,
791 src_swizzle1(tmpsrc, Y));
792
793 /* RGB.y = MAD YUV.z, C1.w, RGB.y
794 */
795 emit_op(c,
796 OPCODE_MAD,
797 dst_mask(dst, WRITEMASK_Y),
798 0,
799 src_swizzle1(tmpsrc, Z),
800 src_swizzle1(C1, W),
801 src_swizzle1(src_reg_from_dst(dst), Y));
802
803 release_temp(c, tmp);
804 }
805 else {
806 /* ordinary RGBA tex instruction */
807 emit_tex_op(c,
808 OPCODE_TEX,
809 inst->DstReg,
810 inst->SaturateMode,
811 unit,
812 inst->TexSrcTarget,
813 inst->TexShadow,
814 coord,
815 src_undef(),
816 src_undef());
817 }
818
819 /* For GL_EXT_texture_swizzle: */
820 if (c->key.tex_swizzles[unit] != SWIZZLE_NOOP) {
821 /* swizzle the result of the TEX instruction */
822 struct prog_src_register tmpsrc = src_reg_from_dst(inst->DstReg);
823 emit_op(c, OPCODE_SWZ,
824 inst->DstReg,
825 SATURATE_OFF, /* saturate already done above */
826 src_swizzle4(tmpsrc, c->key.tex_swizzles[unit]),
827 src_undef(),
828 src_undef());
829 }
830
831 if ((inst->TexSrcTarget == TEXTURE_RECT_INDEX) ||
832 (inst->TexSrcTarget == TEXTURE_CUBE_INDEX))
833 release_temp(c, tmpcoord);
834 }
835
836
837 /**
838 * Check if the given TXP instruction really needs the divide-by-W step.
839 */
840 static GLboolean projtex( struct brw_wm_compile *c,
841 const struct prog_instruction *inst )
842 {
843 const struct prog_src_register src = inst->SrcReg[0];
844 GLboolean retVal;
845
846 assert(inst->Opcode == OPCODE_TXP);
847
848 /* Only try to detect the simplest cases. Could detect (later)
849 * cases where we are trying to emit code like RCP {1.0}, MUL x,
850 * {1.0}, and so on.
851 *
852 * More complex cases than this typically only arise from
853 * user-provided fragment programs anyway:
854 */
855 if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX)
856 retVal = GL_FALSE; /* ut2004 gun rendering !?! */
857 else if (src.File == PROGRAM_INPUT &&
858 GET_SWZ(src.Swizzle, W) == W &&
859 (c->key.proj_attrib_mask & (1 << src.Index)) == 0)
860 retVal = GL_FALSE;
861 else
862 retVal = GL_TRUE;
863
864 return retVal;
865 }
866
867
868 /**
869 * Emit code for TXP.
870 */
871 static void precalc_txp( struct brw_wm_compile *c,
872 const struct prog_instruction *inst )
873 {
874 struct prog_src_register src0 = inst->SrcReg[0];
875
876 if (projtex(c, inst)) {
877 struct prog_dst_register tmp = get_temp(c);
878 struct prog_instruction tmp_inst;
879
880 /* tmp0.w = RCP inst.arg[0][3]
881 */
882 emit_op(c,
883 OPCODE_RCP,
884 dst_mask(tmp, WRITEMASK_W),
885 0,
886 src_swizzle1(src0, GET_SWZ(src0.Swizzle, W)),
887 src_undef(),
888 src_undef());
889
890 /* tmp0.xyz = MUL inst.arg[0], tmp0.wwww
891 */
892 emit_op(c,
893 OPCODE_MUL,
894 dst_mask(tmp, WRITEMASK_XYZ),
895 0,
896 src0,
897 src_swizzle1(src_reg_from_dst(tmp), W),
898 src_undef());
899
900 /* dst = precalc(TEX tmp0)
901 */
902 tmp_inst = *inst;
903 tmp_inst.SrcReg[0] = src_reg_from_dst(tmp);
904 precalc_tex(c, &tmp_inst);
905
906 release_temp(c, tmp);
907 }
908 else
909 {
910 /* dst = precalc(TEX src0)
911 */
912 precalc_tex(c, inst);
913 }
914 }
915
916
917
918 static void emit_fb_write( struct brw_wm_compile *c )
919 {
920 struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
921 struct prog_src_register outdepth = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DEPTH);
922 struct prog_src_register outcolor;
923 GLuint i;
924
925 struct prog_instruction *inst, *last_inst;
926 struct brw_context *brw = c->func.brw;
927
928 /* The inst->Aux field is used for FB write target and the EOT marker */
929
930 if (brw->state.nr_color_regions > 1) {
931 for (i = 0 ; i < brw->state.nr_color_regions; i++) {
932 outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0 + i);
933 last_inst = inst = emit_op(c,
934 WM_FB_WRITE, dst_mask(dst_undef(),0), 0,
935 outcolor, payload_r0_depth, outdepth);
936 inst->Aux = (i<<1);
937 if (c->fp_fragcolor_emitted) {
938 outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR);
939 last_inst = inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0),
940 0, outcolor, payload_r0_depth, outdepth);
941 inst->Aux = (i<<1);
942 }
943 }
944 last_inst->Aux |= 1; //eot
945 }
946 else {
947 /* if gl_FragData[0] is written, use it, else use gl_FragColor */
948 if (c->fp->program.Base.OutputsWritten & (1 << FRAG_RESULT_DATA0))
949 outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0);
950 else
951 outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR);
952
953 inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0),
954 0, outcolor, payload_r0_depth, outdepth);
955 inst->Aux = 1|(0<<1);
956 }
957 }
958
959
960
961
962 /***********************************************************************
963 * Emit INTERP instructions ahead of first use of each attrib.
964 */
965
966 static void validate_src_regs( struct brw_wm_compile *c,
967 const struct prog_instruction *inst )
968 {
969 GLuint nr_args = brw_wm_nr_args( inst->Opcode );
970 GLuint i;
971
972 for (i = 0; i < nr_args; i++) {
973 if (inst->SrcReg[i].File == PROGRAM_INPUT) {
974 GLuint idx = inst->SrcReg[i].Index;
975 if (!(c->fp_interp_emitted & (1<<idx))) {
976 emit_interp(c, idx);
977 }
978 }
979 }
980 }
981
982 static void validate_dst_regs( struct brw_wm_compile *c,
983 const struct prog_instruction *inst )
984 {
985 if (inst->DstReg.File == PROGRAM_OUTPUT) {
986 GLuint idx = inst->DstReg.Index;
987 if (idx == FRAG_RESULT_COLOR)
988 c->fp_fragcolor_emitted = 1;
989 }
990 }
991
992 static void print_insns( const struct prog_instruction *insn,
993 GLuint nr )
994 {
995 GLuint i;
996 for (i = 0; i < nr; i++, insn++) {
997 _mesa_printf("%3d: ", i);
998 if (insn->Opcode < MAX_OPCODE)
999 _mesa_print_instruction(insn);
1000 else if (insn->Opcode < MAX_WM_OPCODE) {
1001 GLuint idx = insn->Opcode - MAX_OPCODE;
1002
1003 _mesa_print_alu_instruction(insn,
1004 wm_opcode_strings[idx],
1005 3);
1006 }
1007 else
1008 _mesa_printf("965 Opcode %d\n", insn->Opcode);
1009 }
1010 }
1011
1012
1013 /**
1014 * Initial pass for fragment program code generation.
1015 * This function is used by both the GLSL and non-GLSL paths.
1016 */
1017 void brw_wm_pass_fp( struct brw_wm_compile *c )
1018 {
1019 struct brw_fragment_program *fp = c->fp;
1020 GLuint insn;
1021
1022 if (INTEL_DEBUG & DEBUG_WM) {
1023 _mesa_printf("pre-fp:\n");
1024 _mesa_print_program(&fp->program.Base);
1025 _mesa_printf("\n");
1026 }
1027
1028 c->pixel_xy = src_undef();
1029 c->delta_xy = src_undef();
1030 c->pixel_w = src_undef();
1031 c->nr_fp_insns = 0;
1032
1033 /* Emit preamble instructions. This is where special instructions such as
1034 * WM_CINTERP, WM_LINTERP, WM_PINTERP and WM_WPOSXY are emitted to
1035 * compute shader inputs from varying vars.
1036 */
1037 for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) {
1038 const struct prog_instruction *inst = &fp->program.Base.Instructions[insn];
1039 validate_src_regs(c, inst);
1040 validate_dst_regs(c, inst);
1041 }
1042
1043 /* Loop over all instructions doing assorted simplifications and
1044 * transformations.
1045 */
1046 for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) {
1047 const struct prog_instruction *inst = &fp->program.Base.Instructions[insn];
1048 struct prog_instruction *out;
1049
1050 /* Check for INPUT values, emit INTERP instructions where
1051 * necessary:
1052 */
1053
1054 switch (inst->Opcode) {
1055 case OPCODE_SWZ:
1056 out = emit_insn(c, inst);
1057 out->Opcode = OPCODE_MOV;
1058 break;
1059
1060 case OPCODE_ABS:
1061 out = emit_insn(c, inst);
1062 out->Opcode = OPCODE_MOV;
1063 out->SrcReg[0].Negate = NEGATE_NONE;
1064 out->SrcReg[0].Abs = 1;
1065 break;
1066
1067 case OPCODE_SUB:
1068 out = emit_insn(c, inst);
1069 out->Opcode = OPCODE_ADD;
1070 out->SrcReg[1].Negate ^= NEGATE_XYZW;
1071 break;
1072
1073 case OPCODE_SCS:
1074 out = emit_insn(c, inst);
1075 /* This should probably be done in the parser.
1076 */
1077 out->DstReg.WriteMask &= WRITEMASK_XY;
1078 break;
1079
1080 case OPCODE_DST:
1081 precalc_dst(c, inst);
1082 break;
1083
1084 case OPCODE_LIT:
1085 precalc_lit(c, inst);
1086 break;
1087
1088 case OPCODE_TEX:
1089 precalc_tex(c, inst);
1090 break;
1091
1092 case OPCODE_TXP:
1093 precalc_txp(c, inst);
1094 break;
1095
1096 case OPCODE_TXB:
1097 out = emit_insn(c, inst);
1098 out->TexSrcUnit = fp->program.Base.SamplerUnits[inst->TexSrcUnit];
1099 break;
1100
1101 case OPCODE_XPD:
1102 out = emit_insn(c, inst);
1103 /* This should probably be done in the parser.
1104 */
1105 out->DstReg.WriteMask &= WRITEMASK_XYZ;
1106 break;
1107
1108 case OPCODE_KIL:
1109 out = emit_insn(c, inst);
1110 /* This should probably be done in the parser.
1111 */
1112 out->DstReg.WriteMask = 0;
1113 break;
1114 case OPCODE_DDX:
1115 emit_ddx(c, inst);
1116 break;
1117 case OPCODE_DDY:
1118 emit_ddy(c, inst);
1119 break;
1120 case OPCODE_END:
1121 emit_fb_write(c);
1122 break;
1123 case OPCODE_PRINT:
1124 break;
1125
1126 default:
1127 emit_insn(c, inst);
1128 break;
1129 }
1130 }
1131
1132 if (INTEL_DEBUG & DEBUG_WM) {
1133 _mesa_printf("pass_fp:\n");
1134 print_insns( c->prog_instructions, c->nr_fp_insns );
1135 _mesa_printf("\n");
1136 }
1137 }
1138