mesa: add new FRAG_ATTRIB_FACE and FRAG_ATTRIB_PNTC fragment program inputs
[mesa.git] / src / mesa / drivers / dri / i965 / brw_wm_fp.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "main/glheader.h"
34 #include "main/macros.h"
35 #include "main/enums.h"
36 #include "brw_context.h"
37 #include "brw_wm.h"
38 #include "brw_util.h"
39
40 #include "shader/prog_parameter.h"
41 #include "shader/prog_print.h"
42 #include "shader/prog_statevars.h"
43
44
45 #define FIRST_INTERNAL_TEMP MAX_NV_FRAGMENT_PROGRAM_TEMPS
46
47 #define X 0
48 #define Y 1
49 #define Z 2
50 #define W 3
51
52
/* Printable names of the WM-specific opcodes.  print_insns() indexes this
 * table with (Opcode - MAX_OPCODE), so the order of the entries matters.
 */
static const char *wm_opcode_strings[] = {
   "PIXELXY",      /* 0 */
   "DELTAXY",      /* 1 */
   "PIXELW",       /* 2 */
   "LINTERP",      /* 3 */
   "PINTERP",      /* 4 */
   "CINTERP",      /* 5 */
   "WPOSXY",       /* 6 */
   "FB_WRITE",     /* 7 */
   "FRONTFACING",  /* 8 */
};
64
65 #if 0
66 static const char *wm_file_strings[] = {
67 "PAYLOAD"
68 };
69 #endif
70
71
72 /***********************************************************************
73 * Source regs
74 */
75
76 static struct prog_src_register src_reg(GLuint file, GLuint idx)
77 {
78 struct prog_src_register reg;
79 reg.File = file;
80 reg.Index = idx;
81 reg.Swizzle = SWIZZLE_NOOP;
82 reg.RelAddr = 0;
83 reg.Negate = NEGATE_NONE;
84 reg.Abs = 0;
85 return reg;
86 }
87
88 static struct prog_src_register src_reg_from_dst(struct prog_dst_register dst)
89 {
90 return src_reg(dst.File, dst.Index);
91 }
92
93 static struct prog_src_register src_undef( void )
94 {
95 return src_reg(PROGRAM_UNDEFINED, 0);
96 }
97
98 static GLboolean src_is_undef(struct prog_src_register src)
99 {
100 return src.File == PROGRAM_UNDEFINED;
101 }
102
103 static struct prog_src_register src_swizzle( struct prog_src_register reg, int x, int y, int z, int w )
104 {
105 reg.Swizzle = MAKE_SWIZZLE4(x,y,z,w);
106 return reg;
107 }
108
109 static struct prog_src_register src_swizzle1( struct prog_src_register reg, int x )
110 {
111 return src_swizzle(reg, x, x, x, x);
112 }
113
114 static struct prog_src_register src_swizzle4( struct prog_src_register reg, uint swizzle )
115 {
116 reg.Swizzle = swizzle;
117 return reg;
118 }
119
120
121 /***********************************************************************
122 * Dest regs
123 */
124
125 static struct prog_dst_register dst_reg(GLuint file, GLuint idx)
126 {
127 struct prog_dst_register reg;
128 reg.File = file;
129 reg.Index = idx;
130 reg.WriteMask = WRITEMASK_XYZW;
131 reg.RelAddr = 0;
132 reg.CondMask = COND_TR;
133 reg.CondSwizzle = 0;
134 reg.CondSrc = 0;
135 reg.pad = 0;
136 return reg;
137 }
138
139 static struct prog_dst_register dst_mask( struct prog_dst_register reg, int mask )
140 {
141 reg.WriteMask &= mask;
142 return reg;
143 }
144
145 static struct prog_dst_register dst_undef( void )
146 {
147 return dst_reg(PROGRAM_UNDEFINED, 0);
148 }
149
150
151
152 static struct prog_dst_register get_temp( struct brw_wm_compile *c )
153 {
154 int bit = _mesa_ffs( ~c->fp_temp );
155
156 if (!bit) {
157 _mesa_printf("%s: out of temporaries\n", __FILE__);
158 exit(1);
159 }
160
161 c->fp_temp |= 1<<(bit-1);
162 return dst_reg(PROGRAM_TEMPORARY, FIRST_INTERNAL_TEMP+(bit-1));
163 }
164
165
166 static void release_temp( struct brw_wm_compile *c, struct prog_dst_register temp )
167 {
168 c->fp_temp &= ~(1 << (temp.Index - FIRST_INTERNAL_TEMP));
169 }
170
171
172 /***********************************************************************
173 * Instructions
174 */
175
176 static struct prog_instruction *get_fp_inst(struct brw_wm_compile *c)
177 {
178 return &c->prog_instructions[c->nr_fp_insns++];
179 }
180
181 static struct prog_instruction *emit_insn(struct brw_wm_compile *c,
182 const struct prog_instruction *inst0)
183 {
184 struct prog_instruction *inst = get_fp_inst(c);
185 *inst = *inst0;
186 return inst;
187 }
188
189 static struct prog_instruction * emit_tex_op(struct brw_wm_compile *c,
190 GLuint op,
191 struct prog_dst_register dest,
192 GLuint saturate,
193 GLuint tex_src_unit,
194 GLuint tex_src_target,
195 GLuint tex_shadow,
196 struct prog_src_register src0,
197 struct prog_src_register src1,
198 struct prog_src_register src2 )
199 {
200 struct prog_instruction *inst = get_fp_inst(c);
201
202 memset(inst, 0, sizeof(*inst));
203
204 inst->Opcode = op;
205 inst->DstReg = dest;
206 inst->SaturateMode = saturate;
207 inst->TexSrcUnit = tex_src_unit;
208 inst->TexSrcTarget = tex_src_target;
209 inst->TexShadow = tex_shadow;
210 inst->SrcReg[0] = src0;
211 inst->SrcReg[1] = src1;
212 inst->SrcReg[2] = src2;
213 return inst;
214 }
215
216
217 static struct prog_instruction * emit_op(struct brw_wm_compile *c,
218 GLuint op,
219 struct prog_dst_register dest,
220 GLuint saturate,
221 struct prog_src_register src0,
222 struct prog_src_register src1,
223 struct prog_src_register src2 )
224 {
225 return emit_tex_op(c, op, dest, saturate,
226 0, 0, 0, /* tex unit, target, shadow */
227 src0, src1, src2);
228 }
229
230
231
232
233 /***********************************************************************
234 * Special instructions for interpolation and other tasks
235 */
236
237 static struct prog_src_register get_pixel_xy( struct brw_wm_compile *c )
238 {
239 if (src_is_undef(c->pixel_xy)) {
240 struct prog_dst_register pixel_xy = get_temp(c);
241 struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
242
243
244 /* Emit the out calculations, and hold onto the results. Use
245 * two instructions as a temporary is required.
246 */
247 /* pixel_xy.xy = PIXELXY payload[0];
248 */
249 emit_op(c,
250 WM_PIXELXY,
251 dst_mask(pixel_xy, WRITEMASK_XY),
252 0,
253 payload_r0_depth,
254 src_undef(),
255 src_undef());
256
257 c->pixel_xy = src_reg_from_dst(pixel_xy);
258 }
259
260 return c->pixel_xy;
261 }
262
263 static struct prog_src_register get_delta_xy( struct brw_wm_compile *c )
264 {
265 if (src_is_undef(c->delta_xy)) {
266 struct prog_dst_register delta_xy = get_temp(c);
267 struct prog_src_register pixel_xy = get_pixel_xy(c);
268 struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
269
270 /* deltas.xy = DELTAXY pixel_xy, payload[0]
271 */
272 emit_op(c,
273 WM_DELTAXY,
274 dst_mask(delta_xy, WRITEMASK_XY),
275 0,
276 pixel_xy,
277 payload_r0_depth,
278 src_undef());
279
280 c->delta_xy = src_reg_from_dst(delta_xy);
281 }
282
283 return c->delta_xy;
284 }
285
286 static struct prog_src_register get_pixel_w( struct brw_wm_compile *c )
287 {
288 if (src_is_undef(c->pixel_w)) {
289 struct prog_dst_register pixel_w = get_temp(c);
290 struct prog_src_register deltas = get_delta_xy(c);
291 struct prog_src_register interp_wpos = src_reg(PROGRAM_PAYLOAD, FRAG_ATTRIB_WPOS);
292
293 /* deltas.xyw = DELTAS2 deltas.xy, payload.interp_wpos.x
294 */
295 emit_op(c,
296 WM_PIXELW,
297 dst_mask(pixel_w, WRITEMASK_W),
298 0,
299 interp_wpos,
300 deltas,
301 src_undef());
302
303
304 c->pixel_w = src_reg_from_dst(pixel_w);
305 }
306
307 return c->pixel_w;
308 }
309
310 static void emit_interp( struct brw_wm_compile *c,
311 GLuint idx )
312 {
313 struct prog_dst_register dst = dst_reg(PROGRAM_INPUT, idx);
314 struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx);
315 struct prog_src_register deltas = get_delta_xy(c);
316
317 /* Need to use PINTERP on attributes which have been
318 * multiplied by 1/W in the SF program, and LINTERP on those
319 * which have not:
320 */
321 switch (idx) {
322 case FRAG_ATTRIB_WPOS:
323 /* Have to treat wpos.xy specially:
324 */
325 emit_op(c,
326 WM_WPOSXY,
327 dst_mask(dst, WRITEMASK_XY),
328 0,
329 get_pixel_xy(c),
330 src_undef(),
331 src_undef());
332
333 dst = dst_mask(dst, WRITEMASK_ZW);
334
335 /* PROGRAM_INPUT.attr.xyzw = INTERP payload.interp[attr].x, deltas.xyw
336 */
337 emit_op(c,
338 WM_LINTERP,
339 dst,
340 0,
341 interp,
342 deltas,
343 src_undef());
344 break;
345 case FRAG_ATTRIB_COL0:
346 case FRAG_ATTRIB_COL1:
347 if (c->key.flat_shade) {
348 emit_op(c,
349 WM_CINTERP,
350 dst,
351 0,
352 interp,
353 src_undef(),
354 src_undef());
355 }
356 else {
357 if (c->key.linear_color) {
358 emit_op(c,
359 WM_LINTERP,
360 dst,
361 0,
362 interp,
363 deltas,
364 src_undef());
365 }
366 else {
367 /* perspective-corrected color interpolation */
368 emit_op(c,
369 WM_PINTERP,
370 dst,
371 0,
372 interp,
373 deltas,
374 get_pixel_w(c));
375 }
376 }
377 break;
378 case FRAG_ATTRIB_FOGC:
379 /* Interpolate the fog coordinate */
380 emit_op(c,
381 WM_PINTERP,
382 dst_mask(dst, WRITEMASK_X),
383 0,
384 interp,
385 deltas,
386 get_pixel_w(c));
387
388 emit_op(c,
389 OPCODE_MOV,
390 dst_mask(dst, WRITEMASK_YZW),
391 0,
392 src_swizzle(interp,
393 SWIZZLE_ZERO,
394 SWIZZLE_ZERO,
395 SWIZZLE_ZERO,
396 SWIZZLE_ONE),
397 src_undef(),
398 src_undef());
399 break;
400
401 case FRAG_ATTRIB_FACE:
402 /* XXX review/test this case */
403 emit_op(c,
404 WM_FRONTFACING,
405 dst_mask(dst, WRITEMASK_X),
406 0,
407 src_undef(),
408 src_undef(),
409 src_undef());
410 break;
411
412 case FRAG_ATTRIB_PNTC:
413 /* XXX review/test this case */
414 emit_op(c,
415 WM_PINTERP,
416 dst_mask(dst, WRITEMASK_XY),
417 0,
418 interp,
419 deltas,
420 get_pixel_w(c));
421
422 emit_op(c,
423 OPCODE_MOV,
424 dst_mask(dst, WRITEMASK_ZW),
425 0,
426 src_swizzle(interp,
427 SWIZZLE_ZERO,
428 SWIZZLE_ZERO,
429 SWIZZLE_ZERO,
430 SWIZZLE_ONE),
431 src_undef(),
432 src_undef());
433 break;
434
435 default:
436 emit_op(c,
437 WM_PINTERP,
438 dst,
439 0,
440 interp,
441 deltas,
442 get_pixel_w(c));
443 break;
444 }
445
446 c->fp_interp_emitted |= 1<<idx;
447 }
448
449 static void emit_ddx( struct brw_wm_compile *c,
450 const struct prog_instruction *inst )
451 {
452 GLuint idx = inst->SrcReg[0].Index;
453 struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx);
454
455 c->fp_deriv_emitted |= 1<<idx;
456 emit_op(c,
457 OPCODE_DDX,
458 inst->DstReg,
459 0,
460 interp,
461 get_pixel_w(c),
462 src_undef());
463 }
464
465 static void emit_ddy( struct brw_wm_compile *c,
466 const struct prog_instruction *inst )
467 {
468 GLuint idx = inst->SrcReg[0].Index;
469 struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx);
470
471 c->fp_deriv_emitted |= 1<<idx;
472 emit_op(c,
473 OPCODE_DDY,
474 inst->DstReg,
475 0,
476 interp,
477 get_pixel_w(c),
478 src_undef());
479 }
480
481 /***********************************************************************
482 * Hacks to extend the program parameter and constant lists.
483 */
484
485 /* Add the fog parameters to the parameter list of the original
486 * program, rather than creating a new list. Doesn't really do any
487 * harm and it's not as if the parameter handling isn't a big hack
488 * anyway.
489 */
490 static struct prog_src_register search_or_add_param5(struct brw_wm_compile *c,
491 GLint s0,
492 GLint s1,
493 GLint s2,
494 GLint s3,
495 GLint s4)
496 {
497 struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters;
498 gl_state_index tokens[STATE_LENGTH];
499 GLuint idx;
500 tokens[0] = s0;
501 tokens[1] = s1;
502 tokens[2] = s2;
503 tokens[3] = s3;
504 tokens[4] = s4;
505
506 for (idx = 0; idx < paramList->NumParameters; idx++) {
507 if (paramList->Parameters[idx].Type == PROGRAM_STATE_VAR &&
508 memcmp(paramList->Parameters[idx].StateIndexes, tokens, sizeof(tokens)) == 0)
509 return src_reg(PROGRAM_STATE_VAR, idx);
510 }
511
512 idx = _mesa_add_state_reference( paramList, tokens );
513
514 return src_reg(PROGRAM_STATE_VAR, idx);
515 }
516
517
518 static struct prog_src_register search_or_add_const4f( struct brw_wm_compile *c,
519 GLfloat s0,
520 GLfloat s1,
521 GLfloat s2,
522 GLfloat s3)
523 {
524 struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters;
525 GLfloat values[4];
526 GLuint idx;
527 GLuint swizzle;
528
529 values[0] = s0;
530 values[1] = s1;
531 values[2] = s2;
532 values[3] = s3;
533
534 /* Have to search, otherwise multiple compilations will each grow
535 * the parameter list.
536 */
537 for (idx = 0; idx < paramList->NumParameters; idx++) {
538 if (paramList->Parameters[idx].Type == PROGRAM_CONSTANT &&
539 memcmp(paramList->ParameterValues[idx], values, sizeof(values)) == 0)
540
541 /* XXX: this mimics the mesa bug which puts all constants and
542 * parameters into the "PROGRAM_STATE_VAR" category:
543 */
544 return src_reg(PROGRAM_STATE_VAR, idx);
545 }
546
547 idx = _mesa_add_unnamed_constant( paramList, values, 4, &swizzle );
548 assert(swizzle == SWIZZLE_NOOP); /* Need to handle swizzle in reg setup */
549 return src_reg(PROGRAM_STATE_VAR, idx);
550 }
551
552
553
554 /***********************************************************************
555 * Expand various instructions here to simpler forms.
556 */
557 static void precalc_dst( struct brw_wm_compile *c,
558 const struct prog_instruction *inst )
559 {
560 struct prog_src_register src0 = inst->SrcReg[0];
561 struct prog_src_register src1 = inst->SrcReg[1];
562 struct prog_dst_register dst = inst->DstReg;
563
564 if (dst.WriteMask & WRITEMASK_Y) {
565 /* dst.y = mul src0.y, src1.y
566 */
567 emit_op(c,
568 OPCODE_MUL,
569 dst_mask(dst, WRITEMASK_Y),
570 inst->SaturateMode,
571 src0,
572 src1,
573 src_undef());
574 }
575
576 if (dst.WriteMask & WRITEMASK_XZ) {
577 struct prog_instruction *swz;
578 GLuint z = GET_SWZ(src0.Swizzle, Z);
579
580 /* dst.xz = swz src0.1zzz
581 */
582 swz = emit_op(c,
583 OPCODE_SWZ,
584 dst_mask(dst, WRITEMASK_XZ),
585 inst->SaturateMode,
586 src_swizzle(src0, SWIZZLE_ONE, z, z, z),
587 src_undef(),
588 src_undef());
589 /* Avoid letting negation flag of src0 affect our 1 constant. */
590 swz->SrcReg[0].Negate &= ~NEGATE_X;
591 }
592 if (dst.WriteMask & WRITEMASK_W) {
593 /* dst.w = mov src1.w
594 */
595 emit_op(c,
596 OPCODE_MOV,
597 dst_mask(dst, WRITEMASK_W),
598 inst->SaturateMode,
599 src1,
600 src_undef(),
601 src_undef());
602 }
603 }
604
605
606 static void precalc_lit( struct brw_wm_compile *c,
607 const struct prog_instruction *inst )
608 {
609 struct prog_src_register src0 = inst->SrcReg[0];
610 struct prog_dst_register dst = inst->DstReg;
611
612 if (dst.WriteMask & WRITEMASK_XW) {
613 struct prog_instruction *swz;
614
615 /* dst.xw = swz src0.1111
616 */
617 swz = emit_op(c,
618 OPCODE_SWZ,
619 dst_mask(dst, WRITEMASK_XW),
620 0,
621 src_swizzle1(src0, SWIZZLE_ONE),
622 src_undef(),
623 src_undef());
624 /* Avoid letting the negation flag of src0 affect our 1 constant. */
625 swz->SrcReg[0].Negate = NEGATE_NONE;
626 }
627
628 if (dst.WriteMask & WRITEMASK_YZ) {
629 emit_op(c,
630 OPCODE_LIT,
631 dst_mask(dst, WRITEMASK_YZ),
632 inst->SaturateMode,
633 src0,
634 src_undef(),
635 src_undef());
636 }
637 }
638
639
640 /**
641 * Some TEX instructions require extra code, cube map coordinate
642 * normalization, or coordinate scaling for RECT textures, etc.
643 * This function emits those extra instructions and the TEX
644 * instruction itself.
645 */
646 static void precalc_tex( struct brw_wm_compile *c,
647 const struct prog_instruction *inst )
648 {
649 struct prog_src_register coord;
650 struct prog_dst_register tmpcoord;
651 const GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit];
652
653 if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX) {
654 struct prog_instruction *out;
655 struct prog_dst_register tmp0 = get_temp(c);
656 struct prog_src_register tmp0src = src_reg_from_dst(tmp0);
657 struct prog_dst_register tmp1 = get_temp(c);
658 struct prog_src_register tmp1src = src_reg_from_dst(tmp1);
659 struct prog_src_register src0 = inst->SrcReg[0];
660
661 /* find longest component of coord vector and normalize it */
662 tmpcoord = get_temp(c);
663 coord = src_reg_from_dst(tmpcoord);
664
665 /* tmpcoord = src0 (i.e.: coord = src0) */
666 out = emit_op(c, OPCODE_MOV,
667 tmpcoord,
668 0,
669 src0,
670 src_undef(),
671 src_undef());
672 out->SrcReg[0].Negate = NEGATE_NONE;
673 out->SrcReg[0].Abs = 1;
674
675 /* tmp0 = MAX(coord.X, coord.Y) */
676 emit_op(c, OPCODE_MAX,
677 tmp0,
678 0,
679 src_swizzle1(coord, X),
680 src_swizzle1(coord, Y),
681 src_undef());
682
683 /* tmp1 = MAX(tmp0, coord.Z) */
684 emit_op(c, OPCODE_MAX,
685 tmp1,
686 0,
687 tmp0src,
688 src_swizzle1(coord, Z),
689 src_undef());
690
691 /* tmp0 = 1 / tmp1 */
692 emit_op(c, OPCODE_RCP,
693 tmp0,
694 0,
695 tmp1src,
696 src_undef(),
697 src_undef());
698
699 /* tmpCoord = src0 * tmp0 */
700 emit_op(c, OPCODE_MUL,
701 tmpcoord,
702 0,
703 src0,
704 tmp0src,
705 src_undef());
706
707 release_temp(c, tmp0);
708 release_temp(c, tmp1);
709 }
710 else if (inst->TexSrcTarget == TEXTURE_RECT_INDEX) {
711 struct prog_src_register scale =
712 search_or_add_param5( c,
713 STATE_INTERNAL,
714 STATE_TEXRECT_SCALE,
715 unit,
716 0,0 );
717
718 tmpcoord = get_temp(c);
719
720 /* coord.xy = MUL inst->SrcReg[0], { 1/width, 1/height }
721 */
722 emit_op(c,
723 OPCODE_MUL,
724 tmpcoord,
725 0,
726 inst->SrcReg[0],
727 scale,
728 src_undef());
729
730 coord = src_reg_from_dst(tmpcoord);
731 }
732 else {
733 coord = inst->SrcReg[0];
734 }
735
736 /* Need to emit YUV texture conversions by hand. Probably need to
737 * do this here - the alternative is in brw_wm_emit.c, but the
738 * conversion requires allocating a temporary variable which we
739 * don't have the facility to do that late in the compilation.
740 */
741 if (c->key.yuvtex_mask & (1 << unit)) {
742 /* convert ycbcr to RGBA */
743 GLboolean swap_uv = c->key.yuvtex_swap_mask & (1<<unit);
744
745 /*
746 CONST C0 = { -.5, -.0625, -.5, 1.164 }
747 CONST C1 = { 1.596, -0.813, 2.018, -.391 }
748 UYV = TEX ...
749 UYV.xyz = ADD UYV, C0
750 UYV.y = MUL UYV.y, C0.w
751 if (UV swaped)
752 RGB.xyz = MAD UYV.zzx, C1, UYV.y
753 else
754 RGB.xyz = MAD UYV.xxz, C1, UYV.y
755 RGB.y = MAD UYV.z, C1.w, RGB.y
756 */
757 struct prog_dst_register dst = inst->DstReg;
758 struct prog_dst_register tmp = get_temp(c);
759 struct prog_src_register tmpsrc = src_reg_from_dst(tmp);
760 struct prog_src_register C0 = search_or_add_const4f( c, -.5, -.0625, -.5, 1.164 );
761 struct prog_src_register C1 = search_or_add_const4f( c, 1.596, -0.813, 2.018, -.391 );
762
763 /* tmp = TEX ...
764 */
765 emit_tex_op(c,
766 OPCODE_TEX,
767 tmp,
768 inst->SaturateMode,
769 unit,
770 inst->TexSrcTarget,
771 inst->TexShadow,
772 coord,
773 src_undef(),
774 src_undef());
775
776 /* tmp.xyz = ADD TMP, C0
777 */
778 emit_op(c,
779 OPCODE_ADD,
780 dst_mask(tmp, WRITEMASK_XYZ),
781 0,
782 tmpsrc,
783 C0,
784 src_undef());
785
786 /* YUV.y = MUL YUV.y, C0.w
787 */
788
789 emit_op(c,
790 OPCODE_MUL,
791 dst_mask(tmp, WRITEMASK_Y),
792 0,
793 tmpsrc,
794 src_swizzle1(C0, W),
795 src_undef());
796
797 /*
798 * if (UV swaped)
799 * RGB.xyz = MAD YUV.zzx, C1, YUV.y
800 * else
801 * RGB.xyz = MAD YUV.xxz, C1, YUV.y
802 */
803
804 emit_op(c,
805 OPCODE_MAD,
806 dst_mask(dst, WRITEMASK_XYZ),
807 0,
808 swap_uv?src_swizzle(tmpsrc, Z,Z,X,X):src_swizzle(tmpsrc, X,X,Z,Z),
809 C1,
810 src_swizzle1(tmpsrc, Y));
811
812 /* RGB.y = MAD YUV.z, C1.w, RGB.y
813 */
814 emit_op(c,
815 OPCODE_MAD,
816 dst_mask(dst, WRITEMASK_Y),
817 0,
818 src_swizzle1(tmpsrc, Z),
819 src_swizzle1(C1, W),
820 src_swizzle1(src_reg_from_dst(dst), Y));
821
822 release_temp(c, tmp);
823 }
824 else {
825 /* ordinary RGBA tex instruction */
826 emit_tex_op(c,
827 OPCODE_TEX,
828 inst->DstReg,
829 inst->SaturateMode,
830 unit,
831 inst->TexSrcTarget,
832 inst->TexShadow,
833 coord,
834 src_undef(),
835 src_undef());
836 }
837
838 /* For GL_EXT_texture_swizzle: */
839 if (c->key.tex_swizzles[unit] != SWIZZLE_NOOP) {
840 /* swizzle the result of the TEX instruction */
841 struct prog_src_register tmpsrc = src_reg_from_dst(inst->DstReg);
842 emit_op(c, OPCODE_SWZ,
843 inst->DstReg,
844 SATURATE_OFF, /* saturate already done above */
845 src_swizzle4(tmpsrc, c->key.tex_swizzles[unit]),
846 src_undef(),
847 src_undef());
848 }
849
850 if ((inst->TexSrcTarget == TEXTURE_RECT_INDEX) ||
851 (inst->TexSrcTarget == TEXTURE_CUBE_INDEX))
852 release_temp(c, tmpcoord);
853 }
854
855
856 /**
857 * Check if the given TXP instruction really needs the divide-by-W step.
858 */
859 static GLboolean projtex( struct brw_wm_compile *c,
860 const struct prog_instruction *inst )
861 {
862 const struct prog_src_register src = inst->SrcReg[0];
863 GLboolean retVal;
864
865 assert(inst->Opcode == OPCODE_TXP);
866
867 /* Only try to detect the simplest cases. Could detect (later)
868 * cases where we are trying to emit code like RCP {1.0}, MUL x,
869 * {1.0}, and so on.
870 *
871 * More complex cases than this typically only arise from
872 * user-provided fragment programs anyway:
873 */
874 if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX)
875 retVal = GL_FALSE; /* ut2004 gun rendering !?! */
876 else if (src.File == PROGRAM_INPUT &&
877 GET_SWZ(src.Swizzle, W) == W &&
878 (c->key.proj_attrib_mask & (1 << src.Index)) == 0)
879 retVal = GL_FALSE;
880 else
881 retVal = GL_TRUE;
882
883 return retVal;
884 }
885
886
887 /**
888 * Emit code for TXP.
889 */
890 static void precalc_txp( struct brw_wm_compile *c,
891 const struct prog_instruction *inst )
892 {
893 struct prog_src_register src0 = inst->SrcReg[0];
894
895 if (projtex(c, inst)) {
896 struct prog_dst_register tmp = get_temp(c);
897 struct prog_instruction tmp_inst;
898
899 /* tmp0.w = RCP inst.arg[0][3]
900 */
901 emit_op(c,
902 OPCODE_RCP,
903 dst_mask(tmp, WRITEMASK_W),
904 0,
905 src_swizzle1(src0, GET_SWZ(src0.Swizzle, W)),
906 src_undef(),
907 src_undef());
908
909 /* tmp0.xyz = MUL inst.arg[0], tmp0.wwww
910 */
911 emit_op(c,
912 OPCODE_MUL,
913 dst_mask(tmp, WRITEMASK_XYZ),
914 0,
915 src0,
916 src_swizzle1(src_reg_from_dst(tmp), W),
917 src_undef());
918
919 /* dst = precalc(TEX tmp0)
920 */
921 tmp_inst = *inst;
922 tmp_inst.SrcReg[0] = src_reg_from_dst(tmp);
923 precalc_tex(c, &tmp_inst);
924
925 release_temp(c, tmp);
926 }
927 else
928 {
929 /* dst = precalc(TEX src0)
930 */
931 precalc_tex(c, inst);
932 }
933 }
934
935
936
937 static void emit_fb_write( struct brw_wm_compile *c )
938 {
939 struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
940 struct prog_src_register outdepth = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DEPTH);
941 struct prog_src_register outcolor;
942 GLuint i;
943
944 struct prog_instruction *inst, *last_inst;
945 struct brw_context *brw = c->func.brw;
946
947 /* The inst->Aux field is used for FB write target and the EOT marker */
948
949 if (brw->state.nr_color_regions > 1) {
950 for (i = 0 ; i < brw->state.nr_color_regions; i++) {
951 outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0 + i);
952 last_inst = inst = emit_op(c,
953 WM_FB_WRITE, dst_mask(dst_undef(),0), 0,
954 outcolor, payload_r0_depth, outdepth);
955 inst->Aux = (i<<1);
956 if (c->fp_fragcolor_emitted) {
957 outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR);
958 last_inst = inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0),
959 0, outcolor, payload_r0_depth, outdepth);
960 inst->Aux = (i<<1);
961 }
962 }
963 last_inst->Aux |= 1; //eot
964 }
965 else {
966 /* if gl_FragData[0] is written, use it, else use gl_FragColor */
967 if (c->fp->program.Base.OutputsWritten & (1 << FRAG_RESULT_DATA0))
968 outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0);
969 else
970 outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR);
971
972 inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0),
973 0, outcolor, payload_r0_depth, outdepth);
974 inst->Aux = 1|(0<<1);
975 }
976 }
977
978
979
980
981 /***********************************************************************
982 * Emit INTERP instructions ahead of first use of each attrib.
983 */
984
985 static void validate_src_regs( struct brw_wm_compile *c,
986 const struct prog_instruction *inst )
987 {
988 GLuint nr_args = brw_wm_nr_args( inst->Opcode );
989 GLuint i;
990
991 for (i = 0; i < nr_args; i++) {
992 if (inst->SrcReg[i].File == PROGRAM_INPUT) {
993 GLuint idx = inst->SrcReg[i].Index;
994 if (!(c->fp_interp_emitted & (1<<idx))) {
995 emit_interp(c, idx);
996 }
997 }
998 }
999 }
1000
1001 static void validate_dst_regs( struct brw_wm_compile *c,
1002 const struct prog_instruction *inst )
1003 {
1004 if (inst->DstReg.File == PROGRAM_OUTPUT) {
1005 GLuint idx = inst->DstReg.Index;
1006 if (idx == FRAG_RESULT_COLOR)
1007 c->fp_fragcolor_emitted = 1;
1008 }
1009 }
1010
1011 static void print_insns( const struct prog_instruction *insn,
1012 GLuint nr )
1013 {
1014 GLuint i;
1015 for (i = 0; i < nr; i++, insn++) {
1016 _mesa_printf("%3d: ", i);
1017 if (insn->Opcode < MAX_OPCODE)
1018 _mesa_print_instruction(insn);
1019 else if (insn->Opcode < MAX_WM_OPCODE) {
1020 GLuint idx = insn->Opcode - MAX_OPCODE;
1021
1022 _mesa_print_alu_instruction(insn,
1023 wm_opcode_strings[idx],
1024 3);
1025 }
1026 else
1027 _mesa_printf("965 Opcode %d\n", insn->Opcode);
1028 }
1029 }
1030
1031
1032 /**
1033 * Initial pass for fragment program code generation.
1034 * This function is used by both the GLSL and non-GLSL paths.
1035 */
1036 void brw_wm_pass_fp( struct brw_wm_compile *c )
1037 {
1038 struct brw_fragment_program *fp = c->fp;
1039 GLuint insn;
1040
1041 if (INTEL_DEBUG & DEBUG_WM) {
1042 _mesa_printf("pre-fp:\n");
1043 _mesa_print_program(&fp->program.Base);
1044 _mesa_printf("\n");
1045 }
1046
1047 c->pixel_xy = src_undef();
1048 c->delta_xy = src_undef();
1049 c->pixel_w = src_undef();
1050 c->nr_fp_insns = 0;
1051
1052 /* Emit preamble instructions. This is where special instructions such as
1053 * WM_CINTERP, WM_LINTERP, WM_PINTERP and WM_WPOSXY are emitted to
1054 * compute shader inputs from varying vars.
1055 */
1056 for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) {
1057 const struct prog_instruction *inst = &fp->program.Base.Instructions[insn];
1058 validate_src_regs(c, inst);
1059 validate_dst_regs(c, inst);
1060 }
1061
1062 /* Loop over all instructions doing assorted simplifications and
1063 * transformations.
1064 */
1065 for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) {
1066 const struct prog_instruction *inst = &fp->program.Base.Instructions[insn];
1067 struct prog_instruction *out;
1068
1069 /* Check for INPUT values, emit INTERP instructions where
1070 * necessary:
1071 */
1072
1073 switch (inst->Opcode) {
1074 case OPCODE_SWZ:
1075 out = emit_insn(c, inst);
1076 out->Opcode = OPCODE_MOV;
1077 break;
1078
1079 case OPCODE_ABS:
1080 out = emit_insn(c, inst);
1081 out->Opcode = OPCODE_MOV;
1082 out->SrcReg[0].Negate = NEGATE_NONE;
1083 out->SrcReg[0].Abs = 1;
1084 break;
1085
1086 case OPCODE_SUB:
1087 out = emit_insn(c, inst);
1088 out->Opcode = OPCODE_ADD;
1089 out->SrcReg[1].Negate ^= NEGATE_XYZW;
1090 break;
1091
1092 case OPCODE_SCS:
1093 out = emit_insn(c, inst);
1094 /* This should probably be done in the parser.
1095 */
1096 out->DstReg.WriteMask &= WRITEMASK_XY;
1097 break;
1098
1099 case OPCODE_DST:
1100 precalc_dst(c, inst);
1101 break;
1102
1103 case OPCODE_LIT:
1104 precalc_lit(c, inst);
1105 break;
1106
1107 case OPCODE_TEX:
1108 precalc_tex(c, inst);
1109 break;
1110
1111 case OPCODE_TXP:
1112 precalc_txp(c, inst);
1113 break;
1114
1115 case OPCODE_TXB:
1116 out = emit_insn(c, inst);
1117 out->TexSrcUnit = fp->program.Base.SamplerUnits[inst->TexSrcUnit];
1118 break;
1119
1120 case OPCODE_XPD:
1121 out = emit_insn(c, inst);
1122 /* This should probably be done in the parser.
1123 */
1124 out->DstReg.WriteMask &= WRITEMASK_XYZ;
1125 break;
1126
1127 case OPCODE_KIL:
1128 out = emit_insn(c, inst);
1129 /* This should probably be done in the parser.
1130 */
1131 out->DstReg.WriteMask = 0;
1132 break;
1133 case OPCODE_DDX:
1134 emit_ddx(c, inst);
1135 break;
1136 case OPCODE_DDY:
1137 emit_ddy(c, inst);
1138 break;
1139 case OPCODE_END:
1140 emit_fb_write(c);
1141 break;
1142 case OPCODE_PRINT:
1143 break;
1144
1145 default:
1146 emit_insn(c, inst);
1147 break;
1148 }
1149 }
1150
1151 if (INTEL_DEBUG & DEBUG_WM) {
1152 _mesa_printf("pass_fp:\n");
1153 print_insns( c->prog_instructions, c->nr_fp_insns );
1154 _mesa_printf("\n");
1155 }
1156 }
1157