i965g: still working on compilation
[mesa.git] / src / gallium / drivers / i965 / brw_wm_fp.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "pipe/p_shader_constants.h"
34
35 #include "brw_context.h"
36 #include "brw_wm.h"
37 #include "brw_util.h"
38
39
/* Channel indices, used below when building and reading swizzles. */
#define X   0
#define Y   1
#define Z   2
#define W   3
44
45
/* Printable names for the WM-specific opcodes.  print_insns() indexes this
 * table with (Opcode - MAX_OPCODE).  Both the strings and the pointer slots
 * are const: nothing in this file ever rewrites the table.
 */
static const char *const wm_opcode_strings[] = {
   "PIXELXY",
   "DELTAXY",
   "PIXELW",
   "LINTERP",
   "PINTERP",
   "CINTERP",
   "WPOSXY",
   "FB_WRITE",
   "FRONTFACING",
};
57
58
59
60 /***********************************************************************
61 * Source regs
62 */
63
64 static struct prog_src_register src_reg(GLuint file, GLuint idx)
65 {
66 struct prog_src_register reg;
67 reg.File = file;
68 reg.Index = idx;
69 reg.Swizzle = SWIZZLE_NOOP;
70 reg.RelAddr = 0;
71 reg.Negate = NEGATE_NONE;
72 reg.Abs = 0;
73 return reg;
74 }
75
76 static struct prog_src_register src_reg_from_dst(struct prog_dst_register dst)
77 {
78 return src_reg(dst.File, dst.Index);
79 }
80
81 static struct prog_src_register src_undef( void )
82 {
83 return src_reg(PROGRAM_UNDEFINED, 0);
84 }
85
86 static GLboolean src_is_undef(struct prog_src_register src)
87 {
88 return src.File == PROGRAM_UNDEFINED;
89 }
90
91 static struct prog_src_register src_swizzle( struct prog_src_register reg, int x, int y, int z, int w )
92 {
93 reg.Swizzle = MAKE_SWIZZLE4(x,y,z,w);
94 return reg;
95 }
96
97 static struct prog_src_register src_swizzle1( struct prog_src_register reg, int x )
98 {
99 return src_swizzle(reg, x, x, x, x);
100 }
101
102 static struct prog_src_register src_swizzle4( struct prog_src_register reg, uint swizzle )
103 {
104 reg.Swizzle = swizzle;
105 return reg;
106 }
107
108
109 /***********************************************************************
110 * Dest regs
111 */
112
113 static struct prog_dst_register dst_reg(GLuint file, GLuint idx)
114 {
115 struct prog_dst_register reg;
116 reg.File = file;
117 reg.Index = idx;
118 reg.WriteMask = BRW_WRITEMASK_XYZW;
119 reg.RelAddr = 0;
120 reg.CondMask = COND_TR;
121 reg.CondSwizzle = 0;
122 reg.CondSrc = 0;
123 reg.pad = 0;
124 return reg;
125 }
126
127 static struct prog_dst_register dst_mask( struct prog_dst_register reg, int mask )
128 {
129 reg.WriteMask &= mask;
130 return reg;
131 }
132
133 static struct prog_dst_register dst_undef( void )
134 {
135 return dst_reg(PROGRAM_UNDEFINED, 0);
136 }
137
138
139
140 static struct prog_dst_register get_temp( struct brw_wm_compile *c )
141 {
142 int bit = _mesa_ffs( ~c->fp_temp );
143
144 if (!bit) {
145 debug_printf("%s: out of temporaries\n", __FILE__);
146 exit(1);
147 }
148
149 c->fp_temp |= 1<<(bit-1);
150 return dst_reg(PROGRAM_TEMPORARY, c->first_internal_temp+(bit-1));
151 }
152
153
154 static void release_temp( struct brw_wm_compile *c, struct prog_dst_register temp )
155 {
156 c->fp_temp &= ~(1 << (temp.Index - c->first_internal_temp));
157 }
158
159
160 /***********************************************************************
161 * Instructions
162 */
163
164 static struct prog_instruction *get_fp_inst(struct brw_wm_compile *c)
165 {
166 return &c->prog_instructions[c->nr_fp_insns++];
167 }
168
169 static struct prog_instruction *emit_insn(struct brw_wm_compile *c,
170 const struct prog_instruction *inst0)
171 {
172 struct prog_instruction *inst = get_fp_inst(c);
173 *inst = *inst0;
174 return inst;
175 }
176
177 static struct prog_instruction * emit_op(struct brw_wm_compile *c,
178 GLuint op,
179 struct prog_dst_register dest,
180 GLuint saturate,
181 struct prog_src_register src0,
182 struct prog_src_register src1,
183 struct prog_src_register src2 )
184 {
185 struct prog_instruction *inst = get_fp_inst(c);
186
187 memset(inst, 0, sizeof(*inst));
188
189 inst->Opcode = op;
190 inst->DstReg = dest;
191 inst->SaturateMode = saturate;
192 inst->SrcReg[0] = src0;
193 inst->SrcReg[1] = src1;
194 inst->SrcReg[2] = src2;
195 return inst;
196 }
197
198
199 /* Many opcodes produce the same value across all the result channels.
200 * We'd rather not have to support that splatting in the opcode implementations,
201 * and brw_wm_pass*.c wants to optimize them out by shuffling references around
202 * anyway. We can easily get both by emitting the opcode to one channel, and
203 * then MOVing it to the others, which brw_wm_pass*.c already understands.
204 */
205 static struct prog_instruction *emit_scalar_insn(struct brw_wm_compile *c,
206 const struct prog_instruction *inst0)
207 {
208 struct prog_instruction *inst;
209 unsigned int dst_chan;
210 unsigned int other_channel_mask;
211
212 if (inst0->DstReg.WriteMask == 0)
213 return NULL;
214
215 dst_chan = _mesa_ffs(inst0->DstReg.WriteMask) - 1;
216 inst = get_fp_inst(c);
217 *inst = *inst0;
218 inst->DstReg.WriteMask = 1 << dst_chan;
219
220 other_channel_mask = inst0->DstReg.WriteMask & ~(1 << dst_chan);
221 if (other_channel_mask != 0) {
222 inst = emit_op(c,
223 TGSI_OPCODE_MOV,
224 dst_mask(inst0->DstReg, other_channel_mask),
225 0,
226 src_swizzle1(src_reg_from_dst(inst0->DstReg), dst_chan),
227 src_undef(),
228 src_undef());
229 }
230 return inst;
231 }
232
233
234 /***********************************************************************
235 * Special instructions for interpolation and other tasks
236 */
237
238 static struct prog_src_register get_pixel_xy( struct brw_wm_compile *c )
239 {
240 if (src_is_undef(c->pixel_xy)) {
241 struct prog_dst_register pixel_xy = get_temp(c);
242 struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
243
244
245 /* Emit the out calculations, and hold onto the results. Use
246 * two instructions as a temporary is required.
247 */
248 /* pixel_xy.xy = PIXELXY payload[0];
249 */
250 emit_op(c,
251 WM_PIXELXY,
252 dst_mask(pixel_xy, BRW_WRITEMASK_XY),
253 0,
254 payload_r0_depth,
255 src_undef(),
256 src_undef());
257
258 c->pixel_xy = src_reg_from_dst(pixel_xy);
259 }
260
261 return c->pixel_xy;
262 }
263
264 static struct prog_src_register get_delta_xy( struct brw_wm_compile *c )
265 {
266 if (src_is_undef(c->delta_xy)) {
267 struct prog_dst_register delta_xy = get_temp(c);
268 struct prog_src_register pixel_xy = get_pixel_xy(c);
269 struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
270
271 /* deltas.xy = DELTAXY pixel_xy, payload[0]
272 */
273 emit_op(c,
274 WM_DELTAXY,
275 dst_mask(delta_xy, BRW_WRITEMASK_XY),
276 0,
277 pixel_xy,
278 payload_r0_depth,
279 src_undef());
280
281 c->delta_xy = src_reg_from_dst(delta_xy);
282 }
283
284 return c->delta_xy;
285 }
286
287 static struct prog_src_register get_pixel_w( struct brw_wm_compile *c )
288 {
289 if (src_is_undef(c->pixel_w)) {
290 struct prog_dst_register pixel_w = get_temp(c);
291 struct prog_src_register deltas = get_delta_xy(c);
292 struct prog_src_register interp_wpos = src_reg(PROGRAM_PAYLOAD, FRAG_ATTRIB_WPOS);
293
294 /* deltas.xyw = DELTAS2 deltas.xy, payload.interp_wpos.x
295 */
296 emit_op(c,
297 WM_PIXELW,
298 dst_mask(pixel_w, BRW_WRITEMASK_W),
299 0,
300 interp_wpos,
301 deltas,
302 src_undef());
303
304
305 c->pixel_w = src_reg_from_dst(pixel_w);
306 }
307
308 return c->pixel_w;
309 }
310
311 static void emit_interp( struct brw_wm_compile *c,
312 GLuint semantic,
313 GLuint semantic_index,
314 GLuint interp_mode )
315 {
316 struct prog_dst_register dst = dst_reg(PROGRAM_INPUT, idx);
317 struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx);
318 struct prog_src_register deltas = get_delta_xy(c);
319
320 /* Need to use PINTERP on attributes which have been
321 * multiplied by 1/W in the SF program, and LINTERP on those
322 * which have not:
323 */
324 switch (semantic) {
325 case FRAG_ATTRIB_WPOS:
326 /* Have to treat wpos.xy specially:
327 */
328 emit_op(c,
329 WM_WPOSXY,
330 dst_mask(dst, BRW_WRITEMASK_XY),
331 0,
332 get_pixel_xy(c),
333 src_undef(),
334 src_undef());
335
336 dst = dst_mask(dst, BRW_WRITEMASK_ZW);
337
338 /* PROGRAM_INPUT.attr.xyzw = INTERP payload.interp[attr].x, deltas.xyw
339 */
340 emit_op(c,
341 WM_LINTERP,
342 dst,
343 0,
344 interp,
345 deltas,
346 src_undef());
347 break;
348
349 case TGSI_SEMANTIC_COLOR:
350 if (c->key.flat_shade) {
351 emit_op(c,
352 WM_CINTERP,
353 dst,
354 0,
355 interp,
356 src_undef(),
357 src_undef());
358 }
359 else {
360 emit_op(c,
361 translate_interp_mode(interp_mode),
362 dst,
363 0,
364 interp,
365 deltas,
366 src_undef());
367 }
368 break;
369 case FRAG_ATTRIB_FOGC:
370 /* Interpolate the fog coordinate */
371 emit_op(c,
372 WM_PINTERP,
373 dst_mask(dst, BRW_WRITEMASK_X),
374 0,
375 interp,
376 deltas,
377 get_pixel_w(c));
378
379 emit_op(c,
380 TGSI_OPCODE_MOV,
381 dst_mask(dst, BRW_WRITEMASK_YZW),
382 0,
383 src_swizzle(interp,
384 SWIZZLE_ZERO,
385 SWIZZLE_ZERO,
386 SWIZZLE_ZERO,
387 SWIZZLE_ONE),
388 src_undef(),
389 src_undef());
390 break;
391
392 case FRAG_ATTRIB_FACE:
393 /* XXX review/test this case */
394 emit_op(c,
395 WM_FRONTFACING,
396 dst_mask(dst, BRW_WRITEMASK_X),
397 0,
398 src_undef(),
399 src_undef(),
400 src_undef());
401 break;
402
403 case FRAG_ATTRIB_PNTC:
404 /* XXX review/test this case */
405 emit_op(c,
406 WM_PINTERP,
407 dst_mask(dst, BRW_WRITEMASK_XY),
408 0,
409 interp,
410 deltas,
411 get_pixel_w(c));
412
413 emit_op(c,
414 TGSI_OPCODE_MOV,
415 dst_mask(dst, BRW_WRITEMASK_ZW),
416 0,
417 src_swizzle(interp,
418 SWIZZLE_ZERO,
419 SWIZZLE_ZERO,
420 SWIZZLE_ZERO,
421 SWIZZLE_ONE),
422 src_undef(),
423 src_undef());
424 break;
425
426 default:
427 emit_op(c,
428 translate_interp_mode(interp_mode),
429 dst,
430 0,
431 interp,
432 deltas,
433 get_pixel_w(c));
434 break;
435 }
436 }
437
438 /***********************************************************************
439 * Hacks to extend the program parameter and constant lists.
440 */
441
442 /* Add the fog parameters to the parameter list of the original
443 * program, rather than creating a new list. Doesn't really do any
444 * harm and it's not as if the parameter handling isn't a big hack
445 * anyway.
446 */
447 static struct prog_src_register search_or_add_param5(struct brw_wm_compile *c,
448 GLint s0,
449 GLint s1,
450 GLint s2,
451 GLint s3,
452 GLint s4)
453 {
454 struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters;
455 gl_state_index tokens[STATE_LENGTH];
456 GLuint idx;
457 tokens[0] = s0;
458 tokens[1] = s1;
459 tokens[2] = s2;
460 tokens[3] = s3;
461 tokens[4] = s4;
462
463 for (idx = 0; idx < paramList->NumParameters; idx++) {
464 if (paramList->Parameters[idx].Type == PROGRAM_STATE_VAR &&
465 memcmp(paramList->Parameters[idx].StateIndexes, tokens, sizeof(tokens)) == 0)
466 return src_reg(PROGRAM_STATE_VAR, idx);
467 }
468
469 idx = _mesa_add_state_reference( paramList, tokens );
470
471 return src_reg(PROGRAM_STATE_VAR, idx);
472 }
473
474
475 static struct prog_src_register search_or_add_const4f( struct brw_wm_compile *c,
476 GLfloat s0,
477 GLfloat s1,
478 GLfloat s2,
479 GLfloat s3)
480 {
481 struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters;
482 GLfloat values[4];
483 GLuint idx;
484 GLuint swizzle;
485
486 values[0] = s0;
487 values[1] = s1;
488 values[2] = s2;
489 values[3] = s3;
490
491 /* Have to search, otherwise multiple compilations will each grow
492 * the parameter list.
493 */
494 for (idx = 0; idx < paramList->NumParameters; idx++) {
495 if (paramList->Parameters[idx].Type == PROGRAM_CONSTANT &&
496 memcmp(paramList->ParameterValues[idx], values, sizeof(values)) == 0)
497
498 /* XXX: this mimics the mesa bug which puts all constants and
499 * parameters into the "PROGRAM_STATE_VAR" category:
500 */
501 return src_reg(PROGRAM_STATE_VAR, idx);
502 }
503
504 idx = _mesa_add_unnamed_constant( paramList, values, 4, &swizzle );
505 assert(swizzle == SWIZZLE_NOOP); /* Need to handle swizzle in reg setup */
506 return src_reg(PROGRAM_STATE_VAR, idx);
507 }
508
509
510
511 /***********************************************************************
512 * Expand various instructions here to simpler forms.
513 */
514 static void precalc_dst( struct brw_wm_compile *c,
515 const struct prog_instruction *inst )
516 {
517 struct prog_src_register src0 = inst->SrcReg[0];
518 struct prog_src_register src1 = inst->SrcReg[1];
519 struct prog_dst_register dst = inst->DstReg;
520
521 if (dst.WriteMask & BRW_WRITEMASK_Y) {
522 /* dst.y = mul src0.y, src1.y
523 */
524 emit_op(c,
525 TGSI_OPCODE_MUL,
526 dst_mask(dst, BRW_WRITEMASK_Y),
527 inst->SaturateMode,
528 src0,
529 src1,
530 src_undef());
531 }
532
533 if (dst.WriteMask & BRW_WRITEMASK_XZ) {
534 struct prog_instruction *swz;
535 GLuint z = GET_SWZ(src0.Swizzle, Z);
536
537 /* dst.xz = swz src0.1zzz
538 */
539 swz = emit_op(c,
540 TGSI_OPCODE_MOV,
541 dst_mask(dst, BRW_WRITEMASK_XZ),
542 inst->SaturateMode,
543 src_swizzle(src0, SWIZZLE_ONE, z, z, z),
544 src_undef(),
545 src_undef());
546 /* Avoid letting negation flag of src0 affect our 1 constant. */
547 swz->SrcReg[0].Negate &= ~NEGATE_X;
548 }
549 if (dst.WriteMask & BRW_WRITEMASK_W) {
550 /* dst.w = mov src1.w
551 */
552 emit_op(c,
553 TGSI_OPCODE_MOV,
554 dst_mask(dst, BRW_WRITEMASK_W),
555 inst->SaturateMode,
556 src1,
557 src_undef(),
558 src_undef());
559 }
560 }
561
562
563 static void precalc_lit( struct brw_wm_compile *c,
564 const struct prog_instruction *inst )
565 {
566 struct prog_src_register src0 = inst->SrcReg[0];
567 struct prog_dst_register dst = inst->DstReg;
568
569 if (dst.WriteMask & BRW_WRITEMASK_XW) {
570 struct prog_instruction *swz;
571
572 /* dst.xw = swz src0.1111
573 */
574 swz = emit_op(c,
575 TGSI_OPCODE_MOV,
576 dst_mask(dst, BRW_WRITEMASK_XW),
577 0,
578 src_swizzle1(src0, SWIZZLE_ONE),
579 src_undef(),
580 src_undef());
581 /* Avoid letting the negation flag of src0 affect our 1 constant. */
582 swz->SrcReg[0].Negate = NEGATE_NONE;
583 }
584
585 if (dst.WriteMask & BRW_WRITEMASK_YZ) {
586 emit_op(c,
587 TGSI_OPCODE_LIT,
588 dst_mask(dst, BRW_WRITEMASK_YZ),
589 inst->SaturateMode,
590 src0,
591 src_undef(),
592 src_undef());
593 }
594 }
595
596
597 /**
598 * Some TEX instructions require extra code, cube map coordinate
599 * normalization, or coordinate scaling for RECT textures, etc.
600 * This function emits those extra instructions and the TEX
601 * instruction itself.
602 */
603 static void precalc_tex( struct brw_wm_compile *c,
604 const struct prog_instruction *inst )
605 {
606 struct prog_src_register coord;
607 struct prog_dst_register tmpcoord;
608 const GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit];
609
610 assert(unit < BRW_MAX_TEX_UNIT);
611
612 if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX) {
613 struct prog_instruction *out;
614 struct prog_dst_register tmp0 = get_temp(c);
615 struct prog_src_register tmp0src = src_reg_from_dst(tmp0);
616 struct prog_dst_register tmp1 = get_temp(c);
617 struct prog_src_register tmp1src = src_reg_from_dst(tmp1);
618 struct prog_src_register src0 = inst->SrcReg[0];
619
620 /* find longest component of coord vector and normalize it */
621 tmpcoord = get_temp(c);
622 coord = src_reg_from_dst(tmpcoord);
623
624 /* tmpcoord = src0 (i.e.: coord = src0) */
625 out = emit_op(c, TGSI_OPCODE_MOV,
626 tmpcoord,
627 0,
628 src0,
629 src_undef(),
630 src_undef());
631 out->SrcReg[0].Negate = NEGATE_NONE;
632 out->SrcReg[0].Abs = 1;
633
634 /* tmp0 = MAX(coord.X, coord.Y) */
635 emit_op(c, TGSI_OPCODE_MAX,
636 tmp0,
637 0,
638 src_swizzle1(coord, X),
639 src_swizzle1(coord, Y),
640 src_undef());
641
642 /* tmp1 = MAX(tmp0, coord.Z) */
643 emit_op(c, TGSI_OPCODE_MAX,
644 tmp1,
645 0,
646 tmp0src,
647 src_swizzle1(coord, Z),
648 src_undef());
649
650 /* tmp0 = 1 / tmp1 */
651 emit_op(c, TGSI_OPCODE_RCP,
652 dst_mask(tmp0, BRW_WRITEMASK_X),
653 0,
654 tmp1src,
655 src_undef(),
656 src_undef());
657
658 /* tmpCoord = src0 * tmp0 */
659 emit_op(c, TGSI_OPCODE_MUL,
660 tmpcoord,
661 0,
662 src0,
663 src_swizzle1(tmp0src, SWIZZLE_X),
664 src_undef());
665
666 release_temp(c, tmp0);
667 release_temp(c, tmp1);
668 }
669 else if (inst->TexSrcTarget == TEXTURE_RECT_INDEX) {
670 struct prog_src_register scale =
671 search_or_add_param5( c,
672 STATE_INTERNAL,
673 STATE_TEXRECT_SCALE,
674 unit,
675 0,0 );
676
677 tmpcoord = get_temp(c);
678
679 /* coord.xy = MUL inst->SrcReg[0], { 1/width, 1/height }
680 */
681 emit_op(c,
682 TGSI_OPCODE_MUL,
683 tmpcoord,
684 0,
685 inst->SrcReg[0],
686 src_swizzle(scale,
687 SWIZZLE_X,
688 SWIZZLE_Y,
689 SWIZZLE_ONE,
690 SWIZZLE_ONE),
691 src_undef());
692
693 coord = src_reg_from_dst(tmpcoord);
694 }
695 else {
696 coord = inst->SrcReg[0];
697 }
698
699 /* Need to emit YUV texture conversions by hand. Probably need to
700 * do this here - the alternative is in brw_wm_emit.c, but the
701 * conversion requires allocating a temporary variable which we
702 * don't have the facility to do that late in the compilation.
703 */
704 if (c->key.yuvtex_mask & (1 << unit)) {
705 /* convert ycbcr to RGBA */
706 GLboolean swap_uv = c->key.yuvtex_swap_mask & (1<<unit);
707
708 /*
709 CONST C0 = { -.5, -.0625, -.5, 1.164 }
710 CONST C1 = { 1.596, -0.813, 2.018, -.391 }
711 UYV = TEX ...
712 UYV.xyz = ADD UYV, C0
713 UYV.y = MUL UYV.y, C0.w
714 if (UV swaped)
715 RGB.xyz = MAD UYV.zzx, C1, UYV.y
716 else
717 RGB.xyz = MAD UYV.xxz, C1, UYV.y
718 RGB.y = MAD UYV.z, C1.w, RGB.y
719 */
720 struct prog_dst_register dst = inst->DstReg;
721 struct prog_dst_register tmp = get_temp(c);
722 struct prog_src_register tmpsrc = src_reg_from_dst(tmp);
723 struct prog_src_register C0 = search_or_add_const4f( c, -.5, -.0625, -.5, 1.164 );
724 struct prog_src_register C1 = search_or_add_const4f( c, 1.596, -0.813, 2.018, -.391 );
725
726 /* tmp = TEX ...
727 */
728 emit_tex_op(c,
729 TGSI_OPCODE_TEX,
730 tmp,
731 inst->SaturateMode,
732 unit,
733 inst->TexSrcTarget,
734 inst->TexShadow,
735 coord,
736 src_undef(),
737 src_undef());
738
739 /* tmp.xyz = ADD TMP, C0
740 */
741 emit_op(c,
742 TGSI_OPCODE_ADD,
743 dst_mask(tmp, BRW_WRITEMASK_XYZ),
744 0,
745 tmpsrc,
746 C0,
747 src_undef());
748
749 /* YUV.y = MUL YUV.y, C0.w
750 */
751
752 emit_op(c,
753 TGSI_OPCODE_MUL,
754 dst_mask(tmp, BRW_WRITEMASK_Y),
755 0,
756 tmpsrc,
757 src_swizzle1(C0, W),
758 src_undef());
759
760 /*
761 * if (UV swaped)
762 * RGB.xyz = MAD YUV.zzx, C1, YUV.y
763 * else
764 * RGB.xyz = MAD YUV.xxz, C1, YUV.y
765 */
766
767 emit_op(c,
768 TGSI_OPCODE_MAD,
769 dst_mask(dst, BRW_WRITEMASK_XYZ),
770 0,
771 swap_uv?src_swizzle(tmpsrc, Z,Z,X,X):src_swizzle(tmpsrc, X,X,Z,Z),
772 C1,
773 src_swizzle1(tmpsrc, Y));
774
775 /* RGB.y = MAD YUV.z, C1.w, RGB.y
776 */
777 emit_op(c,
778 TGSI_OPCODE_MAD,
779 dst_mask(dst, BRW_WRITEMASK_Y),
780 0,
781 src_swizzle1(tmpsrc, Z),
782 src_swizzle1(C1, W),
783 src_swizzle1(src_reg_from_dst(dst), Y));
784
785 release_temp(c, tmp);
786 }
787 else {
788 /* ordinary RGBA tex instruction */
789 emit_tex_op(c,
790 TGSI_OPCODE_TEX,
791 inst->DstReg,
792 inst->SaturateMode,
793 unit,
794 inst->TexSrcTarget,
795 inst->TexShadow,
796 coord,
797 src_undef(),
798 src_undef());
799 }
800
801 /* For GL_EXT_texture_swizzle: */
802 if (c->key.tex_swizzles[unit] != SWIZZLE_NOOP) {
803 /* swizzle the result of the TEX instruction */
804 struct prog_src_register tmpsrc = src_reg_from_dst(inst->DstReg);
805 emit_op(c, TGSI_OPCODE_MOV,
806 inst->DstReg,
807 SATURATE_OFF, /* saturate already done above */
808 src_swizzle4(tmpsrc, c->key.tex_swizzles[unit]),
809 src_undef(),
810 src_undef());
811 }
812
813 if ((inst->TexSrcTarget == TEXTURE_RECT_INDEX) ||
814 (inst->TexSrcTarget == TEXTURE_CUBE_INDEX))
815 release_temp(c, tmpcoord);
816 }
817
818
819 /**
820 * Check if the given TXP instruction really needs the divide-by-W step.
821 */
822 static GLboolean projtex( struct brw_wm_compile *c,
823 const struct prog_instruction *inst )
824 {
825 const struct prog_src_register src = inst->SrcReg[0];
826 GLboolean retVal;
827
828 assert(inst->Opcode == TGSI_OPCODE_TXP);
829
830 /* Only try to detect the simplest cases. Could detect (later)
831 * cases where we are trying to emit code like RCP {1.0}, MUL x,
832 * {1.0}, and so on.
833 *
834 * More complex cases than this typically only arise from
835 * user-provided fragment programs anyway:
836 */
837 if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX)
838 retVal = GL_FALSE; /* ut2004 gun rendering !?! */
839 else if (src.File == PROGRAM_INPUT &&
840 GET_SWZ(src.Swizzle, W) == W &&
841 (c->key.proj_attrib_mask & (1 << src.Index)) == 0)
842 retVal = GL_FALSE;
843 else
844 retVal = GL_TRUE;
845
846 return retVal;
847 }
848
849
850 /**
851 * Emit code for TXP.
852 */
853 static void precalc_txp( struct brw_wm_compile *c,
854 const struct prog_instruction *inst )
855 {
856 struct prog_src_register src0 = inst->SrcReg[0];
857
858 if (projtex(c, inst)) {
859 struct prog_dst_register tmp = get_temp(c);
860 struct prog_instruction tmp_inst;
861
862 /* tmp0.w = RCP inst.arg[0][3]
863 */
864 emit_op(c,
865 TGSI_OPCODE_RCP,
866 dst_mask(tmp, BRW_WRITEMASK_W),
867 0,
868 src_swizzle1(src0, GET_SWZ(src0.Swizzle, W)),
869 src_undef(),
870 src_undef());
871
872 /* tmp0.xyz = MUL inst.arg[0], tmp0.wwww
873 */
874 emit_op(c,
875 TGSI_OPCODE_MUL,
876 dst_mask(tmp, BRW_WRITEMASK_XYZ),
877 0,
878 src0,
879 src_swizzle1(src_reg_from_dst(tmp), W),
880 src_undef());
881
882 /* dst = precalc(TEX tmp0)
883 */
884 tmp_inst = *inst;
885 tmp_inst.SrcReg[0] = src_reg_from_dst(tmp);
886 precalc_tex(c, &tmp_inst);
887
888 release_temp(c, tmp);
889 }
890 else
891 {
892 /* dst = precalc(TEX src0)
893 */
894 precalc_tex(c, inst);
895 }
896 }
897
898
899
900 static void emit_fb_write( struct brw_wm_compile *c )
901 {
902 struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
903 struct prog_src_register outdepth = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DEPTH);
904 struct prog_src_register outcolor;
905 GLuint i;
906
907 struct prog_instruction *inst, *last_inst;
908 struct brw_context *brw = c->func.brw;
909
910 /* The inst->Aux field is used for FB write target and the EOT marker */
911
912 if (brw->state.nr_color_regions > 1) {
913 for (i = 0 ; i < brw->state.nr_color_regions; i++) {
914 outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0 + i);
915 last_inst = inst = emit_op(c,
916 WM_FB_WRITE, dst_mask(dst_undef(),0), 0,
917 outcolor, payload_r0_depth, outdepth);
918 inst->Aux = (i<<1);
919 if (c->fp_fragcolor_emitted) {
920 outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR);
921 last_inst = inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0),
922 0, outcolor, payload_r0_depth, outdepth);
923 inst->Aux = (i<<1);
924 }
925 }
926 last_inst->Aux |= 1; //eot
927 }
928 else {
929 /* if gl_FragData[0] is written, use it, else use gl_FragColor */
930 if (c->fp->program.Base.OutputsWritten & (1 << FRAG_RESULT_DATA0))
931 outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0);
932 else
933 outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR);
934
935 inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0),
936 0, outcolor, payload_r0_depth, outdepth);
937 inst->Aux = 1|(0<<1);
938 }
939 }
940
941
942
943
944 /***********************************************************************
945 * Emit INTERP instructions ahead of first use of each attrib.
946 */
947
948 static void validate_src_regs( struct brw_wm_compile *c,
949 const struct prog_instruction *inst )
950 {
951 GLuint nr_args = brw_wm_nr_args( inst->Opcode );
952 GLuint i;
953
954 for (i = 0; i < nr_args; i++) {
955 if (inst->SrcReg[i].File == PROGRAM_INPUT) {
956 GLuint idx = inst->SrcReg[i].Index;
957 if (!(c->fp_interp_emitted & (1<<idx))) {
958 emit_interp(c, idx);
959 c->fp_interp_emitted |= 1<<idx;
960 }
961 }
962 }
963 }
964
965 static void validate_dst_regs( struct brw_wm_compile *c,
966 const struct prog_instruction *inst )
967 {
968 if (inst->DstReg.File == PROGRAM_OUTPUT) {
969 GLuint idx = inst->DstReg.Index;
970 if (idx == FRAG_RESULT_COLOR)
971 c->fp_fragcolor_emitted = 1;
972 }
973 }
974
975 static void print_insns( const struct prog_instruction *insn,
976 GLuint nr )
977 {
978 GLuint i;
979 for (i = 0; i < nr; i++, insn++) {
980 debug_printf("%3d: ", i);
981 if (insn->Opcode < MAX_OPCODE)
982 _mesa_print_instruction(insn);
983 else if (insn->Opcode < MAX_WM_OPCODE) {
984 GLuint idx = insn->Opcode - MAX_OPCODE;
985
986 _mesa_print_alu_instruction(insn,
987 wm_opcode_strings[idx],
988 3);
989 }
990 else
991 debug_printf("965 Opcode %d\n", insn->Opcode);
992 }
993 }
994
995
996 /**
997 * Initial pass for fragment program code generation.
998 * This function is used by both the GLSL and non-GLSL paths.
999 */
1000 void brw_wm_pass_fp( struct brw_wm_compile *c )
1001 {
1002 struct brw_fragment_program *fp = c->fp;
1003 GLuint insn;
1004
1005 if (BRW_DEBUG & DEBUG_WM) {
1006 debug_printf("pre-fp:\n");
1007 _mesa_print_program(&fp->program.Base);
1008 debug_printf("\n");
1009 }
1010
1011 c->pixel_xy = src_undef();
1012 c->delta_xy = src_undef();
1013 c->pixel_w = src_undef();
1014 c->nr_fp_insns = 0;
1015 c->fp->tex_units_used = 0x0;
1016
1017 /* Emit preamble instructions. This is where special instructions such as
1018 * WM_CINTERP, WM_LINTERP, WM_PINTERP and WM_WPOSXY are emitted to
1019 * compute shader inputs from varying vars.
1020 */
1021 for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) {
1022 const struct prog_instruction *inst = &fp->program.Base.Instructions[insn];
1023 validate_src_regs(c, inst);
1024 validate_dst_regs(c, inst);
1025 }
1026
1027 /* Loop over all instructions doing assorted simplifications and
1028 * transformations.
1029 */
1030 for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) {
1031 const struct prog_instruction *inst = &fp->program.Base.Instructions[insn];
1032 struct prog_instruction *out;
1033
1034 /* Check for INPUT values, emit INTERP instructions where
1035 * necessary:
1036 */
1037
1038 switch (inst->Opcode) {
1039 case TGSI_OPCODE_ABS:
1040 out = emit_insn(c, inst);
1041 out->Opcode = TGSI_OPCODE_MOV;
1042 out->SrcReg[0].Negate = NEGATE_NONE;
1043 out->SrcReg[0].Abs = 1;
1044 break;
1045
1046 case TGSI_OPCODE_SUB:
1047 out = emit_insn(c, inst);
1048 out->Opcode = TGSI_OPCODE_ADD;
1049 out->SrcReg[1].Negate ^= NEGATE_XYZW;
1050 break;
1051
1052 case TGSI_OPCODE_SCS:
1053 out = emit_insn(c, inst);
1054 /* This should probably be done in the parser.
1055 */
1056 out->DstReg.WriteMask &= BRW_WRITEMASK_XY;
1057 break;
1058
1059 case TGSI_OPCODE_DST:
1060 precalc_dst(c, inst);
1061 break;
1062
1063 case TGSI_OPCODE_LIT:
1064 precalc_lit(c, inst);
1065 break;
1066
1067 case TGSI_OPCODE_TEX:
1068 precalc_tex(c, inst);
1069 break;
1070
1071 case TGSI_OPCODE_TXP:
1072 precalc_txp(c, inst);
1073 break;
1074
1075 case TGSI_OPCODE_TXB:
1076 out = emit_insn(c, inst);
1077 out->TexSrcUnit = fp->program.Base.SamplerUnits[inst->TexSrcUnit];
1078 assert(out->TexSrcUnit < BRW_MAX_TEX_UNIT);
1079 break;
1080
1081 case TGSI_OPCODE_XPD:
1082 out = emit_insn(c, inst);
1083 /* This should probably be done in the parser.
1084 */
1085 out->DstReg.WriteMask &= BRW_WRITEMASK_XYZ;
1086 break;
1087
1088 case TGSI_OPCODE_KIL:
1089 out = emit_insn(c, inst);
1090 /* This should probably be done in the parser.
1091 */
1092 out->DstReg.WriteMask = 0;
1093 break;
1094 case TGSI_OPCODE_END:
1095 emit_fb_write(c);
1096 break;
1097 default:
1098 if (brw_wm_is_scalar_result(inst->Opcode))
1099 emit_scalar_insn(c, inst);
1100 else
1101 emit_insn(c, inst);
1102 break;
1103 }
1104 }
1105
1106 if (BRW_DEBUG & DEBUG_WM) {
1107 debug_printf("pass_fp:\n");
1108 print_insns( c->prog_instructions, c->nr_fp_insns );
1109 debug_printf("\n");
1110 }
1111 }
1112