i965/vs: Fix access of attribute arrays.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_wm_fp.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "main/glheader.h"
34 #include "main/macros.h"
35 #include "main/enums.h"
36 #include "brw_context.h"
37 #include "brw_wm.h"
38 #include "brw_util.h"
39
40 #include "program/prog_parameter.h"
41 #include "program/prog_print.h"
42 #include "program/prog_statevars.h"
43
44
/** Sentinel for "no texture target"; emit_tex_op() accepts it next to real targets. */
#define TEX_TARGET_NONE     NUM_TEXTURE_TARGETS

/** Sentinel for "no texture unit"; emit_tex_op() accepts it next to real units. */
#define TEX_UNIT_NONE       BRW_MAX_TEX_UNIT

/** Register index of the first temporary handed out by get_temp(). */
#define FIRST_INTERNAL_TEMP MAX_NV_FRAGMENT_PROGRAM_TEMPS

/* Channel numbers, used when building swizzles. */
#define X    0
#define Y    1
#define Z    2
#define W    3
57
58
/**
 * Printable names of the driver-private WM opcodes.
 *
 * print_insns() indexes this table with (Opcode - MAX_OPCODE), so the
 * entries have to stay in the same order as those opcode values.
 * Both the strings and the pointers are read-only.
 */
static const char *const wm_opcode_strings[] = {
   "PIXELXY",
   "DELTAXY",
   "PIXELW",
   "LINTERP",
   "PINTERP",
   "CINTERP",
   "WPOSXY",
   "FB_WRITE",
   "FRONTFACING",
};

#if 0
static const char *wm_file_strings[] = {
   "PAYLOAD"
};
#endif
76
77
78 /***********************************************************************
79 * Source regs
80 */
81
82 static struct prog_src_register src_reg(GLuint file, GLuint idx)
83 {
84 struct prog_src_register reg;
85 reg.File = file;
86 reg.Index = idx;
87 reg.Swizzle = SWIZZLE_NOOP;
88 reg.RelAddr = 0;
89 reg.Negate = NEGATE_NONE;
90 reg.Abs = 0;
91 reg.HasIndex2 = 0;
92 reg.RelAddr2 = 0;
93 reg.Index2 = 0;
94 return reg;
95 }
96
97 static struct prog_src_register src_reg_from_dst(struct prog_dst_register dst)
98 {
99 return src_reg(dst.File, dst.Index);
100 }
101
102 static struct prog_src_register src_undef( void )
103 {
104 return src_reg(PROGRAM_UNDEFINED, 0);
105 }
106
107 static GLboolean src_is_undef(struct prog_src_register src)
108 {
109 return src.File == PROGRAM_UNDEFINED;
110 }
111
112 static struct prog_src_register src_swizzle( struct prog_src_register reg, int x, int y, int z, int w )
113 {
114 reg.Swizzle = MAKE_SWIZZLE4(x,y,z,w);
115 return reg;
116 }
117
118 static struct prog_src_register src_swizzle1( struct prog_src_register reg, int x )
119 {
120 return src_swizzle(reg, x, x, x, x);
121 }
122
123 static struct prog_src_register src_swizzle4( struct prog_src_register reg, uint swizzle )
124 {
125 reg.Swizzle = swizzle;
126 return reg;
127 }
128
129
130 /***********************************************************************
131 * Dest regs
132 */
133
134 static struct prog_dst_register dst_reg(GLuint file, GLuint idx)
135 {
136 struct prog_dst_register reg;
137 reg.File = file;
138 reg.Index = idx;
139 reg.WriteMask = WRITEMASK_XYZW;
140 reg.RelAddr = 0;
141 reg.CondMask = COND_TR;
142 reg.CondSwizzle = 0;
143 reg.CondSrc = 0;
144 return reg;
145 }
146
147 static struct prog_dst_register dst_mask( struct prog_dst_register reg, int mask )
148 {
149 reg.WriteMask &= mask;
150 return reg;
151 }
152
153 static struct prog_dst_register dst_undef( void )
154 {
155 return dst_reg(PROGRAM_UNDEFINED, 0);
156 }
157
158
159
160 static struct prog_dst_register get_temp( struct brw_wm_compile *c )
161 {
162 int bit = _mesa_ffs( ~c->fp_temp );
163
164 if (!bit) {
165 printf("%s: out of temporaries\n", __FILE__);
166 exit(1);
167 }
168
169 c->fp_temp |= 1<<(bit-1);
170 return dst_reg(PROGRAM_TEMPORARY, FIRST_INTERNAL_TEMP+(bit-1));
171 }
172
173
174 static void release_temp( struct brw_wm_compile *c, struct prog_dst_register temp )
175 {
176 c->fp_temp &= ~(1 << (temp.Index - FIRST_INTERNAL_TEMP));
177 }
178
179
180 /***********************************************************************
181 * Instructions
182 */
183
184 static struct prog_instruction *get_fp_inst(struct brw_wm_compile *c)
185 {
186 assert(c->nr_fp_insns < BRW_WM_MAX_INSN);
187 memset(&c->prog_instructions[c->nr_fp_insns], 0,
188 sizeof(*c->prog_instructions));
189 return &c->prog_instructions[c->nr_fp_insns++];
190 }
191
192 static struct prog_instruction *emit_insn(struct brw_wm_compile *c,
193 const struct prog_instruction *inst0)
194 {
195 struct prog_instruction *inst = get_fp_inst(c);
196 *inst = *inst0;
197 return inst;
198 }
199
200 static struct prog_instruction * emit_tex_op(struct brw_wm_compile *c,
201 GLuint op,
202 struct prog_dst_register dest,
203 GLuint saturate,
204 GLuint tex_src_unit,
205 GLuint tex_src_target,
206 GLuint tex_shadow,
207 struct prog_src_register src0,
208 struct prog_src_register src1,
209 struct prog_src_register src2 )
210 {
211 struct prog_instruction *inst = get_fp_inst(c);
212
213 assert(tex_src_unit < BRW_MAX_TEX_UNIT ||
214 tex_src_unit == TEX_UNIT_NONE);
215 assert(tex_src_target < NUM_TEXTURE_TARGETS ||
216 tex_src_target == TEX_TARGET_NONE);
217
218 /* update mask of which texture units are referenced by this program */
219 if (tex_src_unit != TEX_UNIT_NONE)
220 c->fp->tex_units_used |= (1 << tex_src_unit);
221
222 memset(inst, 0, sizeof(*inst));
223
224 inst->Opcode = op;
225 inst->DstReg = dest;
226 inst->SaturateMode = saturate;
227 inst->TexSrcUnit = tex_src_unit;
228 inst->TexSrcTarget = tex_src_target;
229 inst->TexShadow = tex_shadow;
230 inst->SrcReg[0] = src0;
231 inst->SrcReg[1] = src1;
232 inst->SrcReg[2] = src2;
233 return inst;
234 }
235
236
237 static struct prog_instruction * emit_op(struct brw_wm_compile *c,
238 GLuint op,
239 struct prog_dst_register dest,
240 GLuint saturate,
241 struct prog_src_register src0,
242 struct prog_src_register src1,
243 struct prog_src_register src2 )
244 {
245 return emit_tex_op(c, op, dest, saturate,
246 TEX_UNIT_NONE, TEX_TARGET_NONE, 0, /* unit, tgt, shadow */
247 src0, src1, src2);
248 }
249
250
251 /* Many Mesa opcodes produce the same value across all the result channels.
252 * We'd rather not have to support that splatting in the opcode implementations,
253 * and brw_wm_pass*.c wants to optimize them out by shuffling references around
254 * anyway. We can easily get both by emitting the opcode to one channel, and
255 * then MOVing it to the others, which brw_wm_pass*.c already understands.
256 */
257 static struct prog_instruction *emit_scalar_insn(struct brw_wm_compile *c,
258 const struct prog_instruction *inst0)
259 {
260 struct prog_instruction *inst;
261 unsigned int dst_chan;
262 unsigned int other_channel_mask;
263
264 if (inst0->DstReg.WriteMask == 0)
265 return NULL;
266
267 dst_chan = _mesa_ffs(inst0->DstReg.WriteMask) - 1;
268 inst = get_fp_inst(c);
269 *inst = *inst0;
270 inst->DstReg.WriteMask = 1 << dst_chan;
271
272 other_channel_mask = inst0->DstReg.WriteMask & ~(1 << dst_chan);
273 if (other_channel_mask != 0) {
274 inst = emit_op(c,
275 OPCODE_MOV,
276 dst_mask(inst0->DstReg, other_channel_mask),
277 0,
278 src_swizzle1(src_reg_from_dst(inst0->DstReg), dst_chan),
279 src_undef(),
280 src_undef());
281 }
282 return inst;
283 }
284
285
286 /***********************************************************************
287 * Special instructions for interpolation and other tasks
288 */
289
290 static struct prog_src_register get_pixel_xy( struct brw_wm_compile *c )
291 {
292 if (src_is_undef(c->pixel_xy)) {
293 struct prog_dst_register pixel_xy = get_temp(c);
294 struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
295
296
297 /* Emit the out calculations, and hold onto the results. Use
298 * two instructions as a temporary is required.
299 */
300 /* pixel_xy.xy = PIXELXY payload[0];
301 */
302 emit_op(c,
303 WM_PIXELXY,
304 dst_mask(pixel_xy, WRITEMASK_XY),
305 0,
306 payload_r0_depth,
307 src_undef(),
308 src_undef());
309
310 c->pixel_xy = src_reg_from_dst(pixel_xy);
311 }
312
313 return c->pixel_xy;
314 }
315
316 static struct prog_src_register get_delta_xy( struct brw_wm_compile *c )
317 {
318 if (src_is_undef(c->delta_xy)) {
319 struct prog_dst_register delta_xy = get_temp(c);
320 struct prog_src_register pixel_xy = get_pixel_xy(c);
321 struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
322
323 /* deltas.xy = DELTAXY pixel_xy, payload[0]
324 */
325 emit_op(c,
326 WM_DELTAXY,
327 dst_mask(delta_xy, WRITEMASK_XY),
328 0,
329 pixel_xy,
330 payload_r0_depth,
331 src_undef());
332
333 c->delta_xy = src_reg_from_dst(delta_xy);
334 }
335
336 return c->delta_xy;
337 }
338
339 static struct prog_src_register get_pixel_w( struct brw_wm_compile *c )
340 {
341 /* This is called for producing 1/w in pre-gen6 interp. for gen6,
342 * the interp opcodes don't use this argument. But to keep the
343 * nr_args = 3 expectations of pinterp happy, just stuff delta_xy
344 * into the slot.
345 */
346 if (c->func.brw->intel.gen >= 6)
347 return c->delta_xy;
348
349 if (src_is_undef(c->pixel_w)) {
350 struct prog_dst_register pixel_w = get_temp(c);
351 struct prog_src_register deltas = get_delta_xy(c);
352 struct prog_src_register interp_wpos = src_reg(PROGRAM_PAYLOAD, FRAG_ATTRIB_WPOS);
353
354 /* deltas.xyw = DELTAS2 deltas.xy, payload.interp_wpos.x
355 */
356 emit_op(c,
357 WM_PIXELW,
358 dst_mask(pixel_w, WRITEMASK_W),
359 0,
360 interp_wpos,
361 deltas,
362 src_undef());
363
364
365 c->pixel_w = src_reg_from_dst(pixel_w);
366 }
367
368 return c->pixel_w;
369 }
370
371 static void emit_interp( struct brw_wm_compile *c,
372 GLuint idx )
373 {
374 struct prog_dst_register dst = dst_reg(PROGRAM_INPUT, idx);
375 struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx);
376 struct prog_src_register deltas;
377
378 deltas = get_delta_xy(c);
379
380 /* Need to use PINTERP on attributes which have been
381 * multiplied by 1/W in the SF program, and LINTERP on those
382 * which have not:
383 */
384 switch (idx) {
385 case FRAG_ATTRIB_WPOS:
386 /* Have to treat wpos.xy specially:
387 */
388 emit_op(c,
389 WM_WPOSXY,
390 dst_mask(dst, WRITEMASK_XY),
391 0,
392 get_pixel_xy(c),
393 src_undef(),
394 src_undef());
395
396 dst = dst_mask(dst, WRITEMASK_ZW);
397
398 /* PROGRAM_INPUT.attr.xyzw = INTERP payload.interp[attr].x, deltas.xyw
399 */
400 emit_op(c,
401 WM_LINTERP,
402 dst,
403 0,
404 interp,
405 deltas,
406 src_undef());
407 break;
408 case FRAG_ATTRIB_COL0:
409 case FRAG_ATTRIB_COL1:
410 if (c->key.flat_shade) {
411 emit_op(c,
412 WM_CINTERP,
413 dst,
414 0,
415 interp,
416 src_undef(),
417 src_undef());
418 }
419 else {
420 /* perspective-corrected color interpolation */
421 emit_op(c,
422 WM_PINTERP,
423 dst,
424 0,
425 interp,
426 deltas,
427 get_pixel_w(c));
428 }
429 break;
430 case FRAG_ATTRIB_FOGC:
431 /* Interpolate the fog coordinate */
432 emit_op(c,
433 WM_PINTERP,
434 dst_mask(dst, WRITEMASK_X),
435 0,
436 interp,
437 deltas,
438 get_pixel_w(c));
439
440 emit_op(c,
441 OPCODE_MOV,
442 dst_mask(dst, WRITEMASK_YZW),
443 0,
444 src_swizzle(interp,
445 SWIZZLE_ZERO,
446 SWIZZLE_ZERO,
447 SWIZZLE_ZERO,
448 SWIZZLE_ONE),
449 src_undef(),
450 src_undef());
451 break;
452
453 case FRAG_ATTRIB_FACE:
454 emit_op(c,
455 WM_FRONTFACING,
456 dst_mask(dst, WRITEMASK_X),
457 0,
458 src_undef(),
459 src_undef(),
460 src_undef());
461 break;
462
463 case FRAG_ATTRIB_PNTC:
464 /* XXX review/test this case */
465 emit_op(c,
466 WM_PINTERP,
467 dst_mask(dst, WRITEMASK_XY),
468 0,
469 interp,
470 deltas,
471 get_pixel_w(c));
472
473 emit_op(c,
474 OPCODE_MOV,
475 dst_mask(dst, WRITEMASK_ZW),
476 0,
477 src_swizzle(interp,
478 SWIZZLE_ZERO,
479 SWIZZLE_ZERO,
480 SWIZZLE_ZERO,
481 SWIZZLE_ONE),
482 src_undef(),
483 src_undef());
484 break;
485
486 default:
487 emit_op(c,
488 WM_PINTERP,
489 dst,
490 0,
491 interp,
492 deltas,
493 get_pixel_w(c));
494 break;
495 }
496
497 c->fp_interp_emitted |= 1<<idx;
498 }
499
500 /***********************************************************************
501 * Hacks to extend the program parameter and constant lists.
502 */
503
504 /* Add the fog parameters to the parameter list of the original
505 * program, rather than creating a new list. Doesn't really do any
506 * harm and it's not as if the parameter handling isn't a big hack
507 * anyway.
508 */
509 static struct prog_src_register search_or_add_param5(struct brw_wm_compile *c,
510 GLint s0,
511 GLint s1,
512 GLint s2,
513 GLint s3,
514 GLint s4)
515 {
516 struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters;
517 gl_state_index tokens[STATE_LENGTH];
518 GLuint idx;
519 tokens[0] = s0;
520 tokens[1] = s1;
521 tokens[2] = s2;
522 tokens[3] = s3;
523 tokens[4] = s4;
524
525 idx = _mesa_add_state_reference( paramList, tokens );
526
527 return src_reg(PROGRAM_STATE_VAR, idx);
528 }
529
530
531 static struct prog_src_register search_or_add_const4f( struct brw_wm_compile *c,
532 GLfloat s0,
533 GLfloat s1,
534 GLfloat s2,
535 GLfloat s3)
536 {
537 struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters;
538 gl_constant_value values[4];
539 GLuint idx;
540 GLuint swizzle;
541 struct prog_src_register reg;
542
543 values[0].f = s0;
544 values[1].f = s1;
545 values[2].f = s2;
546 values[3].f = s3;
547
548 idx = _mesa_add_unnamed_constant( paramList, values, 4, &swizzle );
549 reg = src_reg(PROGRAM_STATE_VAR, idx);
550 reg.Swizzle = swizzle;
551
552 return reg;
553 }
554
555
556
557 /***********************************************************************
558 * Expand various instructions here to simpler forms.
559 */
560 static void precalc_dst( struct brw_wm_compile *c,
561 const struct prog_instruction *inst )
562 {
563 struct prog_src_register src0 = inst->SrcReg[0];
564 struct prog_src_register src1 = inst->SrcReg[1];
565 struct prog_dst_register dst = inst->DstReg;
566 struct prog_dst_register temp = get_temp(c);
567
568 if (dst.WriteMask & WRITEMASK_Y) {
569 /* dst.y = mul src0.y, src1.y
570 */
571 emit_op(c,
572 OPCODE_MUL,
573 dst_mask(temp, WRITEMASK_Y),
574 inst->SaturateMode,
575 src0,
576 src1,
577 src_undef());
578 }
579
580 if (dst.WriteMask & WRITEMASK_XZ) {
581 struct prog_instruction *swz;
582 GLuint z = GET_SWZ(src0.Swizzle, Z);
583
584 /* dst.xz = swz src0.1zzz
585 */
586 swz = emit_op(c,
587 OPCODE_SWZ,
588 dst_mask(temp, WRITEMASK_XZ),
589 inst->SaturateMode,
590 src_swizzle(src0, SWIZZLE_ONE, z, z, z),
591 src_undef(),
592 src_undef());
593 /* Avoid letting negation flag of src0 affect our 1 constant. */
594 swz->SrcReg[0].Negate &= ~NEGATE_X;
595 }
596 if (dst.WriteMask & WRITEMASK_W) {
597 /* dst.w = mov src1.w
598 */
599 emit_op(c,
600 OPCODE_MOV,
601 dst_mask(temp, WRITEMASK_W),
602 inst->SaturateMode,
603 src1,
604 src_undef(),
605 src_undef());
606 }
607
608 /* This will get optimized out in general, but it ensures that we
609 * don't overwrite src operands in our channel-wise splitting
610 * above. See piglit fp-dst-aliasing-[12].
611 */
612 emit_op(c,
613 OPCODE_MOV,
614 dst,
615 0,
616 src_reg_from_dst(temp),
617 src_undef(),
618 src_undef());
619
620 release_temp(c, temp);
621 }
622
623
624 static void precalc_lit( struct brw_wm_compile *c,
625 const struct prog_instruction *inst )
626 {
627 struct prog_src_register src0 = inst->SrcReg[0];
628 struct prog_dst_register dst = inst->DstReg;
629
630 if (dst.WriteMask & WRITEMASK_YZ) {
631 emit_op(c,
632 OPCODE_LIT,
633 dst_mask(dst, WRITEMASK_YZ),
634 inst->SaturateMode,
635 src0,
636 src_undef(),
637 src_undef());
638 }
639
640 if (dst.WriteMask & WRITEMASK_XW) {
641 struct prog_instruction *swz;
642
643 /* dst.xw = swz src0.1111
644 */
645 swz = emit_op(c,
646 OPCODE_SWZ,
647 dst_mask(dst, WRITEMASK_XW),
648 0,
649 src_swizzle1(src0, SWIZZLE_ONE),
650 src_undef(),
651 src_undef());
652 /* Avoid letting the negation flag of src0 affect our 1 constant. */
653 swz->SrcReg[0].Negate = NEGATE_NONE;
654 }
655 }
656
657
658 /**
659 * Some TEX instructions require extra code, cube map coordinate
660 * normalization, or coordinate scaling for RECT textures, etc.
661 * This function emits those extra instructions and the TEX
662 * instruction itself.
663 */
664 static void precalc_tex( struct brw_wm_compile *c,
665 const struct prog_instruction *inst )
666 {
667 struct prog_src_register coord;
668 struct prog_dst_register tmpcoord = { 0 };
669 const GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit];
670
671 assert(unit < BRW_MAX_TEX_UNIT);
672
673 if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX) {
674 struct prog_instruction *out;
675 struct prog_dst_register tmp0 = get_temp(c);
676 struct prog_src_register tmp0src = src_reg_from_dst(tmp0);
677 struct prog_dst_register tmp1 = get_temp(c);
678 struct prog_src_register tmp1src = src_reg_from_dst(tmp1);
679 struct prog_src_register src0 = inst->SrcReg[0];
680
681 /* find longest component of coord vector and normalize it */
682 tmpcoord = get_temp(c);
683 coord = src_reg_from_dst(tmpcoord);
684
685 /* tmpcoord = src0 (i.e.: coord = src0) */
686 out = emit_op(c, OPCODE_MOV,
687 tmpcoord,
688 0,
689 src0,
690 src_undef(),
691 src_undef());
692 out->SrcReg[0].Negate = NEGATE_NONE;
693 out->SrcReg[0].Abs = 1;
694
695 /* tmp0 = MAX(coord.X, coord.Y) */
696 emit_op(c, OPCODE_MAX,
697 tmp0,
698 0,
699 src_swizzle1(coord, X),
700 src_swizzle1(coord, Y),
701 src_undef());
702
703 /* tmp1 = MAX(tmp0, coord.Z) */
704 emit_op(c, OPCODE_MAX,
705 tmp1,
706 0,
707 tmp0src,
708 src_swizzle1(coord, Z),
709 src_undef());
710
711 /* tmp0 = 1 / tmp1 */
712 emit_op(c, OPCODE_RCP,
713 dst_mask(tmp0, WRITEMASK_X),
714 0,
715 tmp1src,
716 src_undef(),
717 src_undef());
718
719 /* tmpCoord = src0 * tmp0 */
720 emit_op(c, OPCODE_MUL,
721 tmpcoord,
722 0,
723 src0,
724 src_swizzle1(tmp0src, SWIZZLE_X),
725 src_undef());
726
727 release_temp(c, tmp0);
728 release_temp(c, tmp1);
729 }
730 else if (inst->TexSrcTarget == TEXTURE_RECT_INDEX) {
731 struct prog_src_register scale =
732 search_or_add_param5( c,
733 STATE_INTERNAL,
734 STATE_TEXRECT_SCALE,
735 unit,
736 0,0 );
737
738 tmpcoord = get_temp(c);
739
740 /* coord.xy = MUL inst->SrcReg[0], { 1/width, 1/height }
741 */
742 emit_op(c,
743 OPCODE_MUL,
744 tmpcoord,
745 0,
746 inst->SrcReg[0],
747 src_swizzle(scale,
748 SWIZZLE_X,
749 SWIZZLE_Y,
750 SWIZZLE_ONE,
751 SWIZZLE_ONE),
752 src_undef());
753
754 coord = src_reg_from_dst(tmpcoord);
755 }
756 else {
757 coord = inst->SrcReg[0];
758 }
759
760 /* Need to emit YUV texture conversions by hand. Probably need to
761 * do this here - the alternative is in brw_wm_emit.c, but the
762 * conversion requires allocating a temporary variable which we
763 * don't have the facility to do that late in the compilation.
764 */
765 if (c->key.yuvtex_mask & (1 << unit)) {
766 /* convert ycbcr to RGBA */
767 GLboolean swap_uv = c->key.yuvtex_swap_mask & (1<<unit);
768
769 /*
770 CONST C0 = { -.5, -.0625, -.5, 1.164 }
771 CONST C1 = { 1.596, -0.813, 2.018, -.391 }
772 UYV = TEX ...
773 UYV.xyz = ADD UYV, C0
774 UYV.y = MUL UYV.y, C0.w
775 if (UV swaped)
776 RGB.xyz = MAD UYV.zzx, C1, UYV.y
777 else
778 RGB.xyz = MAD UYV.xxz, C1, UYV.y
779 RGB.y = MAD UYV.z, C1.w, RGB.y
780 */
781 struct prog_dst_register dst = inst->DstReg;
782 struct prog_dst_register tmp = get_temp(c);
783 struct prog_src_register tmpsrc = src_reg_from_dst(tmp);
784 struct prog_src_register C0 = search_or_add_const4f( c, -.5, -.0625, -.5, 1.164 );
785 struct prog_src_register C1 = search_or_add_const4f( c, 1.596, -0.813, 2.018, -.391 );
786
787 /* tmp = TEX ...
788 */
789 emit_tex_op(c,
790 OPCODE_TEX,
791 tmp,
792 inst->SaturateMode,
793 unit,
794 inst->TexSrcTarget,
795 inst->TexShadow,
796 coord,
797 src_undef(),
798 src_undef());
799
800 /* tmp.xyz = ADD TMP, C0
801 */
802 emit_op(c,
803 OPCODE_ADD,
804 dst_mask(tmp, WRITEMASK_XYZ),
805 0,
806 tmpsrc,
807 C0,
808 src_undef());
809
810 /* YUV.y = MUL YUV.y, C0.w
811 */
812
813 emit_op(c,
814 OPCODE_MUL,
815 dst_mask(tmp, WRITEMASK_Y),
816 0,
817 tmpsrc,
818 src_swizzle1(C0, W),
819 src_undef());
820
821 /*
822 * if (UV swaped)
823 * RGB.xyz = MAD YUV.zzx, C1, YUV.y
824 * else
825 * RGB.xyz = MAD YUV.xxz, C1, YUV.y
826 */
827
828 emit_op(c,
829 OPCODE_MAD,
830 dst_mask(dst, WRITEMASK_XYZ),
831 0,
832 swap_uv?src_swizzle(tmpsrc, Z,Z,X,X):src_swizzle(tmpsrc, X,X,Z,Z),
833 C1,
834 src_swizzle1(tmpsrc, Y));
835
836 /* RGB.y = MAD YUV.z, C1.w, RGB.y
837 */
838 emit_op(c,
839 OPCODE_MAD,
840 dst_mask(dst, WRITEMASK_Y),
841 0,
842 src_swizzle1(tmpsrc, Z),
843 src_swizzle1(C1, W),
844 src_swizzle1(src_reg_from_dst(dst), Y));
845
846 release_temp(c, tmp);
847 }
848 else {
849 /* ordinary RGBA tex instruction */
850 emit_tex_op(c,
851 OPCODE_TEX,
852 inst->DstReg,
853 inst->SaturateMode,
854 unit,
855 inst->TexSrcTarget,
856 inst->TexShadow,
857 coord,
858 src_undef(),
859 src_undef());
860 }
861
862 /* For GL_EXT_texture_swizzle: */
863 if (c->key.tex_swizzles[unit] != SWIZZLE_NOOP) {
864 /* swizzle the result of the TEX instruction */
865 struct prog_src_register tmpsrc = src_reg_from_dst(inst->DstReg);
866 emit_op(c, OPCODE_SWZ,
867 inst->DstReg,
868 SATURATE_OFF, /* saturate already done above */
869 src_swizzle4(tmpsrc, c->key.tex_swizzles[unit]),
870 src_undef(),
871 src_undef());
872 }
873
874 if ((inst->TexSrcTarget == TEXTURE_RECT_INDEX) ||
875 (inst->TexSrcTarget == TEXTURE_CUBE_INDEX))
876 release_temp(c, tmpcoord);
877 }
878
879
880 /**
881 * Check if the given TXP instruction really needs the divide-by-W step.
882 */
883 static GLboolean projtex( struct brw_wm_compile *c,
884 const struct prog_instruction *inst )
885 {
886 const struct prog_src_register src = inst->SrcReg[0];
887 GLboolean retVal;
888
889 assert(inst->Opcode == OPCODE_TXP);
890
891 /* Only try to detect the simplest cases. Could detect (later)
892 * cases where we are trying to emit code like RCP {1.0}, MUL x,
893 * {1.0}, and so on.
894 *
895 * More complex cases than this typically only arise from
896 * user-provided fragment programs anyway:
897 */
898 if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX)
899 retVal = GL_FALSE; /* ut2004 gun rendering !?! */
900 else if (src.File == PROGRAM_INPUT &&
901 GET_SWZ(src.Swizzle, W) == W &&
902 (c->key.proj_attrib_mask & (1 << src.Index)) == 0)
903 retVal = GL_FALSE;
904 else
905 retVal = GL_TRUE;
906
907 return retVal;
908 }
909
910
911 /**
912 * Emit code for TXP.
913 */
914 static void precalc_txp( struct brw_wm_compile *c,
915 const struct prog_instruction *inst )
916 {
917 struct prog_src_register src0 = inst->SrcReg[0];
918
919 if (projtex(c, inst)) {
920 struct prog_dst_register tmp = get_temp(c);
921 struct prog_instruction tmp_inst;
922
923 /* tmp0.w = RCP inst.arg[0][3]
924 */
925 emit_op(c,
926 OPCODE_RCP,
927 dst_mask(tmp, WRITEMASK_W),
928 0,
929 src_swizzle1(src0, GET_SWZ(src0.Swizzle, W)),
930 src_undef(),
931 src_undef());
932
933 /* tmp0.xyz = MUL inst.arg[0], tmp0.wwww
934 */
935 emit_op(c,
936 OPCODE_MUL,
937 dst_mask(tmp, WRITEMASK_XYZ),
938 0,
939 src0,
940 src_swizzle1(src_reg_from_dst(tmp), W),
941 src_undef());
942
943 /* dst = precalc(TEX tmp0)
944 */
945 tmp_inst = *inst;
946 tmp_inst.SrcReg[0] = src_reg_from_dst(tmp);
947 precalc_tex(c, &tmp_inst);
948
949 release_temp(c, tmp);
950 }
951 else
952 {
953 /* dst = precalc(TEX src0)
954 */
955 precalc_tex(c, inst);
956 }
957 }
958
959
960
961 static void emit_render_target_writes( struct brw_wm_compile *c )
962 {
963 struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
964 struct prog_src_register outdepth = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DEPTH);
965 struct prog_src_register outcolor;
966 GLuint i;
967
968 struct prog_instruction *inst = NULL;
969
970 /* The inst->Aux field is used for FB write target and the EOT marker */
971
972 for (i = 0; i < c->key.nr_color_regions; i++) {
973 if (c->fp->program.Base.OutputsWritten & (1 << FRAG_RESULT_COLOR)) {
974 outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR);
975 } else {
976 outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0 + i);
977 }
978 inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(), 0),
979 0, outcolor, payload_r0_depth, outdepth);
980 inst->Aux = INST_AUX_TARGET(i);
981 }
982
983 /* Mark the last FB write as final, or emit a dummy write if we had
984 * no render targets bound.
985 */
986 if (c->key.nr_color_regions != 0) {
987 inst->Aux |= INST_AUX_EOT;
988 } else {
989 inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(), 0),
990 0, src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR),
991 payload_r0_depth, outdepth);
992 inst->Aux = INST_AUX_TARGET(0) | INST_AUX_EOT;
993 }
994 }
995
996
997
998
999 /***********************************************************************
1000 * Emit INTERP instructions ahead of first use of each attrib.
1001 */
1002
1003 static void validate_src_regs( struct brw_wm_compile *c,
1004 const struct prog_instruction *inst )
1005 {
1006 GLuint nr_args = brw_wm_nr_args( inst->Opcode );
1007 GLuint i;
1008
1009 for (i = 0; i < nr_args; i++) {
1010 if (inst->SrcReg[i].File == PROGRAM_INPUT) {
1011 GLuint idx = inst->SrcReg[i].Index;
1012 if (!(c->fp_interp_emitted & (1<<idx))) {
1013 emit_interp(c, idx);
1014 }
1015 }
1016 }
1017 }
1018
1019 static void print_insns( const struct prog_instruction *insn,
1020 GLuint nr )
1021 {
1022 GLuint i;
1023 for (i = 0; i < nr; i++, insn++) {
1024 printf("%3d: ", i);
1025 if (insn->Opcode < MAX_OPCODE)
1026 _mesa_fprint_instruction_opt(stdout, insn, 0, PROG_PRINT_DEBUG, NULL);
1027 else if (insn->Opcode < MAX_WM_OPCODE) {
1028 GLuint idx = insn->Opcode - MAX_OPCODE;
1029
1030 _mesa_fprint_alu_instruction(stdout, insn, wm_opcode_strings[idx],
1031 3, PROG_PRINT_DEBUG, NULL);
1032 }
1033 else
1034 printf("965 Opcode %d\n", insn->Opcode);
1035 }
1036 }
1037
1038
1039 /**
1040 * Initial pass for fragment program code generation.
1041 * This function is used by both the GLSL and non-GLSL paths.
1042 */
1043 void brw_wm_pass_fp( struct brw_wm_compile *c )
1044 {
1045 struct intel_context *intel = &c->func.brw->intel;
1046 struct brw_fragment_program *fp = c->fp;
1047 GLuint insn;
1048
1049 if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
1050 printf("pre-fp:\n");
1051 _mesa_fprint_program_opt(stdout, &fp->program.Base, PROG_PRINT_DEBUG,
1052 GL_TRUE);
1053 printf("\n");
1054 }
1055
1056 c->pixel_xy = src_undef();
1057 if (intel->gen >= 6) {
1058 /* The interpolation deltas come in as the perspective pixel
1059 * location barycentric params.
1060 */
1061 c->delta_xy = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
1062 } else {
1063 c->delta_xy = src_undef();
1064 }
1065 c->pixel_w = src_undef();
1066 c->nr_fp_insns = 0;
1067 c->fp->tex_units_used = 0x0;
1068
1069 /* Emit preamble instructions. This is where special instructions such as
1070 * WM_CINTERP, WM_LINTERP, WM_PINTERP and WM_WPOSXY are emitted to
1071 * compute shader inputs from varying vars.
1072 */
1073 for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) {
1074 const struct prog_instruction *inst = &fp->program.Base.Instructions[insn];
1075 validate_src_regs(c, inst);
1076 }
1077
1078 /* Loop over all instructions doing assorted simplifications and
1079 * transformations.
1080 */
1081 for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) {
1082 const struct prog_instruction *inst = &fp->program.Base.Instructions[insn];
1083 struct prog_instruction *out;
1084
1085 /* Check for INPUT values, emit INTERP instructions where
1086 * necessary:
1087 */
1088
1089 switch (inst->Opcode) {
1090 case OPCODE_SWZ:
1091 out = emit_insn(c, inst);
1092 out->Opcode = OPCODE_MOV;
1093 break;
1094
1095 case OPCODE_ABS:
1096 out = emit_insn(c, inst);
1097 out->Opcode = OPCODE_MOV;
1098 out->SrcReg[0].Negate = NEGATE_NONE;
1099 out->SrcReg[0].Abs = 1;
1100 break;
1101
1102 case OPCODE_SUB:
1103 out = emit_insn(c, inst);
1104 out->Opcode = OPCODE_ADD;
1105 out->SrcReg[1].Negate ^= NEGATE_XYZW;
1106 break;
1107
1108 case OPCODE_SCS:
1109 out = emit_insn(c, inst);
1110 /* This should probably be done in the parser.
1111 */
1112 out->DstReg.WriteMask &= WRITEMASK_XY;
1113 break;
1114
1115 case OPCODE_DST:
1116 precalc_dst(c, inst);
1117 break;
1118
1119 case OPCODE_LIT:
1120 precalc_lit(c, inst);
1121 break;
1122
1123 case OPCODE_RSQ:
1124 out = emit_scalar_insn(c, inst);
1125 out->SrcReg[0].Abs = GL_TRUE;
1126 break;
1127
1128 case OPCODE_TEX:
1129 precalc_tex(c, inst);
1130 break;
1131
1132 case OPCODE_TXP:
1133 precalc_txp(c, inst);
1134 break;
1135
1136 case OPCODE_TXB:
1137 out = emit_insn(c, inst);
1138 out->TexSrcUnit = fp->program.Base.SamplerUnits[inst->TexSrcUnit];
1139 assert(out->TexSrcUnit < BRW_MAX_TEX_UNIT);
1140 break;
1141
1142 case OPCODE_XPD:
1143 out = emit_insn(c, inst);
1144 /* This should probably be done in the parser.
1145 */
1146 out->DstReg.WriteMask &= WRITEMASK_XYZ;
1147 break;
1148
1149 case OPCODE_KIL:
1150 out = emit_insn(c, inst);
1151 /* This should probably be done in the parser.
1152 */
1153 out->DstReg.WriteMask = 0;
1154 break;
1155 case OPCODE_END:
1156 emit_render_target_writes(c);
1157 break;
1158 case OPCODE_PRINT:
1159 break;
1160 default:
1161 if (brw_wm_is_scalar_result(inst->Opcode))
1162 emit_scalar_insn(c, inst);
1163 else
1164 emit_insn(c, inst);
1165 break;
1166 }
1167 }
1168
1169 if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
1170 printf("pass_fp:\n");
1171 print_insns( c->prog_instructions, c->nr_fp_insns );
1172 printf("\n");
1173 }
1174 }
1175