Merge branch 'mesa_7_6_branch'
[mesa.git] / src / mesa / drivers / dri / i965 / brw_wm_fp.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "main/glheader.h"
34 #include "main/macros.h"
35 #include "main/enums.h"
36 #include "brw_context.h"
37 #include "brw_wm.h"
38 #include "brw_util.h"
39
40 #include "shader/prog_parameter.h"
41 #include "shader/prog_print.h"
42 #include "shader/prog_statevars.h"
43
44
/** An invalid texture target (one past the last valid target index) */
#define TEX_TARGET_NONE NUM_TEXTURE_TARGETS

/** An invalid texture unit (one past the last valid unit index) */
#define TEX_UNIT_NONE BRW_MAX_TEX_UNIT

/** Register index of the first temporary handed out by get_temp() */
#define FIRST_INTERNAL_TEMP MAX_NV_FRAGMENT_PROGRAM_TEMPS

/* Channel numbers, used below with GET_SWZ() and the swizzle helpers */
#define X 0
#define Y 1
#define Z 2
#define W 3
57
58
/**
 * Printable names for the driver-private WM opcodes.  print_insns()
 * indexes this table with (Opcode - MAX_OPCODE), so the order of the
 * entries must follow the order of the WM_* opcode values.
 */
static const char *wm_opcode_strings[] = {
   "PIXELXY",
   "DELTAXY",
   "PIXELW",
   "LINTERP",
   "PINTERP",
   "CINTERP",
   "WPOSXY",
   "FB_WRITE",
   "FRONTFACING",
};

/* Compiled out and unused in this file; presumably names for the
 * driver-private register files -- TODO confirm before re-enabling.
 */
#if 0
static const char *wm_file_strings[] = {
   "PAYLOAD"
};
#endif
76
77
78 /***********************************************************************
79 * Source regs
80 */
81
82 static struct prog_src_register src_reg(GLuint file, GLuint idx)
83 {
84 struct prog_src_register reg;
85 reg.File = file;
86 reg.Index = idx;
87 reg.Swizzle = SWIZZLE_NOOP;
88 reg.RelAddr = 0;
89 reg.Negate = NEGATE_NONE;
90 reg.Abs = 0;
91 return reg;
92 }
93
94 static struct prog_src_register src_reg_from_dst(struct prog_dst_register dst)
95 {
96 return src_reg(dst.File, dst.Index);
97 }
98
99 static struct prog_src_register src_undef( void )
100 {
101 return src_reg(PROGRAM_UNDEFINED, 0);
102 }
103
104 static GLboolean src_is_undef(struct prog_src_register src)
105 {
106 return src.File == PROGRAM_UNDEFINED;
107 }
108
109 static struct prog_src_register src_swizzle( struct prog_src_register reg, int x, int y, int z, int w )
110 {
111 reg.Swizzle = MAKE_SWIZZLE4(x,y,z,w);
112 return reg;
113 }
114
115 static struct prog_src_register src_swizzle1( struct prog_src_register reg, int x )
116 {
117 return src_swizzle(reg, x, x, x, x);
118 }
119
120 static struct prog_src_register src_swizzle4( struct prog_src_register reg, uint swizzle )
121 {
122 reg.Swizzle = swizzle;
123 return reg;
124 }
125
126
127 /***********************************************************************
128 * Dest regs
129 */
130
131 static struct prog_dst_register dst_reg(GLuint file, GLuint idx)
132 {
133 struct prog_dst_register reg;
134 reg.File = file;
135 reg.Index = idx;
136 reg.WriteMask = WRITEMASK_XYZW;
137 reg.RelAddr = 0;
138 reg.CondMask = COND_TR;
139 reg.CondSwizzle = 0;
140 reg.CondSrc = 0;
141 reg.pad = 0;
142 return reg;
143 }
144
145 static struct prog_dst_register dst_mask( struct prog_dst_register reg, int mask )
146 {
147 reg.WriteMask &= mask;
148 return reg;
149 }
150
151 static struct prog_dst_register dst_undef( void )
152 {
153 return dst_reg(PROGRAM_UNDEFINED, 0);
154 }
155
156
157
158 static struct prog_dst_register get_temp( struct brw_wm_compile *c )
159 {
160 int bit = _mesa_ffs( ~c->fp_temp );
161
162 if (!bit) {
163 _mesa_printf("%s: out of temporaries\n", __FILE__);
164 exit(1);
165 }
166
167 c->fp_temp |= 1<<(bit-1);
168 return dst_reg(PROGRAM_TEMPORARY, FIRST_INTERNAL_TEMP+(bit-1));
169 }
170
171
172 static void release_temp( struct brw_wm_compile *c, struct prog_dst_register temp )
173 {
174 c->fp_temp &= ~(1 << (temp.Index - FIRST_INTERNAL_TEMP));
175 }
176
177
178 /***********************************************************************
179 * Instructions
180 */
181
182 static struct prog_instruction *get_fp_inst(struct brw_wm_compile *c)
183 {
184 return &c->prog_instructions[c->nr_fp_insns++];
185 }
186
187 static struct prog_instruction *emit_insn(struct brw_wm_compile *c,
188 const struct prog_instruction *inst0)
189 {
190 struct prog_instruction *inst = get_fp_inst(c);
191 *inst = *inst0;
192 return inst;
193 }
194
195 static struct prog_instruction * emit_tex_op(struct brw_wm_compile *c,
196 GLuint op,
197 struct prog_dst_register dest,
198 GLuint saturate,
199 GLuint tex_src_unit,
200 GLuint tex_src_target,
201 GLuint tex_shadow,
202 struct prog_src_register src0,
203 struct prog_src_register src1,
204 struct prog_src_register src2 )
205 {
206 struct prog_instruction *inst = get_fp_inst(c);
207
208 assert(tex_src_unit < BRW_MAX_TEX_UNIT ||
209 tex_src_unit == TEX_UNIT_NONE);
210 assert(tex_src_target < NUM_TEXTURE_TARGETS ||
211 tex_src_target == TEX_TARGET_NONE);
212
213 /* update mask of which texture units are referenced by this program */
214 if (tex_src_unit != TEX_UNIT_NONE)
215 c->fp->tex_units_used |= (1 << tex_src_unit);
216
217 memset(inst, 0, sizeof(*inst));
218
219 inst->Opcode = op;
220 inst->DstReg = dest;
221 inst->SaturateMode = saturate;
222 inst->TexSrcUnit = tex_src_unit;
223 inst->TexSrcTarget = tex_src_target;
224 inst->TexShadow = tex_shadow;
225 inst->SrcReg[0] = src0;
226 inst->SrcReg[1] = src1;
227 inst->SrcReg[2] = src2;
228 return inst;
229 }
230
231
232 static struct prog_instruction * emit_op(struct brw_wm_compile *c,
233 GLuint op,
234 struct prog_dst_register dest,
235 GLuint saturate,
236 struct prog_src_register src0,
237 struct prog_src_register src1,
238 struct prog_src_register src2 )
239 {
240 return emit_tex_op(c, op, dest, saturate,
241 TEX_UNIT_NONE, TEX_TARGET_NONE, 0, /* unit, tgt, shadow */
242 src0, src1, src2);
243 }
244
245
246 /* Many Mesa opcodes produce the same value across all the result channels.
247 * We'd rather not have to support that splatting in the opcode implementations,
248 * and brw_wm_pass*.c wants to optimize them out by shuffling references around
249 * anyway. We can easily get both by emitting the opcode to one channel, and
250 * then MOVing it to the others, which brw_wm_pass*.c already understands.
251 */
252 static struct prog_instruction *emit_scalar_insn(struct brw_wm_compile *c,
253 const struct prog_instruction *inst0)
254 {
255 struct prog_instruction *inst;
256 unsigned int dst_chan;
257 unsigned int other_channel_mask;
258
259 if (inst0->DstReg.WriteMask == 0)
260 return NULL;
261
262 dst_chan = _mesa_ffs(inst0->DstReg.WriteMask) - 1;
263 inst = get_fp_inst(c);
264 *inst = *inst0;
265 inst->DstReg.WriteMask = 1 << dst_chan;
266
267 other_channel_mask = inst0->DstReg.WriteMask & ~(1 << dst_chan);
268 if (other_channel_mask != 0) {
269 inst = emit_op(c,
270 OPCODE_MOV,
271 dst_mask(inst0->DstReg, other_channel_mask),
272 0,
273 src_swizzle1(src_reg_from_dst(inst0->DstReg), dst_chan),
274 src_undef(),
275 src_undef());
276 }
277 return inst;
278 }
279
280
281 /***********************************************************************
282 * Special instructions for interpolation and other tasks
283 */
284
285 static struct prog_src_register get_pixel_xy( struct brw_wm_compile *c )
286 {
287 if (src_is_undef(c->pixel_xy)) {
288 struct prog_dst_register pixel_xy = get_temp(c);
289 struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
290
291
292 /* Emit the out calculations, and hold onto the results. Use
293 * two instructions as a temporary is required.
294 */
295 /* pixel_xy.xy = PIXELXY payload[0];
296 */
297 emit_op(c,
298 WM_PIXELXY,
299 dst_mask(pixel_xy, WRITEMASK_XY),
300 0,
301 payload_r0_depth,
302 src_undef(),
303 src_undef());
304
305 c->pixel_xy = src_reg_from_dst(pixel_xy);
306 }
307
308 return c->pixel_xy;
309 }
310
311 static struct prog_src_register get_delta_xy( struct brw_wm_compile *c )
312 {
313 if (src_is_undef(c->delta_xy)) {
314 struct prog_dst_register delta_xy = get_temp(c);
315 struct prog_src_register pixel_xy = get_pixel_xy(c);
316 struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
317
318 /* deltas.xy = DELTAXY pixel_xy, payload[0]
319 */
320 emit_op(c,
321 WM_DELTAXY,
322 dst_mask(delta_xy, WRITEMASK_XY),
323 0,
324 pixel_xy,
325 payload_r0_depth,
326 src_undef());
327
328 c->delta_xy = src_reg_from_dst(delta_xy);
329 }
330
331 return c->delta_xy;
332 }
333
334 static struct prog_src_register get_pixel_w( struct brw_wm_compile *c )
335 {
336 if (src_is_undef(c->pixel_w)) {
337 struct prog_dst_register pixel_w = get_temp(c);
338 struct prog_src_register deltas = get_delta_xy(c);
339 struct prog_src_register interp_wpos = src_reg(PROGRAM_PAYLOAD, FRAG_ATTRIB_WPOS);
340
341 /* deltas.xyw = DELTAS2 deltas.xy, payload.interp_wpos.x
342 */
343 emit_op(c,
344 WM_PIXELW,
345 dst_mask(pixel_w, WRITEMASK_W),
346 0,
347 interp_wpos,
348 deltas,
349 src_undef());
350
351
352 c->pixel_w = src_reg_from_dst(pixel_w);
353 }
354
355 return c->pixel_w;
356 }
357
358 static void emit_interp( struct brw_wm_compile *c,
359 GLuint idx )
360 {
361 struct prog_dst_register dst = dst_reg(PROGRAM_INPUT, idx);
362 struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx);
363 struct prog_src_register deltas = get_delta_xy(c);
364
365 /* Need to use PINTERP on attributes which have been
366 * multiplied by 1/W in the SF program, and LINTERP on those
367 * which have not:
368 */
369 switch (idx) {
370 case FRAG_ATTRIB_WPOS:
371 /* Have to treat wpos.xy specially:
372 */
373 emit_op(c,
374 WM_WPOSXY,
375 dst_mask(dst, WRITEMASK_XY),
376 0,
377 get_pixel_xy(c),
378 src_undef(),
379 src_undef());
380
381 dst = dst_mask(dst, WRITEMASK_ZW);
382
383 /* PROGRAM_INPUT.attr.xyzw = INTERP payload.interp[attr].x, deltas.xyw
384 */
385 emit_op(c,
386 WM_LINTERP,
387 dst,
388 0,
389 interp,
390 deltas,
391 src_undef());
392 break;
393 case FRAG_ATTRIB_COL0:
394 case FRAG_ATTRIB_COL1:
395 if (c->key.flat_shade) {
396 emit_op(c,
397 WM_CINTERP,
398 dst,
399 0,
400 interp,
401 src_undef(),
402 src_undef());
403 }
404 else {
405 if (c->key.linear_color) {
406 emit_op(c,
407 WM_LINTERP,
408 dst,
409 0,
410 interp,
411 deltas,
412 src_undef());
413 }
414 else {
415 /* perspective-corrected color interpolation */
416 emit_op(c,
417 WM_PINTERP,
418 dst,
419 0,
420 interp,
421 deltas,
422 get_pixel_w(c));
423 }
424 }
425 break;
426 case FRAG_ATTRIB_FOGC:
427 /* Interpolate the fog coordinate */
428 emit_op(c,
429 WM_PINTERP,
430 dst_mask(dst, WRITEMASK_X),
431 0,
432 interp,
433 deltas,
434 get_pixel_w(c));
435
436 emit_op(c,
437 OPCODE_MOV,
438 dst_mask(dst, WRITEMASK_YZW),
439 0,
440 src_swizzle(interp,
441 SWIZZLE_ZERO,
442 SWIZZLE_ZERO,
443 SWIZZLE_ZERO,
444 SWIZZLE_ONE),
445 src_undef(),
446 src_undef());
447 break;
448
449 case FRAG_ATTRIB_FACE:
450 /* XXX review/test this case */
451 emit_op(c,
452 WM_FRONTFACING,
453 dst_mask(dst, WRITEMASK_X),
454 0,
455 src_undef(),
456 src_undef(),
457 src_undef());
458 break;
459
460 case FRAG_ATTRIB_PNTC:
461 /* XXX review/test this case */
462 emit_op(c,
463 WM_PINTERP,
464 dst_mask(dst, WRITEMASK_XY),
465 0,
466 interp,
467 deltas,
468 get_pixel_w(c));
469
470 emit_op(c,
471 OPCODE_MOV,
472 dst_mask(dst, WRITEMASK_ZW),
473 0,
474 src_swizzle(interp,
475 SWIZZLE_ZERO,
476 SWIZZLE_ZERO,
477 SWIZZLE_ZERO,
478 SWIZZLE_ONE),
479 src_undef(),
480 src_undef());
481 break;
482
483 default:
484 emit_op(c,
485 WM_PINTERP,
486 dst,
487 0,
488 interp,
489 deltas,
490 get_pixel_w(c));
491 break;
492 }
493
494 c->fp_interp_emitted |= 1<<idx;
495 }
496
497 /***********************************************************************
498 * Hacks to extend the program parameter and constant lists.
499 */
500
501 /* Add the fog parameters to the parameter list of the original
502 * program, rather than creating a new list. Doesn't really do any
503 * harm and it's not as if the parameter handling isn't a big hack
504 * anyway.
505 */
506 static struct prog_src_register search_or_add_param5(struct brw_wm_compile *c,
507 GLint s0,
508 GLint s1,
509 GLint s2,
510 GLint s3,
511 GLint s4)
512 {
513 struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters;
514 gl_state_index tokens[STATE_LENGTH];
515 GLuint idx;
516 tokens[0] = s0;
517 tokens[1] = s1;
518 tokens[2] = s2;
519 tokens[3] = s3;
520 tokens[4] = s4;
521
522 for (idx = 0; idx < paramList->NumParameters; idx++) {
523 if (paramList->Parameters[idx].Type == PROGRAM_STATE_VAR &&
524 memcmp(paramList->Parameters[idx].StateIndexes, tokens, sizeof(tokens)) == 0)
525 return src_reg(PROGRAM_STATE_VAR, idx);
526 }
527
528 idx = _mesa_add_state_reference( paramList, tokens );
529
530 return src_reg(PROGRAM_STATE_VAR, idx);
531 }
532
533
534 static struct prog_src_register search_or_add_const4f( struct brw_wm_compile *c,
535 GLfloat s0,
536 GLfloat s1,
537 GLfloat s2,
538 GLfloat s3)
539 {
540 struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters;
541 GLfloat values[4];
542 GLuint idx;
543 GLuint swizzle;
544
545 values[0] = s0;
546 values[1] = s1;
547 values[2] = s2;
548 values[3] = s3;
549
550 /* Have to search, otherwise multiple compilations will each grow
551 * the parameter list.
552 */
553 for (idx = 0; idx < paramList->NumParameters; idx++) {
554 if (paramList->Parameters[idx].Type == PROGRAM_CONSTANT &&
555 memcmp(paramList->ParameterValues[idx], values, sizeof(values)) == 0)
556
557 /* XXX: this mimics the mesa bug which puts all constants and
558 * parameters into the "PROGRAM_STATE_VAR" category:
559 */
560 return src_reg(PROGRAM_STATE_VAR, idx);
561 }
562
563 idx = _mesa_add_unnamed_constant( paramList, values, 4, &swizzle );
564 assert(swizzle == SWIZZLE_NOOP); /* Need to handle swizzle in reg setup */
565 return src_reg(PROGRAM_STATE_VAR, idx);
566 }
567
568
569
570 /***********************************************************************
571 * Expand various instructions here to simpler forms.
572 */
573 static void precalc_dst( struct brw_wm_compile *c,
574 const struct prog_instruction *inst )
575 {
576 struct prog_src_register src0 = inst->SrcReg[0];
577 struct prog_src_register src1 = inst->SrcReg[1];
578 struct prog_dst_register dst = inst->DstReg;
579
580 if (dst.WriteMask & WRITEMASK_Y) {
581 /* dst.y = mul src0.y, src1.y
582 */
583 emit_op(c,
584 OPCODE_MUL,
585 dst_mask(dst, WRITEMASK_Y),
586 inst->SaturateMode,
587 src0,
588 src1,
589 src_undef());
590 }
591
592 if (dst.WriteMask & WRITEMASK_XZ) {
593 struct prog_instruction *swz;
594 GLuint z = GET_SWZ(src0.Swizzle, Z);
595
596 /* dst.xz = swz src0.1zzz
597 */
598 swz = emit_op(c,
599 OPCODE_SWZ,
600 dst_mask(dst, WRITEMASK_XZ),
601 inst->SaturateMode,
602 src_swizzle(src0, SWIZZLE_ONE, z, z, z),
603 src_undef(),
604 src_undef());
605 /* Avoid letting negation flag of src0 affect our 1 constant. */
606 swz->SrcReg[0].Negate &= ~NEGATE_X;
607 }
608 if (dst.WriteMask & WRITEMASK_W) {
609 /* dst.w = mov src1.w
610 */
611 emit_op(c,
612 OPCODE_MOV,
613 dst_mask(dst, WRITEMASK_W),
614 inst->SaturateMode,
615 src1,
616 src_undef(),
617 src_undef());
618 }
619 }
620
621
622 static void precalc_lit( struct brw_wm_compile *c,
623 const struct prog_instruction *inst )
624 {
625 struct prog_src_register src0 = inst->SrcReg[0];
626 struct prog_dst_register dst = inst->DstReg;
627
628 if (dst.WriteMask & WRITEMASK_XW) {
629 struct prog_instruction *swz;
630
631 /* dst.xw = swz src0.1111
632 */
633 swz = emit_op(c,
634 OPCODE_SWZ,
635 dst_mask(dst, WRITEMASK_XW),
636 0,
637 src_swizzle1(src0, SWIZZLE_ONE),
638 src_undef(),
639 src_undef());
640 /* Avoid letting the negation flag of src0 affect our 1 constant. */
641 swz->SrcReg[0].Negate = NEGATE_NONE;
642 }
643
644 if (dst.WriteMask & WRITEMASK_YZ) {
645 emit_op(c,
646 OPCODE_LIT,
647 dst_mask(dst, WRITEMASK_YZ),
648 inst->SaturateMode,
649 src0,
650 src_undef(),
651 src_undef());
652 }
653 }
654
655
/**
 * Some TEX instructions require extra code, cube map coordinate
 * normalization, or coordinate scaling for RECT textures, etc.
 * This function emits those extra instructions and the TEX
 * instruction itself.
 *
 * Steps, in emission order:
 *  1. coordinate fix-up (cube: divide by the largest |component|;
 *     rect: multiply by the STATE_TEXRECT_SCALE state variable;
 *     otherwise the coordinate is used unchanged),
 *  2. the TEX itself, followed by a hand-written YCbCr -> RGB
 *     conversion when the unit is flagged in c->key.yuvtex_mask,
 *  3. an extra SWZ of the result when c->key.tex_swizzles[unit] is not
 *     SWIZZLE_NOOP.
 *
 * \param inst  the original texture instruction.  Its TexSrcUnit is
 *              looked up in SamplerUnits[]; the emitted TEX carries the
 *              looked-up unit.
 */
static void precalc_tex( struct brw_wm_compile *c,
                         const struct prog_instruction *inst )
{
   struct prog_src_register coord;
   struct prog_dst_register tmpcoord;
   const GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit];

   assert(unit < BRW_MAX_TEX_UNIT);

   if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX) {
      struct prog_instruction *out;
      struct prog_dst_register tmp0 = get_temp(c);
      struct prog_src_register tmp0src = src_reg_from_dst(tmp0);
      struct prog_dst_register tmp1 = get_temp(c);
      struct prog_src_register tmp1src = src_reg_from_dst(tmp1);
      struct prog_src_register src0 = inst->SrcReg[0];

      /* find longest component of coord vector and normalize it */
      tmpcoord = get_temp(c);
      coord = src_reg_from_dst(tmpcoord);

      /* tmpcoord = |src0|  (negate cleared, abs set on the MOV source) */
      out = emit_op(c, OPCODE_MOV,
                    tmpcoord,
                    0,
                    src0,
                    src_undef(),
                    src_undef());
      out->SrcReg[0].Negate = NEGATE_NONE;
      out->SrcReg[0].Abs = 1;

      /* tmp0 = MAX(coord.X, coord.Y) */
      emit_op(c, OPCODE_MAX,
              tmp0,
              0,
              src_swizzle1(coord, X),
              src_swizzle1(coord, Y),
              src_undef());

      /* tmp1 = MAX(tmp0, coord.Z) */
      emit_op(c, OPCODE_MAX,
              tmp1,
              0,
              tmp0src,
              src_swizzle1(coord, Z),
              src_undef());

      /* tmp0.x = 1 / tmp1 */
      emit_op(c, OPCODE_RCP,
              dst_mask(tmp0, WRITEMASK_X),
              0,
              tmp1src,
              src_undef(),
              src_undef());

      /* tmpcoord = src0 * tmp0.x  (overwrites the |src0| held above) */
      emit_op(c, OPCODE_MUL,
              tmpcoord,
              0,
              src0,
              src_swizzle1(tmp0src, SWIZZLE_X),
              src_undef());

      /* tmpcoord itself stays allocated until the end of this function */
      release_temp(c, tmp0);
      release_temp(c, tmp1);
   }
   else if (inst->TexSrcTarget == TEXTURE_RECT_INDEX) {
      struct prog_src_register scale =
         search_or_add_param5( c,
                               STATE_INTERNAL,
                               STATE_TEXRECT_SCALE,
                               unit,
                               0,0 );

      tmpcoord = get_temp(c);

      /* coord.xy = MUL inst->SrcReg[0], { 1/width, 1/height }
       * (the z and w factors are the constant 1)
       */
      emit_op(c,
              OPCODE_MUL,
              tmpcoord,
              0,
              inst->SrcReg[0],
              src_swizzle(scale,
                          SWIZZLE_X,
                          SWIZZLE_Y,
                          SWIZZLE_ONE,
                          SWIZZLE_ONE),
              src_undef());

      coord = src_reg_from_dst(tmpcoord);
   }
   else {
      /* no fix-up needed; tmpcoord is left unused on this path */
      coord = inst->SrcReg[0];
   }

   /* Need to emit YUV texture conversions by hand.  Probably need to
    * do this here - the alternative is in brw_wm_emit.c, but the
    * conversion requires allocating a temporary variable which we
    * don't have the facility to do that late in the compilation.
    */
   if (c->key.yuvtex_mask & (1 << unit)) {
      /* convert ycbcr to RGBA */
      GLboolean swap_uv = c->key.yuvtex_swap_mask & (1<<unit);

      /*
         CONST C0 = { -.5, -.0625,  -.5, 1.164 }
         CONST C1 = { 1.596, -0.813, 2.018, -.391 }
         UYV     = TEX ...
         UYV.xyz = ADD UYV,     C0
         UYV.y   = MUL UYV.y,   C0.w
         if (UV swapped)
            RGB.xyz = MAD UYV.zzx, C1,   UYV.y
         else
            RGB.xyz = MAD UYV.xxz, C1,   UYV.y
         RGB.y   = MAD UYV.z,   C1.w, RGB.y
      */
      struct prog_dst_register dst = inst->DstReg;
      struct prog_dst_register tmp = get_temp(c);
      struct prog_src_register tmpsrc = src_reg_from_dst(tmp);
      struct prog_src_register C0 = search_or_add_const4f( c,  -.5, -.0625, -.5, 1.164 );
      struct prog_src_register C1 = search_or_add_const4f( c, 1.596, -0.813, 2.018, -.391 );

      /* tmp = TEX ...
       */
      emit_tex_op(c,
                  OPCODE_TEX,
                  tmp,
                  inst->SaturateMode,
                  unit,
                  inst->TexSrcTarget,
                  inst->TexShadow,
                  coord,
                  src_undef(),
                  src_undef());

      /* tmp.xyz = ADD TMP, C0
       */
      emit_op(c,
              OPCODE_ADD,
              dst_mask(tmp, WRITEMASK_XYZ),
              0,
              tmpsrc,
              C0,
              src_undef());

      /* YUV.y = MUL YUV.y, C0.w
       */

      emit_op(c,
              OPCODE_MUL,
              dst_mask(tmp, WRITEMASK_Y),
              0,
              tmpsrc,
              src_swizzle1(C0, W),
              src_undef());

      /*
       * if (UV swapped)
       *     RGB.xyz = MAD YUV.zzx, C1, YUV.y
       * else
       *     RGB.xyz = MAD YUV.xxz, C1, YUV.y
       */

      emit_op(c,
              OPCODE_MAD,
              dst_mask(dst, WRITEMASK_XYZ),
              0,
              swap_uv?src_swizzle(tmpsrc, Z,Z,X,X):src_swizzle(tmpsrc, X,X,Z,Z),
              C1,
              src_swizzle1(tmpsrc, Y));

      /* RGB.y = MAD YUV.z, C1.w, RGB.y
       * (reads back the destination written by the previous MAD)
       */
      emit_op(c,
              OPCODE_MAD,
              dst_mask(dst, WRITEMASK_Y),
              0,
              src_swizzle1(tmpsrc, Z),
              src_swizzle1(C1, W),
              src_swizzle1(src_reg_from_dst(dst), Y));

      release_temp(c, tmp);
   }
   else {
      /* ordinary RGBA tex instruction */
      emit_tex_op(c,
                  OPCODE_TEX,
                  inst->DstReg,
                  inst->SaturateMode,
                  unit,
                  inst->TexSrcTarget,
                  inst->TexShadow,
                  coord,
                  src_undef(),
                  src_undef());
   }

   /* For GL_EXT_texture_swizzle: */
   if (c->key.tex_swizzles[unit] != SWIZZLE_NOOP) {
      /* swizzle the result of the TEX instruction */
      struct prog_src_register tmpsrc = src_reg_from_dst(inst->DstReg);
      emit_op(c, OPCODE_SWZ,
              inst->DstReg,
              SATURATE_OFF, /* saturate already done above */
              src_swizzle4(tmpsrc, c->key.tex_swizzles[unit]),
              src_undef(),
              src_undef());
   }

   /* tmpcoord was only allocated on the RECT and CUBE paths above */
   if ((inst->TexSrcTarget == TEXTURE_RECT_INDEX) ||
       (inst->TexSrcTarget == TEXTURE_CUBE_INDEX))
      release_temp(c, tmpcoord);
}
876
877
878 /**
879 * Check if the given TXP instruction really needs the divide-by-W step.
880 */
881 static GLboolean projtex( struct brw_wm_compile *c,
882 const struct prog_instruction *inst )
883 {
884 const struct prog_src_register src = inst->SrcReg[0];
885 GLboolean retVal;
886
887 assert(inst->Opcode == OPCODE_TXP);
888
889 /* Only try to detect the simplest cases. Could detect (later)
890 * cases where we are trying to emit code like RCP {1.0}, MUL x,
891 * {1.0}, and so on.
892 *
893 * More complex cases than this typically only arise from
894 * user-provided fragment programs anyway:
895 */
896 if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX)
897 retVal = GL_FALSE; /* ut2004 gun rendering !?! */
898 else if (src.File == PROGRAM_INPUT &&
899 GET_SWZ(src.Swizzle, W) == W &&
900 (c->key.proj_attrib_mask & (1 << src.Index)) == 0)
901 retVal = GL_FALSE;
902 else
903 retVal = GL_TRUE;
904
905 return retVal;
906 }
907
908
909 /**
910 * Emit code for TXP.
911 */
912 static void precalc_txp( struct brw_wm_compile *c,
913 const struct prog_instruction *inst )
914 {
915 struct prog_src_register src0 = inst->SrcReg[0];
916
917 if (projtex(c, inst)) {
918 struct prog_dst_register tmp = get_temp(c);
919 struct prog_instruction tmp_inst;
920
921 /* tmp0.w = RCP inst.arg[0][3]
922 */
923 emit_op(c,
924 OPCODE_RCP,
925 dst_mask(tmp, WRITEMASK_W),
926 0,
927 src_swizzle1(src0, GET_SWZ(src0.Swizzle, W)),
928 src_undef(),
929 src_undef());
930
931 /* tmp0.xyz = MUL inst.arg[0], tmp0.wwww
932 */
933 emit_op(c,
934 OPCODE_MUL,
935 dst_mask(tmp, WRITEMASK_XYZ),
936 0,
937 src0,
938 src_swizzle1(src_reg_from_dst(tmp), W),
939 src_undef());
940
941 /* dst = precalc(TEX tmp0)
942 */
943 tmp_inst = *inst;
944 tmp_inst.SrcReg[0] = src_reg_from_dst(tmp);
945 precalc_tex(c, &tmp_inst);
946
947 release_temp(c, tmp);
948 }
949 else
950 {
951 /* dst = precalc(TEX src0)
952 */
953 precalc_tex(c, inst);
954 }
955 }
956
957
958
959 static void emit_fb_write( struct brw_wm_compile *c )
960 {
961 struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
962 struct prog_src_register outdepth = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DEPTH);
963 struct prog_src_register outcolor;
964 GLuint i;
965
966 struct prog_instruction *inst, *last_inst;
967 struct brw_context *brw = c->func.brw;
968
969 /* The inst->Aux field is used for FB write target and the EOT marker */
970
971 if (brw->state.nr_color_regions > 1) {
972 for (i = 0 ; i < brw->state.nr_color_regions; i++) {
973 outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0 + i);
974 last_inst = inst = emit_op(c,
975 WM_FB_WRITE, dst_mask(dst_undef(),0), 0,
976 outcolor, payload_r0_depth, outdepth);
977 inst->Aux = (i<<1);
978 if (c->fp_fragcolor_emitted) {
979 outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR);
980 last_inst = inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0),
981 0, outcolor, payload_r0_depth, outdepth);
982 inst->Aux = (i<<1);
983 }
984 }
985 last_inst->Aux |= 1; //eot
986 }
987 else {
988 /* if gl_FragData[0] is written, use it, else use gl_FragColor */
989 if (c->fp->program.Base.OutputsWritten & (1 << FRAG_RESULT_DATA0))
990 outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0);
991 else
992 outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR);
993
994 inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0),
995 0, outcolor, payload_r0_depth, outdepth);
996 inst->Aux = 1|(0<<1);
997 }
998 }
999
1000
1001
1002
1003 /***********************************************************************
1004 * Emit INTERP instructions ahead of first use of each attrib.
1005 */
1006
1007 static void validate_src_regs( struct brw_wm_compile *c,
1008 const struct prog_instruction *inst )
1009 {
1010 GLuint nr_args = brw_wm_nr_args( inst->Opcode );
1011 GLuint i;
1012
1013 for (i = 0; i < nr_args; i++) {
1014 if (inst->SrcReg[i].File == PROGRAM_INPUT) {
1015 GLuint idx = inst->SrcReg[i].Index;
1016 if (!(c->fp_interp_emitted & (1<<idx))) {
1017 emit_interp(c, idx);
1018 }
1019 }
1020 }
1021 }
1022
1023 static void validate_dst_regs( struct brw_wm_compile *c,
1024 const struct prog_instruction *inst )
1025 {
1026 if (inst->DstReg.File == PROGRAM_OUTPUT) {
1027 GLuint idx = inst->DstReg.Index;
1028 if (idx == FRAG_RESULT_COLOR)
1029 c->fp_fragcolor_emitted = 1;
1030 }
1031 }
1032
1033 static void print_insns( const struct prog_instruction *insn,
1034 GLuint nr )
1035 {
1036 GLuint i;
1037 for (i = 0; i < nr; i++, insn++) {
1038 _mesa_printf("%3d: ", i);
1039 if (insn->Opcode < MAX_OPCODE)
1040 _mesa_print_instruction(insn);
1041 else if (insn->Opcode < MAX_WM_OPCODE) {
1042 GLuint idx = insn->Opcode - MAX_OPCODE;
1043
1044 _mesa_print_alu_instruction(insn,
1045 wm_opcode_strings[idx],
1046 3);
1047 }
1048 else
1049 _mesa_printf("965 Opcode %d\n", insn->Opcode);
1050 }
1051 }
1052
1053
1054 /**
1055 * Initial pass for fragment program code generation.
1056 * This function is used by both the GLSL and non-GLSL paths.
1057 */
1058 void brw_wm_pass_fp( struct brw_wm_compile *c )
1059 {
1060 struct brw_fragment_program *fp = c->fp;
1061 GLuint insn;
1062
1063 if (INTEL_DEBUG & DEBUG_WM) {
1064 _mesa_printf("pre-fp:\n");
1065 _mesa_print_program(&fp->program.Base);
1066 _mesa_printf("\n");
1067 }
1068
1069 c->pixel_xy = src_undef();
1070 c->delta_xy = src_undef();
1071 c->pixel_w = src_undef();
1072 c->nr_fp_insns = 0;
1073 c->fp->tex_units_used = 0x0;
1074
1075 /* Emit preamble instructions. This is where special instructions such as
1076 * WM_CINTERP, WM_LINTERP, WM_PINTERP and WM_WPOSXY are emitted to
1077 * compute shader inputs from varying vars.
1078 */
1079 for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) {
1080 const struct prog_instruction *inst = &fp->program.Base.Instructions[insn];
1081 validate_src_regs(c, inst);
1082 validate_dst_regs(c, inst);
1083 }
1084
1085 /* Loop over all instructions doing assorted simplifications and
1086 * transformations.
1087 */
1088 for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) {
1089 const struct prog_instruction *inst = &fp->program.Base.Instructions[insn];
1090 struct prog_instruction *out;
1091
1092 /* Check for INPUT values, emit INTERP instructions where
1093 * necessary:
1094 */
1095
1096 switch (inst->Opcode) {
1097 case OPCODE_SWZ:
1098 out = emit_insn(c, inst);
1099 out->Opcode = OPCODE_MOV;
1100 break;
1101
1102 case OPCODE_ABS:
1103 out = emit_insn(c, inst);
1104 out->Opcode = OPCODE_MOV;
1105 out->SrcReg[0].Negate = NEGATE_NONE;
1106 out->SrcReg[0].Abs = 1;
1107 break;
1108
1109 case OPCODE_SUB:
1110 out = emit_insn(c, inst);
1111 out->Opcode = OPCODE_ADD;
1112 out->SrcReg[1].Negate ^= NEGATE_XYZW;
1113 break;
1114
1115 case OPCODE_SCS:
1116 out = emit_insn(c, inst);
1117 /* This should probably be done in the parser.
1118 */
1119 out->DstReg.WriteMask &= WRITEMASK_XY;
1120 break;
1121
1122 case OPCODE_DST:
1123 precalc_dst(c, inst);
1124 break;
1125
1126 case OPCODE_LIT:
1127 precalc_lit(c, inst);
1128 break;
1129
1130 case OPCODE_TEX:
1131 precalc_tex(c, inst);
1132 break;
1133
1134 case OPCODE_TXP:
1135 precalc_txp(c, inst);
1136 break;
1137
1138 case OPCODE_TXB:
1139 out = emit_insn(c, inst);
1140 out->TexSrcUnit = fp->program.Base.SamplerUnits[inst->TexSrcUnit];
1141 assert(out->TexSrcUnit < BRW_MAX_TEX_UNIT);
1142 break;
1143
1144 case OPCODE_XPD:
1145 out = emit_insn(c, inst);
1146 /* This should probably be done in the parser.
1147 */
1148 out->DstReg.WriteMask &= WRITEMASK_XYZ;
1149 break;
1150
1151 case OPCODE_KIL:
1152 out = emit_insn(c, inst);
1153 /* This should probably be done in the parser.
1154 */
1155 out->DstReg.WriteMask = 0;
1156 break;
1157 case OPCODE_END:
1158 emit_fb_write(c);
1159 break;
1160 case OPCODE_PRINT:
1161 break;
1162 default:
1163 if (brw_wm_is_scalar_result(inst->Opcode))
1164 emit_scalar_insn(c, inst);
1165 else
1166 emit_insn(c, inst);
1167 break;
1168 }
1169 }
1170
1171 if (INTEL_DEBUG & DEBUG_WM) {
1172 _mesa_printf("pass_fp:\n");
1173 print_insns( c->prog_instructions, c->nr_fp_insns );
1174 _mesa_printf("\n");
1175 }
1176 }
1177