Merge branch 'mesa_7_7_branch'
[mesa.git] / src / mesa / drivers / dri / i965 / brw_wm_fp.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "main/glheader.h"
34 #include "main/macros.h"
35 #include "main/enums.h"
36 #include "brw_context.h"
37 #include "brw_wm.h"
38 #include "brw_util.h"
39
40 #include "shader/prog_parameter.h"
41 #include "shader/prog_print.h"
42 #include "shader/prog_statevars.h"
43
44
/** Sentinel for "no texture target": the first value past the valid targets. */
#define TEX_TARGET_NONE NUM_TEXTURE_TARGETS

/** Sentinel for "no texture unit": the first value past the valid units. */
#define TEX_UNIT_NONE BRW_MAX_TEX_UNIT

/** Index of the first temporary register handed out by get_temp(). */
#define FIRST_INTERNAL_TEMP MAX_NV_FRAGMENT_PROGRAM_TEMPS

/* Channel indices, used when building and inspecting swizzles. */
#define X    0
#define Y    1
#define Z    2
#define W    3
57
58
/* Printable names for the WM-specific opcodes.  print_insns() indexes
 * this table with (Opcode - MAX_OPCODE).
 */
static const char *wm_opcode_strings[] = {
   "PIXELXY",      /* 0 */
   "DELTAXY",      /* 1 */
   "PIXELW",       /* 2 */
   "LINTERP",      /* 3 */
   "PINTERP",      /* 4 */
   "CINTERP",      /* 5 */
   "WPOSXY",       /* 6 */
   "FB_WRITE",     /* 7 */
   "FRONTFACING",  /* 8 */
};
70
71 #if 0
72 static const char *wm_file_strings[] = {
73 "PAYLOAD"
74 };
75 #endif
76
77
78 /***********************************************************************
79 * Source regs
80 */
81
82 static struct prog_src_register src_reg(GLuint file, GLuint idx)
83 {
84 struct prog_src_register reg;
85 reg.File = file;
86 reg.Index = idx;
87 reg.Swizzle = SWIZZLE_NOOP;
88 reg.RelAddr = 0;
89 reg.Negate = NEGATE_NONE;
90 reg.Abs = 0;
91 return reg;
92 }
93
94 static struct prog_src_register src_reg_from_dst(struct prog_dst_register dst)
95 {
96 return src_reg(dst.File, dst.Index);
97 }
98
99 static struct prog_src_register src_undef( void )
100 {
101 return src_reg(PROGRAM_UNDEFINED, 0);
102 }
103
104 static GLboolean src_is_undef(struct prog_src_register src)
105 {
106 return src.File == PROGRAM_UNDEFINED;
107 }
108
109 static struct prog_src_register src_swizzle( struct prog_src_register reg, int x, int y, int z, int w )
110 {
111 reg.Swizzle = MAKE_SWIZZLE4(x,y,z,w);
112 return reg;
113 }
114
115 static struct prog_src_register src_swizzle1( struct prog_src_register reg, int x )
116 {
117 return src_swizzle(reg, x, x, x, x);
118 }
119
120 static struct prog_src_register src_swizzle4( struct prog_src_register reg, uint swizzle )
121 {
122 reg.Swizzle = swizzle;
123 return reg;
124 }
125
126
127 /***********************************************************************
128 * Dest regs
129 */
130
131 static struct prog_dst_register dst_reg(GLuint file, GLuint idx)
132 {
133 struct prog_dst_register reg;
134 reg.File = file;
135 reg.Index = idx;
136 reg.WriteMask = WRITEMASK_XYZW;
137 reg.RelAddr = 0;
138 reg.CondMask = COND_TR;
139 reg.CondSwizzle = 0;
140 reg.CondSrc = 0;
141 return reg;
142 }
143
144 static struct prog_dst_register dst_mask( struct prog_dst_register reg, int mask )
145 {
146 reg.WriteMask &= mask;
147 return reg;
148 }
149
150 static struct prog_dst_register dst_undef( void )
151 {
152 return dst_reg(PROGRAM_UNDEFINED, 0);
153 }
154
155
156
157 static struct prog_dst_register get_temp( struct brw_wm_compile *c )
158 {
159 int bit = _mesa_ffs( ~c->fp_temp );
160
161 if (!bit) {
162 _mesa_printf("%s: out of temporaries\n", __FILE__);
163 exit(1);
164 }
165
166 c->fp_temp |= 1<<(bit-1);
167 return dst_reg(PROGRAM_TEMPORARY, FIRST_INTERNAL_TEMP+(bit-1));
168 }
169
170
171 static void release_temp( struct brw_wm_compile *c, struct prog_dst_register temp )
172 {
173 c->fp_temp &= ~(1 << (temp.Index - FIRST_INTERNAL_TEMP));
174 }
175
176
177 /***********************************************************************
178 * Instructions
179 */
180
181 static struct prog_instruction *get_fp_inst(struct brw_wm_compile *c)
182 {
183 assert(c->nr_fp_insns < BRW_WM_MAX_INSN);
184 memset(&c->prog_instructions[c->nr_fp_insns], 0,
185 sizeof(*c->prog_instructions));
186 return &c->prog_instructions[c->nr_fp_insns++];
187 }
188
189 static struct prog_instruction *emit_insn(struct brw_wm_compile *c,
190 const struct prog_instruction *inst0)
191 {
192 struct prog_instruction *inst = get_fp_inst(c);
193 *inst = *inst0;
194 return inst;
195 }
196
197 static struct prog_instruction * emit_tex_op(struct brw_wm_compile *c,
198 GLuint op,
199 struct prog_dst_register dest,
200 GLuint saturate,
201 GLuint tex_src_unit,
202 GLuint tex_src_target,
203 GLuint tex_shadow,
204 struct prog_src_register src0,
205 struct prog_src_register src1,
206 struct prog_src_register src2 )
207 {
208 struct prog_instruction *inst = get_fp_inst(c);
209
210 assert(tex_src_unit < BRW_MAX_TEX_UNIT ||
211 tex_src_unit == TEX_UNIT_NONE);
212 assert(tex_src_target < NUM_TEXTURE_TARGETS ||
213 tex_src_target == TEX_TARGET_NONE);
214
215 /* update mask of which texture units are referenced by this program */
216 if (tex_src_unit != TEX_UNIT_NONE)
217 c->fp->tex_units_used |= (1 << tex_src_unit);
218
219 memset(inst, 0, sizeof(*inst));
220
221 inst->Opcode = op;
222 inst->DstReg = dest;
223 inst->SaturateMode = saturate;
224 inst->TexSrcUnit = tex_src_unit;
225 inst->TexSrcTarget = tex_src_target;
226 inst->TexShadow = tex_shadow;
227 inst->SrcReg[0] = src0;
228 inst->SrcReg[1] = src1;
229 inst->SrcReg[2] = src2;
230 return inst;
231 }
232
233
234 static struct prog_instruction * emit_op(struct brw_wm_compile *c,
235 GLuint op,
236 struct prog_dst_register dest,
237 GLuint saturate,
238 struct prog_src_register src0,
239 struct prog_src_register src1,
240 struct prog_src_register src2 )
241 {
242 return emit_tex_op(c, op, dest, saturate,
243 TEX_UNIT_NONE, TEX_TARGET_NONE, 0, /* unit, tgt, shadow */
244 src0, src1, src2);
245 }
246
247
248 /* Many Mesa opcodes produce the same value across all the result channels.
249 * We'd rather not have to support that splatting in the opcode implementations,
250 * and brw_wm_pass*.c wants to optimize them out by shuffling references around
251 * anyway. We can easily get both by emitting the opcode to one channel, and
252 * then MOVing it to the others, which brw_wm_pass*.c already understands.
253 */
254 static struct prog_instruction *emit_scalar_insn(struct brw_wm_compile *c,
255 const struct prog_instruction *inst0)
256 {
257 struct prog_instruction *inst;
258 unsigned int dst_chan;
259 unsigned int other_channel_mask;
260
261 if (inst0->DstReg.WriteMask == 0)
262 return NULL;
263
264 dst_chan = _mesa_ffs(inst0->DstReg.WriteMask) - 1;
265 inst = get_fp_inst(c);
266 *inst = *inst0;
267 inst->DstReg.WriteMask = 1 << dst_chan;
268
269 other_channel_mask = inst0->DstReg.WriteMask & ~(1 << dst_chan);
270 if (other_channel_mask != 0) {
271 inst = emit_op(c,
272 OPCODE_MOV,
273 dst_mask(inst0->DstReg, other_channel_mask),
274 0,
275 src_swizzle1(src_reg_from_dst(inst0->DstReg), dst_chan),
276 src_undef(),
277 src_undef());
278 }
279 return inst;
280 }
281
282
283 /***********************************************************************
284 * Special instructions for interpolation and other tasks
285 */
286
287 static struct prog_src_register get_pixel_xy( struct brw_wm_compile *c )
288 {
289 if (src_is_undef(c->pixel_xy)) {
290 struct prog_dst_register pixel_xy = get_temp(c);
291 struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
292
293
294 /* Emit the out calculations, and hold onto the results. Use
295 * two instructions as a temporary is required.
296 */
297 /* pixel_xy.xy = PIXELXY payload[0];
298 */
299 emit_op(c,
300 WM_PIXELXY,
301 dst_mask(pixel_xy, WRITEMASK_XY),
302 0,
303 payload_r0_depth,
304 src_undef(),
305 src_undef());
306
307 c->pixel_xy = src_reg_from_dst(pixel_xy);
308 }
309
310 return c->pixel_xy;
311 }
312
313 static struct prog_src_register get_delta_xy( struct brw_wm_compile *c )
314 {
315 if (src_is_undef(c->delta_xy)) {
316 struct prog_dst_register delta_xy = get_temp(c);
317 struct prog_src_register pixel_xy = get_pixel_xy(c);
318 struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
319
320 /* deltas.xy = DELTAXY pixel_xy, payload[0]
321 */
322 emit_op(c,
323 WM_DELTAXY,
324 dst_mask(delta_xy, WRITEMASK_XY),
325 0,
326 pixel_xy,
327 payload_r0_depth,
328 src_undef());
329
330 c->delta_xy = src_reg_from_dst(delta_xy);
331 }
332
333 return c->delta_xy;
334 }
335
336 static struct prog_src_register get_pixel_w( struct brw_wm_compile *c )
337 {
338 if (src_is_undef(c->pixel_w)) {
339 struct prog_dst_register pixel_w = get_temp(c);
340 struct prog_src_register deltas = get_delta_xy(c);
341 struct prog_src_register interp_wpos = src_reg(PROGRAM_PAYLOAD, FRAG_ATTRIB_WPOS);
342
343 /* deltas.xyw = DELTAS2 deltas.xy, payload.interp_wpos.x
344 */
345 emit_op(c,
346 WM_PIXELW,
347 dst_mask(pixel_w, WRITEMASK_W),
348 0,
349 interp_wpos,
350 deltas,
351 src_undef());
352
353
354 c->pixel_w = src_reg_from_dst(pixel_w);
355 }
356
357 return c->pixel_w;
358 }
359
360 static void emit_interp( struct brw_wm_compile *c,
361 GLuint idx )
362 {
363 struct prog_dst_register dst = dst_reg(PROGRAM_INPUT, idx);
364 struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx);
365 struct prog_src_register deltas = get_delta_xy(c);
366
367 /* Need to use PINTERP on attributes which have been
368 * multiplied by 1/W in the SF program, and LINTERP on those
369 * which have not:
370 */
371 switch (idx) {
372 case FRAG_ATTRIB_WPOS:
373 /* Have to treat wpos.xy specially:
374 */
375 emit_op(c,
376 WM_WPOSXY,
377 dst_mask(dst, WRITEMASK_XY),
378 0,
379 get_pixel_xy(c),
380 src_undef(),
381 src_undef());
382
383 dst = dst_mask(dst, WRITEMASK_ZW);
384
385 /* PROGRAM_INPUT.attr.xyzw = INTERP payload.interp[attr].x, deltas.xyw
386 */
387 emit_op(c,
388 WM_LINTERP,
389 dst,
390 0,
391 interp,
392 deltas,
393 src_undef());
394 break;
395 case FRAG_ATTRIB_COL0:
396 case FRAG_ATTRIB_COL1:
397 if (c->key.flat_shade) {
398 emit_op(c,
399 WM_CINTERP,
400 dst,
401 0,
402 interp,
403 src_undef(),
404 src_undef());
405 }
406 else {
407 if (c->key.linear_color) {
408 emit_op(c,
409 WM_LINTERP,
410 dst,
411 0,
412 interp,
413 deltas,
414 src_undef());
415 }
416 else {
417 /* perspective-corrected color interpolation */
418 emit_op(c,
419 WM_PINTERP,
420 dst,
421 0,
422 interp,
423 deltas,
424 get_pixel_w(c));
425 }
426 }
427 break;
428 case FRAG_ATTRIB_FOGC:
429 /* Interpolate the fog coordinate */
430 emit_op(c,
431 WM_PINTERP,
432 dst_mask(dst, WRITEMASK_X),
433 0,
434 interp,
435 deltas,
436 get_pixel_w(c));
437
438 emit_op(c,
439 OPCODE_MOV,
440 dst_mask(dst, WRITEMASK_YZW),
441 0,
442 src_swizzle(interp,
443 SWIZZLE_ZERO,
444 SWIZZLE_ZERO,
445 SWIZZLE_ZERO,
446 SWIZZLE_ONE),
447 src_undef(),
448 src_undef());
449 break;
450
451 case FRAG_ATTRIB_FACE:
452 emit_op(c,
453 WM_FRONTFACING,
454 dst_mask(dst, WRITEMASK_X),
455 0,
456 src_undef(),
457 src_undef(),
458 src_undef());
459 break;
460
461 case FRAG_ATTRIB_PNTC:
462 /* XXX review/test this case */
463 emit_op(c,
464 WM_PINTERP,
465 dst_mask(dst, WRITEMASK_XY),
466 0,
467 interp,
468 deltas,
469 get_pixel_w(c));
470
471 emit_op(c,
472 OPCODE_MOV,
473 dst_mask(dst, WRITEMASK_ZW),
474 0,
475 src_swizzle(interp,
476 SWIZZLE_ZERO,
477 SWIZZLE_ZERO,
478 SWIZZLE_ZERO,
479 SWIZZLE_ONE),
480 src_undef(),
481 src_undef());
482 break;
483
484 default:
485 emit_op(c,
486 WM_PINTERP,
487 dst,
488 0,
489 interp,
490 deltas,
491 get_pixel_w(c));
492 break;
493 }
494
495 c->fp_interp_emitted |= 1<<idx;
496 }
497
498 /***********************************************************************
499 * Hacks to extend the program parameter and constant lists.
500 */
501
502 /* Add the fog parameters to the parameter list of the original
503 * program, rather than creating a new list. Doesn't really do any
504 * harm and it's not as if the parameter handling isn't a big hack
505 * anyway.
506 */
507 static struct prog_src_register search_or_add_param5(struct brw_wm_compile *c,
508 GLint s0,
509 GLint s1,
510 GLint s2,
511 GLint s3,
512 GLint s4)
513 {
514 struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters;
515 gl_state_index tokens[STATE_LENGTH];
516 GLuint idx;
517 tokens[0] = s0;
518 tokens[1] = s1;
519 tokens[2] = s2;
520 tokens[3] = s3;
521 tokens[4] = s4;
522
523 for (idx = 0; idx < paramList->NumParameters; idx++) {
524 if (paramList->Parameters[idx].Type == PROGRAM_STATE_VAR &&
525 memcmp(paramList->Parameters[idx].StateIndexes, tokens, sizeof(tokens)) == 0)
526 return src_reg(PROGRAM_STATE_VAR, idx);
527 }
528
529 idx = _mesa_add_state_reference( paramList, tokens );
530
531 return src_reg(PROGRAM_STATE_VAR, idx);
532 }
533
534
535 static struct prog_src_register search_or_add_const4f( struct brw_wm_compile *c,
536 GLfloat s0,
537 GLfloat s1,
538 GLfloat s2,
539 GLfloat s3)
540 {
541 struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters;
542 GLfloat values[4];
543 GLuint idx;
544 GLuint swizzle;
545
546 values[0] = s0;
547 values[1] = s1;
548 values[2] = s2;
549 values[3] = s3;
550
551 /* Have to search, otherwise multiple compilations will each grow
552 * the parameter list.
553 */
554 for (idx = 0; idx < paramList->NumParameters; idx++) {
555 if (paramList->Parameters[idx].Type == PROGRAM_CONSTANT &&
556 memcmp(paramList->ParameterValues[idx], values, sizeof(values)) == 0)
557
558 /* XXX: this mimics the mesa bug which puts all constants and
559 * parameters into the "PROGRAM_STATE_VAR" category:
560 */
561 return src_reg(PROGRAM_STATE_VAR, idx);
562 }
563
564 idx = _mesa_add_unnamed_constant( paramList, values, 4, &swizzle );
565 assert(swizzle == SWIZZLE_NOOP); /* Need to handle swizzle in reg setup */
566 return src_reg(PROGRAM_STATE_VAR, idx);
567 }
568
569
570
571 /***********************************************************************
572 * Expand various instructions here to simpler forms.
573 */
574 static void precalc_dst( struct brw_wm_compile *c,
575 const struct prog_instruction *inst )
576 {
577 struct prog_src_register src0 = inst->SrcReg[0];
578 struct prog_src_register src1 = inst->SrcReg[1];
579 struct prog_dst_register dst = inst->DstReg;
580
581 if (dst.WriteMask & WRITEMASK_Y) {
582 /* dst.y = mul src0.y, src1.y
583 */
584 emit_op(c,
585 OPCODE_MUL,
586 dst_mask(dst, WRITEMASK_Y),
587 inst->SaturateMode,
588 src0,
589 src1,
590 src_undef());
591 }
592
593 if (dst.WriteMask & WRITEMASK_XZ) {
594 struct prog_instruction *swz;
595 GLuint z = GET_SWZ(src0.Swizzle, Z);
596
597 /* dst.xz = swz src0.1zzz
598 */
599 swz = emit_op(c,
600 OPCODE_SWZ,
601 dst_mask(dst, WRITEMASK_XZ),
602 inst->SaturateMode,
603 src_swizzle(src0, SWIZZLE_ONE, z, z, z),
604 src_undef(),
605 src_undef());
606 /* Avoid letting negation flag of src0 affect our 1 constant. */
607 swz->SrcReg[0].Negate &= ~NEGATE_X;
608 }
609 if (dst.WriteMask & WRITEMASK_W) {
610 /* dst.w = mov src1.w
611 */
612 emit_op(c,
613 OPCODE_MOV,
614 dst_mask(dst, WRITEMASK_W),
615 inst->SaturateMode,
616 src1,
617 src_undef(),
618 src_undef());
619 }
620 }
621
622
623 static void precalc_lit( struct brw_wm_compile *c,
624 const struct prog_instruction *inst )
625 {
626 struct prog_src_register src0 = inst->SrcReg[0];
627 struct prog_dst_register dst = inst->DstReg;
628
629 if (dst.WriteMask & WRITEMASK_XW) {
630 struct prog_instruction *swz;
631
632 /* dst.xw = swz src0.1111
633 */
634 swz = emit_op(c,
635 OPCODE_SWZ,
636 dst_mask(dst, WRITEMASK_XW),
637 0,
638 src_swizzle1(src0, SWIZZLE_ONE),
639 src_undef(),
640 src_undef());
641 /* Avoid letting the negation flag of src0 affect our 1 constant. */
642 swz->SrcReg[0].Negate = NEGATE_NONE;
643 }
644
645 if (dst.WriteMask & WRITEMASK_YZ) {
646 emit_op(c,
647 OPCODE_LIT,
648 dst_mask(dst, WRITEMASK_YZ),
649 inst->SaturateMode,
650 src0,
651 src_undef(),
652 src_undef());
653 }
654 }
655
656
657 /**
658 * Some TEX instructions require extra code, cube map coordinate
659 * normalization, or coordinate scaling for RECT textures, etc.
660 * This function emits those extra instructions and the TEX
661 * instruction itself.
662 */
663 static void precalc_tex( struct brw_wm_compile *c,
664 const struct prog_instruction *inst )
665 {
666 struct prog_src_register coord;
667 struct prog_dst_register tmpcoord;
668 const GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit];
669
670 assert(unit < BRW_MAX_TEX_UNIT);
671
672 if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX) {
673 struct prog_instruction *out;
674 struct prog_dst_register tmp0 = get_temp(c);
675 struct prog_src_register tmp0src = src_reg_from_dst(tmp0);
676 struct prog_dst_register tmp1 = get_temp(c);
677 struct prog_src_register tmp1src = src_reg_from_dst(tmp1);
678 struct prog_src_register src0 = inst->SrcReg[0];
679
680 /* find longest component of coord vector and normalize it */
681 tmpcoord = get_temp(c);
682 coord = src_reg_from_dst(tmpcoord);
683
684 /* tmpcoord = src0 (i.e.: coord = src0) */
685 out = emit_op(c, OPCODE_MOV,
686 tmpcoord,
687 0,
688 src0,
689 src_undef(),
690 src_undef());
691 out->SrcReg[0].Negate = NEGATE_NONE;
692 out->SrcReg[0].Abs = 1;
693
694 /* tmp0 = MAX(coord.X, coord.Y) */
695 emit_op(c, OPCODE_MAX,
696 tmp0,
697 0,
698 src_swizzle1(coord, X),
699 src_swizzle1(coord, Y),
700 src_undef());
701
702 /* tmp1 = MAX(tmp0, coord.Z) */
703 emit_op(c, OPCODE_MAX,
704 tmp1,
705 0,
706 tmp0src,
707 src_swizzle1(coord, Z),
708 src_undef());
709
710 /* tmp0 = 1 / tmp1 */
711 emit_op(c, OPCODE_RCP,
712 dst_mask(tmp0, WRITEMASK_X),
713 0,
714 tmp1src,
715 src_undef(),
716 src_undef());
717
718 /* tmpCoord = src0 * tmp0 */
719 emit_op(c, OPCODE_MUL,
720 tmpcoord,
721 0,
722 src0,
723 src_swizzle1(tmp0src, SWIZZLE_X),
724 src_undef());
725
726 release_temp(c, tmp0);
727 release_temp(c, tmp1);
728 }
729 else if (inst->TexSrcTarget == TEXTURE_RECT_INDEX) {
730 struct prog_src_register scale =
731 search_or_add_param5( c,
732 STATE_INTERNAL,
733 STATE_TEXRECT_SCALE,
734 unit,
735 0,0 );
736
737 tmpcoord = get_temp(c);
738
739 /* coord.xy = MUL inst->SrcReg[0], { 1/width, 1/height }
740 */
741 emit_op(c,
742 OPCODE_MUL,
743 tmpcoord,
744 0,
745 inst->SrcReg[0],
746 src_swizzle(scale,
747 SWIZZLE_X,
748 SWIZZLE_Y,
749 SWIZZLE_ONE,
750 SWIZZLE_ONE),
751 src_undef());
752
753 coord = src_reg_from_dst(tmpcoord);
754 }
755 else {
756 coord = inst->SrcReg[0];
757 }
758
759 /* Need to emit YUV texture conversions by hand. Probably need to
760 * do this here - the alternative is in brw_wm_emit.c, but the
761 * conversion requires allocating a temporary variable which we
762 * don't have the facility to do that late in the compilation.
763 */
764 if (c->key.yuvtex_mask & (1 << unit)) {
765 /* convert ycbcr to RGBA */
766 GLboolean swap_uv = c->key.yuvtex_swap_mask & (1<<unit);
767
768 /*
769 CONST C0 = { -.5, -.0625, -.5, 1.164 }
770 CONST C1 = { 1.596, -0.813, 2.018, -.391 }
771 UYV = TEX ...
772 UYV.xyz = ADD UYV, C0
773 UYV.y = MUL UYV.y, C0.w
774 if (UV swaped)
775 RGB.xyz = MAD UYV.zzx, C1, UYV.y
776 else
777 RGB.xyz = MAD UYV.xxz, C1, UYV.y
778 RGB.y = MAD UYV.z, C1.w, RGB.y
779 */
780 struct prog_dst_register dst = inst->DstReg;
781 struct prog_dst_register tmp = get_temp(c);
782 struct prog_src_register tmpsrc = src_reg_from_dst(tmp);
783 struct prog_src_register C0 = search_or_add_const4f( c, -.5, -.0625, -.5, 1.164 );
784 struct prog_src_register C1 = search_or_add_const4f( c, 1.596, -0.813, 2.018, -.391 );
785
786 /* tmp = TEX ...
787 */
788 emit_tex_op(c,
789 OPCODE_TEX,
790 tmp,
791 inst->SaturateMode,
792 unit,
793 inst->TexSrcTarget,
794 inst->TexShadow,
795 coord,
796 src_undef(),
797 src_undef());
798
799 /* tmp.xyz = ADD TMP, C0
800 */
801 emit_op(c,
802 OPCODE_ADD,
803 dst_mask(tmp, WRITEMASK_XYZ),
804 0,
805 tmpsrc,
806 C0,
807 src_undef());
808
809 /* YUV.y = MUL YUV.y, C0.w
810 */
811
812 emit_op(c,
813 OPCODE_MUL,
814 dst_mask(tmp, WRITEMASK_Y),
815 0,
816 tmpsrc,
817 src_swizzle1(C0, W),
818 src_undef());
819
820 /*
821 * if (UV swaped)
822 * RGB.xyz = MAD YUV.zzx, C1, YUV.y
823 * else
824 * RGB.xyz = MAD YUV.xxz, C1, YUV.y
825 */
826
827 emit_op(c,
828 OPCODE_MAD,
829 dst_mask(dst, WRITEMASK_XYZ),
830 0,
831 swap_uv?src_swizzle(tmpsrc, Z,Z,X,X):src_swizzle(tmpsrc, X,X,Z,Z),
832 C1,
833 src_swizzle1(tmpsrc, Y));
834
835 /* RGB.y = MAD YUV.z, C1.w, RGB.y
836 */
837 emit_op(c,
838 OPCODE_MAD,
839 dst_mask(dst, WRITEMASK_Y),
840 0,
841 src_swizzle1(tmpsrc, Z),
842 src_swizzle1(C1, W),
843 src_swizzle1(src_reg_from_dst(dst), Y));
844
845 release_temp(c, tmp);
846 }
847 else {
848 /* ordinary RGBA tex instruction */
849 emit_tex_op(c,
850 OPCODE_TEX,
851 inst->DstReg,
852 inst->SaturateMode,
853 unit,
854 inst->TexSrcTarget,
855 inst->TexShadow,
856 coord,
857 src_undef(),
858 src_undef());
859 }
860
861 /* For GL_EXT_texture_swizzle: */
862 if (c->key.tex_swizzles[unit] != SWIZZLE_NOOP) {
863 /* swizzle the result of the TEX instruction */
864 struct prog_src_register tmpsrc = src_reg_from_dst(inst->DstReg);
865 emit_op(c, OPCODE_SWZ,
866 inst->DstReg,
867 SATURATE_OFF, /* saturate already done above */
868 src_swizzle4(tmpsrc, c->key.tex_swizzles[unit]),
869 src_undef(),
870 src_undef());
871 }
872
873 if ((inst->TexSrcTarget == TEXTURE_RECT_INDEX) ||
874 (inst->TexSrcTarget == TEXTURE_CUBE_INDEX))
875 release_temp(c, tmpcoord);
876 }
877
878
879 /**
880 * Check if the given TXP instruction really needs the divide-by-W step.
881 */
882 static GLboolean projtex( struct brw_wm_compile *c,
883 const struct prog_instruction *inst )
884 {
885 const struct prog_src_register src = inst->SrcReg[0];
886 GLboolean retVal;
887
888 assert(inst->Opcode == OPCODE_TXP);
889
890 /* Only try to detect the simplest cases. Could detect (later)
891 * cases where we are trying to emit code like RCP {1.0}, MUL x,
892 * {1.0}, and so on.
893 *
894 * More complex cases than this typically only arise from
895 * user-provided fragment programs anyway:
896 */
897 if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX)
898 retVal = GL_FALSE; /* ut2004 gun rendering !?! */
899 else if (src.File == PROGRAM_INPUT &&
900 GET_SWZ(src.Swizzle, W) == W &&
901 (c->key.proj_attrib_mask & (1 << src.Index)) == 0)
902 retVal = GL_FALSE;
903 else
904 retVal = GL_TRUE;
905
906 return retVal;
907 }
908
909
910 /**
911 * Emit code for TXP.
912 */
913 static void precalc_txp( struct brw_wm_compile *c,
914 const struct prog_instruction *inst )
915 {
916 struct prog_src_register src0 = inst->SrcReg[0];
917
918 if (projtex(c, inst)) {
919 struct prog_dst_register tmp = get_temp(c);
920 struct prog_instruction tmp_inst;
921
922 /* tmp0.w = RCP inst.arg[0][3]
923 */
924 emit_op(c,
925 OPCODE_RCP,
926 dst_mask(tmp, WRITEMASK_W),
927 0,
928 src_swizzle1(src0, GET_SWZ(src0.Swizzle, W)),
929 src_undef(),
930 src_undef());
931
932 /* tmp0.xyz = MUL inst.arg[0], tmp0.wwww
933 */
934 emit_op(c,
935 OPCODE_MUL,
936 dst_mask(tmp, WRITEMASK_XYZ),
937 0,
938 src0,
939 src_swizzle1(src_reg_from_dst(tmp), W),
940 src_undef());
941
942 /* dst = precalc(TEX tmp0)
943 */
944 tmp_inst = *inst;
945 tmp_inst.SrcReg[0] = src_reg_from_dst(tmp);
946 precalc_tex(c, &tmp_inst);
947
948 release_temp(c, tmp);
949 }
950 else
951 {
952 /* dst = precalc(TEX src0)
953 */
954 precalc_tex(c, inst);
955 }
956 }
957
958
959
960 static void emit_render_target_writes( struct brw_wm_compile *c )
961 {
962 struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
963 struct prog_src_register outdepth = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DEPTH);
964 struct prog_src_register outcolor;
965 GLuint i;
966
967 struct prog_instruction *inst, *last_inst;
968
969 /* The inst->Aux field is used for FB write target and the EOT marker */
970
971 if (c->key.nr_color_regions > 1) {
972 for (i = 0 ; i < c->key.nr_color_regions; i++) {
973 outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0 + i);
974 last_inst = inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(), 0),
975 0, outcolor, payload_r0_depth, outdepth);
976 inst->Aux = INST_AUX_TARGET(i);
977 if (c->fp_fragcolor_emitted) {
978 outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR);
979 last_inst = inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(), 0),
980 0, outcolor, payload_r0_depth, outdepth);
981 inst->Aux = INST_AUX_TARGET(i);
982 }
983 }
984 last_inst->Aux |= INST_AUX_EOT;
985 }
986 else {
987 /* if gl_FragData[0] is written, use it, else use gl_FragColor */
988 if (c->fp->program.Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DATA0))
989 outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0);
990 else
991 outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR);
992
993 inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0),
994 0, outcolor, payload_r0_depth, outdepth);
995 inst->Aux = INST_AUX_EOT | INST_AUX_TARGET(0);
996 }
997 }
998
999
1000
1001
1002 /***********************************************************************
1003 * Emit INTERP instructions ahead of first use of each attrib.
1004 */
1005
1006 static void validate_src_regs( struct brw_wm_compile *c,
1007 const struct prog_instruction *inst )
1008 {
1009 GLuint nr_args = brw_wm_nr_args( inst->Opcode );
1010 GLuint i;
1011
1012 for (i = 0; i < nr_args; i++) {
1013 if (inst->SrcReg[i].File == PROGRAM_INPUT) {
1014 GLuint idx = inst->SrcReg[i].Index;
1015 if (!(c->fp_interp_emitted & (1<<idx))) {
1016 emit_interp(c, idx);
1017 }
1018 }
1019 }
1020 }
1021
1022 static void validate_dst_regs( struct brw_wm_compile *c,
1023 const struct prog_instruction *inst )
1024 {
1025 if (inst->DstReg.File == PROGRAM_OUTPUT) {
1026 GLuint idx = inst->DstReg.Index;
1027 if (idx == FRAG_RESULT_COLOR)
1028 c->fp_fragcolor_emitted = 1;
1029 }
1030 }
1031
1032 static void print_insns( const struct prog_instruction *insn,
1033 GLuint nr )
1034 {
1035 GLuint i;
1036 for (i = 0; i < nr; i++, insn++) {
1037 _mesa_printf("%3d: ", i);
1038 if (insn->Opcode < MAX_OPCODE)
1039 _mesa_print_instruction(insn);
1040 else if (insn->Opcode < MAX_WM_OPCODE) {
1041 GLuint idx = insn->Opcode - MAX_OPCODE;
1042
1043 _mesa_print_alu_instruction(insn,
1044 wm_opcode_strings[idx],
1045 3);
1046 }
1047 else
1048 _mesa_printf("965 Opcode %d\n", insn->Opcode);
1049 }
1050 }
1051
1052
1053 /**
1054 * Initial pass for fragment program code generation.
1055 * This function is used by both the GLSL and non-GLSL paths.
1056 */
1057 void brw_wm_pass_fp( struct brw_wm_compile *c )
1058 {
1059 struct brw_fragment_program *fp = c->fp;
1060 GLuint insn;
1061
1062 if (INTEL_DEBUG & DEBUG_WM) {
1063 _mesa_printf("pre-fp:\n");
1064 _mesa_print_program(&fp->program.Base);
1065 _mesa_printf("\n");
1066 }
1067
1068 c->pixel_xy = src_undef();
1069 c->delta_xy = src_undef();
1070 c->pixel_w = src_undef();
1071 c->nr_fp_insns = 0;
1072 c->fp->tex_units_used = 0x0;
1073
1074 /* Emit preamble instructions. This is where special instructions such as
1075 * WM_CINTERP, WM_LINTERP, WM_PINTERP and WM_WPOSXY are emitted to
1076 * compute shader inputs from varying vars.
1077 */
1078 for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) {
1079 const struct prog_instruction *inst = &fp->program.Base.Instructions[insn];
1080 validate_src_regs(c, inst);
1081 validate_dst_regs(c, inst);
1082 }
1083
1084 /* Loop over all instructions doing assorted simplifications and
1085 * transformations.
1086 */
1087 for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) {
1088 const struct prog_instruction *inst = &fp->program.Base.Instructions[insn];
1089 struct prog_instruction *out;
1090
1091 /* Check for INPUT values, emit INTERP instructions where
1092 * necessary:
1093 */
1094
1095 switch (inst->Opcode) {
1096 case OPCODE_SWZ:
1097 out = emit_insn(c, inst);
1098 out->Opcode = OPCODE_MOV;
1099 break;
1100
1101 case OPCODE_ABS:
1102 out = emit_insn(c, inst);
1103 out->Opcode = OPCODE_MOV;
1104 out->SrcReg[0].Negate = NEGATE_NONE;
1105 out->SrcReg[0].Abs = 1;
1106 break;
1107
1108 case OPCODE_SUB:
1109 out = emit_insn(c, inst);
1110 out->Opcode = OPCODE_ADD;
1111 out->SrcReg[1].Negate ^= NEGATE_XYZW;
1112 break;
1113
1114 case OPCODE_SCS:
1115 out = emit_insn(c, inst);
1116 /* This should probably be done in the parser.
1117 */
1118 out->DstReg.WriteMask &= WRITEMASK_XY;
1119 break;
1120
1121 case OPCODE_DST:
1122 precalc_dst(c, inst);
1123 break;
1124
1125 case OPCODE_LIT:
1126 precalc_lit(c, inst);
1127 break;
1128
1129 case OPCODE_TEX:
1130 precalc_tex(c, inst);
1131 break;
1132
1133 case OPCODE_TXP:
1134 precalc_txp(c, inst);
1135 break;
1136
1137 case OPCODE_TXB:
1138 out = emit_insn(c, inst);
1139 out->TexSrcUnit = fp->program.Base.SamplerUnits[inst->TexSrcUnit];
1140 assert(out->TexSrcUnit < BRW_MAX_TEX_UNIT);
1141 break;
1142
1143 case OPCODE_XPD:
1144 out = emit_insn(c, inst);
1145 /* This should probably be done in the parser.
1146 */
1147 out->DstReg.WriteMask &= WRITEMASK_XYZ;
1148 break;
1149
1150 case OPCODE_KIL:
1151 out = emit_insn(c, inst);
1152 /* This should probably be done in the parser.
1153 */
1154 out->DstReg.WriteMask = 0;
1155 break;
1156 case OPCODE_END:
1157 emit_render_target_writes(c);
1158 break;
1159 case OPCODE_PRINT:
1160 break;
1161 default:
1162 if (brw_wm_is_scalar_result(inst->Opcode))
1163 emit_scalar_insn(c, inst);
1164 else
1165 emit_insn(c, inst);
1166 break;
1167 }
1168 }
1169
1170 if (INTEL_DEBUG & DEBUG_WM) {
1171 _mesa_printf("pass_fp:\n");
1172 print_insns( c->prog_instructions, c->nr_fp_insns );
1173 _mesa_printf("\n");
1174 }
1175 }
1176