Merge branch 'master' into glsl2
[mesa.git] / src / mesa / drivers / dri / i965 / brw_wm_fp.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "main/glheader.h"
34 #include "main/macros.h"
35 #include "main/enums.h"
36 #include "brw_context.h"
37 #include "brw_wm.h"
38 #include "brw_util.h"
39
40 #include "program/prog_parameter.h"
41 #include "program/prog_print.h"
42 #include "program/prog_statevars.h"
43
44
/** An invalid texture target */
#define TEX_TARGET_NONE NUM_TEXTURE_TARGETS

/** An invalid texture unit */
#define TEX_UNIT_NONE BRW_MAX_TEX_UNIT

/** Register index of the first temporary handed out by get_temp() */
#define FIRST_INTERNAL_TEMP MAX_NV_FRAGMENT_PROGRAM_TEMPS

/* Channel indices, used as swizzle selectors throughout this file */
#define X 0
#define Y 1
#define Z 2
#define W 3
57
58
/**
 * Printable names of the driver-private WM_* opcodes.
 * print_insns() indexes this table with (Opcode - MAX_OPCODE), so the
 * entries must stay in the same order as those opcodes are numbered.
 */
static const char *wm_opcode_strings[] = {
   "PIXELXY",
   "DELTAXY",
   "PIXELW",
   "LINTERP",
   "PINTERP",
   "CINTERP",
   "WPOSXY",
   "FB_WRITE",
   "FRONTFACING",
};
70
71 #if 0
72 static const char *wm_file_strings[] = {
73 "PAYLOAD"
74 };
75 #endif
76
77
78 /***********************************************************************
79 * Source regs
80 */
81
82 static struct prog_src_register src_reg(GLuint file, GLuint idx)
83 {
84 struct prog_src_register reg;
85 reg.File = file;
86 reg.Index = idx;
87 reg.Swizzle = SWIZZLE_NOOP;
88 reg.RelAddr = 0;
89 reg.Negate = NEGATE_NONE;
90 reg.Abs = 0;
91 reg.HasIndex2 = 0;
92 return reg;
93 }
94
95 static struct prog_src_register src_reg_from_dst(struct prog_dst_register dst)
96 {
97 return src_reg(dst.File, dst.Index);
98 }
99
100 static struct prog_src_register src_undef( void )
101 {
102 return src_reg(PROGRAM_UNDEFINED, 0);
103 }
104
105 static GLboolean src_is_undef(struct prog_src_register src)
106 {
107 return src.File == PROGRAM_UNDEFINED;
108 }
109
110 static struct prog_src_register src_swizzle( struct prog_src_register reg, int x, int y, int z, int w )
111 {
112 reg.Swizzle = MAKE_SWIZZLE4(x,y,z,w);
113 return reg;
114 }
115
116 static struct prog_src_register src_swizzle1( struct prog_src_register reg, int x )
117 {
118 return src_swizzle(reg, x, x, x, x);
119 }
120
121 static struct prog_src_register src_swizzle4( struct prog_src_register reg, uint swizzle )
122 {
123 reg.Swizzle = swizzle;
124 return reg;
125 }
126
127
128 /***********************************************************************
129 * Dest regs
130 */
131
132 static struct prog_dst_register dst_reg(GLuint file, GLuint idx)
133 {
134 struct prog_dst_register reg;
135 reg.File = file;
136 reg.Index = idx;
137 reg.WriteMask = WRITEMASK_XYZW;
138 reg.RelAddr = 0;
139 reg.CondMask = COND_TR;
140 reg.CondSwizzle = 0;
141 reg.CondSrc = 0;
142 return reg;
143 }
144
145 static struct prog_dst_register dst_mask( struct prog_dst_register reg, int mask )
146 {
147 reg.WriteMask &= mask;
148 return reg;
149 }
150
151 static struct prog_dst_register dst_undef( void )
152 {
153 return dst_reg(PROGRAM_UNDEFINED, 0);
154 }
155
156
157
158 static struct prog_dst_register get_temp( struct brw_wm_compile *c )
159 {
160 int bit = _mesa_ffs( ~c->fp_temp );
161
162 if (!bit) {
163 printf("%s: out of temporaries\n", __FILE__);
164 exit(1);
165 }
166
167 c->fp_temp |= 1<<(bit-1);
168 return dst_reg(PROGRAM_TEMPORARY, FIRST_INTERNAL_TEMP+(bit-1));
169 }
170
171
172 static void release_temp( struct brw_wm_compile *c, struct prog_dst_register temp )
173 {
174 c->fp_temp &= ~(1 << (temp.Index - FIRST_INTERNAL_TEMP));
175 }
176
177
178 /***********************************************************************
179 * Instructions
180 */
181
182 static struct prog_instruction *get_fp_inst(struct brw_wm_compile *c)
183 {
184 assert(c->nr_fp_insns < BRW_WM_MAX_INSN);
185 memset(&c->prog_instructions[c->nr_fp_insns], 0,
186 sizeof(*c->prog_instructions));
187 return &c->prog_instructions[c->nr_fp_insns++];
188 }
189
190 static struct prog_instruction *emit_insn(struct brw_wm_compile *c,
191 const struct prog_instruction *inst0)
192 {
193 struct prog_instruction *inst = get_fp_inst(c);
194 *inst = *inst0;
195 return inst;
196 }
197
198 static struct prog_instruction * emit_tex_op(struct brw_wm_compile *c,
199 GLuint op,
200 struct prog_dst_register dest,
201 GLuint saturate,
202 GLuint tex_src_unit,
203 GLuint tex_src_target,
204 GLuint tex_shadow,
205 struct prog_src_register src0,
206 struct prog_src_register src1,
207 struct prog_src_register src2 )
208 {
209 struct prog_instruction *inst = get_fp_inst(c);
210
211 assert(tex_src_unit < BRW_MAX_TEX_UNIT ||
212 tex_src_unit == TEX_UNIT_NONE);
213 assert(tex_src_target < NUM_TEXTURE_TARGETS ||
214 tex_src_target == TEX_TARGET_NONE);
215
216 /* update mask of which texture units are referenced by this program */
217 if (tex_src_unit != TEX_UNIT_NONE)
218 c->fp->tex_units_used |= (1 << tex_src_unit);
219
220 memset(inst, 0, sizeof(*inst));
221
222 inst->Opcode = op;
223 inst->DstReg = dest;
224 inst->SaturateMode = saturate;
225 inst->TexSrcUnit = tex_src_unit;
226 inst->TexSrcTarget = tex_src_target;
227 inst->TexShadow = tex_shadow;
228 inst->SrcReg[0] = src0;
229 inst->SrcReg[1] = src1;
230 inst->SrcReg[2] = src2;
231 return inst;
232 }
233
234
235 static struct prog_instruction * emit_op(struct brw_wm_compile *c,
236 GLuint op,
237 struct prog_dst_register dest,
238 GLuint saturate,
239 struct prog_src_register src0,
240 struct prog_src_register src1,
241 struct prog_src_register src2 )
242 {
243 return emit_tex_op(c, op, dest, saturate,
244 TEX_UNIT_NONE, TEX_TARGET_NONE, 0, /* unit, tgt, shadow */
245 src0, src1, src2);
246 }
247
248
249 /* Many Mesa opcodes produce the same value across all the result channels.
250 * We'd rather not have to support that splatting in the opcode implementations,
251 * and brw_wm_pass*.c wants to optimize them out by shuffling references around
252 * anyway. We can easily get both by emitting the opcode to one channel, and
253 * then MOVing it to the others, which brw_wm_pass*.c already understands.
254 */
255 static struct prog_instruction *emit_scalar_insn(struct brw_wm_compile *c,
256 const struct prog_instruction *inst0)
257 {
258 struct prog_instruction *inst;
259 unsigned int dst_chan;
260 unsigned int other_channel_mask;
261
262 if (inst0->DstReg.WriteMask == 0)
263 return NULL;
264
265 dst_chan = _mesa_ffs(inst0->DstReg.WriteMask) - 1;
266 inst = get_fp_inst(c);
267 *inst = *inst0;
268 inst->DstReg.WriteMask = 1 << dst_chan;
269
270 other_channel_mask = inst0->DstReg.WriteMask & ~(1 << dst_chan);
271 if (other_channel_mask != 0) {
272 inst = emit_op(c,
273 OPCODE_MOV,
274 dst_mask(inst0->DstReg, other_channel_mask),
275 0,
276 src_swizzle1(src_reg_from_dst(inst0->DstReg), dst_chan),
277 src_undef(),
278 src_undef());
279 }
280 return inst;
281 }
282
283
284 /***********************************************************************
285 * Special instructions for interpolation and other tasks
286 */
287
288 static struct prog_src_register get_pixel_xy( struct brw_wm_compile *c )
289 {
290 if (src_is_undef(c->pixel_xy)) {
291 struct prog_dst_register pixel_xy = get_temp(c);
292 struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
293
294
295 /* Emit the out calculations, and hold onto the results. Use
296 * two instructions as a temporary is required.
297 */
298 /* pixel_xy.xy = PIXELXY payload[0];
299 */
300 emit_op(c,
301 WM_PIXELXY,
302 dst_mask(pixel_xy, WRITEMASK_XY),
303 0,
304 payload_r0_depth,
305 src_undef(),
306 src_undef());
307
308 c->pixel_xy = src_reg_from_dst(pixel_xy);
309 }
310
311 return c->pixel_xy;
312 }
313
314 static struct prog_src_register get_delta_xy( struct brw_wm_compile *c )
315 {
316 if (src_is_undef(c->delta_xy)) {
317 struct prog_dst_register delta_xy = get_temp(c);
318 struct prog_src_register pixel_xy = get_pixel_xy(c);
319 struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
320
321 /* deltas.xy = DELTAXY pixel_xy, payload[0]
322 */
323 emit_op(c,
324 WM_DELTAXY,
325 dst_mask(delta_xy, WRITEMASK_XY),
326 0,
327 pixel_xy,
328 payload_r0_depth,
329 src_undef());
330
331 c->delta_xy = src_reg_from_dst(delta_xy);
332 }
333
334 return c->delta_xy;
335 }
336
337 static struct prog_src_register get_pixel_w( struct brw_wm_compile *c )
338 {
339 if (src_is_undef(c->pixel_w)) {
340 struct prog_dst_register pixel_w = get_temp(c);
341 struct prog_src_register deltas = get_delta_xy(c);
342 struct prog_src_register interp_wpos = src_reg(PROGRAM_PAYLOAD, FRAG_ATTRIB_WPOS);
343
344 /* deltas.xyw = DELTAS2 deltas.xy, payload.interp_wpos.x
345 */
346 emit_op(c,
347 WM_PIXELW,
348 dst_mask(pixel_w, WRITEMASK_W),
349 0,
350 interp_wpos,
351 deltas,
352 src_undef());
353
354
355 c->pixel_w = src_reg_from_dst(pixel_w);
356 }
357
358 return c->pixel_w;
359 }
360
361 static void emit_interp( struct brw_wm_compile *c,
362 GLuint idx )
363 {
364 struct prog_dst_register dst = dst_reg(PROGRAM_INPUT, idx);
365 struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx);
366 struct prog_src_register deltas = get_delta_xy(c);
367
368 /* Need to use PINTERP on attributes which have been
369 * multiplied by 1/W in the SF program, and LINTERP on those
370 * which have not:
371 */
372 switch (idx) {
373 case FRAG_ATTRIB_WPOS:
374 /* Have to treat wpos.xy specially:
375 */
376 emit_op(c,
377 WM_WPOSXY,
378 dst_mask(dst, WRITEMASK_XY),
379 0,
380 get_pixel_xy(c),
381 src_undef(),
382 src_undef());
383
384 dst = dst_mask(dst, WRITEMASK_ZW);
385
386 /* PROGRAM_INPUT.attr.xyzw = INTERP payload.interp[attr].x, deltas.xyw
387 */
388 emit_op(c,
389 WM_LINTERP,
390 dst,
391 0,
392 interp,
393 deltas,
394 src_undef());
395 break;
396 case FRAG_ATTRIB_COL0:
397 case FRAG_ATTRIB_COL1:
398 if (c->key.flat_shade) {
399 emit_op(c,
400 WM_CINTERP,
401 dst,
402 0,
403 interp,
404 src_undef(),
405 src_undef());
406 }
407 else {
408 if (c->key.linear_color) {
409 emit_op(c,
410 WM_LINTERP,
411 dst,
412 0,
413 interp,
414 deltas,
415 src_undef());
416 }
417 else {
418 /* perspective-corrected color interpolation */
419 emit_op(c,
420 WM_PINTERP,
421 dst,
422 0,
423 interp,
424 deltas,
425 get_pixel_w(c));
426 }
427 }
428 break;
429 case FRAG_ATTRIB_FOGC:
430 /* Interpolate the fog coordinate */
431 emit_op(c,
432 WM_PINTERP,
433 dst_mask(dst, WRITEMASK_X),
434 0,
435 interp,
436 deltas,
437 get_pixel_w(c));
438
439 emit_op(c,
440 OPCODE_MOV,
441 dst_mask(dst, WRITEMASK_YZW),
442 0,
443 src_swizzle(interp,
444 SWIZZLE_ZERO,
445 SWIZZLE_ZERO,
446 SWIZZLE_ZERO,
447 SWIZZLE_ONE),
448 src_undef(),
449 src_undef());
450 break;
451
452 case FRAG_ATTRIB_FACE:
453 emit_op(c,
454 WM_FRONTFACING,
455 dst_mask(dst, WRITEMASK_X),
456 0,
457 src_undef(),
458 src_undef(),
459 src_undef());
460 break;
461
462 case FRAG_ATTRIB_PNTC:
463 /* XXX review/test this case */
464 emit_op(c,
465 WM_PINTERP,
466 dst_mask(dst, WRITEMASK_XY),
467 0,
468 interp,
469 deltas,
470 get_pixel_w(c));
471
472 emit_op(c,
473 OPCODE_MOV,
474 dst_mask(dst, WRITEMASK_ZW),
475 0,
476 src_swizzle(interp,
477 SWIZZLE_ZERO,
478 SWIZZLE_ZERO,
479 SWIZZLE_ZERO,
480 SWIZZLE_ONE),
481 src_undef(),
482 src_undef());
483 break;
484
485 default:
486 emit_op(c,
487 WM_PINTERP,
488 dst,
489 0,
490 interp,
491 deltas,
492 get_pixel_w(c));
493 break;
494 }
495
496 c->fp_interp_emitted |= 1<<idx;
497 }
498
499 /***********************************************************************
500 * Hacks to extend the program parameter and constant lists.
501 */
502
503 /* Add the fog parameters to the parameter list of the original
504 * program, rather than creating a new list. Doesn't really do any
505 * harm and it's not as if the parameter handling isn't a big hack
506 * anyway.
507 */
508 static struct prog_src_register search_or_add_param5(struct brw_wm_compile *c,
509 GLint s0,
510 GLint s1,
511 GLint s2,
512 GLint s3,
513 GLint s4)
514 {
515 struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters;
516 gl_state_index tokens[STATE_LENGTH];
517 GLuint idx;
518 tokens[0] = s0;
519 tokens[1] = s1;
520 tokens[2] = s2;
521 tokens[3] = s3;
522 tokens[4] = s4;
523
524 for (idx = 0; idx < paramList->NumParameters; idx++) {
525 if (paramList->Parameters[idx].Type == PROGRAM_STATE_VAR &&
526 memcmp(paramList->Parameters[idx].StateIndexes, tokens, sizeof(tokens)) == 0)
527 return src_reg(PROGRAM_STATE_VAR, idx);
528 }
529
530 idx = _mesa_add_state_reference( paramList, tokens );
531
532 return src_reg(PROGRAM_STATE_VAR, idx);
533 }
534
535
536 static struct prog_src_register search_or_add_const4f( struct brw_wm_compile *c,
537 GLfloat s0,
538 GLfloat s1,
539 GLfloat s2,
540 GLfloat s3)
541 {
542 struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters;
543 GLfloat values[4];
544 GLuint idx;
545 GLuint swizzle;
546
547 values[0] = s0;
548 values[1] = s1;
549 values[2] = s2;
550 values[3] = s3;
551
552 /* Have to search, otherwise multiple compilations will each grow
553 * the parameter list.
554 */
555 for (idx = 0; idx < paramList->NumParameters; idx++) {
556 if (paramList->Parameters[idx].Type == PROGRAM_CONSTANT &&
557 memcmp(paramList->ParameterValues[idx], values, sizeof(values)) == 0)
558
559 /* XXX: this mimics the mesa bug which puts all constants and
560 * parameters into the "PROGRAM_STATE_VAR" category:
561 */
562 return src_reg(PROGRAM_STATE_VAR, idx);
563 }
564
565 idx = _mesa_add_unnamed_constant( paramList, values, 4, &swizzle );
566 assert(swizzle == SWIZZLE_NOOP); /* Need to handle swizzle in reg setup */
567 return src_reg(PROGRAM_STATE_VAR, idx);
568 }
569
570
571
572 /***********************************************************************
573 * Expand various instructions here to simpler forms.
574 */
575 static void precalc_dst( struct brw_wm_compile *c,
576 const struct prog_instruction *inst )
577 {
578 struct prog_src_register src0 = inst->SrcReg[0];
579 struct prog_src_register src1 = inst->SrcReg[1];
580 struct prog_dst_register dst = inst->DstReg;
581
582 if (dst.WriteMask & WRITEMASK_Y) {
583 /* dst.y = mul src0.y, src1.y
584 */
585 emit_op(c,
586 OPCODE_MUL,
587 dst_mask(dst, WRITEMASK_Y),
588 inst->SaturateMode,
589 src0,
590 src1,
591 src_undef());
592 }
593
594 if (dst.WriteMask & WRITEMASK_XZ) {
595 struct prog_instruction *swz;
596 GLuint z = GET_SWZ(src0.Swizzle, Z);
597
598 /* dst.xz = swz src0.1zzz
599 */
600 swz = emit_op(c,
601 OPCODE_SWZ,
602 dst_mask(dst, WRITEMASK_XZ),
603 inst->SaturateMode,
604 src_swizzle(src0, SWIZZLE_ONE, z, z, z),
605 src_undef(),
606 src_undef());
607 /* Avoid letting negation flag of src0 affect our 1 constant. */
608 swz->SrcReg[0].Negate &= ~NEGATE_X;
609 }
610 if (dst.WriteMask & WRITEMASK_W) {
611 /* dst.w = mov src1.w
612 */
613 emit_op(c,
614 OPCODE_MOV,
615 dst_mask(dst, WRITEMASK_W),
616 inst->SaturateMode,
617 src1,
618 src_undef(),
619 src_undef());
620 }
621 }
622
623
624 static void precalc_lit( struct brw_wm_compile *c,
625 const struct prog_instruction *inst )
626 {
627 struct prog_src_register src0 = inst->SrcReg[0];
628 struct prog_dst_register dst = inst->DstReg;
629
630 if (dst.WriteMask & WRITEMASK_XW) {
631 struct prog_instruction *swz;
632
633 /* dst.xw = swz src0.1111
634 */
635 swz = emit_op(c,
636 OPCODE_SWZ,
637 dst_mask(dst, WRITEMASK_XW),
638 0,
639 src_swizzle1(src0, SWIZZLE_ONE),
640 src_undef(),
641 src_undef());
642 /* Avoid letting the negation flag of src0 affect our 1 constant. */
643 swz->SrcReg[0].Negate = NEGATE_NONE;
644 }
645
646 if (dst.WriteMask & WRITEMASK_YZ) {
647 emit_op(c,
648 OPCODE_LIT,
649 dst_mask(dst, WRITEMASK_YZ),
650 inst->SaturateMode,
651 src0,
652 src_undef(),
653 src_undef());
654 }
655 }
656
657
658 /**
659 * Some TEX instructions require extra code, cube map coordinate
660 * normalization, or coordinate scaling for RECT textures, etc.
661 * This function emits those extra instructions and the TEX
662 * instruction itself.
663 */
664 static void precalc_tex( struct brw_wm_compile *c,
665 const struct prog_instruction *inst )
666 {
667 struct prog_src_register coord;
668 struct prog_dst_register tmpcoord;
669 const GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit];
670
671 assert(unit < BRW_MAX_TEX_UNIT);
672
673 if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX) {
674 struct prog_instruction *out;
675 struct prog_dst_register tmp0 = get_temp(c);
676 struct prog_src_register tmp0src = src_reg_from_dst(tmp0);
677 struct prog_dst_register tmp1 = get_temp(c);
678 struct prog_src_register tmp1src = src_reg_from_dst(tmp1);
679 struct prog_src_register src0 = inst->SrcReg[0];
680
681 /* find longest component of coord vector and normalize it */
682 tmpcoord = get_temp(c);
683 coord = src_reg_from_dst(tmpcoord);
684
685 /* tmpcoord = src0 (i.e.: coord = src0) */
686 out = emit_op(c, OPCODE_MOV,
687 tmpcoord,
688 0,
689 src0,
690 src_undef(),
691 src_undef());
692 out->SrcReg[0].Negate = NEGATE_NONE;
693 out->SrcReg[0].Abs = 1;
694
695 /* tmp0 = MAX(coord.X, coord.Y) */
696 emit_op(c, OPCODE_MAX,
697 tmp0,
698 0,
699 src_swizzle1(coord, X),
700 src_swizzle1(coord, Y),
701 src_undef());
702
703 /* tmp1 = MAX(tmp0, coord.Z) */
704 emit_op(c, OPCODE_MAX,
705 tmp1,
706 0,
707 tmp0src,
708 src_swizzle1(coord, Z),
709 src_undef());
710
711 /* tmp0 = 1 / tmp1 */
712 emit_op(c, OPCODE_RCP,
713 dst_mask(tmp0, WRITEMASK_X),
714 0,
715 tmp1src,
716 src_undef(),
717 src_undef());
718
719 /* tmpCoord = src0 * tmp0 */
720 emit_op(c, OPCODE_MUL,
721 tmpcoord,
722 0,
723 src0,
724 src_swizzle1(tmp0src, SWIZZLE_X),
725 src_undef());
726
727 release_temp(c, tmp0);
728 release_temp(c, tmp1);
729 }
730 else if (inst->TexSrcTarget == TEXTURE_RECT_INDEX) {
731 struct prog_src_register scale =
732 search_or_add_param5( c,
733 STATE_INTERNAL,
734 STATE_TEXRECT_SCALE,
735 unit,
736 0,0 );
737
738 tmpcoord = get_temp(c);
739
740 /* coord.xy = MUL inst->SrcReg[0], { 1/width, 1/height }
741 */
742 emit_op(c,
743 OPCODE_MUL,
744 tmpcoord,
745 0,
746 inst->SrcReg[0],
747 src_swizzle(scale,
748 SWIZZLE_X,
749 SWIZZLE_Y,
750 SWIZZLE_ONE,
751 SWIZZLE_ONE),
752 src_undef());
753
754 coord = src_reg_from_dst(tmpcoord);
755 }
756 else {
757 coord = inst->SrcReg[0];
758 }
759
760 /* Need to emit YUV texture conversions by hand. Probably need to
761 * do this here - the alternative is in brw_wm_emit.c, but the
762 * conversion requires allocating a temporary variable which we
763 * don't have the facility to do that late in the compilation.
764 */
765 if (c->key.yuvtex_mask & (1 << unit)) {
766 /* convert ycbcr to RGBA */
767 GLboolean swap_uv = c->key.yuvtex_swap_mask & (1<<unit);
768
769 /*
770 CONST C0 = { -.5, -.0625, -.5, 1.164 }
771 CONST C1 = { 1.596, -0.813, 2.018, -.391 }
772 UYV = TEX ...
773 UYV.xyz = ADD UYV, C0
774 UYV.y = MUL UYV.y, C0.w
775 if (UV swaped)
776 RGB.xyz = MAD UYV.zzx, C1, UYV.y
777 else
778 RGB.xyz = MAD UYV.xxz, C1, UYV.y
779 RGB.y = MAD UYV.z, C1.w, RGB.y
780 */
781 struct prog_dst_register dst = inst->DstReg;
782 struct prog_dst_register tmp = get_temp(c);
783 struct prog_src_register tmpsrc = src_reg_from_dst(tmp);
784 struct prog_src_register C0 = search_or_add_const4f( c, -.5, -.0625, -.5, 1.164 );
785 struct prog_src_register C1 = search_or_add_const4f( c, 1.596, -0.813, 2.018, -.391 );
786
787 /* tmp = TEX ...
788 */
789 emit_tex_op(c,
790 OPCODE_TEX,
791 tmp,
792 inst->SaturateMode,
793 unit,
794 inst->TexSrcTarget,
795 inst->TexShadow,
796 coord,
797 src_undef(),
798 src_undef());
799
800 /* tmp.xyz = ADD TMP, C0
801 */
802 emit_op(c,
803 OPCODE_ADD,
804 dst_mask(tmp, WRITEMASK_XYZ),
805 0,
806 tmpsrc,
807 C0,
808 src_undef());
809
810 /* YUV.y = MUL YUV.y, C0.w
811 */
812
813 emit_op(c,
814 OPCODE_MUL,
815 dst_mask(tmp, WRITEMASK_Y),
816 0,
817 tmpsrc,
818 src_swizzle1(C0, W),
819 src_undef());
820
821 /*
822 * if (UV swaped)
823 * RGB.xyz = MAD YUV.zzx, C1, YUV.y
824 * else
825 * RGB.xyz = MAD YUV.xxz, C1, YUV.y
826 */
827
828 emit_op(c,
829 OPCODE_MAD,
830 dst_mask(dst, WRITEMASK_XYZ),
831 0,
832 swap_uv?src_swizzle(tmpsrc, Z,Z,X,X):src_swizzle(tmpsrc, X,X,Z,Z),
833 C1,
834 src_swizzle1(tmpsrc, Y));
835
836 /* RGB.y = MAD YUV.z, C1.w, RGB.y
837 */
838 emit_op(c,
839 OPCODE_MAD,
840 dst_mask(dst, WRITEMASK_Y),
841 0,
842 src_swizzle1(tmpsrc, Z),
843 src_swizzle1(C1, W),
844 src_swizzle1(src_reg_from_dst(dst), Y));
845
846 release_temp(c, tmp);
847 }
848 else {
849 /* ordinary RGBA tex instruction */
850 emit_tex_op(c,
851 OPCODE_TEX,
852 inst->DstReg,
853 inst->SaturateMode,
854 unit,
855 inst->TexSrcTarget,
856 inst->TexShadow,
857 coord,
858 src_undef(),
859 src_undef());
860 }
861
862 /* For GL_EXT_texture_swizzle: */
863 if (c->key.tex_swizzles[unit] != SWIZZLE_NOOP) {
864 /* swizzle the result of the TEX instruction */
865 struct prog_src_register tmpsrc = src_reg_from_dst(inst->DstReg);
866 emit_op(c, OPCODE_SWZ,
867 inst->DstReg,
868 SATURATE_OFF, /* saturate already done above */
869 src_swizzle4(tmpsrc, c->key.tex_swizzles[unit]),
870 src_undef(),
871 src_undef());
872 }
873
874 if ((inst->TexSrcTarget == TEXTURE_RECT_INDEX) ||
875 (inst->TexSrcTarget == TEXTURE_CUBE_INDEX))
876 release_temp(c, tmpcoord);
877 }
878
879
880 /**
881 * Check if the given TXP instruction really needs the divide-by-W step.
882 */
883 static GLboolean projtex( struct brw_wm_compile *c,
884 const struct prog_instruction *inst )
885 {
886 const struct prog_src_register src = inst->SrcReg[0];
887 GLboolean retVal;
888
889 assert(inst->Opcode == OPCODE_TXP);
890
891 /* Only try to detect the simplest cases. Could detect (later)
892 * cases where we are trying to emit code like RCP {1.0}, MUL x,
893 * {1.0}, and so on.
894 *
895 * More complex cases than this typically only arise from
896 * user-provided fragment programs anyway:
897 */
898 if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX)
899 retVal = GL_FALSE; /* ut2004 gun rendering !?! */
900 else if (src.File == PROGRAM_INPUT &&
901 GET_SWZ(src.Swizzle, W) == W &&
902 (c->key.proj_attrib_mask & (1 << src.Index)) == 0)
903 retVal = GL_FALSE;
904 else
905 retVal = GL_TRUE;
906
907 return retVal;
908 }
909
910
911 /**
912 * Emit code for TXP.
913 */
914 static void precalc_txp( struct brw_wm_compile *c,
915 const struct prog_instruction *inst )
916 {
917 struct prog_src_register src0 = inst->SrcReg[0];
918
919 if (projtex(c, inst)) {
920 struct prog_dst_register tmp = get_temp(c);
921 struct prog_instruction tmp_inst;
922
923 /* tmp0.w = RCP inst.arg[0][3]
924 */
925 emit_op(c,
926 OPCODE_RCP,
927 dst_mask(tmp, WRITEMASK_W),
928 0,
929 src_swizzle1(src0, GET_SWZ(src0.Swizzle, W)),
930 src_undef(),
931 src_undef());
932
933 /* tmp0.xyz = MUL inst.arg[0], tmp0.wwww
934 */
935 emit_op(c,
936 OPCODE_MUL,
937 dst_mask(tmp, WRITEMASK_XYZ),
938 0,
939 src0,
940 src_swizzle1(src_reg_from_dst(tmp), W),
941 src_undef());
942
943 /* dst = precalc(TEX tmp0)
944 */
945 tmp_inst = *inst;
946 tmp_inst.SrcReg[0] = src_reg_from_dst(tmp);
947 precalc_tex(c, &tmp_inst);
948
949 release_temp(c, tmp);
950 }
951 else
952 {
953 /* dst = precalc(TEX src0)
954 */
955 precalc_tex(c, inst);
956 }
957 }
958
959
960
961 static void emit_render_target_writes( struct brw_wm_compile *c )
962 {
963 struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
964 struct prog_src_register outdepth = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DEPTH);
965 struct prog_src_register outcolor;
966 GLuint i;
967
968 struct prog_instruction *inst, *last_inst;
969
970 /* The inst->Aux field is used for FB write target and the EOT marker */
971
972 if (c->key.nr_color_regions > 1) {
973 for (i = 0 ; i < c->key.nr_color_regions; i++) {
974 outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0 + i);
975 last_inst = inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(), 0),
976 0, outcolor, payload_r0_depth, outdepth);
977 inst->Aux = INST_AUX_TARGET(i);
978 if (c->fp_fragcolor_emitted) {
979 outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR);
980 last_inst = inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(), 0),
981 0, outcolor, payload_r0_depth, outdepth);
982 inst->Aux = INST_AUX_TARGET(i);
983 }
984 }
985 last_inst->Aux |= INST_AUX_EOT;
986 }
987 else {
988 /* if gl_FragData[0] is written, use it, else use gl_FragColor */
989 if (c->fp->program.Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DATA0))
990 outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0);
991 else
992 outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR);
993
994 inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0),
995 0, outcolor, payload_r0_depth, outdepth);
996 inst->Aux = INST_AUX_EOT | INST_AUX_TARGET(0);
997 }
998 }
999
1000
1001
1002
1003 /***********************************************************************
1004 * Emit INTERP instructions ahead of first use of each attrib.
1005 */
1006
1007 static void validate_src_regs( struct brw_wm_compile *c,
1008 const struct prog_instruction *inst )
1009 {
1010 GLuint nr_args = brw_wm_nr_args( inst->Opcode );
1011 GLuint i;
1012
1013 for (i = 0; i < nr_args; i++) {
1014 if (inst->SrcReg[i].File == PROGRAM_INPUT) {
1015 GLuint idx = inst->SrcReg[i].Index;
1016 if (!(c->fp_interp_emitted & (1<<idx))) {
1017 emit_interp(c, idx);
1018 }
1019 }
1020 }
1021 }
1022
1023 static void validate_dst_regs( struct brw_wm_compile *c,
1024 const struct prog_instruction *inst )
1025 {
1026 if (inst->DstReg.File == PROGRAM_OUTPUT) {
1027 GLuint idx = inst->DstReg.Index;
1028 if (idx == FRAG_RESULT_COLOR)
1029 c->fp_fragcolor_emitted = 1;
1030 }
1031 }
1032
1033 static void print_insns( const struct prog_instruction *insn,
1034 GLuint nr )
1035 {
1036 GLuint i;
1037 for (i = 0; i < nr; i++, insn++) {
1038 printf("%3d: ", i);
1039 if (insn->Opcode < MAX_OPCODE)
1040 _mesa_fprint_instruction_opt(stdout, insn, 0, PROG_PRINT_DEBUG, NULL);
1041 else if (insn->Opcode < MAX_WM_OPCODE) {
1042 GLuint idx = insn->Opcode - MAX_OPCODE;
1043
1044 _mesa_fprint_alu_instruction(stdout, insn, wm_opcode_strings[idx],
1045 3, PROG_PRINT_DEBUG, NULL);
1046 }
1047 else
1048 printf("965 Opcode %d\n", insn->Opcode);
1049 }
1050 }
1051
1052
1053 /**
1054 * Initial pass for fragment program code generation.
1055 * This function is used by both the GLSL and non-GLSL paths.
1056 */
1057 void brw_wm_pass_fp( struct brw_wm_compile *c )
1058 {
1059 struct brw_fragment_program *fp = c->fp;
1060 GLuint insn;
1061
1062 if (INTEL_DEBUG & DEBUG_WM) {
1063 printf("pre-fp:\n");
1064 _mesa_fprint_program_opt(stdout, &fp->program.Base, PROG_PRINT_DEBUG,
1065 GL_TRUE);
1066 printf("\n");
1067 }
1068
1069 c->pixel_xy = src_undef();
1070 c->delta_xy = src_undef();
1071 c->pixel_w = src_undef();
1072 c->nr_fp_insns = 0;
1073 c->fp->tex_units_used = 0x0;
1074
1075 /* Emit preamble instructions. This is where special instructions such as
1076 * WM_CINTERP, WM_LINTERP, WM_PINTERP and WM_WPOSXY are emitted to
1077 * compute shader inputs from varying vars.
1078 */
1079 for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) {
1080 const struct prog_instruction *inst = &fp->program.Base.Instructions[insn];
1081 validate_src_regs(c, inst);
1082 validate_dst_regs(c, inst);
1083 }
1084
1085 /* Loop over all instructions doing assorted simplifications and
1086 * transformations.
1087 */
1088 for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) {
1089 const struct prog_instruction *inst = &fp->program.Base.Instructions[insn];
1090 struct prog_instruction *out;
1091
1092 /* Check for INPUT values, emit INTERP instructions where
1093 * necessary:
1094 */
1095
1096 switch (inst->Opcode) {
1097 case OPCODE_SWZ:
1098 out = emit_insn(c, inst);
1099 out->Opcode = OPCODE_MOV;
1100 break;
1101
1102 case OPCODE_ABS:
1103 out = emit_insn(c, inst);
1104 out->Opcode = OPCODE_MOV;
1105 out->SrcReg[0].Negate = NEGATE_NONE;
1106 out->SrcReg[0].Abs = 1;
1107 break;
1108
1109 case OPCODE_SUB:
1110 out = emit_insn(c, inst);
1111 out->Opcode = OPCODE_ADD;
1112 out->SrcReg[1].Negate ^= NEGATE_XYZW;
1113 break;
1114
1115 case OPCODE_SCS:
1116 out = emit_insn(c, inst);
1117 /* This should probably be done in the parser.
1118 */
1119 out->DstReg.WriteMask &= WRITEMASK_XY;
1120 break;
1121
1122 case OPCODE_DST:
1123 precalc_dst(c, inst);
1124 break;
1125
1126 case OPCODE_LIT:
1127 precalc_lit(c, inst);
1128 break;
1129
1130 case OPCODE_TEX:
1131 precalc_tex(c, inst);
1132 break;
1133
1134 case OPCODE_TXP:
1135 precalc_txp(c, inst);
1136 break;
1137
1138 case OPCODE_TXB:
1139 out = emit_insn(c, inst);
1140 out->TexSrcUnit = fp->program.Base.SamplerUnits[inst->TexSrcUnit];
1141 assert(out->TexSrcUnit < BRW_MAX_TEX_UNIT);
1142 break;
1143
1144 case OPCODE_XPD:
1145 out = emit_insn(c, inst);
1146 /* This should probably be done in the parser.
1147 */
1148 out->DstReg.WriteMask &= WRITEMASK_XYZ;
1149 break;
1150
1151 case OPCODE_KIL:
1152 out = emit_insn(c, inst);
1153 /* This should probably be done in the parser.
1154 */
1155 out->DstReg.WriteMask = 0;
1156 break;
1157 case OPCODE_END:
1158 emit_render_target_writes(c);
1159 break;
1160 case OPCODE_PRINT:
1161 break;
1162 default:
1163 if (brw_wm_is_scalar_result(inst->Opcode))
1164 emit_scalar_insn(c, inst);
1165 else
1166 emit_insn(c, inst);
1167 break;
1168 }
1169 }
1170
1171 if (INTEL_DEBUG & DEBUG_WM) {
1172 printf("pass_fp:\n");
1173 print_insns( c->prog_instructions, c->nr_fp_insns );
1174 printf("\n");
1175 }
1176 }
1177