i965: Add HiZ operation state to brw_context
[mesa.git] / src / mesa / drivers / dri / i965 / brw_wm_fp.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "main/glheader.h"
34 #include "main/macros.h"
35 #include "main/enums.h"
36 #include "brw_context.h"
37 #include "brw_wm.h"
38 #include "brw_util.h"
39
40 #include "program/prog_parameter.h"
41 #include "program/prog_print.h"
42 #include "program/prog_statevars.h"
43
44
/** An invalid texture target */
#define TEX_TARGET_NONE NUM_TEXTURE_TARGETS

/** An invalid texture unit */
#define TEX_UNIT_NONE BRW_MAX_TEX_UNIT

/** Index of the first PROGRAM_TEMPORARY register handed out by get_temp(). */
#define FIRST_INTERNAL_TEMP MAX_NV_FRAGMENT_PROGRAM_TEMPS

/* Channel numbers, used below as swizzle components and with GET_SWZ(). */
#define X 0
#define Y 1
#define Z 2
#define W 3
57
58
/* Printable names for the WM-specific opcodes; print_insns() indexes this
 * table with (Opcode - MAX_OPCODE).
 */
static const char *const wm_opcode_strings[] = {
   "PIXELXY",
   "DELTAXY",
   "PIXELW",
   "LINTERP",
   "PINTERP",
   "CINTERP",
   "WPOSXY",
   "FB_WRITE",
   "FRONTFACING",
};

#if 0
static const char *wm_file_strings[] = {
   "PAYLOAD"
};
#endif
76
77
78 /***********************************************************************
79 * Source regs
80 */
81
82 static struct prog_src_register src_reg(GLuint file, GLuint idx)
83 {
84 struct prog_src_register reg;
85 reg.File = file;
86 reg.Index = idx;
87 reg.Swizzle = SWIZZLE_NOOP;
88 reg.RelAddr = 0;
89 reg.Negate = NEGATE_NONE;
90 reg.Abs = 0;
91 reg.HasIndex2 = 0;
92 reg.RelAddr2 = 0;
93 reg.Index2 = 0;
94 return reg;
95 }
96
97 static struct prog_src_register src_reg_from_dst(struct prog_dst_register dst)
98 {
99 return src_reg(dst.File, dst.Index);
100 }
101
102 static struct prog_src_register src_undef( void )
103 {
104 return src_reg(PROGRAM_UNDEFINED, 0);
105 }
106
107 static bool src_is_undef(struct prog_src_register src)
108 {
109 return src.File == PROGRAM_UNDEFINED;
110 }
111
112 static struct prog_src_register src_swizzle( struct prog_src_register reg, int x, int y, int z, int w )
113 {
114 reg.Swizzle = MAKE_SWIZZLE4(x,y,z,w);
115 return reg;
116 }
117
118 static struct prog_src_register src_swizzle1( struct prog_src_register reg, int x )
119 {
120 return src_swizzle(reg, x, x, x, x);
121 }
122
123 static struct prog_src_register src_swizzle4( struct prog_src_register reg, uint swizzle )
124 {
125 reg.Swizzle = swizzle;
126 return reg;
127 }
128
129
130 /***********************************************************************
131 * Dest regs
132 */
133
134 static struct prog_dst_register dst_reg(GLuint file, GLuint idx)
135 {
136 struct prog_dst_register reg;
137 reg.File = file;
138 reg.Index = idx;
139 reg.WriteMask = WRITEMASK_XYZW;
140 reg.RelAddr = 0;
141 reg.CondMask = COND_TR;
142 reg.CondSwizzle = 0;
143 reg.CondSrc = 0;
144 return reg;
145 }
146
147 static struct prog_dst_register dst_mask( struct prog_dst_register reg, int mask )
148 {
149 reg.WriteMask &= mask;
150 return reg;
151 }
152
153 static struct prog_dst_register dst_undef( void )
154 {
155 return dst_reg(PROGRAM_UNDEFINED, 0);
156 }
157
158
159
160 static struct prog_dst_register get_temp( struct brw_wm_compile *c )
161 {
162 int bit = _mesa_ffs( ~c->fp_temp );
163
164 if (!bit) {
165 printf("%s: out of temporaries\n", __FILE__);
166 exit(1);
167 }
168
169 c->fp_temp |= 1<<(bit-1);
170 return dst_reg(PROGRAM_TEMPORARY, FIRST_INTERNAL_TEMP+(bit-1));
171 }
172
173
174 static void release_temp( struct brw_wm_compile *c, struct prog_dst_register temp )
175 {
176 c->fp_temp &= ~(1 << (temp.Index - FIRST_INTERNAL_TEMP));
177 }
178
179
180 /***********************************************************************
181 * Instructions
182 */
183
184 static struct prog_instruction *get_fp_inst(struct brw_wm_compile *c)
185 {
186 assert(c->nr_fp_insns < BRW_WM_MAX_INSN);
187 memset(&c->prog_instructions[c->nr_fp_insns], 0,
188 sizeof(*c->prog_instructions));
189 return &c->prog_instructions[c->nr_fp_insns++];
190 }
191
192 static struct prog_instruction *emit_insn(struct brw_wm_compile *c,
193 const struct prog_instruction *inst0)
194 {
195 struct prog_instruction *inst = get_fp_inst(c);
196 *inst = *inst0;
197 return inst;
198 }
199
200 static struct prog_instruction * emit_tex_op(struct brw_wm_compile *c,
201 GLuint op,
202 struct prog_dst_register dest,
203 GLuint saturate,
204 GLuint tex_src_unit,
205 GLuint tex_src_target,
206 GLuint tex_shadow,
207 struct prog_src_register src0,
208 struct prog_src_register src1,
209 struct prog_src_register src2 )
210 {
211 struct prog_instruction *inst = get_fp_inst(c);
212
213 assert(tex_src_unit < BRW_MAX_TEX_UNIT ||
214 tex_src_unit == TEX_UNIT_NONE);
215 assert(tex_src_target < NUM_TEXTURE_TARGETS ||
216 tex_src_target == TEX_TARGET_NONE);
217
218 memset(inst, 0, sizeof(*inst));
219
220 inst->Opcode = op;
221 inst->DstReg = dest;
222 inst->SaturateMode = saturate;
223 inst->TexSrcUnit = tex_src_unit;
224 inst->TexSrcTarget = tex_src_target;
225 inst->TexShadow = tex_shadow;
226 inst->SrcReg[0] = src0;
227 inst->SrcReg[1] = src1;
228 inst->SrcReg[2] = src2;
229 return inst;
230 }
231
232
233 static struct prog_instruction * emit_op(struct brw_wm_compile *c,
234 GLuint op,
235 struct prog_dst_register dest,
236 GLuint saturate,
237 struct prog_src_register src0,
238 struct prog_src_register src1,
239 struct prog_src_register src2 )
240 {
241 return emit_tex_op(c, op, dest, saturate,
242 TEX_UNIT_NONE, TEX_TARGET_NONE, 0, /* unit, tgt, shadow */
243 src0, src1, src2);
244 }
245
246
247 /* Many Mesa opcodes produce the same value across all the result channels.
248 * We'd rather not have to support that splatting in the opcode implementations,
249 * and brw_wm_pass*.c wants to optimize them out by shuffling references around
250 * anyway. We can easily get both by emitting the opcode to one channel, and
251 * then MOVing it to the others, which brw_wm_pass*.c already understands.
252 */
253 static struct prog_instruction *emit_scalar_insn(struct brw_wm_compile *c,
254 const struct prog_instruction *inst0)
255 {
256 struct prog_instruction *inst;
257 unsigned int dst_chan;
258 unsigned int other_channel_mask;
259
260 if (inst0->DstReg.WriteMask == 0)
261 return NULL;
262
263 dst_chan = _mesa_ffs(inst0->DstReg.WriteMask) - 1;
264 inst = get_fp_inst(c);
265 *inst = *inst0;
266 inst->DstReg.WriteMask = 1 << dst_chan;
267
268 other_channel_mask = inst0->DstReg.WriteMask & ~(1 << dst_chan);
269 if (other_channel_mask != 0) {
270 inst = emit_op(c,
271 OPCODE_MOV,
272 dst_mask(inst0->DstReg, other_channel_mask),
273 0,
274 src_swizzle1(src_reg_from_dst(inst0->DstReg), dst_chan),
275 src_undef(),
276 src_undef());
277 }
278 return inst;
279 }
280
281
282 /***********************************************************************
283 * Special instructions for interpolation and other tasks
284 */
285
286 static struct prog_src_register get_pixel_xy( struct brw_wm_compile *c )
287 {
288 if (src_is_undef(c->pixel_xy)) {
289 struct prog_dst_register pixel_xy = get_temp(c);
290 struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
291
292
293 /* Emit the out calculations, and hold onto the results. Use
294 * two instructions as a temporary is required.
295 */
296 /* pixel_xy.xy = PIXELXY payload[0];
297 */
298 emit_op(c,
299 WM_PIXELXY,
300 dst_mask(pixel_xy, WRITEMASK_XY),
301 0,
302 payload_r0_depth,
303 src_undef(),
304 src_undef());
305
306 c->pixel_xy = src_reg_from_dst(pixel_xy);
307 }
308
309 return c->pixel_xy;
310 }
311
312 static struct prog_src_register get_delta_xy( struct brw_wm_compile *c )
313 {
314 if (src_is_undef(c->delta_xy)) {
315 struct prog_dst_register delta_xy = get_temp(c);
316 struct prog_src_register pixel_xy = get_pixel_xy(c);
317 struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
318
319 /* deltas.xy = DELTAXY pixel_xy, payload[0]
320 */
321 emit_op(c,
322 WM_DELTAXY,
323 dst_mask(delta_xy, WRITEMASK_XY),
324 0,
325 pixel_xy,
326 payload_r0_depth,
327 src_undef());
328
329 c->delta_xy = src_reg_from_dst(delta_xy);
330 }
331
332 return c->delta_xy;
333 }
334
335 static struct prog_src_register get_pixel_w( struct brw_wm_compile *c )
336 {
337 /* This is called for producing 1/w in pre-gen6 interp. for gen6,
338 * the interp opcodes don't use this argument. But to keep the
339 * nr_args = 3 expectations of pinterp happy, just stuff delta_xy
340 * into the slot.
341 */
342 if (c->func.brw->intel.gen >= 6)
343 return c->delta_xy;
344
345 if (src_is_undef(c->pixel_w)) {
346 struct prog_dst_register pixel_w = get_temp(c);
347 struct prog_src_register deltas = get_delta_xy(c);
348 struct prog_src_register interp_wpos = src_reg(PROGRAM_PAYLOAD, FRAG_ATTRIB_WPOS);
349
350 /* deltas.xyw = DELTAS2 deltas.xy, payload.interp_wpos.x
351 */
352 emit_op(c,
353 WM_PIXELW,
354 dst_mask(pixel_w, WRITEMASK_W),
355 0,
356 interp_wpos,
357 deltas,
358 src_undef());
359
360
361 c->pixel_w = src_reg_from_dst(pixel_w);
362 }
363
364 return c->pixel_w;
365 }
366
367 static void emit_interp( struct brw_wm_compile *c,
368 GLuint idx )
369 {
370 struct prog_dst_register dst = dst_reg(PROGRAM_INPUT, idx);
371 struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx);
372 struct prog_src_register deltas;
373
374 deltas = get_delta_xy(c);
375
376 /* Need to use PINTERP on attributes which have been
377 * multiplied by 1/W in the SF program, and LINTERP on those
378 * which have not:
379 */
380 switch (idx) {
381 case FRAG_ATTRIB_WPOS:
382 /* Have to treat wpos.xy specially:
383 */
384 emit_op(c,
385 WM_WPOSXY,
386 dst_mask(dst, WRITEMASK_XY),
387 0,
388 get_pixel_xy(c),
389 src_undef(),
390 src_undef());
391
392 dst = dst_mask(dst, WRITEMASK_ZW);
393
394 /* PROGRAM_INPUT.attr.xyzw = INTERP payload.interp[attr].x, deltas.xyw
395 */
396 emit_op(c,
397 WM_LINTERP,
398 dst,
399 0,
400 interp,
401 deltas,
402 src_undef());
403 break;
404 case FRAG_ATTRIB_COL0:
405 case FRAG_ATTRIB_COL1:
406 if (c->key.flat_shade) {
407 emit_op(c,
408 WM_CINTERP,
409 dst,
410 0,
411 interp,
412 src_undef(),
413 src_undef());
414 }
415 else {
416 /* perspective-corrected color interpolation */
417 emit_op(c,
418 WM_PINTERP,
419 dst,
420 0,
421 interp,
422 deltas,
423 get_pixel_w(c));
424 }
425 break;
426 case FRAG_ATTRIB_FOGC:
427 /* Interpolate the fog coordinate */
428 emit_op(c,
429 WM_PINTERP,
430 dst_mask(dst, WRITEMASK_X),
431 0,
432 interp,
433 deltas,
434 get_pixel_w(c));
435
436 emit_op(c,
437 OPCODE_MOV,
438 dst_mask(dst, WRITEMASK_YZW),
439 0,
440 src_swizzle(interp,
441 SWIZZLE_ZERO,
442 SWIZZLE_ZERO,
443 SWIZZLE_ZERO,
444 SWIZZLE_ONE),
445 src_undef(),
446 src_undef());
447 break;
448
449 case FRAG_ATTRIB_FACE:
450 emit_op(c,
451 WM_FRONTFACING,
452 dst_mask(dst, WRITEMASK_X),
453 0,
454 src_undef(),
455 src_undef(),
456 src_undef());
457 break;
458
459 case FRAG_ATTRIB_PNTC:
460 /* XXX review/test this case */
461 emit_op(c,
462 WM_PINTERP,
463 dst_mask(dst, WRITEMASK_XY),
464 0,
465 interp,
466 deltas,
467 get_pixel_w(c));
468
469 emit_op(c,
470 OPCODE_MOV,
471 dst_mask(dst, WRITEMASK_ZW),
472 0,
473 src_swizzle(interp,
474 SWIZZLE_ZERO,
475 SWIZZLE_ZERO,
476 SWIZZLE_ZERO,
477 SWIZZLE_ONE),
478 src_undef(),
479 src_undef());
480 break;
481
482 default:
483 emit_op(c,
484 WM_PINTERP,
485 dst,
486 0,
487 interp,
488 deltas,
489 get_pixel_w(c));
490 break;
491 }
492
493 c->fp_interp_emitted |= 1<<idx;
494 }
495
496 /***********************************************************************
497 * Hacks to extend the program parameter and constant lists.
498 */
499
500 /* Add the fog parameters to the parameter list of the original
501 * program, rather than creating a new list. Doesn't really do any
502 * harm and it's not as if the parameter handling isn't a big hack
503 * anyway.
504 */
505 static struct prog_src_register search_or_add_param5(struct brw_wm_compile *c,
506 GLint s0,
507 GLint s1,
508 GLint s2,
509 GLint s3,
510 GLint s4)
511 {
512 struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters;
513 gl_state_index tokens[STATE_LENGTH];
514 GLuint idx;
515 tokens[0] = s0;
516 tokens[1] = s1;
517 tokens[2] = s2;
518 tokens[3] = s3;
519 tokens[4] = s4;
520
521 idx = _mesa_add_state_reference( paramList, tokens );
522
523 return src_reg(PROGRAM_STATE_VAR, idx);
524 }
525
526
527 static struct prog_src_register search_or_add_const4f( struct brw_wm_compile *c,
528 GLfloat s0,
529 GLfloat s1,
530 GLfloat s2,
531 GLfloat s3)
532 {
533 struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters;
534 gl_constant_value values[4];
535 GLuint idx;
536 GLuint swizzle;
537 struct prog_src_register reg;
538
539 values[0].f = s0;
540 values[1].f = s1;
541 values[2].f = s2;
542 values[3].f = s3;
543
544 idx = _mesa_add_unnamed_constant( paramList, values, 4, &swizzle );
545 reg = src_reg(PROGRAM_STATE_VAR, idx);
546 reg.Swizzle = swizzle;
547
548 return reg;
549 }
550
551
552
553 /***********************************************************************
554 * Expand various instructions here to simpler forms.
555 */
556 static void precalc_dst( struct brw_wm_compile *c,
557 const struct prog_instruction *inst )
558 {
559 struct prog_src_register src0 = inst->SrcReg[0];
560 struct prog_src_register src1 = inst->SrcReg[1];
561 struct prog_dst_register dst = inst->DstReg;
562 struct prog_dst_register temp = get_temp(c);
563
564 if (dst.WriteMask & WRITEMASK_Y) {
565 /* dst.y = mul src0.y, src1.y
566 */
567 emit_op(c,
568 OPCODE_MUL,
569 dst_mask(temp, WRITEMASK_Y),
570 inst->SaturateMode,
571 src0,
572 src1,
573 src_undef());
574 }
575
576 if (dst.WriteMask & WRITEMASK_XZ) {
577 struct prog_instruction *swz;
578 GLuint z = GET_SWZ(src0.Swizzle, Z);
579
580 /* dst.xz = swz src0.1zzz
581 */
582 swz = emit_op(c,
583 OPCODE_SWZ,
584 dst_mask(temp, WRITEMASK_XZ),
585 inst->SaturateMode,
586 src_swizzle(src0, SWIZZLE_ONE, z, z, z),
587 src_undef(),
588 src_undef());
589 /* Avoid letting negation flag of src0 affect our 1 constant. */
590 swz->SrcReg[0].Negate &= ~NEGATE_X;
591 }
592 if (dst.WriteMask & WRITEMASK_W) {
593 /* dst.w = mov src1.w
594 */
595 emit_op(c,
596 OPCODE_MOV,
597 dst_mask(temp, WRITEMASK_W),
598 inst->SaturateMode,
599 src1,
600 src_undef(),
601 src_undef());
602 }
603
604 /* This will get optimized out in general, but it ensures that we
605 * don't overwrite src operands in our channel-wise splitting
606 * above. See piglit fp-dst-aliasing-[12].
607 */
608 emit_op(c,
609 OPCODE_MOV,
610 dst,
611 0,
612 src_reg_from_dst(temp),
613 src_undef(),
614 src_undef());
615
616 release_temp(c, temp);
617 }
618
619
620 static void precalc_lit( struct brw_wm_compile *c,
621 const struct prog_instruction *inst )
622 {
623 struct prog_src_register src0 = inst->SrcReg[0];
624 struct prog_dst_register dst = inst->DstReg;
625
626 if (dst.WriteMask & WRITEMASK_YZ) {
627 emit_op(c,
628 OPCODE_LIT,
629 dst_mask(dst, WRITEMASK_YZ),
630 inst->SaturateMode,
631 src0,
632 src_undef(),
633 src_undef());
634 }
635
636 if (dst.WriteMask & WRITEMASK_XW) {
637 struct prog_instruction *swz;
638
639 /* dst.xw = swz src0.1111
640 */
641 swz = emit_op(c,
642 OPCODE_SWZ,
643 dst_mask(dst, WRITEMASK_XW),
644 0,
645 src_swizzle1(src0, SWIZZLE_ONE),
646 src_undef(),
647 src_undef());
648 /* Avoid letting the negation flag of src0 affect our 1 constant. */
649 swz->SrcReg[0].Negate = NEGATE_NONE;
650 }
651 }
652
653
654 /**
655 * Some TEX instructions require extra code, cube map coordinate
656 * normalization, or coordinate scaling for RECT textures, etc.
657 * This function emits those extra instructions and the TEX
658 * instruction itself.
659 */
660 static void precalc_tex( struct brw_wm_compile *c,
661 const struct prog_instruction *inst )
662 {
663 struct brw_compile *p = &c->func;
664 struct intel_context *intel = &p->brw->intel;
665 struct prog_src_register coord;
666 struct prog_dst_register tmpcoord = { 0 };
667 const GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit];
668
669 assert(unit < BRW_MAX_TEX_UNIT);
670
671 if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX) {
672 struct prog_instruction *out;
673 struct prog_dst_register tmp0 = get_temp(c);
674 struct prog_src_register tmp0src = src_reg_from_dst(tmp0);
675 struct prog_dst_register tmp1 = get_temp(c);
676 struct prog_src_register tmp1src = src_reg_from_dst(tmp1);
677 struct prog_src_register src0 = inst->SrcReg[0];
678
679 /* find longest component of coord vector and normalize it */
680 tmpcoord = get_temp(c);
681 coord = src_reg_from_dst(tmpcoord);
682
683 /* tmpcoord = src0 (i.e.: coord = src0) */
684 out = emit_op(c, OPCODE_MOV,
685 tmpcoord,
686 0,
687 src0,
688 src_undef(),
689 src_undef());
690 out->SrcReg[0].Negate = NEGATE_NONE;
691 out->SrcReg[0].Abs = 1;
692
693 /* tmp0 = MAX(coord.X, coord.Y) */
694 emit_op(c, OPCODE_MAX,
695 tmp0,
696 0,
697 src_swizzle1(coord, X),
698 src_swizzle1(coord, Y),
699 src_undef());
700
701 /* tmp1 = MAX(tmp0, coord.Z) */
702 emit_op(c, OPCODE_MAX,
703 tmp1,
704 0,
705 tmp0src,
706 src_swizzle1(coord, Z),
707 src_undef());
708
709 /* tmp0 = 1 / tmp1 */
710 emit_op(c, OPCODE_RCP,
711 dst_mask(tmp0, WRITEMASK_X),
712 0,
713 tmp1src,
714 src_undef(),
715 src_undef());
716
717 /* tmpCoord = src0 * tmp0 */
718 emit_op(c, OPCODE_MUL,
719 tmpcoord,
720 0,
721 src0,
722 src_swizzle1(tmp0src, SWIZZLE_X),
723 src_undef());
724
725 release_temp(c, tmp0);
726 release_temp(c, tmp1);
727 }
728 else if (intel->gen < 6 && inst->TexSrcTarget == TEXTURE_RECT_INDEX) {
729 struct prog_src_register scale =
730 search_or_add_param5( c,
731 STATE_INTERNAL,
732 STATE_TEXRECT_SCALE,
733 unit,
734 0,0 );
735
736 tmpcoord = get_temp(c);
737
738 /* coord.xy = MUL inst->SrcReg[0], { 1/width, 1/height }
739 */
740 emit_op(c,
741 OPCODE_MUL,
742 tmpcoord,
743 0,
744 inst->SrcReg[0],
745 src_swizzle(scale,
746 SWIZZLE_X,
747 SWIZZLE_Y,
748 SWIZZLE_ONE,
749 SWIZZLE_ONE),
750 src_undef());
751
752 coord = src_reg_from_dst(tmpcoord);
753 }
754 else {
755 coord = inst->SrcReg[0];
756 }
757
758 /* Need to emit YUV texture conversions by hand. Probably need to
759 * do this here - the alternative is in brw_wm_emit.c, but the
760 * conversion requires allocating a temporary variable which we
761 * don't have the facility to do that late in the compilation.
762 */
763 if (c->key.yuvtex_mask & (1 << unit)) {
764 /* convert ycbcr to RGBA */
765 bool swap_uv = c->key.yuvtex_swap_mask & (1<<unit);
766
767 /*
768 CONST C0 = { -.5, -.0625, -.5, 1.164 }
769 CONST C1 = { 1.596, -0.813, 2.018, -.391 }
770 UYV = TEX ...
771 UYV.xyz = ADD UYV, C0
772 UYV.y = MUL UYV.y, C0.w
773 if (UV swaped)
774 RGB.xyz = MAD UYV.zzx, C1, UYV.y
775 else
776 RGB.xyz = MAD UYV.xxz, C1, UYV.y
777 RGB.y = MAD UYV.z, C1.w, RGB.y
778 */
779 struct prog_dst_register dst = inst->DstReg;
780 struct prog_dst_register tmp = get_temp(c);
781 struct prog_src_register tmpsrc = src_reg_from_dst(tmp);
782 struct prog_src_register C0 = search_or_add_const4f( c, -.5, -.0625, -.5, 1.164 );
783 struct prog_src_register C1 = search_or_add_const4f( c, 1.596, -0.813, 2.018, -.391 );
784
785 /* tmp = TEX ...
786 */
787 emit_tex_op(c,
788 OPCODE_TEX,
789 tmp,
790 inst->SaturateMode,
791 unit,
792 inst->TexSrcTarget,
793 inst->TexShadow,
794 coord,
795 src_undef(),
796 src_undef());
797
798 /* tmp.xyz = ADD TMP, C0
799 */
800 emit_op(c,
801 OPCODE_ADD,
802 dst_mask(tmp, WRITEMASK_XYZ),
803 0,
804 tmpsrc,
805 C0,
806 src_undef());
807
808 /* YUV.y = MUL YUV.y, C0.w
809 */
810
811 emit_op(c,
812 OPCODE_MUL,
813 dst_mask(tmp, WRITEMASK_Y),
814 0,
815 tmpsrc,
816 src_swizzle1(C0, W),
817 src_undef());
818
819 /*
820 * if (UV swaped)
821 * RGB.xyz = MAD YUV.zzx, C1, YUV.y
822 * else
823 * RGB.xyz = MAD YUV.xxz, C1, YUV.y
824 */
825
826 emit_op(c,
827 OPCODE_MAD,
828 dst_mask(dst, WRITEMASK_XYZ),
829 0,
830 swap_uv?src_swizzle(tmpsrc, Z,Z,X,X):src_swizzle(tmpsrc, X,X,Z,Z),
831 C1,
832 src_swizzle1(tmpsrc, Y));
833
834 /* RGB.y = MAD YUV.z, C1.w, RGB.y
835 */
836 emit_op(c,
837 OPCODE_MAD,
838 dst_mask(dst, WRITEMASK_Y),
839 0,
840 src_swizzle1(tmpsrc, Z),
841 src_swizzle1(C1, W),
842 src_swizzle1(src_reg_from_dst(dst), Y));
843
844 release_temp(c, tmp);
845 }
846 else {
847 /* ordinary RGBA tex instruction */
848 emit_tex_op(c,
849 OPCODE_TEX,
850 inst->DstReg,
851 inst->SaturateMode,
852 unit,
853 inst->TexSrcTarget,
854 inst->TexShadow,
855 coord,
856 src_undef(),
857 src_undef());
858 }
859
860 /* For GL_EXT_texture_swizzle: */
861 if (c->key.tex_swizzles[unit] != SWIZZLE_NOOP) {
862 /* swizzle the result of the TEX instruction */
863 struct prog_src_register tmpsrc = src_reg_from_dst(inst->DstReg);
864 emit_op(c, OPCODE_SWZ,
865 inst->DstReg,
866 SATURATE_OFF, /* saturate already done above */
867 src_swizzle4(tmpsrc, c->key.tex_swizzles[unit]),
868 src_undef(),
869 src_undef());
870 }
871
872 if ((inst->TexSrcTarget == TEXTURE_RECT_INDEX) ||
873 (inst->TexSrcTarget == TEXTURE_CUBE_INDEX))
874 release_temp(c, tmpcoord);
875 }
876
877
878 /**
879 * Check if the given TXP instruction really needs the divide-by-W step.
880 */
881 static bool
882 projtex(struct brw_wm_compile *c, const struct prog_instruction *inst)
883 {
884 const struct prog_src_register src = inst->SrcReg[0];
885 bool retVal;
886
887 assert(inst->Opcode == OPCODE_TXP);
888
889 /* Only try to detect the simplest cases. Could detect (later)
890 * cases where we are trying to emit code like RCP {1.0}, MUL x,
891 * {1.0}, and so on.
892 *
893 * More complex cases than this typically only arise from
894 * user-provided fragment programs anyway:
895 */
896 if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX)
897 retVal = false; /* ut2004 gun rendering !?! */
898 else if (src.File == PROGRAM_INPUT &&
899 GET_SWZ(src.Swizzle, W) == W &&
900 (c->key.proj_attrib_mask & (1 << src.Index)) == 0)
901 retVal = false;
902 else
903 retVal = true;
904
905 return retVal;
906 }
907
908
909 /**
910 * Emit code for TXP.
911 */
912 static void precalc_txp( struct brw_wm_compile *c,
913 const struct prog_instruction *inst )
914 {
915 struct prog_src_register src0 = inst->SrcReg[0];
916
917 if (projtex(c, inst)) {
918 struct prog_dst_register tmp = get_temp(c);
919 struct prog_instruction tmp_inst;
920
921 /* tmp0.w = RCP inst.arg[0][3]
922 */
923 emit_op(c,
924 OPCODE_RCP,
925 dst_mask(tmp, WRITEMASK_W),
926 0,
927 src_swizzle1(src0, GET_SWZ(src0.Swizzle, W)),
928 src_undef(),
929 src_undef());
930
931 /* tmp0.xyz = MUL inst.arg[0], tmp0.wwww
932 */
933 emit_op(c,
934 OPCODE_MUL,
935 dst_mask(tmp, WRITEMASK_XYZ),
936 0,
937 src0,
938 src_swizzle1(src_reg_from_dst(tmp), W),
939 src_undef());
940
941 /* dst = precalc(TEX tmp0)
942 */
943 tmp_inst = *inst;
944 tmp_inst.SrcReg[0] = src_reg_from_dst(tmp);
945 precalc_tex(c, &tmp_inst);
946
947 release_temp(c, tmp);
948 }
949 else
950 {
951 /* dst = precalc(TEX src0)
952 */
953 precalc_tex(c, inst);
954 }
955 }
956
957
958
959 static void emit_render_target_writes( struct brw_wm_compile *c )
960 {
961 struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
962 struct prog_src_register outdepth = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DEPTH);
963 struct prog_src_register outcolor;
964 GLuint i;
965
966 struct prog_instruction *inst = NULL;
967
968 /* The inst->Aux field is used for FB write target and the EOT marker */
969
970 for (i = 0; i < c->key.nr_color_regions; i++) {
971 if (c->fp->program.Base.OutputsWritten & (1 << FRAG_RESULT_COLOR)) {
972 outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR);
973 } else {
974 outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0 + i);
975 }
976 inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(), 0),
977 0, outcolor, payload_r0_depth, outdepth);
978 inst->Aux = INST_AUX_TARGET(i);
979 }
980
981 /* Mark the last FB write as final, or emit a dummy write if we had
982 * no render targets bound.
983 */
984 if (c->key.nr_color_regions != 0) {
985 inst->Aux |= INST_AUX_EOT;
986 } else {
987 inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(), 0),
988 0, src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR),
989 payload_r0_depth, outdepth);
990 inst->Aux = INST_AUX_TARGET(0) | INST_AUX_EOT;
991 }
992 }
993
994
995
996
997 /***********************************************************************
998 * Emit INTERP instructions ahead of first use of each attrib.
999 */
1000
1001 static void validate_src_regs( struct brw_wm_compile *c,
1002 const struct prog_instruction *inst )
1003 {
1004 GLuint nr_args = brw_wm_nr_args( inst->Opcode );
1005 GLuint i;
1006
1007 for (i = 0; i < nr_args; i++) {
1008 if (inst->SrcReg[i].File == PROGRAM_INPUT) {
1009 GLuint idx = inst->SrcReg[i].Index;
1010 if (!(c->fp_interp_emitted & (1<<idx))) {
1011 emit_interp(c, idx);
1012 }
1013 }
1014 }
1015 }
1016
1017 static void print_insns( const struct prog_instruction *insn,
1018 GLuint nr )
1019 {
1020 GLuint i;
1021 for (i = 0; i < nr; i++, insn++) {
1022 printf("%3d: ", i);
1023 if (insn->Opcode < MAX_OPCODE)
1024 _mesa_fprint_instruction_opt(stdout, insn, 0, PROG_PRINT_DEBUG, NULL);
1025 else if (insn->Opcode < MAX_WM_OPCODE) {
1026 GLuint idx = insn->Opcode - MAX_OPCODE;
1027
1028 _mesa_fprint_alu_instruction(stdout, insn, wm_opcode_strings[idx],
1029 3, PROG_PRINT_DEBUG, NULL);
1030 }
1031 else
1032 printf("965 Opcode %d\n", insn->Opcode);
1033 }
1034 }
1035
1036
1037 /**
1038 * Initial pass for fragment program code generation.
1039 * This function is used by both the GLSL and non-GLSL paths.
1040 */
1041 void brw_wm_pass_fp( struct brw_wm_compile *c )
1042 {
1043 struct intel_context *intel = &c->func.brw->intel;
1044 struct brw_fragment_program *fp = c->fp;
1045 GLuint insn;
1046
1047 if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
1048 printf("pre-fp:\n");
1049 _mesa_fprint_program_opt(stdout, &fp->program.Base, PROG_PRINT_DEBUG,
1050 true);
1051 printf("\n");
1052 }
1053
1054 c->pixel_xy = src_undef();
1055 if (intel->gen >= 6) {
1056 /* The interpolation deltas come in as the perspective pixel
1057 * location barycentric params.
1058 */
1059 c->delta_xy = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
1060 } else {
1061 c->delta_xy = src_undef();
1062 }
1063 c->pixel_w = src_undef();
1064 c->nr_fp_insns = 0;
1065
1066 /* Emit preamble instructions. This is where special instructions such as
1067 * WM_CINTERP, WM_LINTERP, WM_PINTERP and WM_WPOSXY are emitted to
1068 * compute shader inputs from varying vars.
1069 */
1070 for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) {
1071 const struct prog_instruction *inst = &fp->program.Base.Instructions[insn];
1072 validate_src_regs(c, inst);
1073 }
1074
1075 /* Loop over all instructions doing assorted simplifications and
1076 * transformations.
1077 */
1078 for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) {
1079 const struct prog_instruction *inst = &fp->program.Base.Instructions[insn];
1080 struct prog_instruction *out;
1081
1082 /* Check for INPUT values, emit INTERP instructions where
1083 * necessary:
1084 */
1085
1086 switch (inst->Opcode) {
1087 case OPCODE_SWZ:
1088 out = emit_insn(c, inst);
1089 out->Opcode = OPCODE_MOV;
1090 break;
1091
1092 case OPCODE_ABS:
1093 out = emit_insn(c, inst);
1094 out->Opcode = OPCODE_MOV;
1095 out->SrcReg[0].Negate = NEGATE_NONE;
1096 out->SrcReg[0].Abs = 1;
1097 break;
1098
1099 case OPCODE_SUB:
1100 out = emit_insn(c, inst);
1101 out->Opcode = OPCODE_ADD;
1102 out->SrcReg[1].Negate ^= NEGATE_XYZW;
1103 break;
1104
1105 case OPCODE_SCS:
1106 out = emit_insn(c, inst);
1107 /* This should probably be done in the parser.
1108 */
1109 out->DstReg.WriteMask &= WRITEMASK_XY;
1110 break;
1111
1112 case OPCODE_DST:
1113 precalc_dst(c, inst);
1114 break;
1115
1116 case OPCODE_LIT:
1117 precalc_lit(c, inst);
1118 break;
1119
1120 case OPCODE_RSQ:
1121 out = emit_scalar_insn(c, inst);
1122 out->SrcReg[0].Abs = true;
1123 break;
1124
1125 case OPCODE_TEX:
1126 precalc_tex(c, inst);
1127 break;
1128
1129 case OPCODE_TXP:
1130 precalc_txp(c, inst);
1131 break;
1132
1133 case OPCODE_TXB:
1134 out = emit_insn(c, inst);
1135 out->TexSrcUnit = fp->program.Base.SamplerUnits[inst->TexSrcUnit];
1136 assert(out->TexSrcUnit < BRW_MAX_TEX_UNIT);
1137 break;
1138
1139 case OPCODE_XPD:
1140 out = emit_insn(c, inst);
1141 /* This should probably be done in the parser.
1142 */
1143 out->DstReg.WriteMask &= WRITEMASK_XYZ;
1144 break;
1145
1146 case OPCODE_KIL:
1147 out = emit_insn(c, inst);
1148 /* This should probably be done in the parser.
1149 */
1150 out->DstReg.WriteMask = 0;
1151 break;
1152 case OPCODE_END:
1153 emit_render_target_writes(c);
1154 break;
1155 case OPCODE_PRINT:
1156 break;
1157 default:
1158 if (brw_wm_is_scalar_result(inst->Opcode))
1159 emit_scalar_insn(c, inst);
1160 else
1161 emit_insn(c, inst);
1162 break;
1163 }
1164 }
1165
1166 if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
1167 printf("pass_fp:\n");
1168 print_insns( c->prog_instructions, c->nr_fp_insns );
1169 printf("\n");
1170 }
1171 }
1172