Merge branch 'llvm-cliptest-viewport'
[mesa.git] / src / mesa / drivers / dri / i965 / brw_wm_fp.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "main/glheader.h"
34 #include "main/macros.h"
35 #include "main/enums.h"
36 #include "brw_context.h"
37 #include "brw_wm.h"
38 #include "brw_util.h"
39
40 #include "program/prog_parameter.h"
41 #include "program/prog_print.h"
42 #include "program/prog_statevars.h"
43
44
/** An invalid texture target */
#define TEX_TARGET_NONE NUM_TEXTURE_TARGETS

/** An invalid texture unit */
#define TEX_UNIT_NONE BRW_MAX_TEX_UNIT

/** Index of the first temporary handed out by get_temp(); internal
 * temporaries are numbered upward from this value.
 */
#define FIRST_INTERNAL_TEMP MAX_NV_FRAGMENT_PROGRAM_TEMPS

/* Channel indices, used below as swizzle selectors and with GET_SWZ(). */
#define X 0
#define Y 1
#define Z 2
#define W 3
57
58
/* Printable names for the driver-private WM opcodes.  print_insns()
 * indexes this table with (opcode - MAX_OPCODE).
 * NOTE(review): presumably the order must match the WM_* opcode
 * declarations that follow MAX_OPCODE -- confirm against brw_wm.h.
 */
static const char *wm_opcode_strings[] = {
   "PIXELXY",
   "DELTAXY",
   "PIXELW",
   "LINTERP",
   "PINTERP",
   "CINTERP",
   "WPOSXY",
   "FB_WRITE",
   "FRONTFACING",
};

/* Currently compiled out: names for driver-private register files. */
#if 0
static const char *wm_file_strings[] = {
   "PAYLOAD"
};
#endif
76
77
78 /***********************************************************************
79 * Source regs
80 */
81
82 static struct prog_src_register src_reg(GLuint file, GLuint idx)
83 {
84 struct prog_src_register reg;
85 reg.File = file;
86 reg.Index = idx;
87 reg.Swizzle = SWIZZLE_NOOP;
88 reg.RelAddr = 0;
89 reg.Negate = NEGATE_NONE;
90 reg.Abs = 0;
91 reg.HasIndex2 = 0;
92 reg.RelAddr2 = 0;
93 reg.Index2 = 0;
94 return reg;
95 }
96
97 static struct prog_src_register src_reg_from_dst(struct prog_dst_register dst)
98 {
99 return src_reg(dst.File, dst.Index);
100 }
101
102 static struct prog_src_register src_undef( void )
103 {
104 return src_reg(PROGRAM_UNDEFINED, 0);
105 }
106
107 static GLboolean src_is_undef(struct prog_src_register src)
108 {
109 return src.File == PROGRAM_UNDEFINED;
110 }
111
112 static struct prog_src_register src_swizzle( struct prog_src_register reg, int x, int y, int z, int w )
113 {
114 reg.Swizzle = MAKE_SWIZZLE4(x,y,z,w);
115 return reg;
116 }
117
118 static struct prog_src_register src_swizzle1( struct prog_src_register reg, int x )
119 {
120 return src_swizzle(reg, x, x, x, x);
121 }
122
123 static struct prog_src_register src_swizzle4( struct prog_src_register reg, uint swizzle )
124 {
125 reg.Swizzle = swizzle;
126 return reg;
127 }
128
129
130 /***********************************************************************
131 * Dest regs
132 */
133
134 static struct prog_dst_register dst_reg(GLuint file, GLuint idx)
135 {
136 struct prog_dst_register reg;
137 reg.File = file;
138 reg.Index = idx;
139 reg.WriteMask = WRITEMASK_XYZW;
140 reg.RelAddr = 0;
141 reg.CondMask = COND_TR;
142 reg.CondSwizzle = 0;
143 reg.CondSrc = 0;
144 return reg;
145 }
146
147 static struct prog_dst_register dst_mask( struct prog_dst_register reg, int mask )
148 {
149 reg.WriteMask &= mask;
150 return reg;
151 }
152
153 static struct prog_dst_register dst_undef( void )
154 {
155 return dst_reg(PROGRAM_UNDEFINED, 0);
156 }
157
158
159
160 static struct prog_dst_register get_temp( struct brw_wm_compile *c )
161 {
162 int bit = _mesa_ffs( ~c->fp_temp );
163
164 if (!bit) {
165 printf("%s: out of temporaries\n", __FILE__);
166 exit(1);
167 }
168
169 c->fp_temp |= 1<<(bit-1);
170 return dst_reg(PROGRAM_TEMPORARY, FIRST_INTERNAL_TEMP+(bit-1));
171 }
172
173
174 static void release_temp( struct brw_wm_compile *c, struct prog_dst_register temp )
175 {
176 c->fp_temp &= ~(1 << (temp.Index - FIRST_INTERNAL_TEMP));
177 }
178
179
180 /***********************************************************************
181 * Instructions
182 */
183
184 static struct prog_instruction *get_fp_inst(struct brw_wm_compile *c)
185 {
186 assert(c->nr_fp_insns < BRW_WM_MAX_INSN);
187 memset(&c->prog_instructions[c->nr_fp_insns], 0,
188 sizeof(*c->prog_instructions));
189 return &c->prog_instructions[c->nr_fp_insns++];
190 }
191
192 static struct prog_instruction *emit_insn(struct brw_wm_compile *c,
193 const struct prog_instruction *inst0)
194 {
195 struct prog_instruction *inst = get_fp_inst(c);
196 *inst = *inst0;
197 return inst;
198 }
199
200 static struct prog_instruction * emit_tex_op(struct brw_wm_compile *c,
201 GLuint op,
202 struct prog_dst_register dest,
203 GLuint saturate,
204 GLuint tex_src_unit,
205 GLuint tex_src_target,
206 GLuint tex_shadow,
207 struct prog_src_register src0,
208 struct prog_src_register src1,
209 struct prog_src_register src2 )
210 {
211 struct prog_instruction *inst = get_fp_inst(c);
212
213 assert(tex_src_unit < BRW_MAX_TEX_UNIT ||
214 tex_src_unit == TEX_UNIT_NONE);
215 assert(tex_src_target < NUM_TEXTURE_TARGETS ||
216 tex_src_target == TEX_TARGET_NONE);
217
218 /* update mask of which texture units are referenced by this program */
219 if (tex_src_unit != TEX_UNIT_NONE)
220 c->fp->tex_units_used |= (1 << tex_src_unit);
221
222 memset(inst, 0, sizeof(*inst));
223
224 inst->Opcode = op;
225 inst->DstReg = dest;
226 inst->SaturateMode = saturate;
227 inst->TexSrcUnit = tex_src_unit;
228 inst->TexSrcTarget = tex_src_target;
229 inst->TexShadow = tex_shadow;
230 inst->SrcReg[0] = src0;
231 inst->SrcReg[1] = src1;
232 inst->SrcReg[2] = src2;
233 return inst;
234 }
235
236
237 static struct prog_instruction * emit_op(struct brw_wm_compile *c,
238 GLuint op,
239 struct prog_dst_register dest,
240 GLuint saturate,
241 struct prog_src_register src0,
242 struct prog_src_register src1,
243 struct prog_src_register src2 )
244 {
245 return emit_tex_op(c, op, dest, saturate,
246 TEX_UNIT_NONE, TEX_TARGET_NONE, 0, /* unit, tgt, shadow */
247 src0, src1, src2);
248 }
249
250
251 /* Many Mesa opcodes produce the same value across all the result channels.
252 * We'd rather not have to support that splatting in the opcode implementations,
253 * and brw_wm_pass*.c wants to optimize them out by shuffling references around
254 * anyway. We can easily get both by emitting the opcode to one channel, and
255 * then MOVing it to the others, which brw_wm_pass*.c already understands.
256 */
257 static struct prog_instruction *emit_scalar_insn(struct brw_wm_compile *c,
258 const struct prog_instruction *inst0)
259 {
260 struct prog_instruction *inst;
261 unsigned int dst_chan;
262 unsigned int other_channel_mask;
263
264 if (inst0->DstReg.WriteMask == 0)
265 return NULL;
266
267 dst_chan = _mesa_ffs(inst0->DstReg.WriteMask) - 1;
268 inst = get_fp_inst(c);
269 *inst = *inst0;
270 inst->DstReg.WriteMask = 1 << dst_chan;
271
272 other_channel_mask = inst0->DstReg.WriteMask & ~(1 << dst_chan);
273 if (other_channel_mask != 0) {
274 inst = emit_op(c,
275 OPCODE_MOV,
276 dst_mask(inst0->DstReg, other_channel_mask),
277 0,
278 src_swizzle1(src_reg_from_dst(inst0->DstReg), dst_chan),
279 src_undef(),
280 src_undef());
281 }
282 return inst;
283 }
284
285
286 /***********************************************************************
287 * Special instructions for interpolation and other tasks
288 */
289
290 static struct prog_src_register get_pixel_xy( struct brw_wm_compile *c )
291 {
292 if (src_is_undef(c->pixel_xy)) {
293 struct prog_dst_register pixel_xy = get_temp(c);
294 struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
295
296
297 /* Emit the out calculations, and hold onto the results. Use
298 * two instructions as a temporary is required.
299 */
300 /* pixel_xy.xy = PIXELXY payload[0];
301 */
302 emit_op(c,
303 WM_PIXELXY,
304 dst_mask(pixel_xy, WRITEMASK_XY),
305 0,
306 payload_r0_depth,
307 src_undef(),
308 src_undef());
309
310 c->pixel_xy = src_reg_from_dst(pixel_xy);
311 }
312
313 return c->pixel_xy;
314 }
315
316 static struct prog_src_register get_delta_xy( struct brw_wm_compile *c )
317 {
318 if (src_is_undef(c->delta_xy)) {
319 struct prog_dst_register delta_xy = get_temp(c);
320 struct prog_src_register pixel_xy = get_pixel_xy(c);
321 struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
322
323 /* deltas.xy = DELTAXY pixel_xy, payload[0]
324 */
325 emit_op(c,
326 WM_DELTAXY,
327 dst_mask(delta_xy, WRITEMASK_XY),
328 0,
329 pixel_xy,
330 payload_r0_depth,
331 src_undef());
332
333 c->delta_xy = src_reg_from_dst(delta_xy);
334 }
335
336 return c->delta_xy;
337 }
338
339 static struct prog_src_register get_pixel_w( struct brw_wm_compile *c )
340 {
341 /* This is only called for producing 1/w in pre-gen6 interp. for
342 * gen6, the interp opcodes don't use this argument.
343 */
344 if (c->func.brw->intel.gen >= 6)
345 return src_undef();
346
347 if (src_is_undef(c->pixel_w)) {
348 struct prog_dst_register pixel_w = get_temp(c);
349 struct prog_src_register deltas = get_delta_xy(c);
350 struct prog_src_register interp_wpos = src_reg(PROGRAM_PAYLOAD, FRAG_ATTRIB_WPOS);
351
352 /* deltas.xyw = DELTAS2 deltas.xy, payload.interp_wpos.x
353 */
354 emit_op(c,
355 WM_PIXELW,
356 dst_mask(pixel_w, WRITEMASK_W),
357 0,
358 interp_wpos,
359 deltas,
360 src_undef());
361
362
363 c->pixel_w = src_reg_from_dst(pixel_w);
364 }
365
366 return c->pixel_w;
367 }
368
369 static void emit_interp( struct brw_wm_compile *c,
370 GLuint idx )
371 {
372 struct prog_dst_register dst = dst_reg(PROGRAM_INPUT, idx);
373 struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx);
374 struct prog_src_register deltas;
375
376 if (c->func.brw->intel.gen < 6) {
377 deltas = get_delta_xy(c);
378 } else {
379 deltas = src_undef();
380 }
381
382 /* Need to use PINTERP on attributes which have been
383 * multiplied by 1/W in the SF program, and LINTERP on those
384 * which have not:
385 */
386 switch (idx) {
387 case FRAG_ATTRIB_WPOS:
388 /* Have to treat wpos.xy specially:
389 */
390 emit_op(c,
391 WM_WPOSXY,
392 dst_mask(dst, WRITEMASK_XY),
393 0,
394 get_pixel_xy(c),
395 src_undef(),
396 src_undef());
397
398 dst = dst_mask(dst, WRITEMASK_ZW);
399
400 /* PROGRAM_INPUT.attr.xyzw = INTERP payload.interp[attr].x, deltas.xyw
401 */
402 emit_op(c,
403 WM_LINTERP,
404 dst,
405 0,
406 interp,
407 deltas,
408 src_undef());
409 break;
410 case FRAG_ATTRIB_COL0:
411 case FRAG_ATTRIB_COL1:
412 if (c->key.flat_shade) {
413 emit_op(c,
414 WM_CINTERP,
415 dst,
416 0,
417 interp,
418 src_undef(),
419 src_undef());
420 }
421 else {
422 if (c->key.linear_color) {
423 emit_op(c,
424 WM_LINTERP,
425 dst,
426 0,
427 interp,
428 deltas,
429 src_undef());
430 }
431 else {
432 /* perspective-corrected color interpolation */
433 emit_op(c,
434 WM_PINTERP,
435 dst,
436 0,
437 interp,
438 deltas,
439 get_pixel_w(c));
440 }
441 }
442 break;
443 case FRAG_ATTRIB_FOGC:
444 /* Interpolate the fog coordinate */
445 emit_op(c,
446 WM_PINTERP,
447 dst_mask(dst, WRITEMASK_X),
448 0,
449 interp,
450 deltas,
451 get_pixel_w(c));
452
453 emit_op(c,
454 OPCODE_MOV,
455 dst_mask(dst, WRITEMASK_YZW),
456 0,
457 src_swizzle(interp,
458 SWIZZLE_ZERO,
459 SWIZZLE_ZERO,
460 SWIZZLE_ZERO,
461 SWIZZLE_ONE),
462 src_undef(),
463 src_undef());
464 break;
465
466 case FRAG_ATTRIB_FACE:
467 emit_op(c,
468 WM_FRONTFACING,
469 dst_mask(dst, WRITEMASK_X),
470 0,
471 src_undef(),
472 src_undef(),
473 src_undef());
474 break;
475
476 case FRAG_ATTRIB_PNTC:
477 /* XXX review/test this case */
478 emit_op(c,
479 WM_PINTERP,
480 dst_mask(dst, WRITEMASK_XY),
481 0,
482 interp,
483 deltas,
484 get_pixel_w(c));
485
486 emit_op(c,
487 OPCODE_MOV,
488 dst_mask(dst, WRITEMASK_ZW),
489 0,
490 src_swizzle(interp,
491 SWIZZLE_ZERO,
492 SWIZZLE_ZERO,
493 SWIZZLE_ZERO,
494 SWIZZLE_ONE),
495 src_undef(),
496 src_undef());
497 break;
498
499 default:
500 emit_op(c,
501 WM_PINTERP,
502 dst,
503 0,
504 interp,
505 deltas,
506 get_pixel_w(c));
507 break;
508 }
509
510 c->fp_interp_emitted |= 1<<idx;
511 }
512
513 /***********************************************************************
514 * Hacks to extend the program parameter and constant lists.
515 */
516
517 /* Add the fog parameters to the parameter list of the original
518 * program, rather than creating a new list. Doesn't really do any
519 * harm and it's not as if the parameter handling isn't a big hack
520 * anyway.
521 */
522 static struct prog_src_register search_or_add_param5(struct brw_wm_compile *c,
523 GLint s0,
524 GLint s1,
525 GLint s2,
526 GLint s3,
527 GLint s4)
528 {
529 struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters;
530 gl_state_index tokens[STATE_LENGTH];
531 GLuint idx;
532 tokens[0] = s0;
533 tokens[1] = s1;
534 tokens[2] = s2;
535 tokens[3] = s3;
536 tokens[4] = s4;
537
538 idx = _mesa_add_state_reference( paramList, tokens );
539
540 return src_reg(PROGRAM_STATE_VAR, idx);
541 }
542
543
544 static struct prog_src_register search_or_add_const4f( struct brw_wm_compile *c,
545 GLfloat s0,
546 GLfloat s1,
547 GLfloat s2,
548 GLfloat s3)
549 {
550 struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters;
551 GLfloat values[4];
552 GLuint idx;
553 GLuint swizzle;
554 struct prog_src_register reg;
555
556 values[0] = s0;
557 values[1] = s1;
558 values[2] = s2;
559 values[3] = s3;
560
561 idx = _mesa_add_unnamed_constant( paramList, values, 4, &swizzle );
562 reg = src_reg(PROGRAM_STATE_VAR, idx);
563 reg.Swizzle = swizzle;
564
565 return reg;
566 }
567
568
569
570 /***********************************************************************
571 * Expand various instructions here to simpler forms.
572 */
573 static void precalc_dst( struct brw_wm_compile *c,
574 const struct prog_instruction *inst )
575 {
576 struct prog_src_register src0 = inst->SrcReg[0];
577 struct prog_src_register src1 = inst->SrcReg[1];
578 struct prog_dst_register dst = inst->DstReg;
579
580 if (dst.WriteMask & WRITEMASK_Y) {
581 /* dst.y = mul src0.y, src1.y
582 */
583 emit_op(c,
584 OPCODE_MUL,
585 dst_mask(dst, WRITEMASK_Y),
586 inst->SaturateMode,
587 src0,
588 src1,
589 src_undef());
590 }
591
592 if (dst.WriteMask & WRITEMASK_XZ) {
593 struct prog_instruction *swz;
594 GLuint z = GET_SWZ(src0.Swizzle, Z);
595
596 /* dst.xz = swz src0.1zzz
597 */
598 swz = emit_op(c,
599 OPCODE_SWZ,
600 dst_mask(dst, WRITEMASK_XZ),
601 inst->SaturateMode,
602 src_swizzle(src0, SWIZZLE_ONE, z, z, z),
603 src_undef(),
604 src_undef());
605 /* Avoid letting negation flag of src0 affect our 1 constant. */
606 swz->SrcReg[0].Negate &= ~NEGATE_X;
607 }
608 if (dst.WriteMask & WRITEMASK_W) {
609 /* dst.w = mov src1.w
610 */
611 emit_op(c,
612 OPCODE_MOV,
613 dst_mask(dst, WRITEMASK_W),
614 inst->SaturateMode,
615 src1,
616 src_undef(),
617 src_undef());
618 }
619 }
620
621
622 static void precalc_lit( struct brw_wm_compile *c,
623 const struct prog_instruction *inst )
624 {
625 struct prog_src_register src0 = inst->SrcReg[0];
626 struct prog_dst_register dst = inst->DstReg;
627
628 if (dst.WriteMask & WRITEMASK_XW) {
629 struct prog_instruction *swz;
630
631 /* dst.xw = swz src0.1111
632 */
633 swz = emit_op(c,
634 OPCODE_SWZ,
635 dst_mask(dst, WRITEMASK_XW),
636 0,
637 src_swizzle1(src0, SWIZZLE_ONE),
638 src_undef(),
639 src_undef());
640 /* Avoid letting the negation flag of src0 affect our 1 constant. */
641 swz->SrcReg[0].Negate = NEGATE_NONE;
642 }
643
644 if (dst.WriteMask & WRITEMASK_YZ) {
645 emit_op(c,
646 OPCODE_LIT,
647 dst_mask(dst, WRITEMASK_YZ),
648 inst->SaturateMode,
649 src0,
650 src_undef(),
651 src_undef());
652 }
653 }
654
655
/**
 * Emit a TEX instruction together with any extra code it needs.
 *
 * Depending on the texture target and the compile key this emits, in
 * order:
 *  1. coordinate preparation: normalization by the largest-magnitude
 *     component for cube maps, or scaling by the STATE_TEXRECT_SCALE
 *     state for RECT textures; other targets use the coordinate as-is;
 *  2. the TEX itself, followed by a YCbCr-to-RGB conversion when the
 *     unit's bit is set in c->key.yuvtex_mask;
 *  3. a SWZ of the result when c->key.tex_swizzles[unit] is not the
 *     identity (GL_EXT_texture_swizzle).
 *
 * inst->TexSrcUnit is a sampler index; it is translated to a texture
 * unit through the program's SamplerUnits table.
 */
static void precalc_tex( struct brw_wm_compile *c,
                         const struct prog_instruction *inst )
{
   struct prog_src_register coord;
   /* Only assigned (and later released) for CUBE and RECT targets. */
   struct prog_dst_register tmpcoord;
   const GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit];

   assert(unit < BRW_MAX_TEX_UNIT);

   if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX) {
      struct prog_instruction *out;
      struct prog_dst_register tmp0 = get_temp(c);
      struct prog_src_register tmp0src = src_reg_from_dst(tmp0);
      struct prog_dst_register tmp1 = get_temp(c);
      struct prog_src_register tmp1src = src_reg_from_dst(tmp1);
      struct prog_src_register src0 = inst->SrcReg[0];

      /* find longest component of coord vector and normalize it */
      tmpcoord = get_temp(c);
      coord = src_reg_from_dst(tmpcoord);

      /* tmpcoord = |src0|: the MOV source is patched below to drop
       * any negation and take the absolute value.
       */
      out = emit_op(c, OPCODE_MOV,
                    tmpcoord,
                    0,
                    src0,
                    src_undef(),
                    src_undef());
      out->SrcReg[0].Negate = NEGATE_NONE;
      out->SrcReg[0].Abs = 1;

      /* tmp0 = MAX(coord.X, coord.Y) */
      emit_op(c, OPCODE_MAX,
              tmp0,
              0,
              src_swizzle1(coord, X),
              src_swizzle1(coord, Y),
              src_undef());

      /* tmp1 = MAX(tmp0, coord.Z) */
      emit_op(c, OPCODE_MAX,
              tmp1,
              0,
              tmp0src,
              src_swizzle1(coord, Z),
              src_undef());

      /* tmp0.x = 1 / tmp1 */
      emit_op(c, OPCODE_RCP,
              dst_mask(tmp0, WRITEMASK_X),
              0,
              tmp1src,
              src_undef(),
              src_undef());

      /* tmpcoord = src0 * tmp0.x (overwrites the |src0| copy above) */
      emit_op(c, OPCODE_MUL,
              tmpcoord,
              0,
              src0,
              src_swizzle1(tmp0src, SWIZZLE_X),
              src_undef());

      release_temp(c, tmp0);
      release_temp(c, tmp1);
   }
   else if (inst->TexSrcTarget == TEXTURE_RECT_INDEX) {
      struct prog_src_register scale =
         search_or_add_param5( c,
                               STATE_INTERNAL,
                               STATE_TEXRECT_SCALE,
                               unit,
                               0,0 );

      tmpcoord = get_temp(c);

      /* coord = MUL inst->SrcReg[0], { scale.x, scale.y, 1, 1 }
       * NOTE(review): scale is presumably { 1/width, 1/height } --
       * confirm against the STATE_TEXRECT_SCALE definition.
       */
      emit_op(c,
              OPCODE_MUL,
              tmpcoord,
              0,
              inst->SrcReg[0],
              src_swizzle(scale,
                          SWIZZLE_X,
                          SWIZZLE_Y,
                          SWIZZLE_ONE,
                          SWIZZLE_ONE),
              src_undef());

      coord = src_reg_from_dst(tmpcoord);
   }
   else {
      /* No coordinate fix-up needed for other targets. */
      coord = inst->SrcReg[0];
   }

   /* Need to emit YUV texture conversions by hand.  Probably need to
    * do this here - the alternative is in brw_wm_emit.c, but the
    * conversion requires allocating a temporary variable which we
    * don't have the facility to do that late in the compilation.
    */
   if (c->key.yuvtex_mask & (1 << unit)) {
      /* convert ycbcr to RGBA */
      GLboolean swap_uv = c->key.yuvtex_swap_mask & (1<<unit);

      /*
         CONST C0 = { -.5, -.0625,  -.5, 1.164 }
         CONST C1 = { 1.596, -0.813, 2.018, -.391 }
         UYV     = TEX ...
         UYV.xyz = ADD UYV,     C0
         UYV.y   = MUL UYV.y,   C0.w
         if (UV swapped)
            RGB.xyz = MAD UYV.zzx, C1,   UYV.y
         else
            RGB.xyz = MAD UYV.xxz, C1,   UYV.y
         RGB.y   = MAD UYV.z,   C1.w, RGB.y
      */
      struct prog_dst_register dst = inst->DstReg;
      struct prog_dst_register tmp = get_temp(c);
      struct prog_src_register tmpsrc = src_reg_from_dst(tmp);
      struct prog_src_register C0 = search_or_add_const4f( c,  -.5, -.0625, -.5, 1.164 );
      struct prog_src_register C1 = search_or_add_const4f( c, 1.596, -0.813, 2.018, -.391 );

      /* tmp = TEX ...
       */
      emit_tex_op(c,
                  OPCODE_TEX,
                  tmp,
                  inst->SaturateMode,
                  unit,
                  inst->TexSrcTarget,
                  inst->TexShadow,
                  coord,
                  src_undef(),
                  src_undef());

      /* tmp.xyz =  ADD TMP, C0
       */
      emit_op(c,
              OPCODE_ADD,
              dst_mask(tmp, WRITEMASK_XYZ),
              0,
              tmpsrc,
              C0,
              src_undef());

      /* YUV.y   = MUL YUV.y, C0.w
       */

      emit_op(c,
              OPCODE_MUL,
              dst_mask(tmp, WRITEMASK_Y),
              0,
              tmpsrc,
              src_swizzle1(C0, W),
              src_undef());

      /*
       * if (UV swapped)
       *     RGB.xyz = MAD YUV.zzx, C1, YUV.y
       * else
       *     RGB.xyz = MAD YUV.xxz, C1, YUV.y
       */

      emit_op(c,
              OPCODE_MAD,
              dst_mask(dst, WRITEMASK_XYZ),
              0,
              swap_uv?src_swizzle(tmpsrc, Z,Z,X,X):src_swizzle(tmpsrc, X,X,Z,Z),
              C1,
              src_swizzle1(tmpsrc, Y));

      /*  RGB.y   = MAD YUV.z, C1.w, RGB.y
       */
      emit_op(c,
              OPCODE_MAD,
              dst_mask(dst, WRITEMASK_Y),
              0,
              src_swizzle1(tmpsrc, Z),
              src_swizzle1(C1, W),
              src_swizzle1(src_reg_from_dst(dst), Y));

      release_temp(c, tmp);
   }
   else {
      /* ordinary RGBA tex instruction */
      emit_tex_op(c,
                  OPCODE_TEX,
                  inst->DstReg,
                  inst->SaturateMode,
                  unit,
                  inst->TexSrcTarget,
                  inst->TexShadow,
                  coord,
                  src_undef(),
                  src_undef());
   }

   /* For GL_EXT_texture_swizzle: */
   if (c->key.tex_swizzles[unit] != SWIZZLE_NOOP) {
      /* swizzle the result of the TEX instruction */
      struct prog_src_register tmpsrc = src_reg_from_dst(inst->DstReg);
      emit_op(c, OPCODE_SWZ,
              inst->DstReg,
              SATURATE_OFF, /* saturate already done above */
              src_swizzle4(tmpsrc, c->key.tex_swizzles[unit]),
              src_undef(),
              src_undef());
   }

   /* tmpcoord was only allocated on the CUBE and RECT paths. */
   if ((inst->TexSrcTarget == TEXTURE_RECT_INDEX) ||
       (inst->TexSrcTarget == TEXTURE_CUBE_INDEX))
      release_temp(c, tmpcoord);
}
876
877
878 /**
879 * Check if the given TXP instruction really needs the divide-by-W step.
880 */
881 static GLboolean projtex( struct brw_wm_compile *c,
882 const struct prog_instruction *inst )
883 {
884 const struct prog_src_register src = inst->SrcReg[0];
885 GLboolean retVal;
886
887 assert(inst->Opcode == OPCODE_TXP);
888
889 /* Only try to detect the simplest cases. Could detect (later)
890 * cases where we are trying to emit code like RCP {1.0}, MUL x,
891 * {1.0}, and so on.
892 *
893 * More complex cases than this typically only arise from
894 * user-provided fragment programs anyway:
895 */
896 if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX)
897 retVal = GL_FALSE; /* ut2004 gun rendering !?! */
898 else if (src.File == PROGRAM_INPUT &&
899 GET_SWZ(src.Swizzle, W) == W &&
900 (c->key.proj_attrib_mask & (1 << src.Index)) == 0)
901 retVal = GL_FALSE;
902 else
903 retVal = GL_TRUE;
904
905 return retVal;
906 }
907
908
909 /**
910 * Emit code for TXP.
911 */
912 static void precalc_txp( struct brw_wm_compile *c,
913 const struct prog_instruction *inst )
914 {
915 struct prog_src_register src0 = inst->SrcReg[0];
916
917 if (projtex(c, inst)) {
918 struct prog_dst_register tmp = get_temp(c);
919 struct prog_instruction tmp_inst;
920
921 /* tmp0.w = RCP inst.arg[0][3]
922 */
923 emit_op(c,
924 OPCODE_RCP,
925 dst_mask(tmp, WRITEMASK_W),
926 0,
927 src_swizzle1(src0, GET_SWZ(src0.Swizzle, W)),
928 src_undef(),
929 src_undef());
930
931 /* tmp0.xyz = MUL inst.arg[0], tmp0.wwww
932 */
933 emit_op(c,
934 OPCODE_MUL,
935 dst_mask(tmp, WRITEMASK_XYZ),
936 0,
937 src0,
938 src_swizzle1(src_reg_from_dst(tmp), W),
939 src_undef());
940
941 /* dst = precalc(TEX tmp0)
942 */
943 tmp_inst = *inst;
944 tmp_inst.SrcReg[0] = src_reg_from_dst(tmp);
945 precalc_tex(c, &tmp_inst);
946
947 release_temp(c, tmp);
948 }
949 else
950 {
951 /* dst = precalc(TEX src0)
952 */
953 precalc_tex(c, inst);
954 }
955 }
956
957
958
959 static void emit_render_target_writes( struct brw_wm_compile *c )
960 {
961 struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
962 struct prog_src_register outdepth = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DEPTH);
963 struct prog_src_register outcolor;
964 GLuint i;
965
966 struct prog_instruction *inst, *last_inst;
967
968 /* The inst->Aux field is used for FB write target and the EOT marker */
969
970 if (c->key.nr_color_regions > 1) {
971 for (i = 0 ; i < c->key.nr_color_regions; i++) {
972 outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0 + i);
973 last_inst = inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(), 0),
974 0, outcolor, payload_r0_depth, outdepth);
975 inst->Aux = INST_AUX_TARGET(i);
976 if (c->fp_fragcolor_emitted) {
977 outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR);
978 last_inst = inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(), 0),
979 0, outcolor, payload_r0_depth, outdepth);
980 inst->Aux = INST_AUX_TARGET(i);
981 }
982 }
983 last_inst->Aux |= INST_AUX_EOT;
984 }
985 else {
986 /* if gl_FragData[0] is written, use it, else use gl_FragColor */
987 if (c->fp->program.Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DATA0))
988 outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0);
989 else
990 outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR);
991
992 inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0),
993 0, outcolor, payload_r0_depth, outdepth);
994 inst->Aux = INST_AUX_EOT | INST_AUX_TARGET(0);
995 }
996 }
997
998
999
1000
1001 /***********************************************************************
1002 * Emit INTERP instructions ahead of first use of each attrib.
1003 */
1004
1005 static void validate_src_regs( struct brw_wm_compile *c,
1006 const struct prog_instruction *inst )
1007 {
1008 GLuint nr_args = brw_wm_nr_args( inst->Opcode );
1009 GLuint i;
1010
1011 for (i = 0; i < nr_args; i++) {
1012 if (inst->SrcReg[i].File == PROGRAM_INPUT) {
1013 GLuint idx = inst->SrcReg[i].Index;
1014 if (!(c->fp_interp_emitted & (1<<idx))) {
1015 emit_interp(c, idx);
1016 }
1017 }
1018 }
1019 }
1020
1021 static void validate_dst_regs( struct brw_wm_compile *c,
1022 const struct prog_instruction *inst )
1023 {
1024 if (inst->DstReg.File == PROGRAM_OUTPUT) {
1025 GLuint idx = inst->DstReg.Index;
1026 if (idx == FRAG_RESULT_COLOR)
1027 c->fp_fragcolor_emitted = 1;
1028 }
1029 }
1030
1031 static void print_insns( const struct prog_instruction *insn,
1032 GLuint nr )
1033 {
1034 GLuint i;
1035 for (i = 0; i < nr; i++, insn++) {
1036 printf("%3d: ", i);
1037 if (insn->Opcode < MAX_OPCODE)
1038 _mesa_fprint_instruction_opt(stdout, insn, 0, PROG_PRINT_DEBUG, NULL);
1039 else if (insn->Opcode < MAX_WM_OPCODE) {
1040 GLuint idx = insn->Opcode - MAX_OPCODE;
1041
1042 _mesa_fprint_alu_instruction(stdout, insn, wm_opcode_strings[idx],
1043 3, PROG_PRINT_DEBUG, NULL);
1044 }
1045 else
1046 printf("965 Opcode %d\n", insn->Opcode);
1047 }
1048 }
1049
1050
/**
 * Initial pass for fragment program code generation.
 * This function is used by both the GLSL and non-GLSL paths.
 *
 * Translates the Mesa fragment program c->fp into the instruction
 * list c->prog_instructions (c->nr_fp_insns entries):
 *  1. a preamble pass walks every instruction and emits interpolation
 *     code for each fragment input that is read, and notes whether
 *     gl_FragColor is written;
 *  2. a second pass copies each instruction across, rewriting or
 *     expanding some opcodes into simpler forms on the way.
 *
 * With INTEL_DEBUG & DEBUG_WM set, the program is printed before and
 * after the pass.
 */
void brw_wm_pass_fp( struct brw_wm_compile *c )
{
   struct intel_context *intel = &c->func.brw->intel;
   struct brw_fragment_program *fp = c->fp;
   GLuint insn;

   if (INTEL_DEBUG & DEBUG_WM) {
      printf("pre-fp:\n");
      _mesa_fprint_program_opt(stdout, &fp->program.Base, PROG_PRINT_DEBUG,
                               GL_TRUE);
      printf("\n");
   }

   /* Reset the lazily computed helper registers and output state. */
   c->pixel_xy = src_undef();
   if (intel->gen >= 6) {
      /* The interpolation deltas come in as the perspective pixel
       * location barycentric params.
       */
      c->delta_xy = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
   } else {
      c->delta_xy = src_undef();
   }
   c->pixel_w = src_undef();
   c->nr_fp_insns = 0;
   c->fp->tex_units_used = 0x0;

   /* Emit preamble instructions.  This is where special instructions such as
    * WM_CINTERP, WM_LINTERP, WM_PINTERP and WM_WPOSXY are emitted to
    * compute shader inputs from varying vars.
    */
   for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) {
      const struct prog_instruction *inst = &fp->program.Base.Instructions[insn];
      validate_src_regs(c, inst);
      validate_dst_regs(c, inst);
   }

   /* Loop over all instructions doing assorted simplifications and
    * transformations.
    */
   for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) {
      const struct prog_instruction *inst = &fp->program.Base.Instructions[insn];
      struct prog_instruction *out;

      /* INTERP instructions for any INPUT values were already emitted
       * by the preamble loop above; here each opcode is only
       * translated.
       */

      switch (inst->Opcode) {
      case OPCODE_SWZ:
         /* SWZ becomes a plain MOV with the same operands. */
         out = emit_insn(c, inst);
         out->Opcode = OPCODE_MOV;
         break;

      case OPCODE_ABS:
         /* ABS becomes a MOV with the absolute-value source modifier. */
         out = emit_insn(c, inst);
         out->Opcode = OPCODE_MOV;
         out->SrcReg[0].Negate = NEGATE_NONE;
         out->SrcReg[0].Abs = 1;
         break;

      case OPCODE_SUB:
         /* SUB becomes an ADD with the second source's negation flipped. */
         out = emit_insn(c, inst);
         out->Opcode = OPCODE_ADD;
         out->SrcReg[1].Negate ^= NEGATE_XYZW;
         break;

      case OPCODE_SCS:
         out = emit_insn(c, inst);
         /* This should probably be done in the parser.
          */
         out->DstReg.WriteMask &= WRITEMASK_XY;
         break;

      case OPCODE_DST:
         precalc_dst(c, inst);
         break;

      case OPCODE_LIT:
         precalc_lit(c, inst);
         break;

      case OPCODE_TEX:
         precalc_tex(c, inst);
         break;

      case OPCODE_TXP:
         precalc_txp(c, inst);
         break;

      case OPCODE_TXB:
         /* Copied as-is, except that the sampler index is translated
          * to a texture unit.
          * NOTE(review): unlike the TEX/TXP paths, which go through
          * emit_tex_op(), this does not set the unit's bit in
          * fp->tex_units_used -- confirm whether that is intended.
          */
         out = emit_insn(c, inst);
         out->TexSrcUnit = fp->program.Base.SamplerUnits[inst->TexSrcUnit];
         assert(out->TexSrcUnit < BRW_MAX_TEX_UNIT);
         break;

      case OPCODE_XPD:
         out = emit_insn(c, inst);
         /* This should probably be done in the parser.
          */
         out->DstReg.WriteMask &= WRITEMASK_XYZ;
         break;

      case OPCODE_KIL:
         out = emit_insn(c, inst);
         /* This should probably be done in the parser.
          */
         out->DstReg.WriteMask = 0;
         break;
      case OPCODE_END:
         /* END is replaced by the framebuffer write(s). */
         emit_render_target_writes(c);
         break;
      case OPCODE_PRINT:
         /* PRINT is dropped: nothing is emitted for it. */
         break;
      default:
         /* Opcodes producing one value for all channels are emitted
          * to a single channel plus a MOV; everything else is copied
          * unchanged.
          */
         if (brw_wm_is_scalar_result(inst->Opcode))
            emit_scalar_insn(c, inst);
         else
            emit_insn(c, inst);
         break;
      }
   }

   if (INTEL_DEBUG & DEBUG_WM) {
      printf("pass_fp:\n");
      print_insns( c->prog_instructions, c->nr_fp_insns );
      printf("\n");
   }
}
1183