Merge remote branch 'origin/master' into pipe-video
[mesa.git] / src / mesa / drivers / dri / i965 / brw_wm_fp.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "main/glheader.h"
34 #include "main/macros.h"
35 #include "main/enums.h"
36 #include "brw_context.h"
37 #include "brw_wm.h"
38 #include "brw_util.h"
39
40 #include "program/prog_parameter.h"
41 #include "program/prog_print.h"
42 #include "program/prog_statevars.h"
43
44
/** Sentinel texture target: one past the valid targets, i.e. "none". */
#define TEX_TARGET_NONE NUM_TEXTURE_TARGETS

/** Sentinel texture unit: one past the valid units, i.e. "none". */
#define TEX_UNIT_NONE BRW_MAX_TEX_UNIT

/** Register index of the first temporary handed out by get_temp(). */
#define FIRST_INTERNAL_TEMP MAX_NV_FRAGMENT_PROGRAM_TEMPS

/* Channel indices, used for swizzle selection below. */
#define X 0
#define Y 1
#define Z 2
#define W 3
57
58
/* Printable names of the WM-specific opcodes.  print_insns() indexes this
 * table with (Opcode - MAX_OPCODE), so the order of the entries has to
 * follow the numbering of those opcodes.
 */
static const char *wm_opcode_strings[] = {
   "PIXELXY", "DELTAXY", "PIXELW",
   "LINTERP", "PINTERP", "CINTERP",
   "WPOSXY", "FB_WRITE", "FRONTFACING",
};

#if 0
/* Register-file names; currently compiled out. */
static const char *wm_file_strings[] = { "PAYLOAD" };
#endif
76
77
78 /***********************************************************************
79 * Source regs
80 */
81
82 static struct prog_src_register src_reg(GLuint file, GLuint idx)
83 {
84 struct prog_src_register reg;
85 reg.File = file;
86 reg.Index = idx;
87 reg.Swizzle = SWIZZLE_NOOP;
88 reg.RelAddr = 0;
89 reg.Negate = NEGATE_NONE;
90 reg.Abs = 0;
91 reg.HasIndex2 = 0;
92 reg.RelAddr2 = 0;
93 reg.Index2 = 0;
94 return reg;
95 }
96
97 static struct prog_src_register src_reg_from_dst(struct prog_dst_register dst)
98 {
99 return src_reg(dst.File, dst.Index);
100 }
101
102 static struct prog_src_register src_undef( void )
103 {
104 return src_reg(PROGRAM_UNDEFINED, 0);
105 }
106
107 static GLboolean src_is_undef(struct prog_src_register src)
108 {
109 return src.File == PROGRAM_UNDEFINED;
110 }
111
112 static struct prog_src_register src_swizzle( struct prog_src_register reg, int x, int y, int z, int w )
113 {
114 reg.Swizzle = MAKE_SWIZZLE4(x,y,z,w);
115 return reg;
116 }
117
118 static struct prog_src_register src_swizzle1( struct prog_src_register reg, int x )
119 {
120 return src_swizzle(reg, x, x, x, x);
121 }
122
123 static struct prog_src_register src_swizzle4( struct prog_src_register reg, uint swizzle )
124 {
125 reg.Swizzle = swizzle;
126 return reg;
127 }
128
129
130 /***********************************************************************
131 * Dest regs
132 */
133
134 static struct prog_dst_register dst_reg(GLuint file, GLuint idx)
135 {
136 struct prog_dst_register reg;
137 reg.File = file;
138 reg.Index = idx;
139 reg.WriteMask = WRITEMASK_XYZW;
140 reg.RelAddr = 0;
141 reg.CondMask = COND_TR;
142 reg.CondSwizzle = 0;
143 reg.CondSrc = 0;
144 return reg;
145 }
146
147 static struct prog_dst_register dst_mask( struct prog_dst_register reg, int mask )
148 {
149 reg.WriteMask &= mask;
150 return reg;
151 }
152
153 static struct prog_dst_register dst_undef( void )
154 {
155 return dst_reg(PROGRAM_UNDEFINED, 0);
156 }
157
158
159
160 static struct prog_dst_register get_temp( struct brw_wm_compile *c )
161 {
162 int bit = _mesa_ffs( ~c->fp_temp );
163
164 if (!bit) {
165 printf("%s: out of temporaries\n", __FILE__);
166 exit(1);
167 }
168
169 c->fp_temp |= 1<<(bit-1);
170 return dst_reg(PROGRAM_TEMPORARY, FIRST_INTERNAL_TEMP+(bit-1));
171 }
172
173
174 static void release_temp( struct brw_wm_compile *c, struct prog_dst_register temp )
175 {
176 c->fp_temp &= ~(1 << (temp.Index - FIRST_INTERNAL_TEMP));
177 }
178
179
180 /***********************************************************************
181 * Instructions
182 */
183
184 static struct prog_instruction *get_fp_inst(struct brw_wm_compile *c)
185 {
186 assert(c->nr_fp_insns < BRW_WM_MAX_INSN);
187 memset(&c->prog_instructions[c->nr_fp_insns], 0,
188 sizeof(*c->prog_instructions));
189 return &c->prog_instructions[c->nr_fp_insns++];
190 }
191
192 static struct prog_instruction *emit_insn(struct brw_wm_compile *c,
193 const struct prog_instruction *inst0)
194 {
195 struct prog_instruction *inst = get_fp_inst(c);
196 *inst = *inst0;
197 return inst;
198 }
199
200 static struct prog_instruction * emit_tex_op(struct brw_wm_compile *c,
201 GLuint op,
202 struct prog_dst_register dest,
203 GLuint saturate,
204 GLuint tex_src_unit,
205 GLuint tex_src_target,
206 GLuint tex_shadow,
207 struct prog_src_register src0,
208 struct prog_src_register src1,
209 struct prog_src_register src2 )
210 {
211 struct prog_instruction *inst = get_fp_inst(c);
212
213 assert(tex_src_unit < BRW_MAX_TEX_UNIT ||
214 tex_src_unit == TEX_UNIT_NONE);
215 assert(tex_src_target < NUM_TEXTURE_TARGETS ||
216 tex_src_target == TEX_TARGET_NONE);
217
218 /* update mask of which texture units are referenced by this program */
219 if (tex_src_unit != TEX_UNIT_NONE)
220 c->fp->tex_units_used |= (1 << tex_src_unit);
221
222 memset(inst, 0, sizeof(*inst));
223
224 inst->Opcode = op;
225 inst->DstReg = dest;
226 inst->SaturateMode = saturate;
227 inst->TexSrcUnit = tex_src_unit;
228 inst->TexSrcTarget = tex_src_target;
229 inst->TexShadow = tex_shadow;
230 inst->SrcReg[0] = src0;
231 inst->SrcReg[1] = src1;
232 inst->SrcReg[2] = src2;
233 return inst;
234 }
235
236
237 static struct prog_instruction * emit_op(struct brw_wm_compile *c,
238 GLuint op,
239 struct prog_dst_register dest,
240 GLuint saturate,
241 struct prog_src_register src0,
242 struct prog_src_register src1,
243 struct prog_src_register src2 )
244 {
245 return emit_tex_op(c, op, dest, saturate,
246 TEX_UNIT_NONE, TEX_TARGET_NONE, 0, /* unit, tgt, shadow */
247 src0, src1, src2);
248 }
249
250
251 /* Many Mesa opcodes produce the same value across all the result channels.
252 * We'd rather not have to support that splatting in the opcode implementations,
253 * and brw_wm_pass*.c wants to optimize them out by shuffling references around
254 * anyway. We can easily get both by emitting the opcode to one channel, and
255 * then MOVing it to the others, which brw_wm_pass*.c already understands.
256 */
257 static struct prog_instruction *emit_scalar_insn(struct brw_wm_compile *c,
258 const struct prog_instruction *inst0)
259 {
260 struct prog_instruction *inst;
261 unsigned int dst_chan;
262 unsigned int other_channel_mask;
263
264 if (inst0->DstReg.WriteMask == 0)
265 return NULL;
266
267 dst_chan = _mesa_ffs(inst0->DstReg.WriteMask) - 1;
268 inst = get_fp_inst(c);
269 *inst = *inst0;
270 inst->DstReg.WriteMask = 1 << dst_chan;
271
272 other_channel_mask = inst0->DstReg.WriteMask & ~(1 << dst_chan);
273 if (other_channel_mask != 0) {
274 inst = emit_op(c,
275 OPCODE_MOV,
276 dst_mask(inst0->DstReg, other_channel_mask),
277 0,
278 src_swizzle1(src_reg_from_dst(inst0->DstReg), dst_chan),
279 src_undef(),
280 src_undef());
281 }
282 return inst;
283 }
284
285
286 /***********************************************************************
287 * Special instructions for interpolation and other tasks
288 */
289
290 static struct prog_src_register get_pixel_xy( struct brw_wm_compile *c )
291 {
292 if (src_is_undef(c->pixel_xy)) {
293 struct prog_dst_register pixel_xy = get_temp(c);
294 struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
295
296
297 /* Emit the out calculations, and hold onto the results. Use
298 * two instructions as a temporary is required.
299 */
300 /* pixel_xy.xy = PIXELXY payload[0];
301 */
302 emit_op(c,
303 WM_PIXELXY,
304 dst_mask(pixel_xy, WRITEMASK_XY),
305 0,
306 payload_r0_depth,
307 src_undef(),
308 src_undef());
309
310 c->pixel_xy = src_reg_from_dst(pixel_xy);
311 }
312
313 return c->pixel_xy;
314 }
315
316 static struct prog_src_register get_delta_xy( struct brw_wm_compile *c )
317 {
318 if (src_is_undef(c->delta_xy)) {
319 struct prog_dst_register delta_xy = get_temp(c);
320 struct prog_src_register pixel_xy = get_pixel_xy(c);
321 struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
322
323 /* deltas.xy = DELTAXY pixel_xy, payload[0]
324 */
325 emit_op(c,
326 WM_DELTAXY,
327 dst_mask(delta_xy, WRITEMASK_XY),
328 0,
329 pixel_xy,
330 payload_r0_depth,
331 src_undef());
332
333 c->delta_xy = src_reg_from_dst(delta_xy);
334 }
335
336 return c->delta_xy;
337 }
338
339 static struct prog_src_register get_pixel_w( struct brw_wm_compile *c )
340 {
341 /* This is called for producing 1/w in pre-gen6 interp. for gen6,
342 * the interp opcodes don't use this argument. But to keep the
343 * nr_args = 3 expectations of pinterp happy, just stuff delta_xy
344 * into the slot.
345 */
346 if (c->func.brw->intel.gen >= 6)
347 return c->delta_xy;
348
349 if (src_is_undef(c->pixel_w)) {
350 struct prog_dst_register pixel_w = get_temp(c);
351 struct prog_src_register deltas = get_delta_xy(c);
352 struct prog_src_register interp_wpos = src_reg(PROGRAM_PAYLOAD, FRAG_ATTRIB_WPOS);
353
354 /* deltas.xyw = DELTAS2 deltas.xy, payload.interp_wpos.x
355 */
356 emit_op(c,
357 WM_PIXELW,
358 dst_mask(pixel_w, WRITEMASK_W),
359 0,
360 interp_wpos,
361 deltas,
362 src_undef());
363
364
365 c->pixel_w = src_reg_from_dst(pixel_w);
366 }
367
368 return c->pixel_w;
369 }
370
371 static void emit_interp( struct brw_wm_compile *c,
372 GLuint idx )
373 {
374 struct prog_dst_register dst = dst_reg(PROGRAM_INPUT, idx);
375 struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx);
376 struct prog_src_register deltas;
377
378 deltas = get_delta_xy(c);
379
380 /* Need to use PINTERP on attributes which have been
381 * multiplied by 1/W in the SF program, and LINTERP on those
382 * which have not:
383 */
384 switch (idx) {
385 case FRAG_ATTRIB_WPOS:
386 /* Have to treat wpos.xy specially:
387 */
388 emit_op(c,
389 WM_WPOSXY,
390 dst_mask(dst, WRITEMASK_XY),
391 0,
392 get_pixel_xy(c),
393 src_undef(),
394 src_undef());
395
396 dst = dst_mask(dst, WRITEMASK_ZW);
397
398 /* PROGRAM_INPUT.attr.xyzw = INTERP payload.interp[attr].x, deltas.xyw
399 */
400 emit_op(c,
401 WM_LINTERP,
402 dst,
403 0,
404 interp,
405 deltas,
406 src_undef());
407 break;
408 case FRAG_ATTRIB_COL0:
409 case FRAG_ATTRIB_COL1:
410 if (c->key.flat_shade) {
411 emit_op(c,
412 WM_CINTERP,
413 dst,
414 0,
415 interp,
416 src_undef(),
417 src_undef());
418 }
419 else {
420 if (c->key.linear_color) {
421 emit_op(c,
422 WM_LINTERP,
423 dst,
424 0,
425 interp,
426 deltas,
427 src_undef());
428 }
429 else {
430 /* perspective-corrected color interpolation */
431 emit_op(c,
432 WM_PINTERP,
433 dst,
434 0,
435 interp,
436 deltas,
437 get_pixel_w(c));
438 }
439 }
440 break;
441 case FRAG_ATTRIB_FOGC:
442 /* Interpolate the fog coordinate */
443 emit_op(c,
444 WM_PINTERP,
445 dst_mask(dst, WRITEMASK_X),
446 0,
447 interp,
448 deltas,
449 get_pixel_w(c));
450
451 emit_op(c,
452 OPCODE_MOV,
453 dst_mask(dst, WRITEMASK_YZW),
454 0,
455 src_swizzle(interp,
456 SWIZZLE_ZERO,
457 SWIZZLE_ZERO,
458 SWIZZLE_ZERO,
459 SWIZZLE_ONE),
460 src_undef(),
461 src_undef());
462 break;
463
464 case FRAG_ATTRIB_FACE:
465 emit_op(c,
466 WM_FRONTFACING,
467 dst_mask(dst, WRITEMASK_X),
468 0,
469 src_undef(),
470 src_undef(),
471 src_undef());
472 break;
473
474 case FRAG_ATTRIB_PNTC:
475 /* XXX review/test this case */
476 emit_op(c,
477 WM_PINTERP,
478 dst_mask(dst, WRITEMASK_XY),
479 0,
480 interp,
481 deltas,
482 get_pixel_w(c));
483
484 emit_op(c,
485 OPCODE_MOV,
486 dst_mask(dst, WRITEMASK_ZW),
487 0,
488 src_swizzle(interp,
489 SWIZZLE_ZERO,
490 SWIZZLE_ZERO,
491 SWIZZLE_ZERO,
492 SWIZZLE_ONE),
493 src_undef(),
494 src_undef());
495 break;
496
497 default:
498 emit_op(c,
499 WM_PINTERP,
500 dst,
501 0,
502 interp,
503 deltas,
504 get_pixel_w(c));
505 break;
506 }
507
508 c->fp_interp_emitted |= 1<<idx;
509 }
510
511 /***********************************************************************
512 * Hacks to extend the program parameter and constant lists.
513 */
514
515 /* Add the fog parameters to the parameter list of the original
516 * program, rather than creating a new list. Doesn't really do any
517 * harm and it's not as if the parameter handling isn't a big hack
518 * anyway.
519 */
520 static struct prog_src_register search_or_add_param5(struct brw_wm_compile *c,
521 GLint s0,
522 GLint s1,
523 GLint s2,
524 GLint s3,
525 GLint s4)
526 {
527 struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters;
528 gl_state_index tokens[STATE_LENGTH];
529 GLuint idx;
530 tokens[0] = s0;
531 tokens[1] = s1;
532 tokens[2] = s2;
533 tokens[3] = s3;
534 tokens[4] = s4;
535
536 idx = _mesa_add_state_reference( paramList, tokens );
537
538 return src_reg(PROGRAM_STATE_VAR, idx);
539 }
540
541
542 static struct prog_src_register search_or_add_const4f( struct brw_wm_compile *c,
543 GLfloat s0,
544 GLfloat s1,
545 GLfloat s2,
546 GLfloat s3)
547 {
548 struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters;
549 GLfloat values[4];
550 GLuint idx;
551 GLuint swizzle;
552 struct prog_src_register reg;
553
554 values[0] = s0;
555 values[1] = s1;
556 values[2] = s2;
557 values[3] = s3;
558
559 idx = _mesa_add_unnamed_constant( paramList, values, 4, &swizzle );
560 reg = src_reg(PROGRAM_STATE_VAR, idx);
561 reg.Swizzle = swizzle;
562
563 return reg;
564 }
565
566
567
568 /***********************************************************************
569 * Expand various instructions here to simpler forms.
570 */
571 static void precalc_dst( struct brw_wm_compile *c,
572 const struct prog_instruction *inst )
573 {
574 struct prog_src_register src0 = inst->SrcReg[0];
575 struct prog_src_register src1 = inst->SrcReg[1];
576 struct prog_dst_register dst = inst->DstReg;
577
578 if (dst.WriteMask & WRITEMASK_Y) {
579 /* dst.y = mul src0.y, src1.y
580 */
581 emit_op(c,
582 OPCODE_MUL,
583 dst_mask(dst, WRITEMASK_Y),
584 inst->SaturateMode,
585 src0,
586 src1,
587 src_undef());
588 }
589
590 if (dst.WriteMask & WRITEMASK_XZ) {
591 struct prog_instruction *swz;
592 GLuint z = GET_SWZ(src0.Swizzle, Z);
593
594 /* dst.xz = swz src0.1zzz
595 */
596 swz = emit_op(c,
597 OPCODE_SWZ,
598 dst_mask(dst, WRITEMASK_XZ),
599 inst->SaturateMode,
600 src_swizzle(src0, SWIZZLE_ONE, z, z, z),
601 src_undef(),
602 src_undef());
603 /* Avoid letting negation flag of src0 affect our 1 constant. */
604 swz->SrcReg[0].Negate &= ~NEGATE_X;
605 }
606 if (dst.WriteMask & WRITEMASK_W) {
607 /* dst.w = mov src1.w
608 */
609 emit_op(c,
610 OPCODE_MOV,
611 dst_mask(dst, WRITEMASK_W),
612 inst->SaturateMode,
613 src1,
614 src_undef(),
615 src_undef());
616 }
617 }
618
619
620 static void precalc_lit( struct brw_wm_compile *c,
621 const struct prog_instruction *inst )
622 {
623 struct prog_src_register src0 = inst->SrcReg[0];
624 struct prog_dst_register dst = inst->DstReg;
625
626 if (dst.WriteMask & WRITEMASK_XW) {
627 struct prog_instruction *swz;
628
629 /* dst.xw = swz src0.1111
630 */
631 swz = emit_op(c,
632 OPCODE_SWZ,
633 dst_mask(dst, WRITEMASK_XW),
634 0,
635 src_swizzle1(src0, SWIZZLE_ONE),
636 src_undef(),
637 src_undef());
638 /* Avoid letting the negation flag of src0 affect our 1 constant. */
639 swz->SrcReg[0].Negate = NEGATE_NONE;
640 }
641
642 if (dst.WriteMask & WRITEMASK_YZ) {
643 emit_op(c,
644 OPCODE_LIT,
645 dst_mask(dst, WRITEMASK_YZ),
646 inst->SaturateMode,
647 src0,
648 src_undef(),
649 src_undef());
650 }
651 }
652
653
654 /**
655 * Some TEX instructions require extra code, cube map coordinate
656 * normalization, or coordinate scaling for RECT textures, etc.
657 * This function emits those extra instructions and the TEX
658 * instruction itself.
659 */
660 static void precalc_tex( struct brw_wm_compile *c,
661 const struct prog_instruction *inst )
662 {
663 struct prog_src_register coord;
664 struct prog_dst_register tmpcoord = { 0 };
665 const GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit];
666
667 assert(unit < BRW_MAX_TEX_UNIT);
668
669 if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX) {
670 struct prog_instruction *out;
671 struct prog_dst_register tmp0 = get_temp(c);
672 struct prog_src_register tmp0src = src_reg_from_dst(tmp0);
673 struct prog_dst_register tmp1 = get_temp(c);
674 struct prog_src_register tmp1src = src_reg_from_dst(tmp1);
675 struct prog_src_register src0 = inst->SrcReg[0];
676
677 /* find longest component of coord vector and normalize it */
678 tmpcoord = get_temp(c);
679 coord = src_reg_from_dst(tmpcoord);
680
681 /* tmpcoord = src0 (i.e.: coord = src0) */
682 out = emit_op(c, OPCODE_MOV,
683 tmpcoord,
684 0,
685 src0,
686 src_undef(),
687 src_undef());
688 out->SrcReg[0].Negate = NEGATE_NONE;
689 out->SrcReg[0].Abs = 1;
690
691 /* tmp0 = MAX(coord.X, coord.Y) */
692 emit_op(c, OPCODE_MAX,
693 tmp0,
694 0,
695 src_swizzle1(coord, X),
696 src_swizzle1(coord, Y),
697 src_undef());
698
699 /* tmp1 = MAX(tmp0, coord.Z) */
700 emit_op(c, OPCODE_MAX,
701 tmp1,
702 0,
703 tmp0src,
704 src_swizzle1(coord, Z),
705 src_undef());
706
707 /* tmp0 = 1 / tmp1 */
708 emit_op(c, OPCODE_RCP,
709 dst_mask(tmp0, WRITEMASK_X),
710 0,
711 tmp1src,
712 src_undef(),
713 src_undef());
714
715 /* tmpCoord = src0 * tmp0 */
716 emit_op(c, OPCODE_MUL,
717 tmpcoord,
718 0,
719 src0,
720 src_swizzle1(tmp0src, SWIZZLE_X),
721 src_undef());
722
723 release_temp(c, tmp0);
724 release_temp(c, tmp1);
725 }
726 else if (inst->TexSrcTarget == TEXTURE_RECT_INDEX) {
727 struct prog_src_register scale =
728 search_or_add_param5( c,
729 STATE_INTERNAL,
730 STATE_TEXRECT_SCALE,
731 unit,
732 0,0 );
733
734 tmpcoord = get_temp(c);
735
736 /* coord.xy = MUL inst->SrcReg[0], { 1/width, 1/height }
737 */
738 emit_op(c,
739 OPCODE_MUL,
740 tmpcoord,
741 0,
742 inst->SrcReg[0],
743 src_swizzle(scale,
744 SWIZZLE_X,
745 SWIZZLE_Y,
746 SWIZZLE_ONE,
747 SWIZZLE_ONE),
748 src_undef());
749
750 coord = src_reg_from_dst(tmpcoord);
751 }
752 else {
753 coord = inst->SrcReg[0];
754 }
755
756 /* Need to emit YUV texture conversions by hand. Probably need to
757 * do this here - the alternative is in brw_wm_emit.c, but the
758 * conversion requires allocating a temporary variable which we
759 * don't have the facility to do that late in the compilation.
760 */
761 if (c->key.yuvtex_mask & (1 << unit)) {
762 /* convert ycbcr to RGBA */
763 GLboolean swap_uv = c->key.yuvtex_swap_mask & (1<<unit);
764
765 /*
766 CONST C0 = { -.5, -.0625, -.5, 1.164 }
767 CONST C1 = { 1.596, -0.813, 2.018, -.391 }
768 UYV = TEX ...
769 UYV.xyz = ADD UYV, C0
770 UYV.y = MUL UYV.y, C0.w
771 if (UV swaped)
772 RGB.xyz = MAD UYV.zzx, C1, UYV.y
773 else
774 RGB.xyz = MAD UYV.xxz, C1, UYV.y
775 RGB.y = MAD UYV.z, C1.w, RGB.y
776 */
777 struct prog_dst_register dst = inst->DstReg;
778 struct prog_dst_register tmp = get_temp(c);
779 struct prog_src_register tmpsrc = src_reg_from_dst(tmp);
780 struct prog_src_register C0 = search_or_add_const4f( c, -.5, -.0625, -.5, 1.164 );
781 struct prog_src_register C1 = search_or_add_const4f( c, 1.596, -0.813, 2.018, -.391 );
782
783 /* tmp = TEX ...
784 */
785 emit_tex_op(c,
786 OPCODE_TEX,
787 tmp,
788 inst->SaturateMode,
789 unit,
790 inst->TexSrcTarget,
791 inst->TexShadow,
792 coord,
793 src_undef(),
794 src_undef());
795
796 /* tmp.xyz = ADD TMP, C0
797 */
798 emit_op(c,
799 OPCODE_ADD,
800 dst_mask(tmp, WRITEMASK_XYZ),
801 0,
802 tmpsrc,
803 C0,
804 src_undef());
805
806 /* YUV.y = MUL YUV.y, C0.w
807 */
808
809 emit_op(c,
810 OPCODE_MUL,
811 dst_mask(tmp, WRITEMASK_Y),
812 0,
813 tmpsrc,
814 src_swizzle1(C0, W),
815 src_undef());
816
817 /*
818 * if (UV swaped)
819 * RGB.xyz = MAD YUV.zzx, C1, YUV.y
820 * else
821 * RGB.xyz = MAD YUV.xxz, C1, YUV.y
822 */
823
824 emit_op(c,
825 OPCODE_MAD,
826 dst_mask(dst, WRITEMASK_XYZ),
827 0,
828 swap_uv?src_swizzle(tmpsrc, Z,Z,X,X):src_swizzle(tmpsrc, X,X,Z,Z),
829 C1,
830 src_swizzle1(tmpsrc, Y));
831
832 /* RGB.y = MAD YUV.z, C1.w, RGB.y
833 */
834 emit_op(c,
835 OPCODE_MAD,
836 dst_mask(dst, WRITEMASK_Y),
837 0,
838 src_swizzle1(tmpsrc, Z),
839 src_swizzle1(C1, W),
840 src_swizzle1(src_reg_from_dst(dst), Y));
841
842 release_temp(c, tmp);
843 }
844 else {
845 /* ordinary RGBA tex instruction */
846 emit_tex_op(c,
847 OPCODE_TEX,
848 inst->DstReg,
849 inst->SaturateMode,
850 unit,
851 inst->TexSrcTarget,
852 inst->TexShadow,
853 coord,
854 src_undef(),
855 src_undef());
856 }
857
858 /* For GL_EXT_texture_swizzle: */
859 if (c->key.tex_swizzles[unit] != SWIZZLE_NOOP) {
860 /* swizzle the result of the TEX instruction */
861 struct prog_src_register tmpsrc = src_reg_from_dst(inst->DstReg);
862 emit_op(c, OPCODE_SWZ,
863 inst->DstReg,
864 SATURATE_OFF, /* saturate already done above */
865 src_swizzle4(tmpsrc, c->key.tex_swizzles[unit]),
866 src_undef(),
867 src_undef());
868 }
869
870 if ((inst->TexSrcTarget == TEXTURE_RECT_INDEX) ||
871 (inst->TexSrcTarget == TEXTURE_CUBE_INDEX))
872 release_temp(c, tmpcoord);
873 }
874
875
876 /**
877 * Check if the given TXP instruction really needs the divide-by-W step.
878 */
879 static GLboolean projtex( struct brw_wm_compile *c,
880 const struct prog_instruction *inst )
881 {
882 const struct prog_src_register src = inst->SrcReg[0];
883 GLboolean retVal;
884
885 assert(inst->Opcode == OPCODE_TXP);
886
887 /* Only try to detect the simplest cases. Could detect (later)
888 * cases where we are trying to emit code like RCP {1.0}, MUL x,
889 * {1.0}, and so on.
890 *
891 * More complex cases than this typically only arise from
892 * user-provided fragment programs anyway:
893 */
894 if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX)
895 retVal = GL_FALSE; /* ut2004 gun rendering !?! */
896 else if (src.File == PROGRAM_INPUT &&
897 GET_SWZ(src.Swizzle, W) == W &&
898 (c->key.proj_attrib_mask & (1 << src.Index)) == 0)
899 retVal = GL_FALSE;
900 else
901 retVal = GL_TRUE;
902
903 return retVal;
904 }
905
906
907 /**
908 * Emit code for TXP.
909 */
910 static void precalc_txp( struct brw_wm_compile *c,
911 const struct prog_instruction *inst )
912 {
913 struct prog_src_register src0 = inst->SrcReg[0];
914
915 if (projtex(c, inst)) {
916 struct prog_dst_register tmp = get_temp(c);
917 struct prog_instruction tmp_inst;
918
919 /* tmp0.w = RCP inst.arg[0][3]
920 */
921 emit_op(c,
922 OPCODE_RCP,
923 dst_mask(tmp, WRITEMASK_W),
924 0,
925 src_swizzle1(src0, GET_SWZ(src0.Swizzle, W)),
926 src_undef(),
927 src_undef());
928
929 /* tmp0.xyz = MUL inst.arg[0], tmp0.wwww
930 */
931 emit_op(c,
932 OPCODE_MUL,
933 dst_mask(tmp, WRITEMASK_XYZ),
934 0,
935 src0,
936 src_swizzle1(src_reg_from_dst(tmp), W),
937 src_undef());
938
939 /* dst = precalc(TEX tmp0)
940 */
941 tmp_inst = *inst;
942 tmp_inst.SrcReg[0] = src_reg_from_dst(tmp);
943 precalc_tex(c, &tmp_inst);
944
945 release_temp(c, tmp);
946 }
947 else
948 {
949 /* dst = precalc(TEX src0)
950 */
951 precalc_tex(c, inst);
952 }
953 }
954
955
956
957 static void emit_render_target_writes( struct brw_wm_compile *c )
958 {
959 struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
960 struct prog_src_register outdepth = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DEPTH);
961 struct prog_src_register outcolor;
962 GLuint i;
963
964 struct prog_instruction *inst, *last_inst = NULL;
965
966 /* The inst->Aux field is used for FB write target and the EOT marker */
967
968 if (c->key.nr_color_regions > 1) {
969 for (i = 0 ; i < c->key.nr_color_regions; i++) {
970 outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0 + i);
971 last_inst = inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(), 0),
972 0, outcolor, payload_r0_depth, outdepth);
973 inst->Aux = INST_AUX_TARGET(i);
974 if (c->fp_fragcolor_emitted) {
975 outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR);
976 last_inst = inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(), 0),
977 0, outcolor, payload_r0_depth, outdepth);
978 inst->Aux = INST_AUX_TARGET(i);
979 }
980 }
981 last_inst->Aux |= INST_AUX_EOT;
982 }
983 else {
984 /* if gl_FragData[0] is written, use it, else use gl_FragColor */
985 if (c->fp->program.Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DATA0))
986 outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0);
987 else
988 outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR);
989
990 inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0),
991 0, outcolor, payload_r0_depth, outdepth);
992 inst->Aux = INST_AUX_EOT | INST_AUX_TARGET(0);
993 }
994 }
995
996
997
998
999 /***********************************************************************
1000 * Emit INTERP instructions ahead of first use of each attrib.
1001 */
1002
1003 static void validate_src_regs( struct brw_wm_compile *c,
1004 const struct prog_instruction *inst )
1005 {
1006 GLuint nr_args = brw_wm_nr_args( inst->Opcode );
1007 GLuint i;
1008
1009 for (i = 0; i < nr_args; i++) {
1010 if (inst->SrcReg[i].File == PROGRAM_INPUT) {
1011 GLuint idx = inst->SrcReg[i].Index;
1012 if (!(c->fp_interp_emitted & (1<<idx))) {
1013 emit_interp(c, idx);
1014 }
1015 }
1016 }
1017 }
1018
1019 static void validate_dst_regs( struct brw_wm_compile *c,
1020 const struct prog_instruction *inst )
1021 {
1022 if (inst->DstReg.File == PROGRAM_OUTPUT) {
1023 GLuint idx = inst->DstReg.Index;
1024 if (idx == FRAG_RESULT_COLOR)
1025 c->fp_fragcolor_emitted = 1;
1026 }
1027 }
1028
1029 static void print_insns( const struct prog_instruction *insn,
1030 GLuint nr )
1031 {
1032 GLuint i;
1033 for (i = 0; i < nr; i++, insn++) {
1034 printf("%3d: ", i);
1035 if (insn->Opcode < MAX_OPCODE)
1036 _mesa_fprint_instruction_opt(stdout, insn, 0, PROG_PRINT_DEBUG, NULL);
1037 else if (insn->Opcode < MAX_WM_OPCODE) {
1038 GLuint idx = insn->Opcode - MAX_OPCODE;
1039
1040 _mesa_fprint_alu_instruction(stdout, insn, wm_opcode_strings[idx],
1041 3, PROG_PRINT_DEBUG, NULL);
1042 }
1043 else
1044 printf("965 Opcode %d\n", insn->Opcode);
1045 }
1046 }
1047
1048
1049 /**
1050 * Initial pass for fragment program code generation.
1051 * This function is used by both the GLSL and non-GLSL paths.
1052 */
1053 void brw_wm_pass_fp( struct brw_wm_compile *c )
1054 {
1055 struct intel_context *intel = &c->func.brw->intel;
1056 struct brw_fragment_program *fp = c->fp;
1057 GLuint insn;
1058
1059 if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
1060 printf("pre-fp:\n");
1061 _mesa_fprint_program_opt(stdout, &fp->program.Base, PROG_PRINT_DEBUG,
1062 GL_TRUE);
1063 printf("\n");
1064 }
1065
1066 c->pixel_xy = src_undef();
1067 if (intel->gen >= 6) {
1068 /* The interpolation deltas come in as the perspective pixel
1069 * location barycentric params.
1070 */
1071 c->delta_xy = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
1072 } else {
1073 c->delta_xy = src_undef();
1074 }
1075 c->pixel_w = src_undef();
1076 c->nr_fp_insns = 0;
1077 c->fp->tex_units_used = 0x0;
1078
1079 /* Emit preamble instructions. This is where special instructions such as
1080 * WM_CINTERP, WM_LINTERP, WM_PINTERP and WM_WPOSXY are emitted to
1081 * compute shader inputs from varying vars.
1082 */
1083 for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) {
1084 const struct prog_instruction *inst = &fp->program.Base.Instructions[insn];
1085 validate_src_regs(c, inst);
1086 validate_dst_regs(c, inst);
1087 }
1088
1089 /* Loop over all instructions doing assorted simplifications and
1090 * transformations.
1091 */
1092 for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) {
1093 const struct prog_instruction *inst = &fp->program.Base.Instructions[insn];
1094 struct prog_instruction *out;
1095
1096 /* Check for INPUT values, emit INTERP instructions where
1097 * necessary:
1098 */
1099
1100 switch (inst->Opcode) {
1101 case OPCODE_SWZ:
1102 out = emit_insn(c, inst);
1103 out->Opcode = OPCODE_MOV;
1104 break;
1105
1106 case OPCODE_ABS:
1107 out = emit_insn(c, inst);
1108 out->Opcode = OPCODE_MOV;
1109 out->SrcReg[0].Negate = NEGATE_NONE;
1110 out->SrcReg[0].Abs = 1;
1111 break;
1112
1113 case OPCODE_SUB:
1114 out = emit_insn(c, inst);
1115 out->Opcode = OPCODE_ADD;
1116 out->SrcReg[1].Negate ^= NEGATE_XYZW;
1117 break;
1118
1119 case OPCODE_SCS:
1120 out = emit_insn(c, inst);
1121 /* This should probably be done in the parser.
1122 */
1123 out->DstReg.WriteMask &= WRITEMASK_XY;
1124 break;
1125
1126 case OPCODE_DST:
1127 precalc_dst(c, inst);
1128 break;
1129
1130 case OPCODE_LIT:
1131 precalc_lit(c, inst);
1132 break;
1133
1134 case OPCODE_RSQ:
1135 out = emit_scalar_insn(c, inst);
1136 out->SrcReg[0].Abs = GL_TRUE;
1137 break;
1138
1139 case OPCODE_TEX:
1140 precalc_tex(c, inst);
1141 break;
1142
1143 case OPCODE_TXP:
1144 precalc_txp(c, inst);
1145 break;
1146
1147 case OPCODE_TXB:
1148 out = emit_insn(c, inst);
1149 out->TexSrcUnit = fp->program.Base.SamplerUnits[inst->TexSrcUnit];
1150 assert(out->TexSrcUnit < BRW_MAX_TEX_UNIT);
1151 break;
1152
1153 case OPCODE_XPD:
1154 out = emit_insn(c, inst);
1155 /* This should probably be done in the parser.
1156 */
1157 out->DstReg.WriteMask &= WRITEMASK_XYZ;
1158 break;
1159
1160 case OPCODE_KIL:
1161 out = emit_insn(c, inst);
1162 /* This should probably be done in the parser.
1163 */
1164 out->DstReg.WriteMask = 0;
1165 break;
1166 case OPCODE_END:
1167 emit_render_target_writes(c);
1168 break;
1169 case OPCODE_PRINT:
1170 break;
1171 default:
1172 if (brw_wm_is_scalar_result(inst->Opcode))
1173 emit_scalar_insn(c, inst);
1174 else
1175 emit_insn(c, inst);
1176 break;
1177 }
1178 }
1179
1180 if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
1181 printf("pass_fp:\n");
1182 print_insns( c->prog_instructions, c->nr_fp_insns );
1183 printf("\n");
1184 }
1185 }
1186