radeon/r200/r300: cleanup some of the renderbuffer code
[mesa.git] / src / mesa / drivers / dri / i965 / brw_wm_fp.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "main/glheader.h"
34 #include "main/macros.h"
35 #include "main/enums.h"
36 #include "brw_context.h"
37 #include "brw_wm.h"
38 #include "brw_util.h"
39
40 #include "shader/prog_parameter.h"
41 #include "shader/prog_print.h"
42 #include "shader/prog_statevars.h"
43
44
/* Index of the first temporary reserved for instructions generated by
 * this pass; get_temp() hands out registers numbered upward from here.
 */
#define FIRST_INTERNAL_TEMP   (MAX_NV_FRAGMENT_PROGRAM_TEMPS)

/* Short names for the four vector components, used as swizzle selectors. */
#define X    0
#define Y    1
#define Z    2
#define W    3
51
52
/* Printable names for the WM_* pseudo-opcodes.  print_insns() indexes
 * this table with (Opcode - MAX_OPCODE), so the entries must stay in the
 * same order as the corresponding opcode values.
 */
static const char *wm_opcode_strings[] = {
   "PIXELXY", "DELTAXY", "PIXELW",
   "LINTERP", "PINTERP", "CINTERP",
   "WPOSXY",
   "FB_WRITE"
};

/* Compiled out: register-file names, not referenced anywhere in this file. */
#if 0
static const char *wm_file_strings[] = {
   "PAYLOAD"
};
#endif
69
70
71 /***********************************************************************
72 * Source regs
73 */
74
75 static struct prog_src_register src_reg(GLuint file, GLuint idx)
76 {
77 struct prog_src_register reg;
78 reg.File = file;
79 reg.Index = idx;
80 reg.Swizzle = SWIZZLE_NOOP;
81 reg.RelAddr = 0;
82 reg.NegateBase = 0;
83 reg.Abs = 0;
84 reg.NegateAbs = 0;
85 return reg;
86 }
87
88 static struct prog_src_register src_reg_from_dst(struct prog_dst_register dst)
89 {
90 return src_reg(dst.File, dst.Index);
91 }
92
93 static struct prog_src_register src_undef( void )
94 {
95 return src_reg(PROGRAM_UNDEFINED, 0);
96 }
97
98 static GLboolean src_is_undef(struct prog_src_register src)
99 {
100 return src.File == PROGRAM_UNDEFINED;
101 }
102
103 static struct prog_src_register src_swizzle( struct prog_src_register reg, int x, int y, int z, int w )
104 {
105 reg.Swizzle = MAKE_SWIZZLE4(x,y,z,w);
106 return reg;
107 }
108
109 static struct prog_src_register src_swizzle1( struct prog_src_register reg, int x )
110 {
111 return src_swizzle(reg, x, x, x, x);
112 }
113
114 static struct prog_src_register src_swizzle4( struct prog_src_register reg, uint swizzle )
115 {
116 reg.Swizzle = swizzle;
117 return reg;
118 }
119
120
121 /***********************************************************************
122 * Dest regs
123 */
124
125 static struct prog_dst_register dst_reg(GLuint file, GLuint idx)
126 {
127 struct prog_dst_register reg;
128 reg.File = file;
129 reg.Index = idx;
130 reg.WriteMask = WRITEMASK_XYZW;
131 reg.RelAddr = 0;
132 reg.CondMask = 0;
133 reg.CondSwizzle = 0;
134 reg.CondSrc = 0;
135 reg.pad = 0;
136 return reg;
137 }
138
139 static struct prog_dst_register dst_mask( struct prog_dst_register reg, int mask )
140 {
141 reg.WriteMask &= mask;
142 return reg;
143 }
144
145 static struct prog_dst_register dst_undef( void )
146 {
147 return dst_reg(PROGRAM_UNDEFINED, 0);
148 }
149
150
151
152 static struct prog_dst_register get_temp( struct brw_wm_compile *c )
153 {
154 int bit = _mesa_ffs( ~c->fp_temp );
155
156 if (!bit) {
157 _mesa_printf("%s: out of temporaries\n", __FILE__);
158 exit(1);
159 }
160
161 c->fp_temp |= 1<<(bit-1);
162 return dst_reg(PROGRAM_TEMPORARY, FIRST_INTERNAL_TEMP+(bit-1));
163 }
164
165
166 static void release_temp( struct brw_wm_compile *c, struct prog_dst_register temp )
167 {
168 c->fp_temp &= ~(1 << (temp.Index - FIRST_INTERNAL_TEMP));
169 }
170
171
172 /***********************************************************************
173 * Instructions
174 */
175
176 static struct prog_instruction *get_fp_inst(struct brw_wm_compile *c)
177 {
178 return &c->prog_instructions[c->nr_fp_insns++];
179 }
180
181 static struct prog_instruction *emit_insn(struct brw_wm_compile *c,
182 const struct prog_instruction *inst0)
183 {
184 struct prog_instruction *inst = get_fp_inst(c);
185 *inst = *inst0;
186 inst->Data = (void *)inst0;
187 return inst;
188 }
189
190 static struct prog_instruction * emit_op(struct brw_wm_compile *c,
191 GLuint op,
192 struct prog_dst_register dest,
193 GLuint saturate,
194 GLuint tex_src_unit,
195 GLuint tex_src_target,
196 struct prog_src_register src0,
197 struct prog_src_register src1,
198 struct prog_src_register src2 )
199 {
200 struct prog_instruction *inst = get_fp_inst(c);
201
202 memset(inst, 0, sizeof(*inst));
203
204 inst->Opcode = op;
205 inst->DstReg = dest;
206 inst->SaturateMode = saturate;
207 inst->TexSrcUnit = tex_src_unit;
208 inst->TexSrcTarget = tex_src_target;
209 inst->SrcReg[0] = src0;
210 inst->SrcReg[1] = src1;
211 inst->SrcReg[2] = src2;
212 return inst;
213 }
214
215
216
217
218 /***********************************************************************
219 * Special instructions for interpolation and other tasks
220 */
221
222 static struct prog_src_register get_pixel_xy( struct brw_wm_compile *c )
223 {
224 if (src_is_undef(c->pixel_xy)) {
225 struct prog_dst_register pixel_xy = get_temp(c);
226 struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
227
228
229 /* Emit the out calculations, and hold onto the results. Use
230 * two instructions as a temporary is required.
231 */
232 /* pixel_xy.xy = PIXELXY payload[0];
233 */
234 emit_op(c,
235 WM_PIXELXY,
236 dst_mask(pixel_xy, WRITEMASK_XY),
237 0, 0, 0,
238 payload_r0_depth,
239 src_undef(),
240 src_undef());
241
242 c->pixel_xy = src_reg_from_dst(pixel_xy);
243 }
244
245 return c->pixel_xy;
246 }
247
248 static struct prog_src_register get_delta_xy( struct brw_wm_compile *c )
249 {
250 if (src_is_undef(c->delta_xy)) {
251 struct prog_dst_register delta_xy = get_temp(c);
252 struct prog_src_register pixel_xy = get_pixel_xy(c);
253 struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
254
255 /* deltas.xy = DELTAXY pixel_xy, payload[0]
256 */
257 emit_op(c,
258 WM_DELTAXY,
259 dst_mask(delta_xy, WRITEMASK_XY),
260 0, 0, 0,
261 pixel_xy,
262 payload_r0_depth,
263 src_undef());
264
265 c->delta_xy = src_reg_from_dst(delta_xy);
266 }
267
268 return c->delta_xy;
269 }
270
271 static struct prog_src_register get_pixel_w( struct brw_wm_compile *c )
272 {
273 if (src_is_undef(c->pixel_w)) {
274 struct prog_dst_register pixel_w = get_temp(c);
275 struct prog_src_register deltas = get_delta_xy(c);
276 struct prog_src_register interp_wpos = src_reg(PROGRAM_PAYLOAD, FRAG_ATTRIB_WPOS);
277
278
279 /* deltas.xyw = DELTAS2 deltas.xy, payload.interp_wpos.x
280 */
281 emit_op(c,
282 WM_PIXELW,
283 dst_mask(pixel_w, WRITEMASK_W),
284 0, 0, 0,
285 interp_wpos,
286 deltas,
287 src_undef());
288
289
290 c->pixel_w = src_reg_from_dst(pixel_w);
291 }
292
293 return c->pixel_w;
294 }
295
296 static void emit_interp( struct brw_wm_compile *c,
297 GLuint idx )
298 {
299 struct prog_dst_register dst = dst_reg(PROGRAM_INPUT, idx);
300 struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx);
301 struct prog_src_register deltas = get_delta_xy(c);
302 struct prog_src_register arg2;
303 GLuint opcode;
304
305 /* Need to use PINTERP on attributes which have been
306 * multiplied by 1/W in the SF program, and LINTERP on those
307 * which have not:
308 */
309 switch (idx) {
310 case FRAG_ATTRIB_WPOS:
311 opcode = WM_LINTERP;
312 arg2 = src_undef();
313
314 /* Have to treat wpos.xy specially:
315 */
316 emit_op(c,
317 WM_WPOSXY,
318 dst_mask(dst, WRITEMASK_XY),
319 0, 0, 0,
320 get_pixel_xy(c),
321 src_undef(),
322 src_undef());
323
324 dst = dst_mask(dst, WRITEMASK_ZW);
325
326 /* PROGRAM_INPUT.attr.xyzw = INTERP payload.interp[attr].x, deltas.xyw
327 */
328 emit_op(c,
329 WM_LINTERP,
330 dst,
331 0, 0, 0,
332 interp,
333 deltas,
334 arg2);
335 break;
336 case FRAG_ATTRIB_COL0:
337 case FRAG_ATTRIB_COL1:
338 if (c->key.flat_shade) {
339 emit_op(c,
340 WM_CINTERP,
341 dst,
342 0, 0, 0,
343 interp,
344 src_undef(),
345 src_undef());
346 }
347 else {
348 emit_op(c,
349 WM_LINTERP,
350 dst,
351 0, 0, 0,
352 interp,
353 deltas,
354 src_undef());
355 }
356 break;
357 default:
358 emit_op(c,
359 WM_PINTERP,
360 dst,
361 0, 0, 0,
362 interp,
363 deltas,
364 get_pixel_w(c));
365 break;
366 }
367
368 c->fp_interp_emitted |= 1<<idx;
369 }
370
371 static void emit_ddx( struct brw_wm_compile *c,
372 const struct prog_instruction *inst )
373 {
374 GLuint idx = inst->SrcReg[0].Index;
375 struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx);
376
377 c->fp_deriv_emitted |= 1<<idx;
378 emit_op(c,
379 OPCODE_DDX,
380 inst->DstReg,
381 0, 0, 0,
382 interp,
383 get_pixel_w(c),
384 src_undef());
385 }
386
387 static void emit_ddy( struct brw_wm_compile *c,
388 const struct prog_instruction *inst )
389 {
390 GLuint idx = inst->SrcReg[0].Index;
391 struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx);
392
393 c->fp_deriv_emitted |= 1<<idx;
394 emit_op(c,
395 OPCODE_DDY,
396 inst->DstReg,
397 0, 0, 0,
398 interp,
399 get_pixel_w(c),
400 src_undef());
401 }
402
403 /***********************************************************************
404 * Hacks to extend the program parameter and constant lists.
405 */
406
407 /* Add the fog parameters to the parameter list of the original
408 * program, rather than creating a new list. Doesn't really do any
409 * harm and it's not as if the parameter handling isn't a big hack
410 * anyway.
411 */
412 static struct prog_src_register search_or_add_param5(struct brw_wm_compile *c,
413 GLint s0,
414 GLint s1,
415 GLint s2,
416 GLint s3,
417 GLint s4)
418 {
419 struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters;
420 gl_state_index tokens[STATE_LENGTH];
421 GLuint idx;
422 tokens[0] = s0;
423 tokens[1] = s1;
424 tokens[2] = s2;
425 tokens[3] = s3;
426 tokens[4] = s4;
427
428 for (idx = 0; idx < paramList->NumParameters; idx++) {
429 if (paramList->Parameters[idx].Type == PROGRAM_STATE_VAR &&
430 memcmp(paramList->Parameters[idx].StateIndexes, tokens, sizeof(tokens)) == 0)
431 return src_reg(PROGRAM_STATE_VAR, idx);
432 }
433
434 idx = _mesa_add_state_reference( paramList, tokens );
435
436 return src_reg(PROGRAM_STATE_VAR, idx);
437 }
438
439
440 static struct prog_src_register search_or_add_const4f( struct brw_wm_compile *c,
441 GLfloat s0,
442 GLfloat s1,
443 GLfloat s2,
444 GLfloat s3)
445 {
446 struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters;
447 GLfloat values[4];
448 GLuint idx;
449 GLuint swizzle;
450
451 values[0] = s0;
452 values[1] = s1;
453 values[2] = s2;
454 values[3] = s3;
455
456 /* Have to search, otherwise multiple compilations will each grow
457 * the parameter list.
458 */
459 for (idx = 0; idx < paramList->NumParameters; idx++) {
460 if (paramList->Parameters[idx].Type == PROGRAM_CONSTANT &&
461 memcmp(paramList->ParameterValues[idx], values, sizeof(values)) == 0)
462
463 /* XXX: this mimics the mesa bug which puts all constants and
464 * parameters into the "PROGRAM_STATE_VAR" category:
465 */
466 return src_reg(PROGRAM_STATE_VAR, idx);
467 }
468
469 idx = _mesa_add_unnamed_constant( paramList, values, 4, &swizzle );
470 assert(swizzle == SWIZZLE_NOOP); /* Need to handle swizzle in reg setup */
471 return src_reg(PROGRAM_STATE_VAR, idx);
472 }
473
474
475
476 /***********************************************************************
477 * Expand various instructions here to simpler forms.
478 */
479 static void precalc_dst( struct brw_wm_compile *c,
480 const struct prog_instruction *inst )
481 {
482 struct prog_src_register src0 = inst->SrcReg[0];
483 struct prog_src_register src1 = inst->SrcReg[1];
484 struct prog_dst_register dst = inst->DstReg;
485
486 if (dst.WriteMask & WRITEMASK_Y) {
487 /* dst.y = mul src0.y, src1.y
488 */
489 emit_op(c,
490 OPCODE_MUL,
491 dst_mask(dst, WRITEMASK_Y),
492 inst->SaturateMode, 0, 0,
493 src0,
494 src1,
495 src_undef());
496 }
497
498
499 if (dst.WriteMask & WRITEMASK_XZ) {
500 struct prog_instruction *swz;
501 GLuint z = GET_SWZ(src0.Swizzle, Z);
502
503 /* dst.xz = swz src0.1zzz
504 */
505 swz = emit_op(c,
506 OPCODE_SWZ,
507 dst_mask(dst, WRITEMASK_XZ),
508 inst->SaturateMode, 0, 0,
509 src_swizzle(src0, SWIZZLE_ONE, z, z, z),
510 src_undef(),
511 src_undef());
512 /* Avoid letting negation flag of src0 affect our 1 constant. */
513 swz->SrcReg[0].NegateBase &= ~NEGATE_X;
514 }
515 if (dst.WriteMask & WRITEMASK_W) {
516 /* dst.w = mov src1.w
517 */
518 emit_op(c,
519 OPCODE_MOV,
520 dst_mask(dst, WRITEMASK_W),
521 inst->SaturateMode, 0, 0,
522 src1,
523 src_undef(),
524 src_undef());
525 }
526 }
527
528
529 static void precalc_lit( struct brw_wm_compile *c,
530 const struct prog_instruction *inst )
531 {
532 struct prog_src_register src0 = inst->SrcReg[0];
533 struct prog_dst_register dst = inst->DstReg;
534
535 if (dst.WriteMask & WRITEMASK_XW) {
536 struct prog_instruction *swz;
537
538 /* dst.xw = swz src0.1111
539 */
540 swz = emit_op(c,
541 OPCODE_SWZ,
542 dst_mask(dst, WRITEMASK_XW),
543 0, 0, 0,
544 src_swizzle1(src0, SWIZZLE_ONE),
545 src_undef(),
546 src_undef());
547 /* Avoid letting the negation flag of src0 affect our 1 constant. */
548 swz->SrcReg[0].NegateBase = 0;
549 }
550
551
552 if (dst.WriteMask & WRITEMASK_YZ) {
553 emit_op(c,
554 OPCODE_LIT,
555 dst_mask(dst, WRITEMASK_YZ),
556 inst->SaturateMode, 0, 0,
557 src0,
558 src_undef(),
559 src_undef());
560 }
561 }
562
563
564 /**
565 * Some TEX instructions require extra code, cube map coordinate
566 * normalization, or coordinate scaling for RECT textures, etc.
567 * This function emits those extra instructions and the TEX
568 * instruction itself.
569 */
570 static void precalc_tex( struct brw_wm_compile *c,
571 const struct prog_instruction *inst )
572 {
573 struct prog_src_register coord;
574 struct prog_dst_register tmpcoord;
575 const GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit];
576
577 if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX) {
578 struct prog_instruction *out;
579 struct prog_dst_register tmp0 = get_temp(c);
580 struct prog_src_register tmp0src = src_reg_from_dst(tmp0);
581 struct prog_dst_register tmp1 = get_temp(c);
582 struct prog_src_register tmp1src = src_reg_from_dst(tmp1);
583 struct prog_src_register src0 = inst->SrcReg[0];
584
585 /* find longest component of coord vector and normalize it */
586 tmpcoord = get_temp(c);
587 coord = src_reg_from_dst(tmpcoord);
588
589 /* tmpcoord = src0 (i.e.: coord = src0) */
590 out = emit_op(c, OPCODE_MOV,
591 tmpcoord,
592 0, 0, 0,
593 src0,
594 src_undef(),
595 src_undef());
596 out->SrcReg[0].NegateBase = 0;
597 out->SrcReg[0].Abs = 1;
598
599 /* tmp0 = MAX(coord.X, coord.Y) */
600 emit_op(c, OPCODE_MAX,
601 tmp0,
602 0, 0, 0,
603 src_swizzle1(coord, X),
604 src_swizzle1(coord, Y),
605 src_undef());
606
607 /* tmp1 = MAX(tmp0, coord.Z) */
608 emit_op(c, OPCODE_MAX,
609 tmp1,
610 0, 0, 0,
611 tmp0src,
612 src_swizzle1(coord, Z),
613 src_undef());
614
615 /* tmp0 = 1 / tmp1 */
616 emit_op(c, OPCODE_RCP,
617 tmp0,
618 0, 0, 0,
619 tmp1src,
620 src_undef(),
621 src_undef());
622
623 /* tmpCoord = src0 * tmp0 */
624 emit_op(c, OPCODE_MUL,
625 tmpcoord,
626 0, 0, 0,
627 src0,
628 tmp0src,
629 src_undef());
630
631 release_temp(c, tmp0);
632 release_temp(c, tmp1);
633 }
634 else if (inst->TexSrcTarget == TEXTURE_RECT_INDEX) {
635 struct prog_src_register scale =
636 search_or_add_param5( c,
637 STATE_INTERNAL,
638 STATE_TEXRECT_SCALE,
639 unit,
640 0,0 );
641
642 tmpcoord = get_temp(c);
643
644 /* coord.xy = MUL inst->SrcReg[0], { 1/width, 1/height }
645 */
646 emit_op(c,
647 OPCODE_MUL,
648 tmpcoord,
649 0, 0, 0,
650 inst->SrcReg[0],
651 scale,
652 src_undef());
653
654 coord = src_reg_from_dst(tmpcoord);
655 }
656 else {
657 coord = inst->SrcReg[0];
658 }
659
660 /* Need to emit YUV texture conversions by hand. Probably need to
661 * do this here - the alternative is in brw_wm_emit.c, but the
662 * conversion requires allocating a temporary variable which we
663 * don't have the facility to do that late in the compilation.
664 */
665 if (c->key.yuvtex_mask & (1 << unit)) {
666 /* convert ycbcr to RGBA */
667 GLboolean swap_uv = c->key.yuvtex_swap_mask & (1<<unit);
668
669 /*
670 CONST C0 = { -.5, -.0625, -.5, 1.164 }
671 CONST C1 = { 1.596, -0.813, 2.018, -.391 }
672 UYV = TEX ...
673 UYV.xyz = ADD UYV, C0
674 UYV.y = MUL UYV.y, C0.w
675 if (UV swaped)
676 RGB.xyz = MAD UYV.zzx, C1, UYV.y
677 else
678 RGB.xyz = MAD UYV.xxz, C1, UYV.y
679 RGB.y = MAD UYV.z, C1.w, RGB.y
680 */
681 struct prog_dst_register dst = inst->DstReg;
682 struct prog_dst_register tmp = get_temp(c);
683 struct prog_src_register tmpsrc = src_reg_from_dst(tmp);
684 struct prog_src_register C0 = search_or_add_const4f( c, -.5, -.0625, -.5, 1.164 );
685 struct prog_src_register C1 = search_or_add_const4f( c, 1.596, -0.813, 2.018, -.391 );
686
687 /* tmp = TEX ...
688 */
689 emit_op(c,
690 OPCODE_TEX,
691 tmp,
692 inst->SaturateMode,
693 unit,
694 inst->TexSrcTarget,
695 coord,
696 src_undef(),
697 src_undef());
698
699 /* tmp.xyz = ADD TMP, C0
700 */
701 emit_op(c,
702 OPCODE_ADD,
703 dst_mask(tmp, WRITEMASK_XYZ),
704 0, 0, 0,
705 tmpsrc,
706 C0,
707 src_undef());
708
709 /* YUV.y = MUL YUV.y, C0.w
710 */
711
712 emit_op(c,
713 OPCODE_MUL,
714 dst_mask(tmp, WRITEMASK_Y),
715 0, 0, 0,
716 tmpsrc,
717 src_swizzle1(C0, W),
718 src_undef());
719
720 /*
721 * if (UV swaped)
722 * RGB.xyz = MAD YUV.zzx, C1, YUV.y
723 * else
724 * RGB.xyz = MAD YUV.xxz, C1, YUV.y
725 */
726
727 emit_op(c,
728 OPCODE_MAD,
729 dst_mask(dst, WRITEMASK_XYZ),
730 0, 0, 0,
731 swap_uv?src_swizzle(tmpsrc, Z,Z,X,X):src_swizzle(tmpsrc, X,X,Z,Z),
732 C1,
733 src_swizzle1(tmpsrc, Y));
734
735 /* RGB.y = MAD YUV.z, C1.w, RGB.y
736 */
737 emit_op(c,
738 OPCODE_MAD,
739 dst_mask(dst, WRITEMASK_Y),
740 0, 0, 0,
741 src_swizzle1(tmpsrc, Z),
742 src_swizzle1(C1, W),
743 src_swizzle1(src_reg_from_dst(dst), Y));
744
745 release_temp(c, tmp);
746 }
747 else {
748 /* ordinary RGBA tex instruction */
749 emit_op(c,
750 OPCODE_TEX,
751 inst->DstReg,
752 inst->SaturateMode,
753 unit,
754 inst->TexSrcTarget,
755 coord,
756 src_undef(),
757 src_undef());
758 }
759
760 /* For GL_EXT_texture_swizzle: */
761 if (c->key.tex_swizzles[unit] != SWIZZLE_NOOP) {
762 /* swizzle the result of the TEX instruction */
763 struct prog_src_register tmpsrc = src_reg_from_dst(inst->DstReg);
764 emit_op(c, OPCODE_SWZ,
765 inst->DstReg,
766 SATURATE_OFF, /* saturate already done above */
767 0, 0, /* tex unit, target N/A */
768 src_swizzle4(tmpsrc, c->key.tex_swizzles[unit]),
769 src_undef(),
770 src_undef());
771 }
772
773 if ((inst->TexSrcTarget == TEXTURE_RECT_INDEX) ||
774 (inst->TexSrcTarget == TEXTURE_CUBE_INDEX))
775 release_temp(c, tmpcoord);
776 }
777
778
779 static GLboolean projtex( struct brw_wm_compile *c,
780 const struct prog_instruction *inst )
781 {
782 struct prog_src_register src = inst->SrcReg[0];
783
784 /* Only try to detect the simplest cases. Could detect (later)
785 * cases where we are trying to emit code like RCP {1.0}, MUL x,
786 * {1.0}, and so on.
787 *
788 * More complex cases than this typically only arise from
789 * user-provided fragment programs anyway:
790 */
791 if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX)
792 return 0; /* ut2004 gun rendering !?! */
793 else if (src.File == PROGRAM_INPUT &&
794 GET_SWZ(src.Swizzle, W) == W &&
795 (c->key.projtex_mask & (1<<(src.Index + FRAG_ATTRIB_WPOS - FRAG_ATTRIB_TEX0))) == 0)
796 return 0;
797 else
798 return 1;
799 }
800
801
802 static void precalc_txp( struct brw_wm_compile *c,
803 const struct prog_instruction *inst )
804 {
805 struct prog_src_register src0 = inst->SrcReg[0];
806
807 if (projtex(c, inst)) {
808 struct prog_dst_register tmp = get_temp(c);
809 struct prog_instruction tmp_inst;
810
811 /* tmp0.w = RCP inst.arg[0][3]
812 */
813 emit_op(c,
814 OPCODE_RCP,
815 dst_mask(tmp, WRITEMASK_W),
816 0, 0, 0,
817 src_swizzle1(src0, GET_SWZ(src0.Swizzle, W)),
818 src_undef(),
819 src_undef());
820
821 /* tmp0.xyz = MUL inst.arg[0], tmp0.wwww
822 */
823 emit_op(c,
824 OPCODE_MUL,
825 dst_mask(tmp, WRITEMASK_XYZ),
826 0, 0, 0,
827 src0,
828 src_swizzle1(src_reg_from_dst(tmp), W),
829 src_undef());
830
831 /* dst = precalc(TEX tmp0)
832 */
833 tmp_inst = *inst;
834 tmp_inst.SrcReg[0] = src_reg_from_dst(tmp);
835 precalc_tex(c, &tmp_inst);
836
837 release_temp(c, tmp);
838 }
839 else
840 {
841 /* dst = precalc(TEX src0)
842 */
843 precalc_tex(c, inst);
844 }
845 }
846
847
848
849 static void emit_fb_write( struct brw_wm_compile *c )
850 {
851 struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
852 struct prog_src_register outdepth = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DEPR);
853 struct prog_src_register outcolor;
854 GLuint i;
855
856 struct prog_instruction *inst, *last_inst;
857 struct brw_context *brw = c->func.brw;
858
859 /* inst->Sampler is not used by backend,
860 use it for fb write target and eot */
861
862 if (brw->state.nr_draw_regions > 1) {
863 for (i = 0 ; i < brw->state.nr_draw_regions; i++) {
864 outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0 + i);
865 last_inst = inst = emit_op(c,
866 WM_FB_WRITE, dst_mask(dst_undef(),0), 0, 0, 0,
867 outcolor, payload_r0_depth, outdepth);
868 inst->Sampler = (i<<1);
869 if (c->fp_fragcolor_emitted) {
870 outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLR);
871 last_inst = inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0),
872 0, 0, 0, outcolor, payload_r0_depth, outdepth);
873 inst->Sampler = (i<<1);
874 }
875 }
876 last_inst->Sampler |= 1; //eot
877 }
878 else {
879 /* if gl_FragData[0] is written, use it, else use gl_FragColor */
880 if (c->fp->program.Base.OutputsWritten & (1 << FRAG_RESULT_DATA0))
881 outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0);
882 else
883 outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLR);
884
885 inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0),
886 0, 0, 0, outcolor, payload_r0_depth, outdepth);
887 inst->Sampler = 1|(0<<1);
888 }
889 }
890
891
892
893
894 /***********************************************************************
895 * Emit INTERP instructions ahead of first use of each attrib.
896 */
897
898 static void validate_src_regs( struct brw_wm_compile *c,
899 const struct prog_instruction *inst )
900 {
901 GLuint nr_args = brw_wm_nr_args( inst->Opcode );
902 GLuint i;
903
904 for (i = 0; i < nr_args; i++) {
905 if (inst->SrcReg[i].File == PROGRAM_INPUT) {
906 GLuint idx = inst->SrcReg[i].Index;
907 if (!(c->fp_interp_emitted & (1<<idx))) {
908 emit_interp(c, idx);
909 }
910 }
911 }
912 }
913
914 static void validate_dst_regs( struct brw_wm_compile *c,
915 const struct prog_instruction *inst )
916 {
917 if (inst->DstReg.File == PROGRAM_OUTPUT) {
918 GLuint idx = inst->DstReg.Index;
919 if (idx == FRAG_RESULT_COLR)
920 c->fp_fragcolor_emitted = 1;
921 }
922 }
923
924 static void print_insns( const struct prog_instruction *insn,
925 GLuint nr )
926 {
927 GLuint i;
928 for (i = 0; i < nr; i++, insn++) {
929 _mesa_printf("%3d: ", i);
930 if (insn->Opcode < MAX_OPCODE)
931 _mesa_print_instruction(insn);
932 else if (insn->Opcode < MAX_WM_OPCODE) {
933 GLuint idx = insn->Opcode - MAX_OPCODE;
934
935 _mesa_print_alu_instruction(insn,
936 wm_opcode_strings[idx],
937 3);
938 }
939 else
940 _mesa_printf("UNKNOWN\n");
941
942 }
943 }
944
945 void brw_wm_pass_fp( struct brw_wm_compile *c )
946 {
947 struct brw_fragment_program *fp = c->fp;
948 GLuint insn;
949
950 if (INTEL_DEBUG & DEBUG_WM) {
951 _mesa_printf("pre-fp:\n");
952 _mesa_print_program(&fp->program.Base);
953 _mesa_printf("\n");
954 }
955
956 c->pixel_xy = src_undef();
957 c->delta_xy = src_undef();
958 c->pixel_w = src_undef();
959 c->nr_fp_insns = 0;
960
961 /* Emit preamble instructions:
962 */
963
964
965 for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) {
966 const struct prog_instruction *inst = &fp->program.Base.Instructions[insn];
967 validate_src_regs(c, inst);
968 validate_dst_regs(c, inst);
969 }
970 for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) {
971 const struct prog_instruction *inst = &fp->program.Base.Instructions[insn];
972 struct prog_instruction *out;
973
974 /* Check for INPUT values, emit INTERP instructions where
975 * necessary:
976 */
977
978
979 switch (inst->Opcode) {
980 case OPCODE_SWZ:
981 out = emit_insn(c, inst);
982 out->Opcode = OPCODE_MOV;
983 break;
984
985 case OPCODE_ABS:
986 out = emit_insn(c, inst);
987 out->Opcode = OPCODE_MOV;
988 out->SrcReg[0].NegateBase = 0;
989 out->SrcReg[0].Abs = 1;
990 break;
991
992 case OPCODE_SUB:
993 out = emit_insn(c, inst);
994 out->Opcode = OPCODE_ADD;
995 out->SrcReg[1].NegateBase ^= 0xf;
996 break;
997
998 case OPCODE_SCS:
999 out = emit_insn(c, inst);
1000 /* This should probably be done in the parser.
1001 */
1002 out->DstReg.WriteMask &= WRITEMASK_XY;
1003 break;
1004
1005 case OPCODE_DST:
1006 precalc_dst(c, inst);
1007 break;
1008
1009 case OPCODE_LIT:
1010 precalc_lit(c, inst);
1011 break;
1012
1013 case OPCODE_TEX:
1014 precalc_tex(c, inst);
1015 break;
1016
1017 case OPCODE_TXP:
1018 precalc_txp(c, inst);
1019 break;
1020
1021 case OPCODE_TXB:
1022 out = emit_insn(c, inst);
1023 out->TexSrcUnit = fp->program.Base.SamplerUnits[inst->TexSrcUnit];
1024 break;
1025
1026 case OPCODE_XPD:
1027 out = emit_insn(c, inst);
1028 /* This should probably be done in the parser.
1029 */
1030 out->DstReg.WriteMask &= WRITEMASK_XYZ;
1031 break;
1032
1033 case OPCODE_KIL:
1034 out = emit_insn(c, inst);
1035 /* This should probably be done in the parser.
1036 */
1037 out->DstReg.WriteMask = 0;
1038 break;
1039 case OPCODE_DDX:
1040 emit_ddx(c, inst);
1041 break;
1042 case OPCODE_DDY:
1043 emit_ddy(c, inst);
1044 break;
1045 case OPCODE_END:
1046 emit_fb_write(c);
1047 break;
1048 case OPCODE_PRINT:
1049 break;
1050
1051 default:
1052 emit_insn(c, inst);
1053 break;
1054 }
1055 }
1056
1057 if (INTEL_DEBUG & DEBUG_WM) {
1058 _mesa_printf("pass_fp:\n");
1059 print_insns( c->prog_instructions, c->nr_fp_insns );
1060 _mesa_printf("\n");
1061 }
1062 }
1063