2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **********************************************************************/
29 * Keith Whitwell <keith@tungstengraphics.com>
36 #include "brw_context.h"
40 #include "shader/prog_parameter.h"
41 #include "shader/prog_print.h"
42 #include "shader/prog_statevars.h"
45 #define FIRST_INTERNAL_TEMP MAX_NV_FRAGMENT_PROGRAM_TEMPS
53 static const char *wm_opcode_strings
[] = {
65 static const char *wm_file_strings
[] = {
71 /***********************************************************************
75 static struct prog_src_register
src_reg(GLuint file
, GLuint idx
)
77 struct prog_src_register reg
;
80 reg
.Swizzle
= SWIZZLE_NOOP
;
88 static struct prog_src_register
src_reg_from_dst(struct prog_dst_register dst
)
90 return src_reg(dst
.File
, dst
.Index
);
93 static struct prog_src_register
src_undef( void )
95 return src_reg(PROGRAM_UNDEFINED
, 0);
98 static GLboolean
src_is_undef(struct prog_src_register src
)
100 return src
.File
== PROGRAM_UNDEFINED
;
103 static struct prog_src_register
src_swizzle( struct prog_src_register reg
, int x
, int y
, int z
, int w
)
105 reg
.Swizzle
= MAKE_SWIZZLE4(x
,y
,z
,w
);
109 static struct prog_src_register
src_swizzle1( struct prog_src_register reg
, int x
)
111 return src_swizzle(reg
, x
, x
, x
, x
);
115 /***********************************************************************
119 static struct prog_dst_register
dst_reg(GLuint file
, GLuint idx
)
121 struct prog_dst_register reg
;
124 reg
.WriteMask
= WRITEMASK_XYZW
;
132 static struct prog_dst_register
dst_mask( struct prog_dst_register reg
, int mask
)
134 reg
.WriteMask
&= mask
;
138 static struct prog_dst_register
dst_undef( void )
140 return dst_reg(PROGRAM_UNDEFINED
, 0);
145 static struct prog_dst_register
get_temp( struct brw_wm_compile
*c
)
147 int bit
= ffs( ~c
->fp_temp
);
150 _mesa_printf("%s: out of temporaries\n", __FILE__
);
154 c
->fp_temp
|= 1<<(bit
-1);
155 return dst_reg(PROGRAM_TEMPORARY
, FIRST_INTERNAL_TEMP
+(bit
-1));
159 static void release_temp( struct brw_wm_compile
*c
, struct prog_dst_register temp
)
161 c
->fp_temp
&= ~1<<(temp
.Index
+ 1 - FIRST_INTERNAL_TEMP
);
165 /***********************************************************************
169 static struct prog_instruction
*get_fp_inst(struct brw_wm_compile
*c
)
171 return &c
->prog_instructions
[c
->nr_fp_insns
++];
174 static struct prog_instruction
*emit_insn(struct brw_wm_compile
*c
,
175 const struct prog_instruction
*inst0
)
177 struct prog_instruction
*inst
= get_fp_inst(c
);
182 static struct prog_instruction
* emit_op(struct brw_wm_compile
*c
,
184 struct prog_dst_register dest
,
187 GLuint tex_src_target
,
188 struct prog_src_register src0
,
189 struct prog_src_register src1
,
190 struct prog_src_register src2
)
192 struct prog_instruction
*inst
= get_fp_inst(c
);
194 memset(inst
, 0, sizeof(*inst
));
198 inst
->SaturateMode
= saturate
;
199 inst
->TexSrcUnit
= tex_src_unit
;
200 inst
->TexSrcTarget
= tex_src_target
;
201 inst
->SrcReg
[0] = src0
;
202 inst
->SrcReg
[1] = src1
;
203 inst
->SrcReg
[2] = src2
;
211 /***********************************************************************
212 * Special instructions for interpolation and other tasks
215 static struct prog_src_register
get_pixel_xy( struct brw_wm_compile
*c
)
217 if (src_is_undef(c
->pixel_xy
)) {
218 struct prog_dst_register pixel_xy
= get_temp(c
);
219 struct prog_src_register payload_r0_depth
= src_reg(PROGRAM_PAYLOAD
, PAYLOAD_DEPTH
);
222 /* Emit the out calculations, and hold onto the results. Use
223 * two instructions as a temporary is required.
225 /* pixel_xy.xy = PIXELXY payload[0];
229 dst_mask(pixel_xy
, WRITEMASK_XY
),
235 c
->pixel_xy
= src_reg_from_dst(pixel_xy
);
241 static struct prog_src_register
get_delta_xy( struct brw_wm_compile
*c
)
243 if (src_is_undef(c
->delta_xy
)) {
244 struct prog_dst_register delta_xy
= get_temp(c
);
245 struct prog_src_register pixel_xy
= get_pixel_xy(c
);
246 struct prog_src_register payload_r0_depth
= src_reg(PROGRAM_PAYLOAD
, PAYLOAD_DEPTH
);
248 /* deltas.xy = DELTAXY pixel_xy, payload[0]
252 dst_mask(delta_xy
, WRITEMASK_XY
),
258 c
->delta_xy
= src_reg_from_dst(delta_xy
);
264 static struct prog_src_register
get_pixel_w( struct brw_wm_compile
*c
)
266 if (src_is_undef(c
->pixel_w
)) {
267 struct prog_dst_register pixel_w
= get_temp(c
);
268 struct prog_src_register deltas
= get_delta_xy(c
);
269 struct prog_src_register interp_wpos
= src_reg(PROGRAM_PAYLOAD
, FRAG_ATTRIB_WPOS
);
272 /* deltas.xyw = DELTAS2 deltas.xy, payload.interp_wpos.x
276 dst_mask(pixel_w
, WRITEMASK_W
),
283 c
->pixel_w
= src_reg_from_dst(pixel_w
);
289 static void emit_interp( struct brw_wm_compile
*c
,
292 struct prog_dst_register dst
= dst_reg(PROGRAM_INPUT
, idx
);
293 struct prog_src_register interp
= src_reg(PROGRAM_PAYLOAD
, idx
);
294 struct prog_src_register deltas
= get_delta_xy(c
);
295 struct prog_src_register arg2
;
298 /* Need to use PINTERP on attributes which have been
299 * multiplied by 1/W in the SF program, and LINTERP on those
303 case FRAG_ATTRIB_WPOS
:
307 /* Have to treat wpos.xy specially:
311 dst_mask(dst
, WRITEMASK_XY
),
317 dst
= dst_mask(dst
, WRITEMASK_ZW
);
319 /* PROGRAM_INPUT.attr.xyzw = INTERP payload.interp[attr].x, deltas.xyw
329 case FRAG_ATTRIB_COL0
:
330 case FRAG_ATTRIB_COL1
:
331 if (c
->key
.flat_shade
) {
361 c
->fp_interp_emitted
|= 1<<idx
;
365 /***********************************************************************
366 * Hacks to extend the program parameter and constant lists.
369 /* Add the fog parameters to the parameter list of the original
370 * program, rather than creating a new list. Doesn't really do any
371 * harm and it's not as if the parameter handling isn't a big hack
374 static struct prog_src_register
search_or_add_param5(struct brw_wm_compile
*c
,
381 struct gl_program_parameter_list
*paramList
= c
->fp
->program
.Base
.Parameters
;
382 gl_state_index tokens
[STATE_LENGTH
];
390 for (idx
= 0; idx
< paramList
->NumParameters
; idx
++) {
391 if (paramList
->Parameters
[idx
].Type
== PROGRAM_STATE_VAR
&&
392 memcmp(paramList
->Parameters
[idx
].StateIndexes
, tokens
, sizeof(tokens
)) == 0)
393 return src_reg(PROGRAM_STATE_VAR
, idx
);
396 idx
= _mesa_add_state_reference( paramList
, tokens
);
398 /* Recalculate state dependency:
400 c
->fp
->param_state
= paramList
->StateFlags
;
402 return src_reg(PROGRAM_STATE_VAR
, idx
);
406 static struct prog_src_register
search_or_add_const4f( struct brw_wm_compile
*c
,
412 struct gl_program_parameter_list
*paramList
= c
->fp
->program
.Base
.Parameters
;
422 /* Have to search, otherwise multiple compilations will each grow
423 * the parameter list.
425 for (idx
= 0; idx
< paramList
->NumParameters
; idx
++) {
426 if (paramList
->Parameters
[idx
].Type
== PROGRAM_CONSTANT
&&
427 memcmp(paramList
->ParameterValues
[idx
], values
, sizeof(values
)) == 0)
429 /* XXX: this mimics the mesa bug which puts all constants and
430 * parameters into the "PROGRAM_STATE_VAR" category:
432 return src_reg(PROGRAM_STATE_VAR
, idx
);
435 idx
= _mesa_add_unnamed_constant( paramList
, values
, 4, &swizzle
);
436 /* XXX what about swizzle? */
437 return src_reg(PROGRAM_STATE_VAR
, idx
);
442 /***********************************************************************
443 * Expand various instructions here to simpler forms.
445 static void precalc_dst( struct brw_wm_compile
*c
,
446 const struct prog_instruction
*inst
)
448 struct prog_src_register src0
= inst
->SrcReg
[0];
449 struct prog_src_register src1
= inst
->SrcReg
[1];
450 struct prog_dst_register dst
= inst
->DstReg
;
452 if (dst
.WriteMask
& WRITEMASK_Y
) {
453 /* dst.y = mul src0.y, src1.y
457 dst_mask(dst
, WRITEMASK_Y
),
458 inst
->SaturateMode
, 0, 0,
465 if (dst
.WriteMask
& WRITEMASK_XZ
) {
466 GLuint z
= GET_SWZ(src0
.Swizzle
, Z
);
468 /* dst.xz = swz src0.1zzz
472 dst_mask(dst
, WRITEMASK_XZ
),
473 inst
->SaturateMode
, 0, 0,
474 src_swizzle(src0
, SWIZZLE_ONE
, z
, z
, z
),
478 if (dst
.WriteMask
& WRITEMASK_W
) {
479 /* dst.w = mov src1.w
483 dst_mask(dst
, WRITEMASK_W
),
484 inst
->SaturateMode
, 0, 0,
492 static void precalc_lit( struct brw_wm_compile
*c
,
493 const struct prog_instruction
*inst
)
495 struct prog_src_register src0
= inst
->SrcReg
[0];
496 struct prog_dst_register dst
= inst
->DstReg
;
498 if (dst
.WriteMask
& WRITEMASK_XW
) {
499 /* dst.xw = swz src0.1111
503 dst_mask(dst
, WRITEMASK_XW
),
505 src_swizzle1(src0
, SWIZZLE_ONE
),
511 if (dst
.WriteMask
& WRITEMASK_YZ
) {
514 dst_mask(dst
, WRITEMASK_YZ
),
515 inst
->SaturateMode
, 0, 0,
522 static void precalc_tex( struct brw_wm_compile
*c
,
523 const struct prog_instruction
*inst
)
525 struct prog_src_register coord
;
526 struct prog_dst_register tmpcoord
;
528 if (inst
->TexSrcTarget
== TEXTURE_RECT_INDEX
) {
529 struct prog_src_register scale
=
530 search_or_add_param5( c
,
536 tmpcoord
= get_temp(c
);
538 /* coord.xy = MUL inst->SrcReg[0], { 1/width, 1/height }
548 coord
= src_reg_from_dst(tmpcoord
);
551 coord
= inst
->SrcReg
[0];
554 /* Need to emit YUV texture conversions by hand. Probably need to
555 * do this here - the alternative is in brw_wm_emit.c, but the
556 * conversion requires allocating a temporary variable which we
557 * don't have the facility to do that late in the compilation.
559 if (!(c
->key
.yuvtex_mask
& (1<<inst
->TexSrcUnit
))) {
572 CONST C0 = { -.5, -.0625, -.5, 1.164 }
573 CONST C1 = { 1.596, -0.813, 2.018, -.391 }
575 UYV.xyz = ADD UYV, C0
576 UYV.y = MUL UYV.y, C0.w
577 RGB.xyz = MAD UYV.xxz, C1, UYV.y
578 RGB.y = MAD UYV.z, C1.w, RGB.y
580 struct prog_dst_register dst
= inst
->DstReg
;
581 struct prog_src_register src0
= inst
->SrcReg
[0];
582 struct prog_dst_register tmp
= get_temp(c
);
583 struct prog_src_register tmpsrc
= src_reg_from_dst(tmp
);
584 struct prog_src_register C0
= search_or_add_const4f( c
, -.5, -.0625, -.5, 1.164 );
585 struct prog_src_register C1
= search_or_add_const4f( c
, 1.596, -0.813, 2.018, -.391 );
599 /* tmp.xyz = ADD TMP, C0
603 dst_mask(tmp
, WRITEMASK_XYZ
),
609 /* YUV.y = MUL YUV.y, C0.w
613 dst_mask(tmp
, WRITEMASK_Y
),
619 /* RGB.xyz = MAD YUV.xxz, C1, YUV.y
623 dst_mask(dst
, WRITEMASK_XYZ
),
625 src_swizzle(tmpsrc
, X
,X
,Z
,Z
),
627 src_swizzle1(tmpsrc
, Y
));
629 /* RGB.y = MAD YUV.z, C1.w, RGB.y
633 dst_mask(dst
, WRITEMASK_Y
),
635 src_swizzle1(tmpsrc
, Z
),
637 src_swizzle1(src_reg_from_dst(dst
), Y
));
639 release_temp(c
, tmp
);
642 if (inst
->TexSrcTarget
== GL_TEXTURE_RECTANGLE_NV
)
643 release_temp(c
, tmpcoord
);
647 static GLboolean
projtex( struct brw_wm_compile
*c
,
648 const struct prog_instruction
*inst
)
650 struct prog_src_register src
= inst
->SrcReg
[0];
652 /* Only try to detect the simplest cases. Could detect (later)
653 * cases where we are trying to emit code like RCP {1.0}, MUL x,
656 * More complex cases than this typically only arise from
657 * user-provided fragment programs anyway:
659 if (inst
->TexSrcTarget
== TEXTURE_CUBE_INDEX
)
660 return 0; /* ut2004 gun rendering !?! */
661 else if (src
.File
== PROGRAM_INPUT
&&
662 GET_SWZ(src
.Swizzle
, W
) == W
&&
663 (c
->key
.projtex_mask
& (1<<src
.Index
)) == 0)
670 static void precalc_txp( struct brw_wm_compile
*c
,
671 const struct prog_instruction
*inst
)
673 struct prog_src_register src0
= inst
->SrcReg
[0];
675 if (projtex(c
, inst
)) {
676 struct prog_dst_register tmp
= get_temp(c
);
677 struct prog_instruction tmp_inst
;
679 /* tmp0.w = RCP inst.arg[0][3]
683 dst_mask(tmp
, WRITEMASK_W
),
685 src_swizzle1(src0
, GET_SWZ(src0
.Swizzle
, W
)),
689 /* tmp0.xyz = MUL inst.arg[0], tmp0.wwww
693 dst_mask(tmp
, WRITEMASK_XYZ
),
696 src_swizzle1(src_reg_from_dst(tmp
), W
),
699 /* dst = precalc(TEX tmp0)
702 tmp_inst
.SrcReg
[0] = src_reg_from_dst(tmp
);
703 precalc_tex(c
, &tmp_inst
);
705 release_temp(c
, tmp
);
709 /* dst = precalc(TEX src0)
711 precalc_tex(c
, inst
);
719 /***********************************************************************
720 * Add instructions to perform fog blending
723 static void fog_blend( struct brw_wm_compile
*c
,
724 struct prog_src_register fog_factor
)
726 struct prog_dst_register outcolor
= dst_reg(PROGRAM_OUTPUT
, FRAG_RESULT_COLR
);
727 struct prog_src_register fogcolor
= search_or_add_param5( c
, STATE_FOG_COLOR
, 0,0,0,0 );
729 /* color.xyz = LRP fog_factor.xxxx, output_color, fog_color */
733 dst_mask(outcolor
, WRITEMASK_XYZ
),
736 src_reg_from_dst(outcolor
),
742 /* This one is simple - just take the interpolated fog coordinate and
743 * use it as the fog blend factor.
745 static void fog_interpolated( struct brw_wm_compile
*c
)
747 struct prog_src_register fogc
= src_reg(PROGRAM_INPUT
, FRAG_ATTRIB_FOGC
);
749 if (!(c
->fp_interp_emitted
& (1<<FRAG_ATTRIB_FOGC
)))
750 emit_interp(c
, FRAG_ATTRIB_FOGC
);
752 fog_blend( c
, src_swizzle1(fogc
, GET_SWZ(fogc
.Swizzle
,X
)));
755 static void emit_fog( struct brw_wm_compile
*c
)
757 if (!c
->fp
->program
.FogOption
)
761 fog_interpolated( c
);
763 /* TODO: per-pixel fog */
768 static void emit_fb_write( struct brw_wm_compile
*c
)
770 struct prog_src_register outcolor
= src_reg(PROGRAM_OUTPUT
, FRAG_RESULT_COLR
);
771 struct prog_src_register payload_r0_depth
= src_reg(PROGRAM_PAYLOAD
, PAYLOAD_DEPTH
);
772 struct prog_src_register outdepth
= src_reg(PROGRAM_OUTPUT
, FRAG_RESULT_DEPR
);
776 dst_mask(dst_undef(),0),
786 /***********************************************************************
787 * Emit INTERP instructions ahead of first use of each attrib.
790 static void validate_src_regs( struct brw_wm_compile
*c
,
791 const struct prog_instruction
*inst
)
793 GLuint nr_args
= brw_wm_nr_args( inst
->Opcode
);
796 for (i
= 0; i
< nr_args
; i
++) {
797 if (inst
->SrcReg
[i
].File
== PROGRAM_INPUT
) {
798 GLuint idx
= inst
->SrcReg
[i
].Index
;
799 if (!(c
->fp_interp_emitted
& (1<<idx
))) {
808 static void print_insns( const struct prog_instruction
*insn
,
812 for (i
= 0; i
< nr
; i
++, insn
++) {
813 _mesa_printf("%3d: ", i
);
814 if (insn
->Opcode
< MAX_OPCODE
)
815 _mesa_print_instruction(insn
);
816 else if (insn
->Opcode
< MAX_WM_OPCODE
) {
817 GLuint idx
= insn
->Opcode
- MAX_OPCODE
;
819 _mesa_print_alu_instruction(insn
,
820 wm_opcode_strings
[idx
],
824 _mesa_printf("UNKNOWN\n");
829 void brw_wm_pass_fp( struct brw_wm_compile
*c
)
831 struct brw_fragment_program
*fp
= c
->fp
;
834 if (INTEL_DEBUG
& DEBUG_WM
) {
835 _mesa_printf("\n\n\npre-fp:\n");
836 _mesa_print_program(&fp
->program
.Base
);
840 c
->pixel_xy
= src_undef();
841 c
->delta_xy
= src_undef();
842 c
->pixel_w
= src_undef();
845 /* Emit preamble instructions:
849 for (insn
= 0; insn
< fp
->program
.Base
.NumInstructions
; insn
++) {
850 const struct prog_instruction
*inst
= &fp
->program
.Base
.Instructions
[insn
];
851 struct prog_instruction
*out
;
853 /* Check for INPUT values, emit INTERP instructions where
856 validate_src_regs(c
, inst
);
859 switch (inst
->Opcode
) {
861 out
= emit_insn(c
, inst
);
862 out
->Opcode
= OPCODE_MOV
;
866 out
= emit_insn(c
, inst
);
867 out
->Opcode
= OPCODE_MOV
;
868 out
->SrcReg
[0].NegateBase
= 0;
869 out
->SrcReg
[0].Abs
= 1;
873 out
= emit_insn(c
, inst
);
874 out
->Opcode
= OPCODE_ADD
;
875 out
->SrcReg
[1].NegateBase
^= 0xf;
879 out
= emit_insn(c
, inst
);
880 /* This should probably be done in the parser.
882 out
->DstReg
.WriteMask
&= WRITEMASK_XY
;
886 precalc_dst(c
, inst
);
890 precalc_lit(c
, inst
);
894 precalc_txp(c
, inst
);
898 out
= emit_insn(c
, inst
);
899 /* This should probably be done in the parser.
901 out
->DstReg
.WriteMask
&= WRITEMASK_XYZ
;
905 out
= emit_insn(c
, inst
);
906 /* This should probably be done in the parser.
908 out
->DstReg
.WriteMask
= 0;
925 if (INTEL_DEBUG
& DEBUG_WM
) {
926 _mesa_printf("\n\n\npass_fp:\n");
927 print_insns( c
->prog_instructions
, c
->nr_fp_insns
);