2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **********************************************************************/
29 * Keith Whitwell <keith@tungstengraphics.com>
33 #include "pipe/p_shader_tokens.h"
35 #include "util/u_math.h"
36 #include "util/u_memory.h"
38 #include "tgsi/tgsi_parse.h"
39 #include "tgsi/tgsi_dump.h"
40 #include "tgsi/tgsi_info.h"
41 #include "tgsi/tgsi_util.h"
45 #include "brw_debug.h"
50 static const char *wm_opcode_strings
[] = {
62 /***********************************************************************
66 static struct brw_fp_src
src_reg(GLuint file
, GLuint idx
)
68 struct brw_fp_src reg
;
71 reg
.swizzle
= BRW_SWIZZLE_XYZW
;
78 static struct brw_fp_src
src_reg_from_dst(struct brw_fp_dst dst
)
80 return src_reg(dst
.file
, dst
.index
);
83 static struct brw_fp_src
src_undef( void )
85 return src_reg(TGSI_FILE_NULL
, 0);
88 static GLboolean
src_is_undef(struct brw_fp_src src
)
90 return src
.file
== TGSI_FILE_NULL
;
93 static struct brw_fp_src
src_swizzle( struct brw_fp_src reg
, int x
, int y
, int z
, int w
)
95 unsigned swz
= reg
.swizzle
;
97 reg
.swizzle
= ( GET_SWZ(swz
, x
) << 0 |
98 GET_SWZ(swz
, y
) << 2 |
99 GET_SWZ(swz
, z
) << 4 |
100 GET_SWZ(swz
, w
) << 6 );
105 static struct brw_fp_src
src_scalar( struct brw_fp_src reg
, int x
)
107 return src_swizzle(reg
, x
, x
, x
, x
);
110 static struct brw_fp_src
src_abs( struct brw_fp_src src
)
117 static struct brw_fp_src
src_negate( struct brw_fp_src src
)
125 static int match_or_expand_immediate( const float *v
,
135 for (i
= 0; i
< nr
; i
++) {
136 boolean found
= FALSE
;
138 for (j
= 0; j
< *nr2
&& !found
; j
++) {
140 *swizzle
|= j
<< (i
* 2);
150 *swizzle
|= *nr2
<< (i
* 2);
160 /* Internally generated immediates: overkill...
162 static struct brw_fp_src
src_imm( struct brw_wm_compile
*c
,
169 /* Could do a first pass where we examine all existing immediates
173 for (i
= 0; i
< c
->nr_immediates
; i
++) {
174 if (match_or_expand_immediate( v
,
182 if (c
->nr_immediates
< Elements(c
->immediate
)) {
183 i
= c
->nr_immediates
++;
184 if (match_or_expand_immediate( v
,
196 /* Make sure that all referenced elements are from this immediate.
197 * Has the effect of making size-one immediates into scalars.
199 for (j
= nr
; j
< 4; j
++)
200 swizzle
|= (swizzle
& 0x3) << (j
* 2);
202 return src_swizzle( src_reg( TGSI_FILE_IMMEDIATE
, i
),
206 GET_SWZ(swizzle
, W
) );
211 static struct brw_fp_src
src_imm1f( struct brw_wm_compile
*c
,
214 return src_imm(c
, &f
, 1);
217 static struct brw_fp_src
src_imm4f( struct brw_wm_compile
*c
,
223 GLfloat f
[4] = {x
,y
,z
,w
};
224 return src_imm(c
, f
, 4);
229 /***********************************************************************
233 static struct brw_fp_dst
dst_reg(GLuint file
, GLuint idx
)
235 struct brw_fp_dst reg
;
238 reg
.writemask
= BRW_WRITEMASK_XYZW
;
243 static struct brw_fp_dst
dst_mask( struct brw_fp_dst reg
, int mask
)
245 reg
.writemask
&= mask
;
249 static struct brw_fp_dst
dst_undef( void )
251 return dst_reg(TGSI_FILE_NULL
, 0);
254 static boolean
dst_is_undef( struct brw_fp_dst dst
)
256 return dst
.file
== TGSI_FILE_NULL
;
259 static struct brw_fp_dst
dst_saturate( struct brw_fp_dst reg
, boolean flag
)
265 static struct brw_fp_dst
get_temp( struct brw_wm_compile
*c
)
267 int bit
= ffs( ~c
->fp_temp
);
270 debug_printf("%s: out of temporaries\n", __FILE__
);
273 c
->fp_temp
|= 1<<(bit
-1);
274 return dst_reg(TGSI_FILE_TEMPORARY
, c
->fp_first_internal_temp
+(bit
-1));
278 static void release_temp( struct brw_wm_compile
*c
, struct brw_fp_dst temp
)
280 c
->fp_temp
&= ~(1 << (temp
.index
- c
->fp_first_internal_temp
));
284 /***********************************************************************
288 static struct brw_fp_instruction
*get_fp_inst(struct brw_wm_compile
*c
)
290 return &c
->fp_instructions
[c
->nr_fp_insns
++];
293 static struct brw_fp_instruction
* emit_tex_op(struct brw_wm_compile
*c
,
295 struct brw_fp_dst dest
,
297 GLuint tex_src_target
,
298 struct brw_fp_src src0
,
299 struct brw_fp_src src1
,
300 struct brw_fp_src src2
)
302 struct brw_fp_instruction
*inst
= get_fp_inst(c
);
306 inst
->tex_unit
= tex_src_unit
;
307 inst
->tex_target
= tex_src_target
;
316 static INLINE
void emit_op3(struct brw_wm_compile
*c
,
318 struct brw_fp_dst dest
,
319 struct brw_fp_src src0
,
320 struct brw_fp_src src1
,
321 struct brw_fp_src src2
)
323 emit_tex_op(c
, op
, dest
, 0, 0, src0
, src1
, src2
);
327 static INLINE
void emit_op2(struct brw_wm_compile
*c
,
329 struct brw_fp_dst dest
,
330 struct brw_fp_src src0
,
331 struct brw_fp_src src1
)
333 emit_tex_op(c
, op
, dest
, 0, 0, src0
, src1
, src_undef());
336 static INLINE
void emit_op1(struct brw_wm_compile
*c
,
338 struct brw_fp_dst dest
,
339 struct brw_fp_src src0
)
341 emit_tex_op(c
, op
, dest
, 0, 0, src0
, src_undef(), src_undef());
344 static INLINE
void emit_op0(struct brw_wm_compile
*c
,
346 struct brw_fp_dst dest
)
348 emit_tex_op(c
, op
, dest
, 0, 0, src_undef(), src_undef(), src_undef());
353 /* Many opcodes produce the same value across all the result channels.
354 * We'd rather not have to support that splatting in the opcode implementations,
355 * and brw_wm_pass*.c wants to optimize them out by shuffling references around
356 * anyway. We can easily get both by emitting the opcode to one channel, and
357 * then MOVing it to the others, which brw_wm_pass*.c already understands.
359 static void emit_scalar_insn(struct brw_wm_compile
*c
,
361 struct brw_fp_dst dst
,
362 struct brw_fp_src src0
,
363 struct brw_fp_src src1
,
364 struct brw_fp_src src2
)
366 unsigned first_chan
= ffs(dst
.writemask
) - 1;
367 unsigned first_mask
= 1 << first_chan
;
369 if (dst
.writemask
== 0)
373 dst_mask(dst
, first_mask
),
376 if (dst
.writemask
!= first_mask
) {
377 emit_op1(c
, TGSI_OPCODE_MOV
,
378 dst_mask(dst
, ~first_mask
),
379 src_scalar(src_reg_from_dst(dst
), first_chan
));
384 /***********************************************************************
385 * Special instructions for interpolation and other tasks
388 static struct brw_fp_src
get_pixel_xy( struct brw_wm_compile
*c
)
390 if (src_is_undef(c
->fp_pixel_xy
)) {
391 struct brw_fp_dst pixel_xy
= get_temp(c
);
392 struct brw_fp_src payload_r0_depth
= src_reg(BRW_FILE_PAYLOAD
, PAYLOAD_DEPTH
);
395 /* Emit the out calculations, and hold onto the results. Use
396 * two instructions as a temporary is required.
398 /* pixel_xy.xy = PIXELXY payload[0];
402 dst_mask(pixel_xy
, BRW_WRITEMASK_XY
),
405 c
->fp_pixel_xy
= src_reg_from_dst(pixel_xy
);
408 return c
->fp_pixel_xy
;
411 static struct brw_fp_src
get_delta_xy( struct brw_wm_compile
*c
)
413 if (src_is_undef(c
->fp_delta_xy
)) {
414 struct brw_fp_dst delta_xy
= get_temp(c
);
415 struct brw_fp_src pixel_xy
= get_pixel_xy(c
);
416 struct brw_fp_src payload_r0_depth
= src_reg(BRW_FILE_PAYLOAD
, PAYLOAD_DEPTH
);
418 /* deltas.xy = DELTAXY pixel_xy, payload[0]
422 dst_mask(delta_xy
, BRW_WRITEMASK_XY
),
427 c
->fp_delta_xy
= src_reg_from_dst(delta_xy
);
430 return c
->fp_delta_xy
;
433 static struct brw_fp_src
get_pixel_w( struct brw_wm_compile
*c
)
435 if (src_is_undef(c
->fp_pixel_w
)) {
436 struct brw_fp_dst pixel_w
= get_temp(c
);
437 struct brw_fp_src deltas
= get_delta_xy(c
);
439 /* XXX: assuming position is always first -- valid?
441 struct brw_fp_src interp_wpos
= src_reg(BRW_FILE_PAYLOAD
, 0);
443 /* deltas.xyw = DELTAS2 deltas.xy, payload.interp_wpos.x
447 dst_mask(pixel_w
, BRW_WRITEMASK_W
),
453 c
->fp_pixel_w
= src_reg_from_dst(pixel_w
);
456 return c
->fp_pixel_w
;
460 /***********************************************************************
461 * Emit INTERP instructions ahead of first use of each attrib.
464 static void emit_interp( struct brw_wm_compile
*c
,
469 struct brw_fp_dst dst
= dst_reg(TGSI_FILE_INPUT
, idx
);
470 struct brw_fp_src interp
= src_reg(BRW_FILE_PAYLOAD
, idx
);
471 struct brw_fp_src deltas
= get_delta_xy(c
);
473 /* Need to use PINTERP on attributes which have been
474 * multiplied by 1/W in the SF program, and LINTERP on those
478 case TGSI_SEMANTIC_POSITION
:
479 /* Have to treat wpos.xy specially:
483 dst_mask(dst
, BRW_WRITEMASK_XY
),
486 /* TGSI_FILE_INPUT.attr.xyzw = INTERP payload.interp[attr].x, deltas.xyw
490 dst_mask(dst
, BRW_WRITEMASK_ZW
),
495 case TGSI_SEMANTIC_COLOR
:
496 if (c
->key
.flat_shade
) {
502 else if (interp_mode
== TGSI_INTERPOLATE_LINEAR
) {
520 case TGSI_SEMANTIC_FOG
:
521 /* Interpolate the fog coordinate */
524 dst_mask(dst
, BRW_WRITEMASK_X
),
531 dst_mask(dst
, BRW_WRITEMASK_YZ
),
536 dst_mask(dst
, BRW_WRITEMASK_W
),
540 case TGSI_SEMANTIC_FACE
:
541 /* XXX review/test this case */
544 dst_mask(dst
, BRW_WRITEMASK_X
));
548 dst_mask(dst
, BRW_WRITEMASK_YZ
),
553 dst_mask(dst
, BRW_WRITEMASK_W
),
557 case TGSI_SEMANTIC_PSIZE
:
558 /* XXX review/test this case */
561 dst_mask(dst
, BRW_WRITEMASK_XY
),
568 dst_mask(dst
, BRW_WRITEMASK_Z
),
573 dst_mask(dst
, BRW_WRITEMASK_W
),
578 switch (interp_mode
) {
579 case TGSI_INTERPOLATE_CONSTANT
:
586 case TGSI_INTERPOLATE_LINEAR
:
594 case TGSI_INTERPOLATE_PERSPECTIVE
:
608 /***********************************************************************
609 * Expand various instructions here to simpler forms.
611 static void precalc_dst( struct brw_wm_compile
*c
,
612 struct brw_fp_dst dst
,
613 struct brw_fp_src src0
,
614 struct brw_fp_src src1
)
616 if (dst
.writemask
& BRW_WRITEMASK_Y
) {
617 /* dst.y = mul src0.y, src1.y
621 dst_mask(dst
, BRW_WRITEMASK_Y
),
626 if (dst
.writemask
& BRW_WRITEMASK_XZ
) {
627 /* dst.z = mov src0.zzzz
631 dst_mask(dst
, BRW_WRITEMASK_Z
),
632 src_scalar(src0
, Z
));
634 /* dst.x = imm1f(1.0)
638 dst_saturate(dst_mask(dst
, BRW_WRITEMASK_X
), 0),
641 if (dst
.writemask
& BRW_WRITEMASK_W
) {
642 /* dst.w = mov src1.w
646 dst_mask(dst
, BRW_WRITEMASK_W
),
652 static void precalc_lit( struct brw_wm_compile
*c
,
653 struct brw_fp_dst dst
,
654 struct brw_fp_src src0
)
656 if (dst
.writemask
& BRW_WRITEMASK_XW
) {
657 /* dst.xw = imm(1.0f)
661 dst_saturate(dst_mask(dst
, BRW_WRITEMASK_XW
), 0),
665 if (dst
.writemask
& BRW_WRITEMASK_YZ
) {
668 dst_mask(dst
, BRW_WRITEMASK_YZ
),
675 * Some TEX instructions require extra code, cube map coordinate
676 * normalization, or coordinate scaling for RECT textures, etc.
677 * This function emits those extra instructions and the TEX
678 * instruction itself.
680 static void precalc_tex( struct brw_wm_compile
*c
,
681 struct brw_fp_dst dst
,
684 struct brw_fp_src src0
)
686 struct brw_fp_src coord
= src_undef();
687 struct brw_fp_dst tmp
= dst_undef();
689 assert(unit
< BRW_MAX_TEX_UNIT
);
691 /* Cubemap: find longest component of coord vector and normalize
694 if (target
== TGSI_TEXTURE_CUBE
) {
695 struct brw_fp_src tmpsrc
;
698 tmpsrc
= src_reg_from_dst(tmp
);
700 /* tmp = abs(src0) */
706 /* tmp.X = MAX(tmp.X, tmp.Y) */
707 emit_op2(c
, TGSI_OPCODE_MAX
,
708 dst_mask(tmp
, BRW_WRITEMASK_X
),
709 src_scalar(tmpsrc
, X
),
710 src_scalar(tmpsrc
, Y
));
712 /* tmp.X = MAX(tmp.X, tmp.Z) */
713 emit_op2(c
, TGSI_OPCODE_MAX
,
714 dst_mask(tmp
, BRW_WRITEMASK_X
),
716 src_scalar(tmpsrc
, Z
));
718 /* tmp.X = 1 / tmp.X */
719 emit_op1(c
, TGSI_OPCODE_RCP
,
720 dst_mask(tmp
, BRW_WRITEMASK_X
),
723 /* tmp = src0 * tmp.xxxx */
724 emit_op2(c
, TGSI_OPCODE_MUL
,
727 src_scalar(tmpsrc
, X
));
731 else if (target
== TGSI_TEXTURE_RECT
||
732 target
== TGSI_TEXTURE_SHADOWRECT
) {
733 /* XXX: need a mechanism for internally generated constants.
741 /* Need to emit YUV texture conversions by hand. Probably need to
742 * do this here - the alternative is in brw_wm_emit.c, but the
743 * conversion requires allocating a temporary variable which we
744 * don't have the facility to do that late in the compilation.
746 if (c
->key
.yuvtex_mask
& (1 << unit
)) {
747 /* convert ycbcr to RGBA */
748 GLboolean swap_uv
= c
->key
.yuvtex_swap_mask
& (1<<unit
);
749 struct brw_fp_dst tmp
= get_temp(c
);
750 struct brw_fp_src tmpsrc
= src_reg_from_dst(tmp
);
751 struct brw_fp_src C0
= src_imm4f( c
, -.5, -.0625, -.5, 1.164 );
752 struct brw_fp_src C1
= src_imm4f( c
, 1.596, -0.813, 2.018, -.391 );
758 dst_saturate(tmp
, dst
.saturate
),
765 /* tmp.xyz = ADD TMP, C0
767 emit_op2(c
, TGSI_OPCODE_ADD
,
768 dst_mask(tmp
, BRW_WRITEMASK_XYZ
),
772 /* YUV.y = MUL YUV.y, C0.w
774 emit_op2(c
, TGSI_OPCODE_MUL
,
775 dst_mask(tmp
, BRW_WRITEMASK_Y
),
781 * RGB.xyz = MAD YUV.zzx, C1, YUV.y
783 * RGB.xyz = MAD YUV.xxz, C1, YUV.y
786 emit_op3(c
, TGSI_OPCODE_MAD
,
787 dst_mask(dst
, BRW_WRITEMASK_XYZ
),
789 src_swizzle(tmpsrc
, Z
,Z
,X
,X
) :
790 src_swizzle(tmpsrc
, X
,X
,Z
,Z
)),
792 src_scalar(tmpsrc
, Y
));
794 /* RGB.y = MAD YUV.z, C1.w, RGB.y
798 dst_mask(dst
, BRW_WRITEMASK_Y
),
799 src_scalar(tmpsrc
, Z
),
801 src_scalar(src_reg_from_dst(dst
), Y
));
803 release_temp(c
, tmp
);
806 /* ordinary RGBA tex instruction */
817 /* XXX: add GL_EXT_texture_swizzle support to gallium -- by
818 * generating shader variants in mesa state tracker.
821 /* Release this temp if we ended up allocating it:
823 if (!dst_is_undef(tmp
))
824 release_temp(c
, tmp
);
829 * Check if the given TXP instruction really needs the divide-by-W step.
831 static GLboolean
projtex( struct brw_wm_compile
*c
,
833 struct brw_fp_src src
)
835 /* Only try to detect the simplest cases. Could detect (later)
836 * cases where we are trying to emit code like RCP {1.0}, MUL x,
839 * More complex cases than this typically only arise from
840 * user-provided fragment programs anyway:
842 if (target
== TGSI_TEXTURE_CUBE
)
843 return GL_FALSE
; /* ut2004 gun rendering !?! */
845 if (src
.file
== TGSI_FILE_INPUT
&&
846 GET_SWZ(src
.swizzle
, W
) == W
&&
847 c
->fp
->info
.input_interpolate
[src
.index
] != TGSI_INTERPOLATE_PERSPECTIVE
)
857 static void precalc_txp( struct brw_wm_compile
*c
,
858 struct brw_fp_dst dst
,
861 struct brw_fp_src src0
)
863 if (projtex(c
, target
, src0
)) {
864 struct brw_fp_dst tmp
= get_temp(c
);
866 /* tmp0.w = RCP inst.arg[0][3]
870 dst_mask(tmp
, BRW_WRITEMASK_W
),
871 src_scalar(src0
, W
));
873 /* tmp0.xyz = MUL inst.arg[0], tmp0.wwww
877 dst_mask(tmp
, BRW_WRITEMASK_XYZ
),
879 src_scalar(src_reg_from_dst(tmp
), W
));
887 src_reg_from_dst(tmp
));
889 release_temp(c
, tmp
);
895 precalc_tex(c
, dst
, target
, unit
, src0
);
900 /* XXX: note this returns a src_reg.
902 static struct brw_fp_src
903 find_output_by_semantic( struct brw_wm_compile
*c
,
907 const struct tgsi_shader_info
*info
= &c
->fp
->info
;
910 for (i
= 0; i
< info
->num_outputs
; i
++)
911 if (info
->output_semantic_name
[i
] == semantic
&&
912 info
->output_semantic_index
[i
] == index
)
913 return src_reg( TGSI_FILE_OUTPUT
, i
);
915 /* If not found, return some arbitrary immediate value:
917 return src_imm1f(c
, 1.0);
921 static void emit_fb_write( struct brw_wm_compile
*c
)
923 struct brw_fp_src payload_r0_depth
= src_reg(BRW_FILE_PAYLOAD
, PAYLOAD_DEPTH
);
924 struct brw_fp_src outdepth
= find_output_by_semantic(c
, TGSI_SEMANTIC_POSITION
, 0);
928 outdepth
= src_scalar(outdepth
, Z
);
930 for (i
= 0 ; i
< c
->key
.nr_cbufs
; i
++) {
931 struct brw_fp_src outcolor
;
932 unsigned target
= 1<<i
;
934 /* Set EOT flag on last inst:
936 if (i
== c
->key
.nr_cbufs
- 1)
939 outcolor
= find_output_by_semantic(c
, TGSI_SEMANTIC_COLOR
, i
);
941 /* Use emit_tex_op so that we can specify the inst->tex_target
942 * field, which is abused to contain the FB write target and the
945 emit_tex_op(c
, WM_FB_WRITE
,
956 static struct brw_fp_dst
translate_dst( struct brw_wm_compile
*c
,
957 const struct tgsi_full_dst_register
*dst
,
960 struct brw_fp_dst out
;
962 out
.file
= dst
->DstRegister
.File
;
963 out
.index
= dst
->DstRegister
.Index
;
964 out
.writemask
= dst
->DstRegister
.WriteMask
;
965 out
.indirect
= dst
->DstRegister
.Indirect
;
966 out
.saturate
= (saturate
== TGSI_SAT_ZERO_ONE
);
969 assert(dst
->DstRegisterInd
.File
== TGSI_FILE_ADDRESS
);
970 assert(dst
->DstRegisterInd
.Index
== 0);
977 static struct brw_fp_src
translate_src( struct brw_wm_compile
*c
,
978 const struct tgsi_full_src_register
*src
)
980 struct brw_fp_src out
;
982 out
.file
= src
->SrcRegister
.File
;
983 out
.index
= src
->SrcRegister
.Index
;
984 out
.indirect
= src
->SrcRegister
.Indirect
;
986 out
.swizzle
= ((src
->SrcRegister
.SwizzleX
<< 0) |
987 (src
->SrcRegister
.SwizzleY
<< 2) |
988 (src
->SrcRegister
.SwizzleZ
<< 4) |
989 (src
->SrcRegister
.SwizzleW
<< 6));
991 switch (tgsi_util_get_full_src_register_sign_mode( src
, 0 )) {
992 case TGSI_UTIL_SIGN_CLEAR
:
997 case TGSI_UTIL_SIGN_SET
:
1002 case TGSI_UTIL_SIGN_TOGGLE
:
1007 case TGSI_UTIL_SIGN_KEEP
:
1015 assert(src
->SrcRegisterInd
.File
== TGSI_FILE_ADDRESS
);
1016 assert(src
->SrcRegisterInd
.Index
== 0);
1024 static void emit_insn( struct brw_wm_compile
*c
,
1025 const struct tgsi_full_instruction
*inst
)
1027 unsigned opcode
= inst
->Instruction
.Opcode
;
1028 struct brw_fp_dst dst
;
1029 struct brw_fp_src src
[3];
1032 dst
= translate_dst( c
, &inst
->FullDstRegisters
[0],
1033 inst
->Instruction
.Saturate
);
1035 for (i
= 0; i
< inst
->Instruction
.NumSrcRegs
; i
++)
1036 src
[i
] = translate_src( c
, &inst
->FullSrcRegisters
[0] );
1039 case TGSI_OPCODE_ABS
:
1040 emit_op1(c
, TGSI_OPCODE_MOV
,
1045 case TGSI_OPCODE_SUB
:
1046 emit_op2(c
, TGSI_OPCODE_ADD
,
1049 src_negate(src
[1]));
1052 case TGSI_OPCODE_SCS
:
1053 emit_op1(c
, TGSI_OPCODE_SCS
,
1054 dst_mask(dst
, BRW_WRITEMASK_XY
),
1058 case TGSI_OPCODE_DST
:
1059 precalc_dst(c
, dst
, src
[0], src
[1]);
1062 case TGSI_OPCODE_LIT
:
1063 precalc_lit(c
, dst
, src
[0]);
1066 case TGSI_OPCODE_TEX
:
1068 inst
->InstructionExtTexture
.Texture
,
1069 src
[0].file
, /* sampler unit */
1073 case TGSI_OPCODE_TXP
:
1075 inst
->InstructionExtTexture
.Texture
,
1076 src
[0].file
, /* sampler unit */
1080 case TGSI_OPCODE_TXB
:
1081 /* XXX: TXB not done
1084 inst
->InstructionExtTexture
.Texture
,
1085 src
[0].file
, /* sampler unit */
1089 case TGSI_OPCODE_XPD
:
1090 emit_op2(c
, TGSI_OPCODE_XPD
,
1091 dst_mask(dst
, BRW_WRITEMASK_XYZ
),
1096 case TGSI_OPCODE_KIL
:
1097 emit_op1(c
, TGSI_OPCODE_KIL
,
1098 dst_mask(dst_undef(), 0),
1102 case TGSI_OPCODE_END
:
1106 if (!c
->key
.has_flow_control
&&
1107 brw_wm_is_scalar_result(opcode
))
1108 emit_scalar_insn(c
, opcode
, dst
, src
[0], src
[1], src
[2]);
1110 emit_op3(c
, opcode
, dst
, src
[0], src
[1], src
[2]);
1116 * Initial pass for fragment program code generation.
1117 * This function is used by both the GLSL and non-GLSL paths.
1119 int brw_wm_pass_fp( struct brw_wm_compile
*c
)
1121 struct brw_fragment_shader
*fs
= c
->fp
;
1122 struct tgsi_parse_context parse
;
1123 struct tgsi_full_instruction
*inst
;
1124 struct tgsi_full_declaration
*decl
;
1129 if (BRW_DEBUG
& DEBUG_WM
) {
1130 debug_printf("pre-fp:\n");
1131 tgsi_dump(fs
->tokens
, 0);
1134 c
->fp_pixel_xy
= src_undef();
1135 c
->fp_delta_xy
= src_undef();
1136 c
->fp_pixel_w
= src_undef();
1138 c
->nr_immediates
= 0;
1141 /* Loop over all instructions doing assorted simplifications and
1144 tgsi_parse_init( &parse
, fs
->tokens
);
1145 while( !tgsi_parse_end_of_tokens( &parse
) ) {
1146 tgsi_parse_token( &parse
);
1148 switch( parse
.FullToken
.Token
.Type
) {
1149 case TGSI_TOKEN_TYPE_DECLARATION
:
1150 /* Turn input declarations into special WM_* instructions.
1152 * XXX: For non-branching shaders, consider deferring variable
1153 * initialization as late as possible to minimize register
1154 * usage. This is how the original BRW driver worked.
1156 * In a branching shader, must preamble instructions at decl
1157 * time, as instruction order in the shader does not
1158 * correspond to the order instructions are executed in the
1161 * This is where special instructions such as WM_CINTERP,
1162 * WM_LINTERP, WM_PINTERP and WM_WPOSXY are emitted to
1163 * compute shader inputs from the payload registers and pixel
1166 decl
= &parse
.FullToken
.FullDeclaration
;
1167 if( decl
->Declaration
.File
== TGSI_FILE_INPUT
) {
1168 unsigned first
, last
, mask
;
1171 first
= decl
->DeclarationRange
.First
;
1172 last
= decl
->DeclarationRange
.Last
;
1173 mask
= decl
->Declaration
.UsageMask
;
1175 for (attrib
= first
; attrib
<= last
; attrib
++) {
1178 decl
->Semantic
.SemanticName
,
1179 decl
->Declaration
.Interpolate
);
1185 case TGSI_TOKEN_TYPE_IMMEDIATE
:
1186 /* Unlike VS programs we can probably manage fine encoding
1187 * immediate values directly into the emitted EU
1188 * instructions, as we probably only need to reference one
1189 * float value per instruction. Just save the data for now
1190 * and use directly later.
1192 i
= c
->nr_immediates
++;
1193 imm
= &parse
.FullToken
.FullImmediate
.u
[i
].Float
;
1194 size
= parse
.FullToken
.FullImmediate
.Immediate
.NrTokens
- 1;
1196 if (c
->nr_immediates
>= BRW_WM_MAX_CONST
)
1197 return PIPE_ERROR_OUT_OF_MEMORY
;
1199 for (i
= 0; i
< size
; i
++)
1200 c
->immediate
[c
->nr_immediates
].v
[i
] = imm
[i
];
1203 c
->immediate
[c
->nr_immediates
].v
[i
] = 0.0;
1205 c
->immediate
[c
->nr_immediates
].nr
= size
;
1209 case TGSI_TOKEN_TYPE_INSTRUCTION
:
1210 inst
= &parse
.FullToken
.FullInstruction
;
1216 if (BRW_DEBUG
& DEBUG_WM
) {
1217 debug_printf("pass_fp:\n");
1218 //brw_print_program( c->fp_brw_program );