2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **********************************************************************/
29 * Keith Whitwell <keith@tungstengraphics.com>
33 #include "pipe/p_shader_tokens.h"
35 #include "util/u_math.h"
36 #include "util/u_memory.h"
38 #include "tgsi/tgsi_parse.h"
39 #include "tgsi/tgsi_dump.h"
40 #include "tgsi/tgsi_info.h"
41 #include "tgsi/tgsi_util.h"
45 #include "brw_debug.h"
48 /***********************************************************************
52 static struct brw_fp_src
src_reg(GLuint file
, GLuint idx
)
54 struct brw_fp_src reg
;
57 reg
.swizzle
= BRW_SWIZZLE_XYZW
;
64 static struct brw_fp_src
src_reg_from_dst(struct brw_fp_dst dst
)
66 return src_reg(dst
.file
, dst
.index
);
69 static struct brw_fp_src
src_undef( void )
71 return src_reg(TGSI_FILE_NULL
, 0);
74 static GLboolean
src_is_undef(struct brw_fp_src src
)
76 return src
.file
== TGSI_FILE_NULL
;
79 static struct brw_fp_src
src_swizzle( struct brw_fp_src reg
, int x
, int y
, int z
, int w
)
81 unsigned swz
= reg
.swizzle
;
83 reg
.swizzle
= ( BRW_GET_SWZ(swz
, x
) << 0 |
84 BRW_GET_SWZ(swz
, y
) << 2 |
85 BRW_GET_SWZ(swz
, z
) << 4 |
86 BRW_GET_SWZ(swz
, w
) << 6 );
91 static struct brw_fp_src
src_scalar( struct brw_fp_src reg
, int x
)
93 return src_swizzle(reg
, x
, x
, x
, x
);
96 static struct brw_fp_src
src_abs( struct brw_fp_src src
)
103 static struct brw_fp_src
src_negate( struct brw_fp_src src
)
111 static int match_or_expand_immediate( const float *v
,
121 for (i
= 0; i
< nr
; i
++) {
122 boolean found
= FALSE
;
124 for (j
= 0; j
< *nr2
&& !found
; j
++) {
126 *swizzle
|= j
<< (i
* 2);
136 *swizzle
|= *nr2
<< (i
* 2);
146 /* Internally generated immediates: overkill...
148 static struct brw_fp_src
src_imm( struct brw_wm_compile
*c
,
155 /* Could do a first pass where we examine all existing immediates
159 for (i
= 0; i
< c
->nr_immediates
; i
++) {
160 if (match_or_expand_immediate( v
,
168 if (c
->nr_immediates
< Elements(c
->immediate
)) {
169 i
= c
->nr_immediates
++;
170 if (match_or_expand_immediate( v
,
182 /* Make sure that all referenced elements are from this immediate.
183 * Has the effect of making size-one immediates into scalars.
185 for (j
= nr
; j
< 4; j
++)
186 swizzle
|= (swizzle
& 0x3) << (j
* 2);
188 return src_swizzle( src_reg( TGSI_FILE_IMMEDIATE
, i
),
189 BRW_GET_SWZ(swizzle
, X
),
190 BRW_GET_SWZ(swizzle
, Y
),
191 BRW_GET_SWZ(swizzle
, Z
),
192 BRW_GET_SWZ(swizzle
, W
) );
197 static struct brw_fp_src
src_imm1f( struct brw_wm_compile
*c
,
200 return src_imm(c
, &f
, 1);
203 static struct brw_fp_src
src_imm4f( struct brw_wm_compile
*c
,
209 GLfloat f
[4] = {x
,y
,z
,w
};
210 return src_imm(c
, f
, 4);
215 /***********************************************************************
219 static struct brw_fp_dst
dst_reg(GLuint file
, GLuint idx
)
221 struct brw_fp_dst reg
;
224 reg
.writemask
= BRW_WRITEMASK_XYZW
;
229 static struct brw_fp_dst
dst_mask( struct brw_fp_dst reg
, int mask
)
231 reg
.writemask
&= mask
;
235 static struct brw_fp_dst
dst_undef( void )
237 return dst_reg(TGSI_FILE_NULL
, 0);
240 static boolean
dst_is_undef( struct brw_fp_dst dst
)
242 return dst
.file
== TGSI_FILE_NULL
;
245 static struct brw_fp_dst
dst_saturate( struct brw_fp_dst reg
, boolean flag
)
251 static struct brw_fp_dst
get_temp( struct brw_wm_compile
*c
)
253 int bit
= ffs( ~c
->fp_temp
);
256 debug_printf("%s: out of temporaries\n", __FILE__
);
259 c
->fp_temp
|= 1<<(bit
-1);
260 return dst_reg(TGSI_FILE_TEMPORARY
, c
->fp_first_internal_temp
+(bit
-1));
264 static void release_temp( struct brw_wm_compile
*c
, struct brw_fp_dst temp
)
266 c
->fp_temp
&= ~(1 << (temp
.index
- c
->fp_first_internal_temp
));
270 /***********************************************************************
274 static struct brw_fp_instruction
*get_fp_inst(struct brw_wm_compile
*c
)
276 return &c
->fp_instructions
[c
->nr_fp_insns
++];
279 static struct brw_fp_instruction
* emit_tex_op(struct brw_wm_compile
*c
,
281 struct brw_fp_dst dest
,
283 GLuint tex_src_target
,
284 struct brw_fp_src src0
,
285 struct brw_fp_src src1
,
286 struct brw_fp_src src2
)
288 struct brw_fp_instruction
*inst
= get_fp_inst(c
);
292 inst
->tex_unit
= tex_src_unit
;
293 inst
->tex_target
= tex_src_target
;
302 static INLINE
void emit_op3(struct brw_wm_compile
*c
,
304 struct brw_fp_dst dest
,
305 struct brw_fp_src src0
,
306 struct brw_fp_src src1
,
307 struct brw_fp_src src2
)
309 emit_tex_op(c
, op
, dest
, 0, 0, src0
, src1
, src2
);
313 static INLINE
void emit_op2(struct brw_wm_compile
*c
,
315 struct brw_fp_dst dest
,
316 struct brw_fp_src src0
,
317 struct brw_fp_src src1
)
319 emit_tex_op(c
, op
, dest
, 0, 0, src0
, src1
, src_undef());
322 static INLINE
void emit_op1(struct brw_wm_compile
*c
,
324 struct brw_fp_dst dest
,
325 struct brw_fp_src src0
)
327 emit_tex_op(c
, op
, dest
, 0, 0, src0
, src_undef(), src_undef());
330 static INLINE
void emit_op0(struct brw_wm_compile
*c
,
332 struct brw_fp_dst dest
)
334 emit_tex_op(c
, op
, dest
, 0, 0, src_undef(), src_undef(), src_undef());
339 /* Many opcodes produce the same value across all the result channels.
340 * We'd rather not have to support that splatting in the opcode implementations,
341 * and brw_wm_pass*.c wants to optimize them out by shuffling references around
342 * anyway. We can easily get both by emitting the opcode to one channel, and
343 * then MOVing it to the others, which brw_wm_pass*.c already understands.
345 static void emit_scalar_insn(struct brw_wm_compile
*c
,
347 struct brw_fp_dst dst
,
348 struct brw_fp_src src0
,
349 struct brw_fp_src src1
,
350 struct brw_fp_src src2
)
352 unsigned first_chan
= ffs(dst
.writemask
) - 1;
353 unsigned first_mask
= 1 << first_chan
;
355 if (dst
.writemask
== 0)
359 dst_mask(dst
, first_mask
),
362 if (dst
.writemask
!= first_mask
) {
363 emit_op1(c
, TGSI_OPCODE_MOV
,
364 dst_mask(dst
, ~first_mask
),
365 src_scalar(src_reg_from_dst(dst
), first_chan
));
370 /***********************************************************************
371 * Special instructions for interpolation and other tasks
374 static struct brw_fp_src
get_pixel_xy( struct brw_wm_compile
*c
)
376 if (src_is_undef(c
->fp_pixel_xy
)) {
377 struct brw_fp_dst pixel_xy
= get_temp(c
);
378 struct brw_fp_src payload_r0_depth
= src_reg(BRW_FILE_PAYLOAD
, PAYLOAD_DEPTH
);
381 /* Emit the out calculations, and hold onto the results. Use
382 * two instructions as a temporary is required.
384 /* pixel_xy.xy = PIXELXY payload[0];
388 dst_mask(pixel_xy
, BRW_WRITEMASK_XY
),
391 c
->fp_pixel_xy
= src_reg_from_dst(pixel_xy
);
394 return c
->fp_pixel_xy
;
397 static struct brw_fp_src
get_delta_xy( struct brw_wm_compile
*c
)
399 if (src_is_undef(c
->fp_delta_xy
)) {
400 struct brw_fp_dst delta_xy
= get_temp(c
);
401 struct brw_fp_src pixel_xy
= get_pixel_xy(c
);
402 struct brw_fp_src payload_r0_depth
= src_reg(BRW_FILE_PAYLOAD
, PAYLOAD_DEPTH
);
404 /* deltas.xy = DELTAXY pixel_xy, payload[0]
408 dst_mask(delta_xy
, BRW_WRITEMASK_XY
),
413 c
->fp_delta_xy
= src_reg_from_dst(delta_xy
);
416 return c
->fp_delta_xy
;
419 static struct brw_fp_src
get_pixel_w( struct brw_wm_compile
*c
)
421 if (src_is_undef(c
->fp_pixel_w
)) {
422 struct brw_fp_dst pixel_w
= get_temp(c
);
423 struct brw_fp_src deltas
= get_delta_xy(c
);
425 /* XXX: assuming position is always first -- valid?
427 struct brw_fp_src interp_wpos
= src_reg(BRW_FILE_PAYLOAD
, 0);
429 /* deltas.xyw = DELTAS2 deltas.xy, payload.interp_wpos.x
433 dst_mask(pixel_w
, BRW_WRITEMASK_W
),
439 c
->fp_pixel_w
= src_reg_from_dst(pixel_w
);
442 return c
->fp_pixel_w
;
446 /***********************************************************************
447 * Emit INTERP instructions ahead of first use of each attrib.
450 static void emit_interp( struct brw_wm_compile
*c
,
455 struct brw_fp_dst dst
= dst_reg(TGSI_FILE_INPUT
, idx
);
456 struct brw_fp_src interp
= src_reg(BRW_FILE_PAYLOAD
, idx
);
457 struct brw_fp_src deltas
= get_delta_xy(c
);
459 /* Need to use PINTERP on attributes which have been
460 * multiplied by 1/W in the SF program, and LINTERP on those
464 case TGSI_SEMANTIC_POSITION
:
465 /* Have to treat wpos.xy specially:
469 dst_mask(dst
, BRW_WRITEMASK_XY
),
472 /* TGSI_FILE_INPUT.attr.xyzw = INTERP payload.interp[attr].x, deltas.xyw
476 dst_mask(dst
, BRW_WRITEMASK_ZW
),
481 case TGSI_SEMANTIC_COLOR
:
482 if (c
->key
.flat_shade
) {
488 else if (interp_mode
== TGSI_INTERPOLATE_LINEAR
) {
506 case TGSI_SEMANTIC_FOG
:
507 /* Interpolate the fog coordinate */
510 dst_mask(dst
, BRW_WRITEMASK_X
),
517 dst_mask(dst
, BRW_WRITEMASK_YZ
),
522 dst_mask(dst
, BRW_WRITEMASK_W
),
526 case TGSI_SEMANTIC_FACE
:
527 /* XXX review/test this case */
530 dst_mask(dst
, BRW_WRITEMASK_X
));
534 dst_mask(dst
, BRW_WRITEMASK_YZ
),
539 dst_mask(dst
, BRW_WRITEMASK_W
),
543 case TGSI_SEMANTIC_PSIZE
:
544 /* XXX review/test this case */
547 dst_mask(dst
, BRW_WRITEMASK_XY
),
554 dst_mask(dst
, BRW_WRITEMASK_Z
),
559 dst_mask(dst
, BRW_WRITEMASK_W
),
564 switch (interp_mode
) {
565 case TGSI_INTERPOLATE_CONSTANT
:
572 case TGSI_INTERPOLATE_LINEAR
:
580 case TGSI_INTERPOLATE_PERSPECTIVE
:
594 /***********************************************************************
595 * Expand various instructions here to simpler forms.
597 static void precalc_dst( struct brw_wm_compile
*c
,
598 struct brw_fp_dst dst
,
599 struct brw_fp_src src0
,
600 struct brw_fp_src src1
)
602 if (dst
.writemask
& BRW_WRITEMASK_Y
) {
603 /* dst.y = mul src0.y, src1.y
607 dst_mask(dst
, BRW_WRITEMASK_Y
),
612 if (dst
.writemask
& BRW_WRITEMASK_XZ
) {
613 /* dst.z = mov src0.zzzz
617 dst_mask(dst
, BRW_WRITEMASK_Z
),
618 src_scalar(src0
, Z
));
620 /* dst.x = imm1f(1.0)
624 dst_saturate(dst_mask(dst
, BRW_WRITEMASK_X
), 0),
627 if (dst
.writemask
& BRW_WRITEMASK_W
) {
628 /* dst.w = mov src1.w
632 dst_mask(dst
, BRW_WRITEMASK_W
),
638 static void precalc_lit( struct brw_wm_compile
*c
,
639 struct brw_fp_dst dst
,
640 struct brw_fp_src src0
)
642 if (dst
.writemask
& BRW_WRITEMASK_XW
) {
643 /* dst.xw = imm(1.0f)
647 dst_saturate(dst_mask(dst
, BRW_WRITEMASK_XW
), 0),
651 if (dst
.writemask
& BRW_WRITEMASK_YZ
) {
654 dst_mask(dst
, BRW_WRITEMASK_YZ
),
661 * Some TEX instructions require extra code, cube map coordinate
662 * normalization, or coordinate scaling for RECT textures, etc.
663 * This function emits those extra instructions and the TEX
664 * instruction itself.
666 static void precalc_tex( struct brw_wm_compile
*c
,
667 struct brw_fp_dst dst
,
670 struct brw_fp_src src0
)
672 struct brw_fp_src coord
= src_undef();
673 struct brw_fp_dst tmp
= dst_undef();
675 assert(unit
< BRW_MAX_TEX_UNIT
);
677 /* Cubemap: find longest component of coord vector and normalize
680 if (target
== TGSI_TEXTURE_CUBE
) {
681 struct brw_fp_src tmpsrc
;
684 tmpsrc
= src_reg_from_dst(tmp
);
686 /* tmp = abs(src0) */
692 /* tmp.X = MAX(tmp.X, tmp.Y) */
693 emit_op2(c
, TGSI_OPCODE_MAX
,
694 dst_mask(tmp
, BRW_WRITEMASK_X
),
695 src_scalar(tmpsrc
, X
),
696 src_scalar(tmpsrc
, Y
));
698 /* tmp.X = MAX(tmp.X, tmp.Z) */
699 emit_op2(c
, TGSI_OPCODE_MAX
,
700 dst_mask(tmp
, BRW_WRITEMASK_X
),
702 src_scalar(tmpsrc
, Z
));
704 /* tmp.X = 1 / tmp.X */
705 emit_op1(c
, TGSI_OPCODE_RCP
,
706 dst_mask(tmp
, BRW_WRITEMASK_X
),
709 /* tmp = src0 * tmp.xxxx */
710 emit_op2(c
, TGSI_OPCODE_MUL
,
713 src_scalar(tmpsrc
, X
));
717 else if (target
== TGSI_TEXTURE_RECT
||
718 target
== TGSI_TEXTURE_SHADOWRECT
) {
719 /* XXX: need a mechanism for internally generated constants.
727 /* Need to emit YUV texture conversions by hand. Probably need to
728 * do this here - the alternative is in brw_wm_emit.c, but the
729 * conversion requires allocating a temporary variable which we
730 * don't have the facility to do that late in the compilation.
732 if (c
->key
.yuvtex_mask
& (1 << unit
)) {
733 /* convert ycbcr to RGBA */
734 GLboolean swap_uv
= c
->key
.yuvtex_swap_mask
& (1<<unit
);
735 struct brw_fp_dst tmp
= get_temp(c
);
736 struct brw_fp_src tmpsrc
= src_reg_from_dst(tmp
);
737 struct brw_fp_src C0
= src_imm4f( c
, -.5, -.0625, -.5, 1.164 );
738 struct brw_fp_src C1
= src_imm4f( c
, 1.596, -0.813, 2.018, -.391 );
744 dst_saturate(tmp
, dst
.saturate
),
751 /* tmp.xyz = ADD TMP, C0
753 emit_op2(c
, TGSI_OPCODE_ADD
,
754 dst_mask(tmp
, BRW_WRITEMASK_XYZ
),
758 /* YUV.y = MUL YUV.y, C0.w
760 emit_op2(c
, TGSI_OPCODE_MUL
,
761 dst_mask(tmp
, BRW_WRITEMASK_Y
),
767 * RGB.xyz = MAD YUV.zzx, C1, YUV.y
769 * RGB.xyz = MAD YUV.xxz, C1, YUV.y
772 emit_op3(c
, TGSI_OPCODE_MAD
,
773 dst_mask(dst
, BRW_WRITEMASK_XYZ
),
775 src_swizzle(tmpsrc
, Z
,Z
,X
,X
) :
776 src_swizzle(tmpsrc
, X
,X
,Z
,Z
)),
778 src_scalar(tmpsrc
, Y
));
780 /* RGB.y = MAD YUV.z, C1.w, RGB.y
784 dst_mask(dst
, BRW_WRITEMASK_Y
),
785 src_scalar(tmpsrc
, Z
),
787 src_scalar(src_reg_from_dst(dst
), Y
));
789 release_temp(c
, tmp
);
792 /* ordinary RGBA tex instruction */
803 /* XXX: add GL_EXT_texture_swizzle support to gallium -- by
804 * generating shader variants in mesa state tracker.
807 /* Release this temp if we ended up allocating it:
809 if (!dst_is_undef(tmp
))
810 release_temp(c
, tmp
);
815 * Check if the given TXP instruction really needs the divide-by-W step.
817 static GLboolean
projtex( struct brw_wm_compile
*c
,
819 struct brw_fp_src src
)
821 /* Only try to detect the simplest cases. Could detect (later)
822 * cases where we are trying to emit code like RCP {1.0}, MUL x,
825 * More complex cases than this typically only arise from
826 * user-provided fragment programs anyway:
828 if (target
== TGSI_TEXTURE_CUBE
)
829 return GL_FALSE
; /* ut2004 gun rendering !?! */
831 if (src
.file
== TGSI_FILE_INPUT
&&
832 BRW_GET_SWZ(src
.swizzle
, W
) == W
&&
833 c
->fp
->info
.input_interpolate
[src
.index
] != TGSI_INTERPOLATE_PERSPECTIVE
)
843 static void precalc_txp( struct brw_wm_compile
*c
,
844 struct brw_fp_dst dst
,
847 struct brw_fp_src src0
)
849 if (projtex(c
, target
, src0
)) {
850 struct brw_fp_dst tmp
= get_temp(c
);
852 /* tmp0.w = RCP inst.arg[0][3]
856 dst_mask(tmp
, BRW_WRITEMASK_W
),
857 src_scalar(src0
, W
));
859 /* tmp0.xyz = MUL inst.arg[0], tmp0.wwww
863 dst_mask(tmp
, BRW_WRITEMASK_XYZ
),
865 src_scalar(src_reg_from_dst(tmp
), W
));
873 src_reg_from_dst(tmp
));
875 release_temp(c
, tmp
);
881 precalc_tex(c
, dst
, target
, unit
, src0
);
886 /* XXX: note this returns a src_reg.
888 static struct brw_fp_src
889 find_output_by_semantic( struct brw_wm_compile
*c
,
893 const struct tgsi_shader_info
*info
= &c
->fp
->info
;
896 for (i
= 0; i
< info
->num_outputs
; i
++)
897 if (info
->output_semantic_name
[i
] == semantic
&&
898 info
->output_semantic_index
[i
] == index
)
899 return src_reg( TGSI_FILE_OUTPUT
, i
);
901 /* If not found, return some arbitrary immediate value:
903 return src_imm1f(c
, 1.0);
907 static void emit_fb_write( struct brw_wm_compile
*c
)
909 struct brw_fp_src payload_r0_depth
= src_reg(BRW_FILE_PAYLOAD
, PAYLOAD_DEPTH
);
910 struct brw_fp_src outdepth
= find_output_by_semantic(c
, TGSI_SEMANTIC_POSITION
, 0);
914 outdepth
= src_scalar(outdepth
, Z
);
916 for (i
= 0 ; i
< c
->key
.nr_cbufs
; i
++) {
917 struct brw_fp_src outcolor
;
918 unsigned target
= 1<<i
;
920 /* Set EOT flag on last inst:
922 if (i
== c
->key
.nr_cbufs
- 1)
925 outcolor
= find_output_by_semantic(c
, TGSI_SEMANTIC_COLOR
, i
);
927 /* Use emit_tex_op so that we can specify the inst->tex_target
928 * field, which is abused to contain the FB write target and the
931 emit_tex_op(c
, WM_FB_WRITE
,
942 static struct brw_fp_dst
translate_dst( struct brw_wm_compile
*c
,
943 const struct tgsi_full_dst_register
*dst
,
946 struct brw_fp_dst out
;
948 out
.file
= dst
->DstRegister
.File
;
949 out
.index
= dst
->DstRegister
.Index
;
950 out
.writemask
= dst
->DstRegister
.WriteMask
;
951 out
.indirect
= dst
->DstRegister
.Indirect
;
952 out
.saturate
= (saturate
== TGSI_SAT_ZERO_ONE
);
955 assert(dst
->DstRegisterInd
.File
== TGSI_FILE_ADDRESS
);
956 assert(dst
->DstRegisterInd
.Index
== 0);
963 static struct brw_fp_src
translate_src( struct brw_wm_compile
*c
,
964 const struct tgsi_full_src_register
*src
)
966 struct brw_fp_src out
;
968 out
.file
= src
->SrcRegister
.File
;
969 out
.index
= src
->SrcRegister
.Index
;
970 out
.indirect
= src
->SrcRegister
.Indirect
;
972 out
.swizzle
= ((src
->SrcRegister
.SwizzleX
<< 0) |
973 (src
->SrcRegister
.SwizzleY
<< 2) |
974 (src
->SrcRegister
.SwizzleZ
<< 4) |
975 (src
->SrcRegister
.SwizzleW
<< 6));
977 switch (tgsi_util_get_full_src_register_sign_mode( src
, 0 )) {
978 case TGSI_UTIL_SIGN_CLEAR
:
983 case TGSI_UTIL_SIGN_SET
:
988 case TGSI_UTIL_SIGN_TOGGLE
:
993 case TGSI_UTIL_SIGN_KEEP
:
1001 assert(src
->SrcRegisterInd
.File
== TGSI_FILE_ADDRESS
);
1002 assert(src
->SrcRegisterInd
.Index
== 0);
1010 static void emit_insn( struct brw_wm_compile
*c
,
1011 const struct tgsi_full_instruction
*inst
)
1013 unsigned opcode
= inst
->Instruction
.Opcode
;
1014 struct brw_fp_dst dst
;
1015 struct brw_fp_src src
[3];
1018 dst
= translate_dst( c
, &inst
->FullDstRegisters
[0],
1019 inst
->Instruction
.Saturate
);
1021 for (i
= 0; i
< inst
->Instruction
.NumSrcRegs
; i
++)
1022 src
[i
] = translate_src( c
, &inst
->FullSrcRegisters
[0] );
1025 case TGSI_OPCODE_ABS
:
1026 emit_op1(c
, TGSI_OPCODE_MOV
,
1031 case TGSI_OPCODE_SUB
:
1032 emit_op2(c
, TGSI_OPCODE_ADD
,
1035 src_negate(src
[1]));
1038 case TGSI_OPCODE_SCS
:
1039 emit_op1(c
, TGSI_OPCODE_SCS
,
1040 dst_mask(dst
, BRW_WRITEMASK_XY
),
1044 case TGSI_OPCODE_DST
:
1045 precalc_dst(c
, dst
, src
[0], src
[1]);
1048 case TGSI_OPCODE_LIT
:
1049 precalc_lit(c
, dst
, src
[0]);
1052 case TGSI_OPCODE_TEX
:
1054 inst
->InstructionExtTexture
.Texture
,
1055 src
[0].file
, /* sampler unit */
1059 case TGSI_OPCODE_TXP
:
1061 inst
->InstructionExtTexture
.Texture
,
1062 src
[0].file
, /* sampler unit */
1066 case TGSI_OPCODE_TXB
:
1067 /* XXX: TXB not done
1070 inst
->InstructionExtTexture
.Texture
,
1071 src
[0].file
, /* sampler unit */
1075 case TGSI_OPCODE_XPD
:
1076 emit_op2(c
, TGSI_OPCODE_XPD
,
1077 dst_mask(dst
, BRW_WRITEMASK_XYZ
),
1082 case TGSI_OPCODE_KIL
:
1083 emit_op1(c
, TGSI_OPCODE_KIL
,
1084 dst_mask(dst_undef(), 0),
1088 case TGSI_OPCODE_END
:
1092 if (!c
->key
.has_flow_control
&&
1093 brw_wm_is_scalar_result(opcode
))
1094 emit_scalar_insn(c
, opcode
, dst
, src
[0], src
[1], src
[2]);
1096 emit_op3(c
, opcode
, dst
, src
[0], src
[1], src
[2]);
1102 * Initial pass for fragment program code generation.
1103 * This function is used by both the GLSL and non-GLSL paths.
1105 int brw_wm_pass_fp( struct brw_wm_compile
*c
)
1107 struct brw_fragment_shader
*fs
= c
->fp
;
1108 struct tgsi_parse_context parse
;
1109 struct tgsi_full_instruction
*inst
;
1110 struct tgsi_full_declaration
*decl
;
1115 if (BRW_DEBUG
& DEBUG_WM
) {
1116 debug_printf("pre-fp:\n");
1117 tgsi_dump(fs
->tokens
, 0);
1120 c
->fp_pixel_xy
= src_undef();
1121 c
->fp_delta_xy
= src_undef();
1122 c
->fp_pixel_w
= src_undef();
1124 c
->nr_immediates
= 0;
1127 /* Loop over all instructions doing assorted simplifications and
1130 tgsi_parse_init( &parse
, fs
->tokens
);
1131 while( !tgsi_parse_end_of_tokens( &parse
) ) {
1132 tgsi_parse_token( &parse
);
1134 switch( parse
.FullToken
.Token
.Type
) {
1135 case TGSI_TOKEN_TYPE_DECLARATION
:
1136 /* Turn input declarations into special WM_* instructions.
1138 * XXX: For non-branching shaders, consider deferring variable
1139 * initialization as late as possible to minimize register
1140 * usage. This is how the original BRW driver worked.
1142 * In a branching shader, must preamble instructions at decl
1143 * time, as instruction order in the shader does not
1144 * correspond to the order instructions are executed in the
1147 * This is where special instructions such as WM_CINTERP,
1148 * WM_LINTERP, WM_PINTERP and WM_WPOSXY are emitted to
1149 * compute shader inputs from the payload registers and pixel
1152 decl
= &parse
.FullToken
.FullDeclaration
;
1153 if( decl
->Declaration
.File
== TGSI_FILE_INPUT
) {
1154 unsigned first
, last
, mask
;
1157 first
= decl
->DeclarationRange
.First
;
1158 last
= decl
->DeclarationRange
.Last
;
1159 mask
= decl
->Declaration
.UsageMask
;
1161 for (attrib
= first
; attrib
<= last
; attrib
++) {
1164 decl
->Semantic
.SemanticName
,
1165 decl
->Declaration
.Interpolate
);
1171 case TGSI_TOKEN_TYPE_IMMEDIATE
:
1172 /* Unlike VS programs we can probably manage fine encoding
1173 * immediate values directly into the emitted EU
1174 * instructions, as we probably only need to reference one
1175 * float value per instruction. Just save the data for now
1176 * and use directly later.
1178 i
= c
->nr_immediates
++;
1179 imm
= &parse
.FullToken
.FullImmediate
.u
[i
].Float
;
1180 size
= parse
.FullToken
.FullImmediate
.Immediate
.NrTokens
- 1;
1182 if (c
->nr_immediates
>= BRW_WM_MAX_CONST
)
1183 return PIPE_ERROR_OUT_OF_MEMORY
;
1185 for (i
= 0; i
< size
; i
++)
1186 c
->immediate
[c
->nr_immediates
].v
[i
] = imm
[i
];
1189 c
->immediate
[c
->nr_immediates
].v
[i
] = 0.0;
1191 c
->immediate
[c
->nr_immediates
].nr
= size
;
1195 case TGSI_TOKEN_TYPE_INSTRUCTION
:
1196 inst
= &parse
.FullToken
.FullInstruction
;
1202 if (BRW_DEBUG
& DEBUG_WM
) {
1203 brw_wm_print_fp_program( c
, "pass_fp" );