1 /**************************************************************************
3 * Copyright 2011 The Chromium OS authors.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL GOOGLE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
29 #include "i915_context.h"
32 #include "pipe/p_shader_tokens.h"
33 #include "util/u_math.h"
34 #include "util/u_memory.h"
35 #include "util/u_string.h"
36 #include "tgsi/tgsi_parse.h"
37 #include "tgsi/tgsi_dump.h"
39 static boolean
same_dst_reg(struct i915_full_dst_register
* d1
, struct i915_full_dst_register
* d2
)
41 return (d1
->Register
.File
== d2
->Register
.File
&&
42 d1
->Register
.Indirect
== d2
->Register
.Indirect
&&
43 d1
->Register
.Dimension
== d2
->Register
.Dimension
&&
44 d1
->Register
.Index
== d2
->Register
.Index
);
47 static boolean
same_src_reg(struct i915_full_src_register
* d1
, struct i915_full_src_register
* d2
)
49 return (d1
->Register
.File
== d2
->Register
.File
&&
50 d1
->Register
.Indirect
== d2
->Register
.Indirect
&&
51 d1
->Register
.Dimension
== d2
->Register
.Dimension
&&
52 d1
->Register
.Index
== d2
->Register
.Index
&&
53 d1
->Register
.Absolute
== d2
->Register
.Absolute
&&
54 d1
->Register
.Negate
== d2
->Register
.Negate
);
57 static boolean
is_unswizzled(struct i915_full_src_register
* r
,
60 if ( write_mask
& TGSI_WRITEMASK_X
&& r
->Register
.SwizzleX
!= TGSI_SWIZZLE_X
)
62 if ( write_mask
& TGSI_WRITEMASK_Y
&& r
->Register
.SwizzleY
!= TGSI_SWIZZLE_Y
)
64 if ( write_mask
& TGSI_WRITEMASK_Z
&& r
->Register
.SwizzleZ
!= TGSI_SWIZZLE_Z
)
66 if ( write_mask
& TGSI_WRITEMASK_W
&& r
->Register
.SwizzleW
!= TGSI_SWIZZLE_W
)
71 static boolean
op_commutes(unsigned opcode
)
73 if (opcode
== TGSI_OPCODE_ADD
) return TRUE
;
74 if (opcode
== TGSI_OPCODE_MUL
) return TRUE
;
78 static unsigned op_neutral_element(unsigned opcode
)
80 if (opcode
== TGSI_OPCODE_ADD
)
81 return TGSI_SWIZZLE_ZERO
;
82 if (opcode
== TGSI_OPCODE_MUL
)
83 return TGSI_SWIZZLE_ONE
;
85 debug_printf("Unknown opcode %d\n",opcode
);
86 return TGSI_SWIZZLE_ZERO
;
90 * Sets the swizzle to the neutral element for the operation for the bits
91 * of writemask which are set, swizzle to identity otherwise.
93 static void set_neutral_element_swizzle(struct i915_full_src_register
* r
,
97 if ( write_mask
& TGSI_WRITEMASK_X
)
98 r
->Register
.SwizzleX
= neutral
;
100 r
->Register
.SwizzleX
= TGSI_SWIZZLE_X
;
102 if ( write_mask
& TGSI_WRITEMASK_Y
)
103 r
->Register
.SwizzleY
= neutral
;
105 r
->Register
.SwizzleY
= TGSI_SWIZZLE_Y
;
107 if ( write_mask
& TGSI_WRITEMASK_Z
)
108 r
->Register
.SwizzleZ
= neutral
;
110 r
->Register
.SwizzleZ
= TGSI_SWIZZLE_Z
;
112 if ( write_mask
& TGSI_WRITEMASK_W
)
113 r
->Register
.SwizzleW
= neutral
;
115 r
->Register
.SwizzleW
= TGSI_SWIZZLE_W
;
119 * Optimize away things like:
120 * MUL OUT[0].xyz, TEMP[1], TEMP[2]
121 * MOV OUT[0].w, TEMP[2]
123 * MUL OUT[0].xyzw, TEMP[1].xyz1, TEMP[2]
124 * This is useful for optimizing texenv.
126 static void i915_fpc_optimize_mov_after_alu(union i915_full_token
* current
, union i915_full_token
* next
)
128 if ( current
->Token
.Type
== TGSI_TOKEN_TYPE_INSTRUCTION
&&
129 next
->Token
.Type
== TGSI_TOKEN_TYPE_INSTRUCTION
&&
130 op_commutes(current
->FullInstruction
.Instruction
.Opcode
) &&
131 current
->FullInstruction
.Instruction
.Saturate
== next
->FullInstruction
.Instruction
.Saturate
&&
132 next
->FullInstruction
.Instruction
.Opcode
== TGSI_OPCODE_MOV
&&
133 same_dst_reg(&next
->FullInstruction
.Dst
[0], &next
->FullInstruction
.Dst
[0]) &&
134 same_src_reg(&next
->FullInstruction
.Src
[0], ¤t
->FullInstruction
.Src
[1]) &&
135 is_unswizzled(¤t
->FullInstruction
.Src
[0], current
->FullInstruction
.Dst
[0].Register
.WriteMask
) &&
136 is_unswizzled(¤t
->FullInstruction
.Src
[1], current
->FullInstruction
.Dst
[0].Register
.WriteMask
) &&
137 is_unswizzled(&next
->FullInstruction
.Src
[0], next
->FullInstruction
.Dst
[0].Register
.WriteMask
) )
139 next
->FullInstruction
.Instruction
.Opcode
= TGSI_OPCODE_NOP
;
141 set_neutral_element_swizzle(¤t
->FullInstruction
.Src
[1], 0, 0);
142 set_neutral_element_swizzle(¤t
->FullInstruction
.Src
[0],
143 next
->FullInstruction
.Dst
[0].Register
.WriteMask
,
144 op_neutral_element(current
->FullInstruction
.Instruction
.Opcode
));
146 current
->FullInstruction
.Dst
[0].Register
.WriteMask
= current
->FullInstruction
.Dst
[0].Register
.WriteMask
|
147 next
->FullInstruction
.Dst
[0].Register
.WriteMask
;
151 if ( current
->Token
.Type
== TGSI_TOKEN_TYPE_INSTRUCTION
&&
152 next
->Token
.Type
== TGSI_TOKEN_TYPE_INSTRUCTION
&&
153 op_commutes(current
->FullInstruction
.Instruction
.Opcode
) &&
154 current
->FullInstruction
.Instruction
.Saturate
== next
->FullInstruction
.Instruction
.Saturate
&&
155 next
->FullInstruction
.Instruction
.Opcode
== TGSI_OPCODE_MOV
&&
156 same_dst_reg(&next
->FullInstruction
.Dst
[0], &next
->FullInstruction
.Dst
[0]) &&
157 same_src_reg(&next
->FullInstruction
.Src
[0], ¤t
->FullInstruction
.Src
[0]) &&
158 is_unswizzled(¤t
->FullInstruction
.Src
[0], current
->FullInstruction
.Dst
[0].Register
.WriteMask
) &&
159 is_unswizzled(¤t
->FullInstruction
.Src
[1], current
->FullInstruction
.Dst
[0].Register
.WriteMask
) &&
160 is_unswizzled(&next
->FullInstruction
.Src
[0], next
->FullInstruction
.Dst
[0].Register
.WriteMask
) )
162 next
->FullInstruction
.Instruction
.Opcode
= TGSI_OPCODE_NOP
;
164 set_neutral_element_swizzle(¤t
->FullInstruction
.Src
[0], 0, 0);
165 set_neutral_element_swizzle(¤t
->FullInstruction
.Src
[1],
166 next
->FullInstruction
.Dst
[0].Register
.WriteMask
,
167 op_neutral_element(current
->FullInstruction
.Instruction
.Opcode
));
169 current
->FullInstruction
.Dst
[0].Register
.WriteMask
= current
->FullInstruction
.Dst
[0].Register
.WriteMask
|
170 next
->FullInstruction
.Dst
[0].Register
.WriteMask
;
175 static void copy_src_reg(struct i915_src_register
* o
, const struct tgsi_src_register
* i
)
178 o
->Indirect
= i
->Indirect
;
179 o
->Dimension
= i
->Dimension
;
181 o
->SwizzleX
= i
->SwizzleX
;
182 o
->SwizzleY
= i
->SwizzleY
;
183 o
->SwizzleZ
= i
->SwizzleZ
;
184 o
->SwizzleW
= i
->SwizzleW
;
185 o
->Absolute
= i
->Absolute
;
186 o
->Negate
= i
->Negate
;
189 static void copy_dst_reg(struct i915_dst_register
* o
, const struct tgsi_dst_register
* i
)
192 o
->WriteMask
= i
->WriteMask
;
193 o
->Indirect
= i
->Indirect
;
194 o
->Dimension
= i
->Dimension
;
198 static void copy_instruction(struct i915_full_instruction
* o
, const struct tgsi_full_instruction
* i
)
200 memcpy(&o
->Instruction
, &i
->Instruction
, sizeof(o
->Instruction
));
201 memcpy(&o
->Texture
, &i
->Texture
, sizeof(o
->Texture
));
203 copy_dst_reg(&o
->Dst
[0].Register
, &i
->Dst
[0].Register
);
205 copy_src_reg(&o
->Src
[0].Register
, &i
->Src
[0].Register
);
206 copy_src_reg(&o
->Src
[1].Register
, &i
->Src
[1].Register
);
207 copy_src_reg(&o
->Src
[2].Register
, &i
->Src
[2].Register
);
210 static void copy_token(union i915_full_token
* o
, union tgsi_full_token
* i
)
212 if (i
->Token
.Type
!= TGSI_TOKEN_TYPE_INSTRUCTION
)
213 memcpy(o
, i
, sizeof(*o
));
215 copy_instruction(&o
->FullInstruction
, &i
->FullInstruction
);
219 struct i915_token_list
* i915_optimize(const struct tgsi_token
*tokens
)
221 struct i915_token_list
*out_tokens
= MALLOC(sizeof(struct i915_token_list
));
222 struct tgsi_parse_context parse
;
225 out_tokens
->NumTokens
= 0;
227 /* Count the tokens */
228 tgsi_parse_init( &parse
, tokens
);
229 while( !tgsi_parse_end_of_tokens( &parse
) ) {
230 tgsi_parse_token( &parse
);
231 out_tokens
->NumTokens
++;
233 tgsi_parse_free (&parse
);
235 /* Allocate our tokens */
236 out_tokens
->Tokens
= MALLOC(sizeof(union i915_full_token
) * out_tokens
->NumTokens
);
238 tgsi_parse_init( &parse
, tokens
);
239 while( !tgsi_parse_end_of_tokens( &parse
) ) {
240 tgsi_parse_token( &parse
);
241 copy_token(&out_tokens
->Tokens
[i
] , &parse
.FullToken
);
244 i915_fpc_optimize_mov_after_alu(&out_tokens
->Tokens
[i
-1], &out_tokens
->Tokens
[i
]);
248 tgsi_parse_free (&parse
);
253 void i915_optimize_free(struct i915_token_list
* tokens
)
255 free(tokens
->Tokens
);