1 /**************************************************************************
3 * Copyright 2011 The Chromium OS authors.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL GOOGLE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
29 #include "i915_context.h"
32 #include "pipe/p_shader_tokens.h"
33 #include "util/u_math.h"
34 #include "util/u_memory.h"
35 #include "util/u_string.h"
36 #include "tgsi/tgsi_parse.h"
37 #include "tgsi/tgsi_dump.h"
39 static boolean
same_src_dst_reg(struct i915_full_src_register
* s1
, struct i915_full_dst_register
* d1
)
41 return (s1
->Register
.File
== d1
->Register
.File
&&
42 s1
->Register
.Indirect
== d1
->Register
.Indirect
&&
43 s1
->Register
.Dimension
== d1
->Register
.Dimension
&&
44 s1
->Register
.Index
== d1
->Register
.Index
);
47 static boolean
same_dst_reg(struct i915_full_dst_register
* d1
, struct i915_full_dst_register
* d2
)
49 return (d1
->Register
.File
== d2
->Register
.File
&&
50 d1
->Register
.Indirect
== d2
->Register
.Indirect
&&
51 d1
->Register
.Dimension
== d2
->Register
.Dimension
&&
52 d1
->Register
.Index
== d2
->Register
.Index
);
55 static boolean
same_src_reg(struct i915_full_src_register
* d1
, struct i915_full_src_register
* d2
)
57 return (d1
->Register
.File
== d2
->Register
.File
&&
58 d1
->Register
.Indirect
== d2
->Register
.Indirect
&&
59 d1
->Register
.Dimension
== d2
->Register
.Dimension
&&
60 d1
->Register
.Index
== d2
->Register
.Index
&&
61 d1
->Register
.Absolute
== d2
->Register
.Absolute
&&
62 d1
->Register
.Negate
== d2
->Register
.Negate
);
65 static boolean
has_destination(unsigned opcode
)
67 return (opcode
!= TGSI_OPCODE_NOP
&&
68 opcode
!= TGSI_OPCODE_KILL_IF
&&
69 opcode
!= TGSI_OPCODE_KILL
&&
70 opcode
!= TGSI_OPCODE_END
&&
71 opcode
!= TGSI_OPCODE_RET
);
74 static boolean
is_unswizzled(struct i915_full_src_register
* r
,
77 if ( write_mask
& TGSI_WRITEMASK_X
&& r
->Register
.SwizzleX
!= TGSI_SWIZZLE_X
)
79 if ( write_mask
& TGSI_WRITEMASK_Y
&& r
->Register
.SwizzleY
!= TGSI_SWIZZLE_Y
)
81 if ( write_mask
& TGSI_WRITEMASK_Z
&& r
->Register
.SwizzleZ
!= TGSI_SWIZZLE_Z
)
83 if ( write_mask
& TGSI_WRITEMASK_W
&& r
->Register
.SwizzleW
!= TGSI_SWIZZLE_W
)
/*
 * Opcode predicate consulted by i915_fpc_optimize_mov_after_alu() before it
 * folds a following MOV into the instruction.
 *
 * NOTE(review): only the signature survives in this copy of the file; the
 * body (original lines 89-100) is missing and has to be restored from the
 * upstream source. Presumably it accepts the opcodes that
 * op_neutral_element() knows about -- confirm.
 */
88 static boolean
op_commutes(unsigned opcode
)
102 static unsigned op_neutral_element(unsigned opcode
)
106 case TGSI_OPCODE_ADD
:
107 return TGSI_SWIZZLE_ZERO
;
108 case TGSI_OPCODE_MUL
:
109 case TGSI_OPCODE_DP2
:
110 case TGSI_OPCODE_DP3
:
111 case TGSI_OPCODE_DP4
:
112 return TGSI_SWIZZLE_ONE
;
115 debug_printf("Unknown opcode %d\n",opcode
);
116 return TGSI_SWIZZLE_ZERO
;
120 * Sets the swizzle to the neutral element for the operation for the bits
121 * of writemask which are set, swizzle to identity otherwise.
123 static void set_neutral_element_swizzle(struct i915_full_src_register
* r
,
127 if ( write_mask
& TGSI_WRITEMASK_X
)
128 r
->Register
.SwizzleX
= neutral
;
130 r
->Register
.SwizzleX
= TGSI_SWIZZLE_X
;
132 if ( write_mask
& TGSI_WRITEMASK_Y
)
133 r
->Register
.SwizzleY
= neutral
;
135 r
->Register
.SwizzleY
= TGSI_SWIZZLE_Y
;
137 if ( write_mask
& TGSI_WRITEMASK_Z
)
138 r
->Register
.SwizzleZ
= neutral
;
140 r
->Register
.SwizzleZ
= TGSI_SWIZZLE_Z
;
142 if ( write_mask
& TGSI_WRITEMASK_W
)
143 r
->Register
.SwizzleW
= neutral
;
145 r
->Register
.SwizzleW
= TGSI_SWIZZLE_W
;
148 static void copy_src_reg(struct i915_src_register
* o
, const struct tgsi_src_register
* i
)
151 o
->Indirect
= i
->Indirect
;
152 o
->Dimension
= i
->Dimension
;
154 o
->SwizzleX
= i
->SwizzleX
;
155 o
->SwizzleY
= i
->SwizzleY
;
156 o
->SwizzleZ
= i
->SwizzleZ
;
157 o
->SwizzleW
= i
->SwizzleW
;
158 o
->Absolute
= i
->Absolute
;
159 o
->Negate
= i
->Negate
;
162 static void copy_dst_reg(struct i915_dst_register
* o
, const struct tgsi_dst_register
* i
)
165 o
->WriteMask
= i
->WriteMask
;
166 o
->Indirect
= i
->Indirect
;
167 o
->Dimension
= i
->Dimension
;
171 static void copy_instruction(struct i915_full_instruction
* o
, const struct tgsi_full_instruction
* i
)
173 memcpy(&o
->Instruction
, &i
->Instruction
, sizeof(o
->Instruction
));
174 memcpy(&o
->Texture
, &i
->Texture
, sizeof(o
->Texture
));
176 copy_dst_reg(&o
->Dst
[0].Register
, &i
->Dst
[0].Register
);
178 copy_src_reg(&o
->Src
[0].Register
, &i
->Src
[0].Register
);
179 copy_src_reg(&o
->Src
[1].Register
, &i
->Src
[1].Register
);
180 copy_src_reg(&o
->Src
[2].Register
, &i
->Src
[2].Register
);
183 static void copy_token(union i915_full_token
* o
, union tgsi_full_token
* i
)
185 if (i
->Token
.Type
!= TGSI_TOKEN_TYPE_INSTRUCTION
)
186 memcpy(o
, i
, sizeof(*o
));
188 copy_instruction(&o
->FullInstruction
, &i
->FullInstruction
);
193 * Optimize away things like:
194 * MUL OUT[0].xyz, TEMP[1], TEMP[2]
195 * MOV OUT[0].w, TEMP[2]
197 * MUL OUT[0].xyzw, TEMP[1].xyz1, TEMP[2]
198 * This is useful for optimizing texenv.
200 static void i915_fpc_optimize_mov_after_alu(union i915_full_token
* current
, union i915_full_token
* next
)
202 if ( current
->Token
.Type
== TGSI_TOKEN_TYPE_INSTRUCTION
&&
203 next
->Token
.Type
== TGSI_TOKEN_TYPE_INSTRUCTION
&&
204 op_commutes(current
->FullInstruction
.Instruction
.Opcode
) &&
205 current
->FullInstruction
.Instruction
.Saturate
== next
->FullInstruction
.Instruction
.Saturate
&&
206 next
->FullInstruction
.Instruction
.Opcode
== TGSI_OPCODE_MOV
&&
207 same_dst_reg(&next
->FullInstruction
.Dst
[0], ¤t
->FullInstruction
.Dst
[0]) &&
208 same_src_reg(&next
->FullInstruction
.Src
[0], ¤t
->FullInstruction
.Src
[1]) &&
209 !same_src_dst_reg(&next
->FullInstruction
.Src
[0], ¤t
->FullInstruction
.Dst
[0]) &&
210 is_unswizzled(¤t
->FullInstruction
.Src
[0], current
->FullInstruction
.Dst
[0].Register
.WriteMask
) &&
211 is_unswizzled(¤t
->FullInstruction
.Src
[1], current
->FullInstruction
.Dst
[0].Register
.WriteMask
) &&
212 is_unswizzled(&next
->FullInstruction
.Src
[0], next
->FullInstruction
.Dst
[0].Register
.WriteMask
) )
214 next
->FullInstruction
.Instruction
.Opcode
= TGSI_OPCODE_NOP
;
216 set_neutral_element_swizzle(¤t
->FullInstruction
.Src
[1], 0, 0);
217 set_neutral_element_swizzle(¤t
->FullInstruction
.Src
[0],
218 next
->FullInstruction
.Dst
[0].Register
.WriteMask
,
219 op_neutral_element(current
->FullInstruction
.Instruction
.Opcode
));
221 current
->FullInstruction
.Dst
[0].Register
.WriteMask
= current
->FullInstruction
.Dst
[0].Register
.WriteMask
|
222 next
->FullInstruction
.Dst
[0].Register
.WriteMask
;
226 if ( current
->Token
.Type
== TGSI_TOKEN_TYPE_INSTRUCTION
&&
227 next
->Token
.Type
== TGSI_TOKEN_TYPE_INSTRUCTION
&&
228 op_commutes(current
->FullInstruction
.Instruction
.Opcode
) &&
229 current
->FullInstruction
.Instruction
.Saturate
== next
->FullInstruction
.Instruction
.Saturate
&&
230 next
->FullInstruction
.Instruction
.Opcode
== TGSI_OPCODE_MOV
&&
231 same_dst_reg(&next
->FullInstruction
.Dst
[0], ¤t
->FullInstruction
.Dst
[0]) &&
232 same_src_reg(&next
->FullInstruction
.Src
[0], ¤t
->FullInstruction
.Src
[0]) &&
233 !same_src_dst_reg(&next
->FullInstruction
.Src
[0], ¤t
->FullInstruction
.Dst
[0]) &&
234 is_unswizzled(¤t
->FullInstruction
.Src
[0], current
->FullInstruction
.Dst
[0].Register
.WriteMask
) &&
235 is_unswizzled(¤t
->FullInstruction
.Src
[1], current
->FullInstruction
.Dst
[0].Register
.WriteMask
) &&
236 is_unswizzled(&next
->FullInstruction
.Src
[0], next
->FullInstruction
.Dst
[0].Register
.WriteMask
) )
238 next
->FullInstruction
.Instruction
.Opcode
= TGSI_OPCODE_NOP
;
240 set_neutral_element_swizzle(¤t
->FullInstruction
.Src
[0], 0, 0);
241 set_neutral_element_swizzle(¤t
->FullInstruction
.Src
[1],
242 next
->FullInstruction
.Dst
[0].Register
.WriteMask
,
243 op_neutral_element(current
->FullInstruction
.Instruction
.Opcode
));
245 current
->FullInstruction
.Dst
[0].Register
.WriteMask
= current
->FullInstruction
.Dst
[0].Register
.WriteMask
|
246 next
->FullInstruction
.Dst
[0].Register
.WriteMask
;
252 * Optimize away things like:
253 * MOV TEMP[0].xyz TEMP[0].xyzx
257 static boolean
i915_fpc_useless_mov(union tgsi_full_token
* tgsi_current
)
259 union i915_full_token current
;
260 copy_token(¤t
, tgsi_current
);
261 if ( current
.Token
.Type
== TGSI_TOKEN_TYPE_INSTRUCTION
&&
262 current
.FullInstruction
.Instruction
.Opcode
== TGSI_OPCODE_MOV
&&
263 has_destination(current
.FullInstruction
.Instruction
.Opcode
) &&
264 current
.FullInstruction
.Instruction
.Saturate
== TGSI_SAT_NONE
&&
265 current
.FullInstruction
.Src
[0].Register
.Absolute
== 0 &&
266 current
.FullInstruction
.Src
[0].Register
.Negate
== 0 &&
267 is_unswizzled(¤t
.FullInstruction
.Src
[0], current
.FullInstruction
.Dst
[0].Register
.WriteMask
) &&
268 same_src_dst_reg(¤t
.FullInstruction
.Src
[0], ¤t
.FullInstruction
.Dst
[0]) )
276 * Optimize away things like:
277 * *** TEMP[0], TEMP[1], TEMP[2]
280 * *** OUT[0], TEMP[1], TEMP[2]
282 static void i915_fpc_optimize_useless_mov_after_inst(union i915_full_token
* current
, union i915_full_token
* next
)
284 if ( current
->Token
.Type
== TGSI_TOKEN_TYPE_INSTRUCTION
&&
285 next
->Token
.Type
== TGSI_TOKEN_TYPE_INSTRUCTION
&&
286 next
->FullInstruction
.Instruction
.Opcode
== TGSI_OPCODE_MOV
&&
287 has_destination(current
->FullInstruction
.Instruction
.Opcode
) &&
288 next
->FullInstruction
.Instruction
.Saturate
== TGSI_SAT_NONE
&&
289 next
->FullInstruction
.Src
[0].Register
.Absolute
== 0 &&
290 next
->FullInstruction
.Src
[0].Register
.Negate
== 0 &&
291 next
->FullInstruction
.Dst
[0].Register
.File
== TGSI_FILE_OUTPUT
&&
292 is_unswizzled(&next
->FullInstruction
.Src
[0], next
->FullInstruction
.Dst
[0].Register
.WriteMask
) &&
293 current
->FullInstruction
.Dst
[0].Register
.WriteMask
== next
->FullInstruction
.Dst
[0].Register
.WriteMask
&&
294 same_src_dst_reg(&next
->FullInstruction
.Src
[0], ¤t
->FullInstruction
.Dst
[0]) )
296 next
->FullInstruction
.Instruction
.Opcode
= TGSI_OPCODE_NOP
;
298 current
->FullInstruction
.Dst
[0] = next
->FullInstruction
.Dst
[0];
/*
 * Build the driver's token list from a TGSI token stream.
 *
 * The stream is parsed twice. The first pass counts the tokens into
 * out_tokens->NumTokens, which then sizes the Tokens array. The second pass
 * copies each parsed token with copy_token(); a token for which
 * i915_fpc_useless_mov() returns non-zero decrements NumTokens, and the two
 * peephole passes are run on the pair Tokens[i-1], Tokens[i].
 *
 * NOTE(review): this copy of the function is missing lines -- there are no
 * function braces, no declaration or increment of `i`, nothing that skips
 * the copy after NumTokens--, no guard for the i-1 access and no return
 * statement. Restore them from the upstream file before building.
 * NOTE(review): neither MALLOC() result is checked before it is
 * dereferenced.
 */
303 struct i915_token_list
* i915_optimize(const struct tgsi_token
*tokens
)
305 struct i915_token_list
*out_tokens
= MALLOC(sizeof(struct i915_token_list
));
306 struct tgsi_parse_context parse
;
309 out_tokens
->NumTokens
= 0;
311 /* Count the tokens */
312 tgsi_parse_init( &parse
, tokens
);
313 while( !tgsi_parse_end_of_tokens( &parse
) ) {
314 tgsi_parse_token( &parse
);
315 out_tokens
->NumTokens
++;
317 tgsi_parse_free (&parse
);
319 /* Allocate our tokens */
320 out_tokens
->Tokens
= MALLOC(sizeof(union i915_full_token
) * out_tokens
->NumTokens
);
322 tgsi_parse_init( &parse
, tokens
);
323 while( !tgsi_parse_end_of_tokens( &parse
) ) {
324 tgsi_parse_token( &parse
);
326 if (i915_fpc_useless_mov(&parse
.FullToken
)) {
327 out_tokens
->NumTokens
--;
331 copy_token(&out_tokens
->Tokens
[i
] , &parse
.FullToken
);
334 i915_fpc_optimize_useless_mov_after_inst(&out_tokens
->Tokens
[i
-1], &out_tokens
->Tokens
[i
]);
335 i915_fpc_optimize_mov_after_alu(&out_tokens
->Tokens
[i
-1], &out_tokens
->Tokens
[i
]);
339 tgsi_parse_free (&parse
);
/*
 * Release a token list.
 *
 * Only the free() of tokens->Tokens is visible here.
 * NOTE(review): the remainder of the body is cut off in this copy --
 * confirm that the list structure itself (allocated with MALLOC in
 * i915_optimize) is released as well.
 */
344 void i915_optimize_free(struct i915_token_list
* tokens
)
346 free(tokens
->Tokens
);