1 /**************************************************************************
3 * Copyright 2011 The Chromium OS authors.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL GOOGLE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
29 #include "i915_context.h"
32 #include "pipe/p_shader_tokens.h"
33 #include "util/u_math.h"
34 #include "util/u_memory.h"
35 #include "util/u_string.h"
36 #include "tgsi/tgsi_parse.h"
37 #include "tgsi/tgsi_dump.h"
39 static boolean
same_src_dst_reg(struct i915_full_src_register
* s1
, struct i915_full_dst_register
* d1
)
41 return (s1
->Register
.File
== d1
->Register
.File
&&
42 s1
->Register
.Indirect
== d1
->Register
.Indirect
&&
43 s1
->Register
.Dimension
== d1
->Register
.Dimension
&&
44 s1
->Register
.Index
== d1
->Register
.Index
);
47 static boolean
same_dst_reg(struct i915_full_dst_register
* d1
, struct i915_full_dst_register
* d2
)
49 return (d1
->Register
.File
== d2
->Register
.File
&&
50 d1
->Register
.Indirect
== d2
->Register
.Indirect
&&
51 d1
->Register
.Dimension
== d2
->Register
.Dimension
&&
52 d1
->Register
.Index
== d2
->Register
.Index
);
55 static boolean
same_src_reg(struct i915_full_src_register
* d1
, struct i915_full_src_register
* d2
)
57 return (d1
->Register
.File
== d2
->Register
.File
&&
58 d1
->Register
.Indirect
== d2
->Register
.Indirect
&&
59 d1
->Register
.Dimension
== d2
->Register
.Dimension
&&
60 d1
->Register
.Index
== d2
->Register
.Index
&&
61 d1
->Register
.Absolute
== d2
->Register
.Absolute
&&
62 d1
->Register
.Negate
== d2
->Register
.Negate
);
65 static boolean
has_destination(unsigned opcode
)
67 return (opcode
!= TGSI_OPCODE_NOP
&&
68 opcode
!= TGSI_OPCODE_KIL
&&
69 opcode
!= TGSI_OPCODE_KILP
&&
70 opcode
!= TGSI_OPCODE_END
&&
71 opcode
!= TGSI_OPCODE_RET
);
/*
 * Per-channel swizzle test for a source operand.
 *
 * For each of the X, Y, Z and W channels the visible conditions check
 * whether that channel is selected in write_mask while the operand's
 * swizzle for the channel differs from the identity swizzle.
 *
 * NOTE(review): this view of the file is incomplete.  The declaration of
 * write_mask and the statements guarded by each condition are not visible
 * (the embedded numbering skips 75-76, 78, 80, 82 and 84-87).  Presumably
 * each condition returns FALSE and the function ends by returning TRUE;
 * confirm against the complete file before relying on this.
 */
74 static boolean
is_unswizzled(struct i915_full_src_register
* r
,
/* X channel: selected in the mask but not read from X. */
77 if ( write_mask
& TGSI_WRITEMASK_X
&& r
->Register
.SwizzleX
!= TGSI_SWIZZLE_X
)
/* Y channel: selected in the mask but not read from Y. */
79 if ( write_mask
& TGSI_WRITEMASK_Y
&& r
->Register
.SwizzleY
!= TGSI_SWIZZLE_Y
)
/* Z channel: selected in the mask but not read from Z. */
81 if ( write_mask
& TGSI_WRITEMASK_Z
&& r
->Register
.SwizzleZ
!= TGSI_SWIZZLE_Z
)
/* W channel: selected in the mask but not read from W. */
83 if ( write_mask
& TGSI_WRITEMASK_W
&& r
->Register
.SwizzleW
!= TGSI_SWIZZLE_W
)
/*
 * Reports whether an opcode is one of the commutative operations this
 * optimizer knows about: TRUE is returned for ADD and for MUL.
 *
 * NOTE(review): the result for any other opcode is not visible in this
 * view (the embedded numbering skips 92-94); presumably FALSE - confirm.
 */
88 static boolean
op_commutes(unsigned opcode
)
90 if (opcode
== TGSI_OPCODE_ADD
) return TRUE
;
91 if (opcode
== TGSI_OPCODE_MUL
) return TRUE
;
95 static unsigned op_neutral_element(unsigned opcode
)
97 if (opcode
== TGSI_OPCODE_ADD
)
98 return TGSI_SWIZZLE_ZERO
;
99 if (opcode
== TGSI_OPCODE_MUL
)
100 return TGSI_SWIZZLE_ONE
;
102 debug_printf("Unknown opcode %d\n",opcode
);
103 return TGSI_SWIZZLE_ZERO
;
/*
 * Sets the swizzle to the neutral element for the operation for the bits
 * of writemask which are set, swizzle to identity otherwise.
 */
/*
 * Rewrites the four swizzle selectors of source operand r.
 *
 * For each channel the visible code tests the channel's bit in write_mask
 * and assigns `neutral` to the channel's swizzle; an assignment of the
 * identity swizzle for the same channel follows.
 *
 * NOTE(review): this view of the file is incomplete.  The declarations of
 * write_mask and neutral are not visible, and the embedded numbering skips
 * 116, 121, 126 and 131 - presumably an `else` sits in front of each
 * identity assignment, so that a channel gets `neutral` when its mask bit
 * is set and the identity swizzle otherwise.  Confirm against the complete
 * file.
 */
110 static void set_neutral_element_swizzle(struct i915_full_src_register
* r
,
/* X channel */
114 if ( write_mask
& TGSI_WRITEMASK_X
)
115 r
->Register
.SwizzleX
= neutral
;
117 r
->Register
.SwizzleX
= TGSI_SWIZZLE_X
;
/* Y channel */
119 if ( write_mask
& TGSI_WRITEMASK_Y
)
120 r
->Register
.SwizzleY
= neutral
;
122 r
->Register
.SwizzleY
= TGSI_SWIZZLE_Y
;
/* Z channel */
124 if ( write_mask
& TGSI_WRITEMASK_Z
)
125 r
->Register
.SwizzleZ
= neutral
;
127 r
->Register
.SwizzleZ
= TGSI_SWIZZLE_Z
;
/* W channel */
129 if ( write_mask
& TGSI_WRITEMASK_W
)
130 r
->Register
.SwizzleW
= neutral
;
132 r
->Register
.SwizzleW
= TGSI_SWIZZLE_W
;
/*
 * Copies a TGSI source register description field by field into the
 * i915 representation.
 *
 * The visible assignments transfer Indirect, Dimension, the four swizzle
 * selectors, Absolute and Negate.
 *
 * NOTE(review): the embedded numbering skips 137 and 140, so one or more
 * assignments may be missing from this view.  Other code in this file
 * compares the File and Index fields of these registers, so those two
 * fields are presumably copied here as well - confirm against the
 * complete file.
 */
135 static void copy_src_reg(struct i915_src_register
* o
, const struct tgsi_src_register
* i
)
138 o
->Indirect
= i
->Indirect
;
139 o
->Dimension
= i
->Dimension
;
141 o
->SwizzleX
= i
->SwizzleX
;
142 o
->SwizzleY
= i
->SwizzleY
;
143 o
->SwizzleZ
= i
->SwizzleZ
;
144 o
->SwizzleW
= i
->SwizzleW
;
145 o
->Absolute
= i
->Absolute
;
146 o
->Negate
= i
->Negate
;
/*
 * Copies a TGSI destination register description field by field into the
 * i915 representation.
 *
 * The visible assignments transfer WriteMask, Indirect and Dimension.
 *
 * NOTE(review): the embedded numbering skips 151 and 155, so one or more
 * assignments may be missing from this view.  Other code in this file
 * compares the File and Index fields of these registers, so those two
 * fields are presumably copied here as well - confirm against the
 * complete file.
 */
149 static void copy_dst_reg(struct i915_dst_register
* o
, const struct tgsi_dst_register
* i
)
152 o
->WriteMask
= i
->WriteMask
;
153 o
->Indirect
= i
->Indirect
;
154 o
->Dimension
= i
->Dimension
;
158 static void copy_instruction(struct i915_full_instruction
* o
, const struct tgsi_full_instruction
* i
)
160 memcpy(&o
->Instruction
, &i
->Instruction
, sizeof(o
->Instruction
));
161 memcpy(&o
->Texture
, &i
->Texture
, sizeof(o
->Texture
));
163 copy_dst_reg(&o
->Dst
[0].Register
, &i
->Dst
[0].Register
);
165 copy_src_reg(&o
->Src
[0].Register
, &i
->Src
[0].Register
);
166 copy_src_reg(&o
->Src
[1].Register
, &i
->Src
[1].Register
);
167 copy_src_reg(&o
->Src
[2].Register
, &i
->Src
[2].Register
);
/*
 * Converts one parsed TGSI token into the i915 token representation.
 *
 * A token that is not an instruction is copied as raw bytes; instruction
 * tokens go through copy_instruction().
 *
 * NOTE(review): the embedded numbering skips 174, so an `else` in front of
 * the copy_instruction() call is presumably missing from this view -
 * confirm against the complete file.
 */
170 static void copy_token(union i915_full_token
* o
, union tgsi_full_token
* i
)
172 if (i
->Token
.Type
!= TGSI_TOKEN_TYPE_INSTRUCTION
)
/* Raw copy of sizeof(*o) bytes, i.e. the size of the destination union.
 * Assumes the source union is at least that large - TODO confirm. */
173 memcpy(o
, i
, sizeof(*o
));
175 copy_instruction(&o
->FullInstruction
, &i
->FullInstruction
);
180 * Optimize away things like:
181 * MUL OUT[0].xyz, TEMP[1], TEMP[2]
182 * MOV OUT[0].w, TEMP[2]
184 * MUL OUT[0].xyzw, TEMP[1].xyz1, TEMP[2]
185 * This is useful for optimizing texenv.
187 static void i915_fpc_optimize_mov_after_alu(union i915_full_token
* current
, union i915_full_token
* next
)
189 if ( current
->Token
.Type
== TGSI_TOKEN_TYPE_INSTRUCTION
&&
190 next
->Token
.Type
== TGSI_TOKEN_TYPE_INSTRUCTION
&&
191 op_commutes(current
->FullInstruction
.Instruction
.Opcode
) &&
192 current
->FullInstruction
.Instruction
.Saturate
== next
->FullInstruction
.Instruction
.Saturate
&&
193 next
->FullInstruction
.Instruction
.Opcode
== TGSI_OPCODE_MOV
&&
194 same_dst_reg(&next
->FullInstruction
.Dst
[0], ¤t
->FullInstruction
.Dst
[0]) &&
195 same_src_reg(&next
->FullInstruction
.Src
[0], ¤t
->FullInstruction
.Src
[1]) &&
196 !same_src_dst_reg(&next
->FullInstruction
.Src
[0], ¤t
->FullInstruction
.Dst
[0]) &&
197 is_unswizzled(¤t
->FullInstruction
.Src
[0], current
->FullInstruction
.Dst
[0].Register
.WriteMask
) &&
198 is_unswizzled(¤t
->FullInstruction
.Src
[1], current
->FullInstruction
.Dst
[0].Register
.WriteMask
) &&
199 is_unswizzled(&next
->FullInstruction
.Src
[0], next
->FullInstruction
.Dst
[0].Register
.WriteMask
) )
201 next
->FullInstruction
.Instruction
.Opcode
= TGSI_OPCODE_NOP
;
203 set_neutral_element_swizzle(¤t
->FullInstruction
.Src
[1], 0, 0);
204 set_neutral_element_swizzle(¤t
->FullInstruction
.Src
[0],
205 next
->FullInstruction
.Dst
[0].Register
.WriteMask
,
206 op_neutral_element(current
->FullInstruction
.Instruction
.Opcode
));
208 current
->FullInstruction
.Dst
[0].Register
.WriteMask
= current
->FullInstruction
.Dst
[0].Register
.WriteMask
|
209 next
->FullInstruction
.Dst
[0].Register
.WriteMask
;
213 if ( current
->Token
.Type
== TGSI_TOKEN_TYPE_INSTRUCTION
&&
214 next
->Token
.Type
== TGSI_TOKEN_TYPE_INSTRUCTION
&&
215 op_commutes(current
->FullInstruction
.Instruction
.Opcode
) &&
216 current
->FullInstruction
.Instruction
.Saturate
== next
->FullInstruction
.Instruction
.Saturate
&&
217 next
->FullInstruction
.Instruction
.Opcode
== TGSI_OPCODE_MOV
&&
218 same_dst_reg(&next
->FullInstruction
.Dst
[0], ¤t
->FullInstruction
.Dst
[0]) &&
219 same_src_reg(&next
->FullInstruction
.Src
[0], ¤t
->FullInstruction
.Src
[0]) &&
220 !same_src_dst_reg(&next
->FullInstruction
.Src
[0], ¤t
->FullInstruction
.Dst
[0]) &&
221 is_unswizzled(¤t
->FullInstruction
.Src
[0], current
->FullInstruction
.Dst
[0].Register
.WriteMask
) &&
222 is_unswizzled(¤t
->FullInstruction
.Src
[1], current
->FullInstruction
.Dst
[0].Register
.WriteMask
) &&
223 is_unswizzled(&next
->FullInstruction
.Src
[0], next
->FullInstruction
.Dst
[0].Register
.WriteMask
) )
225 next
->FullInstruction
.Instruction
.Opcode
= TGSI_OPCODE_NOP
;
227 set_neutral_element_swizzle(¤t
->FullInstruction
.Src
[0], 0, 0);
228 set_neutral_element_swizzle(¤t
->FullInstruction
.Src
[1],
229 next
->FullInstruction
.Dst
[0].Register
.WriteMask
,
230 op_neutral_element(current
->FullInstruction
.Instruction
.Opcode
));
232 current
->FullInstruction
.Dst
[0].Register
.WriteMask
= current
->FullInstruction
.Dst
[0].Register
.WriteMask
|
233 next
->FullInstruction
.Dst
[0].Register
.WriteMask
;
/*
 * Optimize away things like:
 *    MOV TEMP[0].xyz TEMP[0].xyzx
 */
/*
 * Tests whether a parsed TGSI token is a MOV that copies a register onto
 * itself.
 *
 * The token is first converted to the i915 representation in a local
 * copy.  The visible condition then requires all of the following: the
 * token is an instruction, its opcode is MOV, there is no saturation, the
 * source has neither the Absolute nor the Negate modifier, the source is
 * unswizzled over the destination write mask, and source and destination
 * name the same register.
 *
 * NOTE(review): the statements that produce the return value are not
 * visible in this view (the embedded numbering skips 256-261).  Presumably
 * TRUE is returned when the condition holds and FALSE otherwise - confirm
 * against the complete file.
 */
244 static boolean
i915_fpc_useless_mov(union tgsi_full_token
* tgsi_current
)
246 union i915_full_token current
;
247 copy_token(¤t
, tgsi_current
);
248 if ( current
.Token
.Type
== TGSI_TOKEN_TYPE_INSTRUCTION
&&
249 current
.FullInstruction
.Instruction
.Opcode
== TGSI_OPCODE_MOV
&&
/* The opcode was just matched against MOV, so this check adds nothing
 * unless has_destination() rejects MOV (it does not in the visible code). */
250 has_destination(current
.FullInstruction
.Instruction
.Opcode
) &&
251 current
.FullInstruction
.Instruction
.Saturate
== TGSI_SAT_NONE
&&
252 current
.FullInstruction
.Src
[0].Register
.Absolute
== 0 &&
253 current
.FullInstruction
.Src
[0].Register
.Negate
== 0 &&
254 is_unswizzled(¤t
.FullInstruction
.Src
[0], current
.FullInstruction
.Dst
[0].Register
.WriteMask
) &&
255 same_src_dst_reg(¤t
.FullInstruction
.Src
[0], ¤t
.FullInstruction
.Dst
[0]) )
263 * Optimize away things like:
264 * *** TEMP[0], TEMP[1], TEMP[2]
267 * *** OUT[0], TEMP[1], TEMP[2]
269 static void i915_fpc_optimize_useless_mov_after_inst(union i915_full_token
* current
, union i915_full_token
* next
)
271 if ( current
->Token
.Type
== TGSI_TOKEN_TYPE_INSTRUCTION
&&
272 next
->Token
.Type
== TGSI_TOKEN_TYPE_INSTRUCTION
&&
273 next
->FullInstruction
.Instruction
.Opcode
== TGSI_OPCODE_MOV
&&
274 has_destination(current
->FullInstruction
.Instruction
.Opcode
) &&
275 next
->FullInstruction
.Instruction
.Saturate
== TGSI_SAT_NONE
&&
276 next
->FullInstruction
.Src
[0].Register
.Absolute
== 0 &&
277 next
->FullInstruction
.Src
[0].Register
.Negate
== 0 &&
278 next
->FullInstruction
.Dst
[0].Register
.File
== TGSI_FILE_OUTPUT
&&
279 is_unswizzled(&next
->FullInstruction
.Src
[0], next
->FullInstruction
.Dst
[0].Register
.WriteMask
) &&
280 current
->FullInstruction
.Dst
[0].Register
.WriteMask
== next
->FullInstruction
.Dst
[0].Register
.WriteMask
&&
281 same_src_dst_reg(&next
->FullInstruction
.Src
[0], ¤t
->FullInstruction
.Dst
[0]) )
283 next
->FullInstruction
.Instruction
.Opcode
= TGSI_OPCODE_NOP
;
285 current
->FullInstruction
.Dst
[0] = next
->FullInstruction
.Dst
[0];
/*
 * Builds an i915 token list from a TGSI token stream and runs the peephole
 * optimizations of this file over it.
 *
 * Visible steps:
 *   1. allocate the list header and count the tokens in a first parse;
 *   2. allocate one i915_full_token per counted token;
 *   3. parse the stream a second time; for each token, decrement NumTokens
 *      when i915_fpc_useless_mov() accepts it, convert the token into
 *      Tokens[i], and run the two pairwise optimizations on Tokens[i-1]
 *      and Tokens[i].
 *
 * NOTE(review): this view of the file is incomplete.  The declaration,
 * initialisation and increment of `i`, the closing of the loops and of the
 * useless-MOV branch, any guard around the Tokens[i-1] accesses, and the
 * return statement are not visible (the embedded numbering skips 294-295,
 * 303, 315-316, 319-320, 323-325 and 327-329).  Presumably a useless MOV
 * is skipped instead of stored, the pairwise passes run only when i > 0,
 * and out_tokens is returned - confirm against the complete file.
 *
 * NOTE(review): neither MALLOC result is checked for NULL in the visible
 * lines before being dereferenced.
 */
290 struct i915_token_list
* i915_optimize(const struct tgsi_token
*tokens
)
292 struct i915_token_list
*out_tokens
= MALLOC(sizeof(struct i915_token_list
));
293 struct tgsi_parse_context parse
;
296 out_tokens
->NumTokens
= 0;
298 /* Count the tokens */
299 tgsi_parse_init( &parse
, tokens
);
300 while( !tgsi_parse_end_of_tokens( &parse
) ) {
301 tgsi_parse_token( &parse
);
302 out_tokens
->NumTokens
++;
304 tgsi_parse_free (&parse
);
306 /* Allocate our tokens */
307 out_tokens
->Tokens
= MALLOC(sizeof(union i915_full_token
) * out_tokens
->NumTokens
);
/* Second pass: convert and optimize. */
309 tgsi_parse_init( &parse
, tokens
);
310 while( !tgsi_parse_end_of_tokens( &parse
) ) {
311 tgsi_parse_token( &parse
);
/* A self-copying MOV reduces the final token count. */
313 if (i915_fpc_useless_mov(&parse
.FullToken
)) {
314 out_tokens
->NumTokens
--;
318 copy_token(&out_tokens
->Tokens
[i
] , &parse
.FullToken
);
/* Pairwise peephole passes over the previous and the current token. */
321 i915_fpc_optimize_useless_mov_after_inst(&out_tokens
->Tokens
[i
-1], &out_tokens
->Tokens
[i
]);
322 i915_fpc_optimize_mov_after_alu(&out_tokens
->Tokens
[i
-1], &out_tokens
->Tokens
[i
]);
326 tgsi_parse_free (&parse
);
/*
 * Releases the token array of a list produced by i915_optimize().
 *
 * NOTE(review): the array is obtained with MALLOC in i915_optimize() but
 * released here with plain free() - confirm that free() is the matching
 * deallocator in every build configuration.
 *
 * NOTE(review): releasing the list structure itself is not visible in this
 * view; the function may continue past the last visible line - confirm
 * against the complete file.
 */
331 void i915_optimize_free(struct i915_token_list
* tokens
)
333 free(tokens
->Tokens
);