1 /**************************************************************************
3 * Copyright 2011 The Chromium OS authors.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL GOOGLE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
29 #include "i915_context.h"
32 #include "pipe/p_shader_tokens.h"
33 #include "util/u_math.h"
34 #include "util/u_memory.h"
35 #include "util/u_string.h"
36 #include "tgsi/tgsi_parse.h"
37 #include "tgsi/tgsi_dump.h"
39 static boolean
same_src_dst_reg(struct i915_full_src_register
* s1
, struct i915_full_dst_register
* d1
)
41 return (s1
->Register
.File
== d1
->Register
.File
&&
42 s1
->Register
.Indirect
== d1
->Register
.Indirect
&&
43 s1
->Register
.Dimension
== d1
->Register
.Dimension
&&
44 s1
->Register
.Index
== d1
->Register
.Index
);
47 static boolean
same_dst_reg(struct i915_full_dst_register
* d1
, struct i915_full_dst_register
* d2
)
49 return (d1
->Register
.File
== d2
->Register
.File
&&
50 d1
->Register
.Indirect
== d2
->Register
.Indirect
&&
51 d1
->Register
.Dimension
== d2
->Register
.Dimension
&&
52 d1
->Register
.Index
== d2
->Register
.Index
);
55 static boolean
same_src_reg(struct i915_full_src_register
* d1
, struct i915_full_src_register
* d2
)
57 return (d1
->Register
.File
== d2
->Register
.File
&&
58 d1
->Register
.Indirect
== d2
->Register
.Indirect
&&
59 d1
->Register
.Dimension
== d2
->Register
.Dimension
&&
60 d1
->Register
.Index
== d2
->Register
.Index
&&
61 d1
->Register
.Absolute
== d2
->Register
.Absolute
&&
62 d1
->Register
.Negate
== d2
->Register
.Negate
);
65 static boolean
has_destination(unsigned opcode
)
67 return (opcode
!= TGSI_OPCODE_NOP
&&
68 opcode
!= TGSI_OPCODE_KIL
&&
69 opcode
!= TGSI_OPCODE_RET
);
72 static boolean
is_unswizzled(struct i915_full_src_register
* r
,
75 if ( write_mask
& TGSI_WRITEMASK_X
&& r
->Register
.SwizzleX
!= TGSI_SWIZZLE_X
)
77 if ( write_mask
& TGSI_WRITEMASK_Y
&& r
->Register
.SwizzleY
!= TGSI_SWIZZLE_Y
)
79 if ( write_mask
& TGSI_WRITEMASK_Z
&& r
->Register
.SwizzleZ
!= TGSI_SWIZZLE_Z
)
81 if ( write_mask
& TGSI_WRITEMASK_W
&& r
->Register
.SwizzleW
!= TGSI_SWIZZLE_W
)
86 static boolean
op_commutes(unsigned opcode
)
88 if (opcode
== TGSI_OPCODE_ADD
) return TRUE
;
89 if (opcode
== TGSI_OPCODE_MUL
) return TRUE
;
93 static unsigned op_neutral_element(unsigned opcode
)
95 if (opcode
== TGSI_OPCODE_ADD
)
96 return TGSI_SWIZZLE_ZERO
;
97 if (opcode
== TGSI_OPCODE_MUL
)
98 return TGSI_SWIZZLE_ONE
;
100 debug_printf("Unknown opcode %d\n",opcode
);
101 return TGSI_SWIZZLE_ZERO
;
105 * Sets the swizzle to the neutral element for the operation for the bits
106 * of writemask which are set, swizzle to identity otherwise.
108 static void set_neutral_element_swizzle(struct i915_full_src_register
* r
,
112 if ( write_mask
& TGSI_WRITEMASK_X
)
113 r
->Register
.SwizzleX
= neutral
;
115 r
->Register
.SwizzleX
= TGSI_SWIZZLE_X
;
117 if ( write_mask
& TGSI_WRITEMASK_Y
)
118 r
->Register
.SwizzleY
= neutral
;
120 r
->Register
.SwizzleY
= TGSI_SWIZZLE_Y
;
122 if ( write_mask
& TGSI_WRITEMASK_Z
)
123 r
->Register
.SwizzleZ
= neutral
;
125 r
->Register
.SwizzleZ
= TGSI_SWIZZLE_Z
;
127 if ( write_mask
& TGSI_WRITEMASK_W
)
128 r
->Register
.SwizzleW
= neutral
;
130 r
->Register
.SwizzleW
= TGSI_SWIZZLE_W
;
134 * Optimize away things like:
135 * MUL OUT[0].xyz, TEMP[1], TEMP[2]
136 * MOV OUT[0].w, TEMP[2]
138 * MUL OUT[0].xyzw, TEMP[1].xyz1, TEMP[2]
139 * This is useful for optimizing texenv.
141 static void i915_fpc_optimize_mov_after_alu(union i915_full_token
* current
, union i915_full_token
* next
)
143 if ( current
->Token
.Type
== TGSI_TOKEN_TYPE_INSTRUCTION
&&
144 next
->Token
.Type
== TGSI_TOKEN_TYPE_INSTRUCTION
&&
145 op_commutes(current
->FullInstruction
.Instruction
.Opcode
) &&
146 current
->FullInstruction
.Instruction
.Saturate
== next
->FullInstruction
.Instruction
.Saturate
&&
147 next
->FullInstruction
.Instruction
.Opcode
== TGSI_OPCODE_MOV
&&
148 same_dst_reg(&next
->FullInstruction
.Dst
[0], ¤t
->FullInstruction
.Dst
[0]) &&
149 same_src_reg(&next
->FullInstruction
.Src
[0], ¤t
->FullInstruction
.Src
[1]) &&
150 is_unswizzled(¤t
->FullInstruction
.Src
[0], current
->FullInstruction
.Dst
[0].Register
.WriteMask
) &&
151 is_unswizzled(¤t
->FullInstruction
.Src
[1], current
->FullInstruction
.Dst
[0].Register
.WriteMask
) &&
152 is_unswizzled(&next
->FullInstruction
.Src
[0], next
->FullInstruction
.Dst
[0].Register
.WriteMask
) )
154 next
->FullInstruction
.Instruction
.Opcode
= TGSI_OPCODE_NOP
;
156 set_neutral_element_swizzle(¤t
->FullInstruction
.Src
[1], 0, 0);
157 set_neutral_element_swizzle(¤t
->FullInstruction
.Src
[0],
158 next
->FullInstruction
.Dst
[0].Register
.WriteMask
,
159 op_neutral_element(current
->FullInstruction
.Instruction
.Opcode
));
161 current
->FullInstruction
.Dst
[0].Register
.WriteMask
= current
->FullInstruction
.Dst
[0].Register
.WriteMask
|
162 next
->FullInstruction
.Dst
[0].Register
.WriteMask
;
166 if ( current
->Token
.Type
== TGSI_TOKEN_TYPE_INSTRUCTION
&&
167 next
->Token
.Type
== TGSI_TOKEN_TYPE_INSTRUCTION
&&
168 op_commutes(current
->FullInstruction
.Instruction
.Opcode
) &&
169 current
->FullInstruction
.Instruction
.Saturate
== next
->FullInstruction
.Instruction
.Saturate
&&
170 next
->FullInstruction
.Instruction
.Opcode
== TGSI_OPCODE_MOV
&&
171 same_dst_reg(&next
->FullInstruction
.Dst
[0], ¤t
->FullInstruction
.Dst
[0]) &&
172 same_src_reg(&next
->FullInstruction
.Src
[0], ¤t
->FullInstruction
.Src
[0]) &&
173 is_unswizzled(¤t
->FullInstruction
.Src
[0], current
->FullInstruction
.Dst
[0].Register
.WriteMask
) &&
174 is_unswizzled(¤t
->FullInstruction
.Src
[1], current
->FullInstruction
.Dst
[0].Register
.WriteMask
) &&
175 is_unswizzled(&next
->FullInstruction
.Src
[0], next
->FullInstruction
.Dst
[0].Register
.WriteMask
) )
177 next
->FullInstruction
.Instruction
.Opcode
= TGSI_OPCODE_NOP
;
179 set_neutral_element_swizzle(¤t
->FullInstruction
.Src
[0], 0, 0);
180 set_neutral_element_swizzle(¤t
->FullInstruction
.Src
[1],
181 next
->FullInstruction
.Dst
[0].Register
.WriteMask
,
182 op_neutral_element(current
->FullInstruction
.Instruction
.Opcode
));
184 current
->FullInstruction
.Dst
[0].Register
.WriteMask
= current
->FullInstruction
.Dst
[0].Register
.WriteMask
|
185 next
->FullInstruction
.Dst
[0].Register
.WriteMask
;
191 * Optimize away things like:
192 * *** TEMP[0], TEMP[1], TEMP[2]
195 * *** OUT[0], TEMP[1], TEMP[2]
197 static void i915_fpc_optimize_useless_mov(union i915_full_token
* current
, union i915_full_token
* next
)
199 if ( current
->Token
.Type
== TGSI_TOKEN_TYPE_INSTRUCTION
&&
200 next
->Token
.Type
== TGSI_TOKEN_TYPE_INSTRUCTION
&&
201 next
->FullInstruction
.Instruction
.Opcode
== TGSI_OPCODE_MOV
&&
202 has_destination(current
->FullInstruction
.Instruction
.Opcode
) &&
203 next
->FullInstruction
.Instruction
.Saturate
== TGSI_SAT_NONE
&&
204 next
->FullInstruction
.Src
[0].Register
.Absolute
== 0 &&
205 next
->FullInstruction
.Src
[0].Register
.Negate
== 0 &&
206 is_unswizzled(&next
->FullInstruction
.Src
[0], next
->FullInstruction
.Dst
[0].Register
.WriteMask
) &&
207 current
->FullInstruction
.Dst
[0].Register
.WriteMask
== next
->FullInstruction
.Dst
[0].Register
.WriteMask
&&
208 same_src_dst_reg(&next
->FullInstruction
.Src
[0], ¤t
->FullInstruction
.Dst
[0]) )
210 next
->FullInstruction
.Instruction
.Opcode
= TGSI_OPCODE_NOP
;
212 current
->FullInstruction
.Dst
[0] = next
->FullInstruction
.Dst
[0];
217 static void copy_src_reg(struct i915_src_register
* o
, const struct tgsi_src_register
* i
)
220 o
->Indirect
= i
->Indirect
;
221 o
->Dimension
= i
->Dimension
;
223 o
->SwizzleX
= i
->SwizzleX
;
224 o
->SwizzleY
= i
->SwizzleY
;
225 o
->SwizzleZ
= i
->SwizzleZ
;
226 o
->SwizzleW
= i
->SwizzleW
;
227 o
->Absolute
= i
->Absolute
;
228 o
->Negate
= i
->Negate
;
231 static void copy_dst_reg(struct i915_dst_register
* o
, const struct tgsi_dst_register
* i
)
234 o
->WriteMask
= i
->WriteMask
;
235 o
->Indirect
= i
->Indirect
;
236 o
->Dimension
= i
->Dimension
;
240 static void copy_instruction(struct i915_full_instruction
* o
, const struct tgsi_full_instruction
* i
)
242 memcpy(&o
->Instruction
, &i
->Instruction
, sizeof(o
->Instruction
));
243 memcpy(&o
->Texture
, &i
->Texture
, sizeof(o
->Texture
));
245 copy_dst_reg(&o
->Dst
[0].Register
, &i
->Dst
[0].Register
);
247 copy_src_reg(&o
->Src
[0].Register
, &i
->Src
[0].Register
);
248 copy_src_reg(&o
->Src
[1].Register
, &i
->Src
[1].Register
);
249 copy_src_reg(&o
->Src
[2].Register
, &i
->Src
[2].Register
);
252 static void copy_token(union i915_full_token
* o
, union tgsi_full_token
* i
)
254 if (i
->Token
.Type
!= TGSI_TOKEN_TYPE_INSTRUCTION
)
255 memcpy(o
, i
, sizeof(*o
));
257 copy_instruction(&o
->FullInstruction
, &i
->FullInstruction
);
261 struct i915_token_list
* i915_optimize(const struct tgsi_token
*tokens
)
263 struct i915_token_list
*out_tokens
= MALLOC(sizeof(struct i915_token_list
));
264 struct tgsi_parse_context parse
;
267 out_tokens
->NumTokens
= 0;
269 /* Count the tokens */
270 tgsi_parse_init( &parse
, tokens
);
271 while( !tgsi_parse_end_of_tokens( &parse
) ) {
272 tgsi_parse_token( &parse
);
273 out_tokens
->NumTokens
++;
275 tgsi_parse_free (&parse
);
277 /* Allocate our tokens */
278 out_tokens
->Tokens
= MALLOC(sizeof(union i915_full_token
) * out_tokens
->NumTokens
);
280 tgsi_parse_init( &parse
, tokens
);
281 while( !tgsi_parse_end_of_tokens( &parse
) ) {
282 tgsi_parse_token( &parse
);
283 copy_token(&out_tokens
->Tokens
[i
] , &parse
.FullToken
);
286 i915_fpc_optimize_useless_mov(&out_tokens
->Tokens
[i
-1], &out_tokens
->Tokens
[i
]);
287 i915_fpc_optimize_mov_after_alu(&out_tokens
->Tokens
[i
-1], &out_tokens
->Tokens
[i
]);
291 tgsi_parse_free (&parse
);
296 void i915_optimize_free(struct i915_token_list
* tokens
)
298 free(tokens
->Tokens
);