0d680fbf87639c8a4bb5a42a342b3e02253cfc75
[mesa.git] / src / gallium / drivers / i915 / i915_fpc_optimize.c
1 /**************************************************************************
2 *
3 * Copyright 2011 The Chromium OS authors.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL GOOGLE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 #include "i915_reg.h"
29 #include "i915_context.h"
30 #include "i915_fpc.h"
31
32 #include "pipe/p_shader_tokens.h"
33 #include "util/u_math.h"
34 #include "util/u_memory.h"
35 #include "util/u_string.h"
36 #include "tgsi/tgsi_parse.h"
37 #include "tgsi/tgsi_dump.h"
38
39 static boolean same_src_dst_reg(struct i915_full_src_register* s1, struct i915_full_dst_register* d1)
40 {
41 return (s1->Register.File == d1->Register.File &&
42 s1->Register.Indirect == d1->Register.Indirect &&
43 s1->Register.Dimension == d1->Register.Dimension &&
44 s1->Register.Index == d1->Register.Index);
45 }
46
47 static boolean same_dst_reg(struct i915_full_dst_register* d1, struct i915_full_dst_register* d2)
48 {
49 return (d1->Register.File == d2->Register.File &&
50 d1->Register.Indirect == d2->Register.Indirect &&
51 d1->Register.Dimension == d2->Register.Dimension &&
52 d1->Register.Index == d2->Register.Index);
53 }
54
55 static boolean same_src_reg(struct i915_full_src_register* d1, struct i915_full_src_register* d2)
56 {
57 return (d1->Register.File == d2->Register.File &&
58 d1->Register.Indirect == d2->Register.Indirect &&
59 d1->Register.Dimension == d2->Register.Dimension &&
60 d1->Register.Index == d2->Register.Index &&
61 d1->Register.Absolute == d2->Register.Absolute &&
62 d1->Register.Negate == d2->Register.Negate);
63 }
64
65 static boolean has_destination(unsigned opcode)
66 {
67 return (opcode != TGSI_OPCODE_NOP &&
68 opcode != TGSI_OPCODE_KIL &&
69 opcode != TGSI_OPCODE_RET);
70 }
71
72 static boolean is_unswizzled(struct i915_full_src_register* r,
73 unsigned write_mask)
74 {
75 if ( write_mask & TGSI_WRITEMASK_X && r->Register.SwizzleX != TGSI_SWIZZLE_X)
76 return FALSE;
77 if ( write_mask & TGSI_WRITEMASK_Y && r->Register.SwizzleY != TGSI_SWIZZLE_Y)
78 return FALSE;
79 if ( write_mask & TGSI_WRITEMASK_Z && r->Register.SwizzleZ != TGSI_SWIZZLE_Z)
80 return FALSE;
81 if ( write_mask & TGSI_WRITEMASK_W && r->Register.SwizzleW != TGSI_SWIZZLE_W)
82 return FALSE;
83 return TRUE;
84 }
85
86 static boolean op_commutes(unsigned opcode)
87 {
88 if (opcode == TGSI_OPCODE_ADD) return TRUE;
89 if (opcode == TGSI_OPCODE_MUL) return TRUE;
90 return FALSE;
91 }
92
93 static unsigned op_neutral_element(unsigned opcode)
94 {
95 if (opcode == TGSI_OPCODE_ADD)
96 return TGSI_SWIZZLE_ZERO;
97 if (opcode == TGSI_OPCODE_MUL)
98 return TGSI_SWIZZLE_ONE;
99
100 debug_printf("Unknown opcode %d\n",opcode);
101 return TGSI_SWIZZLE_ZERO;
102 }
103
104 /*
105 * Sets the swizzle to the neutral element for the operation for the bits
106 * of writemask which are set, swizzle to identity otherwise.
107 */
108 static void set_neutral_element_swizzle(struct i915_full_src_register* r,
109 unsigned write_mask,
110 unsigned neutral)
111 {
112 if ( write_mask & TGSI_WRITEMASK_X )
113 r->Register.SwizzleX = neutral;
114 else
115 r->Register.SwizzleX = TGSI_SWIZZLE_X;
116
117 if ( write_mask & TGSI_WRITEMASK_Y )
118 r->Register.SwizzleY = neutral;
119 else
120 r->Register.SwizzleY = TGSI_SWIZZLE_Y;
121
122 if ( write_mask & TGSI_WRITEMASK_Z )
123 r->Register.SwizzleZ = neutral;
124 else
125 r->Register.SwizzleZ = TGSI_SWIZZLE_Z;
126
127 if ( write_mask & TGSI_WRITEMASK_W )
128 r->Register.SwizzleW = neutral;
129 else
130 r->Register.SwizzleW = TGSI_SWIZZLE_W;
131 }
132
133 /*
134 * Optimize away things like:
135 * MUL OUT[0].xyz, TEMP[1], TEMP[2]
136 * MOV OUT[0].w, TEMP[2]
137 * into:
138 * MUL OUT[0].xyzw, TEMP[1].xyz1, TEMP[2]
139 * This is useful for optimizing texenv.
140 */
141 static void i915_fpc_optimize_mov_after_alu(union i915_full_token* current, union i915_full_token* next)
142 {
143 if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
144 next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
145 op_commutes(current->FullInstruction.Instruction.Opcode) &&
146 current->FullInstruction.Instruction.Saturate == next->FullInstruction.Instruction.Saturate &&
147 next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
148 same_dst_reg(&next->FullInstruction.Dst[0], &current->FullInstruction.Dst[0]) &&
149 same_src_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Src[1]) &&
150 is_unswizzled(&current->FullInstruction.Src[0], current->FullInstruction.Dst[0].Register.WriteMask) &&
151 is_unswizzled(&current->FullInstruction.Src[1], current->FullInstruction.Dst[0].Register.WriteMask) &&
152 is_unswizzled(&next->FullInstruction.Src[0], next->FullInstruction.Dst[0].Register.WriteMask) )
153 {
154 next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
155
156 set_neutral_element_swizzle(&current->FullInstruction.Src[1], 0, 0);
157 set_neutral_element_swizzle(&current->FullInstruction.Src[0],
158 next->FullInstruction.Dst[0].Register.WriteMask,
159 op_neutral_element(current->FullInstruction.Instruction.Opcode));
160
161 current->FullInstruction.Dst[0].Register.WriteMask = current->FullInstruction.Dst[0].Register.WriteMask |
162 next->FullInstruction.Dst[0].Register.WriteMask;
163 return;
164 }
165
166 if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
167 next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
168 op_commutes(current->FullInstruction.Instruction.Opcode) &&
169 current->FullInstruction.Instruction.Saturate == next->FullInstruction.Instruction.Saturate &&
170 next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
171 same_dst_reg(&next->FullInstruction.Dst[0], &current->FullInstruction.Dst[0]) &&
172 same_src_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Src[0]) &&
173 is_unswizzled(&current->FullInstruction.Src[0], current->FullInstruction.Dst[0].Register.WriteMask) &&
174 is_unswizzled(&current->FullInstruction.Src[1], current->FullInstruction.Dst[0].Register.WriteMask) &&
175 is_unswizzled(&next->FullInstruction.Src[0], next->FullInstruction.Dst[0].Register.WriteMask) )
176 {
177 next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
178
179 set_neutral_element_swizzle(&current->FullInstruction.Src[0], 0, 0);
180 set_neutral_element_swizzle(&current->FullInstruction.Src[1],
181 next->FullInstruction.Dst[0].Register.WriteMask,
182 op_neutral_element(current->FullInstruction.Instruction.Opcode));
183
184 current->FullInstruction.Dst[0].Register.WriteMask = current->FullInstruction.Dst[0].Register.WriteMask |
185 next->FullInstruction.Dst[0].Register.WriteMask;
186 return;
187 }
188 }
189
190 /*
191 * Optimize away things like:
192 * *** TEMP[0], TEMP[1], TEMP[2]
193 * MOV OUT[0] TEMP[0]
194 * into:
195 * *** OUT[0], TEMP[1], TEMP[2]
196 */
197 static void i915_fpc_optimize_useless_mov(union i915_full_token* current, union i915_full_token* next)
198 {
199 if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
200 next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
201 next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
202 has_destination(current->FullInstruction.Instruction.Opcode) &&
203 next->FullInstruction.Instruction.Saturate == TGSI_SAT_NONE &&
204 next->FullInstruction.Src[0].Register.Absolute == 0 &&
205 next->FullInstruction.Src[0].Register.Negate == 0 &&
206 is_unswizzled(&next->FullInstruction.Src[0], next->FullInstruction.Dst[0].Register.WriteMask) &&
207 current->FullInstruction.Dst[0].Register.WriteMask == next->FullInstruction.Dst[0].Register.WriteMask &&
208 same_src_dst_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Dst[0]) )
209 {
210 next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
211
212 current->FullInstruction.Dst[0] = next->FullInstruction.Dst[0];
213 return;
214 }
215 }
216
217 static void copy_src_reg(struct i915_src_register* o, const struct tgsi_src_register* i)
218 {
219 o->File = i->File;
220 o->Indirect = i->Indirect;
221 o->Dimension = i->Dimension;
222 o->Index = i->Index;
223 o->SwizzleX = i->SwizzleX;
224 o->SwizzleY = i->SwizzleY;
225 o->SwizzleZ = i->SwizzleZ;
226 o->SwizzleW = i->SwizzleW;
227 o->Absolute = i->Absolute;
228 o->Negate = i->Negate;
229 }
230
231 static void copy_dst_reg(struct i915_dst_register* o, const struct tgsi_dst_register* i)
232 {
233 o->File = i->File;
234 o->WriteMask = i->WriteMask;
235 o->Indirect = i->Indirect;
236 o->Dimension = i->Dimension;
237 o->Index = i->Index;
238 }
239
240 static void copy_instruction(struct i915_full_instruction* o, const struct tgsi_full_instruction* i)
241 {
242 memcpy(&o->Instruction, &i->Instruction, sizeof(o->Instruction));
243 memcpy(&o->Texture, &i->Texture, sizeof(o->Texture));
244
245 copy_dst_reg(&o->Dst[0].Register, &i->Dst[0].Register);
246
247 copy_src_reg(&o->Src[0].Register, &i->Src[0].Register);
248 copy_src_reg(&o->Src[1].Register, &i->Src[1].Register);
249 copy_src_reg(&o->Src[2].Register, &i->Src[2].Register);
250 }
251
252 static void copy_token(union i915_full_token* o, union tgsi_full_token* i)
253 {
254 if (i->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION)
255 memcpy(o, i, sizeof(*o));
256 else
257 copy_instruction(&o->FullInstruction, &i->FullInstruction);
258
259 }
260
261 struct i915_token_list* i915_optimize(const struct tgsi_token *tokens)
262 {
263 struct i915_token_list *out_tokens = MALLOC(sizeof(struct i915_token_list));
264 struct tgsi_parse_context parse;
265 int i = 0;
266
267 out_tokens->NumTokens = 0;
268
269 /* Count the tokens */
270 tgsi_parse_init( &parse, tokens );
271 while( !tgsi_parse_end_of_tokens( &parse ) ) {
272 tgsi_parse_token( &parse );
273 out_tokens->NumTokens++;
274 }
275 tgsi_parse_free (&parse);
276
277 /* Allocate our tokens */
278 out_tokens->Tokens = MALLOC(sizeof(union i915_full_token) * out_tokens->NumTokens);
279
280 tgsi_parse_init( &parse, tokens );
281 while( !tgsi_parse_end_of_tokens( &parse ) ) {
282 tgsi_parse_token( &parse );
283 copy_token(&out_tokens->Tokens[i] , &parse.FullToken);
284
285 if (i > 0) {
286 i915_fpc_optimize_useless_mov(&out_tokens->Tokens[i-1], &out_tokens->Tokens[i]);
287 i915_fpc_optimize_mov_after_alu(&out_tokens->Tokens[i-1], &out_tokens->Tokens[i]);
288 }
289 i++;
290 }
291 tgsi_parse_free (&parse);
292
293 return out_tokens;
294 }
295
296 void i915_optimize_free(struct i915_token_list* tokens)
297 {
298 free(tokens->Tokens);
299 free(tokens);
300 }
301
302