a2b6d272939799d9b6ac92c6a2c8bc85b6b7a785
[mesa.git] / src / gallium / drivers / i915 / i915_fpc_optimize.c
1 /**************************************************************************
2 *
3 * Copyright 2011 The Chromium OS authors.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL GOOGLE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 #include "i915_reg.h"
29 #include "i915_context.h"
30 #include "i915_fpc.h"
31
32 #include "pipe/p_shader_tokens.h"
33 #include "util/u_math.h"
34 #include "util/u_memory.h"
35 #include "util/u_string.h"
36 #include "tgsi/tgsi_parse.h"
37 #include "tgsi/tgsi_dump.h"
38 #include "tgsi/tgsi_exec.h"
39
40 struct i915_optimize_context
41 {
42 int first_write[TGSI_EXEC_NUM_TEMPS];
43 int last_read[TGSI_EXEC_NUM_TEMPS];
44 };
45
46 static boolean same_src_dst_reg(struct i915_full_src_register *s1, struct i915_full_dst_register *d1)
47 {
48 return (s1->Register.File == d1->Register.File &&
49 s1->Register.Indirect == d1->Register.Indirect &&
50 s1->Register.Dimension == d1->Register.Dimension &&
51 s1->Register.Index == d1->Register.Index);
52 }
53
54 static boolean same_dst_reg(struct i915_full_dst_register *d1, struct i915_full_dst_register *d2)
55 {
56 return (d1->Register.File == d2->Register.File &&
57 d1->Register.Indirect == d2->Register.Indirect &&
58 d1->Register.Dimension == d2->Register.Dimension &&
59 d1->Register.Index == d2->Register.Index);
60 }
61
62 static boolean same_src_reg(struct i915_full_src_register *d1, struct i915_full_src_register *d2)
63 {
64 return (d1->Register.File == d2->Register.File &&
65 d1->Register.Indirect == d2->Register.Indirect &&
66 d1->Register.Dimension == d2->Register.Dimension &&
67 d1->Register.Index == d2->Register.Index &&
68 d1->Register.Absolute == d2->Register.Absolute &&
69 d1->Register.Negate == d2->Register.Negate);
70 }
71
72 static const struct {
73 boolean is_texture;
74 boolean commutes;
75 unsigned neutral_element;
76 unsigned num_dst;
77 unsigned num_src;
78 } op_table [TGSI_OPCODE_LAST] = {
79 [ TGSI_OPCODE_ABS ] = { false, false, 0, 1, 1 },
80 [ TGSI_OPCODE_ADD ] = { false, true, TGSI_SWIZZLE_ZERO, 1, 2 },
81 [ TGSI_OPCODE_CEIL ] = { false, false, 0, 1, 1 },
82 [ TGSI_OPCODE_CMP ] = { false, false, 0, 1, 2 },
83 [ TGSI_OPCODE_COS ] = { false, false, 0, 1, 1 },
84 [ TGSI_OPCODE_DDX ] = { false, false, 0, 1, 0 },
85 [ TGSI_OPCODE_DDY ] = { false, false, 0, 1, 0 },
86 [ TGSI_OPCODE_DP2 ] = { false, true, TGSI_SWIZZLE_ONE, 1, 2 },
87 [ TGSI_OPCODE_DP3 ] = { false, true, TGSI_SWIZZLE_ONE, 1, 2 },
88 [ TGSI_OPCODE_DP4 ] = { false, true, TGSI_SWIZZLE_ONE, 1, 2 },
89 [ TGSI_OPCODE_DPH ] = { false, false, 0, 1, 2 },
90 [ TGSI_OPCODE_DST ] = { false, false, 0, 1, 2 },
91 [ TGSI_OPCODE_END ] = { false, false, 0, 0, 0 },
92 [ TGSI_OPCODE_EX2 ] = { false, false, 0, 1, 1 },
93 [ TGSI_OPCODE_FLR ] = { false, false, 0, 1, 1 },
94 [ TGSI_OPCODE_FRC ] = { false, false, 0, 1, 1 },
95 [ TGSI_OPCODE_KILL_IF ] = { false, false, 0, 0, 1 },
96 [ TGSI_OPCODE_KILL ] = { false, false, 0, 0, 0 },
97 [ TGSI_OPCODE_LG2 ] = { false, false, 0, 1, 1 },
98 [ TGSI_OPCODE_LIT ] = { false, false, 0, 1, 1 },
99 [ TGSI_OPCODE_LRP ] = { false, false, 0, 1, 3 },
100 [ TGSI_OPCODE_MAX ] = { false, false, 0, 1, 2 },
101 [ TGSI_OPCODE_MAD ] = { false, false, 0, 1, 3 },
102 [ TGSI_OPCODE_MIN ] = { false, false, 0, 1, 2 },
103 [ TGSI_OPCODE_MOV ] = { false, false, 0, 1, 1 },
104 [ TGSI_OPCODE_MUL ] = { false, true, TGSI_SWIZZLE_ONE, 1, 2 },
105 [ TGSI_OPCODE_NOP ] = { false, false, 0, 0, 0 },
106 [ TGSI_OPCODE_POW ] = { false, false, 0, 1, 2 },
107 [ TGSI_OPCODE_RCP ] = { false, false, 0, 1, 1 },
108 [ TGSI_OPCODE_RET ] = { false, false, 0, 0, 0 },
109 [ TGSI_OPCODE_RSQ ] = { false, false, 0, 1, 1 },
110 [ TGSI_OPCODE_SCS ] = { false, false, 0, 1, 1 },
111 [ TGSI_OPCODE_SEQ ] = { false, false, 0, 1, 2 },
112 [ TGSI_OPCODE_SGE ] = { false, false, 0, 1, 2 },
113 [ TGSI_OPCODE_SGT ] = { false, false, 0, 1, 2 },
114 [ TGSI_OPCODE_SIN ] = { false, false, 0, 1, 1 },
115 [ TGSI_OPCODE_SLE ] = { false, false, 0, 1, 2 },
116 [ TGSI_OPCODE_SLT ] = { false, false, 0, 1, 2 },
117 [ TGSI_OPCODE_SNE ] = { false, false, 0, 1, 2 },
118 [ TGSI_OPCODE_SSG ] = { false, false, 0, 1, 1 },
119 [ TGSI_OPCODE_SUB ] = { false, false, 0, 1, 2 },
120 [ TGSI_OPCODE_TEX ] = { true, false, 0, 1, 2 },
121 [ TGSI_OPCODE_TRUNC ] = { false, false, 0, 1, 1 },
122 [ TGSI_OPCODE_TXB ] = { true, false, 0, 1, 2 },
123 [ TGSI_OPCODE_TXP ] = { true, false, 0, 1, 2 },
124 [ TGSI_OPCODE_XPD ] = { false, false, 0, 1, 2 },
125 };
126
127 static boolean op_has_dst(unsigned opcode)
128 {
129 return (op_table[opcode].num_dst > 0);
130 }
131
132 static int op_num_dst(unsigned opcode)
133 {
134 return op_table[opcode].num_dst;
135 }
136
137 static int op_num_src(unsigned opcode)
138 {
139 return op_table[opcode].num_src;
140 }
141
142 static boolean op_commutes(unsigned opcode)
143 {
144 return op_table[opcode].commutes;
145 }
146
147 static unsigned mask_for_unswizzled(int num_components)
148 {
149 unsigned mask = 0;
150 switch(num_components)
151 {
152 case 4:
153 mask |= TGSI_WRITEMASK_W;
154 case 3:
155 mask |= TGSI_WRITEMASK_Z;
156 case 2:
157 mask |= TGSI_WRITEMASK_Y;
158 case 1:
159 mask |= TGSI_WRITEMASK_X;
160 }
161 return mask;
162 }
163
164 static boolean is_unswizzled(struct i915_full_src_register *r,
165 unsigned write_mask)
166 {
167 if ( write_mask & TGSI_WRITEMASK_X && r->Register.SwizzleX != TGSI_SWIZZLE_X)
168 return FALSE;
169 if ( write_mask & TGSI_WRITEMASK_Y && r->Register.SwizzleY != TGSI_SWIZZLE_Y)
170 return FALSE;
171 if ( write_mask & TGSI_WRITEMASK_Z && r->Register.SwizzleZ != TGSI_SWIZZLE_Z)
172 return FALSE;
173 if ( write_mask & TGSI_WRITEMASK_W && r->Register.SwizzleW != TGSI_SWIZZLE_W)
174 return FALSE;
175 return TRUE;
176 }
177
178 static boolean op_is_texture(unsigned opcode)
179 {
180 return op_table[opcode].is_texture;
181 }
182
183 static unsigned op_neutral_element(unsigned opcode)
184 {
185 unsigned ne = op_table[opcode].neutral_element;
186 if (!ne) {
187 debug_printf("No neutral element for opcode %d\n",opcode);
188 ne = TGSI_SWIZZLE_ZERO;
189 }
190 return ne;
191 }
192
193 /*
194 * Sets the swizzle to the neutral element for the operation for the bits
195 * of writemask which are set, swizzle to identity otherwise.
196 */
197 static void set_neutral_element_swizzle(struct i915_full_src_register *r,
198 unsigned write_mask,
199 unsigned neutral)
200 {
201 if ( write_mask & TGSI_WRITEMASK_X )
202 r->Register.SwizzleX = neutral;
203 else
204 r->Register.SwizzleX = TGSI_SWIZZLE_X;
205
206 if ( write_mask & TGSI_WRITEMASK_Y )
207 r->Register.SwizzleY = neutral;
208 else
209 r->Register.SwizzleY = TGSI_SWIZZLE_Y;
210
211 if ( write_mask & TGSI_WRITEMASK_Z )
212 r->Register.SwizzleZ = neutral;
213 else
214 r->Register.SwizzleZ = TGSI_SWIZZLE_Z;
215
216 if ( write_mask & TGSI_WRITEMASK_W )
217 r->Register.SwizzleW = neutral;
218 else
219 r->Register.SwizzleW = TGSI_SWIZZLE_W;
220 }
221
222 static void copy_src_reg(struct i915_src_register *o, const struct tgsi_src_register *i)
223 {
224 o->File = i->File;
225 o->Indirect = i->Indirect;
226 o->Dimension = i->Dimension;
227 o->Index = i->Index;
228 o->SwizzleX = i->SwizzleX;
229 o->SwizzleY = i->SwizzleY;
230 o->SwizzleZ = i->SwizzleZ;
231 o->SwizzleW = i->SwizzleW;
232 o->Absolute = i->Absolute;
233 o->Negate = i->Negate;
234 }
235
236 static void copy_dst_reg(struct i915_dst_register *o, const struct tgsi_dst_register *i)
237 {
238 o->File = i->File;
239 o->WriteMask = i->WriteMask;
240 o->Indirect = i->Indirect;
241 o->Dimension = i->Dimension;
242 o->Index = i->Index;
243 }
244
245 static void copy_instruction(struct i915_full_instruction *o, const struct tgsi_full_instruction *i)
246 {
247 memcpy(&o->Instruction, &i->Instruction, sizeof(o->Instruction));
248 memcpy(&o->Texture, &i->Texture, sizeof(o->Texture));
249
250 copy_dst_reg(&o->Dst[0].Register, &i->Dst[0].Register);
251
252 copy_src_reg(&o->Src[0].Register, &i->Src[0].Register);
253 copy_src_reg(&o->Src[1].Register, &i->Src[1].Register);
254 copy_src_reg(&o->Src[2].Register, &i->Src[2].Register);
255 }
256
257 static void copy_token(union i915_full_token *o, union tgsi_full_token *i)
258 {
259 if (i->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION)
260 memcpy(o, i, sizeof(*o));
261 else
262 copy_instruction(&o->FullInstruction, &i->FullInstruction);
263
264 }
265
266 static void liveness_mark_written(struct i915_optimize_context *ctx,
267 struct i915_full_dst_register *dst_reg,
268 int pos)
269 {
270 int dst_reg_index;
271 if (dst_reg->Register.File == TGSI_FILE_TEMPORARY) {
272 dst_reg_index = dst_reg->Register.Index;
273 assert(dst_reg_index < TGSI_EXEC_NUM_TEMPS);
274 /* dead -> live transition */
275 if (ctx->first_write[dst_reg_index] != -1)
276 ctx->first_write[dst_reg_index] = pos;
277 }
278 }
279
280 static void liveness_mark_read(struct i915_optimize_context *ctx,
281 struct i915_full_src_register *src_reg,
282 int pos)
283 {
284 int src_reg_index;
285 if (src_reg->Register.File == TGSI_FILE_TEMPORARY) {
286 src_reg_index = src_reg->Register.Index;
287 assert(src_reg_index < TGSI_EXEC_NUM_TEMPS);
288 /* live -> dead transition */
289 if (ctx->last_read[src_reg_index] != -1)
290 ctx->last_read[src_reg_index] = pos;
291 }
292 }
293
294 static void liveness_analysis(struct i915_optimize_context *ctx,
295 struct i915_token_list *tokens)
296 {
297 struct i915_full_dst_register *dst_reg;
298 struct i915_full_src_register *src_reg;
299 union i915_full_token *current;
300 unsigned opcode;
301 int num_dst, num_src;
302 int i = 0;
303
304 for(i = 0; i < TGSI_EXEC_NUM_TEMPS; i++)
305 {
306 ctx->first_write[i] = -1;
307 ctx->last_read[i] = -1;
308 }
309
310 for(i = 0; i < tokens->NumTokens; i++)
311 {
312 current = &tokens->Tokens[i];
313
314 if (current->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION)
315 continue;
316
317 opcode = current->FullInstruction.Instruction.Opcode;
318 num_dst = op_num_dst(opcode);
319
320 switch(num_dst)
321 {
322 case 1:
323 dst_reg = &current->FullInstruction.Dst[0];
324 liveness_mark_written(ctx, dst_reg, i);
325 case 0:
326 break;
327 default:
328 debug_printf("Op %d has %d dst regs\n", opcode, num_dst);
329 break;
330 }
331 }
332
333 for(i = tokens->NumTokens - 1; i >= 0; i--)
334 {
335 current = &tokens->Tokens[i];
336
337 if (current->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION)
338 continue;
339
340 opcode = current->FullInstruction.Instruction.Opcode;
341 num_src = op_num_src(opcode);
342
343 switch(num_src)
344 {
345 case 3:
346 src_reg = &current->FullInstruction.Src[2];
347 liveness_mark_read(ctx, src_reg, i);
348 case 2:
349 src_reg = &current->FullInstruction.Src[1];
350 liveness_mark_read(ctx, src_reg, i);
351 case 1:
352 src_reg = &current->FullInstruction.Src[0];
353 liveness_mark_read(ctx, src_reg, i);
354 case 0:
355 break;
356 default:
357 debug_printf("Op %d has %d src regs\n", opcode, num_src);
358 break;
359 }
360 }
361 }
362
363 static int unused_from(struct i915_optimize_context *ctx, struct i915_full_dst_register *dst_reg, int from)
364 {
365 int dst_reg_index = dst_reg->Register.Index;
366 assert(dst_reg_index < TGSI_EXEC_NUM_TEMPS);
367 return (from >= ctx->last_read[dst_reg_index]);
368 }
369
370 /* Returns a mask with the components used for a texture access instruction */
371 static unsigned i915_tex_mask(union i915_full_token *instr)
372 {
373 unsigned mask;
374
375 /* Get the number of coords */
376 mask = mask_for_unswizzled(i915_num_coords(instr->FullInstruction.Texture.Texture));
377
378 /* Add the W component if projective */
379 if (instr->FullInstruction.Instruction.Opcode == TGSI_OPCODE_TXP)
380 mask |= TGSI_WRITEMASK_W;
381
382 return mask;
383 }
384
385 static boolean target_is_texture2d(uint tex)
386 {
387 switch (tex) {
388 case TGSI_TEXTURE_2D:
389 case TGSI_TEXTURE_RECT:
390 return true;
391 default:
392 return false;
393 }
394 }
395
396
397 /*
398 * Optimize away useless indirect texture reads:
399 * MOV TEMP[0].xy, IN[0].xyyy
400 * TEX TEMP[1], TEMP[0], SAMP[0], 2D
401 * into:
402 * TEX TEMP[1], IN[0], SAMP[0], 2D
403 *
404 * note: this only seems to work on 2D/RECT textures, but not SHAADOW2D/1D/..
405 */
406 static void i915_fpc_optimize_mov_before_tex(struct i915_optimize_context *ctx,
407 struct i915_token_list *tokens,
408 int index)
409 {
410 union i915_full_token *current = &tokens->Tokens[index - 1];
411 union i915_full_token *next = &tokens->Tokens[index];
412
413 if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
414 next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
415 current->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
416 op_is_texture(next->FullInstruction.Instruction.Opcode) &&
417 target_is_texture2d(next->FullInstruction.Texture.Texture) &&
418 same_src_dst_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Dst[0]) &&
419 is_unswizzled(&current->FullInstruction.Src[0], i915_tex_mask(next)) &&
420 unused_from(ctx, &current->FullInstruction.Dst[0], index))
421 {
422 memcpy(&next->FullInstruction.Src[0], &current->FullInstruction.Src[0], sizeof(struct i915_src_register));
423 current->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
424 }
425 }
426
427 /*
428 * Optimize away things like:
429 * MOV TEMP[0].xy, TEMP[1].xyyy (first write for TEMP[0])
430 * MOV TEMP[0].w, TEMP[1].wwww (last write for TEMP[0])
431 * into:
432 * NOP
433 * MOV OUT[0].xyw, TEMP[1].xyww
434 */
435 static void i915_fpc_optimize_mov_after_mov(union i915_full_token *current, union i915_full_token *next)
436 {
437 struct i915_full_src_register *src_reg1, *src_reg2;
438 struct i915_full_dst_register *dst_reg1, *dst_reg2;
439 unsigned swizzle_x, swizzle_y, swizzle_z, swizzle_w;
440
441 if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
442 next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
443 current->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
444 next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
445 current->FullInstruction.Instruction.Saturate == next->FullInstruction.Instruction.Saturate &&
446 same_dst_reg(&next->FullInstruction.Dst[0], &current->FullInstruction.Dst[0]) &&
447 same_src_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Src[0]) &&
448 !same_src_dst_reg(&current->FullInstruction.Src[0], &current->FullInstruction.Dst[0]) )
449 {
450 src_reg1 = &current->FullInstruction.Src[0];
451 dst_reg1 = &current->FullInstruction.Dst[0];
452 src_reg2 = &next->FullInstruction.Src[0];
453 dst_reg2 = &next->FullInstruction.Dst[0];
454
455 /* Start with swizzles from the first mov */
456 swizzle_x = src_reg1->Register.SwizzleX;
457 swizzle_y = src_reg1->Register.SwizzleY;
458 swizzle_z = src_reg1->Register.SwizzleZ;
459 swizzle_w = src_reg1->Register.SwizzleW;
460
461 /* Pile the second mov on top */
462 if (dst_reg2->Register.WriteMask & TGSI_WRITEMASK_X)
463 swizzle_x = src_reg2->Register.SwizzleX;
464 if (dst_reg2->Register.WriteMask & TGSI_WRITEMASK_Y)
465 swizzle_y = src_reg2->Register.SwizzleY;
466 if (dst_reg2->Register.WriteMask & TGSI_WRITEMASK_Z)
467 swizzle_z = src_reg2->Register.SwizzleZ;
468 if (dst_reg2->Register.WriteMask & TGSI_WRITEMASK_W)
469 swizzle_w = src_reg2->Register.SwizzleW;
470
471 dst_reg2->Register.WriteMask |= dst_reg1->Register.WriteMask;
472 src_reg2->Register.SwizzleX = swizzle_x;
473 src_reg2->Register.SwizzleY = swizzle_y;
474 src_reg2->Register.SwizzleZ = swizzle_z;
475 src_reg2->Register.SwizzleW = swizzle_w;
476
477 current->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
478
479 return;
480 }
481 }
482
483 /*
484 * Optimize away things like:
485 * MUL OUT[0].xyz, TEMP[1], TEMP[2]
486 * MOV OUT[0].w, TEMP[2]
487 * into:
488 * MUL OUT[0].xyzw, TEMP[1].xyz1, TEMP[2]
489 * This is useful for optimizing texenv.
490 */
491 static void i915_fpc_optimize_mov_after_alu(union i915_full_token *current, union i915_full_token *next)
492 {
493 if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
494 next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
495 op_commutes(current->FullInstruction.Instruction.Opcode) &&
496 current->FullInstruction.Instruction.Saturate == next->FullInstruction.Instruction.Saturate &&
497 next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
498 same_dst_reg(&next->FullInstruction.Dst[0], &current->FullInstruction.Dst[0]) &&
499 same_src_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Src[1]) &&
500 !same_src_dst_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Dst[0]) &&
501 is_unswizzled(&current->FullInstruction.Src[0], current->FullInstruction.Dst[0].Register.WriteMask) &&
502 is_unswizzled(&current->FullInstruction.Src[1], current->FullInstruction.Dst[0].Register.WriteMask) &&
503 is_unswizzled(&next->FullInstruction.Src[0], next->FullInstruction.Dst[0].Register.WriteMask) )
504 {
505 next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
506
507 set_neutral_element_swizzle(&current->FullInstruction.Src[1], 0, 0);
508 set_neutral_element_swizzle(&current->FullInstruction.Src[0],
509 next->FullInstruction.Dst[0].Register.WriteMask,
510 op_neutral_element(current->FullInstruction.Instruction.Opcode));
511
512 current->FullInstruction.Dst[0].Register.WriteMask = current->FullInstruction.Dst[0].Register.WriteMask |
513 next->FullInstruction.Dst[0].Register.WriteMask;
514 return;
515 }
516
517 if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
518 next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
519 op_commutes(current->FullInstruction.Instruction.Opcode) &&
520 current->FullInstruction.Instruction.Saturate == next->FullInstruction.Instruction.Saturate &&
521 next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
522 same_dst_reg(&next->FullInstruction.Dst[0], &current->FullInstruction.Dst[0]) &&
523 same_src_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Src[0]) &&
524 !same_src_dst_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Dst[0]) &&
525 is_unswizzled(&current->FullInstruction.Src[0], current->FullInstruction.Dst[0].Register.WriteMask) &&
526 is_unswizzled(&current->FullInstruction.Src[1], current->FullInstruction.Dst[0].Register.WriteMask) &&
527 is_unswizzled(&next->FullInstruction.Src[0], next->FullInstruction.Dst[0].Register.WriteMask) )
528 {
529 next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
530
531 set_neutral_element_swizzle(&current->FullInstruction.Src[0], 0, 0);
532 set_neutral_element_swizzle(&current->FullInstruction.Src[1],
533 next->FullInstruction.Dst[0].Register.WriteMask,
534 op_neutral_element(current->FullInstruction.Instruction.Opcode));
535
536 current->FullInstruction.Dst[0].Register.WriteMask = current->FullInstruction.Dst[0].Register.WriteMask |
537 next->FullInstruction.Dst[0].Register.WriteMask;
538 return;
539 }
540 }
541
542 /*
543 * Optimize away things like:
544 * MOV TEMP[0].xyz TEMP[0].xyzx
545 * into:
546 * NOP
547 */
548 static boolean i915_fpc_useless_mov(union tgsi_full_token *tgsi_current)
549 {
550 union i915_full_token current;
551 copy_token(&current , tgsi_current);
552 if ( current.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
553 current.FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
554 op_has_dst(current.FullInstruction.Instruction.Opcode) &&
555 !current.FullInstruction.Instruction.Saturate &&
556 current.FullInstruction.Src[0].Register.Absolute == 0 &&
557 current.FullInstruction.Src[0].Register.Negate == 0 &&
558 is_unswizzled(&current.FullInstruction.Src[0], current.FullInstruction.Dst[0].Register.WriteMask) &&
559 same_src_dst_reg(&current.FullInstruction.Src[0], &current.FullInstruction.Dst[0]) )
560 {
561 return TRUE;
562 }
563 return FALSE;
564 }
565
566 /*
567 * Optimize away things like:
568 * *** TEMP[0], TEMP[1], TEMP[2]
569 * MOV OUT[0] TEMP[0]
570 * into:
571 * *** OUT[0], TEMP[1], TEMP[2]
572 */
573 static void i915_fpc_optimize_useless_mov_after_inst(struct i915_optimize_context *ctx,
574 struct i915_token_list *tokens,
575 int index)
576 {
577 union i915_full_token *current = &tokens->Tokens[index - 1];
578 union i915_full_token *next = &tokens->Tokens[index];
579
580 // &out_tokens->Tokens[i-1], &out_tokens->Tokens[i]);
581 if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
582 next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
583 next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
584 op_has_dst(current->FullInstruction.Instruction.Opcode) &&
585 !next->FullInstruction.Instruction.Saturate &&
586 next->FullInstruction.Src[0].Register.Absolute == 0 &&
587 next->FullInstruction.Src[0].Register.Negate == 0 &&
588 unused_from(ctx, &current->FullInstruction.Dst[0], index) &&
589 current->FullInstruction.Dst[0].Register.WriteMask == TGSI_WRITEMASK_XYZW &&
590 is_unswizzled(&next->FullInstruction.Src[0], next->FullInstruction.Dst[0].Register.WriteMask) &&
591 current->FullInstruction.Dst[0].Register.WriteMask == next->FullInstruction.Dst[0].Register.WriteMask &&
592 same_src_dst_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Dst[0]) )
593 {
594 next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
595
596 current->FullInstruction.Dst[0] = next->FullInstruction.Dst[0];
597 return;
598 }
599 }
600
601 struct i915_token_list* i915_optimize(const struct tgsi_token *tokens)
602 {
603 struct i915_token_list *out_tokens = MALLOC(sizeof(struct i915_token_list));
604 struct tgsi_parse_context parse;
605 struct i915_optimize_context *ctx;
606 int i = 0;
607
608 ctx = malloc(sizeof(*ctx));
609
610 out_tokens->NumTokens = 0;
611
612 /* Count the tokens */
613 tgsi_parse_init( &parse, tokens );
614 while( !tgsi_parse_end_of_tokens( &parse ) ) {
615 tgsi_parse_token( &parse );
616 out_tokens->NumTokens++;
617 }
618 tgsi_parse_free (&parse);
619
620 /* Allocate our tokens */
621 out_tokens->Tokens = MALLOC(sizeof(union i915_full_token) * out_tokens->NumTokens);
622
623 tgsi_parse_init( &parse, tokens );
624 while( !tgsi_parse_end_of_tokens( &parse ) ) {
625 tgsi_parse_token( &parse );
626
627 if (i915_fpc_useless_mov(&parse.FullToken)) {
628 out_tokens->NumTokens--;
629 continue;
630 }
631
632 copy_token(&out_tokens->Tokens[i] , &parse.FullToken);
633
634 i++;
635 }
636 tgsi_parse_free (&parse);
637
638 liveness_analysis(ctx, out_tokens);
639
640 i = 1;
641 while( i < out_tokens->NumTokens) {
642 i915_fpc_optimize_useless_mov_after_inst(ctx, out_tokens, i);
643 i915_fpc_optimize_mov_after_alu(&out_tokens->Tokens[i-1], &out_tokens->Tokens[i]);
644 i915_fpc_optimize_mov_after_mov(&out_tokens->Tokens[i-1], &out_tokens->Tokens[i]);
645 i915_fpc_optimize_mov_before_tex(ctx, out_tokens, i);
646 i++;
647 }
648
649 free(ctx);
650
651 return out_tokens;
652 }
653
654 void i915_optimize_free(struct i915_token_list *tokens)
655 {
656 free(tokens->Tokens);
657 free(tokens);
658 }
659
660