gallium: remove TGSI_OPCODE_SUB
[mesa.git] / src / gallium / drivers / i915 / i915_fpc_optimize.c
1 /**************************************************************************
2 *
3 * Copyright 2011 The Chromium OS authors.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL GOOGLE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 #include "i915_reg.h"
29 #include "i915_context.h"
30 #include "i915_fpc.h"
31
32 #include "pipe/p_shader_tokens.h"
33 #include "util/u_math.h"
34 #include "util/u_memory.h"
35 #include "util/u_string.h"
36 #include "tgsi/tgsi_parse.h"
37 #include "tgsi/tgsi_dump.h"
38 #include "tgsi/tgsi_exec.h"
39
40 struct i915_optimize_context
41 {
42 int first_write[TGSI_EXEC_NUM_TEMPS];
43 int last_read[TGSI_EXEC_NUM_TEMPS];
44 };
45
46 static boolean same_src_dst_reg(struct i915_full_src_register *s1, struct i915_full_dst_register *d1)
47 {
48 return (s1->Register.File == d1->Register.File &&
49 s1->Register.Indirect == d1->Register.Indirect &&
50 s1->Register.Dimension == d1->Register.Dimension &&
51 s1->Register.Index == d1->Register.Index);
52 }
53
54 static boolean same_dst_reg(struct i915_full_dst_register *d1, struct i915_full_dst_register *d2)
55 {
56 return (d1->Register.File == d2->Register.File &&
57 d1->Register.Indirect == d2->Register.Indirect &&
58 d1->Register.Dimension == d2->Register.Dimension &&
59 d1->Register.Index == d2->Register.Index);
60 }
61
62 static boolean same_src_reg(struct i915_full_src_register *d1, struct i915_full_src_register *d2)
63 {
64 return (d1->Register.File == d2->Register.File &&
65 d1->Register.Indirect == d2->Register.Indirect &&
66 d1->Register.Dimension == d2->Register.Dimension &&
67 d1->Register.Index == d2->Register.Index &&
68 d1->Register.Absolute == d2->Register.Absolute &&
69 d1->Register.Negate == d2->Register.Negate);
70 }
71
72 static const struct {
73 boolean is_texture;
74 boolean commutes;
75 unsigned neutral_element;
76 unsigned num_dst;
77 unsigned num_src;
78 } op_table [TGSI_OPCODE_LAST] = {
79 [ TGSI_OPCODE_ADD ] = { false, true, TGSI_SWIZZLE_ZERO, 1, 2 },
80 [ TGSI_OPCODE_CEIL ] = { false, false, 0, 1, 1 },
81 [ TGSI_OPCODE_CMP ] = { false, false, 0, 1, 2 },
82 [ TGSI_OPCODE_COS ] = { false, false, 0, 1, 1 },
83 [ TGSI_OPCODE_DDX ] = { false, false, 0, 1, 0 },
84 [ TGSI_OPCODE_DDY ] = { false, false, 0, 1, 0 },
85 [ TGSI_OPCODE_DP2 ] = { false, true, TGSI_SWIZZLE_ONE, 1, 2 },
86 [ TGSI_OPCODE_DP3 ] = { false, true, TGSI_SWIZZLE_ONE, 1, 2 },
87 [ TGSI_OPCODE_DP4 ] = { false, true, TGSI_SWIZZLE_ONE, 1, 2 },
88 [ TGSI_OPCODE_DPH ] = { false, false, 0, 1, 2 },
89 [ TGSI_OPCODE_DST ] = { false, false, 0, 1, 2 },
90 [ TGSI_OPCODE_END ] = { false, false, 0, 0, 0 },
91 [ TGSI_OPCODE_EX2 ] = { false, false, 0, 1, 1 },
92 [ TGSI_OPCODE_FLR ] = { false, false, 0, 1, 1 },
93 [ TGSI_OPCODE_FRC ] = { false, false, 0, 1, 1 },
94 [ TGSI_OPCODE_KILL_IF ] = { false, false, 0, 0, 1 },
95 [ TGSI_OPCODE_KILL ] = { false, false, 0, 0, 0 },
96 [ TGSI_OPCODE_LG2 ] = { false, false, 0, 1, 1 },
97 [ TGSI_OPCODE_LIT ] = { false, false, 0, 1, 1 },
98 [ TGSI_OPCODE_LRP ] = { false, false, 0, 1, 3 },
99 [ TGSI_OPCODE_MAX ] = { false, false, 0, 1, 2 },
100 [ TGSI_OPCODE_MAD ] = { false, false, 0, 1, 3 },
101 [ TGSI_OPCODE_MIN ] = { false, false, 0, 1, 2 },
102 [ TGSI_OPCODE_MOV ] = { false, false, 0, 1, 1 },
103 [ TGSI_OPCODE_MUL ] = { false, true, TGSI_SWIZZLE_ONE, 1, 2 },
104 [ TGSI_OPCODE_NOP ] = { false, false, 0, 0, 0 },
105 [ TGSI_OPCODE_POW ] = { false, false, 0, 1, 2 },
106 [ TGSI_OPCODE_RCP ] = { false, false, 0, 1, 1 },
107 [ TGSI_OPCODE_RET ] = { false, false, 0, 0, 0 },
108 [ TGSI_OPCODE_RSQ ] = { false, false, 0, 1, 1 },
109 [ TGSI_OPCODE_SCS ] = { false, false, 0, 1, 1 },
110 [ TGSI_OPCODE_SEQ ] = { false, false, 0, 1, 2 },
111 [ TGSI_OPCODE_SGE ] = { false, false, 0, 1, 2 },
112 [ TGSI_OPCODE_SGT ] = { false, false, 0, 1, 2 },
113 [ TGSI_OPCODE_SIN ] = { false, false, 0, 1, 1 },
114 [ TGSI_OPCODE_SLE ] = { false, false, 0, 1, 2 },
115 [ TGSI_OPCODE_SLT ] = { false, false, 0, 1, 2 },
116 [ TGSI_OPCODE_SNE ] = { false, false, 0, 1, 2 },
117 [ TGSI_OPCODE_SSG ] = { false, false, 0, 1, 1 },
118 [ TGSI_OPCODE_TEX ] = { true, false, 0, 1, 2 },
119 [ TGSI_OPCODE_TRUNC ] = { false, false, 0, 1, 1 },
120 [ TGSI_OPCODE_TXB ] = { true, false, 0, 1, 2 },
121 [ TGSI_OPCODE_TXP ] = { true, false, 0, 1, 2 },
122 [ TGSI_OPCODE_XPD ] = { false, false, 0, 1, 2 },
123 };
124
125 static boolean op_has_dst(unsigned opcode)
126 {
127 return (op_table[opcode].num_dst > 0);
128 }
129
130 static int op_num_dst(unsigned opcode)
131 {
132 return op_table[opcode].num_dst;
133 }
134
135 static int op_num_src(unsigned opcode)
136 {
137 return op_table[opcode].num_src;
138 }
139
140 static boolean op_commutes(unsigned opcode)
141 {
142 return op_table[opcode].commutes;
143 }
144
145 static unsigned mask_for_unswizzled(int num_components)
146 {
147 unsigned mask = 0;
148 switch(num_components)
149 {
150 case 4:
151 mask |= TGSI_WRITEMASK_W;
152 case 3:
153 mask |= TGSI_WRITEMASK_Z;
154 case 2:
155 mask |= TGSI_WRITEMASK_Y;
156 case 1:
157 mask |= TGSI_WRITEMASK_X;
158 }
159 return mask;
160 }
161
162 static boolean is_unswizzled(struct i915_full_src_register *r,
163 unsigned write_mask)
164 {
165 if ( write_mask & TGSI_WRITEMASK_X && r->Register.SwizzleX != TGSI_SWIZZLE_X)
166 return FALSE;
167 if ( write_mask & TGSI_WRITEMASK_Y && r->Register.SwizzleY != TGSI_SWIZZLE_Y)
168 return FALSE;
169 if ( write_mask & TGSI_WRITEMASK_Z && r->Register.SwizzleZ != TGSI_SWIZZLE_Z)
170 return FALSE;
171 if ( write_mask & TGSI_WRITEMASK_W && r->Register.SwizzleW != TGSI_SWIZZLE_W)
172 return FALSE;
173 return TRUE;
174 }
175
176 static boolean op_is_texture(unsigned opcode)
177 {
178 return op_table[opcode].is_texture;
179 }
180
181 static unsigned op_neutral_element(unsigned opcode)
182 {
183 unsigned ne = op_table[opcode].neutral_element;
184 if (!ne) {
185 debug_printf("No neutral element for opcode %d\n",opcode);
186 ne = TGSI_SWIZZLE_ZERO;
187 }
188 return ne;
189 }
190
191 /*
192 * Sets the swizzle to the neutral element for the operation for the bits
193 * of writemask which are set, swizzle to identity otherwise.
194 */
195 static void set_neutral_element_swizzle(struct i915_full_src_register *r,
196 unsigned write_mask,
197 unsigned neutral)
198 {
199 if ( write_mask & TGSI_WRITEMASK_X )
200 r->Register.SwizzleX = neutral;
201 else
202 r->Register.SwizzleX = TGSI_SWIZZLE_X;
203
204 if ( write_mask & TGSI_WRITEMASK_Y )
205 r->Register.SwizzleY = neutral;
206 else
207 r->Register.SwizzleY = TGSI_SWIZZLE_Y;
208
209 if ( write_mask & TGSI_WRITEMASK_Z )
210 r->Register.SwizzleZ = neutral;
211 else
212 r->Register.SwizzleZ = TGSI_SWIZZLE_Z;
213
214 if ( write_mask & TGSI_WRITEMASK_W )
215 r->Register.SwizzleW = neutral;
216 else
217 r->Register.SwizzleW = TGSI_SWIZZLE_W;
218 }
219
220 static void copy_src_reg(struct i915_src_register *o, const struct tgsi_src_register *i)
221 {
222 o->File = i->File;
223 o->Indirect = i->Indirect;
224 o->Dimension = i->Dimension;
225 o->Index = i->Index;
226 o->SwizzleX = i->SwizzleX;
227 o->SwizzleY = i->SwizzleY;
228 o->SwizzleZ = i->SwizzleZ;
229 o->SwizzleW = i->SwizzleW;
230 o->Absolute = i->Absolute;
231 o->Negate = i->Negate;
232 }
233
234 static void copy_dst_reg(struct i915_dst_register *o, const struct tgsi_dst_register *i)
235 {
236 o->File = i->File;
237 o->WriteMask = i->WriteMask;
238 o->Indirect = i->Indirect;
239 o->Dimension = i->Dimension;
240 o->Index = i->Index;
241 }
242
243 static void copy_instruction(struct i915_full_instruction *o, const struct tgsi_full_instruction *i)
244 {
245 memcpy(&o->Instruction, &i->Instruction, sizeof(o->Instruction));
246 memcpy(&o->Texture, &i->Texture, sizeof(o->Texture));
247
248 copy_dst_reg(&o->Dst[0].Register, &i->Dst[0].Register);
249
250 copy_src_reg(&o->Src[0].Register, &i->Src[0].Register);
251 copy_src_reg(&o->Src[1].Register, &i->Src[1].Register);
252 copy_src_reg(&o->Src[2].Register, &i->Src[2].Register);
253 }
254
255 static void copy_token(union i915_full_token *o, union tgsi_full_token *i)
256 {
257 if (i->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION)
258 memcpy(o, i, sizeof(*o));
259 else
260 copy_instruction(&o->FullInstruction, &i->FullInstruction);
261
262 }
263
264 static void liveness_mark_written(struct i915_optimize_context *ctx,
265 struct i915_full_dst_register *dst_reg,
266 int pos)
267 {
268 int dst_reg_index;
269 if (dst_reg->Register.File == TGSI_FILE_TEMPORARY) {
270 dst_reg_index = dst_reg->Register.Index;
271 assert(dst_reg_index < TGSI_EXEC_NUM_TEMPS);
272 /* dead -> live transition */
273 if (ctx->first_write[dst_reg_index] != -1)
274 ctx->first_write[dst_reg_index] = pos;
275 }
276 }
277
278 static void liveness_mark_read(struct i915_optimize_context *ctx,
279 struct i915_full_src_register *src_reg,
280 int pos)
281 {
282 int src_reg_index;
283 if (src_reg->Register.File == TGSI_FILE_TEMPORARY) {
284 src_reg_index = src_reg->Register.Index;
285 assert(src_reg_index < TGSI_EXEC_NUM_TEMPS);
286 /* live -> dead transition */
287 if (ctx->last_read[src_reg_index] != -1)
288 ctx->last_read[src_reg_index] = pos;
289 }
290 }
291
292 static void liveness_analysis(struct i915_optimize_context *ctx,
293 struct i915_token_list *tokens)
294 {
295 struct i915_full_dst_register *dst_reg;
296 struct i915_full_src_register *src_reg;
297 union i915_full_token *current;
298 unsigned opcode;
299 int num_dst, num_src;
300 int i = 0;
301
302 for(i = 0; i < TGSI_EXEC_NUM_TEMPS; i++)
303 {
304 ctx->first_write[i] = -1;
305 ctx->last_read[i] = -1;
306 }
307
308 for(i = 0; i < tokens->NumTokens; i++)
309 {
310 current = &tokens->Tokens[i];
311
312 if (current->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION)
313 continue;
314
315 opcode = current->FullInstruction.Instruction.Opcode;
316 num_dst = op_num_dst(opcode);
317
318 switch(num_dst)
319 {
320 case 1:
321 dst_reg = &current->FullInstruction.Dst[0];
322 liveness_mark_written(ctx, dst_reg, i);
323 case 0:
324 break;
325 default:
326 debug_printf("Op %d has %d dst regs\n", opcode, num_dst);
327 break;
328 }
329 }
330
331 for(i = tokens->NumTokens - 1; i >= 0; i--)
332 {
333 current = &tokens->Tokens[i];
334
335 if (current->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION)
336 continue;
337
338 opcode = current->FullInstruction.Instruction.Opcode;
339 num_src = op_num_src(opcode);
340
341 switch(num_src)
342 {
343 case 3:
344 src_reg = &current->FullInstruction.Src[2];
345 liveness_mark_read(ctx, src_reg, i);
346 case 2:
347 src_reg = &current->FullInstruction.Src[1];
348 liveness_mark_read(ctx, src_reg, i);
349 case 1:
350 src_reg = &current->FullInstruction.Src[0];
351 liveness_mark_read(ctx, src_reg, i);
352 case 0:
353 break;
354 default:
355 debug_printf("Op %d has %d src regs\n", opcode, num_src);
356 break;
357 }
358 }
359 }
360
361 static int unused_from(struct i915_optimize_context *ctx, struct i915_full_dst_register *dst_reg, int from)
362 {
363 int dst_reg_index = dst_reg->Register.Index;
364 assert(dst_reg_index < TGSI_EXEC_NUM_TEMPS);
365 return (from >= ctx->last_read[dst_reg_index]);
366 }
367
368 /* Returns a mask with the components used for a texture access instruction */
369 static unsigned i915_tex_mask(union i915_full_token *instr)
370 {
371 unsigned mask;
372
373 /* Get the number of coords */
374 mask = mask_for_unswizzled(i915_num_coords(instr->FullInstruction.Texture.Texture));
375
376 /* Add the W component if projective */
377 if (instr->FullInstruction.Instruction.Opcode == TGSI_OPCODE_TXP)
378 mask |= TGSI_WRITEMASK_W;
379
380 return mask;
381 }
382
383 static boolean target_is_texture2d(uint tex)
384 {
385 switch (tex) {
386 case TGSI_TEXTURE_2D:
387 case TGSI_TEXTURE_RECT:
388 return true;
389 default:
390 return false;
391 }
392 }
393
394
395 /*
396 * Optimize away useless indirect texture reads:
397 * MOV TEMP[0].xy, IN[0].xyyy
398 * TEX TEMP[1], TEMP[0], SAMP[0], 2D
399 * into:
400 * TEX TEMP[1], IN[0], SAMP[0], 2D
401 *
402 * note: this only seems to work on 2D/RECT textures, but not SHAADOW2D/1D/..
403 */
404 static void i915_fpc_optimize_mov_before_tex(struct i915_optimize_context *ctx,
405 struct i915_token_list *tokens,
406 int index)
407 {
408 union i915_full_token *current = &tokens->Tokens[index - 1];
409 union i915_full_token *next = &tokens->Tokens[index];
410
411 if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
412 next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
413 current->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
414 op_is_texture(next->FullInstruction.Instruction.Opcode) &&
415 target_is_texture2d(next->FullInstruction.Texture.Texture) &&
416 same_src_dst_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Dst[0]) &&
417 is_unswizzled(&current->FullInstruction.Src[0], i915_tex_mask(next)) &&
418 unused_from(ctx, &current->FullInstruction.Dst[0], index))
419 {
420 memcpy(&next->FullInstruction.Src[0], &current->FullInstruction.Src[0], sizeof(struct i915_src_register));
421 current->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
422 }
423 }
424
425 /*
426 * Optimize away things like:
427 * MOV TEMP[0].xy, TEMP[1].xyyy (first write for TEMP[0])
428 * MOV TEMP[0].w, TEMP[1].wwww (last write for TEMP[0])
429 * into:
430 * NOP
431 * MOV OUT[0].xyw, TEMP[1].xyww
432 */
433 static void i915_fpc_optimize_mov_after_mov(union i915_full_token *current, union i915_full_token *next)
434 {
435 struct i915_full_src_register *src_reg1, *src_reg2;
436 struct i915_full_dst_register *dst_reg1, *dst_reg2;
437 unsigned swizzle_x, swizzle_y, swizzle_z, swizzle_w;
438
439 if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
440 next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
441 current->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
442 next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
443 current->FullInstruction.Instruction.Saturate == next->FullInstruction.Instruction.Saturate &&
444 same_dst_reg(&next->FullInstruction.Dst[0], &current->FullInstruction.Dst[0]) &&
445 same_src_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Src[0]) &&
446 !same_src_dst_reg(&current->FullInstruction.Src[0], &current->FullInstruction.Dst[0]) )
447 {
448 src_reg1 = &current->FullInstruction.Src[0];
449 dst_reg1 = &current->FullInstruction.Dst[0];
450 src_reg2 = &next->FullInstruction.Src[0];
451 dst_reg2 = &next->FullInstruction.Dst[0];
452
453 /* Start with swizzles from the first mov */
454 swizzle_x = src_reg1->Register.SwizzleX;
455 swizzle_y = src_reg1->Register.SwizzleY;
456 swizzle_z = src_reg1->Register.SwizzleZ;
457 swizzle_w = src_reg1->Register.SwizzleW;
458
459 /* Pile the second mov on top */
460 if (dst_reg2->Register.WriteMask & TGSI_WRITEMASK_X)
461 swizzle_x = src_reg2->Register.SwizzleX;
462 if (dst_reg2->Register.WriteMask & TGSI_WRITEMASK_Y)
463 swizzle_y = src_reg2->Register.SwizzleY;
464 if (dst_reg2->Register.WriteMask & TGSI_WRITEMASK_Z)
465 swizzle_z = src_reg2->Register.SwizzleZ;
466 if (dst_reg2->Register.WriteMask & TGSI_WRITEMASK_W)
467 swizzle_w = src_reg2->Register.SwizzleW;
468
469 dst_reg2->Register.WriteMask |= dst_reg1->Register.WriteMask;
470 src_reg2->Register.SwizzleX = swizzle_x;
471 src_reg2->Register.SwizzleY = swizzle_y;
472 src_reg2->Register.SwizzleZ = swizzle_z;
473 src_reg2->Register.SwizzleW = swizzle_w;
474
475 current->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
476
477 return;
478 }
479 }
480
481 /*
482 * Optimize away things like:
483 * MUL OUT[0].xyz, TEMP[1], TEMP[2]
484 * MOV OUT[0].w, TEMP[2]
485 * into:
486 * MUL OUT[0].xyzw, TEMP[1].xyz1, TEMP[2]
487 * This is useful for optimizing texenv.
488 */
489 static void i915_fpc_optimize_mov_after_alu(union i915_full_token *current, union i915_full_token *next)
490 {
491 if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
492 next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
493 op_commutes(current->FullInstruction.Instruction.Opcode) &&
494 current->FullInstruction.Instruction.Saturate == next->FullInstruction.Instruction.Saturate &&
495 next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
496 same_dst_reg(&next->FullInstruction.Dst[0], &current->FullInstruction.Dst[0]) &&
497 same_src_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Src[1]) &&
498 !same_src_dst_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Dst[0]) &&
499 is_unswizzled(&current->FullInstruction.Src[0], current->FullInstruction.Dst[0].Register.WriteMask) &&
500 is_unswizzled(&current->FullInstruction.Src[1], current->FullInstruction.Dst[0].Register.WriteMask) &&
501 is_unswizzled(&next->FullInstruction.Src[0], next->FullInstruction.Dst[0].Register.WriteMask) )
502 {
503 next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
504
505 set_neutral_element_swizzle(&current->FullInstruction.Src[1], 0, 0);
506 set_neutral_element_swizzle(&current->FullInstruction.Src[0],
507 next->FullInstruction.Dst[0].Register.WriteMask,
508 op_neutral_element(current->FullInstruction.Instruction.Opcode));
509
510 current->FullInstruction.Dst[0].Register.WriteMask = current->FullInstruction.Dst[0].Register.WriteMask |
511 next->FullInstruction.Dst[0].Register.WriteMask;
512 return;
513 }
514
515 if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
516 next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
517 op_commutes(current->FullInstruction.Instruction.Opcode) &&
518 current->FullInstruction.Instruction.Saturate == next->FullInstruction.Instruction.Saturate &&
519 next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
520 same_dst_reg(&next->FullInstruction.Dst[0], &current->FullInstruction.Dst[0]) &&
521 same_src_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Src[0]) &&
522 !same_src_dst_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Dst[0]) &&
523 is_unswizzled(&current->FullInstruction.Src[0], current->FullInstruction.Dst[0].Register.WriteMask) &&
524 is_unswizzled(&current->FullInstruction.Src[1], current->FullInstruction.Dst[0].Register.WriteMask) &&
525 is_unswizzled(&next->FullInstruction.Src[0], next->FullInstruction.Dst[0].Register.WriteMask) )
526 {
527 next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
528
529 set_neutral_element_swizzle(&current->FullInstruction.Src[0], 0, 0);
530 set_neutral_element_swizzle(&current->FullInstruction.Src[1],
531 next->FullInstruction.Dst[0].Register.WriteMask,
532 op_neutral_element(current->FullInstruction.Instruction.Opcode));
533
534 current->FullInstruction.Dst[0].Register.WriteMask = current->FullInstruction.Dst[0].Register.WriteMask |
535 next->FullInstruction.Dst[0].Register.WriteMask;
536 return;
537 }
538 }
539
540 /*
541 * Optimize away things like:
542 * MOV TEMP[0].xyz TEMP[0].xyzx
543 * into:
544 * NOP
545 */
546 static boolean i915_fpc_useless_mov(union tgsi_full_token *tgsi_current)
547 {
548 union i915_full_token current;
549 copy_token(&current , tgsi_current);
550 if ( current.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
551 current.FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
552 op_has_dst(current.FullInstruction.Instruction.Opcode) &&
553 !current.FullInstruction.Instruction.Saturate &&
554 current.FullInstruction.Src[0].Register.Absolute == 0 &&
555 current.FullInstruction.Src[0].Register.Negate == 0 &&
556 is_unswizzled(&current.FullInstruction.Src[0], current.FullInstruction.Dst[0].Register.WriteMask) &&
557 same_src_dst_reg(&current.FullInstruction.Src[0], &current.FullInstruction.Dst[0]) )
558 {
559 return TRUE;
560 }
561 return FALSE;
562 }
563
564 /*
565 * Optimize away things like:
566 * *** TEMP[0], TEMP[1], TEMP[2]
567 * MOV OUT[0] TEMP[0]
568 * into:
569 * *** OUT[0], TEMP[1], TEMP[2]
570 */
571 static void i915_fpc_optimize_useless_mov_after_inst(struct i915_optimize_context *ctx,
572 struct i915_token_list *tokens,
573 int index)
574 {
575 union i915_full_token *current = &tokens->Tokens[index - 1];
576 union i915_full_token *next = &tokens->Tokens[index];
577
578 // &out_tokens->Tokens[i-1], &out_tokens->Tokens[i]);
579 if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
580 next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
581 next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
582 op_has_dst(current->FullInstruction.Instruction.Opcode) &&
583 !next->FullInstruction.Instruction.Saturate &&
584 next->FullInstruction.Src[0].Register.Absolute == 0 &&
585 next->FullInstruction.Src[0].Register.Negate == 0 &&
586 unused_from(ctx, &current->FullInstruction.Dst[0], index) &&
587 current->FullInstruction.Dst[0].Register.WriteMask == TGSI_WRITEMASK_XYZW &&
588 is_unswizzled(&next->FullInstruction.Src[0], next->FullInstruction.Dst[0].Register.WriteMask) &&
589 current->FullInstruction.Dst[0].Register.WriteMask == next->FullInstruction.Dst[0].Register.WriteMask &&
590 same_src_dst_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Dst[0]) )
591 {
592 next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
593
594 current->FullInstruction.Dst[0] = next->FullInstruction.Dst[0];
595 return;
596 }
597 }
598
599 struct i915_token_list* i915_optimize(const struct tgsi_token *tokens)
600 {
601 struct i915_token_list *out_tokens = MALLOC(sizeof(struct i915_token_list));
602 struct tgsi_parse_context parse;
603 struct i915_optimize_context *ctx;
604 int i = 0;
605
606 ctx = malloc(sizeof(*ctx));
607
608 out_tokens->NumTokens = 0;
609
610 /* Count the tokens */
611 tgsi_parse_init( &parse, tokens );
612 while( !tgsi_parse_end_of_tokens( &parse ) ) {
613 tgsi_parse_token( &parse );
614 out_tokens->NumTokens++;
615 }
616 tgsi_parse_free (&parse);
617
618 /* Allocate our tokens */
619 out_tokens->Tokens = MALLOC(sizeof(union i915_full_token) * out_tokens->NumTokens);
620
621 tgsi_parse_init( &parse, tokens );
622 while( !tgsi_parse_end_of_tokens( &parse ) ) {
623 tgsi_parse_token( &parse );
624
625 if (i915_fpc_useless_mov(&parse.FullToken)) {
626 out_tokens->NumTokens--;
627 continue;
628 }
629
630 copy_token(&out_tokens->Tokens[i] , &parse.FullToken);
631
632 i++;
633 }
634 tgsi_parse_free (&parse);
635
636 liveness_analysis(ctx, out_tokens);
637
638 i = 1;
639 while( i < out_tokens->NumTokens) {
640 i915_fpc_optimize_useless_mov_after_inst(ctx, out_tokens, i);
641 i915_fpc_optimize_mov_after_alu(&out_tokens->Tokens[i-1], &out_tokens->Tokens[i]);
642 i915_fpc_optimize_mov_after_mov(&out_tokens->Tokens[i-1], &out_tokens->Tokens[i]);
643 i915_fpc_optimize_mov_before_tex(ctx, out_tokens, i);
644 i++;
645 }
646
647 free(ctx);
648
649 return out_tokens;
650 }
651
652 void i915_optimize_free(struct i915_token_list *tokens)
653 {
654 free(tokens->Tokens);
655 free(tokens);
656 }
657
658