Merge branch 'llvm-cliptest-viewport'
[mesa.git] / src / mesa / drivers / dri / r300 / compiler / radeon_optimize.c
1 /*
2 * Copyright (C) 2009 Nicolai Haehnle.
3 * Copyright 2010 Tom Stellard <tstellar@gmail.com>
4 *
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining
8 * a copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sublicense, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial
17 * portions of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
22 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
23 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
24 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
25 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 */
28
29 #include "radeon_dataflow.h"
30
31 #include "radeon_compiler.h"
32 #include "radeon_compiler_util.h"
33 #include "radeon_swizzle.h"
34
/**
 * State shared between a presubtract peephole pass and its write-scan
 * callback (peephole_scan_write()).
 */
struct peephole_state {
	/** The instruction whose destination is being tracked; the presubtract
	 * peepholes in this file set it to the candidate ADD instruction. */
	struct rc_instruction * Inst;
	/** Stores a bitmask of the components that are still "alive" (i.e.
	 * they have not been written to since Inst was executed.)
	 */
	unsigned int WriteMask;
};
42
/**
 * Callback invoked by presub_helper() to rewrite one source operand of a
 * reader instruction so that it reads a presubtract result instead.
 * The arguments are: the peephole state, the instruction to rewrite, and
 * the index of the source register within that instruction.
 */
typedef void (*rc_presub_replace_fn)(struct peephole_state *,
						struct rc_instruction *,
						unsigned int);
46
47 static struct rc_src_register chain_srcregs(struct rc_src_register outer, struct rc_src_register inner)
48 {
49 struct rc_src_register combine;
50 combine.File = inner.File;
51 combine.Index = inner.Index;
52 combine.RelAddr = inner.RelAddr;
53 if (outer.Abs) {
54 combine.Abs = 1;
55 combine.Negate = outer.Negate;
56 } else {
57 combine.Abs = inner.Abs;
58 combine.Negate = 0;
59 for(unsigned int chan = 0; chan < 4; ++chan) {
60 unsigned int swz = GET_SWZ(outer.Swizzle, chan);
61 if (swz < 4)
62 combine.Negate |= GET_BIT(inner.Negate, swz) << chan;
63 }
64 combine.Negate ^= outer.Negate;
65 }
66 combine.Swizzle = combine_swizzles(inner.Swizzle, outer.Swizzle);
67 return combine;
68 }
69
/**
 * Bookkeeping for one copy_propagate() run; a pointer to it is handed to
 * the reader-scan callbacks through rc_reader_data::CbData.
 */
struct copy_propagate_state {
	struct radeon_compiler * C;
	/** The MOV instruction that is being propagated. */
	struct rc_instruction * Mov;
	unsigned int Conflict:1;

	/** Whether Mov's source has been clobbered */
	unsigned int SourceClobbered:1;

	/** Which components of Mov's destination register are still from that Mov? */
	unsigned int MovMask:4;

	/** Which components of Mov's destination register are clearly *not* from that Mov */
	unsigned int DefinedMask:4;

	/** Which components of Mov's source register are sourced */
	unsigned int SourcedMask:4;

	/** Branch depth beyond Mov; negative value indicates we left the Mov's block */
	int BranchDepth;
};
90
91 static void copy_propagate_scan_read(void * data, struct rc_instruction * inst,
92 struct rc_src_register * src)
93 {
94 rc_register_file file = src->File;
95 struct rc_reader_data * reader_data = data;
96 const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode);
97
98 /* It is possible to do copy propigation in this situation,
99 * just not right now, see peephole_add_presub_inv() */
100 if (reader_data->Writer->U.I.PreSub.Opcode != RC_PRESUB_NONE &&
101 (info->NumSrcRegs > 2 || info->HasTexture)) {
102 reader_data->Abort = 1;
103 return;
104 }
105
106 /* XXX This could probably be handled better. */
107 if (file == RC_FILE_ADDRESS) {
108 reader_data->Abort = 1;
109 return;
110 }
111
112 /* These instructions cannot read from the constants file.
113 * see radeonTransformTEX()
114 */
115 if(reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_TEMPORARY &&
116 reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_INPUT &&
117 (inst->U.I.Opcode == RC_OPCODE_TEX ||
118 inst->U.I.Opcode == RC_OPCODE_TXB ||
119 inst->U.I.Opcode == RC_OPCODE_TXP ||
120 inst->U.I.Opcode == RC_OPCODE_KIL)){
121 reader_data->Abort = 1;
122 return;
123 }
124 }
125
126 static void copy_propagate_scan_write(void * data, struct rc_instruction * inst,
127 rc_register_file file, unsigned int index, unsigned int mask)
128 {
129 struct rc_reader_data * reader_data = data;
130 struct copy_propagate_state * s = reader_data->CbData;
131
132 if (file == reader_data->Writer->U.I.SrcReg[0].File && index == reader_data->Writer->U.I.SrcReg[0].Index) {
133 if (mask & s->SourcedMask)
134 reader_data->AbortOnRead = 1;
135 } else if (s->Mov->U.I.SrcReg[0].RelAddr && file == RC_FILE_ADDRESS) {
136 reader_data->AbortOnRead = 1;
137 }
138 }
139
140 static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * inst_mov)
141 {
142 struct copy_propagate_state s;
143 struct rc_reader_data reader_data;
144 unsigned int i;
145
146 if (inst_mov->U.I.DstReg.File != RC_FILE_TEMPORARY ||
147 inst_mov->U.I.DstReg.RelAddr ||
148 inst_mov->U.I.WriteALUResult ||
149 inst_mov->U.I.SaturateMode)
150 return;
151
152 memset(&s, 0, sizeof(s));
153 s.C = c;
154 s.Mov = inst_mov;
155 s.MovMask = inst_mov->U.I.DstReg.WriteMask;
156 s.DefinedMask = RC_MASK_XYZW & ~s.MovMask;
157
158 reader_data.CbData = &s;
159
160 for(unsigned int chan = 0; chan < 4; ++chan) {
161 unsigned int swz = GET_SWZ(inst_mov->U.I.SrcReg[0].Swizzle, chan);
162 s.SourcedMask |= (1 << swz) & RC_MASK_XYZW;
163 }
164
165 /* Get a list of all the readers of this MOV instruction. */
166 rc_get_readers_normal(c, inst_mov, &reader_data,
167 copy_propagate_scan_read, copy_propagate_scan_write);
168
169 if (reader_data.Abort || reader_data.ReaderCount == 0)
170 return;
171
172 /* Propagate the MOV instruction. */
173 for (i = 0; i < reader_data.ReaderCount; i++) {
174 struct rc_instruction * inst = reader_data.Readers[i].Inst;
175 *reader_data.Readers[i].Src = chain_srcregs(*reader_data.Readers[i].Src, s.Mov->U.I.SrcReg[0]);
176
177 if (s.Mov->U.I.SrcReg[0].File == RC_FILE_PRESUB)
178 inst->U.I.PreSub = s.Mov->U.I.PreSub;
179 }
180
181 /* Finally, remove the original MOV instruction */
182 rc_remove_instruction(inst_mov);
183 }
184
185 /**
186 * Check if a source register is actually always the same
187 * swizzle constant.
188 */
189 static int is_src_uniform_constant(struct rc_src_register src,
190 rc_swizzle * pswz, unsigned int * pnegate)
191 {
192 int have_used = 0;
193
194 if (src.File != RC_FILE_NONE) {
195 *pswz = 0;
196 return 0;
197 }
198
199 for(unsigned int chan = 0; chan < 4; ++chan) {
200 unsigned int swz = GET_SWZ(src.Swizzle, chan);
201 if (swz < 4) {
202 *pswz = 0;
203 return 0;
204 }
205 if (swz == RC_SWIZZLE_UNUSED)
206 continue;
207
208 if (!have_used) {
209 *pswz = swz;
210 *pnegate = GET_BIT(src.Negate, chan);
211 have_used = 1;
212 } else {
213 if (swz != *pswz || *pnegate != GET_BIT(src.Negate, chan)) {
214 *pswz = 0;
215 return 0;
216 }
217 }
218 }
219
220 return 1;
221 }
222
223 static void constant_folding_mad(struct rc_instruction * inst)
224 {
225 rc_swizzle swz;
226 unsigned int negate;
227
228 if (is_src_uniform_constant(inst->U.I.SrcReg[2], &swz, &negate)) {
229 if (swz == RC_SWIZZLE_ZERO) {
230 inst->U.I.Opcode = RC_OPCODE_MUL;
231 return;
232 }
233 }
234
235 if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
236 if (swz == RC_SWIZZLE_ONE) {
237 inst->U.I.Opcode = RC_OPCODE_ADD;
238 if (negate)
239 inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
240 inst->U.I.SrcReg[1] = inst->U.I.SrcReg[2];
241 return;
242 } else if (swz == RC_SWIZZLE_ZERO) {
243 inst->U.I.Opcode = RC_OPCODE_MOV;
244 inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
245 return;
246 }
247 }
248
249 if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
250 if (swz == RC_SWIZZLE_ONE) {
251 inst->U.I.Opcode = RC_OPCODE_ADD;
252 if (negate)
253 inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW;
254 inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
255 return;
256 } else if (swz == RC_SWIZZLE_ZERO) {
257 inst->U.I.Opcode = RC_OPCODE_MOV;
258 inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
259 return;
260 }
261 }
262 }
263
264 static void constant_folding_mul(struct rc_instruction * inst)
265 {
266 rc_swizzle swz;
267 unsigned int negate;
268
269 if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
270 if (swz == RC_SWIZZLE_ONE) {
271 inst->U.I.Opcode = RC_OPCODE_MOV;
272 inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1];
273 if (negate)
274 inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
275 return;
276 } else if (swz == RC_SWIZZLE_ZERO) {
277 inst->U.I.Opcode = RC_OPCODE_MOV;
278 inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
279 return;
280 }
281 }
282
283 if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
284 if (swz == RC_SWIZZLE_ONE) {
285 inst->U.I.Opcode = RC_OPCODE_MOV;
286 if (negate)
287 inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
288 return;
289 } else if (swz == RC_SWIZZLE_ZERO) {
290 inst->U.I.Opcode = RC_OPCODE_MOV;
291 inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
292 return;
293 }
294 }
295 }
296
297 static void constant_folding_add(struct rc_instruction * inst)
298 {
299 rc_swizzle swz;
300 unsigned int negate;
301
302 if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
303 if (swz == RC_SWIZZLE_ZERO) {
304 inst->U.I.Opcode = RC_OPCODE_MOV;
305 inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1];
306 return;
307 }
308 }
309
310 if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
311 if (swz == RC_SWIZZLE_ZERO) {
312 inst->U.I.Opcode = RC_OPCODE_MOV;
313 return;
314 }
315 }
316 }
317
318 /**
319 * Replace 0.0, 1.0 and 0.5 immediate constants by their
320 * respective swizzles. Simplify instructions like ADD dst, src, 0;
321 */
322 static void constant_folding(struct radeon_compiler * c, struct rc_instruction * inst)
323 {
324 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
325 unsigned int i;
326
327 /* Replace 0.0, 1.0 and 0.5 immediates by their explicit swizzles */
328 for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
329 struct rc_constant * constant;
330 struct rc_src_register newsrc;
331 int have_real_reference;
332
333 if (inst->U.I.SrcReg[src].File != RC_FILE_CONSTANT ||
334 inst->U.I.SrcReg[src].RelAddr ||
335 inst->U.I.SrcReg[src].Index >= c->Program.Constants.Count)
336 continue;
337
338 constant =
339 &c->Program.Constants.Constants[inst->U.I.SrcReg[src].Index];
340
341 if (constant->Type != RC_CONSTANT_IMMEDIATE)
342 continue;
343
344 newsrc = inst->U.I.SrcReg[src];
345 have_real_reference = 0;
346 for(unsigned int chan = 0; chan < 4; ++chan) {
347 unsigned int swz = GET_SWZ(newsrc.Swizzle, chan);
348 unsigned int newswz;
349 float imm;
350 float baseimm;
351
352 if (swz >= 4)
353 continue;
354
355 imm = constant->u.Immediate[swz];
356 baseimm = imm;
357 if (imm < 0.0)
358 baseimm = -baseimm;
359
360 if (baseimm == 0.0) {
361 newswz = RC_SWIZZLE_ZERO;
362 } else if (baseimm == 1.0) {
363 newswz = RC_SWIZZLE_ONE;
364 } else if (baseimm == 0.5 && c->has_half_swizzles) {
365 newswz = RC_SWIZZLE_HALF;
366 } else {
367 have_real_reference = 1;
368 continue;
369 }
370
371 SET_SWZ(newsrc.Swizzle, chan, newswz);
372 if (imm < 0.0 && !newsrc.Abs)
373 newsrc.Negate ^= 1 << chan;
374 }
375
376 if (!have_real_reference) {
377 newsrc.File = RC_FILE_NONE;
378 newsrc.Index = 0;
379 }
380
381 /* don't make the swizzle worse */
382 if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, newsrc) &&
383 c->SwizzleCaps->IsNative(inst->U.I.Opcode, inst->U.I.SrcReg[src]))
384 continue;
385
386 inst->U.I.SrcReg[src] = newsrc;
387 }
388
389 /* Simplify instructions based on constants */
390 if (inst->U.I.Opcode == RC_OPCODE_MAD)
391 constant_folding_mad(inst);
392
393 /* note: MAD can simplify to MUL or ADD */
394 if (inst->U.I.Opcode == RC_OPCODE_MUL)
395 constant_folding_mul(inst);
396 else if (inst->U.I.Opcode == RC_OPCODE_ADD)
397 constant_folding_add(inst);
398
399 /* In case this instruction has been converted, make sure all of the
400 * registers that are no longer used are empty. */
401 opcode = rc_get_opcode_info(inst->U.I.Opcode);
402 for(i = opcode->NumSrcRegs; i < 3; i++) {
403 memset(&inst->U.I.SrcReg[i], 0, sizeof(struct rc_src_register));
404 }
405 }
406
407 /**
408 * If src and dst use the same register, this function returns a writemask that
409 * indicates wich components are read by src. Otherwise zero is returned.
410 */
411 static unsigned int src_reads_dst_mask(struct rc_src_register src,
412 struct rc_dst_register dst)
413 {
414 if (dst.File != src.File || dst.Index != src.Index) {
415 return 0;
416 }
417 return rc_swizzle_to_writemask(src.Swizzle);
418 }
419
420 /* Return 1 if the source registers has a constant swizzle (e.g. 0, 0.5, 1.0)
421 * in any of its channels. Return 0 otherwise. */
422 static int src_has_const_swz(struct rc_src_register src) {
423 int chan;
424 for(chan = 0; chan < 4; chan++) {
425 unsigned int swz = GET_SWZ(src.Swizzle, chan);
426 if (swz == RC_SWIZZLE_ZERO || swz == RC_SWIZZLE_HALF
427 || swz == RC_SWIZZLE_ONE) {
428 return 1;
429 }
430 }
431 return 0;
432 }
433
434 static void peephole_scan_write(void * data, struct rc_instruction * inst,
435 rc_register_file file, unsigned int index, unsigned int mask)
436 {
437 struct peephole_state * s = data;
438 if(s->Inst->U.I.DstReg.File == file
439 && s->Inst->U.I.DstReg.Index == index) {
440 unsigned int common_mask = s->WriteMask & mask;
441 s->WriteMask &= ~common_mask;
442 }
443 }
444
/**
 * Walk the instructions following s->Inst and let \p presub_replace
 * rewrite every source operand that reads exactly the components written
 * by s->Inst, so that it uses the presubtract operation \p presub_opcode
 * instead.
 *
 * @param c               the compiler; supplies the end of the instruction list
 * @param s               peephole state; s->WriteMask is updated as later
 *                        instructions overwrite the tracked destination
 * @param presub_opcode   the presubtract operation the caller wants to use
 * @param presub_replace  callback performing the actual operand rewrite
 * @return the final value of can_remove; the callers in this file remove
 *         s->Inst from the program when it is non-zero.
 */
static int presub_helper(
	struct radeon_compiler * c,
	struct peephole_state * s,
	rc_presubtract_op presub_opcode,
	rc_presub_replace_fn presub_replace)
{
	struct rc_instruction * inst;
	/* Set after a successful replacement, cleared when a reader is found
	 * that cannot be rewritten. Note that it keeps its value across
	 * instructions that do not read the tracked register. */
	unsigned int can_remove = 0;
	/* Set once the presubtract sources may have been overwritten or flow
	 * control was encountered; later readers can then not be rewritten. */
	unsigned int cant_sub = 0;

	for(inst = s->Inst->Next; inst != &c->Program.Instructions;
							inst = inst->Next) {
		unsigned int i;
		unsigned char can_use_presub = 1;
		const struct rc_opcode_info * info =
					rc_get_opcode_info(inst->U.I.Opcode);
		/* XXX: There are some situations where instructions
		 * with more than 2 src registers can use the
		 * presubtract select, but to keep things simple we
		 * will disable presubtract on these instructions for
		 * now. */
		if (info->NumSrcRegs > 2 || info->HasTexture) {
			can_use_presub = 0;
		}

		/* We can't use more than one presubtract value in an
		 * instruction, unless the two presubtract operations
		 * are the same and read from the same registers. */
		/* NOTE(review): only PreSub.SrcReg[0] is compared, and only
		 * against SrcReg[1] of s->Inst -- confirm this is sufficient
		 * for every presubtract opcode. */
		if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE) {
			if (inst->U.I.PreSub.Opcode != presub_opcode
				|| inst->U.I.PreSub.SrcReg[0].File !=
					s->Inst->U.I.SrcReg[1].File
				|| inst->U.I.PreSub.SrcReg[0].Index !=
					s->Inst->U.I.SrcReg[1].Index) {
				can_use_presub = 0;
			}
		}

		/* Even if the instruction can't use a presubtract operation
		 * we still need to check if the instruction reads from
		 * s->Inst->U.I.DstReg, because if it does we must not
		 * remove s->Inst. */
		for(i = 0; i < info->NumSrcRegs; i++) {
			unsigned int mask = src_reads_dst_mask(
				inst->U.I.SrcReg[i], s->Inst->U.I.DstReg);
			/* XXX We could be more aggressive here using
			 * presubtract.  It is okay if SrcReg[i] only reads
			 * from some of the mask components. */
			if(s->Inst->U.I.DstReg.WriteMask != mask) {
				if (s->Inst->U.I.DstReg.WriteMask & mask) {
					/* Partial read: s->Inst must stay. */
					can_remove = 0;
					break;
				} else {
					/* This operand does not read the
					 * tracked components at all. */
					continue;
				}
			}
			if (cant_sub || !can_use_presub) {
				can_remove = 0;
				break;
			}
			presub_replace(s, inst, i);
			can_remove = 1;
		}
		if(!can_remove)
			break;
		rc_for_all_writes_mask(inst, peephole_scan_write, s);
		/* If all components of inst_add's destination register have
		 * been written to by subsequent instructions, the original
		 * value of the destination register is no longer valid and
		 * we can't keep doing substitutions. */
		if (!s->WriteMask){
			break;
		}
		/* Make sure this instruction doesn't write to the presubtract
		 * source. */
		/* NOTE(review): the SrcReg[1] test is masked with the write
		 * mask of inst while the SrcReg[0] test is not, so the latter
		 * triggers whenever inst's destination register matches
		 * SrcReg[0] and the swizzle maps to a non-empty mask.  This is
		 * at worst overly conservative. */
		if (inst->U.I.DstReg.WriteMask &
				src_reads_dst_mask(s->Inst->U.I.SrcReg[1],
							inst->U.I.DstReg)
				|| src_reads_dst_mask(s->Inst->U.I.SrcReg[0],
							inst->U.I.DstReg)
				|| info->IsFlowControl) {
			cant_sub = 1;
		}
	}
	return can_remove;
}
530
531 /* This function assumes that s->Inst->U.I.SrcReg[0] and
532 * s->Inst->U.I.SrcReg[1] aren't both negative. */
533 static void presub_replace_add(struct peephole_state *s,
534 struct rc_instruction * inst,
535 unsigned int src_index)
536 {
537 rc_presubtract_op presub_opcode;
538 if (s->Inst->U.I.SrcReg[1].Negate || s->Inst->U.I.SrcReg[0].Negate)
539 presub_opcode = RC_PRESUB_SUB;
540 else
541 presub_opcode = RC_PRESUB_ADD;
542
543 if (s->Inst->U.I.SrcReg[1].Negate) {
544 inst->U.I.PreSub.SrcReg[0] = s->Inst->U.I.SrcReg[1];
545 inst->U.I.PreSub.SrcReg[1] = s->Inst->U.I.SrcReg[0];
546 } else {
547 inst->U.I.PreSub.SrcReg[0] = s->Inst->U.I.SrcReg[0];
548 inst->U.I.PreSub.SrcReg[1] = s->Inst->U.I.SrcReg[1];
549 }
550 inst->U.I.PreSub.SrcReg[0].Negate = 0;
551 inst->U.I.PreSub.SrcReg[1].Negate = 0;
552 inst->U.I.PreSub.Opcode = presub_opcode;
553 inst->U.I.SrcReg[src_index] = chain_srcregs(inst->U.I.SrcReg[src_index],
554 inst->U.I.PreSub.SrcReg[0]);
555 inst->U.I.SrcReg[src_index].File = RC_FILE_PRESUB;
556 inst->U.I.SrcReg[src_index].Index = presub_opcode;
557 }
558
559 static int is_presub_candidate(struct rc_instruction * inst)
560 {
561 const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode);
562 unsigned int i;
563
564 if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE || inst->U.I.SaturateMode)
565 return 0;
566
567 for(i = 0; i < info->NumSrcRegs; i++) {
568 if (src_reads_dst_mask(inst->U.I.SrcReg[i], inst->U.I.DstReg))
569 return 0;
570 }
571 return 1;
572 }
573
574 static int peephole_add_presub_add(
575 struct radeon_compiler * c,
576 struct rc_instruction * inst_add)
577 {
578 struct rc_src_register * src0 = NULL;
579 struct rc_src_register * src1 = NULL;
580 unsigned int i;
581 struct peephole_state s;
582
583 if (!is_presub_candidate(inst_add))
584 return 0;
585
586 if (inst_add->U.I.SrcReg[0].Swizzle != inst_add->U.I.SrcReg[1].Swizzle)
587 return 0;
588
589 /* src0 and src1 can't have absolute values only one can be negative and they must be all negative or all positive. */
590 for (i = 0; i < 2; i++) {
591 if (inst_add->U.I.SrcReg[i].Abs)
592 return 0;
593 if ((inst_add->U.I.SrcReg[i].Negate
594 & inst_add->U.I.DstReg.WriteMask) ==
595 inst_add->U.I.DstReg.WriteMask) {
596 src0 = &inst_add->U.I.SrcReg[i];
597 } else if (!src1) {
598 src1 = &inst_add->U.I.SrcReg[i];
599 } else {
600 src0 = &inst_add->U.I.SrcReg[i];
601 }
602 }
603
604 if (!src1)
605 return 0;
606
607 s.Inst = inst_add;
608 s.WriteMask = inst_add->U.I.DstReg.WriteMask;
609 if (presub_helper(c, &s, RC_PRESUB_ADD, presub_replace_add)) {
610 rc_remove_instruction(inst_add);
611 return 1;
612 }
613 return 0;
614 }
615
616 static void presub_replace_inv(struct peephole_state * s,
617 struct rc_instruction * inst,
618 unsigned int src_index)
619 {
620 /* We must be careful not to modify s->Inst, since it
621 * is possible it will remain part of the program.
622 * XXX Maybe pass a struct instead of a pointer for s->Inst.*/
623 inst->U.I.PreSub.SrcReg[0] = s->Inst->U.I.SrcReg[1];
624 inst->U.I.PreSub.SrcReg[0].Negate = 0;
625 inst->U.I.PreSub.Opcode = RC_PRESUB_INV;
626 inst->U.I.SrcReg[src_index] = chain_srcregs(inst->U.I.SrcReg[src_index],
627 inst->U.I.PreSub.SrcReg[0]);
628
629 inst->U.I.SrcReg[src_index].File = RC_FILE_PRESUB;
630 inst->U.I.SrcReg[src_index].Index = RC_PRESUB_INV;
631 }
632
633 /**
634 * PRESUB_INV: ADD TEMP[0], none.1, -TEMP[1]
635 * Use the presubtract 1 - src0 for all readers of TEMP[0]. The first source
636 * of the add instruction must have the constatnt 1 swizzle. This function
637 * does not check const registers to see if their value is 1.0, so it should
638 * be called after the constant_folding optimization.
639 * @return
640 * 0 if the ADD instruction is still part of the program.
641 * 1 if the ADD instruction is no longer part of the program.
642 */
643 static int peephole_add_presub_inv(
644 struct radeon_compiler * c,
645 struct rc_instruction * inst_add)
646 {
647 unsigned int i, swz, mask;
648 struct peephole_state s;
649
650 if (!is_presub_candidate(inst_add))
651 return 0;
652
653 mask = inst_add->U.I.DstReg.WriteMask;
654
655 /* Check if src0 is 1. */
656 /* XXX It would be nice to use is_src_uniform_constant here, but that
657 * function only works if the register's file is RC_FILE_NONE */
658 for(i = 0; i < 4; i++ ) {
659 swz = GET_SWZ(inst_add->U.I.SrcReg[0].Swizzle, i);
660 if(((1 << i) & inst_add->U.I.DstReg.WriteMask)
661 && swz != RC_SWIZZLE_ONE) {
662 return 0;
663 }
664 }
665
666 /* Check src1. */
667 if ((inst_add->U.I.SrcReg[1].Negate & inst_add->U.I.DstReg.WriteMask) !=
668 inst_add->U.I.DstReg.WriteMask
669 || inst_add->U.I.SrcReg[1].Abs
670 || (inst_add->U.I.SrcReg[1].File != RC_FILE_TEMPORARY
671 && inst_add->U.I.SrcReg[1].File != RC_FILE_CONSTANT)
672 || src_has_const_swz(inst_add->U.I.SrcReg[1])) {
673
674 return 0;
675 }
676
677 /* Setup the peephole_state information. */
678 s.Inst = inst_add;
679 s.WriteMask = inst_add->U.I.DstReg.WriteMask;
680
681 if (presub_helper(c, &s, RC_PRESUB_INV, presub_replace_inv)) {
682 rc_remove_instruction(inst_add);
683 return 1;
684 }
685 return 0;
686 }
687
688 /**
689 * @return
690 * 0 if inst is still part of the program.
691 * 1 if inst is no longer part of the program.
692 */
693 static int peephole(struct radeon_compiler * c, struct rc_instruction * inst)
694 {
695 switch(inst->U.I.Opcode){
696 case RC_OPCODE_ADD:
697 if (c->has_presub) {
698 if(peephole_add_presub_inv(c, inst))
699 return 1;
700 if(peephole_add_presub_add(c, inst))
701 return 1;
702 }
703 break;
704 default:
705 break;
706 }
707 return 0;
708 }
709
710 void rc_optimize(struct radeon_compiler * c, void *user)
711 {
712 struct rc_instruction * inst = c->Program.Instructions.Next;
713 while(inst != &c->Program.Instructions) {
714 struct rc_instruction * cur = inst;
715 inst = inst->Next;
716
717 constant_folding(c, cur);
718
719 if(peephole(c, cur))
720 continue;
721
722 if (cur->U.I.Opcode == RC_OPCODE_MOV) {
723 copy_propagate(c, cur);
724 /* cur may no longer be part of the program */
725 }
726 }
727 }