r300g: implement TRUNC correctly
[mesa.git] / src / gallium / drivers / r300 / compiler / radeon_optimize.c
1 /*
2 * Copyright (C) 2009 Nicolai Haehnle.
3 * Copyright 2010 Tom Stellard <tstellar@gmail.com>
4 *
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining
8 * a copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sublicense, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial
17 * portions of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
22 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
23 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
24 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
25 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 */
28
29 #include "radeon_dataflow.h"
30
31 #include "radeon_compiler.h"
32 #include "radeon_compiler_util.h"
33 #include "radeon_list.h"
34 #include "radeon_swizzle.h"
35 #include "radeon_variable.h"
36
37 struct src_clobbered_reads_cb_data {
38 rc_register_file File;
39 unsigned int Index;
40 unsigned int Mask;
41 struct rc_reader_data * ReaderData;
42 };
43
/**
 * Callback used by presub_helper() to rewrite one reader source.
 * It is invoked as fn(inst_add, reader_instruction, source_index).
 */
typedef void (*rc_presub_replace_fn)(
	struct rc_instruction *,
	struct rc_instruction *,
	unsigned int);
47
48 static struct rc_src_register chain_srcregs(struct rc_src_register outer, struct rc_src_register inner)
49 {
50 struct rc_src_register combine;
51 combine.File = inner.File;
52 combine.Index = inner.Index;
53 combine.RelAddr = inner.RelAddr;
54 if (outer.Abs) {
55 combine.Abs = 1;
56 combine.Negate = outer.Negate;
57 } else {
58 combine.Abs = inner.Abs;
59 combine.Negate = swizzle_mask(outer.Swizzle, inner.Negate);
60 combine.Negate ^= outer.Negate;
61 }
62 combine.Swizzle = combine_swizzles(inner.Swizzle, outer.Swizzle);
63 return combine;
64 }
65
66 static void copy_propagate_scan_read(void * data, struct rc_instruction * inst,
67 struct rc_src_register * src)
68 {
69 rc_register_file file = src->File;
70 struct rc_reader_data * reader_data = data;
71
72 if(!rc_inst_can_use_presub(inst,
73 reader_data->Writer->U.I.PreSub.Opcode,
74 rc_swizzle_to_writemask(src->Swizzle),
75 src,
76 &reader_data->Writer->U.I.PreSub.SrcReg[0],
77 &reader_data->Writer->U.I.PreSub.SrcReg[1])) {
78 reader_data->Abort = 1;
79 return;
80 }
81
82 /* XXX This could probably be handled better. */
83 if (file == RC_FILE_ADDRESS) {
84 reader_data->Abort = 1;
85 return;
86 }
87
88 /* These instructions cannot read from the constants file.
89 * see radeonTransformTEX()
90 */
91 if(reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_TEMPORARY &&
92 reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_INPUT &&
93 (inst->U.I.Opcode == RC_OPCODE_TEX ||
94 inst->U.I.Opcode == RC_OPCODE_TXB ||
95 inst->U.I.Opcode == RC_OPCODE_TXP ||
96 inst->U.I.Opcode == RC_OPCODE_TXD ||
97 inst->U.I.Opcode == RC_OPCODE_TXL ||
98 inst->U.I.Opcode == RC_OPCODE_KIL)){
99 reader_data->Abort = 1;
100 return;
101 }
102 }
103
104 static void src_clobbered_reads_cb(
105 void * data,
106 struct rc_instruction * inst,
107 struct rc_src_register * src)
108 {
109 struct src_clobbered_reads_cb_data * sc_data = data;
110
111 if (src->File == sc_data->File
112 && src->Index == sc_data->Index
113 && (rc_swizzle_to_writemask(src->Swizzle) & sc_data->Mask)) {
114
115 sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW;
116 }
117
118 if (src->RelAddr && sc_data->File == RC_FILE_ADDRESS) {
119 sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW;
120 }
121 }
122
123 static void is_src_clobbered_scan_write(
124 void * data,
125 struct rc_instruction * inst,
126 rc_register_file file,
127 unsigned int index,
128 unsigned int mask)
129 {
130 struct src_clobbered_reads_cb_data sc_data;
131 struct rc_reader_data * reader_data = data;
132 sc_data.File = file;
133 sc_data.Index = index;
134 sc_data.Mask = mask;
135 sc_data.ReaderData = reader_data;
136 rc_for_all_reads_src(reader_data->Writer,
137 src_clobbered_reads_cb, &sc_data);
138 }
139
140 static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * inst_mov)
141 {
142 struct rc_reader_data reader_data;
143 unsigned int i;
144
145 if (inst_mov->U.I.DstReg.File != RC_FILE_TEMPORARY ||
146 inst_mov->U.I.WriteALUResult ||
147 inst_mov->U.I.SaturateMode)
148 return;
149
150 /* Get a list of all the readers of this MOV instruction. */
151 reader_data.ExitOnAbort = 1;
152 rc_get_readers(c, inst_mov, &reader_data,
153 copy_propagate_scan_read, NULL,
154 is_src_clobbered_scan_write);
155
156 if (reader_data.Abort || reader_data.ReaderCount == 0)
157 return;
158
159 /* Propagate the MOV instruction. */
160 for (i = 0; i < reader_data.ReaderCount; i++) {
161 struct rc_instruction * inst = reader_data.Readers[i].Inst;
162 *reader_data.Readers[i].U.I.Src = chain_srcregs(*reader_data.Readers[i].U.I.Src, inst_mov->U.I.SrcReg[0]);
163
164 if (inst_mov->U.I.SrcReg[0].File == RC_FILE_PRESUB)
165 inst->U.I.PreSub = inst_mov->U.I.PreSub;
166 }
167
168 /* Finally, remove the original MOV instruction */
169 rc_remove_instruction(inst_mov);
170 }
171
172 /**
173 * Check if a source register is actually always the same
174 * swizzle constant.
175 */
176 static int is_src_uniform_constant(struct rc_src_register src,
177 rc_swizzle * pswz, unsigned int * pnegate)
178 {
179 int have_used = 0;
180
181 if (src.File != RC_FILE_NONE) {
182 *pswz = 0;
183 return 0;
184 }
185
186 for(unsigned int chan = 0; chan < 4; ++chan) {
187 unsigned int swz = GET_SWZ(src.Swizzle, chan);
188 if (swz < 4) {
189 *pswz = 0;
190 return 0;
191 }
192 if (swz == RC_SWIZZLE_UNUSED)
193 continue;
194
195 if (!have_used) {
196 *pswz = swz;
197 *pnegate = GET_BIT(src.Negate, chan);
198 have_used = 1;
199 } else {
200 if (swz != *pswz || *pnegate != GET_BIT(src.Negate, chan)) {
201 *pswz = 0;
202 return 0;
203 }
204 }
205 }
206
207 return 1;
208 }
209
210 static void constant_folding_mad(struct rc_instruction * inst)
211 {
212 rc_swizzle swz = 0;
213 unsigned int negate= 0;
214
215 if (is_src_uniform_constant(inst->U.I.SrcReg[2], &swz, &negate)) {
216 if (swz == RC_SWIZZLE_ZERO) {
217 inst->U.I.Opcode = RC_OPCODE_MUL;
218 return;
219 }
220 }
221
222 if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
223 if (swz == RC_SWIZZLE_ONE) {
224 inst->U.I.Opcode = RC_OPCODE_ADD;
225 if (negate)
226 inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
227 inst->U.I.SrcReg[1] = inst->U.I.SrcReg[2];
228 return;
229 } else if (swz == RC_SWIZZLE_ZERO) {
230 inst->U.I.Opcode = RC_OPCODE_MOV;
231 inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
232 return;
233 }
234 }
235
236 if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
237 if (swz == RC_SWIZZLE_ONE) {
238 inst->U.I.Opcode = RC_OPCODE_ADD;
239 if (negate)
240 inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW;
241 inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
242 return;
243 } else if (swz == RC_SWIZZLE_ZERO) {
244 inst->U.I.Opcode = RC_OPCODE_MOV;
245 inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
246 return;
247 }
248 }
249 }
250
251 static void constant_folding_mul(struct rc_instruction * inst)
252 {
253 rc_swizzle swz = 0;
254 unsigned int negate = 0;
255
256 if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
257 if (swz == RC_SWIZZLE_ONE) {
258 inst->U.I.Opcode = RC_OPCODE_MOV;
259 inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1];
260 if (negate)
261 inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
262 return;
263 } else if (swz == RC_SWIZZLE_ZERO) {
264 inst->U.I.Opcode = RC_OPCODE_MOV;
265 inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
266 return;
267 }
268 }
269
270 if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
271 if (swz == RC_SWIZZLE_ONE) {
272 inst->U.I.Opcode = RC_OPCODE_MOV;
273 if (negate)
274 inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
275 return;
276 } else if (swz == RC_SWIZZLE_ZERO) {
277 inst->U.I.Opcode = RC_OPCODE_MOV;
278 inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
279 return;
280 }
281 }
282 }
283
284 static void constant_folding_add(struct rc_instruction * inst)
285 {
286 rc_swizzle swz = 0;
287 unsigned int negate = 0;
288
289 if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
290 if (swz == RC_SWIZZLE_ZERO) {
291 inst->U.I.Opcode = RC_OPCODE_MOV;
292 inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1];
293 return;
294 }
295 }
296
297 if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
298 if (swz == RC_SWIZZLE_ZERO) {
299 inst->U.I.Opcode = RC_OPCODE_MOV;
300 return;
301 }
302 }
303 }
304
305 /**
306 * Replace 0.0, 1.0 and 0.5 immediate constants by their
307 * respective swizzles. Simplify instructions like ADD dst, src, 0;
308 */
309 static void constant_folding(struct radeon_compiler * c, struct rc_instruction * inst)
310 {
311 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
312 unsigned int i;
313
314 /* Replace 0.0, 1.0 and 0.5 immediates by their explicit swizzles */
315 for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
316 struct rc_constant * constant;
317 struct rc_src_register newsrc;
318 int have_real_reference;
319 unsigned int chan;
320
321 /* If there are only 0, 0.5, 1, or _ swizzles, mark the source as a constant. */
322 for (chan = 0; chan < 4; ++chan)
323 if (GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan) <= 3)
324 break;
325 if (chan == 4) {
326 inst->U.I.SrcReg[src].File = RC_FILE_NONE;
327 continue;
328 }
329
330 /* Convert immediates to swizzles. */
331 if (inst->U.I.SrcReg[src].File != RC_FILE_CONSTANT ||
332 inst->U.I.SrcReg[src].RelAddr ||
333 inst->U.I.SrcReg[src].Index >= c->Program.Constants.Count)
334 continue;
335
336 constant =
337 &c->Program.Constants.Constants[inst->U.I.SrcReg[src].Index];
338
339 if (constant->Type != RC_CONSTANT_IMMEDIATE)
340 continue;
341
342 newsrc = inst->U.I.SrcReg[src];
343 have_real_reference = 0;
344 for (chan = 0; chan < 4; ++chan) {
345 unsigned int swz = GET_SWZ(newsrc.Swizzle, chan);
346 unsigned int newswz;
347 float imm;
348 float baseimm;
349
350 if (swz >= 4)
351 continue;
352
353 imm = constant->u.Immediate[swz];
354 baseimm = imm;
355 if (imm < 0.0)
356 baseimm = -baseimm;
357
358 if (baseimm == 0.0) {
359 newswz = RC_SWIZZLE_ZERO;
360 } else if (baseimm == 1.0) {
361 newswz = RC_SWIZZLE_ONE;
362 } else if (baseimm == 0.5 && c->has_half_swizzles) {
363 newswz = RC_SWIZZLE_HALF;
364 } else {
365 have_real_reference = 1;
366 continue;
367 }
368
369 SET_SWZ(newsrc.Swizzle, chan, newswz);
370 if (imm < 0.0 && !newsrc.Abs)
371 newsrc.Negate ^= 1 << chan;
372 }
373
374 if (!have_real_reference) {
375 newsrc.File = RC_FILE_NONE;
376 newsrc.Index = 0;
377 }
378
379 /* don't make the swizzle worse */
380 if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, newsrc) &&
381 c->SwizzleCaps->IsNative(inst->U.I.Opcode, inst->U.I.SrcReg[src]))
382 continue;
383
384 inst->U.I.SrcReg[src] = newsrc;
385 }
386
387 /* Simplify instructions based on constants */
388 if (inst->U.I.Opcode == RC_OPCODE_MAD)
389 constant_folding_mad(inst);
390
391 /* note: MAD can simplify to MUL or ADD */
392 if (inst->U.I.Opcode == RC_OPCODE_MUL)
393 constant_folding_mul(inst);
394 else if (inst->U.I.Opcode == RC_OPCODE_ADD)
395 constant_folding_add(inst);
396
397 /* In case this instruction has been converted, make sure all of the
398 * registers that are no longer used are empty. */
399 opcode = rc_get_opcode_info(inst->U.I.Opcode);
400 for(i = opcode->NumSrcRegs; i < 3; i++) {
401 memset(&inst->U.I.SrcReg[i], 0, sizeof(struct rc_src_register));
402 }
403 }
404
405 /**
406 * If src and dst use the same register, this function returns a writemask that
407 * indicates wich components are read by src. Otherwise zero is returned.
408 */
409 static unsigned int src_reads_dst_mask(struct rc_src_register src,
410 struct rc_dst_register dst)
411 {
412 if (dst.File != src.File || dst.Index != src.Index) {
413 return 0;
414 }
415 return rc_swizzle_to_writemask(src.Swizzle);
416 }
417
418 /* Return 1 if the source registers has a constant swizzle (e.g. 0, 0.5, 1.0)
419 * in any of its channels. Return 0 otherwise. */
420 static int src_has_const_swz(struct rc_src_register src) {
421 int chan;
422 for(chan = 0; chan < 4; chan++) {
423 unsigned int swz = GET_SWZ(src.Swizzle, chan);
424 if (swz == RC_SWIZZLE_ZERO || swz == RC_SWIZZLE_HALF
425 || swz == RC_SWIZZLE_ONE) {
426 return 1;
427 }
428 }
429 return 0;
430 }
431
432 static void presub_scan_read(
433 void * data,
434 struct rc_instruction * inst,
435 struct rc_src_register * src)
436 {
437 struct rc_reader_data * reader_data = data;
438 rc_presubtract_op * presub_opcode = reader_data->CbData;
439
440 if (!rc_inst_can_use_presub(inst, *presub_opcode,
441 reader_data->Writer->U.I.DstReg.WriteMask,
442 src,
443 &reader_data->Writer->U.I.SrcReg[0],
444 &reader_data->Writer->U.I.SrcReg[1])) {
445 reader_data->Abort = 1;
446 return;
447 }
448 }
449
450 static int presub_helper(
451 struct radeon_compiler * c,
452 struct rc_instruction * inst_add,
453 rc_presubtract_op presub_opcode,
454 rc_presub_replace_fn presub_replace)
455 {
456 struct rc_reader_data reader_data;
457 unsigned int i;
458 rc_presubtract_op cb_op = presub_opcode;
459
460 reader_data.CbData = &cb_op;
461 reader_data.ExitOnAbort = 1;
462 rc_get_readers(c, inst_add, &reader_data, presub_scan_read, NULL,
463 is_src_clobbered_scan_write);
464
465 if (reader_data.Abort || reader_data.ReaderCount == 0)
466 return 0;
467
468 for(i = 0; i < reader_data.ReaderCount; i++) {
469 unsigned int src_index;
470 struct rc_reader reader = reader_data.Readers[i];
471 const struct rc_opcode_info * info =
472 rc_get_opcode_info(reader.Inst->U.I.Opcode);
473
474 for (src_index = 0; src_index < info->NumSrcRegs; src_index++) {
475 if (&reader.Inst->U.I.SrcReg[src_index] == reader.U.I.Src)
476 presub_replace(inst_add, reader.Inst, src_index);
477 }
478 }
479 return 1;
480 }
481
482 /* This function assumes that inst_add->U.I.SrcReg[0] and
483 * inst_add->U.I.SrcReg[1] aren't both negative. */
484 static void presub_replace_add(
485 struct rc_instruction * inst_add,
486 struct rc_instruction * inst_reader,
487 unsigned int src_index)
488 {
489 rc_presubtract_op presub_opcode;
490 if (inst_add->U.I.SrcReg[1].Negate || inst_add->U.I.SrcReg[0].Negate)
491 presub_opcode = RC_PRESUB_SUB;
492 else
493 presub_opcode = RC_PRESUB_ADD;
494
495 if (inst_add->U.I.SrcReg[1].Negate) {
496 inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1];
497 inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[0];
498 } else {
499 inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[0];
500 inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[1];
501 }
502 inst_reader->U.I.PreSub.SrcReg[0].Negate = 0;
503 inst_reader->U.I.PreSub.SrcReg[1].Negate = 0;
504 inst_reader->U.I.PreSub.Opcode = presub_opcode;
505 inst_reader->U.I.SrcReg[src_index] =
506 chain_srcregs(inst_reader->U.I.SrcReg[src_index],
507 inst_reader->U.I.PreSub.SrcReg[0]);
508 inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB;
509 inst_reader->U.I.SrcReg[src_index].Index = presub_opcode;
510 }
511
512 static int is_presub_candidate(
513 struct radeon_compiler * c,
514 struct rc_instruction * inst)
515 {
516 const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode);
517 unsigned int i;
518 unsigned int is_constant[2] = {0, 0};
519
520 assert(inst->U.I.Opcode == RC_OPCODE_ADD);
521
522 if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE
523 || inst->U.I.SaturateMode
524 || inst->U.I.WriteALUResult
525 || inst->U.I.Omod) {
526 return 0;
527 }
528
529 /* If both sources use a constant swizzle, then we can't convert it to
530 * a presubtract operation. In fact for the ADD and SUB presubtract
531 * operations neither source can contain a constant swizzle. This
532 * specific case is checked in peephole_add_presub_add() when
533 * we make sure the swizzles for both sources are equal, so we
534 * don't need to worry about it here. */
535 for (i = 0; i < 2; i++) {
536 int chan;
537 for (chan = 0; chan < 4; chan++) {
538 rc_swizzle swz =
539 get_swz(inst->U.I.SrcReg[i].Swizzle, chan);
540 if (swz == RC_SWIZZLE_ONE
541 || swz == RC_SWIZZLE_ZERO
542 || swz == RC_SWIZZLE_HALF) {
543 is_constant[i] = 1;
544 }
545 }
546 }
547 if (is_constant[0] && is_constant[1])
548 return 0;
549
550 for(i = 0; i < info->NumSrcRegs; i++) {
551 struct rc_src_register src = inst->U.I.SrcReg[i];
552 if (src_reads_dst_mask(src, inst->U.I.DstReg))
553 return 0;
554
555 src.File = RC_FILE_PRESUB;
556 if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, src))
557 return 0;
558 }
559 return 1;
560 }
561
562 static int peephole_add_presub_add(
563 struct radeon_compiler * c,
564 struct rc_instruction * inst_add)
565 {
566 unsigned dstmask = inst_add->U.I.DstReg.WriteMask;
567 unsigned src0_neg = inst_add->U.I.SrcReg[0].Negate & dstmask;
568 unsigned src1_neg = inst_add->U.I.SrcReg[1].Negate & dstmask;
569
570 if (inst_add->U.I.SrcReg[0].Swizzle != inst_add->U.I.SrcReg[1].Swizzle)
571 return 0;
572
573 /* src0 and src1 can't have absolute values */
574 if (inst_add->U.I.SrcReg[0].Abs || inst_add->U.I.SrcReg[1].Abs)
575 return 0;
576
577 /* presub_replace_add() assumes only one is negative */
578 if (inst_add->U.I.SrcReg[0].Negate && inst_add->U.I.SrcReg[1].Negate)
579 return 0;
580
581 /* if src0 is negative, at least all bits of dstmask have to be set */
582 if (inst_add->U.I.SrcReg[0].Negate && src0_neg != dstmask)
583 return 0;
584
585 /* if src1 is negative, at least all bits of dstmask have to be set */
586 if (inst_add->U.I.SrcReg[1].Negate && src1_neg != dstmask)
587 return 0;
588
589 if (!is_presub_candidate(c, inst_add))
590 return 0;
591
592 if (presub_helper(c, inst_add, RC_PRESUB_ADD, presub_replace_add)) {
593 rc_remove_instruction(inst_add);
594 return 1;
595 }
596 return 0;
597 }
598
599 static void presub_replace_inv(
600 struct rc_instruction * inst_add,
601 struct rc_instruction * inst_reader,
602 unsigned int src_index)
603 {
604 /* We must be careful not to modify inst_add, since it
605 * is possible it will remain part of the program.*/
606 inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1];
607 inst_reader->U.I.PreSub.SrcReg[0].Negate = 0;
608 inst_reader->U.I.PreSub.Opcode = RC_PRESUB_INV;
609 inst_reader->U.I.SrcReg[src_index] = chain_srcregs(inst_reader->U.I.SrcReg[src_index],
610 inst_reader->U.I.PreSub.SrcReg[0]);
611
612 inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB;
613 inst_reader->U.I.SrcReg[src_index].Index = RC_PRESUB_INV;
614 }
615
616 /**
617 * PRESUB_INV: ADD TEMP[0], none.1, -TEMP[1]
618 * Use the presubtract 1 - src0 for all readers of TEMP[0]. The first source
619 * of the add instruction must have the constatnt 1 swizzle. This function
620 * does not check const registers to see if their value is 1.0, so it should
621 * be called after the constant_folding optimization.
622 * @return
623 * 0 if the ADD instruction is still part of the program.
624 * 1 if the ADD instruction is no longer part of the program.
625 */
626 static int peephole_add_presub_inv(
627 struct radeon_compiler * c,
628 struct rc_instruction * inst_add)
629 {
630 unsigned int i, swz;
631
632 if (!is_presub_candidate(c, inst_add))
633 return 0;
634
635 /* Check if src0 is 1. */
636 /* XXX It would be nice to use is_src_uniform_constant here, but that
637 * function only works if the register's file is RC_FILE_NONE */
638 for(i = 0; i < 4; i++ ) {
639 swz = GET_SWZ(inst_add->U.I.SrcReg[0].Swizzle, i);
640 if(((1 << i) & inst_add->U.I.DstReg.WriteMask)
641 && swz != RC_SWIZZLE_ONE) {
642 return 0;
643 }
644 }
645
646 /* Check src1. */
647 if ((inst_add->U.I.SrcReg[1].Negate & inst_add->U.I.DstReg.WriteMask) !=
648 inst_add->U.I.DstReg.WriteMask
649 || inst_add->U.I.SrcReg[1].Abs
650 || (inst_add->U.I.SrcReg[1].File != RC_FILE_TEMPORARY
651 && inst_add->U.I.SrcReg[1].File != RC_FILE_CONSTANT)
652 || src_has_const_swz(inst_add->U.I.SrcReg[1])) {
653
654 return 0;
655 }
656
657 if (presub_helper(c, inst_add, RC_PRESUB_INV, presub_replace_inv)) {
658 rc_remove_instruction(inst_add);
659 return 1;
660 }
661 return 0;
662 }
663
/**
 * State shared by the omod filter callbacks used in peephole_mul_omod().
 */
struct peephole_mul_cb_data {
	struct rc_dst_register * Writer; /* destination register being tracked */
	unsigned int Clobbered;          /* set to 1 when a conflicting access is seen */
};
668
669 static void omod_filter_reader_cb(
670 void * userdata,
671 struct rc_instruction * inst,
672 rc_register_file file,
673 unsigned int index,
674 unsigned int mask)
675 {
676 struct peephole_mul_cb_data * d = userdata;
677 if (rc_src_reads_dst_mask(file, mask, index,
678 d->Writer->File, d->Writer->Index, d->Writer->WriteMask)) {
679
680 d->Clobbered = 1;
681 }
682 }
683
684 static void omod_filter_writer_cb(
685 void * userdata,
686 struct rc_instruction * inst,
687 rc_register_file file,
688 unsigned int index,
689 unsigned int mask)
690 {
691 struct peephole_mul_cb_data * d = userdata;
692 if (file == d->Writer->File && index == d->Writer->Index &&
693 (mask & d->Writer->WriteMask)) {
694 d->Clobbered = 1;
695 }
696 }
697
698 static int peephole_mul_omod(
699 struct radeon_compiler * c,
700 struct rc_instruction * inst_mul,
701 struct rc_list * var_list)
702 {
703 unsigned int chan = 0, swz, i;
704 int const_index = -1;
705 int temp_index = -1;
706 float const_value;
707 rc_omod_op omod_op = RC_OMOD_DISABLE;
708 struct rc_list * writer_list;
709 struct rc_variable * var;
710 struct peephole_mul_cb_data cb_data;
711
712 for (i = 0; i < 2; i++) {
713 unsigned int j;
714 if (inst_mul->U.I.SrcReg[i].File != RC_FILE_CONSTANT
715 && inst_mul->U.I.SrcReg[i].File != RC_FILE_TEMPORARY) {
716 return 0;
717 }
718 if (inst_mul->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) {
719 if (temp_index != -1) {
720 /* The instruction has two temp sources */
721 return 0;
722 } else {
723 temp_index = i;
724 continue;
725 }
726 }
727 /* If we get this far Src[i] must be a constant src */
728 if (inst_mul->U.I.SrcReg[i].Negate) {
729 return 0;
730 }
731 /* The constant src needs to read from the same swizzle */
732 swz = RC_SWIZZLE_UNUSED;
733 chan = 0;
734 for (j = 0; j < 4; j++) {
735 unsigned int j_swz =
736 GET_SWZ(inst_mul->U.I.SrcReg[i].Swizzle, j);
737 if (j_swz == RC_SWIZZLE_UNUSED) {
738 continue;
739 }
740 if (swz == RC_SWIZZLE_UNUSED) {
741 swz = j_swz;
742 chan = j;
743 } else if (j_swz != swz) {
744 return 0;
745 }
746 }
747
748 if (const_index != -1) {
749 /* The instruction has two constant sources */
750 return 0;
751 } else {
752 const_index = i;
753 }
754 }
755
756 if (!rc_src_reg_is_immediate(c, inst_mul->U.I.SrcReg[const_index].File,
757 inst_mul->U.I.SrcReg[const_index].Index)) {
758 return 0;
759 }
760 const_value = rc_get_constant_value(c,
761 inst_mul->U.I.SrcReg[const_index].Index,
762 inst_mul->U.I.SrcReg[const_index].Swizzle,
763 inst_mul->U.I.SrcReg[const_index].Negate,
764 chan);
765
766 if (const_value == 2.0f) {
767 omod_op = RC_OMOD_MUL_2;
768 } else if (const_value == 4.0f) {
769 omod_op = RC_OMOD_MUL_4;
770 } else if (const_value == 8.0f) {
771 omod_op = RC_OMOD_MUL_8;
772 } else if (const_value == (1.0f / 2.0f)) {
773 omod_op = RC_OMOD_DIV_2;
774 } else if (const_value == (1.0f / 4.0f)) {
775 omod_op = RC_OMOD_DIV_4;
776 } else if (const_value == (1.0f / 8.0f)) {
777 omod_op = RC_OMOD_DIV_8;
778 } else {
779 return 0;
780 }
781
782 writer_list = rc_variable_list_get_writers_one_reader(var_list,
783 RC_INSTRUCTION_NORMAL, &inst_mul->U.I.SrcReg[temp_index]);
784
785 if (!writer_list) {
786 return 0;
787 }
788
789 cb_data.Clobbered = 0;
790 cb_data.Writer = &inst_mul->U.I.DstReg;
791 for (var = writer_list->Item; var; var = var->Friend) {
792 struct rc_instruction * inst;
793 const struct rc_opcode_info * info = rc_get_opcode_info(
794 var->Inst->U.I.Opcode);
795 if (info->HasTexture) {
796 return 0;
797 }
798 if (var->Inst->U.I.SaturateMode != RC_SATURATE_NONE) {
799 return 0;
800 }
801 for (inst = inst_mul->Prev; inst != var->Inst;
802 inst = inst->Prev) {
803 rc_for_all_reads_mask(inst, omod_filter_reader_cb,
804 &cb_data);
805 rc_for_all_writes_mask(inst, omod_filter_writer_cb,
806 &cb_data);
807 if (cb_data.Clobbered) {
808 break;
809 }
810 }
811 }
812
813 if (cb_data.Clobbered) {
814 return 0;
815 }
816
817 /* Rewrite the instructions */
818 for (var = writer_list->Item; var; var = var->Friend) {
819 struct rc_variable * writer = writer_list->Item;
820 unsigned conversion_swizzle = rc_make_conversion_swizzle(
821 writer->Inst->U.I.DstReg.WriteMask,
822 inst_mul->U.I.DstReg.WriteMask);
823 writer->Inst->U.I.Omod = omod_op;
824 writer->Inst->U.I.DstReg.File = inst_mul->U.I.DstReg.File;
825 writer->Inst->U.I.DstReg.Index = inst_mul->U.I.DstReg.Index;
826 rc_normal_rewrite_writemask(writer->Inst, conversion_swizzle);
827 writer->Inst->U.I.SaturateMode = inst_mul->U.I.SaturateMode;
828 }
829
830 rc_remove_instruction(inst_mul);
831
832 return 1;
833 }
834
835 /**
836 * @return
837 * 0 if inst is still part of the program.
838 * 1 if inst is no longer part of the program.
839 */
840 static int peephole(struct radeon_compiler * c, struct rc_instruction * inst)
841 {
842 switch(inst->U.I.Opcode){
843 case RC_OPCODE_ADD:
844 if (c->has_presub) {
845 if(peephole_add_presub_inv(c, inst))
846 return 1;
847 if(peephole_add_presub_add(c, inst))
848 return 1;
849 }
850 break;
851 default:
852 break;
853 }
854 return 0;
855 }
856
857 void rc_optimize(struct radeon_compiler * c, void *user)
858 {
859 struct rc_instruction * inst = c->Program.Instructions.Next;
860 struct rc_list * var_list;
861 while(inst != &c->Program.Instructions) {
862 struct rc_instruction * cur = inst;
863 inst = inst->Next;
864
865 constant_folding(c, cur);
866
867 if(peephole(c, cur))
868 continue;
869
870 if (cur->U.I.Opcode == RC_OPCODE_MOV) {
871 copy_propagate(c, cur);
872 /* cur may no longer be part of the program */
873 }
874 }
875
876 if (!c->has_omod) {
877 return;
878 }
879
880 inst = c->Program.Instructions.Next;
881 while(inst != &c->Program.Instructions) {
882 struct rc_instruction * cur = inst;
883 inst = inst->Next;
884 if (cur->U.I.Opcode == RC_OPCODE_MUL) {
885 var_list = rc_get_variables(c);
886 peephole_mul_omod(c, cur, var_list);
887 }
888 }
889 }