gallium: Add capability for ARB_robust_buffer_access_behavior.
[mesa.git] / src / gallium / drivers / r300 / compiler / radeon_optimize.c
1 /*
2 * Copyright (C) 2009 Nicolai Haehnle.
3 * Copyright 2010 Tom Stellard <tstellar@gmail.com>
4 *
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining
8 * a copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sublicense, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial
17 * portions of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
22 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
23 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
24 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
25 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 */
28
29 #include "radeon_dataflow.h"
30
31 #include "radeon_compiler.h"
32 #include "radeon_compiler_util.h"
33 #include "radeon_list.h"
34 #include "radeon_swizzle.h"
35 #include "radeon_variable.h"
36
37 struct src_clobbered_reads_cb_data {
38 rc_register_file File;
39 unsigned int Index;
40 unsigned int Mask;
41 struct rc_reader_data * ReaderData;
42 };
43
/**
 * Callback used by presub_helper() to rewrite one source of a reader.
 * It receives the ADD instruction being folded, the instruction that reads
 * its result and the index of the reader's source operand to rewrite.
 */
typedef void (*rc_presub_replace_fn)(
	struct rc_instruction * inst_add,
	struct rc_instruction * inst_reader,
	unsigned int src_index);
47
48 static struct rc_src_register chain_srcregs(struct rc_src_register outer, struct rc_src_register inner)
49 {
50 struct rc_src_register combine;
51 combine.File = inner.File;
52 combine.Index = inner.Index;
53 combine.RelAddr = inner.RelAddr;
54 if (outer.Abs) {
55 combine.Abs = 1;
56 combine.Negate = outer.Negate;
57 } else {
58 combine.Abs = inner.Abs;
59 combine.Negate = swizzle_mask(outer.Swizzle, inner.Negate);
60 combine.Negate ^= outer.Negate;
61 }
62 combine.Swizzle = combine_swizzles(inner.Swizzle, outer.Swizzle);
63 return combine;
64 }
65
66 static void copy_propagate_scan_read(void * data, struct rc_instruction * inst,
67 struct rc_src_register * src)
68 {
69 rc_register_file file = src->File;
70 struct rc_reader_data * reader_data = data;
71
72 if(!rc_inst_can_use_presub(inst,
73 reader_data->Writer->U.I.PreSub.Opcode,
74 rc_swizzle_to_writemask(src->Swizzle),
75 src,
76 &reader_data->Writer->U.I.PreSub.SrcReg[0],
77 &reader_data->Writer->U.I.PreSub.SrcReg[1])) {
78 reader_data->Abort = 1;
79 return;
80 }
81
82 /* XXX This could probably be handled better. */
83 if (file == RC_FILE_ADDRESS) {
84 reader_data->Abort = 1;
85 return;
86 }
87
88 /* These instructions cannot read from the constants file.
89 * see radeonTransformTEX()
90 */
91 if(reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_TEMPORARY &&
92 reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_INPUT &&
93 (inst->U.I.Opcode == RC_OPCODE_TEX ||
94 inst->U.I.Opcode == RC_OPCODE_TXB ||
95 inst->U.I.Opcode == RC_OPCODE_TXP ||
96 inst->U.I.Opcode == RC_OPCODE_TXD ||
97 inst->U.I.Opcode == RC_OPCODE_TXL ||
98 inst->U.I.Opcode == RC_OPCODE_KIL)){
99 reader_data->Abort = 1;
100 return;
101 }
102 }
103
104 static void src_clobbered_reads_cb(
105 void * data,
106 struct rc_instruction * inst,
107 struct rc_src_register * src)
108 {
109 struct src_clobbered_reads_cb_data * sc_data = data;
110
111 if (src->File == sc_data->File
112 && src->Index == sc_data->Index
113 && (rc_swizzle_to_writemask(src->Swizzle) & sc_data->Mask)) {
114
115 sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW;
116 }
117
118 if (src->RelAddr && sc_data->File == RC_FILE_ADDRESS) {
119 sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW;
120 }
121 }
122
123 static void is_src_clobbered_scan_write(
124 void * data,
125 struct rc_instruction * inst,
126 rc_register_file file,
127 unsigned int index,
128 unsigned int mask)
129 {
130 struct src_clobbered_reads_cb_data sc_data;
131 struct rc_reader_data * reader_data = data;
132 sc_data.File = file;
133 sc_data.Index = index;
134 sc_data.Mask = mask;
135 sc_data.ReaderData = reader_data;
136 rc_for_all_reads_src(reader_data->Writer,
137 src_clobbered_reads_cb, &sc_data);
138 }
139
140 static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * inst_mov)
141 {
142 struct rc_reader_data reader_data;
143 unsigned int i;
144
145 if (inst_mov->U.I.DstReg.File != RC_FILE_TEMPORARY ||
146 inst_mov->U.I.WriteALUResult)
147 return;
148
149 /* Get a list of all the readers of this MOV instruction. */
150 reader_data.ExitOnAbort = 1;
151 rc_get_readers(c, inst_mov, &reader_data,
152 copy_propagate_scan_read, NULL,
153 is_src_clobbered_scan_write);
154
155 if (reader_data.Abort || reader_data.ReaderCount == 0)
156 return;
157
158 /* We can propagate SaturateMode if all the readers are MOV instructions
159 * without a presubtract operation, source negation and absolute.
160 * In that case, we just move SaturateMode to all readers. */
161 if (inst_mov->U.I.SaturateMode) {
162 for (i = 0; i < reader_data.ReaderCount; i++) {
163 struct rc_instruction * inst = reader_data.Readers[i].Inst;
164
165 if (inst->U.I.Opcode != RC_OPCODE_MOV ||
166 inst->U.I.SrcReg[0].File == RC_FILE_PRESUB ||
167 inst->U.I.SrcReg[0].Abs ||
168 inst->U.I.SrcReg[0].Negate) {
169 return;
170 }
171 }
172 }
173
174 /* Propagate the MOV instruction. */
175 for (i = 0; i < reader_data.ReaderCount; i++) {
176 struct rc_instruction * inst = reader_data.Readers[i].Inst;
177 *reader_data.Readers[i].U.I.Src = chain_srcregs(*reader_data.Readers[i].U.I.Src, inst_mov->U.I.SrcReg[0]);
178
179 if (inst_mov->U.I.SrcReg[0].File == RC_FILE_PRESUB)
180 inst->U.I.PreSub = inst_mov->U.I.PreSub;
181 if (!inst->U.I.SaturateMode)
182 inst->U.I.SaturateMode = inst_mov->U.I.SaturateMode;
183 }
184
185 /* Finally, remove the original MOV instruction */
186 rc_remove_instruction(inst_mov);
187 }
188
189 /**
190 * Check if a source register is actually always the same
191 * swizzle constant.
192 */
193 static int is_src_uniform_constant(struct rc_src_register src,
194 rc_swizzle * pswz, unsigned int * pnegate)
195 {
196 int have_used = 0;
197
198 if (src.File != RC_FILE_NONE) {
199 *pswz = 0;
200 return 0;
201 }
202
203 for(unsigned int chan = 0; chan < 4; ++chan) {
204 unsigned int swz = GET_SWZ(src.Swizzle, chan);
205 if (swz < 4) {
206 *pswz = 0;
207 return 0;
208 }
209 if (swz == RC_SWIZZLE_UNUSED)
210 continue;
211
212 if (!have_used) {
213 *pswz = swz;
214 *pnegate = GET_BIT(src.Negate, chan);
215 have_used = 1;
216 } else {
217 if (swz != *pswz || *pnegate != GET_BIT(src.Negate, chan)) {
218 *pswz = 0;
219 return 0;
220 }
221 }
222 }
223
224 return 1;
225 }
226
227 static void constant_folding_mad(struct rc_instruction * inst)
228 {
229 rc_swizzle swz = 0;
230 unsigned int negate= 0;
231
232 if (is_src_uniform_constant(inst->U.I.SrcReg[2], &swz, &negate)) {
233 if (swz == RC_SWIZZLE_ZERO) {
234 inst->U.I.Opcode = RC_OPCODE_MUL;
235 return;
236 }
237 }
238
239 if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
240 if (swz == RC_SWIZZLE_ONE) {
241 inst->U.I.Opcode = RC_OPCODE_ADD;
242 if (negate)
243 inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
244 inst->U.I.SrcReg[1] = inst->U.I.SrcReg[2];
245 return;
246 } else if (swz == RC_SWIZZLE_ZERO) {
247 inst->U.I.Opcode = RC_OPCODE_MOV;
248 inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
249 return;
250 }
251 }
252
253 if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
254 if (swz == RC_SWIZZLE_ONE) {
255 inst->U.I.Opcode = RC_OPCODE_ADD;
256 if (negate)
257 inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW;
258 inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
259 return;
260 } else if (swz == RC_SWIZZLE_ZERO) {
261 inst->U.I.Opcode = RC_OPCODE_MOV;
262 inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
263 return;
264 }
265 }
266 }
267
268 static void constant_folding_mul(struct rc_instruction * inst)
269 {
270 rc_swizzle swz = 0;
271 unsigned int negate = 0;
272
273 if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
274 if (swz == RC_SWIZZLE_ONE) {
275 inst->U.I.Opcode = RC_OPCODE_MOV;
276 inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1];
277 if (negate)
278 inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
279 return;
280 } else if (swz == RC_SWIZZLE_ZERO) {
281 inst->U.I.Opcode = RC_OPCODE_MOV;
282 inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
283 return;
284 }
285 }
286
287 if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
288 if (swz == RC_SWIZZLE_ONE) {
289 inst->U.I.Opcode = RC_OPCODE_MOV;
290 if (negate)
291 inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
292 return;
293 } else if (swz == RC_SWIZZLE_ZERO) {
294 inst->U.I.Opcode = RC_OPCODE_MOV;
295 inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
296 return;
297 }
298 }
299 }
300
301 static void constant_folding_add(struct rc_instruction * inst)
302 {
303 rc_swizzle swz = 0;
304 unsigned int negate = 0;
305
306 if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
307 if (swz == RC_SWIZZLE_ZERO) {
308 inst->U.I.Opcode = RC_OPCODE_MOV;
309 inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1];
310 return;
311 }
312 }
313
314 if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
315 if (swz == RC_SWIZZLE_ZERO) {
316 inst->U.I.Opcode = RC_OPCODE_MOV;
317 return;
318 }
319 }
320 }
321
322 /**
323 * Replace 0.0, 1.0 and 0.5 immediate constants by their
324 * respective swizzles. Simplify instructions like ADD dst, src, 0;
325 */
326 static void constant_folding(struct radeon_compiler * c, struct rc_instruction * inst)
327 {
328 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
329 unsigned int i;
330
331 /* Replace 0.0, 1.0 and 0.5 immediates by their explicit swizzles */
332 for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
333 struct rc_constant * constant;
334 struct rc_src_register newsrc;
335 int have_real_reference;
336 unsigned int chan;
337
338 /* If there are only 0, 0.5, 1, or _ swizzles, mark the source as a constant. */
339 for (chan = 0; chan < 4; ++chan)
340 if (GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan) <= 3)
341 break;
342 if (chan == 4) {
343 inst->U.I.SrcReg[src].File = RC_FILE_NONE;
344 continue;
345 }
346
347 /* Convert immediates to swizzles. */
348 if (inst->U.I.SrcReg[src].File != RC_FILE_CONSTANT ||
349 inst->U.I.SrcReg[src].RelAddr ||
350 inst->U.I.SrcReg[src].Index >= c->Program.Constants.Count)
351 continue;
352
353 constant =
354 &c->Program.Constants.Constants[inst->U.I.SrcReg[src].Index];
355
356 if (constant->Type != RC_CONSTANT_IMMEDIATE)
357 continue;
358
359 newsrc = inst->U.I.SrcReg[src];
360 have_real_reference = 0;
361 for (chan = 0; chan < 4; ++chan) {
362 unsigned int swz = GET_SWZ(newsrc.Swizzle, chan);
363 unsigned int newswz;
364 float imm;
365 float baseimm;
366
367 if (swz >= 4)
368 continue;
369
370 imm = constant->u.Immediate[swz];
371 baseimm = imm;
372 if (imm < 0.0)
373 baseimm = -baseimm;
374
375 if (baseimm == 0.0) {
376 newswz = RC_SWIZZLE_ZERO;
377 } else if (baseimm == 1.0) {
378 newswz = RC_SWIZZLE_ONE;
379 } else if (baseimm == 0.5 && c->has_half_swizzles) {
380 newswz = RC_SWIZZLE_HALF;
381 } else {
382 have_real_reference = 1;
383 continue;
384 }
385
386 SET_SWZ(newsrc.Swizzle, chan, newswz);
387 if (imm < 0.0 && !newsrc.Abs)
388 newsrc.Negate ^= 1 << chan;
389 }
390
391 if (!have_real_reference) {
392 newsrc.File = RC_FILE_NONE;
393 newsrc.Index = 0;
394 }
395
396 /* don't make the swizzle worse */
397 if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, newsrc) &&
398 c->SwizzleCaps->IsNative(inst->U.I.Opcode, inst->U.I.SrcReg[src]))
399 continue;
400
401 inst->U.I.SrcReg[src] = newsrc;
402 }
403
404 /* Simplify instructions based on constants */
405 if (inst->U.I.Opcode == RC_OPCODE_MAD)
406 constant_folding_mad(inst);
407
408 /* note: MAD can simplify to MUL or ADD */
409 if (inst->U.I.Opcode == RC_OPCODE_MUL)
410 constant_folding_mul(inst);
411 else if (inst->U.I.Opcode == RC_OPCODE_ADD)
412 constant_folding_add(inst);
413
414 /* In case this instruction has been converted, make sure all of the
415 * registers that are no longer used are empty. */
416 opcode = rc_get_opcode_info(inst->U.I.Opcode);
417 for(i = opcode->NumSrcRegs; i < 3; i++) {
418 memset(&inst->U.I.SrcReg[i], 0, sizeof(struct rc_src_register));
419 }
420 }
421
422 /**
423 * If src and dst use the same register, this function returns a writemask that
424 * indicates wich components are read by src. Otherwise zero is returned.
425 */
426 static unsigned int src_reads_dst_mask(struct rc_src_register src,
427 struct rc_dst_register dst)
428 {
429 if (dst.File != src.File || dst.Index != src.Index) {
430 return 0;
431 }
432 return rc_swizzle_to_writemask(src.Swizzle);
433 }
434
435 /* Return 1 if the source registers has a constant swizzle (e.g. 0, 0.5, 1.0)
436 * in any of its channels. Return 0 otherwise. */
437 static int src_has_const_swz(struct rc_src_register src) {
438 int chan;
439 for(chan = 0; chan < 4; chan++) {
440 unsigned int swz = GET_SWZ(src.Swizzle, chan);
441 if (swz == RC_SWIZZLE_ZERO || swz == RC_SWIZZLE_HALF
442 || swz == RC_SWIZZLE_ONE) {
443 return 1;
444 }
445 }
446 return 0;
447 }
448
449 static void presub_scan_read(
450 void * data,
451 struct rc_instruction * inst,
452 struct rc_src_register * src)
453 {
454 struct rc_reader_data * reader_data = data;
455 rc_presubtract_op * presub_opcode = reader_data->CbData;
456
457 if (!rc_inst_can_use_presub(inst, *presub_opcode,
458 reader_data->Writer->U.I.DstReg.WriteMask,
459 src,
460 &reader_data->Writer->U.I.SrcReg[0],
461 &reader_data->Writer->U.I.SrcReg[1])) {
462 reader_data->Abort = 1;
463 return;
464 }
465 }
466
467 static int presub_helper(
468 struct radeon_compiler * c,
469 struct rc_instruction * inst_add,
470 rc_presubtract_op presub_opcode,
471 rc_presub_replace_fn presub_replace)
472 {
473 struct rc_reader_data reader_data;
474 unsigned int i;
475 rc_presubtract_op cb_op = presub_opcode;
476
477 reader_data.CbData = &cb_op;
478 reader_data.ExitOnAbort = 1;
479 rc_get_readers(c, inst_add, &reader_data, presub_scan_read, NULL,
480 is_src_clobbered_scan_write);
481
482 if (reader_data.Abort || reader_data.ReaderCount == 0)
483 return 0;
484
485 for(i = 0; i < reader_data.ReaderCount; i++) {
486 unsigned int src_index;
487 struct rc_reader reader = reader_data.Readers[i];
488 const struct rc_opcode_info * info =
489 rc_get_opcode_info(reader.Inst->U.I.Opcode);
490
491 for (src_index = 0; src_index < info->NumSrcRegs; src_index++) {
492 if (&reader.Inst->U.I.SrcReg[src_index] == reader.U.I.Src)
493 presub_replace(inst_add, reader.Inst, src_index);
494 }
495 }
496 return 1;
497 }
498
499 /* This function assumes that inst_add->U.I.SrcReg[0] and
500 * inst_add->U.I.SrcReg[1] aren't both negative. */
501 static void presub_replace_add(
502 struct rc_instruction * inst_add,
503 struct rc_instruction * inst_reader,
504 unsigned int src_index)
505 {
506 rc_presubtract_op presub_opcode;
507 if (inst_add->U.I.SrcReg[1].Negate || inst_add->U.I.SrcReg[0].Negate)
508 presub_opcode = RC_PRESUB_SUB;
509 else
510 presub_opcode = RC_PRESUB_ADD;
511
512 if (inst_add->U.I.SrcReg[1].Negate) {
513 inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1];
514 inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[0];
515 } else {
516 inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[0];
517 inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[1];
518 }
519 inst_reader->U.I.PreSub.SrcReg[0].Negate = 0;
520 inst_reader->U.I.PreSub.SrcReg[1].Negate = 0;
521 inst_reader->U.I.PreSub.Opcode = presub_opcode;
522 inst_reader->U.I.SrcReg[src_index] =
523 chain_srcregs(inst_reader->U.I.SrcReg[src_index],
524 inst_reader->U.I.PreSub.SrcReg[0]);
525 inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB;
526 inst_reader->U.I.SrcReg[src_index].Index = presub_opcode;
527 }
528
529 static int is_presub_candidate(
530 struct radeon_compiler * c,
531 struct rc_instruction * inst)
532 {
533 const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode);
534 unsigned int i;
535 unsigned int is_constant[2] = {0, 0};
536
537 assert(inst->U.I.Opcode == RC_OPCODE_ADD);
538
539 if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE
540 || inst->U.I.SaturateMode
541 || inst->U.I.WriteALUResult
542 || inst->U.I.Omod) {
543 return 0;
544 }
545
546 /* If both sources use a constant swizzle, then we can't convert it to
547 * a presubtract operation. In fact for the ADD and SUB presubtract
548 * operations neither source can contain a constant swizzle. This
549 * specific case is checked in peephole_add_presub_add() when
550 * we make sure the swizzles for both sources are equal, so we
551 * don't need to worry about it here. */
552 for (i = 0; i < 2; i++) {
553 int chan;
554 for (chan = 0; chan < 4; chan++) {
555 rc_swizzle swz =
556 get_swz(inst->U.I.SrcReg[i].Swizzle, chan);
557 if (swz == RC_SWIZZLE_ONE
558 || swz == RC_SWIZZLE_ZERO
559 || swz == RC_SWIZZLE_HALF) {
560 is_constant[i] = 1;
561 }
562 }
563 }
564 if (is_constant[0] && is_constant[1])
565 return 0;
566
567 for(i = 0; i < info->NumSrcRegs; i++) {
568 struct rc_src_register src = inst->U.I.SrcReg[i];
569 if (src_reads_dst_mask(src, inst->U.I.DstReg))
570 return 0;
571
572 src.File = RC_FILE_PRESUB;
573 if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, src))
574 return 0;
575 }
576 return 1;
577 }
578
579 static int peephole_add_presub_add(
580 struct radeon_compiler * c,
581 struct rc_instruction * inst_add)
582 {
583 unsigned dstmask = inst_add->U.I.DstReg.WriteMask;
584 unsigned src0_neg = inst_add->U.I.SrcReg[0].Negate & dstmask;
585 unsigned src1_neg = inst_add->U.I.SrcReg[1].Negate & dstmask;
586
587 if (inst_add->U.I.SrcReg[0].Swizzle != inst_add->U.I.SrcReg[1].Swizzle)
588 return 0;
589
590 /* src0 and src1 can't have absolute values */
591 if (inst_add->U.I.SrcReg[0].Abs || inst_add->U.I.SrcReg[1].Abs)
592 return 0;
593
594 /* presub_replace_add() assumes only one is negative */
595 if (inst_add->U.I.SrcReg[0].Negate && inst_add->U.I.SrcReg[1].Negate)
596 return 0;
597
598 /* if src0 is negative, at least all bits of dstmask have to be set */
599 if (inst_add->U.I.SrcReg[0].Negate && src0_neg != dstmask)
600 return 0;
601
602 /* if src1 is negative, at least all bits of dstmask have to be set */
603 if (inst_add->U.I.SrcReg[1].Negate && src1_neg != dstmask)
604 return 0;
605
606 if (!is_presub_candidate(c, inst_add))
607 return 0;
608
609 if (presub_helper(c, inst_add, RC_PRESUB_ADD, presub_replace_add)) {
610 rc_remove_instruction(inst_add);
611 return 1;
612 }
613 return 0;
614 }
615
616 static void presub_replace_inv(
617 struct rc_instruction * inst_add,
618 struct rc_instruction * inst_reader,
619 unsigned int src_index)
620 {
621 /* We must be careful not to modify inst_add, since it
622 * is possible it will remain part of the program.*/
623 inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1];
624 inst_reader->U.I.PreSub.SrcReg[0].Negate = 0;
625 inst_reader->U.I.PreSub.Opcode = RC_PRESUB_INV;
626 inst_reader->U.I.SrcReg[src_index] = chain_srcregs(inst_reader->U.I.SrcReg[src_index],
627 inst_reader->U.I.PreSub.SrcReg[0]);
628
629 inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB;
630 inst_reader->U.I.SrcReg[src_index].Index = RC_PRESUB_INV;
631 }
632
633 /**
634 * PRESUB_INV: ADD TEMP[0], none.1, -TEMP[1]
635 * Use the presubtract 1 - src0 for all readers of TEMP[0]. The first source
636 * of the add instruction must have the constatnt 1 swizzle. This function
637 * does not check const registers to see if their value is 1.0, so it should
638 * be called after the constant_folding optimization.
639 * @return
640 * 0 if the ADD instruction is still part of the program.
641 * 1 if the ADD instruction is no longer part of the program.
642 */
643 static int peephole_add_presub_inv(
644 struct radeon_compiler * c,
645 struct rc_instruction * inst_add)
646 {
647 unsigned int i, swz;
648
649 if (!is_presub_candidate(c, inst_add))
650 return 0;
651
652 /* Check if src0 is 1. */
653 /* XXX It would be nice to use is_src_uniform_constant here, but that
654 * function only works if the register's file is RC_FILE_NONE */
655 for(i = 0; i < 4; i++ ) {
656 swz = GET_SWZ(inst_add->U.I.SrcReg[0].Swizzle, i);
657 if(((1 << i) & inst_add->U.I.DstReg.WriteMask)
658 && swz != RC_SWIZZLE_ONE) {
659 return 0;
660 }
661 }
662
663 /* Check src1. */
664 if ((inst_add->U.I.SrcReg[1].Negate & inst_add->U.I.DstReg.WriteMask) !=
665 inst_add->U.I.DstReg.WriteMask
666 || inst_add->U.I.SrcReg[1].Abs
667 || (inst_add->U.I.SrcReg[1].File != RC_FILE_TEMPORARY
668 && inst_add->U.I.SrcReg[1].File != RC_FILE_CONSTANT)
669 || src_has_const_swz(inst_add->U.I.SrcReg[1])) {
670
671 return 0;
672 }
673
674 if (presub_helper(c, inst_add, RC_PRESUB_INV, presub_replace_inv)) {
675 rc_remove_instruction(inst_add);
676 return 1;
677 }
678 return 0;
679 }
680
/**
 * State shared by the omod_filter_*_cb() callbacks while
 * peephole_mul_omod() scans the instructions between a writer and the MUL.
 */
struct peephole_mul_cb_data {
	/* Destination register of the MUL that is being folded. */
	struct rc_dst_register * Writer;

	/* Set to 1 once an intervening instruction touches that destination. */
	unsigned int Clobbered;
};
685
686 static void omod_filter_reader_cb(
687 void * userdata,
688 struct rc_instruction * inst,
689 rc_register_file file,
690 unsigned int index,
691 unsigned int mask)
692 {
693 struct peephole_mul_cb_data * d = userdata;
694 if (rc_src_reads_dst_mask(file, mask, index,
695 d->Writer->File, d->Writer->Index, d->Writer->WriteMask)) {
696
697 d->Clobbered = 1;
698 }
699 }
700
701 static void omod_filter_writer_cb(
702 void * userdata,
703 struct rc_instruction * inst,
704 rc_register_file file,
705 unsigned int index,
706 unsigned int mask)
707 {
708 struct peephole_mul_cb_data * d = userdata;
709 if (file == d->Writer->File && index == d->Writer->Index &&
710 (mask & d->Writer->WriteMask)) {
711 d->Clobbered = 1;
712 }
713 }
714
715 static int peephole_mul_omod(
716 struct radeon_compiler * c,
717 struct rc_instruction * inst_mul,
718 struct rc_list * var_list)
719 {
720 unsigned int chan = 0, swz, i;
721 int const_index = -1;
722 int temp_index = -1;
723 float const_value;
724 rc_omod_op omod_op = RC_OMOD_DISABLE;
725 struct rc_list * writer_list;
726 struct rc_variable * var;
727 struct peephole_mul_cb_data cb_data;
728 unsigned writemask_sum;
729
730 for (i = 0; i < 2; i++) {
731 unsigned int j;
732 if (inst_mul->U.I.SrcReg[i].File != RC_FILE_CONSTANT
733 && inst_mul->U.I.SrcReg[i].File != RC_FILE_TEMPORARY) {
734 return 0;
735 }
736 if (inst_mul->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) {
737 if (temp_index != -1) {
738 /* The instruction has two temp sources */
739 return 0;
740 } else {
741 temp_index = i;
742 continue;
743 }
744 }
745 /* If we get this far Src[i] must be a constant src */
746 if (inst_mul->U.I.SrcReg[i].Negate) {
747 return 0;
748 }
749 /* The constant src needs to read from the same swizzle */
750 swz = RC_SWIZZLE_UNUSED;
751 chan = 0;
752 for (j = 0; j < 4; j++) {
753 unsigned int j_swz =
754 GET_SWZ(inst_mul->U.I.SrcReg[i].Swizzle, j);
755 if (j_swz == RC_SWIZZLE_UNUSED) {
756 continue;
757 }
758 if (swz == RC_SWIZZLE_UNUSED) {
759 swz = j_swz;
760 chan = j;
761 } else if (j_swz != swz) {
762 return 0;
763 }
764 }
765
766 if (const_index != -1) {
767 /* The instruction has two constant sources */
768 return 0;
769 } else {
770 const_index = i;
771 }
772 }
773
774 if (!rc_src_reg_is_immediate(c, inst_mul->U.I.SrcReg[const_index].File,
775 inst_mul->U.I.SrcReg[const_index].Index)) {
776 return 0;
777 }
778 const_value = rc_get_constant_value(c,
779 inst_mul->U.I.SrcReg[const_index].Index,
780 inst_mul->U.I.SrcReg[const_index].Swizzle,
781 inst_mul->U.I.SrcReg[const_index].Negate,
782 chan);
783
784 if (const_value == 2.0f) {
785 omod_op = RC_OMOD_MUL_2;
786 } else if (const_value == 4.0f) {
787 omod_op = RC_OMOD_MUL_4;
788 } else if (const_value == 8.0f) {
789 omod_op = RC_OMOD_MUL_8;
790 } else if (const_value == (1.0f / 2.0f)) {
791 omod_op = RC_OMOD_DIV_2;
792 } else if (const_value == (1.0f / 4.0f)) {
793 omod_op = RC_OMOD_DIV_4;
794 } else if (const_value == (1.0f / 8.0f)) {
795 omod_op = RC_OMOD_DIV_8;
796 } else {
797 return 0;
798 }
799
800 writer_list = rc_variable_list_get_writers_one_reader(var_list,
801 RC_INSTRUCTION_NORMAL, &inst_mul->U.I.SrcReg[temp_index]);
802
803 if (!writer_list) {
804 return 0;
805 }
806
807 cb_data.Clobbered = 0;
808 cb_data.Writer = &inst_mul->U.I.DstReg;
809 for (var = writer_list->Item; var; var = var->Friend) {
810 struct rc_instruction * inst;
811 const struct rc_opcode_info * info = rc_get_opcode_info(
812 var->Inst->U.I.Opcode);
813 if (info->HasTexture) {
814 return 0;
815 }
816 if (var->Inst->U.I.SaturateMode != RC_SATURATE_NONE) {
817 return 0;
818 }
819 for (inst = inst_mul->Prev; inst != var->Inst;
820 inst = inst->Prev) {
821 rc_for_all_reads_mask(inst, omod_filter_reader_cb,
822 &cb_data);
823 rc_for_all_writes_mask(inst, omod_filter_writer_cb,
824 &cb_data);
825 if (cb_data.Clobbered) {
826 break;
827 }
828 }
829 }
830
831 if (cb_data.Clobbered) {
832 return 0;
833 }
834
835 /* Rewrite the instructions */
836 writemask_sum = rc_variable_writemask_sum(writer_list->Item);
837 for (var = writer_list->Item; var; var = var->Friend) {
838 struct rc_variable * writer = var;
839 unsigned conversion_swizzle = rc_make_conversion_swizzle(
840 writemask_sum,
841 inst_mul->U.I.DstReg.WriteMask);
842 writer->Inst->U.I.Omod = omod_op;
843 writer->Inst->U.I.DstReg.File = inst_mul->U.I.DstReg.File;
844 writer->Inst->U.I.DstReg.Index = inst_mul->U.I.DstReg.Index;
845 rc_normal_rewrite_writemask(writer->Inst, conversion_swizzle);
846 writer->Inst->U.I.SaturateMode = inst_mul->U.I.SaturateMode;
847 }
848
849 rc_remove_instruction(inst_mul);
850
851 return 1;
852 }
853
854 /**
855 * @return
856 * 0 if inst is still part of the program.
857 * 1 if inst is no longer part of the program.
858 */
859 static int peephole(struct radeon_compiler * c, struct rc_instruction * inst)
860 {
861 switch(inst->U.I.Opcode){
862 case RC_OPCODE_ADD:
863 if (c->has_presub) {
864 if(peephole_add_presub_inv(c, inst))
865 return 1;
866 if(peephole_add_presub_add(c, inst))
867 return 1;
868 }
869 break;
870 default:
871 break;
872 }
873 return 0;
874 }
875
876 void rc_optimize(struct radeon_compiler * c, void *user)
877 {
878 struct rc_instruction * inst = c->Program.Instructions.Next;
879 struct rc_list * var_list;
880 while(inst != &c->Program.Instructions) {
881 struct rc_instruction * cur = inst;
882 inst = inst->Next;
883
884 constant_folding(c, cur);
885
886 if(peephole(c, cur))
887 continue;
888
889 if (cur->U.I.Opcode == RC_OPCODE_MOV) {
890 copy_propagate(c, cur);
891 /* cur may no longer be part of the program */
892 }
893 }
894
895 if (!c->has_omod) {
896 return;
897 }
898
899 inst = c->Program.Instructions.Next;
900 while(inst != &c->Program.Instructions) {
901 struct rc_instruction * cur = inst;
902 inst = inst->Next;
903 if (cur->U.I.Opcode == RC_OPCODE_MUL) {
904 var_list = rc_get_variables(c);
905 peephole_mul_omod(c, cur, var_list);
906 }
907 }
908 }