r300/compiler: Implement simple peephole optimizer
[mesa.git] / src / mesa / drivers / dri / r300 / compiler / radeon_optimize.c
1 /*
2 * Copyright (C) 2009 Nicolai Haehnle.
3 *
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 */
27
28 #include "radeon_dataflow.h"
29
30 #include "radeon_compiler.h"
31
32
33 static struct rc_src_register chain_srcregs(struct rc_src_register outer, struct rc_src_register inner)
34 {
35 struct rc_src_register combine;
36 combine.File = inner.File;
37 combine.Index = inner.Index;
38 combine.RelAddr = inner.RelAddr;
39 if (outer.Abs) {
40 combine.Abs = 1;
41 combine.Negate = outer.Negate;
42 } else {
43 combine.Abs = inner.Abs;
44 combine.Negate = 0;
45 for(unsigned int chan = 0; chan < 4; ++chan) {
46 unsigned int swz = GET_SWZ(outer.Swizzle, chan);
47 if (swz < 4)
48 combine.Negate |= GET_BIT(inner.Negate, swz) << chan;
49 }
50 combine.Negate ^= outer.Negate;
51 }
52 combine.Swizzle = combine_swizzles(inner.Swizzle, outer.Swizzle);
53 return combine;
54 }
55
/**
 * Bookkeeping shared between peephole() and its read/write scan callbacks
 * while the instructions following one candidate MOV are examined.
 */
struct peephole_state {
	/** Compiler instance the optimization runs for. */
	struct radeon_compiler *C;

	/** The MOV instruction that is a candidate for removal. */
	struct rc_instruction *Mov;

	/** Set once a reader is found that prevents removing the MOV. */
	unsigned int Conflict:1;

	/** Set once a later instruction has overwritten the MOV's source. */
	unsigned int SourceClobbered:1;

	/** Destination components that still hold the value written by the MOV. */
	unsigned int MovMask:4;

	/** Destination components known to hold a value *not* written by the MOV. */
	unsigned int DefinedMask:4;

	/** Components of the MOV's source register that its swizzle selects. */
	unsigned int SourcedMask:4;

	/**
	 * IF/ENDIF nesting relative to the MOV; drops below zero once the
	 * scan has left the block that contains the MOV.
	 */
	int BranchDepth;
};
76
77 static void peephole_scan_read(void * data, struct rc_instruction * inst,
78 rc_register_file file, unsigned int index, unsigned int mask)
79 {
80 struct peephole_state * s = data;
81
82 if (file != RC_FILE_TEMPORARY || index != s->Mov->U.I.DstReg.Index)
83 return;
84
85 if ((mask & s->MovMask) == mask) {
86 if (s->SourceClobbered) {
87 s->Conflict = 1;
88 }
89 } else if ((mask & s->DefinedMask) == mask) {
90 /* read from something entirely written by other instruction: this is okay */
91 } else {
92 /* read from component combination that is not well-defined without
93 * the MOV: cannot remove it */
94 s->Conflict = 1;
95 }
96 }
97
98 static void peephole_scan_write(void * data, struct rc_instruction * inst,
99 rc_register_file file, unsigned int index, unsigned int mask)
100 {
101 struct peephole_state * s = data;
102
103 if (s->BranchDepth < 0)
104 return;
105
106 if (file == s->Mov->U.I.DstReg.File && index == s->Mov->U.I.DstReg.Index) {
107 s->MovMask &= ~mask;
108 if (s->BranchDepth == 0)
109 s->DefinedMask |= mask;
110 else
111 s->DefinedMask &= ~mask;
112 } else if (file == s->Mov->U.I.SrcReg[0].File && index == s->Mov->U.I.SrcReg[0].Index) {
113 if (mask & s->SourcedMask)
114 s->SourceClobbered = 1;
115 } else if (s->Mov->U.I.SrcReg[0].RelAddr && file == RC_FILE_ADDRESS) {
116 s->SourceClobbered = 1;
117 }
118 }
119
120 static void peephole(struct radeon_compiler * c, struct rc_instruction * inst_mov)
121 {
122 struct peephole_state s;
123
124 if (inst_mov->U.I.DstReg.File != RC_FILE_TEMPORARY || inst_mov->U.I.WriteALUResult)
125 return;
126
127 memset(&s, 0, sizeof(s));
128 s.C = c;
129 s.Mov = inst_mov;
130 s.MovMask = inst_mov->U.I.DstReg.WriteMask;
131 s.DefinedMask = RC_MASK_XYZW & ~s.MovMask;
132
133 for(unsigned int chan = 0; chan < 4; ++chan) {
134 unsigned int swz = GET_SWZ(inst_mov->U.I.SrcReg[0].Swizzle, chan);
135 s.SourcedMask |= (1 << swz) & RC_MASK_XYZW;
136 }
137
138 /* 1st pass: Check whether all subsequent readers can be changed */
139 for(struct rc_instruction * inst = inst_mov->Next;
140 inst != &c->Program.Instructions;
141 inst = inst->Next) {
142 rc_for_all_reads_mask(inst, peephole_scan_read, &s);
143 rc_for_all_writes_mask(inst, peephole_scan_write, &s);
144 if (s.Conflict)
145 return;
146
147 if (s.BranchDepth >= 0) {
148 if (inst->U.I.Opcode == RC_OPCODE_IF) {
149 s.BranchDepth++;
150 } else if (inst->U.I.Opcode == RC_OPCODE_ENDIF) {
151 s.BranchDepth--;
152 if (s.BranchDepth < 0) {
153 s.DefinedMask &= ~s.MovMask;
154 s.MovMask = 0;
155 }
156 }
157 }
158 }
159
160 if (s.Conflict)
161 return;
162
163 /* 2nd pass: We can satisfy all readers, so switch them over all at once */
164 s.MovMask = inst_mov->U.I.DstReg.WriteMask;
165 s.BranchDepth = 0;
166
167 for(struct rc_instruction * inst = inst_mov->Next;
168 inst != &c->Program.Instructions;
169 inst = inst->Next) {
170 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
171
172 for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
173 if (inst->U.I.SrcReg[src].File == RC_FILE_TEMPORARY &&
174 inst->U.I.SrcReg[src].Index == s.Mov->U.I.DstReg.Index) {
175 unsigned int refmask = 0;
176
177 for(unsigned int chan = 0; chan < 4; ++chan) {
178 unsigned int swz = GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan);
179 refmask |= (1 << swz) & RC_MASK_XYZW;
180 }
181
182 if ((refmask & s.MovMask) == refmask)
183 inst->U.I.SrcReg[src] = chain_srcregs(inst->U.I.SrcReg[src], s.Mov->U.I.SrcReg[0]);
184 }
185 }
186
187 if (opcode->HasDstReg) {
188 if (inst->U.I.DstReg.File == RC_FILE_TEMPORARY &&
189 inst->U.I.DstReg.Index == s.Mov->U.I.DstReg.Index) {
190 s.MovMask &= ~inst->U.I.DstReg.WriteMask;
191 }
192 }
193
194 if (s.BranchDepth >= 0) {
195 if (inst->U.I.Opcode == RC_OPCODE_IF) {
196 s.BranchDepth++;
197 } else if (inst->U.I.Opcode == RC_OPCODE_ENDIF) {
198 s.BranchDepth--;
199 if (s.BranchDepth < 0)
200 break; /* no more readers after this point */
201 }
202 }
203 }
204
205 /* Finally, remove the original MOV instruction */
206 rc_remove_instruction(inst_mov);
207 }
208
209 void rc_optimize(struct radeon_compiler * c)
210 {
211 struct rc_instruction * inst = c->Program.Instructions.Next;
212 while(inst != &c->Program.Instructions) {
213 struct rc_instruction * cur = inst;
214 inst = inst->Next;
215
216 if (cur->U.I.Opcode == RC_OPCODE_MOV)
217 peephole(c, cur);
218 }
219 }