r300/compiler: Make calculate_inputs_outputs available to external users
[mesa.git] / src / mesa / drivers / dri / r300 / compiler / radeon_nqssadce.c
1 /*
2 * Copyright (C) 2008 Nicolai Haehnle.
3 *
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 */
27
28 /**
29 * @file
30 *
31 * "Not-quite SSA" and Dead-Code Elimination.
32 *
33 * @note This code uses SWIZZLE_NIL in a source register to indicate that
34 * the corresponding component is ignored by the corresponding instruction.
35 */
36
37 #include "radeon_nqssadce.h"
38
39 #include "radeon_compiler.h"
40
41
42 /**
43 * Return the @ref register_state for the given register (or 0 for untracked
44 * registers, i.e. constants).
45 */
46 static struct register_state *get_reg_state(struct nqssadce_state* s, GLuint file, GLuint index)
47 {
48 switch(file) {
49 case PROGRAM_TEMPORARY: return &s->Temps[index];
50 case PROGRAM_OUTPUT: return &s->Outputs[index];
51 case PROGRAM_ADDRESS: return &s->Address;
52 default: return 0;
53 }
54 }
55
56
57 /**
58 * Left multiplication of a register with a swizzle
59 *
60 * @note Works correctly only for X, Y, Z, W swizzles, not for constant swizzles.
61 */
62 struct prog_src_register lmul_swizzle(GLuint swizzle, struct prog_src_register srcreg)
63 {
64 struct prog_src_register tmp = srcreg;
65 int i;
66 tmp.Swizzle = 0;
67 tmp.Negate = NEGATE_NONE;
68 for(i = 0; i < 4; ++i) {
69 GLuint swz = GET_SWZ(swizzle, i);
70 if (swz < 4) {
71 tmp.Swizzle |= GET_SWZ(srcreg.Swizzle, swz) << (i*3);
72 tmp.Negate |= GET_BIT(srcreg.Negate, swz) << i;
73 } else {
74 tmp.Swizzle |= swz << (i*3);
75 }
76 }
77 return tmp;
78 }
79
80
81 static void track_used_srcreg(struct nqssadce_state* s,
82 GLint src, GLuint sourced)
83 {
84 struct prog_instruction * inst = &s->IP->I;
85 int i;
86 GLuint deswz_source = 0;
87
88 for(i = 0; i < 4; ++i) {
89 if (GET_BIT(sourced, i)) {
90 GLuint swz = GET_SWZ(inst->SrcReg[src].Swizzle, i);
91 deswz_source |= 1 << swz;
92 } else {
93 inst->SrcReg[src].Swizzle &= ~(7 << (3*i));
94 inst->SrcReg[src].Swizzle |= SWIZZLE_NIL << (3*i);
95 }
96 }
97
98 if (!s->Descr->IsNativeSwizzle(inst->Opcode, inst->SrcReg[src])) {
99 struct prog_dst_register dstreg = inst->DstReg;
100 dstreg.File = PROGRAM_TEMPORARY;
101 dstreg.Index = rc_find_free_temporary(s->Compiler);
102 dstreg.WriteMask = sourced;
103
104 s->Descr->BuildSwizzle(s, dstreg, inst->SrcReg[src]);
105
106 inst->SrcReg[src].File = PROGRAM_TEMPORARY;
107 inst->SrcReg[src].Index = dstreg.Index;
108 inst->SrcReg[src].Swizzle = 0;
109 inst->SrcReg[src].Negate = NEGATE_NONE;
110 inst->SrcReg[src].Abs = 0;
111 for(i = 0; i < 4; ++i) {
112 if (GET_BIT(sourced, i))
113 inst->SrcReg[src].Swizzle |= i << (3*i);
114 else
115 inst->SrcReg[src].Swizzle |= SWIZZLE_NIL << (3*i);
116 }
117 deswz_source = sourced;
118 }
119
120 struct register_state *regstate;
121
122 if (inst->SrcReg[src].RelAddr) {
123 regstate = get_reg_state(s, PROGRAM_ADDRESS, 0);
124 if (regstate)
125 regstate->Sourced |= WRITEMASK_X;
126 } else {
127 regstate = get_reg_state(s, inst->SrcReg[src].File, inst->SrcReg[src].Index);
128 if (regstate)
129 regstate->Sourced |= deswz_source & 0xf;
130 }
131 }
132
133 static void unalias_srcregs(struct rc_instruction *inst, GLuint oldindex, GLuint newindex)
134 {
135 int nsrc = _mesa_num_inst_src_regs(inst->I.Opcode);
136 int i;
137 for(i = 0; i < nsrc; ++i)
138 if (inst->I.SrcReg[i].File == PROGRAM_TEMPORARY && inst->I.SrcReg[i].Index == oldindex)
139 inst->I.SrcReg[i].Index = newindex;
140 }
141
142 static void unalias_temporary(struct nqssadce_state* s, GLuint oldindex)
143 {
144 GLuint newindex = rc_find_free_temporary(s->Compiler);
145 struct rc_instruction * inst;
146 for(inst = s->Compiler->Program.Instructions.Next; inst != s->IP; inst = inst->Next) {
147 if (inst->I.DstReg.File == PROGRAM_TEMPORARY && inst->I.DstReg.Index == oldindex)
148 inst->I.DstReg.Index = newindex;
149 unalias_srcregs(inst, oldindex, newindex);
150 }
151 unalias_srcregs(s->IP, oldindex, newindex);
152 }
153
154
155 /**
156 * Handle one instruction.
157 */
158 static void process_instruction(struct nqssadce_state* s)
159 {
160 struct prog_instruction *inst = &s->IP->I;
161 GLuint WriteMask;
162
163 if (inst->Opcode == OPCODE_END)
164 return;
165
166 if (inst->Opcode != OPCODE_KIL) {
167 struct register_state *regstate = get_reg_state(s, inst->DstReg.File, inst->DstReg.Index);
168 if (!regstate) {
169 rc_error(s->Compiler, "NqssaDce: bad destination register (%i[%i])\n",
170 inst->DstReg.File, inst->DstReg.Index);
171 return;
172 }
173
174 inst->DstReg.WriteMask &= regstate->Sourced;
175 regstate->Sourced &= ~inst->DstReg.WriteMask;
176
177 if (inst->DstReg.WriteMask == 0) {
178 struct rc_instruction * inst_remove = s->IP;
179 s->IP = s->IP->Prev;
180 rc_remove_instruction(inst_remove);
181 return;
182 }
183
184 if (inst->DstReg.File == PROGRAM_TEMPORARY && !regstate->Sourced)
185 unalias_temporary(s, inst->DstReg.Index);
186 }
187
188 WriteMask = inst->DstReg.WriteMask;
189
190 switch (inst->Opcode) {
191 case OPCODE_ARL:
192 case OPCODE_DDX:
193 case OPCODE_DDY:
194 case OPCODE_FRC:
195 case OPCODE_MOV:
196 track_used_srcreg(s, 0, WriteMask);
197 break;
198 case OPCODE_ADD:
199 case OPCODE_MAX:
200 case OPCODE_MIN:
201 case OPCODE_MUL:
202 case OPCODE_SGE:
203 case OPCODE_SLT:
204 track_used_srcreg(s, 0, WriteMask);
205 track_used_srcreg(s, 1, WriteMask);
206 break;
207 case OPCODE_CMP:
208 case OPCODE_MAD:
209 track_used_srcreg(s, 0, WriteMask);
210 track_used_srcreg(s, 1, WriteMask);
211 track_used_srcreg(s, 2, WriteMask);
212 break;
213 case OPCODE_COS:
214 case OPCODE_EX2:
215 case OPCODE_LG2:
216 case OPCODE_RCP:
217 case OPCODE_RSQ:
218 case OPCODE_SIN:
219 track_used_srcreg(s, 0, 0x1);
220 break;
221 case OPCODE_DP3:
222 track_used_srcreg(s, 0, 0x7);
223 track_used_srcreg(s, 1, 0x7);
224 break;
225 case OPCODE_DP4:
226 track_used_srcreg(s, 0, 0xf);
227 track_used_srcreg(s, 1, 0xf);
228 break;
229 case OPCODE_KIL:
230 case OPCODE_TEX:
231 case OPCODE_TXB:
232 case OPCODE_TXP:
233 track_used_srcreg(s, 0, 0xf);
234 break;
235 case OPCODE_DST:
236 track_used_srcreg(s, 0, 0x6);
237 track_used_srcreg(s, 1, 0xa);
238 break;
239 case OPCODE_EXP:
240 case OPCODE_LOG:
241 case OPCODE_POW:
242 track_used_srcreg(s, 0, 0x3);
243 break;
244 case OPCODE_LIT:
245 track_used_srcreg(s, 0, 0xb);
246 break;
247 default:
248 rc_error(s->Compiler, "NqssaDce: Unknown opcode %d\n", inst->Opcode);
249 return;
250 }
251
252 s->IP = s->IP->Prev;
253 }
254
255 void rc_calculate_inputs_outputs(struct radeon_compiler * c)
256 {
257 struct rc_instruction *inst;
258
259 c->Program.InputsRead = 0;
260 c->Program.OutputsWritten = 0;
261
262 for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next)
263 {
264 int i;
265 int num_src_regs = _mesa_num_inst_src_regs(inst->I.Opcode);
266
267 for (i = 0; i < num_src_regs; ++i) {
268 if (inst->I.SrcReg[i].File == PROGRAM_INPUT)
269 c->Program.InputsRead |= 1 << inst->I.SrcReg[i].Index;
270 }
271
272 if (_mesa_num_inst_dst_regs(inst->I.Opcode)) {
273 if (inst->I.DstReg.File == PROGRAM_OUTPUT)
274 c->Program.OutputsWritten |= 1 << inst->I.DstReg.Index;
275 }
276 }
277 }
278
279 void radeonNqssaDce(struct radeon_compiler * c, struct radeon_nqssadce_descr* descr, void * data)
280 {
281 struct nqssadce_state s;
282
283 _mesa_bzero(&s, sizeof(s));
284 s.Compiler = c;
285 s.Descr = descr;
286 s.UserData = data;
287 s.Descr->Init(&s);
288 s.IP = c->Program.Instructions.Prev;
289
290 while(s.IP != &c->Program.Instructions && !c->Error)
291 process_instruction(&s);
292
293 rc_calculate_inputs_outputs(c);
294 }