r300: Remove GLcontext requirement from radeon_program_pair
[mesa.git] / src / mesa / drivers / dri / r300 / compiler / radeon_nqssadce.c
1 /*
2 * Copyright (C) 2008 Nicolai Haehnle.
3 *
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 */
27
28 /**
29 * @file
30 *
31 * "Not-quite SSA" and Dead-Code Elimination.
32 *
33 * @note This code uses SWIZZLE_NIL in a source register to indicate that
34 * the corresponding component is ignored by the corresponding instruction.
35 */
36
37 #include "radeon_nqssadce.h"
38
39
40 /**
41 * Return the @ref register_state for the given register (or 0 for untracked
42 * registers, i.e. constants).
43 */
44 static struct register_state *get_reg_state(struct nqssadce_state* s, GLuint file, GLuint index)
45 {
46 switch(file) {
47 case PROGRAM_TEMPORARY: return &s->Temps[index];
48 case PROGRAM_OUTPUT: return &s->Outputs[index];
49 case PROGRAM_ADDRESS: return &s->Address;
50 default: return 0;
51 }
52 }
53
54
55 /**
56 * Left multiplication of a register with a swizzle
57 *
58 * @note Works correctly only for X, Y, Z, W swizzles, not for constant swizzles.
59 */
60 struct prog_src_register lmul_swizzle(GLuint swizzle, struct prog_src_register srcreg)
61 {
62 struct prog_src_register tmp = srcreg;
63 int i;
64 tmp.Swizzle = 0;
65 tmp.Negate = NEGATE_NONE;
66 for(i = 0; i < 4; ++i) {
67 GLuint swz = GET_SWZ(swizzle, i);
68 if (swz < 4) {
69 tmp.Swizzle |= GET_SWZ(srcreg.Swizzle, swz) << (i*3);
70 tmp.Negate |= GET_BIT(srcreg.Negate, swz) << i;
71 } else {
72 tmp.Swizzle |= swz << (i*3);
73 }
74 }
75 return tmp;
76 }
77
78
/**
 * Record which components of one source operand are really read and
 * normalize the operand's swizzle accordingly.
 *
 * Channels of the operand that are not set in @p sourced get their swizzle
 * selector replaced by SWIZZLE_NIL. If the resulting operand is rejected by
 * the descriptor's IsNativeSwizzle callback, the descriptor's BuildSwizzle
 * callback is asked to produce the swizzled value in a fresh temporary and
 * the operand is redirected to that temporary.
 *
 * Finally the Sourced mask of the register that is read is updated (or of
 * the address register, for relatively addressed operands).
 *
 * @param s       pass state
 * @param inst    instruction at s->IP
 * @param src     index of the source operand within @p inst
 * @param sourced mask of operand channels the instruction reads
 *
 * @return the instruction at s->IP; this is re-read after swizzle emulation,
 * so callers must continue with the returned pointer.
 */
static struct prog_instruction* track_used_srcreg(struct nqssadce_state* s,
	struct prog_instruction *inst, GLint src, GLuint sourced)
{
	int i;
	/* Mask of register components selected by the channels that are read. */
	GLuint deswz_source = 0;

	for(i = 0; i < 4; ++i) {
		if (GET_BIT(sourced, i)) {
			GLuint swz = GET_SWZ(inst->SrcReg[src].Swizzle, i);
			/* Selectors >= 4 set bits above bit 3; those are masked
			 * off with 0xf when the register state is updated. */
			deswz_source |= 1 << swz;
		} else {
			/* Channel is ignored by the instruction: mark it NIL. */
			inst->SrcReg[src].Swizzle &= ~(7 << (3*i));
			inst->SrcReg[src].Swizzle |= SWIZZLE_NIL << (3*i);
		}
	}

	if (!s->Descr->IsNativeSwizzle(inst->Opcode, inst->SrcReg[src])) {
		/* Emulate the swizzle through a fresh temporary that receives
		 * exactly the channels that are read. */
		/* NOTE(review): the result of _mesa_find_free_register is used
		 * unchecked — confirm whether it can fail. */
		struct prog_dst_register dstreg = inst->DstReg;
		dstreg.File = PROGRAM_TEMPORARY;
		dstreg.Index = _mesa_find_free_register(s->Program, PROGRAM_TEMPORARY);
		dstreg.WriteMask = sourced;

		s->Descr->BuildSwizzle(s, dstreg, inst->SrcReg[src]);

		/* Re-read the instruction pointer after swizzle emulation. */
		inst = s->Program->Instructions + s->IP;
		inst->SrcReg[src].File = PROGRAM_TEMPORARY;
		inst->SrcReg[src].Index = dstreg.Index;
		inst->SrcReg[src].Swizzle = 0;
		inst->SrcReg[src].Negate = NEGATE_NONE;
		inst->SrcReg[src].Abs = 0;
		/* Read channels use the identity selector, the rest stay NIL. */
		for(i = 0; i < 4; ++i) {
			if (GET_BIT(sourced, i))
				inst->SrcReg[src].Swizzle |= i << (3*i);
			else
				inst->SrcReg[src].Swizzle |= SWIZZLE_NIL << (3*i);
		}
		/* With an identity swizzle the components read from the
		 * temporary are exactly the channels in 'sourced'. */
		deswz_source = sourced;
	}

	struct register_state *regstate;

	if (inst->SrcReg[src].RelAddr) {
		/* Relative addressing: only the X component of the address
		 * register is marked as sourced here. */
		regstate = get_reg_state(s, PROGRAM_ADDRESS, 0);
		if (regstate)
			regstate->Sourced |= WRITEMASK_X;
	} else {
		/* Untracked register files yield no state and are skipped. */
		regstate = get_reg_state(s, inst->SrcReg[src].File, inst->SrcReg[src].Index);
		if (regstate)
			regstate->Sourced |= deswz_source & 0xf;
	}

	return inst;
}
132
133 static void unalias_srcregs(struct prog_instruction *inst, GLuint oldindex, GLuint newindex)
134 {
135 int nsrc = _mesa_num_inst_src_regs(inst->Opcode);
136 int i;
137 for(i = 0; i < nsrc; ++i)
138 if (inst->SrcReg[i].File == PROGRAM_TEMPORARY && inst->SrcReg[i].Index == oldindex)
139 inst->SrcReg[i].Index = newindex;
140 }
141
142 static void unalias_temporary(struct nqssadce_state* s, GLuint oldindex)
143 {
144 GLuint newindex = _mesa_find_free_register(s->Program, PROGRAM_TEMPORARY);
145 int ip;
146 for(ip = 0; ip < s->IP; ++ip) {
147 struct prog_instruction* inst = s->Program->Instructions + ip;
148 if (inst->DstReg.File == PROGRAM_TEMPORARY && inst->DstReg.Index == oldindex)
149 inst->DstReg.Index = newindex;
150 unalias_srcregs(inst, oldindex, newindex);
151 }
152 unalias_srcregs(s->Program->Instructions + s->IP, oldindex, newindex);
153 }
154
155
/**
 * Handle one instruction: the one at s->Program->Instructions + s->IP.
 *
 * OPCODE_END is ignored. For every opcode except OPCODE_KIL the
 * destination write mask is first reduced to the components that are
 * still marked as sourced in the destination's register state; those
 * components are then cleared from the state. An instruction whose write
 * mask becomes empty is deleted from the program. If the destination is a
 * temporary with no sourced components left, the temporary is unaliased.
 *
 * Afterwards the components read from each source operand are recorded
 * through track_used_srcreg(), using a per-opcode channel mask.
 *
 * Problems (untracked destination register, unknown opcode) are reported
 * through _mesa_problem() and processing of the instruction stops.
 */
static void process_instruction(struct nqssadce_state* s)
{
	struct prog_instruction *inst = s->Program->Instructions + s->IP;

	if (inst->Opcode == OPCODE_END)
		return;

	if (inst->Opcode != OPCODE_KIL) {
		struct register_state *regstate = get_reg_state(s, inst->DstReg.File, inst->DstReg.Index);
		if (!regstate) {
			_mesa_problem(s->Ctx, "NqssaDce: bad destination register (%i[%i])\n",
				inst->DstReg.File, inst->DstReg.Index);
			return;
		}

		/* Keep only the components that are still marked as sourced,
		 * then clear those components from the register state. */
		inst->DstReg.WriteMask &= regstate->Sourced;
		regstate->Sourced &= ~inst->DstReg.WriteMask;

		/* Nothing useful is written any more: dead code. */
		if (inst->DstReg.WriteMask == 0) {
			_mesa_delete_instructions(s->Program, s->IP, 1);
			return;
		}

		/* No sourced components are left for this temporary, so the
		 * register is renamed in all earlier instructions and in the
		 * sources of this one. */
		if (inst->DstReg.File == PROGRAM_TEMPORARY && !regstate->Sourced)
			unalias_temporary(s, inst->DstReg.Index);
	}

	/* Attention: Due to swizzle emulation code, the following
	 * might change the instruction stream under us, so we have
	 * to be careful with the inst pointer. */
	switch (inst->Opcode) {
	/* One source, read in the channels that are written. */
	case OPCODE_ARL:
	case OPCODE_DDX:
	case OPCODE_DDY:
	case OPCODE_FRC:
	case OPCODE_MOV:
		inst = track_used_srcreg(s, inst, 0, inst->DstReg.WriteMask);
		break;
	/* Two sources, read in the channels that are written. */
	case OPCODE_ADD:
	case OPCODE_MAX:
	case OPCODE_MIN:
	case OPCODE_MUL:
	case OPCODE_SGE:
	case OPCODE_SLT:
		inst = track_used_srcreg(s, inst, 0, inst->DstReg.WriteMask);
		inst = track_used_srcreg(s, inst, 1, inst->DstReg.WriteMask);
		break;
	/* Three sources, read in the channels that are written. */
	case OPCODE_CMP:
	case OPCODE_MAD:
		inst = track_used_srcreg(s, inst, 0, inst->DstReg.WriteMask);
		inst = track_used_srcreg(s, inst, 1, inst->DstReg.WriteMask);
		inst = track_used_srcreg(s, inst, 2, inst->DstReg.WriteMask);
		break;
	/* Only the first channel of the single source is read. */
	case OPCODE_COS:
	case OPCODE_EX2:
	case OPCODE_LG2:
	case OPCODE_RCP:
	case OPCODE_RSQ:
	case OPCODE_SIN:
		inst = track_used_srcreg(s, inst, 0, 0x1);
		break;
	/* First three channels of both sources. */
	case OPCODE_DP3:
		inst = track_used_srcreg(s, inst, 0, 0x7);
		inst = track_used_srcreg(s, inst, 1, 0x7);
		break;
	/* All four channels of both sources. */
	case OPCODE_DP4:
		inst = track_used_srcreg(s, inst, 0, 0xf);
		inst = track_used_srcreg(s, inst, 1, 0xf);
		break;
	/* All four channels of the single source. */
	case OPCODE_KIL:
	case OPCODE_TEX:
	case OPCODE_TXB:
	case OPCODE_TXP:
		inst = track_used_srcreg(s, inst, 0, 0xf);
		break;
	/* Channels 1 and 2 of source 0, channels 1 and 3 of source 1. */
	case OPCODE_DST:
		inst = track_used_srcreg(s, inst, 0, 0x6);
		inst = track_used_srcreg(s, inst, 1, 0xa);
		break;
	/* Channels 0 and 1 of source 0 only.
	 * NOTE(review): OPCODE_POW is handled like EXP/LOG here, i.e. only
	 * source 0 is tracked — confirm that POW cannot reach this pass
	 * with a second source operand that needs tracking. */
	case OPCODE_EXP:
	case OPCODE_LOG:
	case OPCODE_POW:
		inst = track_used_srcreg(s, inst, 0, 0x3);
		break;
	/* Channels 0, 1 and 3 of the single source. */
	case OPCODE_LIT:
		inst = track_used_srcreg(s, inst, 0, 0xb);
		break;
	default:
		_mesa_problem(s->Ctx, "NqssaDce: Unknown opcode %d\n", inst->Opcode);
		return;
	}
}
251
252 static void calculateInputsOutputs(struct gl_program *p)
253 {
254 struct prog_instruction *inst;
255 GLuint InputsRead, OutputsWritten;
256
257 inst = p->Instructions;
258 InputsRead = 0;
259 OutputsWritten = 0;
260 while (inst->Opcode != OPCODE_END)
261 {
262 int i, num_src_regs;
263
264 num_src_regs = _mesa_num_inst_src_regs(inst->Opcode);
265 for (i = 0; i < num_src_regs; ++i) {
266 if (inst->SrcReg[i].File == PROGRAM_INPUT)
267 InputsRead |= 1 << inst->SrcReg[i].Index;
268 }
269
270 if (inst->DstReg.File == PROGRAM_OUTPUT)
271 OutputsWritten |= 1 << inst->DstReg.Index;
272
273 ++inst;
274 }
275
276 p->InputsRead = InputsRead;
277 p->OutputsWritten = OutputsWritten;
278 }
279
280 void radeonNqssaDce(GLcontext *ctx, struct gl_program *p, struct radeon_nqssadce_descr* descr)
281 {
282 struct nqssadce_state s;
283
284 _mesa_bzero(&s, sizeof(s));
285 s.Ctx = ctx;
286 s.Program = p;
287 s.Descr = descr;
288 s.Descr->Init(&s);
289 s.IP = p->NumInstructions;
290
291 while(s.IP > 0) {
292 s.IP--;
293 process_instruction(&s);
294 }
295
296 calculateInputsOutputs(p);
297 }