program: Clean up after previous commit.
[mesa.git] / src / mesa / program / prog_opt_constant_fold.c
1 /*
2 * Copyright © 2010 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24 #include "main/glheader.h"
25 #include "main/context.h"
26 #include "main/macros.h"
27 #include "program.h"
28 #include "prog_instruction.h"
29 #include "prog_optimize.h"
30 #include "prog_parameter.h"
31 #include <stdbool.h>
32
33 static bool
34 src_regs_are_constant(const struct prog_instruction *inst, unsigned num_srcs)
35 {
36 unsigned i;
37
38 for (i = 0; i < num_srcs; i++) {
39 if (inst->SrcReg[i].File != PROGRAM_CONSTANT)
40 return false;
41 if (inst->SrcReg[i].RelAddr)
42 return false;
43 }
44
45 return true;
46 }
47
48 static struct prog_src_register
49 src_reg_for_float(struct gl_program *prog, float val)
50 {
51 struct prog_src_register src;
52 unsigned swiz;
53
54 memset(&src, 0, sizeof(src));
55
56 src.File = PROGRAM_CONSTANT;
57 src.Index = _mesa_add_unnamed_constant(prog->Parameters,
58 (gl_constant_value *) &val, 1, &swiz);
59 src.Swizzle = swiz;
60 return src;
61 }
62
63 static struct prog_src_register
64 src_reg_for_vec4(struct gl_program *prog, const float *val)
65 {
66 struct prog_src_register src;
67 unsigned swiz;
68
69 memset(&src, 0, sizeof(src));
70
71 src.File = PROGRAM_CONSTANT;
72 src.Index = _mesa_add_unnamed_constant(prog->Parameters,
73 (gl_constant_value *) val, 4, &swiz);
74 src.Swizzle = swiz;
75 return src;
76 }
77
78 static bool
79 src_regs_are_same(const struct prog_src_register *a,
80 const struct prog_src_register *b)
81 {
82 return (a->File == b->File)
83 && (a->Index == b->Index)
84 && (a->Swizzle == b->Swizzle)
85 && (a->Abs == b->Abs)
86 && (a->Negate == b->Negate)
87 && (a->RelAddr == 0)
88 && (b->RelAddr == 0);
89 }
90
91 static void
92 get_value(struct gl_program *prog, struct prog_src_register *r, float *data)
93 {
94 const gl_constant_value *const value =
95 prog->Parameters->ParameterValues[r->Index];
96
97 data[0] = value[GET_SWZ(r->Swizzle, 0)].f;
98 data[1] = value[GET_SWZ(r->Swizzle, 1)].f;
99 data[2] = value[GET_SWZ(r->Swizzle, 2)].f;
100 data[3] = value[GET_SWZ(r->Swizzle, 3)].f;
101
102 if (r->Abs) {
103 data[0] = fabsf(data[0]);
104 data[1] = fabsf(data[1]);
105 data[2] = fabsf(data[2]);
106 data[3] = fabsf(data[3]);
107 }
108
109 if (r->Negate & 0x01) {
110 data[0] = -data[0];
111 }
112
113 if (r->Negate & 0x02) {
114 data[1] = -data[1];
115 }
116
117 if (r->Negate & 0x04) {
118 data[2] = -data[2];
119 }
120
121 if (r->Negate & 0x08) {
122 data[3] = -data[3];
123 }
124 }
125
126 /**
127 * Try to replace instructions that produce a constant result with simple moves
128 *
129 * The hope is that a following copy propagation pass will eliminate the
130 * unnecessary move instructions.
131 */
132 GLboolean
133 _mesa_constant_fold(struct gl_program *prog)
134 {
135 bool progress = false;
136 unsigned i;
137
138 for (i = 0; i < prog->NumInstructions; i++) {
139 struct prog_instruction *const inst = &prog->Instructions[i];
140
141 switch (inst->Opcode) {
142 case OPCODE_ADD:
143 if (src_regs_are_constant(inst, 2)) {
144 float a[4];
145 float b[4];
146 float result[4];
147
148 get_value(prog, &inst->SrcReg[0], a);
149 get_value(prog, &inst->SrcReg[1], b);
150
151 result[0] = a[0] + b[0];
152 result[1] = a[1] + b[1];
153 result[2] = a[2] + b[2];
154 result[3] = a[3] + b[3];
155
156 inst->Opcode = OPCODE_MOV;
157 inst->SrcReg[0] = src_reg_for_vec4(prog, result);
158
159 inst->SrcReg[1].File = PROGRAM_UNDEFINED;
160 inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
161
162 progress = true;
163 }
164 break;
165
166 case OPCODE_CMP:
167 /* FINISHME: We could also optimize CMP instructions where the first
168 * FINISHME: source is a constant that is either all < 0.0 or all
169 * FINISHME: >= 0.0.
170 */
171 if (src_regs_are_constant(inst, 3)) {
172 float a[4];
173 float b[4];
174 float c[4];
175 float result[4];
176
177 get_value(prog, &inst->SrcReg[0], a);
178 get_value(prog, &inst->SrcReg[1], b);
179 get_value(prog, &inst->SrcReg[2], c);
180
181 result[0] = a[0] < 0.0f ? b[0] : c[0];
182 result[1] = a[1] < 0.0f ? b[1] : c[1];
183 result[2] = a[2] < 0.0f ? b[2] : c[2];
184 result[3] = a[3] < 0.0f ? b[3] : c[3];
185
186 inst->Opcode = OPCODE_MOV;
187 inst->SrcReg[0] = src_reg_for_vec4(prog, result);
188
189 inst->SrcReg[1].File = PROGRAM_UNDEFINED;
190 inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
191 inst->SrcReg[2].File = PROGRAM_UNDEFINED;
192 inst->SrcReg[2].Swizzle = SWIZZLE_NOOP;
193
194 progress = true;
195 }
196 break;
197
198 case OPCODE_DP2:
199 case OPCODE_DP3:
200 case OPCODE_DP4:
201 if (src_regs_are_constant(inst, 2)) {
202 float a[4];
203 float b[4];
204 float result;
205
206 get_value(prog, &inst->SrcReg[0], a);
207 get_value(prog, &inst->SrcReg[1], b);
208
209 result = (a[0] * b[0]) + (a[1] * b[1]);
210
211 if (inst->Opcode >= OPCODE_DP3)
212 result += a[2] * b[2];
213
214 if (inst->Opcode == OPCODE_DP4)
215 result += a[3] * b[3];
216
217 inst->Opcode = OPCODE_MOV;
218 inst->SrcReg[0] = src_reg_for_float(prog, result);
219
220 inst->SrcReg[1].File = PROGRAM_UNDEFINED;
221 inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
222
223 progress = true;
224 }
225 break;
226
227 case OPCODE_MUL:
228 if (src_regs_are_constant(inst, 2)) {
229 float a[4];
230 float b[4];
231 float result[4];
232
233 get_value(prog, &inst->SrcReg[0], a);
234 get_value(prog, &inst->SrcReg[1], b);
235
236 result[0] = a[0] * b[0];
237 result[1] = a[1] * b[1];
238 result[2] = a[2] * b[2];
239 result[3] = a[3] * b[3];
240
241 inst->Opcode = OPCODE_MOV;
242 inst->SrcReg[0] = src_reg_for_vec4(prog, result);
243
244 inst->SrcReg[1].File = PROGRAM_UNDEFINED;
245 inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
246
247 progress = true;
248 }
249 break;
250
251 case OPCODE_SEQ:
252 if (src_regs_are_constant(inst, 2)) {
253 float a[4];
254 float b[4];
255 float result[4];
256
257 get_value(prog, &inst->SrcReg[0], a);
258 get_value(prog, &inst->SrcReg[1], b);
259
260 result[0] = (a[0] == b[0]) ? 1.0f : 0.0f;
261 result[1] = (a[1] == b[1]) ? 1.0f : 0.0f;
262 result[2] = (a[2] == b[2]) ? 1.0f : 0.0f;
263 result[3] = (a[3] == b[3]) ? 1.0f : 0.0f;
264
265 inst->Opcode = OPCODE_MOV;
266 inst->SrcReg[0] = src_reg_for_vec4(prog, result);
267
268 inst->SrcReg[1].File = PROGRAM_UNDEFINED;
269 inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
270
271 progress = true;
272 } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) {
273 inst->Opcode = OPCODE_MOV;
274 inst->SrcReg[0] = src_reg_for_float(prog, 1.0f);
275
276 inst->SrcReg[1].File = PROGRAM_UNDEFINED;
277 inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
278
279 progress = true;
280 }
281 break;
282
283 case OPCODE_SGE:
284 if (src_regs_are_constant(inst, 2)) {
285 float a[4];
286 float b[4];
287 float result[4];
288
289 get_value(prog, &inst->SrcReg[0], a);
290 get_value(prog, &inst->SrcReg[1], b);
291
292 result[0] = (a[0] >= b[0]) ? 1.0f : 0.0f;
293 result[1] = (a[1] >= b[1]) ? 1.0f : 0.0f;
294 result[2] = (a[2] >= b[2]) ? 1.0f : 0.0f;
295 result[3] = (a[3] >= b[3]) ? 1.0f : 0.0f;
296
297 inst->Opcode = OPCODE_MOV;
298 inst->SrcReg[0] = src_reg_for_vec4(prog, result);
299
300 inst->SrcReg[1].File = PROGRAM_UNDEFINED;
301 inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
302
303 progress = true;
304 } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) {
305 inst->Opcode = OPCODE_MOV;
306 inst->SrcReg[0] = src_reg_for_float(prog, 1.0f);
307
308 inst->SrcReg[1].File = PROGRAM_UNDEFINED;
309 inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
310
311 progress = true;
312 }
313 break;
314
315 case OPCODE_SGT:
316 if (src_regs_are_constant(inst, 2)) {
317 float a[4];
318 float b[4];
319 float result[4];
320
321 get_value(prog, &inst->SrcReg[0], a);
322 get_value(prog, &inst->SrcReg[1], b);
323
324 result[0] = (a[0] > b[0]) ? 1.0f : 0.0f;
325 result[1] = (a[1] > b[1]) ? 1.0f : 0.0f;
326 result[2] = (a[2] > b[2]) ? 1.0f : 0.0f;
327 result[3] = (a[3] > b[3]) ? 1.0f : 0.0f;
328
329 inst->Opcode = OPCODE_MOV;
330 inst->SrcReg[0] = src_reg_for_vec4(prog, result);
331
332 inst->SrcReg[1].File = PROGRAM_UNDEFINED;
333 inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
334
335 progress = true;
336 } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) {
337 inst->Opcode = OPCODE_MOV;
338 inst->SrcReg[0] = src_reg_for_float(prog, 0.0f);
339
340 inst->SrcReg[1].File = PROGRAM_UNDEFINED;
341 inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
342
343 progress = true;
344 }
345 break;
346
347 case OPCODE_SLE:
348 if (src_regs_are_constant(inst, 2)) {
349 float a[4];
350 float b[4];
351 float result[4];
352
353 get_value(prog, &inst->SrcReg[0], a);
354 get_value(prog, &inst->SrcReg[1], b);
355
356 result[0] = (a[0] <= b[0]) ? 1.0f : 0.0f;
357 result[1] = (a[1] <= b[1]) ? 1.0f : 0.0f;
358 result[2] = (a[2] <= b[2]) ? 1.0f : 0.0f;
359 result[3] = (a[3] <= b[3]) ? 1.0f : 0.0f;
360
361 inst->Opcode = OPCODE_MOV;
362 inst->SrcReg[0] = src_reg_for_vec4(prog, result);
363
364 inst->SrcReg[1].File = PROGRAM_UNDEFINED;
365 inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
366
367 progress = true;
368 } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) {
369 inst->Opcode = OPCODE_MOV;
370 inst->SrcReg[0] = src_reg_for_float(prog, 1.0f);
371
372 inst->SrcReg[1].File = PROGRAM_UNDEFINED;
373 inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
374
375 progress = true;
376 }
377 break;
378
379 case OPCODE_SLT:
380 if (src_regs_are_constant(inst, 2)) {
381 float a[4];
382 float b[4];
383 float result[4];
384
385 get_value(prog, &inst->SrcReg[0], a);
386 get_value(prog, &inst->SrcReg[1], b);
387
388 result[0] = (a[0] < b[0]) ? 1.0f : 0.0f;
389 result[1] = (a[1] < b[1]) ? 1.0f : 0.0f;
390 result[2] = (a[2] < b[2]) ? 1.0f : 0.0f;
391 result[3] = (a[3] < b[3]) ? 1.0f : 0.0f;
392
393 inst->Opcode = OPCODE_MOV;
394 inst->SrcReg[0] = src_reg_for_vec4(prog, result);
395
396 inst->SrcReg[1].File = PROGRAM_UNDEFINED;
397 inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
398
399 progress = true;
400 } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) {
401 inst->Opcode = OPCODE_MOV;
402 inst->SrcReg[0] = src_reg_for_float(prog, 0.0f);
403
404 inst->SrcReg[1].File = PROGRAM_UNDEFINED;
405 inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
406
407 progress = true;
408 }
409 break;
410
411 case OPCODE_SNE:
412 if (src_regs_are_constant(inst, 2)) {
413 float a[4];
414 float b[4];
415 float result[4];
416
417 get_value(prog, &inst->SrcReg[0], a);
418 get_value(prog, &inst->SrcReg[1], b);
419
420 result[0] = (a[0] != b[0]) ? 1.0f : 0.0f;
421 result[1] = (a[1] != b[1]) ? 1.0f : 0.0f;
422 result[2] = (a[2] != b[2]) ? 1.0f : 0.0f;
423 result[3] = (a[3] != b[3]) ? 1.0f : 0.0f;
424
425 inst->Opcode = OPCODE_MOV;
426 inst->SrcReg[0] = src_reg_for_vec4(prog, result);
427
428 inst->SrcReg[1].File = PROGRAM_UNDEFINED;
429 inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
430
431 progress = true;
432 } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) {
433 inst->Opcode = OPCODE_MOV;
434 inst->SrcReg[0] = src_reg_for_float(prog, 0.0f);
435
436 inst->SrcReg[1].File = PROGRAM_UNDEFINED;
437 inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
438
439 progress = true;
440 }
441 break;
442
443 default:
444 break;
445 }
446 }
447
448 return progress;
449 }