/*
 * Copyright © 2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
24 #include "main/glheader.h"
25 #include "main/context.h"
26 #include "main/macros.h"
28 #include "prog_instruction.h"
29 #include "prog_optimize.h"
30 #include "prog_parameter.h"
34 src_regs_are_constant(const struct prog_instruction
*inst
, unsigned num_srcs
)
38 for (i
= 0; i
< num_srcs
; i
++) {
39 if (inst
->SrcReg
[i
].File
!= PROGRAM_CONSTANT
)
41 if (inst
->SrcReg
[i
].RelAddr
)
48 static struct prog_src_register
49 src_reg_for_float(struct gl_program
*prog
, float val
)
51 struct prog_src_register src
;
54 memset(&src
, 0, sizeof(src
));
56 src
.File
= PROGRAM_CONSTANT
;
57 src
.Index
= _mesa_add_unnamed_constant(prog
->Parameters
,
58 (gl_constant_value
*) &val
, 1, &swiz
);
63 static struct prog_src_register
64 src_reg_for_vec4(struct gl_program
*prog
, const float *val
)
66 struct prog_src_register src
;
69 memset(&src
, 0, sizeof(src
));
71 src
.File
= PROGRAM_CONSTANT
;
72 src
.Index
= _mesa_add_unnamed_constant(prog
->Parameters
,
73 (gl_constant_value
*) val
, 4, &swiz
);
79 src_regs_are_same(const struct prog_src_register
*a
,
80 const struct prog_src_register
*b
)
82 return (a
->File
== b
->File
)
83 && (a
->Index
== b
->Index
)
84 && (a
->Swizzle
== b
->Swizzle
)
86 && (a
->Negate
== b
->Negate
)
92 get_value(struct gl_program
*prog
, struct prog_src_register
*r
, float *data
)
94 const gl_constant_value
*const value
=
95 prog
->Parameters
->ParameterValues
[r
->Index
];
97 data
[0] = value
[GET_SWZ(r
->Swizzle
, 0)].f
;
98 data
[1] = value
[GET_SWZ(r
->Swizzle
, 1)].f
;
99 data
[2] = value
[GET_SWZ(r
->Swizzle
, 2)].f
;
100 data
[3] = value
[GET_SWZ(r
->Swizzle
, 3)].f
;
103 data
[0] = fabsf(data
[0]);
104 data
[1] = fabsf(data
[1]);
105 data
[2] = fabsf(data
[2]);
106 data
[3] = fabsf(data
[3]);
109 if (r
->Negate
& 0x01) {
113 if (r
->Negate
& 0x02) {
117 if (r
->Negate
& 0x04) {
121 if (r
->Negate
& 0x08) {
/**
 * Try to replace instructions that produce a constant result with simple moves
 *
 * The hope is that a following copy propagation pass will eliminate the
 * unnecessary move instructions.
 */
133 _mesa_constant_fold(struct gl_program
*prog
)
135 bool progress
= false;
138 for (i
= 0; i
< prog
->NumInstructions
; i
++) {
139 struct prog_instruction
*const inst
= &prog
->Instructions
[i
];
141 switch (inst
->Opcode
) {
143 if (src_regs_are_constant(inst
, 2)) {
148 get_value(prog
, &inst
->SrcReg
[0], a
);
149 get_value(prog
, &inst
->SrcReg
[1], b
);
151 result
[0] = a
[0] + b
[0];
152 result
[1] = a
[1] + b
[1];
153 result
[2] = a
[2] + b
[2];
154 result
[3] = a
[3] + b
[3];
156 inst
->Opcode
= OPCODE_MOV
;
157 inst
->SrcReg
[0] = src_reg_for_vec4(prog
, result
);
159 inst
->SrcReg
[1].File
= PROGRAM_UNDEFINED
;
160 inst
->SrcReg
[1].Swizzle
= SWIZZLE_NOOP
;
	 /* FINISHME: We could also optimize CMP instructions where the first
	  * FINISHME: source is a constant that is either all < 0.0 or all
	  * FINISHME: >= 0.0.
	  */
171 if (src_regs_are_constant(inst
, 3)) {
177 get_value(prog
, &inst
->SrcReg
[0], a
);
178 get_value(prog
, &inst
->SrcReg
[1], b
);
179 get_value(prog
, &inst
->SrcReg
[2], c
);
181 result
[0] = a
[0] < 0.0f
? b
[0] : c
[0];
182 result
[1] = a
[1] < 0.0f
? b
[1] : c
[1];
183 result
[2] = a
[2] < 0.0f
? b
[2] : c
[2];
184 result
[3] = a
[3] < 0.0f
? b
[3] : c
[3];
186 inst
->Opcode
= OPCODE_MOV
;
187 inst
->SrcReg
[0] = src_reg_for_vec4(prog
, result
);
189 inst
->SrcReg
[1].File
= PROGRAM_UNDEFINED
;
190 inst
->SrcReg
[1].Swizzle
= SWIZZLE_NOOP
;
191 inst
->SrcReg
[2].File
= PROGRAM_UNDEFINED
;
192 inst
->SrcReg
[2].Swizzle
= SWIZZLE_NOOP
;
201 if (src_regs_are_constant(inst
, 2)) {
206 get_value(prog
, &inst
->SrcReg
[0], a
);
207 get_value(prog
, &inst
->SrcReg
[1], b
);
209 result
= (a
[0] * b
[0]) + (a
[1] * b
[1]);
211 if (inst
->Opcode
>= OPCODE_DP3
)
212 result
+= a
[2] * b
[2];
214 if (inst
->Opcode
== OPCODE_DP4
)
215 result
+= a
[3] * b
[3];
217 inst
->Opcode
= OPCODE_MOV
;
218 inst
->SrcReg
[0] = src_reg_for_float(prog
, result
);
220 inst
->SrcReg
[1].File
= PROGRAM_UNDEFINED
;
221 inst
->SrcReg
[1].Swizzle
= SWIZZLE_NOOP
;
228 if (src_regs_are_constant(inst
, 2)) {
233 get_value(prog
, &inst
->SrcReg
[0], a
);
234 get_value(prog
, &inst
->SrcReg
[1], b
);
236 result
[0] = a
[0] * b
[0];
237 result
[1] = a
[1] * b
[1];
238 result
[2] = a
[2] * b
[2];
239 result
[3] = a
[3] * b
[3];
241 inst
->Opcode
= OPCODE_MOV
;
242 inst
->SrcReg
[0] = src_reg_for_vec4(prog
, result
);
244 inst
->SrcReg
[1].File
= PROGRAM_UNDEFINED
;
245 inst
->SrcReg
[1].Swizzle
= SWIZZLE_NOOP
;
252 if (src_regs_are_constant(inst
, 2)) {
257 get_value(prog
, &inst
->SrcReg
[0], a
);
258 get_value(prog
, &inst
->SrcReg
[1], b
);
260 result
[0] = (a
[0] == b
[0]) ? 1.0f
: 0.0f
;
261 result
[1] = (a
[1] == b
[1]) ? 1.0f
: 0.0f
;
262 result
[2] = (a
[2] == b
[2]) ? 1.0f
: 0.0f
;
263 result
[3] = (a
[3] == b
[3]) ? 1.0f
: 0.0f
;
265 inst
->Opcode
= OPCODE_MOV
;
266 inst
->SrcReg
[0] = src_reg_for_vec4(prog
, result
);
268 inst
->SrcReg
[1].File
= PROGRAM_UNDEFINED
;
269 inst
->SrcReg
[1].Swizzle
= SWIZZLE_NOOP
;
272 } else if (src_regs_are_same(&inst
->SrcReg
[0], &inst
->SrcReg
[1])) {
273 inst
->Opcode
= OPCODE_MOV
;
274 inst
->SrcReg
[0] = src_reg_for_float(prog
, 1.0f
);
276 inst
->SrcReg
[1].File
= PROGRAM_UNDEFINED
;
277 inst
->SrcReg
[1].Swizzle
= SWIZZLE_NOOP
;
284 if (src_regs_are_constant(inst
, 2)) {
289 get_value(prog
, &inst
->SrcReg
[0], a
);
290 get_value(prog
, &inst
->SrcReg
[1], b
);
292 result
[0] = (a
[0] >= b
[0]) ? 1.0f
: 0.0f
;
293 result
[1] = (a
[1] >= b
[1]) ? 1.0f
: 0.0f
;
294 result
[2] = (a
[2] >= b
[2]) ? 1.0f
: 0.0f
;
295 result
[3] = (a
[3] >= b
[3]) ? 1.0f
: 0.0f
;
297 inst
->Opcode
= OPCODE_MOV
;
298 inst
->SrcReg
[0] = src_reg_for_vec4(prog
, result
);
300 inst
->SrcReg
[1].File
= PROGRAM_UNDEFINED
;
301 inst
->SrcReg
[1].Swizzle
= SWIZZLE_NOOP
;
304 } else if (src_regs_are_same(&inst
->SrcReg
[0], &inst
->SrcReg
[1])) {
305 inst
->Opcode
= OPCODE_MOV
;
306 inst
->SrcReg
[0] = src_reg_for_float(prog
, 1.0f
);
308 inst
->SrcReg
[1].File
= PROGRAM_UNDEFINED
;
309 inst
->SrcReg
[1].Swizzle
= SWIZZLE_NOOP
;
316 if (src_regs_are_constant(inst
, 2)) {
321 get_value(prog
, &inst
->SrcReg
[0], a
);
322 get_value(prog
, &inst
->SrcReg
[1], b
);
324 result
[0] = (a
[0] > b
[0]) ? 1.0f
: 0.0f
;
325 result
[1] = (a
[1] > b
[1]) ? 1.0f
: 0.0f
;
326 result
[2] = (a
[2] > b
[2]) ? 1.0f
: 0.0f
;
327 result
[3] = (a
[3] > b
[3]) ? 1.0f
: 0.0f
;
329 inst
->Opcode
= OPCODE_MOV
;
330 inst
->SrcReg
[0] = src_reg_for_vec4(prog
, result
);
332 inst
->SrcReg
[1].File
= PROGRAM_UNDEFINED
;
333 inst
->SrcReg
[1].Swizzle
= SWIZZLE_NOOP
;
336 } else if (src_regs_are_same(&inst
->SrcReg
[0], &inst
->SrcReg
[1])) {
337 inst
->Opcode
= OPCODE_MOV
;
338 inst
->SrcReg
[0] = src_reg_for_float(prog
, 0.0f
);
340 inst
->SrcReg
[1].File
= PROGRAM_UNDEFINED
;
341 inst
->SrcReg
[1].Swizzle
= SWIZZLE_NOOP
;
348 if (src_regs_are_constant(inst
, 2)) {
353 get_value(prog
, &inst
->SrcReg
[0], a
);
354 get_value(prog
, &inst
->SrcReg
[1], b
);
356 result
[0] = (a
[0] <= b
[0]) ? 1.0f
: 0.0f
;
357 result
[1] = (a
[1] <= b
[1]) ? 1.0f
: 0.0f
;
358 result
[2] = (a
[2] <= b
[2]) ? 1.0f
: 0.0f
;
359 result
[3] = (a
[3] <= b
[3]) ? 1.0f
: 0.0f
;
361 inst
->Opcode
= OPCODE_MOV
;
362 inst
->SrcReg
[0] = src_reg_for_vec4(prog
, result
);
364 inst
->SrcReg
[1].File
= PROGRAM_UNDEFINED
;
365 inst
->SrcReg
[1].Swizzle
= SWIZZLE_NOOP
;
368 } else if (src_regs_are_same(&inst
->SrcReg
[0], &inst
->SrcReg
[1])) {
369 inst
->Opcode
= OPCODE_MOV
;
370 inst
->SrcReg
[0] = src_reg_for_float(prog
, 1.0f
);
372 inst
->SrcReg
[1].File
= PROGRAM_UNDEFINED
;
373 inst
->SrcReg
[1].Swizzle
= SWIZZLE_NOOP
;
380 if (src_regs_are_constant(inst
, 2)) {
385 get_value(prog
, &inst
->SrcReg
[0], a
);
386 get_value(prog
, &inst
->SrcReg
[1], b
);
388 result
[0] = (a
[0] < b
[0]) ? 1.0f
: 0.0f
;
389 result
[1] = (a
[1] < b
[1]) ? 1.0f
: 0.0f
;
390 result
[2] = (a
[2] < b
[2]) ? 1.0f
: 0.0f
;
391 result
[3] = (a
[3] < b
[3]) ? 1.0f
: 0.0f
;
393 inst
->Opcode
= OPCODE_MOV
;
394 inst
->SrcReg
[0] = src_reg_for_vec4(prog
, result
);
396 inst
->SrcReg
[1].File
= PROGRAM_UNDEFINED
;
397 inst
->SrcReg
[1].Swizzle
= SWIZZLE_NOOP
;
400 } else if (src_regs_are_same(&inst
->SrcReg
[0], &inst
->SrcReg
[1])) {
401 inst
->Opcode
= OPCODE_MOV
;
402 inst
->SrcReg
[0] = src_reg_for_float(prog
, 0.0f
);
404 inst
->SrcReg
[1].File
= PROGRAM_UNDEFINED
;
405 inst
->SrcReg
[1].Swizzle
= SWIZZLE_NOOP
;
412 if (src_regs_are_constant(inst
, 2)) {
417 get_value(prog
, &inst
->SrcReg
[0], a
);
418 get_value(prog
, &inst
->SrcReg
[1], b
);
420 result
[0] = (a
[0] != b
[0]) ? 1.0f
: 0.0f
;
421 result
[1] = (a
[1] != b
[1]) ? 1.0f
: 0.0f
;
422 result
[2] = (a
[2] != b
[2]) ? 1.0f
: 0.0f
;
423 result
[3] = (a
[3] != b
[3]) ? 1.0f
: 0.0f
;
425 inst
->Opcode
= OPCODE_MOV
;
426 inst
->SrcReg
[0] = src_reg_for_vec4(prog
, result
);
428 inst
->SrcReg
[1].File
= PROGRAM_UNDEFINED
;
429 inst
->SrcReg
[1].Swizzle
= SWIZZLE_NOOP
;
432 } else if (src_regs_are_same(&inst
->SrcReg
[0], &inst
->SrcReg
[1])) {
433 inst
->Opcode
= OPCODE_MOV
;
434 inst
->SrcReg
[0] = src_reg_for_float(prog
, 0.0f
);
436 inst
->SrcReg
[1].File
= PROGRAM_UNDEFINED
;
437 inst
->SrcReg
[1].Swizzle
= SWIZZLE_NOOP
;