/*
 * Copyright © 2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
#include <math.h>
#include <stdbool.h>
#include <string.h>

#include "main/glheader.h"
#include "main/context.h"
#include "main/macros.h"
#include "prog_instruction.h"
#include "prog_optimize.h"
#include "prog_parameter.h"
34 src_regs_are_constant(const struct prog_instruction
*inst
, unsigned num_srcs
)
38 for (i
= 0; i
< num_srcs
; i
++) {
39 if (inst
->SrcReg
[i
].File
!= PROGRAM_CONSTANT
)
46 static struct prog_src_register
47 src_reg_for_float(struct gl_program
*prog
, float val
)
49 struct prog_src_register src
;
52 memset(&src
, 0, sizeof(src
));
54 src
.File
= PROGRAM_CONSTANT
;
55 src
.Index
= _mesa_add_unnamed_constant(prog
->Parameters
,
56 (gl_constant_value
*) &val
, 1, &swiz
);
61 static struct prog_src_register
62 src_reg_for_vec4(struct gl_program
*prog
, const float *val
)
64 struct prog_src_register src
;
67 memset(&src
, 0, sizeof(src
));
69 src
.File
= PROGRAM_CONSTANT
;
70 src
.Index
= _mesa_add_unnamed_constant(prog
->Parameters
,
71 (gl_constant_value
*) val
, 4, &swiz
);
77 src_regs_are_same(const struct prog_src_register
*a
,
78 const struct prog_src_register
*b
)
80 return (a
->File
== b
->File
)
81 && (a
->Index
== b
->Index
)
82 && (a
->Swizzle
== b
->Swizzle
)
84 && (a
->Negate
== b
->Negate
)
90 get_value(struct gl_program
*prog
, struct prog_src_register
*r
, float *data
)
92 const gl_constant_value
*const value
=
93 prog
->Parameters
->ParameterValues
[r
->Index
];
95 data
[0] = value
[GET_SWZ(r
->Swizzle
, 0)].f
;
96 data
[1] = value
[GET_SWZ(r
->Swizzle
, 1)].f
;
97 data
[2] = value
[GET_SWZ(r
->Swizzle
, 2)].f
;
98 data
[3] = value
[GET_SWZ(r
->Swizzle
, 3)].f
;
101 data
[0] = fabsf(data
[0]);
102 data
[1] = fabsf(data
[1]);
103 data
[2] = fabsf(data
[2]);
104 data
[3] = fabsf(data
[3]);
107 if (r
->Negate
& 0x01) {
111 if (r
->Negate
& 0x02) {
115 if (r
->Negate
& 0x04) {
119 if (r
->Negate
& 0x08) {
125 * Try to replace instructions that produce a constant result with simple moves
127 * The hope is that a following copy propagation pass will eliminate the
128 * unnecessary move instructions.
131 _mesa_constant_fold(struct gl_program
*prog
)
133 bool progress
= false;
136 for (i
= 0; i
< prog
->NumInstructions
; i
++) {
137 struct prog_instruction
*const inst
= &prog
->Instructions
[i
];
139 switch (inst
->Opcode
) {
141 if (src_regs_are_constant(inst
, 2)) {
146 get_value(prog
, &inst
->SrcReg
[0], a
);
147 get_value(prog
, &inst
->SrcReg
[1], b
);
149 result
[0] = a
[0] + b
[0];
150 result
[1] = a
[1] + b
[1];
151 result
[2] = a
[2] + b
[2];
152 result
[3] = a
[3] + b
[3];
154 inst
->Opcode
= OPCODE_MOV
;
155 inst
->SrcReg
[0] = src_reg_for_vec4(prog
, result
);
157 inst
->SrcReg
[1].File
= PROGRAM_UNDEFINED
;
158 inst
->SrcReg
[1].Swizzle
= SWIZZLE_NOOP
;
165 /* FINISHME: We could also optimize CMP instructions where the first
166 * FINISHME: source is a constant that is either all < 0.0 or all
169 if (src_regs_are_constant(inst
, 3)) {
175 get_value(prog
, &inst
->SrcReg
[0], a
);
176 get_value(prog
, &inst
->SrcReg
[1], b
);
177 get_value(prog
, &inst
->SrcReg
[2], c
);
179 result
[0] = a
[0] < 0.0f
? b
[0] : c
[0];
180 result
[1] = a
[1] < 0.0f
? b
[1] : c
[1];
181 result
[2] = a
[2] < 0.0f
? b
[2] : c
[2];
182 result
[3] = a
[3] < 0.0f
? b
[3] : c
[3];
184 inst
->Opcode
= OPCODE_MOV
;
185 inst
->SrcReg
[0] = src_reg_for_vec4(prog
, result
);
187 inst
->SrcReg
[1].File
= PROGRAM_UNDEFINED
;
188 inst
->SrcReg
[1].Swizzle
= SWIZZLE_NOOP
;
189 inst
->SrcReg
[2].File
= PROGRAM_UNDEFINED
;
190 inst
->SrcReg
[2].Swizzle
= SWIZZLE_NOOP
;
199 if (src_regs_are_constant(inst
, 2)) {
204 get_value(prog
, &inst
->SrcReg
[0], a
);
205 get_value(prog
, &inst
->SrcReg
[1], b
);
207 /* It seems like a loop could be used here, but we cleverly put
208 * DP2A between DP2 and DP3. Subtracting DP2 (or similar) from
209 * the opcode results in various failures of the loop control.
211 result
= (a
[0] * b
[0]) + (a
[1] * b
[1]);
213 if (inst
->Opcode
>= OPCODE_DP3
)
214 result
+= a
[2] * b
[2];
216 if (inst
->Opcode
== OPCODE_DP4
)
217 result
+= a
[3] * b
[3];
219 inst
->Opcode
= OPCODE_MOV
;
220 inst
->SrcReg
[0] = src_reg_for_float(prog
, result
);
222 inst
->SrcReg
[1].File
= PROGRAM_UNDEFINED
;
223 inst
->SrcReg
[1].Swizzle
= SWIZZLE_NOOP
;
230 if (src_regs_are_constant(inst
, 2)) {
235 get_value(prog
, &inst
->SrcReg
[0], a
);
236 get_value(prog
, &inst
->SrcReg
[1], b
);
238 result
[0] = a
[0] * b
[0];
239 result
[1] = a
[1] * b
[1];
240 result
[2] = a
[2] * b
[2];
241 result
[3] = a
[3] * b
[3];
243 inst
->Opcode
= OPCODE_MOV
;
244 inst
->SrcReg
[0] = src_reg_for_vec4(prog
, result
);
246 inst
->SrcReg
[1].File
= PROGRAM_UNDEFINED
;
247 inst
->SrcReg
[1].Swizzle
= SWIZZLE_NOOP
;
254 if (src_regs_are_constant(inst
, 2)) {
259 get_value(prog
, &inst
->SrcReg
[0], a
);
260 get_value(prog
, &inst
->SrcReg
[1], b
);
262 result
[0] = (a
[0] == b
[0]) ? 1.0f
: 0.0f
;
263 result
[1] = (a
[1] == b
[1]) ? 1.0f
: 0.0f
;
264 result
[2] = (a
[2] == b
[2]) ? 1.0f
: 0.0f
;
265 result
[3] = (a
[3] == b
[3]) ? 1.0f
: 0.0f
;
267 inst
->Opcode
= OPCODE_MOV
;
268 inst
->SrcReg
[0] = src_reg_for_vec4(prog
, result
);
270 inst
->SrcReg
[1].File
= PROGRAM_UNDEFINED
;
271 inst
->SrcReg
[1].Swizzle
= SWIZZLE_NOOP
;
274 } else if (src_regs_are_same(&inst
->SrcReg
[0], &inst
->SrcReg
[1])) {
275 inst
->Opcode
= OPCODE_MOV
;
276 inst
->SrcReg
[0] = src_reg_for_float(prog
, 1.0f
);
278 inst
->SrcReg
[1].File
= PROGRAM_UNDEFINED
;
279 inst
->SrcReg
[1].Swizzle
= SWIZZLE_NOOP
;
286 if (src_regs_are_constant(inst
, 2)) {
291 get_value(prog
, &inst
->SrcReg
[0], a
);
292 get_value(prog
, &inst
->SrcReg
[1], b
);
294 result
[0] = (a
[0] >= b
[0]) ? 1.0f
: 0.0f
;
295 result
[1] = (a
[1] >= b
[1]) ? 1.0f
: 0.0f
;
296 result
[2] = (a
[2] >= b
[2]) ? 1.0f
: 0.0f
;
297 result
[3] = (a
[3] >= b
[3]) ? 1.0f
: 0.0f
;
299 inst
->Opcode
= OPCODE_MOV
;
300 inst
->SrcReg
[0] = src_reg_for_vec4(prog
, result
);
302 inst
->SrcReg
[1].File
= PROGRAM_UNDEFINED
;
303 inst
->SrcReg
[1].Swizzle
= SWIZZLE_NOOP
;
306 } else if (src_regs_are_same(&inst
->SrcReg
[0], &inst
->SrcReg
[1])) {
307 inst
->Opcode
= OPCODE_MOV
;
308 inst
->SrcReg
[0] = src_reg_for_float(prog
, 1.0f
);
310 inst
->SrcReg
[1].File
= PROGRAM_UNDEFINED
;
311 inst
->SrcReg
[1].Swizzle
= SWIZZLE_NOOP
;
318 if (src_regs_are_constant(inst
, 2)) {
323 get_value(prog
, &inst
->SrcReg
[0], a
);
324 get_value(prog
, &inst
->SrcReg
[1], b
);
326 result
[0] = (a
[0] > b
[0]) ? 1.0f
: 0.0f
;
327 result
[1] = (a
[1] > b
[1]) ? 1.0f
: 0.0f
;
328 result
[2] = (a
[2] > b
[2]) ? 1.0f
: 0.0f
;
329 result
[3] = (a
[3] > b
[3]) ? 1.0f
: 0.0f
;
331 inst
->Opcode
= OPCODE_MOV
;
332 inst
->SrcReg
[0] = src_reg_for_vec4(prog
, result
);
334 inst
->SrcReg
[1].File
= PROGRAM_UNDEFINED
;
335 inst
->SrcReg
[1].Swizzle
= SWIZZLE_NOOP
;
338 } else if (src_regs_are_same(&inst
->SrcReg
[0], &inst
->SrcReg
[1])) {
339 inst
->Opcode
= OPCODE_MOV
;
340 inst
->SrcReg
[0] = src_reg_for_float(prog
, 0.0f
);
342 inst
->SrcReg
[1].File
= PROGRAM_UNDEFINED
;
343 inst
->SrcReg
[1].Swizzle
= SWIZZLE_NOOP
;
350 if (src_regs_are_constant(inst
, 2)) {
355 get_value(prog
, &inst
->SrcReg
[0], a
);
356 get_value(prog
, &inst
->SrcReg
[1], b
);
358 result
[0] = (a
[0] <= b
[0]) ? 1.0f
: 0.0f
;
359 result
[1] = (a
[1] <= b
[1]) ? 1.0f
: 0.0f
;
360 result
[2] = (a
[2] <= b
[2]) ? 1.0f
: 0.0f
;
361 result
[3] = (a
[3] <= b
[3]) ? 1.0f
: 0.0f
;
363 inst
->Opcode
= OPCODE_MOV
;
364 inst
->SrcReg
[0] = src_reg_for_vec4(prog
, result
);
366 inst
->SrcReg
[1].File
= PROGRAM_UNDEFINED
;
367 inst
->SrcReg
[1].Swizzle
= SWIZZLE_NOOP
;
370 } else if (src_regs_are_same(&inst
->SrcReg
[0], &inst
->SrcReg
[1])) {
371 inst
->Opcode
= OPCODE_MOV
;
372 inst
->SrcReg
[0] = src_reg_for_float(prog
, 1.0f
);
374 inst
->SrcReg
[1].File
= PROGRAM_UNDEFINED
;
375 inst
->SrcReg
[1].Swizzle
= SWIZZLE_NOOP
;
382 if (src_regs_are_constant(inst
, 2)) {
387 get_value(prog
, &inst
->SrcReg
[0], a
);
388 get_value(prog
, &inst
->SrcReg
[1], b
);
390 result
[0] = (a
[0] < b
[0]) ? 1.0f
: 0.0f
;
391 result
[1] = (a
[1] < b
[1]) ? 1.0f
: 0.0f
;
392 result
[2] = (a
[2] < b
[2]) ? 1.0f
: 0.0f
;
393 result
[3] = (a
[3] < b
[3]) ? 1.0f
: 0.0f
;
395 inst
->Opcode
= OPCODE_MOV
;
396 inst
->SrcReg
[0] = src_reg_for_vec4(prog
, result
);
398 inst
->SrcReg
[1].File
= PROGRAM_UNDEFINED
;
399 inst
->SrcReg
[1].Swizzle
= SWIZZLE_NOOP
;
402 } else if (src_regs_are_same(&inst
->SrcReg
[0], &inst
->SrcReg
[1])) {
403 inst
->Opcode
= OPCODE_MOV
;
404 inst
->SrcReg
[0] = src_reg_for_float(prog
, 0.0f
);
406 inst
->SrcReg
[1].File
= PROGRAM_UNDEFINED
;
407 inst
->SrcReg
[1].Swizzle
= SWIZZLE_NOOP
;
414 if (src_regs_are_constant(inst
, 2)) {
419 get_value(prog
, &inst
->SrcReg
[0], a
);
420 get_value(prog
, &inst
->SrcReg
[1], b
);
422 result
[0] = (a
[0] != b
[0]) ? 1.0f
: 0.0f
;
423 result
[1] = (a
[1] != b
[1]) ? 1.0f
: 0.0f
;
424 result
[2] = (a
[2] != b
[2]) ? 1.0f
: 0.0f
;
425 result
[3] = (a
[3] != b
[3]) ? 1.0f
: 0.0f
;
427 inst
->Opcode
= OPCODE_MOV
;
428 inst
->SrcReg
[0] = src_reg_for_vec4(prog
, result
);
430 inst
->SrcReg
[1].File
= PROGRAM_UNDEFINED
;
431 inst
->SrcReg
[1].Swizzle
= SWIZZLE_NOOP
;
434 } else if (src_regs_are_same(&inst
->SrcReg
[0], &inst
->SrcReg
[1])) {
435 inst
->Opcode
= OPCODE_MOV
;
436 inst
->SrcReg
[0] = src_reg_for_float(prog
, 0.0f
);
438 inst
->SrcReg
[1].File
= PROGRAM_UNDEFINED
;
439 inst
->SrcReg
[1].Swizzle
= SWIZZLE_NOOP
;