2 * Copyright © 2010 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
24 #include "main/glheader.h"
25 #include "main/context.h"
26 #include "main/macros.h"
28 #include "prog_instruction.h"
29 #include "prog_optimize.h"
30 #include "prog_parameter.h"
/**
 * Test the first num_srcs source registers of an instruction against the
 * PROGRAM_CONSTANT register file.
 *
 * inst      Instruction whose SrcReg[] entries are examined (read only).
 * num_srcs  Number of leading SrcReg[] entries to examine.
 *
 * NOTE(review): the statement guarded by the File test and the function's
 * return statements are not visible in this listing.  Presumably the function
 * reports false as soon as one source is not a constant and true otherwise --
 * confirm against the complete file.
 */
34 src_regs_are_constant(const struct prog_instruction
*inst
, unsigned num_srcs
)
/* Visit SrcReg[0] through SrcReg[num_srcs - 1]. */
38 for (i
= 0; i
< num_srcs
; i
++) {
/* Condition holds when this source is not in the constant register file. */
39 if (inst
->SrcReg
[i
].File
!= PROGRAM_CONSTANT
)
/**
 * Build a source register that refers to a single float constant.
 *
 * The register is zero-filled, placed in the PROGRAM_CONSTANT file, and given
 * the index that _mesa_add_unnamed_constant() returns for the one-component
 * value val in the program's parameter list.
 *
 * prog  Program whose Parameters list receives the constant.
 * val   Scalar value to reference.
 *
 * NOTE(review): the declaration of swiz, any store of the swizzle written
 * back through &swiz, and the return of src are not visible in this listing
 * -- confirm against the complete file.
 */
46 static struct prog_src_register
47 src_reg_for_float(struct gl_program
*prog
, float val
)
49 struct prog_src_register src
;
/* Start from an all-zero register so every field not set below is zero. */
52 memset(&src
, 0, sizeof(src
));
54 src
.File
= PROGRAM_CONSTANT
;
/* Size argument 1: a single component is passed, taken from val itself. */
55 src
.Index
= _mesa_add_unnamed_constant(prog
->Parameters
,
56 (gl_constant_value
*) &val
, 1, &swiz
);
/**
 * Build a source register that refers to a four-component constant.
 *
 * The register is zero-filled, placed in the PROGRAM_CONSTANT file, and given
 * the index that _mesa_add_unnamed_constant() returns for the value at val in
 * the program's parameter list.
 *
 * prog  Program whose Parameters list receives the constant.
 * val   Pointer to the components; the call passes a size of 4, so this
 *       presumably must point at four floats -- confirm against the
 *       _mesa_add_unnamed_constant() contract.
 *
 * NOTE(review): the declaration of swiz, any store of the swizzle written
 * back through &swiz, and the return of src are not visible in this listing
 * -- confirm against the complete file.
 */
61 static struct prog_src_register
62 src_reg_for_vec4(struct gl_program
*prog
, const float *val
)
64 struct prog_src_register src
;
/* Start from an all-zero register so every field not set below is zero. */
67 memset(&src
, 0, sizeof(src
));
69 src
.File
= PROGRAM_CONSTANT
;
/* Size argument 4: val is reinterpreted as gl_constant_value storage. */
70 src
.Index
= _mesa_add_unnamed_constant(prog
->Parameters
,
71 (gl_constant_value
*) val
, 4, &swiz
);
/**
 * Compare two source registers field by field.
 *
 * The visible expression is the conjunction of equality tests on File, Index,
 * Swizzle and Negate; it is true only when every listed field matches.
 *
 * a, b  Registers to compare (read only).
 *
 * NOTE(review): the embedded line numbers in this listing skip between and
 * after the visible terms, so further terms of the conjunction may be missing
 * here -- confirm against the complete file before relying on this list.
 */
77 src_regs_are_same(const struct prog_src_register
*a
,
78 const struct prog_src_register
*b
)
80 return (a
->File
== b
->File
)
81 && (a
->Index
== b
->Index
)
82 && (a
->Swizzle
== b
->Swizzle
)
84 && (a
->Negate
== b
->Negate
)
/**
 * Fetch the four float components that a source register selects from the
 * program's parameter values.
 *
 * prog  Program whose Parameters->ParameterValues[] is read.
 * r     Source register; its Index picks the parameter, its Swizzle picks the
 *       component for each output slot, and its Negate bits are tested below.
 * data  Output array; elements 0..3 are written.
 *
 * NOTE(review): the listing omits the line that precedes the fabsf() calls and
 * the bodies of the four Negate tests.  Presumably the absolute value is
 * applied conditionally and each Negate bit flips the sign of the matching
 * component -- confirm against the complete file.
 */
90 get_value(struct gl_program
*prog
, struct prog_src_register
*r
, float *data
)
/* Storage of the parameter addressed by the register's Index. */
92 const gl_constant_value
*const value
=
93 prog
->Parameters
->ParameterValues
[r
->Index
];
/* Apply the swizzle: output slot k reads component GET_SWZ(Swizzle, k). */
95 data
[0] = value
[GET_SWZ(r
->Swizzle
, 0)].f
;
96 data
[1] = value
[GET_SWZ(r
->Swizzle
, 1)].f
;
97 data
[2] = value
[GET_SWZ(r
->Swizzle
, 2)].f
;
98 data
[3] = value
[GET_SWZ(r
->Swizzle
, 3)].f
;
/* Replace every component with its absolute value. */
101 data
[0] = fabsf(data
[0]);
102 data
[1] = fabsf(data
[1]);
103 data
[2] = fabsf(data
[2]);
104 data
[3] = fabsf(data
[3]);
/* One Negate bit per component: 0x01, 0x02, 0x04, 0x08 for slots 0..3. */
107 if (r
->Negate
& 0x01) {
111 if (r
->Negate
& 0x02) {
115 if (r
->Negate
& 0x04) {
119 if (r
->Negate
& 0x08) {
125 * Try to replace instructions that produce a constant result with simple moves.
127 * The hope is that a following copy propagation pass will eliminate the
128 * unnecessary move instructions.
131 _mesa_constant_fold(struct gl_program
*prog
)
133 bool progress
= false;
136 for (i
= 0; i
< prog
->NumInstructions
; i
++) {
137 struct prog_instruction
*const inst
= &prog
->Instructions
[i
];
139 switch (inst
->Opcode
) {
141 if (src_regs_are_constant(inst
, 2)) {
146 get_value(prog
, &inst
->SrcReg
[0], a
);
147 get_value(prog
, &inst
->SrcReg
[1], b
);
149 result
[0] = a
[0] + b
[0];
150 result
[1] = a
[1] + b
[1];
151 result
[2] = a
[2] + b
[2];
152 result
[3] = a
[3] + b
[3];
154 inst
->Opcode
= OPCODE_MOV
;
155 inst
->SrcReg
[0] = src_reg_for_vec4(prog
, result
);
157 inst
->SrcReg
[1].File
= PROGRAM_UNDEFINED
;
158 inst
->SrcReg
[1].Swizzle
= SWIZZLE_NOOP
;
165 /* FINISHME: We could also optimize CMP instructions where the first
166 * FINISHME: source is a constant that is either all < 0.0 or all
167 * FINISHME: >= 0.0.
169 if (src_regs_are_constant(inst
, 3)) {
175 get_value(prog
, &inst
->SrcReg
[0], a
);
176 get_value(prog
, &inst
->SrcReg
[1], b
);
177 get_value(prog
, &inst
->SrcReg
[2], c
);
179 result
[0] = a
[0] < 0.0f
? b
[0] : c
[0];
180 result
[1] = a
[1] < 0.0f
? b
[1] : c
[1];
181 result
[2] = a
[2] < 0.0f
? b
[2] : c
[2];
182 result
[3] = a
[3] < 0.0f
? b
[3] : c
[3];
184 inst
->Opcode
= OPCODE_MOV
;
185 inst
->SrcReg
[0] = src_reg_for_vec4(prog
, result
);
187 inst
->SrcReg
[1].File
= PROGRAM_UNDEFINED
;
188 inst
->SrcReg
[1].Swizzle
= SWIZZLE_NOOP
;
189 inst
->SrcReg
[2].File
= PROGRAM_UNDEFINED
;
190 inst
->SrcReg
[2].Swizzle
= SWIZZLE_NOOP
;
199 if (src_regs_are_constant(inst
, 2)) {
204 get_value(prog
, &inst
->SrcReg
[0], a
);
205 get_value(prog
, &inst
->SrcReg
[1], b
);
207 result
= (a
[0] * b
[0]) + (a
[1] * b
[1]);
209 if (inst
->Opcode
>= OPCODE_DP3
)
210 result
+= a
[2] * b
[2];
212 if (inst
->Opcode
== OPCODE_DP4
)
213 result
+= a
[3] * b
[3];
215 inst
->Opcode
= OPCODE_MOV
;
216 inst
->SrcReg
[0] = src_reg_for_float(prog
, result
);
218 inst
->SrcReg
[1].File
= PROGRAM_UNDEFINED
;
219 inst
->SrcReg
[1].Swizzle
= SWIZZLE_NOOP
;
226 if (src_regs_are_constant(inst
, 2)) {
231 get_value(prog
, &inst
->SrcReg
[0], a
);
232 get_value(prog
, &inst
->SrcReg
[1], b
);
234 result
[0] = a
[0] * b
[0];
235 result
[1] = a
[1] * b
[1];
236 result
[2] = a
[2] * b
[2];
237 result
[3] = a
[3] * b
[3];
239 inst
->Opcode
= OPCODE_MOV
;
240 inst
->SrcReg
[0] = src_reg_for_vec4(prog
, result
);
242 inst
->SrcReg
[1].File
= PROGRAM_UNDEFINED
;
243 inst
->SrcReg
[1].Swizzle
= SWIZZLE_NOOP
;
250 if (src_regs_are_constant(inst
, 2)) {
255 get_value(prog
, &inst
->SrcReg
[0], a
);
256 get_value(prog
, &inst
->SrcReg
[1], b
);
258 result
[0] = (a
[0] == b
[0]) ? 1.0f
: 0.0f
;
259 result
[1] = (a
[1] == b
[1]) ? 1.0f
: 0.0f
;
260 result
[2] = (a
[2] == b
[2]) ? 1.0f
: 0.0f
;
261 result
[3] = (a
[3] == b
[3]) ? 1.0f
: 0.0f
;
263 inst
->Opcode
= OPCODE_MOV
;
264 inst
->SrcReg
[0] = src_reg_for_vec4(prog
, result
);
266 inst
->SrcReg
[1].File
= PROGRAM_UNDEFINED
;
267 inst
->SrcReg
[1].Swizzle
= SWIZZLE_NOOP
;
270 } else if (src_regs_are_same(&inst
->SrcReg
[0], &inst
->SrcReg
[1])) {
271 inst
->Opcode
= OPCODE_MOV
;
272 inst
->SrcReg
[0] = src_reg_for_float(prog
, 1.0f
);
274 inst
->SrcReg
[1].File
= PROGRAM_UNDEFINED
;
275 inst
->SrcReg
[1].Swizzle
= SWIZZLE_NOOP
;
282 if (src_regs_are_constant(inst
, 2)) {
287 get_value(prog
, &inst
->SrcReg
[0], a
);
288 get_value(prog
, &inst
->SrcReg
[1], b
);
290 result
[0] = (a
[0] >= b
[0]) ? 1.0f
: 0.0f
;
291 result
[1] = (a
[1] >= b
[1]) ? 1.0f
: 0.0f
;
292 result
[2] = (a
[2] >= b
[2]) ? 1.0f
: 0.0f
;
293 result
[3] = (a
[3] >= b
[3]) ? 1.0f
: 0.0f
;
295 inst
->Opcode
= OPCODE_MOV
;
296 inst
->SrcReg
[0] = src_reg_for_vec4(prog
, result
);
298 inst
->SrcReg
[1].File
= PROGRAM_UNDEFINED
;
299 inst
->SrcReg
[1].Swizzle
= SWIZZLE_NOOP
;
302 } else if (src_regs_are_same(&inst
->SrcReg
[0], &inst
->SrcReg
[1])) {
303 inst
->Opcode
= OPCODE_MOV
;
304 inst
->SrcReg
[0] = src_reg_for_float(prog
, 1.0f
);
306 inst
->SrcReg
[1].File
= PROGRAM_UNDEFINED
;
307 inst
->SrcReg
[1].Swizzle
= SWIZZLE_NOOP
;
314 if (src_regs_are_constant(inst
, 2)) {
319 get_value(prog
, &inst
->SrcReg
[0], a
);
320 get_value(prog
, &inst
->SrcReg
[1], b
);
322 result
[0] = (a
[0] > b
[0]) ? 1.0f
: 0.0f
;
323 result
[1] = (a
[1] > b
[1]) ? 1.0f
: 0.0f
;
324 result
[2] = (a
[2] > b
[2]) ? 1.0f
: 0.0f
;
325 result
[3] = (a
[3] > b
[3]) ? 1.0f
: 0.0f
;
327 inst
->Opcode
= OPCODE_MOV
;
328 inst
->SrcReg
[0] = src_reg_for_vec4(prog
, result
);
330 inst
->SrcReg
[1].File
= PROGRAM_UNDEFINED
;
331 inst
->SrcReg
[1].Swizzle
= SWIZZLE_NOOP
;
334 } else if (src_regs_are_same(&inst
->SrcReg
[0], &inst
->SrcReg
[1])) {
335 inst
->Opcode
= OPCODE_MOV
;
336 inst
->SrcReg
[0] = src_reg_for_float(prog
, 0.0f
);
338 inst
->SrcReg
[1].File
= PROGRAM_UNDEFINED
;
339 inst
->SrcReg
[1].Swizzle
= SWIZZLE_NOOP
;
346 if (src_regs_are_constant(inst
, 2)) {
351 get_value(prog
, &inst
->SrcReg
[0], a
);
352 get_value(prog
, &inst
->SrcReg
[1], b
);
354 result
[0] = (a
[0] <= b
[0]) ? 1.0f
: 0.0f
;
355 result
[1] = (a
[1] <= b
[1]) ? 1.0f
: 0.0f
;
356 result
[2] = (a
[2] <= b
[2]) ? 1.0f
: 0.0f
;
357 result
[3] = (a
[3] <= b
[3]) ? 1.0f
: 0.0f
;
359 inst
->Opcode
= OPCODE_MOV
;
360 inst
->SrcReg
[0] = src_reg_for_vec4(prog
, result
);
362 inst
->SrcReg
[1].File
= PROGRAM_UNDEFINED
;
363 inst
->SrcReg
[1].Swizzle
= SWIZZLE_NOOP
;
366 } else if (src_regs_are_same(&inst
->SrcReg
[0], &inst
->SrcReg
[1])) {
367 inst
->Opcode
= OPCODE_MOV
;
368 inst
->SrcReg
[0] = src_reg_for_float(prog
, 1.0f
);
370 inst
->SrcReg
[1].File
= PROGRAM_UNDEFINED
;
371 inst
->SrcReg
[1].Swizzle
= SWIZZLE_NOOP
;
378 if (src_regs_are_constant(inst
, 2)) {
383 get_value(prog
, &inst
->SrcReg
[0], a
);
384 get_value(prog
, &inst
->SrcReg
[1], b
);
386 result
[0] = (a
[0] < b
[0]) ? 1.0f
: 0.0f
;
387 result
[1] = (a
[1] < b
[1]) ? 1.0f
: 0.0f
;
388 result
[2] = (a
[2] < b
[2]) ? 1.0f
: 0.0f
;
389 result
[3] = (a
[3] < b
[3]) ? 1.0f
: 0.0f
;
391 inst
->Opcode
= OPCODE_MOV
;
392 inst
->SrcReg
[0] = src_reg_for_vec4(prog
, result
);
394 inst
->SrcReg
[1].File
= PROGRAM_UNDEFINED
;
395 inst
->SrcReg
[1].Swizzle
= SWIZZLE_NOOP
;
398 } else if (src_regs_are_same(&inst
->SrcReg
[0], &inst
->SrcReg
[1])) {
399 inst
->Opcode
= OPCODE_MOV
;
400 inst
->SrcReg
[0] = src_reg_for_float(prog
, 0.0f
);
402 inst
->SrcReg
[1].File
= PROGRAM_UNDEFINED
;
403 inst
->SrcReg
[1].Swizzle
= SWIZZLE_NOOP
;
410 if (src_regs_are_constant(inst
, 2)) {
415 get_value(prog
, &inst
->SrcReg
[0], a
);
416 get_value(prog
, &inst
->SrcReg
[1], b
);
418 result
[0] = (a
[0] != b
[0]) ? 1.0f
: 0.0f
;
419 result
[1] = (a
[1] != b
[1]) ? 1.0f
: 0.0f
;
420 result
[2] = (a
[2] != b
[2]) ? 1.0f
: 0.0f
;
421 result
[3] = (a
[3] != b
[3]) ? 1.0f
: 0.0f
;
423 inst
->Opcode
= OPCODE_MOV
;
424 inst
->SrcReg
[0] = src_reg_for_vec4(prog
, result
);
426 inst
->SrcReg
[1].File
= PROGRAM_UNDEFINED
;
427 inst
->SrcReg
[1].Swizzle
= SWIZZLE_NOOP
;
430 } else if (src_regs_are_same(&inst
->SrcReg
[0], &inst
->SrcReg
[1])) {
431 inst
->Opcode
= OPCODE_MOV
;
432 inst
->SrcReg
[0] = src_reg_for_float(prog
, 0.0f
);
434 inst
->SrcReg
[1].File
= PROGRAM_UNDEFINED
;
435 inst
->SrcReg
[1].Swizzle
= SWIZZLE_NOOP
;