/*
 * Copyright © 2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
24 #include "main/glheader.h"
25 #include "main/context.h"
26 #include "main/macros.h"
28 #include "prog_instruction.h"
29 #include "prog_optimize.h"
30 #include "prog_parameter.h"
/*
 * src_regs_are_constant: inspects the first num_srcs source registers of
 * inst.  For each one it tests whether the register file differs from
 * PROGRAM_CONSTANT and whether the RelAddr flag is set.
 *
 * NOTE(review): this listing is a damaged extract.  The return type, the
 * declaration of i, the statements guarded by the two tests and the final
 * return are not visible here.  Callers in this file use the result as a
 * condition, so presumably either test firing makes the function report
 * "not constant" -- confirm against the pristine source.
 */
34 src_regs_are_constant(const struct prog_instruction
*inst
, unsigned num_srcs
)
/* Visit source operands 0 .. num_srcs-1. */
38 for (i
= 0; i
< num_srcs
; i
++) {
/* Test 1: operand does not live in the constant register file. */
39 if (inst
->SrcReg
[i
].File
!= PROGRAM_CONSTANT
)
/* Test 2: operand has its RelAddr flag set. */
41 if (inst
->SrcReg
[i
].RelAddr
)
/*
 * src_reg_for_float: builds a source-register descriptor for the single
 * float val.
 *
 * The descriptor is zero-filled, its File is set to PROGRAM_CONSTANT and its
 * Index is set to the value returned by _mesa_add_unnamed_constant(), which
 * is handed prog->Parameters, the address of val, a component count of 1 and
 * the address of swiz.
 *
 * NOTE(review): this listing is a damaged extract.  The declaration of swiz,
 * whatever is done with it after the call, and the return statement are not
 * visible here; presumably the descriptor is returned by value, as the
 * return type suggests -- confirm against the pristine source.
 */
48 static struct prog_src_register
49 src_reg_for_float(struct gl_program
*prog
, float val
)
51 struct prog_src_register src
;
/* Start from an all-zero descriptor so fields not set below are 0. */
54 memset(&src
, 0, sizeof(src
));
56 src
.File
= PROGRAM_CONSTANT
;
/* One component is passed; swiz is passed by address to the helper. */
57 src
.Index
= _mesa_add_unnamed_constant(prog
->Parameters
,
58 (gl_constant_value
*) &val
, 1, &swiz
);
/*
 * src_reg_for_vec4: builds a source-register descriptor for the floats
 * pointed to by val.
 *
 * The descriptor is zero-filled, its File is set to PROGRAM_CONSTANT and its
 * Index is set to the value returned by _mesa_add_unnamed_constant(), which
 * is handed prog->Parameters, val, a component count of 4 and the address of
 * swiz.  val must therefore point at (at least) four readable floats.
 *
 * NOTE(review): this listing is a damaged extract.  The declaration of swiz,
 * whatever is done with it after the call, and the return statement are not
 * visible here; presumably the descriptor is returned by value, as the
 * return type suggests -- confirm against the pristine source.
 */
63 static struct prog_src_register
64 src_reg_for_vec4(struct gl_program
*prog
, const float *val
)
66 struct prog_src_register src
;
/* Start from an all-zero descriptor so fields not set below are 0. */
69 memset(&src
, 0, sizeof(src
));
71 src
.File
= PROGRAM_CONSTANT
;
/* Four components are passed; swiz is passed by address to the helper. */
72 src
.Index
= _mesa_add_unnamed_constant(prog
->Parameters
,
73 (gl_constant_value
*) val
, 4, &swiz
);
/*
 * src_regs_are_same: compares two source-register descriptors field by
 * field.  The visible part of the expression requires File, Index, Swizzle
 * and Negate to be equal in a and b.
 *
 * NOTE(review): this listing is a damaged extract.  The return type and the
 * end of the return expression are not visible, so further conditions may
 * follow the Negate comparison -- confirm against the pristine source before
 * relying on the list above being complete.
 */
79 src_regs_are_same(const struct prog_src_register
*a
,
80 const struct prog_src_register
*b
)
82 return (a
->File
== b
->File
)
83 && (a
->Index
== b
->Index
)
84 && (a
->Swizzle
== b
->Swizzle
)
85 && (a
->Negate
== b
->Negate
)
/*
 * get_value: reads the constant referenced by source register r into
 * data[0..3].
 *
 * The constant is looked up in prog->Parameters->ParameterValues at r->Index.
 * Output component k is the float member (.f) of the element selected by
 * GET_SWZ(r->Swizzle, k).  Afterwards the four low bits of r->Negate
 * (0x01, 0x02, 0x04, 0x08) are tested one at a time.
 *
 * NOTE(review): this listing is a damaged extract.  The return type and the
 * bodies of the four Negate tests are not visible; presumably each one flips
 * the sign of the matching data[] component -- confirm against the pristine
 * source.
 * NOTE(review): no bounds check on the GET_SWZ() result is visible; whether
 * every selector value is a valid index into value[] depends on what the
 * swizzle may contain -- verify.
 */
91 get_value(struct gl_program
*prog
, struct prog_src_register
*r
, float *data
)
93 const gl_constant_value
*const value
=
94 prog
->Parameters
->ParameterValues
[r
->Index
];
/* Apply the swizzle: one selector per output component. */
96 data
[0] = value
[GET_SWZ(r
->Swizzle
, 0)].f
;
97 data
[1] = value
[GET_SWZ(r
->Swizzle
, 1)].f
;
98 data
[2] = value
[GET_SWZ(r
->Swizzle
, 2)].f
;
99 data
[3] = value
[GET_SWZ(r
->Swizzle
, 3)].f
;
/* Per-component Negate bits: bit 0 pairs with data[0] ... bit 3 with data[3]
 * (guarded statements not visible in this extract). */
101 if (r
->Negate
& 0x01) {
105 if (r
->Negate
& 0x02) {
109 if (r
->Negate
& 0x04) {
113 if (r
->Negate
& 0x08) {
/**
 * Try to replace instructions that produce a constant result with simple moves
 *
 * The hope is that a following copy propagation pass will eliminate the
 * unnecessary move instructions.
 */
125 _mesa_constant_fold(struct gl_program
*prog
)
/*
 * Overview of what is visible in this (damaged) extract:
 *
 * The function walks prog->arb.Instructions[0 .. NumInstructions-1] and
 * switches on each instruction's Opcode.  Whenever a fold applies, the
 * instruction is rewritten in place: Opcode becomes OPCODE_MOV, SrcReg[0]
 * becomes a freshly added constant (src_reg_for_vec4 / src_reg_for_float)
 * and the now-unused source slots get File = PROGRAM_UNDEFINED and
 * Swizzle = SWIZZLE_NOOP.
 *
 * Visible fold groups, in order of appearance:
 *   1. two constant sources  -> component-wise a + b
 *   2. three constant sources -> per component: a < 0.0f ? b : c
 *   3. two constant sources  -> scalar a[0]*b[0] + a[1]*b[1], plus a[2]*b[2]
 *      when Opcode >= OPCODE_DP3, plus a[3]*b[3] when Opcode == OPCODE_DP4
 *   4. two constant sources  -> component-wise a * b
 *   5. two constant sources  -> per component (a >= b) ? 1.0f : 0.0f;
 *      otherwise, if src_regs_are_same() holds for both sources, a move of
 *      the constant 1.0f
 *   6. two constant sources  -> per component (a < b) ? 1.0f : 0.0f;
 *      otherwise, if src_regs_are_same() holds for both sources, a move of
 *      the constant 0.0f
 *
 * NOTE(review): the return type, the case labels, the declarations of i and
 * of the a / b / c / result operands, any update of progress and the tail of
 * the function (including its return) are not visible here.  The opcode that
 * each group belongs to is therefore an inference (presumably ADD, CMP,
 * DP2/DP3/DP4, MUL, SGE, SLT) -- confirm against the pristine source.
 */
127 bool progress
= false;
/* Examine every instruction of the program once, in order. */
130 for (i
= 0; i
< prog
->arb
.NumInstructions
; i
++) {
131 struct prog_instruction
*const inst
= &prog
->arb
.Instructions
[i
];
133 switch (inst
->Opcode
) {
/* Group 1: both sources constant -> fold the component-wise sum. */
135 if (src_regs_are_constant(inst
, 2)) {
140 get_value(prog
, &inst
->SrcReg
[0], a
);
141 get_value(prog
, &inst
->SrcReg
[1], b
);
143 result
[0] = a
[0] + b
[0];
144 result
[1] = a
[1] + b
[1];
145 result
[2] = a
[2] + b
[2];
146 result
[3] = a
[3] + b
[3];
148 inst
->Opcode
= OPCODE_MOV
;
149 inst
->SrcReg
[0] = src_reg_for_vec4(prog
, result
);
151 inst
->SrcReg
[1].File
= PROGRAM_UNDEFINED
;
152 inst
->SrcReg
[1].Swizzle
= SWIZZLE_NOOP
;
159 /* FINISHME: We could also optimize CMP instructions where the first
160 * FINISHME: source is a constant that is either all < 0.0 or all
163 if (src_regs_are_constant(inst
, 3)) {
169 get_value(prog
, &inst
->SrcReg
[0], a
);
170 get_value(prog
, &inst
->SrcReg
[1], b
);
171 get_value(prog
, &inst
->SrcReg
[2], c
);
173 result
[0] = a
[0] < 0.0f
? b
[0] : c
[0];
174 result
[1] = a
[1] < 0.0f
? b
[1] : c
[1];
175 result
[2] = a
[2] < 0.0f
? b
[2] : c
[2];
176 result
[3] = a
[3] < 0.0f
? b
[3] : c
[3];
178 inst
->Opcode
= OPCODE_MOV
;
179 inst
->SrcReg
[0] = src_reg_for_vec4(prog
, result
);
181 inst
->SrcReg
[1].File
= PROGRAM_UNDEFINED
;
182 inst
->SrcReg
[1].Swizzle
= SWIZZLE_NOOP
;
183 inst
->SrcReg
[2].File
= PROGRAM_UNDEFINED
;
184 inst
->SrcReg
[2].Swizzle
= SWIZZLE_NOOP
;
193 if (src_regs_are_constant(inst
, 2)) {
198 get_value(prog
, &inst
->SrcReg
[0], a
);
199 get_value(prog
, &inst
->SrcReg
[1], b
);
201 result
= (a
[0] * b
[0]) + (a
[1] * b
[1]);
203 if (inst
->Opcode
>= OPCODE_DP3
)
204 result
+= a
[2] * b
[2];
206 if (inst
->Opcode
== OPCODE_DP4
)
207 result
+= a
[3] * b
[3];
209 inst
->Opcode
= OPCODE_MOV
;
210 inst
->SrcReg
[0] = src_reg_for_float(prog
, result
);
212 inst
->SrcReg
[1].File
= PROGRAM_UNDEFINED
;
213 inst
->SrcReg
[1].Swizzle
= SWIZZLE_NOOP
;
220 if (src_regs_are_constant(inst
, 2)) {
225 get_value(prog
, &inst
->SrcReg
[0], a
);
226 get_value(prog
, &inst
->SrcReg
[1], b
);
228 result
[0] = a
[0] * b
[0];
229 result
[1] = a
[1] * b
[1];
230 result
[2] = a
[2] * b
[2];
231 result
[3] = a
[3] * b
[3];
233 inst
->Opcode
= OPCODE_MOV
;
234 inst
->SrcReg
[0] = src_reg_for_vec4(prog
, result
);
236 inst
->SrcReg
[1].File
= PROGRAM_UNDEFINED
;
237 inst
->SrcReg
[1].Swizzle
= SWIZZLE_NOOP
;
244 if (src_regs_are_constant(inst
, 2)) {
249 get_value(prog
, &inst
->SrcReg
[0], a
);
250 get_value(prog
, &inst
->SrcReg
[1], b
);
252 result
[0] = (a
[0] >= b
[0]) ? 1.0f
: 0.0f
;
253 result
[1] = (a
[1] >= b
[1]) ? 1.0f
: 0.0f
;
254 result
[2] = (a
[2] >= b
[2]) ? 1.0f
: 0.0f
;
255 result
[3] = (a
[3] >= b
[3]) ? 1.0f
: 0.0f
;
257 inst
->Opcode
= OPCODE_MOV
;
258 inst
->SrcReg
[0] = src_reg_for_vec4(prog
, result
);
260 inst
->SrcReg
[1].File
= PROGRAM_UNDEFINED
;
261 inst
->SrcReg
[1].Swizzle
= SWIZZLE_NOOP
;
264 } else if (src_regs_are_same(&inst
->SrcReg
[0], &inst
->SrcReg
[1])) {
265 inst
->Opcode
= OPCODE_MOV
;
266 inst
->SrcReg
[0] = src_reg_for_float(prog
, 1.0f
);
268 inst
->SrcReg
[1].File
= PROGRAM_UNDEFINED
;
269 inst
->SrcReg
[1].Swizzle
= SWIZZLE_NOOP
;
276 if (src_regs_are_constant(inst
, 2)) {
281 get_value(prog
, &inst
->SrcReg
[0], a
);
282 get_value(prog
, &inst
->SrcReg
[1], b
);
284 result
[0] = (a
[0] < b
[0]) ? 1.0f
: 0.0f
;
285 result
[1] = (a
[1] < b
[1]) ? 1.0f
: 0.0f
;
286 result
[2] = (a
[2] < b
[2]) ? 1.0f
: 0.0f
;
287 result
[3] = (a
[3] < b
[3]) ? 1.0f
: 0.0f
;
289 inst
->Opcode
= OPCODE_MOV
;
290 inst
->SrcReg
[0] = src_reg_for_vec4(prog
, result
);
292 inst
->SrcReg
[1].File
= PROGRAM_UNDEFINED
;
293 inst
->SrcReg
[1].Swizzle
= SWIZZLE_NOOP
;
296 } else if (src_regs_are_same(&inst
->SrcReg
[0], &inst
->SrcReg
[1])) {
297 inst
->Opcode
= OPCODE_MOV
;
298 inst
->SrcReg
[0] = src_reg_for_float(prog
, 0.0f
);
300 inst
->SrcReg
[1].File
= PROGRAM_UNDEFINED
;
301 inst
->SrcReg
[1].Swizzle
= SWIZZLE_NOOP
;