2 * Copyright © 2010 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the Software.
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
24 #include "main/glheader.h"
25 #include "main/context.h"
26 #include "main/macros.h"
28 #include "prog_instruction.h"
29 #include "prog_optimize.h"
30 #include "prog_parameter.h"
/*
 * Inspect the first num_srcs source registers of an instruction.
 *
 * For every index i in [0, num_srcs) two tests are applied to
 * inst->SrcReg[i]: whether its File differs from PROGRAM_CONSTANT, and
 * whether its RelAddr field is non-zero.  Callers in this file use the
 * result directly as an "if" condition.
 *
 * NOTE(review): this listing does not show the return type, the declaration
 * of i, the statements guarded by the two tests, or the final return.
 * Presumably either test firing makes the function return false and it
 * returns true otherwise -- confirm against the complete source.
 */
34 src_regs_are_constant(const struct prog_instruction
*inst
, unsigned num_srcs
)
38 for (i
= 0; i
< num_srcs
; i
++) {
/* Test 1: is this source outside the PROGRAM_CONSTANT register file? */
39 if (inst
->SrcReg
[i
].File
!= PROGRAM_CONSTANT
)
/* Test 2: does this source have RelAddr set? */
41 if (inst
->SrcReg
[i
].RelAddr
)
/*
 * Build a source register that refers to a constant holding a single float.
 *
 * The local register is zero-filled with memset(), its File is set to
 * PROGRAM_CONSTANT, and its Index is set to the value returned by
 * _mesa_add_unnamed_constant(), which is called with prog->Parameters, the
 * address of val cast to gl_constant_value pointer, the literal 1 and the
 * address of swiz.
 *
 * NOTE(review): the declaration of swiz, any use of the value written to it
 * (presumably stored in src.Swizzle) and the return statement are not
 * present in this listing -- confirm against the complete source.  The
 * third argument (1) is presumably the number of components; verify against
 * the _mesa_add_unnamed_constant() prototype.
 */
48 static struct prog_src_register
49 src_reg_for_float(struct gl_program
*prog
, float val
)
51 struct prog_src_register src
;
/* Start from an all-zero register so fields not set below are zero. */
54 memset(&src
, 0, sizeof(src
));
56 src
.File
= PROGRAM_CONSTANT
;
57 src
.Index
= _mesa_add_unnamed_constant(prog
->Parameters
,
58 (gl_constant_value
*) &val
, 1, &swiz
);
/*
 * Build a source register that refers to a constant holding four floats.
 *
 * The local register is zero-filled with memset(), its File is set to
 * PROGRAM_CONSTANT, and its Index is set to the value returned by
 * _mesa_add_unnamed_constant(), which is called with prog->Parameters, val
 * cast to gl_constant_value pointer, the literal 4 and the address of swiz.
 *
 * NOTE(review): the declaration of swiz, any use of the value written to it
 * (presumably stored in src.Swizzle) and the return statement are not
 * present in this listing -- confirm against the complete source.  val is
 * presumably expected to point at four readable floats, matching the
 * literal 4 passed on; verify against the callers.
 */
63 static struct prog_src_register
64 src_reg_for_vec4(struct gl_program
*prog
, const float *val
)
66 struct prog_src_register src
;
/* Start from an all-zero register so fields not set below are zero. */
69 memset(&src
, 0, sizeof(src
));
71 src
.File
= PROGRAM_CONSTANT
;
72 src
.Index
= _mesa_add_unnamed_constant(prog
->Parameters
,
73 (gl_constant_value
*) val
, 4, &swiz
);
/*
 * Compare two source register descriptions field by field.
 *
 * The visible part of the return expression requires File, Index, Swizzle
 * and Negate to be equal between a and b.
 *
 * NOTE(review): the return type and the end of the return expression are
 * not present in this listing, and the original line numbering leaves room
 * for further conditions after the Negate comparison.  Do not assume the
 * four comparisons below are the complete test -- confirm against the
 * complete source.
 */
79 src_regs_are_same(const struct prog_src_register
*a
,
80 const struct prog_src_register
*b
)
82 return (a
->File
== b
->File
)
83 && (a
->Index
== b
->Index
)
84 && (a
->Swizzle
== b
->Swizzle
)
85 && (a
->Negate
== b
->Negate
)
/*
 * Read the four float components selected by a source register.
 *
 * r->Index is looked up in prog->Parameters->ParameterValueOffset to get an
 * offset (pvo) into prog->Parameters->ParameterValues.  For each of the four
 * output slots, GET_SWZ(r->Swizzle, n) picks which element after that offset
 * is read, and its .f member is stored in data[n].
 *
 * After the reads, bits 0x01, 0x02, 0x04 and 0x08 of r->Negate are tested
 * one at a time.
 *
 * NOTE(review): the return type and the bodies of the four Negate tests are
 * not present in this listing; presumably each set bit flips the sign of
 * the matching data[] element -- confirm against the complete source.
 * The code writes data[0] through data[3], so data must have room for four
 * floats.  No check that r->File is a constant file is visible here.
 */
91 get_value(struct gl_program
*prog
, struct prog_src_register
*r
, float *data
)
93 unsigned pvo
= prog
->Parameters
->ParameterValueOffset
[r
->Index
];
94 const gl_constant_value
*const value
=
95 prog
->Parameters
->ParameterValues
+ pvo
;
/* Apply the swizzle: output slot n reads the element chosen by GET_SWZ. */
97 data
[0] = value
[GET_SWZ(r
->Swizzle
, 0)].f
;
98 data
[1] = value
[GET_SWZ(r
->Swizzle
, 1)].f
;
99 data
[2] = value
[GET_SWZ(r
->Swizzle
, 2)].f
;
100 data
[3] = value
[GET_SWZ(r
->Swizzle
, 3)].f
;
/* One Negate bit per output slot, lowest bit for data[0]. */
102 if (r
->Negate
& 0x01) {
106 if (r
->Negate
& 0x02) {
110 if (r
->Negate
& 0x04) {
114 if (r
->Negate
& 0x08) {
120 * Try to replace instructions that produce a constant result with simple moves.
122 * The hope is that a following copy propagation pass will eliminate the
123 * unnecessary move instructions.
/*
 * Walk every instruction of prog->arb.Instructions (NumInstructions of
 * them) and, switching on inst->Opcode, rewrite instructions whose result
 * can be computed up front into OPCODE_MOV instructions.
 *
 * Every rewrite sets inst->Opcode to OPCODE_MOV, replaces SrcReg[0] with a
 * newly added constant, and resets each source that is no longer needed to
 * File = PROGRAM_UNDEFINED and Swizzle = SWIZZLE_NOOP.
 *
 * The visible folds, in order of appearance:
 *   1. Two constant sources: component-wise a + b, stored as a vec4
 *      constant.
 *   2. Three constant sources: per component, b if a is below 0.0f,
 *      otherwise c; stored as a vec4 constant.  SrcReg[1] and SrcReg[2]
 *      are both reset.
 *   3. Two constant sources: dot product.  The x and y products are always
 *      summed, the z product is added when Opcode >= OPCODE_DP3 and the w
 *      product when Opcode == OPCODE_DP4; stored as a single-float
 *      constant.
 *   4. Two constant sources: component-wise a * b, stored as a vec4
 *      constant.
 *   5. Two constant sources: per component 1.0f when a >= b, else 0.0f.
 *      Otherwise, if src_regs_are_same() accepts SrcReg[0] and SrcReg[1],
 *      the instruction becomes a MOV of the constant 1.0f.
 *   6. Two constant sources: per component 1.0f when a is below b, else
 *      0.0f.  Otherwise, if src_regs_are_same() accepts the two sources,
 *      the instruction becomes a MOV of the constant 0.0f.
 *
 * NOTE(review): this listing does not show the return type, the case
 * labels, the declarations of i, a, b, c and result, any update of
 * progress after it is initialised to false, or the end of the function.
 * The opcode each fold belongs to is therefore inferred from the arithmetic
 * only -- confirm against the complete source.
 * NOTE(review): the ">= OPCODE_DP3" test in fold 3 is only correct if the
 * dot-product opcodes are numbered in increasing width order -- confirm
 * against the opcode enum.
 */
126 _mesa_constant_fold(struct gl_program
*prog
)
128 bool progress
= false;
/* Visit each instruction of the program in order. */
131 for (i
= 0; i
< prog
->arb
.NumInstructions
; i
++) {
132 struct prog_instruction
*const inst
= &prog
->arb
.Instructions
[i
];
134 switch (inst
->Opcode
) {
/* Fold 1: both sources are constants, so the sum is computed here. */
136 if (src_regs_are_constant(inst
, 2)) {
141 get_value(prog
, &inst
->SrcReg
[0], a
);
142 get_value(prog
, &inst
->SrcReg
[1], b
);
144 result
[0] = a
[0] + b
[0];
145 result
[1] = a
[1] + b
[1];
146 result
[2] = a
[2] + b
[2];
147 result
[3] = a
[3] + b
[3];
/* Rewrite in place as a MOV from the new constant; clear the second source. */
149 inst
->Opcode
= OPCODE_MOV
;
150 inst
->SrcReg
[0] = src_reg_for_vec4(prog
, result
);
152 inst
->SrcReg
[1].File
= PROGRAM_UNDEFINED
;
153 inst
->SrcReg
[1].Swizzle
= SWIZZLE_NOOP
;
160 /* FINISHME: We could also optimize CMP instructions where the first
161 * FINISHME: source is a constant that is either all < 0.0 or all
164 if (src_regs_are_constant(inst
, 3)) {
170 get_value(prog
, &inst
->SrcReg
[0], a
);
171 get_value(prog
, &inst
->SrcReg
[1], b
);
172 get_value(prog
, &inst
->SrcReg
[2], c
);
174 result
[0] = a
[0] < 0.0f
? b
[0] : c
[0];
175 result
[1] = a
[1] < 0.0f
? b
[1] : c
[1];
176 result
[2] = a
[2] < 0.0f
? b
[2] : c
[2];
177 result
[3] = a
[3] < 0.0f
? b
[3] : c
[3];
179 inst
->Opcode
= OPCODE_MOV
;
180 inst
->SrcReg
[0] = src_reg_for_vec4(prog
, result
);
182 inst
->SrcReg
[1].File
= PROGRAM_UNDEFINED
;
183 inst
->SrcReg
[1].Swizzle
= SWIZZLE_NOOP
;
184 inst
->SrcReg
[2].File
= PROGRAM_UNDEFINED
;
185 inst
->SrcReg
[2].Swizzle
= SWIZZLE_NOOP
;
194 if (src_regs_are_constant(inst
, 2)) {
199 get_value(prog
, &inst
->SrcReg
[0], a
);
200 get_value(prog
, &inst
->SrcReg
[1], b
);
202 result
= (a
[0] * b
[0]) + (a
[1] * b
[1]);
204 if (inst
->Opcode
>= OPCODE_DP3
)
205 result
+= a
[2] * b
[2];
207 if (inst
->Opcode
== OPCODE_DP4
)
208 result
+= a
[3] * b
[3];
210 inst
->Opcode
= OPCODE_MOV
;
211 inst
->SrcReg
[0] = src_reg_for_float(prog
, result
);
213 inst
->SrcReg
[1].File
= PROGRAM_UNDEFINED
;
214 inst
->SrcReg
[1].Swizzle
= SWIZZLE_NOOP
;
221 if (src_regs_are_constant(inst
, 2)) {
226 get_value(prog
, &inst
->SrcReg
[0], a
);
227 get_value(prog
, &inst
->SrcReg
[1], b
);
229 result
[0] = a
[0] * b
[0];
230 result
[1] = a
[1] * b
[1];
231 result
[2] = a
[2] * b
[2];
232 result
[3] = a
[3] * b
[3];
234 inst
->Opcode
= OPCODE_MOV
;
235 inst
->SrcReg
[0] = src_reg_for_vec4(prog
, result
);
237 inst
->SrcReg
[1].File
= PROGRAM_UNDEFINED
;
238 inst
->SrcReg
[1].Swizzle
= SWIZZLE_NOOP
;
245 if (src_regs_are_constant(inst
, 2)) {
250 get_value(prog
, &inst
->SrcReg
[0], a
);
251 get_value(prog
, &inst
->SrcReg
[1], b
);
253 result
[0] = (a
[0] >= b
[0]) ? 1.0f
: 0.0f
;
254 result
[1] = (a
[1] >= b
[1]) ? 1.0f
: 0.0f
;
255 result
[2] = (a
[2] >= b
[2]) ? 1.0f
: 0.0f
;
256 result
[3] = (a
[3] >= b
[3]) ? 1.0f
: 0.0f
;
258 inst
->Opcode
= OPCODE_MOV
;
259 inst
->SrcReg
[0] = src_reg_for_vec4(prog
, result
);
261 inst
->SrcReg
[1].File
= PROGRAM_UNDEFINED
;
262 inst
->SrcReg
[1].Swizzle
= SWIZZLE_NOOP
;
265 } else if (src_regs_are_same(&inst
->SrcReg
[0], &inst
->SrcReg
[1])) {
266 inst
->Opcode
= OPCODE_MOV
;
267 inst
->SrcReg
[0] = src_reg_for_float(prog
, 1.0f
);
269 inst
->SrcReg
[1].File
= PROGRAM_UNDEFINED
;
270 inst
->SrcReg
[1].Swizzle
= SWIZZLE_NOOP
;
277 if (src_regs_are_constant(inst
, 2)) {
282 get_value(prog
, &inst
->SrcReg
[0], a
);
283 get_value(prog
, &inst
->SrcReg
[1], b
);
285 result
[0] = (a
[0] < b
[0]) ? 1.0f
: 0.0f
;
286 result
[1] = (a
[1] < b
[1]) ? 1.0f
: 0.0f
;
287 result
[2] = (a
[2] < b
[2]) ? 1.0f
: 0.0f
;
288 result
[3] = (a
[3] < b
[3]) ? 1.0f
: 0.0f
;
290 inst
->Opcode
= OPCODE_MOV
;
291 inst
->SrcReg
[0] = src_reg_for_vec4(prog
, result
);
293 inst
->SrcReg
[1].File
= PROGRAM_UNDEFINED
;
294 inst
->SrcReg
[1].Swizzle
= SWIZZLE_NOOP
;
297 } else if (src_regs_are_same(&inst
->SrcReg
[0], &inst
->SrcReg
[1])) {
298 inst
->Opcode
= OPCODE_MOV
;
299 inst
->SrcReg
[0] = src_reg_for_float(prog
, 0.0f
);
301 inst
->SrcReg
[1].File
= PROGRAM_UNDEFINED
;
302 inst
->SrcReg
[1].Swizzle
= SWIZZLE_NOOP
;