i965: w/a for gather4 green RG32F
[mesa.git] / src / glsl / ir_constant_expression.cpp
1 /*
2 * Copyright © 2010 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24 /**
25 * \file ir_constant_expression.cpp
26 * Evaluate and process constant valued expressions
27 *
28 * In GLSL, constant valued expressions are used in several places. These
29 * must be processed and evaluated very early in the compilation process.
30 *
31 * * Sizes of arrays
32 * * Initializers for uniforms
33 * * Initializers for \c const variables
34 */
35
36 #include <math.h>
37 #include "main/core.h" /* for MAX2, MIN2, CLAMP */
38 #include "ir.h"
39 #include "ir_visitor.h"
40 #include "glsl_types.h"
41 #include "program/hash_table.h"
42
#if defined(_MSC_VER) && (_MSC_VER < 1800)
/**
 * Fallback isnormal() for MSVC builds whose _MSC_VER is below 1800.
 *
 * Returns nonzero when _fpclass() classifies \p x as a negative normal or
 * positive normal value, and zero otherwise.
 */
static int isnormal(double x)
{
   const int fp_class = _fpclass(x);

   return fp_class == _FPCLASS_NN || fp_class == _FPCLASS_PN;
}
#endif
49
#if defined(_MSC_VER)
/**
 * Provide copysign() on MSVC by forwarding both arguments to _copysign().
 */
static double copysign(double x, double y)
{
   const double result = _copysign(x, y);

   return result;
}
#endif
56
57 static float
58 dot(ir_constant *op0, ir_constant *op1)
59 {
60 assert(op0->type->is_float() && op1->type->is_float());
61
62 float result = 0;
63 for (unsigned c = 0; c < op0->type->components(); c++)
64 result += op0->value.f[c] * op1->value.f[c];
65
66 return result;
67 }
68
/* Reinterpret the bits of an unsigned int as a float.
 *
 * The copy goes through memcpy because that is the only method supported by
 * gcc.  Unions in particular are iffy, and read-through-converted-pointer is
 * killed by strict aliasing.  OTOH, the compiler sees through the memcpy, so
 * the resulting asm is reasonable.
 */
static float
bitcast_u2f(unsigned int u)
{
   float as_float;

   assert(sizeof(as_float) == sizeof(u));
   memcpy(&as_float, &u, sizeof(as_float));

   return as_float;
}
82
/* Reinterpret the bits of a float as an unsigned int.
 *
 * memcpy is used for the copy so that no object is read through a pointer
 * of a different type.
 */
static unsigned int
bitcast_f2u(float f)
{
   unsigned int as_uint;

   assert(sizeof(f) == sizeof(as_uint));
   memcpy(&as_uint, &f, sizeof(f));

   return as_uint;
}
91
/**
 * Pointer to a function that evaluates one component of a floating-point
 * 4x8 packing function: it takes one float and returns its 8-bit encoding.
 */
typedef uint8_t (*pack_1x8_func_t)(float);
97
/**
 * Pointer to a function that evaluates one component of a floating-point
 * 2x16 packing function: it takes one float and returns its 16-bit encoding.
 */
typedef uint16_t (*pack_1x16_func_t)(float);
103
/**
 * Pointer to a function that evaluates one component of a floating-point
 * 4x8 unpacking function: it takes one 8-bit value and returns a float.
 */
typedef float (*unpack_1x8_func_t)(uint8_t);
109
/**
 * Pointer to a function that evaluates one component of a floating-point
 * 2x16 unpacking function: it takes one 16-bit value and returns a float.
 */
typedef float (*unpack_1x16_func_t)(uint16_t);
115
116 /**
117 * Evaluate a 2x16 floating-point packing function.
118 */
119 static uint32_t
120 pack_2x16(pack_1x16_func_t pack_1x16,
121 float x, float y)
122 {
123 /* From section 8.4 of the GLSL ES 3.00 spec:
124 *
125 * packSnorm2x16
126 * -------------
127 * The first component of the vector will be written to the least
128 * significant bits of the output; the last component will be written to
129 * the most significant bits.
130 *
131 * The specifications for the other packing functions contain similar
132 * language.
133 */
134 uint32_t u = 0;
135 u |= ((uint32_t) pack_1x16(x) << 0);
136 u |= ((uint32_t) pack_1x16(y) << 16);
137 return u;
138 }
139
140 /**
141 * Evaluate a 4x8 floating-point packing function.
142 */
143 static uint32_t
144 pack_4x8(pack_1x8_func_t pack_1x8,
145 float x, float y, float z, float w)
146 {
147 /* From section 8.4 of the GLSL 4.30 spec:
148 *
149 * packSnorm4x8
150 * ------------
151 * The first component of the vector will be written to the least
152 * significant bits of the output; the last component will be written to
153 * the most significant bits.
154 *
155 * The specifications for the other packing functions contain similar
156 * language.
157 */
158 uint32_t u = 0;
159 u |= ((uint32_t) pack_1x8(x) << 0);
160 u |= ((uint32_t) pack_1x8(y) << 8);
161 u |= ((uint32_t) pack_1x8(z) << 16);
162 u |= ((uint32_t) pack_1x8(w) << 24);
163 return u;
164 }
165
166 /**
167 * Evaluate a 2x16 floating-point unpacking function.
168 */
169 static void
170 unpack_2x16(unpack_1x16_func_t unpack_1x16,
171 uint32_t u,
172 float *x, float *y)
173 {
174 /* From section 8.4 of the GLSL ES 3.00 spec:
175 *
176 * unpackSnorm2x16
177 * ---------------
178 * The first component of the returned vector will be extracted from
179 * the least significant bits of the input; the last component will be
180 * extracted from the most significant bits.
181 *
182 * The specifications for the other unpacking functions contain similar
183 * language.
184 */
185 *x = unpack_1x16((uint16_t) (u & 0xffff));
186 *y = unpack_1x16((uint16_t) (u >> 16));
187 }
188
189 /**
190 * Evaluate a 4x8 floating-point unpacking function.
191 */
192 static void
193 unpack_4x8(unpack_1x8_func_t unpack_1x8, uint32_t u,
194 float *x, float *y, float *z, float *w)
195 {
196 /* From section 8.4 of the GLSL 4.30 spec:
197 *
198 * unpackSnorm4x8
199 * --------------
200 * The first component of the returned vector will be extracted from
201 * the least significant bits of the input; the last component will be
202 * extracted from the most significant bits.
203 *
204 * The specifications for the other unpacking functions contain similar
205 * language.
206 */
207 *x = unpack_1x8((uint8_t) (u & 0xff));
208 *y = unpack_1x8((uint8_t) (u >> 8));
209 *z = unpack_1x8((uint8_t) (u >> 16));
210 *w = unpack_1x8((uint8_t) (u >> 24));
211 }
212
213 /**
214 * Evaluate one component of packSnorm4x8.
215 */
216 static uint8_t
217 pack_snorm_1x8(float x)
218 {
219 /* From section 8.4 of the GLSL 4.30 spec:
220 *
221 * packSnorm4x8
222 * ------------
223 * The conversion for component c of v to fixed point is done as
224 * follows:
225 *
226 * packSnorm4x8: round(clamp(c, -1, +1) * 127.0)
227 *
228 * We must first cast the float to an int, because casting a negative
229 * float to a uint is undefined.
230 */
231 return (uint8_t) (int8_t)
232 _mesa_round_to_even(CLAMP(x, -1.0f, +1.0f) * 127.0f);
233 }
234
235 /**
236 * Evaluate one component of packSnorm2x16.
237 */
238 static uint16_t
239 pack_snorm_1x16(float x)
240 {
241 /* From section 8.4 of the GLSL ES 3.00 spec:
242 *
243 * packSnorm2x16
244 * -------------
245 * The conversion for component c of v to fixed point is done as
246 * follows:
247 *
248 * packSnorm2x16: round(clamp(c, -1, +1) * 32767.0)
249 *
250 * We must first cast the float to an int, because casting a negative
251 * float to a uint is undefined.
252 */
253 return (uint16_t) (int16_t)
254 _mesa_round_to_even(CLAMP(x, -1.0f, +1.0f) * 32767.0f);
255 }
256
/**
 * Evaluate one component of unpackSnorm4x8.
 *
 * \param u  8-bit encoded value; it is reinterpreted as signed before scaling
 * \return the decoded float, limited to the range [-1, +1]
 */
static float
unpack_snorm_1x8(uint8_t u)
{
   /* From section 8.4 of the GLSL 4.30 spec:
    *
    *    unpackSnorm4x8
    *    --------------
    *    The conversion for unpacked fixed-point value f to floating point is
    *    done as follows:
    *
    *      unpackSnorm4x8: clamp(f / 127.0, -1, +1)
    */
   const float scaled = (int8_t) u / 127.0f;

   /* Clamp written out as guard clauses: lower bound first, then upper. */
   if (scaled < -1.0f)
      return -1.0f;
   if (scaled > +1.0f)
      return +1.0f;

   return scaled;
}
274
/**
 * Evaluate one component of unpackSnorm2x16.
 *
 * \param u  16-bit encoded value; it is reinterpreted as signed before scaling
 * \return the decoded float, limited to the range [-1, +1]
 */
static float
unpack_snorm_1x16(uint16_t u)
{
   /* From section 8.4 of the GLSL ES 3.00 spec:
    *
    *    unpackSnorm2x16
    *    ---------------
    *    The conversion for unpacked fixed-point value f to floating point is
    *    done as follows:
    *
    *      unpackSnorm2x16: clamp(f / 32767.0, -1, +1)
    */
   const float scaled = (int16_t) u / 32767.0f;

   /* Clamp written out as guard clauses: lower bound first, then upper. */
   if (scaled < -1.0f)
      return -1.0f;
   if (scaled > +1.0f)
      return +1.0f;

   return scaled;
}
292
293 /**
294 * Evaluate one component packUnorm4x8.
295 */
296 static uint8_t
297 pack_unorm_1x8(float x)
298 {
299 /* From section 8.4 of the GLSL 4.30 spec:
300 *
301 * packUnorm4x8
302 * ------------
303 * The conversion for component c of v to fixed point is done as
304 * follows:
305 *
306 * packUnorm4x8: round(clamp(c, 0, +1) * 255.0)
307 */
308 return (uint8_t) _mesa_round_to_even(CLAMP(x, 0.0f, 1.0f) * 255.0f);
309 }
310
311 /**
312 * Evaluate one component packUnorm2x16.
313 */
314 static uint16_t
315 pack_unorm_1x16(float x)
316 {
317 /* From section 8.4 of the GLSL ES 3.00 spec:
318 *
319 * packUnorm2x16
320 * -------------
321 * The conversion for component c of v to fixed point is done as
322 * follows:
323 *
324 * packUnorm2x16: round(clamp(c, 0, +1) * 65535.0)
325 */
326 return (uint16_t) _mesa_round_to_even(CLAMP(x, 0.0f, 1.0f) * 65535.0f);
327 }
328
/**
 * Evaluate one component of unpackUnorm4x8.
 *
 * \param u  8-bit encoded value
 * \return \p u divided by 255.0, as a float
 */
static float
unpack_unorm_1x8(uint8_t u)
{
   /* From section 8.4 of the GLSL 4.30 spec:
    *
    *    unpackUnorm4x8
    *    --------------
    *    The conversion for unpacked fixed-point value f to floating point is
    *    done as follows:
    *
    *      unpackUnorm4x8: f / 255.0
    */
   const float fixed = (float) u;

   return fixed / 255.0f;
}
346
/**
 * Evaluate one component of unpackUnorm2x16.
 *
 * \param u  16-bit encoded value
 * \return \p u divided by 65535.0, as a float
 */
static float
unpack_unorm_1x16(uint16_t u)
{
   /* From section 8.4 of the GLSL ES 3.00 spec:
    *
    *    unpackUnorm2x16
    *    ---------------
    *    The conversion for unpacked fixed-point value f to floating point is
    *    done as follows:
    *
    *      unpackUnorm2x16: f / 65535.0
    */
   const float fixed = (float) u;

   return fixed / 65535.0f;
}
364
365 /**
366 * Evaluate one component of packHalf2x16.
367 */
368 static uint16_t
369 pack_half_1x16(float x)
370 {
371 return _mesa_float_to_half(x);
372 }
373
374 /**
375 * Evaluate one component of unpackHalf2x16.
376 */
377 static float
378 unpack_half_1x16(uint16_t u)
379 {
380 return _mesa_half_to_float(u);
381 }
382
383 ir_constant *
384 ir_rvalue::constant_expression_value(struct hash_table *variable_context)
385 {
386 assert(this->type->is_error());
387 return NULL;
388 }
389
390 ir_constant *
391 ir_expression::constant_expression_value(struct hash_table *variable_context)
392 {
393 if (this->type->is_error())
394 return NULL;
395
396 ir_constant *op[Elements(this->operands)] = { NULL, };
397 ir_constant_data data;
398
399 memset(&data, 0, sizeof(data));
400
401 for (unsigned operand = 0; operand < this->get_num_operands(); operand++) {
402 op[operand] = this->operands[operand]->constant_expression_value(variable_context);
403 if (!op[operand])
404 return NULL;
405 }
406
407 if (op[1] != NULL)
408 switch (this->operation) {
409 case ir_binop_lshift:
410 case ir_binop_rshift:
411 case ir_binop_ldexp:
412 case ir_binop_vector_extract:
413 case ir_triop_csel:
414 case ir_triop_bitfield_extract:
415 break;
416
417 default:
418 assert(op[0]->type->base_type == op[1]->type->base_type);
419 break;
420 }
421
422 bool op0_scalar = op[0]->type->is_scalar();
423 bool op1_scalar = op[1] != NULL && op[1]->type->is_scalar();
424
425 /* When iterating over a vector or matrix's components, we want to increase
426 * the loop counter. However, for scalars, we want to stay at 0.
427 */
428 unsigned c0_inc = op0_scalar ? 0 : 1;
429 unsigned c1_inc = op1_scalar ? 0 : 1;
430 unsigned components;
431 if (op1_scalar || !op[1]) {
432 components = op[0]->type->components();
433 } else {
434 components = op[1]->type->components();
435 }
436
437 void *ctx = ralloc_parent(this);
438
439 /* Handle array operations here, rather than below. */
440 if (op[0]->type->is_array()) {
441 assert(op[1] != NULL && op[1]->type->is_array());
442 switch (this->operation) {
443 case ir_binop_all_equal:
444 return new(ctx) ir_constant(op[0]->has_value(op[1]));
445 case ir_binop_any_nequal:
446 return new(ctx) ir_constant(!op[0]->has_value(op[1]));
447 default:
448 break;
449 }
450 return NULL;
451 }
452
453 switch (this->operation) {
454 case ir_unop_bit_not:
455 switch (op[0]->type->base_type) {
456 case GLSL_TYPE_INT:
457 for (unsigned c = 0; c < components; c++)
458 data.i[c] = ~ op[0]->value.i[c];
459 break;
460 case GLSL_TYPE_UINT:
461 for (unsigned c = 0; c < components; c++)
462 data.u[c] = ~ op[0]->value.u[c];
463 break;
464 default:
465 assert(0);
466 }
467 break;
468
469 case ir_unop_logic_not:
470 assert(op[0]->type->base_type == GLSL_TYPE_BOOL);
471 for (unsigned c = 0; c < op[0]->type->components(); c++)
472 data.b[c] = !op[0]->value.b[c];
473 break;
474
475 case ir_unop_f2i:
476 assert(op[0]->type->base_type == GLSL_TYPE_FLOAT);
477 for (unsigned c = 0; c < op[0]->type->components(); c++) {
478 data.i[c] = (int) op[0]->value.f[c];
479 }
480 break;
481 case ir_unop_f2u:
482 assert(op[0]->type->base_type == GLSL_TYPE_FLOAT);
483 for (unsigned c = 0; c < op[0]->type->components(); c++) {
484 data.i[c] = (unsigned) op[0]->value.f[c];
485 }
486 break;
487 case ir_unop_i2f:
488 assert(op[0]->type->base_type == GLSL_TYPE_INT);
489 for (unsigned c = 0; c < op[0]->type->components(); c++) {
490 data.f[c] = (float) op[0]->value.i[c];
491 }
492 break;
493 case ir_unop_u2f:
494 assert(op[0]->type->base_type == GLSL_TYPE_UINT);
495 for (unsigned c = 0; c < op[0]->type->components(); c++) {
496 data.f[c] = (float) op[0]->value.u[c];
497 }
498 break;
499 case ir_unop_b2f:
500 assert(op[0]->type->base_type == GLSL_TYPE_BOOL);
501 for (unsigned c = 0; c < op[0]->type->components(); c++) {
502 data.f[c] = op[0]->value.b[c] ? 1.0F : 0.0F;
503 }
504 break;
505 case ir_unop_f2b:
506 assert(op[0]->type->base_type == GLSL_TYPE_FLOAT);
507 for (unsigned c = 0; c < op[0]->type->components(); c++) {
508 data.b[c] = op[0]->value.f[c] != 0.0F ? true : false;
509 }
510 break;
511 case ir_unop_b2i:
512 assert(op[0]->type->base_type == GLSL_TYPE_BOOL);
513 for (unsigned c = 0; c < op[0]->type->components(); c++) {
514 data.u[c] = op[0]->value.b[c] ? 1 : 0;
515 }
516 break;
517 case ir_unop_i2b:
518 assert(op[0]->type->is_integer());
519 for (unsigned c = 0; c < op[0]->type->components(); c++) {
520 data.b[c] = op[0]->value.u[c] ? true : false;
521 }
522 break;
523 case ir_unop_u2i:
524 assert(op[0]->type->base_type == GLSL_TYPE_UINT);
525 for (unsigned c = 0; c < op[0]->type->components(); c++) {
526 data.i[c] = op[0]->value.u[c];
527 }
528 break;
529 case ir_unop_i2u:
530 assert(op[0]->type->base_type == GLSL_TYPE_INT);
531 for (unsigned c = 0; c < op[0]->type->components(); c++) {
532 data.u[c] = op[0]->value.i[c];
533 }
534 break;
535 case ir_unop_bitcast_i2f:
536 assert(op[0]->type->base_type == GLSL_TYPE_INT);
537 for (unsigned c = 0; c < op[0]->type->components(); c++) {
538 data.f[c] = bitcast_u2f(op[0]->value.i[c]);
539 }
540 break;
541 case ir_unop_bitcast_f2i:
542 assert(op[0]->type->base_type == GLSL_TYPE_FLOAT);
543 for (unsigned c = 0; c < op[0]->type->components(); c++) {
544 data.i[c] = bitcast_f2u(op[0]->value.f[c]);
545 }
546 break;
547 case ir_unop_bitcast_u2f:
548 assert(op[0]->type->base_type == GLSL_TYPE_UINT);
549 for (unsigned c = 0; c < op[0]->type->components(); c++) {
550 data.f[c] = bitcast_u2f(op[0]->value.u[c]);
551 }
552 break;
553 case ir_unop_bitcast_f2u:
554 assert(op[0]->type->base_type == GLSL_TYPE_FLOAT);
555 for (unsigned c = 0; c < op[0]->type->components(); c++) {
556 data.u[c] = bitcast_f2u(op[0]->value.f[c]);
557 }
558 break;
559 case ir_unop_any:
560 assert(op[0]->type->is_boolean());
561 data.b[0] = false;
562 for (unsigned c = 0; c < op[0]->type->components(); c++) {
563 if (op[0]->value.b[c])
564 data.b[0] = true;
565 }
566 break;
567
568 case ir_unop_trunc:
569 assert(op[0]->type->base_type == GLSL_TYPE_FLOAT);
570 for (unsigned c = 0; c < op[0]->type->components(); c++) {
571 data.f[c] = truncf(op[0]->value.f[c]);
572 }
573 break;
574
575 case ir_unop_round_even:
576 assert(op[0]->type->base_type == GLSL_TYPE_FLOAT);
577 for (unsigned c = 0; c < op[0]->type->components(); c++) {
578 data.f[c] = _mesa_round_to_even(op[0]->value.f[c]);
579 }
580 break;
581
582 case ir_unop_ceil:
583 assert(op[0]->type->base_type == GLSL_TYPE_FLOAT);
584 for (unsigned c = 0; c < op[0]->type->components(); c++) {
585 data.f[c] = ceilf(op[0]->value.f[c]);
586 }
587 break;
588
589 case ir_unop_floor:
590 assert(op[0]->type->base_type == GLSL_TYPE_FLOAT);
591 for (unsigned c = 0; c < op[0]->type->components(); c++) {
592 data.f[c] = floorf(op[0]->value.f[c]);
593 }
594 break;
595
596 case ir_unop_fract:
597 for (unsigned c = 0; c < op[0]->type->components(); c++) {
598 switch (this->type->base_type) {
599 case GLSL_TYPE_UINT:
600 data.u[c] = 0;
601 break;
602 case GLSL_TYPE_INT:
603 data.i[c] = 0;
604 break;
605 case GLSL_TYPE_FLOAT:
606 data.f[c] = op[0]->value.f[c] - floor(op[0]->value.f[c]);
607 break;
608 default:
609 assert(0);
610 }
611 }
612 break;
613
614 case ir_unop_sin:
615 case ir_unop_sin_reduced:
616 assert(op[0]->type->base_type == GLSL_TYPE_FLOAT);
617 for (unsigned c = 0; c < op[0]->type->components(); c++) {
618 data.f[c] = sinf(op[0]->value.f[c]);
619 }
620 break;
621
622 case ir_unop_cos:
623 case ir_unop_cos_reduced:
624 assert(op[0]->type->base_type == GLSL_TYPE_FLOAT);
625 for (unsigned c = 0; c < op[0]->type->components(); c++) {
626 data.f[c] = cosf(op[0]->value.f[c]);
627 }
628 break;
629
630 case ir_unop_neg:
631 for (unsigned c = 0; c < op[0]->type->components(); c++) {
632 switch (this->type->base_type) {
633 case GLSL_TYPE_UINT:
634 data.u[c] = -((int) op[0]->value.u[c]);
635 break;
636 case GLSL_TYPE_INT:
637 data.i[c] = -op[0]->value.i[c];
638 break;
639 case GLSL_TYPE_FLOAT:
640 data.f[c] = -op[0]->value.f[c];
641 break;
642 default:
643 assert(0);
644 }
645 }
646 break;
647
648 case ir_unop_abs:
649 for (unsigned c = 0; c < op[0]->type->components(); c++) {
650 switch (this->type->base_type) {
651 case GLSL_TYPE_UINT:
652 data.u[c] = op[0]->value.u[c];
653 break;
654 case GLSL_TYPE_INT:
655 data.i[c] = op[0]->value.i[c];
656 if (data.i[c] < 0)
657 data.i[c] = -data.i[c];
658 break;
659 case GLSL_TYPE_FLOAT:
660 data.f[c] = fabs(op[0]->value.f[c]);
661 break;
662 default:
663 assert(0);
664 }
665 }
666 break;
667
668 case ir_unop_sign:
669 for (unsigned c = 0; c < op[0]->type->components(); c++) {
670 switch (this->type->base_type) {
671 case GLSL_TYPE_UINT:
672 data.u[c] = op[0]->value.i[c] > 0;
673 break;
674 case GLSL_TYPE_INT:
675 data.i[c] = (op[0]->value.i[c] > 0) - (op[0]->value.i[c] < 0);
676 break;
677 case GLSL_TYPE_FLOAT:
678 data.f[c] = float((op[0]->value.f[c] > 0)-(op[0]->value.f[c] < 0));
679 break;
680 default:
681 assert(0);
682 }
683 }
684 break;
685
686 case ir_unop_rcp:
687 assert(op[0]->type->base_type == GLSL_TYPE_FLOAT);
688 for (unsigned c = 0; c < op[0]->type->components(); c++) {
689 switch (this->type->base_type) {
690 case GLSL_TYPE_UINT:
691 if (op[0]->value.u[c] != 0.0)
692 data.u[c] = 1 / op[0]->value.u[c];
693 break;
694 case GLSL_TYPE_INT:
695 if (op[0]->value.i[c] != 0.0)
696 data.i[c] = 1 / op[0]->value.i[c];
697 break;
698 case GLSL_TYPE_FLOAT:
699 if (op[0]->value.f[c] != 0.0)
700 data.f[c] = 1.0F / op[0]->value.f[c];
701 break;
702 default:
703 assert(0);
704 }
705 }
706 break;
707
708 case ir_unop_rsq:
709 assert(op[0]->type->base_type == GLSL_TYPE_FLOAT);
710 for (unsigned c = 0; c < op[0]->type->components(); c++) {
711 data.f[c] = 1.0F / sqrtf(op[0]->value.f[c]);
712 }
713 break;
714
715 case ir_unop_sqrt:
716 assert(op[0]->type->base_type == GLSL_TYPE_FLOAT);
717 for (unsigned c = 0; c < op[0]->type->components(); c++) {
718 data.f[c] = sqrtf(op[0]->value.f[c]);
719 }
720 break;
721
722 case ir_unop_exp:
723 assert(op[0]->type->base_type == GLSL_TYPE_FLOAT);
724 for (unsigned c = 0; c < op[0]->type->components(); c++) {
725 data.f[c] = expf(op[0]->value.f[c]);
726 }
727 break;
728
729 case ir_unop_exp2:
730 assert(op[0]->type->base_type == GLSL_TYPE_FLOAT);
731 for (unsigned c = 0; c < op[0]->type->components(); c++) {
732 data.f[c] = exp2f(op[0]->value.f[c]);
733 }
734 break;
735
736 case ir_unop_log:
737 assert(op[0]->type->base_type == GLSL_TYPE_FLOAT);
738 for (unsigned c = 0; c < op[0]->type->components(); c++) {
739 data.f[c] = logf(op[0]->value.f[c]);
740 }
741 break;
742
743 case ir_unop_log2:
744 assert(op[0]->type->base_type == GLSL_TYPE_FLOAT);
745 for (unsigned c = 0; c < op[0]->type->components(); c++) {
746 data.f[c] = log2f(op[0]->value.f[c]);
747 }
748 break;
749
750 case ir_unop_dFdx:
751 case ir_unop_dFdy:
752 assert(op[0]->type->base_type == GLSL_TYPE_FLOAT);
753 for (unsigned c = 0; c < op[0]->type->components(); c++) {
754 data.f[c] = 0.0;
755 }
756 break;
757
758 case ir_unop_pack_snorm_2x16:
759 assert(op[0]->type == glsl_type::vec2_type);
760 data.u[0] = pack_2x16(pack_snorm_1x16,
761 op[0]->value.f[0],
762 op[0]->value.f[1]);
763 break;
764 case ir_unop_pack_snorm_4x8:
765 assert(op[0]->type == glsl_type::vec4_type);
766 data.u[0] = pack_4x8(pack_snorm_1x8,
767 op[0]->value.f[0],
768 op[0]->value.f[1],
769 op[0]->value.f[2],
770 op[0]->value.f[3]);
771 break;
772 case ir_unop_unpack_snorm_2x16:
773 assert(op[0]->type == glsl_type::uint_type);
774 unpack_2x16(unpack_snorm_1x16,
775 op[0]->value.u[0],
776 &data.f[0], &data.f[1]);
777 break;
778 case ir_unop_unpack_snorm_4x8:
779 assert(op[0]->type == glsl_type::uint_type);
780 unpack_4x8(unpack_snorm_1x8,
781 op[0]->value.u[0],
782 &data.f[0], &data.f[1], &data.f[2], &data.f[3]);
783 break;
784 case ir_unop_pack_unorm_2x16:
785 assert(op[0]->type == glsl_type::vec2_type);
786 data.u[0] = pack_2x16(pack_unorm_1x16,
787 op[0]->value.f[0],
788 op[0]->value.f[1]);
789 break;
790 case ir_unop_pack_unorm_4x8:
791 assert(op[0]->type == glsl_type::vec4_type);
792 data.u[0] = pack_4x8(pack_unorm_1x8,
793 op[0]->value.f[0],
794 op[0]->value.f[1],
795 op[0]->value.f[2],
796 op[0]->value.f[3]);
797 break;
798 case ir_unop_unpack_unorm_2x16:
799 assert(op[0]->type == glsl_type::uint_type);
800 unpack_2x16(unpack_unorm_1x16,
801 op[0]->value.u[0],
802 &data.f[0], &data.f[1]);
803 break;
804 case ir_unop_unpack_unorm_4x8:
805 assert(op[0]->type == glsl_type::uint_type);
806 unpack_4x8(unpack_unorm_1x8,
807 op[0]->value.u[0],
808 &data.f[0], &data.f[1], &data.f[2], &data.f[3]);
809 break;
810 case ir_unop_pack_half_2x16:
811 assert(op[0]->type == glsl_type::vec2_type);
812 data.u[0] = pack_2x16(pack_half_1x16,
813 op[0]->value.f[0],
814 op[0]->value.f[1]);
815 break;
816 case ir_unop_unpack_half_2x16:
817 assert(op[0]->type == glsl_type::uint_type);
818 unpack_2x16(unpack_half_1x16,
819 op[0]->value.u[0],
820 &data.f[0], &data.f[1]);
821 break;
822 case ir_binop_pow:
823 assert(op[0]->type->base_type == GLSL_TYPE_FLOAT);
824 for (unsigned c = 0; c < op[0]->type->components(); c++) {
825 data.f[c] = powf(op[0]->value.f[c], op[1]->value.f[c]);
826 }
827 break;
828
829 case ir_binop_dot:
830 data.f[0] = dot(op[0], op[1]);
831 break;
832
833 case ir_binop_min:
834 assert(op[0]->type == op[1]->type || op0_scalar || op1_scalar);
835 for (unsigned c = 0, c0 = 0, c1 = 0;
836 c < components;
837 c0 += c0_inc, c1 += c1_inc, c++) {
838
839 switch (op[0]->type->base_type) {
840 case GLSL_TYPE_UINT:
841 data.u[c] = MIN2(op[0]->value.u[c0], op[1]->value.u[c1]);
842 break;
843 case GLSL_TYPE_INT:
844 data.i[c] = MIN2(op[0]->value.i[c0], op[1]->value.i[c1]);
845 break;
846 case GLSL_TYPE_FLOAT:
847 data.f[c] = MIN2(op[0]->value.f[c0], op[1]->value.f[c1]);
848 break;
849 default:
850 assert(0);
851 }
852 }
853
854 break;
855 case ir_binop_max:
856 assert(op[0]->type == op[1]->type || op0_scalar || op1_scalar);
857 for (unsigned c = 0, c0 = 0, c1 = 0;
858 c < components;
859 c0 += c0_inc, c1 += c1_inc, c++) {
860
861 switch (op[0]->type->base_type) {
862 case GLSL_TYPE_UINT:
863 data.u[c] = MAX2(op[0]->value.u[c0], op[1]->value.u[c1]);
864 break;
865 case GLSL_TYPE_INT:
866 data.i[c] = MAX2(op[0]->value.i[c0], op[1]->value.i[c1]);
867 break;
868 case GLSL_TYPE_FLOAT:
869 data.f[c] = MAX2(op[0]->value.f[c0], op[1]->value.f[c1]);
870 break;
871 default:
872 assert(0);
873 }
874 }
875 break;
876
877 case ir_binop_add:
878 assert(op[0]->type == op[1]->type || op0_scalar || op1_scalar);
879 for (unsigned c = 0, c0 = 0, c1 = 0;
880 c < components;
881 c0 += c0_inc, c1 += c1_inc, c++) {
882
883 switch (op[0]->type->base_type) {
884 case GLSL_TYPE_UINT:
885 data.u[c] = op[0]->value.u[c0] + op[1]->value.u[c1];
886 break;
887 case GLSL_TYPE_INT:
888 data.i[c] = op[0]->value.i[c0] + op[1]->value.i[c1];
889 break;
890 case GLSL_TYPE_FLOAT:
891 data.f[c] = op[0]->value.f[c0] + op[1]->value.f[c1];
892 break;
893 default:
894 assert(0);
895 }
896 }
897
898 break;
899 case ir_binop_sub:
900 assert(op[0]->type == op[1]->type || op0_scalar || op1_scalar);
901 for (unsigned c = 0, c0 = 0, c1 = 0;
902 c < components;
903 c0 += c0_inc, c1 += c1_inc, c++) {
904
905 switch (op[0]->type->base_type) {
906 case GLSL_TYPE_UINT:
907 data.u[c] = op[0]->value.u[c0] - op[1]->value.u[c1];
908 break;
909 case GLSL_TYPE_INT:
910 data.i[c] = op[0]->value.i[c0] - op[1]->value.i[c1];
911 break;
912 case GLSL_TYPE_FLOAT:
913 data.f[c] = op[0]->value.f[c0] - op[1]->value.f[c1];
914 break;
915 default:
916 assert(0);
917 }
918 }
919
920 break;
921 case ir_binop_mul:
922 /* Check for equal types, or unequal types involving scalars */
923 if ((op[0]->type == op[1]->type && !op[0]->type->is_matrix())
924 || op0_scalar || op1_scalar) {
925 for (unsigned c = 0, c0 = 0, c1 = 0;
926 c < components;
927 c0 += c0_inc, c1 += c1_inc, c++) {
928
929 switch (op[0]->type->base_type) {
930 case GLSL_TYPE_UINT:
931 data.u[c] = op[0]->value.u[c0] * op[1]->value.u[c1];
932 break;
933 case GLSL_TYPE_INT:
934 data.i[c] = op[0]->value.i[c0] * op[1]->value.i[c1];
935 break;
936 case GLSL_TYPE_FLOAT:
937 data.f[c] = op[0]->value.f[c0] * op[1]->value.f[c1];
938 break;
939 default:
940 assert(0);
941 }
942 }
943 } else {
944 assert(op[0]->type->is_matrix() || op[1]->type->is_matrix());
945
946 /* Multiply an N-by-M matrix with an M-by-P matrix. Since either
947 * matrix can be a GLSL vector, either N or P can be 1.
948 *
949 * For vec*mat, the vector is treated as a row vector. This
950 * means the vector is a 1-row x M-column matrix.
951 *
952 * For mat*vec, the vector is treated as a column vector. Since
953 * matrix_columns is 1 for vectors, this just works.
954 */
955 const unsigned n = op[0]->type->is_vector()
956 ? 1 : op[0]->type->vector_elements;
957 const unsigned m = op[1]->type->vector_elements;
958 const unsigned p = op[1]->type->matrix_columns;
959 for (unsigned j = 0; j < p; j++) {
960 for (unsigned i = 0; i < n; i++) {
961 for (unsigned k = 0; k < m; k++) {
962 data.f[i+n*j] += op[0]->value.f[i+n*k]*op[1]->value.f[k+m*j];
963 }
964 }
965 }
966 }
967
968 break;
969 case ir_binop_div:
970 /* FINISHME: Emit warning when division-by-zero is detected. */
971 assert(op[0]->type == op[1]->type || op0_scalar || op1_scalar);
972 for (unsigned c = 0, c0 = 0, c1 = 0;
973 c < components;
974 c0 += c0_inc, c1 += c1_inc, c++) {
975
976 switch (op[0]->type->base_type) {
977 case GLSL_TYPE_UINT:
978 if (op[1]->value.u[c1] == 0) {
979 data.u[c] = 0;
980 } else {
981 data.u[c] = op[0]->value.u[c0] / op[1]->value.u[c1];
982 }
983 break;
984 case GLSL_TYPE_INT:
985 if (op[1]->value.i[c1] == 0) {
986 data.i[c] = 0;
987 } else {
988 data.i[c] = op[0]->value.i[c0] / op[1]->value.i[c1];
989 }
990 break;
991 case GLSL_TYPE_FLOAT:
992 data.f[c] = op[0]->value.f[c0] / op[1]->value.f[c1];
993 break;
994 default:
995 assert(0);
996 }
997 }
998
999 break;
1000 case ir_binop_mod:
1001 /* FINISHME: Emit warning when division-by-zero is detected. */
1002 assert(op[0]->type == op[1]->type || op0_scalar || op1_scalar);
1003 for (unsigned c = 0, c0 = 0, c1 = 0;
1004 c < components;
1005 c0 += c0_inc, c1 += c1_inc, c++) {
1006
1007 switch (op[0]->type->base_type) {
1008 case GLSL_TYPE_UINT:
1009 if (op[1]->value.u[c1] == 0) {
1010 data.u[c] = 0;
1011 } else {
1012 data.u[c] = op[0]->value.u[c0] % op[1]->value.u[c1];
1013 }
1014 break;
1015 case GLSL_TYPE_INT:
1016 if (op[1]->value.i[c1] == 0) {
1017 data.i[c] = 0;
1018 } else {
1019 data.i[c] = op[0]->value.i[c0] % op[1]->value.i[c1];
1020 }
1021 break;
1022 case GLSL_TYPE_FLOAT:
1023 /* We don't use fmod because it rounds toward zero; GLSL specifies
1024 * the use of floor.
1025 */
1026 data.f[c] = op[0]->value.f[c0] - op[1]->value.f[c1]
1027 * floorf(op[0]->value.f[c0] / op[1]->value.f[c1]);
1028 break;
1029 default:
1030 assert(0);
1031 }
1032 }
1033
1034 break;
1035
1036 case ir_binop_logic_and:
1037 assert(op[0]->type->base_type == GLSL_TYPE_BOOL);
1038 for (unsigned c = 0; c < op[0]->type->components(); c++)
1039 data.b[c] = op[0]->value.b[c] && op[1]->value.b[c];
1040 break;
1041 case ir_binop_logic_xor:
1042 assert(op[0]->type->base_type == GLSL_TYPE_BOOL);
1043 for (unsigned c = 0; c < op[0]->type->components(); c++)
1044 data.b[c] = op[0]->value.b[c] ^ op[1]->value.b[c];
1045 break;
1046 case ir_binop_logic_or:
1047 assert(op[0]->type->base_type == GLSL_TYPE_BOOL);
1048 for (unsigned c = 0; c < op[0]->type->components(); c++)
1049 data.b[c] = op[0]->value.b[c] || op[1]->value.b[c];
1050 break;
1051
1052 case ir_binop_less:
1053 assert(op[0]->type == op[1]->type);
1054 for (unsigned c = 0; c < op[0]->type->components(); c++) {
1055 switch (op[0]->type->base_type) {
1056 case GLSL_TYPE_UINT:
1057 data.b[c] = op[0]->value.u[c] < op[1]->value.u[c];
1058 break;
1059 case GLSL_TYPE_INT:
1060 data.b[c] = op[0]->value.i[c] < op[1]->value.i[c];
1061 break;
1062 case GLSL_TYPE_FLOAT:
1063 data.b[c] = op[0]->value.f[c] < op[1]->value.f[c];
1064 break;
1065 default:
1066 assert(0);
1067 }
1068 }
1069 break;
1070 case ir_binop_greater:
1071 assert(op[0]->type == op[1]->type);
1072 for (unsigned c = 0; c < op[0]->type->components(); c++) {
1073 switch (op[0]->type->base_type) {
1074 case GLSL_TYPE_UINT:
1075 data.b[c] = op[0]->value.u[c] > op[1]->value.u[c];
1076 break;
1077 case GLSL_TYPE_INT:
1078 data.b[c] = op[0]->value.i[c] > op[1]->value.i[c];
1079 break;
1080 case GLSL_TYPE_FLOAT:
1081 data.b[c] = op[0]->value.f[c] > op[1]->value.f[c];
1082 break;
1083 default:
1084 assert(0);
1085 }
1086 }
1087 break;
1088 case ir_binop_lequal:
1089 assert(op[0]->type == op[1]->type);
1090 for (unsigned c = 0; c < op[0]->type->components(); c++) {
1091 switch (op[0]->type->base_type) {
1092 case GLSL_TYPE_UINT:
1093 data.b[c] = op[0]->value.u[c] <= op[1]->value.u[c];
1094 break;
1095 case GLSL_TYPE_INT:
1096 data.b[c] = op[0]->value.i[c] <= op[1]->value.i[c];
1097 break;
1098 case GLSL_TYPE_FLOAT:
1099 data.b[c] = op[0]->value.f[c] <= op[1]->value.f[c];
1100 break;
1101 default:
1102 assert(0);
1103 }
1104 }
1105 break;
1106 case ir_binop_gequal:
1107 assert(op[0]->type == op[1]->type);
1108 for (unsigned c = 0; c < op[0]->type->components(); c++) {
1109 switch (op[0]->type->base_type) {
1110 case GLSL_TYPE_UINT:
1111 data.b[c] = op[0]->value.u[c] >= op[1]->value.u[c];
1112 break;
1113 case GLSL_TYPE_INT:
1114 data.b[c] = op[0]->value.i[c] >= op[1]->value.i[c];
1115 break;
1116 case GLSL_TYPE_FLOAT:
1117 data.b[c] = op[0]->value.f[c] >= op[1]->value.f[c];
1118 break;
1119 default:
1120 assert(0);
1121 }
1122 }
1123 break;
1124 case ir_binop_equal:
1125 assert(op[0]->type == op[1]->type);
1126 for (unsigned c = 0; c < components; c++) {
1127 switch (op[0]->type->base_type) {
1128 case GLSL_TYPE_UINT:
1129 data.b[c] = op[0]->value.u[c] == op[1]->value.u[c];
1130 break;
1131 case GLSL_TYPE_INT:
1132 data.b[c] = op[0]->value.i[c] == op[1]->value.i[c];
1133 break;
1134 case GLSL_TYPE_FLOAT:
1135 data.b[c] = op[0]->value.f[c] == op[1]->value.f[c];
1136 break;
1137 case GLSL_TYPE_BOOL:
1138 data.b[c] = op[0]->value.b[c] == op[1]->value.b[c];
1139 break;
1140 default:
1141 assert(0);
1142 }
1143 }
1144 break;
1145 case ir_binop_nequal:
1146 assert(op[0]->type == op[1]->type);
1147 for (unsigned c = 0; c < components; c++) {
1148 switch (op[0]->type->base_type) {
1149 case GLSL_TYPE_UINT:
1150 data.b[c] = op[0]->value.u[c] != op[1]->value.u[c];
1151 break;
1152 case GLSL_TYPE_INT:
1153 data.b[c] = op[0]->value.i[c] != op[1]->value.i[c];
1154 break;
1155 case GLSL_TYPE_FLOAT:
1156 data.b[c] = op[0]->value.f[c] != op[1]->value.f[c];
1157 break;
1158 case GLSL_TYPE_BOOL:
1159 data.b[c] = op[0]->value.b[c] != op[1]->value.b[c];
1160 break;
1161 default:
1162 assert(0);
1163 }
1164 }
1165 break;
1166 case ir_binop_all_equal:
1167 data.b[0] = op[0]->has_value(op[1]);
1168 break;
1169 case ir_binop_any_nequal:
1170 data.b[0] = !op[0]->has_value(op[1]);
1171 break;
1172
1173 case ir_binop_lshift:
1174 for (unsigned c = 0, c0 = 0, c1 = 0;
1175 c < components;
1176 c0 += c0_inc, c1 += c1_inc, c++) {
1177
1178 if (op[0]->type->base_type == GLSL_TYPE_INT &&
1179 op[1]->type->base_type == GLSL_TYPE_INT) {
1180 data.i[c] = op[0]->value.i[c0] << op[1]->value.i[c1];
1181
1182 } else if (op[0]->type->base_type == GLSL_TYPE_INT &&
1183 op[1]->type->base_type == GLSL_TYPE_UINT) {
1184 data.i[c] = op[0]->value.i[c0] << op[1]->value.u[c1];
1185
1186 } else if (op[0]->type->base_type == GLSL_TYPE_UINT &&
1187 op[1]->type->base_type == GLSL_TYPE_INT) {
1188 data.u[c] = op[0]->value.u[c0] << op[1]->value.i[c1];
1189
1190 } else if (op[0]->type->base_type == GLSL_TYPE_UINT &&
1191 op[1]->type->base_type == GLSL_TYPE_UINT) {
1192 data.u[c] = op[0]->value.u[c0] << op[1]->value.u[c1];
1193 }
1194 }
1195 break;
1196
1197 case ir_binop_rshift:
1198 for (unsigned c = 0, c0 = 0, c1 = 0;
1199 c < components;
1200 c0 += c0_inc, c1 += c1_inc, c++) {
1201
1202 if (op[0]->type->base_type == GLSL_TYPE_INT &&
1203 op[1]->type->base_type == GLSL_TYPE_INT) {
1204 data.i[c] = op[0]->value.i[c0] >> op[1]->value.i[c1];
1205
1206 } else if (op[0]->type->base_type == GLSL_TYPE_INT &&
1207 op[1]->type->base_type == GLSL_TYPE_UINT) {
1208 data.i[c] = op[0]->value.i[c0] >> op[1]->value.u[c1];
1209
1210 } else if (op[0]->type->base_type == GLSL_TYPE_UINT &&
1211 op[1]->type->base_type == GLSL_TYPE_INT) {
1212 data.u[c] = op[0]->value.u[c0] >> op[1]->value.i[c1];
1213
1214 } else if (op[0]->type->base_type == GLSL_TYPE_UINT &&
1215 op[1]->type->base_type == GLSL_TYPE_UINT) {
1216 data.u[c] = op[0]->value.u[c0] >> op[1]->value.u[c1];
1217 }
1218 }
1219 break;
1220
1221 case ir_binop_bit_and:
1222 for (unsigned c = 0, c0 = 0, c1 = 0;
1223 c < components;
1224 c0 += c0_inc, c1 += c1_inc, c++) {
1225
1226 switch (op[0]->type->base_type) {
1227 case GLSL_TYPE_INT:
1228 data.i[c] = op[0]->value.i[c0] & op[1]->value.i[c1];
1229 break;
1230 case GLSL_TYPE_UINT:
1231 data.u[c] = op[0]->value.u[c0] & op[1]->value.u[c1];
1232 break;
1233 default:
1234 assert(0);
1235 }
1236 }
1237 break;
1238
1239 case ir_binop_bit_or:
1240 for (unsigned c = 0, c0 = 0, c1 = 0;
1241 c < components;
1242 c0 += c0_inc, c1 += c1_inc, c++) {
1243
1244 switch (op[0]->type->base_type) {
1245 case GLSL_TYPE_INT:
1246 data.i[c] = op[0]->value.i[c0] | op[1]->value.i[c1];
1247 break;
1248 case GLSL_TYPE_UINT:
1249 data.u[c] = op[0]->value.u[c0] | op[1]->value.u[c1];
1250 break;
1251 default:
1252 assert(0);
1253 }
1254 }
1255 break;
1256
1257 case ir_binop_vector_extract: {
1258 const int c = CLAMP(op[1]->value.i[0], 0,
1259 (int) op[0]->type->vector_elements - 1);
1260
1261 switch (op[0]->type->base_type) {
1262 case GLSL_TYPE_UINT:
1263 data.u[0] = op[0]->value.u[c];
1264 break;
1265 case GLSL_TYPE_INT:
1266 data.i[0] = op[0]->value.i[c];
1267 break;
1268 case GLSL_TYPE_FLOAT:
1269 data.f[0] = op[0]->value.f[c];
1270 break;
1271 case GLSL_TYPE_BOOL:
1272 data.b[0] = op[0]->value.b[c];
1273 break;
1274 default:
1275 assert(0);
1276 }
1277 break;
1278 }
1279
1280 case ir_binop_bit_xor:
1281 for (unsigned c = 0, c0 = 0, c1 = 0;
1282 c < components;
1283 c0 += c0_inc, c1 += c1_inc, c++) {
1284
1285 switch (op[0]->type->base_type) {
1286 case GLSL_TYPE_INT:
1287 data.i[c] = op[0]->value.i[c0] ^ op[1]->value.i[c1];
1288 break;
1289 case GLSL_TYPE_UINT:
1290 data.u[c] = op[0]->value.u[c0] ^ op[1]->value.u[c1];
1291 break;
1292 default:
1293 assert(0);
1294 }
1295 }
1296 break;
1297
1298 case ir_unop_bitfield_reverse:
1299 /* http://graphics.stanford.edu/~seander/bithacks.html#BitReverseObvious */
1300 for (unsigned c = 0; c < components; c++) {
1301 unsigned int v = op[0]->value.u[c]; // input bits to be reversed
1302 unsigned int r = v; // r will be reversed bits of v; first get LSB of v
1303 int s = sizeof(v) * CHAR_BIT - 1; // extra shift needed at end
1304
1305 for (v >>= 1; v; v >>= 1) {
1306 r <<= 1;
1307 r |= v & 1;
1308 s--;
1309 }
1310 r <<= s; // shift when v's highest bits are zero
1311
1312 data.u[c] = r;
1313 }
1314 break;
1315
1316 case ir_unop_bit_count:
1317 for (unsigned c = 0; c < components; c++) {
1318 unsigned count = 0;
1319 unsigned v = op[0]->value.u[c];
1320
1321 for (; v; count++) {
1322 v &= v - 1;
1323 }
1324 data.u[c] = count;
1325 }
1326 break;
1327
1328 case ir_unop_find_msb:
1329 for (unsigned c = 0; c < components; c++) {
1330 int v = op[0]->value.i[c];
1331
1332 if (v == 0 || (op[0]->type->base_type == GLSL_TYPE_INT && v == -1))
1333 data.i[c] = -1;
1334 else {
1335 int count = 0;
1336 int top_bit = op[0]->type->base_type == GLSL_TYPE_UINT
1337 ? 0 : v & (1 << 31);
1338
1339 while (((v & (1 << 31)) == top_bit) && count != 32) {
1340 count++;
1341 v <<= 1;
1342 }
1343
1344 data.i[c] = 31 - count;
1345 }
1346 }
1347 break;
1348
1349 case ir_unop_find_lsb:
1350 for (unsigned c = 0; c < components; c++) {
1351 if (op[0]->value.i[c] == 0)
1352 data.i[c] = -1;
1353 else {
1354 unsigned pos = 0;
1355 unsigned v = op[0]->value.u[c];
1356
1357 for (; !(v & 1); v >>= 1) {
1358 pos++;
1359 }
1360 data.u[c] = pos;
1361 }
1362 }
1363 break;
1364
1365 case ir_triop_bitfield_extract: {
1366 int offset = op[1]->value.i[0];
1367 int bits = op[2]->value.i[0];
1368
1369 for (unsigned c = 0; c < components; c++) {
1370 if (bits == 0)
1371 data.u[c] = 0;
1372 else if (offset < 0 || bits < 0)
1373 data.u[c] = 0; /* Undefined, per spec. */
1374 else if (offset + bits > 32)
1375 data.u[c] = 0; /* Undefined, per spec. */
1376 else {
1377 if (op[0]->type->base_type == GLSL_TYPE_INT) {
1378 /* int so that the right shift will sign-extend. */
1379 int value = op[0]->value.i[c];
1380 value <<= 32 - bits - offset;
1381 value >>= 32 - bits;
1382 data.i[c] = value;
1383 } else {
1384 unsigned value = op[0]->value.u[c];
1385 value <<= 32 - bits - offset;
1386 value >>= 32 - bits;
1387 data.u[c] = value;
1388 }
1389 }
1390 }
1391 break;
1392 }
1393
1394 case ir_binop_ldexp:
1395 for (unsigned c = 0; c < components; c++) {
1396 data.f[c] = ldexp(op[0]->value.f[c], op[1]->value.i[c]);
1397 /* Flush subnormal values to zero. */
1398 if (!isnormal(data.f[c]))
1399 data.f[c] = copysign(0.0, op[0]->value.f[c]);
1400 }
1401 break;
1402
1403 case ir_triop_fma:
1404 assert(op[0]->type->base_type == GLSL_TYPE_FLOAT);
1405 assert(op[1]->type->base_type == GLSL_TYPE_FLOAT);
1406 assert(op[2]->type->base_type == GLSL_TYPE_FLOAT);
1407
1408 for (unsigned c = 0; c < components; c++) {
1409 data.f[c] = op[0]->value.f[c] * op[1]->value.f[c]
1410 + op[2]->value.f[c];
1411 }
1412 break;
1413
1414 case ir_triop_lrp: {
1415 assert(op[0]->type->base_type == GLSL_TYPE_FLOAT);
1416 assert(op[1]->type->base_type == GLSL_TYPE_FLOAT);
1417 assert(op[2]->type->base_type == GLSL_TYPE_FLOAT);
1418
1419 unsigned c2_inc = op[2]->type->is_scalar() ? 0 : 1;
1420 for (unsigned c = 0, c2 = 0; c < components; c2 += c2_inc, c++) {
1421 data.f[c] = op[0]->value.f[c] * (1.0f - op[2]->value.f[c2]) +
1422 (op[1]->value.f[c] * op[2]->value.f[c2]);
1423 }
1424 break;
1425 }
1426
1427 case ir_triop_csel:
1428 for (unsigned c = 0; c < components; c++) {
1429 data.u[c] = op[0]->value.b[c] ? op[1]->value.u[c]
1430 : op[2]->value.u[c];
1431 }
1432 break;
1433
1434 case ir_triop_vector_insert: {
1435 const unsigned idx = op[2]->value.u[0];
1436
1437 memcpy(&data, &op[0]->value, sizeof(data));
1438
1439 switch (this->type->base_type) {
1440 case GLSL_TYPE_INT:
1441 data.i[idx] = op[1]->value.i[0];
1442 break;
1443 case GLSL_TYPE_UINT:
1444 data.u[idx] = op[1]->value.u[0];
1445 break;
1446 case GLSL_TYPE_FLOAT:
1447 data.f[idx] = op[1]->value.f[0];
1448 break;
1449 case GLSL_TYPE_BOOL:
1450 data.b[idx] = op[1]->value.b[0];
1451 break;
1452 default:
1453 assert(!"Should not get here.");
1454 break;
1455 }
1456 break;
1457 }
1458
1459 case ir_quadop_bitfield_insert: {
1460 int offset = op[2]->value.i[0];
1461 int bits = op[3]->value.i[0];
1462
1463 for (unsigned c = 0; c < components; c++) {
1464 if (bits == 0)
1465 data.u[c] = op[0]->value.u[c];
1466 else if (offset < 0 || bits < 0)
1467 data.u[c] = 0; /* Undefined, per spec. */
1468 else if (offset + bits > 32)
1469 data.u[c] = 0; /* Undefined, per spec. */
1470 else {
1471 unsigned insert_mask = ((1 << bits) - 1) << offset;
1472
1473 unsigned insert = op[1]->value.u[c];
1474 insert <<= offset;
1475 insert &= insert_mask;
1476
1477 unsigned base = op[0]->value.u[c];
1478 base &= ~insert_mask;
1479
1480 data.u[c] = base | insert;
1481 }
1482 }
1483 break;
1484 }
1485
1486 case ir_quadop_vector:
1487 for (unsigned c = 0; c < this->type->vector_elements; c++) {
1488 switch (this->type->base_type) {
1489 case GLSL_TYPE_INT:
1490 data.i[c] = op[c]->value.i[0];
1491 break;
1492 case GLSL_TYPE_UINT:
1493 data.u[c] = op[c]->value.u[0];
1494 break;
1495 case GLSL_TYPE_FLOAT:
1496 data.f[c] = op[c]->value.f[0];
1497 break;
1498 default:
1499 assert(0);
1500 }
1501 }
1502 break;
1503
1504 default:
1505 /* FINISHME: Should handle all expression types. */
1506 return NULL;
1507 }
1508
1509 return new(ctx) ir_constant(this->type, &data);
1510 }
1511
1512
1513 ir_constant *
1514 ir_texture::constant_expression_value(struct hash_table *variable_context)
1515 {
1516 /* texture lookups aren't constant expressions */
1517 return NULL;
1518 }
1519
1520
1521 ir_constant *
1522 ir_swizzle::constant_expression_value(struct hash_table *variable_context)
1523 {
1524 ir_constant *v = this->val->constant_expression_value(variable_context);
1525
1526 if (v != NULL) {
1527 ir_constant_data data = { { 0 } };
1528
1529 const unsigned swiz_idx[4] = {
1530 this->mask.x, this->mask.y, this->mask.z, this->mask.w
1531 };
1532
1533 for (unsigned i = 0; i < this->mask.num_components; i++) {
1534 switch (v->type->base_type) {
1535 case GLSL_TYPE_UINT:
1536 case GLSL_TYPE_INT: data.u[i] = v->value.u[swiz_idx[i]]; break;
1537 case GLSL_TYPE_FLOAT: data.f[i] = v->value.f[swiz_idx[i]]; break;
1538 case GLSL_TYPE_BOOL: data.b[i] = v->value.b[swiz_idx[i]]; break;
1539 default: assert(!"Should not get here."); break;
1540 }
1541 }
1542
1543 void *ctx = ralloc_parent(this);
1544 return new(ctx) ir_constant(this->type, &data);
1545 }
1546 return NULL;
1547 }
1548
1549
1550 void
1551 ir_dereference_variable::constant_referenced(struct hash_table *variable_context,
1552 ir_constant *&store, int &offset) const
1553 {
1554 if (variable_context) {
1555 store = (ir_constant *)hash_table_find(variable_context, var);
1556 offset = 0;
1557 } else {
1558 store = NULL;
1559 offset = 0;
1560 }
1561 }
1562
1563 ir_constant *
1564 ir_dereference_variable::constant_expression_value(struct hash_table *variable_context)
1565 {
1566 /* This may occur during compile and var->type is glsl_type::error_type */
1567 if (!var)
1568 return NULL;
1569
1570 /* Give priority to the context hashtable, if it exists */
1571 if (variable_context) {
1572 ir_constant *value = (ir_constant *)hash_table_find(variable_context, var);
1573 if(value)
1574 return value;
1575 }
1576
1577 /* The constant_value of a uniform variable is its initializer,
1578 * not the lifetime constant value of the uniform.
1579 */
1580 if (var->mode == ir_var_uniform)
1581 return NULL;
1582
1583 if (!var->constant_value)
1584 return NULL;
1585
1586 return var->constant_value->clone(ralloc_parent(var), NULL);
1587 }
1588
1589
1590 void
1591 ir_dereference_array::constant_referenced(struct hash_table *variable_context,
1592 ir_constant *&store, int &offset) const
1593 {
1594 ir_constant *index_c = array_index->constant_expression_value(variable_context);
1595
1596 if (!index_c || !index_c->type->is_scalar() || !index_c->type->is_integer()) {
1597 store = 0;
1598 offset = 0;
1599 return;
1600 }
1601
1602 int index = index_c->type->base_type == GLSL_TYPE_INT ?
1603 index_c->get_int_component(0) :
1604 index_c->get_uint_component(0);
1605
1606 ir_constant *substore;
1607 int suboffset;
1608 const ir_dereference *deref = array->as_dereference();
1609 if (!deref) {
1610 store = 0;
1611 offset = 0;
1612 return;
1613 }
1614
1615 deref->constant_referenced(variable_context, substore, suboffset);
1616
1617 if (!substore) {
1618 store = 0;
1619 offset = 0;
1620 return;
1621 }
1622
1623 const glsl_type *vt = array->type;
1624 if (vt->is_array()) {
1625 store = substore->get_array_element(index);
1626 offset = 0;
1627 return;
1628 }
1629 if (vt->is_matrix()) {
1630 store = substore;
1631 offset = index * vt->vector_elements;
1632 return;
1633 }
1634 if (vt->is_vector()) {
1635 store = substore;
1636 offset = suboffset + index;
1637 return;
1638 }
1639
1640 store = 0;
1641 offset = 0;
1642 }
1643
1644 ir_constant *
1645 ir_dereference_array::constant_expression_value(struct hash_table *variable_context)
1646 {
1647 ir_constant *array = this->array->constant_expression_value(variable_context);
1648 ir_constant *idx = this->array_index->constant_expression_value(variable_context);
1649
1650 if ((array != NULL) && (idx != NULL)) {
1651 void *ctx = ralloc_parent(this);
1652 if (array->type->is_matrix()) {
1653 /* Array access of a matrix results in a vector.
1654 */
1655 const unsigned column = idx->value.u[0];
1656
1657 const glsl_type *const column_type = array->type->column_type();
1658
1659 /* Offset in the constant matrix to the first element of the column
1660 * to be extracted.
1661 */
1662 const unsigned mat_idx = column * column_type->vector_elements;
1663
1664 ir_constant_data data = { { 0 } };
1665
1666 switch (column_type->base_type) {
1667 case GLSL_TYPE_UINT:
1668 case GLSL_TYPE_INT:
1669 for (unsigned i = 0; i < column_type->vector_elements; i++)
1670 data.u[i] = array->value.u[mat_idx + i];
1671
1672 break;
1673
1674 case GLSL_TYPE_FLOAT:
1675 for (unsigned i = 0; i < column_type->vector_elements; i++)
1676 data.f[i] = array->value.f[mat_idx + i];
1677
1678 break;
1679
1680 default:
1681 assert(!"Should not get here.");
1682 break;
1683 }
1684
1685 return new(ctx) ir_constant(column_type, &data);
1686 } else if (array->type->is_vector()) {
1687 const unsigned component = idx->value.u[0];
1688
1689 return new(ctx) ir_constant(array, component);
1690 } else {
1691 const unsigned index = idx->value.u[0];
1692 return array->get_array_element(index)->clone(ctx, NULL);
1693 }
1694 }
1695 return NULL;
1696 }
1697
1698
1699 void
1700 ir_dereference_record::constant_referenced(struct hash_table *variable_context,
1701 ir_constant *&store, int &offset) const
1702 {
1703 ir_constant *substore;
1704 int suboffset;
1705 const ir_dereference *deref = record->as_dereference();
1706 if (!deref) {
1707 store = 0;
1708 offset = 0;
1709 return;
1710 }
1711
1712 deref->constant_referenced(variable_context, substore, suboffset);
1713
1714 if (!substore) {
1715 store = 0;
1716 offset = 0;
1717 return;
1718 }
1719
1720 store = substore->get_record_field(field);
1721 offset = 0;
1722 }
1723
1724 ir_constant *
1725 ir_dereference_record::constant_expression_value(struct hash_table *variable_context)
1726 {
1727 ir_constant *v = this->record->constant_expression_value();
1728
1729 return (v != NULL) ? v->get_record_field(this->field) : NULL;
1730 }
1731
1732
1733 ir_constant *
1734 ir_assignment::constant_expression_value(struct hash_table *variable_context)
1735 {
1736 /* FINISHME: Handle CEs involving assignment (return RHS) */
1737 return NULL;
1738 }
1739
1740
1741 ir_constant *
1742 ir_constant::constant_expression_value(struct hash_table *variable_context)
1743 {
1744 return this;
1745 }
1746
1747
1748 ir_constant *
1749 ir_call::constant_expression_value(struct hash_table *variable_context)
1750 {
1751 return this->callee->constant_expression_value(&this->actual_parameters, variable_context);
1752 }
1753
1754
1755 bool ir_function_signature::constant_expression_evaluate_expression_list(const struct exec_list &body,
1756 struct hash_table *variable_context,
1757 ir_constant **result)
1758 {
1759 foreach_list(n, &body) {
1760 ir_instruction *inst = (ir_instruction *)n;
1761 switch(inst->ir_type) {
1762
1763 /* (declare () type symbol) */
1764 case ir_type_variable: {
1765 ir_variable *var = inst->as_variable();
1766 hash_table_insert(variable_context, ir_constant::zero(this, var->type), var);
1767 break;
1768 }
1769
1770 /* (assign [condition] (write-mask) (ref) (value)) */
1771 case ir_type_assignment: {
1772 ir_assignment *asg = inst->as_assignment();
1773 if (asg->condition) {
1774 ir_constant *cond = asg->condition->constant_expression_value(variable_context);
1775 if (!cond)
1776 return false;
1777 if (!cond->get_bool_component(0))
1778 break;
1779 }
1780
1781 ir_constant *store = NULL;
1782 int offset = 0;
1783 asg->lhs->constant_referenced(variable_context, store, offset);
1784
1785 if (!store)
1786 return false;
1787
1788 ir_constant *value = asg->rhs->constant_expression_value(variable_context);
1789
1790 if (!value)
1791 return false;
1792
1793 store->copy_masked_offset(value, offset, asg->write_mask);
1794 break;
1795 }
1796
1797 /* (return (expression)) */
1798 case ir_type_return:
1799 assert (result);
1800 *result = inst->as_return()->value->constant_expression_value(variable_context);
1801 return *result != NULL;
1802
1803 /* (call name (ref) (params))*/
1804 case ir_type_call: {
1805 ir_call *call = inst->as_call();
1806
1807 /* Just say no to void functions in constant expressions. We
1808 * don't need them at that point.
1809 */
1810
1811 if (!call->return_deref)
1812 return false;
1813
1814 ir_constant *store = NULL;
1815 int offset = 0;
1816 call->return_deref->constant_referenced(variable_context, store, offset);
1817
1818 if (!store)
1819 return false;
1820
1821 ir_constant *value = call->constant_expression_value(variable_context);
1822
1823 if(!value)
1824 return false;
1825
1826 store->copy_offset(value, offset);
1827 break;
1828 }
1829
1830 /* (if condition (then-instructions) (else-instructions)) */
1831 case ir_type_if: {
1832 ir_if *iif = inst->as_if();
1833
1834 ir_constant *cond = iif->condition->constant_expression_value(variable_context);
1835 if (!cond || !cond->type->is_boolean())
1836 return false;
1837
1838 exec_list &branch = cond->get_bool_component(0) ? iif->then_instructions : iif->else_instructions;
1839
1840 *result = NULL;
1841 if (!constant_expression_evaluate_expression_list(branch, variable_context, result))
1842 return false;
1843
1844 /* If there was a return in the branch chosen, drop out now. */
1845 if (*result)
1846 return true;
1847
1848 break;
1849 }
1850
1851 /* Every other expression type, we drop out. */
1852 default:
1853 return false;
1854 }
1855 }
1856
1857 /* Reaching the end of the block is not an error condition */
1858 if (result)
1859 *result = NULL;
1860
1861 return true;
1862 }
1863
1864 ir_constant *
1865 ir_function_signature::constant_expression_value(exec_list *actual_parameters, struct hash_table *variable_context)
1866 {
1867 const glsl_type *type = this->return_type;
1868 if (type == glsl_type::void_type)
1869 return NULL;
1870
1871 /* From the GLSL 1.20 spec, page 23:
1872 * "Function calls to user-defined functions (non-built-in functions)
1873 * cannot be used to form constant expressions."
1874 */
1875 if (!this->is_builtin())
1876 return NULL;
1877
1878 /*
1879 * Of the builtin functions, only the texture lookups and the noise
1880 * ones must not be used in constant expressions. They all include
1881 * specific opcodes so they don't need to be special-cased at this
1882 * point.
1883 */
1884
1885 /* Initialize the table of dereferencable names with the function
1886 * parameters. Verify their const-ness on the way.
1887 *
1888 * We expect the correctness of the number of parameters to have
1889 * been checked earlier.
1890 */
1891 hash_table *deref_hash = hash_table_ctor(8, hash_table_pointer_hash,
1892 hash_table_pointer_compare);
1893
1894 /* If "origin" is non-NULL, then the function body is there. So we
1895 * have to use the variable objects from the object with the body,
1896 * but the parameter instanciation on the current object.
1897 */
1898 const exec_node *parameter_info = origin ? origin->parameters.head : parameters.head;
1899
1900 foreach_list(n, actual_parameters) {
1901 ir_constant *constant = ((ir_rvalue *) n)->constant_expression_value(variable_context);
1902 if (constant == NULL) {
1903 hash_table_dtor(deref_hash);
1904 return NULL;
1905 }
1906
1907
1908 ir_variable *var = (ir_variable *)parameter_info;
1909 hash_table_insert(deref_hash, constant, var);
1910
1911 parameter_info = parameter_info->next;
1912 }
1913
1914 ir_constant *result = NULL;
1915
1916 /* Now run the builtin function until something non-constant
1917 * happens or we get the result.
1918 */
1919 if (constant_expression_evaluate_expression_list(origin ? origin->body : body, deref_hash, &result) && result)
1920 result = result->clone(ralloc_parent(this), NULL);
1921
1922 hash_table_dtor(deref_hash);
1923
1924 return result;
1925 }