Fix a few typos
[mesa.git] / src / glsl / lower_packing_builtins.cpp
1 /*
2 * Copyright © 2012 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24 #include "ir.h"
25 #include "ir_builder.h"
26 #include "ir_optimization.h"
27 #include "ir_rvalue_visitor.h"
28
29 namespace {
30
31 using namespace ir_builder;
32
33 /**
34 * A visitor that lowers built-in floating-point pack/unpack expressions
35 * such as packSnorm2x16.
36 */
37 class lower_packing_builtins_visitor : public ir_rvalue_visitor {
38 public:
39 /**
40 * \param op_mask is a bitmask of `enum lower_packing_builtins_op`
41 */
42 explicit lower_packing_builtins_visitor(int op_mask)
43 : op_mask(op_mask),
44 progress(false)
45 {
46 /* Mutually exclusive options. */
47 assert(!((op_mask & LOWER_PACK_HALF_2x16) &&
48 (op_mask & LOWER_PACK_HALF_2x16_TO_SPLIT)));
49
50 assert(!((op_mask & LOWER_UNPACK_HALF_2x16) &&
51 (op_mask & LOWER_UNPACK_HALF_2x16_TO_SPLIT)));
52
53 factory.instructions = &factory_instructions;
54 }
55
56 virtual ~lower_packing_builtins_visitor()
57 {
58 assert(factory_instructions.is_empty());
59 }
60
61 bool get_progress() { return progress; }
62
63 void handle_rvalue(ir_rvalue **rvalue)
64 {
65 if (!*rvalue)
66 return;
67
68 ir_expression *expr = (*rvalue)->as_expression();
69 if (!expr)
70 return;
71
72 enum lower_packing_builtins_op lowering_op =
73 choose_lowering_op(expr->operation);
74
75 if (lowering_op == LOWER_PACK_UNPACK_NONE)
76 return;
77
78 setup_factory(ralloc_parent(expr));
79
80 ir_rvalue *op0 = expr->operands[0];
81 ralloc_steal(factory.mem_ctx, op0);
82
83 switch (lowering_op) {
84 case LOWER_PACK_SNORM_2x16:
85 *rvalue = lower_pack_snorm_2x16(op0);
86 break;
87 case LOWER_PACK_SNORM_4x8:
88 *rvalue = lower_pack_snorm_4x8(op0);
89 break;
90 case LOWER_PACK_UNORM_2x16:
91 *rvalue = lower_pack_unorm_2x16(op0);
92 break;
93 case LOWER_PACK_UNORM_4x8:
94 *rvalue = lower_pack_unorm_4x8(op0);
95 break;
96 case LOWER_PACK_HALF_2x16:
97 *rvalue = lower_pack_half_2x16(op0);
98 break;
99 case LOWER_PACK_HALF_2x16_TO_SPLIT:
100 *rvalue = split_pack_half_2x16(op0);
101 break;
102 case LOWER_UNPACK_SNORM_2x16:
103 *rvalue = lower_unpack_snorm_2x16(op0);
104 break;
105 case LOWER_UNPACK_SNORM_4x8:
106 *rvalue = lower_unpack_snorm_4x8(op0);
107 break;
108 case LOWER_UNPACK_UNORM_2x16:
109 *rvalue = lower_unpack_unorm_2x16(op0);
110 break;
111 case LOWER_UNPACK_UNORM_4x8:
112 *rvalue = lower_unpack_unorm_4x8(op0);
113 break;
114 case LOWER_UNPACK_HALF_2x16:
115 *rvalue = lower_unpack_half_2x16(op0);
116 break;
117 case LOWER_UNPACK_HALF_2x16_TO_SPLIT:
118 *rvalue = split_unpack_half_2x16(op0);
119 break;
120 case LOWER_PACK_UNPACK_NONE:
121 assert(!"not reached");
122 break;
123 }
124
125 teardown_factory();
126 progress = true;
127 }
128
129 private:
/** Bitmask of `enum lower_packing_builtins_op` naming the lowerings to apply. */
130 const int op_mask;
/** Set to true by handle_rvalue() after it has lowered an expression. */
131 bool progress;
/** IR builder used by the lowering routines.  Its instruction list points at
 * \c factory_instructions; its mem_ctx is set by setup_factory() and reset
 * to NULL by teardown_factory().
 */
132 ir_factory factory;
/** Instructions emitted through \c factory; teardown_factory() inserts them
 * before \c base_ir.
 */
133 exec_list factory_instructions;
134
135 /**
136 * Determine the needed lowering operation by filtering \a expr_op
137 * through \ref op_mask.
138 */
139 enum lower_packing_builtins_op
140 choose_lowering_op(ir_expression_operation expr_op)
141 {
142 /* C++ regards int and enum as fundamentally different types.
143 * So, we can't simply return from each case; we must cast the return
144 * value.
145 */
146 int result;
147
148 switch (expr_op) {
149 case ir_unop_pack_snorm_2x16:
150 result = op_mask & LOWER_PACK_SNORM_2x16;
151 break;
152 case ir_unop_pack_snorm_4x8:
153 result = op_mask & LOWER_PACK_SNORM_4x8;
154 break;
155 case ir_unop_pack_unorm_2x16:
156 result = op_mask & LOWER_PACK_UNORM_2x16;
157 break;
158 case ir_unop_pack_unorm_4x8:
159 result = op_mask & LOWER_PACK_UNORM_4x8;
160 break;
161 case ir_unop_pack_half_2x16:
162 result = op_mask & (LOWER_PACK_HALF_2x16 | LOWER_PACK_HALF_2x16_TO_SPLIT);
163 break;
164 case ir_unop_unpack_snorm_2x16:
165 result = op_mask & LOWER_UNPACK_SNORM_2x16;
166 break;
167 case ir_unop_unpack_snorm_4x8:
168 result = op_mask & LOWER_UNPACK_SNORM_4x8;
169 break;
170 case ir_unop_unpack_unorm_2x16:
171 result = op_mask & LOWER_UNPACK_UNORM_2x16;
172 break;
173 case ir_unop_unpack_unorm_4x8:
174 result = op_mask & LOWER_UNPACK_UNORM_4x8;
175 break;
176 case ir_unop_unpack_half_2x16:
177 result = op_mask & (LOWER_UNPACK_HALF_2x16 | LOWER_UNPACK_HALF_2x16_TO_SPLIT);
178 break;
179 default:
180 result = LOWER_PACK_UNPACK_NONE;
181 break;
182 }
183
184 return static_cast<enum lower_packing_builtins_op>(result);
185 }
186
187 void
188 setup_factory(void *mem_ctx)
189 {
190 assert(factory.mem_ctx == NULL);
191 assert(factory.instructions->is_empty());
192
193 factory.mem_ctx = mem_ctx;
194 }
195
196 void
197 teardown_factory()
198 {
199 base_ir->insert_before(factory.instructions);
200 assert(factory.instructions->is_empty());
201 factory.mem_ctx = NULL;
202 }
203
204 template <typename T>
205 ir_constant*
206 constant(T x)
207 {
208 return factory.constant(x);
209 }
210
211 /**
212 * \brief Pack two uint16's into a single uint32.
213 *
214 * Interpret the given uvec2 as a uint16 pair. Pack the pair into a uint32
215 * where the least significant bits specify the first element of the pair.
216 * Return the uint32.
217 */
218 ir_rvalue*
219 pack_uvec2_to_uint(ir_rvalue *uvec2_rval)
220 {
221 assert(uvec2_rval->type == glsl_type::uvec2_type);
222
223 /* uvec2 u = UVEC2_RVAL; */
224 ir_variable *u = factory.make_temp(glsl_type::uvec2_type,
225 "tmp_pack_uvec2_to_uint");
226 factory.emit(assign(u, uvec2_rval));
227
228 /* return (u.y << 16) | (u.x & 0xffff); */
229 return bit_or(lshift(swizzle_y(u), constant(16u)),
230 bit_and(swizzle_x(u), constant(0xffffu)));
231 }
232
233 /**
234 * \brief Pack four uint8's into a single uint32.
235 *
236 * Interpret the given uvec4 as a uint32 4-typle. Pack the 4-tuple into a
237 * uint32 where the least significant bits specify the first element of the
238 * 4-tuple. Return the uint32.
239 */
240 ir_rvalue*
241 pack_uvec4_to_uint(ir_rvalue *uvec4_rval)
242 {
243 assert(uvec4_rval->type == glsl_type::uvec4_type);
244
245 /* uvec4 u = UVEC4_RVAL; */
246 ir_variable *u = factory.make_temp(glsl_type::uvec4_type,
247 "tmp_pack_uvec4_to_uint");
248 factory.emit(assign(u, bit_and(uvec4_rval, constant(0xffu))));
249
250 /* return (u.w << 24) | (u.z << 16) | (u.y << 8) | u.x; */
251 return bit_or(bit_or(lshift(swizzle_w(u), constant(24u)),
252 lshift(swizzle_z(u), constant(16u))),
253 bit_or(lshift(swizzle_y(u), constant(8u)),
254 swizzle_x(u)));
255 }
256
257 /**
258 * \brief Unpack a uint32 into two uint16's.
259 *
260 * Interpret the given uint32 as a uint16 pair where the uint32's least
261 * significant bits specify the pair's first element. Return the uint16
262 * pair as a uvec2.
263 */
264 ir_rvalue*
265 unpack_uint_to_uvec2(ir_rvalue *uint_rval)
266 {
267 assert(uint_rval->type == glsl_type::uint_type);
268
269 /* uint u = UINT_RVAL; */
270 ir_variable *u = factory.make_temp(glsl_type::uint_type,
271 "tmp_unpack_uint_to_uvec2_u");
272 factory.emit(assign(u, uint_rval));
273
274 /* uvec2 u2; */
275 ir_variable *u2 = factory.make_temp(glsl_type::uvec2_type,
276 "tmp_unpack_uint_to_uvec2_u2");
277
278 /* u2.x = u & 0xffffu; */
279 factory.emit(assign(u2, bit_and(u, constant(0xffffu)), WRITEMASK_X));
280
281 /* u2.y = u >> 16u; */
282 factory.emit(assign(u2, rshift(u, constant(16u)), WRITEMASK_Y));
283
284 return deref(u2).val;
285 }
286
287 /**
288 * \brief Unpack a uint32 into four uint8's.
289 *
290 * Interpret the given uint32 as a uint8 4-tuple where the uint32's least
291 * significant bits specify the 4-tuple's first element. Return the uint8
292 * 4-tuple as a uvec4.
293 */
294 ir_rvalue*
295 unpack_uint_to_uvec4(ir_rvalue *uint_rval)
296 {
297 assert(uint_rval->type == glsl_type::uint_type);
298
299 /* uint u = UINT_RVAL; */
300 ir_variable *u = factory.make_temp(glsl_type::uint_type,
301 "tmp_unpack_uint_to_uvec4_u");
302 factory.emit(assign(u, uint_rval));
303
304 /* uvec4 u4; */
305 ir_variable *u4 = factory.make_temp(glsl_type::uvec4_type,
306 "tmp_unpack_uint_to_uvec4_u4");
307
308 /* u4.x = u & 0xffu; */
309 factory.emit(assign(u4, bit_and(u, constant(0xffu)), WRITEMASK_X));
310
311 /* u4.y = (u >> 8u) & 0xffu; */
312 factory.emit(assign(u4, bit_and(rshift(u, constant(8u)),
313 constant(0xffu)), WRITEMASK_Y));
314
315 /* u4.z = (u >> 16u) & 0xffu; */
316 factory.emit(assign(u4, bit_and(rshift(u, constant(16u)),
317 constant(0xffu)), WRITEMASK_Z));
318
319 /* u4.w = (u >> 24u) */
320 factory.emit(assign(u4, rshift(u, constant(24u)), WRITEMASK_W));
321
322 return deref(u4).val;
323 }
324
325 /**
326 * \brief Lower a packSnorm2x16 expression.
327 *
328 * \param vec2_rval is packSnorm2x16's input
329 * \return packSnorm2x16's output as a uint rvalue
330 */
331 ir_rvalue*
332 lower_pack_snorm_2x16(ir_rvalue *vec2_rval)
333 {
334 /* From page 88 (94 of pdf) of the GLSL ES 3.00 spec:
335 *
336 * highp uint packSnorm2x16(vec2 v)
337 * --------------------------------
338 * First, converts each component of the normalized floating-point value
339 * v into 16-bit integer values. Then, the results are packed into the
340 * returned 32-bit unsigned integer.
341 *
342 * The conversion for component c of v to fixed point is done as
343 * follows:
344 *
345 * packSnorm2x16: round(clamp(c, -1, +1) * 32767.0)
346 *
347 * The first component of the vector will be written to the least
348 * significant bits of the output; the last component will be written to
349 * the most significant bits.
350 *
351 * This function generates IR that approximates the following pseudo-GLSL:
352 *
353 * return pack_uvec2_to_uint(
354 * uvec2(ivec2(
355 * round(clamp(VEC2_RVALUE, -1.0f, 1.0f) * 32767.0f))));
356 *
357 * It is necessary to first convert the vec2 to ivec2 rather than directly
358 * converting vec2 to uvec2 because the latter conversion is undefined.
359 * From page 56 (62 of pdf) of the GLSL ES 3.00 spec: "It is undefined to
360 * convert a negative floating point value to an uint".
361 */
362 assert(vec2_rval->type == glsl_type::vec2_type);
363
364 ir_rvalue *result = pack_uvec2_to_uint(
365 i2u(f2i(round_even(mul(clamp(vec2_rval,
366 constant(-1.0f),
367 constant(1.0f)),
368 constant(32767.0f))))));
369
370 assert(result->type == glsl_type::uint_type);
371 return result;
372 }
373
374 /**
375 * \brief Lower a packSnorm4x8 expression.
376 *
377 * \param vec4_rval is packSnorm4x8's input
378 * \return packSnorm4x8's output as a uint rvalue
379 */
380 ir_rvalue*
381 lower_pack_snorm_4x8(ir_rvalue *vec4_rval)
382 {
383 /* From page 137 (143 of pdf) of the GLSL 4.30 spec:
384 *
385 * highp uint packSnorm4x8(vec4 v)
386 * -------------------------------
387 * First, converts each component of the normalized floating-point value
388 * v into 8-bit integer values. Then, the results are packed into the
389 * returned 32-bit unsigned integer.
390 *
391 * The conversion for component c of v to fixed point is done as
392 * follows:
393 *
394 * packSnorm4x8: round(clamp(c, -1, +1) * 127.0)
395 *
396 * The first component of the vector will be written to the least
397 * significant bits of the output; the last component will be written to
398 * the most significant bits.
399 *
400 * This function generates IR that approximates the following pseudo-GLSL:
401 *
402 * return pack_uvec4_to_uint(
403 * uvec4(ivec4(
404 * round(clamp(VEC4_RVALUE, -1.0f, 1.0f) * 127.0f))));
405 *
406 * It is necessary to first convert the vec4 to ivec4 rather than directly
407 * converting vec4 to uvec4 because the latter conversion is undefined.
408 * From page 87 (93 of pdf) of the GLSL 4.30 spec: "It is undefined to
409 * convert a negative floating point value to an uint".
410 */
411 assert(vec4_rval->type == glsl_type::vec4_type);
412
413 ir_rvalue *result = pack_uvec4_to_uint(
414 i2u(f2i(round_even(mul(clamp(vec4_rval,
415 constant(-1.0f),
416 constant(1.0f)),
417 constant(127.0f))))));
418
419 assert(result->type == glsl_type::uint_type);
420 return result;
421 }
422
423 /**
424 * \brief Lower an unpackSnorm2x16 expression.
425 *
426 * \param uint_rval is unpackSnorm2x16's input
427 * \return unpackSnorm2x16's output as a vec2 rvalue
428 */
429 ir_rvalue*
430 lower_unpack_snorm_2x16(ir_rvalue *uint_rval)
431 {
432 /* From page 88 (94 of pdf) of the GLSL ES 3.00 spec:
433 *
434 * highp vec2 unpackSnorm2x16 (highp uint p)
435 * -----------------------------------------
436 * First, unpacks a single 32-bit unsigned integer p into a pair of
437 * 16-bit unsigned integers. Then, each component is converted to
438 * a normalized floating-point value to generate the returned
439 * two-component vector.
440 *
441 * The conversion for unpacked fixed-point value f to floating point is
442 * done as follows:
443 *
444 * unpackSnorm2x16: clamp(f / 32767.0, -1,+1)
445 *
446 * The first component of the returned vector will be extracted from the
447 * least significant bits of the input; the last component will be
448 * extracted from the most significant bits.
449 *
450 * This function generates IR that approximates the following pseudo-GLSL:
451 *
452 * return clamp(
453 * ((ivec2(unpack_uint_to_uvec2(UINT_RVALUE)) << 16) >> 16) / 32767.0f,
454 * -1.0f, 1.0f);
455 *
456 * The above IR may appear unnecessarily complex, but the intermediate
457 * conversion to ivec2 and the bit shifts are necessary to correctly unpack
458 * negative floats.
459 *
460 * To see why, consider packing and then unpacking vec2(-1.0, 0.0).
461 * packSnorm2x16 encodes -1.0 as the int16 0xffff. During unpacking, we
462 * place that int16 into an int32, which results in the *positive* integer
463 * 0x0000ffff. The int16's sign bit becomes, in the int32, the rather
464 * unimportant bit 16. We must now extend the int16's sign bit into bits
465 * 17-32, which is accomplished by left-shifting then right-shifting.
466 */
467
468 assert(uint_rval->type == glsl_type::uint_type);
469
470 ir_rvalue *result =
471 clamp(div(i2f(rshift(lshift(u2i(unpack_uint_to_uvec2(uint_rval)),
472 constant(16)),
473 constant(16u))),
474 constant(32767.0f)),
475 constant(-1.0f),
476 constant(1.0f));
477
478 assert(result->type == glsl_type::vec2_type);
479 return result;
480 }
481
482 /**
483 * \brief Lower an unpackSnorm4x8 expression.
484 *
485 * \param uint_rval is unpackSnorm4x8's input
486 * \return unpackSnorm4x8's output as a vec4 rvalue
487 */
488 ir_rvalue*
489 lower_unpack_snorm_4x8(ir_rvalue *uint_rval)
490 {
491 /* From page 137 (143 of pdf) of the GLSL 4.30 spec:
492 *
493 * highp vec4 unpackSnorm4x8 (highp uint p)
494 * ----------------------------------------
495 * First, unpacks a single 32-bit unsigned integer p into four
496 * 8-bit unsigned integers. Then, each component is converted to
497 * a normalized floating-point value to generate the returned
498 * four-component vector.
499 *
500 * The conversion for unpacked fixed-point value f to floating point is
501 * done as follows:
502 *
503 * unpackSnorm4x8: clamp(f / 127.0, -1, +1)
504 *
505 * The first component of the returned vector will be extracted from the
506 * least significant bits of the input; the last component will be
507 * extracted from the most significant bits.
508 *
509 * This function generates IR that approximates the following pseudo-GLSL:
510 *
511 * return clamp(
512 * ((ivec4(unpack_uint_to_uvec4(UINT_RVALUE)) << 24) >> 24) / 127.0f,
513 * -1.0f, 1.0f);
514 *
515 * The above IR may appear unnecessarily complex, but the intermediate
516 * conversion to ivec4 and the bit shifts are necessary to correctly unpack
517 * negative floats.
518 *
519 * To see why, consider packing and then unpacking vec4(-1.0, 0.0, 0.0,
520 * 0.0). packSnorm4x8 encodes -1.0 as the int8 0xff. During unpacking, we
521 * place that int8 into an int32, which results in the *positive* integer
522 * 0x000000ff. The int8's sign bit becomes, in the int32, the rather
523 * unimportant bit 8. We must now extend the int8's sign bit into bits
524 * 9-32, which is accomplished by left-shifting then right-shifting.
525 */
526
527 assert(uint_rval->type == glsl_type::uint_type);
528
529 ir_rvalue *result =
530 clamp(div(i2f(rshift(lshift(u2i(unpack_uint_to_uvec4(uint_rval)),
531 constant(24u)),
532 constant(24u))),
533 constant(127.0f)),
534 constant(-1.0f),
535 constant(1.0f));
536
537 assert(result->type == glsl_type::vec4_type);
538 return result;
539 }
540
541 /**
542 * \brief Lower a packUnorm2x16 expression.
543 *
544 * \param vec2_rval is packUnorm2x16's input
545 * \return packUnorm2x16's output as a uint rvalue
546 */
547 ir_rvalue*
548 lower_pack_unorm_2x16(ir_rvalue *vec2_rval)
549 {
550 /* From page 88 (94 of pdf) of the GLSL ES 3.00 spec:
551 *
552 * highp uint packUnorm2x16 (vec2 v)
553 * ---------------------------------
554 * First, converts each component of the normalized floating-point value
555 * v into 16-bit integer values. Then, the results are packed into the
556 * returned 32-bit unsigned integer.
557 *
558 * The conversion for component c of v to fixed point is done as
559 * follows:
560 *
561 * packUnorm2x16: round(clamp(c, 0, +1) * 65535.0)
562 *
563 * The first component of the vector will be written to the least
564 * significant bits of the output; the last component will be written to
565 * the most significant bits.
566 *
567 * This function generates IR that approximates the following pseudo-GLSL:
568 *
569 * return pack_uvec2_to_uint(uvec2(
570 * round(clamp(VEC2_RVALUE, 0.0f, 1.0f) * 65535.0f)));
571 *
572 * Here it is safe to directly convert the vec2 to uvec2 because the vec2
573 * has been clamped to a non-negative range.
574 */
575
576 assert(vec2_rval->type == glsl_type::vec2_type);
577
578 ir_rvalue *result = pack_uvec2_to_uint(
579 f2u(round_even(mul(saturate(vec2_rval), constant(65535.0f)))));
580
581 assert(result->type == glsl_type::uint_type);
582 return result;
583 }
584
585 /**
586 * \brief Lower a packUnorm4x8 expression.
587 *
588 * \param vec4_rval is packUnorm4x8's input
589 * \return packUnorm4x8's output as a uint rvalue
590 */
591 ir_rvalue*
592 lower_pack_unorm_4x8(ir_rvalue *vec4_rval)
593 {
594 /* From page 137 (143 of pdf) of the GLSL 4.30 spec:
595 *
596 * highp uint packUnorm4x8 (vec4 v)
597 * --------------------------------
598 * First, converts each component of the normalized floating-point value
599 * v into 8-bit integer values. Then, the results are packed into the
600 * returned 32-bit unsigned integer.
601 *
602 * The conversion for component c of v to fixed point is done as
603 * follows:
604 *
605 * packUnorm4x8: round(clamp(c, 0, +1) * 255.0)
606 *
607 * The first component of the vector will be written to the least
608 * significant bits of the output; the last component will be written to
609 * the most significant bits.
610 *
611 * This function generates IR that approximates the following pseudo-GLSL:
612 *
613 * return pack_uvec4_to_uint(uvec4(
614 * round(clamp(VEC2_RVALUE, 0.0f, 1.0f) * 255.0f)));
615 *
616 * Here it is safe to directly convert the vec4 to uvec4 because the vec4
617 * has been clamped to a non-negative range.
618 */
619
620 assert(vec4_rval->type == glsl_type::vec4_type);
621
622 ir_rvalue *result = pack_uvec4_to_uint(
623 f2u(round_even(mul(saturate(vec4_rval), constant(255.0f)))));
624
625 assert(result->type == glsl_type::uint_type);
626 return result;
627 }
628
629 /**
630 * \brief Lower an unpackUnorm2x16 expression.
631 *
632 * \param uint_rval is unpackUnorm2x16's input
633 * \return unpackUnorm2x16's output as a vec2 rvalue
634 */
635 ir_rvalue*
636 lower_unpack_unorm_2x16(ir_rvalue *uint_rval)
637 {
638 /* From page 89 (95 of pdf) of the GLSL ES 3.00 spec:
639 *
640 * highp vec2 unpackUnorm2x16 (highp uint p)
641 * -----------------------------------------
642 * First, unpacks a single 32-bit unsigned integer p into a pair of
643 * 16-bit unsigned integers. Then, each component is converted to
644 * a normalized floating-point value to generate the returned
645 * two-component vector.
646 *
647 * The conversion for unpacked fixed-point value f to floating point is
648 * done as follows:
649 *
650 * unpackUnorm2x16: f / 65535.0
651 *
652 * The first component of the returned vector will be extracted from the
653 * least significant bits of the input; the last component will be
654 * extracted from the most significant bits.
655 *
656 * This function generates IR that approximates the following pseudo-GLSL:
657 *
658 * return vec2(unpack_uint_to_uvec2(UINT_RVALUE)) / 65535.0;
659 */
660
661 assert(uint_rval->type == glsl_type::uint_type);
662
663 ir_rvalue *result = div(u2f(unpack_uint_to_uvec2(uint_rval)),
664 constant(65535.0f));
665
666 assert(result->type == glsl_type::vec2_type);
667 return result;
668 }
669
670 /**
671 * \brief Lower an unpackUnorm4x8 expression.
672 *
673 * \param uint_rval is unpackUnorm4x8's input
674 * \return unpackUnorm4x8's output as a vec4 rvalue
675 */
676 ir_rvalue*
677 lower_unpack_unorm_4x8(ir_rvalue *uint_rval)
678 {
679 /* From page 137 (143 of pdf) of the GLSL 4.30 spec:
680 *
681 * highp vec4 unpackUnorm4x8 (highp uint p)
682 * ----------------------------------------
683 * First, unpacks a single 32-bit unsigned integer p into four
684 * 8-bit unsigned integers. Then, each component is converted to
685 * a normalized floating-point value to generate the returned
686 * two-component vector.
687 *
688 * The conversion for unpacked fixed-point value f to floating point is
689 * done as follows:
690 *
691 * unpackUnorm4x8: f / 255.0
692 *
693 * The first component of the returned vector will be extracted from the
694 * least significant bits of the input; the last component will be
695 * extracted from the most significant bits.
696 *
697 * This function generates IR that approximates the following pseudo-GLSL:
698 *
699 * return vec4(unpack_uint_to_uvec4(UINT_RVALUE)) / 255.0;
700 */
701
702 assert(uint_rval->type == glsl_type::uint_type);
703
704 ir_rvalue *result = div(u2f(unpack_uint_to_uvec4(uint_rval)),
705 constant(255.0f));
706
707 assert(result->type == glsl_type::vec4_type);
708 return result;
709 }
710
711 /**
712 * \brief Lower the component-wise calculation of packHalf2x16.
713 *
714 * \param f_rval is one component of packHafl2x16's input
715 * \param e_rval is the unshifted exponent bits of f_rval
716 * \param m_rval is the unshifted mantissa bits of f_rval
717 *
718 * \return a uint rvalue that encodes a float16 in its lower 16 bits
719 */
720 ir_rvalue*
721 pack_half_1x16_nosign(ir_rvalue *f_rval,
722 ir_rvalue *e_rval,
723 ir_rvalue *m_rval)
724 {
725 assert(e_rval->type == glsl_type::uint_type);
726 assert(m_rval->type == glsl_type::uint_type);
727
728 /* uint u16; */
729 ir_variable *u16 = factory.make_temp(glsl_type::uint_type,
730 "tmp_pack_half_1x16_u16");
731
732 /* float f = FLOAT_RVAL; */
733 ir_variable *f = factory.make_temp(glsl_type::float_type,
734 "tmp_pack_half_1x16_f");
735 factory.emit(assign(f, f_rval));
736
737 /* uint e = E_RVAL; */
738 ir_variable *e = factory.make_temp(glsl_type::uint_type,
739 "tmp_pack_half_1x16_e");
740 factory.emit(assign(e, e_rval));
741
742 /* uint m = M_RVAL; */
743 ir_variable *m = factory.make_temp(glsl_type::uint_type,
744 "tmp_pack_half_1x16_m");
745 factory.emit(assign(m, m_rval));
746
747 /* Preliminaries
748 * -------------
749 *
750 * For a float16, the bit layout is:
751 *
752 * sign: 15
753 * exponent: 10:14
754 * mantissa: 0:9
755 *
756 * Let f16 be a float16 value. The sign, exponent, and mantissa
757 * determine its value thus:
758 *
759 * if e16 = 0 and m16 = 0, then zero: (-1)^s16 * 0 (1)
760 * if e16 = 0 and m16!= 0, then subnormal: (-1)^s16 * 2^(e16 - 14) * (m16 / 2^10) (2)
761 * if 0 < e16 < 31, then normal: (-1)^s16 * 2^(e16 - 15) * (1 + m16 / 2^10) (3)
762 * if e16 = 31 and m16 = 0, then infinite: (-1)^s16 * inf (4)
763 * if e16 = 31 and m16 != 0, then NaN (5)
764 *
765 * where 0 <= m16 < 2^10.
766 *
767 * For a float32, the bit layout is:
768 *
769 * sign: 31
770 * exponent: 23:30
771 * mantissa: 0:22
772 *
773 * Let f32 be a float32 value. The sign, exponent, and mantissa
774 * determine its value thus:
775 *
776 * if e32 = 0 and m32 = 0, then zero: (-1)^s * 0 (10)
777 * if e32 = 0 and m32 != 0, then subnormal: (-1)^s * 2^(e32 - 126) * (m32 / 2^23) (11)
778 * if 0 < e32 < 255, then normal: (-1)^s * 2^(e32 - 127) * (1 + m32 / 2^23) (12)
779 * if e32 = 255 and m32 = 0, then infinite: (-1)^s * inf (13)
780 * if e32 = 255 and m32 != 0, then NaN (14)
781 *
782 * where 0 <= m32 < 2^23.
783 *
784 * The minimum and maximum normal float16 values are
785 *
786 * min_norm16 = 2^(1 - 15) * (1 + 0 / 2^10) = 2^(-14) (20)
787 * max_norm16 = 2^(30 - 15) * (1 + 1023 / 2^10) (21)
788 *
789 * The step at max_norm16 is
790 *
791 * max_step16 = 2^5 (22)
792 *
793 * Observe that the float16 boundary values in equations 20-21 lie in the
794 * range of normal float32 values.
795 *
796 *
797 * Rounding Behavior
798 * -----------------
799 * Not all float32 values can be exactly represented as a float16. We
800 * round all such intermediate float32 values to the nearest float16; if
801 * the float32 is exactly between to float16 values, we round to the one
802 * with an even mantissa. This rounding behavior has several benefits:
803 *
804 * - It has no sign bias.
805 *
806 * - It reproduces the behavior of real hardware: opcode F32TO16 in Intel's
807 * GPU ISA.
808 *
809 * - By reproducing the behavior of the GPU (at least on Intel hardware),
810 * compile-time evaluation of constant packHalf2x16 GLSL expressions will
811 * result in the same value as if the expression were executed on the
812 * GPU.
813 *
814 * Calculation
815 * -----------
816 * Our task is to compute s16, e16, m16 given f32. Since this function
817 * ignores the sign bit, assume that s32 = s16 = 0. There are several
818 * cases consider.
819 */
820
821 factory.emit(
822
823 /* Case 1) f32 is NaN
824 *
825 * The resultant f16 will also be NaN.
826 */
827
828 /* if (e32 == 255 && m32 != 0) { */
829 if_tree(logic_and(equal(e, constant(0xffu << 23u)),
830 logic_not(equal(m, constant(0u)))),
831
832 assign(u16, constant(0x7fffu)),
833
834 /* Case 2) f32 lies in the range [0, min_norm16).
835 *
836 * The resultant float16 will be either zero, subnormal, or normal.
837 *
838 * Solving
839 *
840 * f32 = min_norm16 (30)
841 *
842 * gives
843 *
844 * e32 = 113 and m32 = 0 (31)
845 *
846 * Therefore this case occurs if and only if
847 *
848 * e32 < 113 (32)
849 */
850
851 /* } else if (e32 < 113) { */
852 if_tree(less(e, constant(113u << 23u)),
853
854 /* u16 = uint(round_to_even(abs(f32) * float(1u << 24u))); */
855 assign(u16, f2u(round_even(mul(expr(ir_unop_abs, f),
856 constant((float) (1 << 24)))))),
857
858 /* Case 3) f32 lies in the range
859 * [min_norm16, max_norm16 + max_step16).
860 *
861 * The resultant float16 will be either normal or infinite.
862 *
863 * Solving
864 *
865 * f32 = max_norm16 + max_step16 (40)
866 * = 2^15 * (1 + 1023 / 2^10) + 2^5 (41)
867 * = 2^16 (42)
868 * gives
869 *
870 * e32 = 143 and m32 = 0 (43)
871 *
872 * We already solved the boundary condition f32 = min_norm16 above
873 * in equation 31. Therefore this case occurs if and only if
874 *
875 * 113 <= e32 and e32 < 143
876 */
877
878 /* } else if (e32 < 143) { */
879 if_tree(less(e, constant(143u << 23u)),
880
881 /* The addition below handles the case where the mantissa rounds
882 * up to 1024 and bumps the exponent.
883 *
884 * u16 = ((e - (112u << 23u)) >> 13u)
885 * + round_to_even((float(m) / (1u << 13u));
886 */
887 assign(u16, add(rshift(sub(e, constant(112u << 23u)),
888 constant(13u)),
889 f2u(round_even(
890 div(u2f(m), constant((float) (1 << 13))))))),
891
892 /* Case 4) f32 lies in the range [max_norm16 + max_step16, inf].
893 *
894 * The resultant float16 will be infinite.
895 *
896 * The cases above caught all float32 values in the range
897 * [0, max_norm16 + max_step16), so this is the fall-through case.
898 */
899
900 /* } else { */
901
902 assign(u16, constant(31u << 10u))))));
903
904 /* } */
905
906 return deref(u16).val;
907 }
908
909 /**
910 * \brief Lower a packHalf2x16 expression.
911 *
912 * \param vec2_rval is packHalf2x16's input
913 * \return packHalf2x16's output as a uint rvalue
914 */
915 ir_rvalue*
916 lower_pack_half_2x16(ir_rvalue *vec2_rval)
917 {
918 /* From page 89 (95 of pdf) of the GLSL ES 3.00 spec:
919 *
920 * highp uint packHalf2x16 (mediump vec2 v)
921 * ----------------------------------------
922 * Returns an unsigned integer obtained by converting the components of
923 * a two-component floating-point vector to the 16-bit floating-point
924 * representation found in the OpenGL ES Specification, and then packing
925 * these two 16-bit integers into a 32-bit unsigned integer.
926 *
927 * The first vector component specifies the 16 least- significant bits
928 * of the result; the second component specifies the 16 most-significant
929 * bits.
930 */
931
932 assert(vec2_rval->type == glsl_type::vec2_type);
933
934 /* vec2 f = VEC2_RVAL; */
935 ir_variable *f = factory.make_temp(glsl_type::vec2_type,
936 "tmp_pack_half_2x16_f");
937 factory.emit(assign(f, vec2_rval));
938
939 /* uvec2 f32 = bitcast_f2u(f); */
940 ir_variable *f32 = factory.make_temp(glsl_type::uvec2_type,
941 "tmp_pack_half_2x16_f32");
942 factory.emit(assign(f32, expr(ir_unop_bitcast_f2u, f)));
943
944 /* uvec2 f16; */
945 ir_variable *f16 = factory.make_temp(glsl_type::uvec2_type,
946 "tmp_pack_half_2x16_f16");
947
948 /* Get f32's unshifted exponent bits.
949 *
950 * uvec2 e = f32 & 0x7f800000u;
951 */
952 ir_variable *e = factory.make_temp(glsl_type::uvec2_type,
953 "tmp_pack_half_2x16_e");
954 factory.emit(assign(e, bit_and(f32, constant(0x7f800000u))));
955
956 /* Get f32's unshifted mantissa bits.
957 *
958 * uvec2 m = f32 & 0x007fffffu;
959 */
960 ir_variable *m = factory.make_temp(glsl_type::uvec2_type,
961 "tmp_pack_half_2x16_m");
962 factory.emit(assign(m, bit_and(f32, constant(0x007fffffu))));
963
964 /* Set f16's exponent and mantissa bits.
965 *
966 * f16.x = pack_half_1x16_nosign(e.x, m.x);
967 * f16.y = pack_half_1y16_nosign(e.y, m.y);
968 */
969 factory.emit(assign(f16, pack_half_1x16_nosign(swizzle_x(f),
970 swizzle_x(e),
971 swizzle_x(m)),
972 WRITEMASK_X));
973 factory.emit(assign(f16, pack_half_1x16_nosign(swizzle_y(f),
974 swizzle_y(e),
975 swizzle_y(m)),
976 WRITEMASK_Y));
977
978 /* Set f16's sign bits.
979 *
980 * f16 |= (f32 & (1u << 31u) >> 16u;
981 */
982 factory.emit(
983 assign(f16, bit_or(f16,
984 rshift(bit_and(f32, constant(1u << 31u)),
985 constant(16u)))));
986
987
988 /* return (f16.y << 16u) | f16.x; */
989 ir_rvalue *result = bit_or(lshift(swizzle_y(f16),
990 constant(16u)),
991 swizzle_x(f16));
992
993 assert(result->type == glsl_type::uint_type);
994 return result;
995 }
996
997 /**
998 * \brief Split packHalf2x16's vec2 operand into two floats.
999 *
1000 * \param vec2_rval is packHalf2x16's input
1001 * \return a uint rvalue
1002 *
1003 * Some code generators, such as the i965 fragment shader, require that all
1004 * vector expressions be lowered to a sequence of scalar expressions.
1005 * However, packHalf2x16 cannot be scalarized by the same mechanism as
1006 * a true vector operation because its input and output have a differing
1007 * number of vector components.
1008 *
1009 * This method scalarizes packHalf2x16 by transforming it from a unary
1010 * operation having vector input to a binary operation having scalar input.
1011 * That is, it transforms
1012 *
1013 * packHalf2x16(VEC2_RVAL);
1014 *
1015 * into
1016 *
1017 * vec2 v = VEC2_RVAL;
1018 * return packHalf2x16_split(v.x, v.y);
1019 */
1020 ir_rvalue*
1021 split_pack_half_2x16(ir_rvalue *vec2_rval)
1022 {
1023 assert(vec2_rval->type == glsl_type::vec2_type);
1024
1025 ir_variable *v = factory.make_temp(glsl_type::vec2_type,
1026 "tmp_split_pack_half_2x16_v");
1027 factory.emit(assign(v, vec2_rval));
1028
1029 return expr(ir_binop_pack_half_2x16_split, swizzle_x(v), swizzle_y(v));
1030 }
1031
1032 /**
1033 * \brief Lower the component-wise calculation of unpackHalf2x16.
1034 *
1035 * Given a uint that encodes a float16 in its lower 16 bits, this function
1036 * returns a uint that encodes a float32 with the same value. The sign bit
1037 * of the float16 is ignored.
1038 *
1039 * \param e_rval is the unshifted exponent bits of a float16
1040 * \param m_rval is the unshifted mantissa bits of a float16
1041 * \return a uint rvalue that encodes a float32
1042 */
1043 ir_rvalue*
1044 unpack_half_1x16_nosign(ir_rvalue *e_rval, ir_rvalue *m_rval)
1045 {
1046 assert(e_rval->type == glsl_type::uint_type);
1047 assert(m_rval->type == glsl_type::uint_type);
1048
1049 /* uint u32; */
1050 ir_variable *u32 = factory.make_temp(glsl_type::uint_type,
1051 "tmp_unpack_half_1x16_u32");
1052
1053 /* uint e = E_RVAL; */
1054 ir_variable *e = factory.make_temp(glsl_type::uint_type,
1055 "tmp_unpack_half_1x16_e");
1056 factory.emit(assign(e, e_rval));
1057
1058 /* uint m = M_RVAL; */
1059 ir_variable *m = factory.make_temp(glsl_type::uint_type,
1060 "tmp_unpack_half_1x16_m");
1061 factory.emit(assign(m, m_rval));
1062
1063 /* Preliminaries
1064 * -------------
1065 *
1066 * For a float16, the bit layout is:
1067 *
1068 * sign: 15
1069 * exponent: 10:14
1070 * mantissa: 0:9
1071 *
1072 * Let f16 be a float16 value. The sign, exponent, and mantissa
1073 * determine its value thus:
1074 *
1075 * if e16 = 0 and m16 = 0, then zero: (-1)^s16 * 0 (1)
1076 * if e16 = 0 and m16!= 0, then subnormal: (-1)^s16 * 2^(e16 - 14) * (m16 / 2^10) (2)
1077 * if 0 < e16 < 31, then normal: (-1)^s16 * 2^(e16 - 15) * (1 + m16 / 2^10) (3)
1078 * if e16 = 31 and m16 = 0, then infinite: (-1)^s16 * inf (4)
1079 * if e16 = 31 and m16 != 0, then NaN (5)
1080 *
1081 * where 0 <= m16 < 2^10.
1082 *
1083 * For a float32, the bit layout is:
1084 *
1085 * sign: 31
1086 * exponent: 23:30
1087 * mantissa: 0:22
1088 *
1089 * Let f32 be a float32 value. The sign, exponent, and mantissa
1090 * determine its value thus:
1091 *
1092 * if e32 = 0 and m32 = 0, then zero: (-1)^s * 0 (10)
1093 * if e32 = 0 and m32 != 0, then subnormal: (-1)^s * 2^(e32 - 126) * (m32 / 2^23) (11)
1094 * if 0 < e32 < 255, then normal: (-1)^s * 2^(e32 - 127) * (1 + m32 / 2^23) (12)
1095 * if e32 = 255 and m32 = 0, then infinite: (-1)^s * inf (13)
1096 * if e32 = 255 and m32 != 0, then NaN (14)
1097 *
1098 * where 0 <= m32 < 2^23.
1099 *
1100 * Calculation
1101 * -----------
1102 * Our task is to compute s32, e32, m32 given f16. Since this function
1103 * ignores the sign bit, assume that s32 = s16 = 0. There are several
1104 * cases consider.
1105 */
1106
1107 factory.emit(
1108
1109 /* Case 1) f16 is zero or subnormal.
1110 *
1111 * The simplest method of calcuating f32 in this case is
1112 *
1113 * f32 = f16 (20)
1114 * = 2^(-14) * (m16 / 2^10) (21)
1115 * = m16 / 2^(-24) (22)
1116 */
1117
1118 /* if (e16 == 0) { */
1119 if_tree(equal(e, constant(0u)),
1120
1121 /* u32 = bitcast_f2u(float(m) / float(1 << 24)); */
1122 assign(u32, expr(ir_unop_bitcast_f2u,
1123 div(u2f(m), constant((float)(1 << 24))))),
1124
1125 /* Case 2) f16 is normal.
1126 *
1127 * The equation
1128 *
1129 * f32 = f16 (30)
1130 * 2^(e32 - 127) * (1 + m32 / 2^23) = (31)
1131 * 2^(e16 - 15) * (1 + m16 / 2^10)
1132 *
1133 * can be decomposed into two
1134 *
1135 * 2^(e32 - 127) = 2^(e16 - 15) (32)
1136 * 1 + m32 / 2^23 = 1 + m16 / 2^10 (33)
1137 *
1138 * which solve to
1139 *
1140 * e32 = e16 + 112 (34)
1141 * m32 = m16 * 2^13 (35)
1142 */
1143
1144 /* } else if (e16 < 31)) { */
1145 if_tree(less(e, constant(31u << 10u)),
1146
1147 /* u32 = ((e + (112 << 10)) | m) << 13;
1148 */
1149 assign(u32, lshift(bit_or(add(e, constant(112u << 10u)), m),
1150 constant(13u))),
1151
1152
1153 /* Case 3) f16 is infinite. */
1154 if_tree(equal(m, constant(0u)),
1155
1156 assign(u32, constant(255u << 23u)),
1157
1158 /* Case 4) f16 is NaN. */
1159 /* } else { */
1160
1161 assign(u32, constant(0x7fffffffu))))));
1162
1163 /* } */
1164
1165 return deref(u32).val;
1166 }
1167
1168 /**
1169 * \brief Lower an unpackHalf2x16 expression.
1170 *
1171 * \param uint_rval is unpackHalf2x16's input
1172 * \return unpackHalf2x16's output as a vec2 rvalue
1173 */
1174 ir_rvalue*
1175 lower_unpack_half_2x16(ir_rvalue *uint_rval)
1176 {
1177 /* From page 89 (95 of pdf) of the GLSL ES 3.00 spec:
1178 *
1179 * mediump vec2 unpackHalf2x16 (highp uint v)
1180 * ------------------------------------------
1181 * Returns a two-component floating-point vector with components
1182 * obtained by unpacking a 32-bit unsigned integer into a pair of 16-bit
1183 * values, interpreting those values as 16-bit floating-point numbers
1184 * according to the OpenGL ES Specification, and converting them to
1185 * 32-bit floating-point values.
1186 *
1187 * The first component of the vector is obtained from the
1188 * 16 least-significant bits of v; the second component is obtained
1189 * from the 16 most-significant bits of v.
1190 */
1191 assert(uint_rval->type == glsl_type::uint_type);
1192
1193 /* uint u = RVALUE;
1194 * uvec2 f16 = uvec2(u.x & 0xffff, u.y >> 16);
1195 */
1196 ir_variable *f16 = factory.make_temp(glsl_type::uvec2_type,
1197 "tmp_unpack_half_2x16_f16");
1198 factory.emit(assign(f16, unpack_uint_to_uvec2(uint_rval)));
1199
1200 /* uvec2 f32; */
1201 ir_variable *f32 = factory.make_temp(glsl_type::uvec2_type,
1202 "tmp_unpack_half_2x16_f32");
1203
1204 /* Get f16's unshifted exponent bits.
1205 *
1206 * uvec2 e = f16 & 0x7c00u;
1207 */
1208 ir_variable *e = factory.make_temp(glsl_type::uvec2_type,
1209 "tmp_unpack_half_2x16_e");
1210 factory.emit(assign(e, bit_and(f16, constant(0x7c00u))));
1211
1212 /* Get f16's unshifted mantissa bits.
1213 *
1214 * uvec2 m = f16 & 0x03ffu;
1215 */
1216 ir_variable *m = factory.make_temp(glsl_type::uvec2_type,
1217 "tmp_unpack_half_2x16_m");
1218 factory.emit(assign(m, bit_and(f16, constant(0x03ffu))));
1219
1220 /* Set f32's exponent and mantissa bits.
1221 *
1222 * f32.x = unpack_half_1x16_nosign(e.x, m.x);
1223 * f32.y = unpack_half_1x16_nosign(e.y, m.y);
1224 */
1225 factory.emit(assign(f32, unpack_half_1x16_nosign(swizzle_x(e),
1226 swizzle_x(m)),
1227 WRITEMASK_X));
1228 factory.emit(assign(f32, unpack_half_1x16_nosign(swizzle_y(e),
1229 swizzle_y(m)),
1230 WRITEMASK_Y));
1231
1232 /* Set f32's sign bit.
1233 *
1234 * f32 |= (f16 & 0x8000u) << 16u;
1235 */
1236 factory.emit(assign(f32, bit_or(f32,
1237 lshift(bit_and(f16,
1238 constant(0x8000u)),
1239 constant(16u)))));
1240
1241 /* return bitcast_u2f(f32); */
1242 ir_rvalue *result = expr(ir_unop_bitcast_u2f, f32);
1243 assert(result->type == glsl_type::vec2_type);
1244 return result;
1245 }
1246
1247 /**
1248 * \brief Split unpackHalf2x16 into two operations.
1249 *
1250 * \param uint_rval is unpackHalf2x16's input
1251 * \return a vec2 rvalue
1252 *
1253 * Some code generators, such as the i965 fragment shader, require that all
1254 * vector expressions be lowered to a sequence of scalar expressions.
1255 * However, unpackHalf2x16 cannot be scalarized by the same method as
1256 * a true vector operation because the number of components of its input
1257 * and output differ.
1258 *
1259 * This method scalarizes unpackHalf2x16 by transforming it from a single
1260 * operation having vec2 output to a pair of operations each having float
1261 * output. That is, it transforms
1262 *
1263 * unpackHalf2x16(UINT_RVAL)
1264 *
1265 * into
1266 *
1267 * uint u = UINT_RVAL;
1268 * vec2 v;
1269 *
1270 * v.x = unpackHalf2x16_split_x(u);
1271 * v.y = unpackHalf2x16_split_y(u);
1272 *
1273 * return v;
1274 */
1275 ir_rvalue*
1276 split_unpack_half_2x16(ir_rvalue *uint_rval)
1277 {
1278 assert(uint_rval->type == glsl_type::uint_type);
1279
1280 /* uint u = uint_rval; */
1281 ir_variable *u = factory.make_temp(glsl_type::uint_type,
1282 "tmp_split_unpack_half_2x16_u");
1283 factory.emit(assign(u, uint_rval));
1284
1285 /* vec2 v; */
1286 ir_variable *v = factory.make_temp(glsl_type::vec2_type,
1287 "tmp_split_unpack_half_2x16_v");
1288
1289 /* v.x = unpack_half_2x16_split_x(u); */
1290 factory.emit(assign(v, expr(ir_unop_unpack_half_2x16_split_x, u),
1291 WRITEMASK_X));
1292
1293 /* v.y = unpack_half_2x16_split_y(u); */
1294 factory.emit(assign(v, expr(ir_unop_unpack_half_2x16_split_y, u),
1295 WRITEMASK_Y));
1296
1297 return deref(v).val;
1298 }
1299 };
1300
1301 } // namespace anonymous
1302
1303 /**
1304 * \brief Lower the builtin packing functions.
1305 *
1306 * \param op_mask is a bitmask of `enum lower_packing_builtins_op`.
1307 */
1308 bool
1309 lower_packing_builtins(exec_list *instructions, int op_mask)
1310 {
1311 lower_packing_builtins_visitor v(op_mask);
1312 visit_list_elements(&v, instructions, true);
1313 return v.get_progress();
1314 }