1 from __future__
import print_function
# Splits a sized type name such as "float32" into its lowercase base name
# ("type") and its decimal bit width ("bits").  Names with no trailing
# digits do not match at all.
type_split_re = re.compile(
    r'(?P<type>[a-z]+)'  # base type name: one or more lowercase letters
    r'(?P<bits>\d+)'     # bit width: one or more decimal digits
)
def type_has_size(type_):
    """Return True when the type name ends in a digit, i.e. is sized.

    For example "float32" is sized while "float" is not.  An empty
    string is reported as unsized.
    """
    # Slice rather than index so that an empty name yields '' (whose
    # isdigit() is False) instead of raising IndexError.
    final_char = type_[-1:]
    return final_char.isdigit()
11 assert type_has_size(type_
)
12 return int(type_split_re
.match(type_
).group('bits'))
14 def type_sizes(type_
):
15 if type_has_size(type_
):
16 return [type_size(type_
)]
17 elif type_
== 'float':
20 return [8, 16, 32, 64]
22 def type_add_size(type_
, size
):
23 if type_has_size(type_
):
25 return type_
+ str(size
)
29 if not type_has_size(op
.output_type
):
30 sizes
= set(type_sizes(op
.output_type
))
32 for input_type
in op
.input_types
:
33 if not type_has_size(input_type
):
35 sizes
= set(type_sizes(input_type
))
37 sizes
= sizes
.intersection(set(type_sizes(input_type
)))
39 return sorted(list(sizes
)) if sizes
is not None else None
41 def get_const_field(type_
):
44 elif type_
== "float16":
47 m
= type_split_re
.match(type_
)
49 raise Exception(str(type_
))
50 return m
.group('type')[0] + m
.group('bits')
54 * Copyright (C) 2014 Intel Corporation
56 * Permission is hereby granted, free of charge, to any person obtaining a
57 * copy of this software and associated documentation files (the "Software"),
58 * to deal in the Software without restriction, including without limitation
59 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
60 * and/or sell copies of the Software, and to permit persons to whom the
61 * Software is furnished to do so, subject to the following conditions:
63 * The above copyright notice and this permission notice (including the next
64 * paragraph) shall be included in all copies or substantial portions of the
67 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
68 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
69 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
70 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
71 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
72 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
76 * Jason Ekstrand (jason@jlekstrand.net)
80 #include "util/rounding.h" /* for _mesa_roundeven */
81 #include "util/half_float.h"
82 #include "nir_constant_expressions.h"
85 * Evaluate one component of packSnorm4x8.
88 pack_snorm_1x8(float x)
90 /* From section 8.4 of the GLSL 4.30 spec:
94 * The conversion for component c of v to fixed point is done as
97 * packSnorm4x8: round(clamp(c, -1, +1) * 127.0)
99 * We must first cast the float to an int, because casting a negative
100 * float to a uint is undefined.
102 return (uint8_t) (int)
103 _mesa_roundevenf(CLAMP(x, -1.0f, +1.0f) * 127.0f);
107 * Evaluate one component of packSnorm2x16.
110 pack_snorm_1x16(float x)
112 /* From section 8.4 of the GLSL ES 3.00 spec:
116 * The conversion for component c of v to fixed point is done as
119 * packSnorm2x16: round(clamp(c, -1, +1) * 32767.0)
121 * We must first cast the float to an int, because casting a negative
122 * float to a uint is undefined.
124 return (uint16_t) (int)
125 _mesa_roundevenf(CLAMP(x, -1.0f, +1.0f) * 32767.0f);
129 * Evaluate one component of unpackSnorm4x8.
132 unpack_snorm_1x8(uint8_t u)
134 /* From section 8.4 of the GLSL 4.30 spec:
138 * The conversion for unpacked fixed-point value f to floating point is
141 * unpackSnorm4x8: clamp(f / 127.0, -1, +1)
143 return CLAMP((int8_t) u / 127.0f, -1.0f, +1.0f);
147 * Evaluate one component of unpackSnorm2x16.
150 unpack_snorm_1x16(uint16_t u)
152 /* From section 8.4 of the GLSL ES 3.00 spec:
156 * The conversion for unpacked fixed-point value f to floating point is
159 * unpackSnorm2x16: clamp(f / 32767.0, -1, +1)
161 return CLAMP((int16_t) u / 32767.0f, -1.0f, +1.0f);
165 * Evaluate one component packUnorm4x8.
168 pack_unorm_1x8(float x)
170 /* From section 8.4 of the GLSL 4.30 spec:
174 * The conversion for component c of v to fixed point is done as
177 * packUnorm4x8: round(clamp(c, 0, +1) * 255.0)
179 return (uint8_t) (int)
180 _mesa_roundevenf(CLAMP(x, 0.0f, 1.0f) * 255.0f);
184 * Evaluate one component packUnorm2x16.
187 pack_unorm_1x16(float x)
189 /* From section 8.4 of the GLSL ES 3.00 spec:
193 * The conversion for component c of v to fixed point is done as
196 * packUnorm2x16: round(clamp(c, 0, +1) * 65535.0)
198 return (uint16_t) (int)
199 _mesa_roundevenf(CLAMP(x, 0.0f, 1.0f) * 65535.0f);
203 * Evaluate one component of unpackUnorm4x8.
206 unpack_unorm_1x8(uint8_t u)
208 /* From section 8.4 of the GLSL 4.30 spec:
212 * The conversion for unpacked fixed-point value f to floating point is
215 * unpackUnorm4x8: f / 255.0
217 return (float) u / 255.0f;
221 * Evaluate one component of unpackUnorm2x16.
224 unpack_unorm_1x16(uint16_t u)
226 /* From section 8.4 of the GLSL ES 3.00 spec:
230 * The conversion for unpacked fixed-point value f to floating point is
233 * unpackUnorm2x16: f / 65535.0
235 return (float) u / 65535.0f;
239 * Evaluate one component of packHalf2x16.
242 pack_half_1x16(float x)
244 return _mesa_float_to_half(x);
248 * Evaluate one component of unpackHalf2x16.
251 unpack_half_1x16(uint16_t u)
253 return _mesa_half_to_float(u);
256 /* Some typed vector structures to make things like src0.y work */
257 typedef float float16_t;
258 typedef float float32_t;
259 typedef double float64_t;
260 typedef bool bool32_t;
261 % for type in ["float", "int", "uint"]:
262 % for width in type_sizes(type):
263 struct ${type}${width}_vec {
279 <%def name="evaluate_op(op, bit_size)">
281 output_type = type_add_size(op.output_type, bit_size)
282 input_types = [type_add_size(type_, bit_size) for type_ in op.input_types]
285 ## For each non-per-component input, create a variable srcN that
286 ## contains x, y, z, and w elements which are filled in with the
287 ## appropriately-typed values.
288 % for j in range(op.num_inputs):
289 % if op.input_sizes[j] == 0:
291 % elif "src" + str(j) not in op.const_expr:
292 ## Avoid unused variable warnings
296 const struct ${input_types[j]}_vec src${j} = {
297 % for k in range(op.input_sizes[j]):
298 % if input_types[j] == "bool32":
299 _src[${j}].u32[${k}] != 0,
300 % elif input_types[j] == "float16":
301 _mesa_half_to_float(_src[${j}].u16[${k}]),
303 _src[${j}].${get_const_field(input_types[j])}[${k}],
306 % for k in range(op.input_sizes[j], 4):
312 % if op.output_size == 0:
313 ## For per-component instructions, we need to iterate over the
314 ## components and apply the constant expression one component
316 for (unsigned _i = 0; _i < num_components; _i++) {
317 ## For each per-component input, create a variable srcN that
318 ## contains the value of the current (_i'th) component.
319 % for j in range(op.num_inputs):
320 % if op.input_sizes[j] != 0:
322 % elif "src" + str(j) not in op.const_expr:
323 ## Avoid unused variable warnings
325 % elif input_types[j] == "bool32":
326 const bool src${j} = _src[${j}].u32[_i] != 0;
327 % elif input_types[j] == "float16":
328 const float src${j} =
329 _mesa_half_to_float(_src[${j}].u16[_i]);
331 const ${input_types[j]}_t src${j} =
332 _src[${j}].${get_const_field(input_types[j])}[_i];
336 ## Create an appropriately-typed variable dst and assign the
337 ## result of the const_expr to it. If const_expr already contains
338 ## writes to dst, just include const_expr directly.
339 % if "dst" in op.const_expr:
340 ${output_type}_t dst;
344 ${output_type}_t dst = ${op.const_expr};
347 ## Store the current component of the actual destination to the
349 % if output_type == "bool32":
350 ## Sanitize the C value to a proper NIR bool
351 _dst_val.u32[_i] = dst ? NIR_TRUE : NIR_FALSE;
352 % elif output_type == "float16":
353 _dst_val.u16[_i] = _mesa_float_to_half(dst);
355 _dst_val.${get_const_field(output_type)}[_i] = dst;
359 ## In the non-per-component case, create a struct dst with
360 ## appropriately-typed elements x, y, z, and w and assign the result
361 ## of the const_expr to all components of dst, or include the
362 ## const_expr directly if it writes to dst already.
363 struct ${output_type}_vec dst;
365 % if "dst" in op.const_expr:
368 ## Splat the value to all components. This way expressions which
369 ## write the same value to all components don't need to explicitly
370 ## write to dest. One such example is fnoise which has a
371 ## const_expr of 0.0f.
372 dst.x = dst.y = dst.z = dst.w = ${op.const_expr};
375 ## For each component in the destination, copy the value of dst to
376 ## the actual destination.
377 % for k in range(op.output_size):
378 % if output_type == "bool32":
379 ## Sanitize the C value to a proper NIR bool
380 _dst_val.u32[${k}] = dst.${"xyzw"[k]} ? NIR_TRUE : NIR_FALSE;
381 % elif output_type == "float16":
382 _dst_val.u16[${k}] = _mesa_float_to_half(dst.${"xyzw"[k]});
384 _dst_val.${get_const_field(output_type)}[${k}] = dst.${"xyzw"[k]};
390 % for name, op in sorted(opcodes.items()):
391 static nir_const_value
392 evaluate_${name}(MAYBE_UNUSED unsigned num_components,
393 ${"UNUSED" if op_bit_sizes(op) is None else ""} unsigned bit_size,
394 MAYBE_UNUSED nir_const_value *_src)
396 nir_const_value _dst_val = { {0, } };
398 % if op_bit_sizes(op) is not None:
400 % for bit_size in op_bit_sizes(op):
402 ${evaluate_op(op, bit_size)}
408 unreachable("unknown bit width");
411 ${evaluate_op(op, 0)}
419 nir_eval_const_opcode(nir_op op, unsigned num_components,
420 unsigned bit_width, nir_const_value *src)
423 % for name in sorted(opcodes.keys()):
425 return evaluate_${name}(num_components, bit_width, src);
428 unreachable("shouldn't get here");
432 from nir_opcodes
import opcodes
433 from mako
.template
import Template
# Expand the Mako template and write the result to stdout.  The helper
# functions are handed over by keyword so the template body can call them
# by name (e.g. type_sizes(...), op_bit_sizes(...), get_const_field(...)).
print(Template(template).render(**{
    'opcodes': opcodes,
    'type_sizes': type_sizes,
    'type_has_size': type_has_size,
    'type_add_size': type_add_size,
    'op_bit_sizes': op_bit_sizes,
    'get_const_field': get_const_field,
}))