src/compiler/nir/nir_constant_expressions.py

   1
   2 def type_has_size(type_):
   3     return type_[-1:].isdigit()
   4
   5 def type_sizes(type_):
   6     if type_.endswith("8"):
   7         return [8]
   8     elif type_.endswith("16"):
   9         return [16]
  10     elif type_.endswith("32"):
  11         return [32]
  12     elif type_.endswith("64"):
  13         return [64]
  14     else:
  15         return [32, 64]
  16
  17 def type_add_size(type_, size):
  18     if type_has_size(type_):
  19         return type_
  20     return type_ + str(size)
  21
  22 def get_const_field(type_):
  23     if type_ == "int32":
  24         return "i32"
  25     if type_ == "uint32":
  26         return "u32"
  27     if type_ == "int64":
  28         return "i64"
  29     if type_ == "uint64":
  30         return "u64"
  31     if type_ == "bool32":
  32         return "u32"
  33     if type_ == "float32":
  34         return "f32"
  35     if type_ == "float64":
  36         return "f64"
  37     raise Exception(str(type_))
  38     assert(0)
  39
  40 template = """\
  41 /*
  42  * Copyright (C) 2014 Intel Corporation
  43  *
  44  * Permission is hereby granted, free of charge, to any person obtaining a
  45  * copy of this software and associated documentation files (the "Software"),
  46  * to deal in the Software without restriction, including without limitation
  47  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  48  * and/or sell copies of the Software, and to permit persons to whom the
  49  * Software is furnished to do so, subject to the following conditions:
  50  *
  51  * The above copyright notice and this permission notice (including the next
  52  * paragraph) shall be included in all copies or substantial portions of the
  53  * Software.
  54  *
  55  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  56  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  57  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  58  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  59  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  60  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  61  * IN THE SOFTWARE.
  62  *
  63  * Authors:
  64  *    Jason Ekstrand (jason@jlekstrand.net)
  65  */
  66
  67 #include <math.h>
  68 #include "main/core.h"
  69 #include "util/rounding.h" /* for _mesa_roundeven */
  70 #include "util/half_float.h"
  71 #include "nir_constant_expressions.h"
  72
  73 /**
  74  * Evaluate one component of packSnorm4x8.
  75  */
  76 static uint8_t
  77 pack_snorm_1x8(float x)
  78 {
  79     /* From section 8.4 of the GLSL 4.30 spec:
  80      *
  81      *    packSnorm4x8
  82      *    ------------
  83      *    The conversion for component c of v to fixed point is done as
  84      *    follows:
  85      *
  86      *      packSnorm4x8: round(clamp(c, -1, +1) * 127.0)
  87      *
  88      * We must first cast the float to an int, because casting a negative
  89      * float to a uint is undefined.
  90      */
  91    return (uint8_t) (int)
  92           _mesa_roundevenf(CLAMP(x, -1.0f, +1.0f) * 127.0f);
  93 }
  94
  95 /**
  96  * Evaluate one component of packSnorm2x16.
  97  */
  98 static uint16_t
  99 pack_snorm_1x16(float x)
 100 {
 101     /* From section 8.4 of the GLSL ES 3.00 spec:
 102      *
 103      *    packSnorm2x16
 104      *    -------------
 105      *    The conversion for component c of v to fixed point is done as
 106      *    follows:
 107      *
 108      *      packSnorm2x16: round(clamp(c, -1, +1) * 32767.0)
 109      *
 110      * We must first cast the float to an int, because casting a negative
 111      * float to a uint is undefined.
 112      */
 113    return (uint16_t) (int)
 114           _mesa_roundevenf(CLAMP(x, -1.0f, +1.0f) * 32767.0f);
 115 }
 116
 117 /**
 118  * Evaluate one component of unpackSnorm4x8.
 119  */
 120 static float
 121 unpack_snorm_1x8(uint8_t u)
 122 {
 123     /* From section 8.4 of the GLSL 4.30 spec:
 124      *
 125      *    unpackSnorm4x8
 126      *    --------------
 127      *    The conversion for unpacked fixed-point value f to floating point is
 128      *    done as follows:
 129      *
 130      *       unpackSnorm4x8: clamp(f / 127.0, -1, +1)
 131      */
 132    return CLAMP((int8_t) u / 127.0f, -1.0f, +1.0f);
 133 }
 134
 135 /**
 136  * Evaluate one component of unpackSnorm2x16.
 137  */
 138 static float
 139 unpack_snorm_1x16(uint16_t u)
 140 {
 141     /* From section 8.4 of the GLSL ES 3.00 spec:
 142      *
 143      *    unpackSnorm2x16
 144      *    ---------------
 145      *    The conversion for unpacked fixed-point value f to floating point is
 146      *    done as follows:
 147      *
 148      *       unpackSnorm2x16: clamp(f / 32767.0, -1, +1)
 149      */
 150    return CLAMP((int16_t) u / 32767.0f, -1.0f, +1.0f);
 151 }
 152
 153 /**
 154  * Evaluate one component packUnorm4x8.
 155  */
 156 static uint8_t
 157 pack_unorm_1x8(float x)
 158 {
 159     /* From section 8.4 of the GLSL 4.30 spec:
 160      *
 161      *    packUnorm4x8
 162      *    ------------
 163      *    The conversion for component c of v to fixed point is done as
 164      *    follows:
 165      *
 166      *       packUnorm4x8: round(clamp(c, 0, +1) * 255.0)
 167      */
 168    return (uint8_t) (int)
 169           _mesa_roundevenf(CLAMP(x, 0.0f, 1.0f) * 255.0f);
 170 }
 171
 172 /**
 173  * Evaluate one component packUnorm2x16.
 174  */
 175 static uint16_t
 176 pack_unorm_1x16(float x)
 177 {
 178     /* From section 8.4 of the GLSL ES 3.00 spec:
 179      *
 180      *    packUnorm2x16
 181      *    -------------
 182      *    The conversion for component c of v to fixed point is done as
 183      *    follows:
 184      *
 185      *       packUnorm2x16: round(clamp(c, 0, +1) * 65535.0)
 186      */
 187    return (uint16_t) (int)
 188           _mesa_roundevenf(CLAMP(x, 0.0f, 1.0f) * 65535.0f);
 189 }
 190
 191 /**
 192  * Evaluate one component of unpackUnorm4x8.
 193  */
 194 static float
 195 unpack_unorm_1x8(uint8_t u)
 196 {
 197     /* From section 8.4 of the GLSL 4.30 spec:
 198      *
 199      *    unpackUnorm4x8
 200      *    --------------
 201      *    The conversion for unpacked fixed-point value f to floating point is
 202      *    done as follows:
 203      *
 204      *       unpackUnorm4x8: f / 255.0
 205      */
 206    return (float) u / 255.0f;
 207 }
 208
 209 /**
 210  * Evaluate one component of unpackUnorm2x16.
 211  */
 212 static float
 213 unpack_unorm_1x16(uint16_t u)
 214 {
 215     /* From section 8.4 of the GLSL ES 3.00 spec:
 216      *
 217      *    unpackUnorm2x16
 218      *    ---------------
 219      *    The conversion for unpacked fixed-point value f to floating point is
 220      *    done as follows:
 221      *
 222      *       unpackUnorm2x16: f / 65535.0
 223      */
 224    return (float) u / 65535.0f;
 225 }
 226
 227 /**
 228  * Evaluate one component of packHalf2x16.
 229  */
 230 static uint16_t
 231 pack_half_1x16(float x)
 232 {
 233    return _mesa_float_to_half(x);
 234 }
 235
 236 /**
 237  * Evaluate one component of unpackHalf2x16.
 238  */
 239 static float
 240 unpack_half_1x16(uint16_t u)
 241 {
 242    return _mesa_half_to_float(u);
 243 }
 244
 245 /* Some typed vector structures to make things like src0.y work */
 246 typedef float float32_t;
 247 typedef double float64_t;
 248 typedef bool bool32_t;
 249 % for type in ["float", "int", "uint"]:
 250 % for width in [32, 64]:
 251 struct ${type}${width}_vec {
 252    ${type}${width}_t x;
 253    ${type}${width}_t y;
 254    ${type}${width}_t z;
 255    ${type}${width}_t w;
 256 };
 257 % endfor
 258 % endfor
 259
 260 struct bool32_vec {
 261     bool x;
 262     bool y;
 263     bool z;
 264     bool w;
 265 };
 266
 267 % for name, op in sorted(opcodes.iteritems()):
 268 static nir_const_value
 269 evaluate_${name}(MAYBE_UNUSED unsigned num_components, unsigned bit_size,
 270                  MAYBE_UNUSED nir_const_value *_src)
 271 {
 272    nir_const_value _dst_val = { {0, } };
 273
 274    switch (bit_size) {
 275    % for bit_size in [32, 64]:
 276    case ${bit_size}: {
 277       <%
 278       output_type = type_add_size(op.output_type, bit_size)
 279       input_types = [type_add_size(type_, bit_size) for type_ in op.input_types]
 280       %>
 281
 282       ## For each non-per-component input, create a variable srcN that
 283       ## contains x, y, z, and w elements which are filled in with the
 284       ## appropriately-typed values.
 285       % for j in range(op.num_inputs):
 286          % if op.input_sizes[j] == 0:
 287             <% continue %>
 288          % elif "src" + str(j) not in op.const_expr:
 289             ## Avoid unused variable warnings
 290             <% continue %>
 291          %endif
 292
 293          const struct ${input_types[j]}_vec src${j} = {
 294          % for k in range(op.input_sizes[j]):
 295             % if input_types[j] == "bool32":
 296                _src[${j}].u32[${k}] != 0,
 297             % else:
 298                _src[${j}].${get_const_field(input_types[j])}[${k}],
 299             % endif
 300          % endfor
 301          % for k in range(op.input_sizes[j], 4):
 302             0,
 303          % endfor
 304          };
 305       % endfor
 306
 307       % if op.output_size == 0:
 308          ## For per-component instructions, we need to iterate over the
 309          ## components and apply the constant expression one component
 310          ## at a time.
 311          for (unsigned _i = 0; _i < num_components; _i++) {
 312             ## For each per-component input, create a variable srcN that
 313             ## contains the value of the current (_i'th) component.
 314             % for j in range(op.num_inputs):
 315                % if op.input_sizes[j] != 0:
 316                   <% continue %>
 317                % elif "src" + str(j) not in op.const_expr:
 318                   ## Avoid unused variable warnings
 319                   <% continue %>
 320                % elif input_types[j] == "bool32":
 321                   const bool src${j} = _src[${j}].u32[_i] != 0;
 322                % else:
 323                   const ${input_types[j]}_t src${j} =
 324                      _src[${j}].${get_const_field(input_types[j])}[_i];
 325                % endif
 326             % endfor
 327
 328             ## Create an appropriately-typed variable dst and assign the
 329             ## result of the const_expr to it.  If const_expr already contains
 330             ## writes to dst, just include const_expr directly.
 331             % if "dst" in op.const_expr:
 332                ${output_type}_t dst;
 333
 334                ${op.const_expr}
 335             % else:
 336                ${output_type}_t dst = ${op.const_expr};
 337             % endif
 338
 339             ## Store the current component of the actual destination to the
 340             ## value of dst.
 341             % if output_type == "bool32":
 342                ## Sanitize the C value to a proper NIR bool
 343                _dst_val.u32[_i] = dst ? NIR_TRUE : NIR_FALSE;
 344             % else:
 345                _dst_val.${get_const_field(output_type)}[_i] = dst;
 346             % endif
 347          }
 348       % else:
 349          ## In the non-per-component case, create a struct dst with
 350          ## appropriately-typed elements x, y, z, and w and assign the result
 351          ## of the const_expr to all components of dst, or include the
 352          ## const_expr directly if it writes to dst already.
 353          struct ${output_type}_vec dst;
 354
 355          % if "dst" in op.const_expr:
 356             ${op.const_expr}
 357          % else:
 358             ## Splat the value to all components.  This way expressions which
 359             ## write the same value to all components don't need to explicitly
 360             ## write to dest.  One such example is fnoise which has a
 361             ## const_expr of 0.0f.
 362             dst.x = dst.y = dst.z = dst.w = ${op.const_expr};
 363          % endif
 364
 365          ## For each component in the destination, copy the value of dst to
 366          ## the actual destination.
 367          % for k in range(op.output_size):
 368             % if output_type == "bool32":
 369                ## Sanitize the C value to a proper NIR bool
 370                _dst_val.u32[${k}] = dst.${"xyzw"[k]} ? NIR_TRUE : NIR_FALSE;
 371             % else:
 372                _dst_val.${get_const_field(output_type)}[${k}] = dst.${"xyzw"[k]};
 373             % endif
 374          % endfor
 375       % endif
 376
 377       break;
 378    }
 379    % endfor
 380
 381    default:
 382       unreachable("unknown bit width");
 383    }
 384
 385    return _dst_val;
 386 }
 387 % endfor
 388
 389 nir_const_value
 390 nir_eval_const_opcode(nir_op op, unsigned num_components,
 391                       unsigned bit_width, nir_const_value *src)
 392 {
 393    switch (op) {
 394 % for name in sorted(opcodes.iterkeys()):
 395    case nir_op_${name}:
 396       return evaluate_${name}(num_components, bit_width, src);
 397 % endfor
 398    default:
 399       unreachable("shouldn't get here");
 400    }
 401 }"""
 402
 403 from nir_opcodes import opcodes
 404 from mako.template import Template
 405
 406 print Template(template).render(opcodes=opcodes, type_sizes=type_sizes,
 407                                 type_has_size=type_has_size,
 408                                 type_add_size=type_add_size,
 409                                 get_const_field=get_const_field)