src/compiler/nir/nir_constant_expressions.py

   1 #! /usr/bin/python2
   2
   3 def type_has_size(type_):
   4     return type_[-1:].isdigit()
   5
   6 def type_sizes(type_):
   7     if type_.endswith("8"):
   8         return [8]
   9     elif type_.endswith("16"):
  10         return [16]
  11     elif type_.endswith("32"):
  12         return [32]
  13     elif type_.endswith("64"):
  14         return [64]
  15     else:
  16         return [32, 64]
  17
  18 def type_add_size(type_, size):
  19     if type_has_size(type_):
  20         return type_
  21     return type_ + str(size)
  22
  23 def get_const_field(type_):
  24     if type_ == "int32":
  25         return "i"
  26     if type_ == "uint32":
  27         return "u"
  28     if type_ == "int64":
  29         return "l"
  30     if type_ == "uint64":
  31         return "ul"
  32     if type_ == "bool32":
  33         return "b"
  34     if type_ == "float32":
  35         return "f"
  36     if type_ == "float64":
  37         return "d"
  38     raise Exception(str(type_))
  39     assert(0)
  40
  41 template = """\
  42 /*
  43  * Copyright (C) 2014 Intel Corporation
  44  *
  45  * Permission is hereby granted, free of charge, to any person obtaining a
  46  * copy of this software and associated documentation files (the "Software"),
  47  * to deal in the Software without restriction, including without limitation
  48  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  49  * and/or sell copies of the Software, and to permit persons to whom the
  50  * Software is furnished to do so, subject to the following conditions:
  51  *
  52  * The above copyright notice and this permission notice (including the next
  53  * paragraph) shall be included in all copies or substantial portions of the
  54  * Software.
  55  *
  56  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  57  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  58  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  59  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  60  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  61  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  62  * IN THE SOFTWARE.
  63  *
  64  * Authors:
  65  *    Jason Ekstrand (jason@jlekstrand.net)
  66  */
  67
  68 #include <math.h>
  69 #include "main/core.h"
  70 #include "util/rounding.h" /* for _mesa_roundeven */
  71 #include "util/half_float.h"
  72 #include "nir_constant_expressions.h"
  73
  74 /**
  75  * Evaluate one component of packSnorm4x8.
  76  */
  77 static uint8_t
  78 pack_snorm_1x8(float x)
  79 {
  80     /* From section 8.4 of the GLSL 4.30 spec:
  81      *
  82      *    packSnorm4x8
  83      *    ------------
  84      *    The conversion for component c of v to fixed point is done as
  85      *    follows:
  86      *
  87      *      packSnorm4x8: round(clamp(c, -1, +1) * 127.0)
  88      *
  89      * We must first cast the float to an int, because casting a negative
  90      * float to a uint is undefined.
  91      */
  92    return (uint8_t) (int)
  93           _mesa_roundevenf(CLAMP(x, -1.0f, +1.0f) * 127.0f);
  94 }
  95
  96 /**
  97  * Evaluate one component of packSnorm2x16.
  98  */
  99 static uint16_t
 100 pack_snorm_1x16(float x)
 101 {
 102     /* From section 8.4 of the GLSL ES 3.00 spec:
 103      *
 104      *    packSnorm2x16
 105      *    -------------
 106      *    The conversion for component c of v to fixed point is done as
 107      *    follows:
 108      *
 109      *      packSnorm2x16: round(clamp(c, -1, +1) * 32767.0)
 110      *
 111      * We must first cast the float to an int, because casting a negative
 112      * float to a uint is undefined.
 113      */
 114    return (uint16_t) (int)
 115           _mesa_roundevenf(CLAMP(x, -1.0f, +1.0f) * 32767.0f);
 116 }
 117
 118 /**
 119  * Evaluate one component of unpackSnorm4x8.
 120  */
 121 static float
 122 unpack_snorm_1x8(uint8_t u)
 123 {
 124     /* From section 8.4 of the GLSL 4.30 spec:
 125      *
 126      *    unpackSnorm4x8
 127      *    --------------
 128      *    The conversion for unpacked fixed-point value f to floating point is
 129      *    done as follows:
 130      *
 131      *       unpackSnorm4x8: clamp(f / 127.0, -1, +1)
 132      */
 133    return CLAMP((int8_t) u / 127.0f, -1.0f, +1.0f);
 134 }
 135
 136 /**
 137  * Evaluate one component of unpackSnorm2x16.
 138  */
 139 static float
 140 unpack_snorm_1x16(uint16_t u)
 141 {
 142     /* From section 8.4 of the GLSL ES 3.00 spec:
 143      *
 144      *    unpackSnorm2x16
 145      *    ---------------
 146      *    The conversion for unpacked fixed-point value f to floating point is
 147      *    done as follows:
 148      *
 149      *       unpackSnorm2x16: clamp(f / 32767.0, -1, +1)
 150      */
 151    return CLAMP((int16_t) u / 32767.0f, -1.0f, +1.0f);
 152 }
 153
 154 /**
 155  * Evaluate one component packUnorm4x8.
 156  */
 157 static uint8_t
 158 pack_unorm_1x8(float x)
 159 {
 160     /* From section 8.4 of the GLSL 4.30 spec:
 161      *
 162      *    packUnorm4x8
 163      *    ------------
 164      *    The conversion for component c of v to fixed point is done as
 165      *    follows:
 166      *
 167      *       packUnorm4x8: round(clamp(c, 0, +1) * 255.0)
 168      */
 169    return (uint8_t) (int)
 170           _mesa_roundevenf(CLAMP(x, 0.0f, 1.0f) * 255.0f);
 171 }
 172
 173 /**
 174  * Evaluate one component packUnorm2x16.
 175  */
 176 static uint16_t
 177 pack_unorm_1x16(float x)
 178 {
 179     /* From section 8.4 of the GLSL ES 3.00 spec:
 180      *
 181      *    packUnorm2x16
 182      *    -------------
 183      *    The conversion for component c of v to fixed point is done as
 184      *    follows:
 185      *
 186      *       packUnorm2x16: round(clamp(c, 0, +1) * 65535.0)
 187      */
 188    return (uint16_t) (int)
 189           _mesa_roundevenf(CLAMP(x, 0.0f, 1.0f) * 65535.0f);
 190 }
 191
 192 /**
 193  * Evaluate one component of unpackUnorm4x8.
 194  */
 195 static float
 196 unpack_unorm_1x8(uint8_t u)
 197 {
 198     /* From section 8.4 of the GLSL 4.30 spec:
 199      *
 200      *    unpackUnorm4x8
 201      *    --------------
 202      *    The conversion for unpacked fixed-point value f to floating point is
 203      *    done as follows:
 204      *
 205      *       unpackUnorm4x8: f / 255.0
 206      */
 207    return (float) u / 255.0f;
 208 }
 209
 210 /**
 211  * Evaluate one component of unpackUnorm2x16.
 212  */
 213 static float
 214 unpack_unorm_1x16(uint16_t u)
 215 {
 216     /* From section 8.4 of the GLSL ES 3.00 spec:
 217      *
 218      *    unpackUnorm2x16
 219      *    ---------------
 220      *    The conversion for unpacked fixed-point value f to floating point is
 221      *    done as follows:
 222      *
 223      *       unpackUnorm2x16: f / 65535.0
 224      */
 225    return (float) u / 65535.0f;
 226 }
 227
 228 /**
 229  * Evaluate one component of packHalf2x16.
 230  */
 231 static uint16_t
 232 pack_half_1x16(float x)
 233 {
 234    return _mesa_float_to_half(x);
 235 }
 236
 237 /**
 238  * Evaluate one component of unpackHalf2x16.
 239  */
 240 static float
 241 unpack_half_1x16(uint16_t u)
 242 {
 243    return _mesa_half_to_float(u);
 244 }
 245
 246 /* Some typed vector structures to make things like src0.y work */
 247 typedef float float32_t;
 248 typedef double float64_t;
 249 typedef bool bool32_t;
 250 % for type in ["float", "int", "uint"]:
 251 % for width in [32, 64]:
 252 struct ${type}${width}_vec {
 253    ${type}${width}_t x;
 254    ${type}${width}_t y;
 255    ${type}${width}_t z;
 256    ${type}${width}_t w;
 257 };
 258 % endfor
 259 % endfor
 260
 261 struct bool32_vec {
 262     bool x;
 263     bool y;
 264     bool z;
 265     bool w;
 266 };
 267
 268 % for name, op in sorted(opcodes.iteritems()):
 269 static nir_const_value
 270 evaluate_${name}(unsigned num_components, unsigned bit_size,
 271                  nir_const_value *_src)
 272 {
 273    nir_const_value _dst_val = { { {0, 0, 0, 0} } };
 274
 275    switch (bit_size) {
 276    % for bit_size in [32, 64]:
 277    case ${bit_size}: {
 278       <%
 279       output_type = type_add_size(op.output_type, bit_size)
 280       input_types = [type_add_size(type_, bit_size) for type_ in op.input_types]
 281       %>
 282
 283       ## For each non-per-component input, create a variable srcN that
 284       ## contains x, y, z, and w elements which are filled in with the
 285       ## appropriately-typed values.
 286       % for j in range(op.num_inputs):
 287          % if op.input_sizes[j] == 0:
 288             <% continue %>
 289          % elif "src" + str(j) not in op.const_expr:
 290             ## Avoid unused variable warnings
 291             <% continue %>
 292          %endif
 293
 294          struct ${input_types[j]}_vec src${j} = {
 295          % for k in range(op.input_sizes[j]):
 296             % if input_types[j] == "bool32":
 297                _src[${j}].u[${k}] != 0,
 298             % else:
 299                _src[${j}].${get_const_field(input_types[j])}[${k}],
 300             % endif
 301          % endfor
 302          };
 303       % endfor
 304
 305       % if op.output_size == 0:
 306          ## For per-component instructions, we need to iterate over the
 307          ## components and apply the constant expression one component
 308          ## at a time.
 309          for (unsigned _i = 0; _i < num_components; _i++) {
 310             ## For each per-component input, create a variable srcN that
 311             ## contains the value of the current (_i'th) component.
 312             % for j in range(op.num_inputs):
 313                % if op.input_sizes[j] != 0:
 314                   <% continue %>
 315                % elif "src" + str(j) not in op.const_expr:
 316                   ## Avoid unused variable warnings
 317                   <% continue %>
 318                % elif input_types[j] == "bool32":
 319                   bool src${j} = _src[${j}].u[_i] != 0;
 320                % else:
 321                   ${input_types[j]}_t src${j} =
 322                      _src[${j}].${get_const_field(input_types[j])}[_i];
 323                % endif
 324             % endfor
 325
 326             ## Create an appropriately-typed variable dst and assign the
 327             ## result of the const_expr to it.  If const_expr already contains
 328             ## writes to dst, just include const_expr directly.
 329             % if "dst" in op.const_expr:
 330                ${output_type}_t dst;
 331                ${op.const_expr}
 332             % else:
 333                ${output_type}_t dst = ${op.const_expr};
 334             % endif
 335
 336             ## Store the current component of the actual destination to the
 337             ## value of dst.
 338             % if output_type == "bool32":
 339                ## Sanitize the C value to a proper NIR bool
 340                _dst_val.u[_i] = dst ? NIR_TRUE : NIR_FALSE;
 341             % else:
 342                _dst_val.${get_const_field(output_type)}[_i] = dst;
 343             % endif
 344          }
 345       % else:
 346          ## In the non-per-component case, create a struct dst with
 347          ## appropriately-typed elements x, y, z, and w and assign the result
 348          ## of the const_expr to all components of dst, or include the
 349          ## const_expr directly if it writes to dst already.
 350          struct ${output_type}_vec dst;
 351
 352          % if "dst" in op.const_expr:
 353             ${op.const_expr}
 354          % else:
 355             ## Splat the value to all components.  This way expressions which
 356             ## write the same value to all components don't need to explicitly
 357             ## write to dest.  One such example is fnoise which has a
 358             ## const_expr of 0.0f.
 359             dst.x = dst.y = dst.z = dst.w = ${op.const_expr};
 360          % endif
 361
 362          ## For each component in the destination, copy the value of dst to
 363          ## the actual destination.
 364          % for k in range(op.output_size):
 365             % if output_type == "bool32":
 366                ## Sanitize the C value to a proper NIR bool
 367                _dst_val.u[${k}] = dst.${"xyzw"[k]} ? NIR_TRUE : NIR_FALSE;
 368             % else:
 369                _dst_val.${get_const_field(output_type)}[${k}] = dst.${"xyzw"[k]};
 370             % endif
 371          % endfor
 372       % endif
 373
 374       break;
 375    }
 376    % endfor
 377
 378    default:
 379       unreachable("unknown bit width");
 380    }
 381
 382    return _dst_val;
 383 }
 384 % endfor
 385
 386 nir_const_value
 387 nir_eval_const_opcode(nir_op op, unsigned num_components,
 388                       unsigned bit_width, nir_const_value *src)
 389 {
 390    switch (op) {
 391 % for name in sorted(opcodes.iterkeys()):
 392    case nir_op_${name}: {
 393       return evaluate_${name}(num_components, bit_width, src);
 394       break;
 395    }
 396 % endfor
 397    default:
 398       unreachable("shouldn't get here");
 399    }
 400 }"""
 401
 402 from nir_opcodes import opcodes
 403 from mako.template import Template
 404
 405 print Template(template).render(opcodes=opcodes, type_sizes=type_sizes,
 406                                 type_has_size=type_has_size,
 407                                 type_add_size=type_add_size,
 408                                 get_const_field=get_const_field)