src/glsl/nir/nir_constant_expressions.py

   1 #! /usr/bin/python2
   2 template = """\
   3 /*
   4  * Copyright (C) 2014 Intel Corporation
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the "Software"),
   8  * to deal in the Software without restriction, including without limitation
   9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  10  * and/or sell copies of the Software, and to permit persons to whom the
  11  * Software is furnished to do so, subject to the following conditions:
  12  *
  13  * The above copyright notice and this permission notice (including the next
  14  * paragraph) shall be included in all copies or substantial portions of the
  15  * Software.
  16  *
  17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  22  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  23  * IN THE SOFTWARE.
  24  *
  25  * Authors:
  26  *    Jason Ekstrand (jason@jlekstrand.net)
  27  */
  28
  29 #include <math.h>
  30 #include "main/core.h"
  31 #include "util/rounding.h" /* for _mesa_roundeven */
  32 #include "nir_constant_expressions.h"
  33
  34 #if defined(_MSC_VER) && (_MSC_VER < 1800)
  35 static int isnormal(double x)
  36 {
  37    return _fpclass(x) == _FPCLASS_NN || _fpclass(x) == _FPCLASS_PN;
  38 }
  39 #elif defined(__SUNPRO_CC)
  40 #include <ieeefp.h>
  41 static int isnormal(double x)
  42 {
  43    return fpclass(x) == FP_NORMAL;
  44 }
  45 #endif
  46
  47 #if defined(_MSC_VER)
  48 static double copysign(double x, double y)
  49 {
  50    return _copysign(x, y);
  51 }
  52 #endif
  53
  54 /**
  55  * Evaluate one component of packSnorm4x8.
  56  */
  57 static uint8_t
  58 pack_snorm_1x8(float x)
  59 {
  60     /* From section 8.4 of the GLSL 4.30 spec:
  61      *
  62      *    packSnorm4x8
  63      *    ------------
  64      *    The conversion for component c of v to fixed point is done as
  65      *    follows:
  66      *
  67      *      packSnorm4x8: round(clamp(c, -1, +1) * 127.0)
  68      *
  69      * We must first cast the float to an int, because casting a negative
  70      * float to a uint is undefined.
  71      */
  72    return (uint8_t) (int)
  73           _mesa_roundevenf(CLAMP(x, -1.0f, +1.0f) * 127.0f);
  74 }
  75
  76 /**
  77  * Evaluate one component of packSnorm2x16.
  78  */
  79 static uint16_t
  80 pack_snorm_1x16(float x)
  81 {
  82     /* From section 8.4 of the GLSL ES 3.00 spec:
  83      *
  84      *    packSnorm2x16
  85      *    -------------
  86      *    The conversion for component c of v to fixed point is done as
  87      *    follows:
  88      *
  89      *      packSnorm2x16: round(clamp(c, -1, +1) * 32767.0)
  90      *
  91      * We must first cast the float to an int, because casting a negative
  92      * float to a uint is undefined.
  93      */
  94    return (uint16_t) (int)
  95           _mesa_roundevenf(CLAMP(x, -1.0f, +1.0f) * 32767.0f);
  96 }
  97
  98 /**
  99  * Evaluate one component of unpackSnorm4x8.
 100  */
 101 static float
 102 unpack_snorm_1x8(uint8_t u)
 103 {
 104     /* From section 8.4 of the GLSL 4.30 spec:
 105      *
 106      *    unpackSnorm4x8
 107      *    --------------
 108      *    The conversion for unpacked fixed-point value f to floating point is
 109      *    done as follows:
 110      *
 111      *       unpackSnorm4x8: clamp(f / 127.0, -1, +1)
 112      */
 113    return CLAMP((int8_t) u / 127.0f, -1.0f, +1.0f);
 114 }
 115
 116 /**
 117  * Evaluate one component of unpackSnorm2x16.
 118  */
 119 static float
 120 unpack_snorm_1x16(uint16_t u)
 121 {
 122     /* From section 8.4 of the GLSL ES 3.00 spec:
 123      *
 124      *    unpackSnorm2x16
 125      *    ---------------
 126      *    The conversion for unpacked fixed-point value f to floating point is
 127      *    done as follows:
 128      *
 129      *       unpackSnorm2x16: clamp(f / 32767.0, -1, +1)
 130      */
 131    return CLAMP((int16_t) u / 32767.0f, -1.0f, +1.0f);
 132 }
 133
 134 /**
 135  * Evaluate one component packUnorm4x8.
 136  */
 137 static uint8_t
 138 pack_unorm_1x8(float x)
 139 {
 140     /* From section 8.4 of the GLSL 4.30 spec:
 141      *
 142      *    packUnorm4x8
 143      *    ------------
 144      *    The conversion for component c of v to fixed point is done as
 145      *    follows:
 146      *
 147      *       packUnorm4x8: round(clamp(c, 0, +1) * 255.0)
 148      */
 149    return (uint8_t) (int)
 150           _mesa_roundevenf(CLAMP(x, 0.0f, 1.0f) * 255.0f);
 151 }
 152
 153 /**
 154  * Evaluate one component packUnorm2x16.
 155  */
 156 static uint16_t
 157 pack_unorm_1x16(float x)
 158 {
 159     /* From section 8.4 of the GLSL ES 3.00 spec:
 160      *
 161      *    packUnorm2x16
 162      *    -------------
 163      *    The conversion for component c of v to fixed point is done as
 164      *    follows:
 165      *
 166      *       packUnorm2x16: round(clamp(c, 0, +1) * 65535.0)
 167      */
 168    return (uint16_t) (int)
 169           _mesa_roundevenf(CLAMP(x, 0.0f, 1.0f) * 65535.0f);
 170 }
 171
 172 /**
 173  * Evaluate one component of unpackUnorm4x8.
 174  */
 175 static float
 176 unpack_unorm_1x8(uint8_t u)
 177 {
 178     /* From section 8.4 of the GLSL 4.30 spec:
 179      *
 180      *    unpackUnorm4x8
 181      *    --------------
 182      *    The conversion for unpacked fixed-point value f to floating point is
 183      *    done as follows:
 184      *
 185      *       unpackUnorm4x8: f / 255.0
 186      */
 187    return (float) u / 255.0f;
 188 }
 189
 190 /**
 191  * Evaluate one component of unpackUnorm2x16.
 192  */
 193 static float
 194 unpack_unorm_1x16(uint16_t u)
 195 {
 196     /* From section 8.4 of the GLSL ES 3.00 spec:
 197      *
 198      *    unpackUnorm2x16
 199      *    ---------------
 200      *    The conversion for unpacked fixed-point value f to floating point is
 201      *    done as follows:
 202      *
 203      *       unpackUnorm2x16: f / 65535.0
 204      */
 205    return (float) u / 65535.0f;
 206 }
 207
 208 /**
 209  * Evaluate one component of packHalf2x16.
 210  */
 211 static uint16_t
 212 pack_half_1x16(float x)
 213 {
 214    return _mesa_float_to_half(x);
 215 }
 216
 217 /**
 218  * Evaluate one component of unpackHalf2x16.
 219  */
 220 static float
 221 unpack_half_1x16(uint16_t u)
 222 {
 223    return _mesa_half_to_float(u);
 224 }
 225
 226 /* Some typed vector structures to make things like src0.y work */
 227 % for type in ["float", "int", "unsigned", "bool"]:
 228 struct ${type}_vec {
 229    ${type} x;
 230    ${type} y;
 231    ${type} z;
 232    ${type} w;
 233 };
 234 % endfor
 235
 236 % for name, op in sorted(opcodes.iteritems()):
 237 static nir_const_value
 238 evaluate_${name}(unsigned num_components, nir_const_value *_src)
 239 {
 240    nir_const_value _dst_val = { { {0, 0, 0, 0} } };
 241
 242    ## For each non-per-component input, create a variable srcN that
 243    ## contains x, y, z, and w elements which are filled in with the
 244    ## appropriately-typed values.
 245    % for j in range(op.num_inputs):
 246       % if op.input_sizes[j] == 0:
 247          <% continue %>
 248       % elif "src" + str(j) not in op.const_expr:
 249          ## Avoid unused variable warnings
 250          <% continue %>
 251       %endif
 252
 253       struct ${op.input_types[j]}_vec src${j} = {
 254       % for k in range(op.input_sizes[j]):
 255          % if op.input_types[j] == "bool":
 256             _src[${j}].u[${k}] != 0,
 257          % else:
 258             _src[${j}].${op.input_types[j][:1]}[${k}],
 259          % endif
 260       % endfor
 261       };
 262    % endfor
 263
 264    % if op.output_size == 0:
 265       ## For per-component instructions, we need to iterate over the
 266       ## components and apply the constant expression one component
 267       ## at a time.
 268       for (unsigned _i = 0; _i < num_components; _i++) {
 269          ## For each per-component input, create a variable srcN that
 270          ## contains the value of the current (_i'th) component.
 271          % for j in range(op.num_inputs):
 272             % if op.input_sizes[j] != 0:
 273                <% continue %>
 274             % elif "src" + str(j) not in op.const_expr:
 275                ## Avoid unused variable warnings
 276                <% continue %>
 277             % elif op.input_types[j] == "bool":
 278                bool src${j} = _src[${j}].u[_i] != 0;
 279             % else:
 280                ${op.input_types[j]} src${j} = _src[${j}].${op.input_types[j][:1]}[_i];
 281             % endif
 282          % endfor
 283
 284          ## Create an appropriately-typed variable dst and assign the
 285          ## result of the const_expr to it.  If const_expr already contains
 286          ## writes to dst, just include const_expr directly.
 287          % if "dst" in op.const_expr:
 288             ${op.output_type} dst;
 289             ${op.const_expr}
 290          % else:
 291             ${op.output_type} dst = ${op.const_expr};
 292          % endif
 293
 294          ## Store the current component of the actual destination to the
 295          ## value of dst.
 296          % if op.output_type == "bool":
 297             ## Sanitize the C value to a proper NIR bool
 298             _dst_val.u[_i] = dst ? NIR_TRUE : NIR_FALSE;
 299          % else:
 300             _dst_val.${op.output_type[:1]}[_i] = dst;
 301          % endif
 302       }
 303    % else:
 304       ## In the non-per-component case, create a struct dst with
 305       ## appropriately-typed elements x, y, z, and w and assign the result
 306       ## of the const_expr to all components of dst, or include the
 307       ## const_expr directly if it writes to dst already.
 308       struct ${op.output_type}_vec dst;
 309
 310       % if "dst" in op.const_expr:
 311          ${op.const_expr}
 312       % else:
 313          ## Splat the value to all components.  This way expressions which
 314          ## write the same value to all components don't need to explicitly
 315          ## write to dest.  One such example is fnoise which has a
 316          ## const_expr of 0.0f.
 317          dst.x = dst.y = dst.z = dst.w = ${op.const_expr};
 318       % endif
 319
 320       ## For each component in the destination, copy the value of dst to
 321       ## the actual destination.
 322       % for k in range(op.output_size):
 323          % if op.output_type == "bool":
 324             ## Sanitize the C value to a proper NIR bool
 325             _dst_val.u[${k}] = dst.${"xyzw"[k]} ? NIR_TRUE : NIR_FALSE;
 326          % else:
 327             _dst_val.${op.output_type[:1]}[${k}] = dst.${"xyzw"[k]};
 328          % endif
 329       % endfor
 330    % endif
 331
 332    return _dst_val;
 333 }
 334 % endfor
 335
 336 nir_const_value
 337 nir_eval_const_opcode(nir_op op, unsigned num_components,
 338                       nir_const_value *src)
 339 {
 340    switch (op) {
 341 % for name in sorted(opcodes.iterkeys()):
 342    case nir_op_${name}: {
 343       return evaluate_${name}(num_components, src);
 344       break;
 345    }
 346 % endfor
 347    default:
 348       unreachable("shouldn't get here");
 349    }
 350 }"""
 351
 352 from nir_opcodes import opcodes
 353 from mako.template import Template
 354
 355 print Template(template).render(opcodes=opcodes)