src/glsl/nir/nir_constant_expressions.py

   1 #! /usr/bin/python2
   2 template = """\
   3 /*
   4  * Copyright (C) 2014 Intel Corporation
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the "Software"),
   8  * to deal in the Software without restriction, including without limitation
   9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  10  * and/or sell copies of the Software, and to permit persons to whom the
  11  * Software is furnished to do so, subject to the following conditions:
  12  *
  13  * The above copyright notice and this permission notice (including the next
  14  * paragraph) shall be included in all copies or substantial portions of the
  15  * Software.
  16  *
  17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  22  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  23  * IN THE SOFTWARE.
  24  *
  25  * Authors:
  26  *    Jason Ekstrand (jason@jlekstrand.net)
  27  */
  28
  29 #include <math.h>
  30 #include "main/core.h"
  31 #include "util/rounding.h" /* for _mesa_roundeven */
  32 #include "nir_constant_expressions.h"
  33
  34 #if defined(__SUNPRO_CC)
  35 #include <ieeefp.h>
  36 static int isnormal(double x)
  37 {
  38    return fpclass(x) == FP_NORMAL;
  39 }
  40 #endif
  41
  42 /**
  43  * Evaluate one component of packSnorm4x8.
  44  */
  45 static uint8_t
  46 pack_snorm_1x8(float x)
  47 {
  48     /* From section 8.4 of the GLSL 4.30 spec:
  49      *
  50      *    packSnorm4x8
  51      *    ------------
  52      *    The conversion for component c of v to fixed point is done as
  53      *    follows:
  54      *
  55      *      packSnorm4x8: round(clamp(c, -1, +1) * 127.0)
  56      *
  57      * We must first cast the float to an int, because casting a negative
  58      * float to a uint is undefined.
  59      */
  60    return (uint8_t) (int)
  61           _mesa_roundevenf(CLAMP(x, -1.0f, +1.0f) * 127.0f);
  62 }
  63
  64 /**
  65  * Evaluate one component of packSnorm2x16.
  66  */
  67 static uint16_t
  68 pack_snorm_1x16(float x)
  69 {
  70     /* From section 8.4 of the GLSL ES 3.00 spec:
  71      *
  72      *    packSnorm2x16
  73      *    -------------
  74      *    The conversion for component c of v to fixed point is done as
  75      *    follows:
  76      *
  77      *      packSnorm2x16: round(clamp(c, -1, +1) * 32767.0)
  78      *
  79      * We must first cast the float to an int, because casting a negative
  80      * float to a uint is undefined.
  81      */
  82    return (uint16_t) (int)
  83           _mesa_roundevenf(CLAMP(x, -1.0f, +1.0f) * 32767.0f);
  84 }
  85
  86 /**
  87  * Evaluate one component of unpackSnorm4x8.
  88  */
  89 static float
  90 unpack_snorm_1x8(uint8_t u)
  91 {
  92     /* From section 8.4 of the GLSL 4.30 spec:
  93      *
  94      *    unpackSnorm4x8
  95      *    --------------
  96      *    The conversion for unpacked fixed-point value f to floating point is
  97      *    done as follows:
  98      *
  99      *       unpackSnorm4x8: clamp(f / 127.0, -1, +1)
 100      */
 101    return CLAMP((int8_t) u / 127.0f, -1.0f, +1.0f);
 102 }
 103
 104 /**
 105  * Evaluate one component of unpackSnorm2x16.
 106  */
 107 static float
 108 unpack_snorm_1x16(uint16_t u)
 109 {
 110     /* From section 8.4 of the GLSL ES 3.00 spec:
 111      *
 112      *    unpackSnorm2x16
 113      *    ---------------
 114      *    The conversion for unpacked fixed-point value f to floating point is
 115      *    done as follows:
 116      *
 117      *       unpackSnorm2x16: clamp(f / 32767.0, -1, +1)
 118      */
 119    return CLAMP((int16_t) u / 32767.0f, -1.0f, +1.0f);
 120 }
 121
 122 /**
 123  * Evaluate one component packUnorm4x8.
 124  */
 125 static uint8_t
 126 pack_unorm_1x8(float x)
 127 {
 128     /* From section 8.4 of the GLSL 4.30 spec:
 129      *
 130      *    packUnorm4x8
 131      *    ------------
 132      *    The conversion for component c of v to fixed point is done as
 133      *    follows:
 134      *
 135      *       packUnorm4x8: round(clamp(c, 0, +1) * 255.0)
 136      */
 137    return (uint8_t) (int)
 138           _mesa_roundevenf(CLAMP(x, 0.0f, 1.0f) * 255.0f);
 139 }
 140
 141 /**
 142  * Evaluate one component packUnorm2x16.
 143  */
 144 static uint16_t
 145 pack_unorm_1x16(float x)
 146 {
 147     /* From section 8.4 of the GLSL ES 3.00 spec:
 148      *
 149      *    packUnorm2x16
 150      *    -------------
 151      *    The conversion for component c of v to fixed point is done as
 152      *    follows:
 153      *
 154      *       packUnorm2x16: round(clamp(c, 0, +1) * 65535.0)
 155      */
 156    return (uint16_t) (int)
 157           _mesa_roundevenf(CLAMP(x, 0.0f, 1.0f) * 65535.0f);
 158 }
 159
 160 /**
 161  * Evaluate one component of unpackUnorm4x8.
 162  */
 163 static float
 164 unpack_unorm_1x8(uint8_t u)
 165 {
 166     /* From section 8.4 of the GLSL 4.30 spec:
 167      *
 168      *    unpackUnorm4x8
 169      *    --------------
 170      *    The conversion for unpacked fixed-point value f to floating point is
 171      *    done as follows:
 172      *
 173      *       unpackUnorm4x8: f / 255.0
 174      */
 175    return (float) u / 255.0f;
 176 }
 177
 178 /**
 179  * Evaluate one component of unpackUnorm2x16.
 180  */
 181 static float
 182 unpack_unorm_1x16(uint16_t u)
 183 {
 184     /* From section 8.4 of the GLSL ES 3.00 spec:
 185      *
 186      *    unpackUnorm2x16
 187      *    ---------------
 188      *    The conversion for unpacked fixed-point value f to floating point is
 189      *    done as follows:
 190      *
 191      *       unpackUnorm2x16: f / 65535.0
 192      */
 193    return (float) u / 65535.0f;
 194 }
 195
 196 /**
 197  * Evaluate one component of packHalf2x16.
 198  */
 199 static uint16_t
 200 pack_half_1x16(float x)
 201 {
 202    return _mesa_float_to_half(x);
 203 }
 204
 205 /**
 206  * Evaluate one component of unpackHalf2x16.
 207  */
 208 static float
 209 unpack_half_1x16(uint16_t u)
 210 {
 211    return _mesa_half_to_float(u);
 212 }
 213
 214 /* Some typed vector structures to make things like src0.y work */
 215 % for type in ["float", "int", "unsigned", "bool"]:
 216 struct ${type}_vec {
 217    ${type} x;
 218    ${type} y;
 219    ${type} z;
 220    ${type} w;
 221 };
 222 % endfor
 223
 224 % for name, op in sorted(opcodes.iteritems()):
 225 static nir_const_value
 226 evaluate_${name}(unsigned num_components, nir_const_value *_src)
 227 {
 228    nir_const_value _dst_val = { { {0, 0, 0, 0} } };
 229
 230    ## For each non-per-component input, create a variable srcN that
 231    ## contains x, y, z, and w elements which are filled in with the
 232    ## appropriately-typed values.
 233    % for j in range(op.num_inputs):
 234       % if op.input_sizes[j] == 0:
 235          <% continue %>
 236       % elif "src" + str(j) not in op.const_expr:
 237          ## Avoid unused variable warnings
 238          <% continue %>
 239       %endif
 240
 241       struct ${op.input_types[j]}_vec src${j} = {
 242       % for k in range(op.input_sizes[j]):
 243          % if op.input_types[j] == "bool":
 244             _src[${j}].u[${k}] != 0,
 245          % else:
 246             _src[${j}].${op.input_types[j][:1]}[${k}],
 247          % endif
 248       % endfor
 249       };
 250    % endfor
 251
 252    % if op.output_size == 0:
 253       ## For per-component instructions, we need to iterate over the
 254       ## components and apply the constant expression one component
 255       ## at a time.
 256       for (unsigned _i = 0; _i < num_components; _i++) {
 257          ## For each per-component input, create a variable srcN that
 258          ## contains the value of the current (_i'th) component.
 259          % for j in range(op.num_inputs):
 260             % if op.input_sizes[j] != 0:
 261                <% continue %>
 262             % elif "src" + str(j) not in op.const_expr:
 263                ## Avoid unused variable warnings
 264                <% continue %>
 265             % elif op.input_types[j] == "bool":
 266                bool src${j} = _src[${j}].u[_i] != 0;
 267             % else:
 268                ${op.input_types[j]} src${j} = _src[${j}].${op.input_types[j][:1]}[_i];
 269             % endif
 270          % endfor
 271
 272          ## Create an appropriately-typed variable dst and assign the
 273          ## result of the const_expr to it.  If const_expr already contains
 274          ## writes to dst, just include const_expr directly.
 275          % if "dst" in op.const_expr:
 276             ${op.output_type} dst;
 277             ${op.const_expr}
 278          % else:
 279             ${op.output_type} dst = ${op.const_expr};
 280          % endif
 281
 282          ## Store the current component of the actual destination to the
 283          ## value of dst.
 284          % if op.output_type == "bool":
 285             ## Sanitize the C value to a proper NIR bool
 286             _dst_val.u[_i] = dst ? NIR_TRUE : NIR_FALSE;
 287          % else:
 288             _dst_val.${op.output_type[:1]}[_i] = dst;
 289          % endif
 290       }
 291    % else:
 292       ## In the non-per-component case, create a struct dst with
 293       ## appropriately-typed elements x, y, z, and w and assign the result
 294       ## of the const_expr to all components of dst, or include the
 295       ## const_expr directly if it writes to dst already.
 296       struct ${op.output_type}_vec dst;
 297
 298       % if "dst" in op.const_expr:
 299          ${op.const_expr}
 300       % else:
 301          ## Splat the value to all components.  This way expressions which
 302          ## write the same value to all components don't need to explicitly
 303          ## write to dest.  One such example is fnoise which has a
 304          ## const_expr of 0.0f.
 305          dst.x = dst.y = dst.z = dst.w = ${op.const_expr};
 306       % endif
 307
 308       ## For each component in the destination, copy the value of dst to
 309       ## the actual destination.
 310       % for k in range(op.output_size):
 311          % if op.output_type == "bool":
 312             ## Sanitize the C value to a proper NIR bool
 313             _dst_val.u[${k}] = dst.${"xyzw"[k]} ? NIR_TRUE : NIR_FALSE;
 314          % else:
 315             _dst_val.${op.output_type[:1]}[${k}] = dst.${"xyzw"[k]};
 316          % endif
 317       % endfor
 318    % endif
 319
 320    return _dst_val;
 321 }
 322 % endfor
 323
 324 nir_const_value
 325 nir_eval_const_opcode(nir_op op, unsigned num_components,
 326                       nir_const_value *src)
 327 {
 328    switch (op) {
 329 % for name in sorted(opcodes.iterkeys()):
 330    case nir_op_${name}: {
 331       return evaluate_${name}(num_components, src);
 332       break;
 333    }
 334 % endfor
 335    default:
 336       unreachable("shouldn't get here");
 337    }
 338 }"""
 339
 340 from nir_opcodes import opcodes
 341 from mako.template import Template
 342
 343 print Template(template).render(opcodes=opcodes)