src/compiler/nir/nir_constant_expressions.py

   1 #! /usr/bin/python2
   2 template = """\
   3 /*
   4  * Copyright (C) 2014 Intel Corporation
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the "Software"),
   8  * to deal in the Software without restriction, including without limitation
   9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  10  * and/or sell copies of the Software, and to permit persons to whom the
  11  * Software is furnished to do so, subject to the following conditions:
  12  *
  13  * The above copyright notice and this permission notice (including the next
  14  * paragraph) shall be included in all copies or substantial portions of the
  15  * Software.
  16  *
  17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  22  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  23  * IN THE SOFTWARE.
  24  *
  25  * Authors:
  26  *    Jason Ekstrand (jason@jlekstrand.net)
  27  */
  28
  29 #include <math.h>
  30 #include "main/core.h"
  31 #include "util/rounding.h" /* for _mesa_roundeven */
  32 #include "util/half_float.h"
  33 #include "nir_constant_expressions.h"
  34
  35 /**
  36  * Evaluate one component of packSnorm4x8.
  37  */
  38 static uint8_t
  39 pack_snorm_1x8(float x)
  40 {
  41     /* From section 8.4 of the GLSL 4.30 spec:
  42      *
  43      *    packSnorm4x8
  44      *    ------------
  45      *    The conversion for component c of v to fixed point is done as
  46      *    follows:
  47      *
  48      *      packSnorm4x8: round(clamp(c, -1, +1) * 127.0)
  49      *
  50      * We must first cast the float to an int, because casting a negative
  51      * float to a uint is undefined.
  52      */
  53    return (uint8_t) (int)
  54           _mesa_roundevenf(CLAMP(x, -1.0f, +1.0f) * 127.0f);
  55 }
  56
  57 /**
  58  * Evaluate one component of packSnorm2x16.
  59  */
  60 static uint16_t
  61 pack_snorm_1x16(float x)
  62 {
  63     /* From section 8.4 of the GLSL ES 3.00 spec:
  64      *
  65      *    packSnorm2x16
  66      *    -------------
  67      *    The conversion for component c of v to fixed point is done as
  68      *    follows:
  69      *
  70      *      packSnorm2x16: round(clamp(c, -1, +1) * 32767.0)
  71      *
  72      * We must first cast the float to an int, because casting a negative
  73      * float to a uint is undefined.
  74      */
  75    return (uint16_t) (int)
  76           _mesa_roundevenf(CLAMP(x, -1.0f, +1.0f) * 32767.0f);
  77 }
  78
  79 /**
  80  * Evaluate one component of unpackSnorm4x8.
  81  */
  82 static float
  83 unpack_snorm_1x8(uint8_t u)
  84 {
  85     /* From section 8.4 of the GLSL 4.30 spec:
  86      *
  87      *    unpackSnorm4x8
  88      *    --------------
  89      *    The conversion for unpacked fixed-point value f to floating point is
  90      *    done as follows:
  91      *
  92      *       unpackSnorm4x8: clamp(f / 127.0, -1, +1)
  93      */
  94    return CLAMP((int8_t) u / 127.0f, -1.0f, +1.0f);
  95 }
  96
  97 /**
  98  * Evaluate one component of unpackSnorm2x16.
  99  */
 100 static float
 101 unpack_snorm_1x16(uint16_t u)
 102 {
 103     /* From section 8.4 of the GLSL ES 3.00 spec:
 104      *
 105      *    unpackSnorm2x16
 106      *    ---------------
 107      *    The conversion for unpacked fixed-point value f to floating point is
 108      *    done as follows:
 109      *
 110      *       unpackSnorm2x16: clamp(f / 32767.0, -1, +1)
 111      */
 112    return CLAMP((int16_t) u / 32767.0f, -1.0f, +1.0f);
 113 }
 114
 115 /**
 116  * Evaluate one component packUnorm4x8.
 117  */
 118 static uint8_t
 119 pack_unorm_1x8(float x)
 120 {
 121     /* From section 8.4 of the GLSL 4.30 spec:
 122      *
 123      *    packUnorm4x8
 124      *    ------------
 125      *    The conversion for component c of v to fixed point is done as
 126      *    follows:
 127      *
 128      *       packUnorm4x8: round(clamp(c, 0, +1) * 255.0)
 129      */
 130    return (uint8_t) (int)
 131           _mesa_roundevenf(CLAMP(x, 0.0f, 1.0f) * 255.0f);
 132 }
 133
 134 /**
 135  * Evaluate one component packUnorm2x16.
 136  */
 137 static uint16_t
 138 pack_unorm_1x16(float x)
 139 {
 140     /* From section 8.4 of the GLSL ES 3.00 spec:
 141      *
 142      *    packUnorm2x16
 143      *    -------------
 144      *    The conversion for component c of v to fixed point is done as
 145      *    follows:
 146      *
 147      *       packUnorm2x16: round(clamp(c, 0, +1) * 65535.0)
 148      */
 149    return (uint16_t) (int)
 150           _mesa_roundevenf(CLAMP(x, 0.0f, 1.0f) * 65535.0f);
 151 }
 152
 153 /**
 154  * Evaluate one component of unpackUnorm4x8.
 155  */
 156 static float
 157 unpack_unorm_1x8(uint8_t u)
 158 {
 159     /* From section 8.4 of the GLSL 4.30 spec:
 160      *
 161      *    unpackUnorm4x8
 162      *    --------------
 163      *    The conversion for unpacked fixed-point value f to floating point is
 164      *    done as follows:
 165      *
 166      *       unpackUnorm4x8: f / 255.0
 167      */
 168    return (float) u / 255.0f;
 169 }
 170
 171 /**
 172  * Evaluate one component of unpackUnorm2x16.
 173  */
 174 static float
 175 unpack_unorm_1x16(uint16_t u)
 176 {
 177     /* From section 8.4 of the GLSL ES 3.00 spec:
 178      *
 179      *    unpackUnorm2x16
 180      *    ---------------
 181      *    The conversion for unpacked fixed-point value f to floating point is
 182      *    done as follows:
 183      *
 184      *       unpackUnorm2x16: f / 65535.0
 185      */
 186    return (float) u / 65535.0f;
 187 }
 188
 189 /**
 190  * Evaluate one component of packHalf2x16.
 191  */
 192 static uint16_t
 193 pack_half_1x16(float x)
 194 {
 195    return _mesa_float_to_half(x);
 196 }
 197
 198 /**
 199  * Evaluate one component of unpackHalf2x16.
 200  */
 201 static float
 202 unpack_half_1x16(uint16_t u)
 203 {
 204    return _mesa_half_to_float(u);
 205 }
 206
 207 /* Some typed vector structures to make things like src0.y work */
 208 % for type in ["float", "int", "uint", "bool"]:
 209 struct ${type}_vec {
 210    ${type} x;
 211    ${type} y;
 212    ${type} z;
 213    ${type} w;
 214 };
 215 % endfor
 216
 217 % for name, op in sorted(opcodes.iteritems()):
 218 static nir_const_value
 219 evaluate_${name}(unsigned num_components, nir_const_value *_src)
 220 {
 221    nir_const_value _dst_val = { { {0, 0, 0, 0} } };
 222
 223    ## For each non-per-component input, create a variable srcN that
 224    ## contains x, y, z, and w elements which are filled in with the
 225    ## appropriately-typed values.
 226    % for j in range(op.num_inputs):
 227       % if op.input_sizes[j] == 0:
 228          <% continue %>
 229       % elif "src" + str(j) not in op.const_expr:
 230          ## Avoid unused variable warnings
 231          <% continue %>
 232       %endif
 233
 234       struct ${op.input_types[j]}_vec src${j} = {
 235       % for k in range(op.input_sizes[j]):
 236          % if op.input_types[j] == "bool":
 237             _src[${j}].u[${k}] != 0,
 238          % else:
 239             _src[${j}].${op.input_types[j][:1]}[${k}],
 240          % endif
 241       % endfor
 242       };
 243    % endfor
 244
 245    % if op.output_size == 0:
 246       ## For per-component instructions, we need to iterate over the
 247       ## components and apply the constant expression one component
 248       ## at a time.
 249       for (unsigned _i = 0; _i < num_components; _i++) {
 250          ## For each per-component input, create a variable srcN that
 251          ## contains the value of the current (_i'th) component.
 252          % for j in range(op.num_inputs):
 253             % if op.input_sizes[j] != 0:
 254                <% continue %>
 255             % elif "src" + str(j) not in op.const_expr:
 256                ## Avoid unused variable warnings
 257                <% continue %>
 258             % elif op.input_types[j] == "bool":
 259                bool src${j} = _src[${j}].u[_i] != 0;
 260             % else:
 261                ${op.input_types[j]} src${j} = _src[${j}].${op.input_types[j][:1]}[_i];
 262             % endif
 263          % endfor
 264
 265          ## Create an appropriately-typed variable dst and assign the
 266          ## result of the const_expr to it.  If const_expr already contains
 267          ## writes to dst, just include const_expr directly.
 268          % if "dst" in op.const_expr:
 269             ${op.output_type} dst;
 270             ${op.const_expr}
 271          % else:
 272             ${op.output_type} dst = ${op.const_expr};
 273          % endif
 274
 275          ## Store the current component of the actual destination to the
 276          ## value of dst.
 277          % if op.output_type == "bool":
 278             ## Sanitize the C value to a proper NIR bool
 279             _dst_val.u[_i] = dst ? NIR_TRUE : NIR_FALSE;
 280          % else:
 281             _dst_val.${op.output_type[:1]}[_i] = dst;
 282          % endif
 283       }
 284    % else:
 285       ## In the non-per-component case, create a struct dst with
 286       ## appropriately-typed elements x, y, z, and w and assign the result
 287       ## of the const_expr to all components of dst, or include the
 288       ## const_expr directly if it writes to dst already.
 289       struct ${op.output_type}_vec dst;
 290
 291       % if "dst" in op.const_expr:
 292          ${op.const_expr}
 293       % else:
 294          ## Splat the value to all components.  This way expressions which
 295          ## write the same value to all components don't need to explicitly
 296          ## write to dest.  One such example is fnoise which has a
 297          ## const_expr of 0.0f.
 298          dst.x = dst.y = dst.z = dst.w = ${op.const_expr};
 299       % endif
 300
 301       ## For each component in the destination, copy the value of dst to
 302       ## the actual destination.
 303       % for k in range(op.output_size):
 304          % if op.output_type == "bool":
 305             ## Sanitize the C value to a proper NIR bool
 306             _dst_val.u[${k}] = dst.${"xyzw"[k]} ? NIR_TRUE : NIR_FALSE;
 307          % else:
 308             _dst_val.${op.output_type[:1]}[${k}] = dst.${"xyzw"[k]};
 309          % endif
 310       % endfor
 311    % endif
 312
 313    return _dst_val;
 314 }
 315 % endfor
 316
 317 nir_const_value
 318 nir_eval_const_opcode(nir_op op, unsigned num_components,
 319                       nir_const_value *src)
 320 {
 321    switch (op) {
 322 % for name in sorted(opcodes.iterkeys()):
 323    case nir_op_${name}: {
 324       return evaluate_${name}(num_components, src);
 325       break;
 326    }
 327 % endfor
 328    default:
 329       unreachable("shouldn't get here");
 330    }
 331 }"""
 332
 333 from nir_opcodes import opcodes
 334 from mako.template import Template
 335
 336 print Template(template).render(opcodes=opcodes)