src/compiler/nir/nir_constant_expressions.py

   1 #! /usr/bin/python2
   2
   3 def type_has_size(type_):
   4     return type_[-1:].isdigit()
   5
   6 def type_sizes(type_):
   7     if type_.endswith("8"):
   8         return [8]
   9     elif type_.endswith("16"):
  10         return [16]
  11     elif type_.endswith("32"):
  12         return [32]
  13     elif type_.endswith("64"):
  14         return [64]
  15     else:
  16         return [32, 64]
  17
  18 def type_add_size(type_, size):
  19     if type_has_size(type_):
  20         return type_
  21     return type_ + str(size)
  22
  23 def get_const_field(type_):
  24     if type_ == "int32":
  25         return "i32"
  26     if type_ == "uint32":
  27         return "u32"
  28     if type_ == "int64":
  29         return "i64"
  30     if type_ == "uint64":
  31         return "u64"
  32     if type_ == "bool32":
  33         return "u32"
  34     if type_ == "float32":
  35         return "f32"
  36     if type_ == "float64":
  37         return "f64"
  38     raise Exception(str(type_))
  39     assert(0)
  40
  41 template = """\
  42 /*
  43  * Copyright (C) 2014 Intel Corporation
  44  *
  45  * Permission is hereby granted, free of charge, to any person obtaining a
  46  * copy of this software and associated documentation files (the "Software"),
  47  * to deal in the Software without restriction, including without limitation
  48  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  49  * and/or sell copies of the Software, and to permit persons to whom the
  50  * Software is furnished to do so, subject to the following conditions:
  51  *
  52  * The above copyright notice and this permission notice (including the next
  53  * paragraph) shall be included in all copies or substantial portions of the
  54  * Software.
  55  *
  56  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  57  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  58  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  59  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  60  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  61  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  62  * IN THE SOFTWARE.
  63  *
  64  * Authors:
  65  *    Jason Ekstrand (jason@jlekstrand.net)
  66  */
  67
  68 #include <math.h>
  69 #include "main/core.h"
  70 #include "util/rounding.h" /* for _mesa_roundeven */
  71 #include "util/half_float.h"
  72 #include "nir_constant_expressions.h"
  73
  74 /**
  75  * Evaluate one component of packSnorm4x8.
  76  */
  77 static uint8_t
  78 pack_snorm_1x8(float x)
  79 {
  80     /* From section 8.4 of the GLSL 4.30 spec:
  81      *
  82      *    packSnorm4x8
  83      *    ------------
  84      *    The conversion for component c of v to fixed point is done as
  85      *    follows:
  86      *
  87      *      packSnorm4x8: round(clamp(c, -1, +1) * 127.0)
  88      *
  89      * We must first cast the float to an int, because casting a negative
  90      * float to a uint is undefined.
  91      */
  92    return (uint8_t) (int)
  93           _mesa_roundevenf(CLAMP(x, -1.0f, +1.0f) * 127.0f);
  94 }
  95
  96 /**
  97  * Evaluate one component of packSnorm2x16.
  98  */
  99 static uint16_t
 100 pack_snorm_1x16(float x)
 101 {
 102     /* From section 8.4 of the GLSL ES 3.00 spec:
 103      *
 104      *    packSnorm2x16
 105      *    -------------
 106      *    The conversion for component c of v to fixed point is done as
 107      *    follows:
 108      *
 109      *      packSnorm2x16: round(clamp(c, -1, +1) * 32767.0)
 110      *
 111      * We must first cast the float to an int, because casting a negative
 112      * float to a uint is undefined.
 113      */
 114    return (uint16_t) (int)
 115           _mesa_roundevenf(CLAMP(x, -1.0f, +1.0f) * 32767.0f);
 116 }
 117
 118 /**
 119  * Evaluate one component of unpackSnorm4x8.
 120  */
 121 static float
 122 unpack_snorm_1x8(uint8_t u)
 123 {
 124     /* From section 8.4 of the GLSL 4.30 spec:
 125      *
 126      *    unpackSnorm4x8
 127      *    --------------
 128      *    The conversion for unpacked fixed-point value f to floating point is
 129      *    done as follows:
 130      *
 131      *       unpackSnorm4x8: clamp(f / 127.0, -1, +1)
 132      */
 133    return CLAMP((int8_t) u / 127.0f, -1.0f, +1.0f);
 134 }
 135
 136 /**
 137  * Evaluate one component of unpackSnorm2x16.
 138  */
 139 static float
 140 unpack_snorm_1x16(uint16_t u)
 141 {
 142     /* From section 8.4 of the GLSL ES 3.00 spec:
 143      *
 144      *    unpackSnorm2x16
 145      *    ---------------
 146      *    The conversion for unpacked fixed-point value f to floating point is
 147      *    done as follows:
 148      *
 149      *       unpackSnorm2x16: clamp(f / 32767.0, -1, +1)
 150      */
 151    return CLAMP((int16_t) u / 32767.0f, -1.0f, +1.0f);
 152 }
 153
 154 /**
 155  * Evaluate one component packUnorm4x8.
 156  */
 157 static uint8_t
 158 pack_unorm_1x8(float x)
 159 {
 160     /* From section 8.4 of the GLSL 4.30 spec:
 161      *
 162      *    packUnorm4x8
 163      *    ------------
 164      *    The conversion for component c of v to fixed point is done as
 165      *    follows:
 166      *
 167      *       packUnorm4x8: round(clamp(c, 0, +1) * 255.0)
 168      */
 169    return (uint8_t) (int)
 170           _mesa_roundevenf(CLAMP(x, 0.0f, 1.0f) * 255.0f);
 171 }
 172
 173 /**
 174  * Evaluate one component packUnorm2x16.
 175  */
 176 static uint16_t
 177 pack_unorm_1x16(float x)
 178 {
 179     /* From section 8.4 of the GLSL ES 3.00 spec:
 180      *
 181      *    packUnorm2x16
 182      *    -------------
 183      *    The conversion for component c of v to fixed point is done as
 184      *    follows:
 185      *
 186      *       packUnorm2x16: round(clamp(c, 0, +1) * 65535.0)
 187      */
 188    return (uint16_t) (int)
 189           _mesa_roundevenf(CLAMP(x, 0.0f, 1.0f) * 65535.0f);
 190 }
 191
 192 /**
 193  * Evaluate one component of unpackUnorm4x8.
 194  */
 195 static float
 196 unpack_unorm_1x8(uint8_t u)
 197 {
 198     /* From section 8.4 of the GLSL 4.30 spec:
 199      *
 200      *    unpackUnorm4x8
 201      *    --------------
 202      *    The conversion for unpacked fixed-point value f to floating point is
 203      *    done as follows:
 204      *
 205      *       unpackUnorm4x8: f / 255.0
 206      */
 207    return (float) u / 255.0f;
 208 }
 209
 210 /**
 211  * Evaluate one component of unpackUnorm2x16.
 212  */
 213 static float
 214 unpack_unorm_1x16(uint16_t u)
 215 {
 216     /* From section 8.4 of the GLSL ES 3.00 spec:
 217      *
 218      *    unpackUnorm2x16
 219      *    ---------------
 220      *    The conversion for unpacked fixed-point value f to floating point is
 221      *    done as follows:
 222      *
 223      *       unpackUnorm2x16: f / 65535.0
 224      */
 225    return (float) u / 65535.0f;
 226 }
 227
 228 /**
 229  * Evaluate one component of packHalf2x16.
 230  */
 231 static uint16_t
 232 pack_half_1x16(float x)
 233 {
 234    return _mesa_float_to_half(x);
 235 }
 236
 237 /**
 238  * Evaluate one component of unpackHalf2x16.
 239  */
 240 static float
 241 unpack_half_1x16(uint16_t u)
 242 {
 243    return _mesa_half_to_float(u);
 244 }
 245
 246 /* Some typed vector structures to make things like src0.y work */
 247 typedef float float32_t;
 248 typedef double float64_t;
 249 typedef bool bool32_t;
 250 % for type in ["float", "int", "uint"]:
 251 % for width in [32, 64]:
 252 struct ${type}${width}_vec {
 253    ${type}${width}_t x;
 254    ${type}${width}_t y;
 255    ${type}${width}_t z;
 256    ${type}${width}_t w;
 257 };
 258 % endfor
 259 % endfor
 260
 261 struct bool32_vec {
 262     bool x;
 263     bool y;
 264     bool z;
 265     bool w;
 266 };
 267
 268 % for name, op in sorted(opcodes.iteritems()):
 269 static nir_const_value
 270 evaluate_${name}(MAYBE_UNUSED unsigned num_components, unsigned bit_size,
 271                  MAYBE_UNUSED nir_const_value *_src)
 272 {
 273    nir_const_value _dst_val = { {0, } };
 274
 275    switch (bit_size) {
 276    % for bit_size in [32, 64]:
 277    case ${bit_size}: {
 278       <%
 279       output_type = type_add_size(op.output_type, bit_size)
 280       input_types = [type_add_size(type_, bit_size) for type_ in op.input_types]
 281       %>
 282
 283       ## For each non-per-component input, create a variable srcN that
 284       ## contains x, y, z, and w elements which are filled in with the
 285       ## appropriately-typed values.
 286       % for j in range(op.num_inputs):
 287          % if op.input_sizes[j] == 0:
 288             <% continue %>
 289          % elif "src" + str(j) not in op.const_expr:
 290             ## Avoid unused variable warnings
 291             <% continue %>
 292          %endif
 293
 294          struct ${input_types[j]}_vec src${j} = {
 295          % for k in range(op.input_sizes[j]):
 296             % if input_types[j] == "bool32":
 297                _src[${j}].u32[${k}] != 0,
 298             % else:
 299                _src[${j}].${get_const_field(input_types[j])}[${k}],
 300             % endif
 301          % endfor
 302          % for k in range(op.input_sizes[j], 4):
 303             0,
 304          % endfor
 305          };
 306       % endfor
 307
 308       % if op.output_size == 0:
 309          ## For per-component instructions, we need to iterate over the
 310          ## components and apply the constant expression one component
 311          ## at a time.
 312          for (unsigned _i = 0; _i < num_components; _i++) {
 313             ## For each per-component input, create a variable srcN that
 314             ## contains the value of the current (_i'th) component.
 315             % for j in range(op.num_inputs):
 316                % if op.input_sizes[j] != 0:
 317                   <% continue %>
 318                % elif "src" + str(j) not in op.const_expr:
 319                   ## Avoid unused variable warnings
 320                   <% continue %>
 321                % elif input_types[j] == "bool32":
 322                   bool src${j} = _src[${j}].u32[_i] != 0;
 323                % else:
 324                   ${input_types[j]}_t src${j} =
 325                      _src[${j}].${get_const_field(input_types[j])}[_i];
 326                % endif
 327             % endfor
 328
 329             ## Create an appropriately-typed variable dst and assign the
 330             ## result of the const_expr to it.  If const_expr already contains
 331             ## writes to dst, just include const_expr directly.
 332             % if "dst" in op.const_expr:
 333                ${output_type}_t dst;
 334                ${op.const_expr}
 335             % else:
 336                ${output_type}_t dst = ${op.const_expr};
 337             % endif
 338
 339             ## Store the current component of the actual destination to the
 340             ## value of dst.
 341             % if output_type == "bool32":
 342                ## Sanitize the C value to a proper NIR bool
 343                _dst_val.u32[_i] = dst ? NIR_TRUE : NIR_FALSE;
 344             % else:
 345                _dst_val.${get_const_field(output_type)}[_i] = dst;
 346             % endif
 347          }
 348       % else:
 349          ## In the non-per-component case, create a struct dst with
 350          ## appropriately-typed elements x, y, z, and w and assign the result
 351          ## of the const_expr to all components of dst, or include the
 352          ## const_expr directly if it writes to dst already.
 353          struct ${output_type}_vec dst;
 354
 355          % if "dst" in op.const_expr:
 356             ${op.const_expr}
 357          % else:
 358             ## Splat the value to all components.  This way expressions which
 359             ## write the same value to all components don't need to explicitly
 360             ## write to dest.  One such example is fnoise which has a
 361             ## const_expr of 0.0f.
 362             dst.x = dst.y = dst.z = dst.w = ${op.const_expr};
 363          % endif
 364
 365          ## For each component in the destination, copy the value of dst to
 366          ## the actual destination.
 367          % for k in range(op.output_size):
 368             % if output_type == "bool32":
 369                ## Sanitize the C value to a proper NIR bool
 370                _dst_val.u32[${k}] = dst.${"xyzw"[k]} ? NIR_TRUE : NIR_FALSE;
 371             % else:
 372                _dst_val.${get_const_field(output_type)}[${k}] = dst.${"xyzw"[k]};
 373             % endif
 374          % endfor
 375       % endif
 376
 377       break;
 378    }
 379    % endfor
 380
 381    default:
 382       unreachable("unknown bit width");
 383    }
 384
 385    return _dst_val;
 386 }
 387 % endfor
 388
 389 nir_const_value
 390 nir_eval_const_opcode(nir_op op, unsigned num_components,
 391                       unsigned bit_width, nir_const_value *src)
 392 {
 393    switch (op) {
 394 % for name in sorted(opcodes.iterkeys()):
 395    case nir_op_${name}: {
 396       return evaluate_${name}(num_components, bit_width, src);
 397       break;
 398    }
 399 % endfor
 400    default:
 401       unreachable("shouldn't get here");
 402    }
 403 }"""
 404
 405 from nir_opcodes import opcodes
 406 from mako.template import Template
 407
 408 print Template(template).render(opcodes=opcodes, type_sizes=type_sizes,
 409                                 type_has_size=type_has_size,
 410                                 type_add_size=type_add_size,
 411                                 get_const_field=get_const_field)