src/compiler/nir/nir_constant_expressions.py

   1
   2 import re
   3
# Splits a sized type name such as "float32" into two named groups: the
# lowercase alphabetic base type ("type", e.g. "float") and the decimal bit
# size that follows it ("bits", e.g. "32").
type_split_re = re.compile(r'(?P<type>[a-z]+)(?P<bits>\d+)')
   5
   6 def type_has_size(type_):
   7     return type_[-1:].isdigit()
   8
   9 def type_size(type_):
  10     assert type_has_size(type_)
  11     return int(type_split_re.match(type_).group('bits'))
  12
  13 def type_sizes(type_):
  14     if type_has_size(type_):
  15         return [type_size(type_)]
  16     else:
  17         return [32, 64]
  18
  19 def type_add_size(type_, size):
  20     if type_has_size(type_):
  21         return type_
  22     return type_ + str(size)
  23
  24 def op_bit_sizes(op):
  25     sizes = set([8, 16, 32, 64])
  26     if not type_has_size(op.output_type):
  27         sizes = sizes.intersection(set(type_sizes(op.output_type)))
  28     for input_type in op.input_types:
  29         if not type_has_size(input_type):
  30             sizes = sizes.intersection(set(type_sizes(input_type)))
  31     return sorted(list(sizes))
  32
  33 def get_const_field(type_):
  34     if type_ == "bool32":
  35         return "u32"
  36     else:
  37         m = type_split_re.match(type_)
  38         if not m:
  39             raise Exception(str(type_))
  40         return m.group('type')[0] + m.group('bits')
  41
  42 template = """\
  43 /*
  44  * Copyright (C) 2014 Intel Corporation
  45  *
  46  * Permission is hereby granted, free of charge, to any person obtaining a
  47  * copy of this software and associated documentation files (the "Software"),
  48  * to deal in the Software without restriction, including without limitation
  49  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  50  * and/or sell copies of the Software, and to permit persons to whom the
  51  * Software is furnished to do so, subject to the following conditions:
  52  *
  53  * The above copyright notice and this permission notice (including the next
  54  * paragraph) shall be included in all copies or substantial portions of the
  55  * Software.
  56  *
  57  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  58  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  59  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  60  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  61  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  62  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  63  * IN THE SOFTWARE.
  64  *
  65  * Authors:
  66  *    Jason Ekstrand (jason@jlekstrand.net)
  67  */
  68
  69 #include <math.h>
  70 #include "main/core.h"
  71 #include "util/rounding.h" /* for _mesa_roundeven */
  72 #include "util/half_float.h"
  73 #include "nir_constant_expressions.h"
  74
  75 /**
  76  * Evaluate one component of packSnorm4x8.
  77  */
  78 static uint8_t
  79 pack_snorm_1x8(float x)
  80 {
  81     /* From section 8.4 of the GLSL 4.30 spec:
  82      *
  83      *    packSnorm4x8
  84      *    ------------
  85      *    The conversion for component c of v to fixed point is done as
  86      *    follows:
  87      *
  88      *      packSnorm4x8: round(clamp(c, -1, +1) * 127.0)
  89      *
  90      * We must first cast the float to an int, because casting a negative
  91      * float to a uint is undefined.
  92      */
  93    return (uint8_t) (int)
  94           _mesa_roundevenf(CLAMP(x, -1.0f, +1.0f) * 127.0f);
  95 }
  96
  97 /**
  98  * Evaluate one component of packSnorm2x16.
  99  */
 100 static uint16_t
 101 pack_snorm_1x16(float x)
 102 {
 103     /* From section 8.4 of the GLSL ES 3.00 spec:
 104      *
 105      *    packSnorm2x16
 106      *    -------------
 107      *    The conversion for component c of v to fixed point is done as
 108      *    follows:
 109      *
 110      *      packSnorm2x16: round(clamp(c, -1, +1) * 32767.0)
 111      *
 112      * We must first cast the float to an int, because casting a negative
 113      * float to a uint is undefined.
 114      */
 115    return (uint16_t) (int)
 116           _mesa_roundevenf(CLAMP(x, -1.0f, +1.0f) * 32767.0f);
 117 }
 118
 119 /**
 120  * Evaluate one component of unpackSnorm4x8.
 121  */
 122 static float
 123 unpack_snorm_1x8(uint8_t u)
 124 {
 125     /* From section 8.4 of the GLSL 4.30 spec:
 126      *
 127      *    unpackSnorm4x8
 128      *    --------------
 129      *    The conversion for unpacked fixed-point value f to floating point is
 130      *    done as follows:
 131      *
 132      *       unpackSnorm4x8: clamp(f / 127.0, -1, +1)
 133      */
 134    return CLAMP((int8_t) u / 127.0f, -1.0f, +1.0f);
 135 }
 136
 137 /**
 138  * Evaluate one component of unpackSnorm2x16.
 139  */
 140 static float
 141 unpack_snorm_1x16(uint16_t u)
 142 {
 143     /* From section 8.4 of the GLSL ES 3.00 spec:
 144      *
 145      *    unpackSnorm2x16
 146      *    ---------------
 147      *    The conversion for unpacked fixed-point value f to floating point is
 148      *    done as follows:
 149      *
 150      *       unpackSnorm2x16: clamp(f / 32767.0, -1, +1)
 151      */
 152    return CLAMP((int16_t) u / 32767.0f, -1.0f, +1.0f);
 153 }
 154
 155 /**
 156  * Evaluate one component packUnorm4x8.
 157  */
 158 static uint8_t
 159 pack_unorm_1x8(float x)
 160 {
 161     /* From section 8.4 of the GLSL 4.30 spec:
 162      *
 163      *    packUnorm4x8
 164      *    ------------
 165      *    The conversion for component c of v to fixed point is done as
 166      *    follows:
 167      *
 168      *       packUnorm4x8: round(clamp(c, 0, +1) * 255.0)
 169      */
 170    return (uint8_t) (int)
 171           _mesa_roundevenf(CLAMP(x, 0.0f, 1.0f) * 255.0f);
 172 }
 173
 174 /**
 175  * Evaluate one component packUnorm2x16.
 176  */
 177 static uint16_t
 178 pack_unorm_1x16(float x)
 179 {
 180     /* From section 8.4 of the GLSL ES 3.00 spec:
 181      *
 182      *    packUnorm2x16
 183      *    -------------
 184      *    The conversion for component c of v to fixed point is done as
 185      *    follows:
 186      *
 187      *       packUnorm2x16: round(clamp(c, 0, +1) * 65535.0)
 188      */
 189    return (uint16_t) (int)
 190           _mesa_roundevenf(CLAMP(x, 0.0f, 1.0f) * 65535.0f);
 191 }
 192
 193 /**
 194  * Evaluate one component of unpackUnorm4x8.
 195  */
 196 static float
 197 unpack_unorm_1x8(uint8_t u)
 198 {
 199     /* From section 8.4 of the GLSL 4.30 spec:
 200      *
 201      *    unpackUnorm4x8
 202      *    --------------
 203      *    The conversion for unpacked fixed-point value f to floating point is
 204      *    done as follows:
 205      *
 206      *       unpackUnorm4x8: f / 255.0
 207      */
 208    return (float) u / 255.0f;
 209 }
 210
 211 /**
 212  * Evaluate one component of unpackUnorm2x16.
 213  */
 214 static float
 215 unpack_unorm_1x16(uint16_t u)
 216 {
 217     /* From section 8.4 of the GLSL ES 3.00 spec:
 218      *
 219      *    unpackUnorm2x16
 220      *    ---------------
 221      *    The conversion for unpacked fixed-point value f to floating point is
 222      *    done as follows:
 223      *
 224      *       unpackUnorm2x16: f / 65535.0
 225      */
 226    return (float) u / 65535.0f;
 227 }
 228
 229 /**
 230  * Evaluate one component of packHalf2x16.
 231  */
 232 static uint16_t
 233 pack_half_1x16(float x)
 234 {
 235    return _mesa_float_to_half(x);
 236 }
 237
 238 /**
 239  * Evaluate one component of unpackHalf2x16.
 240  */
 241 static float
 242 unpack_half_1x16(uint16_t u)
 243 {
 244    return _mesa_half_to_float(u);
 245 }
 246
 247 /* Some typed vector structures to make things like src0.y work */
 248 typedef float float32_t;
 249 typedef double float64_t;
 250 typedef bool bool32_t;
 251 % for type in ["float", "int", "uint"]:
 252 % for width in type_sizes(type):
 253 struct ${type}${width}_vec {
 254    ${type}${width}_t x;
 255    ${type}${width}_t y;
 256    ${type}${width}_t z;
 257    ${type}${width}_t w;
 258 };
 259 % endfor
 260 % endfor
 261
 262 struct bool32_vec {
 263     bool x;
 264     bool y;
 265     bool z;
 266     bool w;
 267 };
 268
 269 <%def name="evaluate_op(op, bit_size)">
 270    <%
 271    output_type = type_add_size(op.output_type, bit_size)
 272    input_types = [type_add_size(type_, bit_size) for type_ in op.input_types]
 273    %>
 274
 275    ## For each non-per-component input, create a variable srcN that
 276    ## contains x, y, z, and w elements which are filled in with the
 277    ## appropriately-typed values.
 278    % for j in range(op.num_inputs):
 279       % if op.input_sizes[j] == 0:
 280          <% continue %>
 281       % elif "src" + str(j) not in op.const_expr:
 282          ## Avoid unused variable warnings
 283          <% continue %>
 284       %endif
 285
 286       const struct ${input_types[j]}_vec src${j} = {
 287       % for k in range(op.input_sizes[j]):
 288          % if input_types[j] == "bool32":
 289             _src[${j}].u32[${k}] != 0,
 290          % else:
 291             _src[${j}].${get_const_field(input_types[j])}[${k}],
 292          % endif
 293       % endfor
 294       % for k in range(op.input_sizes[j], 4):
 295          0,
 296       % endfor
 297       };
 298    % endfor
 299
 300    % if op.output_size == 0:
 301       ## For per-component instructions, we need to iterate over the
 302       ## components and apply the constant expression one component
 303       ## at a time.
 304       for (unsigned _i = 0; _i < num_components; _i++) {
 305          ## For each per-component input, create a variable srcN that
 306          ## contains the value of the current (_i'th) component.
 307          % for j in range(op.num_inputs):
 308             % if op.input_sizes[j] != 0:
 309                <% continue %>
 310             % elif "src" + str(j) not in op.const_expr:
 311                ## Avoid unused variable warnings
 312                <% continue %>
 313             % elif input_types[j] == "bool32":
 314                const bool src${j} = _src[${j}].u32[_i] != 0;
 315             % else:
 316                const ${input_types[j]}_t src${j} =
 317                   _src[${j}].${get_const_field(input_types[j])}[_i];
 318             % endif
 319          % endfor
 320
 321          ## Create an appropriately-typed variable dst and assign the
 322          ## result of the const_expr to it.  If const_expr already contains
 323          ## writes to dst, just include const_expr directly.
 324          % if "dst" in op.const_expr:
 325             ${output_type}_t dst;
 326
 327             ${op.const_expr}
 328          % else:
 329             ${output_type}_t dst = ${op.const_expr};
 330          % endif
 331
 332          ## Store the current component of the actual destination to the
 333          ## value of dst.
 334          % if output_type == "bool32":
 335             ## Sanitize the C value to a proper NIR bool
 336             _dst_val.u32[_i] = dst ? NIR_TRUE : NIR_FALSE;
 337          % else:
 338             _dst_val.${get_const_field(output_type)}[_i] = dst;
 339          % endif
 340       }
 341    % else:
 342       ## In the non-per-component case, create a struct dst with
 343       ## appropriately-typed elements x, y, z, and w and assign the result
 344       ## of the const_expr to all components of dst, or include the
 345       ## const_expr directly if it writes to dst already.
 346       struct ${output_type}_vec dst;
 347
 348       % if "dst" in op.const_expr:
 349          ${op.const_expr}
 350       % else:
 351          ## Splat the value to all components.  This way expressions which
 352          ## write the same value to all components don't need to explicitly
 353          ## write to dest.  One such example is fnoise which has a
 354          ## const_expr of 0.0f.
 355          dst.x = dst.y = dst.z = dst.w = ${op.const_expr};
 356       % endif
 357
 358       ## For each component in the destination, copy the value of dst to
 359       ## the actual destination.
 360       % for k in range(op.output_size):
 361          % if output_type == "bool32":
 362             ## Sanitize the C value to a proper NIR bool
 363             _dst_val.u32[${k}] = dst.${"xyzw"[k]} ? NIR_TRUE : NIR_FALSE;
 364          % else:
 365             _dst_val.${get_const_field(output_type)}[${k}] = dst.${"xyzw"[k]};
 366          % endif
 367       % endfor
 368    % endif
 369 </%def>
 370
 371 % for name, op in sorted(opcodes.iteritems()):
 372 static nir_const_value
 373 evaluate_${name}(MAYBE_UNUSED unsigned num_components, unsigned bit_size,
 374                  MAYBE_UNUSED nir_const_value *_src)
 375 {
 376    nir_const_value _dst_val = { {0, } };
 377
 378    switch (bit_size) {
 379    % for bit_size in op_bit_sizes(op):
 380    case ${bit_size}: {
 381       ${evaluate_op(op, bit_size)}
 382       break;
 383    }
 384    % endfor
 385
 386    default:
 387       unreachable("unknown bit width");
 388    }
 389
 390    return _dst_val;
 391 }
 392 % endfor
 393
 394 nir_const_value
 395 nir_eval_const_opcode(nir_op op, unsigned num_components,
 396                       unsigned bit_width, nir_const_value *src)
 397 {
 398    switch (op) {
 399 % for name in sorted(opcodes.iterkeys()):
 400    case nir_op_${name}:
 401       return evaluate_${name}(num_components, bit_width, src);
 402 % endfor
 403    default:
 404       unreachable("shouldn't get here");
 405    }
 406 }"""
 407
 408 from nir_opcodes import opcodes
 409 from mako.template import Template
 410
 411 print Template(template).render(opcodes=opcodes, type_sizes=type_sizes,
 412                                 type_has_size=type_has_size,
 413                                 type_add_size=type_add_size,
 414                                 op_bit_sizes=op_bit_sizes,
 415                                 get_const_field=get_const_field)