src/compiler/nir/nir_constant_expressions.py

   1
   2 import re
   3
   4 type_split_re = re.compile(r'(?P<type>[a-z]+)(?P<bits>\d+)')
   5
   6 def type_has_size(type_):
   7     return type_[-1:].isdigit()
   8
   9 def type_size(type_):
  10     assert type_has_size(type_)
  11     return int(type_split_re.match(type_).group('bits'))
  12
  13 def type_sizes(type_):
  14     if type_has_size(type_):
  15         return [type_size(type_)]
  16     elif type_ == 'float':
  17         return [16, 32, 64]
  18     else:
  19         return [8, 16, 32, 64]
  20
  21 def type_add_size(type_, size):
  22     if type_has_size(type_):
  23         return type_
  24     return type_ + str(size)
  25
  26 def op_bit_sizes(op):
  27     sizes = None
  28     if not type_has_size(op.output_type):
  29         sizes = set(type_sizes(op.output_type))
  30
  31     for input_type in op.input_types:
  32         if not type_has_size(input_type):
  33             if sizes is None:
  34                 sizes = set(type_sizes(input_type))
  35             else:
  36                 sizes = sizes.intersection(set(type_sizes(input_type)))
  37
  38     return sorted(list(sizes)) if sizes is not None else None
  39
  40 def get_const_field(type_):
  41     if type_ == "bool32":
  42         return "u32"
  43     elif type_ == "float16":
  44         return "u16"
  45     else:
  46         m = type_split_re.match(type_)
  47         if not m:
  48             raise Exception(str(type_))
  49         return m.group('type')[0] + m.group('bits')
  50
  51 template = """\
  52 /*
  53  * Copyright (C) 2014 Intel Corporation
  54  *
  55  * Permission is hereby granted, free of charge, to any person obtaining a
  56  * copy of this software and associated documentation files (the "Software"),
  57  * to deal in the Software without restriction, including without limitation
  58  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  59  * and/or sell copies of the Software, and to permit persons to whom the
  60  * Software is furnished to do so, subject to the following conditions:
  61  *
  62  * The above copyright notice and this permission notice (including the next
  63  * paragraph) shall be included in all copies or substantial portions of the
  64  * Software.
  65  *
  66  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  67  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  68  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  69  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  70  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  71  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  72  * IN THE SOFTWARE.
  73  *
  74  * Authors:
  75  *    Jason Ekstrand (jason@jlekstrand.net)
  76  */
  77
  78 #include <math.h>
  79 #include "main/core.h"
  80 #include "util/rounding.h" /* for _mesa_roundeven */
  81 #include "util/half_float.h"
  82 #include "nir_constant_expressions.h"
  83
  84 /**
  85  * Evaluate one component of packSnorm4x8.
  86  */
  87 static uint8_t
  88 pack_snorm_1x8(float x)
  89 {
  90     /* From section 8.4 of the GLSL 4.30 spec:
  91      *
  92      *    packSnorm4x8
  93      *    ------------
  94      *    The conversion for component c of v to fixed point is done as
  95      *    follows:
  96      *
  97      *      packSnorm4x8: round(clamp(c, -1, +1) * 127.0)
  98      *
  99      * We must first cast the float to an int, because casting a negative
 100      * float to a uint is undefined.
 101      */
 102    return (uint8_t) (int)
 103           _mesa_roundevenf(CLAMP(x, -1.0f, +1.0f) * 127.0f);
 104 }
 105
 106 /**
 107  * Evaluate one component of packSnorm2x16.
 108  */
 109 static uint16_t
 110 pack_snorm_1x16(float x)
 111 {
 112     /* From section 8.4 of the GLSL ES 3.00 spec:
 113      *
 114      *    packSnorm2x16
 115      *    -------------
 116      *    The conversion for component c of v to fixed point is done as
 117      *    follows:
 118      *
 119      *      packSnorm2x16: round(clamp(c, -1, +1) * 32767.0)
 120      *
 121      * We must first cast the float to an int, because casting a negative
 122      * float to a uint is undefined.
 123      */
 124    return (uint16_t) (int)
 125           _mesa_roundevenf(CLAMP(x, -1.0f, +1.0f) * 32767.0f);
 126 }
 127
 128 /**
 129  * Evaluate one component of unpackSnorm4x8.
 130  */
 131 static float
 132 unpack_snorm_1x8(uint8_t u)
 133 {
 134     /* From section 8.4 of the GLSL 4.30 spec:
 135      *
 136      *    unpackSnorm4x8
 137      *    --------------
 138      *    The conversion for unpacked fixed-point value f to floating point is
 139      *    done as follows:
 140      *
 141      *       unpackSnorm4x8: clamp(f / 127.0, -1, +1)
 142      */
 143    return CLAMP((int8_t) u / 127.0f, -1.0f, +1.0f);
 144 }
 145
 146 /**
 147  * Evaluate one component of unpackSnorm2x16.
 148  */
 149 static float
 150 unpack_snorm_1x16(uint16_t u)
 151 {
 152     /* From section 8.4 of the GLSL ES 3.00 spec:
 153      *
 154      *    unpackSnorm2x16
 155      *    ---------------
 156      *    The conversion for unpacked fixed-point value f to floating point is
 157      *    done as follows:
 158      *
 159      *       unpackSnorm2x16: clamp(f / 32767.0, -1, +1)
 160      */
 161    return CLAMP((int16_t) u / 32767.0f, -1.0f, +1.0f);
 162 }
 163
 164 /**
 165  * Evaluate one component packUnorm4x8.
 166  */
 167 static uint8_t
 168 pack_unorm_1x8(float x)
 169 {
 170     /* From section 8.4 of the GLSL 4.30 spec:
 171      *
 172      *    packUnorm4x8
 173      *    ------------
 174      *    The conversion for component c of v to fixed point is done as
 175      *    follows:
 176      *
 177      *       packUnorm4x8: round(clamp(c, 0, +1) * 255.0)
 178      */
 179    return (uint8_t) (int)
 180           _mesa_roundevenf(CLAMP(x, 0.0f, 1.0f) * 255.0f);
 181 }
 182
 183 /**
 184  * Evaluate one component packUnorm2x16.
 185  */
 186 static uint16_t
 187 pack_unorm_1x16(float x)
 188 {
 189     /* From section 8.4 of the GLSL ES 3.00 spec:
 190      *
 191      *    packUnorm2x16
 192      *    -------------
 193      *    The conversion for component c of v to fixed point is done as
 194      *    follows:
 195      *
 196      *       packUnorm2x16: round(clamp(c, 0, +1) * 65535.0)
 197      */
 198    return (uint16_t) (int)
 199           _mesa_roundevenf(CLAMP(x, 0.0f, 1.0f) * 65535.0f);
 200 }
 201
 202 /**
 203  * Evaluate one component of unpackUnorm4x8.
 204  */
 205 static float
 206 unpack_unorm_1x8(uint8_t u)
 207 {
 208     /* From section 8.4 of the GLSL 4.30 spec:
 209      *
 210      *    unpackUnorm4x8
 211      *    --------------
 212      *    The conversion for unpacked fixed-point value f to floating point is
 213      *    done as follows:
 214      *
 215      *       unpackUnorm4x8: f / 255.0
 216      */
 217    return (float) u / 255.0f;
 218 }
 219
 220 /**
 221  * Evaluate one component of unpackUnorm2x16.
 222  */
 223 static float
 224 unpack_unorm_1x16(uint16_t u)
 225 {
 226     /* From section 8.4 of the GLSL ES 3.00 spec:
 227      *
 228      *    unpackUnorm2x16
 229      *    ---------------
 230      *    The conversion for unpacked fixed-point value f to floating point is
 231      *    done as follows:
 232      *
 233      *       unpackUnorm2x16: f / 65535.0
 234      */
 235    return (float) u / 65535.0f;
 236 }
 237
 238 /**
 239  * Evaluate one component of packHalf2x16.
 240  */
 241 static uint16_t
 242 pack_half_1x16(float x)
 243 {
 244    return _mesa_float_to_half(x);
 245 }
 246
 247 /**
 248  * Evaluate one component of unpackHalf2x16.
 249  */
 250 static float
 251 unpack_half_1x16(uint16_t u)
 252 {
 253    return _mesa_half_to_float(u);
 254 }
 255
 256 /* Some typed vector structures to make things like src0.y work */
 257 typedef float float16_t;
 258 typedef float float32_t;
 259 typedef double float64_t;
 260 typedef bool bool32_t;
 261 % for type in ["float", "int", "uint"]:
 262 % for width in type_sizes(type):
 263 struct ${type}${width}_vec {
 264    ${type}${width}_t x;
 265    ${type}${width}_t y;
 266    ${type}${width}_t z;
 267    ${type}${width}_t w;
 268 };
 269 % endfor
 270 % endfor
 271
 272 struct bool32_vec {
 273     bool x;
 274     bool y;
 275     bool z;
 276     bool w;
 277 };
 278
 279 <%def name="evaluate_op(op, bit_size)">
 280    <%
 281    output_type = type_add_size(op.output_type, bit_size)
 282    input_types = [type_add_size(type_, bit_size) for type_ in op.input_types]
 283    %>
 284
 285    ## For each non-per-component input, create a variable srcN that
 286    ## contains x, y, z, and w elements which are filled in with the
 287    ## appropriately-typed values.
 288    % for j in range(op.num_inputs):
 289       % if op.input_sizes[j] == 0:
 290          <% continue %>
 291       % elif "src" + str(j) not in op.const_expr:
 292          ## Avoid unused variable warnings
 293          <% continue %>
 294       %endif
 295
 296       const struct ${input_types[j]}_vec src${j} = {
 297       % for k in range(op.input_sizes[j]):
 298          % if input_types[j] == "bool32":
 299             _src[${j}].u32[${k}] != 0,
 300          % elif input_types[j] == "float16":
 301             _mesa_half_to_float(_src[${j}].u16[${k}]),
 302          % else:
 303             _src[${j}].${get_const_field(input_types[j])}[${k}],
 304          % endif
 305       % endfor
 306       % for k in range(op.input_sizes[j], 4):
 307          0,
 308       % endfor
 309       };
 310    % endfor
 311
 312    % if op.output_size == 0:
 313       ## For per-component instructions, we need to iterate over the
 314       ## components and apply the constant expression one component
 315       ## at a time.
 316       for (unsigned _i = 0; _i < num_components; _i++) {
 317          ## For each per-component input, create a variable srcN that
 318          ## contains the value of the current (_i'th) component.
 319          % for j in range(op.num_inputs):
 320             % if op.input_sizes[j] != 0:
 321                <% continue %>
 322             % elif "src" + str(j) not in op.const_expr:
 323                ## Avoid unused variable warnings
 324                <% continue %>
 325             % elif input_types[j] == "bool32":
 326                const bool src${j} = _src[${j}].u32[_i] != 0;
 327             % elif input_types[j] == "float16":
 328                const float src${j} =
 329                   _mesa_half_to_float(_src[${j}].u16[_i]);
 330             % else:
 331                const ${input_types[j]}_t src${j} =
 332                   _src[${j}].${get_const_field(input_types[j])}[_i];
 333             % endif
 334          % endfor
 335
 336          ## Create an appropriately-typed variable dst and assign the
 337          ## result of the const_expr to it.  If const_expr already contains
 338          ## writes to dst, just include const_expr directly.
 339          % if "dst" in op.const_expr:
 340             ${output_type}_t dst;
 341
 342             ${op.const_expr}
 343          % else:
 344             ${output_type}_t dst = ${op.const_expr};
 345          % endif
 346
 347          ## Store the current component of the actual destination to the
 348          ## value of dst.
 349          % if output_type == "bool32":
 350             ## Sanitize the C value to a proper NIR bool
 351             _dst_val.u32[_i] = dst ? NIR_TRUE : NIR_FALSE;
 352          % elif output_type == "float16":
 353             _dst_val.u16[_i] = _mesa_float_to_half(dst);
 354          % else:
 355             _dst_val.${get_const_field(output_type)}[_i] = dst;
 356          % endif
 357       }
 358    % else:
 359       ## In the non-per-component case, create a struct dst with
 360       ## appropriately-typed elements x, y, z, and w and assign the result
 361       ## of the const_expr to all components of dst, or include the
 362       ## const_expr directly if it writes to dst already.
 363       struct ${output_type}_vec dst;
 364
 365       % if "dst" in op.const_expr:
 366          ${op.const_expr}
 367       % else:
 368          ## Splat the value to all components.  This way expressions which
 369          ## write the same value to all components don't need to explicitly
 370          ## write to dest.  One such example is fnoise which has a
 371          ## const_expr of 0.0f.
 372          dst.x = dst.y = dst.z = dst.w = ${op.const_expr};
 373       % endif
 374
 375       ## For each component in the destination, copy the value of dst to
 376       ## the actual destination.
 377       % for k in range(op.output_size):
 378          % if output_type == "bool32":
 379             ## Sanitize the C value to a proper NIR bool
 380             _dst_val.u32[${k}] = dst.${"xyzw"[k]} ? NIR_TRUE : NIR_FALSE;
 381          % elif output_type == "float16":
 382             _dst_val.u16[${k}] = _mesa_float_to_half(dst.${"xyzw"[k]});
 383          % else:
 384             _dst_val.${get_const_field(output_type)}[${k}] = dst.${"xyzw"[k]};
 385          % endif
 386       % endfor
 387    % endif
 388 </%def>
 389
 390 % for name, op in sorted(opcodes.iteritems()):
 391 static nir_const_value
 392 evaluate_${name}(MAYBE_UNUSED unsigned num_components,
 393                  ${"UNUSED" if op_bit_sizes(op) is None else ""} unsigned bit_size,
 394                  MAYBE_UNUSED nir_const_value *_src)
 395 {
 396    nir_const_value _dst_val = { {0, } };
 397
 398    % if op_bit_sizes(op) is not None:
 399       switch (bit_size) {
 400       % for bit_size in op_bit_sizes(op):
 401       case ${bit_size}: {
 402          ${evaluate_op(op, bit_size)}
 403          break;
 404       }
 405       % endfor
 406
 407       default:
 408          unreachable("unknown bit width");
 409       }
 410    % else:
 411       ${evaluate_op(op, 0)}
 412    % endif
 413
 414    return _dst_val;
 415 }
 416 % endfor
 417
 418 nir_const_value
 419 nir_eval_const_opcode(nir_op op, unsigned num_components,
 420                       unsigned bit_width, nir_const_value *src)
 421 {
 422    switch (op) {
 423 % for name in sorted(opcodes.iterkeys()):
 424    case nir_op_${name}:
 425       return evaluate_${name}(num_components, bit_width, src);
 426 % endfor
 427    default:
 428       unreachable("shouldn't get here");
 429    }
 430 }"""
 431
 432 from nir_opcodes import opcodes
 433 from mako.template import Template
 434
 435 print Template(template).render(opcodes=opcodes, type_sizes=type_sizes,
 436                                 type_has_size=type_has_size,
 437                                 type_add_size=type_add_size,
 438                                 op_bit_sizes=op_bit_sizes,
 439                                 get_const_field=get_const_field)