src/compiler/nir/nir_constant_expressions.py

   1
   2 import re
   3
# Splits a sized type name such as "float32" into a lowercase alphabetic
# prefix (group "type") followed by a run of decimal digits (group "bits").
# The pattern is only anchored at the start when used with .match().
type_split_re = re.compile(r'(?P<type>[a-z]+)(?P<bits>\d+)')
   5
   6 def type_has_size(type_):
   7     return type_[-1:].isdigit()
   8
   9 def type_size(type_):
  10     assert type_has_size(type_)
  11     return int(type_split_re.match(type_).group('bits'))
  12
  13 def type_sizes(type_):
  14     if type_has_size(type_):
  15         return [type_size(type_)]
  16     elif type_ == 'float':
  17         return [16, 32, 64]
  18     else:
  19         return [8, 16, 32, 64]
  20
  21 def type_add_size(type_, size):
  22     if type_has_size(type_):
  23         return type_
  24     return type_ + str(size)
  25
  26 def op_bit_sizes(op):
  27     sizes = None
  28     if not type_has_size(op.output_type):
  29         sizes = set(type_sizes(op.output_type))
  30
  31     for input_type in op.input_types:
  32         if not type_has_size(input_type):
  33             if sizes is None:
  34                 sizes = set(type_sizes(input_type))
  35             else:
  36                 sizes = sizes.intersection(set(type_sizes(input_type)))
  37
  38     return sorted(list(sizes)) if sizes is not None else None
  39
  40 def get_const_field(type_):
  41     if type_ == "bool32":
  42         return "u32"
  43     elif type_ == "float16":
  44         return "u16"
  45     else:
  46         m = type_split_re.match(type_)
  47         if not m:
  48             raise Exception(str(type_))
  49         return m.group('type')[0] + m.group('bits')
  50
  51 template = """\
  52 /*
  53  * Copyright (C) 2014 Intel Corporation
  54  *
  55  * Permission is hereby granted, free of charge, to any person obtaining a
  56  * copy of this software and associated documentation files (the "Software"),
  57  * to deal in the Software without restriction, including without limitation
  58  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  59  * and/or sell copies of the Software, and to permit persons to whom the
  60  * Software is furnished to do so, subject to the following conditions:
  61  *
  62  * The above copyright notice and this permission notice (including the next
  63  * paragraph) shall be included in all copies or substantial portions of the
  64  * Software.
  65  *
  66  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  67  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  68  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  69  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  70  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  71  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  72  * IN THE SOFTWARE.
  73  *
  74  * Authors:
  75  *    Jason Ekstrand (jason@jlekstrand.net)
  76  */
  77
  78 #include <math.h>
  79 #include "util/rounding.h" /* for _mesa_roundeven */
  80 #include "util/half_float.h"
  81 #include "nir_constant_expressions.h"
  82
  83 /**
  84  * Evaluate one component of packSnorm4x8.
  85  */
  86 static uint8_t
  87 pack_snorm_1x8(float x)
  88 {
  89     /* From section 8.4 of the GLSL 4.30 spec:
  90      *
  91      *    packSnorm4x8
  92      *    ------------
  93      *    The conversion for component c of v to fixed point is done as
  94      *    follows:
  95      *
  96      *      packSnorm4x8: round(clamp(c, -1, +1) * 127.0)
  97      *
  98      * We must first cast the float to an int, because casting a negative
  99      * float to a uint is undefined.
 100      */
 101    return (uint8_t) (int)
 102           _mesa_roundevenf(CLAMP(x, -1.0f, +1.0f) * 127.0f);
 103 }
 104
 105 /**
 106  * Evaluate one component of packSnorm2x16.
 107  */
 108 static uint16_t
 109 pack_snorm_1x16(float x)
 110 {
 111     /* From section 8.4 of the GLSL ES 3.00 spec:
 112      *
 113      *    packSnorm2x16
 114      *    -------------
 115      *    The conversion for component c of v to fixed point is done as
 116      *    follows:
 117      *
 118      *      packSnorm2x16: round(clamp(c, -1, +1) * 32767.0)
 119      *
 120      * We must first cast the float to an int, because casting a negative
 121      * float to a uint is undefined.
 122      */
 123    return (uint16_t) (int)
 124           _mesa_roundevenf(CLAMP(x, -1.0f, +1.0f) * 32767.0f);
 125 }
 126
 127 /**
 128  * Evaluate one component of unpackSnorm4x8.
 129  */
 130 static float
 131 unpack_snorm_1x8(uint8_t u)
 132 {
 133     /* From section 8.4 of the GLSL 4.30 spec:
 134      *
 135      *    unpackSnorm4x8
 136      *    --------------
 137      *    The conversion for unpacked fixed-point value f to floating point is
 138      *    done as follows:
 139      *
 140      *       unpackSnorm4x8: clamp(f / 127.0, -1, +1)
 141      */
 142    return CLAMP((int8_t) u / 127.0f, -1.0f, +1.0f);
 143 }
 144
 145 /**
 146  * Evaluate one component of unpackSnorm2x16.
 147  */
 148 static float
 149 unpack_snorm_1x16(uint16_t u)
 150 {
 151     /* From section 8.4 of the GLSL ES 3.00 spec:
 152      *
 153      *    unpackSnorm2x16
 154      *    ---------------
 155      *    The conversion for unpacked fixed-point value f to floating point is
 156      *    done as follows:
 157      *
 158      *       unpackSnorm2x16: clamp(f / 32767.0, -1, +1)
 159      */
 160    return CLAMP((int16_t) u / 32767.0f, -1.0f, +1.0f);
 161 }
 162
 163 /**
 164  * Evaluate one component packUnorm4x8.
 165  */
 166 static uint8_t
 167 pack_unorm_1x8(float x)
 168 {
 169     /* From section 8.4 of the GLSL 4.30 spec:
 170      *
 171      *    packUnorm4x8
 172      *    ------------
 173      *    The conversion for component c of v to fixed point is done as
 174      *    follows:
 175      *
 176      *       packUnorm4x8: round(clamp(c, 0, +1) * 255.0)
 177      */
 178    return (uint8_t) (int)
 179           _mesa_roundevenf(CLAMP(x, 0.0f, 1.0f) * 255.0f);
 180 }
 181
 182 /**
 183  * Evaluate one component packUnorm2x16.
 184  */
 185 static uint16_t
 186 pack_unorm_1x16(float x)
 187 {
 188     /* From section 8.4 of the GLSL ES 3.00 spec:
 189      *
 190      *    packUnorm2x16
 191      *    -------------
 192      *    The conversion for component c of v to fixed point is done as
 193      *    follows:
 194      *
 195      *       packUnorm2x16: round(clamp(c, 0, +1) * 65535.0)
 196      */
 197    return (uint16_t) (int)
 198           _mesa_roundevenf(CLAMP(x, 0.0f, 1.0f) * 65535.0f);
 199 }
 200
 201 /**
 202  * Evaluate one component of unpackUnorm4x8.
 203  */
 204 static float
 205 unpack_unorm_1x8(uint8_t u)
 206 {
 207     /* From section 8.4 of the GLSL 4.30 spec:
 208      *
 209      *    unpackUnorm4x8
 210      *    --------------
 211      *    The conversion for unpacked fixed-point value f to floating point is
 212      *    done as follows:
 213      *
 214      *       unpackUnorm4x8: f / 255.0
 215      */
 216    return (float) u / 255.0f;
 217 }
 218
 219 /**
 220  * Evaluate one component of unpackUnorm2x16.
 221  */
 222 static float
 223 unpack_unorm_1x16(uint16_t u)
 224 {
 225     /* From section 8.4 of the GLSL ES 3.00 spec:
 226      *
 227      *    unpackUnorm2x16
 228      *    ---------------
 229      *    The conversion for unpacked fixed-point value f to floating point is
 230      *    done as follows:
 231      *
 232      *       unpackUnorm2x16: f / 65535.0
 233      */
 234    return (float) u / 65535.0f;
 235 }
 236
 237 /**
 238  * Evaluate one component of packHalf2x16.
 239  */
 240 static uint16_t
 241 pack_half_1x16(float x)
 242 {
 243    return _mesa_float_to_half(x);
 244 }
 245
 246 /**
 247  * Evaluate one component of unpackHalf2x16.
 248  */
 249 static float
 250 unpack_half_1x16(uint16_t u)
 251 {
 252    return _mesa_half_to_float(u);
 253 }
 254
 255 /* Some typed vector structures to make things like src0.y work */
 256 typedef float float16_t;
 257 typedef float float32_t;
 258 typedef double float64_t;
 259 typedef bool bool32_t;
 260 % for type in ["float", "int", "uint"]:
 261 % for width in type_sizes(type):
 262 struct ${type}${width}_vec {
 263    ${type}${width}_t x;
 264    ${type}${width}_t y;
 265    ${type}${width}_t z;
 266    ${type}${width}_t w;
 267 };
 268 % endfor
 269 % endfor
 270
 271 struct bool32_vec {
 272     bool x;
 273     bool y;
 274     bool z;
 275     bool w;
 276 };
 277
 278 <%def name="evaluate_op(op, bit_size)">
 279    <%
 280    output_type = type_add_size(op.output_type, bit_size)
 281    input_types = [type_add_size(type_, bit_size) for type_ in op.input_types]
 282    %>
 283
 284    ## For each non-per-component input, create a variable srcN that
 285    ## contains x, y, z, and w elements which are filled in with the
 286    ## appropriately-typed values.
 287    % for j in range(op.num_inputs):
 288       % if op.input_sizes[j] == 0:
 289          <% continue %>
 290       % elif "src" + str(j) not in op.const_expr:
 291          ## Avoid unused variable warnings
 292          <% continue %>
 293       %endif
 294
 295       const struct ${input_types[j]}_vec src${j} = {
 296       % for k in range(op.input_sizes[j]):
 297          % if input_types[j] == "bool32":
 298             _src[${j}].u32[${k}] != 0,
 299          % elif input_types[j] == "float16":
 300             _mesa_half_to_float(_src[${j}].u16[${k}]),
 301          % else:
 302             _src[${j}].${get_const_field(input_types[j])}[${k}],
 303          % endif
 304       % endfor
 305       % for k in range(op.input_sizes[j], 4):
 306          0,
 307       % endfor
 308       };
 309    % endfor
 310
 311    % if op.output_size == 0:
 312       ## For per-component instructions, we need to iterate over the
 313       ## components and apply the constant expression one component
 314       ## at a time.
 315       for (unsigned _i = 0; _i < num_components; _i++) {
 316          ## For each per-component input, create a variable srcN that
 317          ## contains the value of the current (_i'th) component.
 318          % for j in range(op.num_inputs):
 319             % if op.input_sizes[j] != 0:
 320                <% continue %>
 321             % elif "src" + str(j) not in op.const_expr:
 322                ## Avoid unused variable warnings
 323                <% continue %>
 324             % elif input_types[j] == "bool32":
 325                const bool src${j} = _src[${j}].u32[_i] != 0;
 326             % elif input_types[j] == "float16":
 327                const float src${j} =
 328                   _mesa_half_to_float(_src[${j}].u16[_i]);
 329             % else:
 330                const ${input_types[j]}_t src${j} =
 331                   _src[${j}].${get_const_field(input_types[j])}[_i];
 332             % endif
 333          % endfor
 334
 335          ## Create an appropriately-typed variable dst and assign the
 336          ## result of the const_expr to it.  If const_expr already contains
 337          ## writes to dst, just include const_expr directly.
 338          % if "dst" in op.const_expr:
 339             ${output_type}_t dst;
 340
 341             ${op.const_expr}
 342          % else:
 343             ${output_type}_t dst = ${op.const_expr};
 344          % endif
 345
 346          ## Store the current component of the actual destination to the
 347          ## value of dst.
 348          % if output_type == "bool32":
 349             ## Sanitize the C value to a proper NIR bool
 350             _dst_val.u32[_i] = dst ? NIR_TRUE : NIR_FALSE;
 351          % elif output_type == "float16":
 352             _dst_val.u16[_i] = _mesa_float_to_half(dst);
 353          % else:
 354             _dst_val.${get_const_field(output_type)}[_i] = dst;
 355          % endif
 356       }
 357    % else:
 358       ## In the non-per-component case, create a struct dst with
 359       ## appropriately-typed elements x, y, z, and w and assign the result
 360       ## of the const_expr to all components of dst, or include the
 361       ## const_expr directly if it writes to dst already.
 362       struct ${output_type}_vec dst;
 363
 364       % if "dst" in op.const_expr:
 365          ${op.const_expr}
 366       % else:
 367          ## Splat the value to all components.  This way expressions which
 368          ## write the same value to all components don't need to explicitly
 369          ## write to dest.  One such example is fnoise which has a
 370          ## const_expr of 0.0f.
 371          dst.x = dst.y = dst.z = dst.w = ${op.const_expr};
 372       % endif
 373
 374       ## For each component in the destination, copy the value of dst to
 375       ## the actual destination.
 376       % for k in range(op.output_size):
 377          % if output_type == "bool32":
 378             ## Sanitize the C value to a proper NIR bool
 379             _dst_val.u32[${k}] = dst.${"xyzw"[k]} ? NIR_TRUE : NIR_FALSE;
 380          % elif output_type == "float16":
 381             _dst_val.u16[${k}] = _mesa_float_to_half(dst.${"xyzw"[k]});
 382          % else:
 383             _dst_val.${get_const_field(output_type)}[${k}] = dst.${"xyzw"[k]};
 384          % endif
 385       % endfor
 386    % endif
 387 </%def>
 388
 389 % for name, op in sorted(opcodes.iteritems()):
 390 static nir_const_value
 391 evaluate_${name}(MAYBE_UNUSED unsigned num_components,
 392                  ${"UNUSED" if op_bit_sizes(op) is None else ""} unsigned bit_size,
 393                  MAYBE_UNUSED nir_const_value *_src)
 394 {
 395    nir_const_value _dst_val = { {0, } };
 396
 397    % if op_bit_sizes(op) is not None:
 398       switch (bit_size) {
 399       % for bit_size in op_bit_sizes(op):
 400       case ${bit_size}: {
 401          ${evaluate_op(op, bit_size)}
 402          break;
 403       }
 404       % endfor
 405
 406       default:
 407          unreachable("unknown bit width");
 408       }
 409    % else:
 410       ${evaluate_op(op, 0)}
 411    % endif
 412
 413    return _dst_val;
 414 }
 415 % endfor
 416
 417 nir_const_value
 418 nir_eval_const_opcode(nir_op op, unsigned num_components,
 419                       unsigned bit_width, nir_const_value *src)
 420 {
 421    switch (op) {
 422 % for name in sorted(opcodes.iterkeys()):
 423    case nir_op_${name}:
 424       return evaluate_${name}(num_components, bit_width, src);
 425 % endfor
 426    default:
 427       unreachable("shouldn't get here");
 428    }
 429 }"""
 430
 431 from nir_opcodes import opcodes
 432 from mako.template import Template
 433
 434 print Template(template).render(opcodes=opcodes, type_sizes=type_sizes,
 435                                 type_has_size=type_has_size,
 436                                 type_add_size=type_add_size,
 437                                 op_bit_sizes=op_bit_sizes,
 438                                 get_const_field=get_const_field)