src/compiler/nir/nir_algebraic.py

   1 #
   2 # Copyright (C) 2014 Intel Corporation
   3 #
   4 # Permission is hereby granted, free of charge, to any person obtaining a
   5 # copy of this software and associated documentation files (the "Software"),
   6 # to deal in the Software without restriction, including without limitation
   7 # the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8 # and/or sell copies of the Software, and to permit persons to whom the
   9 # Software is furnished to do so, subject to the following conditions:
  10 #
  11 # The above copyright notice and this permission notice (including the next
  12 # paragraph) shall be included in all copies or substantial portions of the
  13 # Software.
  14 #
  15 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18 # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20 # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21 # IN THE SOFTWARE.
  22 #
  23 # Authors:
  24 #    Jason Ekstrand (jason@jlekstrand.net)
  25
  26 from __future__ import print_function
  27 import ast
  28 from collections import OrderedDict
  29 import itertools
  30 import struct
  31 import sys
  32 import mako.template
  33 import re
  34 import traceback
  35
  36 from nir_opcodes import opcodes
  37
  38 if sys.version_info < (3, 0):
  39     string_type = unicode
  40
  41 else:
  42     string_type = str
  43
  44 _type_re = re.compile(r"(?P<type>int|uint|bool|float)?(?P<bits>\d+)?")
  45
  46 def type_bits(type_str):
  47    m = _type_re.match(type_str)
  48    assert m.group('type')
  49
  50    if m.group('bits') is None:
  51       return 0
  52    else:
  53       return int(m.group('bits'))
  54
  55 # Represents a set of variables, each with a unique id
  56 class VarSet(object):
  57    def __init__(self):
  58       self.names = {}
  59       self.ids = itertools.count()
  60       self.immutable = False;
  61
  62    def __getitem__(self, name):
  63       if name not in self.names:
  64          assert not self.immutable, "Unknown replacement variable: " + name
  65          self.names[name] = next(self.ids)
  66
  67       return self.names[name]
  68
  69    def lock(self):
  70       self.immutable = True
  71
  72 class Value(object):
  73    @staticmethod
  74    def create(val, name_base, varset):
  75       if isinstance(val, bytes):
  76          val = val.decode('utf-8')
  77
  78       if isinstance(val, tuple):
  79          return Expression(val, name_base, varset)
  80       elif isinstance(val, Expression):
  81          return val
  82       elif isinstance(val, string_type):
  83          return Variable(val, name_base, varset)
  84       elif isinstance(val, (bool, int, long, float)):
  85          return Constant(val, name_base)
  86
  87    __template = mako.template.Template("""
  88 static const ${val.c_type} ${val.name} = {
  89    { ${val.type_enum}, ${val.bit_size} },
  90 % if isinstance(val, Constant):
  91    ${val.type()}, { ${val.hex()} /* ${val.value} */ },
  92 % elif isinstance(val, Variable):
  93    ${val.index}, /* ${val.var_name} */
  94    ${'true' if val.is_constant else 'false'},
  95    ${val.type() or 'nir_type_invalid' },
  96    ${val.cond if val.cond else 'NULL'},
  97 % elif isinstance(val, Expression):
  98    ${'true' if val.inexact else 'false'},
  99    nir_op_${val.opcode},
 100    { ${', '.join(src.c_ptr for src in val.sources)} },
 101    ${val.cond if val.cond else 'NULL'},
 102 % endif
 103 };""")
 104
 105    def __init__(self, name, type_str):
 106       self.name = name
 107       self.type_str = type_str
 108
 109    @property
 110    def type_enum(self):
 111       return "nir_search_value_" + self.type_str
 112
 113    @property
 114    def c_type(self):
 115       return "nir_search_" + self.type_str
 116
 117    @property
 118    def c_ptr(self):
 119       return "&{0}.value".format(self.name)
 120
 121    def render(self):
 122       return self.__template.render(val=self,
 123                                     Constant=Constant,
 124                                     Variable=Variable,
 125                                     Expression=Expression)
 126
 127 _constant_re = re.compile(r"(?P<value>[^@\(]+)(?:@(?P<bits>\d+))?")
 128
 129 class Constant(Value):
 130    def __init__(self, val, name):
 131       Value.__init__(self, name, "constant")
 132
 133       if isinstance(val, (str)):
 134          m = _constant_re.match(val)
 135          self.value = ast.literal_eval(m.group('value'))
 136          self.bit_size = int(m.group('bits')) if m.group('bits') else 0
 137       else:
 138          self.value = val
 139          self.bit_size = 0
 140
 141       if isinstance(self.value, bool):
 142          assert self.bit_size == 0 or self.bit_size == 32
 143          self.bit_size = 32
 144
 145    def hex(self):
 146       if isinstance(self.value, (bool)):
 147          return 'NIR_TRUE' if self.value else 'NIR_FALSE'
 148       if isinstance(self.value, (int, long)):
 149          return hex(self.value)
 150       elif isinstance(self.value, float):
 151          i = struct.unpack('Q', struct.pack('d', self.value))[0]
 152          h = hex(i)
 153
 154          # On Python 2 this 'L' suffix is automatically added, but not on Python 3
 155          # Adding it explicitly makes the generated file identical, regardless
 156          # of the Python version running this script.
 157          if h[-1] != 'L' and i > sys.maxsize:
 158             h += 'L'
 159
 160          return h
 161       else:
 162          assert False
 163
 164    def type(self):
 165       if isinstance(self.value, (bool)):
 166          return "nir_type_bool32"
 167       elif isinstance(self.value, (int, long)):
 168          return "nir_type_int"
 169       elif isinstance(self.value, float):
 170          return "nir_type_float"
 171
 172 _var_name_re = re.compile(r"(?P<const>#)?(?P<name>\w+)"
 173                           r"(?:@(?P<type>int|uint|bool|float)?(?P<bits>\d+)?)?"
 174                           r"(?P<cond>\([^\)]+\))?")
 175
 176 class Variable(Value):
 177    def __init__(self, val, name, varset):
 178       Value.__init__(self, name, "variable")
 179
 180       m = _var_name_re.match(val)
 181       assert m and m.group('name') is not None
 182
 183       self.var_name = m.group('name')
 184       self.is_constant = m.group('const') is not None
 185       self.cond = m.group('cond')
 186       self.required_type = m.group('type')
 187       self.bit_size = int(m.group('bits')) if m.group('bits') else 0
 188
 189       if self.required_type == 'bool':
 190          assert self.bit_size == 0 or self.bit_size == 32
 191          self.bit_size = 32
 192
 193       if self.required_type is not None:
 194          assert self.required_type in ('float', 'bool', 'int', 'uint')
 195
 196       self.index = varset[self.var_name]
 197
 198    def type(self):
 199       if self.required_type == 'bool':
 200          return "nir_type_bool32"
 201       elif self.required_type in ('int', 'uint'):
 202          return "nir_type_int"
 203       elif self.required_type == 'float':
 204          return "nir_type_float"
 205
 206 _opcode_re = re.compile(r"(?P<inexact>~)?(?P<opcode>\w+)(?:@(?P<bits>\d+))?"
 207                         r"(?P<cond>\([^\)]+\))?")
 208
 209 class Expression(Value):
 210    def __init__(self, expr, name_base, varset):
 211       Value.__init__(self, name_base, "expression")
 212       assert isinstance(expr, tuple)
 213
 214       m = _opcode_re.match(expr[0])
 215       assert m and m.group('opcode') is not None
 216
 217       self.opcode = m.group('opcode')
 218       self.bit_size = int(m.group('bits')) if m.group('bits') else 0
 219       self.inexact = m.group('inexact') is not None
 220       self.cond = m.group('cond')
 221       self.sources = [ Value.create(src, "{0}_{1}".format(name_base, i), varset)
 222                        for (i, src) in enumerate(expr[1:]) ]
 223
 224    def render(self):
 225       srcs = "\n".join(src.render() for src in self.sources)
 226       return srcs + super(Expression, self).render()
 227
 228 class IntEquivalenceRelation(object):
 229    """A class representing an equivalence relation on integers.
 230
 231    Each integer has a canonical form which is the maximum integer to which it
 232    is equivalent.  Two integers are equivalent precisely when they have the
 233    same canonical form.
 234
 235    The convention of maximum is explicitly chosen to make using it in
 236    BitSizeValidator easier because it means that an actual bit_size (if any)
 237    will always be the canonical form.
 238    """
 239    def __init__(self):
 240       self._remap = {}
 241
 242    def get_canonical(self, x):
 243       """Get the canonical integer corresponding to x."""
 244       if x in self._remap:
 245          return self.get_canonical(self._remap[x])
 246       else:
 247          return x
 248
 249    def add_equiv(self, a, b):
 250       """Add an equivalence and return the canonical form."""
 251       c = max(self.get_canonical(a), self.get_canonical(b))
 252       if a != c:
 253          assert a < c
 254          self._remap[a] = c
 255
 256       if b != c:
 257          assert b < c
 258          self._remap[b] = c
 259
 260       return c
 261
 262 class BitSizeValidator(object):
 263    """A class for validating bit sizes of expressions.
 264
 265    NIR supports multiple bit-sizes on expressions in order to handle things
 266    such as fp64.  The source and destination of every ALU operation is
 267    assigned a type and that type may or may not specify a bit size.  Sources
 268    and destinations whose type does not specify a bit size are considered
 269    "unsized" and automatically take on the bit size of the corresponding
 270    register or SSA value.  NIR has two simple rules for bit sizes that are
 271    validated by nir_validator:
 272
 273     1) A given SSA def or register has a single bit size that is respected by
 274        everything that reads from it or writes to it.
 275
 276     2) The bit sizes of all unsized inputs/outputs on any given ALU
 277        instruction must match.  They need not match the sized inputs or
 278        outputs but they must match each other.
 279
 280    In order to keep nir_algebraic relatively simple and easy-to-use,
 281    nir_search supports a type of bit-size inference based on the two rules
 282    above.  This is similar to type inference in many common programming
 283    languages.  If, for instance, you are constructing an add operation and you
 284    know the second source is 16-bit, then you know that the other source and
 285    the destination must also be 16-bit.  There are, however, cases where this
 286    inference can be ambiguous or contradictory.  Consider, for instance, the
 287    following transformation:
 288
 289    (('usub_borrow', a, b), ('b2i', ('ult', a, b)))
 290
 291    This transformation can potentially cause a problem because usub_borrow is
 292    well-defined for any bit-size of integer.  However, b2i always generates a
 293    32-bit result so it could end up replacing a 64-bit expression with one
 294    that takes two 64-bit values and produces a 32-bit value.  As another
 295    example, consider this expression:
 296
 297    (('bcsel', a, b, 0), ('iand', a, b))
 298
 299    In this case, in the search expression a must be 32-bit but b can
 300    potentially have any bit size.  If we had a 64-bit b value, we would end up
 301    trying to and a 32-bit value with a 64-bit value which would be invalid
 302
 303    This class solves that problem by providing a validation layer that proves
 304    that a given search-and-replace operation is 100% well-defined before we
 305    generate any code.  This ensures that bugs are caught at compile time
 306    rather than at run time.
 307
 308    The basic operation of the validator is very similar to the bitsize_tree in
 309    nir_search only a little more subtle.  Instead of simply tracking bit
 310    sizes, it tracks "bit classes" where each class is represented by an
 311    integer.  A value of 0 means we don't know anything yet, positive values
 312    are actual bit-sizes, and negative values are used to track equivalence
 313    classes of sizes that must be the same but have yet to receive an actual
 314    size.  The first stage uses the bitsize_tree algorithm to assign bit
 315    classes to each variable.  If it ever comes across an inconsistency, it
 316    assert-fails.  Then the second stage uses that information to prove that
 317    the resulting expression can always validly be constructed.
 318    """
 319
 320    def __init__(self, varset):
 321       self._num_classes = 0
 322       self._var_classes = [0] * len(varset.names)
 323       self._class_relation = IntEquivalenceRelation()
 324
 325    def validate(self, search, replace):
 326       dst_class = self._propagate_bit_size_up(search)
 327       if dst_class == 0:
 328          dst_class = self._new_class()
 329       self._propagate_bit_class_down(search, dst_class)
 330
 331       validate_dst_class = self._validate_bit_class_up(replace)
 332       assert validate_dst_class == 0 or validate_dst_class == dst_class
 333       self._validate_bit_class_down(replace, dst_class)
 334
 335    def _new_class(self):
 336       self._num_classes += 1
 337       return -self._num_classes
 338
 339    def _set_var_bit_class(self, var_id, bit_class):
 340       assert bit_class != 0
 341       var_class = self._var_classes[var_id]
 342       if var_class == 0:
 343          self._var_classes[var_id] = bit_class
 344       else:
 345          canon_class = self._class_relation.get_canonical(var_class)
 346          assert canon_class < 0 or canon_class == bit_class
 347          var_class = self._class_relation.add_equiv(var_class, bit_class)
 348          self._var_classes[var_id] = var_class
 349
 350    def _get_var_bit_class(self, var_id):
 351       return self._class_relation.get_canonical(self._var_classes[var_id])
 352
 353    def _propagate_bit_size_up(self, val):
 354       if isinstance(val, (Constant, Variable)):
 355          return val.bit_size
 356
 357       elif isinstance(val, Expression):
 358          nir_op = opcodes[val.opcode]
 359          val.common_size = 0
 360          for i in range(nir_op.num_inputs):
 361             src_bits = self._propagate_bit_size_up(val.sources[i])
 362             if src_bits == 0:
 363                continue
 364
 365             src_type_bits = type_bits(nir_op.input_types[i])
 366             if src_type_bits != 0:
 367                assert src_bits == src_type_bits
 368             else:
 369                assert val.common_size == 0 or src_bits == val.common_size
 370                val.common_size = src_bits
 371
 372          dst_type_bits = type_bits(nir_op.output_type)
 373          if dst_type_bits != 0:
 374             assert val.bit_size == 0 or val.bit_size == dst_type_bits
 375             return dst_type_bits
 376          else:
 377             if val.common_size != 0:
 378                assert val.bit_size == 0 or val.bit_size == val.common_size
 379             else:
 380                val.common_size = val.bit_size
 381             return val.common_size
 382
 383    def _propagate_bit_class_down(self, val, bit_class):
 384       if isinstance(val, Constant):
 385          assert val.bit_size == 0 or val.bit_size == bit_class
 386
 387       elif isinstance(val, Variable):
 388          assert val.bit_size == 0 or val.bit_size == bit_class
 389          self._set_var_bit_class(val.index, bit_class)
 390
 391       elif isinstance(val, Expression):
 392          nir_op = opcodes[val.opcode]
 393          dst_type_bits = type_bits(nir_op.output_type)
 394          if dst_type_bits != 0:
 395             assert bit_class == 0 or bit_class == dst_type_bits
 396          else:
 397             assert val.common_size == 0 or val.common_size == bit_class
 398             val.common_size = bit_class
 399
 400          if val.common_size:
 401             common_class = val.common_size
 402          elif nir_op.num_inputs:
 403             # If we got here then we have no idea what the actual size is.
 404             # Instead, we use a generic class
 405             common_class = self._new_class()
 406
 407          for i in range(nir_op.num_inputs):
 408             src_type_bits = type_bits(nir_op.input_types[i])
 409             if src_type_bits != 0:
 410                self._propagate_bit_class_down(val.sources[i], src_type_bits)
 411             else:
 412                self._propagate_bit_class_down(val.sources[i], common_class)
 413
 414    def _validate_bit_class_up(self, val):
 415       if isinstance(val, Constant):
 416          return val.bit_size
 417
 418       elif isinstance(val, Variable):
 419          var_class = self._get_var_bit_class(val.index)
 420          # By the time we get to validation, every variable should have a class
 421          assert var_class != 0
 422
 423          # If we have an explicit size provided by the user, the variable
 424          # *must* exactly match the search.  It cannot be implicitly sized
 425          # because otherwise we could end up with a conflict at runtime.
 426          assert val.bit_size == 0 or val.bit_size == var_class
 427
 428          return var_class
 429
 430       elif isinstance(val, Expression):
 431          nir_op = opcodes[val.opcode]
 432          val.common_class = 0
 433          for i in range(nir_op.num_inputs):
 434             src_class = self._validate_bit_class_up(val.sources[i])
 435             if src_class == 0:
 436                continue
 437
 438             src_type_bits = type_bits(nir_op.input_types[i])
 439             if src_type_bits != 0:
 440                assert src_class == src_type_bits
 441             else:
 442                assert val.common_class == 0 or src_class == val.common_class
 443                val.common_class = src_class
 444
 445          dst_type_bits = type_bits(nir_op.output_type)
 446          if dst_type_bits != 0:
 447             assert val.bit_size == 0 or val.bit_size == dst_type_bits
 448             return dst_type_bits
 449          else:
 450             if val.common_class != 0:
 451                assert val.bit_size == 0 or val.bit_size == val.common_class
 452             else:
 453                val.common_class = val.bit_size
 454             return val.common_class
 455
 456    def _validate_bit_class_down(self, val, bit_class):
 457       # At this point, everything *must* have a bit class.  Otherwise, we have
 458       # a value we don't know how to define.
 459       assert bit_class != 0
 460
 461       if isinstance(val, Constant):
 462          assert val.bit_size == 0 or val.bit_size == bit_class
 463
 464       elif isinstance(val, Variable):
 465          assert val.bit_size == 0 or val.bit_size == bit_class
 466
 467       elif isinstance(val, Expression):
 468          nir_op = opcodes[val.opcode]
 469          dst_type_bits = type_bits(nir_op.output_type)
 470          if dst_type_bits != 0:
 471             assert bit_class == dst_type_bits
 472          else:
 473             assert val.common_class == 0 or val.common_class == bit_class
 474             val.common_class = bit_class
 475
 476          for i in range(nir_op.num_inputs):
 477             src_type_bits = type_bits(nir_op.input_types[i])
 478             if src_type_bits != 0:
 479                self._validate_bit_class_down(val.sources[i], src_type_bits)
 480             else:
 481                self._validate_bit_class_down(val.sources[i], val.common_class)
 482
 483 _optimization_ids = itertools.count()
 484
 485 condition_list = ['true']
 486
 487 class SearchAndReplace(object):
 488    def __init__(self, transform):
 489       self.id = next(_optimization_ids)
 490
 491       search = transform[0]
 492       replace = transform[1]
 493       if len(transform) > 2:
 494          self.condition = transform[2]
 495       else:
 496          self.condition = 'true'
 497
 498       if self.condition not in condition_list:
 499          condition_list.append(self.condition)
 500       self.condition_index = condition_list.index(self.condition)
 501
 502       varset = VarSet()
 503       if isinstance(search, Expression):
 504          self.search = search
 505       else:
 506          self.search = Expression(search, "search{0}".format(self.id), varset)
 507
 508       varset.lock()
 509
 510       if isinstance(replace, Value):
 511          self.replace = replace
 512       else:
 513          self.replace = Value.create(replace, "replace{0}".format(self.id), varset)
 514
 515       BitSizeValidator(varset).validate(self.search, self.replace)
 516
# Mako template for the C source of one algebraic pass.  It is rendered by
# AlgebraicPass.render() with three inputs:
#   pass_name      - prefix of every generated C identifier
#   xform_dict     - maps an opcode name to its list of transforms
#   condition_list - condition expressions, indexed by condition_index
# For each opcode it emits the search/replace value definitions and a table
# of transforms, followed by the <pass_name>_block, <pass_name>_impl and
# <pass_name> functions.
_algebraic_pass_template = mako.template.Template("""
#include "nir.h"
#include "nir_search.h"
#include "nir_search_helpers.h"

#ifndef NIR_OPT_ALGEBRAIC_STRUCT_DEFS
#define NIR_OPT_ALGEBRAIC_STRUCT_DEFS

struct transform {
   const nir_search_expression *search;
   const nir_search_value *replace;
   unsigned condition_offset;
};

#endif

% for (opcode, xform_list) in xform_dict.items():
% for xform in xform_list:
   ${xform.search.render()}
   ${xform.replace.render()}
% endfor

static const struct transform ${pass_name}_${opcode}_xforms[] = {
% for xform in xform_list:
   { &${xform.search.name}, ${xform.replace.c_ptr}, ${xform.condition_index} },
% endfor
};
% endfor

static bool
${pass_name}_block(nir_block *block, const bool *condition_flags,
                   void *mem_ctx)
{
   bool progress = false;

   nir_foreach_instr_reverse_safe(instr, block) {
      if (instr->type != nir_instr_type_alu)
         continue;

      nir_alu_instr *alu = nir_instr_as_alu(instr);
      if (!alu->dest.dest.is_ssa)
         continue;

      switch (alu->op) {
      % for opcode in xform_dict.keys():
      case nir_op_${opcode}:
         for (unsigned i = 0; i < ARRAY_SIZE(${pass_name}_${opcode}_xforms); i++) {
            const struct transform *xform = &${pass_name}_${opcode}_xforms[i];
            if (condition_flags[xform->condition_offset] &&
                nir_replace_instr(alu, xform->search, xform->replace,
                                  mem_ctx)) {
               progress = true;
               break;
            }
         }
         break;
      % endfor
      default:
         break;
      }
   }

   return progress;
}

static bool
${pass_name}_impl(nir_function_impl *impl, const bool *condition_flags)
{
   void *mem_ctx = ralloc_parent(impl);
   bool progress = false;

   nir_foreach_block_reverse(block, impl) {
      progress |= ${pass_name}_block(block, condition_flags, mem_ctx);
   }

   if (progress)
      nir_metadata_preserve(impl, nir_metadata_block_index |
                                  nir_metadata_dominance);

   return progress;
}


bool
${pass_name}(nir_shader *shader)
{
   bool progress = false;
   bool condition_flags[${len(condition_list)}];
   const nir_shader_compiler_options *options = shader->options;
   (void) options;

   % for index, condition in enumerate(condition_list):
   condition_flags[${index}] = ${condition};
   % endfor

   nir_foreach_function(function, shader) {
      if (function->impl)
         progress |= ${pass_name}_impl(function->impl, condition_flags);
   }

   return progress;
}
""")
 620
 621 class AlgebraicPass(object):
 622    def __init__(self, pass_name, transforms):
 623       self.xform_dict = OrderedDict()
 624       self.pass_name = pass_name
 625
 626       error = False
 627
 628       for xform in transforms:
 629          if not isinstance(xform, SearchAndReplace):
 630             try:
 631                xform = SearchAndReplace(xform)
 632             except:
 633                print("Failed to parse transformation:", file=sys.stderr)
 634                print("  " + str(xform), file=sys.stderr)
 635                traceback.print_exc(file=sys.stderr)
 636                print('', file=sys.stderr)
 637                error = True
 638                continue
 639
 640          if xform.search.opcode not in self.xform_dict:
 641             self.xform_dict[xform.search.opcode] = []
 642
 643          self.xform_dict[xform.search.opcode].append(xform)
 644
 645       if error:
 646          sys.exit(1)
 647
 648    def render(self):
 649       return _algebraic_pass_template.render(pass_name=self.pass_name,
 650                                              xform_dict=self.xform_dict,
 651                                              condition_list=condition_list)