src/compiler/nir/nir_algebraic.py

   1 #
   2 # Copyright (C) 2014 Intel Corporation
   3 #
   4 # Permission is hereby granted, free of charge, to any person obtaining a
   5 # copy of this software and associated documentation files (the "Software"),
   6 # to deal in the Software without restriction, including without limitation
   7 # the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8 # and/or sell copies of the Software, and to permit persons to whom the
   9 # Software is furnished to do so, subject to the following conditions:
  10 #
  11 # The above copyright notice and this permission notice (including the next
  12 # paragraph) shall be included in all copies or substantial portions of the
  13 # Software.
  14 #
  15 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18 # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20 # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21 # IN THE SOFTWARE.
  22 #
  23 # Authors:
  24 #    Jason Ekstrand (jason@jlekstrand.net)
  25
  26 from __future__ import print_function
  27 import ast
  28 from collections import defaultdict
  29 import itertools
  30 import struct
  31 import sys
  32 import mako.template
  33 import re
  34 import traceback
  35
  36 from nir_opcodes import opcodes, type_sizes
  37
  38 # These opcodes are only employed by nir_search.  This provides a mapping from
  39 # opcode to destination type.
  40 conv_opcode_types = {
  41     'i2f' : 'float',
  42     'u2f' : 'float',
  43     'f2f' : 'float',
  44     'f2u' : 'uint',
  45     'f2i' : 'int',
  46     'u2u' : 'uint',
  47     'i2i' : 'int',
  48     'b2f' : 'float',
  49     'b2i' : 'int',
  50     'i2b' : 'bool',
  51     'f2b' : 'bool',
  52 }
  53
  54 if sys.version_info < (3, 0):
  55     integer_types = (int, long)
  56     string_type = unicode
  57
  58 else:
  59     integer_types = (int, )
  60     string_type = str
  61
  62 _type_re = re.compile(r"(?P<type>int|uint|bool|float)?(?P<bits>\d+)?")
  63
  64 def type_bits(type_str):
  65    m = _type_re.match(type_str)
  66    assert m.group('type')
  67
  68    if m.group('bits') is None:
  69       return 0
  70    else:
  71       return int(m.group('bits'))
  72
  73 # Represents a set of variables, each with a unique id
  74 class VarSet(object):
  75    def __init__(self):
  76       self.names = {}
  77       self.ids = itertools.count()
  78       self.immutable = False;
  79
  80    def __getitem__(self, name):
  81       if name not in self.names:
  82          assert not self.immutable, "Unknown replacement variable: " + name
  83          self.names[name] = next(self.ids)
  84
  85       return self.names[name]
  86
  87    def lock(self):
  88       self.immutable = True
  89
  90 class Value(object):
  91    @staticmethod
  92    def create(val, name_base, varset):
  93       if isinstance(val, bytes):
  94          val = val.decode('utf-8')
  95
  96       if isinstance(val, tuple):
  97          return Expression(val, name_base, varset)
  98       elif isinstance(val, Expression):
  99          return val
 100       elif isinstance(val, string_type):
 101          return Variable(val, name_base, varset)
 102       elif isinstance(val, (bool, float) + integer_types):
 103          return Constant(val, name_base)
 104
 105    __template = mako.template.Template("""
 106 static const ${val.c_type} ${val.name} = {
 107    { ${val.type_enum}, ${val.c_bit_size} },
 108 % if isinstance(val, Constant):
 109    ${val.type()}, { ${val.hex()} /* ${val.value} */ },
 110 % elif isinstance(val, Variable):
 111    ${val.index}, /* ${val.var_name} */
 112    ${'true' if val.is_constant else 'false'},
 113    ${val.type() or 'nir_type_invalid' },
 114    ${val.cond if val.cond else 'NULL'},
 115 % elif isinstance(val, Expression):
 116    ${'true' if val.inexact else 'false'},
 117    ${val.comm_expr_idx}, ${val.comm_exprs},
 118    ${val.c_opcode()},
 119    { ${', '.join(src.c_ptr for src in val.sources)} },
 120    ${val.cond if val.cond else 'NULL'},
 121 % endif
 122 };""")
 123
 124    def __init__(self, val, name, type_str):
 125       self.in_val = str(val)
 126       self.name = name
 127       self.type_str = type_str
 128
 129    def __str__(self):
 130       return self.in_val
 131
 132    def get_bit_size(self):
 133       """Get the physical bit-size that has been chosen for this value, or if
 134       there is none, the canonical value which currently represents this
 135       bit-size class. Variables will be preferred, i.e. if there are any
 136       variables in the equivalence class, the canonical value will be a
 137       variable. We do this since we'll need to know which variable each value
 138       is equivalent to when constructing the replacement expression. This is
 139       the "find" part of the union-find algorithm.
 140       """
 141       bit_size = self
 142
 143       while isinstance(bit_size, Value):
 144          if bit_size._bit_size is None:
 145             break
 146          bit_size = bit_size._bit_size
 147
 148       if bit_size is not self:
 149          self._bit_size = bit_size
 150       return bit_size
 151
 152    def set_bit_size(self, other):
 153       """Make self.get_bit_size() return what other.get_bit_size() return
 154       before calling this, or just "other" if it's a concrete bit-size. This is
 155       the "union" part of the union-find algorithm.
 156       """
 157
 158       self_bit_size = self.get_bit_size()
 159       other_bit_size = other if isinstance(other, int) else other.get_bit_size()
 160
 161       if self_bit_size == other_bit_size:
 162          return
 163
 164       self_bit_size._bit_size = other_bit_size
 165
 166    @property
 167    def type_enum(self):
 168       return "nir_search_value_" + self.type_str
 169
 170    @property
 171    def c_type(self):
 172       return "nir_search_" + self.type_str
 173
 174    @property
 175    def c_ptr(self):
 176       return "&{0}.value".format(self.name)
 177
 178    @property
 179    def c_bit_size(self):
 180       bit_size = self.get_bit_size()
 181       if isinstance(bit_size, int):
 182          return bit_size
 183       elif isinstance(bit_size, Variable):
 184          return -bit_size.index - 1
 185       else:
 186          # If the bit-size class is neither a variable, nor an actual bit-size, then
 187          # - If it's in the search expression, we don't need to check anything
 188          # - If it's in the replace expression, either it's ambiguous (in which
 189          # case we'd reject it), or it equals the bit-size of the search value
 190          # We represent these cases with a 0 bit-size.
 191          return 0
 192
 193    def render(self):
 194       return self.__template.render(val=self,
 195                                     Constant=Constant,
 196                                     Variable=Variable,
 197                                     Expression=Expression)
 198
 199 _constant_re = re.compile(r"(?P<value>[^@\(]+)(?:@(?P<bits>\d+))?")
 200
 201 class Constant(Value):
 202    def __init__(self, val, name):
 203       Value.__init__(self, val, name, "constant")
 204
 205       if isinstance(val, (str)):
 206          m = _constant_re.match(val)
 207          self.value = ast.literal_eval(m.group('value'))
 208          self._bit_size = int(m.group('bits')) if m.group('bits') else None
 209       else:
 210          self.value = val
 211          self._bit_size = None
 212
 213       if isinstance(self.value, bool):
 214          assert self._bit_size is None or self._bit_size == 1
 215          self._bit_size = 1
 216
 217    def hex(self):
 218       if isinstance(self.value, (bool)):
 219          return 'NIR_TRUE' if self.value else 'NIR_FALSE'
 220       if isinstance(self.value, integer_types):
 221          return hex(self.value)
 222       elif isinstance(self.value, float):
 223          i = struct.unpack('Q', struct.pack('d', self.value))[0]
 224          h = hex(i)
 225
 226          # On Python 2 this 'L' suffix is automatically added, but not on Python 3
 227          # Adding it explicitly makes the generated file identical, regardless
 228          # of the Python version running this script.
 229          if h[-1] != 'L' and i > sys.maxsize:
 230             h += 'L'
 231
 232          return h
 233       else:
 234          assert False
 235
 236    def type(self):
 237       if isinstance(self.value, (bool)):
 238          return "nir_type_bool"
 239       elif isinstance(self.value, integer_types):
 240          return "nir_type_int"
 241       elif isinstance(self.value, float):
 242          return "nir_type_float"
 243
 244 _var_name_re = re.compile(r"(?P<const>#)?(?P<name>\w+)"
 245                           r"(?:@(?P<type>int|uint|bool|float)?(?P<bits>\d+)?)?"
 246                           r"(?P<cond>\([^\)]+\))?")
 247
 248 class Variable(Value):
 249    def __init__(self, val, name, varset):
 250       Value.__init__(self, val, name, "variable")
 251
 252       m = _var_name_re.match(val)
 253       assert m and m.group('name') is not None
 254
 255       self.var_name = m.group('name')
 256
 257       # Prevent common cases where someone puts quotes around a literal
 258       # constant.  If we want to support names that have numeric or
 259       # punctuation characters, we can me the first assertion more flexible.
 260       assert self.var_name.isalpha()
 261       assert self.var_name is not 'True'
 262       assert self.var_name is not 'False'
 263
 264       self.is_constant = m.group('const') is not None
 265       self.cond = m.group('cond')
 266       self.required_type = m.group('type')
 267       self._bit_size = int(m.group('bits')) if m.group('bits') else None
 268
 269       if self.required_type == 'bool':
 270          if self._bit_size is not None:
 271             assert self._bit_size in type_sizes(self.required_type)
 272          else:
 273             self._bit_size = 1
 274
 275       if self.required_type is not None:
 276          assert self.required_type in ('float', 'bool', 'int', 'uint')
 277
 278       self.index = varset[self.var_name]
 279
 280    def type(self):
 281       if self.required_type == 'bool':
 282          return "nir_type_bool"
 283       elif self.required_type in ('int', 'uint'):
 284          return "nir_type_int"
 285       elif self.required_type == 'float':
 286          return "nir_type_float"
 287
 288 _opcode_re = re.compile(r"(?P<inexact>~)?(?P<opcode>\w+)(?:@(?P<bits>\d+))?"
 289                         r"(?P<cond>\([^\)]+\))?")
 290
 291 class Expression(Value):
 292    def __init__(self, expr, name_base, varset):
 293       Value.__init__(self, expr, name_base, "expression")
 294       assert isinstance(expr, tuple)
 295
 296       m = _opcode_re.match(expr[0])
 297       assert m and m.group('opcode') is not None
 298
 299       self.opcode = m.group('opcode')
 300       self._bit_size = int(m.group('bits')) if m.group('bits') else None
 301       self.inexact = m.group('inexact') is not None
 302       self.cond = m.group('cond')
 303       self.sources = [ Value.create(src, "{0}_{1}".format(name_base, i), varset)
 304                        for (i, src) in enumerate(expr[1:]) ]
 305
 306       if self.opcode in conv_opcode_types:
 307          assert self._bit_size is None, \
 308                 'Expression cannot use an unsized conversion opcode with ' \
 309                 'an explicit size; that\'s silly.'
 310
 311       self.__index_comm_exprs(0)
 312
 313    def __index_comm_exprs(self, base_idx):
 314       """Recursively count and index commutative expressions
 315       """
 316       self.comm_exprs = 0
 317       if self.opcode not in conv_opcode_types and \
 318          "commutative" in opcodes[self.opcode].algebraic_properties:
 319          self.comm_expr_idx = base_idx
 320          self.comm_exprs += 1
 321       else:
 322          self.comm_expr_idx = -1
 323
 324       for s in self.sources:
 325          if isinstance(s, Expression):
 326             s.__index_comm_exprs(base_idx + self.comm_exprs)
 327             self.comm_exprs += s.comm_exprs
 328
 329       return self.comm_exprs
 330
 331    def c_opcode(self):
 332       if self.opcode in conv_opcode_types:
 333          return 'nir_search_op_' + self.opcode
 334       else:
 335          return 'nir_op_' + self.opcode
 336
 337    def render(self):
 338       srcs = "\n".join(src.render() for src in self.sources)
 339       return srcs + super(Expression, self).render()
 340
class BitSizeValidator(object):
   """A class for validating bit sizes of expressions.

   NIR supports multiple bit-sizes on expressions in order to handle things
   such as fp64.  The source and destination of every ALU operation is
   assigned a type and that type may or may not specify a bit size.  Sources
   and destinations whose type does not specify a bit size are considered
   "unsized" and automatically take on the bit size of the corresponding
   register or SSA value.  NIR has two simple rules for bit sizes that are
   validated by nir_validator:

    1) A given SSA def or register has a single bit size that is respected by
       everything that reads from it or writes to it.

    2) The bit sizes of all unsized inputs/outputs on any given ALU
       instruction must match.  They need not match the sized inputs or
       outputs but they must match each other.

   In order to keep nir_algebraic relatively simple and easy-to-use,
   nir_search supports a type of bit-size inference based on the two rules
   above.  This is similar to type inference in many common programming
   languages.  If, for instance, you are constructing an add operation and you
   know the second source is 16-bit, then you know that the other source and
   the destination must also be 16-bit.  There are, however, cases where this
   inference can be ambiguous or contradictory.  Consider, for instance, the
   following transformation:

   (('usub_borrow', a, b), ('b2i@32', ('ult', a, b)))

   This transformation can potentially cause a problem because usub_borrow is
   well-defined for any bit-size of integer.  However, b2i always generates a
   32-bit result so it could end up replacing a 64-bit expression with one
   that takes two 64-bit values and produces a 32-bit value.  As another
   example, consider this expression:

   (('bcsel', a, b, 0), ('iand', a, b))

   In this case, in the search expression a must be 32-bit but b can
   potentially have any bit size.  If we had a 64-bit b value, we would end up
   trying to and a 32-bit value with a 64-bit value which would be invalid.

   This class solves that problem by providing a validation layer that proves
   that a given search-and-replace operation is 100% well-defined before we
   generate any code.  This ensures that bugs are caught at compile time
   rather than at run time.

   Each value maintains a "bit-size class", which is either an actual bit size
   or an equivalence class with other values that must have the same bit size.
   The validator works by combining bit-size classes with each other according
   to the NIR rules outlined above, checking that there are no inconsistencies.
   When doing this for the replacement expression, we make sure to never change
   the equivalence class of any of the search values. We could make the example
   transforms above work by doing some extra run-time checking of the search
   expression, but we make the user specify those constraints themselves, to
   avoid any surprises. Since the replacement bitsizes can only be connected to
   the source bitsize via variables (variables must have the same bitsize in
   the source and replacement expressions) or the roots of the expression (the
   replacement expression must produce the same bit size as the search
   expression), we prevent merging a variable with anything when processing the
   replacement expression, or specializing the search bitsize
   with anything. The former prevents

   (('bcsel', a, b, 0), ('iand', a, b))

   from being allowed, since we'd have to merge the bitsizes for a and b due to
   the 'iand', while the latter prevents

   (('usub_borrow', a, b), ('b2i@32', ('ult', a, b)))

   from being allowed, since the search expression has the bit size of a and b,
   which can't be specialized to 32 which is the bitsize of the replace
   expression. It also prevents something like:

   (('b2i', ('i2b', a)), ('ineq', a, 0))

   since the bitsize of 'b2i', which can be anything, can't be specialized to
   the bitsize of a.

   After doing all this, we check that every subexpression of the replacement
   was assigned a constant bitsize, the bitsize of a variable, or the bitsize
   of the search expression, since those are the things that are known when
   constructing the replacement expression. Finally, we record the bitsize
   needed in nir_search_value so that we know what to do when building the
   replacement expression.
   """

   def __init__(self, varset):
      """Create a validator for patterns whose variables come from varset.

      Note that ``is_search``, which several methods read, is only set by
      validate(); the other methods are meant to be reached through it.
      """
      # One slot per variable id; merge_variables() stores in each slot the
      # first Variable object it encounters with that id.
      self._var_classes = [None] * len(varset.names)

   def compare_bitsizes(self, a, b):
      """Determines which bitsize class is a specialization of the other, or
      whether neither is. When we merge two different bitsizes, the
      less-specialized bitsize always points to the more-specialized one, so
      that calling get_bit_size() always gets you the most specialized bitsize.
      The specialization partial order is given by:
      - Physical bitsizes are always the most specialized, and a different
        bitsize can never specialize another.
      - In the search expression, variables can always be specialized to each
        other and to physical bitsizes. In the replace expression, we disallow
        this to avoid adding extra constraints to the search expression that
        the user didn't specify.
      - Expressions and constants without a bitsize can always be specialized to
        each other and variables, but not the other way around.

        We return -1 if a <= b (b can be specialized to a), 0 if a = b, 1 if a >= b,
        and None if they are not comparable (neither a <= b nor b <= a).
      """
      if isinstance(a, int):
         if isinstance(b, int):
            return 0 if a == b else None
         elif isinstance(b, Variable):
            return -1 if self.is_search else None
         else:
            return -1
      elif isinstance(a, Variable):
         if isinstance(b, int):
            return 1 if self.is_search else None
         elif isinstance(b, Variable):
            return 0 if self.is_search or a.index == b.index else None
         else:
            return -1
      else:
         if isinstance(b, int):
            return 1
         elif isinstance(b, Variable):
            return 1
         else:
            return 0

   def unify_bit_size(self, a, b, error_msg):
      """Record that a must have the same bit-size as b. If both
      have been assigned conflicting physical bit-sizes, call "error_msg" with
      the bit-sizes of self and other to get a message and raise an error.
      In the replace expression, disallow merging variables with other
      variables and physical bit-sizes as well.
      """
      a_bit_size = a.get_bit_size()
      b_bit_size = b if isinstance(b, int) else b.get_bit_size()

      cmp_result = self.compare_bitsizes(a_bit_size, b_bit_size)

      assert cmp_result is not None, \
         error_msg(a_bit_size, b_bit_size)

      # Always link the less-specialized class to the more-specialized one.
      # When a's class is already a physical bit size (and the comparison
      # did not fail), there is nothing left to link.
      if cmp_result < 0:
         b_bit_size.set_bit_size(a)
      elif not isinstance(a_bit_size, int):
         a_bit_size.set_bit_size(b)

   def merge_variables(self, val):
      """Perform the first part of type inference by merging all the different
      uses of the same variable. We always do this as if we're in the search
      expression, even if we're actually not, since otherwise we'd get errors
      if the search expression specified some constraint but the replace
      expression didn't, because we'd be merging a variable and a constant.
      """
      if isinstance(val, Variable):
         if self._var_classes[val.index] is None:
            # First occurrence of this variable: it represents the class.
            self._var_classes[val.index] = val
         else:
            other = self._var_classes[val.index]
            self.unify_bit_size(other, val,
                  lambda other_bit_size, bit_size:
                     'Variable {} has conflicting bit size requirements: ' \
                     'it must have bit size {} and {}'.format(
                        val.var_name, other_bit_size, bit_size))
      elif isinstance(val, Expression):
         for src in val.sources:
            self.merge_variables(src)

   def validate_value(self, val):
      """Validate an expression by performing classic Hindley-Milner
      type inference on bitsizes. This will detect if there are any conflicting
      requirements, and unify variables so that we know which variables must
      have the same bitsize. If we're operating on the replace expression, we
      will refuse to merge different variables together or merge a variable
      with a constant, in order to prevent surprises due to rules unexpectedly
      not matching at runtime.
      """
      if not isinstance(val, Expression):
         return

      # Generic conversion ops are special in that they have a single unsized
      # source and an unsized destination and the two don't have to match.
      # This means there's no validation or unioning to do here besides the
      # len(val.sources) check.
      if val.opcode in conv_opcode_types:
         assert len(val.sources) == 1, \
            "Expression {} has {} sources, expected 1".format(
               val, len(val.sources))
         self.validate_value(val.sources[0])
         return

      nir_op = opcodes[val.opcode]
      assert len(val.sources) == nir_op.num_inputs, \
         "Expression {} has {} sources, expected {}".format(
            val, len(val.sources), nir_op.num_inputs)

      # Validate bottom-up: the sources first, then this expression.
      for src in val.sources:
         self.validate_value(src)

      dst_type_bits = type_bits(nir_op.output_type)

      # First, unify all the sources. That way, an error coming up because two
      # sources have an incompatible bit-size won't produce an error message
      # involving the destination.
      first_unsized_src = None
      for src_type, src in zip(nir_op.input_types, val.sources):
         src_type_bits = type_bits(src_type)
         if src_type_bits == 0:
            # Unsized source: it must agree with every other unsized source.
            if first_unsized_src is None:
               first_unsized_src = src
               continue

            if self.is_search:
               self.unify_bit_size(first_unsized_src, src,
                  lambda first_unsized_src_bit_size, src_bit_size:
                     'Source {} of {} must have bit size {}, while source {} ' \
                     'must have incompatible bit size {}'.format(
                        first_unsized_src, val, first_unsized_src_bit_size,
                        src, src_bit_size))
            else:
               self.unify_bit_size(first_unsized_src, src,
                  lambda first_unsized_src_bit_size, src_bit_size:
                     'Sources {} (bit size of {}) and {} (bit size of {}) ' \
                     'of {} may not have the same bit size when building the ' \
                     'replacement expression.'.format(
                        first_unsized_src, first_unsized_src_bit_size, src,
                        src_bit_size, val))
         else:
            # Sized source: it must have exactly the size the opcode demands.
            if self.is_search:
               self.unify_bit_size(src, src_type_bits,
                  lambda src_bit_size, unused:
                     '{} must have {} bits, but as a source of nir_op_{} '\
                     'it must have {} bits'.format(
                        src, src_bit_size, nir_op.name, src_type_bits))
            else:
               self.unify_bit_size(src, src_type_bits,
                  lambda src_bit_size, unused:
                     '{} has the bit size of {}, but as a source of ' \
                     'nir_op_{} it must have {} bits, which may not be the ' \
                     'same'.format(
                        src, src_bit_size, nir_op.name, src_type_bits))

      # Finally tie the destination to the unsized sources (if it is unsized
      # itself) or to the fixed size of the opcode's output type.
      if dst_type_bits == 0:
         if first_unsized_src is not None:
            if self.is_search:
               self.unify_bit_size(val, first_unsized_src,
                  lambda val_bit_size, src_bit_size:
                     '{} must have the bit size of {}, while its source {} ' \
                     'must have incompatible bit size {}'.format(
                        val, val_bit_size, first_unsized_src, src_bit_size))
            else:
               self.unify_bit_size(val, first_unsized_src,
                  lambda val_bit_size, src_bit_size:
                     '{} must have {} bits, but its source {} ' \
                     '(bit size of {}) may not have that bit size ' \
                     'when building the replacement.'.format(
                        val, val_bit_size, first_unsized_src, src_bit_size))
      else:
         self.unify_bit_size(val, dst_type_bits,
            lambda dst_bit_size, unused:
               '{} must have {} bits, but as a destination of nir_op_{} ' \
               'it must have {} bits'.format(
                  val, dst_bit_size, nir_op.name, dst_type_bits))

   def validate_replace(self, val, search):
      """Check that the bit size of every replacement value is determined.

      For val and, recursively, each of its sources, the bit-size class must
      be a physical bit size, a variable, or the bit-size class of the
      search expression; otherwise an assertion fails.
      """
      bit_size = val.get_bit_size()
      assert isinstance(bit_size, int) or isinstance(bit_size, Variable) or \
            bit_size == search.get_bit_size(), \
            'Ambiguous bit size for replacement value {}: ' \
            'it cannot be deduced from a variable, a fixed bit size ' \
            'somewhere, or the search expression.'.format(val)

      if isinstance(val, Expression):
         for src in val.sources:
            self.validate_replace(src, search)

   def validate(self, search, replace):
      """Run the complete bit-size validation of a search/replace pair.

      Any inconsistency is reported through a failing assertion.  As a side
      effect, the bit-size classes of the values in both expressions are
      updated, and the replacement's root is given the bit-size class of the
      search expression's root.
      """
      # Variable merging and the search expression are processed in search
      # mode, where variables may be merged and specialized.
      self.is_search = True
      self.merge_variables(search)
      self.merge_variables(replace)
      self.validate_value(search)

      self.is_search = False
      self.validate_value(replace)

      # Check that search is always more specialized than replace. Note that
      # we're doing this in replace mode, disallowing merging variables.
      search_bit_size = search.get_bit_size()
      replace_bit_size = replace.get_bit_size()
      cmp_result = self.compare_bitsizes(search_bit_size, replace_bit_size)

      assert cmp_result is not None and cmp_result <= 0, \
         'The search expression bit size {} and replace expression ' \
         'bit size {} may not be the same'.format(
               search_bit_size, replace_bit_size)

      replace.set_bit_size(search)

      self.validate_replace(replace, search)
 642
 643 _optimization_ids = itertools.count()
 644
 645 condition_list = ['true']
 646
 647 class SearchAndReplace(object):
 648    def __init__(self, transform):
 649       self.id = next(_optimization_ids)
 650
 651       search = transform[0]
 652       replace = transform[1]
 653       if len(transform) > 2:
 654          self.condition = transform[2]
 655       else:
 656          self.condition = 'true'
 657
 658       if self.condition not in condition_list:
 659          condition_list.append(self.condition)
 660       self.condition_index = condition_list.index(self.condition)
 661
 662       varset = VarSet()
 663       if isinstance(search, Expression):
 664          self.search = search
 665       else:
 666          self.search = Expression(search, "search{0}".format(self.id), varset)
 667
 668       varset.lock()
 669
 670       if isinstance(replace, Value):
 671          self.replace = replace
 672       else:
 673          self.replace = Value.create(replace, "replace{0}".format(self.id), varset)
 674
 675       BitSizeValidator(varset).validate(self.search, self.replace)
 676
# Mako template for the C source of one algebraic pass.  It is rendered by
# AlgebraicPass.render() with:
#   pass_name      - used as the C function name and as the prefix of the
#                    helper functions and per-opcode tables
#   xforms         - the transforms whose search and replace values are
#                    emitted first
#   opcode_xforms  - mapping from opcode name to its list of transforms; one
#                    table and one `case` of the switch is emitted per opcode
#   condition_list - the condition expressions; each is evaluated once per
#                    shader into the condition_flags array
_algebraic_pass_template = mako.template.Template("""
#include "nir.h"
#include "nir_builder.h"
#include "nir_search.h"
#include "nir_search_helpers.h"

#ifndef NIR_OPT_ALGEBRAIC_STRUCT_DEFS
#define NIR_OPT_ALGEBRAIC_STRUCT_DEFS

struct transform {
   const nir_search_expression *search;
   const nir_search_value *replace;
   unsigned condition_offset;
};

#endif

% for xform in xforms:
   ${xform.search.render()}
   ${xform.replace.render()}
% endfor

% for (opcode, xform_list) in sorted(opcode_xforms.items()):
static const struct transform ${pass_name}_${opcode}_xforms[] = {
% for xform in xform_list:
   { &${xform.search.name}, ${xform.replace.c_ptr}, ${xform.condition_index} },
% endfor
};
% endfor

static bool
${pass_name}_block(nir_builder *build, nir_block *block,
                   const bool *condition_flags)
{
   bool progress = false;

   nir_foreach_instr_reverse_safe(instr, block) {
      if (instr->type != nir_instr_type_alu)
         continue;

      nir_alu_instr *alu = nir_instr_as_alu(instr);
      if (!alu->dest.dest.is_ssa)
         continue;

      switch (alu->op) {
      % for opcode in sorted(opcode_xforms.keys()):
      case nir_op_${opcode}:
         for (unsigned i = 0; i < ARRAY_SIZE(${pass_name}_${opcode}_xforms); i++) {
            const struct transform *xform = &${pass_name}_${opcode}_xforms[i];
            if (condition_flags[xform->condition_offset] &&
                nir_replace_instr(build, alu, xform->search, xform->replace)) {
               progress = true;
               break;
            }
         }
         break;
      % endfor
      default:
         break;
      }
   }

   return progress;
}

static bool
${pass_name}_impl(nir_function_impl *impl, const bool *condition_flags)
{
   bool progress = false;

   nir_builder build;
   nir_builder_init(&build, impl);

   nir_foreach_block_reverse(block, impl) {
      progress |= ${pass_name}_block(&build, block, condition_flags);
   }

   if (progress) {
      nir_metadata_preserve(impl, nir_metadata_block_index |
                                  nir_metadata_dominance);
    } else {
#ifndef NDEBUG
      impl->valid_metadata &= ~nir_metadata_not_properly_reset;
#endif
    }

   return progress;
}


bool
${pass_name}(nir_shader *shader)
{
   bool progress = false;
   bool condition_flags[${len(condition_list)}];
   const nir_shader_compiler_options *options = shader->options;
   const shader_info *info = &shader->info;
   (void) options;
   (void) info;

   % for index, condition in enumerate(condition_list):
   condition_flags[${index}] = ${condition};
   % endfor

   nir_foreach_function(function, shader) {
      if (function->impl)
         progress |= ${pass_name}_impl(function->impl, condition_flags);
   }

   return progress;
}
""")
 789
 790 class AlgebraicPass(object):
 791    def __init__(self, pass_name, transforms):
 792       self.xforms = []
 793       self.opcode_xforms = defaultdict(lambda : [])
 794       self.pass_name = pass_name
 795
 796       error = False
 797
 798       for xform in transforms:
 799          if not isinstance(xform, SearchAndReplace):
 800             try:
 801                xform = SearchAndReplace(xform)
 802             except:
 803                print("Failed to parse transformation:", file=sys.stderr)
 804                print("  " + str(xform), file=sys.stderr)
 805                traceback.print_exc(file=sys.stderr)
 806                print('', file=sys.stderr)
 807                error = True
 808                continue
 809
 810          self.xforms.append(xform)
 811          if xform.search.opcode in conv_opcode_types:
 812             dst_type = conv_opcode_types[xform.search.opcode]
 813             for size in type_sizes(dst_type):
 814                sized_opcode = xform.search.opcode + str(size)
 815                self.opcode_xforms[sized_opcode].append(xform)
 816          else:
 817             self.opcode_xforms[xform.search.opcode].append(xform)
 818
 819       if error:
 820          sys.exit(1)
 821
 822
 823    def render(self):
 824       return _algebraic_pass_template.render(pass_name=self.pass_name,
 825                                              xforms=self.xforms,
 826                                              opcode_xforms=self.opcode_xforms,
 827                                              condition_list=condition_list)