nir: fix memleak in error path

[mesa.git] / src / compiler / nir / nir_constant_expressions.py
diff --git a/src/compiler/nir/nir_constant_expressions.py b/src/compiler/nir/nir_constant_expressions.py

index c6745f1e9340e954dba984f7d8fa0ec7359fcb07..fca14602855acca5abebec049126614906b15bee 100644 (file)
--- a/src/compiler/nir/nir_constant_expressions.py
+++ b/src/compiler/nir/nir_constant_expressions.py
@@ -1,20 +1,8 @@
+from __future__ import print_function
  
  import re
-
-type_split_re = re.compile(r'(?P<type>[a-z]+)(?P<bits>\d+)')
-
-def type_has_size(type_):
-    return type_[-1:].isdigit()
-
-def type_size(type_):
-    assert type_has_size(type_)
-    return int(type_split_re.match(type_).group('bits'))
-
-def type_sizes(type_):
-    if type_has_size(type_):
-        return [type_size(type_)]
-    else:
-        return [32, 64]
+from nir_opcodes import opcodes
+from nir_opcodes import type_has_size, type_size, type_sizes, type_base_type
  
  def type_add_size(type_, size):
      if type_has_size(type_):
@@ -22,22 +10,28 @@ def type_add_size(type_, size):
      return type_ + str(size)
  
  def op_bit_sizes(op):
-    sizes = set([8, 16, 32, 64])
+    sizes = None
      if not type_has_size(op.output_type):
-        sizes = sizes.intersection(set(type_sizes(op.output_type)))
+        sizes = set(type_sizes(op.output_type))
+
      for input_type in op.input_types:
          if not type_has_size(input_type):
-            sizes = sizes.intersection(set(type_sizes(input_type)))
-    return sorted(list(sizes))
+            if sizes is None:
+                sizes = set(type_sizes(input_type))
+            else:
+                sizes = sizes.intersection(set(type_sizes(input_type)))
+
+    return sorted(list(sizes)) if sizes is not None else None
  
  def get_const_field(type_):
-    if type_ == "bool32":
-        return "u32"
+    if type_size(type_) == 1:
+        return 'b'
+    elif type_base_type(type_) == 'bool':
+        return 'i' + str(type_size(type_))
+    elif type_ == "float16":
+        return "u16"
      else:
-        m = type_split_re.match(type_)
-        if not m:
-            raise Exception(str(type_))
-        return m.group('type')[0] + m.group('bits')
+        return type_base_type(type_)[0] + str(type_size(type_))
  
  template = """\
  /*
@@ -67,11 +61,13 @@ template = """\
   */
  
  #include <math.h>
-#include "main/core.h"
  #include "util/rounding.h" /* for _mesa_roundeven */
  #include "util/half_float.h"
+#include "util/bigmath.h"
  #include "nir_constant_expressions.h"
  
+#define MAX_UINT_FOR_SIZE(bits) (UINT64_MAX >> (64 - (bits)))
+
  /**
   * Evaluate one component of packSnorm4x8.
   */
@@ -245,10 +241,17 @@ unpack_half_1x16(uint16_t u)
  }
  
  /* Some typed vector structures to make things like src0.y work */
+typedef int8_t int1_t;
+typedef uint8_t uint1_t;
+typedef float float16_t;
  typedef float float32_t;
  typedef double float64_t;
+typedef bool bool1_t;
+typedef bool bool8_t;
+typedef bool bool16_t;
  typedef bool bool32_t;
-% for type in ["float", "int", "uint"]:
+typedef bool bool64_t;
+% for type in ["float", "int", "uint", "bool"]:
  % for width in type_sizes(type):
  struct ${type}${width}_vec {
     ${type}${width}_t x;
@@ -259,154 +262,170 @@ struct ${type}${width}_vec {
  % endfor
  % endfor
  
-struct bool32_vec {
-    bool x;
-    bool y;
-    bool z;
-    bool w;
-};
-
-% for name, op in sorted(opcodes.iteritems()):
-static nir_const_value
-evaluate_${name}(MAYBE_UNUSED unsigned num_components, unsigned bit_size,
-                 MAYBE_UNUSED nir_const_value *_src)
-{
-   nir_const_value _dst_val = { {0, } };
-
-   switch (bit_size) {
-   % for bit_size in op_bit_sizes(op):
-   case ${bit_size}: {
-      <%
-      output_type = type_add_size(op.output_type, bit_size)
-      input_types = [type_add_size(type_, bit_size) for type_ in op.input_types]
-      %>
-
-      ## For each non-per-component input, create a variable srcN that
-      ## contains x, y, z, and w elements which are filled in with the
-      ## appropriately-typed values.
-      % for j in range(op.num_inputs):
-         % if op.input_sizes[j] == 0:
-            <% continue %>
-         % elif "src" + str(j) not in op.const_expr:
-            ## Avoid unused variable warnings
-            <% continue %>
-         %endif
-
-         const struct ${input_types[j]}_vec src${j} = {
-         % for k in range(op.input_sizes[j]):
-            % if input_types[j] == "bool32":
-               _src[${j}].u32[${k}] != 0,
-            % else:
-               _src[${j}].${get_const_field(input_types[j])}[${k}],
-            % endif
-         % endfor
-         % for k in range(op.input_sizes[j], 4):
-            0,
-         % endfor
-         };
+<%def name="evaluate_op(op, bit_size)">
+   <%
+   output_type = type_add_size(op.output_type, bit_size)
+   input_types = [type_add_size(type_, bit_size) for type_ in op.input_types]
+   %>
+
+   ## For each non-per-component input, create a variable srcN that
+   ## contains x, y, z, and w elements which are filled in with the
+   ## appropriately-typed values.
+   % for j in range(op.num_inputs):
+      % if op.input_sizes[j] == 0:
+         <% continue %>
+      % elif "src" + str(j) not in op.const_expr:
+         ## Avoid unused variable warnings
+         <% continue %>
+      %endif
+
+      const struct ${input_types[j]}_vec src${j} = {
+      % for k in range(op.input_sizes[j]):
+         % if input_types[j] == "int1":
+             /* 1-bit integers use a 0/-1 convention */
+             -(int1_t)_src[${j}][${k}].b,
+         % elif input_types[j] == "float16":
+            _mesa_half_to_float(_src[${j}][${k}].u16),
+         % else:
+            _src[${j}][${k}].${get_const_field(input_types[j])},
+         % endif
        % endfor
+      % for k in range(op.input_sizes[j], 4):
+         0,
+      % endfor
+      };
+   % endfor
  
-      % if op.output_size == 0:
-         ## For per-component instructions, we need to iterate over the
-         ## components and apply the constant expression one component
-         ## at a time.
-         for (unsigned _i = 0; _i < num_components; _i++) {
-            ## For each per-component input, create a variable srcN that
-            ## contains the value of the current (_i'th) component.
-            % for j in range(op.num_inputs):
-               % if op.input_sizes[j] != 0:
-                  <% continue %>
-               % elif "src" + str(j) not in op.const_expr:
-                  ## Avoid unused variable warnings
-                  <% continue %>
-               % elif input_types[j] == "bool32":
-                  const bool src${j} = _src[${j}].u32[_i] != 0;
-               % else:
-                  const ${input_types[j]}_t src${j} =
-                     _src[${j}].${get_const_field(input_types[j])}[_i];
-               % endif
-            % endfor
-
-            ## Create an appropriately-typed variable dst and assign the
-            ## result of the const_expr to it.  If const_expr already contains
-            ## writes to dst, just include const_expr directly.
-            % if "dst" in op.const_expr:
-               ${output_type}_t dst;
-
-               ${op.const_expr}
-            % else:
-               ${output_type}_t dst = ${op.const_expr};
-            % endif
-
-            ## Store the current component of the actual destination to the
-            ## value of dst.
-            % if output_type == "bool32":
-               ## Sanitize the C value to a proper NIR bool
-               _dst_val.u32[_i] = dst ? NIR_TRUE : NIR_FALSE;
+   % if op.output_size == 0:
+      ## For per-component instructions, we need to iterate over the
+      ## components and apply the constant expression one component
+      ## at a time.
+      for (unsigned _i = 0; _i < num_components; _i++) {
+         ## For each per-component input, create a variable srcN that
+         ## contains the value of the current (_i'th) component.
+         % for j in range(op.num_inputs):
+            % if op.input_sizes[j] != 0:
+               <% continue %>
+            % elif "src" + str(j) not in op.const_expr:
+               ## Avoid unused variable warnings
+               <% continue %>
+            % elif input_types[j] == "int1":
+               /* 1-bit integers use a 0/-1 convention */
+               const int1_t src${j} = -(int1_t)_src[${j}][_i].b;
+            % elif input_types[j] == "float16":
+               const float src${j} =
+                  _mesa_half_to_float(_src[${j}][_i].u16);
              % else:
-               _dst_val.${get_const_field(output_type)}[_i] = dst;
+               const ${input_types[j]}_t src${j} =
+                  _src[${j}][_i].${get_const_field(input_types[j])};
              % endif
-         }
-      % else:
-         ## In the non-per-component case, create a struct dst with
-         ## appropriately-typed elements x, y, z, and w and assign the result
-         ## of the const_expr to all components of dst, or include the
-         ## const_expr directly if it writes to dst already.
-         struct ${output_type}_vec dst;
+         % endfor
  
+         ## Create an appropriately-typed variable dst and assign the
+         ## result of the const_expr to it.  If const_expr already contains
+         ## writes to dst, just include const_expr directly.
           % if "dst" in op.const_expr:
+            ${output_type}_t dst;
+
              ${op.const_expr}
           % else:
-            ## Splat the value to all components.  This way expressions which
-            ## write the same value to all components don't need to explicitly
-            ## write to dest.  One such example is fnoise which has a
-            ## const_expr of 0.0f.
-            dst.x = dst.y = dst.z = dst.w = ${op.const_expr};
+            ${output_type}_t dst = ${op.const_expr};
           % endif
  
-         ## For each component in the destination, copy the value of dst to
-         ## the actual destination.
-         % for k in range(op.output_size):
-            % if output_type == "bool32":
-               ## Sanitize the C value to a proper NIR bool
-               _dst_val.u32[${k}] = dst.${"xyzw"[k]} ? NIR_TRUE : NIR_FALSE;
-            % else:
-               _dst_val.${get_const_field(output_type)}[${k}] = dst.${"xyzw"[k]};
-            % endif
-         % endfor
+         ## Store the value of dst to the current component of the actual
+         ## destination.
+         % if output_type == "int1" or output_type == "uint1":
+            /* 1-bit integers get truncated */
+            _dst_val[_i].b = dst & 1;
+         % elif output_type.startswith("bool"):
+            ## Sanitize the C value to a proper NIR 0/-1 bool
+            _dst_val[_i].${get_const_field(output_type)} = -(int)dst;
+         % elif output_type == "float16":
+            _dst_val[_i].u16 = _mesa_float_to_half(dst);
+         % else:
+            _dst_val[_i].${get_const_field(output_type)} = dst;
+         % endif
+      }
+   % else:
+      ## In the non-per-component case, create a struct dst with
+      ## appropriately-typed elements x, y, z, and w and assign the result
+      ## of the const_expr to all components of dst, or include the
+      ## const_expr directly if it writes to dst already.
+      struct ${output_type}_vec dst;
+
+      % if "dst" in op.const_expr:
+         ${op.const_expr}
+      % else:
+         ## Splat the value to all components.  This way expressions which
+         ## write the same value to all components don't need to explicitly
+         ## write to dst.  One such example is fnoise which has a
+         ## const_expr of 0.0f.
+         dst.x = dst.y = dst.z = dst.w = ${op.const_expr};
        % endif
  
-      break;
-   }
-   % endfor
-
-   default:
-      unreachable("unknown bit width");
-   }
+      ## For each component in the destination, copy the value of dst to
+      ## the actual destination.
+      % for k in range(op.output_size):
+         % if output_type == "int1" or output_type == "uint1":
+            /* 1-bit integers get truncated */
+            _dst_val[${k}].b = dst.${"xyzw"[k]} & 1;
+         % elif output_type.startswith("bool"):
+            ## Sanitize the C value to a proper NIR 0/-1 bool
+            _dst_val[${k}].${get_const_field(output_type)} = -(int)dst.${"xyzw"[k]};
+         % elif output_type == "float16":
+            _dst_val[${k}].u16 = _mesa_float_to_half(dst.${"xyzw"[k]});
+         % else:
+            _dst_val[${k}].${get_const_field(output_type)} = dst.${"xyzw"[k]};
+         % endif
+      % endfor
+   % endif
+</%def>
+
+% for name, op in sorted(opcodes.items()):
+static void
+evaluate_${name}(nir_const_value *_dst_val,
+                 UNUSED unsigned num_components,
+                 ${"UNUSED" if op_bit_sizes(op) is None else ""} unsigned bit_size,
+                 UNUSED nir_const_value **_src)
+{
+   % if op_bit_sizes(op) is not None:
+      switch (bit_size) {
+      % for bit_size in op_bit_sizes(op):
+      case ${bit_size}: {
+         ${evaluate_op(op, bit_size)}
+         break;
+      }
+      % endfor
  
-   return _dst_val;
+      default:
+         unreachable("unknown bit width");
+      }
+   % else:
+      ${evaluate_op(op, 0)}
+   % endif
  }
  % endfor
  
-nir_const_value
-nir_eval_const_opcode(nir_op op, unsigned num_components,
-                      unsigned bit_width, nir_const_value *src)
+void
+nir_eval_const_opcode(nir_op op, nir_const_value *dest,
+                      unsigned num_components, unsigned bit_width,
+                      nir_const_value **src)
  {
     switch (op) {
-% for name in sorted(opcodes.iterkeys()):
+% for name in sorted(opcodes.keys()):
     case nir_op_${name}:
-      return evaluate_${name}(num_components, bit_width, src);
+      evaluate_${name}(dest, num_components, bit_width, src);
+      return;
  % endfor
     default:
        unreachable("shouldn't get here");
     }
  }"""
  
-from nir_opcodes import opcodes
  from mako.template import Template
  
-print Template(template).render(opcodes=opcodes, type_sizes=type_sizes,
+print(Template(template).render(opcodes=opcodes, type_sizes=type_sizes,
                                  type_has_size=type_has_size,
                                  type_add_size=type_add_size,
                                  op_bit_sizes=op_bit_sizes,
-                                get_const_field=get_const_field)
+                                get_const_field=get_const_field))