nir/intrinsics: Add a second const index to load_uniform

[mesa.git] / src / mesa / drivers / dri / i965 / brw_vec4_nir.cpp
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp

index b13465bcb304ee4263f2d75b8cf1a5af167d63df..9e52229190ec338c08d5de8e64f7b8ec61f17a64 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
@@ -119,7 +119,7 @@ vec4_visitor::nir_setup_inputs(nir_shader *shader)
  
     foreach_list_typed(nir_variable, var, node, &shader->inputs) {
        int offset = var->data.driver_location;
-      unsigned size = type_size(var->type);
+      unsigned size = type_size_vec4(var->type);
        for (unsigned i = 0; i < size; i++) {
           src_reg src = src_reg(ATTR, var->data.location + i, var->type);
           nir_inputs[offset + i] = src;
@@ -140,12 +140,12 @@ vec4_visitor::nir_setup_uniforms(nir_shader *shader)
           /* UBO's, atomics and samplers don't take up space in the
              uniform file */
           if (var->interface_type != NULL || var->type->contains_atomic() ||
-             type_size(var->type) == 0) {
+             type_size_vec4(var->type) == 0) {
              continue;
           }
  
           assert(uniforms < uniform_array_size);
-         this->uniform_size[uniforms] = type_size(var->type);
+         this->uniform_size[uniforms] = type_size_vec4(var->type);
  
           if (strncmp(var->name, "gl_", 3) == 0)
              nir_setup_builtin_uniform(var);
@@ -161,7 +161,7 @@ vec4_visitor::nir_setup_uniforms(nir_shader *shader)
               strcmp(var->name, "parameters") == 0);
  
        assert(uniforms < uniform_array_size);
-      this->uniform_size[uniforms] = type_size(var->type);
+      this->uniform_size[uniforms] = type_size_vec4(var->type);
  
        struct gl_program_parameter_list *plist = prog->Parameters;
        for (unsigned p = 0; p < plist->NumParameters; p++) {
@@ -458,13 +458,28 @@ vec4_visitor::nir_emit_load_const(nir_load_const_instr *instr)
     dst_reg reg = dst_reg(GRF, alloc.allocate(1));
     reg.type =  BRW_REGISTER_TYPE_F;
  
+   unsigned remaining = brw_writemask_for_size(instr->def.num_components);
+
     /* @FIXME: consider emitting vector operations to save some MOVs in
      * cases where the components are representable in 8 bits.
-    * By now, we emit a MOV for each component.
+    * For now, we emit a MOV for each distinct value.
      */
-   for (unsigned i = 0; i < instr->def.num_components; ++i) {
-      reg.writemask = 1 << i;
+   for (unsigned i = 0; i < instr->def.num_components; i++) {
+      unsigned writemask = 1 << i;
+
+      if ((remaining & writemask) == 0)
+         continue;
+
+      for (unsigned j = i; j < instr->def.num_components; j++) {
+         if (instr->value.u[i] == instr->value.u[j]) {
+            writemask |= 1 << j;
+         }
+      }
+
+      reg.writemask = writemask;
        emit(MOV(reg, src_reg(instr->value.f[i])));
+
+      remaining &= ~writemask;
     }
  
     /* Set final writemask */
@@ -555,7 +570,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
        has_indirect = true;
        /* fallthrough */
     case nir_intrinsic_load_uniform: {
-      int uniform = instr->const_index[0];
+      int uniform = instr->const_index[0] + instr->const_index[1];
  
        dest = get_nir_dest(instr->dest);
  
@@ -775,31 +790,35 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
        break;
  
     case nir_op_imul: {
-      nir_const_value *value0 = nir_src_as_const_value(instr->src[0].src);
-      nir_const_value *value1 = nir_src_as_const_value(instr->src[1].src);
-
-      /* For integer multiplication, the MUL uses the low 16 bits of one of
-       * the operands (src0 through SNB, src1 on IVB and later). The MACH
-       * accumulates in the contribution of the upper 16 bits of that
-       * operand. If we can determine that one of the args is in the low
-       * 16 bits, though, we can just emit a single MUL.
-       */
-      if (value0 && value0->u[0] < (1 << 16)) {
-         if (devinfo->gen < 7)
-            emit(MUL(dst, op[0], op[1]));
-         else
-            emit(MUL(dst, op[1], op[0]));
-      } else if (value1 && value1->u[0] < (1 << 16)) {
-         if (devinfo->gen < 7)
-            emit(MUL(dst, op[1], op[0]));
-         else
-            emit(MUL(dst, op[0], op[1]));
-      } else {
-         struct brw_reg acc = retype(brw_acc_reg(8), dst.type);
+      if (devinfo->gen < 8) {
+         nir_const_value *value0 = nir_src_as_const_value(instr->src[0].src);
+         nir_const_value *value1 = nir_src_as_const_value(instr->src[1].src);
+
+         /* For integer multiplication, the MUL uses the low 16 bits of one of
+          * the operands (src0 through SNB, src1 on IVB and later). The MACH
+          * accumulates in the contribution of the upper 16 bits of that
+          * operand. If we can determine that one of the args is in the low
+          * 16 bits, though, we can just emit a single MUL.
+          */
+         if (value0 && value0->u[0] < (1 << 16)) {
+            if (devinfo->gen < 7)
+               emit(MUL(dst, op[0], op[1]));
+            else
+               emit(MUL(dst, op[1], op[0]));
+         } else if (value1 && value1->u[0] < (1 << 16)) {
+            if (devinfo->gen < 7)
+               emit(MUL(dst, op[1], op[0]));
+            else
+               emit(MUL(dst, op[0], op[1]));
+         } else {
+            struct brw_reg acc = retype(brw_acc_reg(8), dst.type);
  
-         emit(MUL(acc, op[0], op[1]));
-         emit(MACH(dst_null_d(), op[0], op[1]));
-         emit(MOV(dst, src_reg(acc)));
+            emit(MUL(acc, op[0], op[1]));
+            emit(MACH(dst_null_d(), op[0], op[1]));
+            emit(MOV(dst, src_reg(acc)));
+         }
+      } else {
+        emit(MUL(dst, op[0], op[1]));
        }
        break;
     }
@@ -1020,18 +1039,33 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
     }
  
     case nir_op_inot:
+      if (devinfo->gen >= 8) {
+         op[0] = resolve_source_modifiers(op[0]);
+      }
        emit(NOT(dst, op[0]));
        break;
  
     case nir_op_ixor:
+      if (devinfo->gen >= 8) {
+         op[0] = resolve_source_modifiers(op[0]);
+         op[1] = resolve_source_modifiers(op[1]);
+      }
        emit(XOR(dst, op[0], op[1]));
        break;
  
     case nir_op_ior:
+      if (devinfo->gen >= 8) {
+         op[0] = resolve_source_modifiers(op[0]);
+         op[1] = resolve_source_modifiers(op[1]);
+      }
        emit(OR(dst, op[0], op[1]));
        break;
  
     case nir_op_iand:
+      if (devinfo->gen >= 8) {
+         op[0] = resolve_source_modifiers(op[0]);
+         op[1] = resolve_source_modifiers(op[1]);
+      }
        emit(AND(dst, op[0], op[1]));
        break;
  
@@ -1292,6 +1326,20 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
     default:
        unreachable("Unimplemented ALU operation");
     }
+
+   /* If we need to do a boolean resolve, replace the result with -(x & 1)
+    * to sign extend the low bit to 0/~0
+    */
+   if (devinfo->gen <= 5 &&
+       (instr->instr.pass_flags & BRW_NIR_BOOLEAN_MASK) ==
+       BRW_NIR_BOOLEAN_NEEDS_RESOLVE) {
+      dst_reg masked = dst_reg(this, glsl_type::int_type);
+      masked.writemask = dst.writemask;
+      emit(AND(masked, src_reg(dst), src_reg(1)));
+      src_reg masked_neg = src_reg(masked);
+      masked_neg.negate = true;
+      emit(MOV(retype(dst, BRW_REGISTER_TYPE_D), masked_neg));
+   }
  }
  
  void