i965: Fix handling of MESA_pack_invert in blit (PBO) readpixels.

[mesa.git] / src / mesa / drivers / dri / i965 / brw_vec4_visitor.cpp
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp

index a036e2dbb06eaaad826bbd7704edd8b8250aa0cb..3b8cef69a7e99d79385d1ea898a439b7c96a0cd8 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -53,7 +53,6 @@ vec4_instruction::vec4_instruction(vec4_visitor *v,
     this->mlen = 0;
     this->base_mrf = 0;
     this->offset = 0;
-   this->ir = NULL;
     this->annotation = v->current_annotation;
  }
  
@@ -177,11 +176,11 @@ vec4_visitor::IF(uint32_t predicate)
     return inst;
  }
  
-/** Gen6+ IF with embedded comparison. */
+/** Gen6 IF with embedded comparison. */
  vec4_instruction *
  vec4_visitor::IF(src_reg src0, src_reg src1, uint32_t condition)
  {
-   assert(brw->gen >= 6);
+   assert(brw->gen == 6);
  
     vec4_instruction *inst;
  
@@ -946,20 +945,20 @@ vec4_visitor::visit(ir_variable *ir)
     if (variable_storage(ir))
        return;
  
-   switch (ir->mode) {
+   switch (ir->data.mode) {
     case ir_var_shader_in:
-      reg = new(mem_ctx) dst_reg(ATTR, ir->location);
+      reg = new(mem_ctx) dst_reg(ATTR, ir->data.location);
        break;
  
     case ir_var_shader_out:
        reg = new(mem_ctx) dst_reg(this, ir->type);
  
        for (int i = 0; i < type_size(ir->type); i++) {
-        output_reg[ir->location + i] = *reg;
-        output_reg[ir->location + i].reg_offset = i;
-        output_reg[ir->location + i].type =
+        output_reg[ir->data.location + i] = *reg;
+        output_reg[ir->data.location + i].reg_offset = i;
+        output_reg[ir->data.location + i].type =
              brw_type_for_base_type(ir->type->get_scalar_type());
-        output_reg_annotation[ir->location + i] = ir->name;
+        output_reg_annotation[ir->data.location + i] = ir->name;
        }
        break;
  
@@ -1008,48 +1007,15 @@ vec4_visitor::visit(ir_variable *ir)
  void
  vec4_visitor::visit(ir_loop *ir)
  {
-   dst_reg counter;
-
     /* We don't want debugging output to print the whole body of the
      * loop as the annotation.
      */
     this->base_ir = NULL;
  
-   if (ir->counter != NULL) {
-      this->base_ir = ir->counter;
-      ir->counter->accept(this);
-      counter = *(variable_storage(ir->counter));
-
-      if (ir->from != NULL) {
-        this->base_ir = ir->from;
-        ir->from->accept(this);
-
-        emit(MOV(counter, this->result));
-      }
-   }
-
     emit(BRW_OPCODE_DO);
  
-   if (ir->to) {
-      this->base_ir = ir->to;
-      ir->to->accept(this);
-
-      emit(CMP(dst_null_d(), src_reg(counter), this->result,
-              brw_conditional_for_comparison(ir->cmp)));
-
-      vec4_instruction *inst = emit(BRW_OPCODE_BREAK);
-      inst->predicate = BRW_PREDICATE_NORMAL;
-   }
-
     visit_instructions(&ir->body_instructions);
  
-
-   if (ir->increment) {
-      this->base_ir = ir->increment;
-      ir->increment->accept(this);
-      emit(ADD(counter, src_reg(counter), this->result));
-   }
-
     emit(BRW_OPCODE_WHILE);
  }
  
@@ -1260,16 +1226,34 @@ vec4_visitor::visit(ir_expression *ir)
        break;
  
     case ir_unop_sign:
-      emit(MOV(result_dst, src_reg(0.0f)));
+      if (ir->type->is_float()) {
+         /* AND(val, 0x80000000) gives the sign bit.
+          *
+          * Predicated OR ORs 1.0 (0x3f800000) with the sign bit if val is not
+          * zero.
+          */
+         emit(CMP(dst_null_f(), op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ));
  
-      emit(CMP(dst_null_d(), op[0], src_reg(0.0f), BRW_CONDITIONAL_G));
-      inst = emit(MOV(result_dst, src_reg(1.0f)));
-      inst->predicate = BRW_PREDICATE_NORMAL;
+         op[0].type = BRW_REGISTER_TYPE_UD;
+         result_dst.type = BRW_REGISTER_TYPE_UD;
+         emit(AND(result_dst, op[0], src_reg(0x80000000u)));
  
-      emit(CMP(dst_null_d(), op[0], src_reg(0.0f), BRW_CONDITIONAL_L));
-      inst = emit(MOV(result_dst, src_reg(-1.0f)));
-      inst->predicate = BRW_PREDICATE_NORMAL;
+         inst = emit(OR(result_dst, src_reg(result_dst), src_reg(0x3f800000u)));
+         inst->predicate = BRW_PREDICATE_NORMAL;
+
+         this->result.type = BRW_REGISTER_TYPE_F;
+      } else {
+         /*  ASR(val, 31) -> negative val generates 0xffffffff (signed -1).
+          *               -> non-negative val generates 0x00000000.
+          *  Predicated OR sets 1 if val is positive.
+          */
+         emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_G));
  
+         emit(ASR(result_dst, op[0], src_reg(31)));
+
+         inst = emit(OR(result_dst, src_reg(result_dst), src_reg(1)));
+         inst->predicate = BRW_PREDICATE_NORMAL;
+      }
        break;
  
     case ir_unop_rcp:
@@ -1345,7 +1329,7 @@ vec4_visitor::visit(ir_expression *ir)
        break;
  
     case ir_binop_mul:
-      if (ir->type->is_integer()) {
+      if (brw->gen < 8 && ir->type->is_integer()) {
          /* For integer multiplication, the MUL uses the low 16 bits of one of
           * the operands (src0 through SNB, src1 on IVB and later).  The MACH
           * accumulates in the contribution of the upper 16 bits of that
@@ -1582,7 +1566,16 @@ vec4_visitor::visit(ir_expression *ir)
        src_reg surf_index =
           src_reg(prog_data->base.binding_table.ubo_start + uniform_block->value.u[0]);
        if (const_offset_ir) {
-         offset = src_reg(const_offset / 16);
+         if (brw->gen >= 8) {
+            /* Store the offset in a GRF so we can send-from-GRF. */
+            offset = src_reg(this, glsl_type::int_type);
+            emit(MOV(dst_reg(offset), src_reg(const_offset / 16)));
+         } else {
+            /* Immediates are fine on older generations since they'll be moved
+             * to a (potentially fake) MRF at the generator level.
+             */
+            offset = src_reg(const_offset / 16);
+         }
        } else {
           offset = src_reg(this, glsl_type::uint_type);
           emit(SHR(dst_reg(offset), op[1], src_reg(4)));
@@ -1759,7 +1752,7 @@ vec4_visitor::visit(ir_dereference_variable *ir)
     this->result = src_reg(*reg);
  
     /* System values get their swizzle from the dst_reg writemask */
-   if (ir->var->mode == ir_var_system_value)
+   if (ir->var->data.mode == ir_var_system_value)
        return;
  
     if (type->is_scalar() || type->is_vector() || type->is_matrix())
@@ -2170,7 +2163,7 @@ vec4_visitor::visit_atomic_counter_intrinsic(ir_call *ir)
        ir->actual_parameters.get_head());
     ir_variable *location = deref->variable_referenced();
     unsigned surf_index = (prog_data->base.binding_table.abo_start +
-                          location->atomic.buffer_index);
+                          location->data.atomic.buffer_index);
  
     /* Calculate the surface offset */
     src_reg offset(this, glsl_type::uint_type);
@@ -2180,9 +2173,9 @@ vec4_visitor::visit_atomic_counter_intrinsic(ir_call *ir)
  
        src_reg tmp(this, glsl_type::uint_type);
        emit(MUL(dst_reg(tmp), this->result, ATOMIC_COUNTER_SIZE));
-      emit(ADD(dst_reg(offset), tmp, location->atomic.offset));
+      emit(ADD(dst_reg(offset), tmp, location->data.atomic.offset));
     } else {
-      offset = location->atomic.offset;
+      offset = location->data.atomic.offset;
     }
  
     /* Emit the appropriate machine instruction */
@@ -2216,6 +2209,31 @@ vec4_visitor::visit(ir_call *ir)
     }
  }
  
+src_reg
+vec4_visitor::emit_mcs_fetch(ir_texture *ir, src_reg coordinate, int sampler)
+{
+   /* Message of length 1 starting at MRF 2.  Parameters are u, v, r, lod;
+    * lod is always zero due to API restrictions, so every channel that the
+    * coordinate does not cover is written with zero.
+    */
+   const int payload_mrf = 2;
+   const glsl_type *coord_type = ir->coordinate->type;
+   const int used_channels = (1 << coord_type->vector_elements) - 1;
+   const int unused_channels = 0xf & ~used_channels;
+
+   vec4_instruction *fetch = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TXF_MCS);
+   fetch->base_mrf = payload_mrf;
+   fetch->mlen = 1;
+   fetch->sampler = sampler;
+   fetch->dst = dst_reg(this, glsl_type::uvec4_type);
+   fetch->dst.writemask = WRITEMASK_XYZW;
+
+   emit(MOV(dst_reg(MRF, payload_mrf, coord_type, used_channels), coordinate));
+   emit(MOV(dst_reg(MRF, payload_mrf, coord_type, unused_channels), src_reg(0)));
+
+   emit(fetch);
+   return src_reg(fetch->dst);
+
  void
  vec4_visitor::visit(ir_texture *ir)
  {
@@ -2266,7 +2284,7 @@ vec4_visitor::visit(ir_texture *ir)
     }
  
     const glsl_type *lod_type = NULL, *sample_index_type = NULL;
-   src_reg lod, dPdx, dPdy, sample_index;
+   src_reg lod, dPdx, dPdy, sample_index, mcs;
     switch (ir->op) {
     case ir_tex:
        lod = src_reg(0.0f);
@@ -2287,6 +2305,11 @@ vec4_visitor::visit(ir_texture *ir)
        ir->lod_info.sample_index->accept(this);
        sample_index = this->result;
        sample_index_type = ir->lod_info.sample_index->type;
+
+      if (brw->gen >= 7 && key->tex.compressed_multisample_layout_mask & (1<<sampler))
+         mcs = emit_mcs_fetch(ir, coordinate, sampler);
+      else
+         mcs = src_reg(0u);
        break;
     case ir_txd:
        ir->lod_info.grad.dPdx->accept(this);
@@ -2407,13 +2430,15 @@ vec4_visitor::visit(ir_texture *ir)
        } else if (ir->op == ir_txf_ms) {
           emit(MOV(dst_reg(MRF, param_base + 1, sample_index_type, WRITEMASK_X),
                    sample_index));
+         if (brw->gen >= 7) {
+            /* Gen7+ only: MCS data is in .x of `mcs` but belongs in .y of the
+             * second param vec4, so replicate .x and write only the .y channel.
+             */
+            mcs.swizzle = BRW_SWIZZLE_XXXX;
+            emit(MOV(dst_reg(MRF, param_base + 1, glsl_type::uint_type, WRITEMASK_Y),
+                     mcs));
+         }
           inst->mlen++;
-
-         /* on Gen7, there is an additional MCS parameter here after SI,
-          * but we don't bother to emit it since it's always zero. If
-          * we start supporting texturing from CMS surfaces, this will have
-          * to change
-          */
        } else if (ir->op == ir_txd) {
          const glsl_type *type = lod_type;
  
@@ -2983,6 +3008,11 @@ vec4_visitor::get_pull_constant_offset(vec4_instruction *inst,
        }
  
        return index;
+   } else if (brw->gen >= 8) {
+      /* Store the offset in a GRF so we can send-from-GRF. */
+      src_reg offset = src_reg(this, glsl_type::int_type);
+      emit_before(inst, MOV(dst_reg(offset), src_reg(reg_offset)));
+      return offset;
     } else {
        int message_header_scale = brw->gen < 6 ? 16 : 1;
        return src_reg(reg_offset * message_header_scale);