i965: Add driconf option clamp_max_samples

[mesa.git] / src / mesa / drivers / dri / i965 / brw_fs_visitor.cpp
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp

index b4fff50139fe40f7c330f9abf44363a2d9d87722..dd606718b66fd4bb98449ca3d239ed81c2085c44 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -82,6 +82,8 @@ fs_visitor::visit(ir_variable *ir)
          }
        } else if (ir->location == FRAG_RESULT_DEPTH) {
          this->frag_depth = *reg;
+      } else if (ir->location == FRAG_RESULT_SAMPLE_MASK) {
+         this->sample_mask = *reg;
        } else {
          /* gl_FragData or a user-defined FS output */
          assert(ir->location >= FRAG_RESULT_DATA0 &&
@@ -125,6 +127,13 @@ fs_visitor::visit(ir_variable *ir)
  
        reg = new(this->mem_ctx) fs_reg(UNIFORM, param_index);
        reg->type = brw_type_for_base_type(ir->type);
+
+   } else if (ir->mode == ir_var_system_value) {
+      if (ir->location == SYSTEM_VALUE_SAMPLE_POS) {
+        reg = emit_samplepos_setup(ir);
+      } else if (ir->location == SYSTEM_VALUE_SAMPLE_ID) {
+        reg = emit_sampleid_setup(ir);
+      }
     }
  
     if (!reg)
@@ -808,6 +817,7 @@ fs_visitor::emit_assignment_writes(fs_reg &l, fs_reg &r,
        break;
  
     case GLSL_TYPE_SAMPLER:
+   case GLSL_TYPE_ATOMIC_UINT:
        break;
  
     case GLSL_TYPE_VOID:
@@ -1093,34 +1103,19 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate,
     const int vector_elements =
        ir->coordinate ? ir->coordinate->type->vector_elements : 0;
  
-   if (ir->offset != NULL && ir->op == ir_txf) {
-      /* It appears that the ld instruction used for txf does its
-       * address bounds check before adding in the offset.  To work
-       * around this, just add the integer offset to the integer texel
-       * coordinate, and don't put the offset in the header.
+   if (ir->offset) {
+      /* The offsets set up by the ir_texture visitor are in the
+       * m1 header, so we can't go headerless.
         */
-      ir_constant *offset = ir->offset->as_constant();
-      for (int i = 0; i < vector_elements; i++) {
-        emit(ADD(fs_reg(MRF, base_mrf + mlen + i * reg_width, coordinate.type),
-                  coordinate,
-                  offset->value.i[i]));
-        coordinate.reg_offset++;
-      }
-   } else {
-      if (ir->offset) {
-        /* The offsets set up by the ir_texture visitor are in the
-         * m1 header, so we can't go headerless.
-         */
-        header_present = true;
-        mlen++;
-        base_mrf--;
-      }
+      header_present = true;
+      mlen++;
+      base_mrf--;
+   }
  
-      for (int i = 0; i < vector_elements; i++) {
-        emit(MOV(fs_reg(MRF, base_mrf + mlen + i * reg_width, coordinate.type),
-                  coordinate));
-        coordinate.reg_offset++;
-      }
+   for (int i = 0; i < vector_elements; i++) {
+      emit(MOV(fs_reg(MRF, base_mrf + mlen + i * reg_width, coordinate.type),
+               coordinate));
+      coordinate.reg_offset++;
     }
     mlen += vector_elements * reg_width;
  
@@ -1229,7 +1224,6 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
  {
     int reg_width = dispatch_width / 8;
     bool header_present = false;
-   int offsets[3];
  
     fs_reg payload = fs_reg(this, glsl_type::float_type);
     fs_reg next = payload;
@@ -1251,11 +1245,13 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
        next.reg_offset++;
     }
  
+   bool has_nonconstant_offset = ir->offset && !ir->offset->as_constant();
+   bool coordinate_done = false;
+
     /* Set up the LOD info */
     switch (ir->op) {
     case ir_tex:
     case ir_lod:
-   case ir_tg4:
        break;
     case ir_txb:
        emit(MOV(next, lod));
@@ -1290,6 +1286,8 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
              next.reg_offset++;
           }
        }
+
+      coordinate_done = true;
        break;
     }
     case ir_txs:
@@ -1301,22 +1299,8 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
        next.reg_offset++;
        break;
     case ir_txf:
-      /* It appears that the ld instruction used for txf does its
-       * address bounds check before adding in the offset.  To work
-       * around this, just add the integer offset to the integer texel
-       * coordinate, and don't put the offset in the header.
-       */
-      if (ir->offset) {
-        ir_constant *offset = ir->offset->as_constant();
-        offsets[0] = offset->value.i[0];
-        offsets[1] = offset->value.i[1];
-        offsets[2] = offset->value.i[2];
-      } else {
-        memset(offsets, 0, sizeof(offsets));
-      }
-
        /* Unfortunately, the parameters for LD are intermixed: u, lod, v, r. */
-      emit(ADD(next.retype(BRW_REGISTER_TYPE_D), coordinate, offsets[0]));
+      emit(MOV(next.retype(BRW_REGISTER_TYPE_D), coordinate));
        coordinate.reg_offset++;
        next.reg_offset++;
  
@@ -1324,10 +1308,12 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
        next.reg_offset++;
  
        for (int i = 1; i < ir->coordinate->type->vector_elements; i++) {
-        emit(ADD(next.retype(BRW_REGISTER_TYPE_D), coordinate, offsets[i]));
+        emit(MOV(next.retype(BRW_REGISTER_TYPE_D), coordinate));
          coordinate.reg_offset++;
          next.reg_offset++;
        }
+
+      coordinate_done = true;
        break;
     case ir_txf_ms:
        emit(MOV(next.retype(BRW_REGISTER_TYPE_UD), sample_index));
@@ -1348,15 +1334,47 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
           coordinate.reg_offset++;
           next.reg_offset++;
        }
+
+      coordinate_done = true;
+      break;
+   case ir_tg4:
+      if (has_nonconstant_offset) {
+         if (ir->shadow_comparitor && dispatch_width == 16)
+            fail("Gen7 does not support gather4_po_c in SIMD16 mode.");
+
+         /* More crazy intermixing: payload order is u, v, offu, offv, then r if present */
+         ir->offset->accept(this);
+         fs_reg offset_value = this->result;
+
+         for (int i = 0; i < 2; i++) { /* u, v */
+            emit(MOV(next, coordinate));
+            coordinate.reg_offset++;
+            next.reg_offset++;
+         }
+
+         for (int i = 0; i < 2; i++) { /* offu, offv */
+            emit(MOV(next.retype(BRW_REGISTER_TYPE_D), offset_value));
+            offset_value.reg_offset++;
+            next.reg_offset++;
+         }
+
+         if (ir->coordinate->type->vector_elements == 3) { /* r if present */
+            emit(MOV(next, coordinate));
+            coordinate.reg_offset++;
+            next.reg_offset++;
+         }
+
+         coordinate_done = true;
+      }
        break;
     }
  
     /* Set up the coordinate (except for cases where it was done above) */
-   if (ir->op != ir_txd && ir->op != ir_txs && ir->op != ir_txf && ir->op != ir_txf_ms && ir->op != ir_query_levels) {
+   if (ir->coordinate && !coordinate_done) {
        for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
-        emit(MOV(next, coordinate));
-        coordinate.reg_offset++;
-        next.reg_offset++;
+         emit(MOV(next, coordinate));
+         coordinate.reg_offset++;
+         next.reg_offset++;
        }
     }
  
@@ -1372,14 +1390,18 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
     case ir_txs: inst = emit(SHADER_OPCODE_TXS, dst, payload); break;
     case ir_query_levels: inst = emit(SHADER_OPCODE_TXS, dst, payload); break;
     case ir_lod: inst = emit(SHADER_OPCODE_LOD, dst, payload); break;
-   case ir_tg4: inst = emit(SHADER_OPCODE_TG4, dst, payload); break;
+   case ir_tg4:
+      if (has_nonconstant_offset)
+         inst = emit(SHADER_OPCODE_TG4_OFFSET, dst, payload);
+      else
+         inst = emit(SHADER_OPCODE_TG4, dst, payload);
+      break;
     }
     inst->base_mrf = -1;
     if (reg_width == 2)
        inst->mlen = next.reg_offset * reg_width - header_present;
     else
        inst->mlen = next.reg_offset * reg_width;
-
     inst->header_present = header_present;
     inst->regs_written = 4;
  
@@ -1527,6 +1549,9 @@ fs_visitor::visit(ir_texture *ir)
     /* Should be lowered by do_lower_texture_projection */
     assert(!ir->projector);
  
+   /* Array-typed texture offsets should already have been lowered */
+   assert(!ir->offset || !ir->offset->type->is_array());
+
     /* Generate code to compute all the subexpression trees.  This has to be
      * done before loading any values into MRFs for the sampler message since
      * generating these values may involve SEND messages that need the MRFs.
@@ -1596,7 +1621,7 @@ fs_visitor::visit(ir_texture *ir)
     }
  
     if (ir->offset != NULL && ir->op != ir_txf)
-      inst->texture_offset = brw_texture_offset(ir->offset->as_constant());
+      inst->texture_offset = brw_texture_offset(ctx, ir->offset->as_constant());
  
     if (ir->op == ir_tg4)
        inst->texture_offset |= gather_channel(ir, sampler) << 16; // M0.2:16-17
@@ -2503,6 +2528,16 @@ fs_visitor::emit_fb_writes()
        pop_force_uncompressed();
     }
  
+   c->prog_data.uses_omask =
+      fp->Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK);
+   if(c->prog_data.uses_omask) {
+      this->current_annotation = "FB write oMask";
+      assert(this->sample_mask.file != BAD_FILE);
+      /* Hand over gl_SampleMask. Only lower 16 bits are relevant. */
+      emit(FS_OPCODE_SET_OMASK, fs_reg(MRF, nr, BRW_REGISTER_TYPE_UW), this->sample_mask);
+      nr += 1;
+   }
+
     /* Reserve space for color. It'll be filled in per MRT below. */
     int color_mrf = nr;
     nr += 4 * reg_width;
@@ -2669,8 +2704,10 @@ fs_visitor::fs_visitor(struct brw_context *brw,
     this->c = c;
     this->brw = brw;
     this->fp = fp;
+   this->prog = &fp->Base;
     this->shader_prog = shader_prog;
     this->prog = &fp->Base;
+   this->stage_prog_data = &c->prog_data.base;
     this->ctx = &brw->ctx;
     this->mem_ctx = ralloc_context(NULL);
     if (shader_prog)
@@ -2704,6 +2741,8 @@ fs_visitor::fs_visitor(struct brw_context *brw,
     this->force_uncompressed_stack = 0;
     this->force_sechalf_stack = 0;
  
+   this->spilled_any_registers = false;
+
     memset(&this->param_size, 0, sizeof(this->param_size));
  }