i965/fs: Drop "do dual source blending" generator parameter.

[mesa.git] / src / mesa / drivers / dri / i965 / brw_fs_generator.cpp
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp

index 8e4a31d951fc3756c43f13fb6638a9d66aeb0480..9e260a7ffe7694e17c35ed513485a6ec3b81a34e 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -42,12 +42,10 @@ fs_generator::fs_generator(struct brw_context *brw,
                             struct brw_wm_prog_data *prog_data,
                             struct gl_shader_program *prog,
                             struct gl_fragment_program *fp,
-                           bool dual_source_output,
                             bool runtime_check_aads_emit,
                             bool debug_flag)
  
     : brw(brw), key(key), prog_data(prog_data), prog(prog), fp(fp),
-     dual_source_output(dual_source_output),
       runtime_check_aads_emit(runtime_check_aads_emit), debug_flag(debug_flag),
       mem_ctx(mem_ctx)
  {
@@ -67,6 +65,8 @@ fs_generator::patch_discard_jumps_to_fb_writes()
     if (brw->gen < 6 || this->discard_halt_patches.is_empty())
        return false;
  
+   int scale = brw_jump_scale(brw);
+
     /* There is a somewhat strange undocumented requirement of using
      * HALT, according to the simulator.  If some channel has HALTed to
      * a particular UIP, then by the end of the program, every channel
@@ -79,8 +79,8 @@ fs_generator::patch_discard_jumps_to_fb_writes()
      * tests.
      */
     brw_inst *last_halt = gen6_HALT(p);
-   brw_inst_set_uip(brw, last_halt, 2);
-   brw_inst_set_jip(brw, last_halt, 2);
+   brw_inst_set_uip(brw, last_halt, 1 * scale);
+   brw_inst_set_jip(brw, last_halt, 1 * scale);
  
     int ip = p->nr_insn;
  
@@ -89,7 +89,7 @@ fs_generator::patch_discard_jumps_to_fb_writes()
  
        assert(brw_inst_opcode(brw, patch) == BRW_OPCODE_HALT);
        /* HALT takes a half-instruction distance from the pre-incremented IP. */
-      brw_inst_set_uip(brw, patch, (ip - patch_ip->ip) * 2);
+      brw_inst_set_uip(brw, patch, (ip - patch_ip->ip) * scale);
     }
  
     this->discard_halt_patches.make_empty();
@@ -115,7 +115,7 @@ fs_generator::fire_fb_write(fs_inst *inst,
        brw_pop_insn_state(p);
     }
  
-   if (this->dual_source_output)
+   if (prog_data->dual_src_blend)
        msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01;
     else if (dispatch_width == 16)
        msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
@@ -152,6 +152,7 @@ fs_generator::generate_fb_write(fs_inst *inst)
        brw_set_default_mask_control(p, BRW_MASK_DISABLE);
        brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
        brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
+      brw_set_default_flag_reg(p, 0, 0);
  
        /* On HSW, the GPU will use the predicate on SENDC, unless the header is
         * present.
@@ -368,7 +369,8 @@ fs_generator::generate_math_g45(fs_inst *inst,
  }
  
  void
-fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
+fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src,
+                           struct brw_reg sampler_index)
  {
     int msg_type = -1;
     int rlen = 4;
@@ -536,6 +538,11 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src
        src.nr++;
     }
  
+   assert(sampler_index.file == BRW_IMMEDIATE_VALUE);
+   assert(sampler_index.type == BRW_REGISTER_TYPE_UD);
+
+   uint32_t sampler = sampler_index.dw1.ud;
+
     /* Load the message header if present.  If there's a texture offset,
      * we need to set it up explicitly and load the offset bitfield.
      * Otherwise, we can use an implied move from g0 to the first message reg.
@@ -566,7 +573,7 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src
                         brw_imm_ud(inst->texture_offset));
           }
  
-         if (inst->sampler >= 16) {
+         if (sampler >= 16) {
              /* The "Sampler Index" field can only store values between 0 and 15.
               * However, we can add an offset to the "Sampler State Pointer"
               * field, effectively selecting a different set of 16 samplers.
@@ -576,11 +583,11 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src
               * exclusively use the offset - we have to use both.
               */
              assert(brw->gen >= 8 || brw->is_haswell);
+            const int sampler_state_size = 16; /* 16 bytes */
              brw_ADD(p,
                      get_element_ud(header_reg, 3),
                      get_element_ud(brw_vec8_grf(0, 0), 3),
-                    brw_imm_ud(16 * (inst->sampler / 16) *
-                               sizeof(gen7_sampler_state)));
+                    brw_imm_ud(16 * (sampler / 16) * sampler_state_size));
           }
           brw_pop_insn_state(p);
        }
@@ -589,14 +596,14 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src
     uint32_t surface_index = ((inst->opcode == SHADER_OPCODE_TG4 ||
        inst->opcode == SHADER_OPCODE_TG4_OFFSET)
        ? prog_data->base.binding_table.gather_texture_start
-      : prog_data->base.binding_table.texture_start) + inst->sampler;
+      : prog_data->base.binding_table.texture_start) + sampler;
  
     brw_SAMPLE(p,
               retype(dst, BRW_REGISTER_TYPE_UW),
               inst->base_mrf,
               src,
                surface_index,
-             inst->sampler % 16,
+             sampler % 16,
               msg_type,
               rlen,
               inst->mlen,
@@ -1582,11 +1589,7 @@ fs_generator::generate_code(exec_list *instructions)
          brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
          break;
        case BRW_OPCODE_CONTINUE:
-        /* FINISHME: We need to write the loop instruction support still. */
-        if (brw->gen >= 6)
-           gen6_CONT(p);
-        else
-           brw_CONT(p);
+         brw_CONT(p);
          brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
          break;
  
@@ -1649,7 +1652,7 @@ fs_generator::generate_code(exec_list *instructions)
        case SHADER_OPCODE_LOD:
        case SHADER_OPCODE_TG4:
        case SHADER_OPCODE_TG4_OFFSET:
-        generate_tex(inst, dst, src[0]);
+        generate_tex(inst, dst, src[0], src[1]);
          break;
        case FS_OPCODE_DDX:
          generate_ddx(inst, dst, src[0]);