i965/fs: Drop "do dual source blending" generator parameter.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_fs_generator.cpp
index 8e4a31d951fc3756c43f13fb6638a9d66aeb0480..9e260a7ffe7694e17c35ed513485a6ec3b81a34e 100644 (file)
@@ -42,12 +42,10 @@ fs_generator::fs_generator(struct brw_context *brw,
                            struct brw_wm_prog_data *prog_data,
                            struct gl_shader_program *prog,
                            struct gl_fragment_program *fp,
-                           bool dual_source_output,
                            bool runtime_check_aads_emit,
                            bool debug_flag)
 
    : brw(brw), key(key), prog_data(prog_data), prog(prog), fp(fp),
-     dual_source_output(dual_source_output),
      runtime_check_aads_emit(runtime_check_aads_emit), debug_flag(debug_flag),
      mem_ctx(mem_ctx)
 {
@@ -67,6 +65,8 @@ fs_generator::patch_discard_jumps_to_fb_writes()
    if (brw->gen < 6 || this->discard_halt_patches.is_empty())
       return false;
 
+   int scale = brw_jump_scale(brw);
+
    /* There is a somewhat strange undocumented requirement of using
     * HALT, according to the simulator.  If some channel has HALTed to
     * a particular UIP, then by the end of the program, every channel
@@ -79,8 +79,8 @@ fs_generator::patch_discard_jumps_to_fb_writes()
     * tests.
     */
    brw_inst *last_halt = gen6_HALT(p);
-   brw_inst_set_uip(brw, last_halt, 2);
-   brw_inst_set_jip(brw, last_halt, 2);
+   brw_inst_set_uip(brw, last_halt, 1 * scale);
+   brw_inst_set_jip(brw, last_halt, 1 * scale);
 
    int ip = p->nr_insn;
 
@@ -89,7 +89,7 @@ fs_generator::patch_discard_jumps_to_fb_writes()
 
       assert(brw_inst_opcode(brw, patch) == BRW_OPCODE_HALT);
       /* HALT takes a half-instruction distance from the pre-incremented IP. */
-      brw_inst_set_uip(brw, patch, (ip - patch_ip->ip) * 2);
+      brw_inst_set_uip(brw, patch, (ip - patch_ip->ip) * scale);
    }
 
    this->discard_halt_patches.make_empty();
@@ -115,7 +115,7 @@ fs_generator::fire_fb_write(fs_inst *inst,
       brw_pop_insn_state(p);
    }
 
-   if (this->dual_source_output)
+   if (prog_data->dual_src_blend)
       msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01;
    else if (dispatch_width == 16)
       msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
@@ -152,6 +152,7 @@ fs_generator::generate_fb_write(fs_inst *inst)
       brw_set_default_mask_control(p, BRW_MASK_DISABLE);
       brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
       brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
+      brw_set_default_flag_reg(p, 0, 0);
 
       /* On HSW, the GPU will use the predicate on SENDC, unless the header is
        * present.
@@ -368,7 +369,8 @@ fs_generator::generate_math_g45(fs_inst *inst,
 }
 
 void
-fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
+fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src,
+                           struct brw_reg sampler_index)
 {
    int msg_type = -1;
    int rlen = 4;
@@ -536,6 +538,11 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src
       src.nr++;
    }
 
+   assert(sampler_index.file == BRW_IMMEDIATE_VALUE);
+   assert(sampler_index.type == BRW_REGISTER_TYPE_UD);
+
+   uint32_t sampler = sampler_index.dw1.ud;
+
    /* Load the message header if present.  If there's a texture offset,
     * we need to set it up explicitly and load the offset bitfield.
     * Otherwise, we can use an implied move from g0 to the first message reg.
@@ -566,7 +573,7 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src
                        brw_imm_ud(inst->texture_offset));
          }
 
-         if (inst->sampler >= 16) {
+         if (sampler >= 16) {
             /* The "Sampler Index" field can only store values between 0 and 15.
              * However, we can add an offset to the "Sampler State Pointer"
              * field, effectively selecting a different set of 16 samplers.
@@ -576,11 +583,11 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src
              * exclusively use the offset - we have to use both.
              */
             assert(brw->gen >= 8 || brw->is_haswell);
+            const int sampler_state_size = 16; /* 16 bytes */
             brw_ADD(p,
                     get_element_ud(header_reg, 3),
                     get_element_ud(brw_vec8_grf(0, 0), 3),
-                    brw_imm_ud(16 * (inst->sampler / 16) *
-                               sizeof(gen7_sampler_state)));
+                    brw_imm_ud(16 * (sampler / 16) * sampler_state_size));
          }
          brw_pop_insn_state(p);
       }
@@ -589,14 +596,14 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src
    uint32_t surface_index = ((inst->opcode == SHADER_OPCODE_TG4 ||
       inst->opcode == SHADER_OPCODE_TG4_OFFSET)
       ? prog_data->base.binding_table.gather_texture_start
-      : prog_data->base.binding_table.texture_start) + inst->sampler;
+      : prog_data->base.binding_table.texture_start) + sampler;
 
    brw_SAMPLE(p,
              retype(dst, BRW_REGISTER_TYPE_UW),
              inst->base_mrf,
              src,
               surface_index,
-             inst->sampler % 16,
+             sampler % 16,
              msg_type,
              rlen,
              inst->mlen,
@@ -1582,11 +1589,7 @@ fs_generator::generate_code(exec_list *instructions)
         brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
         break;
       case BRW_OPCODE_CONTINUE:
-        /* FINISHME: We need to write the loop instruction support still. */
-        if (brw->gen >= 6)
-           gen6_CONT(p);
-        else
-           brw_CONT(p);
+         brw_CONT(p);
         brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
         break;
 
@@ -1649,7 +1652,7 @@ fs_generator::generate_code(exec_list *instructions)
       case SHADER_OPCODE_LOD:
       case SHADER_OPCODE_TG4:
       case SHADER_OPCODE_TG4_OFFSET:
-        generate_tex(inst, dst, src[0]);
+        generate_tex(inst, dst, src[0], src[1]);
         break;
       case FS_OPCODE_DDX:
         generate_ddx(inst, dst, src[0]);