i965: Don't disable exec masking for sampler message sends.
author Francisco Jerez <currojerez@riseup.net>
Thu, 26 Feb 2015 15:24:03 +0000 (17:24 +0200)
committer Francisco Jerez <currojerez@riseup.net>
Fri, 20 Mar 2015 15:01:35 +0000 (17:01 +0200)
This was telling the sampler to do texture fetches for *all* channels
in the non-constant surface index case, which could have reduced
throughput unnecessarily when some of the channels were disabled by
control flow.  Fix it by popping the instruction state before emitting
the indirect send message rather than after it.

Reviewed-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
Acked-by: Kenneth Graunke <kenneth@whitecape.org>
src/mesa/drivers/dri/i965/brw_fs_generator.cpp
src/mesa/drivers/dri/i965/brw_vec4_generator.cpp

index 25a0ea3934825232b35e1286f9123b0cf2336ba8..260f50857b3779126deaa9b9c4bc78291451354e 100644 (file)
@@ -762,6 +762,8 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src
       brw_AND(p, addr, addr, brw_imm_ud(0x0ff));
       brw_OR(p, addr, addr, temp);
 
+      brw_pop_insn_state(p);
+
       /* dst = send(offset, a0.0 | <descriptor>) */
       brw_inst *insn = brw_send_indirect_message(
          p, BRW_SFID_SAMPLER, dst, src, addr);
@@ -775,8 +777,6 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src
                               simd_mode,
                               return_format);
 
-      brw_pop_insn_state(p);
-
       /* visitor knows more than we do about the surface limit required,
        * so has already done marking.
        */
@@ -1229,6 +1229,8 @@ fs_generator::generate_varying_pull_constant_load_gen7(fs_inst *inst,
       brw_set_src0(p, insn_and, vec1(retype(index, BRW_REGISTER_TYPE_UD)));
       brw_set_src1(p, insn_and, brw_imm_ud(0x0ff));
 
+      brw_pop_insn_state(p);
+
       /* dst = send(offset, a0.0 | <descriptor>) */
       brw_inst *insn = brw_send_indirect_message(
          p, BRW_SFID_SAMPLER, retype(dst, BRW_REGISTER_TYPE_UW),
@@ -1243,8 +1245,6 @@ fs_generator::generate_varying_pull_constant_load_gen7(fs_inst *inst,
                               simd_mode,
                               0);
 
-      brw_pop_insn_state(p);
-
       /* visitor knows more than we do about the surface limit required,
        * so has already done marking.
        */
index 5ac0e7627c627fe9d484c2a214487f0babe44315..2bcddb16286a376e26f7e40e81da59df28d77015 100644 (file)
@@ -419,6 +419,8 @@ vec4_generator::generate_tex(vec4_instruction *inst,
       brw_AND(p, addr, addr, brw_imm_ud(0x0ff));
       brw_OR(p, addr, addr, temp);
 
+      brw_pop_insn_state(p);
+
       /* dst = send(offset, a0.0 | <descriptor>) */
       brw_inst *insn = brw_send_indirect_message(
          p, BRW_SFID_SAMPLER, dst, src, addr);
@@ -432,8 +434,6 @@ vec4_generator::generate_tex(vec4_instruction *inst,
                               BRW_SAMPLER_SIMD_MODE_SIMD4X2,
                               return_format);
 
-      brw_pop_insn_state(p);
-
       /* visitor knows more than we do about the surface limit required,
        * so has already done marking.
        */
@@ -1091,6 +1091,8 @@ vec4_generator::generate_pull_constant_load_gen7(vec4_instruction *inst,
       brw_set_src0(p, insn_and, vec1(retype(surf_index, BRW_REGISTER_TYPE_UD)));
       brw_set_src1(p, insn_and, brw_imm_ud(0x0ff));
 
+      brw_pop_insn_state(p);
+
       /* dst = send(offset, a0.0 | <descriptor>) */
       brw_inst *insn = brw_send_indirect_message(
          p, BRW_SFID_SAMPLER, dst, src, addr);
@@ -1104,8 +1106,6 @@ vec4_generator::generate_pull_constant_load_gen7(vec4_instruction *inst,
                               BRW_SAMPLER_SIMD_MODE_SIMD4X2,
                               0);
 
-      brw_pop_insn_state(p);
-
       /* visitor knows more than we do about the surface limit required,
        * so has already done marking.
        */