i965/ir: Make BROADCAST emit an unmasked single-channel move.
authorFrancisco Jerez <currojerez@riseup.net>
Thu, 19 May 2016 07:10:03 +0000 (00:10 -0700)
committerFrancisco Jerez <currojerez@riseup.net>
Sat, 28 May 2016 06:29:04 +0000 (23:29 -0700)
Alternatively we could have extended the current semantics to 32-wide
mode by changing brw_broadcast() to emit multiple indexed MOV
instructions in the generator copying the selected value to all
destination registers, but it seemed rather silly to waste EU cycles
unnecessarily copying the exact same value 32 times in the GRF.

The vstride change in the Align16 path is required to avoid assertions
in validate_reg() since the change causes the execution size of the
MOV and SEL instructions to be equal to the source region width.

Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
src/mesa/drivers/dri/i965/brw_defines.h
src/mesa/drivers/dri/i965/brw_eu_emit.c
src/mesa/drivers/dri/i965/brw_fs_generator.cpp
src/mesa/drivers/dri/i965/brw_vec4_generator.cpp

index a5f3b52fa350ce78c5959c3e1a957681a23e327d..51eb856d7b1baec42391cf6909f04ce4f9e12b20 100644 (file)
@@ -1082,6 +1082,12 @@ enum opcode {
    /**
     * Pick the channel from its first source register given by the index
     * specified as second source.  Useful for variable indexing of surfaces.
+    *
+    * Note that because the result of this instruction is by definition
+    * uniform and it can always be splatted to multiple channels using a
+    * scalar regioning mode, only the first channel of the destination region
+    * is guaranteed to be updated, which implies that BROADCAST instructions
+    * should usually be marked force_writemask_all.
     */
    SHADER_OPCODE_BROADCAST,
 
index 2a3b0b03d8040560503cc6cd0014c930f627f101..1c2ccb4e16340b78c85cb6aafec7dc436e8141c5 100644 (file)
@@ -3426,6 +3426,10 @@ brw_broadcast(struct brw_codegen *p,
    const bool align1 = brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1;
    brw_inst *inst;
 
+   brw_push_insn_state(p);
+   brw_set_default_mask_control(p, BRW_MASK_DISABLE);
+   brw_set_default_exec_size(p, align1 ? BRW_EXECUTE_1 : BRW_EXECUTE_4);
+
    assert(src.file == BRW_GENERAL_REGISTER_FILE &&
           src.address_mode == BRW_ADDRESS_DIRECT);
 
@@ -3476,19 +3480,21 @@ brw_broadcast(struct brw_codegen *p,
           */
          inst = brw_MOV(p,
                         brw_null_reg(),
-                        stride(brw_swizzle(idx, BRW_SWIZZLE_XXXX), 0, 4, 1));
+                        stride(brw_swizzle(idx, BRW_SWIZZLE_XXXX), 4, 4, 1));
          brw_inst_set_pred_control(devinfo, inst, BRW_PREDICATE_NONE);
          brw_inst_set_cond_modifier(devinfo, inst, BRW_CONDITIONAL_NZ);
          brw_inst_set_flag_reg_nr(devinfo, inst, 1);
 
          /* and use predicated SEL to pick the right channel. */
          inst = brw_SEL(p, dst,
-                        stride(suboffset(src, 4), 0, 4, 1),
-                        stride(src, 0, 4, 1));
+                        stride(suboffset(src, 4), 4, 4, 1),
+                        stride(src, 4, 4, 1));
          brw_inst_set_pred_control(devinfo, inst, BRW_PREDICATE_NORMAL);
          brw_inst_set_flag_reg_nr(devinfo, inst, 1);
       }
    }
+
+   brw_pop_insn_state(p);
 }
 
 /**
index 1d7fc6c9b4073896f8e192842c408eb6b0298636..3b0717e17d4968690b34bd38955771a31b35c73b 100644 (file)
@@ -2023,6 +2023,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
          break;
 
       case SHADER_OPCODE_BROADCAST:
+         assert(inst->force_writemask_all);
          brw_broadcast(p, dst, src[0], src[1]);
          break;
 
index baf442212cce440753e2ba992da31ca75a99458d..bb0254ee51ec0c153880b6fc4ffd2857ff43b721 100644 (file)
@@ -1886,6 +1886,7 @@ generate_code(struct brw_codegen *p,
          break;
 
       case SHADER_OPCODE_BROADCAST:
+         assert(inst->force_writemask_all);
          brw_broadcast(p, dst, src[0], src[1]);
          break;