i965/fs: Switch shader_time writes to using GRFs.
author Eric Anholt <eric@anholt.net>
Tue, 19 Mar 2013 22:28:11 +0000 (15:28 -0700)
committer Eric Anholt <eric@anholt.net>
Thu, 28 Mar 2013 18:46:15 +0000 (11:46 -0700)
This avoids conflicts between shader_time and FB writes, so we can include
more of the program under our profiling.  This does mean hiding more of
the message setup from the optimizer, which doesn't have a way to handle
multi-reg sends from GRFs.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
src/mesa/drivers/dri/i965/brw_eu.h
src/mesa/drivers/dri/i965/brw_eu_emit.c
src/mesa/drivers/dri/i965/brw_fs.cpp
src/mesa/drivers/dri/i965/brw_fs.h
src/mesa/drivers/dri/i965/brw_fs_emit.cpp
src/mesa/drivers/dri/i965/brw_vec4_emit.cpp

index 60ce231bac5b35ed1ce51e4b817f3a7a8be99797..6ef1f83b5ca95be98ad3fe4093054dd2c1905358 100644 (file)
@@ -305,7 +305,7 @@ void brw_oword_block_write_scratch(struct brw_compile *p,
                                   GLuint offset);
 
 void brw_shader_time_add(struct brw_compile *p,
-                         int mrf,
+                         struct brw_reg payload,
                          uint32_t surf_index);
 
 /* If/else/endif.  Works by manipulating the execution flags on each
index 23556264269aa0f960f2adaa3ffe612c26fe1773..2578bf89cca30e372172d8c4ba0f01b4af81bb9a 100644 (file)
@@ -2436,7 +2436,7 @@ brw_svb_write(struct brw_compile *p,
  * messages.
  */
 void brw_shader_time_add(struct brw_compile *p,
-                         int base_mrf,
+                         struct brw_reg payload,
                          uint32_t surf_index)
 {
    struct intel_context *intel = &p->brw->intel;
@@ -2453,8 +2453,8 @@ void brw_shader_time_add(struct brw_compile *p,
     */
    brw_set_dest(p, send, brw_vec1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
                                       BRW_ARF_NULL, 0));
-   brw_set_src0(p, send, brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
-                                      base_mrf, 0));
+   brw_set_src0(p, send, brw_vec1_reg(payload.file,
+                                      payload.nr, 0));
 
    uint32_t sfid, msg_type;
    if (intel->is_haswell) {
index 01a1ec06ac03312222a8ee5a621439178c53a8d4..9ea7339f5818071e55b6e467d9d89d1204a8b8be 100644 (file)
@@ -377,6 +377,7 @@ bool
 fs_inst::is_send_from_grf()
 {
    return (opcode == FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7 ||
+           opcode == SHADER_OPCODE_SHADER_TIME_ADD ||
            (opcode == FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD &&
             src[1].file == GRF));
 }
@@ -607,19 +608,16 @@ fs_visitor::emit_shader_time_write(enum shader_time_shader_type type,
 {
    int shader_time_index = brw_get_shader_time_index(brw, prog, &fp->Base,
                                                      type);
-   int base_mrf = 6;
+   fs_reg offset = fs_reg(shader_time_index * SHADER_TIME_STRIDE);
 
-   fs_reg offset_mrf = fs_reg(MRF, base_mrf);
-   offset_mrf.type = BRW_REGISTER_TYPE_UD;
-   emit(MOV(offset_mrf, fs_reg(shader_time_index * SHADER_TIME_STRIDE)));
-
-   fs_reg time_mrf = fs_reg(MRF, base_mrf + 1);
-   time_mrf.type = BRW_REGISTER_TYPE_UD;
-   emit(MOV(time_mrf, value));
+   fs_reg payload;
+   if (dispatch_width == 8)
+      payload = fs_reg(this, glsl_type::uvec2_type);
+   else
+      payload = fs_reg(this, glsl_type::uint_type);
 
-   fs_inst *inst = emit(fs_inst(SHADER_OPCODE_SHADER_TIME_ADD));
-   inst->base_mrf = base_mrf;
-   inst->mlen = 2;
+   emit(fs_inst(SHADER_OPCODE_SHADER_TIME_ADD,
+                fs_reg(), payload, offset, value));
 }
 
 void
@@ -735,8 +733,6 @@ fs_visitor::implied_mrf_writes(fs_inst *inst)
    case SHADER_OPCODE_TXL:
    case SHADER_OPCODE_TXS:
       return 1;
-   case SHADER_OPCODE_SHADER_TIME_ADD:
-      return 0;
    case FS_OPCODE_FB_WRITE:
       return 2;
    case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
@@ -1359,6 +1355,13 @@ fs_visitor::split_virtual_grfs()
       if (inst->regs_written() > 1) {
         split_grf[inst->dst.reg] = false;
       }
+
+      /* If we're sending from a GRF, don't split it, on the assumption that
+       * the send is reading the whole thing.
+       */
+      if (inst->is_send_from_grf()) {
+         split_grf[inst->src[0].reg] = false;
+      }
    }
 
    /* Allocate new space for split regs.  Note that the virtual
index 1fabec45cf8cb778dd30c9796e92d50d6a36d2ea..d9d17a2520e6a9d270434ab249b8baf27e20fb0d 100644 (file)
@@ -559,6 +559,11 @@ private:
                                         struct brw_reg dst,
                                         struct brw_reg src);
 
+   void generate_shader_time_add(fs_inst *inst,
+                                 struct brw_reg payload,
+                                 struct brw_reg offset,
+                                 struct brw_reg value);
+
    void patch_discard_jumps_to_fb_writes();
 
    struct brw_context *brw;
index 712fef6e0934e1d74cee24fabc816342541b46a0..5db481c60ee3e035ae3e49a0e5d87391d9586ec7 100644 (file)
@@ -971,6 +971,41 @@ fs_generator::generate_unpack_half_2x16_split(fs_inst *inst,
    brw_F16TO32(p, dst, src_w);
 }
 
+void
+fs_generator::generate_shader_time_add(fs_inst *inst,
+                                       struct brw_reg payload,
+                                       struct brw_reg offset,
+                                       struct brw_reg value)
+{
+   assert(intel->gen >= 7);
+   brw_push_insn_state(p); /* saved state is restored by the pop at the end */
+   brw_set_mask_control(p, true); /* NOTE(review): presumably mask-disable; confirm */
+
+   assert(payload.file == BRW_GENERAL_REGISTER_FILE); /* payload must be a GRF */
+   struct brw_reg payload_offset = retype(brw_vec1_grf(payload.nr, 0),
+                                          offset.type);
+   struct brw_reg payload_value = retype(brw_vec1_grf(payload.nr + 1, 0),
+                                         value.type);
+
+   assert(offset.file == BRW_IMMEDIATE_VALUE);
+   if (value.file == BRW_GENERAL_REGISTER_FILE) {
+      value.width = BRW_WIDTH_1; /* with both strides 0: read a single element */
+      value.hstride = BRW_HORIZONTAL_STRIDE_0;
+      value.vstride = BRW_VERTICAL_STRIDE_0;
+   } else {
+      assert(value.file == BRW_IMMEDIATE_VALUE); /* only GRF or immediate allowed */
+   }
+
+   /* Setting up the params from the IR is crazy in the FS8 case, and this
+    * path isn't performance-critical, so the MOVs are emitted right here:
+    * offset into the payload's first register, value into its second.
+    */
+   brw_MOV(p, payload_offset, offset);
+   brw_MOV(p, payload_value, value);
+   brw_shader_time_add(p, payload, SURF_INDEX_WM_SHADER_TIME);
+   brw_pop_insn_state(p);
+}
+
 void
 fs_generator::generate_code(exec_list *instructions)
 {
@@ -1291,7 +1326,7 @@ fs_generator::generate_code(exec_list *instructions)
          break;
 
       case SHADER_OPCODE_SHADER_TIME_ADD:
-         brw_shader_time_add(p, inst->base_mrf, SURF_INDEX_WM_SHADER_TIME);
+         generate_shader_time_add(inst, src[0], src[1], src[2]);
          break;
 
       case FS_OPCODE_SET_SIMD4X2_OFFSET:
index b73711cf7adb42ab5245ff489be1029e4193e3c8..54f3efd08ecad50c74a2fa1e4241106125bcb33e 100644 (file)
@@ -674,7 +674,8 @@ vec4_generator::generate_vs_instruction(vec4_instruction *instruction,
       break;
 
    case SHADER_OPCODE_SHADER_TIME_ADD:
-      brw_shader_time_add(p, inst->base_mrf, SURF_INDEX_VS_SHADER_TIME);
+      brw_shader_time_add(p, brw_message_reg(inst->base_mrf),
+                          SURF_INDEX_VS_SHADER_TIME);
       break;
 
    default: