i965/fs: Hide varying pull constant load message setup behind logical opcode.
author Francisco Jerez <currojerez@riseup.net>
Wed, 18 May 2016 06:18:38 +0000 (23:18 -0700)
committer Francisco Jerez <currojerez@riseup.net>
Sat, 28 May 2016 06:19:21 +0000 (23:19 -0700)
This will allow the SIMD lowering pass to split 32-wide varying pull
constant loads (not natively supported by the hardware) into 16-wide
instructions.

Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
src/mesa/drivers/dri/i965/brw_defines.h
src/mesa/drivers/dri/i965/brw_fs.cpp
src/mesa/drivers/dri/i965/brw_fs.h
src/mesa/drivers/dri/i965/brw_fs_cse.cpp
src/mesa/drivers/dri/i965/brw_fs_generator.cpp
src/mesa/drivers/dri/i965/brw_shader.cpp

index 31b333677279b397630134ccd86cec89f179e0b0..432a1aec0dfd0c6fb89704cfb8d2cf6b3ffffc51 100644 (file)
@@ -1116,6 +1116,7 @@ enum opcode {
    FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7,
    FS_OPCODE_VARYING_PULL_CONSTANT_LOAD,
    FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7,
+   FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL,
    FS_OPCODE_GET_BUFFER_SIZE,
    FS_OPCODE_MOV_DISPATCH_TO_FLAGS,
    FS_OPCODE_DISCARD_JUMP,
index 71df1e57fa37d7883d5ddf46493707f13ed6a821..336806b9e51c4bcc5207cab8646a4adfd0fb4702 100644 (file)
@@ -188,32 +188,16 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_builder &bld,
       scale = 2;
    }
 
-   enum opcode op;
-   if (devinfo->gen >= 7)
-      op = FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7;
-   else
-      op = FS_OPCODE_VARYING_PULL_CONSTANT_LOAD;
-
    /* The pull load message will load a vec4 (16 bytes). If we are loading
     * a double this means we are only loading 2 elements worth of data.
     * We also want to use a 32-bit data type for the dst of the load operation
     * so other parts of the driver don't get confused about the size of the
     * result.
     */
-   int regs_written = 4 * (bld.dispatch_width() / 8) * scale;
-   fs_reg vec4_result = fs_reg(VGRF, alloc.allocate(regs_written),
-                               BRW_REGISTER_TYPE_F);
-   fs_inst *inst = bld.emit(op, vec4_result, surf_index, vec4_offset);
-   inst->regs_written = regs_written;
-
-   if (devinfo->gen < 7) {
-      inst->base_mrf = FIRST_PULL_LOAD_MRF(devinfo->gen);
-      inst->header_size = 1;
-      if (devinfo->gen == 4)
-         inst->mlen = 3;
-      else
-         inst->mlen = 1 + bld.dispatch_width() / 8;
-   }
+   fs_reg vec4_result = bld.vgrf(BRW_REGISTER_TYPE_F, 4 * scale);
+   fs_inst *inst = bld.emit(FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL,
+                            vec4_result, surf_index, vec4_offset);
+   inst->regs_written = 4 * (bld.dispatch_width() / 8) * scale;
 
    if (type_sz(dst.type) == 8) {
       assert(scale == 1);
@@ -4439,6 +4423,28 @@ lower_surface_logical_send(const fs_builder &bld, fs_inst *inst, opcode op,
    delete[] components;
 }
 
+static void
+lower_varying_pull_constant_logical_send(const fs_builder &bld, fs_inst *inst)
+{
+   const brw_device_info *devinfo = bld.shader->devinfo;
+   /* Lower in place: gen7+ keeps its sources and only swaps the opcode. */
+   if (devinfo->gen >= 7) {
+      inst->opcode = FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7;
+
+   } else {
+      const fs_reg payload(MRF, FIRST_PULL_LOAD_MRF(devinfo->gen),
+                           BRW_REGISTER_TYPE_UD);
+      /* Copy the offset (src[1]) one register past the payload base. */
+      bld.MOV(byte_offset(payload, REG_SIZE), inst->src[1]);
+      /* The offset now lives in the MRF payload, so keep only src[0]. */
+      inst->opcode = FS_OPCODE_VARYING_PULL_CONSTANT_LOAD;
+      inst->resize_sources(1);
+      inst->base_mrf = payload.nr;
+      inst->header_size = 1;
+      inst->mlen = 1 + inst->exec_size / 8;
+   }
+}
+
 bool
 fs_visitor::lower_logical_sends()
 {
@@ -4544,6 +4550,10 @@ fs_visitor::lower_logical_sends()
                                     ibld.sample_mask_reg());
          break;
 
+      case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL:
+         lower_varying_pull_constant_logical_send(ibld, inst);
+         break;
+
       default:
          continue;
       }
index 062fcd54592af0b77cb2c7dd88091ce5bd418abb..75759b7ba26435b01afa8654054e15268422ea6f 100644 (file)
@@ -463,9 +463,9 @@ private:
                                                  struct brw_reg dst,
                                                  struct brw_reg surf_index,
                                                  struct brw_reg offset);
-   void generate_varying_pull_constant_load(fs_inst *inst, struct brw_reg dst,
-                                            struct brw_reg index,
-                                            struct brw_reg offset);
+   void generate_varying_pull_constant_load(fs_inst *inst,
+                                            struct brw_reg dst,
+                                            struct brw_reg index);
    void generate_varying_pull_constant_load_gen7(fs_inst *inst,
                                                  struct brw_reg dst,
                                                  struct brw_reg index,
index b17a082780aea36f65978cb0df0560218dbee2d1..99121c503f0524ff971c9a00dc55582c3c4f59d4 100644 (file)
@@ -72,8 +72,8 @@ is_expression(const fs_visitor *v, const fs_inst *const inst)
    case BRW_OPCODE_MAD:
    case BRW_OPCODE_LRP:
    case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
+   case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL:
    case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7:
-   case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD:
    case FS_OPCODE_CINTERP:
    case FS_OPCODE_LINTERP:
    case SHADER_OPCODE_FIND_LIVE_CHANNEL:
index 0a847f8637f4c9dc8446f41367f8ff91fba5e3dc..d979518c9225c8368c3bf10d585d5bf723233039 100644 (file)
@@ -1347,8 +1347,7 @@ fs_generator::generate_uniform_pull_constant_load_gen7(fs_inst *inst,
 void
 fs_generator::generate_varying_pull_constant_load(fs_inst *inst,
                                                   struct brw_reg dst,
-                                                  struct brw_reg index,
-                                                  struct brw_reg offset)
+                                                  struct brw_reg index)
 {
    assert(devinfo->gen < 7); /* Should use the gen7 variant. */
    assert(inst->header_size != 0);
@@ -1380,10 +1379,6 @@ fs_generator::generate_varying_pull_constant_load(fs_inst *inst,
       simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
    }
 
-   struct brw_reg offset_mrf = retype(brw_message_reg(inst->base_mrf + 1),
-                                      BRW_REGISTER_TYPE_D);
-   brw_MOV(p, offset_mrf, offset);
-
    struct brw_reg header = brw_vec8_grf(0, 0);
    gen6_resolve_implied_move(p, &header, inst->base_mrf);
 
@@ -2186,7 +2181,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
         break;
 
       case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD:
-        generate_varying_pull_constant_load(inst, dst, src[0], src[1]);
+        generate_varying_pull_constant_load(inst, dst, src[0]);
         break;
 
       case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7:
index d24db58a0463cd4bae55013752d500a6d09de66b..551d9c2392631e03bed342948effb9cf2d662d61 100644 (file)
@@ -348,6 +348,8 @@ brw_instruction_name(const struct brw_device_info *devinfo, enum opcode op)
       return "varying_pull_const";
    case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7:
       return "varying_pull_const_gen7";
+   case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL:
+      return "varying_pull_const_logical";
 
    case FS_OPCODE_MOV_DISPATCH_TO_FLAGS:
       return "mov_dispatch_to_flags";