i965/gen7: Implement code generation for untyped surface read instructions.
author Francisco Jerez <currojerez@riseup.net>
Wed, 11 Sep 2013 21:03:13 +0000 (14:03 -0700)
committer Francisco Jerez <currojerez@riseup.net>
Tue, 29 Oct 2013 19:40:56 +0000 (12:40 -0700)
src/mesa/drivers/dri/i965/brw_defines.h
src/mesa/drivers/dri/i965/brw_eu.h
src/mesa/drivers/dri/i965/brw_eu_emit.c
src/mesa/drivers/dri/i965/brw_fs.cpp
src/mesa/drivers/dri/i965/brw_fs.h
src/mesa/drivers/dri/i965/brw_fs_generator.cpp
src/mesa/drivers/dri/i965/brw_vec4.cpp
src/mesa/drivers/dri/i965/brw_vec4.h
src/mesa/drivers/dri/i965/brw_vec4_generator.cpp

index 1cadf6c2c3ec6c8d9d89aa14b68c661145195429..fbc787a7be1ee6a1ba44c838cffdb71781a3dd34 100644 (file)
@@ -776,6 +776,7 @@ enum opcode {
    SHADER_OPCODE_SHADER_TIME_ADD,
 
    SHADER_OPCODE_UNTYPED_ATOMIC,
+   SHADER_OPCODE_UNTYPED_SURFACE_READ,
 
    FS_OPCODE_DDX,
    FS_OPCODE_DDY,
index 66b7ba7720b4df7083795a897ce1a2b6026f4cd0..1a448d055c36bf1c983942dce637c3d05471580f 100644 (file)
@@ -433,6 +433,14 @@ brw_untyped_atomic(struct brw_compile *p,
                    GLuint msg_length,
                    GLuint response_length);
 
+/**
+ * Emit a SEND instruction that performs an untyped surface read.
+ *
+ * dest receives the response and mrf is the message source; both are
+ * accessed as unsigned dwords.  bind_table_index selects the surface, and
+ * msg_length and response_length are passed on to the message descriptor.
+ */
+void
+brw_untyped_surface_read(struct brw_compile *p,
+                         struct brw_reg dest,
+                         struct brw_reg mrf,
+                         GLuint bind_table_index,
+                         GLuint msg_length,
+                         GLuint response_length);
+
 /*********************************************************************** 
  * brw_eu_util.c:
  */
index d1d40f7bee374b7493e020c6e8963682df2fd9b6..f6085192c095b71feccf04c0b547b47b38408ce6 100644 (file)
@@ -2547,6 +2547,62 @@ brw_untyped_atomic(struct brw_compile *p,
       insn->header.access_mode == BRW_ALIGN_1);
 }
 
+/**
+ * Fill in the message descriptor of insn for an untyped surface read.
+ *
+ * The shared function ID and message type depend on whether the target is
+ * Haswell.  The SIMD mode is derived from the execution size of insn, and
+ * the channel mask from response_length.
+ */
+static void
+brw_set_dp_untyped_surface_read_message(struct brw_compile *p,
+                                        struct brw_instruction *insn,
+                                        GLuint bind_table_index,
+                                        GLuint msg_length,
+                                        GLuint response_length,
+                                        bool header_present)
+{
+   const unsigned dispatch_width =
+      (insn->header.execution_size == BRW_EXECUTE_16 ? 16 : 8);
+   /* NOTE(review): presumably every channel read takes dispatch_width / 8
+    * response registers, which would make this the number of channels
+    * returned -- confirm against the callers.
+    */
+   const unsigned num_channels = response_length / (dispatch_width / 8);
+
+   /* Haswell uses a different shared function ID and message type. */
+   if (p->brw->is_haswell) {
+      brw_set_message_descriptor(p, insn, HSW_SFID_DATAPORT_DATA_CACHE_1,
+                                 msg_length, response_length,
+                                 header_present, false);
+
+      insn->bits3.gen7_dp.msg_type = HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ;
+   } else {
+      brw_set_message_descriptor(p, insn, GEN7_SFID_DATAPORT_DATA_CACHE,
+                                 msg_length, response_length,
+                                 header_present, false);
+
+      insn->bits3.gen7_dp.msg_type = GEN7_DATAPORT_DC_UNTYPED_SURFACE_READ;
+   }
+
+   /* This function only sets the SIMD mode bits for align1 instructions. */
+   if (insn->header.access_mode == BRW_ALIGN_1) {
+      if (dispatch_width == 16)
+         insn->bits3.ud |= 1 << 12; /* SIMD16 mode */
+      else
+         insn->bits3.ud |= 2 << 12; /* SIMD8 mode */
+   }
+
+   insn->bits3.gen7_dp.binding_table_index = bind_table_index;
+
+   /* Set mask of 32-bit channels to drop.  The four-bit mask starts at bit
+    * 8 and gets a one for every channel index >= num_channels, so only the
+    * first num_channels channels are kept.
+    */
+   insn->bits3.ud |= (0xf & (0xf << num_channels)) << 8;
+}
+
+/**
+ * Emit a SEND instruction that performs an untyped surface read.
+ *
+ * The destination and the message source are both retyped to unsigned
+ * dwords.  A message header is flagged as present exactly when the new
+ * instruction is in align1 access mode.
+ */
+void
+brw_untyped_surface_read(struct brw_compile *p,
+                         struct brw_reg dest,
+                         struct brw_reg mrf,
+                         GLuint bind_table_index,
+                         GLuint msg_length,
+                         GLuint response_length)
+{
+   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
+
+   brw_set_dest(p, insn, retype(dest, BRW_REGISTER_TYPE_UD));
+   brw_set_src0(p, insn, retype(mrf, BRW_REGISTER_TYPE_UD));
+   /* The last argument is the header_present flag. */
+   brw_set_dp_untyped_surface_read_message(
+      p, insn, bind_table_index, msg_length, response_length,
+      insn->header.access_mode == BRW_ALIGN_1);
+}
+
 /**
  * This instruction is generated as a single-channel align1 instruction by
  * both the VS and FS stages when using INTEL_DEBUG=shader_time.
index f3ceaad97b7fff58ef27203c4bbbe7deb5b2f3aa..76d8a2997abc89d8bd5f538eefd2699e50db736e 100644 (file)
@@ -773,6 +773,7 @@ fs_visitor::implied_mrf_writes(fs_inst *inst)
    case FS_OPCODE_SPILL:
       return 2;
    case SHADER_OPCODE_UNTYPED_ATOMIC:
+   case SHADER_OPCODE_UNTYPED_SURFACE_READ:
       return 0;
    default:
       assert(!"not reached");
index bc67637f315534f0c7d5ff691b5f6eecdf33d11c..5b783137ddfae325ea6f952bb3a6cca3abe39bc0 100644 (file)
@@ -559,6 +559,10 @@ private:
                                 struct brw_reg atomic_op,
                                 struct brw_reg surf_index);
 
+   /**
+    * Generate code for SHADER_OPCODE_UNTYPED_SURFACE_READ.  surf_index
+    * must be an immediate of unsigned dword type.
+    */
+   void generate_untyped_surface_read(fs_inst *inst,
+                                      struct brw_reg dst,
+                                      struct brw_reg surf_index);
+
    void mark_surface_used(unsigned surf_index);
 
    void patch_discard_jumps_to_fb_writes();
index f639d7e185b080da36236df0a871e7ce8b124bea..ef858370dbb7a9f8d27e095d9fe3237c0cb39095 100644 (file)
@@ -1206,6 +1206,20 @@ fs_generator::generate_untyped_atomic(fs_inst *inst, struct brw_reg dst,
    mark_surface_used(surf_index.dw1.ud);
 }
 
+/**
+ * Generate an untyped surface read for the fragment shader back-end.
+ *
+ * The message is sourced from the MRF at inst->base_mrf with length
+ * inst->mlen, and the response length is dispatch_width / 8.  The surface
+ * is recorded as used afterwards.
+ */
+void
+fs_generator::generate_untyped_surface_read(fs_inst *inst, struct brw_reg dst,
+                                            struct brw_reg surf_index)
+{
+   /* The surface index is read from the immediate field below. */
+   assert(surf_index.file == BRW_IMMEDIATE_VALUE &&
+         surf_index.type == BRW_REGISTER_TYPE_UD);
+
+   brw_untyped_surface_read(p, dst, brw_message_reg(inst->base_mrf),
+                            surf_index.dw1.ud,
+                            inst->mlen, dispatch_width / 8);
+
+   mark_surface_used(surf_index.dw1.ud);
+}
+
 void
 fs_generator::generate_code(exec_list *instructions)
 {
@@ -1609,6 +1623,10 @@ fs_generator::generate_code(exec_list *instructions)
          generate_untyped_atomic(inst, dst, src[0], src[1]);
          break;
 
+      case SHADER_OPCODE_UNTYPED_SURFACE_READ:
+         generate_untyped_surface_read(inst, dst, src[0]);
+         break;
+
       case FS_OPCODE_SET_SIMD4X2_OFFSET:
          generate_set_simd4x2_offset(inst, dst, src[0]);
          break;
index dbdf442bbf36e274e786e3df5a94d229cba6ae19..c1cfefa86875fcacef1e5838f39a4d7154353d46 100644 (file)
@@ -277,6 +277,7 @@ vec4_visitor::implied_mrf_writes(vec4_instruction *inst)
    case SHADER_OPCODE_TG4_OFFSET:
       return inst->header_present ? 1 : 0;
    case SHADER_OPCODE_UNTYPED_ATOMIC:
+   case SHADER_OPCODE_UNTYPED_SURFACE_READ:
       return 0;
    default:
       assert(!"not reached");
index 5ba32b645301c48903914fedefd67d2e657bca8f..a479646c69b8bfe31fe2f4429cad6f366c6c446c 100644 (file)
@@ -622,6 +622,10 @@ private:
                                 struct brw_reg atomic_op,
                                 struct brw_reg surf_index);
 
+   /**
+    * Generate code for SHADER_OPCODE_UNTYPED_SURFACE_READ.  surf_index
+    * must be an immediate of unsigned dword type.
+    */
+   void generate_untyped_surface_read(vec4_instruction *inst,
+                                      struct brw_reg dst,
+                                      struct brw_reg surf_index);
+
    void mark_surface_used(unsigned surf_index);
 
    struct brw_context *brw;
index e8e9f072d14566c3c1b503312e70a611229323e9..426f78c2abee331d2d5d41cf62431a8a5ec8f99c 100644 (file)
@@ -872,6 +872,21 @@ vec4_generator::generate_untyped_atomic(vec4_instruction *inst,
    mark_surface_used(surf_index.dw1.ud);
 }
 
+/**
+ * Generate an untyped surface read for the vec4 back-end.
+ *
+ * The message is sourced from the MRF at inst->base_mrf with length
+ * inst->mlen, and the response length is fixed at 1.  The surface is
+ * recorded as used afterwards.
+ */
+void
+vec4_generator::generate_untyped_surface_read(vec4_instruction *inst,
+                                              struct brw_reg dst,
+                                              struct brw_reg surf_index)
+{
+   /* The surface index is read from the immediate field below. */
+   assert(surf_index.file == BRW_IMMEDIATE_VALUE &&
+         surf_index.type == BRW_REGISTER_TYPE_UD);
+
+   brw_untyped_surface_read(p, dst, brw_message_reg(inst->base_mrf),
+                            surf_index.dw1.ud,
+                            inst->mlen, 1);
+
+   mark_surface_used(surf_index.dw1.ud);
+}
+
 /**
  * Generate assembly for a Vec4 IR instruction.
  *
@@ -1188,6 +1203,10 @@ vec4_generator::generate_vec4_instruction(vec4_instruction *instruction,
       generate_untyped_atomic(inst, dst, src[0], src[1]);
       break;
 
+   case SHADER_OPCODE_UNTYPED_SURFACE_READ:
+      generate_untyped_surface_read(inst, dst, src[0]);
+      break;
+
    case VS_OPCODE_UNPACK_FLAGS_SIMD4X2:
       generate_unpack_flags(inst, dst);
       break;