i965: Introduce a new SHADER_OPCODE_URB_READ_SIMD8 opcode.
author Kenneth Graunke <kenneth@whitecape.org>
Tue, 29 Sep 2015 21:32:02 +0000 (14:32 -0700)
committer Kenneth Graunke <kenneth@whitecape.org>
Wed, 21 Oct 2015 21:27:46 +0000 (14:27 -0700)
In scalar mode, geometry shader inputs can easily take up hundreds of
registers.  This makes pushing VUE entries impractical; we'll need to
resort to the pull model in some cases.

To support this, we introduce a new opcode corresponding to the "URB
Read SIMD8" message.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
src/mesa/drivers/dri/i965/brw_defines.h
src/mesa/drivers/dri/i965/brw_fs.cpp
src/mesa/drivers/dri/i965/brw_fs.h
src/mesa/drivers/dri/i965/brw_fs_generator.cpp
src/mesa/drivers/dri/i965/brw_shader.cpp

index b2ce197faba9f823228a85a6f6c47043b31dc638..bd7d0b1c9a7b2fa89d9ca78790080ea96bba1d9e 100644 (file)
@@ -1031,6 +1031,15 @@ enum opcode {
    SHADER_OPCODE_GEN4_SCRATCH_WRITE,
    SHADER_OPCODE_GEN7_SCRATCH_READ,
 
+   /**
+    * Gen8+ SIMD8 URB Read message.
+    *
+    * Source 0: The header register, containing URB handles (g1).
+    *
+    * Currently only supports constant offsets, in inst->offset.
+    */
+   SHADER_OPCODE_URB_READ_SIMD8,
+
    SHADER_OPCODE_URB_WRITE_SIMD8,
    SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT,
    SHADER_OPCODE_URB_WRITE_SIMD8_MASKED,
index 6f344c3bfdc7a2e628012c55adccaebca95ed0e2..436ee4d5f23eab02d87513b6d643e75d629d423b 100644 (file)
@@ -284,6 +284,7 @@ fs_inst::is_send_from_grf() const
    case SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT:
    case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED:
    case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT:
+   case SHADER_OPCODE_URB_READ_SIMD8:
       return true;
    case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
       return src[1].file == GRF;
@@ -787,6 +788,7 @@ fs_inst::regs_read(int arg) const
    case SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT:
    case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED:
    case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT:
+   case SHADER_OPCODE_URB_READ_SIMD8:
    case SHADER_OPCODE_UNTYPED_ATOMIC:
    case SHADER_OPCODE_UNTYPED_SURFACE_READ:
    case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
index 171338dcc0bd406cceb9affe158f1ee491f93676..f4d2e14b821d27fbca397502d9f40d870d4fc60f 100644 (file)
@@ -415,6 +415,7 @@ private:
                       struct brw_reg implied_header,
                       GLuint nr);
    void generate_fb_write(fs_inst *inst, struct brw_reg payload);
+   void generate_urb_read(fs_inst *inst, struct brw_reg dst, struct brw_reg payload);
    void generate_urb_write(fs_inst *inst, struct brw_reg payload);
    void generate_cs_terminate(fs_inst *inst, struct brw_reg payload);
    void generate_barrier(fs_inst *inst, struct brw_reg src);
index f4b6afa373287461bce1b422b3d1d6c271fd72a8..bb7e792044f179fc5c28b8c6be7e9ab2c9ef678d 100644 (file)
@@ -354,6 +354,28 @@ fs_generator::generate_fb_write(fs_inst *inst, struct brw_reg payload)
    }
 }
 
+void
+fs_generator::generate_urb_read(fs_inst *inst,
+                                struct brw_reg dst,
+                                struct brw_reg header)
+{  /* Emit the SEND for SHADER_OPCODE_URB_READ_SIMD8: read from the URB into dst. */
+   assert(header.file == BRW_GENERAL_REGISTER_FILE);  /* header must live in a GRF */
+   assert(header.type == BRW_REGISTER_TYPE_UD);  /* header must be typed UD */
+
+   brw_inst *send = brw_next_insn(p, BRW_OPCODE_SEND);
+   brw_set_dest(p, send, dst);  /* dst is the destination operand of the SEND */
+   brw_set_src0(p, send, header);  /* src0 is the header register */
+   brw_set_src1(p, send, brw_imm_ud(0u));  /* src1 is an immediate zero */
+
+   brw_inst_set_sfid(p->devinfo, send, BRW_SFID_URB);  /* address the URB */
+   brw_inst_set_urb_opcode(p->devinfo, send, GEN8_URB_OPCODE_SIMD8_READ);
+
+   brw_inst_set_mlen(p->devinfo, send, inst->mlen);  /* message length from the IR */
+   brw_inst_set_rlen(p->devinfo, send, inst->regs_written);  /* response length = registers written */
+   brw_inst_set_header_present(p->devinfo, send, true);
+   brw_inst_set_urb_global_offset(p->devinfo, send, inst->offset);  /* constant offset from inst->offset */
+}
+
 void
 fs_generator::generate_urb_write(fs_inst *inst, struct brw_reg payload)
 {
@@ -2009,6 +2031,10 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
          fill_count++;
         break;
 
+      case SHADER_OPCODE_URB_READ_SIMD8:
+         generate_urb_read(inst, dst, src[0]);
+         break;
+
       case SHADER_OPCODE_URB_WRITE_SIMD8:
       case SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT:
       case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED:
index 94c201142e7edd0b3e97d7d66cc914bbd6d960b7..d910e479c9d77f36e6bbd6f4ebad9d14297fe349 100644 (file)
@@ -414,6 +414,8 @@ brw_instruction_name(enum opcode op)
       return "gen8_urb_write_simd8_masked";
    case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT:
       return "gen8_urb_write_simd8_masked_per_slot";
+   case SHADER_OPCODE_URB_READ_SIMD8:
+      return "urb_read_simd8";
 
    case SHADER_OPCODE_FIND_LIVE_CHANNEL:
       return "find_live_channel";