i965: Add SIMD8 URB write low-level IR instruction
authorKristian Høgsberg <krh@bitplanet.net>
Tue, 21 Oct 2014 06:00:50 +0000 (23:00 -0700)
committerKristian Høgsberg <krh@bitplanet.net>
Wed, 10 Dec 2014 20:29:00 +0000 (12:29 -0800)
This is all we need from the generator for SIMD8 vertex shaders.  This
opcode is just the send instruction, all the hard work will happen
in the visitor using LOAD_PAYLOAD.

Signed-off-by: Kristian Høgsberg <krh@bitplanet.net>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
src/mesa/drivers/dri/i965/brw_defines.h
src/mesa/drivers/dri/i965/brw_fs.cpp
src/mesa/drivers/dri/i965/brw_fs.h
src/mesa/drivers/dri/i965/brw_fs_generator.cpp
src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
src/mesa/drivers/dri/i965/brw_shader.cpp

index d4211496168cf22411f3fbc074bed20f9add20a6..843ef32cbbdf8af248a50a5cfa9bf59fe7dfa81c 100644 (file)
@@ -908,6 +908,8 @@ enum opcode {
    SHADER_OPCODE_GEN4_SCRATCH_WRITE,
    SHADER_OPCODE_GEN7_SCRATCH_READ,
 
+   SHADER_OPCODE_URB_WRITE_SIMD8,
+
    VEC4_OPCODE_PACK_BYTES,
    VEC4_OPCODE_UNPACK_UNIFORM,
 
@@ -1529,6 +1531,7 @@ enum brw_message_target {
 
 #define BRW_URB_OPCODE_WRITE_HWORD  0
 #define BRW_URB_OPCODE_WRITE_OWORD  1
+#define GEN8_URB_OPCODE_SIMD8_WRITE  7
 
 #define BRW_URB_SWIZZLE_NONE          0
 #define BRW_URB_SWIZZLE_INTERLEAVE    1
index a0fb56539988d8e56fec9390810bfa5886692409..9365b7a29b97c232f46a0b1719052081469d952e 100644 (file)
@@ -509,6 +509,7 @@ fs_inst::is_send_from_grf() const
    case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:
    case SHADER_OPCODE_UNTYPED_ATOMIC:
    case SHADER_OPCODE_UNTYPED_SURFACE_READ:
+   case SHADER_OPCODE_URB_WRITE_SIMD8:
       return true;
    case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
       return src[1].file == GRF;
@@ -898,6 +899,8 @@ fs_inst::regs_read(fs_visitor *v, int arg) const
       return mlen;
    } else if (opcode == FS_OPCODE_FB_WRITE && arg == 0) {
       return mlen;
+   } else if (opcode == SHADER_OPCODE_URB_WRITE_SIMD8 && arg == 0) {
+      return mlen;
    } else if (opcode == SHADER_OPCODE_UNTYPED_ATOMIC && arg == 0) {
       return mlen;
    } else if (opcode == SHADER_OPCODE_UNTYPED_SURFACE_READ && arg == 0) {
@@ -992,6 +995,7 @@ fs_visitor::implied_mrf_writes(fs_inst *inst)
       return 2;
    case SHADER_OPCODE_UNTYPED_ATOMIC:
    case SHADER_OPCODE_UNTYPED_SURFACE_READ:
+   case SHADER_OPCODE_URB_WRITE_SIMD8:
    case FS_OPCODE_INTERPOLATE_AT_CENTROID:
    case FS_OPCODE_INTERPOLATE_AT_SAMPLE:
    case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET:
index f0e6f251dbb6aff7a8691a1f0609063f8e9d63be..d252caf9b6864a46268f5aa8eea591f95cbc14e4 100644 (file)
@@ -705,6 +705,7 @@ private:
                       struct brw_reg implied_header,
                       GLuint nr);
    void generate_fb_write(fs_inst *inst, struct brw_reg payload);
+   void generate_urb_write(fs_inst *inst, struct brw_reg payload);
    void generate_blorp_fb_write(fs_inst *inst);
    void generate_pixel_xy(struct brw_reg dst, bool is_x);
    void generate_linterp(fs_inst *inst, struct brw_reg dst,
index 95bdfc1b6aa9200bea7de2d7e3af806899a06e19..c131db2391cab606c89db24c6d926b3205b113e5 100644 (file)
@@ -335,6 +335,27 @@ fs_generator::generate_fb_write(fs_inst *inst, struct brw_reg payload)
    }
 }
 
+void
+fs_generator::generate_urb_write(fs_inst *inst, struct brw_reg payload)
+{
+   brw_inst *insn;
+
+   insn = brw_next_insn(p, BRW_OPCODE_SEND);
+
+   brw_set_dest(p, insn, brw_null_reg());
+   brw_set_src0(p, insn, payload);
+   brw_set_src1(p, insn, brw_imm_d(0));
+
+   brw_inst_set_sfid(brw, insn, BRW_SFID_URB);
+   brw_inst_set_urb_opcode(brw, insn, GEN8_URB_OPCODE_SIMD8_WRITE);
+
+   brw_inst_set_mlen(brw, insn, inst->mlen);
+   brw_inst_set_rlen(brw, insn, 0);
+   brw_inst_set_eot(brw, insn, inst->eot);
+   brw_inst_set_header_present(brw, insn, true);
+   brw_inst_set_urb_global_offset(brw, insn, inst->offset);
+}
+
 void
 fs_generator::generate_blorp_fb_write(fs_inst *inst)
 {
@@ -1887,6 +1908,10 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
         generate_scratch_read_gen7(inst, dst);
         break;
 
+      case SHADER_OPCODE_URB_WRITE_SIMD8:
+        generate_urb_write(inst, src[0]);
+        break;
+
       case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
         generate_uniform_pull_constant_load(inst, dst, src[0], src[1]);
         break;
index 0813f6457288e4b52cd18392fe64dfa6f0598b93..eba2fdd0816fdd8d05c1376701a9d38e8a037097 100644 (file)
@@ -385,6 +385,7 @@ fs_visitor::setup_payload_interference(struct ra_graph *g,
 
       /* Special case instructions which have extra implied registers used. */
       switch (inst->opcode) {
+      case SHADER_OPCODE_URB_WRITE_SIMD8:
       case FS_OPCODE_FB_WRITE:
          /* We could omit this for the !inst->header_present case, except that
           * the simulator apparently incorrectly reads from g0/g1 instead of
@@ -522,6 +523,19 @@ fs_visitor::setup_mrf_hack_interference(struct ra_graph *g, int first_mrf_node)
    }
 }
 
+static bool
+is_last_send(fs_inst *inst)
+{
+   switch (inst->opcode) {
+   case SHADER_OPCODE_URB_WRITE_SIMD8:
+   case FS_OPCODE_FB_WRITE:
+      return inst->eot;
+   default:
+      assert(!inst->eot);
+      return false;
+   }
+}
+
 bool
 fs_visitor::assign_regs(bool allow_spilling)
 {
@@ -594,7 +608,7 @@ fs_visitor::assign_regs(bool allow_spilling)
           * We could just do "something high".  Instead, we just pick the
           * highest register that works.
           */
-         if (inst->opcode == FS_OPCODE_FB_WRITE && inst->eot) {
+         if (is_last_send(inst)) {
             int size = virtual_grf_sizes[inst->src[0].reg];
             int reg = screen->wm_reg_sets[rsi].class_to_ra_reg_range[size] - 1;
             ra_set_node_reg(g, inst->src[0].reg, reg);
index 56aa7578603a1f0845a5f64cd3a2f4a9a5531233..b3b1ad79d627c77896539777fcd74506fcc97bd7 100644 (file)
@@ -453,6 +453,8 @@ brw_instruction_name(enum opcode op)
       return "gen4_scratch_write";
    case SHADER_OPCODE_GEN7_SCRATCH_READ:
       return "gen7_scratch_read";
+   case SHADER_OPCODE_URB_WRITE_SIMD8:
+      return "gen8_urb_write_simd8";
 
    case VEC4_OPCODE_PACK_BYTES:
       return "pack_bytes";
@@ -754,6 +756,7 @@ backend_instruction::has_side_effects() const
 {
    switch (opcode) {
    case SHADER_OPCODE_UNTYPED_ATOMIC:
+   case SHADER_OPCODE_URB_WRITE_SIMD8:
    case FS_OPCODE_FB_WRITE:
       return true;
    default: