i965/skl: Always use a header for SIMD4x2 sampler messages
author: Kristian Høgsberg <krh@bitplanet.net>
Wed, 10 Dec 2014 22:59:26 +0000 (14:59 -0800)
committer: Kristian Høgsberg <krh@bitplanet.net>
Thu, 8 Jan 2015 18:13:32 +0000 (10:13 -0800)
SKL+ overloads the SIMD4x2 SIMD mode to mean either SIMD8D or SIMD4x2
depending on bit 22 in the message header.  If the bit is 0 or there is
no header we get SIMD8D.  We always want SIMD4x2 in vec4 and for fs pull
constants, so use a message header in those cases and set bit 22 there.

Based on an initial patch from Ken.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Signed-off-by: Kristian Høgsberg <krh@bitplanet.net>
src/mesa/drivers/dri/i965/brw_defines.h
src/mesa/drivers/dri/i965/brw_fs.cpp
src/mesa/drivers/dri/i965/brw_fs_generator.cpp
src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp

index 28e398d0b88bd3f1a2d03170ac77acc95b7d9434..f02a0b8eb7e410803d7ae740d716965665a085d4 100644 (file)
@@ -1373,6 +1373,11 @@ enum brw_message_target {
 #define BRW_SAMPLER_SIMD_MODE_SIMD16                    2
 #define BRW_SAMPLER_SIMD_MODE_SIMD32_64                 3
 
+/* GEN9 changes SIMD mode 0 to mean SIMD8D, but lets us get the SIMD4x2
+ * behavior by setting bit 22 of dword 2 in the message header. */
+#define GEN9_SAMPLER_SIMD_MODE_SIMD8D                   0
+#define GEN9_SAMPLER_SIMD_MODE_EXTENSION_SIMD4X2        (1 << 22)
+
 #define BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW   0
 #define BRW_DATAPORT_OWORD_BLOCK_1_OWORDHIGH  1
 #define BRW_DATAPORT_OWORD_BLOCK_2_OWORDS     2
index 8c7d780f352a46d743e19b1a7688e1ae43923993..9dfb7b7343d6e8a5b69d17f8c5edeb6a9f529e7e 100644 (file)
@@ -2994,6 +2994,14 @@ fs_visitor::lower_uniform_pull_constant_loads()
          const_offset_reg.fixed_hw_reg.dw1.ud /= 4;
          fs_reg payload = fs_reg(this, glsl_type::uint_type);
 
+         /* We have to use a message header on Skylake to get SIMD4x2 mode.
+          * Reserve space for the register.
+          */
+         if (brw->gen >= 9) {
+            payload.reg_offset++;
+            virtual_grf_sizes[payload.reg] = 2;
+         }
+
          /* This is actually going to be a MOV, but since only the first dword
           * is accessed, we have a special opcode to do just that one.  Note
           * that this needs to be an operation that will be considered a def
index c652d65423577c4c1be1a79cff3cada472e4e778..7b4ac8d36953a326a3c2878cb2ef9a97072fc421 100644 (file)
@@ -1017,6 +1017,26 @@ fs_generator::generate_uniform_pull_constant_load_gen7(fs_inst *inst,
     */
    dst.width = BRW_WIDTH_4;
 
+   struct brw_reg src = offset;
+   bool header_present = false;
+   int mlen = 1;
+
+   if (brw->gen >= 9) {
+      /* Skylake requires a message header in order to use SIMD4x2 mode. */
+      src = retype(brw_vec8_grf(offset.nr - 1, 0), BRW_REGISTER_TYPE_UD);
+      mlen = 2;
+      header_present = true;
+
+      brw_push_insn_state(p);
+      brw_set_default_mask_control(p, BRW_MASK_DISABLE);
+      brw_MOV(p, src, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
+      brw_set_default_access_mode(p, BRW_ALIGN_1);
+
+      brw_MOV(p, get_element_ud(src, 2),
+              brw_imm_ud(GEN9_SAMPLER_SIMD_MODE_EXTENSION_SIMD4X2));
+      brw_pop_insn_state(p);
+   }
+
    if (index.file == BRW_IMMEDIATE_VALUE) {
 
       uint32_t surf_index = index.dw1.ud;
@@ -1028,14 +1048,14 @@ fs_generator::generate_uniform_pull_constant_load_gen7(fs_inst *inst,
       brw_pop_insn_state(p);
 
       brw_set_dest(p, send, dst);
-      brw_set_src0(p, send, offset);
+      brw_set_src0(p, send, src);
       brw_set_sampler_message(p, send,
                               surf_index,
                               0, /* LD message ignores sampler unit */
                               GEN5_SAMPLER_MESSAGE_SAMPLE_LD,
                               1, /* rlen */
-                              1, /* mlen */
-                              false, /* no header */
+                              mlen,
+                              header_present,
                               BRW_SAMPLER_SIMD_MODE_SIMD4X2,
                               0);
 
@@ -1064,8 +1084,8 @@ fs_generator::generate_uniform_pull_constant_load_gen7(fs_inst *inst,
                               0 /* sampler */,
                               GEN5_SAMPLER_MESSAGE_SAMPLE_LD,
                               1 /* rlen */,
-                              1 /* mlen */,
-                              false /* header */,
+                              mlen,
+                              header_present,
                               BRW_SAMPLER_SIMD_MODE_SIMD4X2,
                               0);
       brw_inst_set_exec_size(p->brw, insn_or, BRW_EXECUTE_1);
@@ -1077,7 +1097,7 @@ fs_generator::generate_uniform_pull_constant_load_gen7(fs_inst *inst,
       /* dst = send(offset, a0.0) */
       brw_inst *insn_send = brw_next_insn(p, BRW_OPCODE_SEND);
       brw_set_dest(p, insn_send, dst);
-      brw_set_src0(p, insn_send, offset);
+      brw_set_src0(p, insn_send, src);
       brw_set_indirect_send_descriptor(p, insn_send, BRW_SFID_SAMPLER, addr);
 
       brw_pop_insn_state(p);
index b88a57912cb501db94849f3dfe78d9d361758fff..f900bf7f300c58a44c901f7f2bce404dbf508180 100644 (file)
@@ -328,6 +328,7 @@ vec4_generator::generate_tex(vec4_instruction *inst,
       } else {
          struct brw_reg header =
             retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_UD);
+         uint32_t dw2 = 0;
 
          /* Explicitly set up the message header by copying g0 to the MRF. */
          brw_push_insn_state(p);
@@ -336,11 +337,18 @@ vec4_generator::generate_tex(vec4_instruction *inst,
 
          brw_set_default_access_mode(p, BRW_ALIGN_1);
 
-         if (inst->offset) {
+         if (inst->offset)
             /* Set the texel offset bits in DWord 2. */
-            brw_MOV(p, get_element_ud(header, 2),
-                    brw_imm_ud(inst->offset));
-         }
+            dw2 = inst->offset;
+
+         if (brw->gen >= 9)
+            /* SKL+ overloads BRW_SAMPLER_SIMD_MODE_SIMD4X2 to also do SIMD8D,
+             * based on bit 22 in the header.
+             */
+            dw2 |= GEN9_SAMPLER_SIMD_MODE_EXTENSION_SIMD4X2;
+
+         if (dw2)
+            brw_MOV(p, get_element_ud(header, 2), brw_imm_ud(dw2));
 
          brw_adjust_sampler_state_pointer(p, header, sampler_index, dst);
          brw_pop_insn_state(p);
index 09d79c83704c02de44d5c41261c1ddcf43e5bbd4..a81c66a861fcb56533681aa18452780654480864 100644 (file)
@@ -2586,12 +2586,14 @@ vec4_visitor::visit(ir_texture *ir)
 
    /* The message header is necessary for:
     * - Gen4 (always)
+    * - Gen9+ for selecting SIMD4x2
     * - Texel offsets
     * - Gather channel selection
     * - Sampler indices too large to fit in a 4-bit value.
     */
    inst->header_present =
-      brw->gen < 5 || inst->offset != 0 || ir->op == ir_tg4 ||
+      brw->gen < 5 || brw->gen >= 9 ||
+      inst->offset != 0 || ir->op == ir_tg4 ||
       is_high_sampler(brw, sampler_reg);
    inst->base_mrf = 2;
    inst->mlen = inst->header_present + 1; /* always at least one */