intel/eu: Plumb header present bit to codegen helpers for HDC messages.
author Francisco Jerez <currojerez@riseup.net>
Tue, 12 Dec 2017 20:05:03 +0000 (12:05 -0800)
committer Francisco Jerez <currojerez@riseup.net>
Fri, 2 Mar 2018 19:28:56 +0000 (11:28 -0800)
This makes sure that the header-present bit of the message descriptor
is in sync with the IR instruction fields, which gives the optimizer
more control to avoid the overhead of setting up a message header when
it's possible to do so.

Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
src/intel/compiler/brw_eu.h
src/intel/compiler/brw_eu_emit.c
src/intel/compiler/brw_fs_generator.cpp
src/intel/compiler/brw_vec4_generator.cpp

index 2d0f56f7938174975e6b92b6e01a06c6f742da89..a5f28d8fc650f1b57c6e0986e68b81063401ceb9 100644 (file)
@@ -444,7 +444,8 @@ brw_untyped_atomic(struct brw_codegen *p,
                    struct brw_reg surface,
                    unsigned atomic_op,
                    unsigned msg_length,
-                   bool response_expected);
+                   bool response_expected,
+                   bool header_present);
 
 void
 brw_untyped_surface_read(struct brw_codegen *p,
@@ -459,7 +460,8 @@ brw_untyped_surface_write(struct brw_codegen *p,
                           struct brw_reg payload,
                           struct brw_reg surface,
                           unsigned msg_length,
-                          unsigned num_channels);
+                          unsigned num_channels,
+                          bool header_present);
 
 void
 brw_typed_atomic(struct brw_codegen *p,
@@ -468,7 +470,8 @@ brw_typed_atomic(struct brw_codegen *p,
                  struct brw_reg surface,
                  unsigned atomic_op,
                  unsigned msg_length,
-                 bool response_expected);
+                 bool response_expected,
+                 bool header_present);
 
 void
 brw_typed_surface_read(struct brw_codegen *p,
@@ -476,14 +479,16 @@ brw_typed_surface_read(struct brw_codegen *p,
                        struct brw_reg payload,
                        struct brw_reg surface,
                        unsigned msg_length,
-                       unsigned num_channels);
+                       unsigned num_channels,
+                       bool header_present);
 
 void
 brw_typed_surface_write(struct brw_codegen *p,
                         struct brw_reg payload,
                         struct brw_reg surface,
                         unsigned msg_length,
-                        unsigned num_channels);
+                        unsigned num_channels,
+                        bool header_present);
 
 void
 brw_byte_scattered_read(struct brw_codegen *p,
@@ -498,7 +503,8 @@ brw_byte_scattered_write(struct brw_codegen *p,
                          struct brw_reg payload,
                          struct brw_reg surface,
                          unsigned msg_length,
-                         unsigned bit_size);
+                         unsigned bit_size,
+                         bool header_present);
 
 void
 brw_memory_fence(struct brw_codegen *p,
index 14b1c592b639813751afe64c4d8a09d1e9a6cd4c..44abede16bc2ab884fed23e127131a43eb2678ba 100644 (file)
@@ -2883,7 +2883,8 @@ brw_untyped_atomic(struct brw_codegen *p,
                    struct brw_reg surface,
                    unsigned atomic_op,
                    unsigned msg_length,
-                   bool response_expected)
+                   bool response_expected,
+                   bool header_present)
 {
    const struct gen_device_info *devinfo = p->devinfo;
    const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ?
@@ -2901,7 +2902,7 @@ brw_untyped_atomic(struct brw_codegen *p,
       p, sfid, brw_writemask(dst, mask), payload, surface, msg_length,
       brw_surface_payload_size(p, response_expected,
                                devinfo->gen >= 8 || devinfo->is_haswell, true),
-      align1);
+      header_present);
 
    brw_set_dp_untyped_atomic_message(
       p, insn, atomic_op, response_expected);
@@ -2984,7 +2985,8 @@ brw_untyped_surface_write(struct brw_codegen *p,
                           struct brw_reg payload,
                           struct brw_reg surface,
                           unsigned msg_length,
-                          unsigned num_channels)
+                          unsigned num_channels,
+                          bool header_present)
 {
    const struct gen_device_info *devinfo = p->devinfo;
    const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ?
@@ -2996,7 +2998,7 @@ brw_untyped_surface_write(struct brw_codegen *p,
                           WRITEMASK_X : WRITEMASK_XYZW;
    struct brw_inst *insn = brw_send_indirect_surface_message(
       p, sfid, brw_writemask(brw_null_reg(), mask),
-      payload, surface, msg_length, 0, align1);
+      payload, surface, msg_length, 0, header_present);
 
    brw_set_dp_untyped_surface_write_message(
       p, insn, num_channels);
@@ -3054,7 +3056,8 @@ brw_byte_scattered_write(struct brw_codegen *p,
                          struct brw_reg payload,
                          struct brw_reg surface,
                          unsigned msg_length,
-                         unsigned bit_size)
+                         unsigned bit_size,
+                         bool header_present)
 {
    const struct gen_device_info *devinfo = p->devinfo;
    assert(devinfo->gen > 7 || devinfo->is_haswell);
@@ -3063,7 +3066,7 @@ brw_byte_scattered_write(struct brw_codegen *p,
 
    struct brw_inst *insn = brw_send_indirect_surface_message(
       p, sfid, brw_writemask(brw_null_reg(), WRITEMASK_XYZW),
-      payload, surface, msg_length, 0, true);
+      payload, surface, msg_length, 0, header_present);
 
    unsigned msg_control =
       brw_byte_scattered_data_element_from_bit_size(bit_size) << 2;
@@ -3119,7 +3122,8 @@ brw_typed_atomic(struct brw_codegen *p,
                  struct brw_reg surface,
                  unsigned atomic_op,
                  unsigned msg_length,
-                 bool response_expected) {
+                 bool response_expected,
+                 bool header_present) {
    const struct gen_device_info *devinfo = p->devinfo;
    const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ?
                           HSW_SFID_DATAPORT_DATA_CACHE_1 :
@@ -3131,7 +3135,7 @@ brw_typed_atomic(struct brw_codegen *p,
       p, sfid, brw_writemask(dst, mask), payload, surface, msg_length,
       brw_surface_payload_size(p, response_expected,
                                devinfo->gen >= 8 || devinfo->is_haswell, false),
-      true);
+      header_present);
 
    brw_set_dp_typed_atomic_message(
       p, insn, atomic_op, response_expected);
@@ -3175,7 +3179,8 @@ brw_typed_surface_read(struct brw_codegen *p,
                        struct brw_reg payload,
                        struct brw_reg surface,
                        unsigned msg_length,
-                       unsigned num_channels)
+                       unsigned num_channels,
+                       bool header_present)
 {
    const struct gen_device_info *devinfo = p->devinfo;
    const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ?
@@ -3185,7 +3190,7 @@ brw_typed_surface_read(struct brw_codegen *p,
       p, sfid, dst, payload, surface, msg_length,
       brw_surface_payload_size(p, num_channels,
                                devinfo->gen >= 8 || devinfo->is_haswell, false),
-      true);
+      header_present);
 
    brw_set_dp_typed_surface_read_message(
       p, insn, num_channels);
@@ -3229,7 +3234,8 @@ brw_typed_surface_write(struct brw_codegen *p,
                         struct brw_reg payload,
                         struct brw_reg surface,
                         unsigned msg_length,
-                        unsigned num_channels)
+                        unsigned num_channels,
+                        bool header_present)
 {
    const struct gen_device_info *devinfo = p->devinfo;
    const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ?
@@ -3241,7 +3247,7 @@ brw_typed_surface_write(struct brw_codegen *p,
                           WRITEMASK_X : WRITEMASK_XYZW);
    struct brw_inst *insn = brw_send_indirect_surface_message(
       p, sfid, brw_writemask(brw_null_reg(), mask),
-      payload, surface, msg_length, 0, true);
+      payload, surface, msg_length, 0, header_present);
 
    brw_set_dp_typed_surface_write_message(
       p, insn, num_channels);
index 557b098c20737189b51f446b43bffa5e35f490e8..60944a97d4b4d2c04689f728025f1241521b0e8e 100644 (file)
@@ -2118,10 +2118,12 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
       case SHADER_OPCODE_UNTYPED_ATOMIC:
          assert(src[2].file == BRW_IMMEDIATE_VALUE);
          brw_untyped_atomic(p, dst, src[0], src[1], src[2].ud,
-                            inst->mlen, !inst->dst.is_null());
+                            inst->mlen, !inst->dst.is_null(),
+                            inst->header_size);
          break;
 
       case SHADER_OPCODE_UNTYPED_SURFACE_READ:
+         assert(!inst->header_size);
          assert(src[2].file == BRW_IMMEDIATE_VALUE);
          brw_untyped_surface_read(p, dst, src[0], src[1],
                                   inst->mlen, src[2].ud);
@@ -2130,10 +2132,12 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
       case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
          assert(src[2].file == BRW_IMMEDIATE_VALUE);
          brw_untyped_surface_write(p, src[0], src[1],
-                                   inst->mlen, src[2].ud);
+                                   inst->mlen, src[2].ud,
+                                   inst->header_size);
          break;
 
       case SHADER_OPCODE_BYTE_SCATTERED_READ:
+         assert(!inst->header_size);
          assert(src[2].file == BRW_IMMEDIATE_VALUE);
          brw_byte_scattered_read(p, dst, src[0], src[1],
                                  inst->mlen, src[2].ud);
@@ -2142,24 +2146,28 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
       case SHADER_OPCODE_BYTE_SCATTERED_WRITE:
          assert(src[2].file == BRW_IMMEDIATE_VALUE);
          brw_byte_scattered_write(p, src[0], src[1],
-                                  inst->mlen, src[2].ud);
+                                  inst->mlen, src[2].ud,
+                                  inst->header_size);
          break;
 
       case SHADER_OPCODE_TYPED_ATOMIC:
          assert(src[2].file == BRW_IMMEDIATE_VALUE);
          brw_typed_atomic(p, dst, src[0], src[1],
-                          src[2].ud, inst->mlen, !inst->dst.is_null());
+                          src[2].ud, inst->mlen, !inst->dst.is_null(),
+                          inst->header_size);
          break;
 
       case SHADER_OPCODE_TYPED_SURFACE_READ:
          assert(src[2].file == BRW_IMMEDIATE_VALUE);
          brw_typed_surface_read(p, dst, src[0], src[1],
-                                inst->mlen, src[2].ud);
+                                inst->mlen, src[2].ud,
+                                inst->header_size);
          break;
 
       case SHADER_OPCODE_TYPED_SURFACE_WRITE:
          assert(src[2].file == BRW_IMMEDIATE_VALUE);
-         brw_typed_surface_write(p, src[0], src[1], inst->mlen, src[2].ud);
+         brw_typed_surface_write(p, src[0], src[1], inst->mlen, src[2].ud,
+                                 inst->header_size);
          break;
 
       case SHADER_OPCODE_MEMORY_FENCE:
index 6fa6e35b24a1a14755c2e24572f7e901b61eed8f..ecf9ed0ba3a9aa5acb928bfc1c698ea4ef96a78e 100644 (file)
@@ -1869,10 +1869,11 @@ generate_code(struct brw_codegen *p,
       case SHADER_OPCODE_UNTYPED_ATOMIC:
          assert(src[2].file == BRW_IMMEDIATE_VALUE);
          brw_untyped_atomic(p, dst, src[0], src[1], src[2].ud, inst->mlen,
-                            !inst->dst.is_null());
+                            !inst->dst.is_null(), inst->header_size);
          break;
 
       case SHADER_OPCODE_UNTYPED_SURFACE_READ:
+         assert(!inst->header_size);
          assert(src[2].file == BRW_IMMEDIATE_VALUE);
          brw_untyped_surface_read(p, dst, src[0], src[1], inst->mlen,
                                   src[2].ud);
@@ -1881,25 +1882,25 @@ generate_code(struct brw_codegen *p,
       case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
          assert(src[2].file == BRW_IMMEDIATE_VALUE);
          brw_untyped_surface_write(p, src[0], src[1], inst->mlen,
-                                   src[2].ud);
+                                   src[2].ud, inst->header_size);
          break;
 
       case SHADER_OPCODE_TYPED_ATOMIC:
          assert(src[2].file == BRW_IMMEDIATE_VALUE);
          brw_typed_atomic(p, dst, src[0], src[1], src[2].ud, inst->mlen,
-                          !inst->dst.is_null());
+                          !inst->dst.is_null(), inst->header_size);
          break;
 
       case SHADER_OPCODE_TYPED_SURFACE_READ:
          assert(src[2].file == BRW_IMMEDIATE_VALUE);
          brw_typed_surface_read(p, dst, src[0], src[1], inst->mlen,
-                                src[2].ud);
+                                src[2].ud, inst->header_size);
          break;
 
       case SHADER_OPCODE_TYPED_SURFACE_WRITE:
          assert(src[2].file == BRW_IMMEDIATE_VALUE);
          brw_typed_surface_write(p, src[0], src[1], inst->mlen,
-                                 src[2].ud);
+                                 src[2].ud, inst->header_size);
          break;
 
       case SHADER_OPCODE_MEMORY_FENCE: