intel/fs,vec4: Properly account SENDs in IVB memory fence
author: Caio Marcelo de Oliveira Filho <caio.oliveira@intel.com>
Fri, 3 Jan 2020 18:05:39 +0000 (10:05 -0800)
committer: Caio Marcelo de Oliveira Filho <caio.oliveira@intel.com>
Mon, 20 Apr 2020 16:29:09 +0000 (09:29 -0700)
Change brw_memory_fence to return the number of messages emitted, and
use that to update the send_count statistic in code generation.

This fixes the book-keeping for IVB, where each memory fence results in
two SEND messages.

Reviewed-by: Francisco Jerez <currojerez@riseup.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4646>

src/intel/compiler/brw_eu.h
src/intel/compiler/brw_eu_emit.c
src/intel/compiler/brw_fs_generator.cpp
src/intel/compiler/brw_vec4_generator.cpp

index 591d01386febb5d5ab509f6b2df78bc2f93fdfb6..96c22ab429a3e5b0d33b5a9a97fee302a066b68c 100644 (file)
@@ -1148,7 +1148,7 @@ brw_untyped_surface_write(struct brw_codegen *p,
                           unsigned num_channels,
                           bool header_present);
 
-void
+unsigned
 brw_memory_fence(struct brw_codegen *p,
                  struct brw_reg dst,
                  struct brw_reg src,
index 1938dd65f4d78c741dd9d0b707c16d77c049855d..83f7f4a62ca5748921bdf17925f6fe5d6e87dd9f 100644 (file)
@@ -3145,7 +3145,7 @@ brw_set_memory_fence_message(struct brw_codegen *p,
    brw_inst_set_binding_table_index(devinfo, insn, bti);
 }
 
-void
+unsigned
 brw_memory_fence(struct brw_codegen *p,
                  struct brw_reg dst,
                  struct brw_reg src,
@@ -3159,6 +3159,8 @@ brw_memory_fence(struct brw_codegen *p,
       (devinfo->gen == 7 && !devinfo->is_haswell);
    struct brw_inst *insn;
 
+   unsigned fences = 0;
+
    brw_push_insn_state(p);
    brw_set_default_mask_control(p, BRW_MASK_DISABLE);
    brw_set_default_exec_size(p, BRW_EXECUTE_1);
@@ -3173,6 +3175,7 @@ brw_memory_fence(struct brw_codegen *p,
    brw_set_src0(p, insn, src);
    brw_set_memory_fence_message(p, insn, GEN7_SFID_DATAPORT_DATA_CACHE,
                                 commit_enable, bti);
+   fences++;
 
    if (devinfo->gen == 7 && !devinfo->is_haswell) {
       /* IVB does typed surface access through the render cache, so we need to
@@ -3184,6 +3187,7 @@ brw_memory_fence(struct brw_codegen *p,
       brw_set_src0(p, insn, src);
       brw_set_memory_fence_message(p, insn, GEN6_SFID_DATAPORT_RENDER_CACHE,
                                    commit_enable, bti);
+      fences++;
 
       /* Now write the response of the second message into the response of the
        * first to trigger a pipeline stall -- This way future render and data
@@ -3201,6 +3205,8 @@ brw_memory_fence(struct brw_codegen *p,
    }
 
    brw_pop_insn_state(p);
+
+   return fences;
 }
 
 void
index 8afc075f187245d77ad427f7e057b5f49cfa0045..b50f03142b54456571c5fb4906847da48eeddf34 100644 (file)
@@ -2216,12 +2216,15 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width,
          generate_shader_time_add(inst, src[0], src[1], src[2]);
          break;
 
-      case SHADER_OPCODE_MEMORY_FENCE:
+      case SHADER_OPCODE_MEMORY_FENCE: {
          assert(src[1].file == BRW_IMMEDIATE_VALUE);
          assert(src[2].file == BRW_IMMEDIATE_VALUE);
-         brw_memory_fence(p, dst, src[0], BRW_OPCODE_SEND, src[1].ud, src[2].ud);
-         send_count++;
+         const unsigned sends =
+            brw_memory_fence(p, dst, src[0], BRW_OPCODE_SEND, src[1].ud,
+                             src[2].ud);
+         send_count += sends;
          break;
+      }
 
       case FS_OPCODE_SCHEDULING_FENCE:
          if (unlikely(debug_flag))
index c247c988181fa17d140a05d21a9b110804f2c35d..be5eaf43ca0833b3f9d0abbbdb0025865176ed35 100644 (file)
@@ -1910,10 +1910,13 @@ generate_code(struct brw_codegen *p,
          send_count++;
          break;
 
-      case SHADER_OPCODE_MEMORY_FENCE:
-         brw_memory_fence(p, dst, src[0], BRW_OPCODE_SEND, false, /* bti */ 0);
-         send_count++;
+      case SHADER_OPCODE_MEMORY_FENCE: {
+         const unsigned sends =
+            brw_memory_fence(p, dst, src[0], BRW_OPCODE_SEND, false,
+                             /* bti */ 0);
+         send_count += sends;
          break;
+      }
 
       case SHADER_OPCODE_FIND_LIVE_CHANNEL: {
          const struct brw_reg mask =