freedreno/ir3: Drop wrmask for ir3 local and global store intrinsics
authorKristian H. Kristensen <hoegsberg@google.com>
Wed, 13 May 2020 20:19:57 +0000 (13:19 -0700)
committerRob Clark <robdclark@chromium.org>
Thu, 14 May 2020 03:24:33 +0000 (20:24 -0700)
These intrinsics are supposed to map to the underlying hardware
instructions, which don't have wrmask. We use them when we lower
store_output in the geometry pipeline and since store_output gets
lowered to temps, we always see full wrmasks there.

src/compiler/nir/nir_intrinsics.py
src/freedreno/ir3/ir3_compiler_nir.c
src/freedreno/ir3/ir3_nir_lower_tess.c

index 611955ffa0270b60c0038ffca2834f58635cc0b8..00098203d2ef63d78511fa37e0d6737a30fd75aa 100644 (file)
@@ -836,7 +836,7 @@ intrinsic("end_patch_ir3")
 # between geometry stages - perhaps it's explicit access to the vertex cache.
 
 # src[] = { value, offset }.
-store("shared_ir3", 2, [BASE, WRMASK, ALIGN_MUL, ALIGN_OFFSET])
+store("shared_ir3", 2, [BASE, ALIGN_MUL, ALIGN_OFFSET])
 # src[] = { offset }.
 load("shared_ir3", 1, [BASE, ALIGN_MUL, ALIGN_OFFSET], [CAN_ELIMINATE])
 
@@ -846,7 +846,7 @@ load("shared_ir3", 1, [BASE, ALIGN_MUL, ALIGN_OFFSET], [CAN_ELIMINATE])
 
 # src[] = { value, address(vec2 of hi+lo uint32_t), offset }.
-# const_index[] = { write_mask, align_mul, align_offset }
+# const_index[] = { access, align_mul, align_offset }
-intrinsic("store_global_ir3", [0, 2, 1], indices=[WRMASK, ACCESS, ALIGN_MUL, ALIGN_OFFSET])
+intrinsic("store_global_ir3", [0, 2, 1], indices=[ACCESS, ALIGN_MUL, ALIGN_OFFSET])
 # src[] = { address(vec2 of hi+lo uint32_t), offset }.
 # const_index[] = { access, align_mul, align_offset }
 intrinsic("load_global_ir3", [2, 1], dest_comp=0, indices=[ACCESS, ALIGN_MUL, ALIGN_OFFSET], flags=[CAN_ELIMINATE])
index 76f2f7525bf3e2df9a179b78e1730877e6bfe5a0..13e180118c43f67727e1fbe2b133fb81a1a97c63 100644 (file)
@@ -939,48 +939,27 @@ emit_intrinsic_load_shared_ir3(struct ir3_context *ctx, nir_intrinsic_instr *int
        ir3_split_dest(b, dst, load, 0, intr->num_components);
 }
 
-/* src[] = { value, offset }. const_index[] = { base, write_mask } */
+/* src[] = { value, offset }. const_index[] = { base } */
 static void
 emit_intrinsic_store_shared_ir3(struct ir3_context *ctx, nir_intrinsic_instr *intr)
 {
        struct ir3_block *b = ctx->block;
        struct ir3_instruction *store, *offset;
        struct ir3_instruction * const *value;
-       unsigned base, wrmask;
 
        value  = ir3_get_src(ctx, &intr->src[0]);
        offset = ir3_get_src(ctx, &intr->src[1])[0];
 
-       base   = nir_intrinsic_base(intr);
-       wrmask = nir_intrinsic_write_mask(intr);
-
-       /* Combine groups of consecutive enabled channels in one write
-        * message. We use ffs to find the first enabled channel and then ffs on
-        * the bit-inverse, down-shifted writemask to determine the length of
-        * the block of enabled bits.
-        *
-        * (trick stolen from i965's fs_visitor::nir_emit_cs_intrinsic())
-        */
-       while (wrmask) {
-               unsigned first_component = ffs(wrmask) - 1;
-               unsigned length = ffs(~(wrmask >> first_component)) - 1;
-
-               store = ir3_STLW(b, offset, 0,
-                       ir3_create_collect(ctx, &value[first_component], length), 0,
-                       create_immed(b, length), 0);
-
-               store->cat6.dst_offset = first_component + base;
-               store->cat6.type = utype_src(intr->src[0]);
-               store->barrier_class = IR3_BARRIER_SHARED_W;
-               store->barrier_conflict = IR3_BARRIER_SHARED_R | IR3_BARRIER_SHARED_W;
+       store = ir3_STLW(b, offset, 0,
+               ir3_create_collect(ctx, value, intr->num_components), 0,
+               create_immed(b, intr->num_components), 0);
 
-               array_insert(b, b->keeps, store);
+       store->cat6.dst_offset = nir_intrinsic_base(intr);
+       store->cat6.type = utype_src(intr->src[0]);
+       store->barrier_class = IR3_BARRIER_SHARED_W;
+       store->barrier_conflict = IR3_BARRIER_SHARED_R | IR3_BARRIER_SHARED_W;
 
-               /* Clear the bits in the writemask that we just wrote, then try
-                * again to see if more channels are left.
-                */
-               wrmask &= (15 << (first_component + length));
-       }
+       array_insert(b, b->keeps, store);
 }
 
 /*
index 4d8798c285fd436104cf2dbf1f6bf4c6857f83c1..9f4985bc34f4636e3ff99e0536b9ea0d0ccf35bc 100644 (file)
@@ -191,6 +191,13 @@ lower_block_to_explicit_output(nir_block *block, nir_builder *b, struct state *s
                case nir_intrinsic_store_output: {
                        // src[] = { value, offset }.
 
+                       /* nir_lower_io_to_temporaries replaces all access to output
+                        * variables with temp variables and then emits a nir_copy_var at
+                        * the end of the shader.  Thus, we should always get a full wrmask
+                        * here.
+                        */
+                       assert(util_is_power_of_two_nonzero(nir_intrinsic_write_mask(intr) + 1));
+
                        b->cursor = nir_instr_remove(&intr->instr);
 
                        nir_ssa_def *vertex_id = build_vertex_id(b, state);
@@ -199,10 +206,8 @@ lower_block_to_explicit_output(nir_block *block, nir_builder *b, struct state *s
                        nir_intrinsic_instr *store =
                                nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_shared_ir3);
 
-                       nir_intrinsic_set_write_mask(store, MASK(intr->num_components));
                        store->src[0] = nir_src_for_ssa(intr->src[0].ssa);
                        store->src[1] = nir_src_for_ssa(offset);
-
                        store->num_components = intr->num_components;
 
                        nir_builder_instr_insert(b, &store->instr);
@@ -431,17 +436,21 @@ lower_tess_ctrl_block(nir_block *block, nir_builder *b, struct state *state)
 
                        b->cursor = nir_before_instr(&intr->instr);
 
+                       /* nir_lower_io_to_temporaries replaces all access to output
+                        * variables with temp variables and then emits a nir_copy_var at
+                        * the end of the shader.  Thus, we should always get a full wrmask
+                        * here.
+                        */
+                       assert(util_is_power_of_two_nonzero(nir_intrinsic_write_mask(intr) + 1));
+
                        nir_ssa_def *value = intr->src[0].ssa;
                        nir_ssa_def *address = nir_load_tess_param_base_ir3(b);
                        nir_variable *var = get_var(&b->shader->outputs, nir_intrinsic_base(intr));
                        nir_ssa_def *offset = build_per_vertex_offset(b, state,
                                        intr->src[1].ssa, intr->src[2].ssa, var);
 
-                       nir_intrinsic_instr *store =
-                               replace_intrinsic(b, intr, nir_intrinsic_store_global_ir3, value, address,
-                                                                 nir_iadd(b, offset, nir_imm_int(b, nir_intrinsic_component(intr))));
-
-                       nir_intrinsic_set_write_mask(store, nir_intrinsic_write_mask(intr));
+                       replace_intrinsic(b, intr, nir_intrinsic_store_global_ir3, value, address,
+                                       nir_iadd(b, offset, nir_imm_int(b, nir_intrinsic_component(intr))));
 
                        break;
                }
@@ -503,11 +512,15 @@ lower_tess_ctrl_block(nir_block *block, nir_builder *b, struct state *state)
 
                                debug_assert(nir_intrinsic_component(intr) == 0);
 
-                               nir_intrinsic_instr *store =
-                                       replace_intrinsic(b, intr, nir_intrinsic_store_global_ir3,
-                                                       intr->src[0].ssa, address, offset);
+                               /* nir_lower_io_to_temporaries replaces all access to output
+                                * variables with temp variables and then emits a nir_copy_var at
+                                * the end of the shader.  Thus, we should always get a full wrmask
+                                * here.
+                                */
+                               assert(util_is_power_of_two_nonzero(nir_intrinsic_write_mask(intr) + 1));
 
-                               nir_intrinsic_set_write_mask(store, nir_intrinsic_write_mask(intr));
+                               replace_intrinsic(b, intr, nir_intrinsic_store_global_ir3,
+                                               intr->src[0].ssa, address, offset);
                        }
                        break;
                }
@@ -559,7 +572,6 @@ emit_tess_epilouge(nir_builder *b, struct state *state)
        store->src[2] = nir_src_for_ssa(offset);
        nir_builder_instr_insert(b, &store->instr);
        store->num_components = levels[0]->num_components;
-       nir_intrinsic_set_write_mask(store, (1 << levels[0]->num_components) - 1);
 
        if (levels[1]) {
                store = nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_global_ir3);
@@ -570,7 +582,6 @@ emit_tess_epilouge(nir_builder *b, struct state *state)
                store->src[2] = nir_src_for_ssa(offset);
                nir_builder_instr_insert(b, &store->instr);
                store->num_components = levels[1]->num_components;
-               nir_intrinsic_set_write_mask(store, (1 << levels[1]->num_components) - 1);
        }
 
        /* Finally, Insert endpatch instruction: