nir: Add a writemask to store intrinsics.
authorKenneth Graunke <kenneth@whitecape.org>
Tue, 17 Nov 2015 08:26:37 +0000 (00:26 -0800)
committerKenneth Graunke <kenneth@whitecape.org>
Tue, 22 Dec 2015 23:57:59 +0000 (15:57 -0800)
Tessellation control shaders need to be careful when writing outputs.
Because multiple threads can concurrently write the same output
variables, we need to only write the exact components we were told.

Traditionally, for sub-vector writes, we've read the whole vector,
updated the temporary, and written the whole vector back.  This breaks
down with concurrent access.

This patch prepares the way for a solution by adding a writemask field
to store_var intrinsics, as well as the other store intrinsics.  It then
updates all produces to emit a writemask of "all channels enabled".  It
updates nir_lower_io to copy the writemask to output store intrinsics.

Finally, it updates nir_lower_vars_to_ssa to handle partial writemasks
by doing a read-modify-write cycle (which is safe, because local
variables are specific to a single thread).

This should have no functional change, since no one actually emits
partial writemasks yet.

v2: Make nir_validate momentarily assert that writemasks cover the
    complete value - we shouldn't have partial writemasks yet
    (requested by Jason Ekstrand).

v3: Fix accidental SSBO change that arose from merge conflicts.

v4: Don't try to handle writemasks in ir3_compiler_nir - my code
    for indirects was likely wrong, and TTN doesn't generate partial
    writemasks today anyway.  Change them to asserts as requested by
    Rob Clark.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com> [v3]
12 files changed:
src/gallium/auxiliary/nir/tgsi_to_nir.c
src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
src/glsl/nir/glsl_to_nir.cpp
src/glsl/nir/nir_builder.h
src/glsl/nir/nir_intrinsics.h
src/glsl/nir/nir_lower_gs_intrinsics.c
src/glsl/nir/nir_lower_io.c
src/glsl/nir/nir_lower_locals_to_regs.c
src/glsl/nir/nir_lower_var_copies.c
src/glsl/nir/nir_lower_vars_to_ssa.c
src/glsl/nir/nir_validate.c
src/mesa/program/prog_to_nir.c

index 80821572f564b1f6dd09a782107ab4d9ababc2ec..56476d4f54e53ca73f5757557bce815c56a28778 100644 (file)
@@ -1903,6 +1903,7 @@ ttn_emit_instruction(struct ttn_compile *c)
                                            &tgsi_dst->Indirect : NULL;
 
       store->num_components = 4;
+      store->const_index[0] = 0xf;
       store->variables[0] = ttn_array_deref(c, store, var, offset, indirect);
       store->src[0] = nir_src_for_reg(dest.dest.reg.reg);
 
index eea5c5e28dbe58c2e5b805a3dc135385c24d773b..1794059342b3fdb728c0b6ee2735202eb889d6d2 100644 (file)
@@ -1321,6 +1321,10 @@ emit_intrinisic_store_var(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
        case nir_deref_array_type_direct:
                /* direct access does not require anything special: */
                for (int i = 0; i < intr->num_components; i++) {
+                       /* ttn doesn't generate partial writemasks */
+                       assert(intr->const_index[0] ==
+                              (1 << intr->num_components) - 1);
+
                        unsigned n = darr->base_offset * 4 + i;
                        compile_assert(ctx, n < arr->length);
                        arr->arr[n] = src[i];
@@ -1333,6 +1337,10 @@ emit_intrinisic_store_var(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
                struct ir3_instruction *addr =
                                get_addr(ctx, get_src(ctx, &darr->indirect)[0]);
                for (int i = 0; i < intr->num_components; i++) {
+                       /* ttn doesn't generate partial writemasks */
+                       assert(intr->const_index[0] ==
+                              (1 << intr->num_components) - 1);
+
                        struct ir3_instruction *store;
                        unsigned n = darr->base_offset * 4 + i;
                        compile_assert(ctx, n < arr->length);
index df8a18a7f49210738332aa81e10726064ab1c078..c0de7c4af98098e0c6f42f516c76a4b843b5cc83 100644 (file)
@@ -1067,6 +1067,7 @@ nir_visitor::visit(ir_call *ir)
          nir_intrinsic_instr *store_instr =
             nir_intrinsic_instr_create(shader, nir_intrinsic_store_var);
          store_instr->num_components = ir->return_deref->type->vector_elements;
+         store_instr->const_index[0] = (1 << store_instr->num_components) - 1;
 
          store_instr->variables[0] =
             evaluate_deref(&store_instr->instr, ir->return_deref);
@@ -1165,6 +1166,7 @@ nir_visitor::visit(ir_assignment *ir)
    nir_intrinsic_instr *store =
       nir_intrinsic_instr_create(this->shader, nir_intrinsic_store_var);
    store->num_components = ir->lhs->type->vector_elements;
+   store->const_index[0] = (1 << store->num_components) - 1;
    nir_deref *store_deref = nir_copy_deref(store, &lhs_deref->deref);
    store->variables[0] = nir_deref_as_var(store_deref);
    store->src[0] = nir_src_for_ssa(src);
index 332bb0246df6963a6ac0299ad0ed01700e246856..5883d86e907f02ddb94a8340b2e5fefacfe43cf0 100644 (file)
@@ -310,13 +310,15 @@ nir_load_var(nir_builder *build, nir_variable *var)
 }
 
 static inline void
-nir_store_var(nir_builder *build, nir_variable *var, nir_ssa_def *value)
+nir_store_var(nir_builder *build, nir_variable *var, nir_ssa_def *value,
+              unsigned writemask)
 {
    const unsigned num_components = glsl_get_vector_elements(var->type);
 
    nir_intrinsic_instr *store =
       nir_intrinsic_instr_create(build->shader, nir_intrinsic_store_var);
    store->num_components = num_components;
+   store->const_index[0] = writemask;
    store->variables[0] = nir_deref_var_create(store, var);
    store->src[0] = nir_src_for_ssa(value);
    nir_builder_instr_insert(build, &store->instr);
index ec9e845574a443880c3d16d8fdb56dc8c9a82de0..5815dbecb68d5750c50105c3529f5dcf7b872ab9 100644 (file)
@@ -43,7 +43,7 @@
 
 
 INTRINSIC(load_var, 0, ARR(), true, 0, 1, 0, NIR_INTRINSIC_CAN_ELIMINATE)
-INTRINSIC(store_var, 1, ARR(0), false, 0, 1, 0, 0)
+INTRINSIC(store_var, 1, ARR(0), false, 0, 1, 1, 0)
 INTRINSIC(copy_var, 0, ARR(), false, 0, 2, 0, 0)
 
 /*
@@ -302,10 +302,10 @@ LOAD(shared, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE)
 #define STORE(name, srcs, indices, flags) \
    INTRINSIC(store_##name, srcs, ARR(0, 1, 1, 1), false, 0, 0, indices, flags)
 
-/* src[] = { value, offset }. const_index[] = { base } */
-STORE(output, 2, 1, 0)
-/* src[] = { value, vertex, offset }. const_index[] = { base } */
-STORE(per_vertex_output, 3, 1, 0)
+/* src[] = { value, offset }. const_index[] = { base, write_mask } */
+STORE(output, 2, 2, 0)
+/* src[] = { value, vertex, offset }. const_index[] = { base, write_mask } */
+STORE(per_vertex_output, 3, 2, 0)
 /* src[] = { value, block_index, offset }. const_index[] = { write_mask } */
 STORE(ssbo, 3, 1, 0)
 /* src[] = { value, offset }. const_index[] = { base, write_mask } */
index e0d067885d82bda3c818716df36c50e13c1086c7..13254599088cdacdcacbd3465e97a94ce3925256 100644 (file)
@@ -99,7 +99,8 @@ rewrite_emit_vertex(nir_intrinsic_instr *intrin, struct state *state)
 
    /* Increment the vertex count by 1 */
    nir_store_var(b, state->vertex_count_var,
-                 nir_iadd(b, count, nir_imm_int(b, 1)));
+                 nir_iadd(b, count, nir_imm_int(b, 1)),
+                 0x1); /* .x */
 
    nir_instr_remove(&intrin->instr);
 
index 3d646eb14b4a4a84a08c2b1ce85f494ca8330667..a3565cc52ea5d0359b6404a95719c8214c50df7e 100644 (file)
@@ -261,6 +261,9 @@ nir_lower_io_block(nir_block *block, void *void_state)
          store->const_index[0] =
             intrin->variables[0]->var->data.driver_location;
 
+         /* Copy the writemask */
+         store->const_index[1] = intrin->const_index[0];
+
          if (per_vertex)
             store->src[1] = nir_src_for_ssa(vertex_index);
 
index 17b53ca36f3006dc5850c7233131bc476c937eae..3e21ac0cdd5e17a2cc94ec44b4e0a4eee41f7a25 100644 (file)
@@ -243,7 +243,7 @@ lower_locals_to_regs_block(nir_block *block, void *void_state)
 
          nir_alu_instr *mov = nir_alu_instr_create(state->shader, nir_op_imov);
          nir_src_copy(&mov->src[0].src, &intrin->src[0], mov);
-         mov->dest.write_mask = (1 << intrin->num_components) - 1;
+         mov->dest.write_mask = intrin->const_index[0];
          mov->dest.dest.is_ssa = false;
          mov->dest.dest.reg.reg = reg_src.reg.reg;
          mov->dest.dest.reg.base_offset = reg_src.reg.base_offset;
index 98c107aa50e691447e37c0ae095b19ff5482648e..a9017de5449284e7b8785ef759e1ade991cc7b7b 100644 (file)
@@ -128,6 +128,7 @@ emit_copy_load_store(nir_intrinsic_instr *copy_instr,
       nir_intrinsic_instr *store =
          nir_intrinsic_instr_create(mem_ctx, nir_intrinsic_store_var);
       store->num_components = num_components;
+      store->const_index[0] = (1 << num_components) - 1;
       store->variables[0] = nir_deref_as_var(nir_copy_deref(store, &dest_head->deref));
 
       store->src[0].is_ssa = true;
index e670dbdc7e734227a664a99c1523b321afc39970..3ec0e1d99601d9aaacc6291db78d42361b31058c 100644 (file)
@@ -26,6 +26,7 @@
  */
 
 #include "nir.h"
+#include "nir_builder.h"
 #include "nir_vla.h"
 
 
@@ -590,6 +591,9 @@ add_phi_sources(nir_block *block, nir_block *pred,
 static bool
 rename_variables_block(nir_block *block, struct lower_variables_state *state)
 {
+   nir_builder b;
+   nir_builder_init(&b, state->impl);
+
    nir_foreach_instr_safe(block, instr) {
       if (instr->type == nir_instr_type_phi) {
          nir_phi_instr *phi = nir_instr_as_phi(instr);
@@ -675,20 +679,40 @@ rename_variables_block(nir_block *block, struct lower_variables_state *state)
 
             assert(intrin->src[0].is_ssa);
 
-            nir_alu_instr *mov = nir_alu_instr_create(state->shader,
-                                                      nir_op_imov);
-            mov->src[0].src.is_ssa = true;
-            mov->src[0].src.ssa = intrin->src[0].ssa;
-            for (unsigned i = intrin->num_components; i < 4; i++)
-               mov->src[0].swizzle[i] = 0;
+            nir_ssa_def *new_def;
+            b.cursor = nir_before_instr(&intrin->instr);
 
-            mov->dest.write_mask = (1 << intrin->num_components) - 1;
-            nir_ssa_dest_init(&mov->instr, &mov->dest.dest,
-                              intrin->num_components, NULL);
+            if (intrin->const_index[0] == (1 << intrin->num_components) - 1) {
+               /* Whole variable store - just copy the source.  Note that
+                * intrin->num_components and intrin->src[0].ssa->num_components
+                * may differ.
+                */
+               unsigned swiz[4];
+               for (unsigned i = 0; i < 4; i++)
+                  swiz[i] = i < intrin->num_components ? i : 0;
+
+               new_def = nir_swizzle(&b, intrin->src[0].ssa, swiz,
+                                     intrin->num_components, false);
+            } else {
+               nir_ssa_def *old_def = get_ssa_def_for_block(node, block, state);
+               /* For writemasked store_var intrinsics, we combine the newly
+                * written values with the existing contents of unwritten
+                * channels, creating a new SSA value for the whole vector.
+                */
+               nir_ssa_def *srcs[4];
+               for (unsigned i = 0; i < intrin->num_components; i++) {
+                  if (intrin->const_index[0] & (1 << i)) {
+                     srcs[i] = nir_channel(&b, intrin->src[0].ssa, i);
+                  } else {
+                     srcs[i] = nir_channel(&b, old_def, i);
+                  }
+               }
+               new_def = nir_vec(&b, srcs, intrin->num_components);
+            }
 
-            nir_instr_insert_before(&intrin->instr, &mov->instr);
+            assert(new_def->num_components == intrin->num_components);
 
-            def_stack_push(node, &mov->dest.dest.ssa, state);
+            def_stack_push(node, new_def, state);
 
             /* We'll wait to remove the instruction until the next pass
              * where we pop the node we just pushed back off the stack.
index 06879d64ee25deff06ac667f6478dfa111cc7305..89cf0b8920818a2a857297ee5cd7a18186e5fcea 100644 (file)
@@ -417,6 +417,8 @@ validate_intrinsic_instr(nir_intrinsic_instr *instr, validate_state *state)
       assert(instr->variables[0]->var->data.mode != nir_var_shader_in &&
              instr->variables[0]->var->data.mode != nir_var_uniform &&
              instr->variables[0]->var->data.mode != nir_var_shader_storage);
+      /* Currently, writemasks must cover the entire value */
+      assert(instr->const_index[0] == (1 << instr->num_components) - 1);
       break;
    }
    case nir_intrinsic_copy_var:
index ff671592763cb4012929ba3ead8f62bbba19049d..14a339cf96ea950cde8e69b318f50cd294cdfaa9 100644 (file)
@@ -927,6 +927,7 @@ ptn_add_output_stores(struct ptn_compile *c)
       nir_intrinsic_instr *store =
          nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_var);
       store->num_components = glsl_get_vector_elements(var->type);
+      store->const_index[0] = (1 << store->num_components) - 1;
       store->variables[0] =
          nir_deref_var_create(store, c->output_vars[var->data.location]);
 
@@ -997,6 +998,7 @@ setup_registers_and_variables(struct ptn_compile *c)
             nir_intrinsic_instr *store =
                nir_intrinsic_instr_create(shader, nir_intrinsic_store_var);
             store->num_components = 4;
+            store->const_index[0] = WRITEMASK_XYZW;
             store->variables[0] = nir_deref_var_create(store, fullvar);
             store->src[0] = nir_src_for_ssa(f001);
             nir_builder_instr_insert(b, &store->instr);