nir: Allow output reads and add the relevant intrinsics.
author Kenneth Graunke <kenneth@whitecape.org>
Mon, 19 Oct 2015 18:44:28 +0000 (11:44 -0700)
committer Kenneth Graunke <kenneth@whitecape.org>
Fri, 13 Nov 2015 23:15:41 +0000 (15:15 -0800)
Normally, we rely on nir_lower_outputs_to_temporaries to create shadow
variables for outputs, buffering the results and writing them all out
at the end of the program.  However, this is infeasible for tessellation
control shader outputs.

Tessellation control shaders can generate multiple output vertices, and
write per-vertex outputs.  These are arrays indexed by the vertex
number; each thread only writes one element, but can read any other
element - including those being concurrently written by other threads.
The barrier() intrinsic synchronizes between threads.

Even if we tried to shadow every output element (which is of dubious
value), we'd have to read updated values in at barrier() time, which
means we need to allow output reads.

Most stages should continue using nir_lower_outputs_to_temporaries(),
but in theory drivers could choose not to if they really wanted.

v2: Rebase to accommodate Jason's review feedback.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
src/glsl/nir/nir_intrinsics.h
src/glsl/nir/nir_lower_io.c
src/glsl/nir/nir_print.c
src/glsl/nir/nir_validate.c

index 26ac7ce9cd7d9e5583f878203975a1c31d812be3..b8d7d6c68cb51b9b7097831adb038c0ac5fe86bb 100644 (file)
@@ -255,6 +255,8 @@ LOAD(ubo, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
 LOAD(input, 0, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
 LOAD(per_vertex_input, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
 LOAD(ssbo, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE)
+LOAD(output, 0, 1, NIR_INTRINSIC_CAN_ELIMINATE)
+LOAD(per_vertex_output, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE)
 
 /*
  * Stores work the same way as loads, except now the first register input is
index b7b599da6d4c27df5abc21284f392905590fa975..8a4177fb9f0ea51fad03729cff226fd74feb1997 100644 (file)
@@ -161,6 +161,15 @@ load_op(struct lower_io_state *state,
                              nir_intrinsic_load_input;
       }
       break;
+   case nir_var_shader_out:
+      if (per_vertex) {
+         op = has_indirect ? nir_intrinsic_load_per_vertex_output_indirect :
+                             nir_intrinsic_load_per_vertex_output;
+      } else {
+         op = has_indirect ? nir_intrinsic_load_output_indirect :
+                             nir_intrinsic_load_output;
+      }
+      break;
    case nir_var_uniform:
       op = has_indirect ? nir_intrinsic_load_uniform_indirect :
                           nir_intrinsic_load_uniform;
@@ -191,13 +200,16 @@ nir_lower_io_block(nir_block *block, void *void_state)
       if (state->mode != -1 && state->mode != mode)
          continue;
 
+      if (mode != nir_var_shader_in &&
+          mode != nir_var_shader_out &&
+          mode != nir_var_uniform)
+         continue;
+
       switch (intrin->intrinsic) {
       case nir_intrinsic_load_var: {
-         if (mode != nir_var_shader_in && mode != nir_var_uniform)
-            continue;
-
          bool per_vertex =
-            is_per_vertex_input(state, intrin->variables[0]->var);
+            is_per_vertex_input(state, intrin->variables[0]->var) ||
+            is_per_vertex_output(state, intrin->variables[0]->var);
 
          nir_ssa_def *indirect;
          nir_ssa_def *vertex_index;
@@ -241,8 +253,7 @@ nir_lower_io_block(nir_block *block, void *void_state)
       }
 
       case nir_intrinsic_store_var: {
-         if (intrin->variables[0]->var->data.mode != nir_var_shader_out)
-            continue;
+         assert(mode == nir_var_shader_out);
 
          nir_ssa_def *indirect;
          nir_ssa_def *vertex_index;
index 23fcafeb7e878502da560b87f1bbfe133fbc86f7..f7f5fdf3181c0f76430c159a5bf57c8e4369cd19 100644 (file)
@@ -448,6 +448,8 @@ print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state)
    case nir_intrinsic_load_per_vertex_input_indirect:
       var_list = &state->shader->inputs;
       break;
+   case nir_intrinsic_load_output:
+   case nir_intrinsic_load_output_indirect:
    case nir_intrinsic_store_output:
    case nir_intrinsic_store_output_indirect:
    case nir_intrinsic_store_per_vertex_output:
index 51c2529dc38fd53c239da518f0efcb02dc2c7e2d..ed374b921fa557a396e87bb676d8acc7e8434239 100644 (file)
@@ -405,7 +405,6 @@ validate_intrinsic_instr(nir_intrinsic_instr *instr, validate_state *state)
              (instr->variables[0]->var->data.mode == nir_var_uniform &&
               glsl_get_base_type(type) == GLSL_TYPE_SUBROUTINE));
       assert(instr->num_components == glsl_get_vector_elements(type));
-      assert(instr->variables[0]->var->data.mode != nir_var_shader_out);
       break;
    }
    case nir_intrinsic_store_var: {
@@ -426,7 +425,6 @@ validate_intrinsic_instr(nir_intrinsic_instr *instr, validate_state *state)
       assert(instr->variables[0]->var->data.mode != nir_var_shader_in &&
              instr->variables[0]->var->data.mode != nir_var_uniform &&
              instr->variables[0]->var->data.mode != nir_var_shader_storage);
-      assert(instr->variables[1]->var->data.mode != nir_var_shader_out);
       break;
    default:
       break;