--- /dev/null
+/*
+ * Copyright © 2019 Google, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "ir3_nir.h"
+#include "ir3_compiler.h"
+#include "compiler/nir/nir_builder.h"
+
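+/*
+ * Geometry stage lowering for ir3: when a geometry shader is present, the
+ * VS writes its outputs to shared (local) memory with store_shared_ir3,
+ * laid out according to a packed "primitive map", and the GS reads its
+ * per-vertex inputs back with load_shared_ir3.  The GS itself is lowered
+ * so that outputs are staged in temporaries and written out once, at the
+ * end of the shader.
+ */
+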
+struct state {
+ struct primitive_map {
+ unsigned loc[32];
+ unsigned size[32];
+ unsigned stride;
+ } map;
+
+ nir_ssa_def *header;
+
+ nir_variable *vertex_count_var;
+ nir_variable *emitted_vertex_var;
+ nir_variable *vertex_flags_var;
+ nir_variable *vertex_flags_out;
+
+ nir_variable *output_vars[32];
+};
+
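+/* Extract a bitfield from the stage header sysval: (v >> start) & mask.
+ * The callers below imply the following header layout:
+ *
+ *   bits [0:5]   local primitive id
+ *   bits [6:10]  vertex id
+ *   bits [11:15] invocation id
+ *   bits [16:25] local thread id
+ */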
+static nir_ssa_def *
+bitfield_extract(nir_builder *b, nir_ssa_def *v, uint32_t start, uint32_t mask)
+{
+ return nir_iand(b, nir_ushr(b, v, nir_imm_int(b, start)),
+ nir_imm_int(b, mask));
+}
+
+static nir_ssa_def *
+build_invocation_id(nir_builder *b, struct state *state)
+{
+ return bitfield_extract(b, state->header, 11, 31);
+}
+
+static nir_ssa_def *
+build_vertex_id(nir_builder *b, struct state *state)
+{
+ return bitfield_extract(b, state->header, 6, 31);
+}
+
+static nir_ssa_def *
+build_local_primitive_id(nir_builder *b, struct state *state)
+{
+ return bitfield_extract(b, state->header, 0, 63);
+}
+
+static nir_variable *
+get_var(struct exec_list *list, int driver_location)
+{
+ nir_foreach_variable(v, list) {
+ if (v->data.driver_location == driver_location) {
+ return v;
+ }
+ }
+
+ return NULL;
+}
+
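+/* Compute the shared memory offset of one attribute of one vertex:
+ *
+ *   local_primitive_id * primitive_stride +
+ *     vertex * vertex_stride + attr_offset + offset
+ *
+ * The primitive stride always comes from a driver param; the vertex
+ * stride and attribute offset are compile-time constants from the
+ * primitive map in the VS, and driver params in the GS.
+ */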
+static nir_ssa_def *
+build_local_offset(nir_builder *b, struct state *state,
+ nir_ssa_def *vertex, uint32_t base, nir_ssa_def *offset)
+{
+ nir_ssa_def *primitive_stride = nir_load_vs_primitive_stride_ir3(b);
+ nir_ssa_def *primitive_offset =
+ nir_imul(b, build_local_primitive_id(b, state), primitive_stride);
+ nir_ssa_def *attr_offset;
+ nir_ssa_def *vertex_stride;
+
+ if (b->shader->info.stage == MESA_SHADER_VERTEX) {
+ vertex_stride = nir_imm_int(b, state->map.stride * 4);
+ attr_offset = nir_imm_int(b, state->map.loc[base] * 4);
+ } else if (b->shader->info.stage == MESA_SHADER_GEOMETRY) {
+ vertex_stride = nir_load_vs_vertex_stride_ir3(b);
+ attr_offset = nir_load_primitive_location_ir3(b, base);
+ } else {
+ unreachable("bad shader stage");
+ }
+
+ nir_ssa_def *vertex_offset = nir_imul(b, vertex, vertex_stride);
+
+ return nir_iadd(b, nir_iadd(b, primitive_offset, vertex_offset),
+ nir_iadd(b, attr_offset, offset));
+}
+
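+/* Replace an intrinsic with a new one of the given op, with up to three
+ * sources, rewriting all uses of the old destination (if any) to the new
+ * one. */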
+static nir_intrinsic_instr *
+replace_intrinsic(nir_builder *b, nir_intrinsic_instr *intr,
+ nir_intrinsic_op op, nir_ssa_def *src0, nir_ssa_def *src1, nir_ssa_def *src2)
+{
+ nir_intrinsic_instr *new_intr =
+ nir_intrinsic_instr_create(b->shader, op);
+
+ new_intr->src[0] = nir_src_for_ssa(src0);
+ if (src1)
+ new_intr->src[1] = nir_src_for_ssa(src1);
+ if (src2)
+ new_intr->src[2] = nir_src_for_ssa(src2);
+
+ new_intr->num_components = intr->num_components;
+
+ if (nir_intrinsic_infos[op].has_dest)
+ nir_ssa_dest_init(&new_intr->instr, &new_intr->dest,
+ intr->num_components, 32, NULL);
+
+ nir_builder_instr_insert(b, &new_intr->instr);
+
+ if (nir_intrinsic_infos[op].has_dest)
+ nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(&new_intr->dest.ssa));
+
+ nir_instr_remove(&intr->instr);
+
+ return new_intr;
+}
+
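+/* Build the packed layout of varyings in shared memory: for each
+ * driver_location, the size in (32-bit) words and the word offset it is
+ * packed at, plus the total stride (the sum of all sizes). */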
+static void
+build_primitive_map(nir_shader *shader, struct primitive_map *map, struct exec_list *list)
+{
+ nir_foreach_variable(var, list) {
+ switch (var->data.location) {
+ case VARYING_SLOT_TESS_LEVEL_OUTER:
+ case VARYING_SLOT_TESS_LEVEL_INNER:
+ continue;
+ }
+
+ unsigned size = glsl_count_attribute_slots(var->type, false) * 4;
+
+ assert(var->data.driver_location < ARRAY_SIZE(map->size));
+ map->size[var->data.driver_location] =
+ MAX2(map->size[var->data.driver_location], size);
+ }
+
+ unsigned loc = 0;
+ for (uint32_t i = 0; i < ARRAY_SIZE(map->size); i++) {
+ if (map->size[i] == 0)
+ continue;
+ nir_variable *var = get_var(list, i);
+ map->loc[i] = loc;
+ loc += map->size[i];
+
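+ /* For arrayed (per-vertex) variables, convert the total size into a
+  * per-vertex size; patch variables have no per-vertex copies. */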
+ if (var->data.patch)
+ map->size[i] = 0;
+ else
+ map->size[i] = map->size[i] / glsl_get_length(var->type);
+ }
+
+ map->stride = loc;
+}
+
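+/* Lower VS store_output to a store_shared_ir3 at this vertex's location
+ * in shared memory. */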
+static void
+lower_vs_block(nir_block *block, nir_builder *b, struct state *state)
+{
+ nir_foreach_instr_safe(instr, block) {
+ if (instr->type != nir_instr_type_intrinsic)
+ continue;
+
+ nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+
+ switch (intr->intrinsic) {
+ case nir_intrinsic_store_output: {
+ // src[] = { value, offset }.
+
+ /* Remove the original store_output: with a GS bound, the VS's outputs
+  * go to shared memory instead.  The cursor is left where the removed
+  * instruction was. */
+ b->cursor = nir_instr_remove(&intr->instr);
+
+ nir_ssa_def *vertex_id = build_vertex_id(b, state);
+ nir_ssa_def *offset = build_local_offset(b, state, vertex_id, nir_intrinsic_base(intr),
+ intr->src[1].ssa);
+ nir_intrinsic_instr *store =
+ nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_shared_ir3);
+
+ nir_intrinsic_set_write_mask(store, MASK(intr->num_components));
+ store->src[0] = nir_src_for_ssa(intr->src[0].ssa);
+ store->src[1] = nir_src_for_ssa(offset);
+
+ store->num_components = intr->num_components;
+
+ nir_builder_instr_insert(b, &store->instr);
+ break;
+ }
+
+ default:
+ break;
+ }
+ }
+}
+
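+/* Header bits [16:25]: the local thread id, matched against the running
+ * vertex count in the emit_vertex handling below. */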
+static nir_ssa_def *
+local_thread_id(nir_builder *b)
+{
+ return bitfield_extract(b, nir_load_gs_header_ir3(b), 16, 1023);
+}
+
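+/* Entry point for the VS half of the lowering: rewrite the shader's
+ * output stores and record the packed output layout on the ir3_shader. */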
+void
+ir3_nir_lower_vs_to_explicit_io(nir_shader *shader, struct ir3_shader *s)
+{
+ struct state state = { };
+
+ build_primitive_map(shader, &state.map, &shader->outputs);
+ memcpy(s->output_loc, state.map.loc, sizeof(s->output_loc));
+
+ nir_function_impl *impl = nir_shader_get_entrypoint(shader);
+ assert(impl);
+
+ nir_builder b;
+ nir_builder_init(&b, impl);
+ b.cursor = nir_before_cf_list(&impl->body);
+
+ state.header = nir_load_gs_header_ir3(&b);
+
+ nir_foreach_block_safe(block, impl)
+ lower_vs_block(block, &b, &state);
+
+ nir_metadata_preserve(impl, nir_metadata_block_index |
+ nir_metadata_dominance);
+
+ s->output_size = state.map.stride;
+}
+
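+/* Lower the GS-specific intrinsics:
+ *
+ *  - store_output is deferred: the value is remembered and only stored
+ *    (to a temporary) by the next emit_vertex,
+ *  - emit_vertex stores the pending outputs and vertex flags, but only
+ *    in the thread whose local id matches the running vertex count, and
+ *    then increments that count,
+ *  - end_primitive just sets the end-of-primitive flag (0x4) for the
+ *    next emitted vertex,
+ *  - load_per_vertex_input becomes a load_shared_ir3,
+ *  - load_invocation_id is extracted from the header.
+ */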
+static void
+lower_gs_block(nir_block *block, nir_builder *b, struct state *state)
+{
+ nir_intrinsic_instr *outputs[32] = {};
+
+ nir_foreach_instr_safe(instr, block) {
+ if (instr->type != nir_instr_type_intrinsic)
+ continue;
+
+ nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+
+ switch (intr->intrinsic) {
+ case nir_intrinsic_store_output: {
+ // src[] = { value, offset }.
+
+ uint32_t loc = nir_intrinsic_base(intr);
+ outputs[loc] = intr;
+ break;
+ }
+
+ case nir_intrinsic_end_primitive: {
+ b->cursor = nir_before_instr(&intr->instr);
+ nir_store_var(b, state->vertex_flags_var, nir_imm_int(b, 4), 0x1);
+ nir_instr_remove(&intr->instr);
+ break;
+ }
+
+ case nir_intrinsic_emit_vertex: {
+
+ /* Load the vertex count */
+ b->cursor = nir_before_instr(&intr->instr);
+ nir_ssa_def *count = nir_load_var(b, state->vertex_count_var);
+
+ nir_push_if(b, nir_ieq(b, count, local_thread_id(b)));
+
+ for (uint32_t i = 0; i < ARRAY_SIZE(outputs); i++) {
+ if (outputs[i]) {
+ nir_store_var(b, state->output_vars[i],
+ outputs[i]->src[0].ssa,
+ (1 << outputs[i]->num_components) - 1);
+
+ nir_instr_remove(&outputs[i]->instr);
+ }
+ outputs[i] = NULL;
+ }
+
+ nir_instr_remove(&intr->instr);
+
+ nir_store_var(b, state->emitted_vertex_var,
+ nir_iadd(b, nir_load_var(b, state->emitted_vertex_var), nir_imm_int(b, 1)), 0x1);
+
+ nir_store_var(b, state->vertex_flags_out,
+ nir_load_var(b, state->vertex_flags_var), 0x1);
+
+ nir_pop_if(b, NULL);
+
+ /* Increment the vertex count by 1 */
+ nir_store_var(b, state->vertex_count_var,
+ nir_iadd(b, count, nir_imm_int(b, 1)), 0x1); /* .x */
+ nir_store_var(b, state->vertex_flags_var, nir_imm_int(b, 0), 0x1);
+
+ break;
+ }
+
+ case nir_intrinsic_load_per_vertex_input: {
+ // src[] = { vertex, offset }.
+
+ b->cursor = nir_before_instr(&intr->instr);
+
+ nir_ssa_def *offset = build_local_offset(b, state,
+ intr->src[0].ssa, // the vertex index within the input primitive
+ nir_intrinsic_base(intr),
+ intr->src[1].ssa);
+
+ replace_intrinsic(b, intr, nir_intrinsic_load_shared_ir3, offset, NULL, NULL);
+ break;
+ }
+
+ case nir_intrinsic_load_invocation_id: {
+ b->cursor = nir_before_instr(&intr->instr);
+
+ nir_ssa_def *iid = build_invocation_id(b, state);
+ nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(iid));
+ nir_instr_remove(&intr->instr);
+ break;
+ }
+
+ default:
+ break;
+ }
+ }
+}
+
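+/* Copy the staged output temporaries to the real shader outputs. */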
+static void
+emit_store_outputs(nir_builder *b, struct state *state)
+{
+ /* This also stores the internally added vertex_flags output. */
+
+ for (uint32_t i = 0; i < ARRAY_SIZE(state->output_vars); i++) {
+ if (!state->output_vars[i])
+ continue;
+
+ nir_intrinsic_instr *store =
+ nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_output);
+
+ nir_intrinsic_set_base(store, i);
+ store->src[0] = nir_src_for_ssa(nir_load_var(b, state->output_vars[i]));
+ store->src[1] = nir_src_for_ssa(nir_imm_int(b, 0));
+ store->num_components = store->src[0].ssa->num_components;
+
+ nir_builder_instr_insert(b, &store->instr);
+ }
+}
+
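+/* NIR passes may split a variable into several smaller ones covering
+ * different components of the same location.  Drop any variable that
+ * covers only a subset of the components used at its driver_location. */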
+static void
+clean_up_split_vars(nir_shader *shader, struct exec_list *list)
+{
+ uint32_t components[32] = {};
+
+ nir_foreach_variable(var, list) {
+ uint32_t mask =
+ ((1 << glsl_get_components(glsl_without_array(var->type))) - 1) << var->data.location_frac;
+ components[var->data.driver_location] |= mask;
+ }
+
+ nir_foreach_variable_safe(var, list) {
+ uint32_t mask =
+ ((1 << glsl_get_components(glsl_without_array(var->type))) - 1) << var->data.location_frac;
+ bool subset =
+ (components[var->data.driver_location] | mask) != mask;
+ if (subset)
+ exec_node_remove(&var->node);
+ }
+}
+
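+/* Entry point for the GS lowering: stage all outputs (plus the extra
+ * vertex_flags output) in temporaries, let one thread per emitted vertex
+ * keep its values, kill threads that emitted nothing, and store the
+ * staged values to the real outputs at every shader exit. */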
+void
+ir3_nir_lower_gs(nir_shader *shader, struct ir3_shader *s)
+{
+ struct state state = { };
+
+ if (shader_debug_enabled(shader->info.stage)) {
+ fprintf(stderr, "NIR (before gs lowering):\n");
+ nir_print_shader(shader, stderr);
+ }
+
+ clean_up_split_vars(shader, &shader->inputs);
+ clean_up_split_vars(shader, &shader->outputs);
+
+ build_primitive_map(shader, &state.map, &shader->inputs);
+
+ uint32_t loc = 0;
+ nir_foreach_variable(var, &shader->outputs) {
+ uint32_t end = var->data.driver_location + glsl_count_attribute_slots(var->type, false);
+ loc = MAX2(loc, end);
+ }
+
+ state.vertex_flags_out = nir_variable_create(shader, nir_var_shader_out,
+ glsl_uint_type(), "vertex_flags");
+ state.vertex_flags_out->data.driver_location = loc;
+ state.vertex_flags_out->data.location = VARYING_SLOT_GS_VERTEX_FLAGS_IR3;
+
+ nir_function_impl *impl = nir_shader_get_entrypoint(shader);
+ assert(impl);
+
+ nir_builder b;
+ nir_builder_init(&b, impl);
+ b.cursor = nir_before_cf_list(&impl->body);
+
+ state.header = nir_load_gs_header_ir3(&b);
+
+ nir_foreach_variable(var, &shader->outputs) {
+ state.output_vars[var->data.driver_location] =
+ nir_local_variable_create(impl, var->type,
+ ralloc_asprintf(var, "%s:gs-temp", var->name));
+ }
+
+ state.vertex_count_var =
+ nir_local_variable_create(impl, glsl_uint_type(), "vertex_count");
+ state.emitted_vertex_var =
+ nir_local_variable_create(impl, glsl_uint_type(), "emitted_vertex");
+ state.vertex_flags_var =
+ nir_local_variable_create(impl, glsl_uint_type(), "vertex_flags");
+ state.vertex_flags_out = state.output_vars[state.vertex_flags_out->data.driver_location];
+
+ /* Initialize the counters to 0 and vertex_flags to 4 (end of
+  * primitive), so the first emitted vertex starts a new primitive. */
+ b.cursor = nir_before_cf_list(&impl->body);
+ nir_store_var(&b, state.vertex_count_var, nir_imm_int(&b, 0), 0x1);
+ nir_store_var(&b, state.emitted_vertex_var, nir_imm_int(&b, 0), 0x1);
+ nir_store_var(&b, state.vertex_flags_var, nir_imm_int(&b, 4), 0x1);
+
+ nir_foreach_block_safe(block, impl)
+ lower_gs_block(block, &b, &state);
+
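+ /* At every exit of the shader: kill threads that never emitted a
+  * vertex, then copy the staged outputs to the real outputs. */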
+ set_foreach(impl->end_block->predecessors, block_entry) {
+ struct nir_block *block = (void *)block_entry->key;
+ b.cursor = nir_after_block_before_jump(block);
+
+ nir_intrinsic_instr *discard_if =
+ nir_intrinsic_instr_create(b.shader, nir_intrinsic_discard_if);
+
+ nir_ssa_def *cond = nir_ieq(&b, nir_load_var(&b, state.emitted_vertex_var), nir_imm_int(&b, 0));
+
+ discard_if->src[0] = nir_src_for_ssa(cond);
+
+ nir_builder_instr_insert(&b, &discard_if->instr);
+
+ emit_store_outputs(&b, &state);
+ }
+
+ nir_metadata_preserve(impl, 0);
+
+ if (shader_debug_enabled(shader->info.stage)) {
+ fprintf(stderr, "NIR (after gs lowering):\n");
+ nir_print_shader(shader, stderr);
+ }
+}