freedreno/ir3: Implement lowering passes for VS and GS
authorKristian H. Kristensen <hoegsberg@google.com>
Fri, 11 Oct 2019 00:17:10 +0000 (17:17 -0700)
committerKristian H. Kristensen <hoegsberg@google.com>
Thu, 17 Oct 2019 20:43:53 +0000 (13:43 -0700)
This introduces two new lowering passes: one to lower VS to explicit
outputs using STLW, and one to lower GS to load inputs using LDLW and
implement the GS-specific functionality.

Signed-off-by: Kristian H. Kristensen <hoegsberg@google.com>
13 files changed:
src/compiler/nir/nir.h
src/compiler/nir/nir_intrinsics.py
src/compiler/nir/nir_print.c
src/compiler/shader_enums.c
src/compiler/shader_enums.h
src/freedreno/Makefile.sources
src/freedreno/ir3/ir3_context.h
src/freedreno/ir3/ir3_nir.c
src/freedreno/ir3/ir3_nir.h
src/freedreno/ir3/ir3_nir_lower_tess.c [new file with mode: 0644]
src/freedreno/ir3/ir3_shader.c
src/freedreno/ir3/ir3_shader.h
src/freedreno/ir3/meson.build

index 9b94c9edf2363e662a01ddbb82dc5355cd8d80d1..5c98aeefc66aec4fa7955cb14e16ed5855ecb290 100644 (file)
@@ -1524,6 +1524,9 @@ typedef enum {
    NIR_INTRINSIC_SRC_ACCESS,
    NIR_INTRINSIC_DST_ACCESS,
 
+   /* Driver location for nir_load_primitive_location_ir3 */
+   NIR_INTRINSIC_DRIVER_LOCATION,
+
    NIR_INTRINSIC_NUM_INDEX_FLAGS,
 
 } nir_intrinsic_index_flag;
@@ -1632,6 +1635,7 @@ INTRINSIC_IDX_ACCESSORS(align_offset, ALIGN_OFFSET, unsigned)
 INTRINSIC_IDX_ACCESSORS(desc_type, DESC_TYPE, unsigned)
 INTRINSIC_IDX_ACCESSORS(type, TYPE, nir_alu_type)
 INTRINSIC_IDX_ACCESSORS(swizzle_mask, SWIZZLE_MASK, unsigned)
+INTRINSIC_IDX_ACCESSORS(driver_location, DRIVER_LOCATION, unsigned)
 
 static inline void
 nir_intrinsic_set_align(nir_intrinsic_instr *intrin,
index ae62a85d39b1b07fcc81bae79528063057dae0b2..637576c092a5cfe1affb951477c08383e272eaca 100644 (file)
@@ -124,6 +124,8 @@ DESC_TYPE = "NIR_INTRINSIC_DESC_TYPE"
 TYPE = "NIR_INTRINSIC_TYPE"
 # The swizzle mask for quad_swizzle_amd & masked_swizzle_amd
 SWIZZLE_MASK = "NIR_INTRINSIC_SWIZZLE_MASK"
+# Driver location of attribute
+DRIVER_LOCATION = "NIR_INTRINSIC_DRIVER_LOCATION"
 
 #
 # Possible flags:
@@ -771,6 +773,12 @@ intrinsic("ssbo_atomic_xor_ir3",        src_comp=[1, 1, 1, 1],    dest_comp=1)
 intrinsic("ssbo_atomic_exchange_ir3",   src_comp=[1, 1, 1, 1],    dest_comp=1)
 intrinsic("ssbo_atomic_comp_swap_ir3",  src_comp=[1, 1, 1, 1, 1], dest_comp=1)
 
+# System values for freedreno geometry shaders.
+system_value("vs_primitive_stride_ir3", 1)
+system_value("vs_vertex_stride_ir3", 1)
+system_value("gs_header_ir3", 1)
+system_value("primitive_location_ir3", 1, indices=[DRIVER_LOCATION])
+
 # IR3-specific load/store intrinsics. These access a buffer used to pass data
 # between geometry stages - perhaps it's explicit access to the vertex cache.
 
index 48844b7ed79a7c40800890761d1fcb3c4e237f5e..496f92796761147092a7ef83389302a3607d165e 100644 (file)
@@ -800,6 +800,7 @@ print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state)
       [NIR_INTRINSIC_DESC_TYPE] = "desc_type",
       [NIR_INTRINSIC_TYPE] = "type",
       [NIR_INTRINSIC_SWIZZLE_MASK] = "swizzle_mask",
+      [NIR_INTRINSIC_DRIVER_LOCATION] = "driver_location",
    };
    for (unsigned idx = 1; idx < NIR_INTRINSIC_NUM_INDEX_FLAGS; idx++) {
       if (!info->index_map[idx])
index 71796687afa564396946d64f9b9646023a563be7..afaad50adf680eaa2db3a8805a9ae6eeaefc919f 100644 (file)
@@ -254,6 +254,7 @@ gl_system_value_name(gl_system_value sysval)
      ENUM(SYSTEM_VALUE_BARYCENTRIC_SAMPLE),
      ENUM(SYSTEM_VALUE_BARYCENTRIC_CENTROID),
      ENUM(SYSTEM_VALUE_BARYCENTRIC_SIZE),
+     ENUM(SYSTEM_VALUE_GS_HEADER_IR3),
    };
    STATIC_ASSERT(ARRAY_SIZE(names) == SYSTEM_VALUE_MAX);
    return NAME(sysval);
index 0704719c229ab309e01a4c1e2a302c5be1f5c4be..f9b2b8c1d736646759ac463d8a27e54040672e32 100644 (file)
@@ -641,6 +641,13 @@ typedef enum
    SYSTEM_VALUE_BARYCENTRIC_CENTROID,
    SYSTEM_VALUE_BARYCENTRIC_SIZE,
 
+   /**
+    * IR3 specific geometry shader system value that packs invocation id,
+    * thread id and vertex id.  Having this as a nir level system value lets
+    * us do the unpacking in nir.
+    */
+   SYSTEM_VALUE_GS_HEADER_IR3,
+
    SYSTEM_VALUE_MAX             /**< Number of values */
 } gl_system_value;
 
index cf3ac7bdba407e107e2c67fb8bb7f110c033cbe6..bb56869e1ccce67df576018854c17d956f1fc7b1 100644 (file)
@@ -38,6 +38,7 @@ ir3_SOURCES := \
        ir3/ir3_nir_lower_load_barycentric_at_sample.c \
        ir3/ir3_nir_lower_load_barycentric_at_offset.c \
        ir3/ir3_nir_lower_io_offsets.c \
+       ir3/ir3_nir_lower_tess.c \
        ir3/ir3_nir_lower_tg4_to_tex.c \
        ir3/ir3_nir_move_varying_inputs.c \
        ir3/ir3_print.c \
index b0d3e98d00aa2fd1a761d10b85bf3670f730bd33..2a1f90711180373fe6dd62021c5a3ec4fbef676f 100644 (file)
@@ -76,6 +76,10 @@ struct ir3_context {
        /* For fragment shaders: */
        struct ir3_instruction *samp_id, *samp_mask_in;
 
+       /* For geometry shaders: */
+       struct ir3_instruction *primitive_id;
+       struct ir3_instruction *gs_header;
+
        /* Compute shader inputs: */
        struct ir3_instruction *local_invocation_id, *work_group_id;
 
index 2f95b249c263ff85e3d1cb9be1069d7ad9698c52..103821cd6b359ba8bec78f34097b485489a0b13f 100644 (file)
@@ -101,7 +101,8 @@ ir3_key_lowers_nir(const struct ir3_shader_key *key)
        return key->fsaturate_s | key->fsaturate_t | key->fsaturate_r |
                        key->vsaturate_s | key->vsaturate_t | key->vsaturate_r |
                        key->ucp_enables | key->color_two_side |
-                       key->fclamp_color | key->vclamp_color;
+                       key->fclamp_color | key->vclamp_color |
+                       key->has_gs;
 }
 
 #define OPT(nir, pass, ...) ({                             \
@@ -186,6 +187,19 @@ ir3_optimize_nir(struct ir3_shader *shader, nir_shader *s,
                        .lower_tg4_offsets = true,
        };
 
+       if (key && key->has_gs) {
+               switch (shader->type) {
+               case MESA_SHADER_VERTEX:
+                       NIR_PASS_V(s, ir3_nir_lower_vs_to_explicit_io, shader);
+                       break;
+               case MESA_SHADER_GEOMETRY:
+                       NIR_PASS_V(s, ir3_nir_lower_gs, shader);
+                       break;
+               default:
+                       break;
+               }
+       }
+
        if (key) {
                switch (shader->type) {
                case MESA_SHADER_FRAGMENT:
index a9b39e235b56ee6927ee1a2008335fc6877de2e8..a602f40858b480bbef54c7dfb5538ab40d6b2f36 100644 (file)
@@ -41,6 +41,9 @@ bool ir3_nir_lower_load_barycentric_at_sample(nir_shader *shader);
 bool ir3_nir_lower_load_barycentric_at_offset(nir_shader *shader);
 bool ir3_nir_move_varying_inputs(nir_shader *shader);
 
+void ir3_nir_lower_vs_to_explicit_io(nir_shader *shader, struct ir3_shader *s);
+void ir3_nir_lower_gs(nir_shader *shader, struct ir3_shader *s);
+
 const nir_shader_compiler_options * ir3_get_compiler_options(struct ir3_compiler *compiler);
 bool ir3_key_lowers_nir(const struct ir3_shader_key *key);
 void ir3_optimize_nir(struct ir3_shader *shader, nir_shader *s,
diff --git a/src/freedreno/ir3/ir3_nir_lower_tess.c b/src/freedreno/ir3/ir3_nir_lower_tess.c
new file mode 100644 (file)
index 0000000..b498250
--- /dev/null
@@ -0,0 +1,455 @@
+/*
+ * Copyright © 2019 Google, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "ir3_nir.h"
+#include "ir3_compiler.h"
+#include "compiler/nir/nir_builder.h"
+
+/* Scratch state shared by the VS and GS lowering passes in this file. */
+struct state {
+	/* Packed layout of the per-primitive shared storage, built by
+	 * build_primitive_map() and indexed by driver_location.  The
+	 * values are scaled by 4 when forming offsets in
+	 * build_local_offset() -- presumably dword counts scaled to
+	 * bytes; TODO confirm units.
+	 */
+	struct primitive_map {
+		unsigned loc[32];
+		unsigned size[32];
+		unsigned stride;
+	} map;
+
+	/* Result of nir_load_gs_header_ir3(), loaded once at the top of
+	 * the entrypoint; packs invocation/vertex/primitive ids. */
+	nir_ssa_def *header;
+
+	/* GS-only bookkeeping locals (created in ir3_nir_lower_gs). */
+	nir_variable *vertex_count_var;
+	nir_variable *emitted_vertex_var;
+	nir_variable *vertex_flags_var;
+	nir_variable *vertex_flags_out;
+
+	/* GS-only: local temporaries shadowing each shader output,
+	 * indexed by driver_location. */
+	nir_variable *output_vars[32];
+};
+
+/* Build (v >> start) & mask.  Note that @mask is the literal mask value
+ * (e.g. 31 for a 5-bit field), not a field width. */
+static nir_ssa_def *
+bitfield_extract(nir_builder *b, nir_ssa_def *v, uint32_t start, uint32_t mask)
+{
+	return nir_iand(b, nir_ushr(b, v, nir_imm_int(b, start)),
+			nir_imm_int(b, mask));
+}
+
+/* Invocation id is packed into header bits [15:11] (5-bit field). */
+static nir_ssa_def *
+build_invocation_id(nir_builder *b, struct state *state)
+{
+	return bitfield_extract(b, state->header, 11, 31);
+}
+
+/* Vertex id is packed into header bits [10:6] (5-bit field). */
+static nir_ssa_def *
+build_vertex_id(nir_builder *b, struct state *state)
+{
+	return bitfield_extract(b, state->header, 6, 31);
+}
+
+/* Local primitive id is packed into header bits [5:0] (6-bit field). */
+static nir_ssa_def *
+build_local_primitive_id(nir_builder *b, struct state *state)
+{
+	return bitfield_extract(b, state->header, 0, 63);
+}
+
+/* Linear search of @list for the variable with the given
+ * driver_location; returns NULL if none matches. */
+static nir_variable *
+get_var(struct exec_list *list, int driver_location)
+{
+	nir_foreach_variable(v, list) {
+		if (v->data.driver_location == driver_location) {
+			return v;
+		}
+	}
+
+	return NULL;
+}
+
+/* Compute the offset into the shared (LDLW/STLW) buffer for attribute
+ * @base of the given @vertex:
+ *
+ *   local_primitive_id * primitive_stride
+ *     + vertex * vertex_stride + attr_offset + offset
+ *
+ * In the VS the vertex stride and attribute offset come from the
+ * primitive map built in this pass (scaled by 4 -- presumably dwords
+ * to bytes; TODO confirm units); in the GS they come from the
+ * vs_vertex_stride_ir3 / primitive_location_ir3 system values, since
+ * the producing VS is compiled separately. */
+static nir_ssa_def *
+build_local_offset(nir_builder *b, struct state *state,
+		nir_ssa_def *vertex, uint32_t base, nir_ssa_def *offset)
+{
+	nir_ssa_def *primitive_stride = nir_load_vs_primitive_stride_ir3(b);
+	nir_ssa_def *primitive_offset =
+		nir_imul(b, build_local_primitive_id(b, state), primitive_stride);
+	nir_ssa_def *attr_offset;
+	nir_ssa_def *vertex_stride;
+
+	if (b->shader->info.stage == MESA_SHADER_VERTEX) {
+		vertex_stride = nir_imm_int(b, state->map.stride * 4);
+		attr_offset = nir_imm_int(b, state->map.loc[base] * 4);
+	} else if (b->shader->info.stage == MESA_SHADER_GEOMETRY) {
+		vertex_stride = nir_load_vs_vertex_stride_ir3(b);
+		attr_offset = nir_load_primitive_location_ir3(b, base);
+	} else {
+		unreachable("bad shader stage");
+	}
+
+	nir_ssa_def *vertex_offset = nir_imul(b, vertex, vertex_stride);
+
+	return nir_iadd(b, nir_iadd(b, primitive_offset, vertex_offset),
+			nir_iadd(b, attr_offset, offset));
+}
+
+/* Replace @intr with a newly built intrinsic @op taking up to three SSA
+ * sources (pass NULL for unused trailing sources).  num_components is
+ * copied from the old instruction; if @op produces a value, all uses of
+ * the old dest are rewritten to the new one.  The old instruction is
+ * removed.  The caller must position b->cursor before calling. */
+static nir_intrinsic_instr *
+replace_intrinsic(nir_builder *b, nir_intrinsic_instr *intr,
+		nir_intrinsic_op op, nir_ssa_def *src0, nir_ssa_def *src1, nir_ssa_def *src2)
+{
+	nir_intrinsic_instr *new_intr =
+		nir_intrinsic_instr_create(b->shader, op);
+
+	new_intr->src[0] = nir_src_for_ssa(src0);
+	if (src1)
+		new_intr->src[1] = nir_src_for_ssa(src1);
+	if (src2)
+		new_intr->src[2] = nir_src_for_ssa(src2);
+
+	new_intr->num_components = intr->num_components;
+
+	if (nir_intrinsic_infos[op].has_dest)
+		nir_ssa_dest_init(&new_intr->instr, &new_intr->dest,
+						  intr->num_components, 32, NULL);
+
+	nir_builder_instr_insert(b, &new_intr->instr);
+
+	if (nir_intrinsic_infos[op].has_dest)
+		nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(&new_intr->dest.ssa));
+
+	nir_instr_remove(&intr->instr);
+
+	return new_intr;
+}
+
+/* Build the packed layout for the variables in @list.
+ *
+ * Pass 1 records the maximum size seen at each driver_location
+ * (glsl_count_attribute_slots * 4 -- presumably dwords; TODO confirm),
+ * skipping tess levels, which are not stored in this buffer.
+ * Pass 2 assigns packed offsets and converts each total size to a
+ * per-vertex size by dividing by the outer (per-vertex) array length;
+ * patch variables have no per-vertex component, so their size is
+ * zeroed. */
+static void
+build_primitive_map(nir_shader *shader, struct primitive_map *map, struct exec_list *list)
+{
+	nir_foreach_variable(var, list) {
+		switch (var->data.location) {
+		case VARYING_SLOT_TESS_LEVEL_OUTER:
+		case VARYING_SLOT_TESS_LEVEL_INNER:
+			continue;
+		}
+
+		unsigned size = glsl_count_attribute_slots(var->type, false) * 4;
+
+		assert(var->data.driver_location < ARRAY_SIZE(map->size));
+		map->size[var->data.driver_location] =
+			MAX2(map->size[var->data.driver_location], size);
+	}
+
+	unsigned loc = 0;
+	for (uint32_t i = 0; i < ARRAY_SIZE(map->size); i++) {
+		if (map->size[i] == 0)
+				continue;
+		/* size[i] != 0 implies pass 1 saw a variable at this
+		 * driver_location, so var is non-NULL here. */
+		nir_variable *var = get_var(list, i);
+		map->loc[i] = loc;
+		loc += map->size[i];
+
+		if (var->data.patch)
+			map->size[i] = 0;
+		else
+			map->size[i] = map->size[i] / glsl_get_length(var->type);
+	}
+
+	map->stride = loc;
+}
+
+/* For each store_output in a VS block, emit a matching store_shared_ir3
+ * that writes the same value into this vertex's slot of the shared
+ * buffer, so a downstream GS can read it back with LDLW.
+ *
+ * NOTE(review): the original store_output is left in place rather than
+ * removed -- presumably the regular output path is still wanted;
+ * confirm against the backend. */
+static void
+lower_vs_block(nir_block *block, nir_builder *b, struct state *state)
+{
+	nir_foreach_instr_safe(instr, block) {
+		if (instr->type != nir_instr_type_intrinsic)
+			continue;
+
+		nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+
+		switch (intr->intrinsic) {
+		case nir_intrinsic_store_output: {
+			// src[] = { value, offset }.
+
+			b->cursor = nir_before_instr(&intr->instr);
+
+			/* Slot address = vertex id from the header plus the
+			 * mapped location of this output. */
+			nir_ssa_def *vertex_id = build_vertex_id(b, state);
+			nir_ssa_def *offset = build_local_offset(b, state, vertex_id, nir_intrinsic_base(intr),
+					intr->src[1].ssa);
+			nir_intrinsic_instr *store =
+				nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_shared_ir3);
+
+			nir_intrinsic_set_write_mask(store, MASK(intr->num_components));
+			store->src[0] = nir_src_for_ssa(intr->src[0].ssa);
+			store->src[1] = nir_src_for_ssa(offset);
+
+			store->num_components = intr->num_components;
+
+			nir_builder_instr_insert(b, &store->instr);
+			break;
+		}
+
+		default:
+			break;
+		}
+	}
+}
+
+/* Local thread id is packed into GS header bits [25:16] (10-bit field). */
+static nir_ssa_def *
+local_thread_id(nir_builder *b)
+{
+	return bitfield_extract(b, nir_load_gs_header_ir3(b), 16, 1023);
+}
+
+/* VS entry point: lay out all VS outputs in the shared buffer and add a
+ * store_shared_ir3 alongside every store_output (see lower_vs_block).
+ * The resulting layout is published on the ir3_shader (output_loc /
+ * output_size) so the GS side of the pipeline can be set up to match. */
+void
+ir3_nir_lower_vs_to_explicit_io(nir_shader *shader, struct ir3_shader *s)
+{
+	struct state state = { };
+
+	build_primitive_map(shader, &state.map, &shader->outputs);
+	memcpy(s->output_loc, state.map.loc, sizeof(s->output_loc));
+
+	nir_function_impl *impl = nir_shader_get_entrypoint(shader);
+	assert(impl);
+
+	nir_builder b;
+	nir_builder_init(&b, impl);
+	b.cursor = nir_before_cf_list(&impl->body);
+
+	/* Load the packed header once at the top of the entrypoint; all
+	 * lowered stores unpack ids from this single load. */
+	state.header = nir_load_gs_header_ir3(&b);
+
+	nir_foreach_block_safe(block, impl)
+		lower_vs_block(block, &b, &state);
+
+	nir_metadata_preserve(impl, nir_metadata_block_index |
+			nir_metadata_dominance);
+
+	s->output_size = state.map.stride;
+}
+
+/* Lower the GS-specific intrinsics in @block:
+ *
+ *  - store_output: deferred -- recorded per driver_location and only
+ *    materialized (into the shadow temps) at the next emit_vertex.
+ *  - end_primitive: becomes vertex_flags = 4 (presumably the
+ *    primitive-end bit consumed by the hardware -- TODO confirm
+ *    flag semantics).
+ *  - emit_vertex: copy the pending outputs into the shadow temps, but
+ *    only in the invocation whose local thread id matches the running
+ *    vertex count (each thread owns one output vertex); then bump the
+ *    count and reset the flags.
+ *  - load_per_vertex_input: becomes a load_shared_ir3 from the buffer
+ *    the VS pass wrote.
+ *  - load_invocation_id: unpacked from the GS header.
+ *
+ * NOTE(review): outputs[] is tracked per-block, so a store_output in a
+ * different block than its emit_vertex would never be consumed or
+ * removed -- presumably prior NIR passes guarantee they land in the
+ * same block; verify. */
+static void
+lower_gs_block(nir_block *block, nir_builder *b, struct state *state)
+{
+	nir_intrinsic_instr *outputs[32] = {};
+
+	nir_foreach_instr_safe(instr, block) {
+		if (instr->type != nir_instr_type_intrinsic)
+			continue;
+
+		nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+
+		switch (intr->intrinsic) {
+		case nir_intrinsic_store_output: {
+			// src[] = { value, offset }.
+
+			uint32_t loc = nir_intrinsic_base(intr);
+			outputs[loc] = intr;
+			break;
+		}
+
+		case nir_intrinsic_end_primitive: {
+			b->cursor = nir_before_instr(&intr->instr);
+			nir_store_var(b, state->vertex_flags_var, nir_imm_int(b, 4), 0x1);
+			nir_instr_remove(&intr->instr);
+			break;
+		}
+
+		case nir_intrinsic_emit_vertex: {
+
+			/* Load the vertex count */
+			b->cursor = nir_before_instr(&intr->instr);
+			nir_ssa_def *count = nir_load_var(b, state->vertex_count_var);
+
+			/* Only the thread that owns this vertex latches the
+			 * pending outputs. */
+			nir_push_if(b, nir_ieq(b, count, local_thread_id(b)));
+
+			for (uint32_t i = 0; i < ARRAY_SIZE(outputs); i++) {
+				if (outputs[i]) {
+					nir_store_var(b, state->output_vars[i],
+							outputs[i]->src[0].ssa,
+							(1 << outputs[i]->num_components) - 1);
+
+					nir_instr_remove(&outputs[i]->instr);
+				}
+				outputs[i] = NULL;
+			}
+
+			nir_instr_remove(&intr->instr);
+
+			nir_store_var(b, state->emitted_vertex_var,
+					nir_iadd(b, nir_load_var(b, state->emitted_vertex_var), nir_imm_int(b, 1)), 0x1);
+
+			nir_store_var(b, state->vertex_flags_out,
+					nir_load_var(b, state->vertex_flags_var), 0x1);
+
+			nir_pop_if(b, NULL);
+
+			/* Increment the vertex count by 1 */
+			nir_store_var(b, state->vertex_count_var,
+					nir_iadd(b, count, nir_imm_int(b, 1)), 0x1); /* .x */
+			nir_store_var(b, state->vertex_flags_var, nir_imm_int(b, 0), 0x1);
+
+			break;
+		}
+
+		case nir_intrinsic_load_per_vertex_input: {
+			// src[] = { vertex, offset }.
+
+			b->cursor = nir_before_instr(&intr->instr);
+
+			nir_ssa_def *offset = build_local_offset(b, state,
+					intr->src[0].ssa, // this is typically gl_InvocationID
+					nir_intrinsic_base(intr),
+					intr->src[1].ssa);
+
+			replace_intrinsic(b, intr, nir_intrinsic_load_shared_ir3, offset, NULL, NULL);
+			break;
+		}
+
+		case nir_intrinsic_load_invocation_id: {
+			b->cursor = nir_before_instr(&intr->instr);
+
+			nir_ssa_def *iid = build_invocation_id(b, state);
+			nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(iid));
+			nir_instr_remove(&intr->instr);
+			break;
+		}
+
+		default:
+			break;
+		}
+	}
+}
+
+/* Emit one store_output per shadow temp at the builder's current
+ * cursor, copying each temp's final value to the real output.  Runs at
+ * shader exit so every thread writes the vertex it latched. */
+static void
+emit_store_outputs(nir_builder *b, struct state *state)
+{
+	/* This also stores the internally added vertex_flags output. */
+
+	for (uint32_t i = 0; i < ARRAY_SIZE(state->output_vars); i++) {
+		if (!state->output_vars[i])
+			continue;
+
+		nir_intrinsic_instr *store =
+			nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_output);
+
+		/* output_vars[] is indexed by driver_location, which is
+		 * what store_output's base expects. */
+		nir_intrinsic_set_base(store, i);
+		store->src[0] = nir_src_for_ssa(nir_load_var(b, state->output_vars[i]));
+		store->src[1] = nir_src_for_ssa(nir_imm_int(b, 0));
+		store->num_components = store->src[0].ssa->num_components;
+
+		nir_builder_instr_insert(b, &store->instr);
+	}
+}
+
+/* Remove variables that cover only a strict subset of the components
+ * seen at their driver_location -- leftovers from variable splitting.
+ *
+ * NOTE(review): if no single variable covers the full union of
+ * components at a location (e.g. two disjoint split halves), every
+ * variable there is removed.  This assumes the unsplit variable is
+ * always still present alongside the fragments; verify. */
+static void
+clean_up_split_vars(nir_shader *shader, struct exec_list *list)
+{
+	uint32_t components[32] = {};
+
+	/* Pass 1: union of component masks per driver_location. */
+	nir_foreach_variable(var, list) {
+		uint32_t mask =
+			((1 << glsl_get_components(glsl_without_array(var->type))) - 1) << var->data.location_frac;
+		components[var->data.driver_location] |= mask;
+	}
+
+	/* Pass 2: drop any variable whose mask is a proper subset. */
+	nir_foreach_variable_safe(var, list) {
+		uint32_t mask =
+			((1 << glsl_get_components(glsl_without_array(var->type))) - 1) << var->data.location_frac;
+		bool subset =
+			(components[var->data.driver_location] | mask) != mask;
+		if (subset)
+			exec_node_remove(&var->node);
+	}
+}
+
+/* GS entry point.  Rewrites emit_vertex/end_primitive into per-thread
+ * shadow-temp stores plus a vertex_flags output, lowers per-vertex
+ * input loads to LDLW from the buffer written by the VS pass, and
+ * kills threads that emitted no vertex. */
+void
+ir3_nir_lower_gs(nir_shader *shader, struct ir3_shader *s)
+{
+	struct state state = { };
+
+	if (shader_debug_enabled(shader->info.stage)) {
+		fprintf(stderr, "NIR (before gs lowering):\n");
+		nir_print_shader(shader, stderr);
+	}
+
+	clean_up_split_vars(shader, &shader->inputs);
+	clean_up_split_vars(shader, &shader->outputs);
+
+	build_primitive_map(shader, &state.map, &shader->inputs);
+
+	/* Find the first driver_location past all existing outputs, for
+	 * the internally added vertex_flags output. */
+	uint32_t loc = 0;
+	nir_foreach_variable(var, &shader->outputs) {
+		uint32_t end = var->data.driver_location + glsl_count_attribute_slots(var->type, false);
+		loc = MAX2(loc, end);
+	}
+
+	state.vertex_flags_out = nir_variable_create(shader, nir_var_shader_out,
+			glsl_uint_type(), "vertex_flags");
+	state.vertex_flags_out->data.driver_location = loc;
+	state.vertex_flags_out->data.location = VARYING_SLOT_GS_VERTEX_FLAGS_IR3;
+
+	nir_function_impl *impl = nir_shader_get_entrypoint(shader);
+	assert(impl);
+
+	nir_builder b;
+	nir_builder_init(&b, impl);
+	b.cursor = nir_before_cf_list(&impl->body);
+
+	/* Load the packed header once; unpacked on demand below. */
+	state.header = nir_load_gs_header_ir3(&b);
+
+	/* Shadow every output (including vertex_flags, created above)
+	 * with a function-local temp; real stores happen at exit via
+	 * emit_store_outputs(). */
+	nir_foreach_variable(var, &shader->outputs) {
+		state.output_vars[var->data.driver_location] = 
+			nir_local_variable_create(impl, var->type,
+					ralloc_asprintf(var, "%s:gs-temp", var->name));
+	}
+
+	state.vertex_count_var =
+		nir_local_variable_create(impl, glsl_uint_type(), "vertex_count");
+	state.emitted_vertex_var =
+		nir_local_variable_create(impl, glsl_uint_type(), "emitted_vertex");
+	state.vertex_flags_var =
+		nir_local_variable_create(impl, glsl_uint_type(), "vertex_flags");
+	/* From here on, vertex_flags stores go to the shadow temp, not
+	 * the output variable created above. */
+	state.vertex_flags_out = state.output_vars[state.vertex_flags_out->data.driver_location];
+
+	/* Initialize the counters to 0; vertex_flags starts at 4 so the
+	 * first emitted vertex carries the flag. */
+	b.cursor = nir_before_cf_list(&impl->body);
+	nir_store_var(&b, state.vertex_count_var, nir_imm_int(&b, 0), 0x1);
+	nir_store_var(&b, state.emitted_vertex_var, nir_imm_int(&b, 0), 0x1);
+	nir_store_var(&b, state.vertex_flags_var, nir_imm_int(&b, 4), 0x1);
+
+	nir_foreach_block_safe(block, impl)
+		lower_gs_block(block, &b, &state);
+
+	/* At every exit of the shader: kill threads that latched no
+	 * vertex, then flush the shadow temps to the real outputs. */
+	set_foreach(impl->end_block->predecessors, block_entry) {
+		struct nir_block *block = (void *)block_entry->key;
+		b.cursor = nir_after_block_before_jump(block);
+
+		nir_intrinsic_instr *discard_if =
+			nir_intrinsic_instr_create(b.shader, nir_intrinsic_discard_if);
+
+		nir_ssa_def *cond = nir_ieq(&b, nir_load_var(&b, state.emitted_vertex_var), nir_imm_int(&b, 0));
+
+		discard_if->src[0] = nir_src_for_ssa(cond);
+
+		nir_builder_instr_insert(&b, &discard_if->instr);
+
+		emit_store_outputs(&b, &state);
+	}
+
+	/* Control flow was restructured; no metadata survives. */
+	nir_metadata_preserve(impl, 0);
+
+	if (shader_debug_enabled(shader->info.stage)) {
+		fprintf(stderr, "NIR (after gs lowering):\n");
+		nir_print_shader(shader, stderr);
+	}
+}
index aae7baeb2e06a106d626e17957b83e279019a6f5..10980bd38be41d817a9ba1d8e2c376cfc0091151 100644 (file)
@@ -350,7 +350,14 @@ output_name(struct ir3_shader_variant *so, int i)
        if (so->type == MESA_SHADER_FRAGMENT) {
                return gl_frag_result_name(so->outputs[i].slot);
        } else {
-               return gl_varying_slot_name(so->outputs[i].slot);
+               switch (so->outputs[i].slot) {
+               case VARYING_SLOT_GS_HEADER_IR3:
+                       return "GS_HEADER";
+               case VARYING_SLOT_GS_VERTEX_FLAGS_IR3:
+                       return "GS_VERTEX_FLAGS";
+               default:
+                       return gl_varying_slot_name(so->outputs[i].slot);
+               }
        }
 }
 
index fa6d5b7d387a4419be5fedae4182fee2667dc087..ce25886565897eced1863a89cdb34a264ea4e125 100644 (file)
@@ -554,6 +554,11 @@ struct ir3_shader {
 
        struct ir3_shader_variant *variants;
        mtx_t variants_lock;
+
+       uint32_t output_size; /* Size in dwords of all outputs for VS, size of entire patch for HS. */
+
+       /* Map from driver_location to byte offset in per-primitive storage */
+       unsigned output_loc[32];
 };
 
 void * ir3_shader_assemble(struct ir3_shader_variant *v, uint32_t gpu_id);
@@ -693,6 +698,10 @@ ir3_find_output_regid(const struct ir3_shader_variant *so, unsigned slot)
        return regid(63, 0);
 }
 
+#define VARYING_SLOT_GS_HEADER_IR3                     (VARYING_SLOT_MAX + 0)
+#define VARYING_SLOT_GS_VERTEX_FLAGS_IR3       (VARYING_SLOT_MAX + 1)
+
+
 static inline uint32_t
 ir3_find_sysval_regid(const struct ir3_shader_variant *so, unsigned slot)
 {
index be03ffb88c3deccc862ab56a57cdad078dc10db6..6e1434057e73dff525f822058526cedc5af68f7b 100644 (file)
@@ -66,6 +66,7 @@ libfreedreno_ir3_files = files(
   'ir3_nir_lower_load_barycentric_at_sample.c',
   'ir3_nir_lower_load_barycentric_at_offset.c',
   'ir3_nir_lower_io_offsets.c',
+  'ir3_nir_lower_tess.c',
   'ir3_nir_lower_tg4_to_tex.c',
   'ir3_nir_move_varying_inputs.c',
   'ir3_print.c',