freedreno/ir3: Extend geometry lowering pass to handle tessellation
authorKristian H. Kristensen <hoegsberg@google.com>
Wed, 23 Oct 2019 00:30:48 +0000 (17:30 -0700)
committerKristian H. Kristensen <hoegsberg@google.com>
Fri, 8 Nov 2019 00:36:59 +0000 (16:36 -0800)
VS and TCS pass varyings the same way as VS and GS do. TCS then
writes the entire patch to a system memory BO and TES eventually reads
back from the BO once the TE starts generating vertices.  TES outputs
vertices the same way as VS and GS, except when there's a GS as well,
in which case TES passes varyings to GS the same way the VS would.

In addition, the TCS needs a little bit of control flow massaging so
that it only runs for valid invocations, and it needs a couple of
unknown instructions to synchronize with the TE.

Signed-off-by: Kristian H. Kristensen <hoegsberg@google.com>
Acked-by: Eric Anholt <eric@anholt.net>
Reviewed-by: Rob Clark <robdclark@gmail.com>
src/freedreno/ir3/ir3_nir.c
src/freedreno/ir3/ir3_nir.h
src/freedreno/ir3/ir3_nir_lower_tess.c

index ab092ff1eda4e851e9f2b67d4fbde2c426fdac9f..fc90fbe38686a3b745f75a55d7e1d6a9250ba934 100644 (file)
@@ -189,10 +189,18 @@ ir3_optimize_nir(struct ir3_shader *shader, nir_shader *s,
                        .lower_tg4_offsets = true,
        };
 
-       if (key && key->has_gs) {
+       if (key && (key->has_gs || key->tessellation)) {
                switch (shader->type) {
                case MESA_SHADER_VERTEX:
-                       NIR_PASS_V(s, ir3_nir_lower_vs_to_explicit_io, shader);
+                       NIR_PASS_V(s, ir3_nir_lower_to_explicit_io, shader, key->tessellation);
+                       break;
+               case MESA_SHADER_TESS_CTRL:
+                       NIR_PASS_V(s, ir3_nir_lower_tess_ctrl, shader, key->tessellation);
+                       break;
+               case MESA_SHADER_TESS_EVAL:
+                       NIR_PASS_V(s, ir3_nir_lower_tess_eval, key->tessellation);
+                       if (key->has_gs)
+                               NIR_PASS_V(s, ir3_nir_lower_to_explicit_io, shader, key->tessellation);
                        break;
                case MESA_SHADER_GEOMETRY:
                        NIR_PASS_V(s, ir3_nir_lower_gs, shader);
index dc693b3f556acd252c6c4901356cf3698ec8b2de..a42c8822b4a96531cdec279647a7f0fde96bb1f2 100644 (file)
@@ -44,7 +44,10 @@ int ir3_nir_coord_offset(nir_ssa_def *ssa);
 bool ir3_nir_lower_tex_prefetch(nir_shader *shader);
 
 
-void ir3_nir_lower_vs_to_explicit_io(nir_shader *shader, struct ir3_shader *s);
+void ir3_nir_lower_to_explicit_io(nir_shader *shader,
+               struct ir3_shader *s, unsigned topology);
+void ir3_nir_lower_tess_ctrl(nir_shader *shader, struct ir3_shader *s, unsigned topology);
+void ir3_nir_lower_tess_eval(nir_shader *shader, unsigned topology);
 void ir3_nir_lower_gs(nir_shader *shader, struct ir3_shader *s);
 
 const nir_shader_compiler_options * ir3_get_compiler_options(struct ir3_compiler *compiler);
index acbb02d4108c84aeb508634d069f130f08f1ae36..27fc24c1a0dde83219b8dce574f229dce5b83e23 100644 (file)
@@ -26,6 +26,8 @@
 #include "compiler/nir/nir_builder.h"
 
 struct state {
+       uint32_t topology;
+
        struct primitive_map {
                unsigned loc[32];
                unsigned size[32];
@@ -40,6 +42,9 @@ struct state {
        nir_variable *vertex_flags_out;
 
        nir_variable *output_vars[32];
+
+       nir_ssa_def *outer_levels[4];
+       nir_ssa_def *inner_levels[2];
 };
 
 static nir_ssa_def *
@@ -89,13 +94,18 @@ build_local_offset(nir_builder *b, struct state *state,
        nir_ssa_def *attr_offset;
        nir_ssa_def *vertex_stride;
 
-       if (b->shader->info.stage == MESA_SHADER_VERTEX) {
+       switch (b->shader->info.stage) {
+       case MESA_SHADER_VERTEX:
+       case MESA_SHADER_TESS_EVAL:
                vertex_stride = nir_imm_int(b, state->map.stride * 4);
                attr_offset = nir_imm_int(b, state->map.loc[base] * 4);
-       } else if (b->shader->info.stage == MESA_SHADER_GEOMETRY) {
+               break;
+       case MESA_SHADER_TESS_CTRL:
+       case MESA_SHADER_GEOMETRY:
                vertex_stride = nir_load_vs_vertex_stride_ir3(b);
                attr_offset = nir_load_primitive_location_ir3(b, base);
-       } else {
+               break;
+       default:
                unreachable("bad shader stage");
        }
 
@@ -212,7 +222,7 @@ local_thread_id(nir_builder *b)
 }
 
 void
-ir3_nir_lower_vs_to_explicit_io(nir_shader *shader, struct ir3_shader *s)
+ir3_nir_lower_to_explicit_io(nir_shader *shader, struct ir3_shader *s, unsigned topology)
 {
        struct state state = { };
 
@@ -226,7 +236,10 @@ ir3_nir_lower_vs_to_explicit_io(nir_shader *shader, struct ir3_shader *s)
        nir_builder_init(&b, impl);
        b.cursor = nir_before_cf_list(&impl->body);
 
-       state.header = nir_load_gs_header_ir3(&b);
+       if (s->type == MESA_SHADER_VERTEX && topology != IR3_TESS_NONE)
+               state.header = nir_load_tcs_header_ir3(&b);
+       else
+               state.header = nir_load_gs_header_ir3(&b);
 
        nir_foreach_block_safe(block, impl)
                lower_vs_block(block, &b, &state);
@@ -237,6 +250,494 @@ ir3_nir_lower_vs_to_explicit_io(nir_shader *shader, struct ir3_shader *s)
        s->output_size = state.map.stride;
 }
 
+/* Compute the offset into the tess param BO for one vertex's attribute:
+ * patch base (primitive_id * per-patch stride) + attribute location +
+ * (vertex index * attribute stride) + (offset << 2).  The TCS uses its
+ * own freshly built output map for locations/sizes, while the TES reads
+ * the location via a driver-param intrinsic (the TCS layout is decided
+ * when the TCS is compiled).  NOTE(review): offset is scaled by 4 here
+ * but attr_offset is not -- presumably they are in different units
+ * (dwords vs. pre-scaled locations); confirm against the consumers of
+ * load_global_ir3/store_global_ir3.
+ */
+static nir_ssa_def *
+build_per_vertex_offset(nir_builder *b, struct state *state,
+               nir_ssa_def *vertex, nir_ssa_def *offset, nir_variable *var)
+{
+       nir_ssa_def *primitive_id = nir_load_primitive_id(b);
+       nir_ssa_def *patch_stride = nir_load_hs_patch_stride_ir3(b);
+       nir_ssa_def *patch_offset = nir_imul24(b, primitive_id, patch_stride);
+       nir_ssa_def *attr_offset;
+       int loc = var->data.driver_location;
+
+       switch (b->shader->info.stage) {
+       case MESA_SHADER_TESS_CTRL:
+               /* TCS reads/writes its own outputs: layout comes from state->map. */
+               attr_offset = nir_imm_int(b, state->map.loc[loc]);
+               break;
+       case MESA_SHADER_TESS_EVAL:
+               /* TES reads the TCS layout through a driver param. */
+               attr_offset = nir_load_primitive_location_ir3(b, loc);
+               break;
+       default:
+               unreachable("bad shader state");
+       }
+
+       nir_ssa_def *attr_stride = nir_imm_int(b, state->map.size[loc]);
+       nir_ssa_def *vertex_offset = nir_imul24(b, vertex, attr_stride);
+
+       return nir_iadd(b, nir_iadd(b, patch_offset, attr_offset),
+                       nir_iadd(b, vertex_offset, nir_ishl(b, offset, nir_imm_int(b, 2))));
+}
+
+/* Offset for a per-patch (not per-vertex) output: same layout as the
+ * per-vertex path but with an implicit vertex index of 0.
+ */
+static nir_ssa_def *
+build_patch_offset(nir_builder *b, struct state *state, nir_ssa_def *offset, nir_variable *var)
+{
+       debug_assert(var && var->data.patch);
+
+       return build_per_vertex_offset(b, state, nir_imm_int(b, 0), offset, var);
+}
+
+/* Compute the index of this patch's tess factors in the tess factor BO.
+ * Per-patch layout: one header dword at index 0, then the outer levels,
+ * then the inner levels.  The level counts are fixed by the primitive
+ * topology (tri: 3 outer/1 inner, quad: 4/2, isoline: 2/0).
+ */
+static nir_ssa_def *
+build_tessfactor_base(nir_builder *b, gl_varying_slot slot, struct state *state)
+{
+       uint32_t inner_levels, outer_levels;
+       switch (state->topology) {
+       case IR3_TESS_TRIANGLES:
+               inner_levels = 1;
+               outer_levels = 3;
+               break;
+       case IR3_TESS_QUADS:
+               inner_levels = 2;
+               outer_levels = 4;
+               break;
+       case IR3_TESS_ISOLINES:
+               inner_levels = 0;
+               outer_levels = 2;
+               break;
+       default:
+               unreachable("bad");
+       }
+
+       const uint32_t patch_stride = 1 + inner_levels + outer_levels;
+
+       nir_ssa_def *primitive_id = nir_load_primitive_id(b);
+
+       nir_ssa_def *patch_offset = nir_imul24(b, primitive_id, nir_imm_int(b, patch_stride));
+
+       uint32_t offset;
+       switch (slot) {
+       case VARYING_SLOT_TESS_LEVEL_OUTER:
+               /* There's some kind of header dword, tess levels start at index 1. */
+               offset = 1;
+               break;
+       case VARYING_SLOT_TESS_LEVEL_INNER:
+               /* Inner levels are packed right after the outer ones. */
+               offset = 1 + outer_levels;
+               break;
+       default:
+               unreachable("bad");
+       }
+
+       return nir_iadd(b, patch_offset, nir_imm_int(b, offset));
+}
+
+/* Rewrite TCS I/O intrinsics in one block: per-vertex inputs become
+ * shared-memory loads (varyings arrive from the VS in local storage),
+ * per-vertex/patch outputs become global loads/stores into the tess
+ * param BO, and tess level writes are stashed in state to be emitted
+ * once in the epilogue rather than stored here.
+ */
+static void
+lower_tess_ctrl_block(nir_block *block, nir_builder *b, struct state *state)
+{
+       nir_foreach_instr_safe(instr, block) {
+               if (instr->type != nir_instr_type_intrinsic)
+                       continue;
+
+               nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+
+               switch (intr->intrinsic) {
+               case nir_intrinsic_load_invocation_id:
+                       /* Synthesize the invocation id from the TCS header. */
+                       b->cursor = nir_before_instr(&intr->instr);
+
+                       nir_ssa_def *invocation_id = build_invocation_id(b, state);
+                       nir_ssa_def_rewrite_uses(&intr->dest.ssa,
+                                                                        nir_src_for_ssa(invocation_id));
+                       nir_instr_remove(&intr->instr);
+                       break;
+
+               case nir_intrinsic_barrier:
+                       /* Hull shaders dispatch 32 wide so an entire patch will always
+                        * fit in a single warp and execute in lock-step.  Consequently,
+                        * we don't need to do anything for TCS barriers so just remove
+                        * the intrinsic. Otherwise we'd emit an actual barrier
+                        * instruction, which would deadlock.
+                        */
+                       nir_instr_remove(&intr->instr);
+                       break;
+
+               case nir_intrinsic_load_per_vertex_output: {
+                       // src[] = { vertex, offset }.
+
+                       b->cursor = nir_before_instr(&intr->instr);
+
+                       nir_ssa_def *address = nir_load_tess_param_base_ir3(b);
+                       nir_variable *var = get_var(&b->shader->outputs, nir_intrinsic_base(intr));
+                       nir_ssa_def *offset = build_per_vertex_offset(b, state,
+                                       intr->src[0].ssa, intr->src[1].ssa, var);
+
+                       replace_intrinsic(b, intr, nir_intrinsic_load_global_ir3, address, offset, NULL);
+                       break;
+               }
+
+               case nir_intrinsic_store_per_vertex_output: {
+                       // src[] = { value, vertex, offset }.
+
+                       b->cursor = nir_before_instr(&intr->instr);
+
+                       nir_ssa_def *value = intr->src[0].ssa;
+                       nir_ssa_def *address = nir_load_tess_param_base_ir3(b);
+                       nir_variable *var = get_var(&b->shader->outputs, nir_intrinsic_base(intr));
+                       nir_ssa_def *offset = build_per_vertex_offset(b, state,
+                                       intr->src[1].ssa, intr->src[2].ssa, var);
+
+                       /* Fold the component index into the offset for the global store. */
+                       nir_intrinsic_instr *store =
+                               replace_intrinsic(b, intr, nir_intrinsic_store_global_ir3, value, address,
+                                                                 nir_iadd(b, offset, nir_imm_int(b, nir_intrinsic_component(intr))));
+
+                       nir_intrinsic_set_write_mask(store, nir_intrinsic_write_mask(intr));
+
+                       break;
+               }
+
+               case nir_intrinsic_load_per_vertex_input: {
+                       // src[] = { vertex, offset }.
+
+                       /* Inputs from the VS live in local (shared) storage. */
+                       b->cursor = nir_before_instr(&intr->instr);
+
+                       nir_ssa_def *offset = build_local_offset(b, state,
+                                       intr->src[0].ssa, // this is typically gl_InvocationID
+                                       nir_intrinsic_base(intr),
+                                       intr->src[1].ssa);
+
+                       replace_intrinsic(b, intr, nir_intrinsic_load_shared_ir3, offset, NULL, NULL);
+                       break;
+               }
+
+               case nir_intrinsic_load_tess_level_inner:
+               case nir_intrinsic_load_tess_level_outer: {
+                       /* Read tess levels back from the tess factor BO. */
+                       b->cursor = nir_before_instr(&intr->instr);
+
+                       gl_varying_slot slot;
+                       if (intr->intrinsic == nir_intrinsic_load_tess_level_inner)
+                               slot = VARYING_SLOT_TESS_LEVEL_INNER;
+                       else
+                               slot = VARYING_SLOT_TESS_LEVEL_OUTER;
+
+                       nir_ssa_def *address = nir_load_tess_factor_base_ir3(b);
+                       nir_ssa_def *offset = build_tessfactor_base(b, slot, state);
+
+                       replace_intrinsic(b, intr, nir_intrinsic_load_global_ir3, address, offset, NULL);
+                       break;
+               }
+
+               case nir_intrinsic_load_output: {
+                       // src[] = { offset }.
+
+                       nir_variable *var = get_var(&b->shader->outputs, nir_intrinsic_base(intr));
+
+                       b->cursor = nir_before_instr(&intr->instr);
+
+                       nir_ssa_def *address = nir_load_tess_param_base_ir3(b);
+                       nir_ssa_def *offset = build_patch_offset(b, state, intr->src[0].ssa, var);
+
+                       replace_intrinsic(b, intr, nir_intrinsic_load_global_ir3, address, offset, NULL);
+                       break;
+               }
+
+               case nir_intrinsic_store_output: {
+                       // src[] = { value, offset }.
+
+                       /* write patch output to bo */
+
+                       nir_variable *var = get_var(&b->shader->outputs, nir_intrinsic_base(intr));
+
+                       /* Tess level outputs are not stored here; capture the written
+                        * channels so the epilogue can write them all at once.
+                        */
+                       nir_ssa_def **levels = NULL;
+                       if (var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER)
+                               levels = state->outer_levels;
+                       else if (var->data.location == VARYING_SLOT_TESS_LEVEL_INNER)
+                               levels = state->inner_levels;
+
+                       b->cursor = nir_before_instr(&intr->instr);
+
+                       if (levels) {
+                               for (int i = 0; i < 4; i++)
+                                       if (nir_intrinsic_write_mask(intr) & (1 << i))
+                                               levels[i] = nir_channel(b, intr->src[0].ssa, i);
+                               nir_instr_remove(&intr->instr);
+                       } else {
+                               nir_ssa_def *address = nir_load_tess_param_base_ir3(b);
+                               nir_ssa_def *offset = build_patch_offset(b, state, intr->src[1].ssa, var);
+
+                               debug_assert(nir_intrinsic_component(intr) == 0);
+
+                               nir_intrinsic_instr *store =
+                                       replace_intrinsic(b, intr, nir_intrinsic_store_global_ir3,
+                                                       intr->src[0].ssa, address, offset);
+
+                               nir_intrinsic_set_write_mask(store, nir_intrinsic_write_mask(intr));
+                       }
+                       break;
+               }
+
+               default:
+                       break;
+               }
+       }
+}
+
+/* Emit the TCS epilogue: pack the tess levels captured during lowering
+ * into one or two vectors (layout depends on topology), store them to
+ * the tess factor BO, and signal end-of-patch.  Runs only for
+ * invocation 0 (see the cond_end emitted by the caller).
+ * NOTE(review): "epilouge" is a misspelling of "epilogue"; renaming
+ * would also require updating the call site.
+ */
+static void
+emit_tess_epilouge(nir_builder *b, struct state *state)
+{
+       nir_ssa_def *tessfactor_address = nir_load_tess_factor_base_ir3(b);
+       nir_ssa_def *levels[2];
+
+       /* Then emit the epilogue that actually writes out the tessellation levels
+        * to the BOs.
+        */
+       switch (state->topology) {
+       case IR3_TESS_TRIANGLES:
+               /* 3 outer + 1 inner fit in a single vec4 store. */
+               levels[0] = nir_vec4(b, state->outer_levels[0], state->outer_levels[1],
+                               state->outer_levels[2], state->inner_levels[0]);
+               levels[1] = NULL;
+               break;
+       case IR3_TESS_QUADS:
+               /* 4 outer in one vec4, 2 inner in a second vec2 store. */
+               levels[0] = nir_vec4(b, state->outer_levels[0], state->outer_levels[1],
+                               state->outer_levels[2], state->outer_levels[3]);
+               levels[1] = nir_vec2(b, state->inner_levels[0], state->inner_levels[1]);
+               break;
+       case IR3_TESS_ISOLINES:
+               /* 2 outer only; no inner levels. */
+               levels[0] = nir_vec2(b, state->outer_levels[0], state->outer_levels[1]);
+               levels[1] = NULL;
+               break;
+       default:
+               unreachable("nope");
+       }
+
+       nir_ssa_def *offset = build_tessfactor_base(b, VARYING_SLOT_TESS_LEVEL_OUTER, state);
+
+       nir_intrinsic_instr *store =
+               nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_global_ir3);
+
+       store->src[0] = nir_src_for_ssa(levels[0]);
+       store->src[1] = nir_src_for_ssa(tessfactor_address);
+       store->src[2] = nir_src_for_ssa(offset);
+       nir_builder_instr_insert(b, &store->instr);
+       store->num_components = levels[0]->num_components;
+       nir_intrinsic_set_write_mask(store, (1 << levels[0]->num_components) - 1);
+
+       if (levels[1]) {
+               store = nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_global_ir3);
+               offset = nir_iadd(b, offset, nir_imm_int(b, levels[0]->num_components));
+
+               store->src[0] = nir_src_for_ssa(levels[1]);
+               store->src[1] = nir_src_for_ssa(tessfactor_address);
+               store->src[2] = nir_src_for_ssa(offset);
+               nir_builder_instr_insert(b, &store->instr);
+               store->num_components = levels[1]->num_components;
+               nir_intrinsic_set_write_mask(store, (1 << levels[1]->num_components) - 1);
+       }
+
+       /* Finally, insert endpatch instruction, maybe signalling the tess engine
+        * that another primitive is ready?
+        */
+
+       nir_intrinsic_instr *end_patch =
+               nir_intrinsic_instr_create(b->shader, nir_intrinsic_end_patch_ir3);
+       nir_builder_instr_insert(b, &end_patch->instr);
+}
+
+/* Lower a TCS for ir3: build the output map, rewrite I/O intrinsics,
+ * then restructure control flow so the body only executes for valid
+ * invocation ids and the tess-factor epilogue only for invocation 0:
+ *
+ *   if (gl_InvocationID < num_vertices) {
+ *     // original body
+ *     cond_end (iid == 0)
+ *     // epilogue: store tess levels, end_patch
+ *   }
+ */
+void
+ir3_nir_lower_tess_ctrl(nir_shader *shader, struct ir3_shader *s, unsigned topology)
+{
+       struct state state = { .topology = topology };
+
+       if (shader_debug_enabled(shader->info.stage)) {
+               fprintf(stderr, "NIR (before tess lowering) for %s shader:\n",
+                               _mesa_shader_stage_to_string(shader->info.stage));
+               nir_print_shader(shader, stderr);
+       }
+
+       /* Publish the output layout so the TES compile can match it. */
+       build_primitive_map(shader, &state.map, &shader->outputs);
+       memcpy(s->output_loc, state.map.loc, sizeof(s->output_loc));
+       s->output_size = state.map.stride;
+
+       nir_function_impl *impl = nir_shader_get_entrypoint(shader);
+       assert(impl);
+
+       nir_builder b;
+       nir_builder_init(&b, impl);
+       b.cursor = nir_before_cf_list(&impl->body);
+
+       state.header = nir_load_tcs_header_ir3(&b);
+
+       nir_foreach_block_safe(block, impl)
+               lower_tess_ctrl_block(block, &b, &state);
+
+       /* Now move the body of the TCS into a conditional:
+        *
+        *   if (gl_InvocationID < num_vertices)
+        *     // body
+        *
+        */
+
+       nir_cf_list body;
+       nir_cf_extract(&body, nir_before_cf_list(&impl->body),
+                                  nir_after_cf_list(&impl->body));
+
+       b.cursor = nir_after_cf_list(&impl->body);
+
+       /* Re-emit the header, since the old one got moved into the if branch */
+       state.header = nir_load_tcs_header_ir3(&b);
+       nir_ssa_def *iid = build_invocation_id(&b, &state);
+
+       const uint32_t nvertices = shader->info.tess.tcs_vertices_out;
+       nir_ssa_def *cond = nir_ult(&b, iid, nir_imm_int(&b, nvertices));
+
+       nir_if *nif = nir_push_if(&b, cond);
+
+       nir_cf_reinsert(&body, b.cursor);
+
+       b.cursor = nir_after_cf_list(&nif->then_list);
+
+       /* Insert conditional exit for threads invocation id != 0 */
+       nir_ssa_def *iid0_cond = nir_ieq(&b, iid, nir_imm_int(&b, 0));
+       nir_intrinsic_instr *cond_end =
+               nir_intrinsic_instr_create(shader, nir_intrinsic_cond_end_ir3);
+       cond_end->src[0] = nir_src_for_ssa(iid0_cond);
+       nir_builder_instr_insert(&b, &cond_end->instr);
+
+       emit_tess_epilouge(&b, &state);
+
+       nir_pop_if(&b, nif);
+
+       /* Control flow changed; invalidate all metadata. */
+       nir_metadata_preserve(impl, 0);
+}
+
+
+/* Rewrite TES intrinsics in one block: fix up gl_TessCoord's third
+ * component, turn per-vertex/patch input loads into global loads from
+ * the tess param BO, and read tess levels from the tess factor BO.
+ */
+static void
+lower_tess_eval_block(nir_block *block, nir_builder *b, struct state *state)
+{
+       nir_foreach_instr_safe(instr, block) {
+               if (instr->type != nir_instr_type_intrinsic)
+                       continue;
+
+               nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+
+               switch (intr->intrinsic) {
+               case nir_intrinsic_load_tess_coord: {
+                       /* Hardware supplies only x/y; derive z: for triangles the
+                        * coords are barycentric (z = 1 - x - y), otherwise z = 0.
+                        */
+                       b->cursor = nir_after_instr(&intr->instr);
+                       nir_ssa_def *x = nir_channel(b, &intr->dest.ssa, 0);
+                       nir_ssa_def *y = nir_channel(b, &intr->dest.ssa, 1);
+                       nir_ssa_def *z;
+
+                       if (state->topology == IR3_TESS_TRIANGLES)
+                               z = nir_fsub(b, nir_fsub(b, nir_imm_float(b, 1.0f), y), x);
+                       else
+                               z = nir_imm_float(b, 0.0f);
+
+                       nir_ssa_def *coord = nir_vec3(b, x, y, z);
+
+                       /* rewrite_uses_after so the new vec3 keeps using the
+                        * original intrinsic's x/y channels. */
+                       nir_ssa_def_rewrite_uses_after(&intr->dest.ssa,
+                                       nir_src_for_ssa(coord),
+                                       b->cursor.instr);
+                       break;
+               }
+
+               case nir_intrinsic_load_per_vertex_input: {
+                       // src[] = { vertex, offset }.
+
+                       b->cursor = nir_before_instr(&intr->instr);
+
+                       nir_ssa_def *address = nir_load_tess_param_base_ir3(b);
+                       nir_variable *var = get_var(&b->shader->inputs, nir_intrinsic_base(intr));
+                       nir_ssa_def *offset = build_per_vertex_offset(b, state,
+                                       intr->src[0].ssa, intr->src[1].ssa, var);
+
+                       replace_intrinsic(b, intr, nir_intrinsic_load_global_ir3, address, offset, NULL);
+                       break;
+               }
+
+               case nir_intrinsic_load_tess_level_inner:
+               case nir_intrinsic_load_tess_level_outer: {
+                               b->cursor = nir_before_instr(&intr->instr);
+
+                               gl_varying_slot slot;
+                               if (intr->intrinsic == nir_intrinsic_load_tess_level_inner)
+                                       slot = VARYING_SLOT_TESS_LEVEL_INNER;
+                               else
+                                       slot = VARYING_SLOT_TESS_LEVEL_OUTER;
+
+                               nir_ssa_def *address = nir_load_tess_factor_base_ir3(b);
+                               nir_ssa_def *offset = build_tessfactor_base(b, slot, state);
+
+                               /* Loading across a vec4 (16b) memory boundary is problematic
+                                * if we don't use components from the second vec4.  The tess
+                                * levels aren't guaranteed to be vec4 aligned and we don't
+                                * know which levels are actually used, so we load each
+                                * component individually.
+                                */
+                               nir_ssa_def *levels[4];
+                               for (unsigned i = 0; i < intr->num_components; i++) {
+                                       nir_intrinsic_instr *new_intr =
+                                               nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_global_ir3);
+
+                                       new_intr->src[0] = nir_src_for_ssa(address);
+                                       new_intr->src[1] = nir_src_for_ssa(nir_iadd(b, offset, nir_imm_int(b, i)));
+                                       new_intr->num_components = 1;
+                                       nir_ssa_dest_init(&new_intr->instr, &new_intr->dest, 1, 32, NULL);
+                                       nir_builder_instr_insert(b, &new_intr->instr);
+                                       levels[i] = &new_intr->dest.ssa;
+                               }
+
+                               /* Reassemble the scalar loads into the expected vector. */
+                               nir_ssa_def *v = nir_vec(b, levels, intr->num_components);
+
+                               nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(v));
+
+                               nir_instr_remove(&intr->instr);
+                               break;
+               }
+
+               case nir_intrinsic_load_input: {
+                       // src[] = { offset }.
+
+                       /* Plain (non-per-vertex) inputs in the TES are per-patch. */
+                       nir_variable *var = get_var(&b->shader->inputs, nir_intrinsic_base(intr));
+
+                       debug_assert(var->data.patch);
+
+                       b->cursor = nir_before_instr(&intr->instr);
+
+                       nir_ssa_def *address = nir_load_tess_param_base_ir3(b);
+                       nir_ssa_def *offset = build_patch_offset(b, state, intr->src[0].ssa, var);
+
+                       replace_intrinsic(b, intr, nir_intrinsic_load_global_ir3, address, offset, NULL);
+                       break;
+               }
+
+               default:
+                       break;
+               }
+       }
+}
+
+/* Lower a TES for ir3: build an input map (sizes must mirror the TCS
+ * output layout) and rewrite each block's I/O intrinsics to explicit
+ * global/tess-factor loads.
+ */
+void
+ir3_nir_lower_tess_eval(nir_shader *shader, unsigned topology)
+{
+       struct state state = { .topology = topology };
+
+       if (shader_debug_enabled(shader->info.stage)) {
+               fprintf(stderr, "NIR (before tess lowering) for %s shader:\n",
+                               _mesa_shader_stage_to_string(shader->info.stage));
+               nir_print_shader(shader, stderr);
+       }
+
+       /* Build map of inputs so we have the sizes. */
+       build_primitive_map(shader, &state.map, &shader->inputs);
+
+       nir_function_impl *impl = nir_shader_get_entrypoint(shader);
+       assert(impl);
+
+       nir_builder b;
+       nir_builder_init(&b, impl);
+
+       nir_foreach_block_safe(block, impl)
+               lower_tess_eval_block(block, &b, &state);
+
+       /* Instructions were added/removed; drop all metadata. */
+       nir_metadata_preserve(impl, 0);
+}
+
 static void
 lower_gs_block(nir_block *block, nir_builder *b, struct state *state)
 {