From: Kristian H. Kristensen
Date: Wed, 23 Oct 2019 00:30:48 +0000 (-0700)
Subject: freedreno/ir3: Extend geometry lowering pass to handle tessellation
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=56ed835bffb0e9cd6770a788b6605b84bd54683c;p=mesa.git

freedreno/ir3: Extend geometry lowering pass to handle tessellation

VS and TCS pass varyings the same way as VS and GS do. The TCS then
writes the entire patch to a system memory BO, and the TES eventually
reads back from that BO once the TE starts generating vertices. The
TES outputs vertices the same way as VS and GS do, except when there
is a GS as well, in which case the TES passes varyings to the GS the
same way the VS would.

In addition, the TCS needs a little bit of control flow massaging so
that it only runs for valid invocations, and it uses a couple of
instructions with still-unknown semantics to synchronize with the TE.

Signed-off-by: Kristian H. Kristensen
Acked-by: Eric Anholt
Reviewed-by: Rob Clark
---
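A rough sketch of the data flow this patch sets up (not part of the
original commit message; the names are the intrinsics and helpers used
in the diff below, and the BO layouts are whatever the driver chooses):

    VS  --(shared memory, build_local_offset)-----------> TCS
    TCS --(store_global_ir3 @ tess_param_base_ir3)------> patch BO
    TCS --(store_global_ir3 @ tess_factor_base_ir3)-----> tess factor BO
    TE  --(reads tess factors, spawns TES invocations)
    TES <--(load_global_ir3 @ tess_param_base_ir3)------- patch BO
    TES --(shared memory, only when a GS is present)----> GS
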
diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c
index ab092ff1eda..fc90fbe3868 100644
--- a/src/freedreno/ir3/ir3_nir.c
+++ b/src/freedreno/ir3/ir3_nir.c
@@ -189,10 +189,18 @@ ir3_optimize_nir(struct ir3_shader *shader, nir_shader *s,
 		.lower_tg4_offsets = true,
 	};
 
-	if (key && key->has_gs) {
+	if (key && (key->has_gs || key->tessellation)) {
 		switch (shader->type) {
 		case MESA_SHADER_VERTEX:
-			NIR_PASS_V(s, ir3_nir_lower_vs_to_explicit_io, shader);
+			NIR_PASS_V(s, ir3_nir_lower_to_explicit_io, shader, key->tessellation);
+			break;
+		case MESA_SHADER_TESS_CTRL:
+			NIR_PASS_V(s, ir3_nir_lower_tess_ctrl, shader, key->tessellation);
+			break;
+		case MESA_SHADER_TESS_EVAL:
+			NIR_PASS_V(s, ir3_nir_lower_tess_eval, key->tessellation);
+			if (key->has_gs)
+				NIR_PASS_V(s, ir3_nir_lower_to_explicit_io, shader, key->tessellation);
 			break;
 		case MESA_SHADER_GEOMETRY:
 			NIR_PASS_V(s, ir3_nir_lower_gs, shader);
diff --git a/src/freedreno/ir3/ir3_nir.h b/src/freedreno/ir3/ir3_nir.h
index dc693b3f556..a42c8822b4a 100644
--- a/src/freedreno/ir3/ir3_nir.h
+++ b/src/freedreno/ir3/ir3_nir.h
@@ -44,7 +44,10 @@ int ir3_nir_coord_offset(nir_ssa_def *ssa);
 bool ir3_nir_lower_tex_prefetch(nir_shader *shader);
 
-void ir3_nir_lower_vs_to_explicit_io(nir_shader *shader, struct ir3_shader *s);
+void ir3_nir_lower_to_explicit_io(nir_shader *shader,
+		struct ir3_shader *s, unsigned topology);
+void ir3_nir_lower_tess_ctrl(nir_shader *shader, struct ir3_shader *s, unsigned topology);
+void ir3_nir_lower_tess_eval(nir_shader *shader, unsigned topology);
 void ir3_nir_lower_gs(nir_shader *shader, struct ir3_shader *s);
 
 const nir_shader_compiler_options *
 ir3_get_compiler_options(struct ir3_compiler *compiler);
diff --git a/src/freedreno/ir3/ir3_nir_lower_tess.c b/src/freedreno/ir3/ir3_nir_lower_tess.c
index acbb02d4108..27fc24c1a0d 100644
--- a/src/freedreno/ir3/ir3_nir_lower_tess.c
+++ b/src/freedreno/ir3/ir3_nir_lower_tess.c
@@ -26,6 +26,8 @@
 #include "compiler/nir/nir_builder.h"
 
 struct state {
+	uint32_t topology;
+
 	struct primitive_map {
 		unsigned loc[32];
 		unsigned size[32];
@@ -40,6 +42,9 @@ struct state {
 	nir_variable *vertex_flags_out;
 
 	nir_variable *output_vars[32];
+
+	nir_ssa_def *outer_levels[4];
+	nir_ssa_def *inner_levels[2];
 };
 
 static nir_ssa_def *
@@ -89,13 +94,18 @@ build_local_offset(nir_builder *b, struct state *state,
 	nir_ssa_def *attr_offset;
 	nir_ssa_def *vertex_stride;
 
-	if (b->shader->info.stage == MESA_SHADER_VERTEX) {
+	switch (b->shader->info.stage) {
+	case MESA_SHADER_VERTEX:
+	case MESA_SHADER_TESS_EVAL:
 		vertex_stride = nir_imm_int(b, state->map.stride * 4);
 		attr_offset = nir_imm_int(b, state->map.loc[base] * 4);
-	} else if (b->shader->info.stage == MESA_SHADER_GEOMETRY) {
+		break;
+	case MESA_SHADER_TESS_CTRL:
+	case MESA_SHADER_GEOMETRY:
 		vertex_stride = nir_load_vs_vertex_stride_ir3(b);
 		attr_offset = nir_load_primitive_location_ir3(b, base);
-	} else {
+		break;
+	default:
 		unreachable("bad shader stage");
 	}
 
@@ -212,7 +222,7 @@ local_thread_id(nir_builder *b)
 }
 
 void
-ir3_nir_lower_vs_to_explicit_io(nir_shader *shader, struct ir3_shader *s)
+ir3_nir_lower_to_explicit_io(nir_shader *shader, struct ir3_shader *s, unsigned topology)
 {
 	struct state state = { };
 
@@ -226,7 +236,10 @@ ir3_nir_lower_vs_to_explicit_io(nir_shader *shader, struct ir3_shader *s)
 	nir_builder_init(&b, impl);
 	b.cursor = nir_before_cf_list(&impl->body);
 
-	state.header = nir_load_gs_header_ir3(&b);
+	if (s->type == MESA_SHADER_VERTEX && topology != IR3_TESS_NONE)
+		state.header = nir_load_tcs_header_ir3(&b);
+	else
+		state.header = nir_load_gs_header_ir3(&b);
 
 	nir_foreach_block_safe(block, impl)
 		lower_vs_block(block, &b, &state);
@@ -237,6 +250,494 @@
 	s->output_size = state.map.stride;
 }
 
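+/* (Annotation, not in the original patch: from the code below, the tess
+ * param BO appears to be laid out attribute-major within each patch, so
+ * the offset build_per_vertex_offset() computes is roughly
+ *
+ *     primitive_id * patch_stride      start of this patch
+ *   + loc[attr]                        start of this attribute's slot
+ *   + vertex * size[attr]              this vertex within the attribute
+ *   + offset * 4                       the intrinsic's own offset source
+ *
+ * where loc[]/size[] come from build_primitive_map(), which is outside
+ * this hunk, so treat the exact units as informed guesswork.)
+ */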
+static nir_ssa_def *
+build_per_vertex_offset(nir_builder *b, struct state *state,
+		nir_ssa_def *vertex, nir_ssa_def *offset, nir_variable *var)
+{
+	nir_ssa_def *primitive_id = nir_load_primitive_id(b);
+	nir_ssa_def *patch_stride = nir_load_hs_patch_stride_ir3(b);
+	nir_ssa_def *patch_offset = nir_imul24(b, primitive_id, patch_stride);
+	nir_ssa_def *attr_offset;
+	int loc = var->data.driver_location;
+
+	switch (b->shader->info.stage) {
+	case MESA_SHADER_TESS_CTRL:
+		attr_offset = nir_imm_int(b, state->map.loc[loc]);
+		break;
+	case MESA_SHADER_TESS_EVAL:
+		attr_offset = nir_load_primitive_location_ir3(b, loc);
+		break;
+	default:
+		unreachable("bad shader stage");
+	}
+
+	nir_ssa_def *attr_stride = nir_imm_int(b, state->map.size[loc]);
+	nir_ssa_def *vertex_offset = nir_imul24(b, vertex, attr_stride);
+
+	return nir_iadd(b, nir_iadd(b, patch_offset, attr_offset),
+			nir_iadd(b, vertex_offset, nir_ishl(b, offset, nir_imm_int(b, 2))));
+}
+
+static nir_ssa_def *
+build_patch_offset(nir_builder *b, struct state *state, nir_ssa_def *offset, nir_variable *var)
+{
+	debug_assert(var && var->data.patch);
+
+	return build_per_vertex_offset(b, state, nir_imm_int(b, 0), offset, var);
+}
+
+static nir_ssa_def *
+build_tessfactor_base(nir_builder *b, gl_varying_slot slot, struct state *state)
+{
+	uint32_t inner_levels, outer_levels;
+	switch (state->topology) {
+	case IR3_TESS_TRIANGLES:
+		inner_levels = 1;
+		outer_levels = 3;
+		break;
+	case IR3_TESS_QUADS:
+		inner_levels = 2;
+		outer_levels = 4;
+		break;
+	case IR3_TESS_ISOLINES:
+		inner_levels = 0;
+		outer_levels = 2;
+		break;
+	default:
+		unreachable("bad");
+	}
+
+	const uint32_t patch_stride = 1 + inner_levels + outer_levels;
+
+	nir_ssa_def *primitive_id = nir_load_primitive_id(b);
+
+	nir_ssa_def *patch_offset = nir_imul24(b, primitive_id, nir_imm_int(b, patch_stride));
+
+	uint32_t offset;
+	switch (slot) {
+	case VARYING_SLOT_TESS_LEVEL_OUTER:
+		/* There's some kind of header dword, tess levels start at index 1. */
+		offset = 1;
+		break;
+	case VARYING_SLOT_TESS_LEVEL_INNER:
+		offset = 1 + outer_levels;
+		break;
+	default:
+		unreachable("bad");
+	}
+
+	return nir_iadd(b, patch_offset, nir_imm_int(b, offset));
+}
+
+static void
+lower_tess_ctrl_block(nir_block *block, nir_builder *b, struct state *state)
+{
+	nir_foreach_instr_safe(instr, block) {
+		if (instr->type != nir_instr_type_intrinsic)
+			continue;
+
+		nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+
+		switch (intr->intrinsic) {
+		case nir_intrinsic_load_invocation_id:
+			b->cursor = nir_before_instr(&intr->instr);
+
+			nir_ssa_def *invocation_id = build_invocation_id(b, state);
+			nir_ssa_def_rewrite_uses(&intr->dest.ssa,
+					nir_src_for_ssa(invocation_id));
+			nir_instr_remove(&intr->instr);
+			break;
+
+		case nir_intrinsic_barrier:
+			/* Hull shaders dispatch 32 wide so an entire patch will always
+			 * fit in a single warp and execute in lock-step. Consequently,
+			 * we don't need to do anything for TCS barriers so just remove
+			 * the intrinsic. Otherwise we'll emit an actual barrier
+			 * instruction, which will deadlock.
+			 */
+			nir_instr_remove(&intr->instr);
+			break;
+
+		case nir_intrinsic_load_per_vertex_output: {
+			// src[] = { vertex, offset }.
+
+			b->cursor = nir_before_instr(&intr->instr);
+
+			nir_ssa_def *address = nir_load_tess_param_base_ir3(b);
+			nir_variable *var = get_var(&b->shader->outputs, nir_intrinsic_base(intr));
+			nir_ssa_def *offset = build_per_vertex_offset(b, state,
+					intr->src[0].ssa, intr->src[1].ssa, var);
+
+			replace_intrinsic(b, intr, nir_intrinsic_load_global_ir3, address, offset, NULL);
+			break;
+		}
+
+		case nir_intrinsic_store_per_vertex_output: {
+			// src[] = { value, vertex, offset }.
+
+			b->cursor = nir_before_instr(&intr->instr);
+
+			nir_ssa_def *value = intr->src[0].ssa;
+			nir_ssa_def *address = nir_load_tess_param_base_ir3(b);
+			nir_variable *var = get_var(&b->shader->outputs, nir_intrinsic_base(intr));
+			nir_ssa_def *offset = build_per_vertex_offset(b, state,
+					intr->src[1].ssa, intr->src[2].ssa, var);
+
+			nir_intrinsic_instr *store =
+				replace_intrinsic(b, intr, nir_intrinsic_store_global_ir3, value, address,
+						nir_iadd(b, offset, nir_imm_int(b, nir_intrinsic_component(intr))));
+
+			nir_intrinsic_set_write_mask(store, nir_intrinsic_write_mask(intr));
+
+			break;
+		}
+
+		case nir_intrinsic_load_per_vertex_input: {
+			// src[] = { vertex, offset }.
+
+			b->cursor = nir_before_instr(&intr->instr);
+
+			nir_ssa_def *offset = build_local_offset(b, state,
+					intr->src[0].ssa, // this is typically gl_InvocationID
+					nir_intrinsic_base(intr),
+					intr->src[1].ssa);
+
+			replace_intrinsic(b, intr, nir_intrinsic_load_shared_ir3, offset, NULL, NULL);
+			break;
+		}
+
+		case nir_intrinsic_load_tess_level_inner:
+		case nir_intrinsic_load_tess_level_outer: {
+			b->cursor = nir_before_instr(&intr->instr);
+
+			gl_varying_slot slot;
+			if (intr->intrinsic == nir_intrinsic_load_tess_level_inner)
+				slot = VARYING_SLOT_TESS_LEVEL_INNER;
+			else
+				slot = VARYING_SLOT_TESS_LEVEL_OUTER;
+
+			nir_ssa_def *address = nir_load_tess_factor_base_ir3(b);
+			nir_ssa_def *offset = build_tessfactor_base(b, slot, state);
+
+			replace_intrinsic(b, intr, nir_intrinsic_load_global_ir3, address, offset, NULL);
+			break;
+		}
+
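+		/* (Annotation, not in the original patch: in a TCS, plain
+		 * load_output/store_output refer to the per-patch outputs,
+		 * including the tess levels; per-vertex outputs use the
+		 * *_per_vertex_output intrinsics handled above. The two cases
+		 * below route per-patch data to the tess param BO, except for
+		 * the tess levels, which are captured in state->outer_levels /
+		 * state->inner_levels and written out by the epilogue instead.)
+		 */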
+		case nir_intrinsic_load_output: {
+			// src[] = { offset }.
+
+			nir_variable *var = get_var(&b->shader->outputs, nir_intrinsic_base(intr));
+
+			b->cursor = nir_before_instr(&intr->instr);
+
+			nir_ssa_def *address = nir_load_tess_param_base_ir3(b);
+			nir_ssa_def *offset = build_patch_offset(b, state, intr->src[0].ssa, var);
+
+			replace_intrinsic(b, intr, nir_intrinsic_load_global_ir3, address, offset, NULL);
+			break;
+		}
+
+		case nir_intrinsic_store_output: {
+			// src[] = { value, offset }.
+
+			/* write patch output to bo */
+
+			nir_variable *var = get_var(&b->shader->outputs, nir_intrinsic_base(intr));
+
+			nir_ssa_def **levels = NULL;
+			if (var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER)
+				levels = state->outer_levels;
+			else if (var->data.location == VARYING_SLOT_TESS_LEVEL_INNER)
+				levels = state->inner_levels;
+
+			b->cursor = nir_before_instr(&intr->instr);
+
+			if (levels) {
+				for (int i = 0; i < 4; i++)
+					if (nir_intrinsic_write_mask(intr) & (1 << i))
+						levels[i] = nir_channel(b, intr->src[0].ssa, i);
+				nir_instr_remove(&intr->instr);
+			} else {
+				nir_ssa_def *address = nir_load_tess_param_base_ir3(b);
+				nir_ssa_def *offset = build_patch_offset(b, state, intr->src[1].ssa, var);
+
+				debug_assert(nir_intrinsic_component(intr) == 0);
+
+				nir_intrinsic_instr *store =
+					replace_intrinsic(b, intr, nir_intrinsic_store_global_ir3,
+							intr->src[0].ssa, address, offset);
+
+				nir_intrinsic_set_write_mask(store, nir_intrinsic_write_mask(intr));
+			}
+			break;
+		}
+
+		default:
+			break;
+		}
+	}
+}
+
+static void
+emit_tess_epilogue(nir_builder *b, struct state *state)
+{
+	nir_ssa_def *tessfactor_address = nir_load_tess_factor_base_ir3(b);
+	nir_ssa_def *levels[2];
+
+	/* Emit the epilogue that actually writes out the tessellation levels
+	 * to the BOs.
+	 */
+	switch (state->topology) {
+	case IR3_TESS_TRIANGLES:
+		levels[0] = nir_vec4(b, state->outer_levels[0], state->outer_levels[1],
+				state->outer_levels[2], state->inner_levels[0]);
+		levels[1] = NULL;
+		break;
+	case IR3_TESS_QUADS:
+		levels[0] = nir_vec4(b, state->outer_levels[0], state->outer_levels[1],
+				state->outer_levels[2], state->outer_levels[3]);
+		levels[1] = nir_vec2(b, state->inner_levels[0], state->inner_levels[1]);
+		break;
+	case IR3_TESS_ISOLINES:
+		levels[0] = nir_vec2(b, state->outer_levels[0], state->outer_levels[1]);
+		levels[1] = NULL;
+		break;
+	default:
+		unreachable("nope");
+	}
+
+	nir_ssa_def *offset = build_tessfactor_base(b, VARYING_SLOT_TESS_LEVEL_OUTER, state);
+
+	nir_intrinsic_instr *store =
+		nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_global_ir3);
+
+	store->src[0] = nir_src_for_ssa(levels[0]);
+	store->src[1] = nir_src_for_ssa(tessfactor_address);
+	store->src[2] = nir_src_for_ssa(offset);
+	nir_builder_instr_insert(b, &store->instr);
+	store->num_components = levels[0]->num_components;
+	nir_intrinsic_set_write_mask(store, (1 << levels[0]->num_components) - 1);
+
+	if (levels[1]) {
+		store = nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_global_ir3);
+		offset = nir_iadd(b, offset, nir_imm_int(b, levels[0]->num_components));
+
+		store->src[0] = nir_src_for_ssa(levels[1]);
+		store->src[1] = nir_src_for_ssa(tessfactor_address);
+		store->src[2] = nir_src_for_ssa(offset);
+		nir_builder_instr_insert(b, &store->instr);
+		store->num_components = levels[1]->num_components;
+		nir_intrinsic_set_write_mask(store, (1 << levels[1]->num_components) - 1);
+	}
+
+	/* Finally, insert the endpatch instruction, maybe signalling the tess
+	 * engine that another primitive is ready?
+	 */
+
+	nir_intrinsic_instr *end_patch =
+		nir_intrinsic_instr_create(b->shader, nir_intrinsic_end_patch_ir3);
+	nir_builder_instr_insert(b, &end_patch->instr);
+}
+
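+/* (Annotation, not in the original patch, derived from
+ * build_tessfactor_base() and the stores above, so treat the exact layout
+ * as informed guesswork: for IR3_TESS_QUADS, patch_stride = 1 + 2 + 4 = 7
+ * dwords, and patch p's record in the tess factor BO looks like
+ *
+ *     dword 7*p + 0       header (unknown contents)
+ *     dword 7*p + 1..4    gl_TessLevelOuter[0..3]   (one vec4 store)
+ *     dword 7*p + 5..6    gl_TessLevelInner[0..1]   (one vec2 store)
+ *
+ * Triangles pack outer[0..2] plus inner[0] into the single vec4 store at
+ * dword 5*p + 1; isolines store outer[0..1] at dword 3*p + 1.)
+ */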
+ */ + + nir_intrinsic_instr *end_patch = + nir_intrinsic_instr_create(b->shader, nir_intrinsic_end_patch_ir3); + nir_builder_instr_insert(b, &end_patch->instr); +} + +void +ir3_nir_lower_tess_ctrl(nir_shader *shader, struct ir3_shader *s, unsigned topology) +{ + struct state state = { .topology = topology }; + + if (shader_debug_enabled(shader->info.stage)) { + fprintf(stderr, "NIR (before tess lowering) for %s shader:\n", + _mesa_shader_stage_to_string(shader->info.stage)); + nir_print_shader(shader, stderr); + } + + build_primitive_map(shader, &state.map, &shader->outputs); + memcpy(s->output_loc, state.map.loc, sizeof(s->output_loc)); + s->output_size = state.map.stride; + + nir_function_impl *impl = nir_shader_get_entrypoint(shader); + assert(impl); + + nir_builder b; + nir_builder_init(&b, impl); + b.cursor = nir_before_cf_list(&impl->body); + + state.header = nir_load_tcs_header_ir3(&b); + + nir_foreach_block_safe(block, impl) + lower_tess_ctrl_block(block, &b, &state); + + /* Now move the body of the TCS into a conditional: + * + * if (gl_InvocationID < num_vertices) + * // body + * + */ + + nir_cf_list body; + nir_cf_extract(&body, nir_before_cf_list(&impl->body), + nir_after_cf_list(&impl->body)); + + b.cursor = nir_after_cf_list(&impl->body); + + /* Re-emit the header, since the old one got moved into the if branch */ + state.header = nir_load_tcs_header_ir3(&b); + nir_ssa_def *iid = build_invocation_id(&b, &state); + + const uint32_t nvertices = shader->info.tess.tcs_vertices_out; + nir_ssa_def *cond = nir_ult(&b, iid, nir_imm_int(&b, nvertices)); + + nir_if *nif = nir_push_if(&b, cond); + + nir_cf_reinsert(&body, b.cursor); + + b.cursor = nir_after_cf_list(&nif->then_list); + + /* Insert conditional exit for threads invocation id != 0 */ + nir_ssa_def *iid0_cond = nir_ieq(&b, iid, nir_imm_int(&b, 0)); + nir_intrinsic_instr *cond_end = + nir_intrinsic_instr_create(shader, nir_intrinsic_cond_end_ir3); + cond_end->src[0] = nir_src_for_ssa(iid0_cond); + nir_builder_instr_insert(&b, &cond_end->instr); + + emit_tess_epilouge(&b, &state); + + nir_pop_if(&b, nif); + + nir_metadata_preserve(impl, 0); +} + + +static void +lower_tess_eval_block(nir_block *block, nir_builder *b, struct state *state) +{ + nir_foreach_instr_safe(instr, block) { + if (instr->type != nir_instr_type_intrinsic) + continue; + + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + + switch (intr->intrinsic) { + case nir_intrinsic_load_tess_coord: { + b->cursor = nir_after_instr(&intr->instr); + nir_ssa_def *x = nir_channel(b, &intr->dest.ssa, 0); + nir_ssa_def *y = nir_channel(b, &intr->dest.ssa, 1); + nir_ssa_def *z; + + if (state->topology == IR3_TESS_TRIANGLES) + z = nir_fsub(b, nir_fsub(b, nir_imm_float(b, 1.0f), y), x); + else + z = nir_imm_float(b, 0.0f); + + nir_ssa_def *coord = nir_vec3(b, x, y, z); + + nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, + nir_src_for_ssa(coord), + b->cursor.instr); + break; + } + + case nir_intrinsic_load_per_vertex_input: { + // src[] = { vertex, offset }. 
+static void
+lower_tess_eval_block(nir_block *block, nir_builder *b, struct state *state)
+{
+	nir_foreach_instr_safe(instr, block) {
+		if (instr->type != nir_instr_type_intrinsic)
+			continue;
+
+		nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+
+		switch (intr->intrinsic) {
+		case nir_intrinsic_load_tess_coord: {
+			b->cursor = nir_after_instr(&intr->instr);
+			nir_ssa_def *x = nir_channel(b, &intr->dest.ssa, 0);
+			nir_ssa_def *y = nir_channel(b, &intr->dest.ssa, 1);
+			nir_ssa_def *z;
+
+			if (state->topology == IR3_TESS_TRIANGLES)
+				z = nir_fsub(b, nir_fsub(b, nir_imm_float(b, 1.0f), y), x);
+			else
+				z = nir_imm_float(b, 0.0f);
+
+			nir_ssa_def *coord = nir_vec3(b, x, y, z);
+
+			nir_ssa_def_rewrite_uses_after(&intr->dest.ssa,
+					nir_src_for_ssa(coord),
+					b->cursor.instr);
+			break;
+		}
+
+		case nir_intrinsic_load_per_vertex_input: {
+			// src[] = { vertex, offset }.
+
+			b->cursor = nir_before_instr(&intr->instr);
+
+			nir_ssa_def *address = nir_load_tess_param_base_ir3(b);
+			nir_variable *var = get_var(&b->shader->inputs, nir_intrinsic_base(intr));
+			nir_ssa_def *offset = build_per_vertex_offset(b, state,
+					intr->src[0].ssa, intr->src[1].ssa, var);
+
+			replace_intrinsic(b, intr, nir_intrinsic_load_global_ir3, address, offset, NULL);
+			break;
+		}
+
+		case nir_intrinsic_load_tess_level_inner:
+		case nir_intrinsic_load_tess_level_outer: {
+			b->cursor = nir_before_instr(&intr->instr);
+
+			gl_varying_slot slot;
+			if (intr->intrinsic == nir_intrinsic_load_tess_level_inner)
+				slot = VARYING_SLOT_TESS_LEVEL_INNER;
+			else
+				slot = VARYING_SLOT_TESS_LEVEL_OUTER;
+
+			nir_ssa_def *address = nir_load_tess_factor_base_ir3(b);
+			nir_ssa_def *offset = build_tessfactor_base(b, slot, state);
+
+			/* Loading across a vec4 (16 byte) memory boundary is
+			 * problematic if we don't use components from the second
+			 * vec4. The tess levels aren't guaranteed to be vec4
+			 * aligned and we don't know which levels are actually
+			 * used, so we load each component individually.
+			 */
+			nir_ssa_def *levels[4];
+			for (unsigned i = 0; i < intr->num_components; i++) {
+				nir_intrinsic_instr *new_intr =
+					nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_global_ir3);
+
+				new_intr->src[0] = nir_src_for_ssa(address);
+				new_intr->src[1] = nir_src_for_ssa(nir_iadd(b, offset, nir_imm_int(b, i)));
+				new_intr->num_components = 1;
+				nir_ssa_dest_init(&new_intr->instr, &new_intr->dest, 1, 32, NULL);
+				nir_builder_instr_insert(b, &new_intr->instr);
+				levels[i] = &new_intr->dest.ssa;
+			}
+
+			nir_ssa_def *v = nir_vec(b, levels, intr->num_components);
+
+			nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(v));
+
+			nir_instr_remove(&intr->instr);
+			break;
+		}
+
+		case nir_intrinsic_load_input: {
+			// src[] = { offset }.
+
+			nir_variable *var = get_var(&b->shader->inputs, nir_intrinsic_base(intr));
+
+			debug_assert(var->data.patch);
+
+			b->cursor = nir_before_instr(&intr->instr);
+
+			nir_ssa_def *address = nir_load_tess_param_base_ir3(b);
+			nir_ssa_def *offset = build_patch_offset(b, state, intr->src[0].ssa, var);
+
+			replace_intrinsic(b, intr, nir_intrinsic_load_global_ir3, address, offset, NULL);
+			break;
+		}
+
+		default:
+			break;
+		}
+	}
+}
+
+void
+ir3_nir_lower_tess_eval(nir_shader *shader, unsigned topology)
+{
+	struct state state = { .topology = topology };
+
+	if (shader_debug_enabled(shader->info.stage)) {
+		fprintf(stderr, "NIR (before tess lowering) for %s shader:\n",
+				_mesa_shader_stage_to_string(shader->info.stage));
+		nir_print_shader(shader, stderr);
+	}
+
+	/* Build map of inputs so we have the sizes. */
+	build_primitive_map(shader, &state.map, &shader->inputs);
+
+	nir_function_impl *impl = nir_shader_get_entrypoint(shader);
+	assert(impl);
+
+	nir_builder b;
+	nir_builder_init(&b, impl);
+
+	nir_foreach_block_safe(block, impl)
+		lower_tess_eval_block(block, &b, &state);
+
+	nir_metadata_preserve(impl, 0);
+}
+
 static void
 lower_gs_block(nir_block *block, nir_builder *b, struct state *state)
 {
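 	/* (Annotation, not part of the patch, placed here at the end of the
 	 * excerpt: a worked example for the per-component tess level loads in
 	 * lower_tess_eval_block() above. For IR3_TESS_TRIANGLES, patch 0's
 	 * outer levels start at dword 1 of the tess factor BO, so a single
 	 * vec4 load would cover dwords 1..4 (bytes 4..19) and cross the
 	 * 16 byte boundary at dword 4; when the components from the second
 	 * vec4 go unused, that is exactly the problematic case the comment
 	 * there describes, and loading dword by dword avoids it. The
 	 * lower_gs_block() function beginning here predates this patch and
 	 * appears only as trailing diff context.)
 	 */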