From eb3047c094abfa03e071453d7c373e9c2c574370 Mon Sep 17 00:00:00 2001 From: Erik Faye-Lund Date: Wed, 2 Oct 2019 16:19:08 -0400 Subject: [PATCH] nir: support lowering clipdist to arrays MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit This allows us to make sure clipdist is emitted as a scalar array rather than two vec4s. This matches SPIR-V semantics, and will be useful for Zink. Reviewed-by: Marek Olšák --- src/broadcom/compiler/vir.c | 5 +- src/compiler/nir/nir.h | 10 +- src/compiler/nir/nir_lower_clip.c | 99 +++++++++++++------ src/freedreno/ir3/ir3_nir.c | 4 +- src/gallium/drivers/iris/iris_program.c | 8 +- src/gallium/drivers/vc4/vc4_program.c | 5 +- .../drivers/dri/i965/brw_nir_uniforms.cpp | 2 +- 7 files changed, 89 insertions(+), 44 deletions(-) diff --git a/src/broadcom/compiler/vir.c b/src/broadcom/compiler/vir.c index 5d929c2412a..afba863b0ec 100644 --- a/src/broadcom/compiler/vir.c +++ b/src/broadcom/compiler/vir.c @@ -825,7 +825,7 @@ v3d_nir_lower_vs_late(struct v3d_compile *c) if (c->key->ucp_enables) { NIR_PASS_V(c->s, nir_lower_clip_vs, c->key->ucp_enables, - false); + false, false); NIR_PASS_V(c->s, nir_lower_io_to_scalar, nir_var_shader_out); } @@ -850,7 +850,8 @@ v3d_nir_lower_fs_late(struct v3d_compile *c) } if (c->key->ucp_enables) - NIR_PASS_V(c->s, nir_lower_clip_fs, c->key->ucp_enables); + NIR_PASS_V(c->s, nir_lower_clip_fs, c->key->ucp_enables, + false); /* Note: FS input scalarizing must happen after * nir_lower_two_sided_color, which only handles a vec4 at a time. 
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 77c877d6a79..a1f233917bd 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -3906,9 +3906,13 @@ bool nir_lower_idiv(nir_shader *shader); bool nir_lower_input_attachments(nir_shader *shader, bool use_fragcoord_sysval); -bool nir_lower_clip_vs(nir_shader *shader, unsigned ucp_enables, bool use_vars); -bool nir_lower_clip_gs(nir_shader *shader, unsigned ucp_enables); -bool nir_lower_clip_fs(nir_shader *shader, unsigned ucp_enables); +bool nir_lower_clip_vs(nir_shader *shader, unsigned ucp_enables, + bool use_vars, + bool use_clipdist_array); +bool nir_lower_clip_gs(nir_shader *shader, unsigned ucp_enables, + bool use_clipdist_array); +bool nir_lower_clip_fs(nir_shader *shader, unsigned ucp_enables, + bool use_clipdist_array); bool nir_lower_clip_cull_distance_arrays(nir_shader *nir); void nir_lower_point_size_mov(nir_shader *shader, diff --git a/src/compiler/nir/nir_lower_clip.c b/src/compiler/nir/nir_lower_clip.c index 26c3e64856b..0da36d79ba0 100644 --- a/src/compiler/nir/nir_lower_clip.c +++ b/src/compiler/nir/nir_lower_clip.c @@ -42,17 +42,23 @@ static nir_variable * create_clipdist_var(nir_shader *shader, unsigned drvloc, - bool output, gl_varying_slot slot) + bool output, gl_varying_slot slot, unsigned array_size) { nir_variable *var = rzalloc(shader, nir_variable); var->data.driver_location = drvloc; - var->type = glsl_vec4_type(); var->data.mode = output ? 
nir_var_shader_out : nir_var_shader_in; var->name = ralloc_asprintf(var, "clipdist_%d", drvloc); var->data.index = 0; var->data.location = slot; + if (array_size > 0) { + var->type = glsl_array_type(glsl_float_type(), array_size, + sizeof(float)); + var->data.compact = 1; + } else + var->type = glsl_vec4_type(); + if (output) { exec_list_push_tail(&shader->outputs, &var->node); shader->num_outputs++; /* TODO use type_size() */ @@ -66,16 +72,24 @@ create_clipdist_var(nir_shader *shader, unsigned drvloc, static void create_clipdist_vars(nir_shader *shader, nir_variable **io_vars, - unsigned ucp_enables, int *drvloc, bool output) + unsigned ucp_enables, int *drvloc, bool output, + bool use_clipdist_array) { - if (ucp_enables & 0x0f) + if (use_clipdist_array) { io_vars[0] = - create_clipdist_var(shader, ++(*drvloc), output, - VARYING_SLOT_CLIP_DIST0); - if (ucp_enables & 0xf0) - io_vars[1] = - create_clipdist_var(shader, ++(*drvloc), output, - VARYING_SLOT_CLIP_DIST1); + create_clipdist_var(shader, ++(*drvloc), output, + VARYING_SLOT_CLIP_DIST0, + util_last_bit(ucp_enables)); + } else { + if (ucp_enables & 0x0f) + io_vars[0] = + create_clipdist_var(shader, ++(*drvloc), output, + VARYING_SLOT_CLIP_DIST0, 0); + if (ucp_enables & 0xf0) + io_vars[1] = + create_clipdist_var(shader, ++(*drvloc), output, + VARYING_SLOT_CLIP_DIST1, 0); + } } static void @@ -192,7 +206,8 @@ find_clipvertex_and_position_outputs(nir_shader *shader, static void lower_clip_outputs(nir_builder *b, nir_variable *position, nir_variable *clipvertex, nir_variable **out, - unsigned ucp_enables, bool use_vars) + unsigned ucp_enables, bool use_vars, + bool use_clipdist_array) { nir_ssa_def *clipdist[MAX_CLIP_PLANES]; nir_ssa_def *cv; @@ -224,18 +239,28 @@ lower_clip_outputs(nir_builder *b, nir_variable *position, /* 0.0 == don't-clip == disabled: */ clipdist[plane] = nir_imm_float(b, 0.0); } + if (use_clipdist_array && plane < util_last_bit(ucp_enables)) { + assert(use_vars); + nir_deref_instr *deref; +
deref = nir_build_deref_array_imm(b, + nir_build_deref_var(b, out[0]), + plane); + nir_store_deref(b, deref, clipdist[plane], 1); + } } - if (use_vars) { - if (ucp_enables & 0x0f) - nir_store_var(b, out[0], nir_vec(b, clipdist, 4), 0xf); - if (ucp_enables & 0xf0) - nir_store_var(b, out[1], nir_vec(b, &clipdist[4], 4), 0xf); - } else { - if (ucp_enables & 0x0f) - store_clipdist_output(b, out[0], &clipdist[0]); - if (ucp_enables & 0xf0) - store_clipdist_output(b, out[1], &clipdist[4]); + if (!use_clipdist_array) { + if (use_vars) { + if (ucp_enables & 0x0f) + nir_store_var(b, out[0], nir_vec(b, clipdist, 4), 0xf); + if (ucp_enables & 0xf0) + nir_store_var(b, out[1], nir_vec(b, &clipdist[4], 4), 0xf); + } else { + if (ucp_enables & 0x0f) + store_clipdist_output(b, out[0], &clipdist[0]); + if (ucp_enables & 0xf0) + store_clipdist_output(b, out[1], &clipdist[4]); + } } } @@ -248,9 +273,13 @@ lower_clip_outputs(nir_builder *b, nir_variable *position, * * If use_vars is true, the pass will use variable loads and stores instead * of working with store_output intrinsics. + * + * If use_clipdist_array is true, the pass will use compact arrays for the + * clipdist output instead of two vec4s. 
*/ bool -nir_lower_clip_vs(nir_shader *shader, unsigned ucp_enables, bool use_vars) +nir_lower_clip_vs(nir_shader *shader, unsigned ucp_enables, bool use_vars, + bool use_clipdist_array) { nir_function_impl *impl = nir_shader_get_entrypoint(shader); nir_builder b; @@ -281,9 +310,11 @@ nir_lower_clip_vs(nir_shader *shader, unsigned ucp_enables, bool use_vars) return false; /* insert CLIPDIST outputs */ - create_clipdist_vars(shader, out, ucp_enables, &maxloc, true); + create_clipdist_vars(shader, out, ucp_enables, &maxloc, true, + use_clipdist_array); - lower_clip_outputs(&b, position, clipvertex, out, ucp_enables, use_vars); + lower_clip_outputs(&b, position, clipvertex, out, ucp_enables, use_vars, + use_clipdist_array); nir_metadata_preserve(impl, nir_metadata_dominance); @@ -293,7 +324,7 @@ nir_lower_clip_vs(nir_shader *shader, unsigned ucp_enables, bool use_vars) static void lower_clip_in_gs_block(nir_builder *b, nir_block *block, nir_variable *position, nir_variable *clipvertex, nir_variable **out, - unsigned ucp_enables) + unsigned ucp_enables, bool use_clipdist_array) { nir_foreach_instr_safe(instr, block) { if (instr->type != nir_instr_type_intrinsic) @@ -304,7 +335,8 @@ lower_clip_in_gs_block(nir_builder *b, nir_block *block, nir_variable *position, case nir_intrinsic_emit_vertex_with_counter: case nir_intrinsic_emit_vertex: b->cursor = nir_before_instr(instr); - lower_clip_outputs(b, position, clipvertex, out, ucp_enables, true); + lower_clip_outputs(b, position, clipvertex, out, ucp_enables, true, + use_clipdist_array); break; default: /* not interesting; skip this */ @@ -318,7 +350,8 @@ lower_clip_in_gs_block(nir_builder *b, nir_block *block, nir_variable *position, */ bool -nir_lower_clip_gs(nir_shader *shader, unsigned ucp_enables) +nir_lower_clip_gs(nir_shader *shader, unsigned ucp_enables, + bool use_clipdist_array) { nir_function_impl *impl = nir_shader_get_entrypoint(shader); nir_builder b; @@ -335,12 +368,14 @@ nir_lower_clip_gs(nir_shader 
*shader, unsigned ucp_enables) return false; /* insert CLIPDIST outputs */ - create_clipdist_vars(shader, out, ucp_enables, &maxloc, true); + create_clipdist_vars(shader, out, ucp_enables, &maxloc, true, + use_clipdist_array); nir_builder_init(&b, impl); nir_foreach_block(block, impl) - lower_clip_in_gs_block(&b, block, position, clipvertex, out, ucp_enables); + lower_clip_in_gs_block(&b, block, position, clipvertex, out, + ucp_enables, use_clipdist_array); nir_metadata_preserve(impl, nir_metadata_dominance); @@ -388,7 +423,8 @@ lower_clip_fs(nir_function_impl *impl, unsigned ucp_enables, /* insert conditional kill based on interpolated CLIPDIST */ bool -nir_lower_clip_fs(nir_shader *shader, unsigned ucp_enables) +nir_lower_clip_fs(nir_shader *shader, unsigned ucp_enables, + bool use_clipdist_array) { nir_variable *in[2]; int maxloc = -1; @@ -410,7 +446,8 @@ nir_lower_clip_fs(nir_shader *shader, unsigned ucp_enables) * must add our own: */ /* insert CLIPDIST inputs */ - create_clipdist_vars(shader, in, ucp_enables, &maxloc, false); + create_clipdist_vars(shader, in, ucp_enables, &maxloc, false, + use_clipdist_array); nir_foreach_function(function, shader) { if (!strcmp(function->name, "main")) diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c index b729692bdff..dbf25926ac5 100644 --- a/src/freedreno/ir3/ir3_nir.c +++ b/src/freedreno/ir3/ir3_nir.c @@ -223,11 +223,11 @@ ir3_optimize_nir(struct ir3_shader *shader, nir_shader *s, if (key) { if (s->info.stage == MESA_SHADER_VERTEX) { - OPT_V(s, nir_lower_clip_vs, key->ucp_enables, false); + OPT_V(s, nir_lower_clip_vs, key->ucp_enables, false, false); if (key->vclamp_color) OPT_V(s, nir_lower_clamp_color_outputs); } else if (s->info.stage == MESA_SHADER_FRAGMENT) { - OPT_V(s, nir_lower_clip_fs, key->ucp_enables); + OPT_V(s, nir_lower_clip_fs, key->ucp_enables, false); if (key->fclamp_color) OPT_V(s, nir_lower_clamp_color_outputs); } diff --git a/src/gallium/drivers/iris/iris_program.c 
b/src/gallium/drivers/iris/iris_program.c index fbf6d685382..fc4f2d2949d 100644 --- a/src/gallium/drivers/iris/iris_program.c +++ b/src/gallium/drivers/iris/iris_program.c @@ -938,7 +938,8 @@ iris_compile_vs(struct iris_context *ice, if (key->nr_userclip_plane_consts) { nir_function_impl *impl = nir_shader_get_entrypoint(nir); - nir_lower_clip_vs(nir, (1 << key->nr_userclip_plane_consts) - 1, true); + nir_lower_clip_vs(nir, (1 << key->nr_userclip_plane_consts) - 1, true, + false); nir_lower_io_to_temporaries(nir, impl, true, false); nir_lower_global_vars_to_local(nir); nir_lower_vars_to_ssa(nir); @@ -1281,7 +1282,8 @@ iris_compile_tes(struct iris_context *ice, if (key->nr_userclip_plane_consts) { nir_function_impl *impl = nir_shader_get_entrypoint(nir); - nir_lower_clip_vs(nir, (1 << key->nr_userclip_plane_consts) - 1, true); + nir_lower_clip_vs(nir, (1 << key->nr_userclip_plane_consts) - 1, true, + false); nir_lower_io_to_temporaries(nir, impl, true, false); nir_lower_global_vars_to_local(nir); nir_lower_vars_to_ssa(nir); @@ -1401,7 +1403,7 @@ iris_compile_gs(struct iris_context *ice, if (key->nr_userclip_plane_consts) { nir_function_impl *impl = nir_shader_get_entrypoint(nir); - nir_lower_clip_gs(nir, (1 << key->nr_userclip_plane_consts) - 1); + nir_lower_clip_gs(nir, (1 << key->nr_userclip_plane_consts) - 1, false); nir_lower_io_to_temporaries(nir, impl, true, false); nir_lower_global_vars_to_local(nir); nir_lower_vars_to_ssa(nir); diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index 08487bea3a6..4568100c104 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -2313,10 +2313,11 @@ vc4_shader_ntq(struct vc4_context *vc4, enum qstage stage, if (c->key->ucp_enables) { if (stage == QSTAGE_FRAG) { - NIR_PASS_V(c->s, nir_lower_clip_fs, c->key->ucp_enables); + NIR_PASS_V(c->s, nir_lower_clip_fs, + c->key->ucp_enables, false); } else { NIR_PASS_V(c->s, nir_lower_clip_vs, - 
c->key->ucp_enables, false); + c->key->ucp_enables, false, false); NIR_PASS_V(c->s, nir_lower_io_to_scalar, nir_var_shader_out); } diff --git a/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp b/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp index e31c91175c9..995b229e101 100644 --- a/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp +++ b/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp @@ -407,7 +407,7 @@ brw_nir_lower_legacy_clipping(nir_shader *nir, int nr_userclip_plane_consts, nir_function_impl *impl = nir_shader_get_entrypoint(nir); - nir_lower_clip_vs(nir, (1 << nr_userclip_plane_consts) - 1, true); + nir_lower_clip_vs(nir, (1 << nr_userclip_plane_consts) - 1, true, false); nir_lower_io_to_temporaries(nir, impl, true, false); nir_lower_global_vars_to_local(nir); nir_lower_vars_to_ssa(nir); -- 2.30.2