From 56e3c70b56997f3d3861e994b22e7b770c5a4da8 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Tue, 23 Jul 2019 18:55:47 -0400 Subject: [PATCH] radeonsi/nir: accurately set output_usagemask (v2) v2: fix doubles --- src/gallium/drivers/radeonsi/si_shader_nir.c | 53 ++++++++++++++++++-- 1 file changed, 49 insertions(+), 4 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c b/src/gallium/drivers/radeonsi/si_shader_nir.c index 56ab23a2c89..96ca344e3cc 100644 --- a/src/gallium/drivers/radeonsi/si_shader_nir.c +++ b/src/gallium/drivers/radeonsi/si_shader_nir.c @@ -113,6 +113,48 @@ static void gather_intrinsic_load_deref_output_info(const nir_shader *nir, } } +static void gather_intrinsic_store_deref_output_info(const nir_shader *nir, + const nir_intrinsic_instr *instr, + nir_variable *var, + struct tgsi_shader_info *info) +{ /* Accumulate into info->output_usagemask the components that this store_deref to output variable var writes, for every slot counted by glsl_count_attribute_slots starting at var->data.driver_location. Only the VS, TES and GS stages are handled; for any other stage nothing is recorded. */ + assert(var && var->data.mode == nir_var_shader_out); + + switch (nir->info.stage) { + case MESA_SHADER_VERTEX: /* needed by LS, ES */ + case MESA_SHADER_TESS_EVAL: /* needed by ES */ + case MESA_SHADER_GEOMETRY: { + unsigned i = var->data.driver_location; /* slot index into output_usagemask; incremented together with j in the loop below */ + unsigned attrib_count = glsl_count_attribute_slots(var->type, false); + unsigned mask = nir_intrinsic_write_mask(instr); /* write mask taken from the store intrinsic */ + + assert(!var->data.compact); + + for (unsigned j = 0; j < attrib_count; j++, i++) { + if (glsl_type_is_64bit(glsl_without_array(var->type))) { + unsigned dmask = mask; /* 64-bit case: each dmask bit becomes a pair of channels below (bit 0 -> XY, bit 1 -> ZW) */ + + if (glsl_type_is_dual_slot(glsl_without_array(var->type)) && j % 2) + dmask >>= 2; /* odd slot of a dual-slot type: use mask bits 2 and up instead of bits 0-1 */ + + dmask <<= var->data.location_frac / 2; /* location_frac is halved because dmask bits stand for channel pairs */ + + if (dmask & 0x1) + info->output_usagemask[i] |= TGSI_WRITEMASK_XY; + if (dmask & 0x2) + info->output_usagemask[i] |= TGSI_WRITEMASK_ZW; + } else { + info->output_usagemask[i] |= + (mask << var->data.location_frac) & 0xf; /* non-64-bit case: shift the mask by location_frac and keep only the low 4 bits */ + } + + } + break; + } + default:; /* any other stage: nothing is recorded */ + } +} + static void scan_instruction(const struct nir_shader *nir, struct tgsi_shader_info *info, nir_instr *instr) @@ -312,6 +354,13 @@ static void scan_instruction(const struct 
nir_shader *nir, } break; } + case nir_intrinsic_store_deref: { + nir_variable *var = intrinsic_get_var(intr); + + if (var->data.mode == nir_var_shader_out) + gather_intrinsic_store_deref_output_info(nir, intr, var, info); /* stores to anything other than a shader output are ignored here */ + break; + } case nir_intrinsic_interp_deref_at_centroid: case nir_intrinsic_interp_deref_at_sample: case nir_intrinsic_interp_deref_at_offset: { @@ -606,22 +655,18 @@ void si_nir_scan_shader(const struct nir_shader *nir, unsigned streamw = (gs_out_streams >> 6) & 3; if (usagemask & TGSI_WRITEMASK_X) { - info->output_usagemask[i] |= TGSI_WRITEMASK_X; info->output_streams[i] |= streamx; info->num_stream_output_components[streamx]++; } if (usagemask & TGSI_WRITEMASK_Y) { - info->output_usagemask[i] |= TGSI_WRITEMASK_Y; info->output_streams[i] |= streamy << 2; info->num_stream_output_components[streamy]++; } if (usagemask & TGSI_WRITEMASK_Z) { - info->output_usagemask[i] |= TGSI_WRITEMASK_Z; info->output_streams[i] |= streamz << 4; info->num_stream_output_components[streamz]++; } if (usagemask & TGSI_WRITEMASK_W) { - info->output_usagemask[i] |= TGSI_WRITEMASK_W; info->output_streams[i] |= streamw << 6; info->num_stream_output_components[streamw]++; } -- 2.30.2