From ba965084b60e702b41beaea75237bfa39335b6cb Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 2 Jan 2018 11:26:58 -0800 Subject: [PATCH] broadcom/vc5: Move texture return channel setup into the compiler. The compiler decides how many LDTMUs we're going to emit, and that must match the P1 flags. This brings the return channel counting to a single place (so all that's passed into the compiler is "how many return channels you may request from this texture's format"), and was a necessary step for shadow samplers once we stop using OVRTMUOUT=0. --- src/broadcom/cle/v3d_packet_v33.xml | 5 +-- src/broadcom/compiler/nir_to_vir.c | 48 ++++++++++++++++++++------ src/broadcom/compiler/v3d_compiler.h | 6 ++-- src/gallium/drivers/vc5/vc5_state.c | 13 ------- src/gallium/drivers/vc5/vc5_uniforms.c | 10 ++++-- 5 files changed, 49 insertions(+), 33 deletions(-) diff --git a/src/broadcom/cle/v3d_packet_v33.xml b/src/broadcom/cle/v3d_packet_v33.xml index 8d1ff2cf3b3..f18954d64c8 100644 --- a/src/broadcom/cle/v3d_packet_v33.xml +++ b/src/broadcom/cle/v3d_packet_v33.xml @@ -810,10 +810,7 @@ - - - - + diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c index 6ec5db58a26..77d460c1b62 100644 --- a/src/broadcom/compiler/nir_to_vir.c +++ b/src/broadcom/compiler/nir_to_vir.c @@ -337,6 +337,9 @@ ntq_emit_tex(struct v3d_compile *c, nir_tex_instr *instr) .fetch_sample_mode = instr->op == nir_texop_txf, }; + struct V3D33_TEXTURE_UNIFORM_PARAMETER_1_CFG_MODE1 p1_unpacked = { + }; + switch (instr->sampler_dim) { case GLSL_SAMPLER_DIM_1D: if (instr->is_array) @@ -419,11 +422,35 @@ ntq_emit_tex(struct v3d_compile *c, nir_tex_instr *instr) } } + bool return_16 = (c->key->tex[unit].return_size == 16 || + p0_unpacked.shadow); + + /* Limit the number of channels returned to both how many the NIR + * instruction writes and how many the instruction could produce.
+ */ + uint32_t instr_return_channels = nir_tex_instr_dest_size(instr); + if (return_16) + instr_return_channels = (instr_return_channels + 1) / 2; + + p1_unpacked.return_words_of_texture_data = + (1 << MIN2(instr_return_channels, + c->key->tex[unit].return_channels)) - 1; + uint32_t p0_packed; V3D33_TEXTURE_UNIFORM_PARAMETER_0_CFG_MODE1_pack(NULL, (uint8_t *)&p0_packed, &p0_unpacked); + uint32_t p1_packed; + V3D33_TEXTURE_UNIFORM_PARAMETER_1_CFG_MODE1_pack(NULL, + (uint8_t *)&p1_packed, + &p1_unpacked); + /* Load unit number into the address field, which will be used by + * the driver to decide which texture to put in the actual address + * field. + */ + p1_packed |= unit << 5; + /* There is no native support for GL texture rectangle coordinates, so * we have to rescale from ([0, width], [0, height]) to ([0, 1], [0, * 1]). @@ -439,7 +466,7 @@ ntq_emit_tex(struct v3d_compile *c, nir_tex_instr *instr) struct qreg texture_u[] = { vir_uniform(c, QUNIFORM_TEXTURE_CONFIG_P0_0 + unit, p0_packed), - vir_uniform(c, QUNIFORM_TEXTURE_CONFIG_P1, unit), + vir_uniform(c, QUNIFORM_TEXTURE_CONFIG_P1, p1_packed), }; uint32_t next_texture_u = 0; @@ -460,17 +487,16 @@ ntq_emit_tex(struct v3d_compile *c, nir_tex_instr *instr) } } - bool return_16 = (c->key->tex[unit].return_size == 16 || - p0_unpacked.shadow); - struct qreg return_values[4]; - for (int i = 0; i < c->key->tex[unit].return_channels; i++) - return_values[i] = vir_LDTMU(c); - /* Swizzling .zw of an RG texture should give undefined results, not - * crash the compiler. - */ - for (int i = c->key->tex[unit].return_channels; i < 4; i++) - return_values[i] = c->undef; + for (int i = 0; i < 4; i++) { + /* Swizzling .zw of an RG texture should give undefined + * results, not crash the compiler.
+ */ + if (p1_unpacked.return_words_of_texture_data & (1 << i)) + return_values[i] = vir_LDTMU(c); + else + return_values[i] = c->undef; + } for (int i = 0; i < nir_tex_instr_dest_size(instr); i++) { struct qreg chan; diff --git a/src/broadcom/compiler/v3d_compiler.h b/src/broadcom/compiler/v3d_compiler.h index 1111102e6b1..7cb2e59b8f7 100644 --- a/src/broadcom/compiler/v3d_compiler.h +++ b/src/broadcom/compiler/v3d_compiler.h @@ -215,9 +215,9 @@ enum quniform_contents { * A reference to a texture config parameter 1 uniform. * * This is a uniform implicitly loaded with a QPU_W_TMU* write, which - * defines texture width, height, filters, and wrap modes. It will be - * found as a parameter to the second QOP_TEX_[STRB] instruction in a - * sequence. + * has the pointer to the indirect texture state. Our data[] field + * will have a packed p1 value, but the address field will be just + * which texture unit's texture should be referenced. */ QUNIFORM_TEXTURE_CONFIG_P1, diff --git a/src/gallium/drivers/vc5/vc5_state.c b/src/gallium/drivers/vc5/vc5_state.c index d022aa84e42..04ce3075a8f 100644 --- a/src/gallium/drivers/vc5/vc5_state.c +++ b/src/gallium/drivers/vc5/vc5_state.c @@ -588,19 +588,6 @@ vc5_create_sampler_view(struct pipe_context *pctx, struct pipe_resource *prsc, pipe_reference(NULL, &prsc->reference); - v3dx_pack(&so->p1, TEXTURE_UNIFORM_PARAMETER_1_CFG_MODE1, p1) { - p1.return_word_0_of_texture_data = true; - if (vc5_get_tex_return_size(cso->format) == 16) { - p1.return_word_1_of_texture_data = true; - } else { - int chans = vc5_get_tex_return_channels(cso->format); - - p1.return_word_1_of_texture_data = chans > 1; - p1.return_word_2_of_texture_data = chans > 2; - p1.return_word_3_of_texture_data = chans > 3; - } - } - /* Compute the sampler view's swizzle up front. 
This will be plugged * into either the sampler (for 16-bit returns) or the shader's * texture key (for 32) diff --git a/src/gallium/drivers/vc5/vc5_uniforms.c b/src/gallium/drivers/vc5/vc5_uniforms.c index 5e181344e73..676ab1a32c9 100644 --- a/src/gallium/drivers/vc5/vc5_uniforms.c +++ b/src/gallium/drivers/vc5/vc5_uniforms.c @@ -193,8 +193,14 @@ static void write_texture_p1(struct vc5_job *job, struct vc5_cl_out **uniforms, struct vc5_texture_stateobj *texstate, - uint32_t unit) + uint32_t data) { + /* Extract the texture unit from the top bits, and the compiler's + * packed p1 from the bottom. + */ + uint32_t unit = data >> 5; + uint32_t p1 = data & 0x1f; + struct pipe_sampler_view *psview = texstate->textures[unit]; struct vc5_sampler_view *sview = vc5_sampler_view(psview); @@ -207,7 +213,7 @@ write_texture_p1(struct vc5_job *job, (uint8_t *)&packed, &unpacked); - cl_aligned_u32(uniforms, packed | sview->p1); + cl_aligned_u32(uniforms, p1 | packed | sview->p1); } struct vc5_cl_reloc -- 2.30.2