From 5a7aba2e0a7fb3414a94d04d5970a2ed10c1f63e Mon Sep 17 00:00:00 2001 From: Andres Gomez Date: Mon, 29 Jan 2018 18:25:30 +0200 Subject: [PATCH] i965: perform 2 uploads with dual slot *64*PASSTHRU formats on gen<8 MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit The emission of vertex attributes corresponding to dvec3 and dvec4 vertex shader input variables was not correct when the passed to the VertexAttribL* commands was <= 2. In 61a8a55f557 ("i965/gen8: Fix vertex attrib upload for dvec3/4 shader inputs"), for gen8+ we needed to determine if the attrib was dual slot to emit 128 or 256-bit, independently of the VAO size. Similarly, for gen < 8 we also need to determine whether the attrib is dual slot to force the emission of 256-bits through 2 uploads. Additionally, we make use of the ISL_FORMAT_R32_FLOAT format in this second upload to fill these unspecified components with zeros, as we also do for gen8+. Fixes the following test on Haswell: KHR-GL46.vertex_attrib_binding.basic-inputL-case1 v2: Added more inline comments to explain why we are using ISL_FORMAT_R32_FLOAT and its consequences, as requested by Alejandro and Antía. Fixes: 75968a668e4 ("i965/gen7: expose OpenGL 4.2 on Haswell when supported") Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103006 Cc: Alejandro Piñeiro Cc: Juan A. Suarez Romero Cc: Antia Puentes Cc: Rafael Antognolli Cc: Kenneth Graunke Signed-off-by: Andres Gomez Reviewed-by: Alejandro Piñeiro Reviewed-by: Antia Puentes Reviewed-by: Rafael Antognolli Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/genX_state_upload.c | 32 ++++++++++++++++--- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c b/src/mesa/drivers/dri/i965/genX_state_upload.c index aa4d64d08e2..a39a254dacd 100644 --- a/src/mesa/drivers/dri/i965/genX_state_upload.c +++ b/src/mesa/drivers/dri/i965/genX_state_upload.c @@ -364,11 +364,15 @@ is_passthru_format(uint32_t format) } UNUSED static int -uploads_needed(uint32_t format) +uploads_needed(uint32_t format, + bool is_dual_slot) { if (!is_passthru_format(format)) return 1; + if (is_dual_slot) + return 2; + switch (format) { case ISL_FORMAT_R64_PASSTHRU: case ISL_FORMAT_R64G64_PASSTHRU: @@ -397,11 +401,19 @@ downsize_format_if_needed(uint32_t format, if (!is_passthru_format(format)) return format; + /* ISL_FORMAT_R64_PASSTHRU and ISL_FORMAT_R64G64_PASSTHRU with an upload == + * 1 means that we have been forced to do 2 uploads for a size <= 2. This + * happens with gen < 8 and dvec3 or dvec4 vertex shader input + * variables. In those cases, we return ISL_FORMAT_R32_FLOAT as a way of + * flagging that we want to fill with zeroes this second forced upload. + */ switch (format) { case ISL_FORMAT_R64_PASSTHRU: - return ISL_FORMAT_R32G32_FLOAT; + return !upload ? ISL_FORMAT_R32G32_FLOAT + : ISL_FORMAT_R32_FLOAT; case ISL_FORMAT_R64G64_PASSTHRU: - return ISL_FORMAT_R32G32B32A32_FLOAT; + return !upload ? ISL_FORMAT_R32G32B32A32_FLOAT + : ISL_FORMAT_R32_FLOAT; case ISL_FORMAT_R64G64B64_PASSTHRU: return !upload ? ISL_FORMAT_R32G32B32A32_FLOAT : ISL_FORMAT_R32G32_FLOAT; @@ -420,6 +432,15 @@ static int upload_format_size(uint32_t upload_format) { switch (upload_format) { + case ISL_FORMAT_R32_FLOAT: + + /* downsized_format has returned this one in order to flag that we are + * performing a second upload which we want to have filled with + * zeroes. This happens with gen < 8, a size <= 2, and dvec3 or dvec4 + * vertex shader input variables. + */ + + return 0; case ISL_FORMAT_R32G32_FLOAT: return 2; case ISL_FORMAT_R32G32B32A32_FLOAT: @@ -517,7 +538,7 @@ genX(emit_vertices)(struct brw_context *brw) struct brw_vertex_element *input = brw->vb.enabled[i]; uint32_t format = brw_get_vertex_surface_type(brw, input->glarray); - if (uploads_needed(format) > 1) + if (uploads_needed(format, input->is_dual_slot) > 1) nr_elements++; } #endif @@ -613,7 +634,8 @@ genX(emit_vertices)(struct brw_context *brw) uint32_t comp1 = VFCOMP_STORE_SRC; uint32_t comp2 = VFCOMP_STORE_SRC; uint32_t comp3 = VFCOMP_STORE_SRC; - const unsigned num_uploads = GEN_GEN < 8 ? uploads_needed(format) : 1; + const unsigned num_uploads = GEN_GEN < 8 ? + uploads_needed(format, input->is_dual_slot) : 1; #if GEN_GEN >= 8 /* From the BDW PRM, Volume 2d, page 588 (VERTEX_ELEMENT_STATE): -- 2.30.2