From e79aa19d88b4d6dbd26c23287292e6bf9f41ce33 Mon Sep 17 00:00:00 2001 From: "Juan A. Suarez Romero" Date: Fri, 20 May 2016 16:35:52 +0200 Subject: [PATCH] i965: fix double-precision vertex inputs measurement For double-precision vertex inputs we need to measure them in dvec4 terms, and for single-precision vertex inputs we need to measure them in vec4 terms. For the latter case, we use the type_size_vec4() function. For the former case, we had a wrong implementation based on type_size_vec4(). This commit introduces a proper type_size_dvec4() function, that we use to measure vertex inputs. Measuring double-precision vertex inputs as dvec4 is required because ARB_vertex_attrib_64bit states that these use the same number of locations as the single-precision version. That is, two consecutive dvec4s would be located in location "x" and location "x+1", not "x+2". Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_fs.cpp | 2 +- src/mesa/drivers/dri/i965/brw_shader.h | 1 + .../drivers/dri/i965/brw_vec4_visitor.cpp | 66 ++++++++++++++----- 3 files changed, 53 insertions(+), 16 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 847a6d36566..bb2caa54e17 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -552,7 +552,7 @@ extern "C" int type_size_vs_input(const struct glsl_type *type) { if (type->is_double()) { - return type_size_vec4(type) / 2; + return type_size_dvec4(type); } else { return type_size_vec4(type); } diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h index 60f3b5f3d40..656dc89c25d 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.h +++ b/src/mesa/drivers/dri/i965/brw_shader.h @@ -293,6 +293,7 @@ struct gl_shader *brw_new_shader(struct gl_context *ctx, GLuint name, GLuint typ int type_size_scalar(const struct glsl_type *type); int type_size_vec4(const struct glsl_type *type); +int type_size_dvec4(const struct 
glsl_type *type); int type_size_vec4_times_4(const struct glsl_type *type); int type_size_vs_input(const struct glsl_type *type); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index f73d6782528..4b5dfe6e5b5 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -566,18 +566,12 @@ vec4_visitor::emit_pack_snorm_4x8(const dst_reg &dst, const src_reg &src0) emit(VEC4_OPCODE_PACK_BYTES, dst, bytes); } -/** - * Returns the minimum number of vec4 elements needed to pack a type. - * - * For simple types, it will return 1 (a single vec4); for matrices, the - * number of columns; for array and struct, the sum of the vec4_size of - * each of its elements; and for sampler and atomic, zero. - * - * This method is useful to calculate how much register space is needed to - * store a particular type. +/* + * Returns the minimum number of vec4 (as_vec4 == true) or dvec4 (as_vec4 == + * false) elements needed to pack a type. */ -extern "C" int -type_size_vec4(const struct glsl_type *type) +static int +type_size_xvec4(const struct glsl_type *type, bool as_vec4) { unsigned int i; int size; @@ -590,7 +584,8 @@ type_size_vec4(const struct glsl_type *type) case GLSL_TYPE_DOUBLE: if (type->is_matrix()) { const glsl_type *col_type = type->column_type(); - unsigned col_slots = col_type->is_dual_slot_double() ? 2 : 1; + unsigned col_slots = + (as_vec4 && col_type->is_dual_slot_double()) ? 2 : 1; return type->matrix_columns * col_slots; } else { /* Regardless of size of vector, it gets a vec4. This is bad @@ -598,15 +593,15 @@ type_size_vec4(const struct glsl_type *type) * mess. Hopefully a later pass over the code can pack scalars * down if appropriate. */ - return type->is_dual_slot_double() ? 2 : 1; + return (as_vec4 && type->is_dual_slot_double()) ? 
2 : 1; } case GLSL_TYPE_ARRAY: assert(type->length > 0); - return type_size_vec4(type->fields.array) * type->length; + return type_size_xvec4(type->fields.array, as_vec4) * type->length; case GLSL_TYPE_STRUCT: size = 0; for (i = 0; i < type->length; i++) { - size += type_size_vec4(type->fields.structure[i].type); + size += type_size_xvec4(type->fields.structure[i].type, as_vec4); } return size; case GLSL_TYPE_SUBROUTINE: @@ -631,6 +626,47 @@ type_size_vec4(const struct glsl_type *type) return 0; } +/** + * Returns the minimum number of vec4 elements needed to pack a type. + * + * For simple types, it will return 1 (a single vec4); for matrices, the + * number of columns; for array and struct, the sum of the vec4_size of + * each of its elements; and for sampler and atomic, zero. + * + * This method is useful to calculate how much register space is needed to + * store a particular type. + */ +extern "C" int +type_size_vec4(const struct glsl_type *type) +{ + return type_size_xvec4(type, true); +} + +/** + * Returns the minimum number of dvec4 elements needed to pack a type. + * + * For simple types, it will return 1 (a single dvec4); for matrices, the + * number of columns; for array and struct, the sum of the dvec4_size of + * each of its elements; and for sampler and atomic, zero. + * + * This method is useful to calculate how much register space is needed to + * store a particular type. + * + * Measuring double-precision vertex inputs as dvec4 is required because + * ARB_vertex_attrib_64bit states that these use the same number of locations + * as the single-precision version. That is, two consecutive dvec4s would be + * located in location "x" and location "x+1", not "x+2". + * + * In order to map vec4/dvec4 vertex inputs in the proper ATTRs, + * remap_vs_attrs() will take into account both the location and also if the + * type fits in one or two vec4 slots. 
+ */ +extern "C" int +type_size_dvec4(const struct glsl_type *type) +{ + return type_size_xvec4(type, false); +} + src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type) { init(); -- 2.30.2