From: Timothy Arceri Date: Fri, 29 Mar 2019 01:39:48 +0000 (+1100) Subject: nir/i965/freedreno/vc4: add a bindless bool to type size functions X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=035759b61ba1778d5143cdf3a8795a62dd5d8a60;p=mesa.git nir/i965/freedreno/vc4: add a bindless bool to type size functions This is required to calculate sizes correctly when we have bindless samplers/images. Reviewed-by: Marek Olšák --- diff --git a/src/broadcom/compiler/vir.c b/src/broadcom/compiler/vir.c index 77b5ffb8619..4f83f03c7b2 100644 --- a/src/broadcom/compiler/vir.c +++ b/src/broadcom/compiler/vir.c @@ -525,7 +525,7 @@ vir_compile_init(const struct v3d_compiler *compiler, } static int -type_size_vec4(const struct glsl_type *type) +type_size_vec4(const struct glsl_type *type, bool bindless) { return glsl_count_attribute_slots(type, false); } diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index fb36b04ae66..956b716d38d 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -3015,7 +3015,7 @@ void nir_lower_io_to_temporaries(nir_shader *shader, void nir_shader_gather_info(nir_shader *shader, nir_function_impl *entrypoint); void nir_assign_var_locations(struct exec_list *var_list, unsigned *size, - int (*type_size)(const struct glsl_type *)); + int (*type_size)(const struct glsl_type *, bool)); /* Some helpers to do very simple linking */ bool nir_remove_unused_varyings(nir_shader *producer, nir_shader *consumer); @@ -3036,7 +3036,7 @@ typedef enum { } nir_lower_io_options; bool nir_lower_io(nir_shader *shader, nir_variable_mode modes, - int (*type_size)(const struct glsl_type *), + int (*type_size)(const struct glsl_type *, bool), nir_lower_io_options); typedef enum { diff --git a/src/compiler/nir/nir_lower_io.c b/src/compiler/nir/nir_lower_io.c index 98e2afe76ea..5f18f1df445 100644 --- a/src/compiler/nir/nir_lower_io.c +++ b/src/compiler/nir/nir_lower_io.c @@ -38,7 +38,7 @@ struct lower_io_state { void *dead_ctx; nir_builder builder; - 
int (*type_size)(const struct glsl_type *type); + int (*type_size)(const struct glsl_type *type, bool); nir_variable_mode modes; nir_lower_io_options options; }; @@ -95,7 +95,7 @@ global_atomic_for_deref(nir_intrinsic_op deref_op) void nir_assign_var_locations(struct exec_list *var_list, unsigned *size, - int (*type_size)(const struct glsl_type *)) + int (*type_size)(const struct glsl_type *, bool)) { unsigned location = 0; @@ -108,7 +108,10 @@ nir_assign_var_locations(struct exec_list *var_list, unsigned *size, continue; var->data.driver_location = location; - location += type_size(var->type); + bool bindless_type_size = var->data.mode == nir_var_shader_in || + var->data.mode == nir_var_shader_out || + var->data.bindless; + location += type_size(var->type, bindless_type_size); } *size = location; @@ -138,8 +141,8 @@ nir_is_per_vertex_io(const nir_variable *var, gl_shader_stage stage) static nir_ssa_def * get_io_offset(nir_builder *b, nir_deref_instr *deref, nir_ssa_def **vertex_index, - int (*type_size)(const struct glsl_type *), - unsigned *component) + int (*type_size)(const struct glsl_type *, bool), + unsigned *component, bool bts) { nir_deref_path path; nir_deref_path_init(&path, deref, NULL); @@ -165,7 +168,7 @@ get_io_offset(nir_builder *b, nir_deref_instr *deref, const unsigned total_offset = *component + index; const unsigned slot_offset = total_offset / 4; *component = total_offset % 4; - return nir_imm_int(b, type_size(glsl_vec4_type()) * slot_offset); + return nir_imm_int(b, type_size(glsl_vec4_type(), bts) * slot_offset); } /* Just emit code and let constant-folding go to town */ @@ -173,7 +176,7 @@ get_io_offset(nir_builder *b, nir_deref_instr *deref, for (; *p; p++) { if ((*p)->deref_type == nir_deref_type_array) { - unsigned size = type_size((*p)->type); + unsigned size = type_size((*p)->type, bts); nir_ssa_def *mul = nir_imul_imm(b, nir_ssa_for_src(b, (*p)->arr.index, 1), size); @@ -185,7 +188,7 @@ get_io_offset(nir_builder *b, nir_deref_instr 
*deref, unsigned field_offset = 0; for (unsigned i = 0; i < (*p)->strct.index; i++) { - field_offset += type_size(glsl_get_struct_field(parent->type, i)); + field_offset += type_size(glsl_get_struct_field(parent->type, i), bts); } offset = nir_iadd_imm(b, offset, field_offset); } else { @@ -255,7 +258,8 @@ lower_load(nir_intrinsic_instr *intrin, struct lower_io_state *state, nir_intrinsic_set_component(load, component); if (load->intrinsic == nir_intrinsic_load_uniform) - nir_intrinsic_set_range(load, state->type_size(var->type)); + nir_intrinsic_set_range(load, + state->type_size(var->type, var->data.bindless)); if (vertex_index) { load->src[0] = nir_src_for_ssa(vertex_index); @@ -468,9 +472,13 @@ nir_lower_io_block(nir_block *block, nir_ssa_def *offset; nir_ssa_def *vertex_index = NULL; unsigned component_offset = var->data.location_frac; + bool bindless_type_size = mode == nir_var_shader_in || + mode == nir_var_shader_out || + var->data.bindless; offset = get_io_offset(b, deref, per_vertex ? 
&vertex_index : NULL, - state->type_size, &component_offset); + state->type_size, &component_offset, + bindless_type_size); nir_intrinsic_instr *replacement; @@ -538,7 +546,7 @@ nir_lower_io_block(nir_block *block, static bool nir_lower_io_impl(nir_function_impl *impl, nir_variable_mode modes, - int (*type_size)(const struct glsl_type *), + int (*type_size)(const struct glsl_type *, bool), nir_lower_io_options options) { struct lower_io_state state; @@ -563,7 +571,7 @@ nir_lower_io_impl(nir_function_impl *impl, bool nir_lower_io(nir_shader *shader, nir_variable_mode modes, - int (*type_size)(const struct glsl_type *), + int (*type_size)(const struct glsl_type *, bool), nir_lower_io_options options) { bool progress = false; diff --git a/src/freedreno/ir3/ir3_shader.c b/src/freedreno/ir3/ir3_shader.c index d40c6e0e161..b2329899bac 100644 --- a/src/freedreno/ir3/ir3_shader.c +++ b/src/freedreno/ir3/ir3_shader.c @@ -35,7 +35,7 @@ #include "ir3_nir.h" int -ir3_glsl_type_size(const struct glsl_type *type) +ir3_glsl_type_size(const struct glsl_type *type, bool bindless) { return glsl_count_attribute_slots(type, false); } diff --git a/src/freedreno/ir3/ir3_shader.h b/src/freedreno/ir3/ir3_shader.h index 58d14197879..b3481c12990 100644 --- a/src/freedreno/ir3/ir3_shader.h +++ b/src/freedreno/ir3/ir3_shader.h @@ -518,7 +518,7 @@ void ir3_shader_disasm(struct ir3_shader_variant *so, uint32_t *bin, FILE *out); uint64_t ir3_shader_outputs(const struct ir3_shader *so); int -ir3_glsl_type_size(const struct glsl_type *type); +ir3_glsl_type_size(const struct glsl_type *type, bool bindless); static inline const char * ir3_shader_stage(struct ir3_shader *shader) diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_program.c b/src/gallium/drivers/freedreno/a2xx/fd2_program.c index cba87f01afc..0d262e93208 100644 --- a/src/gallium/drivers/freedreno/a2xx/fd2_program.c +++ b/src/gallium/drivers/freedreno/a2xx/fd2_program.c @@ -83,7 +83,7 @@ emit(struct fd_ringbuffer *ring, gl_shader_stage 
type, } static int -ir2_glsl_type_size(const struct glsl_type *type) +ir2_glsl_type_size(const struct glsl_type *type, bool bindless) { return glsl_count_attribute_slots(type, false); } diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp index ed0c5d24b85..1e4dd15d114 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp @@ -53,7 +53,7 @@ using std::tr1::unordered_map; using namespace nv50_ir; int -type_size(const struct glsl_type *type) +type_size(const struct glsl_type *type, bool bindless) { return glsl_count_attribute_slots(type, false); } @@ -3028,7 +3028,7 @@ Converter::handleDeref(nir_deref_instr *deref, Value * &indirect, const nir_vari switch (deref->deref_type) { case nir_deref_type_array: { Value *indirect; - uint8_t size = type_size(deref->type); + uint8_t size = type_size(deref->type, true); result += size * getIndirect(&deref->arr.index, 0, indirect); if (indirect) { diff --git a/src/gallium/drivers/panfrost/midgard/midgard_compile.c b/src/gallium/drivers/panfrost/midgard/midgard_compile.c index 8104d442f82..09eaea10294 100644 --- a/src/gallium/drivers/panfrost/midgard/midgard_compile.c +++ b/src/gallium/drivers/panfrost/midgard/midgard_compile.c @@ -640,15 +640,15 @@ attach_constants(compiler_context *ctx, midgard_instruction *ins, void *constant } static int -glsl_type_size(const struct glsl_type *type) +glsl_type_size(const struct glsl_type *type, bool bindless) { return glsl_count_attribute_slots(type, false); } static int -uniform_type_size(const struct glsl_type *type) +uniform_type_size(const struct glsl_type *type, bool bindless) { - return st_glsl_storage_type_size(type, false); + return st_glsl_storage_type_size(type, bindless); } /* Lower fdot2 to a vector multiplication followed by channel addition */ diff --git a/src/gallium/drivers/v3d/v3d_program.c 
b/src/gallium/drivers/v3d/v3d_program.c index e3515335502..e3e491e9fd7 100644 --- a/src/gallium/drivers/v3d/v3d_program.c +++ b/src/gallium/drivers/v3d/v3d_program.c @@ -169,7 +169,7 @@ v3d_set_transform_feedback_outputs(struct v3d_uncompiled_shader *so, } static int -type_size(const struct glsl_type *type) +type_size(const struct glsl_type *type, bool bindless) { return glsl_count_attribute_slots(type, false); } diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index 7d13544ab28..1fe3f94aba4 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -45,7 +45,7 @@ static void ntq_emit_cf_list(struct vc4_compile *c, struct exec_list *list); static int -type_size(const struct glsl_type *type) +type_size(const struct glsl_type *type, bool bindless) { return glsl_count_attribute_slots(type, false); } diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 29a80d36d87..e7118703358 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -518,7 +518,7 @@ fs_reg::component_size(unsigned width) const } extern "C" int -type_size_scalar(const struct glsl_type *type) +type_size_scalar(const struct glsl_type *type, bool bindless) { unsigned int size, i; @@ -540,17 +540,19 @@ type_size_scalar(const struct glsl_type *type) case GLSL_TYPE_INT64: return type->components() * 2; case GLSL_TYPE_ARRAY: - return type_size_scalar(type->fields.array) * type->length; + return type_size_scalar(type->fields.array, bindless) * type->length; case GLSL_TYPE_STRUCT: case GLSL_TYPE_INTERFACE: size = 0; for (i = 0; i < type->length; i++) { - size += type_size_scalar(type->fields.structure[i].type); + size += type_size_scalar(type->fields.structure[i].type, bindless); } return size; case GLSL_TYPE_SAMPLER: - case GLSL_TYPE_ATOMIC_UINT: case GLSL_TYPE_IMAGE: + if (bindless) + return type->components() * 2; + case GLSL_TYPE_ATOMIC_UINT: /* Samplers, atomics, and images 
take up no register space, since * they're baked in at link time. */ @@ -1135,7 +1137,8 @@ fs_reg fs_visitor::vgrf(const glsl_type *const type) { int reg_width = dispatch_width / 8; - return fs_reg(VGRF, alloc.allocate(type_size_scalar(type) * reg_width), + return fs_reg(VGRF, + alloc.allocate(type_size_scalar(type, false) * reg_width), brw_type_for_base_type(type)); } diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index e88cb3de9f2..875f3fac66c 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -58,7 +58,7 @@ fs_visitor::nir_setup_outputs() const int loc = var->data.driver_location; const unsigned var_vec4s = var->data.compact ? DIV_ROUND_UP(glsl_get_length(var->type), 4) - : type_size_vec4(var->type); + : type_size_vec4(var->type, true); vec4s[loc] = MAX2(vec4s[loc], var_vec4s); } diff --git a/src/intel/compiler/brw_nir.h b/src/intel/compiler/brw_nir.h index 7c73079c35d..bb916341e59 100644 --- a/src/intel/compiler/brw_nir.h +++ b/src/intel/compiler/brw_nir.h @@ -32,20 +32,20 @@ extern "C" { #endif -int type_size_scalar(const struct glsl_type *type); -int type_size_vec4(const struct glsl_type *type); -int type_size_dvec4(const struct glsl_type *type); +int type_size_scalar(const struct glsl_type *type, bool bindless); +int type_size_vec4(const struct glsl_type *type, bool bindless); +int type_size_dvec4(const struct glsl_type *type, bool bindless); static inline int -type_size_scalar_bytes(const struct glsl_type *type) +type_size_scalar_bytes(const struct glsl_type *type, bool bindless) { - return type_size_scalar(type) * 4; + return type_size_scalar(type, bindless) * 4; } static inline int -type_size_vec4_bytes(const struct glsl_type *type) +type_size_vec4_bytes(const struct glsl_type *type, bool bindless) { - return type_size_vec4(type) * 16; + return type_size_vec4(type, bindless) * 16; } /* Flags set in the instr->pass_flags field by i965 analysis passes */ diff --git 
a/src/intel/compiler/brw_vec4_visitor.cpp b/src/intel/compiler/brw_vec4_visitor.cpp index 16ee31d730a..fa62abb03c8 100644 --- a/src/intel/compiler/brw_vec4_visitor.cpp +++ b/src/intel/compiler/brw_vec4_visitor.cpp @@ -576,7 +576,7 @@ vec4_visitor::emit_pack_snorm_4x8(const dst_reg &dst, const src_reg &src0) * false) elements needed to pack a type. */ static int -type_size_xvec4(const struct glsl_type *type, bool as_vec4) +type_size_xvec4(const struct glsl_type *type, bool as_vec4, bool bindless) { unsigned int i; int size; @@ -609,12 +609,14 @@ type_size_xvec4(const struct glsl_type *type, bool as_vec4) } case GLSL_TYPE_ARRAY: assert(type->length > 0); - return type_size_xvec4(type->fields.array, as_vec4) * type->length; + return type_size_xvec4(type->fields.array, as_vec4, bindless) * + type->length; case GLSL_TYPE_STRUCT: case GLSL_TYPE_INTERFACE: size = 0; for (i = 0; i < type->length; i++) { - size += type_size_xvec4(type->fields.structure[i].type, as_vec4); + size += type_size_xvec4(type->fields.structure[i].type, as_vec4, + bindless); } return size; case GLSL_TYPE_SUBROUTINE: @@ -624,11 +626,11 @@ type_size_xvec4(const struct glsl_type *type, bool as_vec4) /* Samplers take up no register space, since they're baked in at * link time. */ - return 0; + return bindless ? 1 : 0; case GLSL_TYPE_ATOMIC_UINT: return 0; case GLSL_TYPE_IMAGE: - return DIV_ROUND_UP(BRW_IMAGE_PARAM_SIZE, 4); + return bindless ? 1 : DIV_ROUND_UP(BRW_IMAGE_PARAM_SIZE, 4); case GLSL_TYPE_VOID: case GLSL_TYPE_ERROR: case GLSL_TYPE_FUNCTION: @@ -649,9 +651,9 @@ type_size_xvec4(const struct glsl_type *type, bool as_vec4) * store a particular type. */ extern "C" int -type_size_vec4(const struct glsl_type *type) +type_size_vec4(const struct glsl_type *type, bool bindless) { - return type_size_xvec4(type, true); + return type_size_xvec4(type, true, bindless); } /** @@ -674,9 +676,9 @@ type_size_vec4(const struct glsl_type *type) * type fits in one or two vec4 slots. 
*/ extern "C" int -type_size_dvec4(const struct glsl_type *type) +type_size_dvec4(const struct glsl_type *type, bool bindless) { - return type_size_xvec4(type, false); + return type_size_xvec4(type, false, bindless); } src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type) @@ -684,7 +686,7 @@ src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type) init(); this->file = VGRF; - this->nr = v->alloc.allocate(type_size_vec4(type)); + this->nr = v->alloc.allocate(type_size_vec4(type, false)); if (type->is_array() || type->is_struct()) { this->swizzle = BRW_SWIZZLE_NOOP; @@ -702,7 +704,7 @@ src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type, int size) init(); this->file = VGRF; - this->nr = v->alloc.allocate(type_size_vec4(type) * size); + this->nr = v->alloc.allocate(type_size_vec4(type, false) * size); this->swizzle = BRW_SWIZZLE_NOOP; @@ -714,7 +716,7 @@ dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type) init(); this->file = VGRF; - this->nr = v->alloc.allocate(type_size_vec4(type)); + this->nr = v->alloc.allocate(type_size_vec4(type, false)); if (type->is_array() || type->is_struct()) { this->writemask = WRITEMASK_XYZW; diff --git a/src/mesa/state_tracker/st_glsl_types.cpp b/src/mesa/state_tracker/st_glsl_types.cpp index 277b91c4a5c..b0b422f41fe 100644 --- a/src/mesa/state_tracker/st_glsl_types.cpp +++ b/src/mesa/state_tracker/st_glsl_types.cpp @@ -110,7 +110,7 @@ st_glsl_storage_type_size(const struct glsl_type *type, bool is_bindless) } int -st_glsl_type_dword_size(const struct glsl_type *type) +st_glsl_type_dword_size(const struct glsl_type *type, bool bindless) { unsigned int size, i; @@ -127,20 +127,24 @@ st_glsl_type_dword_size(const struct glsl_type *type) case GLSL_TYPE_UINT8: case GLSL_TYPE_INT8: return DIV_ROUND_UP(type->components(), 4); + case GLSL_TYPE_IMAGE: + case GLSL_TYPE_SAMPLER: + if (!bindless) + return 0; case GLSL_TYPE_DOUBLE: case GLSL_TYPE_UINT64: case GLSL_TYPE_INT64: return 
type->components() * 2; case GLSL_TYPE_ARRAY: - return st_glsl_type_dword_size(type->fields.array) * type->length; + return st_glsl_type_dword_size(type->fields.array, bindless) * + type->length; case GLSL_TYPE_STRUCT: size = 0; for (i = 0; i < type->length; i++) { - size += st_glsl_type_dword_size(type->fields.structure[i].type); + size += st_glsl_type_dword_size(type->fields.structure[i].type, + bindless); } return size; - case GLSL_TYPE_IMAGE: - case GLSL_TYPE_SAMPLER: case GLSL_TYPE_ATOMIC_UINT: return 0; case GLSL_TYPE_SUBROUTINE: @@ -162,7 +166,7 @@ st_glsl_type_dword_size(const struct glsl_type *type) * vec4. */ int -st_glsl_uniforms_type_size(const struct glsl_type *type) +st_glsl_uniforms_type_size(const struct glsl_type *type, bool bindless) { - return st_glsl_storage_type_size(type, false); + return st_glsl_storage_type_size(type, bindless); } diff --git a/src/mesa/state_tracker/st_glsl_types.h b/src/mesa/state_tracker/st_glsl_types.h index e0aff12366a..1f2e5ab8f29 100644 --- a/src/mesa/state_tracker/st_glsl_types.h +++ b/src/mesa/state_tracker/st_glsl_types.h @@ -36,9 +36,9 @@ extern "C" { int st_glsl_storage_type_size(const struct glsl_type *type, bool is_bindless); -int st_glsl_uniforms_type_size(const struct glsl_type *type); +int st_glsl_uniforms_type_size(const struct glsl_type *type, bool bindless); -int st_glsl_type_dword_size(const struct glsl_type *type); +int st_glsl_type_dword_size(const struct glsl_type *type, bool bindless); #ifdef __cplusplus }