From: Jason Ekstrand Date: Thu, 16 Aug 2018 21:23:10 +0000 (-0500) Subject: anv,i965: Lower away image derefs in the driver X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=09f1de97a76a4990fd7ce909760f3c8933263b05;p=mesa.git anv,i965: Lower away image derefs in the driver Previously, the back-end compiler turned image access into magic uniform reads and there was a complex contract between back-end compiler and driver about setting up and filling out those params. As of this commit, both drivers now lower image_deref_load_param_intel intrinsics to load_uniform intrinsics controlled by the driver and lower the other image_deref_* intrinsics to image_* intrinsics which take an actual binding table index. There are still "magic" uniforms but they are now added and controlled entirely by the driver and that contract no longer spans components. This also has the side-effect of making most image use compile-time binding table indices. Previously, all image access pulled the binding table index from a uniform. Part of the reason for this was that the magic uniforms made it difficult to decouple binding table indices from the uniforms and, since they are indexed completely differently (especially in Vulkan), it was hard to pull them apart. Now that the driver is handling both, it's trivial to decouple the two and provide actual binding table indices. 
Shader-db results on Kaby Lake: total instructions in shared programs: 15166872 -> 15164293 (-0.02%) instructions in affected programs: 115834 -> 113255 (-2.23%) helped: 191 HURT: 0 total cycles in shared programs: 571311495 -> 571196465 (-0.02%) cycles in affected programs: 4757115 -> 4642085 (-2.42%) helped: 73 HURT: 67 total spills in shared programs: 10951 -> 10926 (-0.23%) spills in affected programs: 742 -> 717 (-3.37%) helped: 7 HURT: 0 total fills in shared programs: 22226 -> 22201 (-0.11%) fills in affected programs: 1146 -> 1121 (-2.18%) helped: 7 HURT: 0 Reviewed-by: Kenneth Graunke --- diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py index d7184dadbbc..b06b38fc2ce 100644 --- a/src/compiler/nir/nir_intrinsics.py +++ b/src/compiler/nir/nir_intrinsics.py @@ -331,9 +331,9 @@ image("samples", dest_comp=1, flags=[CAN_ELIMINATE, CAN_REORDER]) # variable. The const index specifies which of the six parameters to load. intrinsic("image_deref_load_param_intel", src_comp=[1], dest_comp=0, indices=[BASE], flags=[CAN_ELIMINATE, CAN_REORDER]) -intrinsic("image_deref_load_raw_intel", src_comp=[1, 1], dest_comp=0, - flags=[CAN_ELIMINATE]) -intrinsic("image_deref_store_raw_intel", src_comp=[1, 1, 0]) +image("load_raw_intel", src_comp=[1], dest_comp=0, + flags=[CAN_ELIMINATE]) +image("store_raw_intel", src_comp=[1, 0]) # Vulkan descriptor set intrinsics # diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 58736503f9a..02a7a33c4d7 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -494,16 +494,14 @@ type_size_scalar(const struct glsl_type *type) } return size; case GLSL_TYPE_SAMPLER: - /* Samplers take up no register space, since they're baked in at - * link time. - */ - return 0; case GLSL_TYPE_ATOMIC_UINT: + case GLSL_TYPE_IMAGE: + /* Samplers, atomics, and images take up no register space, since + * they're baked in at link time. 
+ */ return 0; case GLSL_TYPE_SUBROUTINE: return 1; - case GLSL_TYPE_IMAGE: - return BRW_IMAGE_PARAM_SIZE; case GLSL_TYPE_VOID: case GLSL_TYPE_ERROR: case GLSL_TYPE_INTERFACE: diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h index 52220db2dc0..aba19d5ab2c 100644 --- a/src/intel/compiler/brw_fs.h +++ b/src/intel/compiler/brw_fs.h @@ -216,6 +216,8 @@ public: nir_intrinsic_instr *instr); void nir_emit_cs_intrinsic(const brw::fs_builder &bld, nir_intrinsic_instr *instr); + fs_reg get_nir_image_intrinsic_image(const brw::fs_builder &bld, + nir_intrinsic_instr *instr); void nir_emit_intrinsic(const brw::fs_builder &bld, nir_intrinsic_instr *instr); void nir_emit_tes_intrinsic(const brw::fs_builder &bld, @@ -235,7 +237,6 @@ public: fs_reg get_nir_src(const nir_src &src); fs_reg get_nir_src_imm(const nir_src &src); fs_reg get_nir_dest(const nir_dest &dest); - fs_reg get_nir_image_deref(nir_deref_instr *deref); fs_reg get_indirect_offset(nir_intrinsic_instr *instr); void emit_percomp(const brw::fs_builder &bld, const fs_inst &inst, unsigned wr_mask); diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index b2be91f9117..aaba0e2a693 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -1694,70 +1694,6 @@ fs_visitor::get_nir_dest(const nir_dest &dest) } } -fs_reg -fs_visitor::get_nir_image_deref(nir_deref_instr *deref) -{ - fs_reg arr_offset = brw_imm_ud(0); - unsigned array_size = BRW_IMAGE_PARAM_SIZE * 4; - nir_deref_instr *head = deref; - while (head->deref_type != nir_deref_type_var) { - assert(head->deref_type == nir_deref_type_array); - - /* This level's element size is the previous level's array size */ - const unsigned elem_size = array_size; - - fs_reg index = retype(get_nir_src_imm(head->arr.index), - BRW_REGISTER_TYPE_UD); - if (arr_offset.file == BRW_IMMEDIATE_VALUE && - index.file == BRW_IMMEDIATE_VALUE) { - arr_offset.ud += index.ud * elem_size; - } else if (index.file == 
BRW_IMMEDIATE_VALUE) { - bld.ADD(arr_offset, arr_offset, brw_imm_ud(index.ud * elem_size)); - } else { - fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UD); - bld.MUL(tmp, index, brw_imm_ud(elem_size)); - bld.ADD(tmp, tmp, arr_offset); - arr_offset = tmp; - } - - head = nir_deref_instr_parent(head); - assert(glsl_type_is_array(head->type)); - array_size = elem_size * glsl_get_length(head->type); - } - - assert(head->deref_type == nir_deref_type_var); - const unsigned max_arr_offset = array_size - (BRW_IMAGE_PARAM_SIZE * 4); - fs_reg image(UNIFORM, head->var->data.driver_location / 4, - BRW_REGISTER_TYPE_UD); - - if (arr_offset.file == BRW_IMMEDIATE_VALUE) { - /* The offset is in bytes but we want it in dwords */ - return offset(image, bld, MIN2(arr_offset.ud, max_arr_offset) / 4); - } else { - /* Accessing an invalid surface index with the dataport can result - * in a hang. According to the spec "if the index used to - * select an individual element is negative or greater than or - * equal to the size of the array, the results of the operation - * are undefined but may not lead to termination" -- which is one - * of the possible outcomes of the hang. Clamp the index to - * prevent access outside of the array bounds. - */ - bld.emit_minmax(arr_offset, arr_offset, brw_imm_ud(max_arr_offset), - BRW_CONDITIONAL_L); - - /* Emit a pile of MOVs to load the uniform into a temporary. The - * dead-code elimination pass will get rid of what we don't use. 
- */ - fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UD, BRW_IMAGE_PARAM_SIZE); - for (unsigned j = 0; j < BRW_IMAGE_PARAM_SIZE; j++) { - bld.emit(SHADER_OPCODE_MOV_INDIRECT, - offset(tmp, bld, j), offset(image, bld, j), - arr_offset, brw_imm_ud(max_arr_offset + 4)); - } - return tmp; - } -} - void fs_visitor::emit_percomp(const fs_builder &bld, const fs_inst &inst, unsigned wr_mask) @@ -3847,6 +3783,43 @@ brw_cond_mod_for_nir_reduction_op(nir_op op) } } +fs_reg +fs_visitor::get_nir_image_intrinsic_image(const brw::fs_builder &bld, + nir_intrinsic_instr *instr) +{ + fs_reg image = retype(get_nir_src_imm(instr->src[0]), BRW_REGISTER_TYPE_UD); + + if (stage_prog_data->binding_table.image_start > 0) { + if (image.file == BRW_IMMEDIATE_VALUE) { + image.d += stage_prog_data->binding_table.image_start; + } else { + bld.ADD(image, image, + brw_imm_d(stage_prog_data->binding_table.image_start)); + } + } + + return bld.emit_uniformize(image); +} + +static unsigned +image_intrinsic_coord_components(nir_intrinsic_instr *instr) +{ + switch (nir_intrinsic_image_dim(instr)) { + case GLSL_SAMPLER_DIM_1D: + return 1 + nir_intrinsic_image_array(instr); + case GLSL_SAMPLER_DIM_2D: + case GLSL_SAMPLER_DIM_RECT: + return 2 + nir_intrinsic_image_array(instr); + case GLSL_SAMPLER_DIM_3D: + case GLSL_SAMPLER_DIM_CUBE: + return 3; + case GLSL_SAMPLER_DIM_BUF: + return 1; + default: + unreachable("Invalid image dimension"); + } +} + void fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr) { @@ -3855,40 +3828,37 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr dest = get_nir_dest(instr->dest); switch (instr->intrinsic) { - case nir_intrinsic_image_deref_load: - case nir_intrinsic_image_deref_store: - case nir_intrinsic_image_deref_atomic_add: - case nir_intrinsic_image_deref_atomic_min: - case nir_intrinsic_image_deref_atomic_max: - case nir_intrinsic_image_deref_atomic_and: - case nir_intrinsic_image_deref_atomic_or: - case 
nir_intrinsic_image_deref_atomic_xor: - case nir_intrinsic_image_deref_atomic_exchange: - case nir_intrinsic_image_deref_atomic_comp_swap: { + case nir_intrinsic_image_load: + case nir_intrinsic_image_store: + case nir_intrinsic_image_atomic_add: + case nir_intrinsic_image_atomic_min: + case nir_intrinsic_image_atomic_max: + case nir_intrinsic_image_atomic_and: + case nir_intrinsic_image_atomic_or: + case nir_intrinsic_image_atomic_xor: + case nir_intrinsic_image_atomic_exchange: + case nir_intrinsic_image_atomic_comp_swap: { if (stage == MESA_SHADER_FRAGMENT && - instr->intrinsic != nir_intrinsic_image_deref_load) + instr->intrinsic != nir_intrinsic_image_load) brw_wm_prog_data(prog_data)->has_side_effects = true; - /* Get the referenced image variable and type. */ - nir_deref_instr *deref = nir_src_as_deref(instr->src[0]); - const glsl_type *type = deref->type; - /* Get some metadata from the image intrinsic. */ const nir_intrinsic_info *info = &nir_intrinsic_infos[instr->intrinsic]; - const unsigned dims = type->coordinate_components(); + const unsigned dims = image_intrinsic_coord_components(instr); + const GLenum format = nir_intrinsic_format(instr); const unsigned dest_components = nir_intrinsic_dest_components(instr); /* Get the arguments of the image intrinsic. */ - const fs_reg image = get_nir_image_deref(deref); + const fs_reg image = get_nir_image_intrinsic_image(bld, instr); const fs_reg coords = retype(get_nir_src(instr->src[1]), BRW_REGISTER_TYPE_UD); fs_reg tmp; /* Emit an image load, store or atomic op. 
*/ - if (instr->intrinsic == nir_intrinsic_image_deref_load) { + if (instr->intrinsic == nir_intrinsic_image_load) { tmp = emit_typed_read(bld, image, coords, dims, instr->num_components); - } else if (instr->intrinsic == nir_intrinsic_image_deref_store) { + } else if (instr->intrinsic == nir_intrinsic_image_store) { const fs_reg src0 = get_nir_src(instr->src[3]); emit_typed_write(bld, image, coords, src0, dims, instr->num_components); @@ -3897,7 +3867,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr unsigned num_srcs = info->num_srcs; switch (instr->intrinsic) { - case nir_intrinsic_image_deref_atomic_add: + case nir_intrinsic_image_atomic_add: assert(num_srcs == 4); op = get_op_for_atomic_add(instr, 3); @@ -3905,27 +3875,27 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr if (op != BRW_AOP_ADD) num_srcs = 3; break; - case nir_intrinsic_image_deref_atomic_min: - op = (get_image_base_type(type) == BRW_REGISTER_TYPE_D ? - BRW_AOP_IMIN : BRW_AOP_UMIN); + case nir_intrinsic_image_atomic_min: + assert(format == GL_R32UI || format == GL_R32I); + op = (format == GL_R32I) ? BRW_AOP_IMIN : BRW_AOP_UMIN; break; - case nir_intrinsic_image_deref_atomic_max: - op = (get_image_base_type(type) == BRW_REGISTER_TYPE_D ? - BRW_AOP_IMAX : BRW_AOP_UMAX); + case nir_intrinsic_image_atomic_max: + assert(format == GL_R32UI || format == GL_R32I); + op = (format == GL_R32I) ? 
BRW_AOP_IMAX : BRW_AOP_UMAX; break; - case nir_intrinsic_image_deref_atomic_and: + case nir_intrinsic_image_atomic_and: op = BRW_AOP_AND; break; - case nir_intrinsic_image_deref_atomic_or: + case nir_intrinsic_image_atomic_or: op = BRW_AOP_OR; break; - case nir_intrinsic_image_deref_atomic_xor: + case nir_intrinsic_image_atomic_xor: op = BRW_AOP_XOR; break; - case nir_intrinsic_image_deref_atomic_exchange: + case nir_intrinsic_image_atomic_exchange: op = BRW_AOP_MOV; break; - case nir_intrinsic_image_deref_atomic_comp_swap: + case nir_intrinsic_image_atomic_comp_swap: op = BRW_AOP_CMPWR; break; default: @@ -3948,19 +3918,8 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr break; } - case nir_intrinsic_image_deref_load_param_intel: { - nir_deref_instr *deref = nir_src_as_deref(instr->src[0]); - const fs_reg image = get_nir_image_deref(deref); - const fs_reg param = offset(image, bld, nir_intrinsic_base(instr) * 4); - for (unsigned c = 0; c < instr->dest.ssa.num_components; ++c) { - bld.MOV(offset(retype(dest, param.type), bld, c), - offset(param, bld, c)); - } - break; - } - - case nir_intrinsic_image_deref_load_raw_intel: { - const fs_reg image = get_nir_image_deref(nir_src_as_deref(instr->src[0])); + case nir_intrinsic_image_load_raw_intel: { + const fs_reg image = get_nir_image_intrinsic_image(bld, instr); const fs_reg addr = retype(get_nir_src(instr->src[1]), BRW_REGISTER_TYPE_UD); @@ -3974,8 +3933,8 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr break; } - case nir_intrinsic_image_deref_store_raw_intel: { - const fs_reg image = get_nir_image_deref(nir_src_as_deref(instr->src[0])); + case nir_intrinsic_image_store_raw_intel: { + const fs_reg image = get_nir_image_intrinsic_image(bld, instr); const fs_reg addr = retype(get_nir_src(instr->src[1]), BRW_REGISTER_TYPE_UD); const fs_reg data = retype(get_nir_src(instr->src[2]), @@ -4010,7 +3969,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder 
&bld, nir_intrinsic_instr *instr break; } - case nir_intrinsic_image_deref_samples: + case nir_intrinsic_image_samples: /* The driver does not support multi-sampled images. */ bld.MOV(retype(dest, BRW_REGISTER_TYPE_D), brw_imm_d(1)); break; diff --git a/src/intel/compiler/brw_nir.h b/src/intel/compiler/brw_nir.h index 72a6ee8884a..50073265539 100644 --- a/src/intel/compiler/brw_nir.h +++ b/src/intel/compiler/brw_nir.h @@ -116,6 +116,8 @@ void brw_nir_lower_fs_outputs(nir_shader *nir); bool brw_nir_lower_image_load_store(nir_shader *nir, const struct gen_device_info *devinfo); +void brw_nir_rewrite_image_intrinsic(nir_intrinsic_instr *intrin, + nir_ssa_def *index); nir_shader *brw_postprocess_nir(nir_shader *nir, const struct brw_compiler *compiler, @@ -147,6 +149,9 @@ void brw_nir_setup_arb_uniforms(void *mem_ctx, nir_shader *shader, struct gl_program *prog, struct brw_stage_prog_data *stage_prog_data); +void brw_nir_lower_glsl_images(nir_shader *shader, + const struct gl_program *prog); + void brw_nir_analyze_ubo_ranges(const struct brw_compiler *compiler, nir_shader *nir, const struct brw_vs_prog_key *vs_key, diff --git a/src/intel/compiler/brw_nir_lower_image_load_store.c b/src/intel/compiler/brw_nir_lower_image_load_store.c index 819fb440f2c..5eba9ddabd3 100644 --- a/src/intel/compiler/brw_nir_lower_image_load_store.c +++ b/src/intel/compiler/brw_nir_lower_image_load_store.c @@ -811,3 +811,44 @@ brw_nir_lower_image_load_store(nir_shader *shader, return progress; } + +void +brw_nir_rewrite_image_intrinsic(nir_intrinsic_instr *intrin, + nir_ssa_def *index) +{ + nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); + nir_variable *var = nir_deref_instr_get_variable(deref); + + switch (intrin->intrinsic) { +#define CASE(op) \ + case nir_intrinsic_image_deref_##op: \ + intrin->intrinsic = nir_intrinsic_image_##op; \ + break; + CASE(load) + CASE(store) + CASE(atomic_add) + CASE(atomic_min) + CASE(atomic_max) + CASE(atomic_and) + CASE(atomic_or) + 
CASE(atomic_xor) + CASE(atomic_exchange) + CASE(atomic_comp_swap) + CASE(atomic_fadd) + CASE(size) + CASE(samples) + CASE(load_raw_intel) + CASE(store_raw_intel) +#undef CASE + default: + unreachable("Unhanded image intrinsic"); + } + + nir_intrinsic_set_image_dim(intrin, glsl_get_sampler_dim(deref->type)); + nir_intrinsic_set_image_array(intrin, glsl_sampler_type_is_array(deref->type)); + nir_intrinsic_set_access(intrin, var->data.image.access); + nir_intrinsic_set_format(intrin, var->data.image.format); + + nir_instr_rewrite_src(&intrin->instr, &intrin->src[0], + nir_src_for_ssa(index)); +} diff --git a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c index 84a664826e8..583b5a17cc6 100644 --- a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c +++ b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c @@ -24,6 +24,7 @@ #include "anv_nir.h" #include "program/prog_parameter.h" #include "nir/nir_builder.h" +#include "compiler/brw_nir.h" struct apply_pipeline_layout_state { nir_shader *shader; @@ -32,6 +33,8 @@ struct apply_pipeline_layout_state { struct anv_pipeline_layout *layout; bool add_bounds_checks; + unsigned first_image_uniform; + bool uses_constants; uint8_t constants_offset; struct { @@ -99,6 +102,9 @@ get_used_bindings_block(nir_block *block, case nir_intrinsic_image_deref_atomic_comp_swap: case nir_intrinsic_image_deref_size: case nir_intrinsic_image_deref_samples: + case nir_intrinsic_image_deref_load_param_intel: + case nir_intrinsic_image_deref_load_raw_intel: + case nir_intrinsic_image_deref_store_raw_intel: add_deref_src_binding(state, intrin->src[0]); break; @@ -178,6 +184,63 @@ lower_res_reindex_intrinsic(nir_intrinsic_instr *intrin, nir_instr_remove(&intrin->instr); } +static void +lower_image_intrinsic(nir_intrinsic_instr *intrin, + struct apply_pipeline_layout_state *state) +{ + nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); + nir_variable *var = nir_deref_instr_get_variable(deref); + + 
unsigned set = var->data.descriptor_set; + unsigned binding = var->data.binding; + unsigned array_size = + state->layout->set[set].layout->binding[binding].array_size; + + nir_builder *b = &state->builder; + b->cursor = nir_before_instr(&intrin->instr); + + nir_ssa_def *index = NULL; + if (deref->deref_type != nir_deref_type_var) { + assert(deref->deref_type == nir_deref_type_array); + index = nir_ssa_for_src(b, deref->arr.index, 1); + if (state->add_bounds_checks) + index = nir_umin(b, index, nir_imm_int(b, array_size - 1)); + } else { + index = nir_imm_int(b, 0); + } + + if (intrin->intrinsic == nir_intrinsic_image_deref_load_param_intel) { + b->cursor = nir_instr_remove(&intrin->instr); + + nir_intrinsic_instr *load = + nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_uniform); + + nir_intrinsic_set_base(load, state->first_image_uniform + + state->set[set].image_offsets[binding] * + BRW_IMAGE_PARAM_SIZE * 4); + nir_intrinsic_set_range(load, array_size * BRW_IMAGE_PARAM_SIZE * 4); + + const unsigned param = nir_intrinsic_base(intrin); + nir_ssa_def *offset = + nir_imul(b, index, nir_imm_int(b, BRW_IMAGE_PARAM_SIZE * 4)); + offset = nir_iadd(b, offset, nir_imm_int(b, param * 16)); + load->src[0] = nir_src_for_ssa(offset); + + load->num_components = intrin->dest.ssa.num_components; + nir_ssa_dest_init(&load->instr, &load->dest, + intrin->dest.ssa.num_components, + intrin->dest.ssa.bit_size, NULL); + nir_builder_instr_insert(b, &load->instr); + + nir_ssa_def_rewrite_uses(&intrin->dest.ssa, + nir_src_for_ssa(&load->dest.ssa)); + } else { + unsigned binding_offset = state->set[set].surface_offsets[binding]; + index = nir_iadd(b, index, nir_imm_int(b, binding_offset)); + brw_nir_rewrite_image_intrinsic(intrin, index); + } +} + static void lower_load_constant(nir_intrinsic_instr *intrin, struct apply_pipeline_layout_state *state) @@ -318,6 +381,23 @@ apply_pipeline_layout_block(nir_block *block, case nir_intrinsic_vulkan_resource_reindex: 
lower_res_reindex_intrinsic(intrin, state); break; + case nir_intrinsic_image_deref_load: + case nir_intrinsic_image_deref_store: + case nir_intrinsic_image_deref_atomic_add: + case nir_intrinsic_image_deref_atomic_min: + case nir_intrinsic_image_deref_atomic_max: + case nir_intrinsic_image_deref_atomic_and: + case nir_intrinsic_image_deref_atomic_or: + case nir_intrinsic_image_deref_atomic_xor: + case nir_intrinsic_image_deref_atomic_exchange: + case nir_intrinsic_image_deref_atomic_comp_swap: + case nir_intrinsic_image_deref_size: + case nir_intrinsic_image_deref_samples: + case nir_intrinsic_image_deref_load_param_intel: + case nir_intrinsic_image_deref_load_raw_intel: + case nir_intrinsic_image_deref_store_raw_intel: + lower_image_intrinsic(intrin, state); + break; case nir_intrinsic_load_constant: lower_load_constant(intrin, state); break; @@ -436,6 +516,39 @@ anv_nir_apply_pipeline_layout(struct anv_pipeline *pipeline, } } + unsigned image_uniform; + if (map->image_count > 0) { + assert(map->image_count <= MAX_IMAGES); + assert(shader->num_uniforms == prog_data->nr_params * 4); + state.first_image_uniform = shader->num_uniforms; + uint32_t *param = brw_stage_prog_data_add_params(prog_data, + map->image_count * + BRW_IMAGE_PARAM_SIZE); + struct anv_push_constants *null_data = NULL; + const struct brw_image_param *image_param = null_data->images; + for (uint32_t i = 0; i < map->image_count; i++) { + setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SURFACE_IDX_OFFSET, + (uintptr_t)&image_param->surface_idx, 1); + setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_OFFSET_OFFSET, + (uintptr_t)image_param->offset, 2); + setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SIZE_OFFSET, + (uintptr_t)image_param->size, 3); + setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_STRIDE_OFFSET, + (uintptr_t)image_param->stride, 4); + setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_TILING_OFFSET, + (uintptr_t)image_param->tiling, 3); + setup_vec4_uniform_value(param + 
BRW_IMAGE_PARAM_SWIZZLING_OFFSET, + (uintptr_t)image_param->swizzling, 2); + + param += BRW_IMAGE_PARAM_SIZE; + image_param ++; + } + assert(param == prog_data->param + prog_data->nr_params); + + shader->num_uniforms += map->image_count * BRW_IMAGE_PARAM_SIZE * 4; + assert(shader->num_uniforms == prog_data->nr_params * 4); + } + nir_foreach_variable(var, &shader->uniforms) { const struct glsl_type *glsl_type = glsl_without_array(var->type); @@ -479,51 +592,5 @@ anv_nir_apply_pipeline_layout(struct anv_pipeline *pipeline, nir_metadata_dominance); } - if (map->image_count > 0) { - assert(map->image_count <= MAX_IMAGES); - nir_foreach_variable(var, &shader->uniforms) { - if (glsl_type_is_image(var->type) || - (glsl_type_is_array(var->type) && - glsl_type_is_image(glsl_get_array_element(var->type)))) { - /* Images are represented as uniform push constants and the actual - * information required for reading/writing to/from the image is - * storred in the uniform. - */ - unsigned set = var->data.descriptor_set; - unsigned binding = var->data.binding; - unsigned image_index = state.set[set].image_offsets[binding]; - - var->data.driver_location = shader->num_uniforms + - image_index * BRW_IMAGE_PARAM_SIZE * 4; - } - } - - uint32_t *param = brw_stage_prog_data_add_params(prog_data, - map->image_count * - BRW_IMAGE_PARAM_SIZE); - struct anv_push_constants *null_data = NULL; - const struct brw_image_param *image_param = null_data->images; - for (uint32_t i = 0; i < map->image_count; i++) { - setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SURFACE_IDX_OFFSET, - (uintptr_t)&image_param->surface_idx, 1); - setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_OFFSET_OFFSET, - (uintptr_t)image_param->offset, 2); - setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SIZE_OFFSET, - (uintptr_t)image_param->size, 3); - setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_STRIDE_OFFSET, - (uintptr_t)image_param->stride, 4); - setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_TILING_OFFSET, - 
(uintptr_t)image_param->tiling, 3); - setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SWIZZLING_OFFSET, - (uintptr_t)image_param->swizzling, 2); - - param += BRW_IMAGE_PARAM_SIZE; - image_param ++; - } - assert(param == prog_data->param + prog_data->nr_params); - - shader->num_uniforms += map->image_count * BRW_IMAGE_PARAM_SIZE * 4; - } - ralloc_free(mem_ctx); } diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index 19d59b7fbac..a3eb68769a2 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -523,6 +523,8 @@ anv_pipeline_lower_nir(struct anv_pipeline *pipeline, if (nir->info.num_ssbos > 0 || nir->info.num_images > 0) pipeline->needs_data_cache = true; + NIR_PASS_V(nir, brw_nir_lower_image_load_store, compiler->devinfo); + /* Apply the actual pipeline layout to UBOs, SSBOs, and textures */ if (layout) { anv_nir_apply_pipeline_layout(pipeline, layout, nir, prog_data, @@ -532,8 +534,6 @@ anv_pipeline_lower_nir(struct anv_pipeline *pipeline, if (nir->info.stage != MESA_SHADER_COMPUTE) brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges); - NIR_PASS_V(nir, brw_nir_lower_image_load_store, compiler->devinfo); - assert(nir->num_uniforms == prog_data->nr_params * 4); stage->nir = nir; diff --git a/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp b/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp index 54f9f9b1a6b..8a560d9bac1 100644 --- a/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp +++ b/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp @@ -23,6 +23,7 @@ #include "compiler/brw_nir.h" #include "compiler/glsl/ir_uniform.h" +#include "compiler/nir/nir_builder.h" #include "brw_program.h" static void @@ -267,3 +268,132 @@ brw_nir_setup_arb_uniforms(void *mem_ctx, nir_shader *shader, stage_prog_data->param[4 * p + i] = BRW_PARAM_BUILTIN_ZERO; } } + +static nir_ssa_def * +get_aoa_deref_offset(nir_builder *b, + nir_deref_instr *deref, + unsigned elem_size) +{ + unsigned array_size = elem_size; + 
nir_ssa_def *offset = nir_imm_int(b, 0); + + while (deref->deref_type != nir_deref_type_var) { + assert(deref->deref_type == nir_deref_type_array); + + /* This level's element size is the previous level's array size */ + nir_ssa_def *index = nir_ssa_for_src(b, deref->arr.index, 1); + assert(deref->arr.index.ssa); + offset = nir_iadd(b, offset, + nir_imul(b, index, nir_imm_int(b, array_size))); + + deref = nir_deref_instr_parent(deref); + assert(glsl_type_is_array(deref->type)); + array_size *= glsl_get_length(deref->type); + } + + /* Accessing an invalid surface index with the dataport can result in a + * hang. According to the spec "if the index used to select an individual + * element is negative or greater than or equal to the size of the array, + * the results of the operation are undefined but may not lead to + * termination" -- which is one of the possible outcomes of the hang. + * Clamp the index to prevent access outside of the array bounds. + */ + return nir_umin(b, offset, nir_imm_int(b, array_size - elem_size)); +} + +void +brw_nir_lower_glsl_images(nir_shader *shader, + const struct gl_program *prog) +{ + /* We put image uniforms at the end */ + nir_foreach_variable(var, &shader->uniforms) { + if (!var->type->contains_image()) + continue; + + /* GL Only allows arrays of arrays of images */ + assert(var->type->without_array()->is_image()); + const unsigned num_images = MAX2(1, var->type->arrays_of_arrays_size()); + + var->data.driver_location = shader->num_uniforms; + shader->num_uniforms += num_images * BRW_IMAGE_PARAM_SIZE * 4; + } + + nir_function_impl *impl = nir_shader_get_entrypoint(shader); + + nir_builder b; + nir_builder_init(&b, impl); + + nir_foreach_block(block, impl) { + nir_foreach_instr_safe(instr, block) { + if (instr->type != nir_instr_type_intrinsic) + continue; + + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + switch (intrin->intrinsic) { + case nir_intrinsic_image_deref_load: + case nir_intrinsic_image_deref_store: + 
case nir_intrinsic_image_deref_atomic_add: + case nir_intrinsic_image_deref_atomic_min: + case nir_intrinsic_image_deref_atomic_max: + case nir_intrinsic_image_deref_atomic_and: + case nir_intrinsic_image_deref_atomic_or: + case nir_intrinsic_image_deref_atomic_xor: + case nir_intrinsic_image_deref_atomic_exchange: + case nir_intrinsic_image_deref_atomic_comp_swap: + case nir_intrinsic_image_deref_size: + case nir_intrinsic_image_deref_samples: + case nir_intrinsic_image_deref_load_raw_intel: + case nir_intrinsic_image_deref_store_raw_intel: { + nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); + nir_variable *var = nir_deref_instr_get_variable(deref); + const unsigned num_images = + MAX2(1, var->type->arrays_of_arrays_size()); + + struct gl_uniform_storage *storage = + &prog->sh.data->UniformStorage[var->data.location]; + const unsigned image_var_idx = + storage->opaque[shader->info.stage].index; + + b.cursor = nir_before_instr(&intrin->instr); + nir_ssa_def *index = nir_iadd(&b, nir_imm_int(&b, image_var_idx), + get_aoa_deref_offset(&b, deref, 1)); + brw_nir_rewrite_image_intrinsic(intrin, index); + break; + } + + case nir_intrinsic_image_deref_load_param_intel: { + nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); + nir_variable *var = nir_deref_instr_get_variable(deref); + const unsigned num_images = + MAX2(1, var->type->arrays_of_arrays_size()); + + b.cursor = nir_instr_remove(&intrin->instr); + + const unsigned param = nir_intrinsic_base(intrin); + nir_ssa_def *offset = + get_aoa_deref_offset(&b, deref, BRW_IMAGE_PARAM_SIZE * 4); + offset = nir_iadd(&b, offset, nir_imm_int(&b, param * 16)); + + nir_intrinsic_instr *load = + nir_intrinsic_instr_create(b.shader, + nir_intrinsic_load_uniform); + nir_intrinsic_set_base(load, var->data.driver_location); + nir_intrinsic_set_range(load, num_images * BRW_IMAGE_PARAM_SIZE * 4); + load->src[0] = nir_src_for_ssa(offset); + load->num_components = intrin->dest.ssa.num_components; + 
nir_ssa_dest_init(&load->instr, &load->dest, + intrin->dest.ssa.num_components, + intrin->dest.ssa.bit_size, NULL); + nir_builder_instr_insert(&b, &load->instr); + + nir_ssa_def_rewrite_uses(&intrin->dest.ssa, + nir_src_for_ssa(&load->dest.ssa)); + break; + } + + default: + break; + } + } + } +} diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c index f5ebd3c3b05..041395ec4c0 100644 --- a/src/mesa/drivers/dri/i965/brw_program.c +++ b/src/mesa/drivers/dri/i965/brw_program.c @@ -140,6 +140,7 @@ brw_create_nir(struct brw_context *brw, } NIR_PASS_V(nir, brw_nir_lower_uniforms, is_scalar); + NIR_PASS_V(nir, brw_nir_lower_glsl_images, prog); return nir; }