From: Kenneth Graunke Date: Fri, 30 Nov 2018 10:27:07 +0000 (-0800) Subject: iris: Fill out brw_image_params for storage images on Broadwell X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=477e7d575b6b1bab5a0221c18af0ff40c29bc8ee;p=mesa.git iris: Fill out brw_image_params for storage images on Broadwell --- diff --git a/src/gallium/drivers/iris/iris_context.h b/src/gallium/drivers/iris/iris_context.h index ad7cfdb7b90..0b0255bcc18 100644 --- a/src/gallium/drivers/iris/iris_context.h +++ b/src/gallium/drivers/iris/iris_context.h @@ -48,6 +48,18 @@ struct blorp_params; #define IRIS_MAX_VIEWPORTS 16 #define IRIS_MAX_CLIP_PLANES 8 +enum iris_param_domain { + BRW_PARAM_DOMAIN_BUILTIN = 0, + BRW_PARAM_DOMAIN_IMAGE, +}; + +#define BRW_PARAM(domain, val) (BRW_PARAM_DOMAIN_##domain << 24 | (val)) +#define BRW_PARAM_DOMAIN(param) ((uint32_t)(param) >> 24) +#define BRW_PARAM_VALUE(param) ((uint32_t)(param) & 0x00ffffff) +#define BRW_PARAM_IMAGE(idx, offset) BRW_PARAM(IMAGE, ((idx) << 8) | (offset)) +#define BRW_PARAM_IMAGE_IDX(value) (BRW_PARAM_VALUE(value) >> 8) +#define BRW_PARAM_IMAGE_OFFSET(value)(BRW_PARAM_VALUE(value) & 0xf) + /** * Dirty flags. When state changes, we flag some combination of these * to indicate that particular GPU commands need to be re-emitted. @@ -294,6 +306,9 @@ struct iris_shader_state { struct pipe_resource *res; struct iris_state_ref surface_state; unsigned access; + + /** Gen8-only uniform data for image lowering */ + struct brw_image_param param; } image[PIPE_MAX_SHADER_IMAGES]; struct iris_state_ref sampler_table; diff --git a/src/gallium/drivers/iris/iris_program.c b/src/gallium/drivers/iris/iris_program.c index b8929be9b72..47a4b635f99 100644 --- a/src/gallium/drivers/iris/iris_program.c +++ b/src/gallium/drivers/iris/iris_program.c @@ -152,7 +152,9 @@ iris_lower_storage_image_derefs(nir_shader *nir) case nir_intrinsic_image_deref_atomic_exchange: case nir_intrinsic_image_deref_atomic_comp_swap: case nir_intrinsic_image_deref_size: - case nir_intrinsic_image_deref_samples: { + case nir_intrinsic_image_deref_samples: + case nir_intrinsic_image_deref_load_raw_intel: + case nir_intrinsic_image_deref_store_raw_intel: { nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); nir_variable *var = nir_deref_instr_get_variable(deref); @@ -569,6 +571,19 @@ assign_common_binding_table_offsets(const struct gen_device_info *devinfo, return next_binding_table_offset; } +static void +setup_vec4_image_sysval(uint32_t *sysvals, uint32_t idx, + unsigned offset, unsigned n) +{ + assert(offset % sizeof(uint32_t) == 0); + + for (unsigned i = 0; i < n; ++i) + sysvals[i] = BRW_PARAM_IMAGE(idx, offset / sizeof(uint32_t) + i); + + for (unsigned i = n; i < 4; ++i) + sysvals[i] = BRW_PARAM_BUILTIN_ZERO; +} + /** * Associate NIR uniform variables with the prog_data->param[] mechanism * used by the backend. Also, decide which UBOs we'd like to push in an @@ -582,12 +597,7 @@ iris_setup_uniforms(const struct brw_compiler *compiler, enum brw_param_builtin **out_system_values, unsigned *out_num_system_values) { - /* We don't use params[], but fs_visitor::nir_setup_uniforms() asserts - * about it for compute shaders, so go ahead and make some fake ones - * which the backend will dead code eliminate. - */ - prog_data->nr_params = nir->num_uniforms; - prog_data->param = rzalloc_array(mem_ctx, uint32_t, prog_data->nr_params); + const struct gen_device_info *devinfo = compiler->devinfo; /* The intel compiler assumes that num_uniforms is in bytes. For * scalar that means 4 bytes per uniform slot. @@ -596,14 +606,17 @@ iris_setup_uniforms(const struct brw_compiler *compiler, */ nir->num_uniforms *= 4; - const unsigned IRIS_MAX_SYSTEM_VALUES = 32; + const unsigned IRIS_MAX_SYSTEM_VALUES = + PIPE_MAX_SHADER_IMAGES * BRW_IMAGE_PARAM_SIZE; enum brw_param_builtin *system_values = rzalloc_array(mem_ctx, enum brw_param_builtin, IRIS_MAX_SYSTEM_VALUES); unsigned num_system_values = 0; unsigned patch_vert_idx = -1; unsigned ucp_idx[IRIS_MAX_CLIP_PLANES]; + unsigned img_idx[PIPE_MAX_SHADER_IMAGES]; memset(ucp_idx, -1, sizeof(ucp_idx)); + memset(img_idx, -1, sizeof(img_idx)); nir_function_impl *impl = nir_shader_get_entrypoint(nir); @@ -650,6 +663,49 @@ iris_setup_uniforms(const struct brw_compiler *compiler, b.cursor = nir_before_instr(instr); offset = nir_imm_int(&b, patch_vert_idx * sizeof(uint32_t)); break; + case nir_intrinsic_image_deref_load_param_intel: { + assert(devinfo->gen < 9); + nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); + nir_variable *var = nir_deref_instr_get_variable(deref); + + if (img_idx[var->data.binding] == -1) { + /* GL only allows arrays of arrays of images. */ + assert(glsl_type_is_image(glsl_without_array(var->type))); + unsigned num_images = MAX2(1, glsl_get_aoa_size(var->type)); + + for (int i = 0; i < num_images; i++) { + const unsigned img = var->data.binding + i; + + img_idx[img] = num_system_values; + num_system_values += BRW_IMAGE_PARAM_SIZE; + + uint32_t *img_sv = &system_values[img_idx[img]]; + + setup_vec4_image_sysval( + img_sv + BRW_IMAGE_PARAM_OFFSET_OFFSET, img, + offsetof(struct brw_image_param, offset), 2); + setup_vec4_image_sysval( + img_sv + BRW_IMAGE_PARAM_SIZE_OFFSET, img, + offsetof(struct brw_image_param, size), 3); + setup_vec4_image_sysval( + img_sv + BRW_IMAGE_PARAM_STRIDE_OFFSET, img, + offsetof(struct brw_image_param, stride), 4); + setup_vec4_image_sysval( + img_sv + BRW_IMAGE_PARAM_TILING_OFFSET, img, + offsetof(struct brw_image_param, tiling), 3); + setup_vec4_image_sysval( + img_sv + BRW_IMAGE_PARAM_SWIZZLING_OFFSET, img, + offsetof(struct brw_image_param, swizzling), 2); + } + } + + b.cursor = nir_before_instr(instr); + offset = nir_iadd(&b, + get_aoa_deref_offset(&b, deref, BRW_IMAGE_PARAM_SIZE * 4), + nir_imm_int(&b, img_idx[var->data.binding] * 4 + + nir_intrinsic_base(intrin) * 16)); + break; + } default: continue; } @@ -717,6 +773,13 @@ iris_setup_uniforms(const struct brw_compiler *compiler, if (nir->info.stage != MESA_SHADER_COMPUTE) brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges); + /* We don't use params[], but fs_visitor::nir_setup_uniforms() asserts + * about it for compute shaders, so go ahead and make some fake ones + * which the backend will dead code eliminate. + */ + prog_data->nr_params = nir->num_uniforms / 4; + prog_data->param = rzalloc_array(mem_ctx, uint32_t, prog_data->nr_params); + *out_system_values = system_values; *out_num_system_values = num_system_values; } diff --git a/src/gallium/drivers/iris/iris_state.c b/src/gallium/drivers/iris/iris_state.c index 1bdf4469397..5d1aaf97c41 100644 --- a/src/gallium/drivers/iris/iris_state.c +++ b/src/gallium/drivers/iris/iris_state.c @@ -1741,6 +1741,36 @@ iris_create_surface(struct pipe_context *ctx, return psurf; } +#if GEN_GEN < 9 +static void +fill_default_image_param(struct brw_image_param *param) +{ + memset(param, 0, sizeof(*param)); + /* Set the swizzling shifts to all-ones to effectively disable swizzling -- + * See emit_address_calculation() in brw_fs_surface_builder.cpp for a more + * detailed explanation of these parameters. + */ + param->swizzling[0] = 0xff; + param->swizzling[1] = 0xff; +} + +static void +fill_buffer_image_param(struct brw_image_param *param, + enum pipe_format pfmt, + unsigned size) +{ + const unsigned cpp = util_format_get_blocksize(pfmt); + + fill_default_image_param(param); + param->size[0] = size / cpp; + param->stride[0] = cpp; +} +#else +#define isl_surf_fill_image_param(x, ...) +#define fill_default_image_param(x, ...) +#define fill_buffer_image_param(x, ...) +#endif + /** * The pipe->set_shader_images() driver hook. */ @@ -1798,19 +1828,31 @@ iris_set_shader_images(struct pipe_context *ctx, }; fill_surface_state(&screen->isl_dev, map, res, &view); + isl_surf_fill_image_param(&screen->isl_dev, + &shs->image[start_slot + i].param, + &res->surf, &view); } else { fill_buffer_surface_state(&screen->isl_dev, res->bo, map, isl_format, img->u.buf.offset, img->u.buf.size); + fill_buffer_image_param(&shs->image[start_slot + i].param, + img->format, img->u.buf.size); } } else { pipe_resource_reference(&shs->image[start_slot + i].res, NULL); pipe_resource_reference(&shs->image[start_slot + i].surface_state.res, NULL); + fill_default_image_param(&shs->image[start_slot + i].param); } } ice->state.dirty |= IRIS_DIRTY_BINDINGS_VS << stage; + + /* Broadwell also needs brw_image_params re-uploaded */ + if (GEN_GEN < 9) { + ice->state.dirty |= IRIS_DIRTY_CONSTANTS_VS << stage; + shs->cbuf0_needs_upload = true; + } } @@ -2289,7 +2331,16 @@ upload_uniforms(struct iris_context *ice, uint32_t sysval = shader->system_values[i]; uint32_t value = 0; - if (BRW_PARAM_BUILTIN_IS_CLIP_PLANE(sysval)) { + if (BRW_PARAM_DOMAIN(sysval) == BRW_PARAM_DOMAIN_IMAGE) { + unsigned img = BRW_PARAM_IMAGE_IDX(sysval); + unsigned offset = BRW_PARAM_IMAGE_OFFSET(sysval); + struct brw_image_param *param = &shs->image[img].param; + + assert(offset < sizeof(struct brw_image_param)); + value = ((uint32_t *) param)[offset]; + } else if (sysval == BRW_PARAM_BUILTIN_ZERO) { + value = 0; + } else if (BRW_PARAM_BUILTIN_IS_CLIP_PLANE(sysval)) { int plane = BRW_PARAM_BUILTIN_CLIP_PLANE_IDX(sysval); int comp = BRW_PARAM_BUILTIN_CLIP_PLANE_COMP(sysval); value = fui(ice->state.clip_planes.ucp[plane][comp]);