From 87a3e02d9bec689e110f820bba7b125b3e801fdd Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Wed, 21 Jan 2015 17:34:49 +0200 Subject: [PATCH] i965: Define and initialize image parameter structure. This will be used to pass image meta-data to the shader when we cannot use typed surface reads and writes. All entries except surface_idx and size are otherwise unused and will get eliminated by the uniform packing pass. size will be used for bounds checking with some image formats and will be useful for ARB_shader_image_size too. surface_idx is always used. v2: Add CS support. Move the image_params array back to brw_stage_prog_data. v3: Improve documentation. Reviewed-by: Topi Pohjolainen Reviewed-by: Jason Ekstrand --- src/mesa/drivers/dri/i965/brw_context.h | 54 ++++++++++ src/mesa/drivers/dri/i965/brw_cs.cpp | 3 + src/mesa/drivers/dri/i965/brw_gs.c | 3 + src/mesa/drivers/dri/i965/brw_vs.c | 5 +- src/mesa/drivers/dri/i965/brw_wm.c | 4 + .../drivers/dri/i965/brw_wm_surface_state.c | 102 ++++++++++++++++++ 6 files changed, 170 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 3aff9246db8..b851f3830a2 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -361,6 +361,7 @@ struct brw_stage_prog_data { GLuint nr_params; /**< number of float params/constants */ GLuint nr_pull_params; + unsigned nr_image_params; unsigned curb_read_length; unsigned total_scratch; @@ -381,6 +382,59 @@ struct brw_stage_prog_data { */ const gl_constant_value **param; const gl_constant_value **pull_param; + + /** + * Image metadata passed to the shader as uniforms. This is deliberately + * ignored by brw_stage_prog_data_compare() because its contents don't have + * any influence on program compilation. + */ + struct brw_image_param *image_param; +}; + +/* + * Image metadata structure as laid out in the shader parameter + * buffer. Entries have to be 16B-aligned for the vec4 back-end to be + * able to use them. That's okay because the padding and any unused + * entries [most of them except when we're doing untyped surface + * access] will be removed by the uniform packing pass. + */ +#define BRW_IMAGE_PARAM_SURFACE_IDX_OFFSET 0 +#define BRW_IMAGE_PARAM_OFFSET_OFFSET 4 +#define BRW_IMAGE_PARAM_SIZE_OFFSET 8 +#define BRW_IMAGE_PARAM_STRIDE_OFFSET 12 +#define BRW_IMAGE_PARAM_TILING_OFFSET 16 +#define BRW_IMAGE_PARAM_SWIZZLING_OFFSET 20 +#define BRW_IMAGE_PARAM_SIZE 24 + +struct brw_image_param { + /** Surface binding table index. */ + uint32_t surface_idx; + + /** Offset applied to the X and Y surface coordinates. */ + uint32_t offset[2]; + + /** Surface X, Y and Z dimensions. */ + uint32_t size[3]; + + /** X-stride in bytes, Y-stride in pixels, horizontal slice stride in + * pixels, vertical slice stride in pixels. + */ + uint32_t stride[4]; + + /** Log2 of the tiling modulus in the X, Y and Z dimension. */ + uint32_t tiling[3]; + + /** + * Right shift to apply for bit 6 address swizzling. Two different + * swizzles can be specified and will be applied one after the other. The + * resulting address will be: + * + * addr' = addr ^ ((1 << 6) & ((addr >> swizzling[0]) ^ + * (addr >> swizzling[1]))) + * + * Use \c 0xff if any of the swizzles is not required. + */ + uint32_t swizzling[2]; }; /* Data about a particular attempt to compile a program. Note that diff --git a/src/mesa/drivers/dri/i965/brw_cs.cpp b/src/mesa/drivers/dri/i965/brw_cs.cpp index 29ee75b1e1a..59520307f42 100644 --- a/src/mesa/drivers/dri/i965/brw_cs.cpp +++ b/src/mesa/drivers/dri/i965/brw_cs.cpp @@ -190,7 +190,10 @@ brw_codegen_cs_prog(struct brw_context *brw, rzalloc_array(NULL, const gl_constant_value *, param_count); prog_data.base.pull_param = rzalloc_array(NULL, const gl_constant_value *, param_count); + prog_data.base.image_param = + rzalloc_array(NULL, struct brw_image_param, cs->NumImages); prog_data.base.nr_params = param_count; + prog_data.base.nr_image_params = cs->NumImages; program = brw_cs_emit(brw, mem_ctx, key, &prog_data, &cp->program, prog, &program_size); diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c index 9c59c8a0dfc..d1a955a4de3 100644 --- a/src/mesa/drivers/dri/i965/brw_gs.c +++ b/src/mesa/drivers/dri/i965/brw_gs.c @@ -69,7 +69,10 @@ brw_codegen_gs_prog(struct brw_context *brw, rzalloc_array(NULL, const gl_constant_value *, param_count); c.prog_data.base.base.pull_param = rzalloc_array(NULL, const gl_constant_value *, param_count); + c.prog_data.base.base.image_param = + rzalloc_array(NULL, struct brw_image_param, gs->NumImages); c.prog_data.base.base.nr_params = param_count; + c.prog_data.base.base.nr_image_params = gs->NumImages; if (brw->gen >= 7) { if (gp->program.OutputType == GL_POINTS) { diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c index 2b9b005c782..20bc7a97d20 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.c +++ b/src/mesa/drivers/dri/i965/brw_vs.c @@ -122,7 +122,7 @@ brw_codegen_vs_prog(struct brw_context *brw, * conservative here. */ param_count = vs->num_uniform_components * 4; - + stage_prog_data->nr_image_params = vs->NumImages; } else { param_count = vp->program.Base.Parameters->NumParameters * 4; } @@ -135,6 +135,9 @@ brw_codegen_vs_prog(struct brw_context *brw, rzalloc_array(NULL, const gl_constant_value *, param_count); stage_prog_data->pull_param = rzalloc_array(NULL, const gl_constant_value *, param_count); + stage_prog_data->image_param = + rzalloc_array(NULL, struct brw_image_param, + stage_prog_data->nr_image_params); stage_prog_data->nr_params = param_count; GLbitfield64 outputs_written = vp->program.Base.OutputsWritten; diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c index 9b90a7c191f..c8583c09fe6 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ b/src/mesa/drivers/dri/i965/brw_wm.c @@ -190,6 +190,7 @@ brw_codegen_wm_prog(struct brw_context *brw, int param_count; if (fs) { param_count = fs->num_uniform_components; + prog_data.base.nr_image_params = fs->NumImages; } else { param_count = fp->program.Base.Parameters->NumParameters * 4; } @@ -199,6 +200,9 @@ brw_codegen_wm_prog(struct brw_context *brw, rzalloc_array(NULL, const gl_constant_value *, param_count); prog_data.base.pull_param = rzalloc_array(NULL, const gl_constant_value *, param_count); + prog_data.base.image_param = + rzalloc_array(NULL, struct brw_image_param, + prog_data.base.nr_image_params); prog_data.base.nr_params = param_count; prog_data.barycentric_interp_modes = diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index 33e045f2099..35df6f13e19 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -1044,6 +1044,103 @@ get_image_format(struct brw_context *brw, mesa_format format, GLenum access) } } +static void +update_default_image_param(struct brw_context *brw, + struct gl_image_unit *u, + unsigned surface_idx, + struct brw_image_param *param) +{ + memset(param, 0, sizeof(*param)); + param->surface_idx = surface_idx; + /* Set the swizzling shifts to all-ones to effectively disable swizzling -- + * See emit_address_calculation() in brw_fs_surface_builder.cpp for a more + * detailed explanation of these parameters. + */ + param->swizzling[0] = 0xff; + param->swizzling[1] = 0xff; +} + +static void +update_buffer_image_param(struct brw_context *brw, + struct gl_image_unit *u, + unsigned surface_idx, + struct brw_image_param *param) +{ + struct gl_buffer_object *obj = u->TexObj->BufferObject; + + update_default_image_param(brw, u, surface_idx, param); + + param->size[0] = obj->Size / _mesa_get_format_bytes(u->_ActualFormat); + param->stride[0] = _mesa_get_format_bytes(u->_ActualFormat); +} + +static void +update_texture_image_param(struct brw_context *brw, + struct gl_image_unit *u, + unsigned surface_idx, + struct brw_image_param *param) +{ + struct intel_mipmap_tree *mt = intel_texture_object(u->TexObj)->mt; + + update_default_image_param(brw, u, surface_idx, param); + + param->size[0] = minify(mt->logical_width0, u->Level); + param->size[1] = minify(mt->logical_height0, u->Level); + param->size[2] = (!u->Layered ? 1 : + u->TexObj->Target == GL_TEXTURE_CUBE_MAP ? 6 : + u->TexObj->Target == GL_TEXTURE_3D ? + minify(mt->logical_depth0, u->Level) : + mt->logical_depth0); + + intel_miptree_get_image_offset(mt, u->Level, u->Layer, + ¶m->offset[0], + ¶m->offset[1]); + + param->stride[0] = mt->cpp; + param->stride[1] = mt->pitch / mt->cpp; + param->stride[2] = + brw_miptree_get_horizontal_slice_pitch(brw, mt, u->Level); + param->stride[3] = + brw_miptree_get_vertical_slice_pitch(brw, mt, u->Level); + + if (mt->tiling == I915_TILING_X) { + /* An X tile is a rectangular block of 512x8 bytes. */ + param->tiling[0] = _mesa_logbase2(512 / mt->cpp); + param->tiling[1] = _mesa_logbase2(8); + + if (brw->has_swizzling) { + /* Right shifts required to swizzle bits 9 and 10 of the memory + * address with bit 6. + */ + param->swizzling[0] = 3; + param->swizzling[1] = 4; + } + } else if (mt->tiling == I915_TILING_Y) { + /* The layout of a Y-tiled surface in memory isn't really fundamentally + * different to the layout of an X-tiled surface, we simply pretend that + * the surface is broken up in a number of smaller 16Bx32 tiles, each + * one arranged in X-major order just like is the case for X-tiling. + */ + param->tiling[0] = _mesa_logbase2(16 / mt->cpp); + param->tiling[1] = _mesa_logbase2(32); + + if (brw->has_swizzling) { + /* Right shift required to swizzle bit 9 of the memory address with + * bit 6. + */ + param->swizzling[0] = 3; + } + } + + /* 3D textures are arranged in 2D in memory with 2^lod slices per row. The + * address calculation algorithm (emit_address_calculation() in + * brw_fs_surface_builder.cpp) handles this as a sort of tiling with + * modulus equal to the LOD. + */ + param->tiling[2] = (u->TexObj->Target == GL_TEXTURE_3D ? u->Level : + 0); +} + static void update_image_surface(struct brw_context *brw, struct gl_image_unit *u, @@ -1067,6 +1164,8 @@ update_image_surface(struct brw_context *brw, format, intel_obj->Base.Size / texel_size, texel_size, access != GL_READ_ONLY); + update_buffer_image_param(brw, u, surface_idx, param); + } else { struct intel_texture_object *intel_obj = intel_texture_object(obj); struct intel_mipmap_tree *mt = intel_obj->mt; @@ -1094,10 +1193,13 @@ update_image_surface(struct brw_context *brw, format, SWIZZLE_XYZW, surf_offset, access != GL_READ_ONLY, false); } + + update_texture_image_param(brw, u, surface_idx, param); } } else { brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, surf_offset); + update_default_image_param(brw, u, surface_idx, param); } } -- 2.30.2