From cb5a37249c23084f057ece366bff0a0cf5e66297 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Thu, 25 Dec 2014 12:22:02 -1000 Subject: [PATCH] vc4: Handle unaligned accesses in CL emits. As of 229bf4475ff0a5dbeb9bc95250f7a40a983c2e28 we started getting SIGBUS from unaligned accesses on the hardware, for reasons I haven't figured out. However, we should be avoiding unaligned accesses anyway, and our CL setup certainly would have produced them. --- src/gallium/drivers/vc4/vc4_cl.h | 53 ++++++++++++++++++++++++++- src/gallium/drivers/vc4/vc4_program.c | 51 +++++++++++++------------- 2 files changed, 78 insertions(+), 26 deletions(-) diff --git a/src/gallium/drivers/vc4/vc4_cl.h b/src/gallium/drivers/vc4/vc4_cl.h index 5c67f225749..32a2e717379 100644 --- a/src/gallium/drivers/vc4/vc4_cl.h +++ b/src/gallium/drivers/vc4/vc4_cl.h @@ -27,6 +27,7 @@ #include #include "util/u_math.h" +#include "util/macros.h" #include "vc4_packet.h" @@ -45,6 +46,23 @@ void vc4_reset_cl(struct vc4_cl *cl); void vc4_dump_cl(void *cl, uint32_t size, bool is_render); uint32_t vc4_gem_hindex(struct vc4_context *vc4, struct vc4_bo *bo); +struct PACKED unaligned_16 { uint16_t x; }; +struct PACKED unaligned_32 { uint32_t x; }; + +static inline void +put_unaligned_32(void *ptr, uint32_t val) +{ + struct unaligned_32 *p = ptr; + p->x = val; +} + +static inline void +put_unaligned_16(void *ptr, uint16_t val) +{ + struct unaligned_16 *p = ptr; + p->x = val; +} + static inline void cl_u8(struct vc4_cl *cl, uint8_t n) { @@ -59,7 +77,7 @@ cl_u16(struct vc4_cl *cl, uint16_t n) { assert((cl->next - cl->base) + 2 <= cl->size); - *(uint16_t *)cl->next = n; + put_unaligned_16(cl->next, n); cl->next += 2; } @@ -68,6 +86,15 @@ cl_u32(struct vc4_cl *cl, uint32_t n) { assert((cl->next - cl->base) + 4 <= cl->size); + put_unaligned_32(cl->next, n); + cl->next += 4; +} + +static inline void +cl_aligned_u32(struct vc4_cl *cl, uint32_t n) +{ + assert((cl->next - cl->base) + 4 <= cl->size); + *(uint32_t *)cl->next = n; 
cl->next += 4; } @@ -87,6 +114,12 @@ cl_f(struct vc4_cl *cl, float f) cl_u32(cl, fui(f)); } +static inline void +cl_aligned_f(struct vc4_cl *cl, float f) +{ + cl_aligned_u32(cl, fui(f)); +} + static inline void cl_start_reloc(struct vc4_cl *cl, uint32_t n) { @@ -122,6 +155,17 @@ cl_reloc_hindex(struct vc4_cl *cl, uint32_t hindex, uint32_t offset) cl_u32(cl, offset); } +static inline void +cl_aligned_reloc_hindex(struct vc4_cl *cl, uint32_t hindex, uint32_t offset) +{ + *(uint32_t *)(cl->base + cl->reloc_next) = hindex; + cl->reloc_next += 4; + + cl->reloc_count--; + + cl_aligned_u32(cl, offset); +} + static inline void cl_reloc(struct vc4_context *vc4, struct vc4_cl *cl, struct vc4_bo *bo, uint32_t offset) @@ -129,6 +173,13 @@ cl_reloc(struct vc4_context *vc4, struct vc4_cl *cl, cl_reloc_hindex(cl, vc4_gem_hindex(vc4, bo), offset); } +static inline void +cl_aligned_reloc(struct vc4_context *vc4, struct vc4_cl *cl, + struct vc4_bo *bo, uint32_t offset) +{ + cl_aligned_reloc_hindex(cl, vc4_gem_hindex(vc4, bo), offset); +} + void cl_ensure_space(struct vc4_cl *cl, uint32_t size); #endif /* VC4_CL_H */ diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index 570c76a459b..0d6f0fa7d60 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -2580,7 +2580,7 @@ write_texture_p1(struct vc4_context *vc4, (sampler->mag_img_filter == PIPE_TEX_MIPFILTER_NEAREST || sampler->min_img_filter == PIPE_TEX_MIPFILTER_NEAREST); - cl_u32(&vc4->uniforms, + cl_aligned_u32(&vc4->uniforms, VC4_SET_FIELD(rsc->vc4_format >> 4, VC4_TEX_P1_TYPE4) | VC4_SET_FIELD(texture->texture->height0 & 2047, VC4_TEX_P1_HEIGHT) | @@ -2606,7 +2606,7 @@ write_texture_p2(struct vc4_context *vc4, struct pipe_sampler_view *texture = texstate->textures[unit]; struct vc4_resource *rsc = vc4_resource(texture->texture); - cl_u32(&vc4->uniforms, + cl_aligned_u32(&vc4->uniforms, VC4_SET_FIELD(VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE, 
VC4_TEX_P2_PTYPE) | VC4_SET_FIELD(rsc->cube_map_stride >> 12, VC4_TEX_P2_CMST) | @@ -2683,7 +2683,7 @@ write_texture_border_color(struct vc4_context *vc4, } } - cl_u32(&vc4->uniforms, uc.ui[0]); + cl_aligned_u32(&vc4->uniforms, uc.ui[0]); } static uint32_t @@ -2738,29 +2738,29 @@ vc4_write_uniforms(struct vc4_context *vc4, struct vc4_compiled_shader *shader, switch (uinfo->contents[i]) { case QUNIFORM_CONSTANT: - cl_u32(&vc4->uniforms, uinfo->data[i]); + cl_aligned_u32(&vc4->uniforms, uinfo->data[i]); break; case QUNIFORM_UNIFORM: - cl_u32(&vc4->uniforms, - gallium_uniforms[uinfo->data[i]]); + cl_aligned_u32(&vc4->uniforms, + gallium_uniforms[uinfo->data[i]]); break; case QUNIFORM_VIEWPORT_X_SCALE: - cl_f(&vc4->uniforms, vc4->viewport.scale[0] * 16.0f); + cl_aligned_f(&vc4->uniforms, vc4->viewport.scale[0] * 16.0f); break; case QUNIFORM_VIEWPORT_Y_SCALE: - cl_f(&vc4->uniforms, vc4->viewport.scale[1] * 16.0f); + cl_aligned_f(&vc4->uniforms, vc4->viewport.scale[1] * 16.0f); break; case QUNIFORM_VIEWPORT_Z_OFFSET: - cl_f(&vc4->uniforms, vc4->viewport.translate[2]); + cl_aligned_f(&vc4->uniforms, vc4->viewport.translate[2]); break; case QUNIFORM_VIEWPORT_Z_SCALE: - cl_f(&vc4->uniforms, vc4->viewport.scale[2]); + cl_aligned_f(&vc4->uniforms, vc4->viewport.scale[2]); break; case QUNIFORM_USER_CLIP_PLANE: - cl_f(&vc4->uniforms, - vc4->clip.ucp[uinfo->data[i] / 4][uinfo->data[i] % 4]); + cl_aligned_f(&vc4->uniforms, + vc4->clip.ucp[uinfo->data[i] / 4][uinfo->data[i] % 4]); break; case QUNIFORM_TEXTURE_CONFIG_P0: @@ -2776,7 +2776,7 @@ vc4_write_uniforms(struct vc4_context *vc4, struct vc4_compiled_shader *shader, break; case QUNIFORM_UBO_ADDR: - cl_reloc(vc4, &vc4->uniforms, ubo, 0); + cl_aligned_reloc(vc4, &vc4->uniforms, ubo, 0); break; case QUNIFORM_TEXTURE_BORDER_COLOR: @@ -2785,27 +2785,28 @@ vc4_write_uniforms(struct vc4_context *vc4, struct vc4_compiled_shader *shader, case QUNIFORM_TEXRECT_SCALE_X: case QUNIFORM_TEXRECT_SCALE_Y: - cl_u32(&vc4->uniforms, - 
get_texrect_scale(texstate, - uinfo->contents[i], - uinfo->data[i])); + cl_aligned_u32(&vc4->uniforms, + get_texrect_scale(texstate, + uinfo->contents[i], + uinfo->data[i])); break; case QUNIFORM_BLEND_CONST_COLOR: - cl_f(&vc4->uniforms, - vc4->blend_color.color[uinfo->data[i]]); + cl_aligned_f(&vc4->uniforms, + vc4->blend_color.color[uinfo->data[i]]); break; case QUNIFORM_STENCIL: - cl_u32(&vc4->uniforms, - vc4->zsa->stencil_uniforms[uinfo->data[i]] | - (uinfo->data[i] <= 1 ? - (vc4->stencil_ref.ref_value[uinfo->data[i]] << 8) : - 0)); + cl_aligned_u32(&vc4->uniforms, + vc4->zsa->stencil_uniforms[uinfo->data[i]] | + (uinfo->data[i] <= 1 ? + (vc4->stencil_ref.ref_value[uinfo->data[i]] << 8) : + 0)); break; case QUNIFORM_ALPHA_REF: - cl_f(&vc4->uniforms, vc4->zsa->base.alpha.ref_value); + cl_aligned_f(&vc4->uniforms, + vc4->zsa->base.alpha.ref_value); break; } #if 0 -- 2.30.2