vc4: Handle unaligned accesses in CL emits.
author: Eric Anholt <eric@anholt.net>
Thu, 25 Dec 2014 22:22:02 +0000 (12:22 -1000)
committer: Eric Anholt <eric@anholt.net>
Fri, 26 Dec 2014 01:47:39 +0000 (15:47 -1000)
As of 229bf4475ff0a5dbeb9bc95250f7a40a983c2e28 we started getting SIGBUS
from unaligned accesses on the hardware, for reasons I haven't figured
out.  However, we should be avoiding unaligned accesses anyway, and our CL
setup certainly would have produced them.

src/gallium/drivers/vc4/vc4_cl.h
src/gallium/drivers/vc4/vc4_program.c

index 5c67f225749d214e1805a2b4dee0eeb0f8be51f1..32a2e717379cebaf4f29bc5f4e8e2cd0c312af15 100644 (file)
@@ -27,6 +27,7 @@
 #include <stdint.h>
 
 #include "util/u_math.h"
+#include "util/macros.h"
 
 #include "vc4_packet.h"
 
@@ -45,6 +46,23 @@ void vc4_reset_cl(struct vc4_cl *cl);
 void vc4_dump_cl(void *cl, uint32_t size, bool is_render);
 uint32_t vc4_gem_hindex(struct vc4_context *vc4, struct vc4_bo *bo);
 
+struct PACKED unaligned_16 { uint16_t x; };
+struct PACKED unaligned_32 { uint32_t x; };
+
+static inline void
+put_unaligned_32(void *ptr, uint32_t val)
+{
+        struct unaligned_32 *p = ptr;
+        p->x = val;
+}
+
+static inline void
+put_unaligned_16(void *ptr, uint16_t val)
+{
+        struct unaligned_16 *p = ptr;
+        p->x = val;
+}
+
 static inline void
 cl_u8(struct vc4_cl *cl, uint8_t n)
 {
@@ -59,7 +77,7 @@ cl_u16(struct vc4_cl *cl, uint16_t n)
 {
         assert((cl->next - cl->base) + 2 <= cl->size);
 
-        *(uint16_t *)cl->next = n;
+        put_unaligned_16(cl->next, n);
         cl->next += 2;
 }
 
@@ -68,6 +86,15 @@ cl_u32(struct vc4_cl *cl, uint32_t n)
 {
         assert((cl->next - cl->base) + 4 <= cl->size);
 
+        put_unaligned_32(cl->next, n);
+        cl->next += 4;
+}
+
+static inline void
+cl_aligned_u32(struct vc4_cl *cl, uint32_t n)
+{
+        assert((cl->next - cl->base) + 4 <= cl->size);
+
         *(uint32_t *)cl->next = n;
         cl->next += 4;
 }
@@ -87,6 +114,12 @@ cl_f(struct vc4_cl *cl, float f)
         cl_u32(cl, fui(f));
 }
 
+static inline void
+cl_aligned_f(struct vc4_cl *cl, float f)
+{
+        cl_aligned_u32(cl, fui(f));
+}
+
 static inline void
 cl_start_reloc(struct vc4_cl *cl, uint32_t n)
 {
@@ -122,6 +155,17 @@ cl_reloc_hindex(struct vc4_cl *cl, uint32_t hindex, uint32_t offset)
         cl_u32(cl, offset);
 }
 
+static inline void
+cl_aligned_reloc_hindex(struct vc4_cl *cl, uint32_t hindex, uint32_t offset)
+{
+        *(uint32_t *)(cl->base + cl->reloc_next) = hindex;
+        cl->reloc_next += 4;
+
+        cl->reloc_count--;
+
+        cl_aligned_u32(cl, offset);
+}
+
 static inline void
 cl_reloc(struct vc4_context *vc4, struct vc4_cl *cl,
          struct vc4_bo *bo, uint32_t offset)
@@ -129,6 +173,13 @@ cl_reloc(struct vc4_context *vc4, struct vc4_cl *cl,
         cl_reloc_hindex(cl, vc4_gem_hindex(vc4, bo), offset);
 }
 
+static inline void
+cl_aligned_reloc(struct vc4_context *vc4, struct vc4_cl *cl,
+         struct vc4_bo *bo, uint32_t offset)
+{
+        cl_aligned_reloc_hindex(cl, vc4_gem_hindex(vc4, bo), offset);
+}
+
 void cl_ensure_space(struct vc4_cl *cl, uint32_t size);
 
 #endif /* VC4_CL_H */
index 570c76a459bf8a5af285db8f65ad06f13009aaa2..0d6f0fa7d60367ce672a0988c2acf1280035e9c7 100644 (file)
@@ -2580,7 +2580,7 @@ write_texture_p1(struct vc4_context *vc4,
                 (sampler->mag_img_filter == PIPE_TEX_MIPFILTER_NEAREST ||
                  sampler->min_img_filter == PIPE_TEX_MIPFILTER_NEAREST);
 
-        cl_u32(&vc4->uniforms,
+        cl_aligned_u32(&vc4->uniforms,
                VC4_SET_FIELD(rsc->vc4_format >> 4, VC4_TEX_P1_TYPE4) |
                VC4_SET_FIELD(texture->texture->height0 & 2047,
                              VC4_TEX_P1_HEIGHT) |
@@ -2606,7 +2606,7 @@ write_texture_p2(struct vc4_context *vc4,
         struct pipe_sampler_view *texture = texstate->textures[unit];
         struct vc4_resource *rsc = vc4_resource(texture->texture);
 
-        cl_u32(&vc4->uniforms,
+        cl_aligned_u32(&vc4->uniforms,
                VC4_SET_FIELD(VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE,
                              VC4_TEX_P2_PTYPE) |
                VC4_SET_FIELD(rsc->cube_map_stride >> 12, VC4_TEX_P2_CMST) |
@@ -2683,7 +2683,7 @@ write_texture_border_color(struct vc4_context *vc4,
                 }
         }
 
-        cl_u32(&vc4->uniforms, uc.ui[0]);
+        cl_aligned_u32(&vc4->uniforms, uc.ui[0]);
 }
 
 static uint32_t
@@ -2738,29 +2738,29 @@ vc4_write_uniforms(struct vc4_context *vc4, struct vc4_compiled_shader *shader,
 
                 switch (uinfo->contents[i]) {
                 case QUNIFORM_CONSTANT:
-                        cl_u32(&vc4->uniforms, uinfo->data[i]);
+                        cl_aligned_u32(&vc4->uniforms, uinfo->data[i]);
                         break;
                 case QUNIFORM_UNIFORM:
-                        cl_u32(&vc4->uniforms,
-                               gallium_uniforms[uinfo->data[i]]);
+                        cl_aligned_u32(&vc4->uniforms,
+                                       gallium_uniforms[uinfo->data[i]]);
                         break;
                 case QUNIFORM_VIEWPORT_X_SCALE:
-                        cl_f(&vc4->uniforms, vc4->viewport.scale[0] * 16.0f);
+                        cl_aligned_f(&vc4->uniforms, vc4->viewport.scale[0] * 16.0f);
                         break;
                 case QUNIFORM_VIEWPORT_Y_SCALE:
-                        cl_f(&vc4->uniforms, vc4->viewport.scale[1] * 16.0f);
+                        cl_aligned_f(&vc4->uniforms, vc4->viewport.scale[1] * 16.0f);
                         break;
 
                 case QUNIFORM_VIEWPORT_Z_OFFSET:
-                        cl_f(&vc4->uniforms, vc4->viewport.translate[2]);
+                        cl_aligned_f(&vc4->uniforms, vc4->viewport.translate[2]);
                         break;
                 case QUNIFORM_VIEWPORT_Z_SCALE:
-                        cl_f(&vc4->uniforms, vc4->viewport.scale[2]);
+                        cl_aligned_f(&vc4->uniforms, vc4->viewport.scale[2]);
                         break;
 
                 case QUNIFORM_USER_CLIP_PLANE:
-                        cl_f(&vc4->uniforms,
-                             vc4->clip.ucp[uinfo->data[i] / 4][uinfo->data[i] % 4]);
+                        cl_aligned_f(&vc4->uniforms,
+                                     vc4->clip.ucp[uinfo->data[i] / 4][uinfo->data[i] % 4]);
                         break;
 
                 case QUNIFORM_TEXTURE_CONFIG_P0:
@@ -2776,7 +2776,7 @@ vc4_write_uniforms(struct vc4_context *vc4, struct vc4_compiled_shader *shader,
                         break;
 
                 case QUNIFORM_UBO_ADDR:
-                        cl_reloc(vc4, &vc4->uniforms, ubo, 0);
+                        cl_aligned_reloc(vc4, &vc4->uniforms, ubo, 0);
                         break;
 
                 case QUNIFORM_TEXTURE_BORDER_COLOR:
@@ -2785,27 +2785,28 @@ vc4_write_uniforms(struct vc4_context *vc4, struct vc4_compiled_shader *shader,
 
                 case QUNIFORM_TEXRECT_SCALE_X:
                 case QUNIFORM_TEXRECT_SCALE_Y:
-                        cl_u32(&vc4->uniforms,
-                               get_texrect_scale(texstate,
-                                                 uinfo->contents[i],
-                                                 uinfo->data[i]));
+                        cl_aligned_u32(&vc4->uniforms,
+                                       get_texrect_scale(texstate,
+                                                         uinfo->contents[i],
+                                                         uinfo->data[i]));
                         break;
 
                 case QUNIFORM_BLEND_CONST_COLOR:
-                        cl_f(&vc4->uniforms,
-                             vc4->blend_color.color[uinfo->data[i]]);
+                        cl_aligned_f(&vc4->uniforms,
+                                     vc4->blend_color.color[uinfo->data[i]]);
                         break;
 
                 case QUNIFORM_STENCIL:
-                        cl_u32(&vc4->uniforms,
-                               vc4->zsa->stencil_uniforms[uinfo->data[i]] |
-                               (uinfo->data[i] <= 1 ?
-                                (vc4->stencil_ref.ref_value[uinfo->data[i]] << 8) :
-                                0));
+                        cl_aligned_u32(&vc4->uniforms,
+                                       vc4->zsa->stencil_uniforms[uinfo->data[i]] |
+                                       (uinfo->data[i] <= 1 ?
+                                        (vc4->stencil_ref.ref_value[uinfo->data[i]] << 8) :
+                                        0));
                         break;
 
                 case QUNIFORM_ALPHA_REF:
-                        cl_f(&vc4->uniforms, vc4->zsa->base.alpha.ref_value);
+                        cl_aligned_f(&vc4->uniforms,
+                                     vc4->zsa->base.alpha.ref_value);
                         break;
                 }
 #if 0