From 5db1593c78192b764ad2ef7bdc5182d8ec4aed7c Mon Sep 17 00:00:00 2001 From: Brian Date: Mon, 4 Feb 2008 18:05:37 -0700 Subject: [PATCH] Cell: fix some alignment issues by aligning commands to 8-byte boundaries Contributed by Ian Romanick. Also, temporarily disable inlined vertex buffers. They need to be 16-byte aligned... --- src/mesa/pipe/cell/common.h | 16 +++--- src/mesa/pipe/cell/ppu/cell_batch.c | 4 +- src/mesa/pipe/cell/ppu/cell_flush.c | 2 +- src/mesa/pipe/cell/ppu/cell_state_emit.c | 3 +- src/mesa/pipe/cell/ppu/cell_vbuf.c | 4 +- src/mesa/pipe/cell/ppu/cell_vertex_shader.c | 22 ++++---- src/mesa/pipe/cell/spu/spu_main.c | 58 +++++++++------------ src/mesa/pipe/cell/spu/spu_vertex_fetch.c | 7 ++- src/mesa/pipe/cell/spu/spu_vertex_shader.h | 2 +- 9 files changed, 57 insertions(+), 61 deletions(-) diff --git a/src/mesa/pipe/cell/common.h b/src/mesa/pipe/cell/common.h index d861e82d332..cf8fc94ebf1 100644 --- a/src/mesa/pipe/cell/common.h +++ b/src/mesa/pipe/cell/common.h @@ -57,6 +57,9 @@ /** round up value to next multiple of 4 */ #define ROUNDUP4(k) (((k) + 0x3) & ~0x3) +/** round up value to next multiple of 8 */ +#define ROUNDUP8(k) (((k) + 0x7) & ~0x7) + /** round up value to next multiple of 16 */ #define ROUNDUP16(k) (((k) + 0xf) & ~0xf) @@ -102,7 +105,7 @@ */ struct cell_command_framebuffer { - uint opcode; + uint64_t opcode; int width, height; void *color_start, *depth_start; enum pipe_format color_format, depth_format; @@ -114,7 +117,7 @@ struct cell_command_framebuffer */ struct cell_command_clear_surface { - uint opcode; + uint64_t opcode; uint surface; /**< Temporary: 0=color, 1=Z */ uint value; }; @@ -125,8 +128,7 @@ struct cell_command_clear_surface */ struct cell_array_info { - uint opcode; - uint base; /**< Base address of the 0th element. */ + uint64_t base; /**< Base address of the 0th element. */ uint attr; /**< Attribute that this state if for. */ uint pitch; /**< Byte pitch from one entry to the next. */ uint format; /**< Pipe format of each entry. */ @@ -150,7 +152,7 @@ struct cell_shader_info #define SPU_VERTS_PER_BATCH 64 struct cell_command_vs { - uint opcode; /**< CELL_CMD_VS_EXECUTE */ + uint64_t opcode; /**< CELL_CMD_VS_EXECUTE */ struct cell_shader_info shader; unsigned num_elts; unsigned elts[SPU_VERTS_PER_BATCH]; @@ -163,7 +165,7 @@ struct cell_command_vs struct cell_command_render { - uint opcode; /**< CELL_CMD_RENDER */ + uint64_t opcode; /**< CELL_CMD_RENDER */ uint prim_type; /**< PIPE_PRIM_x */ uint num_verts; uint vertex_size; /**< bytes per vertex */ @@ -179,7 +181,7 @@ struct cell_command_render struct cell_command_release_verts { - int opcode; /**< CELL_CMD_RELEASE_VERTS */ + uint64_t opcode; /**< CELL_CMD_RELEASE_VERTS */ uint vertex_buf; /**< in [0, CELL_NUM_BUFFERS-1] */ }; diff --git a/src/mesa/pipe/cell/ppu/cell_batch.c b/src/mesa/pipe/cell/ppu/cell_batch.c index 2d032fc9026..2fb49711b2d 100644 --- a/src/mesa/pipe/cell/ppu/cell_batch.c +++ b/src/mesa/pipe/cell/ppu/cell_batch.c @@ -136,7 +136,7 @@ cell_batch_append(struct cell_context *cell, const void *data, uint bytes) { uint size; - ASSERT(bytes % 4 == 0); + ASSERT(bytes % 8 == 0); ASSERT(bytes <= CELL_BUFFER_SIZE); ASSERT(cell->cur_batch >= 0); @@ -171,7 +171,7 @@ cell_batch_alloc(struct cell_context *cell, uint bytes) void *pos; uint size; - ASSERT(bytes % 4 == 0); + ASSERT(bytes % 8 == 0); ASSERT(bytes <= CELL_BUFFER_SIZE); assert(cell->cur_batch >= 0); diff --git a/src/mesa/pipe/cell/ppu/cell_flush.c b/src/mesa/pipe/cell/ppu/cell_flush.c index cf4e676645b..f62bc4650ce 100644 --- a/src/mesa/pipe/cell/ppu/cell_flush.c +++ b/src/mesa/pipe/cell/ppu/cell_flush.c @@ -59,7 +59,7 @@ cell_flush_int(struct pipe_context *pipe, unsigned flags) flushing = TRUE; if (flags & PIPE_FLUSH_WAIT) { - uint *cmd = (uint *) cell_batch_alloc(cell, sizeof(uint)); + uint64_t *cmd = (uint64_t *) cell_batch_alloc(cell, sizeof(uint64_t)); *cmd = CELL_CMD_FINISH; } diff --git a/src/mesa/pipe/cell/ppu/cell_state_emit.c b/src/mesa/pipe/cell/ppu/cell_state_emit.c index 3b2670f786b..5d2a7864493 100644 --- a/src/mesa/pipe/cell/ppu/cell_state_emit.c +++ b/src/mesa/pipe/cell/ppu/cell_state_emit.c @@ -37,7 +37,8 @@ static void emit_state_cmd(struct cell_context *cell, uint cmd, const void *state, uint state_size) { - uint *dst = (uint *) cell_batch_alloc(cell, sizeof(uint) + state_size); + uint64_t *dst = (uint64_t *) + cell_batch_alloc(cell, ROUNDUP8(sizeof(uint64_t) + state_size)); *dst = cmd; memcpy(dst + 1, state, state_size); } diff --git a/src/mesa/pipe/cell/ppu/cell_vbuf.c b/src/mesa/pipe/cell/ppu/cell_vbuf.c index e63b34cf525..0fee61821a8 100644 --- a/src/mesa/pipe/cell/ppu/cell_vbuf.c +++ b/src/mesa/pipe/cell/ppu/cell_vbuf.c @@ -40,7 +40,7 @@ /** Allow vertex data to be inlined after RENDER command */ -#define ALLOW_INLINE_VERTS 1 +#define ALLOW_INLINE_VERTS 0 /** @@ -197,7 +197,7 @@ cell_vbuf_draw(struct vbuf_render *vbr, /* build/insert batch RENDER command */ { - const uint index_bytes = ROUNDUP4(nr_indices * 2); + const uint index_bytes = ROUNDUP8(nr_indices * 2); const uint vertex_bytes = nr_vertices * 4 * cell->vertex_info.size; const uint batch_size = sizeof(struct cell_command_render) diff --git a/src/mesa/pipe/cell/ppu/cell_vertex_shader.c b/src/mesa/pipe/cell/ppu/cell_vertex_shader.c index aef329a9024..80dd500b345 100644 --- a/src/mesa/pipe/cell/ppu/cell_vertex_shader.c +++ b/src/mesa/pipe/cell/ppu/cell_vertex_shader.c @@ -52,8 +52,8 @@ cell_vertex_shader_queue_flush(struct draw_context *draw) struct cell_context *const cell = (struct cell_context *) draw->driver_private; struct cell_command_vs *const vs = &cell_global.command[0].vs; - unsigned *batch; - struct cell_array_info array_info; + uint64_t *batch; + struct cell_array_info *array_info; unsigned i, j; assert(draw->vs.queue_nr != 0); @@ -63,17 +63,19 @@ cell_vertex_shader_queue_flush(struct draw_context *draw) draw_update_vertex_fetch(draw); for (i = 0; i < draw->vertex_fetch.nr_attrs; i++) { - array_info.opcode = CELL_CMD_STATE_VS_ARRAY_INFO; - assert(draw->vertex_fetch.src_ptr[i] != NULL); - array_info.base = (uintptr_t) draw->vertex_fetch.src_ptr[i]; - array_info.attr = i; - array_info.pitch = draw->vertex_fetch.pitch[i]; - array_info.format = draw->vertex_element[i].src_format; + batch = cell_batch_alloc(cell, sizeof(batch[0]) + sizeof(*array_info)); + + batch[0] = CELL_CMD_STATE_VS_ARRAY_INFO; - cell_batch_append(cell, & array_info, sizeof(array_info)); + array_info = (struct cell_array_info *) &batch[1]; + assert(draw->vertex_fetch.src_ptr[i] != NULL); + array_info->base = (uintptr_t) draw->vertex_fetch.src_ptr[i]; + array_info->attr = i; + array_info->pitch = draw->vertex_fetch.pitch[i]; + array_info->format = draw->vertex_element[i].src_format; } - batch = cell_batch_alloc(cell, sizeof(unsigned) + batch = cell_batch_alloc(cell, sizeof(batch[0]) + sizeof(struct pipe_viewport_state)); batch[0] = CELL_CMD_STATE_VIEWPORT; (void) memcpy(&batch[1], &draw->viewport, diff --git a/src/mesa/pipe/cell/spu/spu_main.c b/src/mesa/pipe/cell/spu/spu_main.c index b0311db1aab..4f126d5e5bb 100644 --- a/src/mesa/pipe/cell/spu/spu_main.c +++ b/src/mesa/pipe/cell/spu/spu_main.c @@ -31,7 +31,6 @@ #include #include -#include #include "spu_main.h" #include "spu_render.h" @@ -220,13 +219,13 @@ cmd_state_framebuffer(const struct cell_command_framebuffer *cmd) spu.fb.zsize = 0; if (spu.fb.color_format == PIPE_FORMAT_A8R8G8B8_UNORM) - spu.color_shuffle = VEC_LITERAL(vector unsigned char, - 12, 0, 4, 8, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0); + spu.color_shuffle = ((vector unsigned char) { + 12, 0, 4, 8, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0}); else if (spu.fb.color_format == PIPE_FORMAT_B8G8R8A8_UNORM) - spu.color_shuffle = VEC_LITERAL(vector unsigned char, - 8, 4, 0, 12, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0); + spu.color_shuffle = ((vector unsigned char) { + 8, 4, 0, 12, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0}); else ASSERT(0); } @@ -279,16 +278,10 @@ cmd_state_texture(const struct cell_command_texture *texture) spu.init.id, texture->start, texture->width, texture->height); memcpy(&spu.texture, texture, sizeof(*texture)); - spu.tex_size = VEC_LITERAL(vector float, - spu.texture.width, - spu.texture.height, - 0.0, - 0.0); - spu.tex_size_mask = VEC_LITERAL(vector unsigned int, - spu.texture.width - 1, - spu.texture.height - 1, - 0, - 0); + spu.tex_size = (vector float) + { spu.texture.width, spu.texture.height, 0.0, 0.0}; + spu.tex_size_mask = (vector unsigned int) + { spu.texture.width - 1, spu.texture.height - 1, 0, 0 }; } @@ -341,8 +334,8 @@ cmd_batch(uint opcode) { const uint buf = (opcode >> 8) & 0xff; uint size = (opcode >> 16); - uint buffer[CELL_BUFFER_SIZE / 4] ALIGN16_ATTRIB; - const uint usize = size / sizeof(uint); + uint64_t buffer[CELL_BUFFER_SIZE / 8] ALIGN16_ATTRIB; + const unsigned usize = size / sizeof(buffer[0]); uint pos; if (Debug) @@ -377,7 +370,7 @@ cmd_batch(uint opcode) struct cell_command_framebuffer *fb = (struct cell_command_framebuffer *) &buffer[pos]; cmd_state_framebuffer(fb); - pos += sizeof(*fb) / 4; + pos += sizeof(*fb) / 8; } break; case CELL_CMD_CLEAR_SURFACE: @@ -385,7 +378,7 @@ cmd_batch(uint opcode) struct cell_command_clear_surface *clr = (struct cell_command_clear_surface *) &buffer[pos]; cmd_clear_surface(clr); - pos += sizeof(*clr) / 4; + pos += sizeof(*clr) / 8; } break; case CELL_CMD_RENDER: @@ -394,7 +387,7 @@ cmd_batch(uint opcode) = (struct cell_command_render *) &buffer[pos]; uint pos_incr; cmd_render(render, &pos_incr); - pos += sizeof(*render) / 4 + pos_incr; + pos += sizeof(*render) / 8 + ((pos_incr + 1) / 2); } break; case CELL_CMD_RELEASE_VERTS: @@ -402,8 +395,7 @@ cmd_batch(uint opcode) struct cell_command_release_verts *release = (struct cell_command_release_verts *) &buffer[pos]; cmd_release_verts(release); - ASSERT(sizeof(*release) == 8); - pos += sizeof(*release) / 4; + pos += sizeof(*release) / 8; } break; case CELL_CMD_FINISH: @@ -413,36 +405,36 @@ cmd_batch(uint opcode) case CELL_CMD_STATE_BLEND: cmd_state_blend((struct pipe_blend_state *) &buffer[pos+1]); - pos += (1 + sizeof(struct pipe_blend_state) / 4); + pos += (1 + ROUNDUP8(sizeof(struct pipe_blend_state)) / 8); break; case CELL_CMD_STATE_DEPTH_STENCIL: cmd_state_depth_stencil((struct pipe_depth_stencil_alpha_state *) &buffer[pos+1]); - pos += (1 + sizeof(struct pipe_depth_stencil_alpha_state) / 4); + pos += (1 + ROUNDUP8(sizeof(struct pipe_depth_stencil_alpha_state)) / 8); break; case CELL_CMD_STATE_SAMPLER: cmd_state_sampler((struct pipe_sampler_state *) &buffer[pos+1]); - pos += (1 + sizeof(struct pipe_sampler_state) / 4); + pos += (1 + ROUNDUP8(sizeof(struct pipe_sampler_state)) / 8); break; case CELL_CMD_STATE_TEXTURE: cmd_state_texture((struct cell_command_texture *) &buffer[pos+1]); - pos += (1 + sizeof(struct cell_command_texture) / 4); + pos += (1 + ROUNDUP8(sizeof(struct cell_command_texture)) / 8); break; case CELL_CMD_STATE_VERTEX_INFO: cmd_state_vertex_info((struct vertex_info *) &buffer[pos+1]); - pos += (1 + sizeof(struct vertex_info) / 4); + pos += (1 + ROUNDUP8(sizeof(struct vertex_info)) / 8); break; case CELL_CMD_STATE_VIEWPORT: (void) memcpy(& draw.viewport, &buffer[pos+1], sizeof(struct pipe_viewport_state)); - pos += (1 + sizeof(struct pipe_viewport_state) / 4); + pos += (1 + ROUNDUP8(sizeof(struct pipe_viewport_state)) / 8); break; case CELL_CMD_STATE_VS_ARRAY_INFO: - cmd_state_vs_array_info((struct cell_array_info *) &buffer[pos]); - pos += (sizeof(struct cell_array_info) / 4); + cmd_state_vs_array_info((struct cell_array_info *) &buffer[pos+1]); + pos += (1 + ROUNDUP8(sizeof(struct cell_array_info)) / 8); break; default: - printf("SPU %u: bad opcode: 0x%x\n", spu.init.id, buffer[pos]); + printf("SPU %u: bad opcode: 0x%llx\n", spu.init.id, buffer[pos]); ASSERT(0); break; } diff --git a/src/mesa/pipe/cell/spu/spu_vertex_fetch.c b/src/mesa/pipe/cell/spu/spu_vertex_fetch.c index 1e846868e38..5b0f2a6470c 100644 --- a/src/mesa/pipe/cell/spu/spu_vertex_fetch.c +++ b/src/mesa/pipe/cell/spu/spu_vertex_fetch.c @@ -431,9 +431,8 @@ static void generic_vertex_fetch(struct spu_vs_context *draw, /* loop over vertex attributes (vertex shader inputs) */ for (attr = 0; attr < nr_attrs; attr++) { - - const unsigned pitch = draw->vertex_fetch.pitch[attr]; - const ubyte *src = draw->vertex_fetch.src_ptr[attr]; + const unsigned pitch = draw->vertex_fetch.pitch[attr]; + const uint64_t src = draw->vertex_fetch.src_ptr[attr]; const spu_fetch_func fetch = draw->vertex_fetch.fetch[attr]; unsigned i; float p[4][4]; @@ -447,7 +446,7 @@ static void generic_vertex_fetch(struct spu_vs_context *draw, */ for (i = 0; i < count; i++) { uint8_t buffer[32] ALIGN16_ATTRIB; - const unsigned long addr = src + (elts[i] * pitch); + const uint64_t addr = src + (elts[i] * pitch); const unsigned size = ((addr & 0x0f) == 0) ? 16 : 32; mfc_get(buffer, addr & ~0x0f, size, TAG_VERTEX_BUFFER, 0, 0); diff --git a/src/mesa/pipe/cell/spu/spu_vertex_shader.h b/src/mesa/pipe/cell/spu/spu_vertex_shader.h index c52f38fd026..b261ab44a21 100644 --- a/src/mesa/pipe/cell/spu/spu_vertex_shader.h +++ b/src/mesa/pipe/cell/spu/spu_vertex_shader.h @@ -16,7 +16,7 @@ struct spu_vs_context { struct pipe_viewport_state viewport; struct { - const ubyte *src_ptr[PIPE_ATTRIB_MAX]; + uint64_t src_ptr[PIPE_ATTRIB_MAX]; unsigned pitch[PIPE_ATTRIB_MAX]; enum pipe_format format[PIPE_ATTRIB_MAX]; unsigned nr_attrs; -- 2.30.2