Cell: fix some alignment issues by aligning commands to 8-byte boundaries
author Brian <brian.paul@tungstengraphics.com>
Tue, 5 Feb 2008 01:05:37 +0000 (18:05 -0700)
committer Brian <brian.paul@tungstengraphics.com>
Tue, 5 Feb 2008 01:05:37 +0000 (18:05 -0700)
Contributed by Ian Romanick.
Also, temporarily disable inlined vertex buffers.  They need to be 16-byte
aligned...

src/mesa/pipe/cell/common.h
src/mesa/pipe/cell/ppu/cell_batch.c
src/mesa/pipe/cell/ppu/cell_flush.c
src/mesa/pipe/cell/ppu/cell_state_emit.c
src/mesa/pipe/cell/ppu/cell_vbuf.c
src/mesa/pipe/cell/ppu/cell_vertex_shader.c
src/mesa/pipe/cell/spu/spu_main.c
src/mesa/pipe/cell/spu/spu_vertex_fetch.c
src/mesa/pipe/cell/spu/spu_vertex_shader.h

index d861e82d332a16f5080451e10fe244b41e917b62..cf8fc94ebf16e4f8e95bbf7f47261495de690c09 100644 (file)
@@ -57,6 +57,9 @@
 /** round up value to next multiple of 4 */
 #define ROUNDUP4(k)  (((k) + 0x3) & ~0x3)
 
+/** round up value to next multiple of 8 */
+#define ROUNDUP8(k)  (((k) + 0x7) & ~0x7)
+
 /** round up value to next multiple of 16 */
 #define ROUNDUP16(k)  (((k) + 0xf) & ~0xf)
 
  */
 struct cell_command_framebuffer
 {
-   uint opcode;
+   uint64_t opcode;
    int width, height;
    void *color_start, *depth_start;
    enum pipe_format color_format, depth_format;
@@ -114,7 +117,7 @@ struct cell_command_framebuffer
  */
 struct cell_command_clear_surface
 {
-   uint opcode;
+   uint64_t opcode;
    uint surface; /**< Temporary: 0=color, 1=Z */
    uint value;
 };
@@ -125,8 +128,7 @@ struct cell_command_clear_surface
  */
 struct cell_array_info
 {
-    uint opcode;
-    uint base;          /**< Base address of the 0th element. */
+    uint64_t base;          /**< Base address of the 0th element. */
     uint attr;          /**< Attribute that this state if for. */
     uint pitch;         /**< Byte pitch from one entry to the next. */
     uint format;        /**< Pipe format of each entry. */
@@ -150,7 +152,7 @@ struct cell_shader_info
 #define SPU_VERTS_PER_BATCH 64
 struct cell_command_vs
 {
-   uint opcode;       /**< CELL_CMD_VS_EXECUTE */
+   uint64_t opcode;       /**< CELL_CMD_VS_EXECUTE */
    struct cell_shader_info   shader;
    unsigned num_elts;
    unsigned elts[SPU_VERTS_PER_BATCH];
@@ -163,7 +165,7 @@ struct cell_command_vs
 
 struct cell_command_render
 {
-   uint opcode;       /**< CELL_CMD_RENDER */
+   uint64_t opcode;   /**< CELL_CMD_RENDER */
    uint prim_type;    /**< PIPE_PRIM_x */
    uint num_verts;
    uint vertex_size;  /**< bytes per vertex */
@@ -179,7 +181,7 @@ struct cell_command_render
 
 struct cell_command_release_verts
 {
-   int opcode;         /**< CELL_CMD_RELEASE_VERTS */
+   uint64_t opcode;         /**< CELL_CMD_RELEASE_VERTS */
    uint vertex_buf;    /**< in [0, CELL_NUM_BUFFERS-1] */
 };
 
index 2d032fc9026a01b2b297bbc100f6d24366c27b39..2fb49711b2d22b567a9687a38bddb2b0e7105aa6 100644 (file)
@@ -136,7 +136,7 @@ cell_batch_append(struct cell_context *cell, const void *data, uint bytes)
 {
    uint size;
 
-   ASSERT(bytes % 4 == 0);
+   ASSERT(bytes % 8 == 0);
    ASSERT(bytes <= CELL_BUFFER_SIZE);
    ASSERT(cell->cur_batch >= 0);
 
@@ -171,7 +171,7 @@ cell_batch_alloc(struct cell_context *cell, uint bytes)
    void *pos;
    uint size;
 
-   ASSERT(bytes % 4 == 0);
+   ASSERT(bytes % 8 == 0);
    ASSERT(bytes <= CELL_BUFFER_SIZE);
 
    assert(cell->cur_batch >= 0);
index cf4e676645b8767757ef2338aaff0e35d3497065..f62bc4650ce2b27ea263694365e7e769ef109914 100644 (file)
@@ -59,7 +59,7 @@ cell_flush_int(struct pipe_context *pipe, unsigned flags)
    flushing = TRUE;
 
    if (flags & PIPE_FLUSH_WAIT) {
-      uint *cmd = (uint *) cell_batch_alloc(cell, sizeof(uint));
+      uint64_t *cmd = (uint64_t *) cell_batch_alloc(cell, sizeof(uint64_t));
       *cmd = CELL_CMD_FINISH;
    }
 
index 3b2670f786b50676711bcc329d3e2a05c43dfc55..5d2a786449353d3916eac9ef98df379611113b5f 100644 (file)
@@ -37,7 +37,8 @@ static void
 emit_state_cmd(struct cell_context *cell, uint cmd,
                const void *state, uint state_size)
 {
-   uint *dst = (uint *) cell_batch_alloc(cell, sizeof(uint) + state_size);
+   uint64_t *dst = (uint64_t *) 
+       cell_batch_alloc(cell, ROUNDUP8(sizeof(uint64_t) + state_size));
    *dst = cmd;
    memcpy(dst + 1, state, state_size);
 }
index e63b34cf5250caba2a875b4704979ebac6deb262..0fee61821a8d4ecdb54aa89ccd778739400057bd 100644 (file)
@@ -40,7 +40,7 @@
 
 
 /** Allow vertex data to be inlined after RENDER command */
-#define ALLOW_INLINE_VERTS 1
+#define ALLOW_INLINE_VERTS 0
 
 
 /**
@@ -197,7 +197,7 @@ cell_vbuf_draw(struct vbuf_render *vbr,
 
    /* build/insert batch RENDER command */
    {
-      const uint index_bytes = ROUNDUP4(nr_indices * 2);
+      const uint index_bytes = ROUNDUP8(nr_indices * 2);
       const uint vertex_bytes = nr_vertices * 4 * cell->vertex_info.size;
 
       const uint batch_size = sizeof(struct cell_command_render)
index aef329a9024a93f092d9dd4569ea4c3c896a7867..80dd500b345ce1c1e786bbf9157faa04a82d0c7e 100644 (file)
@@ -52,8 +52,8 @@ cell_vertex_shader_queue_flush(struct draw_context *draw)
    struct cell_context *const cell =
        (struct cell_context *) draw->driver_private;
    struct cell_command_vs *const vs = &cell_global.command[0].vs;
-   unsigned *batch;
-   struct cell_array_info array_info;
+   uint64_t *batch;
+   struct cell_array_info *array_info;
    unsigned i, j;
 
    assert(draw->vs.queue_nr != 0);
@@ -63,17 +63,19 @@ cell_vertex_shader_queue_flush(struct draw_context *draw)
    draw_update_vertex_fetch(draw);
 
    for (i = 0; i < draw->vertex_fetch.nr_attrs; i++) {
-      array_info.opcode = CELL_CMD_STATE_VS_ARRAY_INFO;
-      assert(draw->vertex_fetch.src_ptr[i] != NULL);
-      array_info.base = (uintptr_t) draw->vertex_fetch.src_ptr[i];
-      array_info.attr = i;
-      array_info.pitch = draw->vertex_fetch.pitch[i];
-      array_info.format = draw->vertex_element[i].src_format;
+      batch = cell_batch_alloc(cell, sizeof(batch[0]) + sizeof(*array_info));
+
+      batch[0] = CELL_CMD_STATE_VS_ARRAY_INFO;
 
-      cell_batch_append(cell, & array_info, sizeof(array_info));
+      array_info = (struct cell_array_info *) &batch[1];
+      assert(draw->vertex_fetch.src_ptr[i] != NULL);
+      array_info->base = (uintptr_t) draw->vertex_fetch.src_ptr[i];
+      array_info->attr = i;
+      array_info->pitch = draw->vertex_fetch.pitch[i];
+      array_info->format = draw->vertex_element[i].src_format;
    }
 
-   batch = cell_batch_alloc(cell, sizeof(unsigned)
+   batch = cell_batch_alloc(cell, sizeof(batch[0])
                             + sizeof(struct pipe_viewport_state));
    batch[0] = CELL_CMD_STATE_VIEWPORT;
    (void) memcpy(&batch[1], &draw->viewport,
index b0311db1aabc6db64f96c445e157c302da8d61ac..4f126d5e5bb423f1ce8dd14b949f84ee53751ab2 100644 (file)
@@ -31,7 +31,6 @@
 
 #include <stdio.h>
 #include <libmisc.h>
-#include <vec_literal.h>
 
 #include "spu_main.h"
 #include "spu_render.h"
@@ -220,13 +219,13 @@ cmd_state_framebuffer(const struct cell_command_framebuffer *cmd)
       spu.fb.zsize = 0;
 
    if (spu.fb.color_format == PIPE_FORMAT_A8R8G8B8_UNORM)
-      spu.color_shuffle = VEC_LITERAL(vector unsigned char,
-                                      12, 0, 4, 8, 0, 0, 0, 0, 
-                                      0, 0, 0, 0, 0, 0, 0, 0);
+      spu.color_shuffle = ((vector unsigned char) {
+                              12, 0, 4, 8, 0, 0, 0, 0, 
+                              0, 0, 0, 0, 0, 0, 0, 0});
    else if (spu.fb.color_format == PIPE_FORMAT_B8G8R8A8_UNORM)
-      spu.color_shuffle = VEC_LITERAL(vector unsigned char,
-                                      8, 4, 0, 12, 0, 0, 0, 0, 
-                                      0, 0, 0, 0, 0, 0, 0, 0);
+      spu.color_shuffle = ((vector unsigned char) {
+                              8, 4, 0, 12, 0, 0, 0, 0, 
+                              0, 0, 0, 0, 0, 0, 0, 0});
    else
       ASSERT(0);
 }
@@ -279,16 +278,10 @@ cmd_state_texture(const struct cell_command_texture *texture)
              spu.init.id, texture->start, texture->width, texture->height);
 
    memcpy(&spu.texture, texture, sizeof(*texture));
-   spu.tex_size = VEC_LITERAL(vector float,
-                              spu.texture.width,
-                              spu.texture.height,
-                              0.0,
-                              0.0);
-   spu.tex_size_mask = VEC_LITERAL(vector unsigned int,
-                                   spu.texture.width - 1,
-                                   spu.texture.height - 1,
-                                   0,
-                                   0);
+   spu.tex_size = (vector float)
+      { spu.texture.width, spu.texture.height, 0.0, 0.0};
+   spu.tex_size_mask = (vector unsigned int)
+      { spu.texture.width - 1, spu.texture.height - 1, 0, 0 };
 }
 
 
@@ -341,8 +334,8 @@ cmd_batch(uint opcode)
 {
    const uint buf = (opcode >> 8) & 0xff;
    uint size = (opcode >> 16);
-   uint buffer[CELL_BUFFER_SIZE / 4] ALIGN16_ATTRIB;
-   const uint usize = size / sizeof(uint);
+   uint64_t buffer[CELL_BUFFER_SIZE / 8] ALIGN16_ATTRIB;
+   const unsigned usize = size / sizeof(buffer[0]);
    uint pos;
 
    if (Debug)
@@ -377,7 +370,7 @@ cmd_batch(uint opcode)
             struct cell_command_framebuffer *fb
                = (struct cell_command_framebuffer *) &buffer[pos];
             cmd_state_framebuffer(fb);
-            pos += sizeof(*fb) / 4;
+            pos += sizeof(*fb) / 8;
          }
          break;
       case CELL_CMD_CLEAR_SURFACE:
@@ -385,7 +378,7 @@ cmd_batch(uint opcode)
             struct cell_command_clear_surface *clr
                = (struct cell_command_clear_surface *) &buffer[pos];
             cmd_clear_surface(clr);
-            pos += sizeof(*clr) / 4;
+            pos += sizeof(*clr) / 8;
          }
          break;
       case CELL_CMD_RENDER:
@@ -394,7 +387,7 @@ cmd_batch(uint opcode)
                = (struct cell_command_render *) &buffer[pos];
             uint pos_incr;
             cmd_render(render, &pos_incr);
-            pos += sizeof(*render) / 4 + pos_incr;
+            pos += sizeof(*render) / 8 + ((pos_incr + 1) / 2);
          }
          break;
       case CELL_CMD_RELEASE_VERTS:
@@ -402,8 +395,7 @@ cmd_batch(uint opcode)
             struct cell_command_release_verts *release
                = (struct cell_command_release_verts *) &buffer[pos];
             cmd_release_verts(release);
-            ASSERT(sizeof(*release) == 8);
-            pos += sizeof(*release) / 4;
+            pos += sizeof(*release) / 8;
          }
          break;
       case CELL_CMD_FINISH:
@@ -413,36 +405,36 @@ cmd_batch(uint opcode)
       case CELL_CMD_STATE_BLEND:
          cmd_state_blend((struct pipe_blend_state *)
                                  &buffer[pos+1]);
-         pos += (1 + sizeof(struct pipe_blend_state) / 4);
+         pos += (1 + ROUNDUP8(sizeof(struct pipe_blend_state)) / 8);
          break;
       case CELL_CMD_STATE_DEPTH_STENCIL:
          cmd_state_depth_stencil((struct pipe_depth_stencil_alpha_state *)
                                  &buffer[pos+1]);
-         pos += (1 + sizeof(struct pipe_depth_stencil_alpha_state) / 4);
+         pos += (1 + ROUNDUP8(sizeof(struct pipe_depth_stencil_alpha_state)) / 8);
          break;
       case CELL_CMD_STATE_SAMPLER:
          cmd_state_sampler((struct pipe_sampler_state *) &buffer[pos+1]);
-         pos += (1 + sizeof(struct pipe_sampler_state) / 4);
+         pos += (1 + ROUNDUP8(sizeof(struct pipe_sampler_state)) / 8);
          break;
       case CELL_CMD_STATE_TEXTURE:
          cmd_state_texture((struct cell_command_texture *) &buffer[pos+1]);
-         pos += (1 + sizeof(struct cell_command_texture) / 4);
+         pos += (1 + ROUNDUP8(sizeof(struct cell_command_texture)) / 8);
          break;
       case CELL_CMD_STATE_VERTEX_INFO:
          cmd_state_vertex_info((struct vertex_info *) &buffer[pos+1]);
-         pos += (1 + sizeof(struct vertex_info) / 4);
+         pos += (1 + ROUNDUP8(sizeof(struct vertex_info)) / 8);
          break;
       case CELL_CMD_STATE_VIEWPORT:
          (void) memcpy(& draw.viewport, &buffer[pos+1],
                        sizeof(struct pipe_viewport_state));
-         pos += (1 + sizeof(struct pipe_viewport_state) / 4);
+         pos += (1 + ROUNDUP8(sizeof(struct pipe_viewport_state)) / 8);
          break;
       case CELL_CMD_STATE_VS_ARRAY_INFO:
-         cmd_state_vs_array_info((struct cell_array_info *) &buffer[pos]);
-         pos += (sizeof(struct cell_array_info) / 4);
+         cmd_state_vs_array_info((struct cell_array_info *) &buffer[pos+1]);
+         pos += (1 + ROUNDUP8(sizeof(struct cell_array_info)) / 8);
          break;
       default:
-         printf("SPU %u: bad opcode: 0x%x\n", spu.init.id, buffer[pos]);
+         printf("SPU %u: bad opcode: 0x%llx\n", spu.init.id, buffer[pos]);
          ASSERT(0);
          break;
       }
index 1e846868e389aea40da25064e6d6040f7a7a28e7..5b0f2a6470c0d2875e6b78950fc2c50c838bd811 100644 (file)
@@ -431,9 +431,8 @@ static void generic_vertex_fetch(struct spu_vs_context *draw,
    /* loop over vertex attributes (vertex shader inputs)
     */
    for (attr = 0; attr < nr_attrs; attr++) {
-
-      const unsigned pitch   = draw->vertex_fetch.pitch[attr];
-      const ubyte *src = draw->vertex_fetch.src_ptr[attr];
+      const unsigned pitch = draw->vertex_fetch.pitch[attr];
+      const uint64_t src = draw->vertex_fetch.src_ptr[attr];
       const spu_fetch_func fetch = draw->vertex_fetch.fetch[attr];
       unsigned i;
       float p[4][4];
@@ -447,7 +446,7 @@ static void generic_vertex_fetch(struct spu_vs_context *draw,
        */
       for (i = 0; i < count; i++) {
          uint8_t buffer[32] ALIGN16_ATTRIB;
-         const unsigned long addr = src + (elts[i] * pitch);
+         const uint64_t addr = src + (elts[i] * pitch);
          const unsigned size = ((addr & 0x0f) == 0) ? 16 : 32;
 
          mfc_get(buffer, addr & ~0x0f, size, TAG_VERTEX_BUFFER, 0, 0);
index c52f38fd0266a886e87e19e201183aa9569f3418..b261ab44a21d0248f66134f1eefc588b289ce4d7 100644 (file)
@@ -16,7 +16,7 @@ struct spu_vs_context {
    struct pipe_viewport_state viewport;
 
    struct {
-      const ubyte *src_ptr[PIPE_ATTRIB_MAX];
+      uint64_t src_ptr[PIPE_ATTRIB_MAX];
       unsigned pitch[PIPE_ATTRIB_MAX];
       enum pipe_format format[PIPE_ATTRIB_MAX];
       unsigned nr_attrs;