From 1bab5bd24e09b6e11cd99f989bb00c587119aed2 Mon Sep 17 00:00:00 2001 From: Brian Date: Sat, 26 Jan 2008 18:30:44 -0700 Subject: [PATCH] Cell: added support for inlined indexes If there's room in the batch buffer after the rendering command to accomodate the indexes, put them there rather than in a separate buffer. --- src/mesa/pipe/cell/common.h | 1 + src/mesa/pipe/cell/ppu/cell_vbuf.c | 31 ++++++- src/mesa/pipe/cell/spu/spu_main.c | 125 ++++++++++++++++++++--------- 3 files changed, 114 insertions(+), 43 deletions(-) diff --git a/src/mesa/pipe/cell/common.h b/src/mesa/pipe/cell/common.h index e9252991191..e955bb9ec50 100644 --- a/src/mesa/pipe/cell/common.h +++ b/src/mesa/pipe/cell/common.h @@ -124,6 +124,7 @@ struct cell_command_render const void *vertex_data; const ushort *index_data; float xmin, ymin, xmax, ymax; + boolean inline_indexes; } ALIGN16_ATTRIB; diff --git a/src/mesa/pipe/cell/ppu/cell_vbuf.c b/src/mesa/pipe/cell/ppu/cell_vbuf.c index 00ff990eab7..59a4a2b6e9f 100644 --- a/src/mesa/pipe/cell/ppu/cell_vbuf.c +++ b/src/mesa/pipe/cell/ppu/cell_vbuf.c @@ -39,6 +39,10 @@ #include "pipe/draw/draw_vbuf.h" +/** Allow render indexes to be inlined after RENDER command */ +#define ALLOW_INLINING 1 + + /** * Subclass of vbuf_render because we need a cell_context pointer in * a few places. @@ -123,6 +127,8 @@ cell_vbuf_draw(struct vbuf_render *vbr, printf("cell_vbuf_draw() nr_indices = %u nr_verts = %u indexes = [%u %u %u ...]\n", nr_indices, nr_vertices, indices[0], indices[1], indices[2]); + printf("ind space = %u, space = %u\n", + nr_indices * 2, cell_batch_free_space(cell)); #endif /* compute x/y bounding box */ @@ -145,23 +151,40 @@ cell_vbuf_draw(struct vbuf_render *vbr, /* build/insert batch RENDER command */ { + const uint index_bytes = (nr_indices * 2 + 3) & ~0x3; + struct cell_command_render *render = (struct cell_command_render *) cell_batch_alloc(cell, sizeof(*render)); render->opcode = CELL_CMD_RENDER; render->prim_type = cvbr->prim; + render->num_verts = nr_vertices; render->vertex_size = 4 * cell->vertex_info.size; render->vertex_data = vertices; - render->index_data = indices; + ASSERT_ALIGN16(render->vertex_data); + render->num_indexes = nr_indices; + + if (ALLOW_INLINING && + index_bytes <= cell_batch_free_space(cell)) { + /* indices inlined, right after render cmd */ + void *dst = cell_batch_alloc(cell, index_bytes); + memcpy(dst, indices, nr_indices * 2); + render->inline_indexes = TRUE; + render->index_data = NULL; + } + else { + /* indices in separate buffer */ + render->inline_indexes = FALSE; + render->index_data = indices; + ASSERT_ALIGN16(render->index_data); + } + render->xmin = xmin; render->ymin = ymin; render->xmax = xmax; render->ymax = ymax; - - ASSERT_ALIGN16(render->vertex_data); - ASSERT_ALIGN16(render->index_data); } #if 01 diff --git a/src/mesa/pipe/cell/spu/spu_main.c b/src/mesa/pipe/cell/spu/spu_main.c index ef605a51973..92be0b4a4af 100644 --- a/src/mesa/pipe/cell/spu/spu_main.c +++ b/src/mesa/pipe/cell/spu/spu_main.c @@ -204,66 +204,104 @@ tile_bounding_box(const struct cell_command_render *render, } +/** + * Render primitives + * \param pos_incr returns value indicating how may words to skip after + * this command in the batch buffer + */ static void -cmd_render(const struct cell_command_render *render) +cmd_render(const struct cell_command_render *render, uint *pos_incr) { /* we'll DMA into these buffers */ ubyte vertex_data[CELL_MAX_VBUF_SIZE] ALIGN16_ATTRIB; - ushort indexes[CELL_MAX_VBUF_INDEXES] ALIGN16_ATTRIB; - uint i, j, total_vertex_bytes, total_index_bytes; + ushort index_data[CELL_MAX_VBUF_INDEXES] ALIGN16_ATTRIB; const uint vertex_size = render->vertex_size; /* in bytes */ + const ushort *indexes; + uint i, j; + if (Debug) { - printf("SPU %u: RENDER prim %u, indices: %u, nr_vert: %u\n", + printf("SPU %u: RENDER prim %u, num_vert=%u num_ind=%u inlined=%u\n", spu.init.id, render->prim_type, render->num_verts, - render->num_indexes); + render->num_indexes, + render->inline_indexes); + /* printf(" bound: %g, %g .. %g, %g\n", render->xmin, render->ymin, render->xmax, render->ymax); */ + printf("SPU %u: indices at %p vertices at %p\n", + spu.init.id, + render->index_data, render->vertex_data); } ASSERT_ALIGN16(render->vertex_data); ASSERT_ALIGN16(render->index_data); - /* how much vertex data */ - total_vertex_bytes = render->num_verts * vertex_size; - total_index_bytes = render->num_indexes * sizeof(ushort); - if (total_index_bytes < 16) - total_index_bytes = 16; - else - total_index_bytes = ROUNDUP16(total_index_bytes); - /* - printf("VBUF: indices at %p, vertices at %p total_vertex_bytes %u ind_bytes %u\n", - render->index_data, render->vertex_data, total_vertex_bytes, total_index_bytes); - */ + /** + ** Get vertices + **/ + { + const uint total_vertex_bytes = render->num_verts * vertex_size; - ASSERT(total_vertex_bytes % 16 == 0); - /* get vertex data from main memory */ - mfc_get(vertex_data, /* dest */ - (unsigned int) render->vertex_data, /* src */ - total_vertex_bytes, /* size */ - TAG_VERTEX_BUFFER, - 0, /* tid */ - 0 /* rid */); + ASSERT(total_vertex_bytes % 16 == 0); + + /* get vertex data from main memory */ + mfc_get(vertex_data, /* dest */ + (unsigned int) render->vertex_data, /* src */ + total_vertex_bytes, /* size */ + TAG_VERTEX_BUFFER, + 0, /* tid */ + 0 /* rid */); + } - ASSERT(total_index_bytes % 16 == 0); - /* get index data from main memory */ - mfc_get(indexes, /* dest */ - (unsigned int) render->index_data, /* src */ - total_index_bytes, - TAG_INDEX_BUFFER, - 0, /* tid */ - 0 /* rid */); + /** + ** Get indexes + **/ + if (render->inline_indexes) { + /* indexes are right after the render command in the batch buffer */ + ASSERT(sizeof(*render) % 4 == 0); + indexes = (ushort *) (render + 1); - wait_on_mask_all((1 << TAG_VERTEX_BUFFER) | - (1 << TAG_INDEX_BUFFER)); + *pos_incr = (render->num_indexes * 2 + 3) / 4; - /* find tiles which intersect the prim bounding box */ + /* wait for vertex data */ + wait_on_mask_all(1 << TAG_VERTEX_BUFFER); + } + else { + /* indexes are in separate buffer */ + uint total_index_bytes; + + *pos_incr = 0; + + total_index_bytes = render->num_indexes * sizeof(ushort); + if (total_index_bytes < 16) + total_index_bytes = 16; + else + total_index_bytes = ROUNDUP16(total_index_bytes); + + indexes = index_data; + + /* get index data from main memory */ + mfc_get(index_data, /* dest */ + (unsigned int) render->index_data, /* src */ + total_index_bytes, + TAG_INDEX_BUFFER, + 0, /* tid */ + 0 /* rid */); + + wait_on_mask_all((1 << TAG_VERTEX_BUFFER) | + (1 << TAG_INDEX_BUFFER)); + } + + + /** + ** find tiles which intersect the prim bounding box + **/ uint txmin, tymin, box_width_tiles, box_num_tiles; #if 0 tile_bounding_box(render, &txmin, &tymin, @@ -278,7 +316,10 @@ cmd_render(const struct cell_command_render *render) /* make sure any pending clears have completed */ wait_on_mask(1 << TAG_SURFACE_CLEAR); - /* loop over tiles */ + + /** + ** loop over tiles, rendering tris + **/ for (i = spu.init.id; i < box_num_tiles; i += spu.init.num_spus) { const uint tx = txmin + i % box_width_tiles; const uint ty = tymin + i / box_width_tiles; @@ -300,6 +341,7 @@ cmd_render(const struct cell_command_render *render) } ASSERT(render->prim_type == PIPE_PRIM_TRIANGLES); + ASSERT(render->num_indexes % 3 == 0); /* loop over tris */ for (j = 0; j < render->num_indexes; j += 3) { @@ -508,8 +550,9 @@ cmd_batch(uint opcode) { struct cell_command_render *render = (struct cell_command_render *) &buffer[pos]; - cmd_render(render); - pos += sizeof(*render) / 4; + uint pos_incr; + cmd_render(render, &pos_incr); + pos += sizeof(*render) / 4 + pos_incr; } break; case CELL_CMD_FINISH: @@ -591,7 +634,11 @@ main_loop(void) cmd_clear_surface(&cmd.clear); break; case CELL_CMD_RENDER: - cmd_render(&cmd.render); + { + uint pos_incr; + cmd_render(&cmd.render, &pos_incr); + assert(pos_incr == 0); + } break; case CELL_CMD_BATCH: cmd_batch(opcode); -- 2.30.2