From 9abbaacea6f340486a3b2bf68fbd4efa0d15a5d3 Mon Sep 17 00:00:00 2001 From: Brian Date: Mon, 28 Jan 2008 10:00:27 -0700 Subject: [PATCH] Cell: checkpoint commit: always inline prim indexes into batch buffer Also, explicit release-vertex-buffer command. Lots of debug/stale code still in place... --- src/mesa/pipe/cell/common.h | 12 +++ src/mesa/pipe/cell/ppu/cell_vbuf.c | 113 ++++++++++++++++++++--------- src/mesa/pipe/cell/spu/spu_main.c | 110 +++++++++++++++++++--------- src/mesa/pipe/cell/spu/spu_main.h | 2 + 4 files changed, 171 insertions(+), 66 deletions(-) diff --git a/src/mesa/pipe/cell/common.h b/src/mesa/pipe/cell/common.h index ce9c3819077..31637ed1cc8 100644 --- a/src/mesa/pipe/cell/common.h +++ b/src/mesa/pipe/cell/common.h @@ -75,6 +75,7 @@ #define CELL_CMD_FINISH 3 #define CELL_CMD_RENDER 4 #define CELL_CMD_BATCH 5 +#define CELL_CMD_RELEASE_VERTS 6 #define CELL_CMD_STATE_FRAMEBUFFER 10 #define CELL_CMD_STATE_DEPTH_STENCIL 11 #define CELL_CMD_STATE_SAMPLER 12 @@ -124,7 +125,11 @@ struct cell_command_render uint vertex_size; /**< bytes per vertex */ uint dummy; /* XXX this dummy field works around a compiler bug */ uint num_indexes; +#if 0 const void *vertex_data; +#else + uint vertex_buf; /**< which cell->buffer[] contains the vertex data */ +#endif const ushort *index_data; float xmin, ymin, xmax, ymax; boolean inline_indexes; @@ -132,6 +137,13 @@ struct cell_command_render } ALIGN16_ATTRIB; +struct cell_command_release_verts +{ + int opcode; /**< CELL_CMD_RELEASE_VERTS */ + uint vertex_buf; /**< in [0, CELL_NUM_BUFFERS-1] */ +}; + + /** XXX unions don't seem to work */ struct cell_command { diff --git a/src/mesa/pipe/cell/ppu/cell_vbuf.c b/src/mesa/pipe/cell/ppu/cell_vbuf.c index ee572b3a51b..6e12e16fe0a 100644 --- a/src/mesa/pipe/cell/ppu/cell_vbuf.c +++ b/src/mesa/pipe/cell/ppu/cell_vbuf.c @@ -40,8 +40,8 @@ /** Allow prim indexes, verts to be inlined after RENDER command */ -#define ALLOW_INLINE_INDEXES 1 -#define ALLOW_INLINE_VERTS 1 +#define ALLOW_INLINE_INDEXES 01 +#define ALLOW_INLINE_VERTS 0 /** @@ -55,6 +55,9 @@ struct cell_vbuf_render uint prim; uint vertex_size; void *vertex_buffer; +#if 1 + uint vertex_buf; +#endif }; @@ -81,13 +84,52 @@ cell_vbuf_allocate_vertices(struct vbuf_render *vbr, { struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr); /*printf("Alloc verts %u * %u\n", vertex_size, nr_vertices);*/ +#if 0 assert(!cvbr->vertex_buffer); cvbr->vertex_buffer = align_malloc(vertex_size * nr_vertices, 16); +#else + assert(cvbr->vertex_buf == ~0); + cvbr->vertex_buf = cell_get_empty_buffer(cvbr->cell); + cvbr->vertex_buffer = cvbr->cell->buffer[cvbr->vertex_buf]; + printf("%s vertex_buf = %u\n", __FUNCTION__, cvbr->vertex_buf); +#endif cvbr->vertex_size = vertex_size; return cvbr->vertex_buffer; } +static void +cell_vbuf_release_vertices(struct vbuf_render *vbr, void *vertices, + unsigned vertex_size, unsigned vertices_used) +{ + struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr); + struct cell_context *cell = cvbr->cell; + + /*printf("Free verts %u * %u\n", vertex_size, vertices_used);*/ +#if 0 + align_free(vertices); +#else + printf("%s vertex_buf = %u count = %u\n", + __FUNCTION__, cvbr->vertex_buf, vertices_used); + + { + struct cell_command_release_verts *release + = (struct cell_command_release_verts *) + cell_batch_alloc(cell, sizeof(struct cell_command_release_verts)); + release->opcode = CELL_CMD_RELEASE_VERTS; + release->vertex_buf = cvbr->vertex_buf; + } + + cvbr->vertex_buf = ~0; + cell_flush_int(&cell->pipe, 0x0);/*NEW*/ +#endif + + assert(vertices == cvbr->vertex_buffer); + cvbr->vertex_buffer = NULL; +} + + + static void cell_vbuf_set_primitive(struct vbuf_render *vbr, unsigned prim) { @@ -124,7 +166,7 @@ cell_vbuf_draw(struct vbuf_render *vbr, printf("%u %u %u, ", indices[i+0], indices[i+1], indices[i+2]); } printf("\n"); -#elif 0 +#elif 01 printf("cell_vbuf_draw() nr_indices = %u nr_verts = %u indexes = [%u %u %u ...]\n", nr_indices, nr_vertices, indices[0], indices[1], indices[2]); @@ -157,28 +199,26 @@ cell_vbuf_draw(struct vbuf_render *vbr, const uint index_bytes = ROUNDUP4(nr_indices * 2); const uint vertex_bytes = nr_vertices * 4 * cell->vertex_info.size; + const uint batch_size = sizeof(struct cell_command_render) + + index_bytes; + struct cell_command_render *render = (struct cell_command_render *) - cell_batch_alloc(cell, sizeof(*render)); + cell_batch_alloc(cell, batch_size); + render->opcode = CELL_CMD_RENDER; render->prim_type = cvbr->prim; render->num_indexes = nr_indices; - if (ALLOW_INLINE_INDEXES && - index_bytes <= cell_batch_free_space(cell)) { - /* indices inlined, right after render cmd */ - void *dst = cell_batch_alloc(cell, index_bytes); - memcpy(dst, indices, nr_indices * 2); - render->inline_indexes = TRUE; - render->index_data = NULL; - } - else { - /* indices in separate buffer */ - render->inline_indexes = FALSE; - render->index_data = indices; - ASSERT_ALIGN16(render->index_data); - } + /* append indices after render command */ + memcpy(render + 1, indices, nr_indices * 2); + render->inline_indexes = TRUE; + render->index_data = NULL; + + /* if there's room, append vertices after the indices, else leave + * vertices in the original/separate buffer. + */ render->vertex_size = 4 * cell->vertex_info.size; render->num_verts = nr_vertices; if (ALLOW_INLINE_VERTS && @@ -188,12 +228,21 @@ cell_vbuf_draw(struct vbuf_render *vbr, void *dst = cell_batch_alloc(cell, vertex_bytes); memcpy(dst, vertices, vertex_bytes); render->inline_verts = TRUE; +#if 0 render->vertex_data = NULL; +#else + render->vertex_buf = ~0; +#endif } else { render->inline_verts = FALSE; +#if 0 render->vertex_data = vertices; ASSERT_ALIGN16(render->vertex_data); +#else + ASSERT(cvbr->vertex_buf >= 0); + render->vertex_buf = cvbr->vertex_buf; +#endif } @@ -203,27 +252,13 @@ cell_vbuf_draw(struct vbuf_render *vbr, render->ymax = ymax; } -#if 01 +#if 0 /* XXX this is temporary */ cell_flush_int(&cell->pipe, PIPE_FLUSH_WAIT); #endif } -static void -cell_vbuf_release_vertices(struct vbuf_render *vbr, void *vertices, - unsigned vertex_size, unsigned vertices_used) -{ - struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr); - - /*printf("Free verts %u * %u\n", vertex_size, vertices_used);*/ - align_free(vertices); - - assert(vertices == cvbr->vertex_buffer); - cvbr->vertex_buffer = NULL; -} - - static void cell_vbuf_destroy(struct vbuf_render *vbr) { @@ -244,8 +279,17 @@ cell_init_vbuf(struct cell_context *cell) cell->vbuf_render = CALLOC_STRUCT(cell_vbuf_render); +#if 0 cell->vbuf_render->base.max_indices = CELL_MAX_VBUF_INDEXES; cell->vbuf_render->base.max_vertex_buffer_bytes = CELL_MAX_VBUF_SIZE; +#else + cell->vbuf_render->base.max_indices + = (CELL_BUFFER_SIZE + - sizeof(struct cell_command_render) + - sizeof(struct cell_command_release_verts)) + / sizeof(ushort); + cell->vbuf_render->base.max_vertex_buffer_bytes = CELL_BUFFER_SIZE; +#endif cell->vbuf_render->base.get_vertex_info = cell_vbuf_get_vertex_info; cell->vbuf_render->base.allocate_vertices = cell_vbuf_allocate_vertices; @@ -255,6 +299,9 @@ cell_init_vbuf(struct cell_context *cell) cell->vbuf_render->base.destroy = cell_vbuf_destroy; cell->vbuf_render->cell = cell; +#if 1 + cell->vbuf_render->vertex_buf = ~0; +#endif cell->vbuf = draw_vbuf_stage(cell->draw, &cell->vbuf_render->base); } diff --git a/src/mesa/pipe/cell/spu/spu_main.c b/src/mesa/pipe/cell/spu/spu_main.c index 2097683b825..eb979718f85 100644 --- a/src/mesa/pipe/cell/spu/spu_main.c +++ b/src/mesa/pipe/cell/spu/spu_main.c @@ -69,6 +69,32 @@ wait_on_mask_all(unsigned tagMask) } +/** + * Tell the PPU that this SPU has finished copying a buffer to + * local store and that it may be reused by the PPU. + * This is done by writting a 16-byte batch-buffer-status block back into + * main memory (in cell_context->buffer_status[]). + */ +static void +release_buffer(uint buffer) +{ + /* Evidently, using less than a 16-byte status doesn't work reliably */ + static const uint status[4] ALIGN16_ATTRIB + = {CELL_BUFFER_STATUS_FREE, 0, 0, 0}; + + const uint index = 4 * (spu.init.id * CELL_NUM_BUFFERS + buffer); + uint *dst = spu.init.buffer_status + index; + + ASSERT(buffer < CELL_NUM_BUFFERS); + + mfc_put((void *) &status, /* src in local memory */ + (unsigned int) dst, /* dst in main memory */ + sizeof(status), /* size */ + TAG_MISC, /* tag is unimportant */ + 0, /* tid */ + 0 /* rid */); +} + /** * For tiles whose status is TILE_STATUS_CLEAR, write solid-filled @@ -237,13 +263,18 @@ cmd_render(const struct cell_command_render *render, uint *pos_incr) printf(" bound: %g, %g .. %g, %g\n", render->xmin, render->ymin, render->xmax, render->ymax); */ + /* printf("SPU %u: indices at %p vertices at %p\n", spu.init.id, render->index_data, render->vertex_data); + */ } ASSERT(sizeof(*render) % 4 == 0); +#if 0 ASSERT_ALIGN16(render->vertex_data); +#else +#endif ASSERT_ALIGN16(render->index_data); @@ -251,10 +282,18 @@ cmd_render(const struct cell_command_render *render, uint *pos_incr) ** Get vertex, index buffers if not inlined **/ if (!render->inline_verts) { + void *src; ASSERT(total_vertex_bytes % 16 == 0); +#if 0 + src = render->vertex_data; +#else + spu.cur_vertex_buf = render->vertex_buf; + src = spu.init.buffers[render->vertex_buf]; +#endif + mfc_get(vertex_data, /* dest */ - (unsigned int) render->vertex_data, /* src */ + (unsigned int) src, total_vertex_bytes, /* size */ TAG_VERTEX_BUFFER, 0, /* tid */ @@ -298,6 +337,7 @@ cmd_render(const struct cell_command_render *render, uint *pos_incr) /* vertices are after indexes, if inlined */ vertices = (const ubyte *) (render + 1) + *pos_incr * 4; *pos_incr = *pos_incr + total_vertex_bytes / 4; + spu.cur_vertex_buf = ~0; } } @@ -310,6 +350,12 @@ cmd_render(const struct cell_command_render *render, uint *pos_incr) mask |= (1 << TAG_INDEX_BUFFER); wait_on_mask_all(mask); +#if 0 + if (!render->inline_verts) { + printf("SPU %u: release vbuf %u\n", spu.init.id, render->vertex_buf); + release_buffer(render->vertex_buf); + } +#endif /** ** find tiles which intersect the prim bounding box @@ -359,6 +405,14 @@ cmd_render(const struct cell_command_render *render, uint *pos_incr) for (j = 0; j < render->num_indexes; j += 3) { const float *v0, *v1, *v2; + if (indexes[j] == 0xffff) { + printf("index[%u] = 0xffff\n", j); + } + + ASSERT(indexes[j] != 0xffff); + ASSERT(indexes[j+1] != 0xffff); + ASSERT(indexes[j+2] != 0xffff); + v0 = (const float *) (vertices + indexes[j+0] * vertex_size); v1 = (const float *) (vertices + indexes[j+1] * vertex_size); v2 = (const float *) (vertices + indexes[j+2] * vertex_size); @@ -391,6 +445,17 @@ cmd_render(const struct cell_command_render *render, uint *pos_incr) } +static void +cmd_release_verts(const struct cell_command_release_verts *release) +{ + if (Debug) + printf("SPU %u: RELEASE VERTS %u\n", + spu.init.id, spu.cur_vertex_buf); + ASSERT(spu.cur_vertex_buf == release->vertex_buf); + release_buffer(release->vertex_buf); +} + + static void cmd_state_framebuffer(const struct cell_command_framebuffer *cmd) { @@ -472,38 +537,6 @@ cmd_finish(void) } -/** - * Tell the PPU that this SPU has finished copying a buffer to - * local store and that it may be reused by the PPU. - * This is done by writting a 16-byte batch-buffer-status block back into - * main memory (in cell_context->buffer_status[]). - */ -static void -release_buffer(uint buffer) -{ - /* Evidently, using less than a 16-byte status doesn't work reliably */ - static const uint status[4] ALIGN16_ATTRIB - = {CELL_BUFFER_STATUS_FREE, 0, 0, 0}; - - const uint index = 4 * (spu.init.id * CELL_NUM_BUFFERS + buffer); - uint *dst = spu.init.buffer_status + index; - - ASSERT(buffer < CELL_NUM_BUFFERS); - - /* - printf("SPU %u: Set batch status buf=%u, index %u, at %p to FREE\n", - spu.init.id, buffer, index, dst); - */ - - mfc_put((void *) &status, /* src in local memory */ - (unsigned int) dst, /* dst in main memory */ - sizeof(status), /* size */ - TAG_MISC, /* tag is unimportant */ - 0, /* tid */ - 0 /* rid */); -} - - /** * Execute a batch of commands * The opcode param encodes the location of the buffer and its size. @@ -538,6 +571,8 @@ cmd_batch(uint opcode) wait_on_mask(1 << TAG_BATCH_BUFFER); /* Tell PPU we're done copying the buffer to local store */ + if (Debug) + printf("SPU %u: release batch buf %u\n", spu.init.id, buf); release_buffer(buf); for (pos = 0; pos < usize; /* no incr */) { @@ -567,6 +602,15 @@ cmd_batch(uint opcode) pos += sizeof(*render) / 4 + pos_incr; } break; + case CELL_CMD_RELEASE_VERTS: + { + struct cell_command_release_verts *release + = (struct cell_command_release_verts *) &buffer[pos]; + cmd_release_verts(release); + ASSERT(sizeof(*release) == 8); + pos += sizeof(*release) / 4; + } + break; case CELL_CMD_FINISH: cmd_finish(); pos += 1; diff --git a/src/mesa/pipe/cell/spu/spu_main.h b/src/mesa/pipe/cell/spu/spu_main.h index 5bc5d9fa99d..68c7263b7f9 100644 --- a/src/mesa/pipe/cell/spu/spu_main.h +++ b/src/mesa/pipe/cell/spu/spu_main.h @@ -65,6 +65,8 @@ struct spu_global /* XXX more state to come */ + uint cur_vertex_buf; + } ALIGN16_ATTRIB; -- 2.30.2