#define CELL_CMD_STATE_DEPTH_STENCIL 7
-#define CELL_NUM_BATCH_BUFFERS 2
+#define CELL_NUM_BATCH_BUFFERS 3
#define CELL_BATCH_BUFFER_SIZE 1024 /**< 16KB would be the max */
-#define CELL_BATCH_FINISHED 0x1234 /**< mbox message */
+#define CELL_BUFFER_STATUS_FREE 10
+#define CELL_BUFFER_STATUS_USED 20
+
/**
unsigned num_spus;
struct cell_command *cmd;
ubyte *batch_buffers[CELL_NUM_BATCH_BUFFERS];
+ uint *buffer_status; /**< points at cell_context->buffer_status */
} ALIGN16_ATTRIB;
void
cell_batch_flush(struct cell_context *cell)
{
- const uint batch = cell->cur_batch;
+ uint batch = cell->cur_batch;
const uint size = cell->batch_buffer_size[batch];
- uint i, cmd_word;
+ uint spu, cmd_word;
if (size == 0)
return;
batch, &cell->batch_buffer[batch][0], size);
*/
+ /*
+ * Build the "BATCH" command word and send it to all SPUs.
+ * The word is CELL_CMD_BATCH OR'ed with the buffer index shifted left
+ * by 8 and the buffer size shifted left by 16.
+ */
cmd_word = CELL_CMD_BATCH | (batch << 8) | (size << 16);
- for (i = 0; i < cell->num_spus; i++) {
- send_mbox_message(cell_global.spe_contexts[i], cmd_word);
+ for (spu = 0; spu < cell->num_spus; spu++) {
+ assert(cell->buffer_status[spu][batch][0] == CELL_BUFFER_STATUS_USED);
+ send_mbox_message(cell_global.spe_contexts[spu], cmd_word);
}
- /* XXX wait for the DMX xfer to finish.
- * Using mailboxes here is temporary.
- * Ideally, we want to use a PPE-side DMA status check function...
+ /* When the SPUs are done copying the buffer into their local stores
+ * they'll write a BUFFER_STATUS_FREE message into the buffer_status[]
+ * array indicating that the PPU can re-use the buffer.
*/
- for (i = 0; i < cell->num_spus; i++) {
- uint k = wait_mbox_message(cell_global.spe_contexts[i]);
- assert(k == CELL_BATCH_FINISHED);
- }
- /* next buffer */
- cell->cur_batch = (batch + 1) % CELL_NUM_BATCH_BUFFERS;
- cell->batch_buffer_size[cell->cur_batch] = 0; /* empty */
+ /* Find a buffer that's marked as free by all SPUs.
+ * Starting with the buffer after the one just submitted, cycle through
+ * the buffers until one has CELL_BUFFER_STATUS_FREE in every SPU's slot.
+ * There is no timeout: this spins until such a buffer is found.
+ * NOTE(review): buffer_status[] is updated by the SPUs, not by this
+ * thread, yet it is read here through a plain (non-volatile) uint;
+ * confirm the compiler is guaranteed to re-read it on each pass.
+ */
+ while (1) {
+ uint num_free = 0;
+
+ batch = (batch + 1) % CELL_NUM_BATCH_BUFFERS;
+
+ for (spu = 0; spu < cell->num_spus; spu++) {
+ if (cell->buffer_status[spu][batch][0] == CELL_BUFFER_STATUS_FREE)
+ num_free++;
+ }
+
+ if (num_free == cell->num_spus) {
+ /* found a free buffer, now mark status as used */
+ for (spu = 0; spu < cell->num_spus; spu++) {
+ cell->buffer_status[spu][batch][0] = CELL_BUFFER_STATUS_USED;
+ }
+ break;
+ }
+ }
+
+ cell->batch_buffer_size[batch] = 0; /* empty */
+ cell->cur_batch = batch;
}
cell_create_context(struct pipe_winsys *winsys, struct cell_winsys *cws)
{
struct cell_context *cell;
- uint i;
+ uint spu, buf;
/* some fields need to be 16-byte aligned, so align the whole object */
cell = (struct cell_context*) align_malloc(sizeof(struct cell_context), 16);
cell_start_spus(cell);
- for (i = 0; i < CELL_NUM_BATCH_BUFFERS; i++) {
- cell->batch_buffer_size[i] = 0;
+ for (buf = 0; buf < CELL_NUM_BATCH_BUFFERS; buf++) {
+ cell->batch_buffer_size[buf] = 0;
+
+ /* init batch buffer status values,
+ * mark 0th buffer as used, rest as free.
+ */
+ for (spu = 0; spu < cell->num_spus; spu++) {
+ if (buf == 0)
+ cell->buffer_status[spu][buf][0] = CELL_BUFFER_STATUS_USED;
+ else
+ cell->buffer_status[spu][buf][0] = CELL_BUFFER_STATUS_FREE;
+ }
}
+
#if 0
test_spus(cell);
#endif
return &cell->pipe;
}
+
+
+#if 0
+/** [4] to ensure 16-byte alignment for each status word */
+uint buffer_status[CELL_MAX_SPUS][CELL_NUM_BATCH_BUFFERS][4] ALIGN16_ATTRIB;
+#endif
#include "pipe/cell/common.h"
+#define CELL_MAX_SPUS 6
+
+
struct cell_vbuf_render;
struct cell_vertex_shader_state
ubyte batch_buffer[CELL_NUM_BATCH_BUFFERS][CELL_BATCH_BUFFER_SIZE] ALIGN16_ATTRIB;
int cur_batch; /**< which batch buffer is being filled */
+ /** [4] to ensure 16-byte alignment for each status word */
+ uint buffer_status[CELL_MAX_SPUS][CELL_NUM_BATCH_BUFFERS][4] ALIGN16_ATTRIB;
+
};
+
static INLINE struct cell_context *
cell_context(struct pipe_context *pipe)
{
for (j = 0; j < CELL_NUM_BATCH_BUFFERS; j++) {
cell_global.inits[i].batch_buffers[j] = cell->batch_buffer[j];
}
+ cell_global.inits[i].buffer_status = &cell->buffer_status[0][0][0];
cell_global.spe_contexts[i] = spe_context_create(0, NULL);
if (!cell_global.spe_contexts[i]) {
}
+/**
+ * Tell the PPU that this SPU has finished copying a batch buffer to
+ * local store and that it may be reused by the PPU.
+ * This is done by writing a 16-byte batch-buffer-status block back into
+ * main memory (in cell_context->buffer_status[]).
+ *
+ * The destination is this SPU's slot for the given buffer.  Each status
+ * entry is 4 uints wide and spu.init.buffer_status is a flat uint pointer,
+ * so the slot offset is 4 * (spu.init.id * CELL_NUM_BATCH_BUFFERS + buffer).
+ *
+ * The DMA put is issued with TAG_MISC and this function returns without
+ * waiting for it to complete.
+ *
+ * \param buffer  index of the batch buffer being released; must be less
+ *                than CELL_NUM_BATCH_BUFFERS.
+ */
+static void
+release_batch_buffer(uint buffer)
+{
+ /* Evidently, using less than a 16-byte status doesn't work reliably.
+ * Only the first word carries the status value; the rest is padding.
+ */
+ static const uint status[4] ALIGN16_ATTRIB
+ = {CELL_BUFFER_STATUS_FREE, 0, 0, 0};
+
+ const uint index = 4 * (spu.init.id * CELL_NUM_BATCH_BUFFERS + buffer);
+ uint *dst = spu.init.buffer_status + index;
+
+ ASSERT(buffer < CELL_NUM_BATCH_BUFFERS);
+
+ /*
+ printf("SPU %u: Set batch status buf=%u, index %u, at %p to FREE\n",
+ spu.init.id, buffer, index, dst);
+ */
+
+ mfc_put((void *) &status, /* src in local memory */
+ (unsigned int) dst, /* dst in main memory */
+ sizeof(status), /* size */
+ TAG_MISC, /* tag is unimportant */
+ 0, /* tid */
+ 0 /* rid */);
+}
+
+
/**
* Execute a batch of commands
* The opcode param encodes the location of the buffer and its size.
0 /* rid */);
wait_on_mask(1 << TAG_BATCH_BUFFER);
- /* send mbox message to indicate DMA completed */
- /* XXX temporary */
- spu_write_out_mbox(CELL_BATCH_FINISHED);
+ /* Tell PPU we're done copying the buffer to local store */
+ release_batch_buffer(buf);
+
for (pos = 0; pos < usize; /* no incr */) {
switch (buffer[pos]) {
#define TAG_WRITE_TILE_Z 15
#define TAG_INDEX_BUFFER 16
#define TAG_BATCH_BUFFER 17
+#define TAG_MISC 18
+
/** The standard assert macro doesn't seem to work on SPUs */
#define ASSERT(x) \