}
+
+#define JOIN(x, y) JOIN_AGAIN(x, y)
+#define JOIN_AGAIN(x, y) x ## y
+
+#define STATIC_ASSERT(e) \
+{typedef char JOIN(assertion_failed_at_line_, __LINE__) [(e) ? 1 : -1];}
+
+
+
/** for sanity checking */
#define ASSERT_ALIGN16(ptr) \
ASSERT((((unsigned long) (ptr)) & 0xf) == 0);
#define ROUNDUP16(k) (((k) + 0xf) & ~0xf)
-#define CELL_MAX_SPUS 6
+#define CELL_MAX_SPUS 8
#define CELL_MAX_SAMPLERS 4
#define CELL_MAX_TEXTURE_LEVELS 12 /* 2k x 2k */
+#define CELL_MAX_CONSTANTS 32 /**< number of float[4] constants */
+#define CELL_MAX_WIDTH 1024 /**< max framebuffer width */
+#define CELL_MAX_HEIGHT 1024 /**< max framebuffer width */
#define TILE_SIZE 32
#define CELL_CMD_STATE_FRAGMENT_PROGRAM 19
#define CELL_CMD_STATE_ATTRIB_FETCH 20
#define CELL_CMD_STATE_FS_CONSTANTS 21
-#define CELL_CMD_VS_EXECUTE 22
-#define CELL_CMD_FLUSH_BUFFER_RANGE 23
+#define CELL_CMD_STATE_RASTERIZER 22
+#define CELL_CMD_VS_EXECUTE 23
+#define CELL_CMD_FLUSH_BUFFER_RANGE 24
+#define CELL_CMD_FENCE 25
+/** Command/batch buffers */
#define CELL_NUM_BUFFERS 4
#define CELL_BUFFER_SIZE (4*1024) /**< 16KB would be the max */
#define CELL_BUFFER_STATUS_FREE 10
#define CELL_BUFFER_STATUS_USED 20
+/** Debug flags */
#define CELL_DEBUG_CHECKER (1 << 0)
#define CELL_DEBUG_ASM (1 << 1)
#define CELL_DEBUG_SYNC (1 << 2)
#define CELL_DEBUG_FRAGMENT_OPS (1 << 3)
#define CELL_DEBUG_FRAGMENT_OP_FALLBACK (1 << 4)
+#define CELL_DEBUG_CMD (1 << 5)
+#define CELL_DEBUG_CACHE (1 << 6)
+
+#define CELL_FENCE_IDLE 0
+#define CELL_FENCE_EMITTED 1
+#define CELL_FENCE_SIGNALLED 2
+
+#define CELL_FACING_FRONT 0
+#define CELL_FACING_BACK 1
-/** Max instructions for doing per-fragment operations */
-#define SPU_MAX_FRAGMENT_OPS_INSTS 64
+struct cell_fence
+{
+ /** There's a 16-byte status qword per SPU */
+ volatile uint status[CELL_MAX_SPUS][4];
+};
+
+#ifdef __SPU__
+typedef vector unsigned int opcode_t;
+#else
+typedef unsigned int opcode_t[4];
+#endif
+
+/**
+ * Fence command sent to SPUs. In response, the SPUs will write
+ * CELL_FENCE_STATUS_SIGNALLED back to the fence status word in main memory.
+ */
+struct cell_command_fence
+{
+ opcode_t opcode; /**< CELL_CMD_FENCE */
+ struct cell_fence *fence;
+ uint32_t pad_[3];
+};
/**
* Command to specify per-fragment operations state and generated code.
+ * Note that this is a variant-length structure, allocated with as
+ * much memory as needed to hold the generated code; the "code"
+ * field *must* be the last field in the structure. Also, the entire
+ * length of the structure (including the variant code field) must be
+ * a multiple of 8 bytes; we require that this structure itself be
+ * a multiple of 8 bytes, and that the generated code also be a multiple
+ * of 8 bytes.
+ *
+ * Also note that the dsa, blend, blend_color fields are really only needed
+ * for the fallback/C per-pixel code. They're not used when we generate
+ * dynamic SPU fragment code (which is the normal case), and will eventually
+ * be removed from this structure.
*/
struct cell_command_fragment_ops
{
- uint64_t opcode; /**< CELL_CMD_STATE_FRAGMENT_OPS */
+ opcode_t opcode; /**< CELL_CMD_STATE_FRAGMENT_OPS */
+
+ /* Fields for the fallback case */
struct pipe_depth_stencil_alpha_state dsa;
struct pipe_blend_state blend;
- unsigned code[SPU_MAX_FRAGMENT_OPS_INSTS];
+ struct pipe_blend_color blend_color;
+
+ /* Fields for the generated SPU code */
+ unsigned total_code_size;
+ unsigned front_code_index;
+ unsigned back_code_index;
+ /* this field has variant length, and must be the last field in
+ * the structure
+ */
+ unsigned code[0];
};
*/
struct cell_command_fragment_program
{
- uint64_t opcode; /**< CELL_CMD_STATE_FRAGMENT_PROGRAM */
+ opcode_t opcode; /**< CELL_CMD_STATE_FRAGMENT_PROGRAM */
uint num_inst; /**< Number of instructions */
+ uint32_t pad[3];
unsigned code[SPU_MAX_FRAGMENT_PROGRAM_INSTS];
};
*/
struct cell_command_framebuffer
{
- uint64_t opcode; /**< CELL_CMD_FRAMEBUFFER */
+ opcode_t opcode; /**< CELL_CMD_STATE_FRAMEBUFFER */
int width, height;
void *color_start, *depth_start;
enum pipe_format color_format, depth_format;
+ uint32_t pad_[2];
+};
+
+
+/**
+ * Tell SPUs about rasterizer state.
+ */
+struct cell_command_rasterizer
+{
+ opcode_t opcode; /**< CELL_CMD_STATE_RASTERIZER */
+ struct pipe_rasterizer_state rasterizer;
};
*/
struct cell_command_clear_surface
{
- uint64_t opcode; /**< CELL_CMD_CLEAR_SURFACE */
+ opcode_t opcode; /**< CELL_CMD_CLEAR_SURFACE */
uint surface; /**< Temporary: 0=color, 1=Z */
uint value;
+ uint32_t pad[2];
};
#define SPU_VERTS_PER_BATCH 64
struct cell_command_vs
{
- uint64_t opcode; /**< CELL_CMD_VS_EXECUTE */
+ opcode_t opcode; /**< CELL_CMD_VS_EXECUTE */
uint64_t vOut[SPU_VERTS_PER_BATCH];
unsigned num_elts;
unsigned elts[SPU_VERTS_PER_BATCH];
struct cell_command_render
{
- uint64_t opcode; /**< CELL_CMD_RENDER */
+ opcode_t opcode; /**< CELL_CMD_RENDER */
uint prim_type; /**< PIPE_PRIM_x */
uint num_verts;
uint vertex_size; /**< bytes per vertex */
float xmin, ymin, xmax, ymax; /* XXX another dummy field */
uint min_index;
boolean inline_verts;
- uint front_winding; /* the rasterizer needs to be able to determine facing to apply front/back-facing stencil */
+ uint32_t pad_[1];
};
struct cell_command_release_verts
{
- uint64_t opcode; /**< CELL_CMD_RELEASE_VERTS */
+ opcode_t opcode; /**< CELL_CMD_RELEASE_VERTS */
uint vertex_buf; /**< in [0, CELL_NUM_BUFFERS-1] */
+ uint32_t pad_[3];
};
struct cell_command_sampler
{
- uint64_t opcode; /**< CELL_CMD_STATE_SAMPLER */
+ opcode_t opcode; /**< CELL_CMD_STATE_SAMPLER */
uint unit;
struct pipe_sampler_state state;
+ uint32_t pad_[1];
};
struct cell_command_texture
{
- uint64_t opcode; /**< CELL_CMD_STATE_TEXTURE */
+ opcode_t opcode; /**< CELL_CMD_STATE_TEXTURE */
uint target; /**< PIPE_TEXTURE_x */
uint unit;
void *start[CELL_MAX_TEXTURE_LEVELS]; /**< Address in main memory */
ushort width[CELL_MAX_TEXTURE_LEVELS];
ushort height[CELL_MAX_TEXTURE_LEVELS];
+ ushort depth[CELL_MAX_TEXTURE_LEVELS];
};
-/** XXX unions don't seem to work */
-/* XXX this should go away; all commands should be placed in batch buffers */
-struct cell_command
-{
-#if 0
- struct cell_command_framebuffer fb;
- struct cell_command_clear_surface clear;
- struct cell_command_render render;
-#endif
- struct cell_command_vs vs;
-} ALIGN16_ATTRIB;
-
-
#define MAX_SPU_FUNCTIONS 12
/**
* Used to tell the PPU about the address of particular functions in the
unsigned id;
unsigned num_spus;
unsigned debug_flags; /**< mask of CELL_DEBUG_x flags */
- struct cell_command *cmd;
+ float inv_timebase; /**< 1.0/timebase, for perf measurement */
/** Buffers for command batches, vertex/index data */
ubyte *buffers[CELL_NUM_BUFFERS];