1 /**************************************************************************
3 * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
30 * SPU command processing code
37 #include "pipe/p_defines.h"
39 #include "spu_command.h"
41 #include "spu_render.h"
42 #include "spu_per_fragment_op.h"
43 #include "spu_texture.h"
45 #include "spu_vertex_shader.h"
46 #include "spu_dcache.h"
47 #include "spu_debug.h"
48 #include "cell/common.h"
/* Per-SPU vertex-shader context, shared by the command handlers below
 * (array info, viewport, uniforms, shader binding all write into it). */
51 struct spu_vs_context draw
;
55 * Buffers containing dynamically generated SPU code:
/* NOTE(review): 136 bytes per attribute looks like the per-attribute
 * fetch-code budget -- confirm against the PPU-side code generator. */
57 static unsigned char attribute_fetch_code_buffer
[136 * PIPE_MAX_ATTRIBS
]
63 * Tell the PPU that this SPU has finished copying a buffer to
64 * local store and that it may be reused by the PPU.
65 * This is done by writting a 16-byte batch-buffer-status block back into
66 * main memory (in cell_context->buffer_status[]).
69 release_buffer(uint buffer
)
71 /* Evidently, using less than a 16-byte status doesn't work reliably */
72 static const uint status
[4] ALIGN16_ATTRIB
73 = {CELL_BUFFER_STATUS_FREE
, 0, 0, 0};
/* Index of this SPU's status slot for 'buffer': each SPU owns
 * CELL_NUM_BUFFERS consecutive 16-byte (4-uint) entries. */
75 const uint index
= 4 * (spu
.init
.id
* CELL_NUM_BUFFERS
+ buffer
);
76 uint
*dst
= spu
.init
.buffer_status
+ index
;
78 ASSERT(buffer
< CELL_NUM_BUFFERS
);
/* DMA the 16-byte FREE marker from local store back to main memory. */
80 mfc_put((void *) &status
, /* src in local memory */
81 (unsigned int) dst
, /* dst in main memory */
82 sizeof(status
), /* size */
83 TAG_MISC
, /* tag is unimportant */
/* Handle CELL_CMD_CLEAR_SURFACE: record the clear value for the color
 * (surface 0) or depth surface, then either lazily mark tiles CLEAR or
 * (non-CLEAR_OPT path) write cleared tiles out immediately.
 * NOTE(review): the #if/#else CLEAR_OPT structure is partly missing from
 * this extraction -- only the #endif survives below. */
90 cmd_clear_surface(const struct cell_command_clear_surface
*clear
)
92 DEBUG_PRINTF("CLEAR SURF %u to 0x%08x\n", clear
->surface
, clear
->value
);
94 if (clear
->surface
== 0) {
95 spu
.fb
.color_clear_value
= clear
->value
;
/* Checker debug mode: XOR a per-SPU pattern into the clear color so
 * each SPU's tiles are visually distinguishable. */
96 if (spu
.init
.debug_flags
& CELL_DEBUG_CHECKER
) {
97 uint x
= (spu
.init
.id
<< 4) | (spu
.init
.id
<< 12) |
98 (spu
.init
.id
<< 20) | (spu
.init
.id
<< 28);
99 spu
.fb
.color_clear_value
^= x
;
103 spu
.fb
.depth_clear_value
= clear
->value
;
109 /* Simply set all tiles' status to CLEAR.
110 * When we actually begin rendering into a tile, we'll initialize it to
111 * the clear value. If any tiles go untouched during the frame,
112 * really_clear_tiles() will set them to the clear value.
114 if (clear
->surface
== 0) {
115 memset(spu
.ctile_status
, TILE_STATUS_CLEAR
, sizeof(spu
.ctile_status
));
118 memset(spu
.ztile_status
, TILE_STATUS_CLEAR
, sizeof(spu
.ztile_status
));
124 * This path clears the whole framebuffer to the clear color right now.
128 printf("SPU: %s num=%d w=%d h=%d\n",
129 __FUNCTION__, num_tiles, spu.fb.width_tiles, spu.fb.height_tiles);
132 /* init a single tile to the clear value */
133 if (clear
->surface
== 0) {
134 clear_c_tile(&spu
.ctile
);
137 clear_z_tile(&spu
.ztile
);
140 /* walk over my tiles, writing the 'clear' tile's data */
142 const uint num_tiles
= spu
.fb
.width_tiles
* spu
.fb
.height_tiles
;
/* Tiles are distributed round-robin across SPUs: start at our id,
 * stride by the number of SPUs. */
144 for (i
= spu
.init
.id
; i
< num_tiles
; i
+= spu
.init
.num_spus
) {
145 uint tx
= i
% spu
.fb
.width_tiles
;
146 uint ty
= i
/ spu
.fb
.width_tiles
;
147 if (clear
->surface
== 0)
148 put_tile(tx
, ty
, &spu
.ctile
, TAG_SURFACE_CLEAR
, 0);
150 put_tile(tx
, ty
, &spu
.ztile
, TAG_SURFACE_CLEAR
, 1);
/* In sync-debug mode, block until all clear DMAs complete. */
154 if (spu
.init
.debug_flags
& CELL_DEBUG_SYNC
) {
155 wait_on_mask(1 << TAG_SURFACE_CLEAR
);
158 #endif /* CLEAR_OPT */
160 DEBUG_PRINTF("CLEAR SURF done\n");
/* Handle CELL_CMD_RELEASE_VERTS: tell the PPU the given vertex buffer
 * has been consumed and may be recycled (~0U marks an invalid buffer). */
165 cmd_release_verts(const struct cell_command_release_verts
*release
)
167 DEBUG_PRINTF("RELEASE VERTS %u\n", release
->vertex_buf
);
168 ASSERT(release
->vertex_buf
!= ~0U);
169 release_buffer(release
->vertex_buf
);
174 * Process a CELL_CMD_STATE_FRAGMENT_OPS command.
175 * This involves installing new fragment ops SPU code.
176 * If this function is never called, we'll use a regular C fallback function
177 * for fragment processing.
180 cmd_state_fragment_ops(const struct cell_command_fragment_ops
*fops
)
/* One-shot warning latch for the fallback path (static => persists). */
182 static int warned
= 0;
184 DEBUG_PRINTF("CMD_STATE_FRAGMENT_OPS\n");
185 /* Copy SPU code from batch buffer to spu buffer */
186 memcpy(spu
.fragment_ops_code
, fops
->code
, SPU_MAX_FRAGMENT_OPS_INSTS
* 4);
187 /* Copy state info (for fallback case only) */
188 memcpy(&spu
.depth_stencil_alpha
, &fops
->dsa
, sizeof(fops
->dsa
));
189 memcpy(&spu
.blend
, &fops
->blend
, sizeof(fops
->blend
));
191 /* Parity twist! For now, always use the fallback code by default,
192 * only switching to codegen when specifically requested. This
193 * allows us to develop freely without risking taking down the
196 * Later, the parity of this check will be reversed, so that
197 * codegen is *always* used, unless we specifically indicate that
200 * Eventually, the option will be removed completely, because in
201 * final code we'll always use codegen and won't even provide the
202 * raw state records that the fallback code requires.
204 if ((spu
.init
.debug_flags
& CELL_DEBUG_FRAGMENT_OP_FALLBACK
) == 0) {
/* Point the per-fragment function pointer at the freshly copied code. */
205 spu
.fragment_ops
= (spu_fragment_ops_func
) spu
.fragment_ops_code
;
208 /* otherwise, the default fallback code remains in place */
210 fprintf(stderr
, "Cell Warning: using fallback per-fragment code\n");
/* Cache whether the bound state actually reads depth/stencil, so the
 * render path can skip Z/S tile fetches when disabled. */
215 spu
.read_depth
= spu
.depth_stencil_alpha
.depth
.enabled
;
216 spu
.read_stencil
= spu
.depth_stencil_alpha
.stencil
[0].enabled
;
/* Handle CELL_CMD_STATE_FRAGMENT_PROGRAM: install PPU-generated fragment
 * program code (4 bytes per instruction) and retarget the call pointer. */
221 cmd_state_fragment_program(const struct cell_command_fragment_program
*fp
)
223 DEBUG_PRINTF("CMD_STATE_FRAGMENT_PROGRAM\n");
224 /* Copy SPU code from batch buffer to spu buffer */
225 memcpy(spu
.fragment_program_code
, fp
->code
,
226 SPU_MAX_FRAGMENT_PROGRAM_INSTS
* 4);
228 /* Point function pointer at new code */
229 spu
.fragment_program
= (spu_fragment_program_func
)spu
.fragment_program_code
;
/* Handle CELL_CMD_STATE_FS_CONSTANTS, an inline-payload batch command:
 * buffer[pos+1] = number of float constants, floats start at buffer[pos+2].
 * Returns the position of the next command (in 8-byte words).
 * NOTE(review): 'num_const / 2' rounds down -- an odd constant count would
 * under-advance by 4 bytes; presumably the PPU always sends an even count
 * (or pads) -- confirm against the emitter. */
235 cmd_state_fs_constants(const uint64_t *buffer
, uint pos
)
237 const uint num_const
= buffer
[pos
+ 1];
238 const float *constants
= (const float *) &buffer
[pos
+ 2];
241 DEBUG_PRINTF("CMD_STATE_FS_CONSTANTS (%u)\n", num_const
);
243 /* Expand each float to float[4] for SOA execution */
244 for (i
= 0; i
< num_const
; i
++) {
245 spu
.constants
[i
] = spu_splats(constants
[i
]);
248 /* return new buffer pos (in 8-byte words) */
249 return pos
+ 2 + num_const
/ 2;
/* Handle CELL_CMD_STATE_FRAMEBUFFER: record surface addresses/formats and
 * dimensions, derive tile counts, and set the Z scale factor used to map
 * normalized depth to the integer format's range. */
254 cmd_state_framebuffer(const struct cell_command_framebuffer
*cmd
)
256 DEBUG_PRINTF("FRAMEBUFFER: %d x %d at %p, cformat 0x%x zformat 0x%x\n",
/* Surface base addresses must be 16-byte aligned for MFC DMA. */
263 ASSERT_ALIGN16(cmd
->color_start
);
264 ASSERT_ALIGN16(cmd
->depth_start
);
266 spu
.fb
.color_start
= cmd
->color_start
;
267 spu
.fb
.depth_start
= cmd
->depth_start
;
268 spu
.fb
.color_format
= cmd
->color_format
;
269 spu
.fb
.depth_format
= cmd
->depth_format
;
270 spu
.fb
.width
= cmd
->width
;
271 spu
.fb
.height
= cmd
->height
;
/* Tile counts round up so partial edge tiles are included. */
272 spu
.fb
.width_tiles
= (spu
.fb
.width
+ TILE_SIZE
- 1) / TILE_SIZE
;
273 spu
.fb
.height_tiles
= (spu
.fb
.height
+ TILE_SIZE
- 1) / TILE_SIZE
;
/* zscale = max integer value representable by the depth format. */
275 switch (spu
.fb
.depth_format
) {
276 case PIPE_FORMAT_Z32_UNORM
:
278 spu
.fb
.zscale
= (float) 0xffffffffu
;
280 case PIPE_FORMAT_Z24S8_UNORM
:
281 case PIPE_FORMAT_S8Z24_UNORM
:
282 case PIPE_FORMAT_Z24X8_UNORM
:
283 case PIPE_FORMAT_X8Z24_UNORM
:
/* 24-bit depth component for all combined/padded 24-bit formats. */
285 spu
.fb
.zscale
= (float) 0x00ffffffu
;
287 case PIPE_FORMAT_Z16_UNORM
:
289 spu
.fb
.zscale
= (float) 0xffffu
;
/* Handle CELL_CMD_STATE_SAMPLER: store the sampler state for one texture
 * unit and pick the matching 4-texel sampling function:
 * mipmapping -> lod sampler; else linear -> bilinear; else nearest. */
299 cmd_state_sampler(const struct cell_command_sampler
*sampler
)
301 DEBUG_PRINTF("SAMPLER [%u]\n", sampler
->unit
);
303 spu
.sampler
[sampler
->unit
] = sampler
->state
;
305 if (spu
.sampler
[sampler
->unit
].min_mip_filter
!= PIPE_TEX_MIPFILTER_NONE
) {
306 spu
.sample_texture4
[sampler
->unit
] = sample_texture4_lod
;
310 if (spu
.sampler
[sampler
->unit
].min_img_filter
== PIPE_TEX_FILTER_LINEAR
) {
311 spu
.sample_texture4
[sampler
->unit
] = sample_texture4_bilinear
;
314 spu
.sample_texture4
[sampler
->unit
] = sample_texture4_nearest
;
/* Handle CELL_CMD_STATE_TEXTURE: record, for every mipmap level of one
 * texture unit, its main-memory address, dimensions, tile layout, and
 * precomputed splatted values used by the SIMD sampling code. */
320 cmd_state_texture(const struct cell_command_texture
*texture
)
322 const uint unit
= texture
->unit
;
325 DEBUG_PRINTF("TEXTURE [%u]\n", texture
->unit
);
327 for (i
= 0; i
< CELL_MAX_TEXTURE_LEVELS
; i
++) {
328 uint width
= texture
->width
[i
];
329 uint height
= texture
->height
[i
];
331 DEBUG_PRINTF(" LEVEL %u: at %p size[0] %u x %u\n", i
,
332 texture
->start
[i
], texture
->width
[i
], texture
->height
[i
]);
334 spu
.texture
[unit
].level
[i
].start
= texture
->start
[i
];
335 spu
.texture
[unit
].level
[i
].width
= width
;
336 spu
.texture
[unit
].level
[i
].height
= height
;
/* NOTE(review): integer division -- assumes level width is a multiple
 * of TILE_SIZE (or that partial tiles are handled elsewhere). */
338 spu
.texture
[unit
].level
[i
].tiles_per_row
= width
/ TILE_SIZE
;
/* Splatted float sizes for SIMD texcoord scaling. */
340 spu
.texture
[unit
].level
[i
].width4
= spu_splats((float) width
);
341 spu
.texture
[unit
].level
[i
].height4
= spu_splats((float) height
);
/* Power-of-two wrap masks (width-1 / height-1) for coordinate repeat. */
343 spu
.texture
[unit
].level
[i
].tex_size_x_mask
= spu_splats(width
- 1);
344 spu
.texture
[unit
].level
[i
].tex_size_y_mask
= spu_splats(height
- 1);
/* Handle CELL_CMD_STATE_VERTEX_INFO: copy the vertex layout descriptor
 * into SPU-local state (1..8 attributes supported). */
350 cmd_state_vertex_info(const struct vertex_info
*vinfo
)
352 DEBUG_PRINTF("VERTEX_INFO num_attribs=%u\n", vinfo
->num_attribs
);
353 ASSERT(vinfo
->num_attribs
>= 1);
354 ASSERT(vinfo
->num_attribs
<= 8);
355 memcpy(&spu
.vertex_info
, vinfo
, sizeof(*vinfo
));
/* Handle CELL_CMD_STATE_VS_ARRAY_INFO: record one vertex attribute's
 * source pointer, pitch, size, and fetch-code offset in the draw context,
 * and mark the vertex-fetch state dirty so code gets re-selected. */
360 cmd_state_vs_array_info(const struct cell_array_info
*vs_info
)
362 const unsigned attr
= vs_info
->attr
;
364 ASSERT(attr
< PIPE_MAX_ATTRIBS
);
365 draw
.vertex_fetch
.src_ptr
[attr
] = vs_info
->base
;
366 draw
.vertex_fetch
.pitch
[attr
] = vs_info
->pitch
;
367 draw
.vertex_fetch
.size
[attr
] = vs_info
->size
;
368 draw
.vertex_fetch
.code_offset
[attr
] = vs_info
->function_offset
;
369 draw
.vertex_fetch
.dirty
= 1;
/* Handle CELL_CMD_STATE_ATTRIB_FETCH: DMA the PPU-generated attribute
 * fetch code into local store, wait for the transfer, then point the
 * draw context at it. NOTE(review): the mfc_get size/tag arguments are
 * missing from this extraction -- the wait below implies TAG_BATCH_BUFFER. */
374 cmd_state_attrib_fetch(const struct cell_attribute_fetch_code
*code
)
376 mfc_get(attribute_fetch_code_buffer
,
377 (unsigned int) code
->base
, /* src */
382 wait_on_mask(1 << TAG_BATCH_BUFFER
);
384 draw
.vertex_fetch
.code
= attribute_fetch_code_buffer
;
/* Body of the FINISH handler (function header lost in this extraction):
 * flush lazily-cleared tiles, drain all outstanding DMA, then signal
 * completion to the PPU via the outbound mailbox. */
391 DEBUG_PRINTF("FINISH\n");
392 really_clear_tiles(0);
393 /* wait for all outstanding DMAs to finish */
394 mfc_write_tag_mask(~0);
395 mfc_read_tag_status_all();
396 /* send mbox message to PPU */
397 spu_write_out_mbox(CELL_CMD_FINISH
);
402 * Execute a batch of commands which was sent to us by the PPU.
403 * See the cell_emit_state.c code to see where the commands come from.
405 * The opcode param encodes the location of the buffer and its size.
408 cmd_batch(uint opcode
)
/* opcode layout: bits 8..15 = buffer index, bits 16..31 = byte size. */
410 const uint buf
= (opcode
>> 8) & 0xff;
411 uint size
= (opcode
>> 16);
412 uint64_t buffer
[CELL_BUFFER_SIZE
/ 8] ALIGN16_ATTRIB
;
/* Batch length in 8-byte command words. */
413 const unsigned usize
= size
/ sizeof(buffer
[0]);
416 DEBUG_PRINTF("BATCH buffer %u, len %u, from %p\n",
417 buf
, size
, spu
.init
.buffers
[buf
]);
419 ASSERT((opcode
& CELL_CMD_OPCODE_MASK
) == CELL_CMD_BATCH
);
421 ASSERT_ALIGN16(spu
.init
.buffers
[buf
]);
/* DMA transfer sizes must be a multiple of 16 bytes. */
423 size
= ROUNDUP16(size
);
425 ASSERT_ALIGN16(spu
.init
.buffers
[buf
]);
/* Pull the batch from main memory into the local 'buffer'. */
427 mfc_get(buffer
, /* dest */
428 (unsigned int) spu
.init
.buffers
[buf
], /* src */
433 wait_on_mask(1 << TAG_BATCH_BUFFER
);
435 /* Tell PPU we're done copying the buffer to local store */
436 DEBUG_PRINTF("release batch buf %u\n", buf
);
440 * Loop over commands in the batch buffer
/* Each case advances 'pos' by its own payload size (in 8-byte words);
 * that is why the for-loop itself has no increment. */
442 for (pos
= 0; pos
< usize
; /* no incr */) {
443 switch (buffer
[pos
]) {
447 case CELL_CMD_CLEAR_SURFACE
:
449 struct cell_command_clear_surface
*clr
450 = (struct cell_command_clear_surface
*) &buffer
[pos
];
451 cmd_clear_surface(clr
);
452 pos
+= sizeof(*clr
) / 8;
455 case CELL_CMD_RENDER
:
457 struct cell_command_render
*render
458 = (struct cell_command_render
*) &buffer
[pos
];
/* cmd_render reports how far to advance via pos_incr (render
 * commands carry variable-length vertex data). */
460 cmd_render(render
, &pos_incr
);
465 * state-update commands
467 case CELL_CMD_STATE_FRAMEBUFFER
:
469 struct cell_command_framebuffer
*fb
470 = (struct cell_command_framebuffer
*) &buffer
[pos
];
471 cmd_state_framebuffer(fb
);
472 pos
+= sizeof(*fb
) / 8;
475 case CELL_CMD_STATE_FRAGMENT_OPS
:
477 struct cell_command_fragment_ops
*fops
478 = (struct cell_command_fragment_ops
*) &buffer
[pos
];
479 cmd_state_fragment_ops(fops
);
480 pos
+= sizeof(*fops
) / 8;
483 case CELL_CMD_STATE_FRAGMENT_PROGRAM
:
485 struct cell_command_fragment_program
*fp
486 = (struct cell_command_fragment_program
*) &buffer
[pos
];
487 cmd_state_fragment_program(fp
);
488 pos
+= sizeof(*fp
) / 8;
491 case CELL_CMD_STATE_FS_CONSTANTS
:
/* Variable-length payload: handler returns the new position. */
492 pos
= cmd_state_fs_constants(buffer
, pos
);
494 case CELL_CMD_STATE_SAMPLER
:
496 struct cell_command_sampler
*sampler
497 = (struct cell_command_sampler
*) &buffer
[pos
];
498 cmd_state_sampler(sampler
);
499 pos
+= sizeof(*sampler
) / 8;
502 case CELL_CMD_STATE_TEXTURE
:
504 struct cell_command_texture
*texture
505 = (struct cell_command_texture
*) &buffer
[pos
];
506 cmd_state_texture(texture
);
507 pos
+= sizeof(*texture
) / 8;
510 case CELL_CMD_STATE_VERTEX_INFO
:
/* Payload starts at pos+1 (the opcode word itself occupies pos). */
511 cmd_state_vertex_info((struct vertex_info
*) &buffer
[pos
+1]);
512 pos
+= (1 + ROUNDUP8(sizeof(struct vertex_info
)) / 8);
514 case CELL_CMD_STATE_VIEWPORT
:
515 (void) memcpy(& draw
.viewport
, &buffer
[pos
+1],
516 sizeof(struct pipe_viewport_state
));
517 pos
+= (1 + ROUNDUP8(sizeof(struct pipe_viewport_state
)) / 8);
519 case CELL_CMD_STATE_UNIFORMS
:
/* Payload is a main-memory pointer to the constants, not the data. */
520 draw
.constants
= (const float (*)[4]) (uintptr_t) buffer
[pos
+ 1];
523 case CELL_CMD_STATE_VS_ARRAY_INFO
:
524 cmd_state_vs_array_info((struct cell_array_info
*) &buffer
[pos
+1]);
525 pos
+= (1 + ROUNDUP8(sizeof(struct cell_array_info
)) / 8);
527 case CELL_CMD_STATE_BIND_VS
:
529 spu_bind_vertex_shader(&draw
,
530 (struct cell_shader_info
*) &buffer
[pos
+1]);
532 pos
+= (1 + ROUNDUP8(sizeof(struct cell_shader_info
)) / 8);
534 case CELL_CMD_STATE_ATTRIB_FETCH
:
535 cmd_state_attrib_fetch((struct cell_attribute_fetch_code
*)
537 pos
+= (1 + ROUNDUP8(sizeof(struct cell_attribute_fetch_code
)) / 8);
542 case CELL_CMD_FINISH
:
546 case CELL_CMD_RELEASE_VERTS
:
548 struct cell_command_release_verts
*release
549 = (struct cell_command_release_verts
*) &buffer
[pos
];
550 cmd_release_verts(release
);
551 pos
+= sizeof(*release
) / 8;
554 case CELL_CMD_FLUSH_BUFFER_RANGE
: {
555 struct cell_buffer_range
*br
= (struct cell_buffer_range
*)
/* Invalidate any cached data overlapping [base, base+size). */
558 spu_dcache_mark_dirty((unsigned) br
->base
, br
->size
);
559 pos
+= (1 + ROUNDUP8(sizeof(struct cell_buffer_range
)) / 8);
563 printf("SPU %u: bad opcode: 0x%llx\n", spu
.init
.id
, buffer
[pos
]);
569 DEBUG_PRINTF("BATCH complete\n");
575 * Main loop for SPEs: Get a command, execute it, repeat.
/* NOTE(review): this loop is truncated in the extraction -- the function
 * header, loop construct, EXIT case, and closing braces are not visible. */
580 struct cell_command cmd
;
583 DEBUG_PRINTF("Enter command loop\n");
/* cmd must be 16-byte sized and aligned so mfc_get below is legal. */
585 ASSERT((sizeof(struct cell_command
) & 0xf) == 0);
586 ASSERT_ALIGN16(&cmd
);
592 DEBUG_PRINTF("Wait for cmd...\n");
594 /* read/wait from mailbox */
595 opcode
= (unsigned int) spu_read_in_mbox();
597 DEBUG_PRINTF("got cmd 0x%x\n", opcode
);
599 /* command payload */
600 mfc_get(&cmd
, /* dest */
601 (unsigned int) spu
.init
.cmd
, /* src */
602 sizeof(struct cell_command
), /* bytes */
606 wait_on_mask( 1 << tag
);
609 * NOTE: most commands should be contained in a batch buffer
612 switch (opcode
& CELL_CMD_OPCODE_MASK
) {
614 DEBUG_PRINTF("EXIT\n");
617 case CELL_CMD_VS_EXECUTE
:
619 spu_execute_vertex_shader(&draw
, &cmd
.vs
);
626 printf("Bad opcode 0x%x!\n", opcode
& CELL_CMD_OPCODE_MASK
);
631 DEBUG_PRINTF("Exit command loop\n");