1 /**************************************************************************
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
29 /* main() for Cell SPU code */
34 #include <spu_mfcio.h>
37 #include "spu_texture.h"
40 #include "pipe/cell/common.h"
41 #include "pipe/p_defines.h"
46 /usr/lib/gcc/spu/4.1.1/include/spu_mfcio.h
47 /opt/ibm/cell-sdk/prototype/sysroot/usr/include/libmisc.h
/* File-local debug flag, initially FALSE.  Presumably gates the printf
 * tracing in this file; the guarding conditionals are not visible here. */
50 static boolean Debug
= FALSE
;
/* Global per-SPU state; this file reads and writes spu.init, spu.fb,
 * spu.depth_stencil, spu.sampler, spu.texture and spu.vertex_info. */
52 struct spu_global spu
;
/*
 * Select the DMA tag group(s) given by tagMask, then block until at least
 * one of the selected groups has completed.
 */
56 wait_on_mask(unsigned tagMask
)
58 mfc_write_tag_mask( tagMask
);
59 /* wait for completion of _any_ DMAs specified by tagMask */
60 mfc_read_tag_status_any();
/*
 * Select the DMA tag group(s) given by tagMask, then block until every
 * selected group has completed (the "all" counterpart of wait_on_mask).
 */
65 wait_on_mask_all(unsigned tagMask
)
67 mfc_write_tag_mask( tagMask
);
68 /* wait for completion of _all_ DMAs specified by tagMask */
69 mfc_read_tag_status_all();
74 * Tell the PPU that this SPU has finished copying a buffer to
75 * local store and that it may be reused by the PPU.
76 * This is done by writing a 16-byte batch-buffer-status block back into
77 * main memory (in cell_context->buffer_status[]).
/*
 * Mark batch/vertex buffer number `buffer` as free again by DMA-ing a
 * status block whose first word is CELL_BUFFER_STATUS_FREE out to main
 * memory.  `buffer` must be < CELL_NUM_BUFFERS (asserted below).
 */
80 release_buffer(uint buffer
)
82 /* Evidently, using less than a 16-byte status doesn't work reliably */
83 static const uint status
[4] ALIGN16_ATTRIB
84 = {CELL_BUFFER_STATUS_FREE
, 0, 0, 0};
/* Each (SPU id, buffer) pair owns one 4-uint slot in
 * spu.init.buffer_status[]; index is the offset of that slot in uints. */
86 const uint index
= 4 * (spu
.init
.id
* CELL_NUM_BUFFERS
+ buffer
);
87 uint
*dst
= spu
.init
.buffer_status
+ index
;
89 ASSERT(buffer
< CELL_NUM_BUFFERS
);
/* NOTE(review): the trailing mfc_put arguments are not visible in this
 * extract. */
91 mfc_put((void *) &status
, /* src in local memory */
92 (unsigned int) dst
, /* dst in main memory */
93 sizeof(status
), /* size */
94 TAG_MISC
, /* tag is unimportant */
101 * For tiles whose status is TILE_STATUS_CLEAR, write solid-filled
102 * tiles back to the main framebuffer.
/*
 * Write solid-filled tiles to the main framebuffer for every tile owned by
 * this SPU whose status is still TILE_STATUS_CLEAR.
 *
 * surfaceIndex == 0 selects the color surface (ctile / tile_status); the
 * other visible branch handles the Z surface (ztile / tile_status_z).
 * Tiles are visited starting at spu.init.id with a stride of
 * spu.init.num_spus, so each SPU touches only its own share.
 */
105 really_clear_tiles(uint surfaceIndex
)
107 const uint num_tiles
= spu
.fb
.width_tiles
* spu
.fb
.height_tiles
;
110 if (surfaceIndex
== 0) {
/* fill the local color tile with the clear color, then write it out */
111 clear_c_tile(&ctile
);
113 for (i
= spu
.init
.id
; i
< num_tiles
; i
+= spu
.init
.num_spus
) {
114 uint tx
= i
% spu
.fb
.width_tiles
;
115 uint ty
= i
/ spu
.fb
.width_tiles
;
116 if (tile_status
[ty
][tx
] == TILE_STATUS_CLEAR
) {
117 put_tile(tx
, ty
, &ctile
, TAG_SURFACE_CLEAR
, 0);
/* Z surface: fill the local Z tile with the clear value, then write it out */
122 clear_z_tile(&ztile
);
124 for (i
= spu
.init
.id
; i
< num_tiles
; i
+= spu
.init
.num_spus
) {
125 uint tx
= i
% spu
.fb
.width_tiles
;
126 uint ty
= i
/ spu
.fb
.width_tiles
;
127 if (tile_status_z
[ty
][tx
] == TILE_STATUS_CLEAR
)
/* The Z buffer must receive the freshly cleared ztile, not the color
 * tile: the final argument 1 selects the Z surface. */
128 put_tile(tx
, ty
, &ztile
, TAG_SURFACE_CLEAR
, 1);
133 wait_on_mask(1 << TAG_SURFACE_CLEAR
);
/*
 * Handle a CLEAR_SURFACE command: clear->surface == 0 addresses the color
 * surface, otherwise the Z surface, and clear->value is the clear value.
 *
 * Two strategies are visible below: (a) only mark every tile status as
 * TILE_STATUS_CLEAR and record the clear value, and (b) fill a local tile
 * and immediately put_tile() it for every tile owned by this SPU.
 * NOTE(review): the lines that select between (a) and (b) are not visible
 * in this extract - confirm which one is active.
 */
139 cmd_clear_surface(const struct cell_command_clear_surface
*clear
)
141 const uint num_tiles
= spu
.fb
.width_tiles
* spu
.fb
.height_tiles
;
145 printf("SPU %u: CLEAR SURF %u to 0x%08x\n", spu
.init
.id
,
146 clear
->surface
, clear
->value
);
150 /* set every tile's status to CLEAR */
151 if (clear
->surface
== 0) {
152 memset(tile_status
, TILE_STATUS_CLEAR
, sizeof(tile_status
));
153 spu
.fb
.color_clear_value
= clear
->value
;
156 memset(tile_status_z
, TILE_STATUS_CLEAR
, sizeof(tile_status_z
));
157 spu
.fb
.depth_clear_value
= clear
->value
;
/* strategy (b): record the clear value and pre-fill the local tile */
162 if (clear
->surface
== 0) {
163 spu
.fb
.color_clear_value
= clear
->value
;
164 clear_c_tile(&ctile
);
167 spu
.fb
.depth_clear_value
= clear
->value
;
168 clear_z_tile(&ztile
);
172 printf("SPU: %s num=%d w=%d h=%d\n",
173 __FUNCTION__, num_tiles, spu.fb.width_tiles, spu.fb.height_tiles);
/* visit only this SPU's tiles: start at our id, stride by the SPU count */
176 for (i
= spu
.init
.id
; i
< num_tiles
; i
+= spu
.init
.num_spus
) {
177 uint tx
= i
% spu
.fb
.width_tiles
;
178 uint ty
= i
/ spu
.fb
.width_tiles
;
179 if (clear
->surface
== 0)
180 put_tile(tx
, ty
, &ctile
, TAG_SURFACE_CLEAR
, 0);
182 put_tile(tx
, ty
, &ztile
, TAG_SURFACE_CLEAR
, 1);
183 /* XXX we don't want this here, but it fixes bad tile results */
187 wait_on_mask(1 << TAG_SURFACE_CLEAR
);
191 printf("SPU %u: CLEAR SURF done\n", spu
.init
.id
);
196 * Given a rendering command's bounding box (in pixels) compute the
197 * location of the corresponding screen tile bounding box.
/*
 * Convert the pixel bounding box of a render command into tile units.
 *
 * Outputs:
 *   *txmin, *tymin     tile coordinates of the box's minimum corner
 *   *box_width_tiles   box width in tiles
 *   *box_num_tiles     total number of tiles in the box
 *
 * Two bodies are visible: a debug variant covering the whole window and
 * the real variant dividing render->{x,y}{min,max} by TILE_SIZE.
 * NOTE(review): the lines selecting between them are not visible here.
 */
200 tile_bounding_box(const struct cell_command_render
*render
,
201 uint
*txmin
, uint
*tymin
,
202 uint
*box_num_tiles
, uint
*box_width_tiles
)
205 /* Debug: full-window bounding box */
206 uint txmax
= spu
.fb
.width_tiles
- 1;
207 uint tymax
= spu
.fb
.height_tiles
- 1;
210 *box_num_tiles
= spu
.fb
.width_tiles
* spu
.fb
.height_tiles
;
211 *box_width_tiles
= spu
.fb
.width_tiles
;
216 uint txmax
, tymax
, box_height_tiles
;
/* the bounds are cast to uint before the division, so each result is
 * the index of the tile containing that coordinate */
218 *txmin
= (uint
) render
->xmin
/ TILE_SIZE
;
219 *tymin
= (uint
) render
->ymin
/ TILE_SIZE
;
220 txmax
= (uint
) render
->xmax
/ TILE_SIZE
;
221 tymax
= (uint
) render
->ymax
/ TILE_SIZE
;
/* +1 because both the min and the max tile are inside the box */
222 *box_width_tiles
= txmax
- *txmin
+ 1;
223 box_height_tiles
= tymax
- *tymin
+ 1;
224 *box_num_tiles
= *box_width_tiles
* box_height_tiles
;
227 printf("SPU %u: bounds: %g, %g ... %g, %g\n", spu
.init
.id
,
228 render
->xmin
, render
->ymin
, render
->xmax
, render
->ymax
);
229 printf("SPU %u: tiles: %u, %u .. %u, %u\n",
230 spu
.init
.id
, *txmin
, *tymin
, txmax
, tymax
);
231 ASSERT(render
->xmin
<= render
->xmax
);
232 ASSERT(render
->ymin
<= render
->ymax
);
237 /**
 * Check if the tile at (tx,ty) belongs to this SPU.
 * The tile's linear index (row-major, spu.fb.width_tiles per row) modulo
 * spu.init.num_spus is compared against spu.init.id.
 */
238 static INLINE boolean
239 my_tile(uint tx
, uint ty
)
241 return (spu
.fb
.width_tiles
* ty
+ tx
) % spu
.init
.num_spus
== spu
.init
.id
;
247 * \param pos_incr returns value indicating how many words to skip after
248 * this command in the batch buffer
/*
 * Execute one RENDER command.
 *
 * The index list (ushort) sits directly after the command struct.  The
 * vertices either follow the indexes inline (render->inline_verts) or are
 * DMA-ed from spu.init.buffers[render->vertex_buf] into a local array.
 * For every tile of the primitive's bounding box that my_tile() assigns
 * to this SPU, tiles not in TILE_STATUS_CLEAR are fetched, every indexed
 * triangle is passed to tri_draw(), and tiles left TILE_STATUS_DIRTY are
 * written back and marked TILE_STATUS_DEFINED.
 *
 * *pos_incr receives the number of 4-byte words used by the indexes
 * (rounded up) plus, for inline vertices, total_vertex_bytes / 4.
 */
251 cmd_render(const struct cell_command_render
*render
, uint
*pos_incr
)
253 /* we'll DMA into these buffers */
254 ubyte vertex_data
[CELL_BUFFER_SIZE
] ALIGN16_ATTRIB
;
255 const uint vertex_size
= render
->vertex_size
; /* in bytes */
256 /*const*/ uint total_vertex_bytes
= render
->num_verts
* vertex_size
;
257 const ubyte
*vertices
;
258 const ushort
*indexes
;
263 printf("SPU %u: RENDER prim %u, num_vert=%u num_ind=%u "
269 render
->inline_verts
);
272 printf(" bound: %g, %g .. %g, %g\n",
273 render->xmin, render->ymin, render->xmax, render->ymax);
277 ASSERT(sizeof(*render
) % 4 == 0);
278 ASSERT(total_vertex_bytes
% 16 == 0);
280 /* indexes are right after the render command in the batch buffer */
281 indexes
= (const ushort
*) (render
+ 1);
/* 2 bytes per index, rounded up to whole 4-byte words */
282 *pos_incr
= (render
->num_indexes
* 2 + 3) / 4;
285 if (render
->inline_verts
) {
286 /* Vertices are right after indexes in batch buffer */
287 vertices
= (const ubyte
*) (render
+ 1) + *pos_incr
* 4;
288 *pos_incr
= *pos_incr
+ total_vertex_bytes
/ 4;
291 /* Begin DMA fetch of vertex buffer */
292 ubyte
*src
= spu
.init
.buffers
[render
->vertex_buf
];
293 ubyte
*dest
= vertex_data
;
295 /* skip vertex data we won't use */
/* src and dest advance by the same amount, so indexes still address
 * vertex_data[] relative to its start */
297 src
+= render
->min_index
* vertex_size
;
298 dest
+= render
->min_index
* vertex_size
;
299 total_vertex_bytes
-= render
->min_index
* vertex_size
;
301 ASSERT(total_vertex_bytes
% 16 == 0);
302 ASSERT_ALIGN16(dest
);
305 mfc_get(dest
, /* in vertex_data[] array */
306 (unsigned int) src
, /* src in main memory */
307 total_vertex_bytes
, /* size */
312 vertices
= vertex_data
;
314 wait_on_mask(1 << TAG_VERTEX_BUFFER
);
319 ** find tiles which intersect the prim bounding box
321 uint txmin
, tymin
, box_width_tiles
, box_num_tiles
;
322 tile_bounding_box(render
, &txmin
, &tymin
,
323 &box_num_tiles
, &box_width_tiles
);
326 /* make sure any pending clears have completed */
327 wait_on_mask(1 << TAG_SURFACE_CLEAR
); /* XXX temporary */
331 ** loop over tiles, rendering tris
333 for (i
= 0; i
< box_num_tiles
; i
++) {
334 const uint tx
= txmin
+ i
% box_width_tiles
;
335 const uint ty
= tymin
+ i
/ box_width_tiles
;
337 ASSERT(tx
< spu
.fb
.width_tiles
);
338 ASSERT(ty
< spu
.fb
.height_tiles
);
340 if (!my_tile(tx
, ty
))
343 /* Start fetching color/z tiles. We'll wait for completion when
344 * we need read/write to them later in triangle rasterization.
346 if (spu
.depth_stencil
.depth
.enabled
) {
347 if (tile_status_z
[ty
][tx
] != TILE_STATUS_CLEAR
) {
348 get_tile(tx
, ty
, &ztile
, TAG_READ_TILE_Z
, 1);
352 if (tile_status
[ty
][tx
] != TILE_STATUS_CLEAR
) {
353 get_tile(tx
, ty
, &ctile
, TAG_READ_TILE_COLOR
, 0);
356 ASSERT(render
->prim_type
== PIPE_PRIM_TRIANGLES
);
357 ASSERT(render
->num_indexes
% 3 == 0);
360 for (j
= 0; j
< render
->num_indexes
; j
+= 3) {
361 const float *v0
, *v1
, *v2
;
363 v0
= (const float *) (vertices
+ indexes
[j
+0] * vertex_size
);
364 v1
= (const float *) (vertices
+ indexes
[j
+1] * vertex_size
);
365 v2
= (const float *) (vertices
+ indexes
[j
+2] * vertex_size
);
367 tri_draw(v0
, v1
, v2
, tx
, ty
);
370 /* write color/z tiles back to main framebuffer, if dirtied */
371 if (tile_status
[ty
][tx
] == TILE_STATUS_DIRTY
) {
372 put_tile(tx
, ty
, &ctile
, TAG_WRITE_TILE_COLOR
, 0);
373 tile_status
[ty
][tx
] = TILE_STATUS_DEFINED
;
375 if (spu
.depth_stencil
.depth
.enabled
) {
376 if (tile_status_z
[ty
][tx
] == TILE_STATUS_DIRTY
) {
377 put_tile(tx
, ty
, &ztile
, TAG_WRITE_TILE_Z
, 1);
378 tile_status_z
[ty
][tx
] = TILE_STATUS_DEFINED
;
382 /* XXX move these... */
383 wait_on_mask(1 << TAG_WRITE_TILE_COLOR
);
384 if (spu
.depth_stencil
.depth
.enabled
) {
385 wait_on_mask(1 << TAG_WRITE_TILE_Z
);
390 printf("SPU %u: RENDER done\n",
/*
 * Handle a RELEASE_VERTS command: hand the vertex buffer named by
 * release->vertex_buf back via release_buffer().  The value ~0U is
 * rejected by the assertion below.
 */
396 cmd_release_verts(const struct cell_command_release_verts
*release
)
399 printf("SPU %u: RELEASE VERTS %u\n",
400 spu
.init
.id
, release
->vertex_buf
);
401 ASSERT(release
->vertex_buf
!= ~0U);
402 release_buffer(release
->vertex_buf
);
/*
 * Handle a FRAMEBUFFER state command: copy the surface addresses, formats
 * and pixel size from cmd into spu.fb, and derive the framebuffer size in
 * tiles.  Both start addresses must be 16-byte aligned (asserted below).
 */
407 cmd_state_framebuffer(const struct cell_command_framebuffer
*cmd
)
410 printf("SPU %u: FRAMEBUFFER: %d x %d at %p, cformat 0x%x zformat 0x%x\n",
418 ASSERT_ALIGN16(cmd
->color_start
);
419 ASSERT_ALIGN16(cmd
->depth_start
);
421 spu
.fb
.color_start
= cmd
->color_start
;
422 spu
.fb
.depth_start
= cmd
->depth_start
;
423 spu
.fb
.color_format
= cmd
->color_format
;
424 spu
.fb
.depth_format
= cmd
->depth_format
;
425 spu
.fb
.width
= cmd
->width
;
426 spu
.fb
.height
= cmd
->height
;
/* round up so a partially covered tile at the right/bottom edge counts */
427 spu
.fb
.width_tiles
= (spu
.fb
.width
+ TILE_SIZE
- 1) / TILE_SIZE
;
428 spu
.fb
.height_tiles
= (spu
.fb
.height
+ TILE_SIZE
- 1) / TILE_SIZE
;
/* NOTE(review): the statements executed for each depth format are not
 * visible in this extract. */
430 if (spu
.fb
.depth_format
== PIPE_FORMAT_Z32_UNORM
)
432 else if (spu
.fb
.depth_format
== PIPE_FORMAT_Z16_UNORM
)
/*
 * Handle a DEPTH_STENCIL state command: copy *state verbatim into
 * spu.depth_stencil.
 */
440 cmd_state_depth_stencil(const struct pipe_depth_stencil_alpha_state
*state
)
443 printf("SPU %u: DEPTH_STENCIL: ztest %d\n",
445 state
->depth
.enabled
);
447 memcpy(&spu
.depth_stencil
, state
, sizeof(*state
));
/*
 * Handle a SAMPLER state command: copy *state verbatim into
 * spu.sampler[0] (only slot 0 is written here).
 */
452 cmd_state_sampler(const struct pipe_sampler_state
*state
)
455 printf("SPU %u: SAMPLER\n",
458 memcpy(&spu
.sampler
[0], state
, sizeof(*state
));
/*
 * Handle a TEXTURE state command: copy *texture (start address, width,
 * height, ...) verbatim into spu.texture.
 */
463 cmd_state_texture(const struct cell_command_texture
*texture
)
466 printf("SPU %u: TEXTURE at %p size %u x %u\n",
467 spu
.init
.id
, texture
->start
, texture
->width
, texture
->height
);
469 memcpy(&spu
.texture
, texture
, sizeof(*texture
));
/*
 * Handle a VERTEX_INFO state command: copy *vinfo verbatim into
 * spu.vertex_info.  vinfo->num_attribs must be in 1..8 (asserted below).
 */
474 cmd_state_vertex_info(const struct vertex_info
*vinfo
)
477 printf("SPU %u: VERTEX_INFO num_attribs=%u\n", spu
.init
.id
,
480 ASSERT(vinfo
->num_attribs
>= 1);
481 ASSERT(vinfo
->num_attribs
<= 8);
482 memcpy(&spu
.vertex_info
, vinfo
, sizeof(*vinfo
));
/*
 * FINISH handling (the enclosing function header is not visible in this
 * extract): flush still-CLEAR color tiles, wait for every outstanding
 * DMA, then notify the PPU through the outbound mailbox.
 */
491 printf("SPU %u: FINISH\n", spu
.init
.id
);
/* surface index 0 = color surface; see really_clear_tiles() */
492 really_clear_tiles(0);
493 /* wait for all outstanding DMAs to finish */
494 mfc_write_tag_mask(~0);
495 mfc_read_tag_status_all();
496 /* send mbox message to PPU */
497 spu_write_out_mbox(CELL_CMD_FINISH
);
502 * Execute a batch of commands
503 * The opcode param encodes the location of the buffer and its size.
/*
 * Execute a batch buffer.
 *
 * opcode packs the request: bits 8..15 select the buffer (buf) and the
 * bits from 16 upward give its size in bytes.  The buffer is DMA-ed from
 * spu.init.buffers[buf] into a local array, then walked one command at a
 * time; each case advances `pos` by that command's size in 4-byte words.
 *
 * NOTE(review): usize is computed from the un-rounded size, and no check
 * of size against CELL_BUFFER_SIZE is visible here - confirm that the
 * producer guarantees the bound.
 */
506 cmd_batch(uint opcode
)
508 const uint buf
= (opcode
>> 8) & 0xff;
509 uint size
= (opcode
>> 16);
510 uint buffer
[CELL_BUFFER_SIZE
/ 4] ALIGN16_ATTRIB
;
511 const uint usize
= size
/ sizeof(uint
);
515 printf("SPU %u: BATCH buffer %u, len %u, from %p\n",
516 spu
.init
.id
, buf
, size
, spu
.init
.buffers
[buf
]);
518 ASSERT((opcode
& CELL_CMD_OPCODE_MASK
) == CELL_CMD_BATCH
);
520 ASSERT_ALIGN16(spu
.init
.buffers
[buf
]);
/* round the transfer length up to a multiple of 16 bytes */
522 size
= ROUNDUP16(size
);
524 ASSERT_ALIGN16(spu
.init
.buffers
[buf
]);
526 mfc_get(buffer
, /* dest */
527 (unsigned int) spu
.init
.buffers
[buf
], /* src */
532 wait_on_mask(1 << TAG_BATCH_BUFFER
);
534 /* Tell PPU we're done copying the buffer to local store */
536 printf("SPU %u: release batch buf %u\n", spu
.init
.id
, buf
);
/* pos is advanced inside each case, by the size of the command consumed */
539 for (pos
= 0; pos
< usize
; /* no incr */) {
540 switch (buffer
[pos
]) {
541 case CELL_CMD_STATE_FRAMEBUFFER
:
543 struct cell_command_framebuffer
*fb
544 = (struct cell_command_framebuffer
*) &buffer
[pos
];
545 cmd_state_framebuffer(fb
);
546 pos
+= sizeof(*fb
) / 4;
549 case CELL_CMD_CLEAR_SURFACE
:
551 struct cell_command_clear_surface
*clr
552 = (struct cell_command_clear_surface
*) &buffer
[pos
];
553 cmd_clear_surface(clr
);
554 pos
+= sizeof(*clr
) / 4;
557 case CELL_CMD_RENDER
:
559 struct cell_command_render
*render
560 = (struct cell_command_render
*) &buffer
[pos
];
/* pos_incr covers the indexes (and inline vertices) after the struct */
562 cmd_render(render
, &pos_incr
);
563 pos
+= sizeof(*render
) / 4 + pos_incr
;
566 case CELL_CMD_RELEASE_VERTS
:
568 struct cell_command_release_verts
*release
569 = (struct cell_command_release_verts
*) &buffer
[pos
];
570 cmd_release_verts(release
);
571 ASSERT(sizeof(*release
) == 8);
572 pos
+= sizeof(*release
) / 4;
575 case CELL_CMD_FINISH
:
/* the state commands below carry a one-word opcode followed by the
 * state struct, hence the "pos+1" payload address and the "1 +" */
579 case CELL_CMD_STATE_DEPTH_STENCIL
:
580 cmd_state_depth_stencil((struct pipe_depth_stencil_alpha_state
*)
582 pos
+= (1 + sizeof(struct pipe_depth_stencil_alpha_state
) / 4);
584 case CELL_CMD_STATE_SAMPLER
:
585 cmd_state_sampler((struct pipe_sampler_state
*) &buffer
[pos
+1]);
586 pos
+= (1 + sizeof(struct pipe_sampler_state
) / 4);
588 case CELL_CMD_STATE_TEXTURE
:
589 cmd_state_texture((struct cell_command_texture
*) &buffer
[pos
+1]);
590 pos
+= (1 + sizeof(struct cell_command_texture
) / 4);
592 case CELL_CMD_STATE_VERTEX_INFO
:
593 cmd_state_vertex_info((struct vertex_info
*) &buffer
[pos
+1]);
594 pos
+= (1 + sizeof(struct vertex_info
) / 4);
597 printf("SPU %u: bad opcode: 0x%x\n", spu
.init
.id
, buffer
[pos
]);
604 printf("SPU %u: BATCH complete\n", spu
.init
.id
);
609 * Temporary/simple main loop for SPEs: Get a command, execute it, repeat.
/*
 * Main command loop body (the enclosing function header is not visible
 * in this extract).  Each iteration reads an opcode from the inbound
 * mailbox, DMAs the command payload from spu.init.cmd into the local
 * `cmd`, and dispatches on (opcode & CELL_CMD_OPCODE_MASK).
 */
614 struct cell_command cmd
;
618 printf("SPU %u: Enter main loop\n", spu
.init
.id
);
/* the payload is fetched by DMA: size and address must suit a 16-byte
 * aligned transfer */
620 ASSERT((sizeof(struct cell_command
) & 0xf) == 0);
621 ASSERT_ALIGN16(&cmd
);
628 printf("SPU %u: Wait for cmd...\n", spu
.init
.id
);
630 /* read/wait from mailbox */
631 opcode
= (unsigned int) spu_read_in_mbox();
634 printf("SPU %u: got cmd 0x%x\n", spu
.init
.id
, opcode
);
636 /* command payload */
637 mfc_get(&cmd
, /* dest */
638 (unsigned int) spu
.init
.cmd
, /* src */
639 sizeof(struct cell_command
), /* bytes */
643 wait_on_mask( 1 << tag
);
645 switch (opcode
& CELL_CMD_OPCODE_MASK
) {
648 printf("SPU %u: EXIT\n", spu
.init
.id
);
651 case CELL_CMD_STATE_FRAMEBUFFER
:
652 cmd_state_framebuffer(&cmd
.fb
);
654 case CELL_CMD_CLEAR_SURFACE
:
655 cmd_clear_surface(&cmd
.clear
);
657 case CELL_CMD_RENDER
:
/* NOTE(review): this uses lowercase assert() while the rest of the file
 * uses ASSERT() - confirm that is intentional. */
660 cmd_render(&cmd
.render
, &pos_incr
);
661 assert(pos_incr
== 0);
667 case CELL_CMD_FINISH
:
671 printf("Bad opcode!\n");
677 printf("SPU %u: Exit main loop\n", spu
.init
.id
);
/*
 * Initialisation statements (the enclosing function header is not visible
 * in this extract): set every color and Z tile status to
 * TILE_STATUS_DEFINED and invalidate the texture cache.
 */
685 memset(tile_status
, TILE_STATUS_DEFINED
, sizeof(tile_status
));
686 memset(tile_status_z
, TILE_STATUS_DEFINED
, sizeof(tile_status_z
));
687 invalidate_tex_cache();
691 /* In some versions of the SDK the SPE main takes 'unsigned long' as a
692 * parameter. In others it takes 'unsigned long long'. Use a define to
693 * select between the two.
695 #ifdef SPU_MAIN_PARAM_LONG_LONG
696 typedef unsigned long long main_param_t
;
698 typedef unsigned long main_param_t
;
705 main(main_param_t speid
, main_param_t argp
)
714 printf("SPU: main() speid=%lu\n", speid
);
716 mfc_get(&spu
.init
, /* dest */
717 (unsigned int) argp
, /* src */
718 sizeof(struct cell_init_info
), /* bytes */
722 wait_on_mask( 1 << tag
);