1 /**************************************************************************
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
29 /* main() for Cell SPU code */
36 #include "spu_render.h"
37 #include "spu_texture.h"
39 //#include "spu_test.h"
40 #include "spu_vertex_shader.h"
41 #include "spu_dcache.h"
42 #include "cell/common.h"
43 #include "pipe/p_defines.h"
48 /usr/lib/gcc/spu/4.1.1/include/spu_mfcio.h
49 /opt/ibm/cell-sdk/prototype/sysroot/usr/include/libmisc.h
52 boolean Debug
= FALSE
;
54 struct spu_global spu
;
56 struct spu_vs_context draw
;
58 static unsigned char attribute_fetch_code_buffer
[136 * PIPE_MAX_ATTRIBS
]
61 static unsigned char depth_stencil_code_buffer
[4 * 64]
64 static unsigned char fb_blend_code_buffer
[4 * 64]
67 static unsigned char logicop_code_buffer
[4 * 64]
72 * Tell the PPU that this SPU has finished copying a buffer to
73 * local store and that it may be reused by the PPU.
74 * This is done by writting a 16-byte batch-buffer-status block back into
75 * main memory (in cell_context->buffer_status[]).
78 release_buffer(uint buffer
)
80 /* Evidently, using less than a 16-byte status doesn't work reliably */
81 static const uint status
[4] ALIGN16_ATTRIB
82 = {CELL_BUFFER_STATUS_FREE
, 0, 0, 0};
84 const uint index
= 4 * (spu
.init
.id
* CELL_NUM_BUFFERS
+ buffer
);
85 uint
*dst
= spu
.init
.buffer_status
+ index
;
87 ASSERT(buffer
< CELL_NUM_BUFFERS
);
89 mfc_put((void *) &status
, /* src in local memory */
90 (unsigned int) dst
, /* dst in main memory */
91 sizeof(status
), /* size */
92 TAG_MISC
, /* tag is unimportant */
99 * For tiles whose status is TILE_STATUS_CLEAR, write solid-filled
100 * tiles back to the main framebuffer.
103 really_clear_tiles(uint surfaceIndex
)
105 const uint num_tiles
= spu
.fb
.width_tiles
* spu
.fb
.height_tiles
;
108 if (surfaceIndex
== 0) {
109 clear_c_tile(&spu
.ctile
);
111 for (i
= spu
.init
.id
; i
< num_tiles
; i
+= spu
.init
.num_spus
) {
112 uint tx
= i
% spu
.fb
.width_tiles
;
113 uint ty
= i
/ spu
.fb
.width_tiles
;
114 if (spu
.ctile_status
[ty
][tx
] == TILE_STATUS_CLEAR
) {
115 put_tile(tx
, ty
, &spu
.ctile
, TAG_SURFACE_CLEAR
, 0);
120 clear_z_tile(&spu
.ztile
);
122 for (i
= spu
.init
.id
; i
< num_tiles
; i
+= spu
.init
.num_spus
) {
123 uint tx
= i
% spu
.fb
.width_tiles
;
124 uint ty
= i
/ spu
.fb
.width_tiles
;
125 if (spu
.ztile_status
[ty
][tx
] == TILE_STATUS_CLEAR
)
126 put_tile(tx
, ty
, &spu
.ctile
, TAG_SURFACE_CLEAR
, 1);
131 wait_on_mask(1 << TAG_SURFACE_CLEAR
);
137 cmd_clear_surface(const struct cell_command_clear_surface
*clear
)
139 const uint num_tiles
= spu
.fb
.width_tiles
* spu
.fb
.height_tiles
;
143 printf("SPU %u: CLEAR SURF %u to 0x%08x\n", spu
.init
.id
,
144 clear
->surface
, clear
->value
);
148 /* set all tile's status to CLEAR */
149 if (clear
->surface
== 0) {
150 memset(spu
.ctile_status
, TILE_STATUS_CLEAR
, sizeof(spu
.ctile_status
));
151 spu
.fb
.color_clear_value
= clear
->value
;
154 memset(spu
.ztile_status
, TILE_STATUS_CLEAR
, sizeof(spu
.ztile_status
));
155 spu
.fb
.depth_clear_value
= clear
->value
;
160 if (clear
->surface
== 0) {
161 spu
.fb
.color_clear_value
= clear
->value
;
162 clear_c_tile(&spu
.ctile
);
165 spu
.fb
.depth_clear_value
= clear
->value
;
166 clear_z_tile(&spu
.ztile
);
170 printf("SPU: %s num=%d w=%d h=%d\n",
171 __FUNCTION__, num_tiles, spu.fb.width_tiles, spu.fb.height_tiles);
174 for (i
= spu
.init
.id
; i
< num_tiles
; i
+= spu
.init
.num_spus
) {
175 uint tx
= i
% spu
.fb
.width_tiles
;
176 uint ty
= i
/ spu
.fb
.width_tiles
;
177 if (clear
->surface
== 0)
178 put_tile(tx
, ty
, &spu
.ctile
, TAG_SURFACE_CLEAR
, 0);
180 put_tile(tx
, ty
, &spu
.ztile
, TAG_SURFACE_CLEAR
, 1);
181 /* XXX we don't want this here, but it fixes bad tile results */
185 wait_on_mask(1 << TAG_SURFACE_CLEAR
);
189 printf("SPU %u: CLEAR SURF done\n", spu
.init
.id
);
194 cmd_release_verts(const struct cell_command_release_verts
*release
)
197 printf("SPU %u: RELEASE VERTS %u\n",
198 spu
.init
.id
, release
->vertex_buf
);
199 ASSERT(release
->vertex_buf
!= ~0U);
200 release_buffer(release
->vertex_buf
);
205 cmd_state_framebuffer(const struct cell_command_framebuffer
*cmd
)
208 printf("SPU %u: FRAMEBUFFER: %d x %d at %p, cformat 0x%x zformat 0x%x\n",
216 ASSERT_ALIGN16(cmd
->color_start
);
217 ASSERT_ALIGN16(cmd
->depth_start
);
219 spu
.fb
.color_start
= cmd
->color_start
;
220 spu
.fb
.depth_start
= cmd
->depth_start
;
221 spu
.fb
.color_format
= cmd
->color_format
;
222 spu
.fb
.depth_format
= cmd
->depth_format
;
223 spu
.fb
.width
= cmd
->width
;
224 spu
.fb
.height
= cmd
->height
;
225 spu
.fb
.width_tiles
= (spu
.fb
.width
+ TILE_SIZE
- 1) / TILE_SIZE
;
226 spu
.fb
.height_tiles
= (spu
.fb
.height
+ TILE_SIZE
- 1) / TILE_SIZE
;
228 switch (spu
.fb
.depth_format
) {
229 case PIPE_FORMAT_Z32_UNORM
:
230 case PIPE_FORMAT_Z24S8_UNORM
:
231 case PIPE_FORMAT_S8Z24_UNORM
:
234 case PIPE_FORMAT_Z16_UNORM
:
242 if (spu
.fb
.color_format
== PIPE_FORMAT_A8R8G8B8_UNORM
)
243 spu
.color_shuffle
= ((vector
unsigned char) {
244 12, 0, 4, 8, 0, 0, 0, 0,
245 0, 0, 0, 0, 0, 0, 0, 0});
246 else if (spu
.fb
.color_format
== PIPE_FORMAT_B8G8R8A8_UNORM
)
247 spu
.color_shuffle
= ((vector
unsigned char) {
248 8, 4, 0, 12, 0, 0, 0, 0,
249 0, 0, 0, 0, 0, 0, 0, 0});
256 cmd_state_blend(const struct cell_command_blend
*state
)
259 printf("SPU %u: BLEND: enabled %d\n",
263 ASSERT_ALIGN16(state
->base
);
265 if (state
->size
!= 0) {
266 mfc_get(fb_blend_code_buffer
,
267 (unsigned int) state
->base
, /* src */
268 ROUNDUP16(state
->size
),
272 wait_on_mask(1 << TAG_BATCH_BUFFER
);
273 spu
.blend
= (blend_func
) fb_blend_code_buffer
;
274 spu
.read_fb
= state
->read_fb
;
282 cmd_state_depth_stencil(const struct cell_command_depth_stencil_alpha_test
*state
)
285 printf("SPU %u: DEPTH_STENCIL: ztest %d\n",
289 ASSERT_ALIGN16(state
->base
);
291 if (state
->size
!= 0) {
292 mfc_get(depth_stencil_code_buffer
,
293 (unsigned int) state
->base
, /* src */
294 ROUNDUP16(state
->size
),
298 wait_on_mask(1 << TAG_BATCH_BUFFER
);
300 /* If there is no code, emit a return instruction.
302 depth_stencil_code_buffer
[0] = 0x35;
303 depth_stencil_code_buffer
[1] = 0x00;
304 depth_stencil_code_buffer
[2] = 0x00;
305 depth_stencil_code_buffer
[3] = 0x00;
308 spu
.frag_test
= (frag_test_func
) depth_stencil_code_buffer
;
309 spu
.read_depth
= state
->read_depth
;
310 spu
.read_stencil
= state
->read_stencil
;
315 cmd_state_sampler(const struct cell_command_sampler
*sampler
)
318 printf("SPU %u: SAMPLER [%u]\n",
319 spu
.init
.id
, sampler
->unit
);
321 spu
.sampler
[sampler
->unit
] = sampler
->state
;
322 if (spu
.sampler
[0].min_img_filter
== PIPE_TEX_FILTER_LINEAR
)
323 spu
.sample_texture
= sample_texture_bilinear
;
325 spu
.sample_texture
= sample_texture_nearest
;
330 cmd_state_texture(const struct cell_command_texture
*texture
)
332 const uint unit
= texture
->unit
;
333 const uint width
= texture
->width
;
334 const uint height
= texture
->height
;
337 printf("SPU %u: TEXTURE [%u] at %p size %u x %u\n", spu
.init
.id
,
338 texture
->unit
, texture
->start
,
339 texture
->width
, texture
->height
);
342 spu
.texture
[unit
].start
= texture
->start
;
343 spu
.texture
[unit
].width
= width
;
344 spu
.texture
[unit
].height
= height
;
346 spu
.texture
[unit
].tex_size
= (vector
float) { width
, height
, 0.0, 0.0};
347 spu
.texture
[unit
].tex_size_mask
= (vector
unsigned int)
348 { width
- 1, height
- 1, 0, 0 };
349 spu
.texture
[unit
].tex_size_x_mask
= spu_splats(width
- 1);
350 spu
.texture
[unit
].tex_size_y_mask
= spu_splats(height
- 1);
355 cmd_state_vertex_info(const struct vertex_info
*vinfo
)
358 printf("SPU %u: VERTEX_INFO num_attribs=%u\n", spu
.init
.id
,
361 ASSERT(vinfo
->num_attribs
>= 1);
362 ASSERT(vinfo
->num_attribs
<= 8);
363 memcpy(&spu
.vertex_info
, vinfo
, sizeof(*vinfo
));
368 cmd_state_vs_array_info(const struct cell_array_info
*vs_info
)
370 const unsigned attr
= vs_info
->attr
;
372 ASSERT(attr
< PIPE_MAX_ATTRIBS
);
373 draw
.vertex_fetch
.src_ptr
[attr
] = vs_info
->base
;
374 draw
.vertex_fetch
.pitch
[attr
] = vs_info
->pitch
;
375 draw
.vertex_fetch
.size
[attr
] = vs_info
->size
;
376 draw
.vertex_fetch
.code_offset
[attr
] = vs_info
->function_offset
;
377 draw
.vertex_fetch
.dirty
= 1;
385 printf("SPU %u: FINISH\n", spu
.init
.id
);
386 really_clear_tiles(0);
387 /* wait for all outstanding DMAs to finish */
388 mfc_write_tag_mask(~0);
389 mfc_read_tag_status_all();
390 /* send mbox message to PPU */
391 spu_write_out_mbox(CELL_CMD_FINISH
);
396 * Execute a batch of commands
397 * The opcode param encodes the location of the buffer and its size.
400 cmd_batch(uint opcode
)
402 const uint buf
= (opcode
>> 8) & 0xff;
403 uint size
= (opcode
>> 16);
404 uint64_t buffer
[CELL_BUFFER_SIZE
/ 8] ALIGN16_ATTRIB
;
405 const unsigned usize
= size
/ sizeof(buffer
[0]);
409 printf("SPU %u: BATCH buffer %u, len %u, from %p\n",
410 spu
.init
.id
, buf
, size
, spu
.init
.buffers
[buf
]);
412 ASSERT((opcode
& CELL_CMD_OPCODE_MASK
) == CELL_CMD_BATCH
);
414 ASSERT_ALIGN16(spu
.init
.buffers
[buf
]);
416 size
= ROUNDUP16(size
);
418 ASSERT_ALIGN16(spu
.init
.buffers
[buf
]);
420 mfc_get(buffer
, /* dest */
421 (unsigned int) spu
.init
.buffers
[buf
], /* src */
426 wait_on_mask(1 << TAG_BATCH_BUFFER
);
428 /* Tell PPU we're done copying the buffer to local store */
430 printf("SPU %u: release batch buf %u\n", spu
.init
.id
, buf
);
433 for (pos
= 0; pos
< usize
; /* no incr */) {
434 switch (buffer
[pos
]) {
435 case CELL_CMD_STATE_FRAMEBUFFER
:
437 struct cell_command_framebuffer
*fb
438 = (struct cell_command_framebuffer
*) &buffer
[pos
];
439 cmd_state_framebuffer(fb
);
440 pos
+= sizeof(*fb
) / 8;
443 case CELL_CMD_CLEAR_SURFACE
:
445 struct cell_command_clear_surface
*clr
446 = (struct cell_command_clear_surface
*) &buffer
[pos
];
447 cmd_clear_surface(clr
);
448 pos
+= sizeof(*clr
) / 8;
451 case CELL_CMD_RENDER
:
453 struct cell_command_render
*render
454 = (struct cell_command_render
*) &buffer
[pos
];
456 cmd_render(render
, &pos_incr
);
460 case CELL_CMD_RELEASE_VERTS
:
462 struct cell_command_release_verts
*release
463 = (struct cell_command_release_verts
*) &buffer
[pos
];
464 cmd_release_verts(release
);
465 pos
+= sizeof(*release
) / 8;
468 case CELL_CMD_FINISH
:
472 case CELL_CMD_STATE_BLEND
:
473 cmd_state_blend((struct cell_command_blend
*) &buffer
[pos
+1]);
474 pos
+= (1 + ROUNDUP8(sizeof(struct cell_command_blend
)) / 8);
476 case CELL_CMD_STATE_DEPTH_STENCIL
:
477 cmd_state_depth_stencil((struct cell_command_depth_stencil_alpha_test
*)
479 pos
+= (1 + ROUNDUP8(sizeof(struct cell_command_depth_stencil_alpha_test
)) / 8);
481 case CELL_CMD_STATE_SAMPLER
:
483 struct cell_command_sampler
*sampler
484 = (struct cell_command_sampler
*) &buffer
[pos
];
485 cmd_state_sampler(sampler
);
486 pos
+= sizeof(*sampler
) / 8;
489 case CELL_CMD_STATE_TEXTURE
:
491 struct cell_command_texture
*texture
492 = (struct cell_command_texture
*) &buffer
[pos
];
493 cmd_state_texture(texture
);
494 pos
+= sizeof(*texture
) / 8;
497 case CELL_CMD_STATE_VERTEX_INFO
:
498 cmd_state_vertex_info((struct vertex_info
*) &buffer
[pos
+1]);
499 pos
+= (1 + ROUNDUP8(sizeof(struct vertex_info
)) / 8);
501 case CELL_CMD_STATE_VIEWPORT
:
502 (void) memcpy(& draw
.viewport
, &buffer
[pos
+1],
503 sizeof(struct pipe_viewport_state
));
504 pos
+= (1 + ROUNDUP8(sizeof(struct pipe_viewport_state
)) / 8);
506 case CELL_CMD_STATE_UNIFORMS
:
507 draw
.constants
= (const float (*)[4]) (uintptr_t) buffer
[pos
+ 1];
510 case CELL_CMD_STATE_VS_ARRAY_INFO
:
511 cmd_state_vs_array_info((struct cell_array_info
*) &buffer
[pos
+1]);
512 pos
+= (1 + ROUNDUP8(sizeof(struct cell_array_info
)) / 8);
514 case CELL_CMD_STATE_BIND_VS
:
515 spu_bind_vertex_shader(&draw
,
516 (struct cell_shader_info
*) &buffer
[pos
+1]);
517 pos
+= (1 + ROUNDUP8(sizeof(struct cell_shader_info
)) / 8);
519 case CELL_CMD_STATE_ATTRIB_FETCH
: {
520 struct cell_attribute_fetch_code
*code
=
521 (struct cell_attribute_fetch_code
*) &buffer
[pos
+1];
523 mfc_get(attribute_fetch_code_buffer
,
524 (unsigned int) code
->base
, /* src */
529 wait_on_mask(1 << TAG_BATCH_BUFFER
);
531 draw
.vertex_fetch
.code
= attribute_fetch_code_buffer
;
532 pos
+= (1 + ROUNDUP8(sizeof(struct cell_attribute_fetch_code
)) / 8);
535 case CELL_CMD_STATE_LOGICOP
: {
536 struct cell_command_logicop
*code
=
537 (struct cell_command_logicop
*) &buffer
[pos
+1];
539 mfc_get(logicop_code_buffer
,
540 (unsigned int) code
->base
, /* src */
545 wait_on_mask(1 << TAG_BATCH_BUFFER
);
547 spu
.logicop
= (logicop_func
) logicop_code_buffer
;
548 pos
+= (1 + ROUNDUP8(sizeof(struct cell_command_logicop
)) / 8);
551 case CELL_CMD_FLUSH_BUFFER_RANGE
: {
552 struct cell_buffer_range
*br
= (struct cell_buffer_range
*)
555 spu_dcache_mark_dirty((unsigned) br
->base
, br
->size
);
556 pos
+= (1 + ROUNDUP8(sizeof(struct cell_buffer_range
)) / 8);
560 printf("SPU %u: bad opcode: 0x%llx\n", spu
.init
.id
, buffer
[pos
]);
567 printf("SPU %u: BATCH complete\n", spu
.init
.id
);
572 * Temporary/simple main loop for SPEs: Get a command, execute it, repeat.
577 struct cell_command cmd
;
581 printf("SPU %u: Enter main loop\n", spu
.init
.id
);
583 ASSERT((sizeof(struct cell_command
) & 0xf) == 0);
584 ASSERT_ALIGN16(&cmd
);
591 printf("SPU %u: Wait for cmd...\n", spu
.init
.id
);
593 /* read/wait from mailbox */
594 opcode
= (unsigned int) spu_read_in_mbox();
597 printf("SPU %u: got cmd 0x%x\n", spu
.init
.id
, opcode
);
599 /* command payload */
600 mfc_get(&cmd
, /* dest */
601 (unsigned int) spu
.init
.cmd
, /* src */
602 sizeof(struct cell_command
), /* bytes */
606 wait_on_mask( 1 << tag
);
609 * NOTE: most commands should be contained in a batch buffer
612 switch (opcode
& CELL_CMD_OPCODE_MASK
) {
615 printf("SPU %u: EXIT\n", spu
.init
.id
);
618 case CELL_CMD_VS_EXECUTE
:
619 spu_execute_vertex_shader(&draw
, &cmd
.vs
);
625 printf("Bad opcode!\n");
631 printf("SPU %u: Exit main loop\n", spu
.init
.id
);
641 memset(spu
.ctile_status
, TILE_STATUS_DEFINED
, sizeof(spu
.ctile_status
));
642 memset(spu
.ztile_status
, TILE_STATUS_DEFINED
, sizeof(spu
.ztile_status
));
643 invalidate_tex_cache();
648 /* In some versions of the SDK the SPE main takes 'unsigned long' as a
649 * parameter. In others it takes 'unsigned long long'. Use a define to
650 * select between the two.
652 #ifdef SPU_MAIN_PARAM_LONG_LONG
653 typedef unsigned long long main_param_t
;
655 typedef unsigned long main_param_t
;
662 main(main_param_t speid
, main_param_t argp
)
668 ASSERT(sizeof(tile_t
) == TILE_SIZE
* TILE_SIZE
* 4);
669 ASSERT(sizeof(struct cell_command_render
) % 8 == 0);
674 printf("SPU: main() speid=%lu\n", (unsigned long) speid
);
676 mfc_get(&spu
.init
, /* dest */
677 (unsigned int) argp
, /* src */
678 sizeof(struct cell_init_info
), /* bytes */
682 wait_on_mask( 1 << tag
);