1 /**************************************************************************
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
29 /* main() for Cell SPU code */
35 #include <spu_mfcio.h>
39 #include "pipe/cell/common.h"
40 #include "pipe/p_defines.h"
/*
 * Header locations, for reference:
 *   /usr/lib/gcc/spu/4.1.1/include/spu_mfcio.h
 *   /opt/ibm/cell-sdk/prototype/sysroot/usr/include/libmisc.h
 */
48 static boolean Debug
= FALSE
;
50 volatile struct cell_init_info init
;
52 struct framebuffer fb
;
54 uint ctile
[TILE_SIZE
][TILE_SIZE
] ALIGN16_ATTRIB
;
55 ushort ztile
[TILE_SIZE
][TILE_SIZE
] ALIGN16_ATTRIB
;
/*
 * wait_on_mask: wait for DMA completion on the tag group(s) in tagMask.
 *
 * tagMask - bit mask of MFC tag groups; every call visible in this file
 *           passes a single shifted bit of the form (1 << tag).
 *
 * The mask is written to the MFC tag mask and the tag status is then
 * read with the `any` variant, so with a multi-bit mask this can return
 * as soon as one of the selected groups has completed.
 *
 * NOTE(review): the line carrying the return type and the braces of the
 * body are not present in this copy of the file.
 */
62 wait_on_mask(unsigned tagMask
)
/* select which tag groups the status read below applies to */
64 mfc_write_tag_mask( tagMask
);
65 /* wait for completion of _any_ DMAs specified by tagMask */
66 mfc_read_tag_status_any();
/*
 * get_tile: issue an mfc_get that copies one framebuffer tile from main
 * memory into the local buffer `tile`.
 *
 * fb     - framebuffer description; width_tiles, height_tiles and the
 *          color_start / depth_start base addresses are read.
 * tx, ty - tile coordinates; asserted to lie inside the tile grid.
 * tile   - destination buffer in local memory.
 * zBuf   - used in the body: nonzero selects the depth surface
 *          (2 bytes per pixel), zero the color surface (4 bytes per
 *          pixel).
 *
 * No wait for the transfer is visible here; callers in this file use
 * wait_on_mask() on the tag afterwards.
 *
 * NOTE(review): the end of the parameter list and the trailing mfc_get
 * arguments are not present in this copy of the file. Call sites pass a
 * TAG_xxx constant and then the zBuf flag as the 5th and 6th arguments.
 */
72 get_tile(const struct framebuffer
*fb
, uint tx
, uint ty
, uint
*tile
,
/* linear tile index, row-major over the tile grid */
75 const uint offset
= ty
* fb
->width_tiles
+ tx
;
/* size of one tile in bytes for the selected surface */
76 const uint bytesPerTile
= TILE_SIZE
* TILE_SIZE
* (zBuf
? 2 : 4);
/* base address of the selected surface in main memory */
77 const ubyte
*src
= zBuf
? fb
->depth_start
: fb
->color_start
;
/* the address computation treats the surface as an array of whole tiles */
79 src
+= offset
* bytesPerTile
;
81 assert(tx
< fb
->width_tiles
);
82 assert(ty
< fb
->height_tiles
);
/* NOTE(review): bytesPerTile is uint but is printed with %d */
85 printf("get_tile: dest: %p src: 0x%x size: %d\n",
86 tile, (unsigned int) src, bytesPerTile);
88 mfc_get(tile
, /* dest in local memory */
89 (unsigned int) src
, /* src in main memory */
/*
 * put_tile: issue an mfc_put that copies the local buffer `tile` to one
 * framebuffer tile in main memory.
 *
 * fb     - framebuffer description; width_tiles, height_tiles and the
 *          color_start / depth_start base addresses are read.
 * tx, ty - tile coordinates; asserted to lie inside the tile grid.
 * tile   - source buffer in local memory; checked with ASSERT_ALIGN16.
 * zBuf   - used in the body: nonzero selects the depth surface
 *          (2 bytes per pixel), zero the color surface (4 bytes per
 *          pixel).
 *
 * No wait for the transfer is visible here; callers in this file use
 * wait_on_mask() on the tag afterwards.
 *
 * NOTE(review): the end of the parameter list and the trailing mfc_put
 * arguments are not present in this copy of the file. Call sites pass a
 * TAG_xxx constant and then the zBuf flag as the 5th and 6th arguments.
 */
98 put_tile(const struct framebuffer
*fb
, uint tx
, uint ty
, const uint
*tile
,
/* linear tile index, row-major over the tile grid */
101 const uint offset
= ty
* fb
->width_tiles
+ tx
;
/* size of one tile in bytes for the selected surface */
102 const uint bytesPerTile
= TILE_SIZE
* TILE_SIZE
* (zBuf
? 2 : 4);
/* base address of the selected surface in main memory */
103 ubyte
*dst
= zBuf
? fb
->depth_start
: fb
->color_start
;
/* the address computation treats the surface as an array of whole tiles */
105 dst
+= offset
* bytesPerTile
;
107 assert(tx
< fb
->width_tiles
);
108 assert(ty
< fb
->height_tiles
);
109 ASSERT_ALIGN16(tile
);
/* NOTE(review): bytesPerTile is uint but is printed with %d */
111 printf("put_tile: src: %p dst: 0x%x size: %d\n",
112 tile, (unsigned int) dst, bytesPerTile);
/* the cast drops the const qualifier of tile for the mfc_put call */
114 mfc_put((void *) tile
, /* src in local memory */
115 (unsigned int) dst
, /* dst in main memory */
/*
 * clear_surface: fill a local tile with clear->value and write it to
 * framebuffer tiles with put_tile().
 *
 * clear - command payload; surface and value are read. surface == 0
 *         selects the color tile (ctile, put_tile with zBuf = 0); the
 *         ztile statements use zBuf = 1.
 *
 * Tiles are visited starting at index init.id in steps of init.num_spus,
 * presumably so that each SPU handles a distinct subset of the tiles.
 * Every put_tile() here uses TAG_SURFACE_CLEAR, and the function waits
 * on that tag.
 *
 * NOTE(review): several lines of this function are absent from this
 * copy of the file (the declarations of i and j, braces, and presumably
 * the else keywords that pair each ztile statement with the
 * surface == 0 test). Confirm the exact control flow against the
 * complete file.
 */
125 clear_surface(const struct cell_command_clear_surface
*clear
)
/* total number of tiles in the framebuffer tile grid */
127 uint num_tiles
= fb
.width_tiles
* fb
.height_tiles
;
/* fill the local tile with the clear value */
130 if (clear
->surface
== 0) {
131 for (i
= 0; i
< TILE_SIZE
; i
++)
132 for (j
= 0; j
< TILE_SIZE
; j
++)
133 ctile
[i
][j
] = clear
->value
;
/* NOTE(review): ztile elements are ushort, so the stored value is
 * converted to that type on assignment */
136 for (i
= 0; i
< TILE_SIZE
; i
++)
137 for (j
= 0; j
< TILE_SIZE
; j
++)
138 ztile
[i
][j
] = clear
->value
;
/* NOTE(review): the uint arguments are printed with %d */
142 printf("SPU: %s num=%d w=%d h=%d\n",
143 __FUNCTION__, num_tiles, fb.width_tiles, fb.height_tiles);
/* visit every init.num_spus-th tile, starting at init.id */
146 for (i
= init
.id
; i
< num_tiles
; i
+= init
.num_spus
) {
/* convert the linear tile index back to tile coordinates */
147 uint tx
= i
% fb
.width_tiles
;
148 uint ty
= i
/ fb
.width_tiles
;
149 if (clear
->surface
== 0)
150 put_tile(&fb
, tx
, ty
, (uint
*) ctile
, TAG_SURFACE_CLEAR
, 0);
152 put_tile(&fb
, tx
, ty
, (uint
*) ztile
, TAG_SURFACE_CLEAR
, 1);
153 /* XXX we don't want this here, but it fixes bad tile results */
157 wait_on_mask(1 << TAG_SURFACE_CLEAR
);
/**
 * Given a rendering command's bounding box (in pixels) compute the
 * location of the corresponding screen tile bounding box.
 */
/*
 * tile_bounding_box: convert the pixel bounds of a render command into
 * a bounding box measured in tiles.
 *
 * render          - command whose xmin / ymin / xmax / ymax are read.
 * txmin, tymin    - out: tile coordinates of the box origin.
 * box_num_tiles   - out: number of tiles in the box.
 * box_width_tiles - out: width of the box in tiles.
 *
 * Two variants are visible: a debug one that covers the whole tile grid
 * of fb, and a computed one that divides each bound by TILE_SIZE.
 *
 * NOTE(review): txmax and tymax are declared in both variants, so they
 * are presumably alternatives selected by preprocessor lines that are
 * absent from this copy of the file - confirm which one is compiled.
 *
 * NOTE(review): the bounds are printed with %g, so they look like
 * floating-point values. The (uint) cast is applied before the division
 * and truncates; a negative bound would not convert meaningfully.
 * Presumably the sender keeps the bounds non-negative and inside the
 * framebuffer - confirm.
 */
167 tile_bounding_box(const struct cell_command_render
*render
,
168 uint
*txmin
, uint
*tymin
,
169 uint
*box_num_tiles
, uint
*box_width_tiles
)
172 /* Debug: full-window bounding box */
173 uint txmax
= fb
.width_tiles
- 1;
174 uint tymax
= fb
.height_tiles
- 1;
177 *box_num_tiles
= fb
.width_tiles
* fb
.height_tiles
;
178 *box_width_tiles
= fb
.width_tiles
;
180 uint txmax
, tymax
, box_height_tiles
;
/* pixel bounds to tile indices */
182 *txmin
= (uint
) render
->xmin
/ TILE_SIZE
;
183 *tymin
= (uint
) render
->ymin
/ TILE_SIZE
;
184 txmax
= (uint
) render
->xmax
/ TILE_SIZE
;
185 tymax
= (uint
) render
->ymax
/ TILE_SIZE
;
/* the max tile indices are inclusive, hence the + 1 */
186 *box_width_tiles
= txmax
- *txmin
+ 1;
187 box_height_tiles
= tymax
- *tymin
+ 1;
188 *box_num_tiles
= *box_width_tiles
* box_height_tiles
;
191 printf("Render bounds: %g, %g ... %g, %g\n",
192 render
->xmin
, render
->ymin
, render
->xmax
, render
->ymax
);
193 printf("Render tiles: %u, %u .. %u, %u\n", *txmin
, *tymin
, txmax
, tymax
);
/*
 * render: draw the triangles of one render command into the tiles
 * visited by this SPU.
 *
 * render - command payload; vertex_data, num_verts, num_attribs and
 *          prim_type are read here, and the bounds are read by
 *          tile_bounding_box().
 *
 * Steps visible in this copy of the file:
 *  1. DMA the vertex data (num_verts * num_attribs * 4 floats) from
 *     render->vertex_data into the local prim_buffer and wait on
 *     TAG_VERTEX_BUFFER.
 *  2. Get the tile bounding box from tile_bounding_box() and wait on
 *     TAG_SURFACE_CLEAR.
 *  3. For box tiles init.id, init.id + init.num_spus, ...: start
 *     fetching the color tile (and the Z tile when fb.depth_format is
 *     PIPE_FORMAT_Z16_UNORM), copy attribute slots 0 and 1 of every
 *     group of three vertices into a prim_header and pass it to
 *     tri_draw(), wait for the tile reads, write the tile(s) back with
 *     put_tile() and wait for those writes.
 *
 * NOTE(review): many lines of this function are absent from this copy
 * of the file (braces, the trailing mfc_get arguments, one printf
 * argument line, and the terminator of the comment that starts with
 * `Start fetching color/z tiles`). Confirm details against the complete
 * file.
 *
 * NOTE(review): no check that vertex_bytes fits into prim_buffer is
 * visible before the mfc_get into it.
 *
 * NOTE(review): the triangle printf indexes prim_buffer.vertex with
 * j*3+k although j already advances by 3 and the COPY_4V calls use
 * j+k; the printf indices look wrong - confirm.
 *
 * NOTE(review): the triangle loop reads vertices j, j+1 and j+2, so it
 * presumably relies on num_verts being a multiple of 3 - confirm.
 */
200 render(const struct cell_command_render
*render
)
202 struct cell_prim_buffer prim_buffer ALIGN16_ATTRIB
;
203 uint i
, j
, vertex_bytes
;
206 printf("SPU %u: RENDER buffer dst=%p src=%p size=%d\n",
208 &prim_buffer, render->vertex_data, (int)sizeof(prim_buffer));
211 ASSERT_ALIGN16(render
->vertex_data
);
212 ASSERT_ALIGN16(&prim_buffer
);
214 /* how much vertex data */
215 vertex_bytes
= render
->num_verts
* render
->num_attribs
* 4 * sizeof(float);
217 /* get vertex data from main memory */
218 mfc_get(&prim_buffer
, /* dest */
219 (unsigned int) render
->vertex_data
, /* src */
220 vertex_bytes
, /* size */
224 wait_on_mask(1 << TAG_VERTEX_BUFFER
);
226 /* find tiles which intersect the prim bounding box */
227 uint txmin
, tymin
, box_width_tiles
, box_num_tiles
;
228 tile_bounding_box(render
, &txmin
, &tymin
,
229 &box_num_tiles
, &box_width_tiles
);
231 /* make sure any pending clears have completed */
232 wait_on_mask(1 << TAG_SURFACE_CLEAR
);
234 /* loop over tiles */
235 for (i
= init
.id
; i
< box_num_tiles
; i
+= init
.num_spus
) {
/* index within the box converted to framebuffer tile coordinates */
236 const uint tx
= txmin
+ i
% box_width_tiles
;
237 const uint ty
= tymin
+ i
/ box_width_tiles
;
239 assert(tx
< fb
.width_tiles
);
240 assert(ty
< fb
.height_tiles
);
242 /* Start fetching color/z tiles. We'll wait for completion when
243 * we need read/write to them later in triangle rasterization.
245 get_tile(&fb
, tx
, ty
, (uint
*) ctile
, TAG_READ_TILE_COLOR
, 0);
246 if (fb
.depth_format
== PIPE_FORMAT_Z16_UNORM
) {
247 get_tile(&fb
, tx
, ty
, (uint
*) ztile
, TAG_READ_TILE_Z
, 1);
250 assert(render
->prim_type
== PIPE_PRIM_TRIANGLES
);
253 for (j
= 0; j
< render
->num_verts
; j
+= 3) {
254 struct prim_header prim
;
257 printf(" %u: Triangle %g,%g %g,%g %g,%g\n",
259 prim_buffer.vertex[j*3+0][0][0],
260 prim_buffer.vertex[j*3+0][0][1],
261 prim_buffer.vertex[j*3+1][0][0],
262 prim_buffer.vertex[j*3+1][0][1],
263 prim_buffer.vertex[j*3+2][0][0],
264 prim_buffer.vertex[j*3+2][0][1]);
268 COPY_4V(prim
.v
[0].data
[0], prim_buffer
.vertex
[j
+0][0]);
269 COPY_4V(prim
.v
[1].data
[0], prim_buffer
.vertex
[j
+1][0]);
270 COPY_4V(prim
.v
[2].data
[0], prim_buffer
.vertex
[j
+2][0]);
273 COPY_4V(prim
.v
[0].data
[1], prim_buffer
.vertex
[j
+0][1]);
274 COPY_4V(prim
.v
[1].data
[1], prim_buffer
.vertex
[j
+1][1]);
275 COPY_4V(prim
.v
[2].data
[1], prim_buffer
.vertex
[j
+2][1]);
277 tri_draw(&prim
, tx
, ty
);
280 /* in case nothing was drawn, wait now for completion */
282 wait_on_mask(1 << TAG_READ_TILE_COLOR
);
283 if (fb
.depth_format
== PIPE_FORMAT_Z16_UNORM
) {
284 wait_on_mask(1 << TAG_READ_TILE_Z
); /* XXX temporary */
287 /* XXX IF we wrote anything into the tile... */
/* write the local tile(s) back to the framebuffer */
289 put_tile(&fb
, tx
, ty
, (uint
*) ctile
, TAG_WRITE_TILE_COLOR
, 0);
290 if (fb
.depth_format
== PIPE_FORMAT_Z16_UNORM
) {
291 put_tile(&fb
, tx
, ty
, (uint
*) ztile
, TAG_WRITE_TILE_Z
, 1);
/* wait for the write-back so the local tiles can be reused */
294 wait_on_mask(1 << TAG_WRITE_TILE_COLOR
); /* XXX temp */
295 if (fb
.depth_format
== PIPE_FORMAT_Z16_UNORM
) {
296 wait_on_mask(1 << TAG_WRITE_TILE_Z
); /* XXX temporary */
/**
 * Temporary/simple main loop for SPEs: Get a command, execute it, repeat.
 */
/*
 * Command loop body as visible in this copy of the file: read an opcode
 * from the inbound mailbox, DMA the command payload from init.cmd into
 * cmd, wait for that transfer, then handle the command.
 *
 * Handling visible here:
 *   CELL_CMD_FRAMEBUFFER   - copy the surface addresses, formats and
 *                            size into fb and derive the tile grid size.
 *   CELL_CMD_CLEAR_SURFACE - call clear_surface(&cmd.clear).
 *   CELL_CMD_RENDER        - print the vertex count and primitive type
 *                            (no further handling is visible here).
 *   CELL_CMD_FINISH        - wait for all outstanding DMAs, then write
 *                            CELL_CMD_FINISH to the outbound mailbox.
 *   The EXIT and Bad opcode messages presumably belong to an exit case
 *   and to the default case - confirm.
 *
 * NOTE(review): the function header, the loop and switch statements,
 * the declarations of opcode and tag, the trailing mfc_get arguments
 * and several printf argument lines are absent from this copy of the
 * file.
 */
308 struct cell_command cmd
;
312 printf("SPU %u: Enter main loop\n", init
.id
);
/* the payload is fetched by DMA below: its size must be a multiple of
 * 16 bytes and the destination must be 16-byte aligned */
314 assert((sizeof(struct cell_command
) & 0xf) == 0);
315 ASSERT_ALIGN16(&cmd
);
322 printf("SPU %u: Wait for cmd...\n", init
.id
);
324 /* read/wait from mailbox */
325 opcode
= (unsigned int) spu_read_in_mbox();
328 printf("SPU %u: got cmd %u\n", init
.id
, opcode
);
330 /* command payload */
331 mfc_get(&cmd
, /* dest */
332 (unsigned int) init
.cmd
, /* src */
333 sizeof(struct cell_command
), /* bytes */
/* the payload must have arrived before cmd is read */
337 wait_on_mask( 1 << tag
);
342 printf("SPU %u: EXIT\n", init
.id
);
345 case CELL_CMD_FRAMEBUFFER
:
347 printf("SPU %u: FRAMEBUFFER: %d x %d at %p, cformat 0x%x zformat 0x%x\n",
353 cmd
.fb
.depth_format
);
/* copy the framebuffer description out of the command payload */
354 fb
.color_start
= cmd
.fb
.color_start
;
355 fb
.depth_start
= cmd
.fb
.depth_start
;
356 fb
.color_format
= cmd
.fb
.color_format
;
357 fb
.depth_format
= cmd
.fb
.depth_format
;
358 fb
.width
= cmd
.fb
.width
;
359 fb
.height
= cmd
.fb
.height
;
/* round up so that partially covered edge tiles are counted */
360 fb
.width_tiles
= (fb
.width
+ TILE_SIZE
- 1) / TILE_SIZE
;
361 fb
.height_tiles
= (fb
.height
+ TILE_SIZE
- 1) / TILE_SIZE
;
363 printf("SPU %u: %u x %u tiles\n",
364 init.id, fb.width_tiles, fb.height_tiles);
367 case CELL_CMD_CLEAR_SURFACE
:
369 printf("SPU %u: CLEAR SURF %u to 0x%08x\n", init
.id
,
370 cmd
.clear
.surface
, cmd
.clear
.value
);
371 clear_surface(&cmd
.clear
);
373 case CELL_CMD_RENDER
:
375 printf("SPU %u: RENDER %u verts, prim %u\n",
376 init
.id
, cmd
.render
.num_verts
, cmd
.render
.prim_type
);
380 case CELL_CMD_FINISH
:
382 printf("SPU %u: FINISH\n", init
.id
);
383 /* wait for all outstanding DMAs to finish */
384 mfc_write_tag_mask(~0);
385 mfc_read_tag_status_all();
386 /* send mbox message to PPU */
387 spu_write_out_mbox(CELL_CMD_FINISH
);
390 printf("Bad opcode!\n");
396 printf("SPU %u: Exit main loop\n", init
.id
);
403 * Note: example programs declare params as 'unsigned long long' but
407 main(unsigned long speid
, unsigned long argp
)
416 printf("SPU: main() speid=%lu\n", speid
);
418 mfc_get(&init
, /* dest */
419 (unsigned int) argp
, /* src */
420 sizeof(struct cell_init_info
), /* bytes */
424 wait_on_mask( 1 << tag
);